Repository: OlafenwaMoses/ImageAI
Branch: master
Commit: 2156d1a39a19
Files: 152
Total size: 1.1 MB

Directory structure:
gitextract_hhet6k5q/

├── .codecov.yml
├── .github/
│   ├── FUNDING.yml
│   └── workflows/
│       └── build.yml
├── .gitignore
├── .travis.yml
├── BACKEND_MIGRATION.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── examples/
│   ├── camera_feed_detection.py
│   ├── custom_detection.py
│   ├── custom_detection_array_input_output.py
│   ├── custom_detection_extract_objects.py
│   ├── custom_detection_from_array_extract_objects_array.py
│   ├── custom_detection_from_file_extract_objects_array.py
│   ├── custom_detection_train.py
│   ├── custom_detection_video.py
│   ├── custom_model_prediction.py
│   ├── custom_model_training.py
│   ├── image_custom_object_detection.py
│   ├── image_prediction.py
│   ├── object_detection.py
│   ├── video_analysis_per_frame.py
│   ├── video_analysis_per_second.py
│   ├── video_custom_object_detection.py
│   └── video_object_detection.py
├── imageai/
│   ├── Classification/
│   │   ├── CUSTOMCLASSIFICATION.md
│   │   ├── CUSTOMTRAINING.md
│   │   ├── Custom/
│   │   │   ├── __init__.py
│   │   │   ├── data_transformation.py
│   │   │   └── training_params.py
│   │   ├── README.md
│   │   ├── __init__.py
│   │   └── imagenet_classes.txt
│   ├── Detection/
│   │   ├── Custom/
│   │   │   ├── CUSTOMDETECTION.md
│   │   │   ├── CUSTOMDETECTIONTRAINING.md
│   │   │   ├── CUSTOMVIDEODETECTION.md
│   │   │   ├── __init__.py
│   │   │   └── yolo/
│   │   │       ├── __init__.py
│   │   │       ├── compute_loss.py
│   │   │       ├── custom_anchors.py
│   │   │       ├── dataset.py
│   │   │       ├── metric.py
│   │   │       └── validate.py
│   │   ├── README.md
│   │   ├── VIDEO.md
│   │   ├── __init__.py
│   │   ├── coco91_classes.txt
│   │   └── coco_classes.txt
│   ├── __init__.py
│   ├── backend_check/
│   │   ├── __init__.py
│   │   ├── backend_check.py
│   │   └── model_extension.py
│   ├── densenet121/
│   │   └── __init__.py
│   ├── inceptionv3/
│   │   └── __init__.py
│   ├── mobilenetv2/
│   │   └── __init__.py
│   ├── resnet50/
│   │   └── __init__.py
│   ├── retinanet/
│   │   ├── __init__.py
│   │   └── utils.py
│   └── yolov3/
│       ├── __init__.py
│       ├── tiny_yolov3.py
│       ├── utils.py
│       └── yolov3.py
├── imageai_tf_deprecated/
│   ├── Classification/
│   │   ├── CUSTOMCLASSIFICATION.md
│   │   ├── CUSTOMTRAINING.md
│   │   ├── Custom/
│   │   │   └── __init__.py
│   │   ├── README.md
│   │   └── __init__.py
│   ├── Detection/
│   │   ├── Custom/
│   │   │   ├── CUSTOMDETECTION.md
│   │   │   ├── CUSTOMDETECTIONTRAINING.md
│   │   │   ├── CUSTOMVIDEODETECTION.md
│   │   │   ├── __init__.py
│   │   │   ├── callbacks.py
│   │   │   ├── evaluate.py
│   │   │   ├── gen_anchors.py
│   │   │   ├── generator.py
│   │   │   ├── utils/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bbox.py
│   │   │   │   ├── colors.py
│   │   │   │   ├── image.py
│   │   │   │   ├── multi_gpu_model.py
│   │   │   │   └── utils.py
│   │   │   └── voc.py
│   │   ├── README.md
│   │   ├── VIDEO.md
│   │   ├── YOLO/
│   │   │   ├── __init__.py
│   │   │   ├── utils.py
│   │   │   └── yolov3.py
│   │   ├── __init__.py
│   │   └── keras_retinanet/
│   │       ├── __init__.py
│   │       ├── backend/
│   │       │   ├── __init__.py
│   │       │   └── backend.py
│   │       ├── bin/
│   │       │   ├── __init__.py
│   │       │   ├── convert_model.py
│   │       │   ├── debug.py
│   │       │   ├── evaluate.py
│   │       │   └── train.py
│   │       ├── callbacks/
│   │       │   ├── __init__.py
│   │       │   ├── coco.py
│   │       │   ├── common.py
│   │       │   └── eval.py
│   │       ├── initializers.py
│   │       ├── layers/
│   │       │   ├── __init__.py
│   │       │   ├── _misc.py
│   │       │   └── filter_detections.py
│   │       ├── losses.py
│   │       ├── models/
│   │       │   ├── __init__.py
│   │       │   ├── densenet.py
│   │       │   ├── effnet.py
│   │       │   ├── mobilenet.py
│   │       │   ├── resnet.py
│   │       │   ├── retinanet.py
│   │       │   ├── senet.py
│   │       │   └── vgg.py
│   │       ├── preprocessing/
│   │       │   ├── __init__.py
│   │       │   ├── coco.py
│   │       │   ├── csv_generator.py
│   │       │   ├── generator.py
│   │       │   ├── kitti.py
│   │       │   ├── open_images.py
│   │       │   └── pascal_voc.py
│   │       └── utils/
│   │           ├── __init__.py
│   │           ├── anchors.py
│   │           ├── coco_eval.py
│   │           ├── colors.py
│   │           ├── compute_overlap.pyx
│   │           ├── config.py
│   │           ├── eval.py
│   │           ├── gpu.py
│   │           ├── image.py
│   │           ├── model.py
│   │           ├── tf_version.py
│   │           ├── transform.py
│   │           └── visualization.py
│   ├── Prediction/
│   │   ├── Custom/
│   │   │   ├── __init__.py
│   │   │   └── custom_utils.py
│   │   ├── __init__.py
│   │   └── imagenet_utils.py
│   └── __init__.py
├── requirements.txt
├── requirements_extra.txt
├── requirements_gpu.txt
├── scripts/
│   └── pascal_voc_to_yolo.py
├── setup.py
└── test/
    ├── test_custom_classification.py
    ├── test_custom_classification_training.py
    ├── test_custom_detection_training.py
    ├── test_custom_object_detection.py
    ├── test_custom_video_detection.py
    ├── test_image_classification.py
    ├── test_object_detection.py
    └── test_video_object_detection.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .codecov.yml
================================================
# Codecov configuration: notification gating, coverage number formatting,
# commit statuses, gcov branch detection and pull-request comments.
# Booleans are written as canonical `true`/`false` so that they parse the same
# way under YAML 1.1 and YAML 1.2 loaders (`yes`/`no` are only booleans in 1.1).
codecov:
  notify:
    require_ci_to_pass: true

coverage:
  precision: 2
  round: down
  # Quoted: the value is a range expression, not a number.
  range: "30...100"

  status:
    project: true
    patch: true
    changes: false

parsers:
  gcov:
    branch_detection:
      conditional: true
      loop: true
      method: false
      macro: false

comment:
  layout: "header, diff"
  behavior: default
  require_changes: false


================================================
FILE: .github/FUNDING.yml
================================================
# GitHub account offered as the sponsorship target for this repository
# (the README links to the matching GitHub sponsor page).
github: OlafenwaMoses

================================================
FILE: .github/workflows/build.yml
================================================
# CI workflow: installs the requirements, downloads the pretrained models,
# JSON configs and test datasets from the project's GitHub releases, then runs
# the pytest suite once per supported Python version.
name: Build and Testing

on:
  push:
    branches: [master]
  pull_request:
    branches: [master]

jobs:
  Unitest:
    # A single matrix job replaces four hand-duplicated jobs. The rendered
    # names ("Python3.7 Tests" ... "Python3.10 Tests") are the same as before.
    name: Python${{ matrix.python-version }} Tests
    runs-on: ubuntu-latest
    strategy:
      # The former jobs ran independently, so a failure on one Python version
      # must not cancel the runs for the other versions.
      fail-fast: false
      matrix:
        # Quoted so that 3.10 is not parsed as the float 3.1.
        python-version: ['3.7', '3.8', '3.9', '3.10']
    steps:
    - uses: actions/checkout@v3
    - uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}
        cache: 'pip'
    - name: Install Dependencies
      run: |
        pip install -r requirements.txt
        pip install -r requirements_extra.txt
    - name: Download and Setup Resources
      # NOTE(review): CI is overridden to false for this step only; the reason
      # is not visible in this file -- confirm before removing.
      env:
        CI: false
      run: |
        sudo apt-get update
        sudo apt-get install unzip -y

        mkdir test/data-models
        mkdir test/data-json

        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/densenet121-a639ec97.pth -P test/data-models
        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/densenet121-idenprof-test_acc_0.82550_epoch-95.pt -P test/data-models
        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/inception_v3-idenprof-test_acc_0.81050_epoch-92.pt -P test/data-models
        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/inception_v3_google-1a9a5a14.pth -P test/data-models
        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/mobilenet_v2-b0353104.pth -P test/data-models
        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/mobilenet_v2-idenprof-test_acc_0.85300_epoch-92.pt -P test/data-models
        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/resnet50-19c8e357.pth -P test/data-models
        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/resnet50-idenprof-test_acc_0.78200_epoch-91.pt -P test/data-models
        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/retinanet_resnet50_fpn_coco-eeacb38b.pth -P test/data-models
        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/tiny-yolov3.pt -P test/data-models
        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/tiny_yolov3_number-plate-dataset-imageai_mAP-0.22595_epoch-20.pt -P test/data-models
        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt -P test/data-models
        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_number-plate-dataset-imageai_mAP-0.57145_epoch-11.pt -P test/data-models

        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/idenprof.json -P test/data-json
        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/number-plate-dataset-imageai_tiny_yolov3_detection_config.json -P test/data-json
        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/number-plate-dataset-imageai_yolov3_detection_config.json -P test/data-json
        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/idenprof_model_classes.json -P test/data-json

        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/test-resources-v3/data-datasets.zip -P test
        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/test-resources-v3/data-images.zip -P test
        wget https://github.com/OlafenwaMoses/ImageAI/releases/download/test-resources-v3/data-videos.zip -P test

        unzip test/data-datasets.zip -d test
        unzip test/data-images.zip -d test
        unzip test/data-videos.zip -d test
    - name: Run Unittest
      run: |
        pytest test -vvv
  

================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
#   For a library or package, you might want to ignore these files since the code is
#   intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# poetry
#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
#   This is especially recommended for binary packages to ensure reproducibility, and is more
#   commonly ignored for libraries.
#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
#   in version control.
#   https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
#  and can be added to the global gitignore or merged into this file.  For a more nuclear
#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/


# Other files and folders
# The test/data-* entries are test resources fetched at test time rather than
# committed: .github/workflows/build.yml creates test/data-models and
# test/data-json and unzips the data-datasets / data-images / data-videos
# archives into test/ (presumably yielding the matching directories).
# NOTE(review): the purpose of `experiment` is not visible here -- presumably a
# local scratch folder; confirm.
test/data-models
test/data-images
test/data-json
test/data-videos
test/data-datasets
experiment

================================================
FILE: .travis.yml
================================================
# Travis CI configuration: installs the requirements and the package itself,
# downloads pretrained model files into test/data-models and runs pytest with
# coverage from inside the test/ directory.
# NOTE(review): every model fetched below is a `.h5` file, while
# BACKEND_MIGRATION.md states that `.h5` models are no longer supported and
# .github/workflows/build.yml downloads `.pth`/`.pt` files instead -- confirm
# whether this configuration is still in use.
dist: xenial
sudo: required
language: python
python:
  - '3.7.6'
install:
  - pip install -r requirements.txt
  - pip install pytest
  - pip install pytest-cov
script:
  - python setup.py install
  # All remaining commands run relative to test/.
  - cd test
  - mkdir data-models
  - mkdir data-temp
  # Pretrained weights, all saved under test/data-models/.
  - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/DenseNet-BC-121-32.h5
  - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/inception_v3_weights_tf_dim_ordering_tf_kernels.h5
  - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/resnet50_imagenet_tf.2.0.h5
  - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/mobilenet_v2.h5
  - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/models-v3/idenprof_densenet-0.763500.h5
  - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/models-v3/idenprof_full_resnet_ex-001_acc-0.119792.h5
  - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/idenprof_resnet_ex-056_acc-0.993062.h5
  - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/resnet50_coco_best_v2.1.0.h5
  - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/yolo.h5
  - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/yolo-tiny.h5
  - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/pretrained-yolov3.h5
  - wget -P data-models/ https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/hololens-ex-60--loss-2.76.h5
  - pytest -v --cov
after_script:
  # Fetches and runs the script served at codecov.io/bash (presumably the
  # Codecov coverage uploader).
  - bash <(curl -s https://codecov.io/bash)


================================================
FILE: BACKEND_MIGRATION.md
================================================
# Overview

In December 2022, ImageAI `3.0.2` was released, changing the backend from TensorFlow to PyTorch. This change allows ImageAI to support `Python 3.7` up to `Python 3.10` for all its features and deprecates a number of functionalities for this and future versions of ImageAI.


# Deprecated functionalities
- Tensorflow backend no longer supported. Now replaced with PyTorch
- All `.h5` pretrained models and custom trained `.h5` models no longer supported. If you still intend to use these models, see the `Using Tensorflow backend` section.
- `Speed mode` has been removed from model loading
- Custom detection model training dataset format changed to YOLO format from Pascal VOC. To convert your dataset to YOLO format, see the  `Convert Pascal VOC dataset to YOLO format` section.
- The `Enhance data` option for custom classification model training has been removed
- Standalone evaluation for detection model training has been removed

# Using Tensorflow backend
To use Tensorflow backend, do the following

- Install Python 3.7
- Install Tensorflow 
  - CPU: `pip install tensorflow==2.4.0`
  - GPU: `pip install tensorflow-gpu==2.4.0`
- Install other dependencies: `pip install keras==2.4.3 numpy==1.19.3 pillow==7.0.0 scipy==1.4.1 h5py==2.10.0 matplotlib==3.3.2 opencv-python keras-resnet==0.2.0`
- Install ImageAI **2.1.6**: `pip install imageai==2.1.6`
- Download the Tensorflow models from the releases below
  - [Models for Image Recognition and Object Detection](https://github.com/OlafenwaMoses/ImageAI/releases/tag/1.0)
  - [TF2.x Models [ Exclusives ]](https://github.com/OlafenwaMoses/ImageAI/releases/tag/essentials-v5)


# Convert Pascal VOC dataset to YOLO format
ImageAI now uses `YOLO format` for training custom object detection models. Should you need to train a new model with the new ImageAI version, you will need to convert your `Pascal VOC` datasets to YOLO format by doing the following:
- Run the command below
    ```
    python scripts/pascal_voc_to_yolo.py --dataset_dir <path_to_your_dataset_folder>
    ```
- Once completed, you will find the YOLO version of the dataset next to your Pascal VOC dataset.
  - E.g., if your dataset is in `C:/Users/Troublemaker/Documents/datasets/headset`, your conversion command will be
    ```
    python scripts/pascal_voc_to_yolo.py --dataset_dir C:/Users/Troublemaker/Documents/datasets/headset
    ```
    and once completed, the output will be in `C:/Users/Troublemaker/Documents/datasets/headset-yolo`


================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2019 MOSES OLAFENWA

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: MANIFEST.in
================================================
# Include the .txt files found under imageai/Detection and
# imageai/Classification in the source distribution (the repository tree lists
# coco_classes.txt, coco91_classes.txt and imagenet_classes.txt there).
recursive-include imageai/Detection *.txt
recursive-include imageai/Classification *.txt

================================================
FILE: README.md
================================================
# ImageAI (v3.0.3)


[![Build Status](https://travis-ci.com/OlafenwaMoses/ImageAI.svg?branch=master)](https://travis-ci.com/OlafenwaMoses/ImageAI)  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/OlafenwaMoses/ImageAI/blob/master/LICENSE) [![PyPI version](https://badge.fury.io/py/imageai.svg)](https://badge.fury.io/py/imageai)   [![Downloads](https://pepy.tech/badge/imageai/month)](https://pepy.tech/project/imageai) [![Downloads](https://pepy.tech/badge/imageai/week)](https://pepy.tech/project/imageai)

An open-source python library built to empower developers to build applications and systems with self-contained Deep Learning and Computer Vision capabilities using simple and few lines of code.
 
 If you would like to sponsor this project, kindly visit the <strong>[GitHub sponsor page](https://github.com/sponsors/OlafenwaMoses)</strong>.
 
 
## ---------------------------------------------------
## Introducing Jarvis and TheiaEngine.

We, the creators of ImageAI, are glad to announce 2 new AI projects that provide state-of-the-art Generative AI, LLM and Image Understanding on your personal computer and servers.


[![](jarvis.png)](https://jarvis.genxr.co)

Install Jarvis on PC/Mac to set up limitless access to LLM-powered AI chats for your everyday work, research and generative AI needs with 100% privacy and full offline capability.


Visit [https://jarvis.genxr.co](https://jarvis.genxr.co/) to get started.


[![](theiaengine.png)](https://www.genxr.co/theia-engine)


[TheiaEngine](https://www.genxr.co/theia-engine) is the next-generation computer vision AI API, capable of all Generative and Understanding computer vision tasks in a single API call and available via REST API to all programming languages. Features include
- **Detect 300+ objects** ( 220 more objects than ImageAI)
- **Provide answers to any content or context questions** asked on an image
  - very useful to get information on any object, action or information without needing to train a new custom model for every task
-  **Generate scene description and summary**
-  **Convert 2D image to 3D pointcloud and triangular mesh**
-  **Semantic Scene mapping of objects, walls, floors, etc**
-  **Stateless Face recognition and emotion detection**
-  **Image generation and augmentation from prompt**
-  etc.

Visit [https://www.genxr.co/theia-engine](https://www.genxr.co/theia-engine) to try the demo and join in the beta testing today.
## ---------------------------------------------------
 
![](logo1.png)

Developed and maintained by [Moses Olafenwa](https://twitter.com/OlafenwaMoses)

---

Built with simplicity in mind, **ImageAI** 
    supports a list of state-of-the-art Machine Learning algorithms for image prediction, custom image prediction, object detection, video detection, video object tracking
    and image prediction training. **ImageAI** currently supports image prediction and training using 4 different Machine Learning algorithms 
    trained on the ImageNet-1000 dataset. **ImageAI** also supports object detection, video detection and object tracking  using RetinaNet, YOLOv3 and TinyYOLOv3 trained on COCO dataset. Finally, **ImageAI** allows you to train custom models for performing detection and recognition of new objects. 
   
Eventually, **ImageAI** will provide support for wider and more specialized aspects of Computer Vision.


**New Release : ImageAI 3.0.2**

What's new:
- PyTorch backend
- TinyYOLOv3 model training


### TABLE OF CONTENTS
- <a href="#installation" > :white_square_button: Installation</a>
- <a href="#features" > :white_square_button: Features</a>
- <a href="#documentation" > :white_square_button: Documentation</a>
- <a href="#sponsors" > :white_square_button: Sponsors</a>
- <a href="#sample" > :white_square_button: Projects Built on ImageAI</a>
- <a href="#real-time-and-high-performance-implementation" > :white_square_button: High Performance Implementation</a>
- <a href="#recommendation" > :white_square_button: AI Practice Recommendations</a>
- <a href="#contact" > :white_square_button: Contact Developers</a>
- <a href="#citation" > :white_square_button: Citation</a>
- <a href="#ref" > :white_square_button: References</a>


## Installation
<div id="installation"></div>
 
To install ImageAI, run the python installation instruction below in the command line:

- [Download and Install](https://www.python.org/downloads/) **Python 3.7**, **Python 3.8**, **Python 3.9** or **Python 3.10**
- Install dependencies
  - **CPU**: Download [requirements.txt](https://github.com/OlafenwaMoses/ImageAI/blob/master/requirements.txt) file and install via the command
    ```
    pip install -r requirements.txt
    ```
    or simply copy and run the command below

    ```
    pip install cython "pillow>=7.0.0" "numpy>=1.18.1" "opencv-python>=4.1.2" "torch>=1.9.0" --extra-index-url https://download.pytorch.org/whl/cpu "torchvision>=0.10.0" --extra-index-url https://download.pytorch.org/whl/cpu pytest==7.1.3 tqdm==4.64.1 "scipy>=1.7.3" "matplotlib>=3.4.3" mock==4.0.3
    ```

  - **GPU/CUDA**: Download [requirements_gpu.txt](https://github.com/OlafenwaMoses/ImageAI/blob/master/requirements_gpu.txt) file and install via the command
    ```
    pip install -r requirements_gpu.txt
    ```
    or simply copy and run the command below
    ```
    pip install cython "pillow>=7.0.0" "numpy>=1.18.1" "opencv-python>=4.1.2" "torch>=1.9.0" --extra-index-url https://download.pytorch.org/whl/cu102 "torchvision>=0.10.0" --extra-index-url https://download.pytorch.org/whl/cu102 pytest==7.1.3 tqdm==4.64.1 "scipy>=1.7.3" "matplotlib>=3.4.3" mock==4.0.3
    ```
- If you plan to train custom AI models, download [requirements_extra.txt](https://github.com/OlafenwaMoses/ImageAI/blob/master/requirements_extra.txt) file and install via the command
  
  ```
  pip install -r requirements_extra.txt
  ```
  or simply copy and run the command below
  ```
  pip install pycocotools@git+https://github.com/gautamchitnis/cocoapi.git@cocodataset-master#subdirectory=PythonAPI
  ```
- Then run the command below to install ImageAI
  ```
  pip install imageai --upgrade
  ```

## Features
<div id="features"></div>
<table>
  <tr>
    <td><h2> Image Classification</h2> </td>
  </tr>
  <tr>
    <td><img src="data-images/1.jpg" >
    <h4>ImageAI provides 4 different algorithms and model types to perform image prediction, trained on the ImageNet-1000 dataset. The 4 algorithms provided for image prediction include MobileNetV2, ResNet50, InceptionV3 and DenseNet121.
    Click the link below to see the full sample codes, explanations and best practices guide.</h4>
    <a href="imageai/Classification"> >>> Get Started</a>
    </td>
  </tr>
  
 </table>

 <div id="features"></div>
<table>
  <tr>
    <td><h2> Object Detection </h2> </td>
  </tr>
  <tr>
    <td>
        <img src="data-images/image2new.jpg">
        <h4>ImageAI provides very convenient and powerful methods to perform object detection on images and extract each object from the image. The object detection class provides support for RetinaNet, YOLOv3 and TinyYOLOv3, with options to adjust for state of the art performance or real time processing. Click the link below to see the full sample codes, explanations and best practices guide.</h4>
    <a href="imageai/Detection"> >>> Get Started</a>
    </td>
  </tr>
  
 </table>


<table>
  <tr>
    <td><h2> Video Object Detection & Analysis</h2> </td>
  </tr>
  <tr>
    <td><img src="data-images/video_analysis_visualization.jpg">
    <h4>ImageAI provides very convenient and powerful methods to perform object detection in videos. The video object detection class supports RetinaNet, YOLOv3 and TinyYOLOv3. Click the link to see the full videos, sample codes, explanations and best practices guide.</h4>
    <a href="imageai/Detection/VIDEO.md"> >>> Get Started</a>
    </td>
  </tr>
  
 </table>


 <table>
  <tr>
    <td><h2> Custom Classification model training </h2> </td>
  </tr>
  <tr>
    <td>
        <img src="data-images/idenprof.jpg">
        <h4>ImageAI provides classes and methods for you to train a new model that can be used to perform prediction on your own custom objects. You can train your custom models using MobileNetV2, ResNet50, InceptionV3 and DenseNet in 5 lines of code. Click the link below to see the guide to preparing training images, sample training codes, explanations and best practices.</h4>
    <a href="imageai/Classification/CUSTOMTRAINING.md"> >>> Get Started</a>
    </td>
  </tr>
  
 </table>

 <table>
  <tr>
    <td><h2> Custom Model Classification</h2> </td>
  </tr>
  <tr>
    <td><img src="data-images/4.jpg">
    <h4>ImageAI provides classes and methods for you to run image prediction on your own custom objects using your own model trained with the ImageAI Model Training class. You can use your custom models trained with MobileNetV2, ResNet50, InceptionV3 and DenseNet and the JSON file containing the mapping of the custom object names. Click the link below to see the guide to sample training codes, explanations, and best practices guide.</h4>
    <a href="imageai/Classification/CUSTOMCLASSIFICATION.md"> >>> Get Started</a>
    </td>
  </tr>
  
 </table>

 <table>
  <tr>
    <td><h2> Custom Detection Model Training </h2> </td>
  </tr>
  <tr>
    <td>
        <img src="data-images/headsets.jpg">
        <h4>ImageAI provides classes and methods for you to train new YOLOv3 or TinyYOLOv3 object detection models on your custom dataset. This means you can train a model to detect literally any object of interest by providing the images, the annotations and training with ImageAI. Click the link below to see the guide to sample training codes, explanations, and best practices guide.</h4>
    <a href="imageai/Detection/Custom/CUSTOMDETECTIONTRAINING.md"> >>> Get Started</a>
    </td>
  </tr>
  
 </table>

<table>
  <tr>
    <td><h2> Custom Object Detection</h2> </td>
  </tr>
  <tr>
    <td><img src="data-images/holo2-detected.jpg">
    <h4>ImageAI now provides classes and methods for you to detect and recognize your own custom objects in images using your own model trained with the DetectionModelTrainer class. You can use your custom trained YOLOv3 or TinyYOLOv3 model and the <strong>.json</strong> file generated during the training. Click the link below to see the guide to sample training codes, explanations, and best practices guide.</h4>
    <a href="imageai/Detection/Custom/CUSTOMDETECTION.md"> >>> Get Started</a>
    </td>
  </tr>
 </table>


<table>
  <tr>
    <td><h2> Custom Video Object Detection & Analysis </h2> </td>
  </tr>
  <tr>
    <td>
        <img src="data-images/customvideodetection.gif">
        <h4>ImageAI now provides classes and methods for you to detect and recognize your own custom objects in videos using your own model trained with the DetectionModelTrainer class. You can use your custom trained YOLOv3 or TinyYOLOv3 model and the <strong>.json</strong> file generated during the training. Click the link below to see the guide to sample training codes, explanations, and best practices guide.</h4>
    <a href="imageai/Detection/Custom/CUSTOMVIDEODETECTION.md"> >>> Get Started</a>
    </td>
  </tr>
 </table>

## Documentation
<div id="documentation"></div>

We have provided full documentation for all **ImageAI** classes and functions. Visit the link below:

- Documentation - **English Version**  [https://imageai.readthedocs.io](https://imageai.readthedocs.io)


## Sponsors
<div id="sponsors"></div>


## Real-Time and High Performance Implementation
<div id="performance"></div>

**ImageAI** provides abstracted and convenient implementations of state-of-the-art Computer Vision technologies. All of **ImageAI** implementations and code can work on any computer system with moderate CPU capacity. However, the speed of processing for operations like image prediction, object detection and others on CPU is slow and not suitable for real-time applications. To perform real-time Computer Vision operations with high performance, you need to use GPU enabled technologies.

**ImageAI** uses the PyTorch backend for its Computer Vision operations. PyTorch supports both CPUs and GPUs (specifically NVIDIA GPUs; you can get one for your PC or get a PC that has one) for machine learning and artificial intelligence algorithms' implementations.


## Projects Built on ImageAI
<div id="sample"></div>


## AI Practice Recommendations
<div id="recommendation"></div>

For anyone interested in building AI systems and using them for business, economic,  social and research purposes, it is critical that the person knows the likely positive, negative and unprecedented impacts the use of such technologies will have.
They must also be aware of approaches and practices recommended by experienced industry experts to ensure every use of AI brings overall benefit to mankind.
We therefore recommend to everyone that wishes to use ImageAI and other AI tools and resources to read Microsoft's January 2018 publication on AI titled "The Future Computed : Artificial Intelligence and its role in society".
Kindly follow the link below to download the publication.

[https://blogs.microsoft.com/blog/2018/01/17/future-computed-artificial-intelligence-role-society](https://blogs.microsoft.com/blog/2018/01/17/future-computed-artificial-intelligence-role-society/)

### Contact Developer
<div id="contact"></div>

- **Moses Olafenwa**
    * _Email:_ guymodscientist@gmail.com
    * _Twitter:_ [@OlafenwaMoses](https://twitter.com/OlafenwaMoses)
    * _Medium:_ [@guymodscientist](https://medium.com/@guymodscientist)
    * _Facebook:_ [moses.olafenwa](https://facebook.com/moses.olafenwa)
- **John Olafenwa**
    * _Email:_ johnolafenwa@gmail.com
    * _Website:_ [https://john.aicommons.science](https://john.aicommons.science)
    * _Twitter:_ [@johnolafenwa](https://twitter.com/johnolafenwa)
    * _Medium:_ [@johnolafenwa](https://medium.com/@johnolafenwa)
    * _Facebook:_ [olafenwajohn](https://facebook.com/olafenwajohn)


### Citation
<div id="citation"></div>

You can cite **ImageAI** in your projects and research papers via the **BibTeX** entry below.  
  
```
@misc {ImageAI,
    author = "Moses",
    title  = "ImageAI, an open source python library built to empower developers to build applications and systems  with self-contained Computer Vision capabilities",
    url    = "https://github.com/OlafenwaMoses/ImageAI",
    month  = "mar",
    year   = "2018--"
}
```


 ### References
 <div id="ref"></div>

 1. Somshubra Majumdar, DenseNet Implementation of the paper, Densely Connected Convolutional Networks in Keras
[https://github.com/titu1994/DenseNet](https://github.com/titu1994/DenseNet)
 2. Broad Institute of MIT and Harvard, Keras package for deep residual networks
[https://github.com/broadinstitute/keras-resnet](https://github.com/broadinstitute/keras-resnet)
 3. Fizyr, Keras implementation of RetinaNet object detection
[https://github.com/fizyr/keras-retinanet](https://github.com/fizyr/keras-retinanet)
 4. Francois Chollet, Keras code and weights files for popular deeplearning models
[https://github.com/fchollet/deep-learning-models](https://github.com/fchollet/deep-learning-models)
 5. Forrest N. et al, SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size
[https://arxiv.org/abs/1602.07360](https://arxiv.org/abs/1602.07360)
 6. Kaiming H. et al, Deep Residual Learning for Image Recognition
[https://arxiv.org/abs/1512.03385](https://arxiv.org/abs/1512.03385)
 7. Szegedy. et al, Rethinking the Inception Architecture for Computer Vision
[https://arxiv.org/abs/1512.00567](https://arxiv.org/abs/1512.00567)
 8. Gao. et al, Densely Connected Convolutional Networks
[https://arxiv.org/abs/1608.06993](https://arxiv.org/abs/1608.06993)
 9. Tsung-Yi. et al, Focal Loss for Dense Object Detection
[https://arxiv.org/abs/1708.02002](https://arxiv.org/abs/1708.02002)
 10. O Russakovsky et al, ImageNet Large Scale Visual Recognition Challenge
[https://arxiv.org/abs/1409.0575](https://arxiv.org/abs/1409.0575)
 11. TY Lin et al, Microsoft COCO: Common Objects in Context
[https://arxiv.org/abs/1405.0312](https://arxiv.org/abs/1405.0312)
 12. Moses & John Olafenwa, A collection of images of identifiable professionals.
[https://github.com/OlafenwaMoses/IdenProf](https://github.com/OlafenwaMoses/IdenProf)
 13. Joseph Redmon and Ali Farhadi, YOLOv3: An Incremental Improvement.
[https://arxiv.org/abs/1804.02767](https://arxiv.org/abs/1804.02767)
 14. Experiencor, Training and Detecting Objects with YOLO3
[https://github.com/experiencor/keras-yolo3](https://github.com/experiencor/keras-yolo3)
 15. MobileNetV2: Inverted Residuals and Linear Bottlenecks
[https://arxiv.org/abs/1801.04381](https://arxiv.org/abs/1801.04381)
 16. YOLOv3 in PyTorch > ONNX > CoreML > TFLite [https://github.com/ultralytics/yolov3](https://github.com/ultralytics/yolov3)


================================================
FILE: examples/camera_feed_detection.py
================================================
from imageai.Detection import VideoObjectDetection
import os
import cv2

# Model and output locations are resolved against the directory the script is run from.
working_dir = os.getcwd()
# Download the model via this link https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt
model_file = os.path.join(working_dir, "yolov3.pt")
output_file = os.path.join(working_dir, "camera_detected_video")

# Open capture device index 0 as the live input.
webcam = cv2.VideoCapture(0)

yolo_detector = VideoObjectDetection()
yolo_detector.setModelTypeAsYOLOv3()
yolo_detector.setModelPath(model_file)
yolo_detector.loadModel()

# Run detection on the camera stream and print whatever the call returns.
saved_video = yolo_detector.detectObjectsFromVideo(
    camera_input=webcam,
    output_file_path=output_file,
    frames_per_second=20,
    log_progress=True,
    minimum_percentage_probability=30,
)
print(saved_video)

================================================
FILE: examples/custom_detection.py
================================================
from imageai.Detection.Custom import CustomObjectDetection

# Trained weights and matching detection config for the sample "hololens" model:
#   https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt
#   https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/hololens-yolo_yolov3_detection_config.json
weights_file = "yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt"
config_file = "hololens-yolo_yolov3_detection_config.json"

holo_detector = CustomObjectDetection()
holo_detector.setModelTypeAsYOLOv3()
holo_detector.setModelPath(weights_file)
holo_detector.setJsonPath(config_file)
holo_detector.loadModel()

# Detect on the input file; the call is also given a path for an output image.
results = holo_detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg")
for found in results:
    print(found["name"], " : ", found["percentage_probability"], " : ", found["box_points"])


"""
EXAMPLE RESULT

hololens  :  39.69653248786926  :  [611, 74, 751, 154]
hololens  :  87.6643180847168  :  [23, 46, 90, 79]
hololens  :  89.25175070762634  :  [191, 66, 243, 95]
hololens  :  64.49641585350037  :  [437, 81, 514, 133]
hololens  :  91.78624749183655  :  [380, 113, 423, 138]

"""

================================================
FILE: examples/custom_detection_array_input_output.py
================================================
from imageai.Detection.Custom import CustomObjectDetection
import cv2

# Read the test picture with OpenCV so it can be passed to the detector as an array.
source_frame = cv2.imread("holo2.jpg")

# Trained weights and matching detection config for the sample "hololens" model:
#   https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt
#   https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/hololens-yolo_yolov3_detection_config.json
weights_file = "yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt"
config_file = "hololens-yolo_yolov3_detection_config.json"

holo_detector = CustomObjectDetection()
holo_detector.setModelTypeAsYOLOv3()
holo_detector.setModelPath(weights_file)
holo_detector.setJsonPath(config_file)
holo_detector.loadModel()

# With array input and array output the call yields an image plus the detections.
annotated_frame, results = holo_detector.detectObjectsFromImage(
    input_image=source_frame,
    input_type="array",
    output_type="array",
)

for found in results:
    print(found["name"], " : ", found["percentage_probability"], " : ", found["box_points"])

# Display the returned image until a key is pressed.
cv2.imshow("Main Image", annotated_frame)
cv2.waitKey()
cv2.destroyAllWindows()


"""
SAMPLE RESULT

hololens  :  39.69653248786926  :  [611, 74, 751, 154]
hololens  :  87.6643180847168  :  [23, 46, 90, 79]
hololens  :  89.25175070762634  :  [191, 66, 243, 95]
hololens  :  64.49641585350037  :  [437, 81, 514, 133]
hololens  :  91.78624749183655  :  [380, 113, 423, 138]
"""

================================================
FILE: examples/custom_detection_extract_objects.py
================================================
from imageai.Detection.Custom import CustomObjectDetection

# Trained weights and matching detection config for the sample "hololens" model:
#   https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt
#   https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/hololens-yolo_yolov3_detection_config.json
weights_file = "yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt"
config_file = "hololens-yolo_yolov3_detection_config.json"

holo_detector = CustomObjectDetection()
holo_detector.setModelTypeAsYOLOv3()
holo_detector.setModelPath(weights_file)
holo_detector.setJsonPath(config_file)
holo_detector.loadModel()

# extract_detected_objects=True makes the call return a second sequence that is
# walked in step with the detections below.
results, object_paths = holo_detector.detectObjectsFromImage(
    input_image="holo2.jpg",
    output_image_path="holo2-detected.jpg",
    extract_detected_objects=True,
)

for found, saved_path in zip(results, object_paths):
    print(saved_path)
    print(found["name"], " : ", found["percentage_probability"], " : ", found["box_points"])
    print("---------------")

"""
SAMPLE RESULT

holo2-detected-objects\hololens-1.jpg
hololens  :  39.69653248786926  :  [611, 74, 751, 154]
---------------

holo2-detected-objects\hololens-1.jpg
hololens  :  87.6643180847168  :  [23, 46, 90, 79]
---------------

holo2-detected-objects\hololens-1.jpg
hololens  :  89.25175070762634  :  [191, 66, 243, 95]
---------------

holo2-detected-objects\hololens-1.jpg
hololens  :  64.49641585350037  :  [437, 81, 514, 133]
---------------

holo2-detected-objects\hololens-1.jpg
hololens  :  91.78624749183655  :  [380, 113, 423, 138]
---------------
"""

================================================
FILE: examples/custom_detection_from_array_extract_objects_array.py
================================================
from imageai.Detection.Custom import CustomObjectDetection
import cv2

# Read the test picture with OpenCV so it can be passed to the detector as an array.
source_frame = cv2.imread("holo2.jpg")

# Trained weights and matching detection config for the sample "hololens" model:
#   https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt
#   https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/hololens-yolo_yolov3_detection_config.json
weights_file = "yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt"
config_file = "hololens-yolo_yolov3_detection_config.json"

holo_detector = CustomObjectDetection()
holo_detector.setModelTypeAsYOLOv3()
holo_detector.setModelPath(weights_file)
holo_detector.setJsonPath(config_file)
holo_detector.loadModel()

# Array in, arrays out: an image, the detections, and one extracted image per object.
annotated_frame, results, object_crops = holo_detector.detectObjectsFromImage(
    input_image=source_frame,
    extract_detected_objects=True,
    input_type="array",
    output_type="array",
)

for found in results:
    print(found["name"], " : ", found["percentage_probability"], " : ", found["box_points"])

# Show the returned image, then each extracted object in its own numbered window.
cv2.imshow("Main Image", annotated_frame)
for window_number, crop in enumerate(object_crops, start=1):
    cv2.imshow("Window" + str(window_number), crop)

cv2.waitKey()
cv2.destroyAllWindows()


"""
SAMPLE RESULT

hololens  :  39.69653248786926  :  [611, 74, 751, 154]
hololens  :  87.6643180847168  :  [23, 46, 90, 79]
hololens  :  89.25175070762634  :  [191, 66, 243, 95]
hololens  :  64.49641585350037  :  [437, 81, 514, 133]
hololens  :  91.78624749183655  :  [380, 113, 423, 138]
"""

================================================
FILE: examples/custom_detection_from_file_extract_objects_array.py
================================================
from imageai.Detection.Custom import CustomObjectDetection
import cv2


# Trained weights and matching detection config for the sample "hololens" model:
#   https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt
#   https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/hololens-yolo_yolov3_detection_config.json
weights_file = "yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt"
config_file = "hololens-yolo_yolov3_detection_config.json"

holo_detector = CustomObjectDetection()
holo_detector.setModelTypeAsYOLOv3()
holo_detector.setModelPath(weights_file)
holo_detector.setJsonPath(config_file)
holo_detector.loadModel()

# File in, arrays out: an image, the detections, and one extracted image per object.
annotated_frame, results, object_crops = holo_detector.detectObjectsFromImage(
    input_image="holo2.jpg",
    extract_detected_objects=True,
    output_type="array",
)

for found in results:
    print(found["name"], " : ", found["percentage_probability"], " : ", found["box_points"])

# Show the returned image, then each extracted object in its own numbered window.
cv2.imshow("Main Image", annotated_frame)
for window_number, crop in enumerate(object_crops, start=1):
    cv2.imshow("Window" + str(window_number), crop)

cv2.waitKey()
cv2.destroyAllWindows()


"""
SAMPLE RESULT

hololens  :  39.69653248786926  :  [611, 74, 751, 154]
hololens  :  87.6643180847168  :  [23, 46, 90, 79]
hololens  :  89.25175070762634  :  [191, 66, 243, 95]
hololens  :  64.49641585350037  :  [437, 81, 514, 133]
hololens  :  91.78624749183655  :  [380, 113, 423, 138]

"""

================================================
FILE: examples/custom_detection_train.py
================================================
from imageai.Detection.Custom import DetectionModelTrainer

# Names of the objects to train on. If you are training to detect more than one
# object, list every name, e.g. ["hololens", "google-glass", "oculus", "magic-leap"].
target_classes = ["hololens"]

# Pretrained weights passed as train_from_pretrained_model; download via
# https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt
starting_weights = "yolov3.pt"

yolo_trainer = DetectionModelTrainer()
yolo_trainer.setModelTypeAsYOLOv3()
yolo_trainer.setDataDirectory(data_directory="hololens")
yolo_trainer.setTrainConfig(
    object_names_array=target_classes,
    batch_size=4,
    num_experiments=200,
    train_from_pretrained_model=starting_weights,
)
yolo_trainer.trainModel()


"""
SAMPLE RESULT

Generating anchor boxes for training images...
thr=0.25: 1.0000 best possible recall, 6.93 anchors past thr
n=9, img_size=416, metric_all=0.463/0.856-mean/best, past_thr=0.549-mean:
====================
Pretrained YOLOv3 model loaded to initialize weights
====================
Epoch 1/100
----------
Train:
30it [00:14,  2.09it/s]
    box loss-> 0.09820, object loss-> 0.27985, class loss-> 0.00000
Validation:
15it [01:45,  7.05s/it]
    recall: 0.085714 precision: 0.000364 mAP@0.5: 0.000186, mAP@0.5-0.95: 0.000030

Epoch 2/100
----------
Train:
30it [00:07,  4.25it/s]
    box loss-> 0.08691, object loss-> 0.07011, class loss-> 0.00000
Validation:
15it [01:37,  6.53s/it]
    recall: 0.214286 precision: 0.000854 mAP@0.5: 0.000516, mAP@0.5-0.95: 0.000111
"""


================================================
FILE: examples/custom_detection_video.py
================================================
from imageai.Detection.Custom import CustomVideoObjectDetection
import os

working_dir = os.getcwd()

# Trained weights and matching detection config for the sample "hololens" model:
#   https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt
#   https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/hololens-yolo_yolov3_detection_config.json
weights_file = "yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt"
config_file = "hololens-yolo_yolov3_detection_config.json"

holo_video_detector = CustomVideoObjectDetection()
holo_video_detector.setModelTypeAsYOLOv3()
holo_video_detector.setModelPath(weights_file)
holo_video_detector.setJsonPath(config_file)
holo_video_detector.loadModel()

# The output path is given without an extension, anchored at the working directory.
holo_video_detector.detectObjectsFromVideo(
    input_file_path="holo1.mp4",
    output_file_path=os.path.join(working_dir, "holo1-detected3"),
    frames_per_second=20,
    minimum_percentage_probability=40,
    log_progress=True,
)

================================================
FILE: examples/custom_model_prediction.py
================================================
from imageai.Classification.Custom import CustomImageClassification
import os

# All inputs are looked up in the directory the script is run from.
working_dir = os.getcwd()
# Download the model via this link https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/resnet50-idenprof-test_acc_0.78200_epoch-91.pt
weights_file = os.path.join(working_dir, "resnet50-idenprof-test_acc_0.78200_epoch-91.pt")
# Download from here https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/idenprof_model_classes.json
classes_file = os.path.join(working_dir, "idenprof_model_classes.json")
test_image = os.path.join(working_dir, "9.jpg")

classifier = CustomImageClassification()
classifier.setModelTypeAsResNet50()
classifier.setModelPath(weights_file)
classifier.setJsonPath(classes_file)
classifier.loadModel(num_objects=10)

# Request five results and print each label next to its score.
labels, scores = classifier.classifyImage(test_image, result_count=5)

for label, score in zip(labels, scores):
    print(label , " : " , score)

================================================
FILE: examples/custom_model_training.py
================================================
from imageai.Classification.Custom import ClassificationModelTrainer


# Train a ResNet50 classifier on the dataset directory named "idenprof".
training_options = {"num_experiments": 200, "batch_size": 32}

idenprof_trainer = ClassificationModelTrainer()
idenprof_trainer.setModelTypeAsResNet50()
idenprof_trainer.setDataDirectory("idenprof")
idenprof_trainer.trainModel(**training_options)


================================================
FILE: examples/image_custom_object_detection.py
================================================
from imageai.Detection import ObjectDetection
import os
from time import time

# Model, input and output files all live in the directory the script is run from.
working_dir = os.getcwd()
# Download the model via this link https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt
model_file = os.path.join(working_dir, "yolov3.pt")

yolo_detector = ObjectDetection()
yolo_detector.setModelTypeAsYOLOv3()
yolo_detector.setModelPath(model_file)
yolo_detector.loadModel()

# Timestamp captured after model loading; nothing in this script reads it back.
our_time = time()

# Restrict detection to the two object types enabled here.
wanted_objects = yolo_detector.CustomObjects(bicycle=True, backpack=True)

results = yolo_detector.detectCustomObjectsFromImage(
    custom_objects=wanted_objects,
    input_image=os.path.join(working_dir, "7.jpg"),
    output_image_path=os.path.join(working_dir, "7-detected.jpg"),
    minimum_percentage_probability=40,
)
for found in results:
    print(found["name"] , " : " , found["percentage_probability"], " : ", found["box_points"]  )
    print("--------------------------------")


================================================
FILE: examples/image_prediction.py
================================================
from imageai.Classification import ImageClassification
import os

# Model and test image are looked up in the directory the script is run from.
working_dir = os.getcwd()
# Download the model via this link https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/resnet50-19c8e357.pth
weights_file = os.path.join(working_dir, "resnet50-19c8e357.pth")
test_image = os.path.join(working_dir, "1.jpg")

classifier = ImageClassification()
classifier.setModelTypeAsResNet50()
classifier.setModelPath(weights_file)
classifier.loadModel()

# Request ten results and print each label next to its score.
labels, scores = classifier.classifyImage(test_image, result_count=10)
for label, score in zip(labels, scores):
    print(label , " : " , score)

================================================
FILE: examples/object_detection.py
================================================
from imageai.Detection import ObjectDetection
import os

# Model, input and output files all live in the directory the script is run from.
working_dir = os.getcwd()
# Download the model via this link https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/retinanet_resnet50_fpn_coco-eeacb38b.pth
weights_file = os.path.join(working_dir, "retinanet_resnet50_fpn_coco-eeacb38b.pth")

retinanet_detector = ObjectDetection()
retinanet_detector.setModelTypeAsRetinaNet()
retinanet_detector.setModelPath(weights_file)
retinanet_detector.loadModel()

results = retinanet_detector.detectObjectsFromImage(
    input_image=os.path.join(working_dir, "2.jpg"),
    output_image_path=os.path.join(working_dir, "2_detected.jpg"),
    minimum_percentage_probability=40,
)

for found in results:
    print(found["name"] , " : ", found["percentage_probability"], " : ", found["box_points"] )
    print("--------------------------------")

================================================
FILE: examples/video_analysis_per_frame.py
================================================
from imageai.Detection import VideoObjectDetection
import os
from matplotlib import pyplot as plt


execution_path = os.getcwd()

# Pie-slice colour for every label the per-frame callback may receive.
# Each value must be a colour name matplotlib recognises, otherwise plt.pie
# raises ValueError as soon as that label shows up in a frame. The original
# table held several unknown or misspelled names ('raspberry', 'cobalt',
# 'peacock', 'coldgrey', 'deepgreen', 'cobaltgreen', 'melon', 'palevilotered',
# 'mincream', 'emeraldgreen', 'sapgreen'); they are replaced with the closest
# valid named colours.
color_index = {
    'bus': 'red', 'handbag': 'steelblue', 'giraffe': 'orange', 'spoon': 'gray',
    'cup': 'yellow', 'chair': 'green', 'elephant': 'pink', 'truck': 'indigo',
    'motorcycle': 'azure', 'refrigerator': 'gold', 'keyboard': 'violet', 'cow': 'magenta',
    'mouse': 'crimson', 'sports ball': 'mediumvioletred', 'horse': 'maroon', 'cat': 'orchid',
    'boat': 'slateblue', 'hot dog': 'navy', 'apple': 'darkblue', 'parking meter': 'aliceblue',
    'sandwich': 'skyblue', 'skis': 'deepskyblue', 'microwave': 'darkcyan', 'knife': 'cadetblue',
    'baseball bat': 'cyan', 'oven': 'lightcyan', 'carrot': 'dimgray', 'scissors': 'seagreen',
    'sheep': 'darkgreen', 'toothbrush': 'mediumseagreen', 'fire hydrant': 'limegreen', 'remote': 'forestgreen',
    'bicycle': 'olivedrab', 'toilet': 'ivory', 'tv': 'khaki', 'skateboard': 'palegoldenrod',
    'train': 'cornsilk', 'zebra': 'wheat', 'tie': 'burlywood', 'orange': 'lightsalmon',
    'bird': 'bisque', 'dining table': 'chocolate', 'hair drier': 'sandybrown', 'cell phone': 'sienna',
    'sink': 'coral', 'bench': 'salmon', 'bottle': 'brown', 'car': 'silver',
    'bowl': 'maroon', 'tennis racket': 'palevioletred', 'airplane': 'lavenderblush', 'pizza': 'hotpink',
    'umbrella': 'deeppink', 'bear': 'plum', 'fork': 'purple', 'laptop': 'indigo',
    'vase': 'mediumpurple', 'baseball glove': 'slateblue', 'traffic light': 'mediumblue', 'bed': 'navy',
    'broccoli': 'royalblue', 'backpack': 'slategray', 'snowboard': 'skyblue', 'kite': 'cadetblue',
    'teddy bear': 'darkcyan', 'clock': 'lightcyan', 'wine glass': 'teal', 'frisbee': 'aquamarine',
    'donut': 'mintcream', 'suitcase': 'seagreen', 'dog': 'springgreen', 'banana': 'mediumspringgreen',
    'person': 'honeydew', 'surfboard': 'palegreen', 'cake': 'darkolivegreen', 'book': 'lawngreen',
    'potted plant': 'greenyellow', 'toaster': 'ivory', 'stop sign': 'beige', 'couch': 'khaki',
}


# Set to True by forFrame once it has resized the figure window.
resized = False

def forFrame(frame_number, output_array, output_count, returned_frame):
    """
    Per-frame callback: redraws the current figure with the frame image on
    the left and a pie chart of the per-label counts on the right.

    :param frame_number: value shown in both subplot titles
    :param output_array: not used by this callback
    :param output_count: mapping of object label -> count; each label must be a key of 'color_index'
    :param returned_frame: image handed to plt.imshow()
    :return:
    """
    global resized

    plt.clf()

    this_colors = []
    labels = []
    sizes = []

    # One pie slice per label found in the frame.
    for eachItem in output_count:
        labels.append(eachItem + " = " + str(output_count[eachItem]))
        sizes.append(output_count[eachItem])
        this_colors.append(color_index[eachItem])

    # Resize the figure window only on the first call.
    if not resized:
        manager = plt.get_current_fig_manager()
        manager.resize(width=1000, height=500)
        resized = True

    plt.subplot(1, 2, 1)
    plt.title("Frame : " + str(frame_number))
    plt.axis("off")
    plt.imshow(returned_frame, interpolation="none")

    plt.subplot(1, 2, 2)
    plt.title("Analysis: " + str(frame_number))
    plt.pie(sizes, labels=labels, colors=this_colors, shadow=True, startangle=140, autopct="%1.1f%%")

    # Short pause so the GUI event loop can refresh the figure.
    plt.pause(0.01)


# Build the video detector and load the YOLOv3 weights expected next to the script.
video_detector = VideoObjectDetection()
video_detector.setModelTypeAsYOLOv3()
video_detector.setModelPath(
    os.path.join(execution_path, "yolov3.pt")
)  # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt
video_detector.loadModel()

plt.show()

# Run detection; forFrame is passed as the per-frame callback.
video_detector.detectObjectsFromVideo(
    input_file_path=os.path.join(execution_path, "traffic.mp4"),
    output_file_path=os.path.join(execution_path, "video_frame_analysis"),
    frames_per_second=20,
    per_frame_function=forFrame,
    minimum_percentage_probability=30,
    return_detected_frame=True,
)


================================================
FILE: examples/video_analysis_per_second.py
================================================
from imageai.Detection import VideoObjectDetection
import os
from matplotlib import pyplot as plt


# Directory the script is launched from; model and video files are resolved against it.
execution_path = os.getcwd()

# Pie-chart color for every detectable object label. Every value must be a
# color name Matplotlib recognizes, otherwise plt.pie() raises when that
# label shows up in the analysed second.
color_index = {
    'bus': 'red',
    'handbag': 'steelblue',
    'giraffe': 'orange',
    'spoon': 'gray',
    'cup': 'yellow',
    'chair': 'green',
    'elephant': 'pink',
    'truck': 'indigo',
    'motorcycle': 'azure',
    'refrigerator': 'gold',
    'keyboard': 'violet',
    'cow': 'magenta',
    'mouse': 'crimson',
    'sports ball': 'mediumvioletred',
    'horse': 'maroon',
    'cat': 'orchid',
    'boat': 'slateblue',
    'hot dog': 'navy',
    'apple': 'darkblue',
    'parking meter': 'aliceblue',
    'sandwich': 'skyblue',
    'skis': 'deepskyblue',
    'microwave': 'darkcyan',
    'knife': 'cadetblue',
    'baseball bat': 'cyan',
    'oven': 'lightcyan',
    'carrot': 'dimgray',
    'scissors': 'seagreen',
    'sheep': 'darkgreen',
    'toothbrush': 'mediumseagreen',
    'fire hydrant': 'limegreen',
    'remote': 'forestgreen',
    'bicycle': 'olivedrab',
    'toilet': 'ivory',
    'tv': 'khaki',
    'skateboard': 'palegoldenrod',
    'train': 'cornsilk',
    'zebra': 'wheat',
    'tie': 'burlywood',
    'orange': 'lightsalmon',
    'bird': 'bisque',
    'dining table': 'chocolate',
    'hair drier': 'sandybrown',
    'cell phone': 'sienna',
    'sink': 'coral',
    'bench': 'salmon',
    'bottle': 'brown',
    'car': 'silver',
    'bowl': 'maroon',
    'tennis racket': 'palevioletred',
    'airplane': 'lavenderblush',
    'pizza': 'hotpink',
    'umbrella': 'deeppink',
    'bear': 'plum',
    'fork': 'purple',
    'laptop': 'indigo',
    'vase': 'mediumpurple',
    'baseball glove': 'slateblue',
    'traffic light': 'mediumblue',
    'bed': 'navy',
    'broccoli': 'royalblue',
    'backpack': 'slategray',
    'snowboard': 'skyblue',
    'kite': 'cadetblue',
    'teddy bear': 'darkcyan',
    'clock': 'lightcyan',
    'wine glass': 'teal',
    'frisbee': 'aquamarine',
    'donut': 'mintcream',
    'suitcase': 'seagreen',
    'dog': 'springgreen',
    'banana': 'mediumspringgreen',
    'person': 'honeydew',
    'surfboard': 'palegreen',
    'cake': 'olive',
    'book': 'lawngreen',
    'potted plant': 'greenyellow',
    'toaster': 'ivory',
    'stop sign': 'beige',
    'couch': 'khaki',
}


# Set to True by the per-second callback once the figure window has been resized.
resized = False

def forSecond(frame_number, output_arrays, count_arrays, average_count, returned_frame):
    """
    Per-second callback: redraws the current figure with the returned frame
    on the left and a pie chart of the averaged per-label counts on the right.

    :param frame_number: value shown in both subplot titles (labelled "Second")
    :param output_arrays: not used by this callback
    :param count_arrays: not used by this callback
    :param average_count: mapping of object label -> count; each label must be a key of 'color_index'
    :param returned_frame: image handed to plt.imshow()
    :return:
    """
    global resized

    plt.clf()

    this_colors = []
    labels = []
    sizes = []

    # One pie slice per label reported for this second.
    for eachItem in average_count:
        labels.append(eachItem + " = " + str(average_count[eachItem]))
        sizes.append(average_count[eachItem])
        this_colors.append(color_index[eachItem])

    # Resize the figure window only on the first call.
    if not resized:
        manager = plt.get_current_fig_manager()
        manager.resize(width=1000, height=500)
        resized = True

    plt.subplot(1, 2, 1)
    plt.title("Second : " + str(frame_number))
    plt.axis("off")
    plt.imshow(returned_frame, interpolation="none")

    plt.subplot(1, 2, 2)
    plt.title("Analysis: " + str(frame_number))
    plt.pie(sizes, labels=labels, colors=this_colors, shadow=True, startangle=140, autopct="%1.1f%%")

    # Short pause so the GUI event loop can refresh the figure.
    plt.pause(0.01)


# Build the video detector and load the YOLOv3 weights expected next to the script.
video_detector = VideoObjectDetection()
video_detector.setModelTypeAsYOLOv3()
video_detector.setModelPath(
    os.path.join(execution_path, "yolov3.pt")
)  # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt
video_detector.loadModel()

plt.show()

# Run detection; forSecond is passed as the per-second callback.
video_detector.detectObjectsFromVideo(
    input_file_path=os.path.join(execution_path, "traffic.mp4"),
    output_file_path=os.path.join(execution_path, "video_second_analysis"),
    frames_per_second=20,
    per_second_function=forSecond,
    minimum_percentage_probability=30,
    return_detected_frame=True,
    log_progress=True,
)


================================================
FILE: examples/video_custom_object_detection.py
================================================
from imageai.Detection import VideoObjectDetection
import os

# Directory the script is launched from; model and video files are resolved against it.
execution_path = os.getcwd()

# Build the video detector and load the YOLOv3 weights.
detector = VideoObjectDetection()
detector.setModelTypeAsYOLOv3()
detector.setModelPath(
    os.path.join(execution_path, "yolov3.pt")
)  # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt
detector.loadModel()

# Object selection handed to the detector as 'custom_objects' below.
custom = detector.CustomObjects(person=True, motorcycle=True, bus=True)

video_path = detector.detectCustomObjectsFromVideo(
    custom_objects=custom,
    input_file_path=os.path.join(execution_path, "traffic.mp4"),
    output_file_path=os.path.join(execution_path, "traffic_detected_custom"),
    frames_per_second=20,
    log_progress=True,
)
print(video_path)

================================================
FILE: examples/video_object_detection.py
================================================
from imageai.Detection import VideoObjectDetection
import os

# Directory the script is launched from; model and video files are resolved against it.
execution_path = os.getcwd()

# Build the video detector and load the YOLOv3 weights.
detector = VideoObjectDetection()
detector.setModelTypeAsYOLOv3()
detector.setModelPath(
    os.path.join(execution_path, "yolov3.pt")
)  # https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt
detector.loadModel()

# Process the input video and print whatever the detector returns.
video_path = detector.detectObjectsFromVideo(
    input_file_path=os.path.join(execution_path, "traffic.mp4"),
    output_file_path=os.path.join(execution_path, "traffic_detected"),
    frames_per_second=20,
    log_progress=True,
)
print(video_path)

================================================
FILE: imageai/Classification/CUSTOMCLASSIFICATION.md
================================================
# ImageAI : Custom Image Classification

ImageAI provides 4 different algorithms and model types to perform custom image prediction using your custom models.
You will be able to use your model trained with **ImageAI** and the corresponding model_class JSON file to predict custom objects
that you have trained the model on.

### TABLE OF CONTENTS

- <a href="#customprediction" > :white_square_button: Custom Model Prediction</a>
- <a href="#custompredictionfullmodel" > :white_square_button: Custom Model Prediction with Full Model (NEW)</a>

### Custom Model Prediction
<div id="customprediction"></div>

In this example, we will be using the model trained for 20 experiments on **IdenProf**, a dataset of uniformed professionals and achieved 65.17% accuracy on the test dataset.
(You can use your own trained model and generated JSON file. This 'class' is provided mainly for the purpose to use your own custom models.)
Download the ResNet50 model file and the JSON file from the links below:

- [**ResNet50**](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/resnet50-idenprof-test_acc_0.78200_epoch-91.pt) _(Size = 90.4 mb)_
- [**idenprof_model_classes.json file**](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/idenprof_model_classes.json)

Great!
Once you have downloaded this model file and the JSON file, start a new python project, and then copy the model file and the JSON file to your project folder where your python files (.py files) will be.
Download the image below, or take any image on your computer that include any of the following professionals(Chef, Doctor, Engineer, Farmer, Fireman, Judge, Mechanic, Pilot, Police and Waiter) and copy it to your python project's folder.
Then create a python file and give it a name; an example is **FirstCustomPrediction.py**.
Then write the code below into the python file:

### FirstCustomPrediction.py

```python
from imageai.Classification.Custom import CustomImageClassification
import os

execution_path = os.getcwd()

prediction = CustomImageClassification()
prediction.setModelTypeAsResNet50()
prediction.setModelPath(os.path.join(execution_path, "resnet50-idenprof-test_acc_0.78200_epoch-91.pt"))
prediction.setJsonPath(os.path.join(execution_path, "idenprof_model_classes.json"))
prediction.loadModel(num_objects=10)

predictions, probabilities = prediction.classifyImage(os.path.join(execution_path, "4.jpg"), result_count=5)

for eachPrediction, eachProbability in zip(predictions, probabilities):
    print(eachPrediction + " : " + eachProbability)
```

**Sample Result:**

![Sample Result](../../data-images/4.jpg)
```
mechanic : 76.82620286941528
chef : 10.106072574853897
waiter : 4.036874696612358
police : 2.6663416996598244
pilot : 2.239348366856575
```

The code above works as follows:
```python
from imageai.Classification.Custom import CustomImageClassification
import os
```
The code above imports the **ImageAI** library for custom image prediction and the python **os** class.

```python
execution_path = os.getcwd()
```

The above line obtains the path to the folder that contains your python file (in this example, your FirstCustomPrediction.py).

```python
prediction = CustomImageClassification()
prediction.setModelTypeAsResNet50()
prediction.setModelPath(os.path.join(execution_path, "resnet50-idenprof-test_acc_0.78200_epoch-91.pt"))
prediction.setJsonPath(os.path.join(execution_path, "idenprof_model_classes.json"))
prediction.loadModel(num_objects=10)
```

In the lines above, we created an instance of the `CustomImageClassification()`
 class in the first line, then we set the model type of the prediction object to ResNet50 by calling `.setModelTypeAsResNet50()`
  in the second line. We set the model path of the prediction object to the path of the custom model file (`resnet50-idenprof-test_acc_0.78200_epoch-91.pt`) we copied to the python file folder
  in the third line, set the path to the JSON file of the model in the fourth line, and finally loaded the model, passing the number of objects that can be predicted by the model.

```python
predictions, probabilities = prediction.classifyImage(os.path.join(execution_path, "4.jpg"), result_count=5)
```

In the above line, we defined 2 variables to hold the values returned by the function that predicts an image, which is the `.classifyImage()` function. We passed it the path to our image and also stated the number of prediction results we want to have (values from 1 to 10 in this case) by passing `result_count=5`. The `.classifyImage()` function returns 2 array objects, with the first (**predictions**) being an array of predictions and the second (**probabilities**) being an array of the corresponding percentage probability for each prediction.

```python
for eachPrediction, eachProbability in zip(predictions, probabilities):
    print(eachPrediction + " : " + eachProbability)
```

The above lines obtain each object in the **predictions** array together with its corresponding percentage probability from the **probabilities** array, and print both to the console.

**CustomImageClassification** class also supports the multiple predictions, input types and prediction speeds that are contained
in the **ImageClassification** class. Follow this [link](README.md) to see all the details.


### Documentation

We have provided full documentation for all **ImageAI** classes and functions. Find links below:

* Documentation - **English Version  [https://imageai.readthedocs.io](https://imageai.readthedocs.io)**


================================================
FILE: imageai/Classification/CUSTOMTRAINING.md
================================================
# ImageAI : Custom Prediction Model Training 

## ---------------------------------------------------
## Introducing Jarvis and TheiaEngine.

We the creators of ImageAI are glad to announce 2 new AI projects to provide state-of-the-art Generative AI, LLM and Image Understanding on your personal computer and servers. 


[![](../../jarvis.png)](https://jarvis.genxr.co)

Install Jarvis on PC/Mac to setup limitless access to LLM powered AI Chats for your every day work, research and generative AI needs with 100% privacy and full offline capability.


Visit [https://jarvis.genxr.co](https://jarvis.genxr.co/) to get started.


[![](../../theiaengine.png)](https://www.genxr.co/theia-engine)


[TheiaEngine](https://www.genxr.co/theia-engine), the next-generation computer Vision AI API capable of all Generative and Understanding computer vision tasks in a single API call and available via REST API to all programming languages. Features include
- **Detect 300+ objects** ( 220 more objects than ImageAI)
- **Provide answers to any content or context questions** asked on an image
  - very useful to get information on any object, action or information without needing to train a new custom model for every tasks
-  **Generate scene description and summary**
-  **Convert 2D image to 3D pointcloud and triangular mesh**
-  **Semantic Scene mapping of objects, walls, floors, etc**
-  **Stateless Face recognition and emotion detection**
-  **Image generation and augmentation from prompt**
-  etc.

Visit [https://www.genxr.co/theia-engine](https://www.genxr.co/theia-engine) to try the demo and join in the beta testing today.
## ---------------------------------------------------

**ImageAI** provides the most simple and powerful approach to training custom image prediction models
using state-of-the-art SqueezeNet, ResNet50, InceptionV3 and DenseNet
which you can load into the `imageai.Classification.Custom.CustomImageClassification` class. This allows
 you to train your own model on any set of images that corresponds to any type of objects/persons.
The training process generates a JSON file that maps the objects types in your image dataset
and creates lots of models. You will then pick the model with the highest accuracy and perform custom
image prediction using the model and the JSON file generated.

### TABLE OF CONTENTS
- <a href="#customtraining" > :white_square_button: Custom Model Training Prediction</a> 
- <a href="#savefullmodel" > :white_square_button: Saving Full Custom Model </a> 
- <a href="#idenproftraining" > :white_square_button: Training on the IdenProf Dataset</a> 
- <a href="#continuoustraining" > :white_square_button: Continuous Model Training </a> 
- <a href="#transferlearning" > :white_square_button: Transfer Learning (Training from a pre-trained model)</a>


### Custom Model Training
<div id="customtraining"></div>

Because model training is a compute-intensive task, we strongly advise you to perform this experiment using a computer with an NVIDIA GPU and the GPU version of PyTorch installed. Performing model training on CPU may take hours or days. With an NVIDIA GPU powered computer system, this will take a few hours. You can use Google Colab for this experiment as it has an NVIDIA K80 GPU available.

To train a custom prediction model, you need to prepare the images you want to use to train the model.
You will prepare the images as follows:

1. Create a dataset folder with the name you will like your dataset to be called (e.g pets) 
2. In the dataset folder, create a folder by the name **train** 
3. In the dataset folder, create a folder by the name **test** 
4. In the train folder, create a folder for each object you want the model to predict and give the folder a name that corresponds to the respective object name (e.g dog, cat, squirrel, snake) 
5. In the test folder, create a folder for each object you want the model to predict and give
 the folder a name that corresponds to the respective object name (e.g dog, cat, squirrel, snake) 
6. In each folder present in the train folder, put the images of each object in its respective folder. These images are the ones to be used to train the model. To produce a model that can perform well in practical applications, I recommend you use about 500 or more images per object. 1000 images per object is just great 
7. In each folder present in the test folder, put about 100 to 200 images of each object in its respective folder. These images are the ones to be used to test the model as it trains 
8. Once you have done this, the structure of your image dataset folder should look like below:  
    ```
    pets//train//dog//dog-train-images
    pets//train//cat//cat-train-images
    pets//train//squirrel//squirrel-train-images
    pets//train//snake//snake-train-images 
    pets//test//dog//dog-test-images
    pets//test//cat//cat-test-images
    pets//test//squirrel//squirrel-test-images
    pets//test//snake//snake-test-images
    ```
9. Then your training code goes as follows:  
    ```python
    from imageai.Classification.Custom import ClassificationModelTrainer
    model_trainer = ClassificationModelTrainer()
    model_trainer.setModelTypeAsResNet50()
    model_trainer.setDataDirectory("pets")
    model_trainer.trainModel(num_experiments=100, batch_size=32)
    ```

 Yes! Just 5 lines of code and you can train any of the 4 available state-of-the-art Deep Learning algorithms on your custom dataset.
Now let's take a look at how the code above works.

```python
from imageai.Classification.Custom import ClassificationModelTrainer
model_trainer = ClassificationModelTrainer()
model_trainer.setModelTypeAsResNet50()
model_trainer.setDataDirectory("pets")
```

In the first line, we import the **ImageAI** model training class, then we define the model trainer in the second line,
 we set the network type in the third line and set the path to the image dataset we want to train the network on.

```python
model_trainer.trainModel(num_experiments=100, batch_size=32)
```

In the code above, we start the training process. The parameters stated in the function are as below:
- **num_experiments** : this is to state the number of times the network will train over all the training images,
 which is also called epochs 
- **batch_size** : This is to state the number of images the network will process at once. The images
 are processed in batches until they are exhausted in each experiment performed. 


When you start the training, you should see something like this in the console:

```
==================================================
Training with GPU
==================================================
Epoch 1/100
----------
100%|█████████████████████████████████████████████████████████████████████████████████| 282/282 [02:15<00:00,  2.08it/s]
train Loss: 3.8062 Accuracy: 0.1178
100%|███████████████████████████████████████████████████████████████████████████████████| 63/63 [00:26<00:00,  2.36it/s]
test Loss: 2.2829 Accuracy: 0.1215
Epoch 2/100
----------
100%|█████████████████████████████████████████████████████████████████████████████████| 282/282 [01:57<00:00,  2.40it/s]
train Loss: 2.2682 Accuracy: 0.1303
100%|███████████████████████████████████████████████████████████████████████████████████| 63/63 [00:20<00:00,  3.07it/s]
test Loss: 2.2388 Accuracy: 0.1470
```

Let us explain the details shown above: 
1. The line **Epoch 1/100** means the network is training the first experiment of the targeted 100 
2. The progress bar line (e.g. `100%|████| 282/282 [02:15<00:00,  2.08it/s]`) shows the number of batches that have been processed in the present experiment
3. The best model is automatically saved to `<dataset-directory>/models`
 
 Once you are done training your custom model, you can use the "CustomImageClassification" class to perform image prediction with your model. Simply follow the link below.
[imageai/Classification/CUSTOMCLASSIFICATION.md](https://github.com/OlafenwaMoses/ImageAI/blob/master/imageai/Classification/CUSTOMCLASSIFICATION.md)


### Documentation

We have provided full documentation for all **ImageAI** classes and functions. Find links below:

* Documentation - **English Version  [https://imageai.readthedocs.io](https://imageai.readthedocs.io)**


================================================
FILE: imageai/Classification/Custom/__init__.py
================================================
import time, warnings
import os
import copy
import re
import json
from typing import List, Tuple, Union
from PIL import Image
import numpy as np

import torch
import torch.nn as nn
from torch.optim import lr_scheduler
from torchvision import datasets
from torchvision import transforms
from torchvision.models import mobilenet_v2, inception_v3, resnet50, densenet121
from torchvision.models.inception import InceptionOutputs

from .data_transformation import data_transforms1, data_transforms2
from .training_params import resnet50_train_params, densenet121_train_params, inception_v3_train_params, mobilenet_v2_train_params
from tqdm import tqdm

from ...backend_check.model_extension import extension_check


class ClassificationModelTrainer():
    """
        This is the Classification Model training class, that allows you to define a deep learning network
        from the 4 available networks types supported by ImageAI which are MobileNetv2, ResNet50,
        InceptionV3 and DenseNet121 and then train on custom image data.

        Typical usage: call one of the 'setModelTypeAs...()' functions, then 'setDataDirectory()',
        optionally 'freezeAllLayers()' / 'fineTuneAllLayers()', and finally 'trainModel()'.
    """

    def __init__(self) -> None:
        self.__model_type = ""
        self.__device = "cuda" if torch.cuda.is_available() else "cpu"
        self.__data_dir = ""
        self.__data_loaders = None
        self.__class_names = None
        self.__dataset_sizes = None
        self.__dataset_name = ""
        self.__model = None
        self.__optimizer = None
        self.__lr_scheduler = None
        self.__loss_fn = nn.CrossEntropyLoss()
        self.__transfer_learning_mode = "fine_tune_all"
        self.__model_path = ""
        self.__training_params = None

    def __set_training_param(self) -> None:
        """
        Builds the model, optimizer and (optional) learning-rate scheduler from the
        training parameters of the selected model type.

        Requires the model type to be set and the dataset to be loaded, because the
        final linear layer is resized to the number of dataset classes.

        :raises RuntimeError: if no model type has been set
        """
        if not self.__model_type:
            raise RuntimeError("The model type is not set!!!")

        params = self.__training_params
        self.__model = params["model"]
        optimizer = params["optimizer"]
        lr_decay_rate = params["lr_decay_rate"]
        lr_step_size = params["lr_step_size"]
        lr = params["lr"]
        weight_decay = params["weight_decay"]

        if self.__model_path:
            self.__set_transfer_learning_mode()
            print("==> Transfer learning enabled")

        # change the last linear layer to have output features of
        # same size as the number of unique classes in the new
        # dataset. This happens after any freezing above, so the new
        # layer is always trainable.
        num_classes = len(self.__class_names)
        if self.__model_type == "mobilenet_v2":
            in_features = self.__model.classifier[1].in_features
            self.__model.classifier[1] = nn.Linear(in_features, num_classes)
        elif self.__model_type == "densenet121":
            in_features = self.__model.classifier.in_features
            self.__model.classifier = nn.Linear(in_features, num_classes)
        else:
            in_features = self.__model.fc.in_features
            self.__model.fc = nn.Linear(in_features, num_classes)

        self.__model.to(self.__device)
        self.__optimizer = optimizer(
                    self.__model.parameters(),
                    lr=lr,
                    momentum=0.9,
                    weight_decay=weight_decay
                )
        if lr_decay_rate and lr_step_size:
            self.__lr_scheduler = lr_scheduler.StepLR(
                                self.__optimizer,
                                gamma=lr_decay_rate,
                                step_size=lr_step_size
                            )

    def __set_transfer_learning_mode(self) -> None:
        """
        Loads the pre-trained weights from the configured model path into the model and,
        in "freeze_all" mode, disables gradients for every loaded parameter.
        """
        # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
        state_dict = torch.load(self.__model_path, map_location=self.__device)
        if self.__model_type == "densenet121":
            # '.'s are no longer allowed in module names, but previous densenet layers
            # as provided by the pytorch organization has names that uses '.'s.
            pattern = re.compile(
                    r"^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\."
                    "(?:weight|bias|running_mean|running_var))$"
                    )
            for key in list(state_dict.keys()):
                res = pattern.match(key)
                if res:
                    new_key = res.group(1) + res.group(2)
                    state_dict[new_key] = state_dict[key]
                    del state_dict[key]

        self.__model.load_state_dict(state_dict)
        self.__model.to(self.__device)

        if self.__transfer_learning_mode == "freeze_all":
            for param in self.__model.parameters():
                param.requires_grad = False

    def __load_data(self, batch_size : int = 8) -> None:
        """
        Creates the 'train' and 'test' datasets and data loaders from the data directory and
        records the dataset sizes, class names and dataset name.

        :param batch_size: number of images per batch for both loaders
        :raises RuntimeError: if the dataset directory has not been set
        """
        if not self.__data_dir:
            raise RuntimeError("The dataset directory not yet set.")

        phases = ["train", "test"]
        # InceptionV3 uses its own set of transforms.
        transforms_by_phase = data_transforms2 if self.__model_type == "inception_v3" else data_transforms1
        image_dataset = {
                        x:datasets.ImageFolder(
                                os.path.join(self.__data_dir, x),
                                transforms_by_phase[x]
                            )
                        for x in phases
                    }
        self.__data_loaders = {
                        x:torch.utils.data.DataLoader(
                                image_dataset[x], batch_size=batch_size,
                                shuffle=True
                            )
                        for x in phases
                    }
        self.__dataset_sizes = {x:len(image_dataset[x]) for x in phases}
        self.__class_names = image_dataset["train"].classes
        # normpath drops trailing separators ('/' as well as os.sep), so the
        # directory name is recovered even for paths such as "pets/".
        self.__dataset_name = os.path.basename(os.path.normpath(self.__data_dir))

    def setDataDirectory(self, data_directory : str = "") -> None:
        """
        Sets the directory that contains the training and test dataset. The data directory should contain 'train' and 'test' subdirectories
        for the training and test datasets.

        In each of these subdirectories, each object must have a dedicated folder and the folder containing images for the object.

        The structure of the 'test' and 'train' folder must be as follows:
        
        >> train >> class1 >> class1_train_images
                    >> class2 >> class2_train_images
                    >> class3 >> class3_train_images
                    >> class4 >> class4_train_images
                    >> class5 >> class5_train_images
        >> test >> class1 >> class1_test_images
                >> class2 >> class2_test_images
                >> class3 >> class3_test_images
                >> class4 >> class4_test_images
                >> class5 >> class5_test_images

        :param data_directory: path to an existing directory
        :raises ValueError: if 'data_directory' is not an existing directory
        """
        if not os.path.isdir(data_directory):
            raise ValueError("expected a path to a directory")
        self.__data_dir = data_directory

    def setModelTypeAsMobileNetV2(self) -> None:
        """
        'setModelTypeAsMobileNetV2()' is used to set the model type to the MobileNetV2 model.
        :return:
        """
        self.__model_type = "mobilenet_v2"
        self.__training_params = mobilenet_v2_train_params()

    def setModelTypeAsResNet50(self) -> None:
        """
        'setModelTypeAsResNet50()' is used to set the model type to the ResNet50 model.
        :return:
        """
        self.__model_type = "resnet50"
        self.__training_params = resnet50_train_params()

    def setModelTypeAsInceptionV3(self) -> None:
        """
        'setModelTypeAsInceptionV3()' is used to set the model type to the InceptionV3 model.
        :return:
        """
        self.__model_type = "inception_v3"
        self.__training_params = inception_v3_train_params()

    def setModelTypeAsDenseNet121(self) -> None:
        """
        'setModelTypeAsDenseNet121()' is used to set the model type to the DenseNet121 model.
        :return:
        """
        self.__model_type = "densenet121"
        self.__training_params = densenet121_train_params()

    def freezeAllLayers(self) -> None:
        """
        Set the transfer learning mode to freeze all layers.

        The mode is only applied when 'trainModel()' is given a 'transfer_from_model' path.

        NOTE: The last layer (fully connected layer) is trainable.
        """
        self.__transfer_learning_mode = "freeze_all"

    def fineTuneAllLayers(self) -> None:
        """
        Sets the transfer learning mode to fine-tune the pretrained weights
        """
        self.__transfer_learning_mode = "fine_tune_all"

    def trainModel(
                self,
                num_experiments : int = 100,
                batch_size : int = 8,
                model_directory  : str = None,
                transfer_from_model: str = None,
                verbose : bool = True
            ) -> None:
        
        """
        'trainModel()' function starts the model actual training. It accepts the following values:
        - num_experiments: Also known as epochs, is the number of times the network will process all the images in the training dataset
        - batch_size: The number of image data that will be loaded into memory at once during training
        - model_directory: Location where json mapping and trained models will be saved; defaults to a 'models' folder inside the data directory
        - transfer_from_model: Path to a pre-trained imagenet model that corresponds to the training model type
        - verbose: Option to enable/disable training logs

        After every epoch in which the test accuracy improves, the model weights are saved to
        'model_directory' and the previously saved weights file is removed.
        
        :param num_experiments:
        :param batch_size:
        :param model_directory:
        :param transfer_from_model:
        :param verbose:
        :return:
        """

        # Load dataset
        self.__load_data(batch_size)

        # Check and effect transfer learning if enabled
        if transfer_from_model:
            extension_check(transfer_from_model)
            self.__model_path = transfer_from_model

        # Load training parameters for the specified model type
        self.__set_training_param()

        # Create output directory to save trained models and json mappings.
        # makedirs also creates missing parent folders and tolerates an existing directory.
        if not model_directory:
            model_directory = os.path.join(self.__data_dir, "models")
        os.makedirs(model_directory, exist_ok=True)

        # Dump class mappings to json file
        classes_dict = {str(i): name for i, name in enumerate(sorted(self.__class_names))}
        with open(os.path.join(model_directory, f"{self.__dataset_name}_model_classes.json"), "w") as f:
            json.dump(classes_dict, f)

        since = time.time()

        best_acc = 0.0
        prev_save_name, recent_save_name = "", ""

        # Device check and log
        print("=" * 50)
        if self.__device == "cuda":
            print("Training with GPU")
        else:
            print("Training with CPU. This might cause slower train.")
        print("=" * 50)

        for epoch in range(num_experiments):
            if verbose:
                print(f"Epoch {epoch + 1}/{num_experiments}", "-"*10, sep="\n")

            # each epoch has a training and test phase
            for phase in ["train", "test"]:
                is_training = phase == "train"
                if is_training:
                    self.__model.train()
                else:
                    self.__model.eval()

                running_loss = 0.0
                running_corrects = 0

                # Iterate on the dataset in batches
                for imgs, labels in tqdm(self.__data_loaders[phase]):
                    imgs = imgs.to(self.__device)
                    labels = labels.to(self.__device)

                    self.__optimizer.zero_grad()

                    # Gradients are only tracked during the training phase
                    with torch.set_grad_enabled(is_training):
                        output = self.__model(imgs)
                        # In this case only the first element of the InceptionOutputs tuple is used
                        if self.__model_type == "inception_v3" and isinstance(output, InceptionOutputs):
                            output = output[0]
                        _, preds = torch.max(output, 1)
                        loss = self.__loss_fn(output, labels)

                        if is_training:
                            loss.backward()
                            self.__optimizer.step()
                    running_loss += loss.item() * imgs.size(0)
                    running_corrects += torch.sum(preds == labels.data).item()

                # Advance the learning-rate schedule once per training epoch
                if is_training and isinstance(self.__lr_scheduler, lr_scheduler.StepLR):
                    self.__lr_scheduler.step()

                # Compute accuracy and loss metrics post epoch training
                epoch_loss = running_loss / self.__dataset_sizes[phase]
                epoch_acc = running_corrects / self.__dataset_sizes[phase]

                if verbose:
                    print(f"{phase} Loss: {epoch_loss:.4f} Accuracy: {epoch_acc:.4f}")
                if phase == "test" and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    recent_save_name = self.__model_type+f"-{self.__dataset_name}-test_acc_{best_acc:.5f}_epoch-{epoch}.pt"
                    # Write the new best weights first so a failed save never
                    # leaves the directory without a checkpoint.
                    torch.save(
                            self.__model.state_dict(), os.path.join(model_directory, recent_save_name)
                        )
                    if prev_save_name and prev_save_name != recent_save_name:
                        os.remove(os.path.join(model_directory, prev_save_name))
                    prev_save_name = recent_save_name

        time_elapsed = time.time() - since
        print(f"Training completed in {time_elapsed//60:.0f}m {time_elapsed % 60:.0f}s")
        print(f"Best test accuracy: {best_acc:.4f}")


class CustomImageClassification:
    """
    An implementation that allows for easy classification of images
    using a state-of-the-art computer vision classification model
    trained on custom data.

    The class provides 4 different classification models which are ResNet50, DenseNet121, InceptionV3 and MobileNetV2.

    The following functions are required to be called before a classification can be made

    * One of the following, and it must correspond to the model file set in setModelPath()
    [setModelTypeAsMobileNetV2(), setModelTypeAsResNet50(), setModelTypeAsDenseNet121(), setModelTypeAsInceptionV3()]

    * setModelPath: This is used to specify the absolute path to the trained model file.

    * setJsonPath: This is used to specify the absolute path to the
    json file saved during the training of the custom model.

    * useCPU (Optional): If you will like to force the image classification to be performed on CPU, call this function.

    * loadModel: Used to load the trained model weights and json data.

    * classifyImage(): Used for classifying an image.
    """
    def __init__(self) -> None:
        self.__model = None
        self.__model_type = ""
        # Initialised here so that loadModel() can report a missing path explicitly.
        self.__model_path = None
        self.__model_loaded = False
        self.__device = "cuda" if torch.cuda.is_available() else "cpu"
        self.__json_path = None
        self.__class_names = None

    def __load_image(self, image_input: Union[str, np.ndarray, Image.Image]) -> torch.Tensor:
        """
        Converts a file path, numpy array or PIL image into a preprocessed
        batch tensor containing exactly one image.

        :param image_input: path to an image file, a numpy array or a PIL image
        :return: the stacked tensor holding the single preprocessed image
        :raises ValueError: if the path is not an existing file or the input type is unsupported
        """
        # NOTE(review): the crop size is fixed at 224 for every model type, while
        # 'data_transforms2' in data_transformation.py uses 299 -- confirm whether
        # InceptionV3 models are expected to use a 299 crop here as well.
        preprocess = transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])
        if isinstance(image_input, str):
            if not os.path.isfile(image_input):
                raise ValueError(f"image path '{image_input}' is not found or a valid file")
            # The context manager releases the file handle once the RGB copy exists.
            with Image.open(image_input) as opened:
                img = opened.convert("RGB")
        elif isinstance(image_input, np.ndarray):
            img = Image.fromarray(image_input).convert("RGB")
        elif isinstance(image_input, Image.Image):
            img = image_input.convert("RGB")
        else:
            raise ValueError("Invalid image input format")

        return torch.stack([preprocess(img)])

    def __load_classes(self) -> None:
        """
        Reads the json mapping file and stores its values, in file order, as the class names.

        :raises ValueError: if no json path has been set
        """
        if self.__json_path:
            with open(self.__json_path, 'r') as f:
                self.__class_names = list(json.load(f).values())
        else:
            raise ValueError("Invalid json path. Set a valid json mapping path by calling the 'setJsonPath()' function")

    def setModelPath(self, path : str) -> None:
        """
        Sets the path to the trained model weight file and marks the model as
        not loaded, so that the next 'loadModel()' call reads the new file.

        :param path: path to an existing model file
        :raises ValueError: if 'path' is not an existing file
        """
        if os.path.isfile(path):
            extension_check(path)
            self.__model_path = path
            self.__model_loaded = False
        else:
            raise ValueError(
                f"The path '{path}' isn't a valid file. Ensure you specify the path to a valid trained model file."
            )

    def setJsonPath(self, path : str) -> None:
        """
        Sets the path to the json file that maps class indexes to class names.

        :param path: path to an existing json mapping file
        :raises ValueError: if 'path' is not an existing file
        """
        if os.path.isfile(path):
            self.__json_path = path
        else:
            raise ValueError(
            "parameter path should be a valid path to the json mapping file."
            )

    def setModelTypeAsMobileNetV2(self) -> None:
        """
        'setModelTypeAsMobileNetV2()' is used to set the model type to the MobileNetV2 model.
        :return:
        """
        self.__model_type = "mobilenet_v2"

    def setModelTypeAsResNet50(self) -> None:
        """
        'setModelTypeAsResNet50()' is used to set the model type to the ResNet50 model.
        :return:
        """
        self.__model_type = "resnet50"

    def setModelTypeAsInceptionV3(self) -> None:
        """
        'setModelTypeAsInceptionV3()' is used to set the model type to the InceptionV3 model.
        :return:
        """
        self.__model_type = "inception_v3"

    def setModelTypeAsDenseNet121(self) -> None:
        """
        'setModelTypeAsDenseNet121()' is used to set the model type to the DenseNet121 model.
        :return:
        """
        self.__model_type = "densenet121"

    def useCPU(self) -> None:
        """
        Used to force classification to be done on CPU.
        By default, classification will occur on GPU compute if available else CPU compute.
        If a model is already loaded, it is reloaded so that it lands on the CPU.
        """
        self.__device = "cpu"
        if self.__model_loaded:
            self.__model_loaded = False
            self.loadModel()

    def loadModel(self) -> None:
        """
        'loadModel()' function is used to load the model weights into the model architecture from the file path defined
        in the setModelPath() function. It does nothing if the model is already loaded.

        :raises ValueError: if the json mapping path has not been set
        :raises Exception: if building the model or loading the weights fails; the
            original error is attached as the exception's cause
        :return:
        """
        if self.__model_loaded:
            return

        self.__load_classes()
        try:
            if self.__model_path is None:
                raise ValueError(
                    "Model path not specified. Call '.setModelPath()' before loading the model."
                )

            # change the last layer of the networks to conform to the number
            # of unique classes in the custom dataset used to train the custom
            # model
            num_classes = len(self.__class_names)

            if self.__model_type == "resnet50":
                self.__model = resnet50(pretrained=False)
                in_features = self.__model.fc.in_features
                self.__model.fc = nn.Linear(in_features, num_classes)
            elif self.__model_type == "mobilenet_v2":
                self.__model = mobilenet_v2(pretrained=False)
                in_features = self.__model.classifier[1].in_features
                self.__model.classifier[1] = nn.Linear(in_features, num_classes)
            elif self.__model_type == "inception_v3":
                self.__model = inception_v3(pretrained=False)
                in_features = self.__model.fc.in_features
                self.__model.fc = nn.Linear(in_features, num_classes)
            elif self.__model_type == "densenet121":
                self.__model = densenet121(pretrained=False)
                in_features = self.__model.classifier.in_features
                self.__model.classifier = nn.Linear(in_features, num_classes)
            else:
                raise RuntimeError("Unknown model type.\nEnsure the model type is properly set.")

            state_dict = torch.load(self.__model_path, map_location=self.__device)

            if self.__model_type == "densenet121":
                # '.'s are no longer allowed in module names, but previous densenet layers
                # as provided by the pytorch organization has names that uses '.'s.
                # Matching keys are renamed by dropping the '.' between the two groups.
                pattern = re.compile(
                        r"^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\."
                                "(?:weight|bias|running_mean|running_var))$"
                        )
                for key in list(state_dict.keys()):
                    res = pattern.match(key)
                    if res:
                        new_key = res.group(1) + res.group(2)
                        state_dict[new_key] = state_dict[key]
                        del state_dict[key]

            self.__model.load_state_dict(state_dict)
            self.__model.to(self.__device).eval()
            self.__model_loaded = True

        except Exception as e:
            # Chain the original error so the real cause (unknown model type,
            # missing path, mismatched weights, ...) stays visible to the caller.
            raise Exception("Weight loading failed.\nEnsure the model path is"
                " set and the weight file is in the specified model path.") from e

    def classifyImage(self, image_input: Union[str, np.ndarray, Image.Image], result_count: int) -> Tuple[List[str], List[float]]:
        """
        'classifyImage()' function is used to classify a given image by receiving the following arguments:
            * image_input: file path, numpy array or PIL image of the input image.
            * result_count: the number of classifications to return, which must be a whole number between 1 and the total number of classes the model is trained to classify.

        This function returns 2 arrays namely 'classification_results' and 'classification_probabilities'. The 'classification_results'
        contains possible objects classes arranged in descending order of their percentage probabilities. The 'classification_probabilities'
        contains the percentage probability of each object class. The position of each object class in the 'classification_results'
        array corresponds with the positions of the percentage probability in the 'classification_probabilities' array.

        :param image_input:
        :param result_count:
        :raises RuntimeError: if 'loadModel()' has not been called successfully
        :return classification_results, classification_probabilities:
        """
        if not self.__model_loaded:
            raise RuntimeError(
                "Model not yet loaded. You need to call '.loadModel()' before performing image classification"
            )

        images = self.__load_image(image_input)
        images = images.to(self.__device)

        with torch.no_grad():
            output = self.__model(images)
        probabilities = torch.softmax(output, dim=1)
        topN_prob, topN_catid = torch.topk(probabilities, result_count)

        labels_pred = []
        probabilities_pred = []

        # Flatten the (batch, result_count) results into two parallel lists,
        # converting each probability into a percentage rounded to 4 decimals.
        for row_prob, row_catid in zip(topN_prob, topN_catid):
            for prob, catid in zip(row_prob, row_catid):
                labels_pred.append(self.__class_names[int(catid)])
                probabilities_pred.append(round(prob.item() * 100, 4))

        return labels_pred, probabilities_pred

================================================
FILE: imageai/Classification/Custom/data_transformation.py
================================================
from torchvision import transforms

# Train/test preprocessing pipelines that produce 224x224 crops.
# "train" applies random augmentation; "test" is deterministic.
data_transforms1 = dict(
    train=transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # per-channel mean and standard deviation
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    test=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        # per-channel mean and standard deviation
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
)

# Train/test preprocessing pipelines that produce 299x299 crops.
# "train" applies random augmentation; "test" is deterministic.
data_transforms2 = dict(
    train=transforms.Compose([
        transforms.RandomResizedCrop(299),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # per-channel mean and standard deviation
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    test=transforms.Compose([
        transforms.Resize(299),
        transforms.CenterCrop(299),
        transforms.ToTensor(),
        # per-channel mean and standard deviation
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
)


================================================
FILE: imageai/Classification/Custom/training_params.py
================================================
import torch
from torch.optim import SGD
from torchvision.models import resnet50, inception_v3, mobilenet_v2, densenet121

# Module-level ResNet50 instance, constructed without pretrained weights as soon as
# this module is imported. None of the *_train_params() functions below read it;
# each of them builds its own local model.
model = resnet50(pretrained=False)


def resnet50_train_params():
    """
    Builds the training configuration for ResNet50.

    :return: a dict holding a freshly constructed ResNet50 (no pretrained weights)
             under "model", the SGD optimizer class under "optimizer", the
             "weight_decay" and "lr" values, and None for both learning-rate
             decay entries.
    """
    return dict(
        model=resnet50(pretrained=False),
        optimizer=SGD,
        weight_decay=1e-4,
        lr=0.1,
        lr_decay_rate=None,
        lr_step_size=None,
    )

def inception_v3_train_params():
    """
    Builds the training configuration for InceptionV3.

    :return: a dict holding a freshly constructed InceptionV3 (no pretrained
             weights, 'init_weights' disabled) under "model", the SGD optimizer
             class under "optimizer", and the "weight_decay", "lr",
             "lr_decay_rate" and "lr_step_size" values.
    """
    return dict(
        model=inception_v3(pretrained=False, init_weights=False),
        optimizer=SGD,
        weight_decay=0,
        lr=0.045,
        lr_decay_rate=0.94,
        lr_step_size=2,
    )

def mobilenet_v2_train_params():
    """
    Builds the training configuration for MobileNetV2.

    :return: a dict holding a freshly constructed MobileNetV2 (no pretrained
             weights) under "model", the SGD optimizer class under "optimizer",
             and the "weight_decay", "lr", "lr_decay_rate" and "lr_step_size"
             values.
    """
    return dict(
        model=mobilenet_v2(pretrained=False),
        optimizer=SGD,
        weight_decay=4e-5,
        lr=0.045,
        lr_decay_rate=0.98,
        lr_step_size=1,
    )

def densenet121_train_params():
    """
    Builds the training configuration for DenseNet121.

    :return: a dict holding a freshly constructed DenseNet121 (no pretrained
             weights) under "model", the SGD optimizer class under "optimizer",
             the "weight_decay" and "lr" values, and None for both
             learning-rate decay entries.
    """
    return dict(
        model=densenet121(pretrained=False),
        optimizer=SGD,
        weight_decay=1e-4,
        lr=0.1,
        lr_decay_rate=None,
        lr_step_size=None,
    )

================================================
FILE: imageai/Classification/README.md
================================================
# ImageAI : Image Classification


## ---------------------------------------------------
## Introducing Jarvis and TheiaEngine.

We the creators of ImageAI are glad to announce 2 new AI projects to provide state-of-the-art Generative AI, LLM and Image Understanding on your personal computer and servers. 


[![](../../jarvis.png)](https://jarvis.genxr.co)

Install Jarvis on PC/Mac to setup limitless access to LLM powered AI Chats for your every day work, research and generative AI needs with 100% privacy and full offline capability.


Visit [https://jarvis.genxr.co](https://jarvis.genxr.co/) to get started.


[![](../../theiaengine.png)](https://www.genxr.co/theia-engine)


[TheiaEngine](https://www.genxr.co/theia-engine), the next-generation computer Vision AI API capable of all Generative and Understanding computer vision tasks in a single API call and available via REST API to all programming languages. Features include
- **Detect 300+ objects** ( 220 more objects than ImageAI)
- **Provide answers to any content or context questions** asked on an image
  - very useful to get information on any object, action or information without needing to train a new custom model for every task
-  **Generate scene description and summary**
-  **Convert 2D image to 3D pointcloud and triangular mesh**
-  **Semantic Scene mapping of objects, walls, floors, etc**
-  **Stateless Face recognition and emotion detection**
-  **Image generation and augmentation from prompt**
-  etc.

Visit [https://www.genxr.co/theia-engine](https://www.genxr.co/theia-engine) to try the demo and join in the beta testing today.
## ---------------------------------------------------

### TABLE OF CONTENTS
- <a href="#firstprediction" > :white_square_button: First Prediction</a>
- <a href="#documentation" > :white_square_button: Documentation</a>

ImageAI provides 4 different algorithms and model types to perform image prediction.
To perform image prediction on any picture, take the following simple steps.  The 4 algorithms provided for
 image prediction are **MobileNetV2**, **ResNet50**, **InceptionV3** and **DenseNet121**. Each of these
  algorithms has its own model file, which you must use depending on your choice of algorithm. To download the
   model file for your choice of algorithm, click on any of the links below:
   
- **[MobileNetV2](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/mobilenet_v2-b0353104.pth)** _(Size = 4.82 mb, fastest prediction time and moderate accuracy)_
- **[ResNet50](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/resnet50-19c8e357.pth)** by Microsoft Research _(Size = 98 mb, fast prediction time and high accuracy)_
 - **[InceptionV3](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/inception_v3_google-1a9a5a14.pth)** by Google Brain team _(Size = 91.6 mb, slow prediction time and higher accuracy)_
 - **[DenseNet121](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/densenet121-a639ec97.pth)** by Facebook AI Research _(Size = 31.6 mb, slower prediction time and highest accuracy)_

 Great! Once you have downloaded this model file, start a new python project, and then copy the model file to your project
     folder where your python files (.py files) will be. Download the image below, or take any image on your computer
 and copy it to your python project's folder. Then create a python file and give it a name; an example is `FirstPrediction.py`.
      Then write the code below into the python file:
      
### FirstPrediction.py
<div id="firstprediction" ></div>

```python
from imageai.Classification import ImageClassification
import os

execution_path = os.getcwd()

prediction = ImageClassification()
prediction.setModelTypeAsResNet50()
prediction.setModelPath(os.path.join(execution_path, "resnet50-19c8e357.pth"))
prediction.loadModel()

predictions, probabilities = prediction.classifyImage(os.path.join(execution_path, "1.jpg"), result_count=5 )
for eachPrediction, eachProbability in zip(predictions, probabilities):
    print(eachPrediction , " : " , eachProbability)
```

Sample Result:
![](../../data-images/1.jpg)

```
convertible : 52.459555864334106
sports_car : 37.61284649372101
pickup : 3.1751200556755066
car_wheel : 1.817505806684494
minivan : 1.7487050965428352
```

The code above works as follows:
```python
from imageai.Classification import ImageClassification
import os
```
The code above imports the `ImageClassification` class from the `ImageAI` library and the python `os` module.
```python
execution_path = os.getcwd()
```
The above line obtains the path to the current working directory, which is the folder you run your python file from (in this example, the folder that contains your FirstPrediction.py).

```python
prediction = ImageClassification()
prediction.setModelTypeAsResNet50()
prediction.setModelPath(os.path.join(execution_path, "resnet50-19c8e357.pth"))
```
In the lines above, we created an instance of the `ImageClassification()` class in the first line, then we set the model type of the prediction object to ResNet50 by calling `.setModelTypeAsResNet50()` in the second line, and then we set the model path of the prediction object to the path of the model file (`resnet50-19c8e357.pth`) we copied to the python file folder in the third line.

```python
predictions, probabilities = prediction.classifyImage(os.path.join(execution_path, "1.jpg"), result_count=5 )
```

In the above line, we defined 2 variables to hold the values returned by the function called to predict an image, which is the `.classifyImage()` function, into which we passed the path to our image and also stated the number of prediction results we want to have (values from 1 to 1000) by passing `result_count=5`. The `.classifyImage()` function will return 2 array objects with the first (**predictions**) being an array of predictions and the second (**probabilities**) being an array of the corresponding percentage probability for each prediction.

```python
for eachPrediction, eachProbability in zip(predictions, probabilities):
    print(eachPrediction, " : " , eachProbability)
```
The above lines obtain each object in the **predictions** array, and also obtain the corresponding percentage probability from the **probabilities** array, and finally print both to the console.


### Documentation

We have provided full documentation for all **ImageAI** classes and functions. Find links below:

* Documentation - **English Version  [https://imageai.readthedocs.io](https://imageai.readthedocs.io)**


================================================
FILE: imageai/Classification/__init__.py
================================================
import os, re
from typing import Union
from typing import List, Tuple
import numpy as np
import torch
from torchvision.models import resnet50, densenet121, mobilenet_v2, inception_v3
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image
import traceback
from ..backend_check.model_extension import extension_check

# Registry of the supported architectures, keyed by model-type name.
# Every entry holds ONE model instance (no pretrained weights) that is built
# when this module is imported, in the order listed below.
classification_models = {
    model_name: {"model": build_model(pretrained=False)}
    for model_name, build_model in (
        ("resnet50", resnet50),
        ("densenet121", densenet121),
        ("inceptionv3", inception_v3),
        ("mobilenetv2", mobilenet_v2),
    )
}

class ImageClassification:
    """
    This is the image classification class in the ImageAI library. It allows you to classify objects into all the 1000 different classes in the ImageNet dataset [ https://www.kaggle.com/c/imagenet-object-localization-challenge/overview/description ].

    The class provides 4 different classification models which are ResNet50, DenseNet121, InceptionV3 and MobileNetV2.

    The following functions are required to be called before a classification can be made

    * One of the following, and it must correspond to the model file set in setModelPath()
    [setModelTypeAsMobileNetV2(), setModelTypeAsResNet50(), setModelTypeAsDenseNet121(), setModelTypeAsInceptionV3()]

    * setModelPath: This is used to specify the absolute path to a pretrained model file. Download any of the files in this release -> https://github.com/OlafenwaMoses/ImageAI/releases/tag/3.0.0-pretrained

    * useCPU (Optional): If you will like to force the image classification to be performed on CPU, call this function.

    * loadModel: Used to load the pretrained model weights

    * classifyImage(): Used for classifying an image.

    """
    def __init__(self) -> None:
        self.__model_type: str = None
        self.__model: Union[torch.nn.Module, None] = None
        self.__model_path: str = None
        # The class-name list ships next to this module.
        self.__classes_path: str = os.path.join(os.path.dirname(os.path.abspath(__file__)), "imagenet_classes.txt")
        self.__model_loaded: bool = False
        self.__device: str = "cuda" if torch.cuda.is_available() else "cpu"
        self.__classes: List[str] = []

    def setModelPath(self, path: str) -> None:
        """
        'setModelPath()' function is required and is used to set the file path to the model adopted from the list of the
        available 4 model types. The model path must correspond to the model type set for the classification instance object.
        :param path:
        :raises ValueError: if 'path' is not an existing file
        :return:
        """
        if os.path.isfile(path):
            extension_check(path)
            self.__model_path = path
        else:
            raise ValueError(
                f"The path '{path}' isn't a valid file. Ensure you specify the path to a valid trained model file."
            )

    def __load_classes(self) -> None:
        """
        Reads the class-name file and stores one stripped class name per line.
        """
        with open(self.__classes_path) as f:
            self.__classes = [c.strip() for c in f.readlines()]

    def __load_image(self, image_input: Union[str, np.ndarray, Image.Image]) -> torch.Tensor:
        """
        Converts a file path, numpy array or PIL image into a preprocessed
        batch tensor containing exactly one image.

        :param image_input: path to an image file, a numpy array or a PIL image
        :return: the stacked tensor holding the single preprocessed image
        :raises ValueError: if the path is not an existing file or the input type is unsupported
        """
        preprocess = transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])
        if isinstance(image_input, str):
            if not os.path.isfile(image_input):
                raise ValueError(f"image path '{image_input}' is not found or a valid file")
            # The context manager releases the file handle once the RGB copy exists.
            with Image.open(image_input) as opened:
                img = opened.convert("RGB")
        elif isinstance(image_input, np.ndarray):
            img = Image.fromarray(image_input).convert("RGB")
        elif isinstance(image_input, Image.Image):
            img = image_input.convert("RGB")
        else:
            raise ValueError("Invalid image input format")

        return torch.stack([preprocess(img)])

    def setModelTypeAsResNet50(self) -> None:
        """
        'setModelTypeAsResNet50()' is used to set the model type to the ResNet50 model.
        It has no effect if a model type has already been set on this instance.
        :return:
        """
        if self.__model_type is None:
            self.__model_type = "resnet50"

    def setModelTypeAsDenseNet121(self) -> None:
        """
        'setModelTypeAsDenseNet121()' is used to set the model type to the DenseNet121 model.
        It has no effect if a model type has already been set on this instance.
        :return:
        """
        if self.__model_type is None:
            self.__model_type = "densenet121"

    def setModelTypeAsInceptionV3(self) -> None:
        """
        'setModelTypeAsInceptionV3()' is used to set the model type to the InceptionV3 model.
        It has no effect if a model type has already been set on this instance.
        :return:
        """
        if self.__model_type is None:
            self.__model_type = "inceptionv3"

    def setModelTypeAsMobileNetV2(self) -> None:
        """
        'setModelTypeAsMobileNetV2()' is used to set the model type to the MobileNetV2 model.
        It has no effect if a model type has already been set on this instance.
        :return:
        """
        if self.__model_type is None:
            self.__model_type = "mobilenetv2"

    def useCPU(self) -> None:
        """
        Used to force classification to be done on CPU.
        By default, classification will occur on GPU compute if available else CPU compute.
        If a model is already loaded, it is reloaded so that it lands on the CPU.
        """
        self.__device = "cpu"
        if self.__model_loaded:
            self.__model_loaded = False
            self.loadModel()

    def loadModel(self) -> None:
        """
        'loadModel()' function is used to load the model weights into the model architecture from the file path defined
        in the setModelPath() function. It does nothing if the model is already loaded.

        Failures are not raised: the traceback and a hint are printed and the
        model stays marked as not loaded, so a later 'classifyImage()' call
        raises a RuntimeError.
        :return:
        """
        if self.__model_loaded:
            return

        try:
            if self.__model_path is None:
                raise ValueError(
                    "Model path not specified. Call '.setModelPath()' and parse the path to the model file before loading the model."
                )

            if self.__model_type not in classification_models:
                raise ValueError(
                    f"Model type '{self.__model_type}' not supported."
                )
            # The registry holds a single instance per model type, so instances of
            # this class that use the same model type share the same module object.
            self.__model = classification_models[self.__model_type]["model"]

            # map_location makes the weights land on the selected device, so a
            # checkpoint saved on a GPU still loads on a CPU-only machine or
            # after useCPU().
            state_dict = torch.load(self.__model_path, map_location=self.__device)
            if self.__model_type == "densenet121":
                # '.'s are no longer allowed in module names, but previous densenet layers
                # as provided by the Pytorch's model zoo has names that uses '.'s.
                # Matching keys are renamed by dropping the '.' between the two groups.
                pattern = re.compile(
                        r"^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\."
                                "(?:weight|bias|running_mean|running_var))$"
                        )
                for key in list(state_dict.keys()):
                    res = pattern.match(key)
                    if res:
                        new_key = res.group(1) + res.group(2)
                        state_dict[new_key] = state_dict[key]
                        del state_dict[key]

            self.__model.load_state_dict(state_dict)
            self.__model.to(self.__device)
            self.__model.eval()
            self.__load_classes()
            # Only mark the model as loaded once every step above has succeeded;
            # otherwise a failed class-file read would leave a "loaded" model
            # with no class names.
            self.__model_loaded = True
        except Exception:
            # print_exc() writes the traceback itself and returns None, so it
            # must not be wrapped in print().
            traceback.print_exc()
            print("Weight loading failed.\nEnsure the model path is"
                " set and the weight file is in the specified model path.")

    def classifyImage(self, image_input: Union[str, np.ndarray, Image.Image], result_count: int=5) -> Tuple[List[str], List[float]]:
        """
        'classifyImage()' function is used to classify a given image by receiving the following arguments:
            * image_input: file path, numpy array or PIL image of the input image.
            * result_count (optional) , the number of classifications to be sent which must be whole numbers between
                1 and 1000. The default is 5.

        This function returns 2 arrays namely 'classification_results' and 'classification_probabilities'. The 'classification_results'
        contains possible objects classes arranged in descending order of their percentage probabilities. The 'classification_probabilities'
        contains the percentage probability of each object class. The position of each object class in the 'classification_results'
        array corresponds with the positions of the percentage probability in the 'classification_probabilities' array.

        :param image_input:
        :param result_count:
        :raises RuntimeError: if 'loadModel()' has not been called successfully
        :return classification_results, classification_probabilities:
        """
        if not self.__model_loaded:
            raise RuntimeError(
                "Model not yet loaded. You need to call '.loadModel()' before performing image classification"
            )

        images = self.__load_image(image_input)
        images = images.to(self.__device)

        with torch.no_grad():
            output = self.__model(images)
        probabilities = torch.softmax(output, dim=1)
        topN_prob, topN_catid = torch.topk(probabilities, result_count)

        labels_pred = []
        probabilities_pred = []

        # Flatten the (batch, result_count) results into two parallel lists,
        # converting each probability into a percentage rounded to 4 decimals.
        for row_prob, row_catid in zip(topN_prob, topN_catid):
            for prob, catid in zip(row_prob, row_catid):
                labels_pred.append(self.__classes[int(catid)])
                probabilities_pred.append(round(prob.item() * 100, 4))

        return labels_pred, probabilities_pred
    

================================================
FILE: imageai/Classification/imagenet_classes.txt
================================================
tench
goldfish
great white shark
tiger shark
hammerhead
electric ray
stingray
cock
hen
ostrich
brambling
goldfinch
house finch
junco
indigo bunting
robin
bulbul
jay
magpie
chickadee
water ouzel
kite
bald eagle
vulture
great grey owl
European fire salamander
common newt
eft
spotted salamander
axolotl
bullfrog
tree frog
tailed frog
loggerhead
leatherback turtle
mud turtle
terrapin
box turtle
banded gecko
common iguana
American chameleon
whiptail
agama
frilled lizard
alligator lizard
Gila monster
green lizard
African chameleon
Komodo dragon
African crocodile
American alligator
triceratops
thunder snake
ringneck snake
hognose snake
green snake
king snake
garter snake
water snake
vine snake
night snake
boa constrictor
rock python
Indian cobra
green mamba
sea snake
horned viper
diamondback
sidewinder
trilobite
harvestman
scorpion
black and gold garden spider
barn spider
garden spider
black widow
tarantula
wolf spider
tick
centipede
black grouse
ptarmigan
ruffed grouse
prairie chicken
peacock
quail
partridge
African grey
macaw
sulphur-crested cockatoo
lorikeet
coucal
bee eater
hornbill
hummingbird
jacamar
toucan
drake
red-breasted merganser
goose
black swan
tusker
echidna
platypus
wallaby
koala
wombat
jellyfish
sea anemone
brain coral
flatworm
nematode
conch
snail
slug
sea slug
chiton
chambered nautilus
Dungeness crab
rock crab
fiddler crab
king crab
American lobster
spiny lobster
crayfish
hermit crab
isopod
white stork
black stork
spoonbill
flamingo
little blue heron
American egret
bittern
crane
limpkin
European gallinule
American coot
bustard
ruddy turnstone
red-backed sandpiper
redshank
dowitcher
oystercatcher
pelican
king penguin
albatross
grey whale
killer whale
dugong
sea lion
Chihuahua
Japanese spaniel
Maltese dog
Pekinese
Shih-Tzu
Blenheim spaniel
papillon
toy terrier
Rhodesian ridgeback
Afghan hound
basset
beagle
bloodhound
bluetick
black-and-tan coonhound
Walker hound
English foxhound
redbone
borzoi
Irish wolfhound
Italian greyhound
whippet
Ibizan hound
Norwegian elkhound
otterhound
Saluki
Scottish deerhound
Weimaraner
Staffordshire bullterrier
American Staffordshire terrier
Bedlington terrier
Border terrier
Kerry blue terrier
Irish terrier
Norfolk terrier
Norwich terrier
Yorkshire terrier
wire-haired fox terrier
Lakeland terrier
Sealyham terrier
Airedale
cairn
Australian terrier
Dandie Dinmont
Boston bull
miniature schnauzer
giant schnauzer
standard schnauzer
Scotch terrier
Tibetan terrier
silky terrier
soft-coated wheaten terrier
West Highland white terrier
Lhasa
flat-coated retriever
curly-coated retriever
golden retriever
Labrador retriever
Chesapeake Bay retriever
German short-haired pointer
vizsla
English setter
Irish setter
Gordon setter
Brittany spaniel
clumber
English springer
Welsh springer spaniel
cocker spaniel
Sussex spaniel
Irish water spaniel
kuvasz
schipperke
groenendael
malinois
briard
kelpie
komondor
Old English sheepdog
Shetland sheepdog
collie
Border collie
Bouvier des Flandres
Rottweiler
German shepherd
Doberman
miniature pinscher
Greater Swiss Mountain dog
Bernese mountain dog
Appenzeller
EntleBucher
boxer
bull mastiff
Tibetan mastiff
French bulldog
Great Dane
Saint Bernard
Eskimo dog
malamute
Siberian husky
dalmatian
affenpinscher
basenji
pug
Leonberg
Newfoundland
Great Pyrenees
Samoyed
Pomeranian
chow
keeshond
Brabancon griffon
Pembroke
Cardigan
toy poodle
miniature poodle
standard poodle
Mexican hairless
timber wolf
white wolf
red wolf
coyote
dingo
dhole
African hunting dog
hyena
red fox
kit fox
Arctic fox
grey fox
tabby
tiger cat
Persian cat
Siamese cat
Egyptian cat
cougar
lynx
leopard
snow leopard
jaguar
lion
tiger
cheetah
brown bear
American black bear
ice bear
sloth bear
mongoose
meerkat
tiger beetle
ladybug
ground beetle
long-horned beetle
leaf beetle
dung beetle
rhinoceros beetle
weevil
fly
bee
ant
grasshopper
cricket
walking stick
cockroach
mantis
cicada
leafhopper
lacewing
dragonfly
damselfly
admiral
ringlet
monarch
cabbage butterfly
sulphur butterfly
lycaenid
starfish
sea urchin
sea cucumber
wood rabbit
hare
Angora
hamster
porcupine
fox squirrel
marmot
beaver
guinea pig
sorrel
zebra
hog
wild boar
warthog
hippopotamus
ox
water buffalo
bison
ram
bighorn
ibex
hartebeest
impala
gazelle
Arabian camel
llama
weasel
mink
polecat
black-footed ferret
otter
skunk
badger
armadillo
three-toed sloth
orangutan
gorilla
chimpanzee
gibbon
siamang
guenon
patas
baboon
macaque
langur
colobus
proboscis monkey
marmoset
capuchin
howler monkey
titi
spider monkey
squirrel monkey
Madagascar cat
indri
Indian elephant
African elephant
lesser panda
giant panda
barracouta
eel
coho
rock beauty
anemone fish
sturgeon
gar
lionfish
puffer
abacus
abaya
academic gown
accordion
acoustic guitar
aircraft carrier
airliner
airship
altar
ambulance
amphibian
analog clock
apiary
apron
ashcan
assault rifle
backpack
bakery
balance beam
balloon
ballpoint
Band Aid
banjo
bannister
barbell
barber chair
barbershop
barn
barometer
barrel
barrow
baseball
basketball
bassinet
bassoon
bathing cap
bath towel
bathtub
beach wagon
beacon
beaker
bearskin
beer bottle
beer glass
bell cote
bib
bicycle-built-for-two
bikini
binder
binoculars
birdhouse
boathouse
bobsled
bolo tie
bonnet
bookcase
bookshop
bottlecap
bow
bow tie
brass
brassiere
breakwater
breastplate
broom
bucket
buckle
bulletproof vest
bullet train
butcher shop
cab
caldron
candle
cannon
canoe
can opener
cardigan
car mirror
carousel
carpenter's kit
carton
car wheel
cash machine
cassette
cassette player
castle
catamaran
CD player
cello
cellular telephone
chain
chainlink fence
chain mail
chain saw
chest
chiffonier
chime
china cabinet
Christmas stocking
church
cinema
cleaver
cliff dwelling
cloak
clog
cocktail shaker
coffee mug
coffeepot
coil
combination lock
computer keyboard
confectionery
container ship
convertible
corkscrew
cornet
cowboy boot
cowboy hat
cradle
crane
crash helmet
crate
crib
Crock Pot
croquet ball
crutch
cuirass
dam
desk
desktop computer
dial telephone
diaper
digital clock
digital watch
dining table
dishrag
dishwasher
disk brake
dock
dogsled
dome
doormat
drilling platform
drum
drumstick
dumbbell
Dutch oven
electric fan
electric guitar
electric locomotive
entertainment center
envelope
espresso maker
face powder
feather boa
file
fireboat
fire engine
fire screen
flagpole
flute
folding chair
football helmet
forklift
fountain
fountain pen
four-poster
freight car
French horn
frying pan
fur coat
garbage truck
gasmask
gas pump
goblet
go-kart
golf ball
golfcart
gondola
gong
gown
grand piano
greenhouse
grille
grocery store
guillotine
hair slide
hair spray
half track
hammer
hamper
hand blower
hand-held computer
handkerchief
hard disc
harmonica
harp
harvester
hatchet
holster
home theater
honeycomb
hook
hoopskirt
horizontal bar
horse cart
hourglass
iPod
iron
jack-o'-lantern
jean
jeep
jersey
jigsaw puzzle
jinrikisha
joystick
kimono
knee pad
knot
lab coat
ladle
lampshade
laptop
lawn mower
lens cap
letter opener
library
lifeboat
lighter
limousine
liner
lipstick
Loafer
lotion
loudspeaker
loupe
lumbermill
magnetic compass
mailbag
mailbox
maillot
maillot
manhole cover
maraca
marimba
mask
matchstick
maypole
maze
measuring cup
medicine chest
megalith
microphone
microwave
military uniform
milk can
minibus
miniskirt
minivan
missile
mitten
mixing bowl
mobile home
Model T
modem
monastery
monitor
moped
mortar
mortarboard
mosque
mosquito net
motor scooter
mountain bike
mountain tent
mouse
mousetrap
moving van
muzzle
nail
neck brace
necklace
nipple
notebook
obelisk
oboe
ocarina
odometer
oil filter
organ
oscilloscope
overskirt
oxcart
oxygen mask
packet
paddle
paddlewheel
padlock
paintbrush
pajama
palace
panpipe
paper towel
parachute
parallel bars
park bench
parking meter
passenger car
patio
pay-phone
pedestal
pencil box
pencil sharpener
perfume
Petri dish
photocopier
pick
pickelhaube
picket fence
pickup
pier
piggy bank
pill bottle
pillow
ping-pong ball
pinwheel
pirate
pitcher
plane
planetarium
plastic bag
plate rack
plow
plunger
Polaroid camera
pole
police van
poncho
pool table
pop bottle
pot
potter's wheel
power drill
prayer rug
printer
prison
projectile
projector
puck
punching bag
purse
quill
quilt
racer
racket
radiator
radio
radio telescope
rain barrel
recreational vehicle
reel
reflex camera
refrigerator
remote control
restaurant
revolver
rifle
rocking chair
rotisserie
rubber eraser
rugby ball
rule
running shoe
safe
safety pin
saltshaker
sandal
sarong
sax
scabbard
scale
school bus
schooner
scoreboard
screen
screw
screwdriver
seat belt
sewing machine
shield
shoe shop
shoji
shopping basket
shopping cart
shovel
shower cap
shower curtain
ski
ski mask
sleeping bag
slide rule
sliding door
slot
snorkel
snowmobile
snowplow
soap dispenser
soccer ball
sock
solar dish
sombrero
soup bowl
space bar
space heater
space shuttle
spatula
speedboat
spider web
spindle
sports car
spotlight
stage
steam locomotive
steel arch bridge
steel drum
stethoscope
stole
stone wall
stopwatch
stove
strainer
streetcar
stretcher
studio couch
stupa
submarine
suit
sundial
sunglass
sunglasses
sunscreen
suspension bridge
swab
sweatshirt
swimming trunks
swing
switch
syringe
table lamp
tank
tape player
teapot
teddy
television
tennis ball
thatch
theater curtain
thimble
thresher
throne
tile roof
toaster
tobacco shop
toilet seat
torch
totem pole
tow truck
toyshop
tractor
trailer truck
tray
trench coat
tricycle
trimaran
tripod
triumphal arch
trolleybus
trombone
tub
turnstile
typewriter keyboard
umbrella
unicycle
upright
vacuum
vase
vault
velvet
vending machine
vestment
viaduct
violin
volleyball
waffle iron
wall clock
wallet
wardrobe
warplane
washbasin
washer
water bottle
water jug
water tower
whiskey jug
whistle
wig
window screen
window shade
Windsor tie
wine bottle
wing
wok
wooden spoon
wool
worm fence
wreck
yawl
yurt
web site
comic book
crossword puzzle
street sign
traffic light
book jacket
menu
plate
guacamole
consomme
hot pot
trifle
ice cream
ice lolly
French loaf
bagel
pretzel
cheeseburger
hotdog
mashed potato
head cabbage
broccoli
cauliflower
zucchini
spaghetti squash
acorn squash
butternut squash
cucumber
artichoke
bell pepper
cardoon
mushroom
Granny Smith
strawberry
orange
lemon
fig
pineapple
banana
jackfruit
custard apple
pomegranate
hay
carbonara
chocolate sauce
dough
meat loaf
pizza
potpie
burrito
red wine
espresso
cup
eggnog
alp
bubble
cliff
coral reef
geyser
lakeside
promontory
sandbar
seashore
valley
volcano
ballplayer
groom
scuba diver
rapeseed
daisy
yellow lady's slipper
corn
acorn
hip
buckeye
coral fungus
agaric
gyromitra
stinkhorn
earthstar
hen-of-the-woods
bolete
ear
toilet tissue

================================================
FILE: imageai/Detection/Custom/CUSTOMDETECTION.md
================================================
# ImageAI : Custom Object Detection

### TABLE OF CONTENTS

- <a href="#customdetection" > :white_square_button: Custom Object Detection</a>
- <a href="#objectextraction" > :white_square_button: Object Detection, Extraction and Fine-tune</a>
- <a href="#hidingdetails" > :white_square_button: Hiding/Showing Object Name and Probability</a>
- <a href="#inputoutputtype" > :white_square_button: Image Input & Output Types</a>
- <a href="#documentation" > :white_square_button: Documentation</a>


ImageAI provides very convenient and powerful methods to perform object detection on images and extract each object from the image using your own **custom YOLOv3 or TinyYOLOv3 model** and the corresponding **.json** generated during the training. To test the custom object detection, you can download a sample custom model we have trained to detect the Hololens headset and its **.json** file via the links below:

* [**yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt**](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt) _(Size = 236 mb)_
* [**hololens-yolo_yolov3_detection_config.json**](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/hololens-yolo_yolov3_detection_config.json)


 Once you download the custom object detection model file, you should copy the model file to your project folder where your **.py** files will be.
 Then create a python file and give it a name; an example is FirstCustomDetection.py. Then write the code below into the python file: 

### FirstCustomDetection.py
<div id="customdetection" ></div>

```python
from imageai.Detection.Custom import CustomObjectDetection

detector = CustomObjectDetection()
detector.setModelTypeAsYOLOv3()
detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt")
detector.setJsonPath("hololens-yolo_yolov3_detection_config.json")
detector.loadModel()
detections = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg")
for detection in detections:
    print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])

```

Sample Result - Input:

![Input](../../../data-images/holo2.jpg)

  Output: 
  
![Output](../../../data-images/holo2-detected.jpg)
          
```
hololens  :  39.69653248786926  :  [611, 74, 751, 154]
hololens  :  87.6643180847168  :  [23, 46, 90, 79]
hololens  :  89.25175070762634  :  [191, 66, 243, 95]
hololens  :  64.49641585350037  :  [437, 81, 514, 133]
hololens  :  91.78624749183655  :  [380, 113, 423, 138]

```


Let us make a breakdown of the object detection code that we used above.

```python
from imageai.Detection.Custom import CustomObjectDetection

detector = CustomObjectDetection()
detector.setModelTypeAsYOLOv3()
```
 In the 3 lines above , we import the **ImageAI custom object detection** class in the first line, created the class instance on the second line and set the model type to YOLOv3.
 
```python
detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt")
detector.setJsonPath("hololens-yolo_yolov3_detection_config.json")
detector.loadModel()
```

  In the 3 lines above, we specified the file path to our downloaded model file in the first line , specified the path to our **hololens-yolo_yolov3_detection_config.json** file in the second line and loaded the model on the third line.

```python
detections = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg")
for detection in detections:
    print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])

```

In the 3 lines above, we ran the `detectObjectsFromImage()` function and passed in the path to our test image, and the path to the new
 image which the function will save. The function then returns an array of dictionaries, with each dictionary corresponding
 to one of the objects detected in the image. Each dictionary has the properties `name` (name of the object),
`percentage_probability` (percentage probability of the detection) and `box_points` (the x1, y1, x2 and y2 coordinates of the bounding box of the object). 


###  Object Detection, Extraction and Fine-tune
<div id="objectextraction" ></div>

In the examples we used above, we ran the object detection on an image and it
returned the detected objects in an array as well as save a new image with rectangular markers drawn on each object. In our next examples, we will be able to extract each object from the input image and save it independently.
  
  
In the example code below, which is nearly identical to the previous object detection code, we will save each object detected as a separate image.

```python
from imageai.Detection.Custom import CustomObjectDetection

detector = CustomObjectDetection()
detector.setModelTypeAsYOLOv3()
detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt")
detector.setJsonPath("hololens-yolo_yolov3_detection_config.json") 
detector.loadModel()
detections, extracted_objects_array = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg", extract_detected_objects=True)

for detection, object_path in zip(detections, extracted_objects_array):
    print(object_path)
    print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])
    print("---------------")
```


Sample Result: Output Images
    
![](../../../data-images/holo2-detected-objects/hololens-1.jpg)
![](../../../data-images/holo2-detected-objects/hololens-2.jpg)
![](../../../data-images/holo2-detected-objects/hololens-3.jpg)
![](../../../data-images/holo2-detected-objects/hololens-4.jpg)
![](../../../data-images/holo2-detected-objects/hololens-5.jpg)
![](../../../data-images/holo2-detected-objects/hololens-6.jpg)
![](../../../data-images/holo2-detected-objects/hololens-7.jpg)


Let us review the part of the code that performs the object detection and extracts the images:

```python
detections, extracted_objects_array = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg", extract_detected_objects=True)

for detection, object_path in zip(detections, extracted_objects_array):
    print(object_path)
    print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])
    print("---------------")
```

In the above lines, we called the `detectObjectsFromImage()` function, passed in the input image path, the output image path, and an
extra parameter `extract_detected_objects=True`. This parameter states that the function should extract each object detected from the image
and save it as a separate image. The parameter is `False` by default. Once set to `True`, the function will create a directory
 which is the `output image path + "-objects"`. Then it saves all the extracted images into this new directory with
  each image's name being the `detected object name + "-" + a number` which corresponds to the order in which the objects
  were detected.
  
This new parameter we set to extract and save detected objects as an image will make the function return 2 values. The
 first is the array of dictionaries with each dictionary corresponding to a detected object. The second is an array of the paths
  to the saved images of each object detected and extracted, and they are arranged in order at which the objects are in the
  first array.

  
### And one important feature you need to know!

You will recall that the percentage probability
   for each detected object is sent back by the `detectObjectsFromImage()` function. The function has a parameter
   `minimum_percentage_probability`, whose default value is `30` (value ranges between 0 - 100), and it is left at 30 in this example. That means the function will only return a detected
    object if its percentage probability is **30 or above**. The value was kept at this number to ensure the integrity of the
     detection results. You can fine-tune the object
      detection by setting `minimum_percentage_probability` to a smaller value to detect more objects or to a higher value to detect fewer objects.


###  Hiding/Showing Object Name and Probability
<div id="hidingdetails"></div>

**ImageAI** provides options to hide the name of objects detected and/or the percentage probability from being shown on the saved/returned detected image. Using the `detectObjectsFromImage()` and `detectCustomObjectsFromImage()` functions, the parameters `'display_object_name'` and `'display_percentage_probability'`  can be set to `True` or `False` individually. Take a look at the code below: 
```python
detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "holo2.jpg"), output_image_path=os.path.join(execution_path , "holo2_nodetails.jpg"), minimum_percentage_probability=30, display_percentage_probability=False, display_object_name=False)
```

In the above code, we specified that both the object name and percentage probability should not be shown. As you can see in the result below, both the names of the objects and their individual percentage probabilities are not shown in the detected image. 

**Result**

![](../../../data-images/holo2-nodetails.jpg)


### Image Input & Output Types
<div id="inputoutputtype"></div>

**ImageAI** custom object detection supports 2 types of input, which are **file path to image file** (default) and **numpy array of an image**,
as well as 2 types of output, which are image **file** (default) and numpy **array**.
This means you can now perform object detection in production applications such as on a web server and system
 that returns file in any of the above stated formats.
 To perform object detection with numpy array input, you just need to state the input type
in the `.detectObjectsFromImage()` function. See example below.

```python
detections = detector.detectObjectsFromImage(input_type="array", input_image=image_array , output_image_path=os.path.join(execution_path , "holo2-detected.jpg")) # For numpy array input type
```
To perform object detection with numpy array output you just need to state the output type
in the `.detectObjectsFromImage()` function. See example below.

```python
detected_image_array, detections = detector.detectObjectsFromImage(output_type="array", input_image="holo2.jpg" ) # For numpy array output type
```


### Documentation
<div id="documentation" ></div>

We have provided full documentation for all **ImageAI** classes and functions. Find links below: 

* Documentation - **English Version**  [https://imageai.readthedocs.io](https://imageai.readthedocs.io)

================================================
FILE: imageai/Detection/Custom/CUSTOMDETECTIONTRAINING.md
================================================
# ImageAI : Custom Detection Model Training 

## ---------------------------------------------------
## Introducing Jarvis and TheiaEngine.

We the creators of ImageAI are glad to announce 2 new AI projects to provide state-of-the-art Generative AI, LLM and Image Understanding on your personal computer and servers. 


[![](../../../jarvis.png)](https://jarvis.genxr.co)

Install Jarvis on PC/Mac to setup limitless access to LLM powered AI Chats for your every day work, research and generative AI needs with 100% privacy and full offline capability.


Visit [https://jarvis.genxr.co](https://jarvis.genxr.co/) to get started.


[![](../../../theiaengine.png)](https://www.genxr.co/theia-engine)


[TheiaEngine](https://www.genxr.co/theia-engine), the next-generation computer Vision AI API capable of all Generative and Understanding computer vision tasks in a single API call and available via REST API to all programming languages. Features include
- **Detect 300+ objects** ( 220 more objects than ImageAI)
- **Provide answers to any content or context questions** asked on an image
  - very useful to get information on any object, action or information without needing to train a new custom model for every tasks
-  **Generate scene description and summary**
-  **Convert 2D image to 3D pointcloud and triangular mesh**
-  **Semantic Scene mapping of objects, walls, floors, etc**
-  **Stateless Face recognition and emotion detection**
-  **Image generation and augmentation from prompt**
-  etc.

Visit [https://www.genxr.co/theia-engine](https://www.genxr.co/theia-engine) to try the demo and join in the beta testing today.
## ---------------------------------------------------

**ImageAI** provides the most simple and powerful approach to training custom object detection models
using the YOLOv3 architecture,
which you can load into the `imageai.Detection.Custom.CustomObjectDetection` class. This allows
 you to train your own **YOLOv3** or **TinyYOLOv3** model on any set of images that corresponds to any type of objects of interest.
The training process generates a JSON file that maps the objects names in your image dataset and the detection anchors, as well as creates lots of models. In choosing the best model for your custom object detection task, an `evaluateModel()` function has been provided to compute the **mAP** of your saved models by allowing you to state your desired **IoU** and **Non-maximum Suppression** values. Then you can perform custom
object detection using the model and the JSON file generated. 

### TABLE OF CONTENTS
- <a href="#preparingdataset" > :white_square_button: Preparing your custom dataset</a>
- <a href="#trainingdataset" > :white_square_button: Training on your custom Dataset</a>
- <a href="#evaluatingmodels" > :white_square_button: Evaluating your saved detection models' mAP</a>


### Preparing your custom dataset
<div id="preparingdataset"></div>

To train a custom detection model, you need to prepare the images you want to use to train the model. 
You will prepare the images as follows: 

1. Decide the type of object(s) you want to detect and collect about **200 (minimum recommendation)** or more pictures of each of the object(s)
2. Once you have collected the images, you need to annotate the object(s) in the images. **ImageAI** uses the **YOLO** format for image annotation. You can generate this annotation for your images using the easy to use [**LabelImg**](https://github.com/tzutalin/labelImg) image annotation tool, available for Windows, Linux and MacOS systems. Open the link below to install the annotation tool. See: [https://github.com/tzutalin/labelImg](https://github.com/tzutalin/labelImg)
3. When you are done annotating your images, **annotation .txt** files will be generated for each image in your dataset. The **annotation .txt** file describes each or **all** of the objects in the image. For example, if your image names are **image(1).jpg**, **image(2).jpg**, **image(3).jpg** till **image(z).jpg**; the corresponding annotation for each of the images will be **image(1).txt**, **image(2).txt**, **image(3).txt** till **image(z).txt**. 
4. Once you have the annotations for all your images, create a folder for your dataset (E.g headsets) and in this parent folder, create child folders **train** and **validation**
5. In the train folder, create **images** and **annotations**
 sub-folders. Put about 70-80% of your dataset of each object's images in the **images** folder and put the corresponding annotations for these images in the **annotations** folder.  
6. In the validation folder, create **images** and **annotations** sub-folders. Put the rest of your dataset images in the **images** folder and put the corresponding annotations for these images in the **annotations** folder.
7. Once you have done this, the structure of your image dataset folder should look like below: 
    ```
    >> train    >> images       >> img_1.jpg  (shows Object_1)
                >> images       >> img_2.jpg  (shows Object_2)
                >> images       >> img_3.jpg  (shows Object_1, Object_3 and Object_n)
                >> annotations  >> img_1.txt  (describes Object_1)
                >> annotations  >> img_2.txt  (describes Object_2)
                >> annotations  >> img_3.txt  (describes Object_1, Object_3 and Object_n)
    
    >> validation   >> images       >> img_151.jpg (shows Object_1, Object_3 and Object_n)
                    >> images       >> img_152.jpg (shows Object_2)
                    >> images       >> img_153.jpg (shows Object_1)
                    >> annotations  >> img_151.txt (describes Object_1, Object_3 and Object_n)
                    >> annotations  >> img_152.txt (describes Object_2)
                    >> annotations  >> img_153.txt (describes Object_1)
     ```
8. You can train your custom detection model completely from scratch or use transfer learning (recommended for better accuracy) from a pre-trained YOLOv3 model. Also, we have provided a sample annotated Hololens and Headsets (Hololens and Oculus) dataset for you to train with. Download the pre-trained YOLOv3 model and the sample datasets in the link below.  

Download dataset `hololens-yolo.zip` [here](https://github.com/OlafenwaMoses/ImageAI/releases/tag/test-resources-v3) and pre-trained model `yolov3.pt`  [here](https://github.com/OlafenwaMoses/ImageAI/releases/tag/3.0.0-pretrained)


### Training on your custom dataset
<div id="trainingdataset"></div>

Before you start training your custom detection model, kindly take note of the following: 

- The default **batch_size** is 4. If you are training with **Google Colab**, this will be fine. However, I would advise you to use a more powerful GPU than the K80 offered by Colab, as the higher your **batch_size (8, 16)**, the better the accuracy of your detection model. 

Then your training code goes as follows: 
```python
from imageai.Detection.Custom import DetectionModelTrainer

trainer = DetectionModelTrainer()
trainer.setModelTypeAsYOLOv3()
trainer.setDataDirectory(data_directory="hololens-yolo")
trainer.setTrainConfig(object_names_array=["hololens"], batch_size=4, num_experiments=200, train_from_pretrained_model="yolov3.pt")
# In the above,when training for detecting multiple objects,
#set object_names_array=["object1", "object2", "object3",..."objectz"]
trainer.trainModel()
```

 Yes! Just 6 lines of code and you can train object detection models on your custom dataset.
Now let's take a look at how the code above works. 

```python
from imageai.Detection.Custom import DetectionModelTrainer

trainer = DetectionModelTrainer()
trainer.setModelTypeAsYOLOv3()
trainer.setDataDirectory(data_directory="hololens-yolo")
```

In the first line, we import the **ImageAI** detection model training class, then we define the model trainer in the second line,
 we set the network type in the third line and set the path to the image dataset we want to train the network on.

```python
trainer.setTrainConfig(object_names_array=["hololens"], batch_size=4, num_experiments=200, train_from_pretrained_model="yolov3.pt")
```


In the line above, we configured our detection model trainer. The parameters we stated in the function are as below:  

- **object_names_array** : this is an array containing the names of the objects in our dataset
- **batch_size** : this is to state the batch size for the training
- **num_experiments** : this is to state the number of times the network will train over all the training images,
 which is also called epochs 
- **train_from_pretrained_model(optional)** : this is to train using transfer learning from a pre-trained **YOLOv3** model

```python
trainer.trainModel()
```


When you start the training, you should see something like this in the console: 
```
Generating anchor boxes for training images...
thr=0.25: 1.0000 best possible recall, 6.93 anchors past thr
n=9, img_size=416, metric_all=0.463/0.856-mean/best, past_thr=0.549-mean:
====================
Pretrained YOLOv3 model loaded to initialize weights
====================
Epoch 1/100
----------
Train:
30it [00:14,  2.09it/s]
    box loss-> 0.09820, object loss-> 0.27985, class loss-> 0.00000
Validation:
15it [01:45,  7.05s/it]
    recall: 0.085714 precision: 0.000364 mAP@0.5: 0.000186, mAP@0.5-0.95: 0.000030

Epoch 2/100
----------
Train:
30it [00:07,  4.25it/s]
    box loss-> 0.08691, object loss-> 0.07011, class loss-> 0.00000
Validation:
15it [01:37,  6.53s/it]
    recall: 0.214286 precision: 0.000854 mAP@0.5: 0.000516, mAP@0.5-0.95: 0.000111
.
.
.
.

```

Let us explain the details shown above: 
```
Generating anchor boxes for training images...
thr=0.25: 1.0000 best possible recall, 6.93 anchors past thr
n=9, img_size=416, metric_all=0.463/0.856-mean/best, past_thr=0.549-mean:
====================
Pretrained YOLOv3 model loaded to initialize weights
====================
```

The above details signify the following: 
- **ImageAI** autogenerates the best match detection **anchor boxes** for your image dataset. 

- The pretrained **yolov3.pt** model was loaded to initialize the weights used to train the model.

```
Epoch 1/100
----------
Train:
30it [00:14,  2.09it/s]
    box loss-> 0.09820, object loss-> 0.27985, class loss-> 0.00000
Validation:
15it [01:45,  7.05s/it]
    recall: 0.085714 precision: 0.000364 mAP@0.5: 0.000186, mAP@0.5-0.95: 0.000030

Epoch 2/100
----------
Train:
30it [00:07,  4.25it/s]
    box loss-> 0.08691, object loss-> 0.07011, class loss-> 0.00000
Validation:
15it [01:37,  6.53s/it]
    recall: 0.214286 precision: 0.000854 mAP@0.5: 0.000516, mAP@0.5-0.95: 0.000111
```

- The above signifies the progress of the training. 
- For each experiment (Epoch), a number of metrics are computed. The important ones for choosing an accurate model are detailed below
  - The bounding box loss `box loss` is reported and expected to drop as the training progresses
  - The object localization loss  `object loss` is reported and expected to drop as the training progresses
  - The class loss  `class loss` is reported and expected to drop as the training progresses. If the class loss persists at 0.0000, it's because your dataset has a single class.
  - The `mAP@0.5` (mAP50) and `mAP@0.5-0.95` metrics are expected to increase. This signifies that the model's accuracy is increasing. There might be fluctuations in these metrics sometimes.
- For each increase in the `mAP50`  after an experiment, a model is saved in the **hololens-yolo/models** folder. The higher the mAP50, the better the model. 

Once you are done training, you can visit the link below for performing object detection with your **custom detection model** and **detection_config.json** file.

[Detection/Custom/CUSTOMDETECTION.md](./CUSTOMDETECTION.md)
 
 
###  >> Documentation
<div id="documentation" ></div>

We have provided full documentation for all **ImageAI** classes and functions. Find links below: 

* Documentation - **English Version**  [https://imageai.readthedocs.io](https://imageai.readthedocs.io)


================================================
FILE: imageai/Detection/Custom/CUSTOMVIDEODETECTION.md
================================================
# ImageAI : Custom Video Object Detection, Tracking  and Analysis

### TABLE OF CONTENTS

- <a href="#videodetection" > :white_square_button: First Custom Video Object Detection</a>
- <a href="#camerainputs" > :white_square_button: Camera / Live Stream Video Detection</a>
- <a href="#videoanalysis" > :white_square_button: Video Analysis</a>
- <a href="#hidingdetails" > :white_square_button: Hiding/Showing Object Name and Probability</a>
- <a href="#videodetectionintervals" > :white_square_button: Frame Detection Intervals</a>
- <a href="#detectiontimeout" > :white_square_button: Video Detection Timeout (NEW)</a>
- <a href="#documentation" > :white_square_button: Documentation</a>


ImageAI provides convenient, flexible and powerful methods to perform object detection on videos using your own **custom YOLOv3 model** and the corresponding **.json** file generated during the training. This version of **ImageAI** provides commercial grade video objects detection features, which include but are not limited to device/IP camera inputs, per frame, per second, per minute and entire video analysis for storing in databases and/or real-time visualizations and for future insights.
To test the custom video object detection, you can download a sample custom model we have trained to detect the Hololens headset and its **.json** file via the links below:

* [**yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt**](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt) _(Size = 236 mb)_
* [**hololens-yolo_yolov3_detection_config.json**](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/hololens-yolo_yolov3_detection_config.json)


Because video object detection is a compute intensive task, we advise you perform this experiment using a computer with an NVIDIA GPU and the GPU version of PyTorch
 installed. Performing video object detection on a CPU will be slower than using an NVIDIA GPU powered computer. You can use Google Colab for this
 experiment as it has an NVIDIA K80 GPU available for free.
<br/>
 Once you download the custom object detection model and JSON files, you should copy the model and the JSON files to your project folder where your .py files will be.
 Then create a python file and give it a name; an example is FirstCustomVideoObjectDetection.py. Then write the code below into the python file: <br/>


### FirstCustomVideoObjectDetection.py
<div id="videodetection" ></div>

```python
from imageai.Detection.Custom import CustomVideoObjectDetection
import os

execution_path = os.getcwd()

video_detector = CustomVideoObjectDetection()
video_detector.setModelTypeAsYOLOv3()
video_detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt")
video_detector.setJsonPath("hololens-yolo_yolov3_detection_config.json")
video_detector.loadModel()

video_detector.detectObjectsFromVideo(input_file_path="holo1.mp4",
                                          output_file_path=os.path.join(execution_path, "holo1-detected3"),
                                          frames_per_second=20,
                                          minimum_percentage_probability=40,
                                          log_progress=True)
```

[**Input Video**](../../../data-videos/holo1.mp4)
[![Input Video](../../../data-images/holo-video.jpg)](../../../data-videos/holo1.mp4)
[**Output Video**](https://www.youtube.com/watch?v=4o5GyAR4Mpw)
[![Output Video](../../../data-images/holo-video-detected.jpg)](https://www.youtube.com/watch?v=4o5GyAR4Mpw)


Let us make a breakdown of the object detection code that we used above.

```python
from imageai.Detection.Custom import CustomVideoObjectDetection
import os

execution_path = os.getcwd()
```

In the 3 lines above, we import the **ImageAI custom video object detection** class in the first line, import the **os** module in the second line and obtain
  the path to the folder where our python file runs in the third line.
```python
video_detector = CustomVideoObjectDetection()
video_detector.setModelTypeAsYOLOv3()
video_detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt")
video_detector.setJsonPath("hololens-yolo_yolov3_detection_config.json")
video_detector.loadModel()
```
In the 5 lines above, we created a new instance of the `CustomVideoObjectDetection` class in the first line, set the model type to YOLOv3 in the second line,
  set the model path to our custom YOLOv3 model file in the third line, specified the path to the model's corresponding **hololens-yolo_yolov3_detection_config.json** in the fourth line and loaded the model in the fifth line.

```python
video_detector.detectObjectsFromVideo(input_file_path="holo1.mp4",
                                          output_file_path=os.path.join(execution_path, "holo1-detected3"),
                                          frames_per_second=20,
                                          minimum_percentage_probability=40,
                                          log_progress=True)
```

In the code above, we ran the `detectObjectsFromVideo()` function and passed in the path to our video, the path to the new
 video (without the extension, it saves a .mp4 video by default) which the function will save, the number of frames per second (fps) that
 we desire the output video to have, the minimum percentage probability for a detection to be reported and the option to log the progress of the detection in the console. Then the function returns the path to the saved video
 which contains boxes and percentage probabilities rendered on objects detected in the video.


### Camera / Live Stream Video Detection
<div id="camerainputs"></div>

**ImageAI** now allows live-video detection with support for camera inputs. Using **OpenCV**'s **VideoCapture()** function, you can load live-video streams from a device camera, cameras connected by cable or IP cameras, and pass it into **ImageAI**'s **detectObjectsFromVideo()** function. All features that are supported for detecting objects in a video file are also available for detecting objects in a camera's live-video feed. Find below an example of detecting objects in a live-video feed from the device camera.

```python
from imageai.Detection.Custom import CustomVideoObjectDetection
import os
import cv2

execution_path = os.getcwd()
camera = cv2.VideoCapture(0)

video_detector = CustomVideoObjectDetection()
video_detector.setModelTypeAsYOLOv3()
video_detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt")
video_detector.setJsonPath("hololens-yolo_yolov3_detection_config.json")
video_detector.loadModel()

video_detector.detectObjectsFromVideo(camera_input=camera,
                                          output_file_path=os.path.join(execution_path, "holo1-detected3"),
                                          frames_per_second=20,
                                          minimum_percentage_probability=40,
                                          log_progress=True)
```

The difference between the code above and the code for the detection of a video file is that we defined an **OpenCV VideoCapture** instance and loaded the default device camera into it. Then we passed the camera we defined into the parameter **camera_input**, which replaces the **input_file_path** that is used for a video file.


### Video Analysis
<div id="videoanalysis"></div>

**ImageAI** now provides commercial-grade video analysis in the Custom Video Object Detection class, for both video file inputs and camera inputs. This feature allows developers to obtain deep insights into any video processed with **ImageAI**. These insights can be visualized in real-time or stored in a NoSQL database for future review or analysis. <br/>

For video analysis, the **detectObjectsFromVideo()** function now allows you to state your own defined functions which will be executed for every frame, second and/or minute of the video detected, as well as state a function that will be executed at the end of a video detection. Once these functions are stated, they will receive raw but comprehensive analytical data on the index of the frame/second/minute, objects detected (name, percentage_probability and box_points), number of instances of each unique object detected and average number of occurrences of each unique object detected over a second/minute and the entire video.

To obtain the video analysis, all you need to do is specify a function, state the corresponding parameters it will be receiving and pass the function name into the **per_frame_function**, **per_second_function**, **per_minute_function** and **video_complete_function** parameters in the detection function. Find below examples of video analysis functions.

```python
def forFrame(frame_number, output_array, output_count):
    print("FOR FRAME " , frame_number)
    print("Output for each object : ", output_array)
    print("Output count for unique objects : ", output_count)
    print("------------END OF A FRAME --------------")

def forSeconds(second_number, output_arrays, count_arrays, average_output_count):
    print("SECOND : ", second_number)
    print("Array for the outputs of each frame ", output_arrays)
    print("Array for output count for unique objects in each frame : ", count_arrays)
    print("Output average count for unique objects in the last second: ", average_output_count)
    print("------------END OF A SECOND --------------")

def forMinute(minute_number, output_arrays, count_arrays, average_output_count):
    print("MINUTE : ", minute_number)
    print("Array for the outputs of each frame ", output_arrays)
    print("Array for output count for unique objects in each frame : ", count_arrays)
    print("Output average count for unique objects in the last minute: ", average_output_count)
    print("------------END OF A MINUTE --------------")

video_detector = CustomVideoObjectDetection()
video_detector.setModelTypeAsYOLOv3()
video_detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt")
video_detector.setJsonPath("hololens-yolo_yolov3_detection_config.json")
video_detector.loadModel()

video_detector.detectObjectsFromVideo(camera_input=camera,
                                          output_file_path=os.path.join(execution_path, "holo1-detected3"),
                                          frames_per_second=20, per_second_function=forSeconds, per_frame_function = forFrame, per_minute_function= forMinute,
                                          minimum_percentage_probability=40,
                                          log_progress=True)
```


**ImageAI** also allows you to obtain complete analysis of the entire video processed. All you need is to define a function like the forSeconds or forMinute function and pass it as the **video_complete_function** parameter of your **detectObjectsFromVideo()** call. The same values as for the per_second_function and per_minute_function will be returned. The difference is that no index will be returned; only the other 3 values will be returned, and these 3 values will cover all frames in the video. Below is a sample function:
```python
def forFull(output_arrays, count_arrays, average_output_count):
    # Perform action on the 3 parameters returned into the function
    pass


video_detector.detectObjectsFromVideo(camera_input=camera,
                                          output_file_path=os.path.join(execution_path, "holo1-detected3"),
                                          video_complete_function=forFull,
                                          minimum_percentage_probability=40,
                                          log_progress=True)

```

**FINAL NOTE ON VIDEO ANALYSIS** : **ImageAI** allows you to obtain the detected video frame as a Numpy array at each frame, second and minute function. All you need to do is specify one more parameter in your function and set **return_detected_frame=True** in your **detectObjectsFromVideo()** function. Once this is set, the extra parameter you specified in your function will be the Numpy array of the detected frame. See a sample below:

```python
def forFrame(frame_number, output_array, output_count, detected_frame):
    print("FOR FRAME " , frame_number)
    print("Output for each object : ", output_array)
    print("Output count for unique objects : ", output_count)
    print("Returned Objects is : ", type(detected_frame))
    print("------------END OF A FRAME --------------")


video_detector.detectObjectsFromVideo(camera_input=camera,
                                          output_file_path=os.path.join(execution_path, "holo1-detected3"),
                                          per_frame_function=forFrame,
                                          minimum_percentage_probability=40,
                                          log_progress=True, return_detected_frame=True)
```


### Frame Detection Intervals
<div id="videodetectionintervals" ></div>

The above video object detection tasks are optimized for frame-real-time object detection, which ensures that objects in every frame
of the video are detected. **ImageAI** provides you the option to adjust the video frame detections which can speed up
your video detection process. When calling the `.detectObjectsFromVideo()` function, you can
specify at which frame interval detections should be made. By setting the **frame_detection_interval** parameter to be
 equal to 5 or 20, that means the object detections in the video will be updated after 5 frames or 20 frames.
If your output video **frames_per_second** is set to 20, that means the object detections in the video will
 be updated once in every quarter of a second or every second respectively. This is useful in scenarios where the available
 compute is less powerful and speeds of moving objects are low. This ensures you can have objects detected as second-real-time,
 half-a-second-real-time or whichever way suits your needs.


### Custom Video Detection Timeout
<div id="detectiontimeout"></div>

**ImageAI** now allows you to set a timeout in seconds for detection of objects in videos or camera live feed. To set a timeout for your video detection code, all you need to do is specify the `detection_timeout` parameter in the `detectObjectsFromVideo()` function to the number of desired seconds. In the example code below, we set `detection_timeout` to 120 seconds (2 minutes). 


```python
from imageai.Detection.Custom import CustomVideoObjectDetection
import os
import cv2

execution_path = os.getcwd()
camera = cv2.VideoCapture(0)

video_detector = CustomVideoObjectDetection()
video_detector.setModelTypeAsYOLOv3()
video_detector.setModelPath("yolov3_hololens-yolo_mAP-0.82726_epoch-73.pt")
video_detector.setJsonPath("hololens-yolo_yolov3_detection_config.json")
video_detector.loadModel()

video_detector.detectObjectsFromVideo(camera_input=camera,
                                          output_file_path=os.path.join(execution_path, "holo1-detected3"),
                                          frames_per_second=20,  minimum_percentage_probability=40,
                                          detection_timeout=120)
```


###  >> Documentation
<div id="documentation" ></div>

We have provided full documentation for all **ImageAI** classes and functions. Find links below: 

* Documentation - **English Version**  [https://imageai.readthedocs.io](https://imageai.readthedocs.io)


================================================
FILE: imageai/Detection/Custom/__init__.py
================================================
import os
import time
import math
import json
import warnings
from typing import List, Union, Tuple, Dict
from collections import defaultdict

import numpy as np
from PIL import Image
import cv2
import torch
from torch.cuda import amp
from torch.utils.data import DataLoader
from torch.optim import SGD, lr_scheduler
from tqdm import tqdm

from .yolo.dataset import LoadImagesAndLabels
from .yolo.custom_anchors import generate_anchors
from .yolo.compute_loss import compute_loss
from .yolo import validate
from ...yolov3.tiny_yolov3 import YoloV3Tiny
from ...yolov3.yolov3 import YoloV3
from ...yolov3.utils import draw_bbox_and_label, get_predictions, prepare_image

from ...backend_check.model_extension import extension_check


class DetectionModelTrainer:
    """
    This is the Detection Model training class, which allows you to train object detection models
    on image datasets that are in YOLO format, using the YOLOv3.

    Expected call order: pick a model type with 'setModelTypeAsYOLOv3()' or
    'setModelTypeAsTinyYOLOv3()', call 'setDataDirectory()', then 'setTrainConfig()'
    and finally 'trainModel()'.
    """

    def __init__(self) -> None:
        self.__device = "cuda" if torch.cuda.is_available() else "cpu"
        self.__cuda = (self.__device != "cpu")
        self.__model_type = ""
        self.__model = None
        self.__optimizer = None
        self.__data_dir = ""
        self.__classes: List[str] = None
        self.__num_classes = None
        self.__anchors = None
        self.__dataset_name = None
        self.__mini_batch_size: int = None
        # Gradient scaling is only enabled when training on CUDA.
        self.__scaler = amp.GradScaler(enabled=self.__cuda)
        self.__lr_lambda = None
        self.__lr_scheduler = None
        self.__custom_train_dataset = None
        self.__custom_val_dataset = None
        self.__train_loader = None
        self.__val_loader = None

        self.__model_path: str = None
        self.__epochs: int = None
        self.__output_models_dir: str = None
        self.__output_json_dir: str = None

    @staticmethod
    def __linear_lr_factor(epoch: int, epochs: int) -> float:
        """
        Linear learning-rate multiplier: 1.0 at epoch 0, decaying to 0.01 at epoch 'epochs - 1'.

        The denominator is clamped to 1 so that a single-epoch run (epochs == 1)
        does not divide by zero.
        :param epoch: index of the epoch the factor is requested for
        :param epochs: total number of epochs of the training run
        :return: the multiplier to apply to the base learning rate
        """
        return (1 - epoch / max(epochs - 1, 1)) * (1.0 - 0.01) + 0.01

    def __set_training_param(self, epochs : int, accumulate : int) -> None:
        """
        Builds the anchors, the model, the SGD optimizer (three parameter groups) and the
        LambdaLR scheduler, then moves the model to the selected device.
        :param epochs: total number of epochs, used by the learning-rate schedule
        :param accumulate: number of mini-batches accumulated per optimizer step, used to scale weight decay
        :return:
        """
        # self.__lr_lambda = lambda x : ((1 - math.cos(x * math.pi / epochs)) / 2  ) * (0.1 - 1.0) + 1.0
        self.__lr_lambda = lambda x: self.__linear_lr_factor(x, epochs)
        self.__anchors = generate_anchors(
                                self.__custom_train_dataset,
                                n=9 if self.__model_type=="yolov3" else 6
                            )
        # Flatten the generated anchors into a plain list of rounded ints
        # (this is the form written to the JSON config by 'trainModel()').
        self.__anchors = [round(i) for i in self.__anchors.reshape(-1).tolist()]
        if self.__model_type == "yolov3":
            self.__model = YoloV3(
                        num_classes=self.__num_classes,
                        anchors=self.__anchors,
                        device=self.__device
                    )
        elif self.__model_type == "tiny-yolov3":
            self.__model = YoloV3Tiny(
                        num_classes=self.__num_classes,
                        anchors=self.__anchors,
                        device=self.__device
                    )
        if self.__model_path:
            self.__load_model()

        w_d = (5e-4) * (self.__mini_batch_size * accumulate / 64) # scale weight decay
        g0, g1, g2 = [], [], []  # optimizer parameter groups
        for m in self.__model.modules():
            if hasattr(m, 'bias') and isinstance(m.bias, torch.nn.Parameter):  # bias
                g2.append(m.bias)
            if isinstance(m, torch.nn.BatchNorm2d):  # weight (no decay)
                g0.append(m.weight)
            elif hasattr(m, 'weight') and isinstance(m.weight, torch.nn.Parameter):  # weight (with decay)
                g1.append(m.weight)

        self.__optimizer = SGD(
                    g0,
                    lr=1e-2,
                    momentum=0.6,
                    # weight_decay=w_d,
                    nesterov=True
                )
        self.__optimizer.add_param_group({'params': g1, 'weight_decay': w_d})  # add g1 with weight_decay
        self.__optimizer.add_param_group({'params': g2})  # add g2 (biases)
        self.__lr_scheduler = lr_scheduler.LambdaLR(
                                self.__optimizer,
                                lr_lambda=self.__lr_lambda
                            )
        del g0, g1, g2
        self.__model.to(self.__device)

    def __load_model(self) -> None:
        """
        Best-effort initialisation of the model from the pretrained weights at the configured
        model path. Only entries whose key and tensor shape match the current model are loaded.
        Any failure is reported on the console and training continues with random weights.
        :return:
        """
        try:
            state_dict = torch.load(self.__model_path, map_location=self.__device)
            # Take the model's state dict once instead of rebuilding it for every key.
            model_state = self.__model.state_dict()
            # check against cases where number of classes differs, causing the
            # channel of the convolutional layer just before the detection layer
            # to differ.
            new_state_dict = {
                k: v for k, v in state_dict.items()
                if k in model_state and v.shape == model_state[k].shape
            }
            self.__model.load_state_dict(new_state_dict, strict=False)
            print("="*20)
            print("Pretrained YOLOv3 model loaded to initialize weights")
            print("="*20)
        except Exception as e:
            print("="*20)
            print("pretrained weight loading failed. Defaulting to using random weight.")
            # Surface the cause so a wrong path or incompatible file can be diagnosed.
            print(f"Reason: {e}")
            print("="*20)

    def __load_data(self) -> None:
        """
        Builds the train and validation datasets from the data directory and wraps them in DataLoaders.
        :return:
        """
        self.__num_classes = len(self.__classes)
        # Appending the separator makes dirname() return the directory itself, so the
        # dataset name is the last path component with or without a trailing separator.
        self.__dataset_name = os.path.basename(os.path.dirname(self.__data_dir+os.path.sep))
        self.__custom_train_dataset = LoadImagesAndLabels(self.__data_dir, train=True)
        self.__custom_val_dataset = LoadImagesAndLabels(self.__data_dir, train=False)
        self.__train_loader = DataLoader(
                            self.__custom_train_dataset, batch_size=self.__mini_batch_size,
                            shuffle=True,
                            collate_fn=self.__custom_train_dataset.collate_fn
                        )
        # Validation uses half the training batch size; clamp to 1 because a
        # training batch size of 1 would otherwise give DataLoader a batch size of 0.
        self.__val_loader = DataLoader(
                            self.__custom_val_dataset, batch_size=max(self.__mini_batch_size//2, 1),
                            shuffle=True, collate_fn=self.__custom_val_dataset.collate_fn
                        )

    def setModelTypeAsYOLOv3(self) -> None:
        """
        'setModelTypeAsYOLOv3()' is used to set the model type to the YOLOv3 model.
        :return:
        """
        self.__model_type = "yolov3"

    def setModelTypeAsTinyYOLOv3(self) -> None:
        """
        'setModelTypeAsTinyYOLOv3()' is used to set the model type to the TinyYOLOv3 model.
        :return:
        """
        self.__model_type = "tiny-yolov3"

    def setDataDirectory(self, data_directory: str):
        """
        'setDataDirectory()' is required to set the path to which the data/dataset to be used for training is kept. The input dataset must be in the YOLO format. The directory can have any name, but it must have 'train' and 'validation'
        sub-directory. In the 'train' and 'validation' sub-directories, there must be 'images' and 'annotations'
        sub-directories respectively. The 'images' folder will contain the pictures for the dataset and the
        'annotations' folder will contain the TXT files with details of the annotations for each image in the
        'images folder'.
        N.B: Strictly take note that the filenames (without the extension) of the pictures in the 'images folder'
        must be the same as the filenames (except the extension) of their corresponding annotation TXT files in
        the 'annotations' folder.
        The structure of the 'train' and 'validation' folder must be as follows:
            >> train    >> images       >> img_1.jpg
                        >> images       >> img_2.jpg
                        >> images       >> img_3.jpg
                        >> annotations  >> img_1.txt
                        >> annotations  >> img_2.txt
                        >> annotations  >> img_3.txt
            >> validation   >> images       >> img_151.jpg
                            >> images       >> img_152.jpg
                            >> images       >> img_153.jpg
                            >> annotations  >> img_151.txt
                            >> annotations  >> img_152.txt
                            >> annotations  >> img_153.txt
        :param data_directory: path to an existing dataset directory
        :raises ValueError: if 'data_directory' is not an existing directory
        :return:
        """
        if os.path.isdir(data_directory):
            self.__data_dir = data_directory
        else:
            raise ValueError(
                    "The parameter passed should point to a valid directory"
                )

    def setTrainConfig(self, object_names_array: List[str], batch_size: int=4, num_experiments=100, train_from_pretrained_model: str = None):
        """
        'setTrainConfig()' function allows you to set the properties for the training instances. It accepts the following values:
        - object_names_array , this is an array of the names of the different objects in your dataset, in the index order your dataset is annotated
        - batch_size (optional),  this is the batch size for the training instance
        - num_experiments (optional),   also known as epochs, it is the number of times the network will train on all the training dataset
        - train_from_pretrained_model (optional), this is used to perform transfer learning by specifying the path to a pre-trained YOLOv3 or TinyYOLOv3 model

        N.B: The output folders ('models' and 'json') are derived from the data directory
        at the moment this function is called, so call 'setDataDirectory()' first.
        :param object_names_array:
        :param batch_size:
        :param num_experiments:
        :param train_from_pretrained_model:
        :return:
        """
        self.__model_path = train_from_pretrained_model
        if self.__model_path:
            extension_check(self.__model_path)
        self.__classes = object_names_array
        self.__mini_batch_size = batch_size
        self.__epochs = num_experiments
        self.__output_models_dir = os.path.join(self.__data_dir, "models")
        self.__output_json_dir = os.path.join(self.__data_dir, "json")

    def trainModel(self) -> None:
        """
        'trainModel()' function starts the actual model training. Once the training starts, the training instance
        creates 2 sub-folders in your dataset folder which are:
        - json,  where the JSON configuration file (labels and anchors) for using your trained model is stored
        - models, where your trained models are stored once they are generated after each improved experiments

        A model file is saved every time the validation mAP@0.5 improves (the previously saved
        best file is removed), and a '<model_type>_<dataset_name>_last.pt' file is saved after the final epoch.
        :raises ValueError: if no model type has been set
        :return:
        """
        # Fail early with a clear message instead of an AttributeError on a None model later on.
        if self.__model_type not in ("yolov3", "tiny-yolov3"):
            raise ValueError(
                    "Model type not set. Call 'setModelTypeAsYOLOv3()' or 'setModelTypeAsTinyYOLOv3()' before 'trainModel()'."
                )

        self.__load_data()
        os.makedirs(self.__output_models_dir, exist_ok=True)
        os.makedirs(self.__output_json_dir, exist_ok=True)

        mp, mr, map50, map50_95, best_fitness = 0, 0, 0, 0, 0.0
        nbs = 64 # nominal batch size
        nb = len(self.__train_loader) # number of batches
        nw = max(3 * nb, 1000)  # number of warmup iterations.
        last_opt_step = -1
        prev_save_name, recent_save_name = "", ""

        accumulate = max(round(nbs / self.__mini_batch_size), 1) # accumulate loss before optimizing.

        self.__set_training_param(self.__epochs, accumulate)

        with open(os.path.join(self.__output_json_dir, f"{self.__dataset_name}_{self.__model_type}_detection_config.json"), "w") as configWriter:
            json.dump(
                {
                    "labels": self.__classes,
                    "anchors": self.__anchors
                },
                configWriter
            )

        since = time.time()

        self.__lr_scheduler.last_epoch = -1

        for epoch in range(1, self.__epochs+1):
            self.__optimizer.zero_grad()
            mloss = torch.zeros(3, device=self.__device)
            print(f"Epoch {epoch}/{self.__epochs}", "-"*10, sep="\n")

            for phase in ["train", "validation"]:
                if phase=="train":
                    self.__model.train()
                    print("Train: ")
                    for batch_i, (data, anns) in tqdm(enumerate(self.__train_loader)):
                        batches_done = batch_i + nb * epoch

                        data = data.to(self.__device)
                        anns = anns.to(self.__device)

                        # warmup
                        if batches_done <= nw:
                            xi = [0, nw]  # x interp
                            accumulate = max(1, np.interp(batches_done, xi, [1, nbs / self.__mini_batch_size]).round())
                            for j, x in enumerate(self.__optimizer.param_groups):
                                # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                                x['lr'] = np.interp(batches_done, xi, [0.1 if j == 2 else 0.0, 0.01 * self.__lr_lambda(epoch)])
                                if 'momentum' in x:
                                    x['momentum'] = np.interp(batches_done, xi, [0.8, 0.9])

                        with amp.autocast(enabled=self.__cuda):
                            _ = self.__model(data)
                            loss_layers = self.__model.get_loss_layers()
                            loss, loss_components = compute_loss(loss_layers, anns.detach(), self.__device)

                        self.__scaler.scale(loss).backward()
                        # running mean of the loss components over the batches seen this epoch
                        mloss = (mloss * batch_i + loss_components) / (batch_i + 1)

                        # Optimize
                        if batches_done - last_opt_step >= accumulate:
                            self.__scaler.step(self.__optimizer)  # optimizer.step
                            self.__scaler.update()
                            self.__optimizer.zero_grad()
                            last_opt_step = batches_done

                    print(f"    box loss-> {float(mloss[0]):.5f}, object loss-> {float(mloss[1]):.5f}, class loss-> {float(mloss[2]):.5f}")

                    self.__lr_scheduler.step()

                else:
                    self.__model.eval()
                    print("Validation:")

                    mp, mr, map50, map50_95 = validate.run(
                                                self.__model, self.__val_loader,
                                                self.__num_classes, device=self.__device
                                            )

                    print(f"    recall: {mr:0.6f} precision: {mp:0.6f} mAP@0.5: {map50:0.6f}, mAP@0.5-0.95: {map50_95:0.6f}" "\n")

                    # Keep only the best checkpoint (by mAP@0.5) on disk.
                    if map50 > best_fitness:
                        best_fitness = map50
                        recent_save_name = self.__model_type+f"_{self.__dataset_name}_mAP-{best_fitness:0.5f}_epoch-{epoch}.pt"
                        if prev_save_name:
                            os.remove(os.path.join(self.__output_models_dir, prev_save_name))
                        torch.save(
                            self.__model.state_dict(),
                            os.path.join(self.__output_models_dir, recent_save_name)
                        )
                        prev_save_name = recent_save_name

            if epoch == self.__epochs:
                torch.save(
                        self.__model.state_dict(),
                        os.path.join(self.__output_models_dir, self.__model_type+f"_{self.__dataset_name}_last.pt")
                    )

        elapsed_time = time.time() - since
        print(f"Training completed in {elapsed_time//60:.0f}m {elapsed_time % 60:.0f}s")
        torch.cuda.empty_cache()


class CustomObjectDetection:
    """
    This is the object detection class for using your custom trained models.
    It supports your custom trained YOLOv3 and TinyYOLOv3 models and allows
    you to perform object detection in images.
    """
    def __init__(self) -> None:
        self.__device = "cuda" if torch.cuda.is_available() else "cpu"
        self.__anchors: List[int] = None
        self.__classes: List[str] = None 
        self.__model = None
        self.__model_loaded: bool = False
        self.__model_path: str = None
        self.__json_path: str = None
        self.__model_type: str = None
        self.__nms_score = 0.4
        self.__objectness_score = 0.4
    
    def setModelTypeAsYOLOv3(self) -> None:
        """
        'setModelTypeAsYOLOv3()' is used to set the model type to the YOLOv3 model.
        :return:
        """
        self.__model_type = "yolov3"

    def setModelTypeAsTinyYOLOv3(self) -> None:
        """
        'setModelTypeAsTinyYOLOv3()' is used to set the model type to the TinyYOLOv3 model.
        :return:
        """
        self.__model_type = "tiny-yolov3"
    
    def setModelPath(self, model_path: str):
        if os.path.isfile(model_path):
            extension_check(model_path)
            self.__model_path = model_path
            self.__model_loaded = False
        else:
            raise ValueError(
                        "invalid path, path not pointing to the weightfile."
                    ) from None
        self.__model_path = model_path
    
    def setJsonPath(self, configuration_json: str):
        self.__json_path = configuration_json
    
    def __load_classes_and_anchors(self) -> List[str]:

        with open(self.__json_path) as f:
            json_config = json.load(f)
            self.__anchors = json_config["anchors"]
            self.__classes = json_config["labels"]

    def __load_image_yolo(self, input_image : Union[str, np.ndarray, Image.Image]) -> Tuple[List[str], List[np.ndarray], torch.Tensor, torch.Tensor]:
        """
        Loads image/images from the given path. If the given path is a directory,
        this function only load the images in the directory (it does noot visit the
        subdirectories).
        """
        allowed_exts = ["jpg", "jpeg", "png"]
        fnames = []
        original_dims = []
        inputs = []
        original_imgs = []
        if type(input_image) == str:
            if os.path.isfile(input_image):
                if input_image.rsplit('.')[-1].lower() in allowed_exts:
                    img = cv2.imread(input_image)
            else:
                raise ValueError(f"image path '{input_image}' is not found or a valid file")
        elif type(input_image) == np.ndarray:
            img = input_image
        elif "PIL" in str(type(input_image)):
            img = np.asarray(input_image)
        else:
            raise ValueError(f"Invalid image input format")
        
        img_h, img_w, _ = img.shape

        original_imgs.append(np.array(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).astype(np.uint8))
        original_dims.append((img_w, img_h))
        if type(input_image) == str:
            fnames.append(os.path.basename(input_image)) 
        else:
            fnames.append("") 
        inputs.append(prepare_image(img, (416, 416)))

        if original_dims:
            return (
                    fnames,
                    original_imgs,
                    torch.FloatTensor(original_dims).repeat(1,2).to(self.__device),
                    torch.cat(inputs, 0).to(self.__device)
                    )
        raise RuntimeError(
                    f"Error loading image."
                    "\nEnsure the file is a valid image,"
                    " allowed file extensions are .jpg, .jpeg, .png"
                )
    
    def useCPU(self):
        """
        Used to force classification to be done on CPU.
        By default, classification will occur on GPU compute if available else CPU compute.
        """

        self.__device = "cpu"
        if self.__model_loaded:
            self.__model_loaded = False
            self.loadModel()
    
    def loadModel(self) -> None:
        """
        Loads the pretrained weights in the specified model path.
        """
        self.__load_classes_and_anchors()

        if self.__model_type == "yolov3":
            self.__model = YoloV3(
                anchors=self.__anchors,
                num_classes=len(self.__classes),
                device=self.__device
            )
        elif self.__model_type == "tiny-yolov3":
            self.__model = YoloV3Tiny(
                anchors=self.__anchors,
                num_classes=len(self.__classes),
                device=self.__device
            )
        else:
            raise ValueError(f"Invalid model type. Call setModelTypeAsYOLOv3() or setModelTypeAsTinyYOLOv3() to set a model type before loading the model")
                            
        self.__model.to(self.__device)

        state_dict = torch.load(self.__model_path, map_location=self.__device)
        try:
            self.__model.load_state_dict(state_dict)
            self.__model_loaded = True
            self.__model.to(self.__device).eval()
        except Exception as e:
            raise RuntimeError(f"Invalid weights!!! {e}")


    def detectObjectsFromImage(self,
                input_image: Union[str, np.ndarray, Image.Image],
                output_image_path: str=None,
                output_type: str ="file",
                extract_detected_objects: bool=False, minimum_percentage_probability: int=40,
                display_percentage_probability: bool=True, display_object_name: bool=True,
                display_box: bool=True,
                custom_objects: List=None,
                nms_treshold: float= 0.4,
                objectness_treshold: float= 0.4,
               ) -> Union[List[List[Tuple[str, float, Dict[str, int]]]], np.ndarray, List[np.ndarray], List[str]]:
        """
        Detects objects in an image using the unique classes provided
        by COCO.

        :param input_image: path to an image file, cv2 image or PIL image
        :param output_image_path: path to save input image with predictions rendered
        :param output_type: type of output for rendered image. Acceptable values are 'file' and 'array` ( a cv2 image )
        :param extract_detected_objects: extract each object based on the output type
        :param minimum_percentage_probability: the minimum confidence a detected object must have
        :param display_percentage_probability: to diplay/not display the confidence on rendered image   
        :param display_object_name: to diplay/not display the object name on rendered image  
        :param display_box: to diplay/not display the object bounding box on rendered image 
        :param custom_objects: a dictionary of detectable objects set to boolean values
        
        :returns: A list of tuples containing the label of detected object and the
        confidence.
        """
        
        self.__nms_score = nms_treshold
        self.__objectness_score = objectness_treshold
        
        self.__model.eval()
        if not self.__model_loaded:
            if self.__model_path:
                warnings.warn(
                        "Model path has changed but pretrained weights in the"
                        " new path is yet to be loaded.",
                        ResourceWarning
                    )
            else:
                raise RuntimeError(
                        "Model path isn't set, pretrained weights aren't used."
                    )
        
        predictions = defaultdict(lambda : [])
        

        if self.__model_type == "yolov3" or self.__model_type == "tiny-yolov3":
            fnames, original_imgs, input_dims, imgs = self.__load_image_yolo(input_image)
            
            with torch.no_grad():
                output = self.__model(imgs)
            
            output = get_predictions(
                    pred=output.to(self.__device), num_classes=len(self.__classes),
                    nms_confidence_level=self.__nms_score, objectness_confidence= self.__objectness_score,
                    device=self.__device
                )
            
            if output is None:
                if output_type == "array":
                    if extract_detected_objects:
                        return original_imgs[0], [], []
                    else:
                        return original_imgs[0], []
                else:
                    if extract_detected_objects:
                        return original_imgs[0], []
                    else:
                        return []
            
            # scale the output to match the dimension of the original image
            input_dims = torch.index_select(input_dims, 0, output[:, 0].long())
            scaling_factor = torch.min(416 / input_dims, 1)[0].view(-1, 1)
            output[:, [1,3]] -= (416 - (scaling_factor * input_dims[:, 0].view(-1,1))) / 2
            output[:, [2,4]] -= (416 - (scaling_factor * input_dims[:, 1].view(-1,1))) / 2
            output[:, 1:5] /= scaling_factor

            #clip bounding box for those that extended outside the detected image.
            for idx in range(output.shape[0]):
                output[idx, [1,3]] = torch.clamp(output[idx, [1,3]], 0.0, input_dims[idx, 0])
                output[idx, [2,4]] = torch.clamp(output[idx, [2,4]], 0.0, input_dims[idx, 1])

            for pred in output:
                pred_label = self.__classes[int(pred[-1])]
                if custom_objects:
                    if pred_label.replace(" ", "_") in custom_objects.keys():
                        if not custom_objects[pred_label.replace(" ", "_")]:
                            continue
                    else:
                        continue
                predictions[int(pred[0])].append((
                        pred_label,
                        float(pred[-2]),
                        {k:v for k,v in zip(["x1", "y1", "x2", "y2"], map(int, pred[1:5]))},
                    ))
        
        # Render detection on copy of input image
        original_input_image = None
        output_image_array = None
        extracted_objects = []

        if self.__model_type == "yolov3" or self.__model_type == "tiny-yolov3":
            original_input_image = cv2.cvtColor(original_imgs[0], cv2.COLOR_RGB2BGR)
            if isinstance(output, torch.Tensor):
                for pred in output:
                    percentage_conf = round(float(pred[-2]) * 100, 2)
                    if percentage_conf < minimum_percentage_probability:
                        continue

                    displayed_label = ""
                    if display_object_name:
                        displayed_label = f"{self.__classes[int(pred[-1].item())]} : "
                    if display_percentage_probability:
                        displayed_label += f" {percentage_conf}%"


                    original_imgs[int(pred[0].item())] = draw_bbox_and_label(pred[1:5].int() if display_box else None,
                        displayed_label,
                        original_imgs[int(pred[0].item())]
                    )
                output_image_array = cv2.cvtColor(original_imgs[0], cv2.COLOR_RGB2BGR)

        # Format predictions for function reponse
        predictions_batch = list(predictions.values())
        predictions_list = predictions_batch[0] if len(predictions_batch) > 0 else []
        min_probability = minimum_percentage_probability / 100


        if output_type == "file":
            if output_image_path:
                cv2.imwrite(output_image_path, output_image_array)

                if extract_detected_objects:
                    extraction_dir = ".".join(output_image_path.split(".")[:-1]) + "-extracted"
                    os.mkdir(extraction_dir)
                    count = 0
                    for obj_prediction in predictions_list: 
                        if obj_prediction[1] >= min_probability:
                            count += 1
                            extracted_path = os.path.join(
                                extraction_dir, 
                                ".".join(os.path.basename(output_image_path).split(".")[:-1]) + f"-{count}.jpg"
                            )
                            obj_bbox = obj_prediction[2]
                            cv2.imwrite(extracted_path, original_input_image[obj_bbox["y1"] : obj_bbox["y2"], obj_bbox["x1"] : obj_bbox["x2"]])

                            extracted_objects.append(extracted_path)

        elif output_type == "array":
            if extract_detected_objects:
                for obj_prediction in predictions_list: 
                    if obj_prediction[1] >= min_probability:
                        obj_bbox = obj_prediction[2]

                        extracted_objects.append(original_input_image[obj_bbox["y1"] : obj_bbox["y2"], obj_bbox["x1"] : obj_bbox["x2"]])
        else:
            raise ValueError(f"Invalid output_type '{output_type}'. Supported values are 'file' and 'array' ")

        
        predictions_list = [
            {
                "name": prediction[0], "percentage_probability": round(prediction[1] * 100, 2),
                "box_points": [prediction[2]["x1"], prediction[2]["y1"], prediction[2]["x2"], prediction[2]["y2"]]
            } for prediction in predictions_list if prediction[1] >= min_probability
        ]


        if output_type == "array":
            if extract_detected_objects:
                return output_image_array, predictions_list, extracted_objects
            else:
                return output_image_array, predictions_list
        else:
            if extract_detected_objects:
                return predictions_list, extracted_objects
            else:
                return predictions_list


class CustomVideoObjectDetection:
    """
    This is the custom objects detection class for videos and camera live stream inputs in the ImageAI library. It provides support for YOLOv3 and TinyYOLOv3 object detection networks. After instantiating this class, you can set its properties and
    make object detections using its pre-defined functions.
    The following functions are required to be called before object detection can be made
    * setModelPath()
    * setJsonPath()
    * One of the following, and it must correspond to the model set in setModelPath()
    [setModelTypeAsYOLOv3(), setModelTypeAsTinyYOLOv3()]
    * loadModel() [This must be called once only before performing object detection]
    Once the above functions have been called, you can call the detectObjectsFromVideo() function
    of the object detection instance at any time to
    obtain observable objects in any video or camera live stream.
    """

    def __init__(self):
        """
        Create the video detector around a fresh CustomObjectDetection
        instance, to which all configuration and per-frame detection is
        delegated.
        """
        image_detector = CustomObjectDetection()
        self.__detector = image_detector

    def setModelTypeAsYOLOv3(self):
        self.__detector.setModelTypeAsYOLOv3()
    
    def setModelTypeAsTinyYOLOv3(self):
        self.__detector.setModelTypeAsTinyYOLOv3()

    def setModelPath(self, model_path: str):
        """
        Register the weights file on the wrapped image detector.

        :param model_path: path to the weights file
        """
        # extension_check is defined elsewhere in the package; presumably it
        # validates the weight file's extension -- confirm against its source.
        extension_check(model_path)
        detector = self.__detector
        detector.setModelPath(model_path)
    
    def setJsonPath(self, configuration_json: str):
        self.__detector.setJsonPath(configuration_json)

    def loadModel(self):
        self.__detector.loadModel()
    
    def useCPU(self):
        self.__detector.useCPU()

    def detectObjectsFromVideo(self, input_file_path="", camera_input=None, output_file_path="", frames_per_second=20,
                               frame_detection_interval=1, minimum_percentage_probability=40, log_progress=False,
                               display_percentage_probability=True, display_object_name=True, display_box=True, save_detected_video=True,
                               per_frame_function=None, per_second_function=None, per_minute_function=None,
                               video_complete_function=None, return_detected_frame=False, detection_timeout = None):

        """
        'detectObjectsFromVideo()' function is used to detect objects observable in the given video path or a camera input:
        * input_file_path , which is the file path to the input video. It is required only if 'camera_input' is not set
        * camera_input , allows you to parse in camera input for live video detections
        * output_file_path , which is the path to the output video. It is required only if 'save_detected_video' is not set to False
        * frames_per_second , which is the number of frames to be used in the output video
        * frame_detection_interval (optional, 1 by default)  , which is the intervals of frames that will be detected.
        * minimum_percentage_probability (optional, 50 by default) , option to set the minimum percentage probability for nominating a detected object for output.
        * log_progress (optional) , which states if the progress of the frame processed is to be logged to console
        * display_percentage_probability (optional), can be used to hide or show probability scores on the detected video frames
        * display_object_name (optional), can be used to show or hide object names on the detected video frames
        * save_save_detected_video (optional, True by default), can be set to or not to save the detected video
        * per_frame_function (optional), this parameter allows you to parse in a function you will want to execute after each frame of the video is detected. If this parameter is set to a function, after every video  frame is detected, the function will be executed with the following values parsed into it:
            -- position number of the frame
            -- an array of dictinaries, with each dictionary corresponding to each object detected. Each dictionary contains 'name', 'percentage_probability' and 'box_points'
            -- a dictionary with with keys being the name of each unique objects and value are the number of instances of the object present
            -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed as the fourth value into the function
        * per_second_function (optional), this parameter allows you to parse in a function you will want to execute after each second of the video is detected. If this parameter is set to a function, after every second of a video is detected, the function will be executed with the following values parsed into it:
            -- position number of the second
            -- an array of dictionaries whose keys are position number of each frame present in the last second , and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
            -- an array of dictionaries, with each dictionary corresponding to each frame in the past second, and the keys of each dictionary are the name of the number of unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
            -- a dictionary with its keys being the name of each unique object detected throughout the past second, and the key values are the average number of instances of the object found in all the frames contained in the past second
            -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed
                                                                as the fifth value into the function
        * per_minute_function (optional), this parameter allows you to parse in a function you will want to execute after each minute of the video is detected. If this parameter is set to a function, after every minute of a video is detected, the function will be executed with the following values parsed into it:
            -- position number of the minute
            -- an array of dictionaries whose keys are position number of each frame present in the last minute , and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
            -- an array of dictionaries, with each dictionary corresponding to each frame in the past minute, and the keys of each dictionary are the name of the number of unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
            -- a dictionary with its keys being the name of each unique object detected throughout the past minute, and the key values are the average number of instances of the object found in all the frames contained in the past minute
            -- If return_detected_frame is set to True, the numpy array of the detected frame will be parsed as the fifth value into the function
        * video_complete_function (optional), this parameter allows you to parse in a function you will want to execute after all of the video frames have been detected. If this parameter is set to a function, after all of frames of a video is detected, the function will be executed with the following values parsed into it:
            -- an array of dictionaries whose keys are position number of each frame present in the entire video , and the value for each key is the array for each frame that contains the dictionaries for each object detected in the frame
            -- an array of dictionaries, with each dictionary corresponding to each frame in the entire video, and the keys of each dictionary are the name of the number of unique objects detected in each frame, and the key values are the number of instances of the objects found in the frame
            -- a dictionary with its keys being the name of each unique object detected throughout the entire video, and the key values are the average number of instances of the object found in all the frames contained in the entire video
        * return_detected_frame (optionally, False by default), option to obtain the return the last detected video frame into the per_per_frame_function, per_per_second_function or per_per_minute_function
        * detection_timeout (optionally, None by default), option to state the number of seconds of a video that should be detected after which the detection function stop processing the video
        * thread_safe (optional, False by default), enforce the loaded detection model works across all threads if set to true, made possible by forcing all Tensorflow inference to run on the default graph.
                :param input_file_path:
                :param camera_input
                :param output_file_path:
                :param save_detected_video:
                :param frames_per_second:
                :param frame_detection_interval:
                :param minimum_percentage_probability:
                :param log_progress:
                :param display_percentage_probability:
                :param display_object_name:
                :param per_frame_function:
                :param per_second_function:
                :param per_minute_function:
                :param video_complete_function:
                :param return_detected_frame:
                :param detection_timeout:
                :param thread_safe:
                :return output_video_filepath:
                :return counting:
                :return output_objects_array:
                :return output_objects_count:
                :return detected_copy:
                :return this_second_output_object_array:
                :return this_second_counting_array:
                :return this_second_counting:
                :return this_minute_output_object_array:
                :return this_minute_counting_array:
                :return this_minute_counting:
                :return this_video_output_object_array:
                :return this_video_counting_array:
                :return this_video_counting:
        """

        if (input_file_path == "" and camera_input == None):
            raise ValueError(
                "You must set 'input_file_path' to a valid video file, or set 'camera_input' to a valid camera")
        elif (save_detected_video == True and output_file_path == ""):
            raise ValueError(
                "You must set 'output_video_filepath' to a valid video file name, in which the detected video will be saved. If you don't intend to save the detected video, set 'save_detected_video=False'")

        else:

            output_frames_dict = {}
            output_frames_count_dict = {}

            input_video = cv2.VideoCapture(input_file_path)
            if (camera_input != None):
                input_video = camera_input

            output_video_filepath = output_file_path + '.mp4'

            frame_width = int(input_video.get(3))
            frame_height = int(input_video.get(4))
            output_video = cv2.VideoWriter(output_video_filepath, cv2.VideoWriter_fourcc(*"MP4V"),
                                            frames_per_second,
                                            (frame_width, frame_height))

            counting = 0

            detection_timeout_count = 0
            video_frames_count = 0

            while (input_video.isOpened()):
                ret, frame = input_video.read()

                if (ret == True):

                    video_frames_count += 1
                    if (detection_timeout != None):
                        if ((video_frames_count % frames_per_second) == 0):
                            detection_timeout_count += 1

                        if (detection_timeout_count >= detection_timeout):
                            break

                    output_objects_array = []

                    counting += 1

                    if (log_progress == True):
                        print("Processing Frame : ", str(counting))

                    detected_copy = frame.copy()

                    check_frame_interval = counting % frame_detection_interval

                    if (counting == 1 or check_frame_interval == 0):
                        try:
                            detected_copy, output_objects_array = self.__detector.detectObjectsFromImage(
                                input_image=frame, output_type="array",
                                minimum_percentage_probability=minimum_percentage_probability,
                                display_percentage_probability=display_percentage_probability,
                                display_object_name=display_object_name,
                                display_box=display_box)
                            
                        except Exception as e:
                            warnings.warn()
                    
                    if (save_detected_video == True):
                        output_video.write(detected_copy)

                    if detected_copy is not None and output_objects_array is not None:

                        output_frames_dict[counting] = output_objects_array

                        output_objects_count = {}
                        for eachItem in output_objects_array:
                            eachItemName = eachItem["name"]
                            try:
                                output_objects_count[eachItemName] = output_objects_count[eachItemName] + 1
                            except:
                                output_objects_count[eachItemName] = 1

                        output_frames_count_dict[counting] = output_objects_count

                        if (counting == 1 or check_frame_interval == 0):
                            if (per_frame_function != None):
                                if (return_detected_frame == True):
                                    per_frame_function(counting, output_objects_array, output_objects_count,
                                                        detected_copy)
                                elif (return_detected_frame == False):
                                    per_frame_function(counting, output_objects_array, output_objects_count)

                        if (per_second_function != None):
                            if (counting != 1 and (counting % frames_per_second) == 0):

                                this_second_output_object_array = []
                                this_second_counting_array = []
                                this_second_counting = {}

                                for aa in range(counting):
                                    if (aa >= (counting - frames_per_second)):
                                        this_second_output_object_array.append(output_frames_dict[aa + 1])
                                        this_second_counting_array.append(output_frames_count_dict[aa + 1])

                                for eachCountingDict in this_second_counting_array:
                                    for eachItem in eachCountingDict:
                                        try:
                                            this_second_counting[eachItem] = this_second_counting[eachItem] + \
                                                                                eachCountingDict[eachItem]
                                        except:
                                            this_second_counting[eachItem] = eachCountingDict[eachItem]

                                for eachCountingItem in this_second_counting:
                                    this_second_counting[eachCountingItem] = int(this_second_counting[eachCountingItem] / frames_per_second)

                                if (return_detected_frame == True):
                                    per_second_function(int(counting / frames_per_second),
                                                        this_second_output_object_array, this_second_counting_array,
                                                        this_second_counting, detected_copy)

                                elif (return_detected_frame == False):
                                    per_second_function(int(counting / frames_per_second),
                                                        this_second_output_object_array, this_second_counting_array,
                                                        this_second_counting)

                        if (per_minute_function != None):

                            if (counting != 1 and (counting % (frames_per_second * 60)) == 0):

                                this_minute_output_object_array = []
                                this_minute_counting_array = []
                                this_minute_counting = {}

                                for aa in range(counting):
                                    if (aa >= (counting - (frames_per_second * 60))):
                                        this_minute_output_object_array.append(output_frames_dict[aa + 1])
                                        this_minute_counting_array.append(output_frames_count_dict[aa + 1])

                                for eachCountingDict in this_minute_counting_array:
                                    for eachItem in eachCountingDict:
                                        try:
                                            this_minute_counting[eachItem] = this_minute_counting[eachItem] + \
                                                                                eachCountingDict[eachItem]
                                        except:
                                            this_minute_counting[eachItem] = eachCountingDict[eachItem]

                                for eachCountingItem in this_minute_counting:
                                    this_minute_counting[eachCountingItem] = int(this_minute_counting[eachCountingItem] / (frames_per_second * 60))

                                if (return_detected_frame == True):
                                    per_minute_function(int(counting / (frames_per_second * 60)),
                                                        this_minute_output_object_array, this_minute_counting_array,
                                                        this_minute_counting, detected_copy)

                                elif (return_detected_frame == False):
                                    per_minute_function(int(counting / (frames_per_second * 60)),
                                                        this_minute_output_object_array, this_minute_counting_array,
                                                        this_minute_counting)
                else:
                    break

            if (video_complete_function != None):

                this_video_output_object_array = []
                this_video_counting_array = []
                this_video_counting = {}

                for aa in range(counting):
                    this_video_output_object_array.append(output_frames_dict[aa + 1])
                    this_video_counting_array.append(output_frames_count_dict[aa + 1])

                for eachCountingDict in this_video_counting_array:
                    for eachItem in eachCountingDict:
                        try:
                            this_video_counting[eachItem] = this_video_counting[eachItem] + \
                                                            eachCountingDict[eachItem]
                        except:
                            this_video_counting[eachItem] = eachCountingDict[eachItem]

                for eachCountingItem in this_video_counting:
                    this_video_counting[eachCountingItem] = int(this_video_counting[eachCountingItem] / counting)

                video_complete_function(this_video_output_object_array, this_video_counting_array,
                                        this_video_counting)

            input_video.release()
            output_video.release()

            if (save_detected_video == True):
                return output_video_filepath

            
================================================
FILE: imageai/Detection/Custom/yolo/__init__.py
================================================


================================================
FILE: imageai/Detection/Custom/yolo/compute_loss.py
================================================
import math

import torch
import torch.nn as nn

# This new loss function is based on https://github.com/ultralytics/yolov3/blob/master/utils/loss.py

def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-9):
    """
    Compute the IoU (or a GIoU / DIoU / CIoU variant) between box1 and box2.

    Arguments:
        box1: box coordinates indexed along the first dimension (4 entries).
        box2: boxes laid out as n x 4; transposed internally so coordinates
              are indexed along the first dimension as well.
        x1y1x2y2: True when boxes are corner-encoded (x1, y1, x2, y2),
                  False when they are centre-encoded (x, y, w, h).
        GIoU, DIoU, CIoU: select the returned metric. When several flags are
                          set, DIoU wins over CIoU, which wins over GIoU.
        eps: small constant guarding the divisions against zero.

    Returns:
        The selected overlap metric for each box pair.
    """
    box2 = box2.T

    # Bring both boxes into corner form.
    if x1y1x2y2:
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:
        half_w1, half_h1 = box1[2] / 2, box1[3] / 2
        half_w2, half_h2 = box2[2] / 2, box2[3] / 2
        b1_x1, b1_x2 = box1[0] - half_w1, box1[0] + half_w1
        b1_y1, b1_y2 = box1[1] - half_h1, box1[1] + half_h1
        b2_x1, b2_x2 = box2[0] - half_w2, box2[0] + half_w2
        b2_y1, b2_y2 = box2[1] - half_h2, box2[1] + half_h2

    # Overlap rectangle; negative extents (no overlap) are clamped to zero.
    overlap_w = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0)
    overlap_h = (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
    inter = overlap_w * overlap_h

    # Union area (eps is added to the heights so w / h below never divides by zero).
    w1 = b1_x2 - b1_x1
    h1 = b1_y2 - b1_y1 + eps
    w2 = b2_x2 - b2_x1
    h2 = b2_y2 - b2_y1 + eps
    union = w1 * h1 + w2 * h2 - inter + eps

    iou = inter / union
    if not (GIoU or DIoU or CIoU):
        return iou  # plain IoU

    # Smallest box enclosing both inputs.
    cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)
    ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)

    if not (CIoU or DIoU):
        # GIoU https://arxiv.org/pdf/1902.09630.pdf
        c_area = cw * ch + eps
        return iou - (c_area - union) / c_area

    # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
    c2 = cw ** 2 + ch ** 2 + eps  # squared diagonal of the enclosing box
    rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
            (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # squared centre distance
    if DIoU:
        return iou - rho2 / c2

    # CIoU https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
    v = (4 / math.pi ** 2) * \
        torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
    with torch.no_grad():
        # alpha is treated as a constant weight: no gradient flows through it.
        alpha = v / ((1 + eps) - iou + v)
    return iou - (rho2 / c2 + v * alpha)


def compute_loss(loss_layers, targets, device="cpu"):
    """
    Compute the combined box / objectness / class loss over all yolo layers.

    Arguments:
        loss_layers: sequence of yolo layer objects. Each must expose `.pred`
                     (the raw layer output); the first one must expose
                     `.num_classes`. `build_targets` additionally reads
                     `.anchors` and `.stride` from them.
        targets: label tensor forwarded unchanged to `build_targets`
                 (rows of image, class, x, y, w, h according to that function).
        device: device on which the loss accumulators and targets are created.

    Returns:
        (loss, components) where `loss` is the weighted sum of the three terms
        multiplied by the first dimension of the last layer's objectness target
        (presumably the batch size -- TODO confirm), and `components` is the
        detached tensor `[lbox, lobj, lcls]`.

    NOTE(review): `balance` has three entries, so more than three layers would
    raise an IndexError; the 416/640 and nc/80 scale factors are hard-coded.
    """
    nc = loss_layers[0].num_classes
    nl = len(loss_layers)
    # Raw output of each yolo layer
    predictions = [layer.pred for layer in loss_layers]

    # Accumulators for the class, box and objectness losses.
    lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)

    # Build yolo targets: per-layer classes, boxes, index tuples and matched anchors
    tcls, tbox, indices, anchors = build_targets(predictions, targets, loss_layers, device)  # targets

    BCEcls = nn.BCEWithLogitsLoss(
        pos_weight=torch.tensor([1.0], device=device))
    BCEobj = nn.BCEWithLogitsLoss(
        pos_weight=torch.tensor([1.0], device=device))

    # Per-layer weight applied to the objectness loss
    balance = [4.0, 1.0, 0.4]

    # Calculate losses for each yolo layer
    for layer_index, layer_predictions in enumerate(predictions):
        # Get image ids, anchors, grid index i and j for each target in the current yolo layer
        b, anchor, grid_j, grid_i = indices[layer_index]
        # Build empty object target tensor with the same shape as the object prediction
        tobj = torch.zeros_like(layer_predictions[..., 0], device=device)  # target obj
        # Get the number of targets for this layer.
        # Each target is a label box with some scaling and the association of an anchor box.
        # A label box may be associated with zero or several anchors, so it can appear
        # multiple times in the targets or not at all.
        num_targets = b.shape[0]
        # Check if there are targets for this batch
        if num_targets:
            # Load the corresponding values from the predictions for each of the targets
            ps = layer_predictions[b, anchor, grid_j, grid_i]

            # Regression of the box
            # Apply sigmoid to the xy offset predictions and map them to the range (-0.5, 1.5)
            pxy = ps[:, :2].sigmoid() * 2 - 0.5
            # Map the wh predictions to (0, 4) via (2 * sigmoid) ** 2 and scale by the matched anchor of each target
            pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[layer_index]
            # Build box out of xy and wh
            pbox = torch.cat((pxy, pwh), 1)
            # Calculate the CIoU of each target with the predicted box for its cell + anchor
            iou = bbox_iou(pbox.T, tbox[layer_index], x1y1x2y2=False, CIoU=True)
            # The best possible IoU is 1, so the loss to minimise is the mean of 1 - IoU
            lbox += (1.0 - iou).mean()  # iou loss

            # Classification of the objectness
            # Fill the empty objectness target with the (non-negative) IoU of each target at the target's position
            tobj[b, anchor, grid_j, grid_i] = iou.detach().clamp(0).type(tobj.dtype)  # Use cells with iou > 0 as object targets

            # Classification of the class
            # Check if we need to do a classification (number of classes > 1)
            if nc > 1:
                # One-hot class encoding
                t = torch.full_like(ps[:, 5:], 0.0, device=device)  # targets
                t[range(num_targets), tcls[layer_index]] = 1
                # Use the tensor to calculate the BCE loss
                lcls += BCEcls(ps[:, 5:], t)  # BCE

        # Objectness loss, computed for every layer even when it has no targets:
        # BCE between the on-the-fly generated target and the network prediction
        obji = BCEobj(layer_predictions[..., 4], tobj) # obj loss
        lobj += obji * balance[layer_index]

    # Fixed weighting of the three loss terms
    lbox *= 0.05
    lobj *= (1.0 * ((416 / 640) ** 2)) # scale to image size
    lcls *= (0.5 * (nc / 80))  # scale to classes

    # Merge losses; tobj here is the objectness target of the last processed layer
    loss = (lbox + lobj + lcls) * tobj.shape[0]

    return loss, (torch.cat((lbox, lobj, lcls))).detach()


def build_targets(p, targets, loss_layers, device="cpu"):
    """
    Build the per-layer training targets consumed by compute_loss().

    Arguments:
        p: list with the prediction tensor of each yolo layer; only its shape
           is read (dimensions 2 and 3 are used as the grid size).
        targets: label tensor with rows (image, class, x, y, w, h).
        loss_layers: yolo layer objects exposing `.anchors` and `.stride`.
        device: device on which helper tensors are created.

    Returns:
        (tcls, tbox, indices, anch): four lists with one entry per yolo layer:
        class ids, boxes (xy offset inside the cell followed by wh),
        index tuples (image, anchor, grid_j, grid_i) and the matched anchors.
    """
    # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
    na, nt = len(loss_layers[0].anchors), targets.shape[0]  # number of anchors, targets
    tcls, tbox, indices, anch = [], [], [], []
    gain = torch.ones(7, device=device)  # normalized to gridspace gain
    # Make a tensor that counts 0..na-1 (one row per anchor) and repeat it once per target box
    ai = torch.arange(na, device=device).float().view(na, 1).repeat(1, nt)
    # Copy the target boxes once per anchor and append the anchor index to each copy;
    # the anchor index is also expressed by the new first dimension
    targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)

    g = 0.5
    # Candidate cell offsets: the cell itself plus its four direct neighbours, scaled by g
    off = torch.tensor([
                        [0, 0], [1, 0], [0, 1],
                        [-1, 0], [0, -1]
                        ], device=device).float() * g #offsets

    for i, yolo_layer in enumerate(loss_layers):
        # Scale anchors by the yolo grid cell size so that an anchor with the size of the cell would result in 1
        anchors = yolo_layer.anchors / yolo_layer.stride
        # Write the number of yolo cells of this layer into the gain tensor
        # The gain tensor matches the columns of our targets (img id, class, x, y, w, h, anchor id)
        gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain
        # Scale targets by the number of yolo layer cells, they are now in the yolo cell coordinate system
        t = targets * gain
        # Check if we have targets
        if nt:
            # Calculate the ratio between target box and anchor for both width and height
            r = t[:, :, 4:6] / anchors[:, None]
            # Select the ratios that have the highest divergence in any axis and check if the ratio is less than 4
            j = torch.max(r, 1.0 / r).max(2)[0] < 4.0  # compare
            # Only use targets that have the correct ratios for their anchors
            # That means we only keep ones that have a matching anchor and we lose the anchor dimension
            # The anchor id is still saved in the 7th value of each target
            t = t[j]

            #offsets
            gxy = t[:, 2:4] #grid xy
            # Same position measured from the opposite (right / bottom) grid edge
            gxi = gain[[2,3]] - gxy
            # j, k: target lies in the first half of its cell along x / y (and beyond the first cell)
            j, k = ((gxy % 1 < g) & (gxy > 1)).T
            # l, m: the same test measured from the opposite grid edge
            l, m = ((gxi % 1 < g) & (gxi > 1)).T
            # One mask row per entry of `off`; the first row (the own cell) is always selected
            j = torch.stack((torch.ones_like(j), j, k, l, m))
            t = t.repeat((5, 1, 1))[j]
            offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
        else:
            # No labels in this batch: keep a zero-row tensor with the right column layout
            t = targets[0]
            offsets = 0

        # Extract image id in batch and class id
        b, c = t[:, :2].long().T
        # We isolate the target cell associations.
        # x, y, w, h are already in the cell coordinate system meaning an x = 1.2 would be 1.2 times cellwidth
        gxy = t[:, 2:4] #grid xy
        gwh = t[:, 4:6]  # grid wh
        # Cast to int to get a cell index e.g. 1.2 gets associated to cell 1
        gij = (gxy - offsets).long()
        # Isolate x and y index dimensions
        gi, gj = gij.T  # grid xy indices

        # Convert anchor indexes to int
        a = t[:, 6].long()
        # Add target tensors for this yolo layer to the output lists
        # Add to index list and limit index range to prevent out of bounds
        # (clamp_ works in place on views of gij, so gij below holds the clamped indices)
        indices.append((b, a, gj.clamp_(0, int(gain[3] - 1)), gi.clamp_(0, int(gain[2] - 1))))
        # Add to target box list and convert box coordinates from global grid coordinates to local offsets in the grid cell
        tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
        # Add correct anchor for each target to the list
        anch.append(anchors[a])
        # Add class for each target to the list
        tcls.append(c)

    return tcls, tbox, indices, anch


================================================
FILE: imageai/Detection/Custom/yolo/custom_anchors.py
================================================
import random

import torch
import numpy as np
from scipy.cluster.vq import kmeans

# This new anchor generator function is based on https://github.com/ultralytics/yolov3/blob/master/utils/autoanchor.py

def generate_anchors(dataset, n=9, img_size=416, thr=4.0, gen=1000, verbose=True):
    """ Creates kmeans-evolved anchors from training dataset

        Arguments:
            dataset: a loaded dataset i.e. subclass of torch.utils.data.Dataset;
                     it must expose `shapes` (array of image width/height pairs)
                     and `labels` (list of arrays whose columns 3:5 hold the
                     normalized box width and height)
            n: number of anchors
            img_size: image size used for training
            thr: anchor-label wh ratio threshold used for training, default=4.0
            gen: generations to evolve anchors using genetic algorithm
            verbose: print all results; when False nothing is printed

        Return:
            k: kmeans evolved anchors, sorted from small to large area

        Raises:
            AssertionError: if kmeans returns fewer than `n` cluster centres
    """
    thr = 1 / thr

    def metric(k, wh):  # compute metrics
        # Per label/anchor pair: the worse of the width and height ratios, folded into (0, 1]
        r = wh[:, None] / k[None]
        x = torch.min(r, 1 / r).min(2)[0]  # ratio metric
        return x, x.max(1)[0]  # x, best_x

    def anchor_fitness(k):  # mutation fitness
        _, best = metric(torch.tensor(k, dtype=torch.float32), wh)
        return (best * (best > thr).float()).mean()  # fitness

    def print_results(k, verbose=True):
        k = k[np.argsort(k.prod(1))]  # sort small to large
        if verbose:
            x, best = metric(k, wh0)
            bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n  # best possible recall, anch > thr
            s = f'thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr\n' \
                f'n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' \
                f'past_thr={x[x > thr].mean():.3f}-mean: '
            print(s)
        return k

    # Get label wh in pixels, with the longer image side scaled to img_size
    shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
    wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)])  # wh

    # Filter
    i = (wh0 < 3.0).any(1).sum()
    if i and verbose:
        print(f'WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.')
    wh = wh0[(wh0 >= 2.0).any(1)]  # keep labels with at least one side >= 2 pixels
    # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1)  # multiply by random scale 0-1

    # Kmeans calculation
    s = wh.std(0)  # sigmas for whitening
    k, dist = kmeans(wh / s, n, iter=30)  # points, mean distance
    assert len(k) == n, f'ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}'
    k *= s
    wh = torch.tensor(wh, dtype=torch.float32)  # filtered
    wh0 = torch.tensor(wh0, dtype=torch.float32)  # unfiltered
    k = print_results(k, verbose=False)

    # Evolve
    npr = np.random
    f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1  # fitness, anchor array shape, mutation prob, sigma
    if verbose:
        print("Generating anchor boxes for training images...")
    for _ in range(gen):
        v = np.ones(sh)
        while (v == 1).all():  # mutate until a change occurs (prevent duplicates)
            v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
        kg = (k.copy() * v).clip(min=2.0)
        fg = anchor_fitness(kg)
        if fg > f:
            # Keep the mutation only when it improves the fitness
            f, k = fg, kg.copy()

    # Forward the caller's flag: previously the summary was printed even when verbose=False.
    return print_results(k, verbose=verbose)


================================================
FILE: imageai/Detection/Custom/yolo/dataset.py
================================================
import os
import warnings
from typing import Tuple, List

import cv2 as cv
import numpy as np
import torch
from torch.utils.data import Dataset
from torchvision import transforms

from ....yolov3.utils import prepare_image

class LoadImagesAndLabels(Dataset):
    """
    Dataset that pairs images with their YOLO-format annotation files.

    The dataset directory must contain a `train` or `validation` sub-folder
    (selected by `train`) holding an `images` folder; the label of each image
    is looked up in the sibling `annotations` folder under the same file name
    with a `.txt` extension. Label rows are `class x_center y_center width
    height` with box values normalized to the range 0 - 1.

    Public attributes:
        shapes: numpy array with the (width, height) of every readable image.
        labels: list with the raw label array (n x 5) of every readable image.
    """

    def __init__(self, path : str, net_dim=(416, 416), train=True):
        """
        Arguments:
            path: dataset root directory.
            net_dim: (width, height) of the network input.
            train: load the `train` split when True, `validation` otherwise.

        Raises:
            NotADirectoryError: if `path` is not an existing directory.
        """
        if not os.path.isdir(path):
            raise NotADirectoryError("path is not a valid directory!!!")

        super().__init__()

        if train:
            path = os.path.join(path, "train")
        else:
            path = os.path.join(path, "validation")

        self.__net_width, self.__net_height = net_dim
        self.__images_paths = []
        self.shapes = []
        self.labels = []
        for img in os.listdir(os.path.join(path, "images")):
            p = os.path.join(path, "images", img)
            image = cv.imread(p)
            # Entries that cannot be decoded as an image are skipped.
            if isinstance(image, np.ndarray):
                l_p = self.__img_path2label_path(p)
                self.__images_paths.append(p)
                self.shapes.append((image.shape[1], image.shape[0]))
                self.labels.append(self.__load_raw_label(l_p))

        self.__nsamples = len(self.__images_paths)
        self.shapes = np.array(self.shapes)

    def __len__(self) -> int:
        """Number of readable images found in the split."""
        return self.__nsamples

    def __img_path2label_path(self, path : str) -> str:
        """
        Map an image path to its label path: the last `images` directory
        component becomes `annotations` and the extension becomes `.txt`.
        """
        im, lb = os.sep+"images"+os.sep, os.sep+"annotations"+os.sep
        return lb.join(path.rsplit(im, 1)).rsplit(".", 1)[0] + ".txt"

    def __getitem__(self, idx) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Return the prepared image and its processed label tensor.

        Raises:
            IndexError: if `idx` is not smaller than the dataset size.
        """
        if idx >= self.__nsamples:
            raise IndexError("Index out of range.")
        image_path = self.__images_paths[idx]
        # Work on a copy so the cached raw label is never modified.
        label = self.labels[idx].copy()
        image, label = self.__load_data(image_path, label)
        return image, label

    def __xywhn2xyxy(self, nlabel : torch.Tensor, width : int, height : int) -> torch.Tensor:
        """
        Transformed label from normalized center_x, center_y, width, height to
        x_1, y_1, x_2, y_2 in pixels. Column 0 (the class) is left untouched.
        """
        label = nlabel.clone()
        label[:, 1] = (nlabel[:, 1] - (nlabel[:, 3] / 2)) * width
        label[:, 2] = (nlabel[:, 2] - (nlabel[:, 4] / 2)) * height
        label[:, 3] = (nlabel[:, 1] + (nlabel[:, 3] / 2)) * width
        label[:, 4] = (nlabel[:, 2] + (nlabel[:, 4] / 2)) * height

        return label

    def __load_data(self, img_path : str, label : np.ndarray) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Read the image from disk, resize it with `prepare_image` and rescale
        its label to the resized image.
        """
        img = cv.imread(img_path)
        img_h, img_w = img.shape[:2]
        img = prepare_image(img[:, :, :3], [self.__net_width, self.__net_height])
        lab = self.__process_label(label, img_w, img_h)
        return img.squeeze(), lab

    def __load_raw_label(self, label_path : str):
        """
        Load a label file as an (n, 5) array; a missing or empty file yields
        an empty (0, 5) array.

        Raises:
            AssertionError: if a value is negative or a box value exceeds 1.
        """
        if os.path.isfile(label_path):
            with warnings.catch_warnings():
                # catch_warnings() alone only saves and restores the filter state.
                # The filter below is what actually silences the UserWarning
                # that np.loadtxt emits for an empty annotation file.
                warnings.simplefilter("ignore", category=UserWarning)
                l = np.loadtxt(label_path).reshape(-1,5)
            assert (l >= 0).all(), "bounding box values should be positive and in range 0 - 1"
            assert (l[:, 1:] <= 1).all(), "bounding box values should be in the range 0 - 1"
        else:
            l = np.zeros((0,5), dtype=np.float32)
        return l

    def __process_label(self, label : np.ndarray, image_width : int, image_height : int) -> torch.Tensor:
        """
        Process corresponding label and resize the ground truth bounding boxes
        to match the dimension of the resized image.

        Returns a (n, 6) tensor whose column 0 is reserved for the batch index
        (filled in by `collate_fn`) followed by class, center_x, center_y,
        width and height normalized by the network size.

        NOTE(review): the network width is used for both axes (scale, padding
        and normalization), which is only exact for a square `net_dim` --
        confirm against `prepare_image` before using a non-square input.
        """
        #max_box = 50
        scaling_factor = min(
                                self.__net_width/image_width,
                                self.__net_width/image_height
                            )
        #bs = torch.zeros((max_box, 6))
        bs = torch.zeros((len(label), 6))
        if label.size > 0:
            nlabels = torch.from_numpy(label)
            labels = self.__xywhn2xyxy(nlabels, image_width, image_height)
            # scale bounding box to match new image size and shift it by the padding
            labels[:, [1,3]] = ((labels[:, [1,3]] * scaling_factor) +\
                    (self.__net_width - (image_width * scaling_factor))/2)
            labels[:, [2,4]] = ((labels[:, [2,4]] * scaling_factor) +\
                    (self.__net_width - (image_height * scaling_factor))/2)

            # convert x1, y1, x2, y2 to center_x, center_y, width, height
            label_copy = labels.clone()
            labels[:, 1] = (label_copy[:, 3] + label_copy[:, 1])/2
            labels[:, 2] = (label_copy[:, 4] + label_copy[:, 2])/2
            labels[:, 3] = (label_copy[:, 3] - label_copy[:, 1])
            labels[:, 4] = (label_copy[:, 4] - label_copy[:, 2])


            # scale labels by new image dimension
            labels[:, 1:5] /= self.__net_width
            bs[:, 1:] = labels[:, :]
        return bs

    def collate_fn(self, batch) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Stack the images of a batch and concatenate their label tensors,
        writing the position of each sample in the batch into label column 0.
        """
        batch = [data for data in batch if data is not None]
        imgs, bboxes = list(zip(*batch))

        imgs = torch.stack(imgs)

        for i, boxes in enumerate(bboxes):
            boxes[:, 0] = i
        bboxes = torch.cat(bboxes, 0)

        return imgs, bboxes


================================================
FILE: imageai/Detection/Custom/yolo/metric.py
================================================
import math
import warnings

import numpy as np
import torch

# These new metric functions are based on https://github.com/ultralytics/yolov3/blob/master/utils/metric.py

def ap_per_class(tp, conf, pred_cls, target_cls):
    """ Compute per-class precision, recall, average precision and F1.
    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
    # Arguments
        tp:  True positives (nparray, nx1 or nx10).
        conf:  Objectness value from 0-1 (nparray).
        pred_cls:  Predicted object classes (nparray).
        target_cls:  True object classes (nparray).
    # Returns
        Precision, recall and F1 of every class taken at the confidence that
        maximizes the mean F1, the AP matrix (classes x IoU thresholds) and
        the array of class ids present in the targets.
    """

    # Order every prediction by decreasing confidence
    order = np.argsort(-conf)
    tp, conf, pred_cls = tp[order], conf[order], pred_cls[order]

    # Only classes that occur in the targets are evaluated
    unique_classes = np.unique(target_cls)
    num_classes = unique_classes.shape[0]
    num_thresholds = tp.shape[1]

    # Confidence grid on which the precision / recall curves are sampled
    px = np.linspace(0, 1, 1000)
    ap = np.zeros((num_classes, num_thresholds))
    p = np.zeros((num_classes, 1000))
    r = np.zeros((num_classes, 1000))

    for row, cls in enumerate(unique_classes):
        mask = pred_cls == cls
        label_count = (target_cls == cls).sum()
        pred_count = mask.sum()

        # Nothing to score for this class; its rows stay at zero
        if pred_count == 0 or label_count == 0:
            continue

        # Running counts of false and true positives
        cls_tp = tp[mask]
        fpc = (1 - cls_tp).cumsum(0)
        tpc = cls_tp.cumsum(0)

        recall = tpc / (label_count + 1e-16)
        precision = tpc / (tpc + fpc)

        # np.interp needs increasing x values, hence the negated confidences
        neg_conf = -conf[mask]
        r[row] = np.interp(-px, neg_conf, recall[:, 0], left=0)
        p[row] = np.interp(-px, neg_conf, precision[:, 0], left=1)

        # AP from the recall-precision curve of every IoU threshold
        for col in range(num_thresholds):
            ap[row, col] = compute_ap(recall[:, col], precision[:, col])[0]

    # F1 (harmonic mean of precision and recall) and the index where its mean peaks
    f1 = 2 * p * r / (p + r + 1e-16)
    best = f1.mean(0).argmax()

    return p[:, best], r[:, best], ap, f1[:, best], unique_classes.astype('int32')


def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves
    # Arguments
        recall:    The recall curve (list)
        precision: The precision curve (list)
    # Returns
        Average precision, precision curve, recall curve
        (both curves include the appended sentinel values; the precision
        curve is the monotonically decreasing envelope)
    """

    # Append sentinel values to beginning and end
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([1.0], precision, [0.0]))

    # Compute the precision envelope (running maximum taken from the right)
    mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))

    # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid;
    # use the new name when it exists and fall back for older releases.
    integrate = getattr(np, "trapezoid", None)
    if integrate is None:
        integrate = np.trapz

    # Integrate area under curve
    x = np.linspace(0, 1, 101)  # 101-point interp (COCO)
    ap = integrate(np.interp(x, mrec, mpre), x)  # integrate

    return ap, mpre, mrec


================================================
FILE: imageai/Detection/Custom/yolo/validate.py
================================================
import os

import numpy as np
import torch
from torchvision.ops import box_iou

from ....yolov3.utils import get_predictions
from .metric import ap_per_class
from tqdm import tqdm

# This new validation function is based on https://github.com/ultralytics/yolov3/blob/master/val.py


def xywh2xyxy(box_coord : torch.Tensor):
    """
    Convert bounding box coordinates from center_x, center_y, width, height
    to x_1, y_1, x_2, y_2. The input is not modified; a converted copy is
    returned.
    """
    half_width = box_coord[:, 2] / 2
    half_height = box_coord[:, 3] / 2

    corners = box_coord.clone()
    corners[:, 0] = box_coord[:, 0] - half_width
    corners[:, 1] = box_coord[:, 1] - half_height
    corners[:, 2] = box_coord[:, 0] + half_width
    corners[:, 3] = box_coord[:, 1] + half_height

    return corners

def process_batch(detections, labels, iouv, net_dim=416):
    """
    Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format.
    Arguments:
        detections (Array[N, 8]): columns 1:5 hold x1, y1, x2, y2 and column 7
            the class. `run` uses column 0 as the image index and column 5 as
            the confidence. The box columns are clamped IN PLACE.
        labels (Array[M, 5]), class, x1, y1, x2, y2
        iouv: 1-D tensor of IoU thresholds in increasing order
        net_dim: network input size in pixels; detection boxes are clamped to
            the range [0, net_dim]. Defaults to 416.
    Returns:
        correct (Array[N, len(iouv)]), one column per IoU level
    """
    # Keep the predicted boxes inside the network input. The bound follows net_dim
    # so that it matches the pixel space the labels were scaled to.
    detections[:, [1,3]] = torch.clamp(detections[:, [1,3]], 0.0, net_dim)
    detections[:, [2,4]] = torch.clamp(detections[:, [2,4]], 0.0, net_dim)

    correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device)
    iou = box_iou(labels[:, 1:], detections[:, 1:5])
    x = torch.where((iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 7]))  # IoU above threshold and classes match
    if x[0].shape[0]:
        matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()  # [label, detection, iou]
        if x[0].shape[0] > 1:
            # Sort by decreasing IoU, then keep one match per detection and one per label.
            matches = matches[matches[:, 2].argsort()[::-1]]
            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
        matches = torch.Tensor(matches).to(iouv.device)
        # A matched detection is correct at every IoU level its IoU reaches.
        correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv
    return correct

@torch.no_grad()
def run(model, val_dataloader, num_class, net_dim=416, nms_thresh=0.6, objectness_thresh=0.001, device="cpu"):
    """
    Evaluate `model` on a validation dataloader.

    Arguments:
        model: detection model; it is switched to eval mode.
        val_dataloader: iterable yielding (images, targets) batches where the
            target rows are (image index, class, x, y, w, h) with the box
            normalized to 0 - 1.
        num_class: number of classes.
        net_dim: network input size in pixels, used to scale the targets and
            to clamp the predicted boxes.
        nms_thresh: forwarded to `get_predictions` as `nms_confidence_level`.
        objectness_thresh: forwarded to `get_predictions` as `objectness_confidence`.
        device: device used for inference and matching.

    Returns:
        (mean precision, mean recall, mAP@0.5, mAP@0.5:0.95); all 0.0 when no
        prediction matched a label.
    """
    model.eval()
    nc = int(num_class)  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    p, r, f1, mp, mr, map50, map5095 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    stats, ap, ap_class = [], [], []

    for batch_i, (im, targets) in tqdm(enumerate(val_dataloader)):
        im = im.to(device)
        targets = targets.to(device)
        nb = im.shape[0]  # batch

        # Inference
        out = model(im) # inference

        # NMS
        targets[:, 2:] *= torch.Tensor([net_dim, net_dim, net_dim, net_dim]).to(device)  # to pixels
        out = get_predictions(
                pred=out.to(device), num_classes=nc,
                objectness_confidence=objectness_thresh,
                nms_confidence_level=nms_thresh, device=device
            )

        # Metrics
        for si in range(nb):
            labels = targets[targets[:, 0] == si, 1:]
            # A non-tensor result from get_predictions is treated as "no detections".
            pred = out[out[:, 0]==si, :] if isinstance(out, torch.Tensor) else torch.zeros((0,0), device=device)
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class

            if len(pred) == 0:
                if nl:
                    # Labels without any prediction still count towards recall.
                    stats.append((torch.zeros(0, niou, dtype=torch.bool, device="cpu"), torch.Tensor(device="cpu"), torch.Tensor(device="cpu"), tcls))
                continue

            # Predictions
            if nc==1:
                pred[:, 7] = 0

            if pred.shape[0] > 300:
                pred = pred[:300, :]  # sorted by confidence

            predn = pred.clone()

            # Evaluate
            if nl:
                tbox = xywh2xyxy(labels[:, 1:5]).to(device)  # target boxes
                labelsn = torch.cat((labels[:, 0:1], tbox), 1).to(device)  # native-space labels
                # Clamp with the same net_dim the targets were scaled with.
                correct = process_batch(predn, labelsn, iouv, net_dim=net_dim)
            else:
                correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool)
            stats.append((correct.cpu(), pred[:, 5].cpu(), pred[:, 7].cpu(), tcls))  # (correct, conf, pcls, tcls)

    # Compute metrics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        ap50, ap = ap[:, 0], ap.mean(1)  # AP@0.5, AP@0.5:0.95
        mp, mr, map50, map5095 = p.mean(), r.mean(), ap50.mean(), ap.mean()

    return mp, mr, map50, map5095


================================================
FILE: imageai/Detection/README.md
================================================
# ImageAI : Object Detection

## ---------------------------------------------------
## Introducing Jarvis and TheiaEngine.

We the creators of ImageAI are glad to announce 2 new AI projects to provide state-of-the-art Generative AI, LLM and Image Understanding on your personal computer and servers. 


[![](../../jarvis.png)](https://jarvis.genxr.co)

Install Jarvis on PC/Mac to setup limitless access to LLM powered AI Chats for your every day work, research and generative AI needs with 100% privacy and full offline capability.


Visit [https://jarvis.genxr.co](https://jarvis.genxr.co/) to get started.


[![](../../theiaengine.png)](https://www.genxr.co/theia-engine)


[TheiaEngine](https://www.genxr.co/theia-engine), the next-generation computer Vision AI API capable of all Generative and Understanding computer vision tasks in a single API call and available via REST API to all programming languages. Features include
- **Detect 300+ objects** ( 220 more objects than ImageAI)
- **Provide answers to any content or context questions** asked on an image
  - very useful to get information on any object, action or information without needing to train a new custom model for every task
-  **Generate scene description and summary**
-  **Convert 2D image to 3D pointcloud and triangular mesh**
-  **Semantic Scene mapping of objects, walls, floors, etc**
-  **Stateless Face recognition and emotion detection**
-  **Image generation and augmentation from prompt**
-  etc.

Visit [https://www.genxr.co/theia-engine](https://www.genxr.co/theia-engine) to try the demo and join in the beta testing today.
## ---------------------------------------------------

### TABLE OF CONTENTS

- <a href="#firstdetection" > :white_square_button: First Object Detection</a>
- <a href="#objectextraction" > :white_square_button: Object Detection, Extraction and Fine-tune</a>
- <a href="#customdetection" > :white_square_button: Custom Object Detection</a>
- <a href="#detectionspeed" > :white_square_button: Detection Speed</a>
- <a href="#hidingdetails" > :white_square_button: Hiding/Showing Object Name and Probability</a>
- <a href="#inputoutputtype" > :white_square_button: Image Input & Output Types</a>
- <a href="#documentation" > :white_square_button: Documentation</a>


ImageAI provides very convenient and powerful methods to perform object detection on images and extract each object from the image. The object detection class supports RetinaNet, YOLOv3 and TinyYOLOv3. To start performing object detection, you must download the RetinaNet, YOLOv3 or TinyYOLOv3 object detection model via the links below: 
* **[RetinaNet](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/retinanet_resnet50_fpn_coco-eeacb38b.pth)** _(Size = 130 mb, high performance and accuracy, with longer detection time)_
* **[YOLOv3](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt)** _(Size = 237 mb, moderate performance and accuracy, with a moderate detection time)_
* **[TinyYOLOv3](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/tiny-yolov3.pt)** _(Size = 34 mb, optimized for speed and moderate performance, with fast detection time)_


 Once you download the object detection model file, you should copy the model file to your project folder where your .py files will be.
 Then create a python file and give it a name; an example is FirstObjectDetection.py. Then write the code below into the python file:

### FirstObjectDetection.py
<div id="firstdetection" ></div>

```python
from imageai.Detection import ObjectDetection
import os

execution_path = os.getcwd()

detector = ObjectDetection()
detector.setModelTypeAsYOLOv3()
detector.setModelPath( os.path.join(execution_path , "yolov3.pt"))
detector.loadModel()
detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image2.jpg"), output_image_path=os.path.join(execution_path , "image2new.jpg"), minimum_percentage_probability=30)

for eachObject in detections:
    print(eachObject["name"] , " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"] )
    print("--------------------------------")
```

Sample Result:
Input Image
![Input Image](../../data-images/image2.jpg)
Output Image
![Output Image](../../data-images/yolo.jpg)

```
laptop  :  87.32235431671143  :  (306, 238, 390, 284)
--------------------------------
laptop  :  96.86298966407776  :  (121, 209, 258, 293)
--------------------------------
laptop  :  98.6301600933075  :  (279, 321, 401, 425)
--------------------------------
laptop  :  99.78572130203247  :  (451, 204, 579, 285)
--------------------------------
bed  :  94.02391314506531  :  (23, 205, 708, 553)
--------------------------------
apple  :  48.03136885166168  :  (527, 343, 557, 364)
--------------------------------
cup  :  34.09906327724457  :  (462, 347, 496, 379)
--------------------------------
cup  :  44.65090036392212  :  (582, 342, 618, 386)
--------------------------------
person  :  57.70219564437866  :  (27, 311, 341, 437)
--------------------------------
person  :  85.26121377944946  :  (304, 173, 387, 253)
--------------------------------
person  :  96.33603692054749  :  (415, 130, 538, 266)
--------------------------------
person  :  96.95255160331726  :  (174, 108, 278, 269)
--------------------------------
```

Let us make a breakdown of the object detection code that we used above.

```python
from imageai.Detection import ObjectDetection
import os

execution_path = os.getcwd()
```

 In the 3 lines above, we imported the **ImageAI object detection** class in the first line, imported the `os` module in the second line and obtained the path to the folder where our python file runs in the third line.
  
```python
detector = ObjectDetection()
detector.setModelTypeAsYOLOv3()
detector.setModelPath( os.path.join(execution_path , "yolov3.pt"))
detector.loadModel()
```

In the 4 lines above, we created a new instance of the `ObjectDetection` class in the first line, set the model type to YOLOv3 in the second line, set the model path to the YOLOv3 model file we downloaded and copied to the python file folder in the third line and load the model in the fourth line.

```python
detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image2.jpg"), output_image_path=os.path.join(execution_path , "image2new.jpg"))

for eachObject in detections:
    print(eachObject["name"] , " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"] )
    print("--------------------------------")
```

In the 2 lines above, we ran the `detectObjectsFromImage()` function and passed in the path to our image, and the path to the new image which the function will save. Then the function returns an array of dictionaries, with each dictionary corresponding to one object detected in the image. Each dictionary has the properties `name` (name of the object), `percentage_probability` (percentage probability of the detection) and `box_points` (the x1, y1, x2 and y2 coordinates of the bounding box of the object).

Should you want to use the RetinaNet which is appropriate for high-performance and high-accuracy demanding detection tasks, you will download the RetinaNet model file from the links above, copy it to your python file's folder, set the model type and model path in your python code as seen below:

```python
detector = ObjectDetection()
detector.setModelTypeAsRetinaNet()
detector.setModelPath( os.path.join(execution_path , "retinanet_resnet50_fpn_coco-eeacb38b.pth"))
detector.loadModel()
```

However, if you desire TinyYOLOv3 which is optimized for speed and embedded devices, you will download the TinyYOLOv3 model file from the links above, copy it to your python file's folder, set the model type and model path in your python code as seen below:

```python
detector = ObjectDetection()
detector.setModelTypeAsTinyYOLOv3()
detector.setModelPath( os.path.join(execution_path , "tiny-yolov3.pt"))
detector.loadModel()
```

## Object Detection, Extraction and Fine-tune
<div id="objectextraction" ></div>

In the examples we used above, we ran the object detection on an image and it returned the detected objects in an array and saved a new image with rectangular markers drawn on each object. In our next examples, we will be able to extract each object from the input image
  and save it independently.

In the example code below, which is almost identical to the previous object detection code, we will save each object detected as a separate image.

```python
from imageai.Detection import ObjectDetection
import os

execution_path = os.getcwd()

detector = ObjectDetection()
detector.setModelTypeAsYOLOv3()
detector.setModelPath( os.path.join(execution_path , "yolov3.pt"))
detector.loadModel()

detections, objects_path = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3new.jpg"), minimum_percentage_probability=30,  extract_detected_objects=True)

for eachObject, eachObjectPath in zip(detections, objects_path):
    print(eachObject["name"] , " : " , eachObject["percentage_probability"], " : ", eachObject["box_points"] )
    print("Object's image saved in " + eachObjectPath)
    print("--------------------------------")
```

![Input Image](../../data-images/image3.jpg)
![Output Images](../../data-images/image3new.jpg)

![dog](../../data-images/image3new-objects/dog-1.jpg)
![motorcycle](../../data-images/image3new-objects/motorcycle-3.jpg)
![car](../../data-images/image3new-objects/car-4.jpg)
![bicycle](../../data-images/image3new-objects/bicycle-5.jpg)
![person](../../data-images/image3new-objects/person-6.jpg)
![person](../../data-images/image3new-objects/person-7.jpg)
![person](../../data-images/image3new-objects/person-8.jpg)
![person](../../data-images/image3new-objects/person-9.jpg)
![person](../../data-images/image3new-objects/person-10.jpg)


Let us review the part of the code that performs the object detection and extracts the images:

```python
detections, objects_path = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3new.jpg"), minimum_percentage_probability=30,  extract_detected_objects=True)

for eachObject, eachObjectPath in zip(detections, objects_path):
    print(eachObject["name"] , " : " , eachObject["percentage_probability"], " : ", eachObject["box_points"] )
    print("Object's image saved in " + eachObjectPath)
    print("--------------------------------")
```

In the above lines, we called the `detectObjectsFromImage()` function and passed in the input image path, the output image path, and an extra parameter `extract_detected_objects=True`. This parameter states that the function should extract each object detected from the image and save it as a separate image. The parameter is `False` by default. Once set to `True`, the function will create a directory which is the **output image path + "-objects"**. Then it saves all the extracted images into this new directory, with each image's name being the **detected object name + "-" + a number** which corresponds to the order in which the objects were detected.

This new parameter we set to extract and save detected objects as images makes the function return 2 values. The first is the array of dictionaries, with each dictionary corresponding to a detected object. The second is an array of the paths to the saved images of each object detected and extracted, arranged in the same order as the objects in the first array.


**And one important feature you need to know!** You will recall that the percentage probability
   for each detected object is sent back by the `detectObjectsFromImage()` function. The function has a parameter `minimum_percentage_probability`, whose default value is `50` (value ranges between 0 - 100), but it is set to 30 in this example. That means the function will only return a detected object if its percentage probability is **30 or above**. The value was kept at this number to ensure the integrity of the detection results. You can fine-tune the object detection by setting **minimum_percentage_probability** to a smaller value to detect more objects, or to a higher value to detect fewer objects.


## Custom Object Detection
<div id="customdetection" ></div>

The object detection model (**RetinaNet**) supported by **ImageAI** can detect 80 different types of objects. They include:
```
person,  bicycle,  car, motorcycle, airplane, bus, train,  truck,  boat,  traffic light,  fire hydrant, stop_sign,
parking meter,   bench,   bird,   cat,   dog,   horse,   sheep,   cow,   elephant,   bear,   zebra,
giraffe,   backpack,   umbrella,   handbag,   tie,   suitcase,   frisbee,   skis,   snowboard,
sports ball,   kite,   baseball bat,   baseball glove,   skateboard,   surfboard,   tennis racket,
bottle,   wine glass,   cup,   fork,   knife,   spoon,   bowl,   banana,   apple,   sandwich,   orange,
broccoli,   carrot,   hot dog,   pizza,   donot,   cake,   chair,   couch,   potted plant,   bed,
dining table,   toilet,   tv,   laptop,   mouse,   remote,   keyboard,   cell phone,   microwave,   oven,
toaster,   sink,   refrigerator,   book,   clock,   vase,   scissors,   teddy bear,   hair dryer,   toothbrush.
```

Interestingly, **ImageAI** allows you to perform detection for one or more of the items above. That means you can
 customize the type of object(s) you want to be detected in the image. Let's take a look at the code below:

```python
from imageai.Detection import ObjectDetection
import os

execution_path = os.getcwd()

detector = ObjectDetection()
detector.setModelTypeAsYOLOv3()
detector.setModelPath( os.path.join(execution_path , "yolov3.pt"))
detector.loadModel()

custom_objects = detector.CustomObjects(car=True, motorcycle=True)
detections = detector.detectCustomObjectsFromImage(custom_objects=custom_objects, input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3custom.jpg"), minimum_percentage_probability=30)

for eachObject in detections:
    print(eachObject["name"] , " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"] )
    print("--------------------------------")
```

![Result](../../data-images/image3custom.jpg)


Let us take a look at the part of the code that made this possible.
```python
custom_objects = detector.CustomObjects(car=True, motorcycle=True)
detections = detector.detectCustomObjectsFromImage(custom_objects=custom_objects, input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3custom.jpg"), minimum_percentage_probability=30)
```

In the above code, after loading the model (can be done before loading the model as well), we defined a new variable
`custom_objects = detector.CustomObjects()`, in which we set its car and motorcycle properties equal to **True**.
This is to tell the model to detect only the object we set to True. Then we call the `detector.detectCustomObjectsFromImage()`
which is the function that allows us to perform detection of custom objects. Then we will set the `custom_objects` value
 to the custom objects variable we defined.


## Hiding/Showing Object Name and Probability
<div id="hidingdetails"></div>

**ImageAI** provides options to hide the name of objects detected and/or the percentage probability from being shown on the saved/returned detected image. Using the `detectObjectsFromImage()` and `detectCustomObjectsFromImage()` functions, the parameters `display_object_name` and `display_percentage_probability` can be set to `True` or `False` individually. Take a look at the code below:

```python
detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3new_nodetails.jpg"), minimum_percentage_probability=30, display_percentage_probability=False, display_object_name=False)
```

In the above code, we specified that both the object name and percentage probability should not be shown. As you can see in the result below, neither the names of the objects nor their individual percentage probabilities are shown in the detected image.

![Result](../../data-images/nodetails.jpg)


## Image Input & Output Types
<div id="inputoutputtype"></div>

**ImageAI** supports 3 types of inputs, which are **file path to image file** (default), **numpy array of image** and **image file stream**,
as well as 2 types of output, which are image **file** (default) and numpy **array**.
This means you can now perform object detection in production applications, such as on a web server or a system
 that returns files in any of the above stated formats.

To perform object detection with numpy array or file stream input, you just need to state the input type
in the `.detectObjectsFromImage()` function or the `.detectCustomObjectsFromImage()` function. See example below.

```python
detections = detector.detectObjectsFromImage(input_type="array", input_image=image_array , output_image_path=os.path.join(execution_path , "image.jpg")) # For numpy array input type
detections = detector.detectObjectsFromImage(input_type="stream", input_image=image_stream , output_image_path=os.path.join(execution_path , "test2new.jpg")) # For file stream input type
```

To perform object detection with numpy array output you just need to state the output type
in the `.detectObjectsFromImage()` function or the `.detectCustomObjectsFromImage()` function. See example below.

```python
detected_image_array, detections = detector.detectObjectsFromImage(output_type="array", input_image="image.jpg" ) # For numpy array output type
```


## Documentation
<div id="documentation" ></div>

We have provided full documentation for all **ImageAI** classes and functions. Find links below:

* Documentation - **English Version  [https://imageai.readthedocs.io](https://imageai.readthedocs.io)**

================================================
FILE: imageai/Detection/VIDEO.md
================================================
# ImageAI : Video Object Detection, Tracking  and Analysis

## ---------------------------------------------------
## Introducing Jarvis and TheiaEngine.

We the creators of ImageAI are glad to announce 2 new AI projects to provide state-of-the-art Generative AI, LLM and Image Understanding on your personal computer and servers. 


[![](../../jarvis.png)](https://jarvis.genxr.co)

Install Jarvis on PC/Mac to setup limitless access to LLM powered AI Chats for your every day work, research and generative AI needs with 100% privacy and full offline capability.


Visit [https://jarvis.genxr.co](https://jarvis.genxr.co/) to get started.


[![](../../theiaengine.png)](https://www.genxr.co/theia-engine)


[TheiaEngine](https://www.genxr.co/theia-engine), the next-generation computer Vision AI API capable of all Generative and Understanding computer vision tasks in a single API call and available via REST API to all programming languages. Features include
- **Detect 300+ objects** ( 220 more objects than ImageAI)
- **Provide answers to any content or context questions** asked on an image
  - very useful to get information on any object, action or information without needing to train a new custom model for every tasks
-  **Generate scene description and summary**
-  **Convert 2D image to 3D pointcloud and triangular mesh**
-  **Semantic Scene mapping of objects, walls, floors, etc**
-  **Stateless Face recognition and emotion detection**
-  **Image generation and augmentation from prompt**
-  etc.

Visit [https://www.genxr.co/theia-engine](https://www.genxr.co/theia-engine) to try the demo and join in the beta testing today.
## ---------------------------------------------------

## TABLE OF CONTENTS

- <a href="#videodetection" > :white_square_button: First Video Object Detection</a>
- <a href="#customvideodetection" > :white_square_button: Custom Video Object Detection (Object Tracking)</a>
- <a href="#camerainputs" > :white_square_button: Camera / Live Stream Video Detection</a>
- <a href="#videoanalysis" > :white_square_button: Video Analysis</a>
- <a href="#videodetectionspeed" > :white_square_button: Detection Speed</a>
- <a href="#hidingdetails" > :white_square_button: Hiding/Showing Object Name and Probability</a>
- <a href="#videodetectionintervals" > :white_square_button: Frame Detection Intervals</a>
- <a href="#detectiontimeout" > :white_square_button: Video Detection Timeout (NEW)</a>
- <a href="#documentation" > :white_square_button: Documentation</a>

ImageAI provides convenient, flexible and powerful methods to perform object detection on videos. The video object detection class provided only supports RetinaNet, YOLOv3 and TinyYOLOv3. This version of **ImageAI** provides commercial grade video object detection features, which include but are not limited to device/IP camera inputs, per frame, per second, per minute and entire video analysis for storing in databases and/or real-time visualizations and for future insights.

To start performing video object detection, you must download the RetinaNet, YOLOv3 or TinyYOLOv3 object detection model via the links below:

* **[RetinaNet](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/retinanet_resnet50_fpn_coco-eeacb38b.pth)** _(Size = 130 mb, high performance and accuracy, with longer detection time)_
* **[YOLOv3](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/yolov3.pt)** _(Size = 237 mb, moderate performance and accuracy, with a moderate detection time)_
* **[TinyYOLOv3](https://github.com/OlafenwaMoses/ImageAI/releases/download/3.0.0-pretrained/tiny-yolov3.pt)** _(Size = 34 mb, optimized for speed and moderate performance, with fast detection time)_

Because video object detection is a compute intensive task, we advise you to perform this experiment using a computer with an NVIDIA GPU and the GPU version of PyTorch installed. Performing video object detection on a CPU will be slower than using an NVIDIA GPU powered computer. You can use Google Colab for this experiment as it has an NVIDIA K80 GPU available for free.

 Once you download the object detection model file, you should copy the model file to your project folder where your .py files will be.
 Then create a python file and give it a name; an example is `FirstVideoObjectDetection.py`. Then write the code below into the python file:


### FirstVideoObjectDetection.py
<div id="videodetection" ></div>

```python
from imageai.Detection import VideoObjectDetection
import os

execution_path = os.getcwd()

detector = VideoObjectDetection()
detector.setModelTypeAsRetinaNet()
detector.setModelPath( os.path.join(execution_path , "retinanet_resnet50_fpn_coco-eeacb38b.pth"))
detector.loadModel()

video_path = detector.detectObjectsFromVideo(input_file_path=os.path.join(execution_path, "traffic.mp4"),
                                output_file_path=os.path.join(execution_path, "traffic_detected")
                                , frames_per_second=20, log_progress=True)
print(video_path)
```


Input Video (a 1min 24seconds video)

[![](../../data-images/video--1.jpg)](https://github.com/OlafenwaMoses/ImageAI/blob/master/data-videos/traffic.mp4)

Output Video
[![](../../data-images/video-2.jpg)](https://www.youtube.com/embed/qplVDqOmElI?rel=0)

Let us make a breakdown of the object detection code that we used above.

```python
from imageai.Detection import VideoObjectDetection
import os

execution_path = os.getcwd()
```

 In the 3 lines above, we imported the **ImageAI video object detection** class in the first line, imported the **os** module in the second line and obtained
  the path to the folder where our python file runs in the third line.

```python
detector = VideoObjectDetection()
detector.setModelTypeAsRetinaNet()
detector.setModelPath( os.path.join(execution_path , "retinanet_resnet50_fpn_coco-eeacb38b.pth"))
detector.loadModel()
```

In the 4 lines above, we created a new instance of the **VideoObjectDetection** class in the first line, set the model type to RetinaNet in the second line, set the model path to the RetinaNet model file we downloaded and copied to the python file folder in the third line and load the model in the fourth line.

```python
video_path = detector.detectObjectsFromVideo(input_file_path=os.path.join(execution_path, "traffic.mp4"),
                                 output_file_path=os.path.join(execution_path, "traffic_detected"),
                                 frames_per_second=20, log_progress=True)
print(video_path)
```

In the 2 lines above, we ran the `detectObjectsFromVideo()` function and passed in the path to our video, the path to the new video (without the extension, it saves a .avi video by default) which the function will save, the number of frames per second (fps) that we desire the output video to have and the option to log the progress of the detection in the console. Then the function returns the path to the saved video, which contains boxes and percentage probabilities rendered on objects detected in the video.


### Custom Video Object Detection
<div id="customvideodetection" ></div>

The video object detection model (**RetinaNet**) supported by **ImageAI** can detect 80 different types of objects. They include: 
```
      person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, traffic light, fire hydrant, stop_sign,
      parking meter,   bench,   bird,   cat,   dog,   horse,   sheep,   cow,   elephant,   bear,   zebra,
      giraffe,   backpack,   umbrella,   handbag,   tie,   suitcase,   frisbee,   skis,   snowboard,
      sports ball,   kite,   baseball bat,   baseball glove,   skateboard,   surfboard,   tennis racket,
      bottle,   wine glass,   cup,   fork,   knife,   spoon,   bowl,   banana,   apple,   sandwich,   orange,
      broccoli,   carrot,   hot dog,   pizza,   donot,   cake,   chair,   couch,   potted plant,   bed,
      dining table,   toilet,   tv,   laptop,   mouse,   remote,   keyboard,   cell phone,   microwave,
      oven,   toaster,   sink,   refrigerator,   book,   clock,   vase,   scissors,   teddy bear,   hair dryer,
      toothbrush.
```


Interestingly, **ImageAI** allows you to perform detection for one or more of the items above. That means you can customize the type of object(s) you want to be detected in the video. Let's take a look at the code below:

```python
from imageai.Detection import VideoObjectDetection
import os

execution_path = os.getcwd()

detector = VideoObjectDetection()
detector.setModelTypeAsRetinaNet()
detector.setModelPath( os.path.join(execution_path , "retinanet_resnet50_fpn_coco-eeacb38b.pth"))
detector.loadModel()

custom_objects = detector.CustomObjects(person=True, bicycle=True, motorcycle=True)

video_path = detector.detectCustomObjectsFromVideo(
                custom_objects=custom_objects,
                input_file_path=os.path.join(execution_path, "traffic.mp4"),
                output_file_path=os.path.join(execution_path, "traffic_custom_detected"),
                frames_per_second=20, log_progress=True)
print(video_path)
```

Let us take a look at the part of the code that made this possible.

```python
custom_objects = detector.CustomObjects(person=True, bicycle=True, motorcycle=True)

video_path = detector.detectCustomObjectsFromVideo(
                custom_objects=custom_objects, 
                input_file_path=os.path.join(execution_path, "traffic.mp4"),
                output_file_path=os.path.join(execution_path, "traffic_custom_detected"),
                frames_per_second=20, log_progress=True)
```

In the above code, after loading the model (can be done before loading the model as well), we defined a new variable
`custom_objects = detector.CustomObjects()`, in which we set its person, bicycle and motorcycle properties equal to **True**.
This is to tell the model to detect only the object we set to True. Then we call the `detector.detectCustomObjectsFromVideo()`
which is the function that allows us to perform detection of custom objects. Then we will set the `custom_objects` value
 to the custom objects variable we defined.

Output Video
[![Output Video](../../data-images/video-3.jpg)](https://www.youtube.com/embed/YfAycAzkwPM?rel=0)
C:\Users\User\PycharmProjects\ImageAITest\traffic_custom_detected.avi


### Camera / Live Stream Video Detection
<div id="camerainputs"></div>

**ImageAI** now allows live-video detection with support for camera inputs. Using **OpenCV**'s `VideoCapture()` function, you can load live-video streams from a device camera, cameras connected by cable or IP cameras, and pass them into **ImageAI**'s `detectObjectsFromVideo()` and `detectCustomObjectsFromVideo()` functions. All features that are supported for detecting objects in a video file are also available for detecting objects in a camera's live-video feed. Find below an example of detecting objects in a live-video feed from the device camera.

```python
from imageai.Detection import VideoObjectDetection
import os
import cv2

execution_path = os.getcwd()


camera = cv2.VideoCapture(0)

detector = VideoObjectDetection()
detector.setModelTypeAsRetinaNet()
detector.setModelPath(os.path.join(execution_path , "retinanet_resnet50_fpn_coco-eeacb38b.pth"))
detector.loadModel()


video_path = detector.detectObjectsFromVideo(
                camera_input=camera,
                output_file_path=os.path.join(execution_path, "camera_detected_video"),
                frames_per_second=20, log_progress=True, minimum_percentage_probability=40)
```

The difference between the code above and the code for the detection of a video file is that we defined an **OpenCV VideoCapture** instance and loaded the default device camera into it. Then we passed the camera we defined into the parameter `camera_input`, which replaces the `input_file_path` that is used for a video file.

### Video Analysis
<div id="videoanalysis"></div>

**ImageAI** now provides commercial-grade video analysis in the Video Object Detection class, for both video file inputs and camera inputs. This feature allows developers to obtain deep insights into any video processed with **ImageAI**. These insights can be visualized in real-time or stored in a NoSQL database for future review or analysis.

For video analysis, the `detectObjectsFromVideo()` and `detectCustomObjectsFromVideo()` functions now allow you to state your own defined functions which will be executed for every frame, second and/or minute of the video detected, as well as a function that will be executed at the end of a video detection. Once these functions are stated, they will receive raw but comprehensive analytical data on the index of the frame/second/minute, objects detected (name, percentage_probability and box_points), number of instances of each unique object detected and average number of occurrences of each unique object detected over a second/minute and the entire video.

To obtain the video analysis, all you need to do is define a function, state the corresponding parameters it will be receiving and pass the function name into the `per_frame_function`, `per_second_function`, `per_minute_function` and `video_complete_function` parameters in the detection function. Find below examples of video analysis functions.

```python
def forFrame(frame_number, output_array, output_count):
    print("FOR FRAME " , frame_number)
    print("Output for each object : ", output_array)
    print("Output count for unique objects : ", output_count)
    print("------------END OF A FRAME --------------")

def forSeconds(second_number, output_arrays, count_arrays, average_output_count):
    print("SECOND : ", second_number)
    print("Array for the outputs of each frame ", output_arrays)
    print("Array for output count for unique objects in each frame : ", count_arrays)
    print("Output average count for unique objects in the last second: ", average_output_count)
    print("------------END OF A SECOND --------------")

def forMinute(minute_number, output_arrays, count_arrays, average_output_count):
    print("MINUTE : ", minute_number)
    print("Array for the outputs of each frame ", output_arrays)
    print("Array for output count for unique objects in each frame : ", count_arrays)
    print("Output average count for unique objects in the last minute: ", average_output_count)
    print("------------END OF A MINUTE --------------")

video_detector = VideoObjectDetection()
video_detector.setModelTypeAsYOLOv3()
video_detector.setModelPath(os.path.join(execution_path, "yolov3.pt"))
video_detector.loadModel()

video_detector.detectObjectsFromVideo(
    input_file_path=os.path.join(execution_path, "traffic.mp4"),
    output_file_path=os.path.join(execution_path, "traffic_detected"),
    frames_per_second=10,
    per_second_function=forSeconds,
    per_frame_function=forFrame,
    per_minute_function=forMinute,
    minimum_percentage_probability=30
)
```

When the detection starts on a video feed, be it from a video file or camera input, the result will have the format as below:

**Results for the Frame function**
```
FOR FRAME : 1
 
Output for each object : [{'box_points': (362, 295, 443, 355), 'name': 'boat', 'percentage_probability': 26.666194200515747}, {'box_points': (319, 245, 386, 296), 'name': 'boat', 'percentage_probability': 30.052968859672546}, {'box_points': (219, 308, 341, 358), 'name': 'boat', 'percentage_probability': 47.46982455253601}, {'box_points': (589, 198, 621, 241), 'name': 'bus', 'percentage_probability': 24.62330162525177}, {'box_points': (519, 181, 583, 263), 'name': 'bus', 'percentage_probability': 27.446213364601135}, {'box_points': (493, 197, 561, 272), 'name': 'bus', 'percentage_probability': 59.81815457344055}, {'box_points': (432, 187, 491, 240), 'name': 'bus', 'percentage_probability': 64.42965269088745}, {'box_points': (157, 225, 220, 255), 'name': 'car', 'percentage_probability': 21.150341629981995}, {'box_points': (324, 249, 377, 293), 'name': 'car', 'percentage_probability': 24.089913070201874}, {'box_points': (152, 275, 260, 327), 'name': 'car', 'percentage_probability': 30.341443419456482}, {'box_points': (433, 198, 485, 244), 'name': 'car', 'percentage_probability': 37.205660343170166}, {'box_points': (184, 226, 233, 260), 'name': 'car', 'percentage_probability': 38.52525353431702}, {'box_points': (3, 296, 134, 359), 'name': 'car', 'percentage_probability': 47.80363142490387}, {'box_points': (357, 302, 439, 359), 'name': 'car', 'percentage_probability': 47.94844686985016}, {'box_points': (481, 266, 546, 314), 'name': 'car', 'percentage_probability': 65.8585786819458}, {'box_points': (597, 269, 624, 318), 'name': 'person', 'percentage_probability': 27.125394344329834}]
 
Output count for unique objects : {'bus': 4, 'boat': 3, 'person': 1, 'car': 8}

------------END OF A FRAME --------------
```

For any function you pass into the **per_frame_function**, the function will be executed after every single video frame is processed and the following will be passed into it:

* **Frame Index:** This is the position number of the frame inside the video (e.g 1 for first frame and 20 for twentieth frame).
* **Output Array:** This is an array of dictionaries. Each dictionary corresponds to each detected object in the image and it contains the "name", "percentage_probability" and "box_points"(x1,y1,x2,y2) values of the object.
* **Output Count:** This is a dictionary that has the name of each unique object detected as its keys and the number of instances of the objects detected as the values.

**Results for the Second function**
```
FOR SECOND : 1
 
 Array for the outputs of each frame [[{'box_points': (362, 295, 443, 355), 'name': 'boat', 'percentage_probability': 26.666194200515747}, {'box_points': (319, 245, 386, 296), 'name': 'boat', 'percentage_probability': 30.052968859672546}, {'box_points': (219, 308, 341, 358), 'name': 'boat', 'percentage_probability': 47.46982455253601}, {'box_points': (589, 198, 621, 241), 'name': 'bus', 'percentage_probability': 24.62330162525177}, {'box_points': (519, 181, 583, 263), 'name': 'bus', 'percentage_probability': 27.446213364601135}, {'box_points': (493, 197, 561, 272), 'name': 'bus', 'percentage_probability': 59.81815457344055}, {'box_points': (432, 187, 491, 240), 'name': 'bus', 'percentage_probability': 64.42965269088745}, {'box_points': (157, 225, 220, 255), 'name': 'car', 'percentage_probability': 21.150341629981995}, {'box_points': (324, 249, 377, 293), 'name': 'car', 'percentage_probability': 24.089913070201874}, {'box_points': (152, 275, 260, 327), 'name': 'car', 'percentage_probability': 30.341443419456482}, {'box_points': (433, 198, 485, 244), 'name': 'car', 'percentage_probability': 37.205660343170166}, {'box_points': (184, 226, 233, 260), 'name': 'car', 'percentage_probability': 38.52525353431702}, {'box_points': (3, 296, 134, 359), 'name': 'car', 'percentage_probability': 47.80363142490387}, {'box_points': (357, 302, 439, 359), 'name': 'car', 'percentage_probability': 47.94844686985016}, {'box_points': (481, 266, 546, 314), 'name': 'car', 'percentage_probability': 65.8585786819458}, {'box_points': (597, 269, 624, 318), 'name': 'person', 'percentage_probability': 27.125394344329834}],
 [{'box_points': (316, 240, 384, 302), 'name': 'boat', 'percentage_probability': 29.594269394874573}, {'box_points': (361, 295, 441, 354), 'name': 'boat', 'percentage_probability': 36.11513376235962}, {'box_points': (216, 305, 340, 357), 'name': 'boat', 'percentage_probability': 44.89373862743378}, {'box_points': (432, 198, 488, 244), 'name': 'truck', 'percentage_probability': 22.914741933345795}, {'box_points': (589, 199, 623, 240), 'name': 'bus', 'percentage_probability': 20.545457303524017}, {'box_points': (519, 182, 583, 263), 'name': 'bus', 'percentage_probability': 24.467085301876068}, {'box_points': (492, 197, 563, 271), 'name': 'bus', 'percentage_probability': 61.112016439437866}, {'box_points': (433, 188, 490, 241), 'name': 'bus', 'percentage_probability': 65.08989334106445}, {'box_points': (352, 303, 442, 357), 'name': 'car', 'percentage_probability': 20.025095343589783}, {'box_points': (136, 172, 188, 195), 'name': 'car', 'percentage_probability': 21.571354568004608}, {'box_points': (152, 276, 261, 326), 'name': 'car', 'percentage_probability': 33.07966589927673}, {'box_points': (181, 225, 230, 256), 'name': 'car', 'percentage_probability': 35.111838579177856}, {'box_points': (432, 198, 488, 244), 'name': 'car', 'percentage_probability': 36.25282347202301}, {'box_points': (3, 292, 130, 360), 'name': 'car', 'percentage_probability': 67.55480170249939}, {'box_points': (479, 265, 546, 314), 'name': 'car', 'percentage_probability': 71.47912979125977}, {'box_points': (597, 269, 625, 318), 'name': 'person', 'percentage_probability': 25.903674960136414}],................, 
[{'box_points': (133, 250, 187, 278), 'name': 'umbrella', 'percentage_probability': 21.518094837665558}, {'box_points': (154, 233, 218, 259), 'name': 'umbrella', 'percentage_probability': 23.687003552913666}, {'box_points': (348, 311, 425, 360), 'name': 'boat', 'percentage_probability': 21.015766263008118}, {'box_points': (11, 164, 137, 225), 'name': 'bus', 'percentage_probability': 32.20453858375549}, {'box_points': (424, 187, 485, 243), 'name': 'bus', 'percentage_probability': 38.043853640556335}, {'box_points': (496, 186, 570, 264), 'name': 'bus', 'percentage_probability': 63.83994221687317}, {'box_points': (588, 197, 622, 240), 'name': 'car', 'percentage_probability': 23.51653128862381}, {'box_points': (58, 268, 111, 303), 'name': 'car', 'percentage_probability': 24.538707733154297}, {'box_points': (2, 246, 72, 301), 'name': 'car', 'percentage_probability': 28.433072566986084}, {'box_points': (472, 273, 539, 323), 'name': 'car', 'percentage_probability': 87.17672824859619}, {'box_points': (597, 270, 626, 317), 'name': 'person', 'percentage_probability': 27.459821105003357}]
 ]
 
Array for output count for unique objects in each frame : [{'bus': 4, 'boat': 3, 'person': 1, 'car': 8},
 {'truck': 1, 'bus': 4, 'boat': 3, 'person': 1, 'car': 7},
 {'bus': 5, 'boat': 2, 'person': 1, 'car': 5},
 {'bus': 5, 'boat': 1, 'person': 1, 'car': 9},
 {'truck': 1, 'bus': 2, 'car': 6, 'person': 1},
 {'truck': 2, 'bus': 4, 'boat': 2, 'person': 1, 'car': 7},
 {'truck': 1, 'bus': 3, 'car': 7, 'person': 1, 'umbrella': 1},
 {'bus': 4, 'car': 7, 'person': 1, 'umbrella': 2},
 {'bus': 3, 'car': 6, 'boat': 1, 'person': 1, 'umbrella': 3},
 {'bus': 3, 'car': 4, 'boat': 1, 'person': 1, 'umbrella': 2}]
 
Output average count for unique objects in the last second: {'truck': 0.5, 'bus': 3.7, 'umbrella': 0.8, 'boat': 1.3, 'person': 1.0, 'car': 6.6}

------------END OF A SECOND --------------
```

In the above result, the video was processed and saved in 10 frames per second (FPS). For any function you pass into the **per_second_function**, the function will be executed after every single second of the video that is processed and the following will be passed into it:

- **Second Index:** This is the position number of the second inside the video (e.g 1 for first second and 20 for twentieth second).
- **Output Array:** This is an array of arrays, with each contained array and its position (array index + 1) corresponding to the equivalent frame in the last second of the video (In the above example, there are 10 arrays which correspond to the 10 frames contained in one second). Each contained array contains dictionaries. Each dictionary corresponds to each detected object in the image and it contains the "name", "percentage_probability" and "box_points"(x1,y1,x2,y2) values of the object.
- **Count arrays:** This is an array of dictionaries. Each dictionary and its position (array index + 1) corresponds to the equivalent frame in the last second of the video. Each dictionary has the name of each unique object detected as its keys and the number of instances of the objects detected as the values.
- **Average Output Count:** This is a dictionary that has the name of each unique object detected in the last second as its keys and the average number of instances of the objects detected across the number of frames as the values.

**Results for the Minute function**
The above set of **4 parameters** that are returned for every second of the video processed are the same parameters that will be returned for every minute of the video processed. The difference is that the index returned corresponds to the minute index, the **output_arrays** is an array that contains FPS * 60 arrays (in the code example above, 10 frames per second(fps) * 60 seconds = 600 frames = 600 arrays), the **count_arrays** is an array that contains FPS * 60 dictionaries (in the code example above, 10 frames per second(fps) * 60 seconds = 600 frames = 600 dictionaries) and the **average_output_count** is a dictionary that covers all the objects detected in all the frames contained in the last minute.

**Results for the Video Complete Function**
**ImageAI** allows you to obtain complete analysis of the entire video processed. All you need is to define a function like the forSecond or forMinute function and set the **video_complete_function** parameter into your `.detectObjectsFromVideo()` or `.detectCustomObjectsFromVideo()` function. The same values for the per_second_function and per_minute_function will be returned. The difference is that no index will be returned; only the other 3 values will be returned, and they will cover all frames in the video. Below is a sample function: 

```python
def forFull(output_arrays, count_arrays, average_output_count):
    #Perform action on the 3 parameters returned into the function

video_detector.detectObjectsFromVideo(
    input_file_path=os.path.join(execution_path, "traffic.mp4"),
    output_file_path=os.path.join(execution_path, "traffic_detected"),
    frames_per_second=10,
    video_complete_function=forFull,
    minimum_percentage_probability=30
)
```

**FINAL NOTE ON VIDEO ANALYSIS** : **ImageAI** allows you to obtain the detected video frame as a Numpy array at each frame, second and minute function. All you need to do is specify one more parameter in your function and set `return_detected_frame=True` in your `detectObjectsFromVideo()` or `detectCustomObjectsFromVideo()` function. Once this is set, the extra parameter you specified in your function will be the Numpy array of the detected frame. See a sample below:

```python
def forFrame(frame_number, output_array, output_count, detected_frame):
    print("FOR FRAME " , frame_number)
    print("Output for each object : ", output_array)
    print("Output count for unique objects : ", output_count)
    print("Returned Objects is : ", type(detected_frame))
    print("------------END OF A FRAME --------------")

video_detector.detectObjectsFromVideo(
    input_file_path=os.path.join(execution_path, "traffic.mp4"),
    output_file_path=os.path.join(execution_path, "traffic_detected"),
    frames_per_second=10,
    per_frame_function=forFrame,
    minimum_percentage_probability=30,
    return_detected_frame=True
)
```

### Frame Detection Intervals
<div id="videodetectionintervals" ></div>

The above video object detection tasks are optimized for frame-real-time object detection, which ensures that objects in every frame of the video are detected. **ImageAI** provides you the option to adjust the video frame detections, which can speed up your video detection process. When calling `.detectObjectsFromVideo()` or `.detectCustomObjectsFromVideo()`, you can specify at which frame interval detections should be made. Setting the **frame_detection_interval** parameter to 5 or 20 means the object detections in the video will be updated after every 5 frames or every 20 frames.
If your output video **frames_per_second** is set to 20, that means the object detections in the video will be updated once in every quarter of a second or once every second, respectively. This is useful in scenarios where the available compute is less powerful and the speeds of moving objects are low. This ensures you can have objects detected as second-real-time, half-a-second-real-time or whichever way suits your needs. We conducted video object detection on the same input video we have been using all this while by applying a **frame_detection_interval** value equal to 5.


### Video Detection Timeout
<div id="detectiontimeout"></div>

**ImageAI** now allows you to set a timeout in seconds for detection of objects in videos or camera live feed.
To set a timeout for your video detection code, all you need to do is specify the `detection_timeout` parameter in the `detectObjectsFromVideo()` function to the number of desired seconds. In the example code below, we set `detection_timeout` to 120 seconds (2 minutes). 

```python
from imageai.Detection import VideoObjectDetection
import os
import cv2

execution_path = os.getcwd()
camera = cv2.VideoCapture(0)

detector = VideoObjectDetection()
detector.setModelTypeAsRetinaNet()
detector.setModelPath(os.path.join(execution_path , "retinanet_resnet50_fpn_coco-eeacb38b.pth"))
detector.loadModel()


video_path = detector.detectObjectsFromVideo(camera_input=camera,
                                             output_file_path=os.path.join(execution_path, "camera_detected_video"),
                                             frames_per_second=20,
                                             log_progress=True,
                                             minimum_percentage_probability=40,
                                             detection_timeout=120)
```


### Documentation
<div id="documentation" ></div>

We have provided full documentation for all **ImageAI** classes and functions. Find links below: 

- Documentation - **English Version  [https://imageai.readthedocs.io](https://imageai.readthedocs.io)**


================================================
FILE: imageai/Detection/__init__.py
================================================
import os, warnings
from tkinter import Image
from collections import defaultdict
from typing import List, Tuple, Dict, Union
from PIL import Image
import torchvision

import numpy as np
from enum import Enum
import torch
import cv2
from typing import Union, List

from ..yolov3.yolov3 import YoloV3
from ..yolov3.tiny_yolov3 import YoloV3Tiny
from ..yolov3.utils import draw_bbox_and_label, get_predictions, prepare_image
from ..retinanet.utils import read_image, draw_bounding_boxes_and_labels, tensor_to_ndarray
import uuid

from ..backend_check.model_extension import extension_check

warnings.filterwarnings("once", category=ResourceWarning)


class ImageReadMode(Enum):
    """
    Support for various modes while reading images.

    Use ``ImageReadMode.UNCHANGED`` for loading the image as-is,
    ``ImageReadMode.GRAY`` for converting to grayscale,
    ``ImageReadMode.GRAY_ALPHA`` for grayscale with transparency,
    ``ImageReadMode.RGB`` for RGB and ``ImageReadMode.RGB_ALPHA`` for
    RGB with transparency.

    In this module the enum is passed as the second argument of ``read_image``
    when loading images for RetinaNet.
    NOTE(review): the member values look like they mirror torchvision's own
    ``ImageReadMode`` - confirm before changing any of the integers.
    """

    # Load the image as-is, with no conversion.
    UNCHANGED = 0
    # Convert to grayscale.
    GRAY = 1
    # Grayscale with transparency.
    GRAY_ALPHA = 2
    # RGB.
    RGB = 3
    # RGB with transparency.
    RGB_ALPHA = 4

class ObjectDetection:
    """
    This is the object detection class for images in the ImageAI library. It allows you to detect the 80 objects in the COCO dataset [ https://cocodataset.org/#home ] in any image. 
    
    This class provides support for RetinaNet, YOLOv3 and TinyYOLOv3 object detection networks . After instantiating this class, you can set its properties and make object detections using pretrained models.

    The following functions are required to be called before object detection can be made

    * setModelPath: Used to specify the filepath to the pretrained model.

    * At least of of the following and it must correspond to the model set in the setModelPath()
    [setModelTypeAsRetinaNet(), setModelTypeAsYOLOv3(), setModelTypeAsTinyYOLOv3()]

    * loadModel: [This must be called once only before performing object detection]
    Once the above functions have been called, you can call the detectObjectsFromImage() function of
    the object detection instance object at anytime to obtain observable objects in any image.

    * detectObjectsFromImage: Used to perform object detection on an image
    """
    def __init__(self) -> None:
        """
        Initialise the detector with its default settings.

        Picks CUDA when torch reports it as available (CPU otherwise), stores the
        NMS / objectness thresholds and the YOLOv3 / TinyYOLOv3 anchor presets, and
        reads the class names from the ``coco_classes.txt`` file located next to
        this module. No model is created here: the model type and model path start
        out empty and the model itself stays ``None`` until it is loaded.
        """
        package_dir = os.path.dirname(os.path.abspath(__file__))
        coco_labels_path = os.path.join(package_dir, "coco_classes.txt")

        # Model bookkeeping, filled in later by the setters and loadModel().
        self.__model = None
        self.__model_type = ""
        self.__model_path = ""
        self.__model_loaded = False

        # Inference settings.
        self.__device: str = "cuda" if torch.cuda.is_available() else "cpu"
        self.__nms_score: float = 0.4
        self.__objectness_score: float = 0.5

        # Anchor presets; the active one is chosen by the setModelTypeAs*() methods.
        self.__anchors: List[int] = None
        self.__anchors_yolov3: List[int] = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
        self.__anchors_tiny_yolov3: List[int] = [10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319]

        self.__classes = self.__load_classes(coco_labels_path)
    
    def __load_classes(self, path: str) -> List[str]:
        with open(path) as f:
            unique_classes = [c.strip() for c in f.readlines()]
        return unique_classes

    def __load_image_yolo(self, input_image : Union[str, np.ndarray, Image.Image]) -> Tuple[List[str], List[np.ndarray], torch.Tensor, torch.Tensor]:
        """
        Load one image and package it as a single-item batch for the YOLO models.

        :param input_image: path to a .jpg/.jpeg/.png file, an OpenCV (BGR) array
                            or a PIL image
        :return: tuple of
                 * list with the file's base name ("" for array / PIL input),
                 * list with an RGB uint8 copy of the image,
                 * float tensor holding (width, height, width, height) of the
                   image, on the configured device,
                 * the tensor returned by ``prepare_image(img, (416, 416))``,
                   on the configured device
        :raises ValueError: the path is not an existing file, or the input is of
                            an unsupported type
        :raises RuntimeError: the file has an unsupported extension or could not
                              be decoded
        """
        allowed_exts = ["jpg", "jpeg", "png"]

        if isinstance(input_image, str):
            if not os.path.isfile(input_image):
                raise ValueError(f"image path '{input_image}' is not found or a valid file")

            # An existing file with an unsupported extension used to leave `img`
            # unbound (UnboundLocalError further down), and cv2.imread() returns
            # None for files it cannot decode. Both cases now raise explicitly.
            img = None
            if input_image.rsplit('.')[-1].lower() in allowed_exts:
                img = cv2.imread(input_image)
            if img is None:
                raise RuntimeError(
                    f"Error loading image."
                    "\nEnsure the file is a valid image,"
                    " allowed file extensions are .jpg, .jpeg, .png"
                )
            fname = os.path.basename(input_image)
        elif isinstance(input_image, np.ndarray):
            img = input_image
            fname = ""
        elif "PIL" in str(type(input_image)):
            # PIL hands out RGB data, while the code below treats `img` as BGR
            # (it applies COLOR_BGR2RGB). Convert so PIL input is handled the
            # same way as cv2 arrays; convert("RGB") also normalises other modes
            # (e.g. RGBA, P) to three channels.
            img = cv2.cvtColor(np.asarray(input_image.convert("RGB")), cv2.COLOR_RGB2BGR)
            fname = ""
        else:
            raise ValueError(f"Invalid image input format")

        img_h, img_w, _ = img.shape

        original_imgs = [np.array(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).astype(np.uint8)]
        original_dims = [(img_w, img_h)]
        inputs = [prepare_image(img, (416, 416))]

        return (
                [fname],
                original_imgs,
                # (w, h) repeated to (w, h, w, h)
                torch.FloatTensor(original_dims).repeat(1,2).to(self.__device),
                torch.cat(inputs, 0).to(self.__device)
                )
    
    def __save_temp_img(self, input_image : Union[np.ndarray, Image.Image]) -> str:
        """
        Write an in-memory image to a uniquely named temporary .jpg file.

        The file is created in the system temporary directory rather than inside
        the package directory, because an installed package directory is often
        not writable. The caller is responsible for deleting the file.

        :param input_image: OpenCV/Numpy array or PIL image
        :return: path of the file that was written
        :raises ValueError: the input is neither a Numpy array nor a PIL image
        """
        # Local import: tempfile is only needed by this helper.
        import tempfile

        is_array = type(input_image) == np.ndarray
        is_pil = "PIL" in str(type(input_image))
        if not (is_array or is_pil):
            raise ValueError(
                f"Invalid image input. Supported formats are OpenCV/Numpy array, PIL image or image file path"
            )

        temp_path = os.path.join(tempfile.gettempdir(), f"{str(uuid.uuid4())}.jpg")

        if is_array:
            cv2.imwrite(temp_path, input_image)
        else:
            # JPEG cannot store alpha or palette modes; normalise to RGB first.
            input_image.convert("RGB").save(temp_path)

        return temp_path

    def __load_image_retinanet(self, input_image : Union[str, np.ndarray, Image.Image]) -> Tuple[List[str], List[torch.Tensor], List[torch.Tensor]]:
        """
        Load one image for the RetinaNet model.

        Array and PIL inputs are first written to a temporary file through
        ``__save_temp_img`` and read back from disk; that temporary file is always
        removed again, even when reading fails.

        :param input_image: path to a .jpg/.jpeg/.png file, an OpenCV/Numpy array
                            or a PIL image
        :return: tuple of (file base names, images as returned by ``read_image``,
                 the same images divided by 255 and moved to the configured device)
        :raises ValueError: the path does not point to an existing file
        :raises RuntimeError: the file extension is not one of the allowed ones
        """
        allowed_file_extensions = ["jpg", "jpeg", "png"]
        images = []
        scaled_images = []
        fnames = []

        delete_file = False
        if type(input_image) is not str:
            input_image = self.__save_temp_img(input_image=input_image)
            delete_file = True

        try:
            if not os.path.isfile(input_image):
                raise ValueError(f"Input image with path {input_image} not a valid file")

            if input_image.rsplit('.')[-1].lower() in allowed_file_extensions:
                img = read_image(input_image, ImageReadMode.RGB)
                images.append(img)
                scaled_images.append(img.div(255.0).to(self.__device))
                fnames.append(os.path.basename(input_image))
        finally:
            # Previously the temp file leaked whenever reading raised.
            if delete_file and os.path.isfile(input_image):
                os.remove(input_image)

        if images:
            return (fnames, images, scaled_images)
        raise RuntimeError(
                    f"Error loading image from input."
                    "\nEnsure the folder contains images,"
                    " allowed file extensions are .jpg, .jpeg, .png"
                )
    
    def setModelTypeAsYOLOv3(self):
        """
        'setModelTypeAsYOLOv3()' is used to set the model type to the YOLOv3 model.
        :return:
        """
        self.__anchors = self.__anchors_yolov3
        self.__model_type = "yolov3"
    
    def setModelTypeAsTinyYOLOv3(self):
        """
        'setModelTypeAsTinyYOLOv3()' is used to set the model type to the TinyYOLOv3 model.
        :return:
        """
        self.__anchors = self.__anchors_tiny_yolov3
        self.__model_type = "tiny-yolov3"
    
    def setModelTypeAsRetinaNet(self):
        """
        'setModelTypeAsRetinaNet()' is used to set the model type to the RetinaNet model.
        :return:
        """
        self.__anchors = self.__anchors_tiny_yolov3
        self.__model_type = "retinanet"

    def setModelPath(self, path: str) -> None:
        """
        'setModelPath()' function is required and is used to set the file path to the model adopted from the list of the
        available 3 model types. The model path must correspond to the model type.
        :param model_path:
        :return:
        """
        if os.path.isfile(path):
            extension_check(path)
            self.__model_path = path
            self.__model_loaded = False
        else:
            raise ValueError(
                        "invalid path, path not pointing to a valid file."
                    ) from None
    
    def useCPU(self):
        """
        Used to force classification to be done on CPU.
        By default, classification will occur on GPU compute if available else CPU compute.
        """

        self.__device = "cpu"
        if self.__model_loaded:
            self.__model_loaded = False
            self.loadModel()
    
    def loadModel(self) -> None:
        """
        'loadModel()' function is used to load the model weights into the model architecture from the file path defined
        in the setModelPath() function.
        :return:
        """
        if not self.__model_loaded:
            if self.__model_type=="yolov3":
                self.__model = YoloV3(
                        anchors=self.__anchors ,
                        num_classes=len(self.__classes),\
                        device=self.__device
                    )
            elif self.__model_type=="tiny-yolov3":
                self.__model = YoloV3Tiny(
                    anchors=self.__anchors,
                    num_classes=len(self.__classes),
                    device=self.__device
                    )
            elif self.__model_type=="retinanet":

                self.__classes = self.__load_classes(os.path.join(os.path.dirname(os.path.abspath(__file__)), "coco91_classes.txt"))

                self.__model = torchvision.models.detection.retinanet_resnet50_fpn(
                            pretrained=False, num_classes=91,
                            pretrained_backbone = False
                        )
            else:
                raise ValueError(f"Invalid model type. Call setModelTypeAsYOLOv3(), setModelTypeAsTinyYOLOv3() or setModelTypeAsRetinaNet to set a model type before loading the model")

            state_dict = torch.load(self.__model_path, map_location=self.__device)
            try:
                self.__model.load_state_dict(state_dict)
                self.__model_loaded = True
                self.__model.to(self.__device).eval()
            except:
                raise RuntimeError("Invalid weights!!!") from None
    
    def CustomObjects(self, **kwargs):

        """
        The 'CustomObjects()' function allows you to handpick the type of objects ( from the COCO classes ) you want to detect
        from an image. The objects are pre-initiated in the function variables and predefined as 'False',
        which you can easily set to true for any number of objects available.  This function
        returns a dictionary which must be parsed into the 'detectObjectsFromImage()'. Detecting
        custom objects only happens when you call the function 'detectObjectsFromImage()'

        Acceptable values are 'True' and 'False'  for all object values present
        :param boolean_values:
        :return: custom_objects_dict
        """

        if not self.__model_loaded:
            self.loadModel()
        all_objects_str = (obj_label.replace(" ", "_") for obj_label in self.__classes)
        all_objects_dict = {}
        for object_str in all_objects_str:
            all_objects_dict[object_str] = False
        
        for karg in kwargs:
            if karg in all_objects_dict:
                all_objects_dict[karg] = kwargs[karg]
            else:
                raise ValueError(f" object '{karg}' doesn't exist in the supported object classes")

        return all_objects_dict

        
    def detectObjectsFromImage(self,
                input_image: Union[str, np.ndarray, Image.Image],
                output_image_path: str=None,
                output_type: str ="file",
                extract_detected_objects: bool=False, minimum_percentage_probability: int=50,
                display_percentage_probability: bool=True, display_object_name: bool=True,
                display_box: bool=True,
                custom_objects: List=None
               ) -> Union[List[List[Tuple[str, float, Dict[str, int]]]], np.ndarray, List[np.ndarray], List[str]]:
        """
        Detects objects in an image using the unique classes provided
        by COCO.

        :param input_image: path to an image file, cv2 image or PIL image
        :param output_image_path: path to save input image with predictions rendered
        :param output_type: type of output for rendered image. Acceptable values are 'file' and 'array` ( a cv2 image )
        :param extract_detected_objects: extract each object based on the output type
        :param minimum_percentage_probability: the minimum confidence a detected object must have
        :param display_percentage_probability: to diplay/not display the confidence on rendered image   
        :param display_object_name: to diplay/not display the object name on rendered image  
        :param display_box: to diplay/not display the object bounding box on rendered image 
        :param custom_objects: a dictionary of detectable objects set to boolean values
        
        :returns: A list of tuples containing the label of detected object and the
        confidence.
        """
        
        
        self.__model.eval()
        if not self.__model_loaded:
            if self.__model_path:
                warnings.warn(
                        "Model path has changed but pretrained weights in the"
                        " new path is yet to be loaded.",
                        ResourceWarning
                    )
            else:
                raise RuntimeError(
                        "Model path isn't set, pretrained weights aren't used."
                    )
        predictions = defaultdict(lambda : [])
        

        if self.__model_type == "yolov3" or self.__model_type == "tiny-yolov3":
            fnames, original_imgs, input_dims, imgs = self.__load_image_yolo(input_image)
            
            with torch.no_grad():
                output = self.__model(imgs)
            
            output = get_predictions(
                    pred=output.to(self.__device), num_classes=len(self.__classes),
                    nms_confidence_level=self.__nms_score, objectness_confidence= self.__objectness_score,
                    device=self.__device
                )
            
            if output is None:
                if output_type == "array":
                    if extract_detected_objects:
                        return original_imgs[0], [], []
                    else:
                        return original_imgs[0], []
                else:
                    if extract_detected_objects:
                        return original_imgs[0], []
                    else:
                        return []
            
            # scale the output to match the dimension of the original image
            input_dims = torch.index_select(input_dims, 0, output[:, 0].long())
            scaling_factor = torch.min(416 / input_dims, 1)[0].view(-1, 1)
            output[:, [1,3]] -= (416 - (scaling_factor * input_dims[:, 0].view(-1,1))) / 2
            output[:, [2,4]] -= (416 - (scaling_factor * input_dims[:, 1].view(-1,1))) / 2
            output[:, 1:5] /= scaling_factor

            #clip bounding box for those that extended outside the detected image.
            for idx in range(output.shape[0]):
                output[idx, [1,3]] = torch.clamp(output[idx, [1,3]], 0.0, input_dims[idx, 0])
                output[idx, [2,4]] = torch.clamp(output[idx, [2,4]], 0.0, input_dims[idx, 1])

            for pred in output:
                pred_label = self.__classes[int(pred[-1])]
                if custom_objects:
                    if pred_label.replace(" ", "_") in custom_objects.keys():
                        if not custom_objects[pred_label.replace(" ", "_")]:
                            continue
                    else:
                        continue
                predictions[int(pred[0])].append((
                        pred_label,
                        float(pred[-2]),
                        {k:v for k,v in zip(["x1", "y1", "x2", "y2"], map(int, pred[1:5]))},
                    ))
        elif self.__model_type == "retinanet":
            fnames, original_imgs, scaled_images = self.__load_image_retinanet(input_image)
            with torch.no_grad():
                output = self.__model(scaled_images)
            
            if output is None:
                if output_type == "array":
                    if extract_detected_objects:
                        return original_imgs[0], [], []
                    else:
                        return original_imgs[0], []
                else:
                    if extract_detected_objects:
                        return original_imgs[0], []
                    else:
                        return []

            for idx, pred in enumerate(output):
                for id in range(pred["labels"].shape[0]):
                    if pred["scores"][id] >= self.__objectness_score:
                        pred_label = self.__classes[pred["labels"][id]]

                        if custom_objects:
                            if pred_label.replace(" ", "_") in custom_objects.keys():
                                if not custom_objects[pred_label.replace(" ", "_")]:
                                    continue
                            else:
                                continue

                        predictions[idx].append(
                                (
                                    pred_label,
                                    pred["scores"][id].item(),
                                    {k:v for k,v in zip(["x1", "y1", "x2", "y2"], map(int, pred["boxes"][id]))}
                                )
                            )
        
        # Render detection on copy of input image
        original_input_image = None
        output_image_array = None
        extracted_objects = []

        if self.__model_type == "yolov3" or self.__model_type == "tiny-yolov3":
            original_input_image = cv2.cvtColor(original_imgs[0], cv2.COLOR_RGB2BGR)
            if isinstance(output, torch.Tensor):
                for pred in output:
                    percentage_conf = round(float(pred[-2]) * 100, 2)
                    if percentage_conf < minimum_percentage_probability:
                        continue

                    displayed_label = ""
                    if display_object_name:
                        displayed_label = f"{self.__classes[int(pred[-1].item())]} : "
                    if display_percentage_probability:
                        displayed_label += f" {percentage_conf}%"


                    original_imgs[int(pred[0].item())] = draw_bbox_and_label(pred[1:5].int() if display_box else None,
                        displayed_label,
                        original_imgs[int(pred[0].item())]
                    )
                
                output_image_array = cv2.cvtColor(original_imgs[0], cv2.COLOR_RGB2BGR)
                
        elif self.__model_type == "retinanet":
            original_input_image = tensor_to_ndarray(original_imgs[0].div(255.0))
            original_input_image = cv2.cvtColor(original_input_image, cv2.COLOR_RGB2BGR)
            for idx, pred in predictions.items():
                
                max_dim = max(list(original_imgs[idx].size()))

                for label, score, bbox in pred:
                    percentage_conf = round(score * 100, 2)
                    if percentage_conf < minimum_percentage_probability:
                        continue
                    
                    displayed_label = ""
                    if display_object_name:
                        displayed_label = f"{label} :"
                    if display_percentage_probability:
                        displayed_label += f" {percentage_conf}%"

                    original_imgs[idx] = draw_bounding_boxes_and_labels(
                        image=original_imgs[idx],
                        boxes=torch.Tensor([[bbox["x1"], bbox["y1"], bbox["x2"], bbox["y2"]]]),
                        draw_boxes=display_box,
                        labels=[displayed_label],
                        label_color=(0, 0, 255),
                        box_color=(0, 255, 0),
                        width=1,
                        fill=False,
                        font_size=int(max_dim / 30)
                    )
                
            output_image_array = tensor_to_ndarray(original_imgs[0].div(255.0))
            output_image_array = cv2.cvtColor(output_image_array, cv2.COLOR_RGB2BGR)
        

        # Format predictions for function reponse
        predictions_batch = list(predictions.values())
        predictions_list = predictions_batch[0] if len(predictions_batch) > 0 else []
        min_probability = minimum_percentage_probability / 100


        if output_type == "file":
            if output_image_path:
                cv2.imwrite(output_image_path, output_image_array)

                if extract_detected_objects:
                    extraction_dir = ".".join(output_image_path.split(".")[:-1]) + "-extracted"
                    os.mkdir(extraction_dir)
                    count = 0
                    for obj_prediction in predictions_list: 
                        if obj_prediction[1] >= min_probability:
                            count += 1
                            extracted_path = os.path.join(
                                extraction_dir, 
                                ".".join(os.path.basename(output_image_path).split(".")[:-1]) + f"-{count}.jpg"
                            )
                            obj_bbox = obj_prediction[2]
                            cv2.imwrite(extracted_path, original_input_image[obj_bbox["y1"] : obj_bbox["y2"], obj_bbox["x1"] : obj_bbox["x2"]])

                            extracted_objects.append(extracted_path)

        elif output_type == "array":
            if extract_detected_objects:
                for obj_prediction in predictions_list: 
                    if obj_prediction[1] >= min_probability:
                        obj_bbox = obj_prediction[2]

                        extracted_objects.append(original_input_image[obj_bbox["y1"] : obj_bbox["y2"], obj_bbox["x1"] : obj_bbox["x2"]])
        else:
            raise ValueError(f"Invalid output_type '{output_type}'. Supported values are 'file' and 'array' ")

        
        predictions_list = [
            {
                "name": prediction[0], "percentage_probability": round(prediction[1] * 100, 2),
                "box_points": [prediction[2]["x1"], prediction[2]["y1"], prediction[2]["x2"], prediction[2]["y2"]]
            } for prediction in predictions_list if prediction[1] >= min_probability
        ]


        if output_type == "array":
            if extract_detected_objects:
                return output_image_array, predictions_list, extracted_objects
            else:
                return output_image_array, predictions_list
        else:
            if extract_detected_objects:
                return predictions_list, extracted_objects
            else:
                return predictions_list


class VideoObjectDetection:
    """
    This is the object detection class for videos and camera live stream inputs in the ImageAI library. It provides support for RetinaNet,
    YOLOv3 and TinyYOLOv3 object detection networks. After instantiating this class, you can set its properties and
    make object detections using its pre-defined functions.
    The following functions are required to be called before object detection can be made
    * setModelPath()
    * At least one of the following and it must correspond to the model set in the setModelPath()
    [setModelTypeAsRetinaNet(), setModelTypeAsYOLOv3(), setModelTypeAsTinyYOLOv3()]
    * loadModel() [This must be called once only before performing object detection]
    Once the above functions have been called, you can call the detectObjectsFromVideo() function
    of the object detection instance object at anytime to obtain observable objects in any video
    or camera live stream. The 'custom_objects' argument of detectObjectsFromVideo() is forwarded
    unchanged to the wrapped image detector (see CustomObjects()).
    """

    def __init__(self):
        # Model handling and per-frame inference are delegated to an image detector.
        self.__detector = ObjectDetection()

    def setModelTypeAsYOLOv3(self):
        """Delegates to setModelTypeAsYOLOv3() of the wrapped ObjectDetection instance."""
        self.__detector.setModelTypeAsYOLOv3()

    def setModelTypeAsTinyYOLOv3(self):
        """Delegates to setModelTypeAsTinyYOLOv3() of the wrapped ObjectDetection instance."""
        self.__detector.setModelTypeAsTinyYOLOv3()

    def setModelTypeAsRetinaNet(self):
        """Delegates to setModelTypeAsRetinaNet() of the wrapped ObjectDetection instance."""
        self.__detector.setModelTypeAsRetinaNet()

    def setModelPath(self, model_path: str):
        """
        Validates the model file extension with extension_check() and then hands the
        path to the wrapped ObjectDetection instance.
        """
        extension_check(model_path)
        self.__detector.setModelPath(model_path)

    def loadModel(self):
        """Delegates to loadModel() of the wrapped ObjectDetection instance."""
        self.__detector.loadModel()

    def useCPU(self):
        """Delegates to useCPU() of the wrapped ObjectDetection instance."""
        self.__detector.useCPU()

    def CustomObjects(self, **kwargs):
        """Returns whatever CustomObjects(**kwargs) of the wrapped ObjectDetection instance returns."""
        return self.__detector.CustomObjects(**kwargs)

    @staticmethod
    def _summarise_window(frames_dict, counts_dict, last_frame, window):
        """
        Collects the per-frame results of the last 'window' frames ending at frame number
        'last_frame' (frame numbers start at 1) and averages the object counts over them.
        Returns a tuple of
            -- the list of per-frame detection lists
            -- the list of per-frame {object name: count} dictionaries
            -- a {object name: average count} dictionary, each average being the summed count
               divided by 'window' and truncated with int()
        """
        first_index = last_frame - window
        frame_numbers = [number for number in range(1, last_frame + 1) if (number - 1) >= first_index]
        window_objects = [frames_dict[number] for number in frame_numbers]
        window_counts = [counts_dict[number] for number in frame_numbers]

        totals = {}
        for frame_counts in window_counts:
            for object_name, quantity in frame_counts.items():
                totals[object_name] = totals.get(object_name, 0) + quantity

        averages = {object_name: int(total / window) for object_name, total in totals.items()}
        return window_objects, window_counts, averages

    @staticmethod
    def _notify(callback, arguments, frame, include_frame):
        """Calls 'callback' with 'arguments', appending 'frame' as the last argument when 'include_frame' is truthy."""
        if include_frame:
            callback(*arguments, frame)
        else:
            callback(*arguments)

    def detectObjectsFromVideo(self, input_file_path="", camera_input=None, output_file_path="", frames_per_second=20,
                               frame_detection_interval=1, minimum_percentage_probability=50, log_progress=False,
                               display_percentage_probability=True, display_object_name=True, display_box=True, save_detected_video=True,
                               per_frame_function=None, per_second_function=None, per_minute_function=None,
                               video_complete_function=None, return_detected_frame=False, detection_timeout = None, custom_objects=None):

        """
        'detectObjectsFromVideo()' function is used to detect objects observable in the given video path or a camera input:
        * input_file_path , which is the file path to the input video. It is required only if 'camera_input' is not set
        * camera_input , allows you to pass in a camera input for live video detections. It takes precedence over 'input_file_path' and must expose the cv2.VideoCapture interface (isOpened(), read(), get(), release())
        * output_file_path , which is the path to the output video, without extension ('.mp4' is appended). It is required only if 'save_detected_video' is not set to False
        * frames_per_second , which is the number of frames per second to be used in the output video. It is also the number of frames that is treated as one second for 'per_second_function', 'per_minute_function' and 'detection_timeout'
        * frame_detection_interval (optional, 1 by default)  , which is the interval of frames that will be detected. The first frame is always detected; frames that are skipped are reported with no objects
        * minimum_percentage_probability (optional, 50 by default) , option to set the minimum percentage probability for nominating a detected object for output.
        * log_progress (optional) , which states if the progress of the frame processed is to be logged to console
        * display_percentage_probability (optional), can be used to hide or show probability scores on the detected video frames
        * display_object_name (optional), can be used to show or hide object names on the detected video frames
        * display_box (optional), forwarded to the image detector together with the two display options above
        * save_detected_video (optional, True by default), can be set to or not to save the detected video
        * per_frame_function (optional), this parameter allows you to pass in a function you will want to execute after each detected frame of the video. The function will be executed with the following values passed into it:
            -- position number of the frame
            -- an array of dictionaries, with each dictionary corresponding to each object detected. Each dictionary contains 'name', 'percentage_probability' and 'box_points'
            -- a dictionary with keys being the name of each unique object and values being the number of instances of the object present
            -- If return_detected_frame is set to True, the detected frame will be passed as the fourth value into the function
        * per_second_function (optional), this parameter allows you to pass in a function you will want to execute after each second of the video is detected. The function will be executed with the following values passed into it:
            -- position number of the second
            -- an array with one entry per frame of the last second, each entry being the array of dictionaries of the objects detected in that frame
            -- an array of dictionaries, one per frame of the last second, mapping the name of each unique object in the frame to the number of instances found in the frame
            -- a dictionary mapping the name of each unique object detected throughout the past second to the average number of instances per frame (truncated to an integer)
            -- If return_detected_frame is set to True, the last detected frame will be passed as the fifth value into the function
        * per_minute_function (optional), same as 'per_second_function' but executed after every minute (frames_per_second * 60 frames), with the position number of the minute as first value
        * video_complete_function (optional), this parameter allows you to pass in a function you will want to execute after all of the video frames have been detected. The function will be executed with the following values passed into it:
            -- an array with one entry per frame of the entire video, each entry being the array of dictionaries of the objects detected in that frame
            -- an array of dictionaries, one per frame of the entire video, mapping the name of each unique object in the frame to the number of instances found in the frame
            -- a dictionary mapping the name of each unique object detected throughout the entire video to the average number of instances per frame (truncated to an integer)
        * return_detected_frame (optional, False by default), option to pass the last detected video frame into the per_frame_function, per_second_function or per_minute_function
        * detection_timeout (optional, None by default), option to state the number of seconds of a video that should be detected after which the detection function stops processing the video
        * custom_objects (optional, None by default), forwarded unchanged to the image detector as its 'custom_objects' argument

                :param input_file_path:
                :param camera_input:
                :param output_file_path:
                :param frames_per_second:
                :param frame_detection_interval:
                :param minimum_percentage_probability:
                :param log_progress:
                :param display_percentage_probability:
                :param display_object_name:
                :param display_box:
                :param save_detected_video:
                :param per_frame_function:
                :param per_second_function:
                :param per_minute_function:
                :param video_complete_function:
                :param return_detected_frame:
                :param detection_timeout:
                :param custom_objects:
                :return: the path of the saved video (output_file_path + '.mp4') if 'save_detected_video' is set, otherwise None
                :raises ValueError: if the arguments are inconsistent, or if any error occurs while reading, detecting, writing or running a callback (the original error is attached as the cause)
        """

        if input_file_path == "" and camera_input is None:
            raise ValueError(
                "You must set 'input_file_path' to a valid video file, or set 'camera_input' to a valid camera")
        if save_detected_video and output_file_path == "":
            raise ValueError(
                "You must set 'output_video_filepath' to a valid video file name, in which the detected video will be saved. If you don't intend to save the detected video, set 'save_detected_video=False'")

        input_video = None
        output_video = None

        try:
            # Per-frame results, keyed by 1-based frame number.
            output_frames_dict = {}
            output_frames_count_dict = {}

            # Only open the file when no camera is supplied, so no capture handle is leaked.
            if camera_input is not None:
                input_video = camera_input
            else:
                input_video = cv2.VideoCapture(input_file_path)

            output_video_filepath = output_file_path + '.mp4'

            # Only create the writer (and therefore the output file) when the video is saved.
            if save_detected_video:
                # Property ids 3 and 4 are the frame width and height of the capture.
                frame_width = int(input_video.get(3))
                frame_height = int(input_video.get(4))
                output_video = cv2.VideoWriter(output_video_filepath, cv2.VideoWriter_fourcc(*"MP4V"),
                                               frames_per_second,
                                               (frame_width, frame_height))

            counting = 0
            detection_timeout_count = 0
            video_frames_count = 0
            frames_per_minute = frames_per_second * 60

            while input_video.isOpened():
                ret, frame = input_video.read()
                if not ret:
                    break

                video_frames_count += 1
                if detection_timeout is not None:
                    # Every 'frames_per_second' frames read counts as one elapsed second.
                    if (video_frames_count % frames_per_second) == 0:
                        detection_timeout_count += 1

                    if detection_timeout_count >= detection_timeout:
                        break

                output_objects_array = []
                counting += 1

                if log_progress:
                    print("Processing Frame : ", str(counting))

                detected_copy = frame.copy()
                check_frame_interval = counting % frame_detection_interval
                is_detection_frame = (counting == 1 or check_frame_interval == 0)

                if is_detection_frame:
                    try:
                        detected_copy, output_objects_array = self.__detector.detectObjectsFromImage(
                            input_image=frame, output_type="array",
                            minimum_percentage_probability=minimum_percentage_probability,
                            display_percentage_probability=display_percentage_probability,
                            display_object_name=display_object_name,
                            display_box=display_box,
                            custom_objects=custom_objects)
                    except Exception as detection_error:
                        # Best effort: a failing frame is passed through un-annotated with no
                        # detections instead of aborting the whole video.
                        if log_progress:
                            print("Detection failed on frame : ", str(counting), "-", detection_error)

                output_frames_dict[counting] = output_objects_array

                output_objects_count = {}
                for detection in output_objects_array:
                    object_name = detection["name"]
                    output_objects_count[object_name] = output_objects_count.get(object_name, 0) + 1

                output_frames_count_dict[counting] = output_objects_count

                if save_detected_video:
                    output_video.write(detected_copy)

                if is_detection_frame and per_frame_function is not None:
                    self._notify(per_frame_function,
                                 (counting, output_objects_array, output_objects_count),
                                 detected_copy, return_detected_frame)

                if per_second_function is not None:
                    if counting != 1 and (counting % frames_per_second) == 0:
                        second_objects, second_counts, second_average = self._summarise_window(
                            output_frames_dict, output_frames_count_dict, counting, frames_per_second)
                        self._notify(per_second_function,
                                     (int(counting / frames_per_second), second_objects, second_counts,
                                      second_average),
                                     detected_copy, return_detected_frame)

                if per_minute_function is not None:
                    if counting != 1 and (counting % frames_per_minute) == 0:
                        minute_objects, minute_counts, minute_average = self._summarise_window(
                            output_frames_dict, output_frames_count_dict, counting, frames_per_minute)
                        self._notify(per_minute_function,
                                     (int(counting / frames_per_minute), minute_objects, minute_counts,
                                      minute_average),
                                     detected_copy, return_detected_frame)

            if video_complete_function is not None:
                # The window is the whole video: every processed frame is included.
                video_objects, video_counts, video_average = self._summarise_window(
                    output_frames_dict, output_frames_count_dict, counting, counting)
                video_complete_function(video_objects, video_counts, video_average)

            if save_detected_video:
                return output_video_filepath
            return None

        except Exception as error:
            # Keep the documented ValueError contract, but preserve the real cause for debugging
            # and let KeyboardInterrupt / SystemExit propagate untouched.
            raise ValueError(
                "An error occured. It may be that your input video is invalid. Ensure you specified a proper string value for 'output_file_path' is 'save_detected_video' is not False. "
                "Also ensure your per_frame, per_second, per_minute or video_complete_analysis function is properly configured to receive the right parameters. ") from error

        finally:
            # Release the capture and the writer on success and on failure alike.
            if input_video is not None:
                input_video.release()
            if output_video is not None:
                output_video.release()

================================================
FILE: imageai/Detection/coco91_classes.txt
================================================
unlabeled
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
street sign
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
hat
backpack
umbrella
shoe
eye glasses
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
plate
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
mirror
dining table
window
desk
toilet
door
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
blender
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
hair brush

================================================
FILE: imageai/Detection/coco_classes.txt
================================================
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush

================================================
FILE: imageai/__init__.py
================================================
from .backend_check import backend_check

================================================
FILE: imageai/backend_check/__init__.py
================================================


================================================
FILE: imageai/backend_check/backend_check.py
================================================
# Import-time guard: ImageAI needs PyTorch and TorchVision. When they cannot be
# imported, raise a RuntimeError that tells the user whether they appear to be on
# the legacy Tensorflow backend or simply have no supported backend installed.
try:
    import torch
    import torchvision
except Exception:
    # Any import failure (missing package, broken native library, ...) means the
    # PyTorch backend is unusable; probe for the legacy backend to pick the message.
    try:
        import tensorflow
        import keras
    except Exception:
        raise RuntimeError("Dependency error!!! PyTorch and TorchVision are not installed. Please see installation instructions in the documentation https://imageai.readthedocs.io/")
    else:
        # Raised from the 'else' clause so that it is not caught by the probe's own
        # exception handler and replaced with the generic message.
        raise RuntimeError("Dependency error!!! It appears you are trying to use ImageAI with a Tensorflow backend. ImageAI now uses PyTorch as backed as from version 3.0.2 . If you want to use the Tensorflow models or a customly trained '.h5' model, install ImageAI 2.1.6 or earlier. To use the latest Pytorch models, see the documentation in https://imageai.readthedocs.io/")

================================================
FILE: imageai/backend_check/model_extension.py
================================================
import os

def extension_check(file_path: str):
    """
    Validates that 'file_path' names a PyTorch model file.

    The check is purely on the (case-sensitive) file name suffix; the file is not opened.

    :param file_path: path to the model file
    :raises RuntimeError: if the path ends with '.h5' (a Tensorflow/Keras model)
    :raises ValueError: if the path ends with neither '.pt' nor '.pth'
    """
    if file_path.endswith(".h5"):
        raise RuntimeError("You are trying to use a Tensorflow model with ImageAI. ImageAI now uses PyTorch as backed as from version 3.0.2 . If you want to use the Tensorflow models or a customly trained '.h5' model, install ImageAI 2.1.6 or earlier. To use the latest Pytorch models, see the documentation in https://imageai.readthedocs.io/")
    if not file_path.endswith((".pt", ".pth")):
        # Either extension is accepted, so the message says "or" rather than "and".
        raise ValueError(f"Invalid model file {os.path.basename(file_path)}. Please pass in a '.pt' or '.pth' model file.")


================================================
FILE: imageai/densenet121/__init__.py
================================================
import os, warnings
from pathlib import Path
from typing import List, Tuple

import torch, torchvision
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image

warnings.filterwarnings("once", category=ResourceWarning)

class DenseNet121Pretrained:
    """
    An implementation that allows for easy classification of images
    using the DenseNet121 computer vision model.
    """
    def __init__(self, label_path : str) -> None:
        self.__device = "cuda" if torch.cuda.is_available() else "cpu"
        # The model must live on the same device the input batch is moved to in classify().
        self.__model = torchvision.models.densenet121(pretrained=False).to(self.__device)
        self.__classes = self.__load_classes(label_path)
        self.__has_loaded_weights = False
        self.__model_path = ""

    def __load_classes(self, path : str) -> List[str]:
        """
        Reads the class labels from a text file, one label per line, with
        surrounding whitespace stripped.
        """
        with open(path) as f:
            unique_classes = [c.strip() for c in f.readlines()]
        return unique_classes

    def __load_image(self, image_path : str) -> Tuple[List[str], torch.Tensor]:
        """
        Loads image/images from the given path. If image_path is a directory, this
        function only loads the images in the directory (it does not visit the sub-
        directories). Each image is converted to RGB, resized to 256, center-cropped
        to 224x224, converted to a tensor and normalized before being stacked into
        one batch.

        Returns the list of file names and the stacked image tensor. Raises
        RuntimeError if no image could be loaded from image_path.
        """
        allowed_file_extensions = ["jpg", "jpeg", "png"]
        images = []
        fnames = []
        preprocess = transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])
        if os.path.isfile(image_path):
            img = Image.open(image_path).convert("RGB")
            images.append(preprocess(img))
            fnames.append(os.path.basename(image_path))

        elif os.path.isdir(image_path):
            for file in os.listdir(image_path):
                # Require an actual '.<ext>' suffix so that an extension-less file that
                # happens to be named e.g. 'jpg' is not mistaken for an image.
                has_allowed_extension = "." in file and \
                        file.rsplit(".", 1)[-1].lower() in allowed_file_extensions
                if has_allowed_extension and os.path.isfile(os.path.join(image_path, file)):
                    img = Image.open(os.path.join(image_path, file)).convert("RGB")
                    images.append(preprocess(img))
                    fnames.append(file)
        if images:
            return fnames, torch.stack(images)
        raise RuntimeError(
                f"Error loading images from {os.path.abspath(image_path)}."
                "\nEnsure the folder contains images,"
                " allowed file extensions are .jpg, .jpeg, .png"
            )

    # properties
    model_path = property(
                fget=lambda self : self.__model_path,
                fset=lambda self, path: self.set_model_path(path),
                doc="Path containing the pretrained weight."
            )

    def set_model_path(self, path : str) -> None:
        """
        Sets the path to the pretrained weight and marks the weights as not
        loaded. Raises ValueError if path is not an existing file.
        """
        if os.path.isfile(path):
            self.__model_path = path
            self.__has_loaded_weights = False
        else:
            raise ValueError(
                "parameter path should be a path to the pretrianed weight file."
                )

    def load_model(self) -> None:
        """
        Loads the DenseNet121 weights into the model architecture. Does nothing
        if the weights of the current model path are already loaded. A failure is
        reported on the console instead of being raised.
        """
        if not self.__has_loaded_weights:
            try:
                import re
                state_dict = torch.load(self.__model_path, map_location=self.__device)
                # '.'s are no longer allowed in module names, but previous densenet layers
                # as provided by the pytorch organization has names that uses '.'s.
                pattern = re.compile(
                        r"^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\."
                                "(?:weight|bias|running_mean|running_var))$"
                        )
                for key in list(state_dict.keys()):
                    res = pattern.match(key)
                    if res:
                        new_key = res.group(1) + res.group(2)
                        state_dict[new_key] = state_dict[key]
                        del state_dict[key]
                self.__model.load_state_dict(state_dict)
                self.__has_loaded_weights = True
                self.__model.eval()
            except Exception:
                print("Weight loading failed.\nEnsure the model path is"
                    " set and the weight file is in the specified model path.")

    def classify(self, image_path : str, top_n : int = 5, verbose : bool = True) -> List[List[Tuple[str, str]]]:
        """
        Classifies image/images according to the classes in the label file.

        Parameters:
        -----------
            image_path: a path to a single image or a path to a directory containing
                        images. If image_path is a path to a file, this function
                        classifies that image, else, if image_path is a path to a
                        directory that contains images, this function classifies all
                        images in the given directory (it doesn't visit the
                        subdirectories).

            top_n: number of top predictions to return per image. It is capped at
                   the number of classes the model outputs.
            verbose: if true, it prints the top predictions.

        Returns:
        --------
            one list per image, each holding (label, "<probability>%") tuples
            ordered from the most to the least probable class.
        """
        if not self.__has_loaded_weights:
            warnings.warn("Pretrained weights aren't loaded", ResourceWarning)

        fnames, images = self.__load_image(image_path)
        images = images.to(self.__device)

        # Inference must not use training-mode behaviour, even when load_model() was skipped.
        self.__model.eval()
        with torch.no_grad():
            output = self.__model(images)
        probabilities = torch.softmax(output, dim=1)

        # Honour the requested number of predictions without asking for more than exist.
        k = min(top_n, probabilities.shape[1])
        top_prob, top_catid = torch.topk(probabilities, k)

        predictions = [
                [
                    (self.__classes[int(top_catid[i][j])], f"{top_prob[i][j].item()*100:.5f}%")
                    for j in range(top_prob.shape[1])
                ]
                for i in range(top_prob.shape[0])
            ]

        if verbose:
            for idx, pred in enumerate(predictions):
                print("-"*50, f"Top {k} predictions for {fnames[idx]}", "-"*50, sep="\n")
                for label, score in pred:
                    print(f"\t{label}:{score: >10}")
                print("-"*50, "\n")
        return predictions


================================================
FILE: imageai/inceptionv3/__init__.py
================================================
import os, warnings
from pathlib import Path
from typing import List, Tuple

import torch, torchvision
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image

warnings.filterwarnings("once", category=ResourceWarning)

class InceptionV3Pretrained:
    """
    An implementation that allows for easy classification of images
    using torchvision's Inception v3 computer vision model.

    Label names are read once from ``label_path``; pretrained weights are
    loaded on request from ``model_path`` via ``load_model``.
    """
    def __init__(self, label_path : str) -> None:
        self.__device = "cuda" if torch.cuda.is_available() else "cpu"
        # Input batches are moved to self.__device in classify(), so the network
        # has to live on the same device. The class is inference-only, hence
        # eval mode from the start.
        self.__model = torchvision.models.inception_v3(pretrained=False).to(self.__device)
        self.__model.eval()
        self.__classes = self.__load_classes(label_path)
        self.__has_loaded_weights = False
        self.__model_path = ""

    def __load_classes(self, path : str) -> List[str]:
        """
        Reads the label file (one class name per line) and returns the
        stripped lines in file order.
        """
        with open(path) as f:
            unique_classes = [c.strip() for c in f.readlines()]
        return unique_classes

    def __load_image(self, image_path : str) -> Tuple[List[str], torch.Tensor]:
        """
        Loads image/images from the given path. If image_path is a directory, this
        function only loads the images in the directory (it does not visit the sub-
        directories). Every image is converted to RGB, resized and center-cropped
        to 299x299 and normalized with the mean/std constants below.

        Returns the list of file names and the stacked image batch.
        Raises RuntimeError when no image could be loaded.
        """
        allowed_file_extensions = ["jpg", "jpeg", "png"]
        images = []
        fnames = []
        preprocess = transforms.Compose([
                transforms.Resize(299),
                transforms.CenterCrop(299),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])
        if os.path.isfile(image_path):
            img = Image.open(image_path).convert("RGB")
            images.append(preprocess(img))
            fnames.append(os.path.basename(image_path))

        elif os.path.isdir(image_path):
            for file in os.listdir(image_path):
                full_path = os.path.join(image_path, file)
                if os.path.isfile(full_path) and\
                        file.rsplit('.')[-1].lower() in allowed_file_extensions:
                    img = Image.open(full_path).convert("RGB")
                    images.append(preprocess(img))
                    fnames.append(file)
        if images:
            return fnames, torch.stack(images)
        raise RuntimeError(
                f"Error loading images from {os.path.abspath(image_path)}."
                "\nEnsure the folder contains images,"
                " allowed file extensions are .jpg, .jpeg, .png"
            )

    # properties
    model_path = property(
                fget=lambda self : self.__model_path,
                fset=lambda self, path: self.set_model_path(path),
                doc="Path containing the pretrained weight."
            )

    def set_model_path(self, path : str) -> None:
        """
        Sets the path to the pretrained weight.

        Raises ValueError if path is not an existing file. Changing the path
        marks the weights as not loaded until load_model() is called again.
        """
        if os.path.isfile(path):
            self.__model_path = path
            self.__has_loaded_weights = False
        else:
            raise ValueError(
            "parameter path should be a path to the pretrianed weight file."
            )

    def load_model(self) -> None:
        """
        Loads the Inception v3 weights found at model_path into the model
        architecture. Does nothing if the weights are already loaded; on
        failure a message is printed and the model stays unloaded.
        """
        if not self.__has_loaded_weights:
            try:
                self.__model.load_state_dict(
                        torch.load(self.__model_path, map_location=self.__device)
                    )
                self.__has_loaded_weights = True
                self.__model.eval()
            except Exception:
                print("Weight loading failed.\nEnsure the model path is"
                    " set and the weight file is in the specified model path.")

    def classify(self, image_path : str, top_n : int = 5, verbose : bool = True) -> List[List[Tuple[str, str]]]:
        """
        Classifies image/images according to the classes listed in the label file.

        Parameters:
        -----------
            image_path: a path to a single image or a path to a directory containing
                        images. If image_path is a path to a file, this function
                        classifies that image, else, if image_path is a path to a
                        directory that contains images, this function classifies
                        all images in the given directory (it doesn't visit the
                        subdirectories).

            top_n: number of top predictions to return (capped at the number of
                   model outputs).
            verbose: if true, it prints the top_n predictions.

        Returns:
        --------
            One list per image, each holding (label, "score%") tuples ordered
            from the most to the least probable class.

        Raises:
        -------
            ValueError: if top_n is smaller than 1.
        """
        if top_n < 1:
            raise ValueError("top_n should be a positive integer.")

        if not self.__has_loaded_weights:
            if self.__model_path:
                warnings.warn(
                        "Model path has changed but pretrained weights in the"
                        " new path are yet to be loaded.",
                        ResourceWarning
                    )
            else:
                warnings.warn(
                        "Model path isn't set, pretrained weights aren't used.",
                        ResourceWarning
                    )

        fnames, images = self.__load_image(image_path)
        images = images.to(self.__device)

        with torch.no_grad():
            output = self.__model(images)
        probabilities = torch.softmax(output, dim=1)
        # Honour the requested number of predictions without asking topk for
        # more entries than the model produces.
        k = min(top_n, probabilities.shape[1])
        top_prob, top_catid = torch.topk(probabilities, k)
        top_prob, top_catid = top_prob.tolist(), top_catid.tolist()

        # Labels come from the file given at construction time.
        predictions = [
                [
                    (self.__classes[cat_id], f"{prob*100:.5f}%")
                    for prob, cat_id in zip(probs, cat_ids)
                ]
                for probs, cat_ids in zip(top_prob, top_catid)
            ]

        if verbose:
            for idx, pred in enumerate(predictions):
                print("-"*50, f"Top {k} predictions for {fnames[idx]}", "-"*50, sep="\n")
                for label, score in pred:
                    print(f"\t{label}:{score: >10}")
                print("-"*50, "\n")
        return predictions


================================================
FILE: imageai/mobilenetv2/__init__.py
================================================
import os, warnings
from pathlib import Path
from typing import List, Tuple

import torch, torchvision
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image

warnings.filterwarnings("once", category=ResourceWarning)

class MobileNetV2Pretrained:
    """
    An implementation that allows for easy classification of images
    using torchvision's MobileNetV2 computer vision model.

    Label names are read once from ``label_path``; pretrained weights are
    loaded on request from ``model_path`` via ``load_model``.
    """
    def __init__(self, label_path : str) -> None:
        self.__device = "cuda" if torch.cuda.is_available() else "cpu"
        # Input batches are moved to self.__device in classify(), so the network
        # has to live on the same device. The class is inference-only, hence
        # eval mode from the start.
        self.__model = torchvision.models.mobilenet_v2(pretrained=False).to(self.__device)
        self.__model.eval()
        self.__classes = self.__load_classes(label_path)
        self.__has_loaded_weights = False
        self.__model_path = ""

    def __load_classes(self, path : str) -> List[str]:
        """
        Reads the label file (one class name per line) and returns the
        stripped lines in file order.
        """
        with open(path) as f:
            unique_classes = [c.strip() for c in f.readlines()]
        return unique_classes

    def __load_image(self, image_path : str) -> Tuple[List[str], torch.Tensor]:
        """
        Loads image/images from the given path. If image_path is a directory, this
        function only loads the images in the directory (it does not visit the sub-
        directories). Every image is converted to RGB, resized to 256, center-
        cropped to 224x224 and normalized with the mean/std constants below.

        Returns the list of file names and the stacked image batch.
        Raises RuntimeError when no image could be loaded.
        """
        allowed_file_extensions = ["jpg", "jpeg", "png"]
        images = []
        fnames = []
        preprocess = transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])
        if os.path.isfile(image_path):
            img = Image.open(image_path).convert("RGB")
            images.append(preprocess(img))
            fnames.append(os.path.basename(image_path))

        elif os.path.isdir(image_path):
            for file in os.listdir(image_path):
                full_path = os.path.join(image_path, file)
                if os.path.isfile(full_path) and\
                        file.rsplit('.')[-1].lower() in allowed_file_extensions:
                    img = Image.open(full_path).convert("RGB")
                    images.append(preprocess(img))
                    fnames.append(file)
        if images:
            return fnames, torch.stack(images)
        raise RuntimeError(
                f"Error loading images from {os.path.abspath(image_path)}."
                "\nEnsure the folder contains images,"
                " allowed file extensions are .jpg, .jpeg, .png"
            )

    # properties
    model_path = property(
                fget=lambda self : self.__model_path,
                fset=lambda self, path: self.set_model_path(path),
                doc="Path containing the pretrained weight."
            )

    def set_model_path(self, path : str) -> None:
        """
        Sets the path to the pretrained weight.

        Raises ValueError if path is not an existing file. Changing the path
        marks the weights as not loaded until load_model() is called again.
        """
        if os.path.isfile(path):
            self.__model_path = path
            # Reset the same flag load_model()/classify() read, so weights from
            # the new path are actually loaded on the next load_model() call.
            self.__has_loaded_weights = False
        else:
            raise ValueError(
            "parameter path should be a valid path to the pretrianed weight file."
            )

    def load_model(self) -> None:
        """
        Loads the MobileNetV2 weights found at model_path into the model
        architecture. Does nothing if the weights are already loaded; on
        failure a message is printed and the model stays unloaded.
        """
        if not self.__has_loaded_weights:
            try:
                self.__model.load_state_dict(
                        torch.load(self.__model_path, map_location=self.__device)
                    )
                self.__has_loaded_weights = True
                self.__model.eval()
            except Exception:
                print("Weight loading failed.\nEnsure the model path is"
                    " set and the weight file is in the specified model path.")

    def classify(self, image_path : str, top_n : int = 5, verbose : bool = True) -> List[List[Tuple[str, str]]]:
        """
        Classifies image/images according to the classes listed in the label file.

        Parameters:
        -----------
            image_path: a path to a single image or a path to a directory containing
                        images. If image_path is a path to a file, this function
                        classifies that image, else, if image_path is a path to a
                        directory that contains images, this function classifies
                        all images in the given directory (it doesn't visit the
                        subdirectories).

            top_n: number of top predictions to return (capped at the number of
                   model outputs).
            verbose: if true, it prints the top_n predictions.

        Returns:
        --------
            One list per image, each holding (label, "score%") tuples ordered
            from the most to the least probable class.

        Raises:
        -------
            ValueError: if top_n is smaller than 1.
        """
        if top_n < 1:
            raise ValueError("top_n should be a positive integer.")

        if not self.__has_loaded_weights:
            if self.__model_path:
                warnings.warn(
                        "Model path has changed but pretrained weights in the"
                        " new path are yet to be loaded.",
                        ResourceWarning
                    )
            else:
                warnings.warn(
                        "Model path isn't set, pretrained weights aren't used.",
                        ResourceWarning
                    )

        fnames, images = self.__load_image(image_path)
        images = images.to(self.__device)

        with torch.no_grad():
            output = self.__model(images)
        probabilities = torch.softmax(output, dim=1)
        # Honour the requested number of predictions without asking topk for
        # more entries than the model produces.
        k = min(top_n, probabilities.shape[1])
        top_prob, top_catid = torch.topk(probabilities, k)
        top_prob, top_catid = top_prob.tolist(), top_catid.tolist()

        predictions = [
                [
                    (self.__classes[cat_id], f"{prob*100:.5f}%")
                    for prob, cat_id in zip(probs, cat_ids)
                ]
                for probs, cat_ids in zip(top_prob, top_catid)
            ]

        if verbose:
            for idx, pred in enumerate(predictions):
                print("-"*50, f"Top {k} predictions for {fnames[idx]}", "-"*50, sep="\n")
                for label, score in pred:
                    print(f"\t{label}:{score: >10}")
                print("-"*50, "\n")
        return predictions


================================================
FILE: imageai/resnet50/__init__.py
================================================
import os, warnings
from typing import List, Tuple

import torch, torchvision
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image

warnings.filterwarnings("once", category=ResourceWarning)

class ResNet50Pretrained:
    """
    An implementation that allows for easy classification of images
    using torchvision's ResNet-50 computer vision model.

    Label names are read once from ``label_path``; pretrained weights are
    loaded on request from ``model_path`` via ``load_model``.
    """
    def __init__(self, label_path : str) -> None:
        self.__device = "cuda" if torch.cuda.is_available() else "cpu"
        # Input batches are moved to self.__device in classify(), so the network
        # has to live on the same device. The class is inference-only, hence
        # eval mode from the start.
        self.__model = torchvision.models.resnet50(pretrained=False).to(self.__device)
        self.__model.eval()
        self.__classes = self.__load_classes(label_path)
        self.__has_loaded_weights = False
        self.__model_path = ""

    def __load_classes(self, path : str) -> List[str]:
        """
        Reads the label file (one class name per line) and returns the
        stripped lines in file order.
        """
        with open(path) as f:
            unique_classes = [c.strip() for c in f.readlines()]
        return unique_classes

    def __load_image(self, image_path : str) -> Tuple[List[str], torch.Tensor]:
        """
        Loads image/images from the given path. If image_path is a directory, this
        function only loads the images in the directory (it does not visit the sub-
        directories). Every image is converted to RGB, resized to 256, center-
        cropped to 224x224 and normalized with the mean/std constants below.

        Returns the list of file names and the stacked image batch.
        Raises RuntimeError when no image could be loaded.
        """
        allowed_file_extensions = ["jpg", "jpeg", "png"]
        images = []
        fnames = []
        preprocess = transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])
        if os.path.isfile(image_path):
            img = Image.open(image_path).convert("RGB")
            images.append(preprocess(img))
            fnames.append(os.path.basename(image_path))

        elif os.path.isdir(image_path):
            for file in os.listdir(image_path):
                full_path = os.path.join(image_path, file)
                if os.path.isfile(full_path) and\
                        file.rsplit('.')[-1].lower() in allowed_file_extensions:
                    img = Image.open(full_path).convert("RGB")
                    images.append(preprocess(img))
                    fnames.append(file)
        if images:
            return fnames, torch.stack(images)
        raise RuntimeError(
                f"Error loading images from {os.path.abspath(image_path)}."
                "\nEnsure the folder contains images,"
                " allowed file extensions are .jpg, .jpeg, .png"
            )

    # properties
    model_path = property(
                fget=lambda self : self.__model_path,
                fset=lambda self, path: self.set_model_path(path),
                doc="Path containing the pretrained weight."
            )

    def set_model_path(self, path : str) -> None:
        """
        Sets the path to the pretrained weight.

        Raises ValueError if path is not an existing file. Changing the path
        marks the weights as not loaded until load_model() is called again.
        """
        if os.path.isfile(path):
            self.__model_path = path
            self.__has_loaded_weights = False
        else:
            raise ValueError(
                "parameter path should be a path to the pretrianed weight file."
                )

    def load_model(self) -> None:
        """
        Loads the ResNet-50 weights found at model_path into the model
        architecture. Does nothing if the weights are already loaded; on
        failure a message is printed and the model stays unloaded.
        """
        if not self.__has_loaded_weights:
            try:
                self.__model.load_state_dict(
                        torch.load(self.__model_path, map_location=self.__device)
                    )
                self.__has_loaded_weights = True
                self.__model.eval()
            except Exception:
                print("Weight loading failed.\nEnsure the model path is"
                    " set and the weight file is in the specified model path.")

    def classify(self, image_path : str, top_n : int = 5, verbose : bool = True) -> List[List[Tuple[str, str]]]:
        """
        Classifies image/images according to the classes listed in the label file.

        Parameters:
        -----------
            image_path: a path to a single image or a path to a directory containing
                        images. If image_path is a path to a file, this function
                        classifies that image, else, if image_path is a path to a
                        directory that contains images, this function classifies
                        all images in the given directory (it doesn't visit the
                        subdirectories).

            top_n: number of top predictions to return (capped at the number of
                   model outputs).
            verbose: if true, it prints the top_n predictions.

        Returns:
        --------
            One list per image, each holding (label, "score%") tuples ordered
            from the most to the least probable class.

        Raises:
        -------
            ValueError: if top_n is smaller than 1.
        """
        if top_n < 1:
            raise ValueError("top_n should be a positive integer.")

        if not self.__has_loaded_weights:
            if self.__model_path:
                warnings.warn(
                        "Model path has changed but pretrained weights in the"
                        " new path are yet to be loaded.",
                        ResourceWarning
                    )
            else:
                warnings.warn(
                        "Model path isn't set, pretrained weights aren't used.",
                        ResourceWarning
                    )

        fnames, images = self.__load_image(image_path)
        images = images.to(self.__device)

        with torch.no_grad():
            output = self.__model(images)
        probabilities = torch.softmax(output, dim=1)
        # Honour the requested number of predictions without asking topk for
        # more entries than the model produces.
        k = min(top_n, probabilities.shape[1])
        top_prob, top_catid = torch.topk(probabilities, k)
        top_prob, top_catid = top_prob.tolist(), top_catid.tolist()

        predictions = [
                [
                    (self.__classes[cat_id], f"{prob*100:.5f}%")
                    for prob, cat_id in zip(probs, cat_ids)
                ]
                for probs, cat_ids in zip(top_prob, top_catid)
            ]

        if verbose:
            for idx, pred in enumerate(predictions):
                print("-"*50, f"Top {k} predictions for {fnames[idx]}", "-"*50, sep="\n")
                for label, score in pred:
                    print(f"\t{label}:{score: >10}")
                print("-"*50, "\n")
        return predictions


================================================
FILE: imageai/retinanet/__init__.py
================================================


================================================
FILE: imageai/retinanet/utils.py
================================================

from torchvision.io import ImageReadMode
import torch
from PIL import Image, ImageColor, ImageDraw, ImageFont
from typing import List, Optional, Union, Tuple, BinaryIO
import numpy as np
import math
import warnings
import pathlib

def read_file(path: str) -> torch.Tensor:
    """
    Return the raw bytes of a file as a tensor.

    The work is delegated to ``torch.ops.image.read_file``.
    NOTE(review): that op is registered by torchvision's image extension;
    this file only imports ``torchvision.io`` — confirm that import is what
    makes the op available.

    Args:
        path (str): the path to the file to be read

    Returns:
        data (Tensor): presumably a one dimensional uint8 tensor holding the
            file contents, per the op's contract — verify against torchvision.
    """
    return torch.ops.image.read_file(path)

def decode_image(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torch.Tensor:
    """
    Decode the raw bytes of an image into an image tensor.

    The work is delegated to ``torch.ops.image.decode_image``, which receives
    the byte tensor together with the integer value of ``mode``.

    Args:
        input (Tensor): tensor containing the raw bytes of the encoded image
            (presumably one dimensional uint8, as produced by ``read_file`` —
            verify against the op's contract).
        mode (ImageReadMode): the read mode used for optionally converting the
            image. Default: ``ImageReadMode.UNCHANGED``.

    Returns:
        output (Tensor): the decoded image; expected layout is
            [image_channels, image_height, image_width] — TODO confirm.
    """
    read_mode = mode.value
    return torch.ops.image.decode_image(input, read_mode)

def read_image(path: str, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torch.Tensor:
    """
    Read an image file from disk and decode it into a tensor.

    This is ``read_file`` followed by ``decode_image``.

    Args:
        path (str): path of the image file.
        mode (ImageReadMode): the read mode forwarded to ``decode_image``.
            Default: ``ImageReadMode.UNCHANGED``.

    Returns:
        output (Tensor): whatever ``decode_image`` returns for the file's bytes.
    """
    return decode_image(read_file(path), mode)

def _generate_color_palette(num_objects: int):
    palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])
    return [tuple((i * palette) % 255) for i in range(num_objects)]

@torch.no_grad()
def make_grid(
    tensor: Union[torch.Tensor, List[torch.Tensor]],
    nrow: int = 8,
    padding: int = 2,
    normalize: bool = False,
    value_range: Optional[Tuple[int, int]] = None,
    scale_each: bool = False,
    pad_value: float = 0.0,
    **kwargs,
) -> torch.Tensor:
    """
    Arrange a batch of images into a single grid image.

    Args:
        tensor (Tensor or list): 4D mini-batch Tensor of shape (B x C x H x W)
            or a list of images all of the same size. 2D (H x W) and 3D
            (C x H x W) inputs are treated as a single image, and single-channel
            images are expanded to three channels.
        nrow (int, optional): Number of images placed in each row of the grid.
            Default: ``8``.
        padding (int, optional): amount of padding around each image. Default: ``2``.
        normalize (bool, optional): If ``True``, work on a copy and rescale the
            values to (0, 1) using ``value_range``. Default: ``False``.
        value_range (tuple, optional): tuple (min, max) used for the rescaling.
            When omitted, min and max are computed from the data.
        scale_each (bool, optional): If ``True``, rescale each image of the batch
            separately instead of using one (min, max) for the whole batch.
            Default: ``False``.
        pad_value (float, optional): Value written to the padded pixels. Default: ``0``.
        **kwargs: only ``range`` is read; it is a deprecated alias of
            ``value_range`` and triggers a warning.

    Returns:
        grid (Tensor): the tensor containing the grid of images. A batch that
            holds a single image is returned as that image, without padding.

    Raises:
        TypeError: if ``tensor`` is neither a tensor nor a list of tensors.
    """
    is_tensor_list = isinstance(tensor, list) and all(torch.is_tensor(item) for item in tensor)
    if not torch.is_tensor(tensor) and not is_tensor_list:
        raise TypeError(f"tensor or list of tensors expected, got {type(tensor)}")

    if "range" in kwargs:
        warnings.warn(
            "The parameter 'range' is deprecated since 0.12 and will be removed in 0.14. "
            "Please use 'value_range' instead."
        )
        value_range = kwargs["range"]

    # A list of images becomes one 4D mini-batch.
    batch = torch.stack(tensor, dim=0) if isinstance(tensor, list) else tensor

    # Promote lower-rank inputs step by step; each check sees the result of
    # the previous one, so an H x W image ends up as 1 x 3 x H x W.
    if batch.dim() == 2:
        batch = batch.unsqueeze(0)
    if batch.dim() == 3:
        if batch.size(0) == 1:
            batch = torch.cat([batch, batch, batch], 0)
        batch = batch.unsqueeze(0)
    if batch.dim() == 4 and batch.size(1) == 1:
        batch = torch.cat([batch, batch, batch], 1)

    if normalize is True:
        # The rescaling below is in-place; keep the caller's data untouched.
        batch = batch.clone()
        if value_range is not None:
            assert isinstance(
                value_range, tuple
            ), "value_range has to be a tuple (min, max) if specified. min and max are numbers"

        def rescale_(img):
            # Bounds come from value_range when given, else from the data.
            if value_range is not None:
                low, high = value_range[0], value_range[1]
            else:
                low, high = float(img.min()), float(img.max())
            img.clamp_(min=low, max=high)
            img.sub_(low).div_(max(high - low, 1e-5))

        if scale_each is True:
            for single in batch:  # views into the batch, modified in place
                rescale_(single)
        else:
            rescale_(batch)

    assert isinstance(batch, torch.Tensor)
    if batch.size(0) == 1:
        return batch.squeeze(0)

    # Lay the images out row by row on a canvas pre-filled with pad_value.
    n_images = batch.size(0)
    n_cols = min(nrow, n_images)
    n_rows = int(math.ceil(float(n_images) / n_cols))
    cell_h = int(batch.size(2) + padding)
    cell_w = int(batch.size(3) + padding)
    canvas = batch.new_full(
        (batch.size(1), cell_h * n_rows + padding, cell_w * n_cols + padding), pad_value
    )
    for index in range(n_images):
        row, col = divmod(index, n_cols)
        cell = canvas.narrow(1, row * cell_h + padding, cell_h - padding)
        cell = cell.narrow(2, col * cell_w + padding, cell_w - padding)
        cell.copy_(batch[index])
    return canvas


@torch.no_grad()
def draw_bounding_boxes_and_labels(
    image: torch.Tensor,
    boxes: torch.Tensor,
    draw_boxes: bool,
    labels: Optional[List[str]] = None,
    label_color: Optional[Union[List[Union[str, Tuple[int, int, int]]], str, Tuple[int, int, int]]] = None,
    box_color: Optional[Union[List[Union[str, Tuple[int, int, int]]], str, Tuple[int, int, int]]] = None,
    fill: Optional[bool] = False,
    width: int = 1,
    font: Optional[str] = None,
    font_size: int = 10,
) -> torch.Tensor:

    """
    Draws bounding boxes and/or text labels on the given image.
    The values of the input image should be uint8 between 0 and 255.
    If fill is True, Resulting Tensor should be saved as PNG image.

    Args:
        image (Tensor): Tensor of shape (C x H x W) and dtype uint8, with C being 1 or 3.
        boxes (Tensor): Tensor of size (N, 4) containing bounding boxes in (xmin, ymin, xmax, ymax) format. Note that
            the boxes are absolute coordinates with respect to the image. In other words: `0 <= xmin < xmax < W` and
            `0 <= ymin < ymax < H`.
        draw_boxes (bool): If `False` only the labels are drawn, the rectangles are skipped.
        labels (List[str]): List containing the labels of bounding boxes. When given, it needs one entry per box.
        label_color (color, optional): Color of the label text. The color can be represented as
            PIL strings e.g. "red" or "#FF00FF", or as RGB tuples e.g. ``(240, 10, 157)``.
        box_color (color, optional): Color of the box outline, in the same formats as ``label_color``.
            With ``fill=True`` it is also used, semi-transparent, as the fill color; when it is omitted the
            filled box falls back to ``label_color``.
        fill (bool): If `True` fills the bounding box with the box color.
        width (int): Width of bounding box.
        font (str): A filename containing a TrueType font. If the file is not found in this filename, the loader may
            also search in other directories, such as the `fonts/` directory on Windows or `/Library/Fonts/`,
            `/System/Library/Fonts/` and `~/Library/Fonts/` on macOS.
        font_size (int): The requested font size in points.

    Returns:
        img (Tensor[C, H, W]): Image Tensor of dtype uint8 with bounding boxes plotted.

    Raises:
        TypeError: if ``image`` is not a tensor.
        ValueError: if ``image`` is not a uint8, 3-dimensional, 1- or 3-channel tensor, or if the number of
            labels differs from the number of boxes.
    """
    # NOTE(review): the annotations allow a list of colors, but the same value is handed to PIL
    # for every box — confirm whether per-box colors are intended.

    if not isinstance(image, torch.Tensor):
        raise TypeError(f"Tensor expected, got {type(image)}")
    elif image.dtype != torch.uint8:
        raise ValueError(f"Tensor uint8 expected, got {image.dtype}")
    elif image.dim() != 3:
        raise ValueError("Pass individual images, not batches")
    elif image.size(0) not in {1, 3}:
        raise ValueError("Only grayscale and RGB images are supported")

    num_boxes = boxes.shape[0]

    if labels is None:
        labels: Union[List[str], List[None]] = [None] * num_boxes  # type: ignore[no-redef]
    elif len(labels) != num_boxes:
        raise ValueError(
            f"Number of boxes ({num_boxes}) and labels ({len(labels)}) mismatch. Please specify labels for each box."
        )


    # Handle Grayscale images
    if image.size(0) == 1:
        image = torch.tile(image, (3, 1, 1))

    ndarr = image.permute(1, 2, 0).cpu().numpy()
    img_to_draw = Image.fromarray(ndarr)
    img_boxes = boxes.to(torch.int64).tolist()

    if fill:
        # "RGBA" drawing mode lets the semi-transparent fill blend with the image.
        draw = ImageDraw.Draw(img_to_draw, "RGBA")
    else:
        draw = ImageDraw.Draw(img_to_draw)

    txt_font = ImageFont.load_default() if font is None else ImageFont.truetype(font=font, size=font_size)

    # Resolve the colors of a filled box once. The outline follows box_color like
    # the non-filled case; label_color is only a fallback when box_color is unset.
    # Named colors are converted to RGB so the alpha channel can be appended, and
    # a missing color leaves the fill unset instead of failing on None.
    filled_outline = box_color if box_color is not None else label_color
    fill_color = None
    if fill and filled_outline is not None:
        if isinstance(filled_outline, str):
            rgb = ImageColor.getrgb(filled_outline)
        else:
            rgb = tuple(filled_outline)
        fill_color = tuple(rgb[:3]) + (100,)

    for bbox, label in zip(img_boxes, labels):
        if draw_boxes:
            if fill:
                draw.rectangle(bbox, width=width, outline=filled_outline, fill=fill_color)
            else:
                draw.rectangle(bbox, width=width, outline=box_color)

        if label is not None:
            # Keep the text clear of the box outline.
            margin = width + 1
            draw.text((bbox[0] + margin, bbox[1] + margin), label, fill=label_color, font=txt_font)

    return torch.from_numpy(np.array(img_to_draw)).permute(2, 0, 1).to(dtype=torch.uint8)


@torch.no_grad()
def tensor_to_ndarray(
    tensor: Union[torch.Tensor, List[torch.Tensor]],
    **kwargs,
) -> np.ndarray:
    """
    Convert an image Tensor into a uint8 ndarray and return the array.

    Args:
        tensor (Tensor or list): Image to be converted. It is first passed through
            ``make_grid``, so a mini-batch is turned into a single grid image.
            NOTE(review): the scaling by 255 assumes values in [0, 1] — confirm with callers.
        **kwargs: Other arguments are forwarded to ``make_grid``.

    Returns:
        ndarr (ndarray): uint8 array with the channel axis moved last
            (the grid's axes permuted as (1, 2, 0)).
    """

    grid = make_grid(tensor, **kwargs)
    # Add 0.5 after unnormalizing to [0, 255] to round to nearest integer
    ndarr = grid.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to("cpu", torch.uint8).numpy()

    return ndarr


================================================
FILE: imageai/yolov3/__init__.py
================================================


================================================
FILE: imageai/yolov3/tiny_yolov3.py
================================================
from typing import Union, List, Tuple, Optional

import torch
import torch.nn as nn
import numpy as np

from .yolov3 import DetectionLayer, ConvLayer


class YoloV3Tiny(nn.Module):
    """
    Tiny YOLOv3 network with two detection heads.

    The backbone is a stack of ``ConvLayer`` + max-pool stages; the output of
    ``conv5`` and ``conv8`` are kept as route tensors and re-used by the
    second detection head.

    Parameters:
    -----------
        anchors:        flat sequence of anchor values; each ``DetectionLayer``
                        picks its own anchors through ``anchor_masks``.
        num_classes:    number of classes predicted per box.
        device:         device string forwarded to the detection layers.
    """

    def __init__(
                self,
                anchors : Union[List[int], Tuple[int,...]],
                num_classes : int=80,
                device : str="cpu"
            ):
        super().__init__()

        # every detection head predicts 3 anchors x (4 box + 1 objectness + classes)
        head_channels = 3 * (5 + num_classes)

        # Network Layers (attribute names and creation order are kept stable
        # so existing checkpoints keep loading)
        self.conv1 = ConvLayer(3, 16)
        self.maxpool1 = nn.MaxPool2d(2, 2)
        self.conv2 = ConvLayer(16, 32)
        self.maxpool2 = nn.MaxPool2d(2, 2)
        self.conv3 = ConvLayer(32, 64)
        self.maxpool3 = nn.MaxPool2d(2, 2)
        self.conv4 = ConvLayer(64, 128)
        self.maxpool4 = nn.MaxPool2d(2, 2)
        self.conv5 = ConvLayer(128, 256)
        self.maxpool5 = nn.MaxPool2d(2, 2)
        self.conv6 = ConvLayer(256, 512)
        # pad right/bottom by one so the stride-1 pool below keeps the grid size
        self.zeropad = nn.ZeroPad2d((0, 1, 0, 1))
        self.maxpool6 = nn.MaxPool2d(2, 1)
        self.conv7 = ConvLayer(512, 1024)
        self.conv8 = ConvLayer(1024, 256, 1, 1)
        self.conv9 = ConvLayer(256, 512)
        self.conv10 = ConvLayer(
                    512, head_channels, 1, 1,
                    use_batch_norm=False,
                    activation="linear"
                )
        self.yolo1 = DetectionLayer(
                    num_classes=num_classes, anchors=anchors,
                    anchor_masks=(3, 4, 5), device=device, layer=1
                )

        # second head: starts again from the conv8 route tensor
        self.conv11 = ConvLayer(256, 128, 1, 1)
        self.upsample1 = nn.Upsample(scale_factor=2, mode="nearest")
        # input is upsample1 (128 channels) concatenated with conv5 (256 channels)
        self.conv12 = ConvLayer(384, 256)
        self.conv13 = ConvLayer(
                    256, head_channels, 1, 1,
                    use_batch_norm=False,
                    activation="linear"
                )
        self.yolo2 = DetectionLayer(
                    num_classes=num_classes, anchors=anchors,
                    anchor_masks=(0, 1, 2), device=device, layer=2
                )

    def get_loss_layers(self) -> List["DetectionLayer"]:
        """Returns the two detection layer modules, coarse head first."""
        return [self.yolo1, self.yolo2]

    def __route_layer(self, y1 : torch.Tensor, y2 : Optional[torch.Tensor]=None) -> torch.Tensor:
        """Returns ``y1``, or ``y1`` and ``y2`` concatenated on dim 1 when ``y2`` is a tensor."""
        if isinstance(y2, torch.Tensor):
            return torch.cat([y1, y2], 1)
        return y1

    def forward(self, x : torch.Tensor) -> torch.Tensor:
        """
        Runs the network and returns the outputs of both detection layers
        concatenated along dimension 1.
        """
        # backbone
        y = self.maxpool1(self.conv1(x))
        y = self.maxpool2(self.conv2(y))
        y = self.maxpool3(self.conv3(y))
        y = self.maxpool4(self.conv4(y))
        r1 = self.conv5(y) # route layer
        y = self.conv6(self.maxpool5(r1))
        y = self.maxpool6(self.zeropad(y))
        y = self.conv7(y)
        r2 = self.conv8(y) # route layer

        # first detection layer
        y = self.conv10(self.conv9(r2))
        out = self.yolo1(y)

        # second detection layer
        y = self.conv11(self.__route_layer(r2))
        y = self.__route_layer(self.upsample1(y), r1)
        y = self.conv13(self.conv12(y))
        out = torch.cat([out, self.yolo2(y)], 1)

        return out


================================================
FILE: imageai/yolov3/utils.py
================================================
import math
from typing import Union, List, Tuple

import torch
import numpy as np
import cv2 as cv
from torchvision.ops import batched_nms


def draw_bbox_and_label(x : torch.Tensor, label : str, img : np.ndarray) -> np.ndarray:
    """
    Draws a predicted bounding box and its label on the image.

    Parameters:
    -----------
        x:      four values ``(x1, y1, x2, y2)``; each is converted with ``int``.
                When ``None`` the image is returned untouched.
        label:  text written just below the top-left corner of the box.
        img:    image to draw on.

    Returns:
    --------
        the image returned by the OpenCV drawing calls.
    """
    # Nothing to draw without a box. This guard has to run before the
    # coordinates are unpacked, otherwise a None box fails on unpacking.
    if x is None:
        return img

    x1, y1, x2, y2 = tuple(map(int, x))
    img = cv.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 1)

    # offset the text baseline by the text height so the label sits inside the box
    t_size = cv.getTextSize(label, cv.FONT_HERSHEY_PLAIN, 1, 1)[0]
    img = cv.putText(img, label, (x1, y1 + t_size[1] + 4), cv.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), 1)

    return img

def letterbox_image(
        image : np.ndarray,
        inp_dim : Tuple[int, int]) -> np.ndarray:
    """
    Resizes an image into the dimension expected by the network while
    keeping the aspect ratio of the original image. The resized image is
    centred on a canvas filled with the value 128; the canvas shows wherever
    the resized image does not cover the network dimension.

    Parameters:
    -----------
        image:      input image indexed as (height, width, channels).
        inp_dim:    ``(width, height)`` expected by the network.

    Returns:
    --------
        an array of shape ``(height, width, 3)`` taken from ``inp_dim``. The
        canvas is created by ``np.full`` with an integer fill value and no
        explicit dtype.
    """
    img_h, img_w = image.shape[0], image.shape[1] # original image dimension
    net_w, net_h = inp_dim # the dimension expected by the network.

    # calculate the new dimension with the same aspect ratio as
    # the original image.
    scale_factor = min(net_w / img_w, net_h / img_h)
    new_w = int(round(img_w * scale_factor))
    new_h = int(round(img_h * scale_factor))

    resized_image = cv.resize(image, (new_w, new_h), interpolation=cv.INTER_CUBIC)

    # numpy images are indexed rows (height) first, then columns (width);
    # allocating (net_w, net_h) only worked for square network inputs.
    canvas = np.full((net_h, net_w, 3), 128)
    top = (net_h - new_h) // 2
    left = (net_w - new_w) // 2
    canvas[top : top + new_h, left : left + new_w, :] = resized_image
    return canvas

def prepare_image(
        image : np.ndarray,
        inp_dim : Tuple[int, int]) -> torch.Tensor:
    """
    Turns an image array into the tensor layout fed to the network:
    letterboxed to ``inp_dim``, last (channel) axis reversed, channels moved
    first, converted to float, divided by 255 and given a leading batch axis
    of size 1.
    """
    boxed = letterbox_image(image, inp_dim)

    # reverse the channel order, then go from (H, W, C) to (C, H, W);
    # the copy materialises the reversed view before handing it to torch
    channel_reversed = boxed[:, :, ::-1]
    channels_first = channel_reversed.transpose((2, 0, 1)).copy()

    tensor = torch.from_numpy(channels_first).float()
    tensor = tensor.div(255.0)
    return tensor.unsqueeze(0)

def bbox_iou(bbox1 : torch.Tensor, bbox2 : torch.Tensor, device="cpu"):
    """
    Returns the element-wise IoU of two sets of boxes.

    Parameters:
    -----------
        bbox1, bbox2:   2-D tensors whose first four columns are
                        ``x1, y1, x2, y2`` corner coordinates. Widths and
                        heights are computed with a ``+ 1`` (inclusive pixel
                        convention).
        device:         kept for backward compatibility; it is no longer
                        needed because the computation stays on the device
                        of the input tensors.

    Returns:
    --------
        a tensor holding ``intersection / union`` for each pair of rows.
    """
    b1_x1, b1_y1, b1_x2, b1_y2 = bbox1[:, 0], bbox1[:, 1], bbox1[:, 2], bbox1[:, 3]
    b2_x1, b2_y1, b2_x2, b2_y2 = bbox2[:, 0], bbox2[:, 1], bbox2[:, 2], bbox2[:, 3]

    # intersections
    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)

    # clamp instead of comparing against a zeros tensor created on `device`:
    # that tensor ended up on the wrong device whenever the inputs were not
    # on the (default "cpu") device passed by the caller.
    inter_w = (inter_rect_x2 - inter_rect_x1 + 1).clamp(min=0)
    inter_h = (inter_rect_y2 - inter_rect_y1 + 1).clamp(min=0)
    inter_area = inter_w * inter_h

    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

    return inter_area / (b1_area + b2_area - inter_area)

def transform_prediction(
        pred : torch.Tensor,
        inp_dim : int,
        anchors : Union[List[int], Tuple[int, ...], torch.Tensor],
        num_classes : int,
        device : str = "cpu"
        ) -> torch.Tensor:
    """
    Transforms the predictions of the convolutional layers
    from
        batch_size x (num_anchors * (5+num_classes)) x grid_size x grid_size
    to
        batch_size x (grid_size * grid_size * num_anchors) x (5+num_classes)
    which aids the concatenation of the predictions of the detection layers
    and gives one row per predicted bounding box.

    Also, transforms the bounding box predictions and the objectness score
    to match the description specified in the paper:
        Bx = sigmoid(Tx) + Cx
        By = sigmoid(Ty) + Cy
        Bw = Pw(exp(Tw))
        Bh = Ph(exp(Th))
    and finally multiplies the four box attributes by the stride so they are
    expressed in network-input pixels.

    Parameters:
    -----------
        pred:           prediction of the convolutional layer
        inp_dim:        the dimension of images expected by the yolo neural network
        anchors:        a sequence of anchor pairs; element ``a`` is read as
                        ``a[0]`` and ``a[1]`` and scales box attributes 2 and 3
        num_classes:    the number of classes predicted per box
        device:         device on which the grid offsets and anchors are placed

    Returns:
    --------
        the transformed prediction.
    """
    batch_size = pred.shape[0]
    grid_size = pred.shape[2]
    stride = inp_dim // grid_size
    bbox_attrs = 5 + num_classes
    num_anchors = len(anchors)

    # transform input shape: one row per (grid cell, anchor)
    pred = pred.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
    pred = pred.transpose(1, 2).contiguous()
    pred = pred.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)

    # since the dimensions of the anchors are in accordance with the original
    # dimension of the image, it's required to scale the dimension of the
    # anchors to match the dimension of the output of the convolutional
    # layer
    anchors = [(a[0] / stride, a[1] / stride) for a in anchors]

    # sigmoid the center_x, center_y and the objectness score
    pred[:, :, 0] = torch.sigmoid(pred[:, :, 0])
    pred[:, :, 1] = torch.sigmoid(pred[:, :, 1])
    pred[:, :, 4] = torch.sigmoid(pred[:, :, 4])

    # add the center offsets; cells are enumerated row by row, so x varies
    # fastest in the flattened grid
    grid = np.arange(grid_size)
    x_o, y_o = np.meshgrid(grid, grid)

    x_offset = torch.FloatTensor(x_o).view(-1, 1).to(device)
    y_offset = torch.FloatTensor(y_o).view(-1, 1).to(device)

    # repeat each cell offset once per anchor to line up with the rows of pred
    x_y_offset = torch.cat([x_offset, y_offset], dim=1).repeat(1, num_anchors).view(-1,2).unsqueeze(0)
    pred[:, :, :2] += x_y_offset

    # transform height and width
    anchors = torch.FloatTensor(anchors).to(device)
    anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
    pred[:, :, 2:4] = torch.exp(pred[:, :, 2:4])*anchors

    # apply sigmoid to class scores
    pred[:, :, 5:5+num_classes] = torch.sigmoid(pred[:, :, 5:5+num_classes])

    # resize bounding box prediction to the original image dimension
    pred[:, :, :4] *= stride

    return pred

def get_predictions(
        pred : torch.Tensor,
        num_classes : int,
        objectness_confidence : float = 0.5,
        nms_confidence_level : float = 0.4,
        device : str = "cpu") -> Union[torch.Tensor, None]:
    """
    This function filters the bounding boxes predicted by the network by first
    zeroing out bounding boxes that have a low objectness score, and then
    proceeds to filter overlapping bounding boxes using the non-maximum
    suppression algorithm, applied per image and per class.

    Parameters:
    -----------
        pred:           a tensor (predicted output) of shape
                        'batch_size x num_bboxes x bbox_attrs'
        num_classes:    the number of classes predicted per box.
        objectness_confidence:  objectness threshold; rows at or below it
                                are multiplied by zero (they are not removed).
        nms_confidence_level:   IoU threshold passed to ``batched_nms``.
        device:         device on which the helper index tensors are placed.

    Returns:
    --------
        a tensor with one row per kept box, laid out as
        ``batch_idx, x1, y1, x2, y2, objectness_score, class_score, class_idx``,
        or ``None`` when NMS keeps nothing.
    """
    nB = pred.shape[0] # number of images in the batch
    nBBOX = pred.shape[1] # number of bounding boxes per image
    bbox_attr = pred.shape[2] # center_x, center_y, width, height, objectness, class scores

    conf_mask = (pred[:, :, 4] > objectness_confidence).float().unsqueeze(2)
    pred = pred * conf_mask

    # transform the predicted centers, width and height to top-left corner and
    # bottom-right corner coordinates to ease the computation of the IoU
    half_w = pred[:, :, 2] / 2
    half_h = pred[:, :, 3] / 2
    corners = torch.stack(
        [
            pred[:, :, 0] - half_w, # top-left_x
            pred[:, :, 1] - half_h, # top-left_y
            pred[:, :, 0] + half_w, # bottom_right_x
            pred[:, :, 1] + half_h, # bottom_right_y
        ],
        dim=2,
    )
    pred[:, :, :4] = corners

    n_pred = pred.view(-1, bbox_attr)
    idxs = torch.arange(nB).reshape(-1,1).repeat(1, nBBOX).view(-1).to(device) # image indices

    max_conf, max_idx = torch.max(n_pred[:, 5:5+num_classes], 1) # maximum class score and the index
    max_conf = max_conf.float().unsqueeze(1).to(device)
    max_idx = max_idx.float().unsqueeze(1).to(device)
    n_pred = torch.cat([idxs.unsqueeze(1), n_pred[:, :5], max_conf, max_idx], 1) # batch_idx, x1, y1, x2, y2, objectness_score, class_score, class_idx

    # Group boxes by (image, class) rather than by class alone, so that a box
    # in one image can never suppress a box that belongs to another image of
    # the same batch. For a batch of one image the groups equal the class ids.
    nms_groups = (n_pred[:, 0] * num_classes + n_pred[:, 7]).long()

    valid_bbox_indices = batched_nms(n_pred[:, 1:5].clone(), n_pred[:, 5].clone(), nms_groups, nms_confidence_level)

    if len(valid_bbox_indices):
        return n_pred[valid_bbox_indices, :]
    return None


================================================
FILE: imageai/yolov3/yolov3.py
================================================
from typing import Union, List, Tuple, Optional

import torch
import torch.nn as nn
import numpy as np

from .utils import transform_prediction


def noop(x):
    """Identity function: hands back its argument unchanged."""
    return x

class DetectionLayer(nn.Module):
    """
    YOLO detection head.

    In training mode the raw convolutional output is only reshaped: a
    gradient-carrying 5-D view is stored in ``self.pred`` and a detached,
    flattened copy is returned. In evaluation mode the output is decoded by
    ``transform_prediction`` using this layer's anchors.
    """

    def __init__(
            self,
            anchors : Union[List[int], Tuple[int, ...]],
            anchor_masks : Tuple[int, int, int],
            layer : int,
            num_classes : int=80,
            device : str="cpu"
        ):
        super().__init__()

        # network input resolution assumed by this layer
        self.height = 416
        self.width = 416

        self.num_classes = num_classes
        self.ignore_thresh = 0.7
        self.truth_thresh = 1
        self.rescore = 1
        self.device = device
        self.anchors = self.__get_anchors(anchors, anchor_masks)
        self.layer = layer

        # populated on every forward pass
        self.layer_width = None
        self.layer_height = None
        self.layer_output = None
        self.pred = None
        self.stride = None

        self.grid = None
        self.anchor_grid = None

    def __get_anchors(
                self, anchors : Union[List[int], Tuple[int, ...]],
                anchor_masks : Tuple[int, int, int]
            ) -> torch.Tensor:
        """Pairs up the flat anchor values and keeps the pairs named by ``anchor_masks``."""
        pairs = []
        for start in range(0, len(anchors), 2):
            pairs.append((anchors[start], anchors[start + 1]))
        selected = [pairs[mask] for mask in anchor_masks]
        return torch.tensor(selected).to(self.device)

    def forward(self, x : torch.Tensor):
        """Reshapes (training) or decodes (evaluation) the head output ``x``."""
        self.layer_height = x.shape[2]
        self.layer_width = x.shape[3]
        self.stride = self.height // self.layer_height

        if not self.training:
            # transform the output of the network and scale it to match the
            # network dimension : 416x416
            self.layer_output = transform_prediction(
                x.data, self.width, self.anchors, self.num_classes, self.device
            )
            return self.layer_output

        n_images = x.shape[0]
        cells_per_side = x.shape[2]
        n_cells = cells_per_side * cells_per_side
        attrs_per_box = 5 + self.num_classes
        n_anchors = len(self.anchors)

        # gradient-carrying view: batch x anchor x grid_y x grid_x x attrs
        self.layer_output = x.detach()
        self.pred = (
            x.view(n_images, n_anchors, attrs_per_box, cells_per_side, cells_per_side)
            .permute(0, 1, 3, 4, 2)
            .contiguous()
        )

        # detached copy flattened to batch x (cells * anchors) x attrs
        flat = self.layer_output.view(n_images, attrs_per_box * n_anchors, n_cells)
        flat = flat.transpose(1, 2).contiguous()
        self.layer_output = flat.view(n_images, n_cells * n_anchors, attrs_per_box)

        return self.layer_output


class ConvLayer(nn.Module):
    """
    Convolution followed by optional batch normalisation and optional
    LeakyReLU(0.1). The convolution is padded with ``kernel_size // 2`` and
    only carries a bias term when batch normalisation is disabled.
    """

    def __init__(self, in_f : int, out_f : int, kernel_size : int = 3,
                stride : int = 1, use_batch_norm : bool = True,
                activation : str ="leaky"):
        super().__init__()

        self.conv = nn.Conv2d(
            in_f,
            out_f,
            kernel_size=kernel_size,
            stride=stride,
            padding=kernel_size // 2,
            bias=not use_batch_norm,
        )

        # disabled stages fall back to the identity function
        if use_batch_norm:
            self.batch_norm = nn.BatchNorm2d(out_f)
        else:
            self.batch_norm = noop

        if activation == "leaky":
            self.leaky_relu = nn.LeakyReLU(0.1, inplace=True)
        else:
            self.leaky_relu = noop

    def forward(self, x : torch.Tensor):
        """Applies convolution, then normalisation, then activation."""
        features = self.conv(x)
        normalised = self.batch_norm(features)
        return self.leaky_relu(normalised)

class YoloV3(nn.Module):
    """
    YOLOv3 network: a 52-convolution backbone built from residual
    (shortcut) stages, followed by three detection heads. Tensors saved as
    route layers in the backbone are concatenated with upsampled head
    features to feed the second and third heads.

    Parameters:
    -----------
        anchors:        flat sequence of anchor values; each ``DetectionLayer``
                        picks its own anchors through ``anchor_masks``.
        num_classes:    number of classes predicted per box.
        device:         device string forwarded to the detection layers.
    """

    def __init__(
            self,
            anchors : Union[List[int], Tuple[int, ...]],
            num_classes : int = 80,
            device : str ="cpu"):
        super().__init__()

        # Network Layers
        self.conv1 = ConvLayer(3, 32)
        self.conv2 = ConvLayer(32, 64, stride=2)
        self.conv3 = ConvLayer(64, 32, 1, 1)
        self.conv4 = ConvLayer(32, 64)
        # self.__shortcut_layer1(self.conv4, self.conv2)
        self.conv5 = ConvLayer(64, 128, stride=2)
        self.conv6 = ConvLayer(128, 64, 1, 1)
        self.conv7 = ConvLayer(64, 128, stride=1)
        # self.__shortcut_layer2(self.conv7, self.conv5)
        self.conv8 = ConvLayer(128, 64, 1, 1)
        self.conv9 = ConvLayer(64, 128, stride=1)
        # self.__shortcut_layer3(self.conv9, shortcut2)
        self.conv10 = ConvLayer(128, 256, stride=2)
        self.conv11 = ConvLayer(256, 128, 1, 1)
        self.conv12 = ConvLayer(128, 256)
        # self.__shortcut_layer4(self.con12, self.conv10)
        self.conv13 = ConvLayer(256, 128, 1, 1)
        self.conv14 = ConvLayer(128, 256)
        # self.__shortcut_layer5(self.conv14, shortcut4)
        self.conv15 = ConvLayer(256, 128, 1, 1)
        self.conv16 = ConvLayer(128, 256)
        # self.__shortcut_layer6(self.conv16, shortcut5)
        self.conv17 = ConvLayer(256, 128, 1, 1)
        self.conv18 = ConvLayer(128, 256)
        # self.__shortcut_layer7(self.conv18, shortcut6)
        self.conv19 = ConvLayer(256, 128, 1, 1)
        self.conv20 = ConvLayer(128, 256)
        # self.__shortcut_layer8(self.conv20, shortcut7)
        self.conv21 = ConvLayer(256, 128, 1, 1)
        self.conv22 = ConvLayer(128, 256)
        # self.__shortcut_layer9(self.conv22, shortcut8)
        self.conv23 = ConvLayer(256, 128, 1, 1)
        self.conv24 = ConvLayer(128, 256)
        # self.__shortcut_layer10(self.conv24, shortcut9)
        self.conv25 = ConvLayer(256, 128, 1, 1)
        self.conv26 = ConvLayer(128, 256)
        # self.__shortcut_layer11(self.conv26, shortcut10)
        self.conv27 = ConvLayer(256, 512, stride=2)
        self.conv28 = ConvLayer(512, 256, 1, 1)
        self.conv29 = ConvLayer(256, 512)
        # self.__shortcut_layer12(self.conv29, self.conv27)
        self.conv30 = ConvLayer(512, 256, 1, 1)
        self.conv31 = ConvLayer(256, 512)
        # self.__shortcut_layer13(self.conv31, shortcut12)
        self.conv32 = ConvLayer(512, 256, 1, 1)
        self.conv33 = ConvLayer(256, 512)
        # self.__shortcut_layer14(self.conv33, shortcut13)
        self.conv34 = ConvLayer(512, 256, 1, 1)
        self.conv35 = ConvLayer(256, 512)
        # self.__shortcut_layer15(self.conv35, shortcut14)
        self.conv36 = ConvLayer(512, 256, 1, 1)
        self.conv37 = ConvLayer(256, 512)
        # self.__shortcut_layer16(self.conv37, shortcut15)
        self.conv38 = ConvLayer(512, 256, 1, 1)
        self.conv39 = ConvLayer(256, 512)
        # self.__shortcut_layer17(self.conv39, shortcut16)
        self.conv40 = ConvLayer(512, 256, 1, 1)
        self.conv41 = ConvLayer(256, 512)
        # self.__shortcut_layer18(self.conv41, shortcut17)
        self.conv42 = ConvLayer(512, 256, 1, 1)
        self.conv43 = ConvLayer(256, 512)
        # self.__shortcut_layer19(self.conv43, shortcut18)
        self.conv44 = ConvLayer(512, 1024, stride=2)
        self.conv45 = ConvLayer(1024, 512, 1, 1)
        self.conv46 = ConvLayer(512, 1024)
        # self.__shortcut_layer20(self.conv46, self.conv44)
        self.conv47 = ConvLayer(1024, 512, 1, 1)
        self.conv48 = ConvLayer(512, 1024)
        # self.__shortcut_layer21(self.conv48, shortcut20)
        self.conv49 = ConvLayer(1024, 512, 1, 1)
        self.conv50 = ConvLayer(512, 1024)
        # self.__shortcut_layer22(self.conv50, shortcut21)
        self.conv51 = ConvLayer(1024, 512, 1, 1)
        self.conv52 = ConvLayer(512, 1024)
        # self.__shortcut_layer23(self.conv52, shortcut22)
        self.conv53 = ConvLayer(1024, 512, 1, 1)
        self.conv54 = ConvLayer(512, 1024)
        self.conv55 = ConvLayer(1024, 512, 1, 1)
        self.conv56 = ConvLayer(512, 1024)
        self.conv57 = ConvLayer(1024, 512, 1, 1)
        self.conv58 = ConvLayer(512, 1024)
        # head output: 3 anchors x (4 box + 1 objectness + num_classes) channels
        self.conv59 = ConvLayer(
                    1024, (3 * (5 + num_classes)), 1, 1, use_batch_norm=False,
                    activation="linear"
                )

        # yolo layer
        self.yolo1 = DetectionLayer(
                    num_classes=num_classes, anchors=anchors,
                    anchor_masks=(6, 7, 8), device=device, layer=1
                )

        # self.__route_layer(self.conv57)
        self.conv60 = ConvLayer(512, 256, 1, 1)
        self.upsample1 = nn.Upsample(
                    scale_factor=2, mode="nearest"
                    #align_corners=True
                )
        # self.__route_layer(self.upsample1, shortcut19)
        # 768 input channels = 256 (upsample1) + 512 (shortcut19 route)
        self.conv61 = ConvLayer(768, 256, 1, 1)
        self.conv62 = ConvLayer(256, 512)
        self.conv63 = ConvLayer(512, 256, 1, 1)
        self.conv64 = ConvLayer(256, 512)
        self.conv65 = ConvLayer(512, 256, 1, 1)
        self.conv66 = ConvLayer(256, 512)
        self.conv67 = ConvLayer(
                    512, (3 * (5 + num_classes)), 1, 1, use_batch_norm=False,
                    activation="linear"
                )
        
        # yolo layer
        self.yolo2 = DetectionLayer(
                    num_classes=num_classes, anchors=anchors,
                    anchor_masks=(3, 4, 5), device=device, layer=2
                )
        
        # self.__route_layer(self.conv65)
        self.conv68 = ConvLayer(256, 128, 1, 1)
        self.upsample2 = nn.Upsample(
                    scale_factor=2, mode="nearest"
                    #align_corners=True
                )
        # self.__route_layer(self.upsample2, shortcut11)

        # 384 input channels = 128 (upsample2) + 256 (shortcut11 route)
        self.conv69 = ConvLayer(384, 128, 1, 1)
        self.conv70 = ConvLayer(128, 256)
        self.conv71 = ConvLayer(256, 128, 1, 1)
        self.conv72 = ConvLayer(128, 256)
        self.conv73 = ConvLayer(256, 128, 1, 1)
        self.conv74 = ConvLayer(128, 256)
        self.conv75 = ConvLayer(
                    256, (3 * (5 + num_classes)), 1, 1, use_batch_norm=False,
                    activation="linear"
                )

        # yolo layer
        self.yolo3 = DetectionLayer(
                    num_classes=num_classes, anchors=anchors,
                    anchor_masks=(0, 1, 2), device=device, layer=3
                )
    
    def get_loss_layers(self) -> List["DetectionLayer"]:
        """Returns the three detection layer modules in head order."""
        return [self.yolo1, self.yolo2, self.yolo3]

    def __route_layer(self, y1 : torch.Tensor, y2 : Optional[torch.Tensor]=None) -> torch.Tensor:
        """Returns ``y1``, or ``y1`` and ``y2`` concatenated on dim 1 when ``y2`` is a tensor."""
        if isinstance(y2, torch.Tensor):
            return torch.cat([y1, y2], 1)
        return y1

    def __shortcut_layer(self,
                         y1 : torch.Tensor, y2 : torch.Tensor,
                         activation : str="linear"
                        ) -> torch.Tensor:
        """Residual connection: adds ``y1`` and ``y2``; applies LeakyReLU(0.1) unless ``activation`` is "linear"."""
        actv = noop if activation=="linear" else nn.LeakyReLU(0.1)
        return actv(y1 + y2)

    def forward(self, x : torch.Tensor) -> torch.Tensor:
        """
        Runs the network and returns the outputs of the three detection
        layers concatenated along dimension 1.
        """
        y = self.conv2(self.conv1(x))
        # shortcut1
        y = self.conv5(self.__shortcut_layer(self.conv4(self.conv3(y)), y))
        y2 = self.conv7(self.conv6(y))
        # shortcut2
        y = self.__shortcut_layer(y2, y)
        y2 = self.conv9(self.conv8(y))
        # shortcut3
        y2 = self.conv10(self.__shortcut_layer(y2, y))
        y = self.conv12(self.conv11(y2))
        # shortcut4
        y2 = self.__shortcut_layer(y, y2)
        y = self.conv14(self.conv13(y2))
        # shortcut5
        y2 = self.__shortcut_layer(y, y2)
        # NOTE(review): every other residual stage feeds the shortcut output
        # (y2) straight into the next 1x1 conv; here the conv14 output (y) is
        # added to y2 a second time before conv15. This looks unintentional,
        # but existing trained weights may depend on it - confirm before
        # changing.
        y = self.conv16(self.conv15(self.__shortcut_layer(y2, y)))
        # shortcut6
        y2 = self.__shortcut_layer(y, y2)
        y = self.conv18(self.conv17(y2))
        # shortcut7
        y2 = self.__shortcut_layer(y, y2)
        y = self.conv20(self.conv19(y2))
        # shortcut8
        y2 = self.__shortcut_layer(y, y2)
        y = self.conv22(self.conv21(y2))
        # shortcut9
        y2 = self.__shortcut_layer(y, y2)
        y = self.conv24(self.conv23(y2))
        # shortcut10
        y2 = self.__shortcut_layer(y, y2)
        y = self.conv26(self.conv25(y2))
        # shortcut11
        r1 = self.__shortcut_layer(y, y2) # route_layer
        y = self.conv27(r1)
        y2 = self.conv29(self.conv28(y))
        # shortcut12
        y = self.__shortcut_layer(y2, y)
        y2 = self.conv31(self.conv30(y))
        # shortcut13
        y = self.__shortcut_layer(y2, y)
        y2 = self.conv33(self.conv32(y))
        # shortcut14
        y = self.__shortcut_layer(y2, y)
        y2 = self.conv35(self.conv34(y))
        # shortcut15
        y = self.__shortcut_layer(y2, y)
        y2 = self.conv37(self.conv36(y))
        # shortcut16
        y = self.__shortcut_layer(y2, y)
        y2 = self.conv39(self.conv38(y))
        # shortcut17
        y = self.__shortcut_layer(y2, y)
        y2 = self.conv41(self.conv40(y))
        # shortcut18
        y = self.__shortcut_layer(y2, y)
        y2 = self.conv43(self.conv42(y))
        # shortcut19
        r2 = self.__shortcut_layer(y2, y) # route_layer
        y2 = self.conv44(r2)
        y = self.conv46(self.conv45(y2))
        # shortcut20
        y2 = self.__shortcut_layer(y, y2)
        y = self.conv48(self.conv47(y2))
        # shortcut21
        y2 = self.__shortcut_layer(y, y2)
        y = self.conv50(self.conv49(y2))
        # shortcut22
        y2 = self.__shortcut_layer(y, y2)
        y = self.conv52(self.conv51(y2))
        # shortcut23
        y2 = self.__shortcut_layer(y, y2)
        y = self.conv54(self.conv53(y2))
        r3 = self.conv57(self.conv56(self.conv55(y))) # route_layer
        y = self.conv59(self.conv58(r3))

        # first detection layer
        out = self.yolo1(y)
        y = self.conv60(self.__route_layer(r3))
        # upsampled head features are concatenated with the shortcut19 route
        y = self.conv62(self.conv61(self.__route_layer(self.upsample1(y), r2)))
        r4 = self.conv65(self.conv64(self.conv63(y))) # route_layer
        y = self.conv67(self.conv66(r4))

        # second detection layer
        out = torch.cat([out, self.yolo2(y)], dim=1)
        y = self.conv68(self.__route_layer(r4))
        # upsampled head features are concatenated with the shortcut11 route
        y = self.conv70(self.conv69(self.__route_layer(self.upsample2(y), r1)))
        y = self.conv75(self.conv74(self.conv73(self.conv72(self.conv71(y)))))

        # third detection layer
        out = torch.cat([out, self.yolo3(y)], dim=1)

        return out


================================================
FILE: imageai_tf_deprecated/Classification/CUSTOMCLASSIFICATION.md
================================================
# ImageAI : Custom Image Classification
A **DeepQuest AI** project <a href="https://deepquestai.com" >https://deepquestai.com </a></p>

---

ImageAI provides 4 different algorithms and model types to perform custom image prediction using your custom models.
You will be able to use your model trained with **ImageAI** and the corresponding model_class JSON file to predict custom objects
that you have trained the model on.

### TABLE OF CONTENTS

- <a href="#customprediction" > :white_square_button: Custom Model Prediction</a>
- <a href="#custompredictionfullmodel" > :white_square_button: Custom Model Prediction with Full Model (NEW)</a>
- <a href="#custompredictionmultiple" > :white_square_button: Custom Prediction with multiple models (NEW)</a>
- <a href="#converttensorflow" > :white_square_button: Convert custom model to Tensorflow's format (NEW)</a>
- <a href="#convertdeepstack" > :white_square_button: Convert custom model to DeepStack's format (NEW)</a>


### Custom Model Prediction
<div id="customprediction"></div>

In this example, we will be using the model trained for 20 experiments on **IdenProf**, a dataset of uniformed professionals and achieved 65.17% accuracy on the test dataset.
(You can use your own trained model and generated JSON file. This 'class' is provided mainly for the purpose to use your own custom models.)
Download the ResNet model file and the JSON file from the links below:

- [**ResNet50**](https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/idenprof_resnet_ex-056_acc-0.993062.h5) _(Size = 90.4 mb)_
- [**IdenProf model_class.json file**](https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/idenprof.json)

Great!
Once you have downloaded this model file and the JSON file, start a new python project, and then copy the model file and the JSON file to your project folder where your python files (.py files) will be.
Download the image below, or take any image on your computer that include any of the following professionals(Chef, Doctor, Engineer, Farmer, Fireman, Judge, Mechanic, Pilot, Police and Waiter) and copy it to your python project's folder.
Then create a python file and give it a name; an example is **FirstCustomPrediction.py**.
Then write the code below into the python file:

### FirstCustomPrediction.py

```python
from imageai.Classification.Custom import CustomImageClassification
import os

execution_path = os.getcwd()

prediction = CustomImageClassification()
prediction.setModelTypeAsResNet50()
prediction.setModelPath(os.path.join(execution_path, "idenprof_resnet_ex-056_acc-0.993062.h5"))
prediction.setJsonPath(os.path.join(execution_path, "idenprof.json"))
prediction.loadModel(num_objects=10)

predictions, probabilities = prediction.classifyImage(os.path.join(execution_path, "4.jpg"), result_count=5)

for eachPrediction, eachProbability in zip(predictions, probabilities):
    print(eachPrediction + " : " + eachProbability)
```

**Sample Result:**

![Sample Result](../../data-images/4.jpg)
```
mechanic : 76.82620286941528
chef : 10.106072574853897
waiter : 4.036874696612358
police : 2.6663416996598244
pilot : 2.239348366856575
```

The code above works as follows:
```python
from imageai.Classification.Custom import CustomImageClassification
import os
```
The code above imports the **ImageAI** library for custom image prediction and the Python **os** module.

```python
execution_path = os.getcwd()
```

The above line obtains the path to the folder that contains your python file (in this example, your FirstCustomPrediction.py).

```python
prediction = CustomImageClassification()
prediction.setModelTypeAsResNet50()
prediction.setModelPath(os.path.join(execution_path, "idenprof_resnet_ex-056_acc-0.993062.h5"))
prediction.setJsonPath(os.path.join(execution_path, "idenprof.json"))
prediction.loadModel(num_objects=10)
```

In the lines above, we created an instance of the `CustomImageClassification()`
 class in the first line, then we set the model type of the prediction object to ResNet50 by calling `.setModelTypeAsResNet50()`
  in the second line, we set the model path of the prediction object to the path of the custom model file (`idenprof_resnet_ex-056_acc-0.993062.h5`) we copied to the python file folder
  in the third line, we set the path to the model_class.json of the model in the fourth line, and in the fifth line we loaded the model and passed the number of objects that can be predicted by the model.

```python
predictions, probabilities = prediction.classifyImage(os.path.join(execution_path, "4.jpg"), result_count=5)
```

In the above line, we defined 2 variables to be equal to the result of the function called to predict an image, which is the `.classifyImage()` function, into which we passed the path to our image and also stated the number of prediction results we want to have (values from 1 to 10 in this case) by passing `result_count=5`. The `.classifyImage()` function will return 2 array objects with the first (**predictions**) being an array of predictions and the second (**probabilities**) being an array of the corresponding percentage probability for each prediction.

```python
for eachPrediction, eachProbability in zip(predictions, probabilities):
    print(eachPrediction + " : " + eachProbability)
```

The above lines obtain each object in the **predictions** array, and also obtain the corresponding percentage probability from the **probabilities** array, and finally print the result of both to the console.

**CustomImageClassification** class also supports the multiple predictions, input types and prediction speeds that are contained
in the **ImageClassification** class. Follow this [link](README.md) to see all the details.


### Custom Prediction with multiple models
<div id="custompredictionmultiple"></div>


In previous versions of **ImageAI**, running more than one custom model at once wasn't supported.
Now you can run multiple custom models, as many as your computer memory can accommodate.
See the example code below for running multiple custom prediction models.

```python
from imageai.Classification.Custom import CustomImageClassification
import os

execution_path = os.getcwd()

predictor = CustomImageClassification()
predictor.setModelPath(model_path=os.path.join(execution_path, "idenprof_resnet.h5"))
predictor.setJsonPath(model_json=os.path.join(execution_path, "idenprof.json"))
predictor.setModelTypeAsResNet50()
predictor.loadModel(num_objects=10)

predictor2 = CustomImageClassification()
predictor2.setModelPath(model_path=os.path.join(execution_path, "idenprof_inception_0.719500.h5"))
predictor2.setJsonPath(model_json=os.path.join(execution_path, "idenprof.json"))
predictor2.setModelTypeAsInceptionV3()
predictor2.loadModel(num_objects=10)

results, probabilities = predictor.classifyImage(image_input=os.path.join(execution_path, "9.jpg"), result_count=5)
print(results)
print(probabilities)


results2, probabilities2 = predictor2.classifyImage(image_input=os.path.join(execution_path, "9.jpg"),
                                                       result_count=5)
print(results2)
print(probabilities2)
print("-------------------------------")
```

### Documentation

We have provided full documentation for all **ImageAI** classes and functions in 3 major languages. Find links below:

* Documentation - **English Version  [https://imageai.readthedocs.io](https://imageai.readthedocs.io)**
* Documentation - **Chinese Version  [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)**
* Documentation - **French Version  [https://imageai-fr.readthedocs.io](https://imageai-fr.readthedocs.io)**


================================================
FILE: imageai_tf_deprecated/Classification/CUSTOMTRAINING.md
================================================
# ImageAI : Custom Prediction Model Training 

---

**ImageAI** provides the most simple and powerful approach to training custom image prediction models
using state-of-the-art SqueezeNet, ResNet50, InceptionV3 and DenseNet
which you can load into the `imageai.Classification.Custom.CustomImageClassification` class. This allows
 you to train your own model on any set of images that corresponds to any type of objects/persons.
The training process generates a JSON file that maps the objects types in your image dataset
and creates lots of models. You will then pick the model with the highest accuracy and perform custom
image prediction using the model and the JSON file generated.

### TABLE OF CONTENTS
- <a href="#customtraining" > :white_square_button: Custom Model Training Prediction</a> 
- <a href="#savefullmodel" > :white_square_button: Saving Full Custom Model </a> 
- <a href="#idenproftraining" > :white_square_button: Training on the IdenProf Dataset</a> 
- <a href="#continuoustraining" > :white_square_button: Continuous Model Training </a> 
- <a href="#transferlearning" > :white_square_button: Transfer Learning (Training from a pre-trained model)</a>


### Custom Model Training
<div id="customtraining"></div>

Because model training is a compute-intensive task, we strongly advise you to perform this experiment using a computer with an NVIDIA GPU and the GPU version of Tensorflow installed. Performing model training on a CPU may take hours or days. With an NVIDIA GPU powered computer system, this will take a few hours.  You can use Google Colab for this experiment as it has an NVIDIA K80 GPU available.

To train a custom prediction model, you need to prepare the images you want to use to train the model.
You will prepare the images as follows:

1. Create a dataset folder with the name you will like your dataset to be called (e.g pets) 
2. In the dataset folder, create a folder by the name **train** 
3. In the dataset folder, create a folder by the name **test** 
4. In the train folder, create a folder for each object you want the model to predict and give the folder a name that corresponds to the respective object name (e.g dog, cat, squirrel, snake) 
5. In the test folder, create a folder for each object you want the model to predict and give
 the folder a name that corresponds to the respective object name (e.g dog, cat, squirrel, snake) 
6. In each folder present in the train folder, put the images of each object in its respective folder. These images are the ones to be used to train the model. To produce a model that can perform well in practical applications, I recommend you use about 500 or more images per object. 1000 images per object is just great 
7. In each folder present in the test folder, put about 100 to 200 images of each object in its respective folder. These images are the ones to be used to test the model as it trains 
8. Once you have done this, the structure of your image dataset folder should look like below:  
    ```
    pets//train//dog//dog-train-images
    pets//train//cat//cat-train-images
    pets//train//squirrel//squirrel-train-images
    pets//train//snake//snake-train-images 
    pets//test//dog//dog-test-images
    pets//test//cat//cat-test-images
    pets//test//squirrel//squirrel-test-images
    pets//test//snake//snake-test-images
    ```
9. Then your training code goes as follows:  
    ```python
    from imageai.Classification.Custom import ClassificationModelTrainer
    model_trainer = ClassificationModelTrainer()
    model_trainer.setModelTypeAsResNet50()
    model_trainer.setDataDirectory("pets")
    model_trainer.trainModel(num_objects=4, num_experiments=100, enhance_data=True, batch_size=32, show_network_summary=True)
    ```

 Yes! Just 5 lines of code and you can train any of the available 4 state-of-the-art Deep Learning algorithms on your custom dataset.
Now let's take a look at how the code above works.

```python
from imageai.Classification.Custom import ClassificationModelTrainer
model_trainer = ClassificationModelTrainer()
model_trainer.setModelTypeAsResNet50()
model_trainer.setDataDirectory("pets")
```

In the first line, we import the **ImageAI** model training class, then we define the model trainer in the second line,
 we set the network type in the third line and set the path to the image dataset we want to train the network on.

```python
model_trainer.trainModel(num_objects=4, num_experiments=100, enhance_data=True, batch_size=32, show_network_summary=True)
```

In the code above, we start the training process. The parameters stated in the function are as below:
- **num_objects** : this is to state the number of object types in the image dataset 
- **num_experiments** : this is to state the number of times the network will train over all the training images,
 which is also called epochs 
- **enhance_data (optional)** : This is used to state if we want the network to produce modified copies of the training
images for better performance. 
- **batch_size** : This is to state the number of images the network will process at once. The images
 are processed in batches until they are exhausted in each experiment performed. 
- **show_network_summary** : This is to state if the network should show the structure of the training
 network in the console.
 

When you start the training, you should see something like this in the console:
```
Total params: 23,608,202
Trainable params: 23,555,082
Non-trainable params: 53,120
____________________________________________________________________________________________________
Using Enhanced Data Generation
Found 4000 images belonging to 4 classes.
Found 800 images belonging to 4 classes.
JSON Mapping for the model classes saved to  C:\Users\User\PycharmProjects\ImageAITest\pets\json\model_class.json
Number of experiments (Epochs) :  100
```

As the training progresses, you will see results as follows in the console: 
```
Epoch 1/100
 1/25 [>.............................] - ETA: 52s - loss: 2.3026 - acc: 0.2500
 2/25 [=>............................] - ETA: 41s - loss: 2.3027 - acc: 0.1250
 3/25 [==>...........................] - ETA: 37s - loss: 2.2961 - acc: 0.1667
 4/25 [===>..........................] - ETA: 36s - loss: 2.2980 - acc: 0.1250
 5/25 [=====>........................] - ETA: 33s - loss: 2.3178 - acc: 0.1000
 6/25 [======>.......................] - ETA: 31s - loss: 2.3214 - acc: 0.0833
 7/25 [=======>......................] - ETA: 30s - loss: 2.3202 - acc: 0.0714
 8/25 [========>.....................] - ETA: 29s - loss: 2.3207 - acc: 0.0625
 9/25 [=========>....................] - ETA: 27s - loss: 2.3191 - acc: 0.0556
10/25 [===========>..................] - ETA: 25s - loss: 2.3167 - acc: 0.0750
11/25 [============>.................] - ETA: 23s - loss: 2.3162 - acc: 0.0682
12/25 [=============>................] - ETA: 21s - loss: 2.3143 - acc: 0.0833
13/25 [==============>...............] - ETA: 20s - loss: 2.3135 - acc: 0.0769
14/25 [===============>..............] - ETA: 18s - loss: 2.3132 - acc: 0.0714
15/25 [=================>............] - ETA: 16s - loss: 2.3128 - acc: 0.0667
16/25 [==================>...........] - ETA: 15s - loss: 2.3121 - acc: 0.0781
17/25 [===================>..........] - ETA: 13s - loss: 2.3116 - acc: 0.0735
18/25 [====================>.........] - ETA: 12s - loss: 2.3114 - acc: 0.0694
19/25 [=====================>........] - ETA: 10s - loss: 2.3112 - acc: 0.0658
20/25 [=======================>......] - ETA: 8s - loss: 2.3109 - acc: 0.0625
21/25 [========================>.....] - ETA: 7s - loss: 2.3107 - acc: 0.0595
22/25 [=========================>....] - ETA: 5s - loss: 2.3104 - acc: 0.0568
23/25 [==========================>...] - ETA: 3s - loss: 2.3101 - acc: 0.0543
24/25 [===========================>..] - ETA: 1s - loss: 2.3097 - acc: 0.0625Epoch 00000: saving model to C:\Users\Moses\Documents\Moses\W7\AI\Custom Datasets\IDENPROF\idenprof-small-test\idenprof\models\model_ex-000_acc-0.100000.h5

25/25 [==============================] - 51s - loss: 2.3095 - acc: 0.0600 - val_loss: 2.3026 - val_acc: 0.1000
```

Let us explain the details shown above: 
1. The line **Epoch 1/100** means the network is training the first experiment of the targeted 100 
2. The line `1/25 [>.............................] - ETA: 52s - loss: 2.3026 - acc: 0.2500` represents the number of batches that have been trained in the present experiment
3. The line  `Epoch 00000: saving model to C:\Users\User\PycharmProjects\ImageAITest\pets\models\model_ex-000_acc-0.100000.h5` refers to the model saved after the present experiment. The **ex-000** represents the experiment at this stage while the **acc-0.100000** and **val_acc: 0.1000** represent the accuracy of the model on the test images after the present experiment (the maximum value of accuracy is 1.0).  This result helps you to know the best performing model you can use for custom image prediction.  
 
 Once you are done training your custom model, you can use the `CustomImageClassification` class to perform image prediction with your model. Simply follow the link below.
[imageai/Classification/CUSTOMCLASSIFICATION.md](https://github.com/OlafenwaMoses/ImageAI/blob/master/imageai/Classification/CUSTOMCLASSIFICATION.md)


### Training on the IdenProf data
<div id="idenproftraining"></div>

A sample from the IdenProf Dataset used to train a Model for predicting professionals.
![](../../data-images/idenprof.jpg)

Below we provide a sample code to train on **IdenProf**, a dataset which contains images of 10 uniformed professionals. The code below will download the dataset and initiate the training:

```python
from io import open
import requests
import shutil
from zipfile import ZipFile
import os
from imageai.Classification.Custom import ClassificationModelTrainer

execution_path = os.getcwd()

TRAIN_ZIP_ONE = os.path.join(execution_path, "idenprof-train1.zip")
TRAIN_ZIP_TWO = os.path.join(execution_path, "idenprof-train2.zip")
TEST_ZIP = os.path.join(execution_path, "idenprof-test.zip")

DATASET_DIR = os.path.join(execution_path, "idenprof")
DATASET_TRAIN_DIR = os.path.join(DATASET_DIR, "train")
DATASET_TEST_DIR = os.path.join(DATASET_DIR, "test")

if(os.path.exists(DATASET_DIR) == False):
    os.mkdir(DATASET_DIR)
if(os.path.exists(DATASET_TRAIN_DIR) == False):
    os.mkdir(DATASET_TRAIN_DIR)
if(os.path.exists(DATASET_TEST_DIR) == False):
    os.mkdir(DATASET_TEST_DIR)

if(len(os.listdir(DATASET_TRAIN_DIR)) < 10):
    if(os.path.exists(TRAIN_ZIP_ONE) == False):
        print("Downloading idenprof-train1.zip")
        data = requests.get("https://github.com/OlafenwaMoses/IdenProf/releases/download/v1.0/idenprof-train1.zip", stream = True)
        with open(TRAIN_ZIP_ONE, "wb") as file:
            shutil.copyfileobj(data.raw, file)
        del data
    if (os.path.exists(TRAIN_ZIP_TWO) == False):
        print("Downloading idenprof-train2.zip")
        data = requests.get("https://github.com/OlafenwaMoses/IdenProf/releases/download/v1.0/idenprof-train2.zip", stream=True)
        with open(TRAIN_ZIP_TWO, "wb") as file:
            shutil.copyfileobj(data.raw, file)
        del data
    print("Extracting idenprof-train1.zip")
    extract1 = ZipFile(TRAIN_ZIP_ONE)
    extract1.extractall(DATASET_TRAIN_DIR)
    extract1.close()
    print("Extracting idenprof-train2.zip")
    extract2 = ZipFile(TRAIN_ZIP_TWO)
    extract2.extractall(DATASET_TRAIN_DIR)
    extract2.close()

if(len(os.listdir(DATASET_TEST_DIR)) < 10):
    if (os.path.exists(TEST_ZIP) == False):
        print("Downloading idenprof-test.zip")
        data = requests.get("https://github.com/OlafenwaMoses/IdenProf/releases/download/v1.0/idenprof-test.zip", stream=True)
        with open(TEST_ZIP, "wb") as file:
            shutil.copyfileobj(data.raw, file)
        del data
    print("Extracting idenprof-test.zip")
    extract = ZipFile(TEST_ZIP)
    extract.extractall(DATASET_TEST_DIR)
    extract.close()


model_trainer = ClassificationModelTrainer()
model_trainer.setModelTypeAsResNet50()
model_trainer.setDataDirectory(DATASET_DIR)
model_trainer.trainModel(num_objects=10, num_experiments=100, enhance_data=True, batch_size=32, show_network_summary=True)
```

### Continuous Model Training
<div id="continuoustraining"></div>

**ImageAI** now allows you to continue training your custom model on your previously saved model.
This is useful in cases of incomplete training due to compute time limits/large size of dataset, or should you intend to further train your model.
Kindly note that **continuous training** is for using a previously saved model to train on the same dataset the model was trained on.
All you need to do is specify the `continue_from_model` parameter to the path of the previously saved model in your `trainModel()` function.
See an example code below.

```python
from imageai.Classification.Custom import ClassificationModelTrainer
import os

trainer = ClassificationModelTrainer()
trainer.setModelTypeAsDenseNet121()
trainer.setDataDirectory("idenprof")
trainer.trainModel(num_objects=10, num_experiments=50, enhance_data=True, batch_size=8, show_network_summary=True, continue_from_model="idenprof_densenet-0.763500.h5")
```

### Transfer Learning (Training from a pre-trained model)
<div id="transferlearning"></div>

From the feedback we have received over the past months, we discovered that most custom models trained with **ImageAI** were based on datasets with a small number of images, as they fall short of the minimum recommendation of 500 images per class of objects for achieving a viable accuracy. 

To ensure you can still train very accurate custom models using a small number of images, **ImageAI** now allows you to train by leveraging **transfer learning**. This means you can take any pre-trained **ResNet50**, **Squeezenet**, **InceptionV3** and **DenseNet121** model trained on larger datasets and use it to kickstart your custom model training.
All you need to do is specify the `transfer_from_model` parameter to the path of the pre-trained model, `initial_num_objects` parameter which corresponds to the number of objects in the previous dataset the pre-trained model was trained on, all in your `trainModel()` function. See an example code below, showing how to perform transfer learning from a ResNet50 model trained on the ImageNet dataset.

```python
from imageai.Classification.Custom import ClassificationModelTrainer
import os

trainer = ClassificationModelTrainer()
trainer.setModelTypeAsResNet50()
trainer.setDataDirectory("idenprof")
trainer.trainModel(num_objects=10, num_experiments=50, enhance_data=True, batch_size=32, show_network_summary=True,transfer_from_model="resnet50_imagenet_tf.2.0.h5", initial_num_objects=1000)
```


### Contact Developer
- **Moses Olafenwa**
    * _Email:_ guymodscientist@gmail.com
    * _Website:_ [https://moses.aicommons.science](https://moses.aicommons.science)
    * _Twitter:_ [@OlafenwaMoses](https://twitter.com/OlafenwaMoses)
    * _Medium:_ [@guymodscientist](https://medium.com/@guymodscientist)
    * _Facebook:_ [moses.olafenwa](https://facebook.com/moses.olafenwa)


### Documentation

We have provided full documentation for all **ImageAI** classes and functions in 3 major languages. Find links below:

* Documentation - **English Version  [https://imageai.readthedocs.io](https://imageai.readthedocs.io)**
* Documentation - **Chinese Version  [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)**
* Documentation - **French Version  [https://imageai-fr.readthedocs.io](https://imageai-fr.readthedocs.io)**


================================================
FILE: imageai_tf_deprecated/Classification/Custom/__init__.py
================================================
import tensorflow as tf
from PIL import Image
import time
import numpy as np
import os
import warnings
from matplotlib.cbook import deprecated
import json

class ClassificationModelTrainer:
    """
        This is the Classification Model training class, that allows you to define a deep learning network
        from the 4 available networks types supported by ImageAI which are MobileNetv2, ResNet50,
        InceptionV3 and DenseNet121.
    """

    def __init__(self):
        self.__modelType = ""
        self.__use_pretrained_model = False
        self.__data_dir = ""
        self.__train_dir = ""
        self.__test_dir = ""
        self.__logs_dir = ""
        self.__num_epochs = 10
        self.__trained_model_dir = ""
        self.__model_class_dir = ""
        self.__initial_learning_rate = 1e-3
        self.__model_collection = []


    def setModelTypeAsSqueezeNet(self):
        """
        Placeholder kept for callers that still request SqueezeNet.

        SqueezeNet is not supported by this class, so calling this method
        always fails and points the caller at 'setModelTypeAsMobileNetV2'.

        :raises ValueError: always
        """
        unsupported_message = "ImageAI no longer support SqueezeNet. You can use MobileNetV2 instead by downloading the MobileNetV2 model and call the function 'setModelTypeAsMobileNetV2'"
        raise ValueError(unsupported_message)

    def setModelTypeAsMobileNetV2(self):
        """
        Select MobileNetV2 as the network for this training instance.

        Stores the identifier "mobilenetv2", which 'trainModel()' compares
        against when it decides which network to build.
        :return:
        """
        selected_network = "mobilenetv2"
        self.__modelType = selected_network

    @deprecated(since="2.1.6", message="'.setModelTypeAsResNet()' has been deprecated! Please use 'setModelTypeAsResNet50()' instead.")
    def setModelTypeAsResNet(self):
        """
        Deprecated alias of 'setModelTypeAsResNet50()'.

        Forwards to 'setModelTypeAsResNet50()' and hands back whatever that
        call returns.
        :return:
        """
        forwarded_result = self.setModelTypeAsResNet50()
        return forwarded_result

    def setModelTypeAsResNet50(self):
        """
        Select ResNet50 as the network for this training instance.

        Stores the identifier "resnet50", which 'trainModel()' compares
        against when it decides which network to build.
        :return:
        """
        selected_network = "resnet50"
        self.__modelType = selected_network

    
    @deprecated(since="2.1.6", message="'.setModelTypeAsDenseNet()' has been deprecated! Please use 'setModelTypeAsDenseNet121()' instead.")
    def setModelTypeAsDenseNet(self):
        """
        Deprecated alias of 'setModelTypeAsDenseNet121()'.

        Forwards to 'setModelTypeAsDenseNet121()' and hands back whatever that
        call returns.
        :return:
        """
        forwarded_result = self.setModelTypeAsDenseNet121()
        return forwarded_result

    def setModelTypeAsDenseNet121(self):
        """
        Select DenseNet121 as the network for this training instance.

        Stores the identifier "densenet121", which 'trainModel()' compares
        against when it decides which network to build.
        :return:
        """
        selected_network = "densenet121"
        self.__modelType = selected_network

    def setModelTypeAsInceptionV3(self):
        """
        Select InceptionV3 as the network for this training instance.

        Stores the identifier "inceptionv3", which 'trainModel()' compares
        against when it decides which network to build.
        :return:
        """
        selected_network = "inceptionv3"
        self.__modelType = selected_network

    def setDataDirectory(self, data_directory="", train_subdirectory="train", test_subdirectory="test",
                         models_subdirectory="models", json_subdirectory="json"):
        """
        'setDataDirectory()' records where the dataset lives and where training
        outputs will be placed. No directory is created or checked here; the
        paths are only computed and stored on the instance.

        - data_directory , the root folder of the dataset. It may have any name,
                 but it is expected to contain a training and a testing
                 sub-directory, each holding one folder per class/label with that
                 class's images inside:

                >> train >> class1 >> class1_train_images
                         >> class2 >> class2_train_images
                         >> class3 >> class3_train_images
                         >> class4 >> class4_train_images
                         >> class5 >> class5_train_images

                >> test >> class1 >> class1_test_images
                        >> class2 >> class2_test_images
                        >> class3 >> class3_test_images
                        >> class4 >> class4_test_images
                        >> class5 >> class5_test_images

        - train_subdirectory (optional), name of the training set folder inside 'data_directory'. Defaults to 'train'.
        - test_subdirectory (optional), name of the testing set folder inside 'data_directory'. Defaults to 'test'.
        - models_subdirectory (optional), name of the folder inside 'data_directory' for the output models. Defaults to 'models'.
        - json_subdirectory (optional), name of the folder inside 'data_directory' for the model classes json file. Defaults to 'json'.

        A 'logs' folder path inside 'data_directory' is always recorded as well.

        :param data_directory:
        :param train_subdirectory:
        :param test_subdirectory:
        :param models_subdirectory:
        :param json_subdirectory:
        :return:
        """

        dataset_root = data_directory
        self.__data_dir = dataset_root

        # Input locations (training and testing images).
        self.__train_dir, self.__test_dir = [
            os.path.join(dataset_root, folder_name)
            for folder_name in (train_subdirectory, test_subdirectory)
        ]

        # Output locations (saved models, class-mapping json, logs).
        self.__trained_model_dir, self.__model_class_dir, self.__logs_dir = [
            os.path.join(dataset_root, folder_name)
            for folder_name in (models_subdirectory, json_subdirectory, "logs")
        ]

    def lr_schedule(self, epoch):
        """
        Step-wise learning rate schedule.

        Starting from the stored initial learning rate, the rate is scaled
        down once 'epoch' passes a given fraction of the stored total number
        of epochs:

            > 40% of the epochs  ->  initial rate * 1e-1
            > 60% of the epochs  ->  initial rate * 1e-2
            > 80% of the epochs  ->  initial rate * 1e-3
            > 90% of the epochs  ->  initial rate * 1e-4

        Each threshold is truncated to an integer and the comparison is
        strict, so an epoch equal to a threshold still uses the previous step.

        :param epoch: index of the epoch the rate is requested for
        :return: the learning rate to use for that epoch
        """
        base_rate = self.__initial_learning_rate
        epoch_count = self.__num_epochs

        # (fraction of all epochs, multiplier), latest threshold first so the
        # first match is the strongest applicable reduction.
        reduction_steps = (
            (0.9, 1e-4),
            (0.8, 1e-3),
            (0.6, 1e-2),
            (0.4, 1e-1),
        )

        for fraction, multiplier in reduction_steps:
            if epoch > int(epoch_count * fraction):
                return base_rate * multiplier

        return base_rate


    def trainModel(self, num_objects, num_experiments=200, enhance_data=False, batch_size = 32, initial_learning_rate=1e-3, show_network_summary=False, training_image_size = 224, continue_from_model=None, transfer_from_model=None, transfer_with_full_training=True, initial_num_objects = None, save_full_model = False):

        """
        'trainModel()' function starts the model actual training. It accepts the following values:
        - num_objects , which is the number of classes present in the dataset that is to be used for training
        - num_experiments , also known as epochs, it is the number of times the network will train on all the training dataset
        - enhance_data (optional) , this is used to modify the dataset and create more instance of the training set to enhance the training result
        - batch_size (optional) , due to memory constraints, the network trains on a batch at once, until all the training set is exhausted. The value is set to 32 by default, but can be increased or decreased depending on the meormory of the compute used for training. The batch_size is conventionally set to 16, 32, 64, 128.
        - initial_learning_rate(optional) , this value is used to adjust the weights generated in the network. You rae advised to keep this value as it is if you don't have deep understanding of this concept.
        - show_network_summary(optional) , this value is used to show the structure of the network should you desire to see it. It is set to False by default
        - training_image_size(optional) , this value is used to define the image size on which the model will be trained. The value is 224 by default and is kept at a minimum of 100.
        - continue_from_model (optional) , this is used to set the path to a model file trained on the same dataset. It is primarily for continuos training from a previously saved model.
        - transfer_from_model (optional) , this is used to set the path to a model file trained on another dataset. It is primarily used to perform tramsfer learning.
        - transfer_with_full_training (optional) , this is used to set the pre-trained model to be re-trained across all the layers or only at the top layers.
        - initial_num_objects (required if 'transfer_from_model' is set ), this is used to set the number of objects the model used for transfer learning is trained on. If 'transfer_from_model' is set, this must be set as well.
        - save_full_model ( optional ), this is used to save the trained models with their network types. Any model saved by this specification can be loaded without specifying the network type.


        :param num_objects:
        :param num_experiments:
        :param enhance_data:
        :param batch_size:
        :param initial_learning_rate:
        :param show_network_summary:
        :param training_image_size:
        :param continue_from_model:
        :param transfer_from_model:
        :param initial_num_objects:
        :param save_full_model:
        :return:
        """
        self.__num_epochs = num_experiments
        self.__initial_learning_rate = initial_learning_rate
        lr_scheduler = tf.keras.callbacks.LearningRateScheduler(self.lr_schedule)


        if(training_image_size < 100):
            warnings.warn("The specified training_image_size {} is less than 100. Hence the training_image_size will default to 100.".format(training_image_size))
            training_image_size = 100


        if (self.__modelType == "mobilenetv2"):
            if (continue_from_model != None):
                model = tf.keras.applications.MobileNetV2(input_shape=(training_image_size, training_image_size, 3), weights=continue_from_model, classes=num_objects,
                include_top=True)
                if (show_network_summary == True):
                    print("Training using weights from a previouly model")
            elif (transfer_from_model != None):
                base_model = tf.keras.applications.MobileNetV2(input_shape=(training_image_size, training_image_size, 3), weights= transfer_from_model,
                include_top=False, pooling="avg")

                network = base_model.output
                network = tf.keras.layers.Dense(num_objects, activation='softmax',
                         use_bias=True)(network)
                
                model = tf.keras.model.Models(inputs=base_model.input, outputs=network)

                if (show_network_summary == True):
                    print("Training using weights from a pre-trained ImageNet model")
            else:
                base_model = tf.keras.applications.MobileNetV2(input_shape=(training_image_size, training_image_size, 3), weights= None, classes=num_objects,
                include_top=False, pooling="avg")
                
                network = base_model.output
                network = tf.keras.layers.Dense(num_objects, activation='softmax',
                         use_bias=True)(network)
                
                model = tf.keras.models.Model(inputs=base_model.input, outputs=network)

        elif (self.__modelType == "resnet50"):
            if (continue_from_model != None):
                model = tf.keras.applications.ResNet50(input_shape=(training_image_size, training_image_size, 3), weights=continue_from_model, classes=num_objects,
                include_top=True)
                if (show_network_summary == True):
                    print("Training using weights from a previouly model")
            elif (transfer_from_model != None):
                base_model = tf.keras.applications.ResNet50(input_shape=(training_image_size, training_image_size, 3), weights= transfer_from_model,
                include_top=False, pooling="avg")

                network = base_model.output
                network = tf.keras.layers.Dense(num_objects, activation='softmax',
                         use_bias=True)(network)
                
                model = tf.keras.model.Models(inputs=base_model.input, outputs=network)

                if (show_network_summary == True):
                    print("Training using weights from a pre-trained ImageNet model")
            else:
                base_model = tf.keras.applications.ResNet50(input_shape=(training_image_size, training_image_size, 3), weights= None, classes=num_objects,
                include_top=False, pooling="avg")

                network = base_model.output
                network = tf.keras.layers.Dense(num_objects, activation='softmax',
                         use_bias=True)(network)
                
                model = tf.keras.models.Model(inputs=base_model.input, outputs=network)

        elif (self.__modelType == "inceptionv3"):

            if (continue_from_model != None):
                model = tf.keras.applications.InceptionV3(input_shape=(training_image_size, training_image_size, 3), weights=continue_from_model, classes=num_objects,
                include_top=True)
                if (show_network_summary == True):
                    print("Training using weights from a previouly model")
            elif (transfer_from_model != None):
                base_model = tf.keras.applications.InceptionV3(input_shape=(training_image_size, training_image_size, 3), weights= transfer_from_model,
                include_top=False, pooling="avg")

                network = base_model.output
                network = tf.keras.layers.Dense(num_objects, activation='softmax',
                         use_bias=True)(network)
                
                model = tf.keras.model.Models(inputs=base_model.input, outputs=network)

                if (show_network_summary == True):
                    print("Training using weights from a pre-trained ImageNet model")
            else:
                base_model = tf.keras.applications.InceptionV3(input_shape=(training_image_size, training_image_size, 3), weights= None, classes=num_objects,
                include_top=False, pooling="avg")

                network = base_model.output
                network = tf.keras.layers.Dense(num_objects, activation='softmax',
                         use_bias=True)(network)
                
                model = tf.keras.models.Model(inputs=base_model.input, outputs=network)

            base_model = tf.keras.applications.InceptionV3(input_shape=(training_image_size, training_image_size, 3), weights= None, classes=num_objects,
                include_top=False, pooling="avg")

        elif (self.__modelType == "densenet121"):
            if (continue_from_model != None):
                model = tf.keras.applications.DenseNet121(input_shape=(training_image_size, training_image_size, 3), weights=continue_from_model, classes=num_objects,
                include_top=True)
                if (show_network_summary == True):
                    print("Training using weights from a previouly model")
            elif (transfer_from_model != None):
                base_model = tf.keras.applications.DenseNet121(input_shape=(training_image_size, training_image_size, 3), weights= transfer_from_model,
                include_top=False, pooling="avg")

                network = base_model.output
                network = tf.keras.layers.Dense(num_objects, activation='softmax',
                         use_bias=True)(network)
                
                model = tf.keras.model.Models(inputs=base_model.input, outputs=network)

                if (show_network_summary == True):
                    print("Training using weights from a pre-trained ImageNet model")
            else:
                base_model = tf.keras.applications.DenseNet121(input_shape=(training_image_size, training_image_size, 3), weights= None, classes=num_objects,
                include_top=False, pooling="avg")

                network = base_model.output
                network = tf.keras.layers.Dense(num_objects, activation='softmax',
                         use_bias=True)(network)
                
                model = tf.keras.models.Model(inputs=base_model.input, outputs=network)

            base_model = tf.keras.applications.DenseNet121(input_shape=(training_image_size, training_image_size, 3), weights= None, classes=num_objects,
                include_top=False, pooling="avg")


        optimizer = tf.keras.optimizers.Adam(lr=self.__initial_learning_rate, decay=1e-4)
        model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
        if (show_network_summary == True):
            model.summary()

        model_name = 'model_ex-{epoch:03d}_acc-{accuracy:03f}.h5'

        log_name = '{}_lr-{}_{}'.format(self.__modelType, initial_learning_rate, time.strftime("%Y-%m-%d-%H-%M-%S"))

        if not os.path.isdir(self.__trained_model_dir):
            os.makedirs(self.__trained_model_dir)

        if not os.path.isdir(self.__model_class_dir):
            os.makedirs(self.__model_class_dir)

        if not os.path.isdir(self.__logs_dir):
            os.makedirs(self.__logs_dir)

        model_path = os.path.join(self.__trained_model_dir, model_name)


        logs_path = os.path.join(self.__logs_dir, log_name)
        if not os.path.isdir(logs_path):
            os.makedirs(logs_path)

        save_weights_condition = True

        if(save_full_model == True ):
            save_weights_condition = False


        checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=model_path,
                                     monitor='accuracy',
                                     verbose=1,
                                     save_weights_only=save_weights_condition,
                                     save_best_only=True,
                                     period=1)


        tensorboard = tf.keras.callbacks.TensorBoard(log_dir=logs_path, 
                                  histogram_freq=0, 
                                  write_graph=False, 
                                  write_images=False)
        

        if (enhance_data == True):
            print("Using Enhanced Data Generation")

        height_shift = 0
        width_shift = 0
        if (enhance_data == True):
            height_shift = 0.1
            width_shift = 0.1

        train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
            rescale=1. / 255,
            horizontal_flip=enhance_data, height_shift_range=height_shift, width_shift_range=width_shift)

        test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
            rescale=1. / 255)

        train_generator = train_datagen.flow_from_directory(self.__train_dir, target_size=(training_image_size, training_image_size),
                                                            batch_size=batch_size,
                                                            class_mode="categorical")
        test_generator = test_datagen.flow_from_directory(self.__test_dir, target_size=(training_image_size, training_image_size),
                                                          batch_size=batch_size,
                                                          class_mode="categorical")

        class_indices = train_generator.class_indices
        class_json = {}
        for eachClass in class_indices:
            class_json[str(class_indices[eachClass])] = eachClass

        with open(os.path.join(self.__model_class_dir, "model_class.json"), "w+") as json_file:
            json.dump(class_json, json_file, indent=4, separators=(",", " : "),
                      ensure_ascii=True)
            json_file.close()
        print("JSON Mapping for the model classes saved to ", os.path.join(self.__model_class_dir, "model_class.json"))

        num_train = len(train_generator.filenames)
        num_test = len(test_generator.filenames)
        print("Number of experiments (Epochs) : ", self.__num_epochs)

        
        model.fit_generator(train_generator, steps_per_epoch=int(num_train / batch_size), epochs=self.__num_epochs,
                            validation_data=test_generator,
                            validation_steps=int(num_test / batch_size), callbacks=[checkpoint, lr_scheduler])


class CustomImageClassification:
    """
    This is the image classification class for custom models trained with the 'ClassificationModelTrainer' class.
    It provides support for 4 different models which are: ResNet50, MobileNetV2, DenseNet121 and Inception V3.
    After instantiating this class, you can set its properties and make image classification using its
    pre-defined functions.

    The following functions are required to be called before a classification can be made
    * setModelPath() , path to your custom model
    * setJsonPath() , path to your custom model's corresponding JSON file
    * At least one of the following and it must correspond to the model set in the setModelPath()
    [setModelTypeAsMobileNetV2(), setModelTypeAsResNet50(), setModelTypeAsDenseNet121(), setModelTypeAsInceptionV3()]
    * loadModel() [This must be called once only before making a classification]
      (or loadFullModel() for a model file saved with its full architecture)

    Once the above functions have been called, you can call the classifyImage() function of the classification instance
    object at anytime to predict an image.
    """
    def __init__(self):
        self.__modelType = ""
        self.modelPath = ""
        self.jsonPath = ""
        self.numObjects = 10
        # Mapping of class index (as a string) to class name, read from the JSON file.
        self.__model_classes = dict()
        self.__modelLoaded = False
        # Only the first entry is used by classifyImage().
        self.__model_collection = []
        self.__input_image_size = 224

    def setModelPath(self, model_path):
        """
        'setModelPath()' function is required and is used to set the file path to the model adopted from the list of the
        available 4 model types. The model path must correspond to the model type set for the classification instance object.

        :param model_path:
        :return:
        """
        self.modelPath = model_path

    def setJsonPath(self, model_json):
        """
        'setJsonPath()' function is required and is used to set the file path to the JSON file that maps
        the model's class indexes to class names.

        :param model_json:
        :return:
        """
        self.jsonPath = model_json

    def setModelTypeAsMobileNetV2(self):
        """
        'setModelTypeAsMobileNetV2()' is used to set the model type to the MobileNetV2 model
        for the classification instance object .
        :return:
        """
        self.__modelType = "mobilenetv2"

    def setModelTypeAsResNet50(self):
        """
         'setModelTypeAsResNet50()' is used to set the model type to the ResNet50 model
                for the classification instance object .
        :return:
        """
        self.__modelType = "resnet50"

    def setModelTypeAsDenseNet121(self):
        """
         'setModelTypeAsDenseNet121()' is used to set the model type to the DenseNet121 model
                for the classification instance object .
        :return:
        """
        self.__modelType = "densenet121"

    def setModelTypeAsInceptionV3(self):
        """
         'setModelTypeAsInceptionV3()' is used to set the model type to the InceptionV3 model
                for the classification instance object .
        :return:
        """
        self.__modelType = "inceptionv3"

    def __setInputImageSize(self, classification_speed):
        """
        Maps a classification speed name to the square input image size used by the model.
        An unrecognised value leaves the current input image size unchanged.

        :param classification_speed:
        :return:
        """
        speed_to_size = {"normal": 224, "fast": 160, "faster": 120, "fastest": 100}
        self.__input_image_size = speed_to_size.get(classification_speed, self.__input_image_size)

    def __loadClassMapping(self):
        """
        Reads the class index to class name mapping from the JSON file set with setJsonPath().

        :return:
        """
        # The context manager closes the file handle even if parsing fails.
        with open(self.jsonPath) as json_file:
            self.__model_classes = json.load(json_file)

    def loadModel(self, classification_speed="normal", num_objects=10):
        """
        'loadModel()' function is used to load the model structure into the program from the file path defined
        in the setModelPath() function. This function receives an optional value which is "classification_speed".
        The value is used to reduce the time it takes to classify an image, down to about 50% of the normal time,
        with just slight changes or drop in classification accuracy, depending on the nature of the image.
        * classification_speed (optional); Acceptable values are "normal", "fast", "faster" and "fastest"
        * num_objects (optional); the number of classes the model was trained to recognize

        Raises a ValueError if no model type has been set or if the model file cannot be loaded.

        :param classification_speed :
        :param num_objects :
        :return:
        """

        self.__loadClassMapping()
        self.__setInputImageSize(classification_speed)

        # The network is only built once; later calls just refresh the class mapping and image size.
        if self.__modelLoaded:
            return

        input_shape = (self.__input_image_size, self.__input_image_size, 3)

        if self.__modelType == "":
            raise ValueError("You must set a valid model type before loading the model.")

        elif self.__modelType == "mobilenetv2":
            try:
                model = tf.keras.applications.MobileNetV2(input_shape=input_shape, weights=self.modelPath, classes=num_objects)
                self.__model_collection.append(model)
                self.__modelLoaded = True
            except Exception as error:
                raise ValueError("An error occured. Ensure your model file is a MobileNetV2 Model and is located in the path {}".format(self.modelPath)) from error

        elif self.__modelType == "resnet50":
            try:
                model = tf.keras.applications.ResNet50(input_shape=input_shape, weights=None, classes=num_objects)
                model.load_weights(self.modelPath)
                self.__model_collection.append(model)
                self.__modelLoaded = True
            except Exception as error:
                raise ValueError("An error occured. Ensure your model file is a ResNet50 Model and is located in the path {}".format(self.modelPath)) from error

        elif self.__modelType == "densenet121":
            try:
                model = tf.keras.applications.DenseNet121(input_shape=input_shape, weights=self.modelPath, classes=num_objects)
                self.__model_collection.append(model)
                self.__modelLoaded = True
            except Exception as error:
                raise ValueError("An error occured. Ensure your model file is a DenseNet121 Model and is located in the path {}".format(self.modelPath)) from error

        elif self.__modelType == "inceptionv3":
            try:
                model = tf.keras.applications.InceptionV3(input_shape=input_shape, weights=self.modelPath, classes=num_objects)
                self.__model_collection.append(model)
                self.__modelLoaded = True
            except Exception as error:
                raise ValueError("An error occured. Ensure your model file is in {}".format(self.modelPath)) from error

    def loadFullModel(self, classification_speed="normal", num_objects=10):
        """
        'loadFullModel()' function is used to load the model structure into the program from the file path defined
        in the setModelPath() function. As opposed to the 'loadModel()' function, you don't need to specify the model type. This means you can load any Keras model trained with or without ImageAI and perform image prediction.
        - classification_speed (optional), Acceptable values are "normal", "fast", "faster" and "fastest"
        - num_objects (optional), the number of objects the model is trained to recognize

        :param classification_speed:
        :param num_objects:
        :return:
        """

        self.numObjects = num_objects
        self.__loadClassMapping()
        self.__setInputImageSize(classification_speed)

        if not self.__modelLoaded:
            model = tf.keras.models.load_model(filepath=self.modelPath)
            self.__model_collection.append(model)
            self.__modelLoaded = True
            self.__modelType = "full"

    def getModels(self):
        """
        'getModels()' provides access to the internal model collection. Helpful if models are used down the line with tools like lime.
        :return:
        """
        return self.__model_collection


    def classifyImage(self, image_input, result_count=5, input_type="file"):
        """
        'classifyImage()' function is used to classify a given image by receiving the following arguments:
            * input_type (optional) , the type of input to be parsed. Acceptable values are "file", "array" and "stream"
            * image_input , file path/numpy array/image file stream of the image.
            * result_count (optional) , the number of classifications to be sent. The default is 5.

        This function returns 2 arrays namely 'classification_results' and 'classification_probabilities'. The 'classification_results'
        contains possible objects classes arranged in descending of their percentage probabilities. The 'classification_probabilities'
        contains the percentage probability of each object class. The position of each object class in the 'classification_results'
        array corresponds with the positions of the percentage probability in the 'classification_probabilities' array.

        Raises a ValueError if no model has been loaded, if 'input_type' is not supported, if the image
        cannot be read, or if the prediction fails.

        :param input_type:
        :param image_input:
        :param result_count:
        :return classification_results, classification_probabilities:
        """
        classification_results = []
        classification_probabilities = []
        if not self.__modelLoaded:
            raise ValueError("You must call the loadModel() function before making classification.")

        target_size = (self.__input_image_size, self.__input_image_size)

        if input_type == "file":
            try:
                image_to_predict = tf.keras.preprocessing.image.load_img(image_input, target_size=target_size)
                image_to_predict = tf.keras.preprocessing.image.img_to_array(image_to_predict, data_format="channels_last")
                image_to_predict = np.expand_dims(image_to_predict, axis=0)
            except Exception as error:
                raise ValueError("You have set a path to an invalid image file.") from error
        elif input_type == "array":
            try:
                # Normalise to 3-channel RGB so grayscale/RGBA arrays match the model's input shape,
                # as load_img() already does for the "file" input type.
                pil_image = Image.fromarray(np.uint8(image_input)).convert("RGB")
                pil_image = pil_image.resize(target_size)
                image_to_predict = np.asarray(np.expand_dims(pil_image, axis=0), dtype=np.float64)
            except Exception as error:
                raise ValueError("You have parsed in a wrong numpy array for the image") from error
        elif input_type == "stream":
            try:
                # Same RGB normalisation as for the "array" input type.
                pil_image = Image.open(image_input).convert("RGB")
                pil_image = pil_image.resize(target_size)
                image_to_predict = np.asarray(np.expand_dims(pil_image, axis=0), dtype=np.float64)
            except Exception as error:
                raise ValueError("You have parsed in a wrong stream for the image") from error
        else:
            # Without this guard an unknown input type would surface later as an UnboundLocalError.
            raise ValueError("Unsupported input_type '{}'. Acceptable values are \"file\", \"array\" and \"stream\".".format(input_type))

        # No preprocessing branch exists for "resnet50": the pixels are passed through unchanged.
        if self.__modelType == "mobilenetv2":
            image_to_predict = tf.keras.applications.mobilenet_v2.preprocess_input(image_to_predict)
        elif self.__modelType == "full":
            image_to_predict = tf.keras.applications.mobilenet_v2.preprocess_input(image_to_predict)
        elif self.__modelType == "inceptionv3":
            image_to_predict = tf.keras.applications.inception_v3.preprocess_input(image_to_predict)
        elif self.__modelType == "densenet121":
            image_to_predict = tf.keras.applications.densenet.preprocess_input(image_to_predict)

        try:
            model = self.__model_collection[0]
            prediction = model.predict(image_to_predict, steps=1)

            for pred in prediction:
                # Reverse first, then truncate: same ordering as taking the last 'result_count'
                # entries, but result_count=0 now yields no results instead of every class.
                top_indices = pred.argsort()[::-1][:result_count]
                for i in top_indices:
                    classification_results.append(str(self.__model_classes[str(i)]))
                    classification_probabilities.append(pred[i] * 100)

        except Exception as error:
            raise ValueError("Error. Ensure your input image is valid") from error

        return classification_results, classification_probabilities


    @deprecated(since="2.1.6", message="'.predictImage()' has been deprecated! Please use 'classifyImage()' instead.")
    def predictImage(self, image_input, result_count=5, input_type="file"):
        """
        Deprecated alias of 'classifyImage()'; forwards all arguments unchanged.

        :param image_input:
        :param result_count:
        :param input_type:
        :return classification_results, classification_probabilities:
        """

        return self.classifyImage(image_input, result_count, input_type)

================================================
FILE: imageai_tf_deprecated/Classification/README.md
================================================
# ImageAI : Image Prediction
A **DeepQuest AI** project [https://deepquestai.com](https://deepquestai.com)

---

### TABLE OF CONTENTS
- <a href="#firstprediction" > :white_square_button: First Prediction</a>
- <a href="#predictionspeed" > :white_square_button: Prediction Speed</a>
- <a href="#inputtype" > :white_square_button: Image Input Types</a>
- <a href="#threadprediction" > :white_square_button: Prediction in MultiThreading</a>
- <a href="#documentation" > :white_square_button: Documentation</a>

ImageAI provides 4 different algorithms and model types to perform image prediction.
To perform image prediction on any picture, take the following simple steps.  The 4 algorithms provided for
 image prediction include **MobileNetV2**, **ResNet50**, **InceptionV3** and **DenseNet121**. Each of these
  algorithms has its own model file, which you must use depending on your choice of algorithm. To download the
   model file for your choice of algorithm, click on any of the links below:
   
- **[MobileNetV2](https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/mobilenet_v2.h5)** _(Size = 4.82 mb, fastest prediction time and moderate accuracy)_
- **[ResNet50](https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/resnet50_imagenet_tf.2.0.h5)** by Microsoft Research _(Size = 98 mb, fast prediction time and high accuracy)_
 - **[InceptionV3](https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/inception_v3_weights_tf_dim_ordering_tf_kernels.h5)** by Google Brain team _(Size = 91.6 mb, slow prediction time and higher accuracy)_
 - **[DenseNet121](https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/DenseNet-BC-121-32.h5)** by Facebook AI Research _(Size = 31.6 mb, slower prediction time and highest accuracy)_

 Great! Once you have downloaded this model file, start a new python project, and then copy the model file to your project
     folder where your python files (.py files) will be . Download the image below, or take any image on your computer
 and copy it to your python project's folder. Then create a python file and give it a name; an example is `FirstPrediction.py`.
      Then write the code below into the python file:
      
### FirstPrediction.py
<div id="firstprediction" ></div>

```python
from imageai.Classification import ImageClassification
import os

execution_path = os.getcwd()

prediction = ImageClassification()
prediction.setModelTypeAsResNet50()
prediction.setModelPath(os.path.join(execution_path, "resnet50_imagenet_tf.2.0.h5"))
prediction.loadModel()

predictions, probabilities = prediction.classifyImage(os.path.join(execution_path, "1.jpg"), result_count=5 )
for eachPrediction, eachProbability in zip(predictions, probabilities):
    print(eachPrediction , " : " , eachProbability)
```

Sample Result:
![](../../data-images/1.jpg)

```
convertible : 52.459555864334106
sports_car : 37.61284649372101
pickup : 3.1751200556755066
car_wheel : 1.817505806684494
minivan : 1.7487050965428352
```

The code above works as follows:
```python
from imageai.Classification import ImageClassification
import os
```
The code above imports the `ImageClassification` class from the `ImageAI` library and the Python `os` module.
```python
execution_path = os.getcwd()
```
The above line obtains the path to the folder that contains your python file (in this example, your FirstPrediction.py).

```python
prediction = ImageClassification()
prediction.setModelTypeAsResNet50()
prediction.setModelPath(os.path.join(execution_path, "resnet50_imagenet_tf.2.0.h5"))
```
In the lines above, we created an instance of the `ImageClassification()` class in the first line, then we set the model type of the prediction object to ResNet50 by calling `.setModelTypeAsResNet50()` in the second line, and then we set the model path of the prediction object to the path of the model file (`resnet50_imagenet_tf.2.0.h5`) we copied to the python file folder in the third line.

```python
predictions, probabilities = prediction.classifyImage(os.path.join(execution_path, "1.jpg"), result_count=5 )
```

In the above line, we defined 2 variables to hold the values returned by the function that classifies an image, which is the `.classifyImage()` function. We passed it the path to our image and stated the number of prediction results we want (values from 1 to 1000) by passing `result_count=5`. The `.classifyImage()` function returns 2 array objects, with the first (**predictions**) being an array of predictions and the second (**probabilities**) being an array of the corresponding percentage probability for each prediction.

```python
for eachPrediction, eachProbability in zip(predictions, probabilities):
    print(eachPrediction, " : " , eachProbability)
```
The above lines obtain each object in the **predictions** array, together with its corresponding percentage probability from the **probabilities** array, and print both to the console.


### Prediction Speed
<div id="predictionspeed"></div>

**ImageAI** now provides prediction speeds for all image prediction tasks. The prediction speeds allow you to reduce the prediction time by between 20% and 60%, with only slight changes to the prediction results. The available prediction speeds are **"normal"** (default), **"fast"**, **"faster"** and **"fastest"**.
All you need to do is to state the speed mode you desire when loading the model as seen below.

```python
prediction.loadModel(prediction_speed="fast")
```

To observe the differences in the prediction speeds, look below for each speed applied to multiple prediction with time taken to predict and predictions given. The results below are obtained from predictions performed on a Windows 8 laptop with Intel Celeron N2820 CPU, with processor speed of 2.13GHz

**Prediction Speed = "normal" , Prediction Time = 5.9 seconds**
```
convertible : 52.459555864334106
sports_car : 37.61284649372101
pickup : 3.1751200556755066
car_wheel : 1.817505806684494
minivan : 1.7487050965428352
-----------------------
toilet_tissue : 13.99008333683014
jeep : 6.842949986457825
car_wheel : 6.71963095664978
seat_belt : 6.704962253570557
minivan : 5.861184373497963
-----------------------
bustard : 52.03368067741394
vulture : 20.936034619808197
crane : 10.620515048503876
kite : 10.20539253950119
white_stork : 1.6472270712256432
-----------------------
```

**Prediction Speed = "fast" , Prediction Time = 3.4 seconds**
```
sports_car : 55.5136501789093
pickup : 19.860029220581055
convertible : 17.88402795791626
tow_truck : 2.357563190162182
car_wheel : 1.8646160140633583
-----------------------
drum : 12.241223454475403
toilet_tissue : 10.96322312951088
car_wheel : 10.776633024215698
dial_telephone : 9.840480983257294
toilet_seat : 8.989936858415604
-----------------------
vulture : 52.81011462211609
bustard : 45.628002285957336
kite : 0.8065823465585709
goose : 0.3629807382822037
crane : 0.21266008261591196
-----------------------
```

**Prediction Speed = "faster" , Prediction Time = 2.7 seconds**
```
sports_car : 79.90474104881287
tow_truck : 9.751049429178238
convertible : 7.056044787168503
racer : 1.8735893070697784
car_wheel : 0.7379394955933094
-----------------------
oil_filter : 73.52778315544128
jeep : 11.926891654729843
reflex_camera : 7.9965077340602875
Polaroid_camera : 0.9798810817301273
barbell : 0.8661789819598198
-----------------------
vulture : 93.00530552864075
bustard : 6.636220961809158
kite : 0.15161558985710144
bald_eagle : 0.10513027664273977
crane : 0.05982434959150851
-----------------------
```

**Prediction Speed = "fastest" , Prediction Time = 2.2 seconds**
```
tow_truck : 62.5033438205719
sports_car : 31.26143217086792
racer : 2.2139860317111015
fire_engine : 1.7813067883253098
ambulance : 0.8790366351604462
-----------------------
reflex_camera : 94.00787949562073
racer : 2.345871739089489
jeep : 1.6016140580177307
oil_filter : 1.4121259562671185
lens_cap : 0.1283118617720902
-----------------------
kite : 98.5377550125122
vulture : 0.7469987496733665
bustard : 0.36855682265013456
bald_eagle : 0.2437378279864788
great_grey_owl : 0.0699841941241175
-----------------------
```

**PLEASE NOTE:**  When adjusting speed modes, it is best to use models that have higher accuracies like the DenseNet or InceptionV3 models, or use it in case scenarios where the images predicted are iconic.


### Image Input Types
<div id="inputtype"></div>

Previous versions of **ImageAI** supported only file inputs and accepted only file paths to an image for image prediction.
Now, **ImageAI** supports 3 input types which are **file path to image file**(default), **numpy array of image** and **image file stream**.
This means you can now perform image prediction in production applications such as on a web server and system
 that returns file in any of the above stated formats.

To perform image prediction with numpy array or file stream input, you just need to state the input type
in the `.classifyImage()` function. See example below.

```python
predictions, probabilities = prediction.classifyImage(image_array, result_count=5 , input_type="array" ) # For numpy array input type
predictions, probabilities = prediction.classifyImage(image_stream, result_count=5 , input_type="stream" ) # For file stream input type
```

### Prediction in MultiThreading
<div id="threadprediction"></div>

When developing programs that run heavy tasks on the default thread, like User Interfaces (UI),
 you should consider running your predictions in a new thread. When running image prediction using ImageAI in
 a new thread, you must take note of the following:
- You can create your prediction object, set its model type, set model path and json path
outside the new thread.
- The `.loadModel()` must be in the new thread and image prediction (`classifyImage()`) must take place in the new thread.

Take a look at the sample code below on image prediction using multithreading:
```python
from imageai.Classification import ImageClassification
import os
import threading

execution_path = os.getcwd()

prediction = ImageClassification()
prediction.setModelTypeAsResNet50()
prediction.setModelPath( os.path.join(execution_path, "resnet50_imagenet_tf.2.0.h5"))

picturesfolder = os.environ["USERPROFILE"] + "\\Pictures\\"
allfiles = os.listdir(picturesfolder)

class PredictionThread(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)
    def run(self):
        prediction.loadModel()
        for eachPicture in allfiles:
            if eachPicture.endswith(".png") or eachPicture.endswith(".jpg"):
                predictions, probabilities = prediction.classifyImage(picturesfolder + eachPicture, result_count=1)
                for eachPrediction, eachProbability in zip(predictions, probabilities):
                    print(eachPrediction , " : " , eachProbability)

predictionThread = PredictionThread ()
predictionThread.start()

```


### Documentation

We have provided full documentation for all **ImageAI** classes and functions in 3 major languages. Find links below:

* Documentation - **English Version  [https://imageai.readthedocs.io](https://imageai.readthedocs.io)**
* Documentation - **Chinese Version  [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)**
* Documentation - **French Version  [https://imageai-fr.readthedocs.io](https://imageai-fr.readthedocs.io)**


================================================
FILE: imageai_tf_deprecated/Classification/__init__.py
================================================
import tensorflow as tf
from PIL import Image
import numpy as np
from matplotlib.cbook import deprecated


class ImageClassification:
    """
    This is the image classification class in the ImageAI library. It provides support for 4 different models which are:
    ResNet50, MobileNetV2, DenseNet121 and InceptionV3. After instantiating this class, you can set its properties and
    make image classifications using its pre-defined functions.

    The following functions are required to be called before a classification can be made
    * setModelPath()
    * At least one of the following, and it must correspond to the model file set in setModelPath()
    [setModelTypeAsMobileNetV2(), setModelTypeAsResNet50(), setModelTypeAsDenseNet121(), setModelTypeAsInceptionV3()]
    * loadModel() [This must be called once only before making a classification]

    Once the above functions have been called, you can call the classifyImage() function of the classification instance
    object at anytime to classify an image.
    """
    def __init__(self):
        self.__modelType = ""
        self.modelPath = ""
        self.__modelLoaded = False
        self.__model_collection = []
        self.__input_image_size = 224

    def setModelPath(self, model_path):
        """
        'setModelPath()' function is required and is used to set the file path to the model adopted from the list of the
        available 4 model types. The model path must correspond to the model type set for the classification instance object.

        :param model_path: path to the weights file that loadModel() will read
        :return:
        """
        self.modelPath = model_path

    def setModelTypeAsSqueezeNet(self):
        """
        SqueezeNet is no longer supported; calling this always raises a ValueError
        that points to MobileNetV2 as the replacement.
        """
        raise ValueError("ImageAI no longer support SqueezeNet. You can use MobileNetV2 instead by downloading the MobileNetV2 model and call the function 'setModelTypeAsMobileNetV2'")

    def setModelTypeAsMobileNetV2(self):
        """
        'setModelTypeAsMobileNetV2()' is used to set the model type to the MobileNetV2 model
        for the classification instance object .
        :return:
        """
        self.__modelType = "mobilenetv2"

    @deprecated(since="2.1.6", message="'.setModelTypeAsResNet()' has been deprecated! Please use 'setModelTypeAsResNet50()' instead.")
    def setModelTypeAsResNet(self):
        """Deprecated alias of setModelTypeAsResNet50()."""
        return self.setModelTypeAsResNet50()

    def setModelTypeAsResNet50(self):
        """
         'setModelTypeAsResNet50()' is used to set the model type to the ResNet50 model
                for the classification instance object .
        :return:
        """
        self.__modelType = "resnet50"

    @deprecated(since="2.1.6", message="'.setModelTypeAsDenseNet()' has been deprecated! Please use 'setModelTypeAsDenseNet121()' instead.")
    def setModelTypeAsDenseNet(self):
        """Deprecated alias of setModelTypeAsDenseNet121()."""
        return self.setModelTypeAsDenseNet121()

    def setModelTypeAsDenseNet121(self):
        """
         'setModelTypeAsDenseNet121()' is used to set the model type to the DenseNet121 model
                for the classification instance object .
        :return:
        """
        self.__modelType = "densenet121"

    def setModelTypeAsInceptionV3(self):
        """
         'setModelTypeAsInceptionV3()' is used to set the model type to the InceptionV3 model
                for the classification instance object .
        :return:
        """
        self.__modelType = "inceptionv3"

    def loadModel(self, classification_speed="normal"):
        """
        'loadModel()' function is used to load the model structure into the program from the file path defined
        in the setModelPath() function. This function receives an optional value which is "classification_speed".
        The value is used to reduce the time it takes to classify an image, down to about 50% of the normal time,
        with just slight changes or drop in classification accuracy, depending on the nature of the image.
        * classification_speed (optional); Acceptable values are "normal", "fast", "faster" and "fastest".
          Any other value leaves the current input size unchanged.

        Calling this function again after a model has been loaded is a no-op.

        :param classification_speed :
        :return:
        :raises ValueError: if no model type was set, or the weights file could not be loaded
        """
        if self.__modelLoaded:
            # The network is built for one fixed input size. Changing the size now
            # would make classifyImage() feed inputs of the wrong shape.
            return

        # Side length (in pixels) of the square input for each speed setting.
        speed_to_size = {"normal": 224, "fast": 160, "faster": 120, "fastest": 100}
        self.__input_image_size = speed_to_size.get(classification_speed, self.__input_image_size)

        # model type -> (Keras application constructor, error message template)
        builders = {
            "mobilenetv2": (
                tf.keras.applications.MobileNetV2,
                "An error occured. Ensure your model file is a MobileNetV2 Model and is located in the path {}",
            ),
            "resnet50": (
                tf.keras.applications.ResNet50,
                "An error occured. Ensure your model file is a ResNet50 Model and is located in the path {}",
            ),
            "densenet121": (
                tf.keras.applications.DenseNet121,
                "An error occured. Ensure your model file is a DenseNet121 Model and is located in the path {}",
            ),
            "inceptionv3": (
                tf.keras.applications.InceptionV3,
                "An error occured. Ensure your model file is in {}",
            ),
        }

        if self.__modelType not in builders:
            raise ValueError("You must set a valid model type before loading the model.")

        build_model, error_template = builders[self.__modelType]
        try:
            model = build_model(
                input_shape=(self.__input_image_size, self.__input_image_size, 3),
                weights=None,
                classes=1000,
            )
            model.load_weights(self.modelPath)
        except Exception as e:
            raise ValueError(error_template.format(self.modelPath)) from e

        # Only record the model once it has been fully built and its weights loaded.
        self.__model_collection.append(model)
        self.__modelLoaded = True

    def classifyImage(self, image_input, result_count=5, input_type="file"):
        """
        'classifyImage()' function is used to classify a given image by receiving the following arguments:
            * input_type (optional) , the type of input to be parsed. Acceptable values are "file", "array" and "stream"
            * image_input , file path/numpy array/image file stream of the image.
            * result_count (optional) , the number of classifications to be sent which must be whole numbers between
                1 and 1000. The default is 5.

        This function returns 2 arrays namely 'classification_results' and 'classification_probabilities'. The 'classification_results'
        contains possible objects classes arranged in descending of their percentage probabilities. The 'classification_probabilities'
        contains the percentage probability of each object class. The position of each object class in the 'classification_results'
        array corresponds with the positions of the percentage probability in the 'classification_probabilities' array.


        :param input_type:
        :param image_input:
        :param result_count:
        :return classification_results, classification_probabilities:
        :raises ValueError: if the model is not loaded, the input cannot be read, the input type
            is not supported, or the prediction fails
        """
        if not self.__modelLoaded:
            raise ValueError("You must call the loadModel() function before making classification.")

        target_size = (self.__input_image_size, self.__input_image_size)

        if input_type == "file":
            try:
                image_to_predict = tf.keras.preprocessing.image.load_img(image_input, target_size=target_size)
                image_to_predict = tf.keras.preprocessing.image.img_to_array(image_to_predict, data_format="channels_last")
                image_to_predict = np.expand_dims(image_to_predict, axis=0)
            except Exception as e:
                raise ValueError("You have set a path to an invalid image file.") from e
        elif input_type == "array":
            try:
                # Force 3 channels so grayscale/RGBA arrays match the model input,
                # as load_img() already does for the "file" input type.
                pil_image = Image.fromarray(np.uint8(image_input)).convert("RGB")
                pil_image = pil_image.resize(target_size)
                image_to_predict = np.expand_dims(np.asarray(pil_image, dtype=np.float64), axis=0)
            except Exception as e:
                raise ValueError("You have parsed in a wrong numpy array for the image") from e
        elif input_type == "stream":
            try:
                pil_image = Image.open(image_input).convert("RGB")
                pil_image = pil_image.resize(target_size)
                image_to_predict = np.expand_dims(np.asarray(pil_image, dtype=np.float64), axis=0)
            except Exception as e:
                raise ValueError("You have parsed in a wrong stream for the image") from e
        else:
            # Without this branch an unknown input type would surface later as an
            # UnboundLocalError on image_to_predict.
            raise ValueError("Invalid input type '{}'. Acceptable values are 'file', 'array' and 'stream'.".format(input_type))

        # ResNet50 inputs are intentionally passed through without preprocess_input,
        # exactly as before.
        if self.__modelType == "mobilenetv2":
            image_to_predict = tf.keras.applications.mobilenet_v2.preprocess_input(image_to_predict)
        elif self.__modelType == "densenet121":
            image_to_predict = tf.keras.applications.densenet.preprocess_input(image_to_predict)
        elif self.__modelType == "inceptionv3":
            image_to_predict = tf.keras.applications.inception_v3.preprocess_input(image_to_predict)

        classification_results = []
        classification_probabilities = []
        try:
            model = self.__model_collection[0]
            prediction = model.predict(image_to_predict, steps=1)

            if self.__modelType == "mobilenetv2":
                predictiondata = tf.keras.applications.mobilenet_v2.decode_predictions(prediction, top=int(result_count))
            elif self.__modelType == "resnet50":
                predictiondata = tf.keras.applications.resnet50.decode_predictions(prediction, top=int(result_count))
            elif self.__modelType == "inceptionv3":
                predictiondata = tf.keras.applications.inception_v3.decode_predictions(prediction, top=int(result_count))
            elif self.__modelType == "densenet121":
                predictiondata = tf.keras.applications.densenet.decode_predictions(prediction, top=int(result_count))

            # Each decoded entry is (class_id, class_name, score); the score is
            # multiplied by 100 to report it as a percentage.
            for results in predictiondata:
                for result in results:
                    classification_results.append(str(result[1]))
                    classification_probabilities.append(result[2] * 100)
        except Exception as e:
            raise ValueError("An error occured! Try again.") from e

        return classification_results, classification_probabilities

    @deprecated(since="2.1.6", message="'.predictImage()' has been deprecated! Please use 'classifyImage()' instead.")
    def predictImage(self, image_input, result_count=5, input_type="file"):
        """Deprecated alias of classifyImage(); takes the same arguments and returns the same values."""
        return self.classifyImage(image_input, result_count, input_type)

================================================
FILE: imageai_tf_deprecated/Detection/Custom/CUSTOMDETECTION.md
================================================
# ImageAI : Custom Object Detection

A **DeepQuest AI** project [https://deepquestai.com](https://deepquestai.com)

---


### TABLE OF CONTENTS

- <a href="#customdetection" > :white_square_button: Custom Object Detection</a>
- <a href="#objectextraction" > :white_square_button: Object Detection, Extraction and Fine-tune</a>
- <a href="#hidingdetails" > :white_square_button: Hiding/Showing Object Name and Probability</a>
- <a href="#inputoutputtype" > :white_square_button: Image Input & Output Types</a>
- <a href="#documentation" > :white_square_button: Documentation</a>


ImageAI provides very convenient and powerful methods to perform object detection on images and extract each object from the image using your own **custom YOLOv3 model** and the corresponding **detection_config.json** generated during the training. To test the custom object detection, you can download a sample custom model we have trained to detect the Hololens headset and its **detection_config.json** file via the links below:

* [**hololens-ex-60--loss-2.76.h5**](https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/hololens-ex-60--loss-2.76.h5) _(Size = 236 mb)_
* [**detection_config.json**](https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/detection_config.json)


 Once you download the custom object detection model file, you should copy the model file to your project folder where your **.py** files will be.
 Then create a python file and give it a name; an example is FirstCustomDetection.py. Then write the code below into the python file: 

### FirstCustomDetection.py
<div id="customdetection" ></div>

```python
from imageai.Detection.Custom import CustomObjectDetection

detector = CustomObjectDetection()
detector.setModelTypeAsYOLOv3()
detector.setModelPath("hololens-ex-60--loss-2.76.h5")
detector.setJsonPath("detection_config.json")
detector.loadModel()
detections = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg")
for detection in detections:
    print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])

```

Sample Result - Input:

![Input](../../../data-images/holo2.jpg)

  Output: 
  
![Output](../../../data-images/holo2-detected.jpg)
          
```
hololens  :  39.69653248786926  :  [611, 74, 751, 154]
hololens  :  87.6643180847168  :  [23, 46, 90, 79]
hololens  :  89.25175070762634  :  [191, 66, 243, 95]
hololens  :  64.49641585350037  :  [437, 81, 514, 133]
hololens  :  91.78624749183655  :  [380, 113, 423, 138]

```


Let us make a breakdown of the object detection code that we used above.

```python
from imageai.Detection.Custom import CustomObjectDetection

detector = CustomObjectDetection()
detector.setModelTypeAsYOLOv3()
```
 In the 3 lines above , we import the **ImageAI custom object detection** class in the first line, created the class instance on the second line and set the model type to YOLOv3.
 
```python
detector.setModelPath("hololens-ex-60--loss-2.76.h5")
detector.setJsonPath("detection_config.json")
detector.loadModel()
```

  In the 3 lines above, we specified the file path to our downloaded model file in the first line , specified the path to our **detection_config.json** file in the second line and loaded the model on the third line.

```python
detections = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg")
for detection in detections:
    print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])

```

In the 3 lines above, we ran the `detectObjectsFromImage()` function and passed in the path to our test image, and the path to the new
 image which the function will save. Then the function returns an array of dictionaries, with each dictionary corresponding
 to one of the objects detected in the image. Each dictionary has the properties `name` (name of the object),
`percentage_probability` (percentage probability of the detection) and `box_points` (the x1,y1,x2 and y2 coordinates of the bounding box of the object). 


###  Object Detection, Extraction and Fine-tune
<div id="objectextraction" ></div>

In the examples we used above, we ran the object detection on an image and it
returned the detected objects in an array as well as save a new image with rectangular markers drawn on each object. In our next examples, we will be able to extract each object from the input image and save it independently.
  
  
In the example code below, which is nearly identical to the previous object detection code, we will save each object detected as a separate image.

```python
from imageai.Detection.Custom import CustomObjectDetection

detector = CustomObjectDetection()
detector.setModelTypeAsYOLOv3()
detector.setModelPath("hololens-ex-60--loss-2.76.h5")
detector.setJsonPath("detection_config.json") 
detector.loadModel()
detections, extracted_objects_array = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg", extract_detected_objects=True)

for detection, object_path in zip(detections, extracted_objects_array):
    print(object_path)
    print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])
    print("---------------")
```


Sample Result: Output Images
    
![](../../../data-images/holo2-detected-objects/hololens-1.jpg)
![](../../../data-images/holo2-detected-objects/hololens-2.jpg)
![](../../../data-images/holo2-detected-objects/hololens-3.jpg)
![](../../../data-images/holo2-detected-objects/hololens-4.jpg)
![](../../../data-images/holo2-detected-objects/hololens-5.jpg)
![](../../../data-images/holo2-detected-objects/hololens-6.jpg)
![](../../../data-images/holo2-detected-objects/hololens-7.jpg)


Let us review the part of the code that perform the object detection and extract the images:

```python
detections, extracted_objects_array = detector.detectObjectsFromImage(input_image="holo2.jpg", output_image_path="holo2-detected.jpg", extract_detected_objects=True)

for detection, object_path in zip(detections, extracted_objects_array):
    print(object_path)
    print(detection["name"], " : ", detection["percentage_probability"], " : ", detection["box_points"])
    print("---------------")
```

In the above lines, we called the `detectObjectsFromImage()` function, passed in the input image path, the output image path, and an
extra parameter `extract_detected_objects=True`. This parameter states that the function should extract each object detected from the image
and save it as a separate image. The parameter is `False` by default. Once set to `True`, the function will create a directory
 which is the `output image path + "-objects"`. Then it saves all the extracted images into this new directory with
  each image's name being the `detected object name + "-" + a number` which corresponds to the order in which the objects
  were detected.
  
This new parameter we set to extract and save detected objects as an image will make the function to return 2 values. The
 first is the array of dictionaries with each dictionary corresponding to a detected object. The second is an array of the paths
  to the saved images of each object detected and extracted, and they are arranged in order at which the objects are in the
  first array.

  
### And one important feature you need to know!

You will recall that the percentage probability
   for each detected object is sent back by the `detectObjectsFromImage()` function. The function has a parameter
   `minimum_percentage_probability` , whose default value is `30` (value ranges between 0 - 100), which is the value used in this example. That means the function will only return a detected
    object if its percentage probability is **30 or above**. The value was kept at this number to ensure the integrity of the
     detection results. You can fine-tune the object
      detection by setting `minimum_percentage_probability` to a smaller value to detect more objects or to a higher value to detect fewer objects.


###  Hiding/Showing Object Name and Probability
<div id="hidingdetails"></div>

**ImageAI** provides options to hide the name of objects detected and/or the percentage probability from being shown on the saved/returned detected image. Using the `detectObjectsFromImage()` and `detectCustomObjectsFromImage()` functions, the parameters `'display_object_name'` and `'display_percentage_probability'`  can be set to True or False individually. Take a look at the code below: 
```python
detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "holo2.jpg"), output_image_path=os.path.join(execution_path , "holo2_nodetails.jpg"), minimum_percentage_probability=30, display_percentage_probability=False, display_object_name=False)
```

In the above code, we specified that both the object name and percentage probability should not be shown. As you can see in the result below, both the names of the objects and their individual percentage probability is not shown in the detected image. 

**Result**

![](../../../data-images/holo2-nodetails.jpg)


### Image Input & Output Types
<div id="inputoutputtype"></div>

**ImageAI** custom object detection supports 2 types of input, which are **file path to image file** (default) and **numpy array of an image**,
as well as 2 types of output, which are image **file** (default) and numpy **array**.
This means you can now perform object detection in production applications such as on a web server and system
 that returns file in any of the above stated formats.
 To perform object detection with numpy array input, you just need to state the input type
in the `.detectObjectsFromImage()` function. See example below.

```python
detections = detector.detectObjectsFromImage(input_type="array", input_image=image_array , output_image_path=os.path.join(execution_path , "holo2-detected.jpg")) # For numpy array input type
```
To perform object detection with numpy array output you just need to state the output type
in the `.detectObjectsFromImage()` function. See example below.

```python
detected_image_array, detections = detector.detectObjectsFromImage(output_type="array", input_image="holo2.jpg" ) # For numpy array output type
```


### Documentation
<div id="documentation" ></div>

We have provided full documentation for all **ImageAI** classes and functions in 3 major languages. Find links below: 

* Documentation - **English Version**  [https://imageai.readthedocs.io](https://imageai.readthedocs.io)
* Documentation - **Chinese Version**  [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)
* Documentation - **French Version**  [https://imageai-fr.readthedocs.io](https://imageai-fr.readthedocs.io)


================================================
FILE: imageai_tf_deprecated/Detection/Custom/CUSTOMDETECTIONTRAINING.md
================================================
# ImageAI : Custom Detection Model Training 

---

**ImageAI** provides the most simple and powerful approach to training custom object detection models
using the YOLOv3 architecture,
which you can load into the `imageai.Detection.Custom.CustomObjectDetection` class. This allows
 you to train your own model on any set of images that corresponds to any type of objects of interest.
The training process generates a JSON file that maps the objects names in your image dataset and the detection anchors, as well as creates lots of models. In choosing the best model for your custom object detection task, an `evaluateModel()` function has been provided to compute the **mAP** of your saved models by allowing you to state your desired **IoU** and **Non-maximum Suppression** values. Then you can perform custom
object detection using the model and the JSON file generated. 

### TABLE OF CONTENTS
- <a href="#preparingdataset" > :white_square_button: Preparing your custom dataset</a>
- <a href="#trainingdataset" > :white_square_button: Training on your custom Dataset</a>
- <a href="#evaluatingmodels" > :white_square_button: Evaluating your saved detection models' mAP</a>


### Preparing your custom dataset
<div id="preparingdataset"></div>

To train a custom detection model, you need to prepare the images you want to use to train the model. 
You will prepare the images as follows: 

1. Decide the type of object(s) you want to detect and collect about **200 (minimum recommendation)** or more picture of each of the object(s)
2. Once you have collected the images, you need to annotate the object(s) in the images. **ImageAI** uses the **Pascal VOC format** for image annotation. You can generate this annotation for your images using the easy to use [**LabelImg**](https://github.com/tzutalin/labelImg) image annotation tool, available for Windows, Linux and MacOS systems. Open the link below to install the annotation tool. See: [https://github.com/tzutalin/labelImg](https://github.com/tzutalin/labelImg)
3. When you are done annotating your images, **annotation XML** files will be generated for each image in your dataset. The **annotation XML** file describes each or **all** of the objects in the image. For example, if your image names are **image(1).jpg**, **image(2).jpg**, **image(3).jpg** till **image(z).jpg**; the corresponding annotation for each of the images will be **image(1).xml**, **image(2).xml**, **image(3).xml** till **image(z).xml**. 
4. Once you have the annotations for all your images, create a folder for your dataset (E.g headsets) and in this parent folder, create child folders **train** and **validation**
5. In the train folder, create **images** and **annotations**
 sub-folders. Put about 70-80% of your dataset of each object's images in the **images** folder and put the corresponding annotations for these images in the **annotations** folder.  
6. In the validation folder, create **images** and **annotations** sub-folders. Put the rest of your dataset images in the **images** folder and put the corresponding annotations for these images in the **annotations** folder.
7. Once you have done this, the structure of your image dataset folder should look like below: 
    ```
    >> train    >> images       >> img_1.jpg  (shows Object_1)
                >> images       >> img_2.jpg  (shows Object_2)
                >> images       >> img_3.jpg  (shows Object_1, Object_3 and Object_n)
                >> annotations  >> img_1.xml  (describes Object_1)
                >> annotations  >> img_2.xml  (describes Object_2)
                >> annotations  >> img_3.xml  (describes Object_1, Object_3 and Object_n)
    
    >> validation   >> images       >> img_151.jpg (shows Object_1, Object_3 and Object_n)
                    >> images       >> img_152.jpg (shows Object_2)
                    >> images       >> img_153.jpg (shows Object_1)
                    >> annotations  >> img_151.xml (describes Object_1, Object_3 and Object_n)
                    >> annotations  >> img_152.xml (describes Object_2)
                    >> annotations  >> img_153.xml (describes Object_1)
     ```
8. You can train your custom detection model completely from scratch or use transfer learning (recommended for better accuracy) from a pre-trained YOLOv3 model. Also, we have provided a sample annotated Hololens and Headsets (Hololens and Oculus) dataset for you to train with. Download the pre-trained YOLOv3 model and the sample datasets in the link below.  

[https://github.com/OlafenwaMoses/ImageAI/releases/tag/essential-v4](https://github.com/OlafenwaMoses/ImageAI/releases/tag/essential-v4)


### Training on your custom dataset
<div id="trainingdataset"></div>

Before you start training your custom detection model, kindly take note of the following: 

- The default **batch_size** is 4. If you are training with **Google Colab**, this will be fine. However, I would advise you to use a more powerful GPU than the K80 offered by Colab, as the higher your **batch_size (8, 16)**, the better the accuracy of your detection model. 
- If you experience <i>'_TfDeviceCaptureOp' object has no attribute '_set_device_from_string'</i> error in Google Colab, it is due to a bug in **Tensorflow**. You can solve this by installing **Tensorflow GPU 1.13.1**. 
    ```bash
     pip3 install tensorflow-gpu==1.13.1
    ```

Then your training code goes as follows: 
```python
from imageai.Detection.Custom import DetectionModelTrainer

trainer = DetectionModelTrainer()
trainer.setModelTypeAsYOLOv3()
trainer.setDataDirectory(data_directory="hololens")
trainer.setTrainConfig(object_names_array=["hololens"], batch_size=4, num_experiments=200, train_from_pretrained_model="pretrained-yolov3.h5")
# In the above,when training for detecting multiple objects,
#set object_names_array=["object1", "object2", "object3",..."objectz"]
trainer.trainModel()
```

 Yes! Just 6 lines of code and you can train object detection models on your custom dataset.
Now let's take a look at how the code above works. 

```python
from imageai.Detection.Custom import DetectionModelTrainer

trainer = DetectionModelTrainer()
trainer.setModelTypeAsYOLOv3()
trainer.setDataDirectory(data_directory="hololens")
```

In the first line, we import the **ImageAI** detection model training class, then we define the model trainer in the second line,
 we set the network type in the third line and set the path to the image dataset we want to train the network on in the fourth line.

```python
trainer.setTrainConfig(object_names_array=["hololens"], batch_size=4, num_experiments=200, train_from_pretrained_model="pretrained-yolov3.h5")
```


In the line above, we configured our detection model trainer. The parameters we stated in the function are as below:  

- **object_names_array** : this is an array containing the names of the objects in our dataset
- **batch_size** : this is to state the batch size for the training
- **num_experiments** : this is to state the number of times the network will train over all the training images,
 which is also called epochs 
- **train_from_pretrained_model(optional)** : this is to train using transfer learning from a pre-trained **YOLOv3** model

```python
trainer.trainModel()
```


When you start the training, you should see something like this in the console: 
```
Using TensorFlow backend.
Generating anchor boxes for training images and annotation...
Average IOU for 9 anchors: 0.78
Anchor Boxes generated.
Detection configuration saved in  hololens/json/detection_config.json
Training on: 	['hololens']
Training with Batch Size:  4
Number of Experiments:  200

Epoch 1/200
480/480 [==============================] - 395s 823ms/step - loss: 36.9000 - yolo_layer_1_loss: 3.2970 - yolo_layer_2_loss: 9.4923 - yolo_layer_3_loss: 24.1107 - val_loss: 15.6321 - val_yolo_layer_1_loss: 2.0275 - val_yolo_layer_2_loss: 6.4191 - val_yolo_layer_3_loss: 7.1856
Epoch 2/200
480/480 [==============================] - 293s 610ms/step - loss: 11.9330 - yolo_layer_1_loss: 1.3968 - yolo_layer_2_loss: 4.2894 - yolo_layer_3_loss: 6.2468 - val_loss: 7.9868 - val_yolo_layer_1_loss: 1.7054 - val_yolo_layer_2_loss: 2.9156 - val_yolo_layer_3_loss: 3.3657
Epoch 3/200
480/480 [==============================] - 293s 610ms/step - loss: 7.1228 - yolo_layer_1_loss: 1.0583 - yolo_layer_2_loss: 2.2863 - yolo_layer_3_loss: 3.7782 - val_loss: 6.4964 - val_yolo_layer_1_loss: 1.1391 - val_yolo_layer_2_loss: 2.2058 - val_yolo_layer_3_loss: 3.1514
Epoch 4/200
480/480 [==============================] - 297s 618ms/step - loss: 5.5802 - yolo_layer_1_loss: 0.9742 - yolo_layer_2_loss: 1.8916 - yolo_layer_3_loss: 2.7144 - val_loss: 6.4275 - val_yolo_layer_1_loss: 1.6153 - val_yolo_layer_2_loss: 2.1203 - val_yolo_layer_3_loss: 2.6919
Epoch 5/200
480/480 [==============================] - 295s 615ms/step - loss: 4.8717 - yolo_layer_1_loss: 0.7568 - yolo_layer_2_loss: 1.6641 - yolo_layer_3_loss: 2.4508 - val_loss: 6.3723 - val_yolo_layer_1_loss: 1.6434 - val_yolo_layer_2_loss: 2.1188 - val_yolo_layer_3_loss: 2.6101
Epoch 6/200
480/480 [==============================] - 300s 624ms/step - loss: 4.7989 - yolo_layer_1_loss: 0.8708 - yolo_layer_2_loss: 1.6683 - yolo_layer_3_loss: 2.2598 - val_loss: 5.8672 - val_yolo_layer_1_loss: 1.2349 - val_yolo_layer_2_loss: 2.0504 - val_yolo_layer_3_loss: 2.5820
Epoch 7/200
```

Let us explain the details shown above: 
```
Using TensorFlow backend.
Generating anchor boxes for training images and annotation...
Average IOU for 9 anchors: 0.78
Anchor Boxes generated.
Detection configuration saved in  hololens/json/detection_config.json
Training on: 	['hololens']
Training with Batch Size:  4
Number of Experiments:  200
```

The above details signify the following: 
- **ImageAI** autogenerates the best match detection **anchor boxes** for your image dataset. 

- The anchor boxes and the object names mapping are saved in the
**json/detection_config.json** path of the image dataset folder. Please note that for every new training you start, a new **detection_config.json** file is generated and is only compatible with the model saved during that training.

```
Epoch 1/200
480/480 [==============================] - 395s 823ms/step - loss: 36.9000 - yolo_layer_1_loss: 3.2970 - yolo_layer_2_loss: 9.4923 - yolo_layer_3_loss: 24.1107 - val_loss: 15.6321 - val_yolo_layer_1_loss: 2.0275 - val_yolo_layer_2_loss: 6.4191 - val_yolo_layer_3_loss: 7.1856
Epoch 2/200
480/480 [==============================] - 293s 610ms/step - loss: 11.9330 - yolo_layer_1_loss: 1.3968 - yolo_layer_2_loss: 4.2894 - yolo_layer_3_loss: 6.2468 - val_loss: 7.9868 - val_yolo_layer_1_loss: 1.7054 - val_yolo_layer_2_loss: 2.9156 - val_yolo_layer_3_loss: 3.3657
Epoch 3/200
480/480 [==============================] - 293s 610ms/step - loss: 7.1228 - yolo_layer_1_loss: 1.0583 - yolo_layer_2_loss: 2.2863 - yolo_layer_3_loss: 3.7782 - val_loss: 6.4964 - val_yolo_layer_1_loss: 1.1391 - val_yolo_layer_2_loss: 2.2058 - val_yolo_layer_3_loss: 3.1514
Epoch 4/200
480/480 [==============================] - 297s 618ms/step - loss: 5.5802 - yolo_layer_1_loss: 0.9742 - yolo_layer_2_loss: 1.8916 - yolo_layer_3_loss: 2.7144 - val_loss: 6.4275 - val_yolo_layer_1_loss: 1.6153 - val_yolo_layer_2_loss: 2.1203 - val_yolo_layer_3_loss: 2.6919
Epoch 5/200
480/480 [==============================] - 295s 615ms/step - loss: 4.8717 - yolo_layer_1_loss: 0.7568 - yolo_layer_2_loss: 1.6641 - yolo_layer_3_loss: 2.4508 - val_loss: 6.3723 - val_yolo_layer_1_loss: 1.6434 - val_yolo_layer_2_loss: 2.1188 - val_yolo_layer_3_loss: 2.6101
Epoch 6/200
480/480 [==============================] - 300s 624ms/step - loss: 4.7989 - yolo_layer_1_loss: 0.8708 - yolo_layer_2_loss: 1.6683 - yolo_layer_3_loss: 2.2598 - val_loss: 5.8672 - val_yolo_layer_1_loss: 1.2349 - val_yolo_layer_2_loss: 2.0504 - val_yolo_layer_3_loss: 2.5820
Epoch 7/200
```

- The above signifies the progress of the training. 
- For each experiment (Epoch), the total training loss (e.g. `loss: 4.7989`) and the total validation loss (e.g. `val_loss: 5.8672`) are reported. 
- For each drop in the loss after an experiment, a model is saved in the **hololens/models** folder. The lower the loss, the better the model. 
- **Tensorboard** report file for the training will be saved in the **hololens/logs** folder.

Once you are done training, you can visit the link below for performing object detection with your **custom detection model** and **detection_config.json** file.

[Detection/Custom/CUSTOMDETECTION.md](./CUSTOMDETECTION.md)
 
 
### Evaluating your saved detection models' mAP
 <div id="evaluatingmodels"></div>

After training on your custom dataset, you can evaluate the mAP of your saved models by specifying your desired IoU and Non-maximum suppression values. See details as below:

- **Single Model Evaluation:** To evaluate a single model, simply use the example code below with the path to your dataset directory, the model file and the **detection_config.json** file saved during the training. In the example, we used an **object_threshold** of 0.3 ( percentage_score >= 30% ), **IoU** of 0.5 and **Non-maximum suppression** value of 0.5.
    ```python
    from imageai.Detection.Custom import DetectionModelTrainer
    
    trainer = DetectionModelTrainer()
    trainer.setModelTypeAsYOLOv3()
    trainer.setDataDirectory(data_directory="hololens")
    metrics = trainer.evaluateModel(model_path="detection_model-ex-60--loss-2.76.h5", json_path="detection_config.json", iou_threshold=0.5, object_threshold=0.3, nms_threshold=0.5)
    ```
    Note that the `trainer.evaluateModel` method prints the metrics to standard output as shown below, 
    but also returns a list of dicts containing all the information that is displayed. 
    
    Sample Result:
    ```
    Model File:  hololens_detection_model-ex-09--loss-4.01.h5 
    Using IoU :  0.5
    Using Object Threshold :  0.3
    Using Non-Maximum Suppression :  0.5
    hololens: 0.9613
    mAP: 0.9613
    ===============================
    ```
    Let's see what those metrics look like:
    ```
    [{
        'average_precision': {'hololens': 0.9613334437735249},
        'map': 0.9613334437735249,
        'model_file': 'hololens_detection_model-ex-09--loss-4.01.h5',
        'using_iou': 0.5,
        'using_non_maximum_suppression': 0.5,
        'using_object_threshold': 0.3
    }]
    ```
- **Multi Model Evaluation:** To evaluate all your saved models, simply pass in the path to the folder containing the models as the **model_path** as seen in the example below:
    ```python
    from imageai.Detection.Custom import DetectionModelTrainer
    
    trainer = DetectionModelTrainer()
    trainer.setModelTypeAsYOLOv3()
    trainer.setDataDirectory(data_directory="hololens")
    metrics = trainer.evaluateModel(model_path="hololens/models", json_path="hololens/json/detection_config.json", iou_threshold=0.5, object_threshold=0.3, nms_threshold=0.5)
    ```
    Sample Result:
    ```
    Model File:  hololens/models/detection_model-ex-07--loss-4.42.h5 
    Using IoU :  0.5
    Using Object Threshold :  0.3
    Using Non-Maximum Suppression :  0.5
    hololens: 0.9231
    mAP: 0.9231
    ===============================
    Model File:  hololens/models/detection_model-ex-10--loss-3.95.h5 
    Using IoU :  0.5
    Using Object Threshold :  0.3
    Using Non-Maximum Suppression :  0.5
    hololens: 0.9725
    mAP: 0.9725
    ===============================
    Model File:  hololens/models/detection_model-ex-05--loss-5.26.h5 
    Using IoU :  0.5
    Using Object Threshold :  0.3
    Using Non-Maximum Suppression :  0.5
    hololens: 0.9204
    mAP: 0.9204
    ===============================
    Model File:  hololens/models/detection_model-ex-03--loss-6.44.h5 
    Using IoU :  0.5
    Using Object Threshold :  0.3
    Using Non-Maximum Suppression :  0.5
    hololens: 0.8120
    mAP: 0.8120
    ===============================
    Model File:  hololens/models/detection_model-ex-18--loss-2.96.h5 
    Using IoU :  0.5
    Using Object Threshold :  0.3
    Using Non-Maximum Suppression :  0.5
    hololens: 0.9431
    mAP: 0.9431
    ===============================
    Model File:  hololens/models/detection_model-ex-17--loss-3.10.h5 
    Using IoU :  0.5
    Using Object Threshold :  0.3
    Using Non-Maximum Suppression :  0.5
    hololens: 0.9404
    mAP: 0.9404
    ===============================
    Model File:  hololens/models/detection_model-ex-08--loss-4.16.h5 
    Using IoU :  0.5
    Using Object Threshold :  0.3
    Using Non-Maximum Suppression :  0.5
    hololens: 0.9725
    mAP: 0.9725
    ===============================
    ```
    Let's see what those metrics look like:
    ```
    [{
        'average_precision': {'hololens': 0.9231334437735249},
        'map': 0.9231334437735249,
        'model_file': 'hololens/models/detection_model-ex-07--loss-4.42.h5',
        'using_iou': 0.5,
        'using_non_maximum_suppression': 0.5,
        'using_object_threshold': 0.3
    },
    {
        'average_precision': {'hololens': 0.9725334437735249},
        'map': 0.97251334437735249,
        'model_file': 'hololens/models/detection_model-ex-10--loss-3.95.h5',
        'using_iou': 0.5,
        'using_non_maximum_suppression': 0.5,
        'using_object_threshold': 0.3
    },
    {
        'average_precision': {'hololens': 0.92041334437735249},
        'map': 0.92041334437735249,
        'model_file': 'hololens/models/detection_model-ex-05--loss-5.26.h5',
        'using_iou': 0.5,
        'using_non_maximum_suppression': 0.5,
        'using_object_threshold': 0.3
    },
    {
        'average_precision': {'hololens': 0.81201334437735249},
        'map': 0.81201334437735249,
        'model_file': 'hololens/models/detection_model-ex-03--loss-6.44.h5',
        'using_iou': 0.5,
        'using_non_maximum_suppression': 0.5,
        'using_object_threshold': 0.3
    },
    {
        'average_precision': {'hololens': 0.94311334437735249},
        'map': 0.94311334437735249,
        'model_file': 'hololens/models/detection_model-ex-18--loss-2.96.h5',
        'using_iou': 0.5,
        'using_non_maximum_suppression': 0.5,
        'using_object_threshold': 0.3
    },
    {
        'average_precision': {'hololens': 0.94041334437735249},
        'map': 0.94041334437735249,
        'model_file': 'hololens/models/detection_model-ex-17--loss-3.10.h5',
        'using_iou': 0.5,
        'using_non_maximum_suppression': 0.5,
        'using_object_threshold': 0.3
    },
    {
        'average_precision': {'hololens': 0.97251334437735249},
        'map': 0.97251334437735249,
        'model_file': 'hololens/models/detection_model-ex-08--loss-4.16.h5',
        'using_iou': 0.5,
        'using_non_maximum_suppression': 0.5,
        'using_object_threshold': 0.3
    }
    ]
    ```


###  >> Documentation
<div id="documentation" ></div>

We have provided full documentation for all **ImageAI** classes and functions in 3 major languages. Find links below: 

* Documentation - **English Version**  [https://imageai.readthedocs.io](https://imageai.readthedocs.io) 
* Documentation - **Chinese Version**  [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)
* Documentation - **French Version**  [https://imageai-fr.readthedocs.io](https://imageai-fr.readthedocs.io)


================================================
FILE: imageai_tf_deprecated/Detection/Custom/CUSTOMVIDEODETECTION.md
================================================
# ImageAI : Custom Video Object Detection, Tracking  and Analysis

A **DeepQuest AI** project [https://deepquestai.com](https://deepquestai.com)

---

### TABLE OF CONTENTS

- <a href="#videodetection" > :white_square_button: First Custom Video Object Detection</a>
- <a href="#camerainputs" > :white_square_button: Camera / Live Stream Video Detection</a>
- <a href="#videoanalysis" > :white_square_button: Video Analysis</a>
- <a href="#hidingdetails" > :white_square_button: Hiding/Showing Object Name and Probability</a>
- <a href="#videodetectionintervals" > :white_square_button: Frame Detection Intervals</a>
- <a href="#detectiontimeout" > :white_square_button: Video Detection Timeout (NEW)</a>
- <a href="#documentation" > :white_square_button: Documentation</a>


ImageAI provides convenient, flexible and powerful methods to perform object detection on videos using your own **custom YOLOv3 model** and the corresponding **detection_config.json** generated during the training. This version of **ImageAI** provides commercial-grade video object detection features, which include but are not limited to device/IP camera inputs, per frame, per second, per minute and entire video analysis for storing in databases and/or real-time visualizations and for future insights.
To test the custom video object detection, you can download a sample custom model we have trained to detect the Hololens headset and its **detection_config.json** file via the links below:
- [**hololens-ex-60--loss-2.76.h5**](https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/hololens-ex-60--loss-2.76.h5) _(Size = 236 mb)_
- [**detection_config.json**](https://github.com/OlafenwaMoses/ImageAI/releases/download/essential-v4/detection_config.json)


Because video object detection is a compute-intensive task, we advise you to perform this experiment using a computer with an NVIDIA GPU and the GPU version of Tensorflow
 installed. Performing video object detection on a CPU will be slower than using an NVIDIA GPU powered computer. You can use Google Colab for this
 experiment as it has an NVIDIA K80 GPU available for free.
<br/>
 Once you download the custom object detection model and JSON files, you should copy the model and the JSON files to your project folder where your .py files will be.
 Then create a python file and give it a name; an example is FirstCustomVideoObjectDetection.py. Then write the code below into the python file: <br/>


### FirstCustomVideoObjectDetection.py
<div id="videodetection" ></div>

```python
from imageai.Detection.Custom import CustomVideoObjectDetection
import os

execution_path = os.getcwd()

video_detector = CustomVideoObjectDetection()
video_detector.setModelTypeAsYOLOv3()
video_detector.setModelPath("hololens-ex-60--loss-2.76.h5")
video_detector.setJsonPath("detection_config.json")
video_detector.loadModel()

video_detector.detectObjectsFromVideo(input_file_path="holo1.mp4",
                                          output_file_path=os.path.join(execution_path, "holo1-detected3"),
                                          frames_per_second=20,
                                          minimum_percentage_probability=40,
                                          log_progress=True)
```

[**Input Video**](../../../data-videos/holo1.mp4)
[![Input Video](../../../data-images/holo-video.jpg)](../../../data-videos/holo1.mp4)
[**Output Video**](https://www.youtube.com/watch?v=4o5GyAR4Mpw)
[![Output Video](../../../data-images/holo-video-detected.jpg)](https://www.youtube.com/watch?v=4o5GyAR4Mpw)


Let us make a breakdown of the object detection code that we used above.

```python
from imageai.Detection.Custom import CustomVideoObjectDetection
import os

execution_path = os.getcwd()
```

In the 3 lines above, we import the **ImageAI custom video object detection** class in the first line, import the **os** module in the second line and obtain
  the path to the folder where our python file runs.
```python
video_detector = CustomVideoObjectDetection()
video_detector.setModelTypeAsYOLOv3()
video_detector.setModelPath("hololens-ex-60--loss-2.76.h5")
video_detector.setJsonPath("detection_config.json")
video_detector.loadModel()
```
In the 5 lines above, we created a new instance of the `CustomVideoObjectDetection` class in the first line, set the model type to YOLOv3 in the second line,
  set the model path to our custom YOLOv3 model file in the third line, specified the path to the model's corresponding **detection_config.json** in the fourth line and loaded the model in the fifth line.

```python
video_detector.detectObjectsFromVideo(input_file_path="holo1.mp4",
                                          output_file_path=os.path.join(execution_path, "holo1-detected3"),
                                          frames_per_second=20,
                                          minimum_percentage_probability=40,
                                          log_progress=True)
```

In the code above, we ran the `detectObjectsFromVideo()` function and passed in the path to our video, the path to the new
 video (without the extension, it saves a .avi video by default) which the function will save, the number of frames per second (fps) that
 we desire the output video to have, the minimum percentage probability (`minimum_percentage_probability`) and the option to log the progress of the detection in the console. Then the function returns the path to the saved video
 which contains boxes and percentage probabilities rendered on objects detected in the video.


### Camera / Live Stream Video Detection
<div id="camerainputs"></div>

**ImageAI** now allows live-video detection with support for camera inputs. Using **OpenCV**'s **VideoCapture()** function, you can load live-video streams from a device camera, cameras connected by cable or IP cameras, and pass it into **ImageAI**'s **detectObjectsFromVideo()** function. All features that are supported for detecting objects in a video file are also available for detecting objects in a camera's live-video feed. Find below an example of detecting objects in a live-video feed from the device camera. 

```python
from imageai.Detection.Custom import CustomVideoObjectDetection
import os
import cv2

execution_path = os.getcwd()
camera = cv2.VideoCapture(0)

video_detector = CustomVideoObjectDetection()
video_detector.setModelTypeAsYOLOv3()
video_detector.setModelPath("hololens-ex-60--loss-2.76.h5")
video_detector.setJsonPath("detection_config.json")
video_detector.loadModel()

video_detector.detectObjectsFromVideo(camera_input=camera,
                                          output_file_path=os.path.join(execution_path, "holo1-detected3"),
                                          frames_per_second=20,
                                          minimum_percentage_probability=40,
                                          log_progress=True)
```

The difference between the code above and the code for the detection of a video file is that we defined an **OpenCV VideoCapture** instance and loaded the default device camera into it. Then we passed the camera we defined into the parameter **camera_input**, which replaces the **input_file_path** that is used for a video file. 


### Video Analysis
<div id="videoanalysis"></div>

**ImageAI** now provides commercial-grade video analysis in the Custom Video Object Detection class, for both video file inputs and camera inputs. This feature allows developers to obtain deep insights into any video processed with **ImageAI**. These insights can be visualized in real-time or stored in a NoSQL database for future review or analysis. <br/>

For video analysis, the **detectObjectsFromVideo()** function now allows you to state your own defined functions which will be executed for every frame, second and/or minute of the video detected, as well as a function that will be executed at the end of a video detection. Once these functions are stated, they will receive raw but comprehensive analytical data on the index of the frame/second/minute, objects detected (name, percentage_probability and box_points), number of instances of each unique object detected and average number of occurrence of each unique object detected over a second/minute and entire video.

To obtain the video analysis, all you need to do is specify a function, state the corresponding parameters it will be receiving and pass the function name into the **per_frame_function**, **per_second_function**, **per_minute_function** and **video_complete_function** parameters in the detection function. Find below examples of video analysis functions. 

```python
def forFrame(frame_number, output_array, output_count):
    print("FOR FRAME " , frame_number)
    print("Output for each object : ", output_array)
    print("Output count for unique objects : ", output_count)
    print("------------END OF A FRAME --------------")

def forSeconds(second_number, output_arrays, count_arrays, average_output_count):
    print("SECOND : ", second_number)
    print("Array for the outputs of each frame ", output_arrays)
    print("Array for output count for unique objects in each frame : ", count_arrays)
    print("Output average count for unique objects in the last second: ", average_output_count)
    print("------------END OF A SECOND --------------")

def forMinute(minute_number, output_arrays, count_arrays, average_output_count):
    print("MINUTE : ", minute_number)
    print("Array for the outputs of each frame ", output_arrays)
    print("Array for output count for unique objects in each frame : ", count_arrays)
    print("Output average count for unique objects in the last minute: ", average_output_count)
    print("------------END OF A MINUTE --------------")

video_detector = CustomVideoObjectDetection()
video_detector.setModelTypeAsYOLOv3()
video_detector.setModelPath("hololens-ex-60--loss-2.76.h5")
video_detector.setJsonPath("detection_config.json")
video_detector.loadModel()

video_detector.detectObjectsFromVideo(camera_input=camera,
                                          output_file_path=os.path.join(execution_path, "holo1-detected3"),
                                          frames_per_second=20, per_second_function=forSeconds, per_frame_function = forFrame, per_minute_function= forMinute,
                                          minimum_percentage_probability=40,
                                          log_progress=True)
```


**ImageAI** also allows you to obtain complete analysis of the entire video processed. All you need is to define a function like the forSeconds or forMinute function and pass it as the **video_complete_function** parameter of your **.detectObjectsFromVideo()** function. The same values as for the per_second_function and per_minute_function will be returned. The difference is that no index will be returned; only the other 3 values will be returned, and these 3 values will cover all frames in the video. Below is a sample function: 
```python
def forFull(output_arrays, count_arrays, average_output_count):
    #Perform action on the 3 parameters returned into the function
    pass


video_detector.detectObjectsFromVideo(camera_input=camera,
                                          output_file_path=os.path.join(execution_path, "holo1-detected3"),
                                          video_complete_function=forFull,
                                          minimum_percentage_probability=40,
                                          log_progress=True)

```

**FINAL NOTE ON VIDEO ANALYSIS** : **ImageAI** allows you to obtain the detected video frame as a Numpy array at each frame, second and minute function. All you need to do is specify one more parameter in your function and set **return_detected_frame=True** in your **detectObjectsFromVideo()** function. Once this is set, the extra parameter you specified in your function will be the Numpy array of the detected frame. See a sample below:

```python
def forFrame(frame_number, output_array, output_count, detected_frame):
    print("FOR FRAME " , frame_number)
    print("Output for each object : ", output_array)
    print("Output count for unique objects : ", output_count)
    print("Returned Objects is : ", type(detected_frame))
    print("------------END OF A FRAME --------------")


video_detector.detectObjectsFromVideo(camera_input=camera,
                                          output_file_path=os.path.join(execution_path, "holo1-detected3"),
                                          per_frame_function=forFrame,
                                          minimum_percentage_probability=40,
                                          log_progress=True, return_detected_frame=True)
```


### Frame Detection Intervals
<div id="videodetectionintervals" ></div>

The above video object detection tasks are optimized for frame-real-time object detection, which ensures that objects in every frame
of the video are detected. **ImageAI** provides you the option to adjust the video frame detections which can speed up
your video detection process. When calling the `.detectObjectsFromVideo()` function, you can
specify at which frame interval detections should be made. By setting the **frame_detection_interval** parameter to be
 equal to 5 or 20, that means the object detections in the video will be updated after 5 frames or 20 frames.
If your output video **frames_per_second** is set to 20, that means the object detections in the video will
 be updated once in every quarter of a second or every second. This is useful in case scenarios where the available
 compute is less powerful and speeds of moving objects are low. This ensures you can have objects detected as second-real-time,
 half-a-second-real-time or whichever way suits your needs. 


### Custom Video Detection Timeout
<div id="detectiontimeout"></div>

**ImageAI** now allows you to set a timeout in seconds for detection of objects in videos or camera live feed. To set a timeout for your video detection code, all you need to do is specify the `detection_timeout` parameter in the `detectObjectsFromVideo()` function to the number of desired seconds. In the example code below, we set `detection_timeout` to 120 seconds (2 minutes). 


```python
from imageai.Detection.Custom import CustomVideoObjectDetection
import os
import cv2

execution_path = os.getcwd()
camera = cv2.VideoCapture(0)

video_detector = CustomVideoObjectDetection()
video_detector.setModelTypeAsYOLOv3()
video_detector.setModelPath("hololens-ex-60--loss-2.76.h5")
video_detector.setJsonPath("detection_config.json")
video_detector.loadModel()

video_detector.detectObjectsFromVideo(camera_input=camera,
                                          output_file_path=os.path.join(execution_path, "holo1-detected3"),
                                          frames_per_second=20,  minimum_percentage_probability=40,
                                          detection_timeout=120)
```


###  >> Documentation
<div id="documentation" ></div>

We have provided full documentation for all **ImageAI** classes and functions in 3 major languages. Find links below: 

* Documentation - **English Version**  [https://imageai.readthedocs.io](https://imageai.readthedocs.io)
* Documentation - **Chinese Version**  [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)
* Documentation - **French Version**  [https://imageai-fr.readthedocs.io](https://imageai-fr.readthedocs.io)


================================================
FILE: imageai_tf_deprecated/Detection/Custom/__init__.py
================================================
import os
import re
import numpy as np
import json
from imageai.Detection.Custom.voc import parse_voc_annotation
from imageai.Detection.YOLO.yolov3 import yolov3_main, yolov3_train, dummy_loss
from imageai.Detection.Custom.generator import BatchGenerator
from imageai.Detection.Custom.utils.utils import normalize, evaluate, makedirs
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from imageai.Detection.Custom.callbacks import CustomModelCheckpoint
from imageai.Detection.Custom.utils.multi_gpu_model import multi_gpu_model
from imageai.Detection.Custom.gen_anchors import generateAnchors
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras import Input
from tensorflow.keras.callbacks import TensorBoard
import tensorflow.keras.backend as K
import cv2

# Module-level TensorFlow setup, executed once when this package is imported.
# Ask TensorFlow to run `tf.function`-wrapped code eagerly rather than as traced graphs.
tf.config.run_functions_eagerly(True)
# Set TensorFlow's native log-level environment variable to "3".
# NOTE(review): presumably meant to silence TensorFlow's C++ log output; it is set
# after `import tensorflow` above, so confirm it still takes effect as intended.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"


class DetectionModelTrainer:

    """
    This is the Detection Model training class, which allows you to train object detection models
    on image datasets that are in Pascal VOC annotation format, using the YOLOv3 architecture.
    """

    def __init__(self):
        self.__model_type = ""
        self.__training_mode = True

        self.__model_min_input_size = 288
        self.__model_max_input_size = 448
        self.__model_anchors = []
        self.__inference_anchors = []
        self.__json_directory = ""
        self.__model_labels = []
        self.__num_objects = 0
        self.__pre_trained_model = ""

        self.__train_images_folder = ""
        self.__train_annotations_folder = ""
        self.__train_cache_file = ""
        self.__train_times = 8
        self.__train_batch_size = 4
        self.__train_learning_rate = 1e-4
        self.__train_epochs = 100
        self.__train_warmup_epochs = 3
        self.__train_ignore_treshold = 0.5
        self.__train_gpus = "0"
        self.__train_grid_scales = [1, 1, 1]
        self.__train_obj_scale = 5
        self.__train_noobj_scale = 1
        self.__train_xywh_scale = 1
        self.__train_class_scale = 1
        self.__model_directory = ""
        self.__train_weights_name = ""
        self.__train_debug = True
        self.__logs_directory = ""

        self.__validation_images_folder = ""
        self.__validation_annotations_folder = ""
        self.__validation_cache_file = ""
        self.__validation_times = 1

    def setModelTypeAsYOLOv3(self):
        """
        'setModelTypeAsYOLOv3()' is used to set the model type to the YOLOv3 model
        for the training instance object .
        :return:
        """
        self.__model_type = "yolov3"

    def setDataDirectory(self, data_directory):

        """

        'setDataDirectory()' is required to set the path to which the data/dataset to be used for
                 training is kept. The directory can have any name, but it must have 'train' and 'validation'
                 sub-directory. In the 'train' and 'validation' sub-directories, there must be 'images' and 'annotations'
                 sub-directories respectively. The 'images' folder will contain the pictures for the dataset and the
                 'annotations' folder will contain the XML files with details of the annotations for each image in the
                 'images folder'.

                 N.B: Strictly take note that the filenames (without the extension) of the pictures in the 'images folder'
                  must be the same as the filenames (without the extension) of their corresponding annotation XML files in
                  the 'annotations' folder.

                 The structure of the 'train' and 'validation' folder must be as follows:

                >> train    >> images       >> img_1.jpg
                            >> images       >> img_2.jpg
                            >> images       >> img_3.jpg
                            >> annotations  >> img_1.xml
                            >> annotations  >> img_2.xml
                            >> annotations  >> img_3.xml


                >> validation   >> images       >> img_151.jpg
                                >> images       >> img_152.jpg
                                >> images       >> img_153.jpg
                                >> annotations  >> img_151.xml
                                >> annotations  >> img_152.xml
                                >> annotations  >> img_153.xml

        :param data_directory:
        :return:
        """

        self.__train_images_folder = os.path.join(data_directory, "train", "images")
        self.__train_annotations_folder = os.path.join(data_directory, "train", "annotations")
        self.__validation_images_folder = os.path.join(data_directory, "validation", "images")
        self.__validation_annotations_folder = os.path.join(data_directory, "validation", "annotations")

        os.makedirs(os.path.join(data_directory, "cache"), exist_ok=True)
        self.__train_cache_file = os.path.join(data_directory, "cache", "detection_train_data.pkl")
        self.__validation_cache_file = os.path.join(data_directory, "cache", "detection_test_data.pkl")

        os.makedirs(os.path.join(data_directory, "models"), exist_ok=True)

        os.makedirs(os.path.join(data_directory, "json"), exist_ok=True)

        os.makedirs(os.path.join(data_directory, "logs"), exist_ok=True)

        self.__model_directory = os.path.join(data_directory, "models")
        self.__train_weights_name = os.path.join(self.__model_directory, "detection_model-")
        self.__json_directory = os.path.join(data_directory, "json")
        self.__logs_directory = os.path.join(data_directory, "logs")

    def setGpuUsage(self, train_gpus):
        """
        'setGpuUsage' function allows you to set the GPUs to be used while training
        train_gpu can be:
        - an integer, indicating the number of GPUs to use
        - a list of integers, indicating the id of the GPUs to be used
        - a string, indicating the it og the id of the GPUs to be used, separated by commas
        :param train_gpus: gpus where to run
        :return:
        """
        # train_gpus, could be a string separated by comma, or a list of int or the number of GPUs to be used
        if type(train_gpus) == str:
            train_gpus = train_gpus.split(',')
        if type(train_gpus) == int:
            train_gpus = range(train_gpus)
        # let it as a string separated by commas
        self.__train_gpus = ','.join([str(gpu) for gpu in train_gpus])

    def setTrainConfig(self, object_names_array, batch_size=4, num_experiments=100, train_from_pretrained_model=""):

        """

        'setTrainConfig()' function allows you to set the properties for the training instances. It accepts the following values:

        - object_names_array , this is an array of the names of the different objects in your dataset
        - batch_size (optional),  this is the batch size for the training instance
        - num_experiments (optional),   also known as epochs, it is the number of times the network will train on all the training dataset
        - train_from_pretrained_model (optional), this is used to perform transfer learning by specifying the path to a pre-trained YOLOv3 model

        Besides storing these values, the function deletes any existing annotation cache files,
        generates the anchor boxes from the training annotations and writes the labels and
        inference anchors to 'detection_config.json' in the JSON directory.

        :param object_names_array:
        :param batch_size:
        :param num_experiments:
        :param train_from_pretrained_model:
        :return:
        """

        # Remove cache files left over from a previous configuration
        for cache_file in (self.__train_cache_file, self.__validation_cache_file):
            if os.path.isfile(cache_file):
                os.remove(cache_file)

        # Store the labels before generating the anchors, so that the anchor generator
        # receives the labels given in this call rather than the previous (initially
        # empty) value of the attribute
        self.__model_labels = sorted(object_names_array)
        self.__num_objects = len(object_names_array)

        self.__model_anchors, self.__inference_anchors = generateAnchors(self.__train_annotations_folder,
                                                                         self.__train_images_folder,
                                                                         self.__train_cache_file, self.__model_labels)

        self.__train_batch_size = batch_size
        self.__train_epochs = num_experiments
        self.__pre_trained_model = train_from_pretrained_model

        json_data = {
            "labels": self.__model_labels,
            "anchors": self.__inference_anchors,
        }

        config_path = os.path.join(self.__json_directory, "detection_config.json")
        # The file is only written, never read back here, so plain write mode is enough
        with open(config_path, "w") as json_file:
            json.dump(json_data, json_file, indent=4, separators=(",", " : "),
                      ensure_ascii=True)

        print("Detection configuration saved in ", config_path)

    def trainModel(self):

        """
        'trainModel()' function starts the actual model training. Once the training starts, the training instance
        creates 3 sub-folders in your dataset folder which are:

        - json,  where the JSON configuration file for using your trained model is stored
        - models, where your trained models are stored once they are generated after each improved experiments
        - cache , where temporary traing configuraton files are stored

        :return:
        """

        train_ints, valid_ints, labels, max_box_per_image = self._create_training_instances(
            self.__train_annotations_folder,
            self.__train_images_folder,
            self.__train_cache_file,
            self.__validation_annotations_folder,
            self.__validation_images_folder,
            self.__validation_cache_file,
            self.__model_labels

        )
        if self.__training_mode:
            print('Training on: \t' + str(labels) + '')
            print("Training with Batch Size: ", self.__train_batch_size)
            print("Number of Training Samples: ", len(train_ints))
            print("Number of Validation Samples: ", len(valid_ints))
            print("Number of Experiments: ", self.__train_epochs)

        ###############################
        #   Create the generators
        ###############################
        train_generator = BatchGenerator(
            instances=train_ints,
            anchors=self.__model_anchors,
            labels=labels,
            downsample=32,  # ratio between network input's size and network output's size, 32 for YOLOv3
            max_box_per_image=max_box_per_image,
            batch_size=self.__train_batch_size,
            min_net_size=self.__model_min_input_size,
            max_net_size=self.__model_max_input_size,
            shuffle=True,
            jitter=0.3,
            norm=normalize
        )

        valid_generator = BatchGenerator(
            instances=valid_ints,
            anchors=self.__model_anchors,
            labels=labels,
            downsample=32,  # ratio between network input's size and network output's size, 32 for YOLOv3
            max_box_per_image=max_box_per_image,
            batch_size=self.__train_batch_size,
            min_net_size=self.__model_min_input_size,
            max_net_size=self.__model_max_input_size,
            shuffle=True,
            jitter=0.0,
            norm=normalize
        )

        ###############################
        #   Create the model
        ###############################
        if os.path.exists(self.__pre_trained_model):
            self.__train_warmup_epochs = 0
        warmup_batches = self.__train_warmup_epochs * (self.__train_times * len(train_generator))

        os.environ['CUDA_VISIBLE_DEVICES'] = self.__train_gpus
        multi_gpu = [int(gpu) for gpu in self.__train_gpus.split(',')]

        """train_model, infer_model = self._create_model(
            nb_class=len(labels),
            anchors=self.__model_anchors,
            max_box_per_image=max_box_per_image,
            max_grid=[self.__model_max_input_size, self.__model_max_input_size],
            batch_size=self.__train_batch_size,
            warmup_batches=warmup_batches,
            ignore_thresh=self.__train_ignore_treshold,
            multi_gpu=multi_gpu,
            lr=self.__train_learning_rate,
            grid_scales=self.__train_grid_scales,
            obj_scale=self.__train_obj_scale,
            noobj_scale=self.__train_noobj_scale,
            xywh_scale=self.__train_xywh_scale,
            class_scale=self.__train_class_scale,
        )"""

        train_model, infer_model = self._create_model(
            nb_class=len(labels),
            anchors=self.__model_anchors,
            max_box_per_image=max_box_per_image,
            max_grid=[self.__model_max_input_size, self.__model_max_input_size],
            batch_size=self.__train_batch_size,
            warmup_batches=warmup_batches,
            ignore_thresh=self.__train_ignore_treshold,
            multi_gpu=multi_gpu,
            lr=self.__train_learning_rate,
            grid_scales=self.__train_grid_scales,
            obj_scale=self.__train_obj_scale,
            noobj_scale=self.__train_noobj_scale,
            xywh_scale=self.__train_xywh_scale,
            class_scale=self.__train_class_scale,
        )

        ###############################
        #   Kick off the training
        ###############################
        callbacks = self._create_callbacks(self.__train_weights_name, infer_model)

        train_model.fit_generator(
            generator=train_generator,
            steps_per_epoch=len(train_generator) * self.__train_times,
            validation_data=valid_generator,
            validation_steps=len(valid_generator) * self.__train_times,
            epochs=self.__train_epochs + self.__train_warmup_epochs,
            verbose=1,
            callbacks=callbacks,
            workers=4,
            max_queue_size=8
        )

    def evaluateModel(self, model_path, json_path, batch_size=4, iou_threshold=0.5, object_threshold=0.2, nms_threshold=0.45):
        """

        'evaluateModel()' is used to obtain the mAP metrics for your model(s). It accepts the following values:

        - model_path ( model file or folder), this value can be the part to your model file or the path to the folder containing all your saved model files
        - json_path ,   this is the path the the 'detection_config.json' file saved for the dataset during the training
        - iou_threshold , this value is used to set the desired 'IoU' to obtain the mAP metrics for your model(s)
        - object_threshold , this is used to set your desired minimum 'class score' to obtain the mAP metrics for your model(s)
        - nms_threshold , this is used to set your desired 'Non-maximum suppresion' to obtain the mAP metrics for your model(s)

        :param model_path:
        :param json_path:
        :param batch_size:
        :param iou_threshold:
        :param object_threshold:
        :param nms_threshold:
        :return: list of dictionaries, containing one dict per evaluated model.
            Each dict contains exactly the same metrics that are printed on standard output
        """

        self.__training_mode = False

        with open(json_path, 'r') as json_file:
            detection_model_json = json.load(json_file)

        temp_anchor_array = []
        new_anchor_array = []

        temp_anchor_array.append(detection_model_json["anchors"][2])
        temp_anchor_array.append(detection_model_json["anchors"][1])
        temp_anchor_array.append(detection_model_json["anchors"][0])

        for aa in temp_anchor_array:
            for aaa in aa:
                new_anchor_array.append(aaa)

        self.__model_anchors = new_anchor_array
        self.__model_labels = detection_model_json["labels"]
        self.__num_objects = len(self.__model_labels)

        self.__train_batch_size = batch_size
        self.__train_epochs = 100

        print("Starting Model evaluation....")

        _, valid_ints, labels, max_box_per_image = self._create_training_instances(
            self.__train_annotations_folder,
            self.__train_images_folder,
            self.__train_cache_file,
            self.__validation_annotations_folder,
            self.__validation_images_folder,
            self.__validation_cache_file,
            self.__model_labels

        )

        if len(valid_ints) == 0:
            print('Validation samples were not provided.')
            print('Please, check your validation samples are correctly provided:')
            print('\tAnnotations: {}\n\tImages: {}'.format(self.__validation_annotations_folder,
                                                           self.__validation_images_folder))

        valid_generator = BatchGenerator(
            instances=valid_ints,
            anchors=self.__model_anchors,
            labels=labels,
            downsample=32,  # ratio between network input's size and network output's size, 32 for YOLOv3
            max_box_per_image=max_box_per_image,
            batch_size=self.__train_batch_size,
            min_net_size=self.__model_min_input_size,
            max_net_size=self.__model_max_input_size,
            shuffle=True,
            jitter=0.0,
            norm=normalize
        )

        results = list()

        if os.path.isfile(model_path):
            # model_files must be a list containing the complete path to the files,
            # if a file is given, then the list contains just this file
            model_files = [model_path]
        elif os.path.isdir(model_path):
            # model_files must be a list containing the complete path to the files,
            # if a folder is given, then the list contains the complete path to each file on that folder
            model_files = sorted([os.path.join(model_path, file_name) for file_name in os.listdir(model_path)])
            # sort the files to make sure we're always evaluating them on same order
        else:
            print('model_path must be the path to a .h5 file or a directory. Found {}'.format(model_path))
            return results

        for model_file in model_files:
            if str(model_file).endswith(".h5"):
                try:
                    infer_model = load_model(model_file)

                    ###############################
                    #   Run the evaluation
                    ###############################
                    # compute mAP for all the classes
                    average_precisions = evaluate(infer_model, valid_generator, iou_threshold=iou_threshold,
                                                  obj_thresh=object_threshold, nms_thresh=nms_threshold)

                    result_dict = {
                        'model_file': model_file,
                        'using_iou': iou_threshold,
                        'using_object_threshold': object_threshold,
                        'using_non_maximum_suppression': nms_threshold,
                        'average_precision': dict(),
                        'evaluation_samples': len(valid_ints)
                    }
                    # print the score
                    print("Model File: ", model_file, '\n')
                    print("Evaluation samples: ", len(valid_ints))
                    print("Using IoU: ", iou_threshold)
                    print("Using Object Threshold: ", object_threshold)
                    print("Using Non-Maximum Suppression: ", nms_threshold)

                    for label, average_precision in average_precisions.items():
                        print(labels[label] + ': {:.4f}'.format(average_precision))
                        result_dict['average_precision'][labels[label]] = average_precision

                    print('mAP: {:.4f}'.format(sum(average_precisions.values()) / len(average_precisions)))
                    result_dict['map'] = sum(average_precisions.values()) / len(average_precisions)
                    print("===============================")

                    results.append(result_dict)
                except Exception as e:
                    print('skipping the evaluation of {} because following exception occurred: {}'.format(model_file, e))
                    continue
            else:
                print('skipping the evaluation of {} since it\'s not a .h5 file'.format(model_file))

        return results

    def _create_training_instances(self,
            train_annot_folder,
            train_image_folder,
            train_cache,
            valid_annot_folder,
            valid_image_folder,
            valid_cache,
            labels,
    ):
        """
        Parse the Pascal VOC annotations and build the training and validation instances.

        When 'valid_annot_folder' does not exist, the training instances are shuffled with a
        fixed seed and split 80% / 20% into training and validation instances.

        :param train_annot_folder: folder with the training annotation files
        :param train_image_folder: folder with the training images
        :param train_cache: cache file for the parsed training annotations
        :param valid_annot_folder: folder with the validation annotation files
        :param valid_image_folder: folder with the validation images
        :param valid_cache: cache file for the parsed validation annotations
        :param labels: labels to use; when empty, all labels seen in the training set are used
        :return: (train instances, validation instances, sorted labels, maximum number of objects
            found in a single image), or (None, None, None, None) if not every given label was
            seen in the training set
        """

        # annotations of the training set
        train_ints, train_labels = parse_voc_annotation(train_annot_folder, train_image_folder, train_cache, labels)

        # annotations of the validation set: a dedicated folder when present, else a split of the training set
        if not os.path.exists(valid_annot_folder):
            train_portion = 0.8  # use 80% to train and the remaining 20% to evaluate
            split_index = int(round(train_portion * len(train_ints)))
            np.random.seed(0)
            np.random.shuffle(train_ints)

            valid_ints = train_ints[split_index:]
            train_ints = train_ints[:split_index]
            print('Evaluating over {} samples taken as {:5.2f}% of the training set '
                  'given at {}'.format(len(valid_ints),
                                       (1 - train_portion)*100,
                                       os.path.dirname(train_annot_folder)))
        else:
            valid_ints, valid_labels = parse_voc_annotation(valid_annot_folder, valid_image_folder, valid_cache, labels)
            print('Evaluating over {} samples taken from {}'.format(len(valid_ints),
                                                                    os.path.dirname(valid_annot_folder)))

        print('Training over {} samples  given at {}'.format(len(train_ints), os.path.dirname(train_annot_folder)))

        verbose = self.__training_mode

        if len(labels) == 0:
            # nothing was requested explicitly: fall back to every label seen while parsing
            if verbose:
                print('No labels are provided. Train on all seen labels.')
                print(train_labels)

            labels = train_labels.keys()
        else:
            # every requested label has to occur in the training annotations
            seen_requested = set(labels) & set(train_labels.keys())

            if len(seen_requested) < len(labels):
                if verbose:
                    print('Some labels have no annotations! Please revise the list of labels in your configuration.')
                return None, None, None, None

        # largest number of annotated objects in any single image of both sets
        all_instances = train_ints + valid_ints
        max_box_per_image = max([len(instance['object']) for instance in all_instances])

        return train_ints, valid_ints, sorted(labels), max_box_per_image

    def _create_callbacks(self, saved_weights_name, model_to_save):
        """
        Build the list of Keras callbacks used while training.

        :param saved_weights_name: filename prefix for the saved model files; the experiment
            number and the loss are appended to it
        :param model_to_save: model instance handed to the checkpoint callback
        :return: list with the checkpoint, the learning-rate reduction and the TensorBoard callbacks
        """

        # save a model file whenever the training loss improves
        checkpoint = CustomModelCheckpoint(
            model_to_save=model_to_save,
            filepath=saved_weights_name + 'ex-{epoch:03d}--loss-{loss:08.3f}.h5',
            monitor='loss',
            verbose=0,
            save_best_only=True,
            mode='min',
            period=1
        )
        # multiply the learning rate by 0.1 when the loss did not improve by at least 0.01
        # for 2 epochs; 'min_delta' is the current name of the deprecated 'epsilon' argument
        reduce_on_plateau = ReduceLROnPlateau(
            monitor='loss',
            factor=0.1,
            patience=2,
            verbose=0,
            mode='min',
            min_delta=0.01,
            cooldown=0,
            min_lr=0
        )
        tensor_board = TensorBoard(
            log_dir=self.__logs_directory
        )
        return [checkpoint, reduce_on_plateau, tensor_board]

    def _create_model(
            self,
            nb_class,
            anchors,
            max_box_per_image,
            max_grid, batch_size,
            warmup_batches,
            ignore_thresh,
            multi_gpu,
            lr,
            grid_scales,
            obj_scale,
            noobj_scale,
            xywh_scale,
            class_scale
    ):
        """
        Create and compile the YOLOv3 training model together with its inference model.

        Apart from 'multi_gpu' and 'lr', the arguments are forwarded to 'yolov3_train'.

        :param multi_gpu: list of GPU ids; with more than one id the template model is built on
            the CPU with the batch size divided by the number of GPUs, and then wrapped
            with 'multi_gpu_model'
        :param lr: learning rate for the Adam optimizer
        :return: (compiled training model, inference model)
        """
        num_gpus = len(multi_gpu)

        # arguments shared by the single-GPU and the multi-GPU construction
        yolo_kwargs = dict(
            num_classes=nb_class,
            anchors=anchors,
            max_box_per_image=max_box_per_image,
            max_grid=max_grid,
            warmup_batches=warmup_batches,
            ignore_thresh=ignore_thresh,
            grid_scales=grid_scales,
            obj_scale=obj_scale,
            noobj_scale=noobj_scale,
            xywh_scale=xywh_scale,
            class_scale=class_scale
        )

        if num_gpus > 1:
            with tf.device('/cpu:0'):
                template_model, infer_model = yolov3_train(batch_size=batch_size // num_gpus, **yolo_kwargs)
        else:
            template_model, infer_model = yolov3_train(batch_size=batch_size, **yolo_kwargs)

        # load the weights of the pre-trained model, if one was configured
        if len(self.__pre_trained_model) > 3:
            if self.__training_mode:
                print("Training with transfer learning from pretrained Model")
            template_model.load_weights(self.__pre_trained_model, by_name=True)
        else:
            if self.__training_mode:
                print("Pre-trained Model not provided. Transfer learning not in use.")
                print("Training will start with 3 warmup experiments")

        if num_gpus > 1:
            train_model = multi_gpu_model(template_model, gpus=multi_gpu)
        else:
            train_model = template_model

        # 'learning_rate' is the current name of the deprecated 'lr' optimizer argument
        optimizer = Adam(learning_rate=lr, clipnorm=0.001)
        train_model.compile(loss=dummy_loss, optimizer=optimizer)

        return train_model, infer_model


class CustomObjectDetection:

    """
    This is the object detection class for using your custom trained models. It supports your custom trained
    YOLOv3 model and allows you to perform object detection in images.
    """

    def __init__(self):
        self.__model_type = ""
        self.__model_path = ""
        self.__model_labels = []
        self.__model_anchors = []
        self.__detection_config_json_path = ""
        # side length (in pixels) every image is resized to before being fed to the network
        self.__input_size = 416
        self.__object_threshold = 0.4
        self.__nms_threshold = 0.4
        self.__model = None
        self.__detection_utils = CustomDetectionUtils(labels=[])

    def setModelTypeAsYOLOv3(self):
        """
        'setModelTypeAsYOLOv3' is used to set your custom detection model as YOLOv3
        :return: None
        """
        self.__model_type = "yolov3"

    def setModelPath(self, detection_model_path):
        """
        'setModelPath' is used to specify the filepath to your custom detection model
        :param detection_model_path: path to the .h5 model file.
            Usually is one of those under <data_directory>/models/detection_model-ex-ddd--loss-dddd.ddd.h5
        :return: None
        """
        self.__model_path = detection_model_path

    def setJsonPath(self, configuration_json):
        """
        'setJsonPath' is used to set the filepath to the configuration JSON file for your custom detection model
        :param configuration_json: path to the .json file. Usually it is <data_directory>/json/detection_config.json
        :return: None
        """
        self.__detection_config_json_path = configuration_json

    def loadModel(self):

        """
        'loadModel' is used to load the model into the CustomObjectDetection class.

        The configuration JSON must provide the "labels" and "anchors" entries. Nothing is loaded unless the
        model type was previously set with 'setModelTypeAsYOLOv3()'.

        :return: None
        """

        if self.__model_type == "yolov3":
            # read the configuration through a context manager so the file handle is always closed
            with open(self.__detection_config_json_path) as configuration_file:
                detection_model_json = json.load(configuration_file)

            self.__model_labels = detection_model_json["labels"]
            self.__model_anchors = detection_model_json["anchors"]

            self.__detection_utils = CustomDetectionUtils(labels=self.__model_labels)

            self.__model = yolov3_main(Input(shape=(None, None, 3)), 3, len(self.__model_labels))

            self.__model.load_weights(self.__model_path)

    def detectObjectsFromImage(self, input_image="", output_image_path="", input_type="file", output_type="file",
                               extract_detected_objects=False, minimum_percentage_probability=50, nms_treshold=0.4,
                               display_percentage_probability=True, display_object_name=True, thread_safe=False):

        """

        'detectObjectsFromImage()' function is used to detect objects observable in the given image:
                    * input_image , which can be a filepath or image numpy array in BGR
                    * output_image_path (only if output_type = file) , file path to the output image that will contain the detection boxes and label, if output_type="file"
                    * input_type (optional) , filepath/numpy array of the image. Acceptable values are "file" and "array"
                    * output_type (optional) , file path/numpy array of the image. Acceptable values are "file" and "array"
                    * extract_detected_objects (optional) , option to save each object detected individually as an image and return an array of the objects' image path.
                    * minimum_percentage_probability (optional, 50 by default) , option to set the minimum percentage probability for nominating a detected object for output.
                    * nms_treshold (optional, 0.4 by default) , option to set the Non-maximum suppression threshold for the detection
                    * display_percentage_probability (optional, True by default), option to show or hide the percentage probability of each object in the saved/returned detected image
                    * display_object_name (optional, True by default), option to show or hide the name of each object in the saved/returned detected image
                    * thread_safe (optional, False by default), enforce the loaded detection model works across all threads if set to true, made possible by forcing all Keras inference to run on the default graph


            The values returned by this function depend on the parameters passed. The possible return values
            are stated below
            - If extract_detected_objects = False or at its default value and output_type = 'file' or
                at its default value, you must pass the 'output_image_path' as a string to the path you want
                the detected image to be saved. Then the function will return:
                1. an array of dictionaries, with each dictionary corresponding to the objects
                    detected in the image. Each dictionary contains the following property:
                    * name (string)
                    * percentage_probability (float)
                    * box_points (list of x1,y1,x2 and y2 coordinates)

            - If extract_detected_objects = False or at its default value and output_type = 'array' ,
              Then the function will return:

                1. a numpy array of the detected image
                2. an array of dictionaries, with each dictionary corresponding to the objects
                    detected in the image. Each dictionary contains the following property:
                    * name (string)
                    * percentage_probability (float)
                    * box_points (list of x1,y1,x2 and y2 coordinates)

            - If extract_detected_objects = True and output_type = 'file' or
                at its default value, you must pass the 'output_image_path' as a string to the path you want
                the detected image to be saved. Then the function will return:
                1. an array of dictionaries, with each dictionary corresponding to the objects
                    detected in the image. Each dictionary contains the following property:
                    * name (string)
                    * percentage_probability (float)
                    * box_points (list of x1,y1,x2 and y2 coordinates)
                2. an array of string paths to the image of each object extracted from the image

            - If extract_detected_objects = True and output_type = 'array', then the function will return:
                1. a numpy array of the detected image
                2. an array of dictionaries, with each dictionary corresponding to the objects
                    detected in the image. Each dictionary contains the following property:
                    * name (string)
                    * percentage_probability (float)
                    * box_points (list of x1,y1,x2 and y2 coordinates)
                3. an array of numpy arrays of each object detected in the image

        :param input_image:
        :param output_image_path:
        :param input_type:
        :param output_type:
        :param extract_detected_objects:
        :param minimum_percentage_probability:
        :param nms_treshold:
        :param display_percentage_probability:
        :param display_object_name:
        :param thread_safe:
        :return image_frame:
        :return output_objects_array:
        :return detected_objects_image_array:
        :raises ValueError: if loadModel() was not called, if output_image_path has an unsupported extension,
            if input_type is neither 'file' nor 'array', or if no image could be obtained from input_image
        """

        if self.__model is None:
            raise ValueError("You must call the loadModel() function before making object detection.")

        if output_type == "file":
            # from the image file, lets keep the directory and the filename, but remove its  format
            # if output_image_path is path/to/the/output/image.png
            # then output_image_folder is  path/to/the/output/image
            # let's check if it is in the appropriated format soon to fail early
            output_image_folder, n_subs = re.subn(r'\.(?:jpe?g|png|tif|webp|PPM|PGM)$', '', output_image_path, flags=re.I)
            if n_subs == 0:
                # if no substitution was done, the given output_image_path is not in a supported format,
                # raise an error
                raise ValueError("output_image_path must be the path where to write the image. "
                                 "Therefore it must end as one the following: "
                                 "'.jpg', '.png', '.tif', '.webp', '.PPM', '.PGM'. {} found".format(output_image_path))
            elif extract_detected_objects:
                # Results must be written as files and need to extract detected objects as images,
                # let's create a folder to store the object's images
                objects_dir = output_image_folder + "-objects"

                os.makedirs(objects_dir, exist_ok=True)

        self.__object_threshold = minimum_percentage_probability / 100
        self.__nms_threshold = nms_treshold

        output_objects_array = []
        detected_objects_image_array = []

        if input_type == "file":
            image = cv2.imread(input_image)
            if image is None:
                # cv2.imread signals a missing/unreadable file by returning None instead of raising
                raise ValueError("Could not read an image from '{}'. Check that the file exists and is a "
                                 "supported image format.".format(input_image))
        elif input_type == "array":
            image = input_image
            if image is None:
                raise ValueError("input_image must be an image numpy array when input_type is 'array'. "
                                 "None found")
        else:
            raise ValueError("input_type must be 'file' or 'array'. {} found".format(input_type))

        # keep an untouched copy at the original resolution for drawing and cropping
        image_frame = image.copy()

        height, width, channels = image.shape

        image = cv2.resize(image, (self.__input_size, self.__input_size))

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        image = image.astype("float32") / 255.

        # expand the image to batch
        image = np.expand_dims(image, 0)

        if self.__model_type == "yolov3":
            if thread_safe:
                with K.get_session().graph.as_default():
                    yolo_results = self.__model.predict(image)
            else:
                yolo_results = self.__model.predict(image)

            boxes = list()

            # each network output is decoded with the anchor set stored at the same index
            for idx, result in enumerate(yolo_results):
                box_set = self.__detection_utils.decode_netout(result[0], self.__model_anchors[idx],
                                                               self.__object_threshold, self.__input_size,
                                                               self.__input_size)
                boxes += box_set

            self.__detection_utils.correct_yolo_boxes(boxes, height, width, self.__input_size, self.__input_size)

            self.__detection_utils.do_nms(boxes, self.__nms_threshold)

            all_boxes, all_labels, all_scores = self.__detection_utils.get_boxes(boxes, self.__model_labels,
                                                                                 self.__object_threshold)

            for object_box, object_label, object_score in zip(all_boxes, all_labels, all_scores):
                # negative coordinates would be interpreted as "from the end" when slicing the frame
                if object_box.xmin < 0:
                    object_box.xmin = 0
                if object_box.ymin < 0:
                    object_box.ymin = 0

                output_objects_array.append({
                    "name": object_label,
                    "percentage_probability": object_score,
                    "box_points": [object_box.xmin, object_box.ymin, object_box.xmax, object_box.ymax],
                })

            drawn_image = self.__detection_utils.draw_boxes_and_caption(image_frame.copy(), all_boxes, all_labels,
                                                                        all_scores, show_names=display_object_name,
                                                                        show_percentage=display_percentage_probability)

            if extract_detected_objects:

                for cnt, each_object in enumerate(output_objects_array):
                    x1, y1, x2, y2 = each_object["box_points"]

                    splitted_image = image_frame[y1:y2, x1:x2]
                    if output_type == "file":
                        splitted_image_path = os.path.join(objects_dir, "{}-{:05d}.jpg".format(each_object["name"],
                                                                                               cnt))

                        cv2.imwrite(splitted_image_path, splitted_image)
                        detected_objects_image_array.append(splitted_image_path)
                    elif output_type == "array":
                        detected_objects_image_array.append(splitted_image.copy())

            if output_type == "file":
                # we already validated that the output_image_path is a supported by OpenCV one
                cv2.imwrite(output_image_path, drawn_image)

            if extract_detected_objects:
                if output_type == "file":
                    return output_objects_array, detected_objects_image_array
                elif output_type == "array":
                    return drawn_image, output_objects_array, detected_objects_image_array

            else:
                if output_type == "file":
                    return output_objects_array
                elif output_type == "array":
                    return drawn_image, output_objects_array


class CustomVideoObjectDetection:


    """

    This is the object detection class for videos and camera live stream inputs using your custom trained detection models. It provides support for your custom YOLOv3 models.

    """

    def __init__(self):
        self.__model_type = ""
        self.__model_path = ""
        self.__model_labels = []
        self.__model_anchors = []
        self.__detection_config_json_path = ""
        self.__model_loaded = False
        self.__input_size = 416
        self.__object_threshold = 0.4
        self.__nms_threshold = 0.4
        # replaced by a loaded CustomObjectDetection instance in loadModel()
        self.__detector = []
        self.__detection_utils = CustomDetectionUtils(labels=[])

    def setModelTypeAsYOLOv3(self):

        """
        'setModelTypeAsYOLOv3' is used to set your custom detection model as YOLOv3
        :return: None
        """

        self.__model_type = "yolov3"


    def setModelPath(self, detection_model_path):
        """
        'setModelPath' is used to specify the filepath to your custom detection model

        :param detection_model_path: path to the custom detection model file
        :return: None
        """
        self.__model_path = detection_model_path


    def setJsonPath(self, configuration_json):
        """
        'setJsonPath' is used to set the filepath to the configuration JSON file for your custom detection model

        :param configuration_json: path to the configuration JSON file
        :return: None
        """
        self.__detection_config_json_path = configuration_json

    def loadModel(self):
        """
        'loadModel' is used to load the model into the CustomVideoObjectDetection class. The work is delegated
        to a CustomObjectDetection instance configured with the model path and JSON path set on this object.
        The model is loaded only once; later calls do nothing.

        :return: None
        """

        if not self.__model_loaded and self.__model_type == "yolov3":
            detector = CustomObjectDetection()
            detector.setModelTypeAsYOLOv3()
            detector.setModelPath(self.__model_path)
            detector.setJsonPath(self.__detection_config_json_path)
            detector.loadModel()

            self.__detector = detector
            self.__model_loaded = True

    @staticmethod
    def _last_frames(frames_dict, counting, window_size):
        """Return the entries of frames_dict (keyed by 1-based frame number) for the last window_size frames."""
        threshold = counting - window_size
        # start at the floor of the threshold and keep the comparison, so a float window_size selects
        # exactly the same frames as scanning every frame would
        first = max(0, int(threshold))
        return [frames_dict[aa + 1] for aa in range(first, counting) if aa >= threshold]

    @staticmethod
    def _sum_object_counts(counting_array):
        """Add up the per-frame {object name: count} dictionaries into a single dictionary of totals."""
        totals = {}
        for eachCountingDict in counting_array:
            for eachItem in eachCountingDict:
                totals[eachItem] = totals.get(eachItem, 0) + eachCountingDict[eachItem]
        return totals


    def detectObjectsFromVideo(self, input_file_path="", camera_input=None, output_file_path="", frames_per_second=20,
                               frame_detection_interval=1, minimum_percentage_probability=50, log_progress=False,
                               display_percentage_probability=True, display_object_name=True, save_detected_video=True,
                               per_frame_function=None, per_second_function=None, per_minute_function=None,
                               video_complete_function=None, return_detected_frame=False, detection_timeout = None):


        """

        'detectObjectsFromVideo()' function is used to detect objects observable in the given video path or a camera input:
            * input_file_path , which is the file path to the input video. It is required only if 'camera_input' is not set
            * camera_input , allows you to pass in camera input for live video detections
            * output_file_path , which is the path to the output video, without extension ('.avi' is appended). It is required only if 'save_detected_video' is not set to False
            * frames_per_second , which is the number of frames to be used in the output video
            * frame_detection_interval (optional, 1 by default)  , which is the intervals of frames that will be detected.
            * minimum_percentage_probability (optional, 50 by default) , option to set the minimum percentage probability for nominating a detected object for output.
            * log_progress (optional) , which states if the progress of the frame processed is to be logged to console
            * display_percentage_probability (optional), can be used to hide or show probability scores on the detected video frames
            * display_object_name (optional), can be used to show or hide object names on the detected video frames
            * save_detected_video (optional, True by default), can be set to or not to save the detected video
            * per_frame_function (optional), this parameter allows you to pass in a function you will want to execute after each frame of the video is detected. If this parameter is set to a function, after every video  frame is detected, the function will be executed with the following values passed into it:
                -- position number of the frame
                -- an array of dictionaries, with each dictionary corresponding to each object detected. Each dictionary contains 'name', 'percentage_probability' and 'box_points'
                -- a dictionary with keys being the name of each unique objects and value are the number of instances of the object present
                -- If return_detected_frame is set to True, the numpy array of the detected frame will be passed as the fourth value into the function

            * per_second_function (optional), this parameter allows you to pass in a function you will want to execute after each second of the video is detected. If this parameter is set to a function, after every second of a video is detected, the function will be executed with the following values passed into it:
                -- position number of the second
                -- an array with one entry per frame of the last second, each entry being the array of dictionaries for the objects detected in that frame
                -- an array of dictionaries, with each dictionary corresponding to each frame in the past second, and the keys of each dictionary are the names of the unique objects detected in each frame, and the values are the number of instances of the objects found in the frame
                -- a dictionary with its keys being the name of each unique object detected throughout the past second, and the values are the average number of instances (truncated to an integer) of the object found in all the frames contained in the past second
                -- If return_detected_frame is set to True, the numpy array of the detected frame will be passed
                                                                    as the fifth value into the function

            * per_minute_function (optional), this parameter allows you to pass in a function you will want to execute after each minute of the video is detected. If this parameter is set to a function, after every minute of a video is detected, the function will be executed with the following values passed into it:
                -- position number of the minute
                -- an array with one entry per frame of the last minute, each entry being the array of dictionaries for the objects detected in that frame

                -- an array of dictionaries, with each dictionary corresponding to each frame in the past minute, and the keys of each dictionary are the names of the unique objects detected in each frame, and the values are the number of instances of the objects found in the frame

                -- a dictionary with its keys being the name of each unique object detected throughout the past minute, and the values are the average number of instances (truncated to an integer) of the object found in all the frames contained in the past minute

                -- If return_detected_frame is set to True, the numpy array of the detected frame will be passed as the fifth value into the function

            * video_complete_function (optional), this parameter allows you to pass in a function you will want to execute after all of the video frames have been detected. If this parameter is set to a function, after all of frames of a video is detected, the function will be executed with the following values passed into it:
                -- an array with one entry per frame of the entire video, each entry being the array of dictionaries for the objects detected in that frame
                -- an array of dictionaries, with each dictionary corresponding to each frame in the entire video, and the keys of each dictionary are the names of the unique objects detected in each frame, and the values are the number of instances of the objects found in the frame
                -- a dictionary with its keys being the name of each unique object detected throughout the entire video, and the values are the average number of instances of the object found in all the frames contained in the entire video

            * return_detected_frame (optional, False by default), option to pass the last detected video frame into the per_frame_function, per_second_function or per_minute_function

            * detection_timeout (optional, None by default), option to state the number of seconds of a video that should be detected after which the detection function stop processing the video

        Nothing is processed (and None is returned) unless the model type was set with 'setModelTypeAsYOLOv3()'.
        The input video (including a caller supplied 'camera_input') and the output video are always released
        before the function returns, even when one of the callbacks raises.

        :param input_file_path:
        :param camera_input:
        :param output_file_path:
        :param frames_per_second:
        :param frame_detection_interval:
        :param minimum_percentage_probability:
        :param log_progress:
        :param display_percentage_probability:
        :param display_object_name:
        :param save_detected_video:
        :param per_frame_function:
        :param per_second_function:
        :param per_minute_function:
        :param video_complete_function:
        :param return_detected_frame:
        :param detection_timeout:
        :return output_video_filepath: path of the written video if save_detected_video is set, otherwise None
        """

        if self.__model_type != "yolov3":
            # no supported model type configured: there is nothing to do, so do not open any resource
            return None

        output_frames_dict = {}
        output_frames_count_dict = {}

        # only open the file when no camera was supplied, otherwise a useless capture would be created
        if camera_input is not None:
            input_video = camera_input
        else:
            input_video = cv2.VideoCapture(input_file_path)

        output_video_filepath = output_file_path + '.avi'
        output_video = None

        counting = 0
        detection_timeout_count = 0
        video_frames_count = 0
        frames_per_minute = frames_per_second * 60

        try:
            if save_detected_video:
                # property ids 3 and 4 are the frame width and frame height of the capture
                frame_width = int(input_video.get(3))
                frame_height = int(input_video.get(4))
                output_video = cv2.VideoWriter(output_video_filepath, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                                               frames_per_second,
                                               (frame_width, frame_height))

            while input_video.isOpened():
                ret, frame = input_video.read()

                if not ret:
                    break

                detected_frame = frame.copy()

                video_frames_count += 1
                if detection_timeout is not None:
                    # one more "second" of video has elapsed every frames_per_second frames
                    if (video_frames_count % frames_per_second) == 0:
                        detection_timeout_count += 1

                    if detection_timeout_count >= detection_timeout:
                        break

                output_objects_array = []

                counting += 1

                if log_progress:
                    print("Processing Frame : ", str(counting))

                is_detection_frame = counting == 1 or (counting % frame_detection_interval) == 0

                if is_detection_frame:
                    try:
                        detected_frame, output_objects_array = self.__detector.detectObjectsFromImage(
                            input_image=frame, input_type="array", output_type="array",
                            minimum_percentage_probability=minimum_percentage_probability,
                            display_percentage_probability=display_percentage_probability,
                            display_object_name=display_object_name)
                    except Exception as detection_error:
                        # best effort: a frame that fails detection is kept undetected rather than
                        # aborting the whole video, but the failure is no longer completely silent
                        if log_progress:
                            print("Detection failed on frame {}: {}".format(counting, detection_error))

                output_frames_dict[counting] = output_objects_array

                output_objects_count = {}
                for eachItem in output_objects_array:
                    eachItemName = eachItem["name"]
                    output_objects_count[eachItemName] = output_objects_count.get(eachItemName, 0) + 1

                output_frames_count_dict[counting] = output_objects_count

                if output_video is not None:
                    output_video.write(detected_frame)

                if is_detection_frame and per_frame_function is not None:
                    if return_detected_frame:
                        per_frame_function(counting, output_objects_array, output_objects_count,
                                           detected_frame)
                    else:
                        per_frame_function(counting, output_objects_array, output_objects_count)

                if per_second_function is not None and counting != 1 and (counting % frames_per_second) == 0:

                    this_second_output_object_array = self._last_frames(output_frames_dict, counting,
                                                                        frames_per_second)
                    this_second_counting_array = self._last_frames(output_frames_count_dict, counting,
                                                                   frames_per_second)
                    this_second_counting = self._sum_object_counts(this_second_counting_array)

                    for eachCountingItem in this_second_counting:
                        this_second_counting[eachCountingItem] = int(this_second_counting[eachCountingItem] / frames_per_second)

                    if return_detected_frame:
                        per_second_function(int(counting / frames_per_second),
                                            this_second_output_object_array, this_second_counting_array,
                                            this_second_counting, detected_frame)
                    else:
                        per_second_function(int(counting / frames_per_second),
                                            this_second_output_object_array, this_second_counting_array,
                                            this_second_counting)

                if per_minute_function is not None and counting != 1 and (counting % frames_per_minute) == 0:

                    this_minute_output_object_array = self._last_frames(output_frames_dict, counting,
                                                                        frames_per_minute)
                    this_minute_counting_array = self._last_frames(output_frames_count_dict, counting,
                                                                   frames_per_minute)
                    this_minute_counting = self._sum_object_counts(this_minute_counting_array)

                    for eachCountingItem in this_minute_counting:
                        this_minute_counting[eachCountingItem] = int(this_minute_counting[eachCountingItem] / frames_per_minute)

                    if return_detected_frame:
                        per_minute_function(int(counting / frames_per_minute),
                                            this_minute_output_object_array, this_minute_counting_array,
                                            this_minute_counting, detected_frame)
                    else:
                        per_minute_function(int(counting / frames_per_minute),
                                            this_minute_output_object_array, this_minute_counting_array,
                                            this_minute_counting)

            if video_complete_function is not None:

                this_video_output_object_array = [output_frames_dict[aa + 1] for aa in range(counting)]
                this_video_counting_array = [output_frames_count_dict[aa + 1] for aa in range(counting)]
                this_video_counting = self._sum_object_counts(this_video_counting_array)

                for eachCountingItem in this_video_counting:
                    this_video_counting[eachCountingItem] = this_video_counting[eachCountingItem] / counting

                video_complete_function(this_video_output_object_array, this_video_counting_array,
                                        this_video_counting)
        finally:
            # always give the capture and the writer back, even when a callback raised
            input_video.release()
            if output_video is not None:
                output_video.release()

        if save_detected_video:
            return output_video_filepath


class BoundBox:
    """Axis-aligned detection box carrying an objectness value and per-class scores.

    ``label`` and ``score`` start at -1 ("not computed yet") and are filled in
    lazily by :meth:`get_label` and :meth:`get_score`.
    """

    def __init__(self, xmin, ymin, xmax, ymax, objness=None, classes=None):
        self.xmin, self.ymin = xmin, ymin
        self.xmax, self.ymax = xmax, ymax
        self.objness, self.classes = objness, classes
        # -1 is the sentinel meaning "not cached yet" for both values.
        self.label = self.score = -1

    def get_label(self):
        """Return the index of the largest entry of ``classes`` (cached after the first call)."""
        if self.label != -1:
            return self.label

        self.label = np.argmax(self.classes)
        return self.label

    def get_score(self):
        """Return ``classes[get_label()]`` (cached after the first call)."""
        if self.score != -1:
            return self.score

        self.score = self.classes[self.get_label()]
        return self.score


class CustomDetectionUtils:
    """Post-processing helpers for YOLO-style custom detections.

    Stores the label list plus one randomly generated color per label and offers
    decoding of raw network output, box rescaling, non-maximum suppression,
    score filtering and drawing.
    """

    def __init__(self, labels):
        """Keep ``labels`` and generate one random color per label.

        Each color is a list of three Python ints drawn from ``[50, 255)``.
        """
        self.__labels = labels
        self.__colors = []

        for _ in range(len(labels)):
            red, green, blue = np.random.randint(50, 255, size=(3,))
            self.__colors.append([int(red), int(green), int(blue)])

    @staticmethod
    def _sigmoid(x):
        """Logistic function ``1 / (1 + exp(-x))``, element-wise for arrays."""
        return 1. / (1. + np.exp(-x))

    def decode_netout(self, netout, anchors, obj_thresh, net_h, net_w):
        """Turn one raw output grid into a list of ``BoundBox`` objects.

        Args:
            netout: array whose first two axes are the grid rows/columns and whose
                remaining data reshapes to ``(grid_h, grid_w, 3, 5 + nb_class)``.
            anchors: flat sequence ``[w0, h0, w1, h1, w2, h2]`` for the 3 boxes per cell.
            obj_thresh: boxes whose objectness is ``<=`` this value are dropped and
                class scores ``<=`` this value are zeroed.
            net_h, net_w: network input size used to normalise anchor sizes.

        Returns:
            List of ``BoundBox`` with coordinates expressed as fractions of the
            image width/height.

        Note:
            The sigmoid and thresholding steps write into the reshaped array, so the
            caller's ``netout`` is modified too whenever ``reshape`` returns a view.
        """
        grid_h, grid_w = netout.shape[:2]
        nb_box = 3
        netout = netout.reshape((grid_h, grid_w, nb_box, -1))
        boxes = []

        # x, y offsets and objectness/class logits go through a sigmoid; w, h stay raw.
        netout[..., :2] = self._sigmoid(netout[..., :2])
        netout[..., 4:] = self._sigmoid(netout[..., 4:])
        # Class score = objectness * class probability, zeroed when not above threshold.
        netout[..., 5:] = netout[..., 4][..., np.newaxis] * netout[..., 5:]
        netout[..., 5:] *= netout[..., 5:] > obj_thresh

        for row in range(grid_h):
            for col in range(grid_w):
                for b in range(nb_box):
                    # element 4 is the objectness score
                    objectness = netout[row, col, b, 4]

                    if objectness <= obj_thresh:
                        continue

                    # first 4 elements are x, y, w, and h
                    x, y, w, h = netout[row, col, b, :4]
                    x = (col + x) / grid_w  # center position, unit: image width
                    y = (row + y) / grid_h  # center position, unit: image height
                    w = anchors[2 * b + 0] * np.exp(w) / net_w  # unit: image width
                    h = anchors[2 * b + 1] * np.exp(h) / net_h  # unit: image height
                    # last elements are class scores
                    classes = netout[row, col, b, 5:]
                    boxes.append(BoundBox(x - w / 2, y - h / 2, x + w / 2, y + h / 2, objectness, classes))

        return boxes

    @staticmethod
    def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
        """Rescale box coordinates in place to integer pixel positions of the image.

        The "new" size equals the network size here, so the offsets are 0 and the
        scales are 1; the arithmetic is kept so the results match the original form.
        """
        new_w, new_h = net_w, net_h
        # These values do not depend on the box, so compute them once.
        x_offset, x_scale = (net_w - new_w) / 2. / net_w, float(new_w) / net_w
        y_offset, y_scale = (net_h - new_h) / 2. / net_h, float(new_h) / net_h

        for box in boxes:
            box.xmin = int((box.xmin - x_offset) / x_scale * image_w)
            box.xmax = int((box.xmax - x_offset) / x_scale * image_w)
            box.ymin = int((box.ymin - y_offset) / y_scale * image_h)
            box.ymax = int((box.ymax - y_offset) / y_scale * image_h)

    def _interval_overlap(self, interval_a, interval_b):
        """Return the overlap length of two 1-D intervals ``[start, end]`` (0 when disjoint)."""
        x1, x2 = interval_a
        x3, x4 = interval_b
        if x3 < x1:
            if x4 < x1:
                return 0
            return min(x2, x4) - x1
        if x2 < x3:
            return 0
        return min(x2, x4) - x3

    def bbox_iou(self, box1, box2):
        """Intersection-over-union of two boxes; ``0.0`` when the union area is zero."""
        intersect_w = self._interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
        intersect_h = self._interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
        intersect = intersect_w * intersect_h
        w1, h1 = box1.xmax - box1.xmin, box1.ymax - box1.ymin
        w2, h2 = box2.xmax - box2.xmin, box2.ymax - box2.ymin
        union = w1 * h1 + w2 * h2 - intersect

        # Explicit guard instead of a bare ``except``: only the zero-union case is
        # expected here, and unrelated errors should not be silently turned into 0.0.
        if union == 0:
            return 0.0

        return float(intersect) / float(union)

    def do_nms(self, boxes, nms_thresh):
        """Per-class non-maximum suppression, performed in place.

        For every class, boxes are visited from highest to lowest score and any
        later box whose IoU with a kept box is ``>= nms_thresh`` gets that class
        score set to 0. Returns ``None``; an empty ``boxes`` list is a no-op.
        """
        if len(boxes) == 0:
            return

        nb_class = len(boxes[0].classes)

        for c in range(nb_class):
            sorted_indices = np.argsort([-box.classes[c] for box in boxes])

            for i in range(len(sorted_indices)):
                index_i = sorted_indices[i]

                # Already suppressed (or never scored) for this class.
                if boxes[index_i].classes[c] == 0:
                    continue

                for j in range(i + 1, len(sorted_indices)):
                    index_j = sorted_indices[j]

                    if self.bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh:
                        boxes[index_j].classes[c] = 0

    def get_boxes(self, boxes, labels, thresh):
        """Collect every (box, label, score-in-percent) whose class score exceeds ``thresh``.

        A box appears once per label that passes the threshold.
        """
        v_boxes, v_labels, v_scores = list(), list(), list()
        for box in boxes:
            for i in range(len(labels)):
                if box.classes[i] > thresh:
                    v_boxes.append(box)
                    v_labels.append(labels[i])
                    v_scores.append(box.classes[i] * 100)
                # don't break, many labels may trigger for one box
        return v_boxes, v_labels, v_scores

    def label_color(self, label):
        """ Return the color generated for a label index.

        Args
            label: Index of the label (position in the list given to the constructor).

        Returns
            The list of three ints generated for that label in the constructor.

            If the index is not smaller than the number of generated colors, the
            tuple ``(0, 255, 0)`` is returned instead.
        """
        if label < len(self.__colors):
            return self.__colors[label]
        else:
            return 0, 255, 0

    def draw_boxes_and_caption(self, image_frame, v_boxes, v_labels, v_scores, show_names=False, show_percentage=False):
        """Draw each box, and optionally its name and/or score, onto ``image_frame``.

        Args:
            image_frame: image passed to the OpenCV drawing calls.
            v_boxes, v_labels, v_scores: parallel sequences as returned by ``get_boxes``.
            show_names: include the label name in the caption.
            show_percentage: include the score (3 decimals) in the caption.

        Returns:
            The image returned by the drawing calls.
        """
        for i in range(len(v_boxes)):
            box = v_boxes[i]
            y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
            class_color = self.label_color(self.__labels.index(v_labels[i]))

            image_frame = cv2.rectangle(image_frame, (x1, y1), (x2, y2), class_color, 2)

            label = ""
            if show_names and show_percentage:
                label = "%s : %.3f" % (v_labels[i], v_scores[i])
            elif show_names:
                label = "%s" % (v_labels[i])
            elif show_percentage:
                label = "%.3f" % (v_scores[i])

            if show_names or show_percentage:
                b = np.array([x1, y1, x2, y2]).astype(int)
                # Thick dark text first, thinner white text on top, for legibility.
                cv2.putText(image_frame, label, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (200, 0, 0), 3)
                cv2.putText(image_frame, label, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 2)

        return image_frame


================================================
FILE: imageai_tf_deprecated/Detection/Custom/callbacks.py
================================================
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
import tensorflow as tf
import numpy as np
import warnings

class CustomTensorBoard(TensorBoard):
    """ TensorBoard callback that also writes the logged values after batches.

    Args
        log_every: write the batch logs every ``log_every`` calls to ``on_batch_end``.
        **kwargs: forwarded unchanged to ``TensorBoard.__init__``.
    """
    def __init__(self, log_every=1, **kwargs):
        super(CustomTensorBoard, self).__init__(**kwargs)
        self.log_every = log_every
        self.counter = 0

    def on_batch_end(self, batch, logs=None):
        """ Write every entry of ``logs`` (except 'batch' and 'size') as a scalar summary.

        The summaries are tagged with the running batch counter, not with ``batch``.
        """
        # The signature allows logs=None; normalise so `.items()` below cannot fail.
        logs = logs or {}

        self.counter += 1
        if self.counter % self.log_every == 0:
            for name, value in logs.items():
                if name in ['batch', 'size']:
                    continue
                # NOTE(review): `tf.Summary` and `self.writer` look like the TF 1.x
                # summary API - confirm the TensorFlow version this is run against.
                summary = tf.Summary()
                summary_value = summary.value.add()
                # assumes each logged value exposes `.item()` (e.g. a NumPy scalar) - TODO confirm
                summary_value.simple_value = value.item()
                summary_value.tag = name
                self.writer.add_summary(summary, self.counter)
            self.writer.flush()

        super(CustomTensorBoard, self).on_batch_end(batch, logs)

class CustomModelCheckpoint(ModelCheckpoint):
    """ Checkpoint callback that saves ``model_to_save`` (the template model)
    rather than the model the callback is attached to (e.g. a multi-GPU wrapper).
    """
    def __init__(self, model_to_save, **kwargs):
        super(CustomModelCheckpoint, self).__init__(**kwargs)
        self.model_to_save = model_to_save

    def _write_template_model(self, filepath):
        """ Save the template model to ``filepath``, as weights only or as a full model. """
        if self.save_weights_only:
            self.model_to_save.save_weights(filepath, overwrite=True)
        else:
            self.model_to_save.save(filepath, overwrite=True)

    def on_epoch_end(self, epoch, logs=None):
        """ Save the template model once ``period`` epochs have passed since the last save.

        With ``save_best_only`` the save happens only when the monitored value
        improves on ``self.best``; otherwise it happens unconditionally.
        """
        logs = logs or {}
        self.epochs_since_last_save += 1

        if self.epochs_since_last_save >= self.period:
            self.epochs_since_last_save = 0
            filepath = self.filepath.format(epoch=epoch + 1, **logs)
            chatty = self.verbose > 0

            if not self.save_best_only:
                if chatty:
                    print('\nEpoch %05d: saving model to %s' % (epoch + 1, filepath))
                self._write_template_model(filepath)
            else:
                current = logs.get(self.monitor)
                if current is None:
                    warnings.warn('Can save best model only with %s available, '
                                  'skipping.' % (self.monitor), RuntimeWarning)
                elif self.monitor_op(current, self.best):
                    if chatty:
                        print('\nEpoch %05d: %s improved from %0.5f to %0.5f,'
                              ' saving model to %s'
                              % (epoch + 1, self.monitor, self.best,
                                 current, filepath))
                    self.best = current
                    self._write_template_model(filepath)
                elif chatty:
                    print('\nEpoch %05d: %s did not improve from %0.5f' %
                          (epoch + 1, self.monitor, self.best))

        # NOTE(review): this forwards to the parent's on_batch_end (with the epoch
        # index), not on_epoch_end - presumably to avoid the parent saving
        # self.model as well; confirm before changing.
        super(CustomModelCheckpoint, self).on_batch_end(epoch, logs)

================================================
FILE: imageai_tf_deprecated/Detection/Custom/evaluate.py
================================================
#! /usr/bin/env python

import argparse
import os
import json
from imageai.Detection.Custom.voc import parse_voc_annotation
from imageai.Detection.Custom.generator import BatchGenerator
from imageai.Detection.Custom.utils.utils import normalize, evaluate
from keras.models import load_model


def _main_(args):
    """Evaluate a saved model on the validation set described by a JSON config file.

    Args:
        args: object with a ``conf`` attribute holding the config file path.

    Prints one average-precision line per class followed by the mean (mAP).
    """
    with open(args.conf) as config_buffer:
        config = json.load(config_buffer)

    # --- validation data -------------------------------------------------
    valid_ints, labels = parse_voc_annotation(
        config['valid']['valid_annot_folder'],
        config['valid']['valid_image_folder'],
        config['valid']['cache_name'],
        config['model']['labels']
    )

    # An empty label list in the config means "use the labels found in the annotations".
    if len(config['model']['labels']) == 0:
        labels = labels.keys()
    else:
        labels = config['model']['labels']
    labels = sorted(labels)

    generator_options = dict(
        instances=valid_ints,
        anchors=config['model']['anchors'],
        labels=labels,
        downsample=32,  # ratio between network input's size and network output's size, 32 for YOLOv3
        max_box_per_image=0,
        batch_size=config['train']['batch_size'],
        min_net_size=config['model']['min_input_size'],
        max_net_size=config['model']['max_input_size'],
        shuffle=True,
        jitter=0.0,
        norm=normalize,
    )
    valid_generator = BatchGenerator(**generator_options)

    # --- model loading and evaluation ------------------------------------
    os.environ['CUDA_VISIBLE_DEVICES'] = config['train']['gpus']
    infer_model = load_model(config['train']['saved_weights_name'])

    # average precision per class index
    average_precisions = evaluate(infer_model, valid_generator)

    for label, average_precision in average_precisions.items():
        print(labels[label] + ': {:.4f}'.format(average_precision))

    mean_ap = sum(average_precisions.values()) / len(average_precisions)
    print('mAP: {:.4f}'.format(mean_ap))


if __name__ == '__main__':
    # Command-line entry point: read the config path and run the evaluation.
    cli = argparse.ArgumentParser(description='Evaluate YOLO_v3 model on any dataset')
    cli.add_argument('-c', '--conf', help='path to configuration file')
    _main_(cli.parse_args())


================================================
FILE: imageai_tf_deprecated/Detection/Custom/gen_anchors.py
================================================
import random
import numpy as np

from imageai.Detection.Custom.voc import parse_voc_annotation


def IOU(ann, centroids):
    """Overlap ratio between one (w, h) box and each (w, h) centroid.

    Both boxes are compared as if they shared a corner, so only their sizes
    matter. Returns a NumPy array with one value per centroid.
    """
    w, h = ann

    def _overlap(c_w, c_h):
        # centroid covers the box in both dimensions
        if c_w >= w and c_h >= h:
            return w*h/(c_w*c_h)
        # centroid is wider but not taller
        if c_w >= w and c_h <= h:
            return w*c_h/(w*h + (c_w-w)*c_h)
        # centroid is taller but not wider
        if c_w <= w and c_h >= h:
            return c_w*h/(w*h + c_w*(c_h-h))
        # box covers the centroid in both dimensions
        return (c_w*c_h)/(w*h)

    return np.array([_overlap(c_w, c_h) for c_w, c_h in centroids])  # shape (k,)


def avg_IOU(anns, centroids):
    """Mean, over all rows of ``anns``, of the best ``IOU`` against any centroid.

    ``anns`` must be a 2-D array (one annotation per row).
    """
    row_count, _ = anns.shape
    total = 0.

    for ann in anns:
        total += max(IOU(ann, centroids))

    return total/row_count


def run_kmeans(ann_dims, anchor_num):
    """Cluster box sizes into ``anchor_num`` centroids using ``1 - IOU`` as distance.

    Args:
        ann_dims: 2-D array with one box size per row.
        anchor_num: number of centroids to produce.

    Returns:
        Array of shape ``(anchor_num, ann_dims.shape[1])`` holding the centroids
        once the row-to-centroid assignment stops changing.

    The initial centroids are rows of ``ann_dims`` picked with ``random.randrange``,
    so the result depends on the state of the ``random`` module.
    """
    ann_num = ann_dims.shape[0]
    anchor_dim = ann_dims.shape[1]
    # -1 never matches a real centroid index, so the first pass always updates.
    prev_assignments = np.ones(ann_num)*(-1)

    indices = [random.randrange(ann_num) for _ in range(anchor_num)]
    centroids = ann_dims[indices]

    while True:
        # distances.shape = (ann_num, anchor_num)
        distances = np.array([1 - IOU(ann_dims[i], centroids) for i in range(ann_num)])

        # assign samples to centroids
        assignments = np.argmin(distances, axis=1)

        if (assignments == prev_assignments).all():
            return centroids

        # calculate new centroids; the builtin `float` replaces the `np.float`
        # alias, which no longer exists in current NumPy releases
        centroid_sums = np.zeros((anchor_num, anchor_dim), float)
        for i in range(ann_num):
            centroid_sums[assignments[i]] += ann_dims[i]
        for j in range(anchor_num):
            # the small epsilon keeps an empty cluster from dividing by zero
            centroids[j] = centroid_sums[j]/(np.sum(assignments == j) + 1e-6)

        prev_assignments = assignments.copy()


def generateAnchors(train_annotation_folder, train_image_folder, train_cache_file, model_labels):
    """Derive 9 anchor boxes from the training annotations via k-means.

    Returns:
        ``(anchor_array, reverse_anchor_array)`` where ``anchor_array`` is a flat
        list of 18 ints (width, height pairs scaled by 416, ordered by ascending
        width) and ``reverse_anchor_array`` holds its three 6-element slices in
        reverse order (largest widths first).
    """
    print("Generating anchor boxes for training images and annotation...")
    num_anchors = 9

    train_imgs, train_labels = parse_voc_annotation(
        train_annotation_folder,
        train_image_folder,
        train_cache_file,
        model_labels
    )

    # Box sizes relative to their image, one (w, h) row per annotated object.
    annotation_dims = np.array([
        (float((float(obj['xmax']) - float(obj['xmin']))/image['width']),
         float((float(obj["ymax"]) - float(obj['ymin']))/image['height']))
        for image in train_imgs
        for obj in image['object']
    ])

    # run k_mean to find the anchors
    centroids = run_kmeans(annotation_dims, num_anchors)

    print('Average IOU for', num_anchors, 'anchors:', '%0.2f' % avg_IOU(annotation_dims, centroids))

    anchors = centroids.copy()

    # Flatten to [w0, h0, w1, h1, ...] in order of increasing width.
    anchor_array = []
    for idx in np.argsort(anchors[:, 0]):
        anchor_array.extend((int(anchors[idx, 0] * 416), int(anchors[idx, 1] * 416)))

    reverse_anchor_array = [anchor_array[12:18], anchor_array[6:12], anchor_array[0:6]]

    print("Anchor Boxes generated.")
    return anchor_array, reverse_anchor_array


================================================
FILE: imageai_tf_deprecated/Detection/Custom/generator.py
================================================
import cv2
import copy
import numpy as np
from tensorflow.keras.utils import Sequence
from imageai.Detection.Custom.utils.bbox import BoundBox, bbox_iou
from imageai.Detection.Custom.utils.image import apply_random_scale_and_crop, random_distort_image, random_flip, correct_bounding_boxes

class BatchGenerator(Sequence):
    """Keras ``Sequence`` producing augmented image batches with YOLO training targets.

    Each item is ``([x_batch, t_batch, yolo_1, yolo_2, yolo_3],
    [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3])`` as assembled in ``__getitem__``.
    """
    def __init__(self, 
        instances, 
        anchors,   
        labels,        
        downsample=32, # ratio between network input's size and network output's size, 32 for YOLOv3
        max_box_per_image=30,
        batch_size=1,
        min_net_size=320,
        max_net_size=608,    
        shuffle=True, 
        jitter=True, 
        norm=None
    ):
        """Store the configuration and optionally shuffle ``instances`` in place.

        Args:
            instances: list of dicts; this class reads their 'filename' and 'object' keys.
            anchors: flat sequence of (width, height) pairs.
            labels: list of class names; positions are used as class indices.
            downsample: ratio between network input size and coarsest output grid.
            max_box_per_image: number of ground-truth box slots per image in ``t_batch``.
            batch_size: number of instances per batch.
            min_net_size, max_net_size: bounds of the random network input size;
                both are rounded down to a multiple of ``downsample``.
            shuffle: shuffle ``instances`` now and at every epoch end.
            jitter: factor multiplied with the image size to randomise the aspect ratio.
            norm: optional callable applied to each image; when ``None`` the boxes
                are drawn onto the image instead (sanity-check mode).
        """
        self.instances          = instances
        self.batch_size         = batch_size
        self.labels             = labels
        self.downsample         = downsample
        self.max_box_per_image  = max_box_per_image
        self.min_net_size       = (min_net_size//self.downsample)*self.downsample
        self.max_net_size       = (max_net_size//self.downsample)*self.downsample
        self.shuffle            = shuffle
        self.jitter             = jitter
        self.norm               = norm
        # Anchors are kept as boxes anchored at the origin so only their size matters for IoU.
        self.anchors            = [BoundBox(0, 0, anchors[2*i], anchors[2*i+1]) for i in range(len(anchors)//2)]
        self.net_h              = 416  
        self.net_w              = 416

        if shuffle: np.random.shuffle(self.instances)
            
    def __len__(self):
        """Number of batches per epoch (the last one may be completed with earlier items)."""
        return int(np.ceil(float(len(self.instances))/self.batch_size))           

    def __getitem__(self, idx):
        """Build batch ``idx``: input images, ground-truth boxes and the three target grids."""
        # get image input size, change every 10 batches
        net_h, net_w = self._get_net_size(idx)
        base_grid_h, base_grid_w = net_h//self.downsample, net_w//self.downsample

        # determine the first and the last indices of the batch
        l_bound = idx * self.batch_size
        r_bound = (idx+1) * self.batch_size

        # The last batch is shifted back so it still spans batch_size positions.
        # NOTE(review): with fewer instances than batch_size, l_bound becomes
        # negative and the slice below counts from the end of the list - confirm
        # this case cannot occur in practice.
        if r_bound > len(self.instances):
            r_bound = len(self.instances)
            l_bound = r_bound - self.batch_size

        x_batch = np.zeros((r_bound - l_bound, net_h, net_w, 3))             # input images
        t_batch = np.zeros((r_bound - l_bound, 1, 1, 1,  self.max_box_per_image, 4))   # list of groundtruth boxes

        # initialize the inputs and the outputs
        yolo_1 = np.zeros((r_bound - l_bound, 1*base_grid_h,  1*base_grid_w, len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 1
        yolo_2 = np.zeros((r_bound - l_bound, 2*base_grid_h,  2*base_grid_w, len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 2
        yolo_3 = np.zeros((r_bound - l_bound, 4*base_grid_h,  4*base_grid_w, len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 3
        # Indexed by max_index//3 below: the first three anchors map to the finest grid.
        yolos = [yolo_3, yolo_2, yolo_1]

        dummy_yolo_1 = np.zeros((r_bound - l_bound, 1))
        dummy_yolo_2 = np.zeros_like(dummy_yolo_1)
        dummy_yolo_3 = np.zeros_like(dummy_yolo_1)
        
        instance_count = 0
        true_box_index = 0

        # do the logic to fill in the inputs and the output
        for train_instance in self.instances[l_bound:r_bound]:
            # augment input image and fix object's position and size
            img, all_objs = self._aug_image(train_instance, net_h, net_w)
            
            for obj in all_objs:
                # find the best anchor box for this object
                max_anchor = None                
                max_index  = -1
                max_iou    = -1

                shifted_box = BoundBox(0, 
                                       0,
                                       obj['xmax']-obj['xmin'],                                                
                                       obj['ymax']-obj['ymin'])    
                
                for i in range(len(self.anchors)):
                    anchor = self.anchors[i]
                    iou    = bbox_iou(shifted_box, anchor)

                    if max_iou < iou:
                        max_anchor = anchor
                        max_index  = i
                        max_iou    = iou                
                
                # determine the yolo to be responsible for this bounding box
                yolo = yolos[max_index//3]
                grid_h, grid_w = yolo.shape[1:3]
                
                # determine the position of the bounding box on the grid
                center_x = .5*(obj['xmin'] + obj['xmax'])
                center_x = center_x / float(net_w) * grid_w # sigma(t_x) + c_x
                center_y = .5*(obj['ymin'] + obj['ymax'])
                center_y = center_y / float(net_h) * grid_h # sigma(t_y) + c_y
                
                # determine the sizes of the bounding box
                w = np.log((obj['xmax'] - obj['xmin']) / float(max_anchor.xmax)) # t_w
                h = np.log((obj['ymax'] - obj['ymin']) / float(max_anchor.ymax)) # t_h

                box = [center_x, center_y, w, h]

                # determine the index of the label
                obj_indx = self.labels.index(obj['name'])  

                # determine the location of the cell responsible for this object
                grid_x = int(np.floor(center_x))
                grid_y = int(np.floor(center_y))

                # assign ground truth x, y, w, h, confidence and class probs to y_batch
                yolo[instance_count, grid_y, grid_x, max_index%3]      = 0
                yolo[instance_count, grid_y, grid_x, max_index%3, 0:4] = box
                yolo[instance_count, grid_y, grid_x, max_index%3, 4  ] = 1.
                yolo[instance_count, grid_y, grid_x, max_index%3, 5+obj_indx] = 1

                # assign the true box to t_batch
                true_box = [center_x, center_y, obj['xmax'] - obj['xmin'], obj['ymax'] - obj['ymin']]
                t_batch[instance_count, 0, 0, 0, true_box_index] = true_box

                # slots are reused cyclically once max_box_per_image boxes were written
                true_box_index += 1
                true_box_index  = true_box_index % self.max_box_per_image    

            # assign input image to x_batch
            if self.norm is not None: 
                x_batch[instance_count] = self.norm(img)
            else:
                # plot image and bounding boxes for sanity check
                for obj in all_objs:
                    cv2.rectangle(img, (obj['xmin'],obj['ymin']), (obj['xmax'],obj['ymax']), (255,0,0), 3)
                    cv2.putText(img, obj['name'], 
                                (obj['xmin']+2, obj['ymin']+12), 
                                0, 1.2e-3 * img.shape[0], 
                                (0,255,0), 2)
                
                x_batch[instance_count] = img

            # increase instance counter in the current batch
            instance_count += 1                 
                
        return [x_batch, t_batch, yolo_1, yolo_2, yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]

    def _get_net_size(self, idx):
        """Return (net_h, net_w); a new square size is drawn whenever ``idx % 10 == 0``.

        The size is a multiple of ``downsample`` between ``min_net_size`` and
        ``max_net_size`` (inclusive) and is remembered for the following batches.
        """
        if idx % 10 == 0:
            # Both bounds are already multiples of downsample (see __init__), so
            # floor division is exact and hands randint integer bounds instead of
            # float quotients.
            net_size = self.downsample*np.random.randint(self.min_net_size//self.downsample,
                                                         self.max_net_size//self.downsample+1)

            self.net_h, self.net_w = net_size, net_size
        return self.net_h, self.net_w
    
    def _aug_image(self, instance, net_h, net_w):
        """Load one image, apply random scale/crop, colour distortion and flip.

        Returns:
            ``(image, objects)`` - the augmented RGB image and the instance's
            objects with coordinates corrected for the applied transformation.

        Raises:
            IOError: if the image file cannot be loaded.
        """
        image_name = instance['filename']
        image = cv2.imread(image_name)  # BGR image
        
        if image is None:
            # Fail here with the file name; continuing would only produce an
            # opaque error inside cvtColor below.
            raise IOError('Cannot find or read image: ' + str(image_name))

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # RGB image
            
        image_h, image_w, _ = image.shape
        
        # determine the amount of scaling and cropping
        dw = self.jitter * image_w
        dh = self.jitter * image_h

        new_ar = (image_w + np.random.uniform(-dw, dw)) / (image_h + np.random.uniform(-dh, dh))
        scale = np.random.uniform(0.25, 2)

        if new_ar < 1:
            new_h = int(scale * net_h)
            new_w = int(net_h * new_ar)
        else:
            new_w = int(scale * net_w)
            new_h = int(net_w / new_ar)
            
        dx = int(np.random.uniform(0, net_w - new_w))
        dy = int(np.random.uniform(0, net_h - new_h))
        
        # apply scaling and cropping
        im_sized = apply_random_scale_and_crop(image, new_w, new_h, net_w, net_h, dx, dy)
        
        # randomly distort hsv space
        im_sized = random_distort_image(im_sized)
        
        # randomly flip
        flip = np.random.randint(2)
        im_sized = random_flip(im_sized, flip)
            
        # correct the size and pos of bounding boxes
        all_objs = correct_bounding_boxes(instance['object'], new_w, new_h, net_w, net_h, dx, dy, flip, image_w, image_h)
        
        return im_sized, all_objs   

    def on_epoch_end(self):
        """Reshuffle the instances in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.instances)
            
    def num_classes(self):
        """Number of labels."""
        return len(self.labels)

    def size(self):
        """Number of instances."""
        return len(self.instances)    

    def get_anchors(self):
        """Return the anchors as a flat ``[w0, h0, w1, h1, ...]`` list."""
        anchors = []

        for anchor in self.anchors:
            anchors += [anchor.xmax, anchor.ymax]

        return anchors

    def load_annotation(self, i):
        """Return instance ``i``'s boxes as an array of ``[xmin, ymin, xmax, ymax, class_index]`` rows.

        An instance without objects yields ``np.array([[]])``.
        """
        annots = []

        for obj in self.instances[i]['object']:
            annot = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'], self.labels.index(obj['name'])]
            annots += [annot]

        if len(annots) == 0:
            annots = [[]]

        return np.array(annots)

    def load_image(self, i):
        """Read instance ``i``'s image file with OpenCV (BGR), without augmentation."""
        return cv2.imread(self.instances[i]['filename'])  # BGR image


================================================
FILE: imageai_tf_deprecated/Detection/Custom/utils/__init__.py
================================================


================================================
FILE: imageai_tf_deprecated/Detection/Custom/utils/bbox.py
================================================
import numpy as np
import os
import cv2
from .colors import get_color

class BoundBox:
    """Axis-aligned box with an optional confidence ``c`` and per-class scores.

    ``label`` and ``score`` start at -1 ("not computed yet") and are filled in
    lazily by :meth:`get_label` and :meth:`get_score`.
    """

    def __init__(self, xmin, ymin, xmax, ymax, c = None, classes = None):
        self.xmin, self.ymin = xmin, ymin
        self.xmax, self.ymax = xmax, ymax

        self.c, self.classes = c, classes

        # -1 is the sentinel meaning "not cached yet" for both values.
        self.label = self.score = -1

    def get_label(self):
        """Return the index of the largest entry of ``classes`` (cached after the first call)."""
        if self.label != -1:
            return self.label

        self.label = np.argmax(self.classes)
        return self.label

    def get_score(self):
        """Return ``classes[get_label()]`` (cached after the first call)."""
        if self.score != -1:
            return self.score

        self.score = self.classes[self.get_label()]
        return self.score

def _interval_overlap(interval_a, interval_b):
    """Return the overlap length of two 1-D intervals ``[start, end]`` (0 when disjoint)."""
    a_start, a_end = interval_a
    b_start, b_end = interval_b

    if b_start < a_start:
        # b begins first: they overlap only if b reaches a's start
        return 0 if b_end < a_start else min(a_end, b_end) - a_start

    # a begins first (or both begin together): they overlap only if a reaches b's start
    return 0 if a_end < b_start else min(a_end, b_end) - b_start

def bbox_iou(box1, box2):
    """Intersection-over-union of two boxes exposing ``xmin/ymin/xmax/ymax``.

    A non-positive union is replaced by 1 so the division is always defined.
    """
    overlap_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
    overlap_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
    intersect = overlap_w * overlap_h

    w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
    w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin
    union = w1*h1 + w2*h2 - intersect

    denominator = 1 if union <= 0 else union
    return float(intersect) / float(denominator)

def draw_boxes(image, boxes, labels, obj_thresh, quiet=True):
    """Draw every box that has at least one class score above ``obj_thresh``.

    Args:
        image: image array drawn on in place.
        boxes: objects exposing ``xmin/ymin/xmax/ymax`` and an indexable ``classes``.
        labels: class names; ``labels[i]`` corresponds to ``box.classes[i]``.
        obj_thresh: a label is reported when its class score is greater than this.
        quiet: when False, print each box's caption once.

    Returns:
        The same ``image`` object.

    The caption lists every passing label with that label's own score in
    percent; the box color is taken from the last passing label.
    """
    for box in boxes:
        caption_parts = []
        label = -1
        
        for i in range(len(labels)):
            if box.classes[i] > obj_thresh:
                # Report this label's own score rather than the best score of
                # the box, which would repeat one number for every label.
                caption_parts.append(labels[i] + ' ' + str(round(box.classes[i]*100, 2)) + '%')
                label = i

        label_str = ', '.join(caption_parts)
        if not quiet: print(label_str)
                
        if label >= 0:
            text_size = cv2.getTextSize(label_str, cv2.FONT_HERSHEY_SIMPLEX, 1.1e-3 * image.shape[0], 5)
            width, height = text_size[0][0], text_size[0][1]
            # Filled background behind the caption, placed just above the box.
            region = np.array([[box.xmin-3,        box.ymin], 
                               [box.xmin-3,        box.ymin-height-26], 
                               [box.xmin+width+13, box.ymin-height-26], 
                               [box.xmin+width+13, box.ymin]], dtype='int32')  

            cv2.rectangle(img=image, pt1=(box.xmin,box.ymin), pt2=(box.xmax,box.ymax), color=get_color(label), thickness=5)
            cv2.fillPoly(img=image, pts=[region], color=get_color(label))
            cv2.putText(img=image, 
                        text=label_str, 
                        org=(box.xmin+13, box.ymin - 13), 
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX, 
                        fontScale=1e-3 * image.shape[0], 
                        color=(0,0,0), 
                        thickness=2)
        
    return image          

================================================
FILE: imageai_tf_deprecated/Detection/Custom/utils/colors.py
================================================
def get_color(label):
    """ Look up the drawing color for a label index in the module-level ``colors`` table.
    code originally from https://github.com/fizyr/keras-retinanet/
    Args
        label: The label to get the color for.
    Returns
        The three-value color stored for that label, or ``(0, 255, 0)`` (after
        printing a notice) when the index is beyond the end of the table.
    """
    if not label < len(colors):
        print('Label {} has no color, returning default.'.format(label))
        return (0, 255, 0)

    return colors[label]

# Lookup table used by get_color(): 80 three-value colors, indexed by label.
colors = [
    [31  , 0   , 255] ,
    [0   , 159 , 255] ,
    [255 , 95  , 0]   ,
    [255 , 19  , 0]   ,
    [255 , 0   , 0]   ,
    [255 , 38  , 0]   ,
    [0   , 255 , 25]  ,
    [255 , 0   , 133] ,
    [255 , 172 , 0]   ,
    [108 , 0   , 255] ,
    [0   , 82  , 255] ,
    [0   , 255 , 6]   ,
    [255 , 0   , 152] ,
    [223 , 0   , 255] ,
    [12  , 0   , 255] ,
    [0   , 255 , 178] ,
    [108 , 255 , 0]   ,
    [184 , 0   , 255] ,
    [255 , 0   , 76]  ,
    [146 , 255 , 0]   ,
    [51  , 0   , 255] ,
    [0   , 197 , 255] ,
    [255 , 248 , 0]   ,
    [255 , 0   , 19]  ,
    [255 , 0   , 38]  ,
    [89  , 255 , 0]   ,
    [127 , 255 , 0]   ,
    [255 , 153 , 0]   ,
    [0   , 255 , 255] ,
    [0   , 255 , 216] ,
    [0   , 255 , 121] ,
    [255 , 0   , 248] ,
    [70  , 0   , 255] ,
    [0   , 255 , 159] ,
    [0   , 216 , 255] ,
    [0   , 6   , 255] ,
    [0   , 63  , 255] ,
    [31  , 255 , 0]   ,
    [255 , 57  , 0]   ,
    [255 , 0   , 210] ,
    [0   , 255 , 102] ,
    [242 , 255 , 0]   ,
    [255 , 191 , 0]   ,
    [0   , 255 , 63]  ,
    [255 , 0   , 95]  ,
    [146 , 0   , 255] ,
    [184 , 255 , 0]   ,
    [255 , 114 , 0]   ,
    [0   , 255 , 235] ,
    [255 , 229 , 0]   ,
    [0   , 178 , 255] ,
    [255 , 0   , 114] ,
    [255 , 0   , 57]  ,
    [0   , 140 , 255] ,
    [0   , 121 , 255] ,
    [12  , 255 , 0]   ,
    [255 , 210 , 0]   ,
    [0   , 255 , 44]  ,
    [165 , 255 , 0]   ,
    [0   , 25  , 255] ,
    [0   , 255 , 140] ,
    [0   , 101 , 255] ,
    [0   , 255 , 82]  ,
    [223 , 255 , 0]   ,
    [242 , 0   , 255] ,
    [89  , 0   , 255] ,
    [165 , 0   , 255] ,
    [70  , 255 , 0]   ,
    [255 , 0   , 172] ,
    [255 , 76  , 0]   ,
    [203 , 255 , 0]   ,
    [204 , 0   , 255] ,
    [255 , 0   , 229] ,
    [255 , 133 , 0]   ,
    [127 , 0   , 255] ,
    [0   , 235 , 255] ,
    [0   , 255 , 197] ,
    [255 , 0   , 191] ,
    [0   , 44  , 255] ,
    [50  , 255 , 0]
]


================================================
FILE: imageai_tf_deprecated/Detection/Custom/utils/image.py
================================================
import cv2
import numpy as np
import copy


def _rand_scale(scale):
    scale = np.random.uniform(1, scale)
    return scale if np.random.randint(2) == 0 else 1./scale


def _constrain(min_v, max_v, value):

    if value < min_v:
        return min_v

    if value > max_v:
        return max_v

    return value 


def random_flip(image, flip):
    """Return ``cv2.flip(image, 1)`` when ``flip`` equals 1, else ``image`` itself.

    Any other ``flip`` value leaves the image untouched (the same object is
    returned, not a copy).
    """
    return cv2.flip(image, 1) if flip == 1 else image


def correct_bounding_boxes(boxes, new_w, new_h, net_w, net_h, dx, dy, flip, image_w, image_h):
    """Map annotation boxes from original-image space into the augmented network input.

    # Arguments
        boxes            : list of dicts with integer 'xmin', 'xmax', 'ymin', 'ymax' entries.
        new_w, new_h     : size the image was resized to before placement.
        net_w, net_h     : size of the network input; coordinates are clamped to it.
        dx, dy           : horizontal / vertical offset of the resized image.
        flip             : when equal to 1 the x-coordinates are mirrored inside net_w.
        image_w, image_h : size of the original image.
    # Returns
        A new list of box dicts (deep copies, in shuffled order) without the
        boxes that collapsed to zero width or height after clamping.
    """
    # work on copies so the caller's annotations stay untouched
    boxes = copy.deepcopy(boxes)

    # randomize boxes' order
    np.random.shuffle(boxes)

    # scale factors from original-image pixels to resized-image pixels
    sx = float(new_w)/image_w
    sy = float(new_h)/image_h

    kept = []
    for box in boxes:
        box['xmin'] = int(_constrain(0, net_w, box['xmin']*sx + dx))
        box['xmax'] = int(_constrain(0, net_w, box['xmax']*sx + dx))
        box['ymin'] = int(_constrain(0, net_h, box['ymin']*sy + dy))
        box['ymax'] = int(_constrain(0, net_h, box['ymax']*sy + dy))

        # drop boxes that were pushed (partly) outside and ended up empty
        if box['xmax'] <= box['xmin'] or box['ymax'] <= box['ymin']:
            continue

        if flip == 1:
            # mirror horizontally: the old right edge becomes the new left edge
            box['xmin'], box['xmax'] = net_w - box['xmax'], net_w - box['xmin']

        kept.append(box)

    return kept


def random_distort_image(image, hue=18, saturation=1.5, exposure=1.5):
    """Randomly jitter the hue, saturation and exposure of an RGB image.

    # Arguments
        image      : RGB image accepted by ``cv2.cvtColor``
                     (assumed 8-bit, since the result is cast to uint8).
        hue        : maximum absolute hue shift; the shift is drawn uniformly
                     from [-hue, hue].
        saturation : upper bound passed to ``_rand_scale`` for the saturation factor.
        exposure   : upper bound passed to ``_rand_scale`` for the value factor.
    # Returns
        The distorted image as a uint8 RGB array.
    """
    # determine scale factors
    dhue = np.random.uniform(-hue, hue)
    dsat = _rand_scale(saturation)
    dexp = _rand_scale(exposure)

    # convert RGB space to HSV space (float so the scaling below cannot truncate)
    image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV).astype('float')

    # change saturation and exposure
    image[:, :, 1] *= dsat
    image[:, :, 2] *= dexp

    # Scaling can push S/V above 255; without clipping the uint8 cast below
    # overflows and bright pixels wrap around to dark values.
    image[:, :, 1:] = np.clip(image[:, :, 1:], 0, 255)

    # change hue, wrapping values that left the range back into it
    image[:, :, 0] += dhue
    image[:, :, 0] -= (image[:, :, 0] > 180) * 180
    image[:, :, 0] += (image[:, :, 0] < 0)   * 180

    # convert back to RGB from HSV
    return cv2.cvtColor(image.astype('uint8'), cv2.COLOR_HSV2RGB)


def apply_random_scale_and_crop(image, new_w, new_h, net_w, net_h, dx, dy):
    """Resize ``image`` and place it at offset (dx, dy) on a net_h x net_w canvas.

    The image is resized to (new_w, new_h). A positive offset pads that side
    with gray (127); a non-positive offset crops that many pixels instead.
    Whatever is still missing on the right/bottom is padded with gray, and the
    result is finally cut to exactly net_h x net_w.
    """
    def _gray_pad(arr, rows, cols):
        # pad only the two spatial axes, never the channel axis
        return np.pad(arr, (rows, cols, (0, 0)), mode='constant', constant_values=127)

    canvas = cv2.resize(image, (new_w, new_h))

    # horizontal placement
    if dx > 0:
        canvas = _gray_pad(canvas, (0, 0), (dx, 0))
    else:
        canvas = canvas[:, -dx:, :]
    if (new_w + dx) < net_w:
        canvas = _gray_pad(canvas, (0, 0), (0, net_w - (new_w + dx)))

    # vertical placement
    if dy > 0:
        canvas = _gray_pad(canvas, (dy, 0), (0, 0))
    else:
        canvas = canvas[-dy:, :, :]
    if (new_h + dy) < net_h:
        canvas = _gray_pad(canvas, (0, net_h - (new_h + dy)), (0, 0))

    return canvas[:net_h, :net_w, :]


================================================
FILE: imageai_tf_deprecated/Detection/Custom/utils/multi_gpu_model.py
================================================
from keras.layers import Lambda, concatenate
from keras.models import Model
import tensorflow as tf


def multi_gpu_model(model, gpus):
    """Build a data-parallel wrapper that runs ``model`` on several GPUs.

    Every input batch is cut into one slice per GPU, ``model`` is applied to
    each slice under that GPU's device scope, and the per-GPU outputs are
    concatenated along the batch axis on the CPU.

    # Arguments
        model : the Keras model to replicate.
        gpus  : either an integer number of GPUs (ids 0..gpus-1 are used) or a
                list/tuple of GPU ids.
    # Returns
        A ``Model`` with the same inputs as ``model`` whose outputs are the
        merged (batch-concatenated) outputs of all replicas.
    """
    if isinstance(gpus, (list, tuple)):
        num_gpus = len(gpus)
        target_gpu_ids = gpus
    else:
        num_gpus = gpus
        target_gpu_ids = range(num_gpus)

    # Returns the i-th of `parts` consecutive chunks of `data` along axis 0.
    # The last chunk also takes whatever remains when the batch size is not
    # divisible by `parts`. Note the last-chunk test uses the enclosing
    # `num_gpus`, which equals `parts` at the only call site below.
    def get_slice(data, i, parts):
        shape = tf.shape(data)
        batch_size = shape[:1]
        input_shape = shape[1:]
        step = batch_size // parts
        if i == num_gpus - 1:
            size = batch_size - step * i
        else:
            size = step
        size = tf.concat([size, input_shape], axis=0)
        # stride is `step` along the batch axis and 0 along all other axes
        stride = tf.concat([step, input_shape * 0], axis=0)
        start = stride * i
        return tf.slice(data, start, size)

    # one bucket per model output, each collecting that output from every replica
    all_outputs = []
    for i in range(len(model.outputs)):
        all_outputs.append([])

    # Place a copy of the model on each GPU,
    # each getting a slice of the inputs.
    for i, gpu_id in enumerate(target_gpu_ids):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('replica_%d' % gpu_id):
                inputs = []
                # Retrieve a slice of the input.
                for x in model.inputs:
                    # static per-sample shape (batch dimension stripped)
                    input_shape = tuple(x.get_shape().as_list())[1:]
                    slice_i = Lambda(get_slice, output_shape=input_shape,
                                     arguments={'i': i, 'parts': num_gpus})(x)
                    inputs.append(slice_i)

                # Apply model on slice
                # (creating a model replica on the target device).
                outputs = model(inputs)
                if not isinstance(outputs, list):
                    outputs = [outputs]

                # Save the outputs for merging back together later.
                for o in range(len(outputs)):
                    all_outputs[o].append(outputs[o])

    # Merge outputs on CPU.
    with tf.device('/cpu:0'):
        merged = []
        # each merged tensor keeps the name of the original model output
        for name, outputs in zip(model.output_names, all_outputs):
            merged.append(concatenate(outputs, axis=0, name=name))
        return Model(model.inputs, merged)


================================================
FILE: imageai_tf_deprecated/Detection/Custom/utils/utils.py
================================================
import cv2
import numpy as np
import os
from .bbox import BoundBox, bbox_iou
from scipy.special import expit


def _sigmoid(x):
    return expit(x)


def makedirs(path):
    """Create ``path`` (including parents); an already existing directory is fine.

    Any ``OSError`` raised by ``os.makedirs`` is re-raised unless ``path`` is a
    directory afterwards.
    """
    try:
        os.makedirs(path)
    except OSError:
        if os.path.isdir(path):
            return
        raise


def evaluate(model,
             generator, 
             iou_threshold,
             obj_thresh,
             nms_thresh,
             net_h=416,
             net_w=416,
             save_path=None):
    """ Evaluate a given dataset using a given model.
    code originally from https://github.com/fizyr/keras-retinanet

    For every image the model's detections are collected per class, matched
    against the ground-truth boxes of the same class, and turned into a
    precision/recall curve from which the average precision is computed.

    # Arguments
        model           : The model to evaluate.
        generator       : The generator that represents the dataset to evaluate.
                          Must provide size(), num_classes(), load_image(i),
                          load_annotation(i) and get_anchors().
        iou_threshold   : The threshold used to consider when a detection is positive or negative.
        obj_thresh      : The threshold used to distinguish between object and non-object
        nms_thresh      : The threshold used to determine whether two detections are duplicates
        net_h           : The height of the input image to the model, higher value results in better accuracy
        net_w           : The width of the input image to the model
        save_path       : The path to save images with visualized detections to.
                          (Accepted but not used anywhere in this function.)
    # Returns
        A dict mapping each class label index (0 .. num_classes-1) to its
        average precision. Classes without any annotation get 0.
    """    
    # gather all detections and annotations
    # both are indexed as [image_index][label]
    all_detections     = [[None for i in range(generator.num_classes())] for j in range(generator.size())]
    all_annotations    = [[None for i in range(generator.num_classes())] for j in range(generator.size())]

    for i in range(generator.size()):
        # get_yolo_boxes expects a batch, so wrap the single image in a list
        raw_image = [generator.load_image(i)]

        # make the boxes and the labels
        pred_boxes = get_yolo_boxes(model, raw_image, net_h, net_w, generator.get_anchors(), obj_thresh, nms_thresh)[0]

        score = np.array([box.get_score() for box in pred_boxes])
        pred_labels = np.array([box.label for box in pred_boxes])        
        
        if len(pred_boxes) > 0:
            # one row per detection: xmin, ymin, xmax, ymax, score
            pred_boxes = np.array([[box.xmin, box.ymin, box.xmax, box.ymax, box.get_score()] for box in pred_boxes]) 
        else:
            # keep a 2-D array so the fancy indexing below still works
            pred_boxes = np.array([[]])  
        
        # sort the boxes and the labels according to scores
        score_sort = np.argsort(-score)
        pred_labels = pred_labels[score_sort]
        pred_boxes  = pred_boxes[score_sort]
        
        # copy detections to all_detections
        for label in range(generator.num_classes()):
            all_detections[i][label] = pred_boxes[pred_labels == label, :]

        annotations = generator.load_annotation(i)
        
        # copy annotations to all_annotations
        # (column 4 of an annotation row is read as the class label, columns 0-3 as the box)
        for label in range(generator.num_classes()):
            all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()

    # compute mAP by comparing all detections and all annotations
    average_precisions = {}
    
    for label in range(generator.num_classes()):
        # one entry per detection of this class, across all images
        false_positives = np.zeros((0,))
        true_positives  = np.zeros((0,))
        scores          = np.zeros((0,))
        num_annotations = 0.0

        for i in range(generator.size()):
            detections           = all_detections[i][label]
            annotations          = all_annotations[i][label]
            num_annotations     += annotations.shape[0]
            # annotations of this image that already have a matching detection
            detected_annotations = []

            for d in detections:
                scores = np.append(scores, d[4])

                # nothing to match against: every detection is a false positive
                if annotations.shape[0] == 0:
                    false_positives = np.append(false_positives, 1)
                    true_positives  = np.append(true_positives, 0)
                    continue

                overlaps            = compute_overlap(np.expand_dims(d, axis=0), annotations)
                assigned_annotation = np.argmax(overlaps, axis=1)
                max_overlap         = overlaps[0, assigned_annotation]

                # a true positive needs enough overlap with an annotation that
                # has not been claimed by an earlier (higher-scoring) detection
                if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
                    false_positives = np.append(false_positives, 0)
                    true_positives  = np.append(true_positives, 1)
                    detected_annotations.append(assigned_annotation)
                else:
                    false_positives = np.append(false_positives, 1)
                    true_positives  = np.append(true_positives, 0)

        # no annotations -> AP for this class is 0 (is this correct?)
        if num_annotations == 0:
            average_precisions[label] = 0
            continue

        # sort by score
        indices         = np.argsort(-scores)
        false_positives = false_positives[indices]
        true_positives  = true_positives[indices]

        # compute false positives and true positives
        # (cumulative counts over the score-sorted detections)
        false_positives = np.cumsum(false_positives)
        true_positives  = np.cumsum(true_positives)

        # compute recall and precision
        # (eps in the denominator avoids a division by zero)
        recall    = true_positives / num_annotations
        precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)

        # compute average precision
        average_precision  = compute_ap(recall, precision)  
        average_precisions[label] = average_precision

    return average_precisions    


def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
    """Convert boxes from letterboxed network coordinates to image pixels, in place.

    The network input holds the image scaled (aspect ratio preserved) and
    centred on a net_h x net_w canvas. This undoes that placement: the
    letterbox offset is removed, the coordinates are rescaled to the image
    area and multiplied by the image size.

    # Arguments
        boxes            : objects with ``xmin``, ``xmax``, ``ymin``, ``ymax``
                           attributes expressed as fractions of the network input.
        image_h, image_w : size of the original image in pixels.
        net_h, net_w     : size of the network input in pixels.
    # Returns
        None; the attributes of every box are overwritten with integer pixel values.
    """
    # size of the image area inside the letterbox
    if (float(net_w)/image_w) < (float(net_h)/image_h):
        # width is the limiting side
        new_w = net_w
        new_h = (image_h*net_w)/image_w
    else:
        # height is the limiting side (was wrongly set to net_w, which only
        # worked for square network inputs)
        new_h = net_h
        new_w = (image_w*net_h)/image_h

    # offsets and scales are the same for every box, so compute them once
    x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
    y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h

    for box in boxes:
        box.xmin = int((box.xmin - x_offset) / x_scale * image_w)
        box.xmax = int((box.xmax - x_offset) / x_scale * image_w)
        box.ymin = int((box.ymin - y_offset) / y_scale * image_h)
        box.ymax = int((box.ymax - y_offset) / y_scale * image_h)
        

def do_nms(boxes, nms_thresh):
    """Per-class non-maximum suppression, performed in place.

    For every class the boxes are visited from the highest to the lowest class
    score. Each box whose score is still non-zero zeroes the class score of
    every lower-ranked box that overlaps it with an IoU of at least
    ``nms_thresh``. Nothing is returned; ``box.classes`` is modified.
    """
    if len(boxes) == 0:
        return

    nb_class = len(boxes[0].classes)

    for c in range(nb_class):
        # indices of the boxes, best score for class c first
        order = np.argsort([-box.classes[c] for box in boxes])

        for rank, keeper_idx in enumerate(order):
            keeper = boxes[keeper_idx]

            # already suppressed (or never scored) for this class
            if keeper.classes[c] == 0:
                continue

            for other_idx in order[rank + 1:]:
                other = boxes[other_idx]

                if bbox_iou(keeper, other) >= nms_thresh:
                    other.classes[c] = 0


def decode_netout(netout, anchors, obj_thresh, net_h, net_w):
    """Turn one raw YOLO output map into a list of ``BoundBox`` candidates.

    # Arguments
        netout       : array whose first two axes are the grid (rows, columns);
                       the last axis is reshaped into 3 boxes per cell, each
                       laid out as x, y, w, h, objectness, class scores...
                       The array is activated in place (when the reshape
                       yields a view, the caller's data is overwritten).
        anchors      : flat sequence of anchor sizes (w0, h0, w1, h1, w2, h2).
        obj_thresh   : boxes with objectness <= this value are skipped and
                       class scores <= this value are zeroed.
        net_h, net_w : network input size used to normalise the anchor sizes.
    # Returns
        A list of ``BoundBox`` objects whose corners are expressed as fractions
        of the network input.
    """
    nb_box = 3
    grid_h, grid_w = netout.shape[:2]
    netout = netout.reshape((grid_h, grid_w, nb_box, -1))

    # activate centre offsets and objectness, then weight the class
    # probabilities by objectness and drop the ones below the threshold
    netout[..., :2]  = _sigmoid(netout[..., :2])
    netout[..., 4]   = _sigmoid(netout[..., 4])
    netout[..., 5:]  = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])
    netout[..., 5:] *= netout[..., 5:] > obj_thresh

    boxes = []

    for row in range(grid_h):
        for col in range(grid_w):
            for b in range(nb_box):
                cell = netout[row, col, b]

                # element 4 is the objectness score
                objectness = cell[4]
                if objectness <= obj_thresh:
                    continue

                # first 4 elements are x, y, w, and h
                tx, ty, tw, th = cell[:4]

                cx = (col + tx) / grid_w  # center position, unit: image width
                cy = (row + ty) / grid_h  # center position, unit: image height
                bw = anchors[2 * b + 0] * np.exp(tw) / net_w  # unit: image width
                bh = anchors[2 * b + 1] * np.exp(th) / net_h  # unit: image height

                # remaining elements are the class probabilities
                boxes.append(BoundBox(cx-bw/2, cy-bh/2, cx+bw/2, cy+bh/2, objectness, cell[5:]))

    return boxes


def preprocess_input(image, net_h, net_w):
    """Letterbox a BGR image into a (1, net_h, net_w, 3) network input.

    The image is converted to RGB, scaled to [0, 1], resized so that it fits
    inside net_h x net_w while keeping its aspect ratio, and centred on a
    canvas filled with 0.5. A leading batch axis of size 1 is added.
    """
    img_h, img_w, _ = image.shape

    # pick the scaled size: the side with the smaller ratio fills the network input
    if (float(net_w)/img_w) < (float(net_h)/img_h):
        scaled_h = (img_h * net_w)//img_w
        scaled_w = net_w
    else:
        scaled_w = (img_w * net_h)//img_h
        scaled_h = net_h

    # colour conversion and normalisation happen before the resize
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)/255.
    resized = cv2.resize(rgb, (scaled_w, scaled_h))

    # position of the resized image inside the letterbox
    top, bottom = (net_h-scaled_h)//2, (net_h+scaled_h)//2
    left, right = (net_w-scaled_w)//2, (net_w+scaled_w)//2

    canvas = np.ones((net_h, net_w, 3)) * 0.5
    canvas[top:bottom, left:right, :] = resized

    return np.expand_dims(canvas, 0)


def normalize(image):
    """Divide ``image`` by 255 (maps 8-bit pixel values onto [0, 1])."""
    scaled = image/255.
    return scaled


def get_yolo_boxes(model, images, net_h, net_w, anchors, obj_thresh, nms_thresh):
    """Run the detector on a batch of images and return the decoded boxes.

    # Arguments
        model        : object exposing ``predict_on_batch``; its result is
                       indexed as three output maps, each with one entry per image.
        images       : list of images (arrays with a height x width x channel shape).
        net_h, net_w : network input size.
        anchors      : flat list of 18 anchor values; six are used per output
                       map, the last six for the first map.
        obj_thresh   : objectness threshold forwarded to ``decode_netout``.
        nms_thresh   : IoU threshold forwarded to ``do_nms``.
    # Returns
        A list with one list of boxes per input image (empty for an empty batch).
        Boxes are rescaled with the size of the image they belong to, and
        suppressed boxes are kept with their class scores zeroed.
    """
    nb_images = len(images)

    # nothing to predict; avoids indexing into an empty batch
    if nb_images == 0:
        return []

    batch_input = np.zeros((nb_images, net_h, net_w, 3))

    # preprocess the input
    for i in range(nb_images):
        batch_input[i] = preprocess_input(images[i], net_h, net_w)

    # run the prediction
    batch_output = model.predict_on_batch(batch_input)
    batch_boxes  = [None]*nb_images

    for i in range(nb_images):
        # use this image's own size: images in a batch may differ in size
        image_h, image_w = images[i].shape[:2]

        yolos = [batch_output[0][i], batch_output[1][i], batch_output[2][i]]
        boxes = []

        # decode the output of the network
        for j in range(len(yolos)):
            yolo_anchors = anchors[(2-j)*6:(3-j)*6] # config['model']['anchors']
            boxes += decode_netout(yolos[j], yolo_anchors, obj_thresh, net_h, net_w)

        # correct the sizes of the bounding boxes
        correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w)

        # suppress non-maximal boxes
        do_nms(boxes, nms_thresh)

        batch_boxes[i] = boxes

    return batch_boxes


def compute_overlap(a, b):
    """
    Pairwise intersection-over-union between two sets of boxes.
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.

    Only the first four columns (x1, y1, x2, y2) of each row are read.

    Parameters
    ----------
    a: (N, 4) ndarray of float
    b: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    # corners of `a` as column vectors so they broadcast against rows of `b`
    a_x1 = np.expand_dims(a[:, 0], 1)
    a_y1 = np.expand_dims(a[:, 1], 1)
    a_x2 = np.expand_dims(a[:, 2], axis=1)
    a_y2 = np.expand_dims(a[:, 3], axis=1)

    # width / height of the intersection rectangle, never negative
    iw = np.maximum(np.minimum(a_x2, b[:, 2]) - np.maximum(a_x1, b[:, 0]), 0)
    ih = np.maximum(np.minimum(a_y2, b[:, 3]) - np.maximum(a_y1, b[:, 1]), 0)

    area_a = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1)
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    # union area, floored at eps to avoid dividing by zero
    union = np.maximum(area_a + area_b - iw * ih, np.finfo(float).eps)

    return (iw * ih) / union


def compute_ap(recall, precision):
    """ Area under the precision/recall curve (average precision).
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.

    # Arguments
        recall:    The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # pad both curves with sentinel values at each end
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    # precision envelope: every point becomes the maximum of itself and all
    # points to its right (a running maximum taken from the end)
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # positions where the recall value changes
    steps = np.where(mrec[1:] != mrec[:-1])[0]

    # sum of (recall step width) * (envelope precision at the step)
    widths = mrec[steps + 1] - mrec[steps]
    return np.sum(widths * mpre[steps + 1])


def _softmax(x, axis=-1):
    x = x - np.amax(x, axis, keepdims=True)
    e_x = np.exp(x)
    
    return e_x / e_x.sum(axis, keepdims=True)


================================================
FILE: imageai_tf_deprecated/Detection/Custom/voc.py
================================================
import os
import xml.etree.ElementTree as ET
import pickle


def parse_voc_annotation(ann_dir, img_dir, cache_name, labels=[]):
    """Read Pascal-VOC style XML annotations, using a pickle file as cache.

    # Arguments
        ann_dir    : directory holding the annotation XML files.
        img_dir    : directory prepended to every annotated file name.
        cache_name : path of the pickle cache. If it exists it is loaded and
                     returned as-is (``ann_dir`` and ``labels`` are then not
                     consulted); otherwise it is written after parsing.
        labels     : object names to keep; an empty list keeps every object.
    # Returns
        (all_insts, seen_labels): a list of image dicts ('filename', 'width',
        'height', 'object') for images with at least one kept object, and a
        dict counting every object name encountered (filtered ones included).
    """
    # NOTE: pickle.load executes arbitrary code from the file; only point
    # cache_name at files this process wrote itself.
    if os.path.exists(cache_name):
        with open(cache_name, 'rb') as handle:
            cached = pickle.load(handle)
        return cached['all_insts'], cached['seen_labels']

    all_insts = []
    seen_labels = {}

    for ann in sorted(os.listdir(ann_dir)):
        ann_path = os.path.join(ann_dir, ann)
        img = {'object': []}

        try:
            tree = ET.parse(ann_path)
        except Exception as e:
            print(e)
            print('Ignore this bad annotation: ' + ann_path)
            continue

        for elem in tree.iter():
            tag = elem.tag

            if 'filename' in tag:
                img['filename'] = os.path.join(img_dir, elem.text)
            if 'width' in tag:
                img['width'] = int(elem.text)
            if 'height' in tag:
                img['height'] = int(elem.text)

            if not ('object' in tag or 'part' in tag):
                continue

            obj = {}
            for attr in list(elem):
                if 'name' in attr.tag:
                    name = attr.text
                    obj['name'] = name
                    seen_labels[name] = seen_labels.get(name, 0) + 1

                    # unwanted label: stop before its box is read or stored
                    if len(labels) > 0 and name not in labels:
                        break
                    img['object'] += [obj]

                if 'bndbox' in attr.tag:
                    # obj is already referenced from img['object'], so the
                    # coordinates filled in here show up there as well
                    for dim in list(attr):
                        for key in ('xmin', 'ymin', 'xmax', 'ymax'):
                            if key in dim.tag:
                                obj[key] = int(round(float(dim.text)))

        if len(img['object']) > 0:
            all_insts += [img]

    with open(cache_name, 'wb') as handle:
        pickle.dump({'all_insts': all_insts, 'seen_labels': seen_labels},
                    handle, protocol=pickle.HIGHEST_PROTOCOL)

    return all_insts, seen_labels


================================================
FILE: imageai_tf_deprecated/Detection/README.md
================================================
# ImageAI : Object Detection

A **DeepQuest AI** project [https://deepquestai.com](https://deepquestai.com)

### TABLE OF CONTENTS

- <a href="#firstdetection" > :white_square_button: First Object Detection</a>
- <a href="#objectextraction" > :white_square_button: Object Detection, Extraction and Fine-tune</a>
- <a href="#customdetection" > :white_square_button: Custom Object Detection</a>
- <a href="#detectionspeed" > :white_square_button: Detection Speed</a>
- <a href="#hidingdetails" > :white_square_button: Hiding/Showing Object Name and Probability</a>
- <a href="#inputoutputtype" > :white_square_button: Image Input & Output Types</a>
- <a href="#documentation" > :white_square_button: Documentation</a>


ImageAI provides very convenient and powerful methods to perform object detection on images and extract each object from the image. The object detection class supports RetinaNet, YOLOv3 and TinyYOLOv3. To start performing object detection, you must download the RetinaNet, YOLOv3 or TinyYOLOv3 object detection model via the links below: 
* **[RetinaNet](https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/resnet50_coco_best_v2.1.0.h5)** _(Size = 145 mb, high performance and accuracy, with longer detection time)_
* **[YOLOv3](https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/yolo.h5)** _(Size = 237 mb, moderate performance and accuracy, with a moderate detection time)_
* **[TinyYOLOv3](https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/yolo-tiny.h5)** _(Size = 34 mb, optimized for speed and moderate performance, with fast detection time)_


 Once you download the object detection model file, you should copy the model file to your project folder where your .py files will be.
 Then create a python file and give it a name; an example is FirstObjectDetection.py. Then write the code below into the python file:

### FirstObjectDetection.py
<div id="firstdetection" ></div>

```python
from imageai.Detection import ObjectDetection
import os

execution_path = os.getcwd()

detector = ObjectDetection()
detector.setModelTypeAsYOLOv3()
detector.setModelPath( os.path.join(execution_path , "yolo.h5"))
detector.loadModel()
detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image2.jpg"), output_image_path=os.path.join(execution_path , "image2new.jpg"), minimum_percentage_probability=30)

for eachObject in detections:
    print(eachObject["name"] , " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"] )
    print("--------------------------------")
```

Sample Result:
Input Image
![Input Image](../../data-images/image2.jpg)
Output Image
![Output Image](../../data-images/yolo.jpg)

```
laptop  :  87.32235431671143  :  (306, 238, 390, 284)
--------------------------------
laptop  :  96.86298966407776  :  (121, 209, 258, 293)
--------------------------------
laptop  :  98.6301600933075  :  (279, 321, 401, 425)
--------------------------------
laptop  :  99.78572130203247  :  (451, 204, 579, 285)
--------------------------------
bed  :  94.02391314506531  :  (23, 205, 708, 553)
--------------------------------
apple  :  48.03136885166168  :  (527, 343, 557, 364)
--------------------------------
cup  :  34.09906327724457  :  (462, 347, 496, 379)
--------------------------------
cup  :  44.65090036392212  :  (582, 342, 618, 386)
--------------------------------
person  :  57.70219564437866  :  (27, 311, 341, 437)
--------------------------------
person  :  85.26121377944946  :  (304, 173, 387, 253)
--------------------------------
person  :  96.33603692054749  :  (415, 130, 538, 266)
--------------------------------
person  :  96.95255160331726  :  (174, 108, 278, 269)
--------------------------------
```

Let us make a breakdown of the object detection code that we used above.

```python
from imageai.Detection import ObjectDetection
import os

execution_path = os.getcwd()
```

 In the 3 lines above, we import the **ImageAI object detection** class in the first line, import the `os` module in the second line, and obtain the path to the folder where our Python file runs in the third line.
  
```python
detector = ObjectDetection()
detector.setModelTypeAsYOLOv3()
detector.setModelPath( os.path.join(execution_path , "yolo.h5"))
detector.loadModel()
```

In the 4 lines above, we created a new instance of the `ObjectDetection` class in the first line, set the model type to YOLOv3 in the second line, set the model path to the YOLOv3 model file we downloaded and copied to the python file folder in the third line and load the model in the fourth line.

```python
detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image2.jpg"), output_image_path=os.path.join(execution_path , "image2new.jpg"))

for eachObject in detections:
    print(eachObject["name"] , " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"] )
    print("--------------------------------")
```

In the code above, we ran the `detectObjectsFromImage()` function and passed in the path to our image and the path to the new image which the function will save. The function then returns an array of dictionaries, with each dictionary corresponding to one object detected in the image. Each dictionary has the properties `name` (name of the object), `percentage_probability` (percentage probability of the detection) and `box_points` (the x1, y1, x2 and y2 coordinates of the bounding box of the object).

Should you want to use the RetinaNet which is appropriate for high-performance and high-accuracy demanding detection tasks, you will download the RetinaNet model file from the links above, copy it to your python file's folder, set the model type and model path in your python code as seen below:

```python
detector = ObjectDetection()
detector.setModelTypeAsRetinaNet()
detector.setModelPath( os.path.join(execution_path , "resnet50_coco_best_v2.1.0.h5"))
detector.loadModel()
```

However, if you desire TinyYOLOv3 which is optimized for speed and embedded devices, you will download the TinyYOLOv3 model file from the links above, copy it to your python file's folder, set the model type and model path in your python code as seen below:

```python
detector = ObjectDetection()
detector.setModelTypeAsTinyYOLOv3()
detector.setModelPath( os.path.join(execution_path , "yolo-tiny.h5"))
detector.loadModel()
```

## Object Detection, Extraction and Fine-tune
<div id="objectextraction" ></div>

In the examples we used above, we ran the object detection on an image and it returned the detected objects in an array as well as saved a new image with rectangular markers drawn on each object. In our next examples, we will be able to extract each object from the input image
  and save it independently.

In the example code below, which is nearly identical to the previous object detection code, we will save each object detected as a separate image.

```python
from imageai.Detection import ObjectDetection
import os

execution_path = os.getcwd()

detector = ObjectDetection()
detector.setModelTypeAsYOLOv3()
detector.setModelPath( os.path.join(execution_path , "yolo.h5"))
detector.loadModel()

detections, objects_path = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3new.jpg"), minimum_percentage_probability=30,  extract_detected_objects=True)

for eachObject, eachObjectPath in zip(detections, objects_path):
    print(eachObject["name"] , " : " , eachObject["percentage_probability"], " : ", eachObject["box_points"] )
    print("Object's image saved in " + eachObjectPath)
    print("--------------------------------")
```

![Input Image](../../data-images/image3.jpg)
![Output Images](../../data-images/image3new.jpg)

![dog](../../data-images/image3new-objects/dog-1.jpg)
![motorcycle](../../data-images/image3new-objects/motorcycle-3.jpg)
![car](../../data-images/image3new-objects/car-4.jpg)
![bicycle](../../data-images/image3new-objects/bicycle-5.jpg)
![person](../../data-images/image3new-objects/person-6.jpg)
![person](../../data-images/image3new-objects/person-7.jpg)
![person](../../data-images/image3new-objects/person-8.jpg)
![person](../../data-images/image3new-objects/person-9.jpg)
![person](../../data-images/image3new-objects/person-10.jpg)


Let us review the part of the code that performs the object detection and extracts the images:

```python
detections, objects_path = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3new.jpg"), minimum_percentage_probability=30,  extract_detected_objects=True)

for eachObject, eachObjectPath in zip(detections, objects_path):
    print(eachObject["name"] , " : " , eachObject["percentage_probability"], " : ", eachObject["box_points"] )
    print("Object's image saved in " + eachObjectPath)
    print("--------------------------------")
```

In the lines above, we called the `detectObjectsFromImage()` function, passing in the input image path, the output image path, and an extra parameter `extract_detected_objects=True`. This parameter states that the function should extract each object detected from the image and save it as a separate image. The parameter is `False` by default. Once set to `True`, the function will create a directory which is the **output image path + "-objects"** . Then it saves all the extracted images into this new directory with each image's name being the **detected object name + "-" + a number** which corresponds to the order in which the objects were detected.

Setting this new parameter to extract and save detected objects as images makes the function return 2 values. The first is the array of dictionaries, with each dictionary corresponding to a detected object. The second is an array of the paths to the saved images of each object detected and extracted, arranged in the same order as the objects in the first array.


**And one important feature you need to know!** You will recall that the percentage probability
   for each detected object is sent back by the `detectObjectsFromImage()` function. The function has a parameter `minimum_percentage_probability`, whose default value is `50` (value ranges between 0 - 100), but it is set to 30 in this example. That means the function will only return a detected object if its percentage probability is **30 or above**. The value was kept at this number to ensure the integrity of the detection results. You can fine-tune the object detection by setting **minimum_percentage_probability** to a smaller value to detect more objects or to a higher value to detect fewer objects.


## Custom Object Detection
<div id="customdetection" ></div>

The object detection model (**RetinaNet**) supported by **ImageAI** can detect 80 different types of objects. They include:
```
person,  bicycle,  car, motorcycle, airplane, bus, train,  truck,  boat,  traffic light,  fire hydrant, stop_sign,
parking meter,   bench,   bird,   cat,   dog,   horse,   sheep,   cow,   elephant,   bear,   zebra,
giraffe,   backpack,   umbrella,   handbag,   tie,   suitcase,   frisbee,   skis,   snowboard,
sports ball,   kite,   baseball bat,   baseball glove,   skateboard,   surfboard,   tennis racket,
bottle,   wine glass,   cup,   fork,   knife,   spoon,   bowl,   banana,   apple,   sandwich,   orange,
broccoli,   carrot,   hot dog,   pizza,   donot,   cake,   chair,   couch,   potted plant,   bed,
dining table,   toilet,   tv,   laptop,   mouse,   remote,   keyboard,   cell phone,   microwave,   oven,
toaster,   sink,   refrigerator,   book,   clock,   vase,   scissors,   teddy bear,   hair dryer,   toothbrush.
```

Interestingly, **ImageAI** allows you to perform detection for one or more of the items above. That means you can
 customize the type of object(s) you want to be detected in the image. Let's take a look at the code below:

```python
from imageai.Detection import ObjectDetection
import os

execution_path = os.getcwd()

detector = ObjectDetection()
detector.setModelTypeAsYOLOv3()
detector.setModelPath( os.path.join(execution_path , "yolo.h5"))
detector.loadModel()

custom_objects = detector.CustomObjects(car=True, motorcycle=True)
detections = detector.detectCustomObjectsFromImage(custom_objects=custom_objects, input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3custom.jpg"), minimum_percentage_probability=30)

for eachObject in detections:
    print(eachObject["name"] , " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"] )
    print("--------------------------------")
```

![Result](../../data-images/image3custom.jpg)


Let us take a look at the part of the code that made this possible.
```python
custom_objects = detector.CustomObjects(car=True, motorcycle=True)
detections = detector.detectCustomObjectsFromImage(custom_objects=custom_objects, input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3custom.jpg"), minimum_percentage_probability=30)
```

In the above code, after loading the model (can be done before loading the model as well), we defined a new variable
`custom_objects = detector.CustomObjects()`, in which we set its car and motorcycle properties equal to **True**.
This is to tell the model to detect only the object we set to True. Then we call the `detector.detectCustomObjectsFromImage()`
which is the function that allows us to perform detection of custom objects. Then we will set the `custom_objects` value
 to the custom objects variable we defined.


## Detection Speed
<div id="detectionspeed"></div>

**ImageAI** now provides detection speeds for all object detection tasks. The detection speeds allow you to reduce
 the time of detection at a rate between 20% - 80%, and yet having just slight changes but accurate detection
results. Coupled with lowering the `minimum_percentage_probability` parameter, detections can match the normal
speed and yet reduce detection time drastically. The available detection speeds are **"normal"**(default), **"fast"**, **"faster"** , **"fastest"** and **"flash"**.
All you need to do is to state the speed mode you desire when loading the model as seen below.

```python
detector.loadModel(detection_speed="fast")
```


## Hiding/Showing Object Name and Probability
<div id="hidingdetails"></div>

**ImageAI** provides options to hide the name of objects detected and/or the percentage probability from being shown on the saved/returned detected image. Using the `detectObjectsFromImage()` and `detectCustomObjectsFromImage()` functions, the parameters `display_object_name` and `display_percentage_probability`  can be set to True or False individually. Take a look at the code below:

```python
detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image3.jpg"), output_image_path=os.path.join(execution_path , "image3new_nodetails.jpg"), minimum_percentage_probability=30, display_percentage_probability=False, display_object_name=False)
```

In the above code, we specified that both the object name and percentage probability should not be shown. As you can see in the result below, both the names of the objects and their individual percentage probabilities are not shown in the detected image.

![Result](../../data-images/nodetails.jpg)


## Image Input & Output Types
<div id="inputoutputtype"></div>

**ImageAI** supports 3 types of inputs which are **file path to image file**(default), **numpy array of image** and **image file stream**
as well as 2 types of output which are image **file**(default) and numpy  **array**.
This means you can now perform object detection in production applications such as on a web server and system
 that returns file in any of the above stated formats.

To perform object detection with numpy array or file stream input, you just need to state the input type
in the `.detectObjectsFromImage()` function or the `.detectCustomObjectsFromImage()` function. See example below.

```python
detections = detector.detectObjectsFromImage(input_type="array", input_image=image_array , output_image_path=os.path.join(execution_path , "image.jpg")) # For numpy array input type
detections = detector.detectObjectsFromImage(input_type="stream", input_image=image_stream , output_image_path=os.path.join(execution_path , "test2new.jpg")) # For file stream input type
```

To perform object detection with numpy array output you just need to state the output type
in the `.detectObjectsFromImage()` function or the `.detectCustomObjectsFromImage()` function. See example below.

```python
detected_image_array, detections = detector.detectObjectsFromImage(output_type="array", input_image="image.jpg" ) # For numpy array output type
```


## Documentation
<div id="documentation" ></div>

We have provided full documentation for all **ImageAI** classes and functions in 3 major languages. Find links below:

* Documentation - **English Version  [https://imageai.readthedocs.io](https://imageai.readthedocs.io)**
* Documentation - **Chinese Version  [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)**
* Documentation - **French Version  [https://imageai-fr.readthedocs.io](https://imageai-fr.readthedocs.io)**


================================================
FILE: imageai_tf_deprecated/Detection/VIDEO.md
================================================
# ImageAI : Video Object Detection, Tracking  and Analysis

A **DeepQuest AI** project [https://deepquestai.com](https://deepquestai.com)

---

## TABLE OF CONTENTS

- <a href="#videodetection" > :white_square_button: First Video Object Detection</a>
- <a href="#customvideodetection" > :white_square_button: Custom Video Object Detection (Object Tracking)</a>
- <a href="#camerainputs" > :white_square_button: Camera / Live Stream Video Detection</a>
- <a href="#videoanalysis" > :white_square_button: Video Analysis</a>
- <a href="#videodetectionspeed" > :white_square_button: Detection Speed</a>
- <a href="#hidingdetails" > :white_square_button: Hiding/Showing Object Name and Probability</a>
- <a href="#videodetectionintervals" > :white_square_button: Frame Detection Intervals</a>
- <a href="#detectiontimeout" > :white_square_button: Video Detection Timeout (NEW)</a>
- <a href="#documentation" > :white_square_button: Documentation</a>

ImageAI provides convenient, flexible and powerful methods to perform object detection on videos. The video object detection class provided only supports RetinaNet, YOLOv3 and TinyYOLOv3. This version of **ImageAI** provides commercial grade video objects detection features, which include but not limited to device/IP camera inputs, per frame, per second, per minute and entire video analysis for storing in databases and/or real-time visualizations and for future insights.

To start performing video object detection, you must download the RetinaNet, YOLOv3 or TinyYOLOv3 object detection model via the links below:

- **[RetinaNet](https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/resnet50_coco_best_v2.0.1.h5)** _(Size = 145 mb, high performance and accuracy, with longer detection time)_
- **[YOLOv3](https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/yolo.h5)** _(Size = 237 mb, moderate performance and accuracy, with a moderate detection time)_
- **[TinyYOLOv3](https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/yolo-tiny.h5)** _(Size = 34 mb, optimized for speed and moderate performance, with fast detection time)_

Because video object detection is a compute intensive task, we advise you perform this experiment using a computer with a NVIDIA GPU and the GPU version of Tensorflow installed. Performing Video Object Detection on a CPU will be slower than using an NVIDIA GPU powered computer. You can use Google Colab for this experiment as it has an NVIDIA K80 GPU available for free.

 Once you download the object detection model file, you should copy the model file to your project folder where your .py files will be.
 Then create a python file and give it a name; an example is `FirstVideoObjectDetection.py`. Then write the code below into the python file:


### FirstVideoObjectDetection.py
<div id="videodetection" ></div>

```python
from imageai.Detection import VideoObjectDetection
import os

execution_path = os.getcwd()

detector = VideoObjectDetection()
detector.setModelTypeAsRetinaNet()
detector.setModelPath( os.path.join(execution_path , "resnet50_coco_best_v2.0.1.h5"))
detector.loadModel()

video_path = detector.detectObjectsFromVideo(input_file_path=os.path.join(execution_path, "traffic.mp4"),
                                output_file_path=os.path.join(execution_path, "traffic_detected")
                                , frames_per_second=20, log_progress=True)
print(video_path)
```


Input Video (a 1min 24seconds video)

[![](../../data-images/video--1.jpg)](https://github.com/OlafenwaMoses/ImageAI/blob/master/data-videos/traffic.mp4)

Output Video
[![](../../data-images/video-2.jpg)](https://www.youtube.com/embed/qplVDqOmElI?rel=0)

Let us make a breakdown of the object detection code that we used above.

```python
from imageai.Detection import VideoObjectDetection
import os

execution_path = os.getcwd()
```

 In the 3 lines above , we import the **ImageAI video object detection** class in the first line, import the **os** in the second line and obtained
  the path to folder where our python file runs.

```python
detector = VideoObjectDetection()
detector.setModelTypeAsRetinaNet()
detector.setModelPath( os.path.join(execution_path , "resnet50_coco_best_v2.0.1.h5"))
detector.loadModel()
```

In the 4 lines above, we created a new instance of the **VideoObjectDetection** class in the first line, set the model type to RetinaNet in the second line, set the model path to the RetinaNet model file we downloaded and copied to the python file folder in the third line and load the model in the fourth line.

```python
video_path = detector.detectObjectsFromVideo(input_file_path=os.path.join(execution_path, "traffic.mp4"),
                                 output_file_path=os.path.join(execution_path, "traffic_detected"),
                                 frames_per_second=20, log_progress=True)
print(video_path)
```

In the 2 lines above, we ran the `detectObjectsFromVideo()` function and passed in the path to our video, the path to the new video (without the extension, it saves a .avi video by default) which the function will save, the number of frames per second (fps) that we desire the output video to have and the option to log the progress of the detection in the console. Then the function returns the path to the saved video which contains boxes and percentage probabilities rendered on objects detected in the video.


### Custom Video Object Detection
<div id="customvideodetection" ></div>

The video object detection model (**RetinaNet**) supported by **ImageAI** can detect 80 different types of objects. They include: 
```
      person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, traffic light, fire hydrant, stop_sign,
      parking meter,   bench,   bird,   cat,   dog,   horse,   sheep,   cow,   elephant,   bear,   zebra,
      giraffe,   backpack,   umbrella,   handbag,   tie,   suitcase,   frisbee,   skis,   snowboard,
      sports ball,   kite,   baseball bat,   baseball glove,   skateboard,   surfboard,   tennis racket,
      bottle,   wine glass,   cup,   fork,   knife,   spoon,   bowl,   banana,   apple,   sandwich,   orange,
      broccoli,   carrot,   hot dog,   pizza,   donot,   cake,   chair,   couch,   potted plant,   bed,
      dining table,   toilet,   tv,   laptop,   mouse,   remote,   keyboard,   cell phone,   microwave,
      oven,   toaster,   sink,   refrigerator,   book,   clock,   vase,   scissors,   teddy bear,   hair dryer,
      toothbrush.
```


Interestingly, **ImageAI** allows you to perform detection for one or more of the items above. That means you can customize the type of object(s) you want to be detected in the video. Let's take a look at the code below:

```python
from imageai.Detection import VideoObjectDetection
import os

execution_path = os.getcwd()

detector = VideoObjectDetection()
detector.setModelTypeAsRetinaNet()
detector.setModelPath( os.path.join(execution_path , "resnet50_coco_best_v2.0.1.h5"))
detector.loadModel()

custom_objects = detector.CustomObjects(person=True, bicycle=True, motorcycle=True)

video_path = detector.detectCustomObjectsFromVideo(
                custom_objects=custom_objects,
                input_file_path=os.path.join(execution_path, "traffic.mp4"),
                output_file_path=os.path.join(execution_path, "traffic_custom_detected"),
                frames_per_second=20, log_progress=True)
print(video_path)
```

Let us take a look at the part of the code that made this possible.

```python
custom_objects = detector.CustomObjects(person=True, bicycle=True, motorcycle=True)

video_path = detector.detectCustomObjectsFromVideo(
                custom_objects=custom_objects, 
                input_file_path=os.path.join(execution_path, "traffic.mp4"),
                output_file_path=os.path.join(execution_path, "traffic_custom_detected"),
                frames_per_second=20, log_progress=True)
```

In the above code, after loading the model (can be done before loading the model as well), we defined a new variable
`custom_objects = detector.CustomObjects()`, in which we set its person, bicycle and motorcycle properties equal to **True**.
This is to tell the model to detect only the object we set to True. Then we call the `detector.detectCustomObjectsFromVideo()`
which is the function that allows us to perform detection of custom objects. Then we will set the `custom_objects` value
 to the custom objects variable we defined.

Output Video
[![Output Video](../../data-images/video-3.jpg)](https://www.youtube.com/embed/YfAycAzkwPM?rel=0)
C:\Users\User\PycharmProjects\ImageAITest\traffic_custom_detected.avi


### Camera / Live Stream Video Detection
<div id="camerainputs"></div>

**ImageAI** now allows live-video detection with support for camera inputs. Using **OpenCV**'s `VideoCapture()` function, you can load live-video streams from a device camera, cameras connected by cable or IP cameras, and parse it into **ImageAI**'s `detectObjectsFromVideo()` and `detectCustomObjectsFromVideo()` functions. All features that are supported for detecting objects in a video file is also available for detecting objects in a camera's live-video feed. Find below an example of detecting live-video feed from the device camera.

```python
from imageai.Detection import VideoObjectDetection
import os
import cv2

execution_path = os.getcwd()


camera = cv2.VideoCapture(0)

detector = VideoObjectDetection()
detector.setModelTypeAsRetinaNet()
detector.setModelPath(os.path.join(execution_path , "resnet50_coco_best_v2.0.1.h5"))
detector.loadModel()


video_path = detector.detectObjectsFromVideo(
                camera_input=camera,
                output_file_path=os.path.join(execution_path, "camera_detected_video"),
                frames_per_second=20, log_progress=True, minimum_percentage_probability=40)
```

The difference in the code above and the code for the detection of a video file is that we defined an **OpenCV VideoCapture** instance and loaded the default device camera into it. Then we parsed the camera we defined into the parameter `camera_input` which replaces the `input_file_path` that is used for video file.

### Video Analysis
<div id="videoanalysis"></div>

**ImageAI** now provides commercial-grade video analysis in the Video Object Detection class, for both video file inputs and camera inputs. This feature allows developers to obtain deep insights into any video processed with **ImageAI**. These insights can be visualized in real-time, or stored in a NoSQL database for future review or analysis.

For video analysis, the `detectObjectsFromVideo()` and `detectCustomObjectsFromVideo()` functions now allow you to state your own defined functions which will be executed for every frame, second and/or minute of the video detected, as well as state a function that will be executed at the end of a video detection. Once these functions are stated, they will receive raw but comprehensive analytical data on the index of the frame/second/minute, objects detected (name, percentage_probability and box_points), number of instances of each unique object detected and average number of occurrence of each unique object detected over a second/minute and entire video.

To obtain the video analysis, all you need to do is specify a function, state the corresponding parameters it will be receiving and parse the function name into the `per_frame_function`, `per_second_function`, `per_minute_function` and `video_complete_function` parameters in the detection function. Find below examples of video analysis functions.

```python
def forFrame(frame_number, output_array, output_count):
    """Per-frame callback: print the frame index, the detections and the per-object counts."""
    report = (
        ("FOR FRAME ", frame_number),
        ("Output for each object : ", output_array),
        ("Output count for unique objects : ", output_count),
    )
    # Each entry is printed as "<label> <value>", one entry per line.
    for label, value in report:
        print(label, value)
    print("------------END OF A FRAME --------------")

def forSeconds(second_number, output_arrays, count_arrays, average_output_count):
    """Per-second callback: print the second index and the data passed in for that second."""
    report = (
        ("SECOND : ", second_number),
        ("Array for the outputs of each frame ", output_arrays),
        ("Array for output count for unique objects in each frame : ", count_arrays),
        ("Output average count for unique objects in the last second: ", average_output_count),
    )
    # Each entry is printed as "<label> <value>", one entry per line.
    for label, value in report:
        print(label, value)
    print("------------END OF A SECOND --------------")

def forMinute(minute_number, output_arrays, count_arrays, average_output_count):
    """Per-minute callback: print the minute index and the data passed in for that minute."""
    labels = (
        "MINUTE : ",
        "Array for the outputs of each frame ",
        "Array for output count for unique objects in each frame : ",
        "Output average count for unique objects in the last minute: ",
    )
    values = (minute_number, output_arrays, count_arrays, average_output_count)
    # Pair every label with its value and print them one per line.
    for label, value in zip(labels, values):
        print(label, value)
    print("------------END OF A MINUTE --------------")

video_detector = VideoObjectDetection()
video_detector.setModelTypeAsYOLOv3()
video_detector.setModelPath(os.path.join(execution_path, "yolo.h5"))
video_detector.loadModel()

video_detector.detectObjectsFromVideo(
    input_file_path=os.path.join(execution_path, "traffic.mp4"),
    output_file_path=os.path.join(execution_path, "traffic_detected"),
    frames_per_second=10,
    per_second_function=forSeconds,
    per_frame_function=forFrame,
    per_minute_function=forMinute,
    minimum_percentage_probability=30
)
```

When the detection starts on a video feed, be it from a video file or camera input, the result will have the format as below:

**Results for the Frame function**
```
FOR FRAME : 1
 
Output for each object : [{'box_points': (362, 295, 443, 355), 'name': 'boat', 'percentage_probability': 26.666194200515747}, {'box_points': (319, 245, 386, 296), 'name': 'boat', 'percentage_probability': 30.052968859672546}, {'box_points': (219, 308, 341, 358), 'name': 'boat', 'percentage_probability': 47.46982455253601}, {'box_points': (589, 198, 621, 241), 'name': 'bus', 'percentage_probability': 24.62330162525177}, {'box_points': (519, 181, 583, 263), 'name': 'bus', 'percentage_probability': 27.446213364601135}, {'box_points': (493, 197, 561, 272), 'name': 'bus', 'percentage_probability': 59.81815457344055}, {'box_points': (432, 187, 491, 240), 'name': 'bus', 'percentage_probability': 64.42965269088745}, {'box_points': (157, 225, 220, 255), 'name': 'car', 'percentage_probability': 21.150341629981995}, {'box_points': (324, 249, 377, 293), 'name': 'car', 'percentage_probability': 24.089913070201874}, {'box_points': (152, 275, 260, 327), 'name': 'car', 'percentage_probability': 30.341443419456482}, {'box_points': (433, 198, 485, 244), 'name': 'car', 'percentage_probability': 37.205660343170166}, {'box_points': (184, 226, 233, 260), 'name': 'car', 'percentage_probability': 38.52525353431702}, {'box_points': (3, 296, 134, 359), 'name': 'car', 'percentage_probability': 47.80363142490387}, {'box_points': (357, 302, 439, 359), 'name': 'car', 'percentage_probability': 47.94844686985016}, {'box_points': (481, 266, 546, 314), 'name': 'car', 'percentage_probability': 65.8585786819458}, {'box_points': (597, 269, 624, 318), 'name': 'person', 'percentage_probability': 27.125394344329834}]
 
Output count for unique objects : {'bus': 4, 'boat': 3, 'person': 1, 'car': 8}

------------END OF A FRAME --------------
```

For any function you pass into the **per_frame_function**, the function will be executed after every single video frame is processed and the following will be passed into it:

* **Frame Index:** This is the position number of the frame inside the video (e.g 1 for first frame and 20 for twentieth frame).
* **Output Array:** This is an array of dictionaries. Each dictionary corresponds to each detected object in the image and it contains the "name", "percentage_probability" and "box_points"(x1,y1,x2,y2) values of the object.
* **Output Count:** This is a dictionary that has the name of each unique object detected as its keys and the number of instances of the objects detected as the values.

**Results for the Second function**
```
FOR SECOND : 1
 
 Array for the outputs of each frame [[{'box_points': (362, 295, 443, 355), 'name': 'boat', 'percentage_probability': 26.666194200515747}, {'box_points': (319, 245, 386, 296), 'name': 'boat', 'percentage_probability': 30.052968859672546}, {'box_points': (219, 308, 341, 358), 'name': 'boat', 'percentage_probability': 47.46982455253601}, {'box_points': (589, 198, 621, 241), 'name': 'bus', 'percentage_probability': 24.62330162525177}, {'box_points': (519, 181, 583, 263), 'name': 'bus', 'percentage_probability': 27.446213364601135}, {'box_points': (493, 197, 561, 272), 'name': 'bus', 'percentage_probability': 59.81815457344055}, {'box_points': (432, 187, 491, 240), 'name': 'bus', 'percentage_probability': 64.42965269088745}, {'box_points': (157, 225, 220, 255), 'name': 'car', 'percentage_probability': 21.150341629981995}, {'box_points': (324, 249, 377, 293), 'name': 'car', 'percentage_probability': 24.089913070201874}, {'box_points': (152, 275, 260, 327), 'name': 'car', 'percentage_probability': 30.341443419456482}, {'box_points': (433, 198, 485, 244), 'name': 'car', 'percentage_probability': 37.205660343170166}, {'box_points': (184, 226, 233, 260), 'name': 'car', 'percentage_probability': 38.52525353431702}, {'box_points': (3, 296, 134, 359), 'name': 'car', 'percentage_probability': 47.80363142490387}, {'box_points': (357, 302, 439, 359), 'name': 'car', 'percentage_probability': 47.94844686985016}, {'box_points': (481, 266, 546, 314), 'name': 'car', 'percentage_probability': 65.8585786819458}, {'box_points': (597, 269, 624, 318), 'name': 'person', 'percentage_probability': 27.125394344329834}],
 [{'box_points': (316, 240, 384, 302), 'name': 'boat', 'percentage_probability': 29.594269394874573}, {'box_points': (361, 295, 441, 354), 'name': 'boat', 'percentage_probability': 36.11513376235962}, {'box_points': (216, 305, 340, 357), 'name': 'boat', 'percentage_probability': 44.89373862743378}, {'box_points': (432, 198, 488, 244), 'name': 'truck', 'percentage_probability': 22.914741933345795}, {'box_points': (589, 199, 623, 240), 'name': 'bus', 'percentage_probability': 20.545457303524017}, {'box_points': (519, 182, 583, 263), 'name': 'bus', 'percentage_probability': 24.467085301876068}, {'box_points': (492, 197, 563, 271), 'name': 'bus', 'percentage_probability': 61.112016439437866}, {'box_points': (433, 188, 490, 241), 'name': 'bus', 'percentage_probability': 65.08989334106445}, {'box_points': (352, 303, 442, 357), 'name': 'car', 'percentage_probability': 20.025095343589783}, {'box_points': (136, 172, 188, 195), 'name': 'car', 'percentage_probability': 21.571354568004608}, {'box_points': (152, 276, 261, 326), 'name': 'car', 'percentage_probability': 33.07966589927673}, {'box_points': (181, 225, 230, 256), 'name': 'car', 'percentage_probability': 35.111838579177856}, {'box_points': (432, 198, 488, 244), 'name': 'car', 'percentage_probability': 36.25282347202301}, {'box_points': (3, 292, 130, 360), 'name': 'car', 'percentage_probability': 67.55480170249939}, {'box_points': (479, 265, 546, 314), 'name': 'car', 'percentage_probability': 71.47912979125977}, {'box_points': (597, 269, 625, 318), 'name': 'person', 'percentage_probability': 25.903674960136414}],................, 
[{'box_points': (133, 250, 187, 278), 'name': 'umbrella', 'percentage_probability': 21.518094837665558}, {'box_points': (154, 233, 218, 259), 'name': 'umbrella', 'percentage_probability': 23.687003552913666}, {'box_points': (348, 311, 425, 360), 'name': 'boat', 'percentage_probability': 21.015766263008118}, {'box_points': (11, 164, 137, 225), 'name': 'bus', 'percentage_probability': 32.20453858375549}, {'box_points': (424, 187, 485, 243), 'name': 'bus', 'percentage_probability': 38.043853640556335}, {'box_points': (496, 186, 570, 264), 'name': 'bus', 'percentage_probability': 63.83994221687317}, {'box_points': (588, 197, 622, 240), 'name': 'car', 'percentage_probability': 23.51653128862381}, {'box_points': (58, 268, 111, 303), 'name': 'car', 'percentage_probability': 24.538707733154297}, {'box_points': (2, 246, 72, 301), 'name': 'car', 'percentage_probability': 28.433072566986084}, {'box_points': (472, 273, 539, 323), 'name': 'car', 'percentage_probability': 87.17672824859619}, {'box_points': (597, 270, 626, 317), 'name': 'person', 'percentage_probability': 27.459821105003357}]
 ]
 
Array for output count for unique objects in each frame : [{'bus': 4, 'boat': 3, 'person': 1, 'car': 8},
 {'truck': 1, 'bus': 4, 'boat': 3, 'person': 1, 'car': 7},
 {'bus': 5, 'boat': 2, 'person': 1, 'car': 5},
 {'bus': 5, 'boat': 1, 'person': 1, 'car': 9},
 {'truck': 1, 'bus': 2, 'car': 6, 'person': 1},
 {'truck': 2, 'bus': 4, 'boat': 2, 'person': 1, 'car': 7},
 {'truck': 1, 'bus': 3, 'car': 7, 'person': 1, 'umbrella': 1},
 {'bus': 4, 'car': 7, 'person': 1, 'umbrella': 2},
 {'bus': 3, 'car': 6, 'boat': 1, 'person': 1, 'umbrella': 3},
 {'bus': 3, 'car': 4, 'boat': 1, 'person': 1, 'umbrella': 2}]
 
Output average count for unique objects in the last second: {'truck': 0.5, 'bus': 3.7, 'umbrella': 0.8, 'boat': 1.3, 'person': 1.0, 'car': 6.6}

------------END OF A SECOND --------------
```

In the above result, the video was processed and saved in 10 frames per second (FPS). For any function you pass into the **per_second_function**, the function will be executed after every single second of the video that is processed and the following will be passed into it:

- **Second Index:** This is the position number of the second inside the video (e.g 1 for first second and 20 for twentieth second).
- **Output Array:** This is an array of arrays, with each contained array and its position (array index + 1) corresponding to the equivalent frame in the last second of the video (In the above example, there are 10 arrays which correspond to the 10 frames contained in one second). Each contained array contains dictionaries. Each dictionary corresponds to each detected object in the image and it contains the "name", "percentage_probability" and "box_points"(x1,y1,x2,y2) values of the object.
- **Count arrays:** This is an array of dictionaries. Each dictionary and its position (array index + 1)  corresponds to the equivalent frame in the last second of the video.  Each dictionary has the name of each unique object detected as its keys and the number of instances of the objects detected as the values.
- **Average Output Count:** This is a dictionary that has the name of each unique object detected in the last second as its keys and the average number of instances of the objects detected across the number of frames as the values.

**Results for the Minute function**
The above set of **4 parameters** that are returned for every second of the video processed are the same parameters that will be returned for every minute of the video processed. The difference is that the index returned corresponds to the minute index, the **output_arrays** is an array that contains the number of FPS * 60  number of arrays (in the code example above, 10 frames per second(fps) * 60 seconds = 600 frames = 600 arrays), and the **count_arrays** is an array that contains the number of FPS * 60  number of dictionaries (in the code example above, 10 frames per second(fps) * 60 seconds = 600 frames = 600 dictionaries) and the **average_output_count** is a dictionary that covers all the objects detected in all the frames contained in the last minute.

**Results for the Video Complete Function**
**ImageAI** allows you to obtain complete analysis of the entire video processed. All you need is to define a function like the forSeconds or forMinute function and set the **video_complete_function** parameter into your `.detectObjectsFromVideo()` or `.detectCustomObjectsFromVideo()` function. The same values for the per_second_function and per_minute_function will be returned. The difference is that no index will be returned and the other 3 values will be returned, and the 3 values will cover all frames in the video. Below is a sample function: 

```python
def forFull(output_arrays, count_arrays, average_output_count):
    """Video-complete callback: receives the 3 values covering all frames in the video."""
    #Perform action on the 3 parameters returned into the function
    pass  # placeholder statement so the sample is valid Python when copied as-is

video_detector.detectObjectsFromVideo(
    input_file_path=os.path.join(execution_path, "traffic.mp4"),
    output_file_path=os.path.join(execution_path, "traffic_detected"),
    frames_per_second=10,
    video_complete_function=forFull,
    minimum_percentage_probability=30
)
```

**FINAL NOTE ON VIDEO ANALYSIS** : **ImageAI** allows you to obtain the detected video frame as a Numpy array at each frame, second and minute function. All you need to do is specify one more parameter in your function and set `return_detected_frame=True` in your `detectObjectsFromVideo()` or `detectCustomObjectsFromVideo()` function. Once this is set, the extra parameter you specified in your function will be the Numpy array of the detected frame. See a sample below:

```python
def forFrame(frame_number, output_array, output_count, detected_frame):
    print("FOR FRAME " , frame_number)
    print("Output for each object : ", output_array)
    print("Output count for unique objects : ", output_count)
    print("Returned Objects is : ", type(detected_frame))
    print("------------END OF A FRAME --------------")

video_detector.detectObjectsFromVideo(
    input_file_path=os.path.join(execution_path, "traffic.mp4"),
    output_file_path=os.path.join(execution_path, "traffic_detected"),
    frames_per_second=10,
    per_frame_function=forFrame,
    minimum_percentage_probability=30,
    return_detected_frame=True
)
```

### Video Detection Speed
<div id="videodetectionspeed"></div>

**ImageAI** now provides detection speeds for all video object detection tasks. The detection speeds allow you to reduce
 the detection time by between 20% and 80%, with only slight changes to the detection results, which remain accurate.
Coupled with lowering the **minimum_percentage_probability** parameter, detections can closely match those of the normal
speed while reducing detection time drastically. The available detection speeds are **"normal"**(default), **"fast"**, **"faster"** , **"fastest"** and **"flash"**.
All you need to do is to state the speed mode you desire when loading the model as seen below.

```python
detector.loadModel(detection_speed="fast")
```

To observe the differences in the detection speeds, look below for each speed applied to object detection,
 coupled with the adjustment of the minimum_percentage_probability, the time taken to detect and the detections given.
The results below are obtained from detections performed on a NVIDIA K80 GPU. Links are provided below to download
 the videos for each detection speed applied.

**Video Length = 1min 24seconds, Detection Speed = "normal" , Minimum Percentage Probability = 50 (default), Detection Time = 29min 3seconds**
[![](../../data-images/video-4.jpg)](https://www.youtube.com/embed/qplVDqOmElI?rel=0)


**Video Length = 1min 24seconds, Detection Speed = "fast" , Minimum Percentage Probability = 40, Detection Time = 11min 6seconds**
<a href="https://drive.google.com/open?id=118m6UnEG7aFdzxO7uhO_6C-981LJ3Gpf" ><button style="font-size: 12px; color: white; background-color: blue; height: 20px " > >>> Download detected video at speed "fast" </button></a>

**Video Length = 1min 24seconds, Detection Speed = "faster" , Minimum Percentage Probability = 30, Detection Time = 7min 47seconds**
<a href="https://drive.google.com/open?id=1s1FQWFsEX1Yf4FvUPVleK7vRxaQ6pgUy" ><button style="font-size: 12px; color: white; background-color: blue; height: 20px " > >>> Download detected video at speed "faster" </button></a>

**Video Length = 1min 24seconds, Detection Speed = "fastest" , Minimum Percentage Probability = 20, Detection Time = 6min 20seconds**
<a href="https://drive.google.com/open?id=1Wlt0DTGxl-JX7otd30MH4qhURv0rG9rw" ><button style="font-size: 12px; color: white; background-color: blue; height: 20px " > >>> Download detected video at speed "fastest" </button></a>

**Video Length = 1min 24seconds, Detection Speed = "flash" , Minimum Percentage Probability = 10, Detection Time = 3min 55seconds**
<a href="https://drive.google.com/open?id=1V3irCpP49bEUtpjG7Vuk6vEQQAZI-4PI" ><button style="font-size: 12px; color: white; background-color: blue; height: 20px " > >>> Download detected video at speed "flash" </button></a>

If you use more powerful NVIDIA GPUs, you will definitely have faster detection time than stated above.

### Frame Detection Intervals
<div id="videodetectionintervals" ></div>

The above video object detection tasks are optimized for frame-real-time object detection, which ensures that objects in every frame of the video are detected. **ImageAI** provides you the option to adjust the video frame detections, which can speed up your video detection process. When calling `.detectObjectsFromVideo()` or `.detectCustomObjectsFromVideo()`, you can specify at which frame interval detections should be made. By setting the **frame_detection_interval** parameter to be equal to 5 or 20, the object detections in the video will be updated after every 5 frames or 20 frames.
If your output video **frames_per_second** is set to 20, that means the object detections in the video will be updated once in every quarter of a second or every second, respectively. This is useful in scenarios where the available compute is less powerful and the speeds of moving objects are low. This ensures you can have objects detected as second-real-time, half-a-second-real-time or whichever way suits your needs. We conducted video object detection on the same input video we have been using all this while by applying a **frame_detection_interval** value equal to 5.
The results below are obtained from detections performed on a NVIDIA K80 GPU.
See the results and link to download the videos below:


**Video Length = 1min 24seconds, Detection Speed = "normal" , Minimum Percentage Probability = 50 (default), Frame Detection Interval = 5, Detection Time = 15min 49seconds**

<a href="https://drive.google.com/open?id=10m6kXlXWGOGc-IPw6TsKxBi-SXXOH9xK" ><button style="font-size: 12px; color: white; background-color: blue; height: 20px " > >>> Download detected video at speed "normal" and interval=5 </button></a>


**Video Length = 1min 24seconds, Detection Speed = "fast" , Minimum Percentage Probability = 40, Frame Detection Interval = 5, Detection Time = 5min 6seconds**

<a href="https://drive.google.com/open?id=17934YONVSXvd4uuJE0KwenEFks7fFYe4" ><button style="font-size: 12px; color: white; background-color: blue; height: 20px " > >>> Download detected video at speed "fast" and interval=5 </button></a>


**Video Length = 1min 24seconds, Detection Speed = "faster" , Minimum Percentage Probability = 30, Frame Detection Interval = 5, Detection Time = 3min 18seconds**

<a href="https://drive.google.com/open?id=1cs_06CuhXDvZp3fHJWFpam-31eclOhc-" ><button style="font-size: 12px; color: white; background-color: blue; height: 20px " > >>> Download detected video at speed "faster" and interval=5 </button></a>


**Video Length = 1min 24seconds, Detection Speed = "fastest" , Minimum Percentage Probability = 20 , Frame Detection Interval = 5, Detection Time = 2min 18seconds**
[![](../../data-images/video-3.jpg)](https://www.youtube.com/embed/S-jgBTQgbd4?rel=0)


**Video Length = 1min 24seconds, Detection Speed = "flash" , Minimum Percentage Probability = 10, Frame Detection Interval = 5, Detection Time = 1min 27seconds**

[Download detected video at speed "flash" and interval=5](https://drive.google.com/open?id=1aN2nnVoFjhUWpcz2Und3dsCT9OKrakM0)


### Video Detection Timeout
<div id="detectiontimeout"></div>

**ImageAI** now allows you to set a timeout in seconds for detection of objects in videos or camera live feed.
To set a timeout for your video detection code, all you need to do is specify the `detection_timeout` parameter in the `detectObjectsFromVideo()` function to the number of desired seconds. In the example code below, we set `detection_timeout` to 120 seconds (2 minutes). 

```python
from imageai.Detection import VideoObjectDetection
import os
import cv2

execution_path = os.getcwd()
camera = cv2.VideoCapture(0)

detector = VideoObjectDetection()
detector.setModelTypeAsRetinaNet()
detector.setModelPath(os.path.join(execution_path , "resnet50_coco_best_v2.0.1.h5"))
detector.loadModel()


video_path = detector.detectObjectsFromVideo(camera_input=camera,
                                             output_file_path=os.path.join(execution_path, "camera_detected_video"),
                                             frames_per_second=20,
                                             log_progress=True,
                                             minimum_percentage_probability=40,
                                             detection_timeout=120)
```


### Documentation
<div id="documentation" ></div>

We have provided full documentation for all **ImageAI** classes and functions in 3 major languages. Find links below: 

- Documentation - **English Version  [https://imageai.readthedocs.io](https://imageai.readthedocs.io)**
- Documentation - **Chinese Version  [https://imageai-cn.readthedocs.io](https://imageai-cn.readthedocs.io)**
- Documentation - **French Version  [https://imageai-fr.readthedocs.io](https://imageai-fr.readthedocs.io)**


================================================
FILE: imageai_tf_deprecated/Detection/YOLO/__init__.py
================================================


================================================
FILE: imageai_tf_deprecated/Detection/YOLO/utils.py
================================================
import tensorflow as tf
from keras import backend as K
import numpy as np
from PIL import Image
import cv2


def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Decode a raw YOLO feature map into box parameters and scores.

    Args:
        feats: raw output tensor of one detection layer; it is reshaped here to
            ``[-1, grid_h, grid_w, num_anchors, num_classes + 5]``.
        anchors: sequence of (width, height) anchor pairs used by this layer.
        num_classes: number of object classes.
        input_shape: spatial shape of the network input; reversed before it
            divides the box width/height.
        calc_loss: when equal to ``True`` the intermediate values needed by a
            loss function are returned instead of the decoded predictions.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss == True``, otherwise
        ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    anchor_count = len(anchors)
    feats_dtype = K.dtype(feats)

    # Anchors shaped so they broadcast against the reshaped feature map.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, anchor_count, 2])

    # Per-cell (x, y) index grid built from the feature map's spatial dims.
    grid_shape = K.shape(feats)[1:3]
    row_indices = K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1])
    col_indices = K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1])
    grid_y = K.tile(row_indices, [1, grid_shape[1], 1, 1])
    grid_x = K.tile(col_indices, [grid_shape[0], 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), feats_dtype)

    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], anchor_count, num_classes + 5])

    # Centres are cell offset + sigmoid, normalised by the grid size;
    # sizes are exp-scaled anchors normalised by the network input size.
    grid_size_xy = K.cast(grid_shape[::-1], feats_dtype)
    input_size_xy = K.cast(input_shape[::-1], feats_dtype)
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / grid_size_xy
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / input_size_xy
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs


def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
    """Map boxes from the letterboxed network input back onto the source image.

    Args:
        box_xy: box centres, last axis ordered (x, y).
        box_wh: box sizes, last axis ordered (w, h).
        input_shape: network input shape (cast to the boxes' dtype).
        image_shape: source image shape (cast to the boxes' dtype).

    Returns:
        Tensor whose last axis is ``(y_min, x_min, y_max, x_max)`` multiplied
        by ``image_shape``.
    """
    # Work in (y, x) / (h, w) order so the values line up with the shapes.
    centers_yx = box_xy[..., ::-1]
    sizes_hw = box_wh[..., ::-1]
    float_dtype = K.dtype(centers_yx)
    input_shape = K.cast(input_shape, float_dtype)
    image_shape = K.cast(image_shape, float_dtype)

    # Size of the image once scaled to fit inside the network input.
    new_shape = K.round(image_shape * K.min(input_shape/image_shape))
    offset = (input_shape-new_shape)/2./input_shape
    scale = input_shape/new_shape

    # Remove the padding offset and undo the letterbox scaling.
    centers_yx = (centers_yx - offset) * scale
    sizes_hw *= scale

    mins_yx = centers_yx - (sizes_hw / 2.)
    maxes_yx = centers_yx + (sizes_hw / 2.)
    corners = K.concatenate([
        mins_yx[..., 0:1],
        mins_yx[..., 1:2],
        maxes_yx[..., 0:1],
        maxes_yx[..., 1:2]
    ])

    # Scale the normalised corners by the image dimensions.
    corners *= K.concatenate([image_shape, image_shape])
    return corners


def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
    """Turn one detection layer's output into flat box and score tensors.

    Decodes ``feats`` with ``yolo_head``, rescales the boxes with
    ``yolo_correct_boxes`` and flattens both results.

    Returns:
        ``(boxes, box_scores)`` reshaped to ``[-1, 4]`` and
        ``[-1, num_classes]`` respectively; scores are confidence multiplied
        by the class probabilities.
    """
    centers, sizes, confidence, class_probs = yolo_head(
        feats, anchors, num_classes, input_shape)

    corrected = yolo_correct_boxes(centers, sizes, input_shape, image_shape)
    flat_boxes = K.reshape(corrected, [-1, 4])

    flat_scores = K.reshape(confidence * class_probs, [-1, num_classes])
    return flat_boxes, flat_scores


def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=20,
              score_threshold=.6,
              iou_threshold=.5):
    """Filter the raw YOLO outputs into final boxes, scores and class ids.

    Args:
        yolo_outputs: list of detection-layer output tensors.
        anchors: array of anchors, indexed with the per-layer anchor masks.
        num_classes: number of object classes.
        image_shape: shape of the original image.
        max_boxes: maximum boxes kept per class by non-max suppression.
        score_threshold: minimum score for a box to be considered.
        iou_threshold: IoU threshold passed to non-max suppression.

    Returns:
        ``(boxes, scores, classes)`` concatenated over all classes.
    """
    num_layers = len(yolo_outputs)
    # Three outputs use the 9-anchor table; anything else uses the two-mask table.
    if num_layers == 3:
        anchor_mask = [[6,7,8], [3,4,5], [0,1,2]]
    else:
        anchor_mask = [[3,4,5], [1,2,3]]
    # The network input size is derived as 32x the first output's grid size.
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    layer_boxes = []
    layer_scores = []
    for layer_index in range(num_layers):
        decoded_boxes, decoded_scores = yolo_boxes_and_scores(
            yolo_outputs[layer_index],
            anchors[anchor_mask[layer_index]],
            num_classes,
            input_shape,
            image_shape)
        layer_boxes.append(decoded_boxes)
        layer_scores.append(decoded_scores)
    boxes = K.concatenate(layer_boxes, axis=0)
    box_scores = K.concatenate(layer_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    kept_boxes = []
    kept_scores = []
    kept_classes = []
    for class_id in range(num_classes):
        # Candidates of this class that pass the score threshold.
        candidate_boxes = tf.boolean_mask(boxes, mask[:, class_id])
        candidate_scores = tf.boolean_mask(box_scores[:, class_id], mask[:, class_id])
        selected = tf.image.non_max_suppression(
            candidate_boxes, candidate_scores, max_boxes_tensor,
            iou_threshold=iou_threshold)
        candidate_boxes = K.gather(candidate_boxes, selected)
        candidate_scores = K.gather(candidate_scores, selected)
        kept_boxes.append(candidate_boxes)
        kept_scores.append(candidate_scores)
        kept_classes.append(K.ones_like(candidate_scores, 'int32') * class_id)

    boxes_ = K.concatenate(kept_boxes, axis=0)
    scores_ = K.concatenate(kept_scores, axis=0)
    classes_ = K.concatenate(kept_classes, axis=0)

    return boxes_, scores_, classes_


def letterbox_image(image, size):
    """Resize ``image`` to fit inside ``size`` keeping its aspect ratio.

    Args:
        image: image object exposing ``size``, ``resize`` (used with
            ``Image.BICUBIC``), i.e. presumably a PIL image.
        size: target ``(width, height)``.

    Returns:
        A new RGB image of ``size`` filled with (128, 128, 128) with the
        resized image pasted in the centre.
    """
    src_w, src_h = image.size
    dst_w, dst_h = size

    # Largest uniform scale at which the source still fits in the target.
    ratio = min(dst_w/src_w, dst_h/src_h)
    fitted_w = int(src_w*ratio)
    fitted_h = int(src_h*ratio)

    fitted = image.resize((fitted_w, fitted_h), Image.BICUBIC)
    canvas = Image.new('RGB', size, (128,128,128))
    top_left = ((dst_w-fitted_w)//2, (dst_h-fitted_h)//2)
    canvas.paste(fitted, top_left)
    return canvas


def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
    """Rescale letterboxed, normalised boxes to pixel coordinates of the image.

    The image is assumed to have been fitted inside a ``net_h`` x ``net_w``
    canvas with its aspect ratio preserved and centred padding. Each box's
    ``xmin``/``xmax``/``ymin``/``ymax`` attributes are overwritten in place
    with integer pixel coordinates.

    Args:
        boxes: list of objects with ``xmin``, ``xmax``, ``ymin``, ``ymax``
            attributes expressed as fractions of the network input.
        image_h: height of the original image.
        image_w: width of the original image.
        net_h: height of the network input.
        net_w: width of the network input.

    Returns:
        None; ``boxes`` is modified in place.
    """
    # Size of the image after being scaled to fit inside the network input.
    if (float(net_w)/image_w) < (float(net_h)/image_h):
        new_w = net_w
        new_h = (image_h*net_w)/image_w
    else:
        # Height is the limiting dimension, so the scaled image is net_h tall
        # (the previous code used net_w here, which is only right for square inputs).
        new_h = net_h
        new_w = (image_w*net_h)/image_h

    # The padding offset and scale are the same for every box.
    x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
    y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h

    for box in boxes:
        box.xmin = int((box.xmin - x_offset) / x_scale * image_w)
        box.xmax = int((box.xmax - x_offset) / x_scale * image_w)
        box.ymin = int((box.ymin - y_offset) / y_scale * image_h)
        box.ymax = int((box.ymax - y_offset) / y_scale * image_h)
        

class BoundBox:
    """A detection box with corner coordinates, objectness and class scores."""

    def __init__(self, xmin, ymin, xmax, ymax, objness = None, classes = None):
        """Store the corners and scores; label and score are computed lazily."""
        self.xmin, self.ymin = xmin, ymin
        self.xmax, self.ymax = xmax, ymax

        self.objness, self.classes = objness, classes

        # -1 marks "not computed yet" for both cached values.
        self.label = self.score = -1

    def get_label(self):
        """Return the index of the highest class score, caching the result."""
        if self.label != -1:
            return self.label

        self.label = np.argmax(self.classes)
        return self.label

    def get_score(self):
        """Return the class score at ``get_label()``, caching the result."""
        if self.score != -1:
            return self.score

        self.score = self.classes[self.get_label()]
        return self.score


def _interval_overlap(interval_a, interval_b):
    x1, x2 = interval_a
    x3, x4 = interval_b

    if x3 < x1:
        if x4 < x1:
            return 0
        else:
            return min(x2,x4) - x1
    else:
        if x2 < x3:
             return 0
        else:
            return min(x2,x4) - x3          

def _sigmoid(x):
    return 1. / (1. + np.exp(-x))

def bbox_iou(box1, box2):
    """Return the intersection-over-union of two boxes.

    Args:
        box1: object with ``xmin``, ``xmax``, ``ymin``, ``ymax`` attributes.
        box2: object with the same attributes.

    Returns:
        ``intersection / union`` as a float, or ``0.0`` when the union area
        is zero (both boxes have no area), which would otherwise divide by
        zero.
    """
    intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
    intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])

    intersect = intersect_w * intersect_h

    w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
    w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin

    union = w1*h1 + w2*h2 - intersect

    # Two zero-area boxes give a zero union; treat them as non-overlapping
    # instead of raising ZeroDivisionError.
    if union == 0:
        return 0.0

    return float(intersect) / union


def do_nms(boxes, nms_thresh):
    """Apply per-class non-max suppression to ``boxes`` in place.

    For every class, boxes are visited in descending score order; any later
    box whose IoU with a kept box is at least ``nms_thresh`` has its score for
    that class set to 0. Returns ``None``; nothing happens for an empty list.

    Args:
        boxes: list of objects with a ``classes`` score sequence and the
            corner attributes used by ``bbox_iou``.
        nms_thresh: IoU threshold at or above which a box is suppressed.
    """
    if not len(boxes) > 0:
        return

    nb_class = len(boxes[0].classes)

    for c in range(nb_class):
        # Indices of the boxes ordered from highest to lowest score for class c.
        ranking = np.argsort([-box.classes[c] for box in boxes])

        for position, keep_index in enumerate(ranking):
            # Already suppressed (or never scored) for this class.
            if boxes[keep_index].classes[c] == 0:
                continue

            for other_index in ranking[position + 1:]:
                if bbox_iou(boxes[keep_index], boxes[other_index]) >= nms_thresh:
                    boxes[other_index].classes[c] = 0

def decode_netout(netout, anchors, obj_thresh, nms_thresh, net_h, net_w):
    """Decode one YOLO output grid into a list of ``BoundBox`` objects.

    Args:
        netout: array whose first two axes are the grid height and width; it
            is reshaped to ``(grid_h, grid_w, 3, -1)`` and transformed in
            place, so the caller's array may be modified when the reshape
            returns a view.
        anchors: flat sequence ``[w0, h0, w1, h1, w2, h2]`` for the 3 boxes
            predicted per cell.
        obj_thresh: objectness threshold; class scores at or below it are
            zeroed and boxes whose objectness is at or below it are dropped.
        nms_thresh: unused here; kept for interface compatibility.
        net_h: network input height, used to normalise box heights.
        net_w: network input width, used to normalise box widths.

    Returns:
        List of ``BoundBox`` with corners expressed as fractions of the
        network input and ``classes`` holding objectness-weighted scores.
    """
    grid_h, grid_w = netout.shape[:2]
    nb_box = 3
    netout = netout.reshape((grid_h, grid_w, nb_box, -1))

    boxes = []

    # Squash the centre offsets, the objectness and the class scores, then
    # weight class scores by objectness and zero those under the threshold.
    netout[..., :2]  = _sigmoid(netout[..., :2])
    netout[..., 4:]  = _sigmoid(netout[..., 4:])
    netout[..., 5:]  = netout[..., 4][..., np.newaxis] * netout[..., 5:]
    netout[..., 5:] *= netout[..., 5:] > obj_thresh

    for i in range(grid_h*grid_w):
        # Integer cell coordinates; true division here would leak a
        # fractional row into the y-centre computed below.
        row, col = divmod(i, grid_w)

        for b in range(nb_box):
            cell = netout[row][col][b]

            # 4th element is objectness score
            objectness = cell[4]

            # Skip low-confidence boxes; every class score of such a box has
            # already been zeroed above, so it could never yield a detection.
            if objectness <= obj_thresh: continue

            # first 4 elements are x, y, w, and h
            x, y, w, h = cell[:4]

            x = (col + x) / grid_w # center position, unit: image width
            y = (row + y) / grid_h # center position, unit: image height
            w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width
            h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height

            # last elements are class probabilities
            classes = cell[5:]

            boxes.append(BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes))

    return boxes

def preprocess_input(image, input_shape):
    """Letterbox ``image`` into the network input size and add a batch axis.

    Args:
        image: array of shape ``(height, width, channels)``; the channel axis
            is reversed and values are divided by 255 before resizing.
        input_shape: ``(net_h, net_w)`` of the network input.

    Returns:
        Array of shape ``(1, net_h, net_w, 3)`` filled with 0.5 outside the
        centred, resized image.
    """
    net_h, net_w = input_shape
    src_h, src_w, _ = image.shape

    # Pick the scaled size that fits the network input while keeping the
    # aspect ratio: the tighter dimension fills the input completely.
    if (float(net_w)/src_w) < (float(net_h)/src_h):
        new_h = int((src_h * net_w)/src_w)
        new_w = net_w
    else:
        new_w = int((src_w * net_h)/src_h)
        new_h = net_h

    # Reverse the channel order, scale to [0, 1] and resize.
    resized = cv2.resize(image[:,:,::-1]/255., (int(new_w), int(new_h)))

    # Paste the resized image in the middle of a 0.5-valued canvas.
    canvas = np.ones((net_h, net_w, 3)) * 0.5
    top, bottom = int((net_h-new_h)//2), int((net_h+new_h)//2)
    left, right = int((net_w-new_w)//2), int((net_w+new_w)//2)
    canvas[top:bottom, left:right, :] = resized

    return np.expand_dims(canvas, 0)

def retrieve_yolo_detections(yolo_result, anchors, min_probability, nms_thresh, image_input_size, image_size, labels_dict ):
    """Convert raw YOLO outputs into a list of detection dictionaries.

    Args:
        yolo_result: sequence of network outputs; element ``[i][0]`` is
            decoded with ``anchors[i]``.
        anchors: per-output anchor lists.
        min_probability: score threshold used both for decoding and for
            keeping a class score as a detection.
        nms_thresh: IoU threshold for non-max suppression.
        image_input_size: network input size, passed on as
            ``(image_input_size[0], image_input_size[1])``.
        image_size: original image size; index 1 is passed as the height and
            index 0 as the width.
        labels_dict: mapping from class index to label name.

    Returns:
        List of dicts with ``name``, ``percentage_probability`` and
        ``box_points`` (``[xmin, ymin, xmax, ymax]`` with negative
        ``xmin``/``ymin`` clamped to 0).
    """
    net_dim_0 = image_input_size[0]
    net_dim_1 = image_input_size[1]

    # Decode every output scale into candidate boxes.
    boxes = []
    for output_index in range(len(yolo_result)):
        boxes += decode_netout(
            yolo_result[output_index][0],
            anchors[output_index],
            min_probability,
            nms_thresh,
            net_dim_0,
            net_dim_1)

    # Map boxes back to the original image, then suppress overlapping ones.
    correct_yolo_boxes(boxes, image_size[1], image_size[0], net_dim_0, net_dim_1)
    do_nms(boxes, nms_thresh)

    detections = list()
    class_count = len(labels_dict.keys())
    for box in boxes:
        for class_index in range(class_count):
            if box.classes[class_index] > min_probability:
                label = labels_dict[class_index]
                percentage_probability = box.classes[class_index] * 100

                # Only the top-left corner is clamped to the image origin.
                xmin = 0 if box.xmin < 0 else box.xmin
                ymin = 0 if box.ymin < 0 else box.ymin

                detections.append({
                    "name": label,
                    "percentage_probability": percentage_probability,
                    "box_points": [ xmin, ymin, box.xmax, box.ymax],
                })

    return detections


def draw_boxes(image, box_points, draw_box, label, percentage_probability, color):
    """Draw a detection rectangle and/or caption on ``image`` and return it.

    Args:
        image: image array drawn on with OpenCV.
        box_points: ``(xmin, ymin, xmax, ymax)``.
        draw_box: the rectangle is drawn only when this is exactly ``True``.
        label: label text or ``None``.
        percentage_probability: probability to show or ``None``.
        color: rectangle colour.

    Returns:
        The same ``image`` object.
    """
    xmin, ymin, xmax, ymax = box_points

    if draw_box is True:
        cv2.rectangle(image, (xmin,ymin), (xmax,ymax), color, 2)

    # Build the caption from whichever of label / probability is provided.
    has_label = label is not None
    has_probability = percentage_probability is not None
    if has_label and has_probability:
        caption = "{} {:.2f}%".format(label, percentage_probability)
    elif has_label:
        caption = "{}".format(label)
    elif has_probability:
        caption = "{:.2f}".format(percentage_probability)
    else:
        caption = None

    if caption is not None:
        # Thick pass first, then a thin white pass on top of it.
        for text_color, thickness in (((255, 0, 0), 2), ((255, 255, 255), 1)):
            cv2.putText(image, caption, (xmin, ymin - 13), cv2.FONT_HERSHEY_SIMPLEX, 1e-3 * image.shape[0], text_color, thickness)

    return image

================================================
FILE: imageai_tf_deprecated/Detection/YOLO/yolov3.py
================================================
from tensorflow.keras.layers import Conv2D, MaxPool2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, LeakyReLU, Lambda
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model
from tensorflow.keras import Input
from tensorflow.keras.layers import add, concatenate
from tensorflow.keras.layers import Layer
import tensorflow as tf


class YoloLayer(Layer):
    """Keras layer that computes the YOLOv3 training loss for one output scale.

    The layer takes the input image, the raw prediction, the encoded ground
    truth and the list of true boxes, and returns a per-sample loss value
    scaled by ``grid_scale``.
    """

    def __init__(self, anchors, max_grid, batch_size, warmup_batches, ignore_thresh, 
                    grid_scale, obj_scale, noobj_scale, xywh_scale, class_scale, 
                    **kwargs):
        """Store the loss hyper-parameters and pre-build the cell index grid.

        Args:
            anchors: anchor values, stored as a constant of shape [1,1,1,3,2].
            max_grid: ``(max_grid_h, max_grid_w)`` size of the pre-built grid.
            batch_size: number of times the grid is tiled along the batch axis.
            warmup_batches: batch count compared against the batch counter in
                ``call`` to select the warm-up targets.
            ignore_thresh: IoU threshold below which the no-object confidence
                term is kept.
            grid_scale: multiplier applied to the final loss.
            obj_scale: weight of the object confidence term.
            noobj_scale: weight of the no-object confidence term.
            xywh_scale: weight of the box position and size terms.
            class_scale: weight of the classification term.
            **kwargs: forwarded to ``Layer.__init__``.
        """
        # make the model settings persistent
        self.ignore_thresh  = ignore_thresh
        self.warmup_batches = warmup_batches
        self.anchors        = tf.constant(anchors, dtype='float', shape=[1,1,1,3,2])
        self.grid_scale     = grid_scale
        self.obj_scale      = obj_scale
        self.noobj_scale    = noobj_scale
        self.xywh_scale     = xywh_scale
        self.class_scale    = class_scale        

        # make a persistent mesh grid
        max_grid_h, max_grid_w = max_grid

        # cell_x holds the column index of every cell; cell_y is its transpose
        # over the two spatial axes. They are concatenated on the last axis and
        # tiled over the batch and the 3 anchors.
        # NOTE(review): the transpose only lines up with cell_x when
        # max_grid_h == max_grid_w -- confirm callers always pass a square grid.
        cell_x = tf.cast(tf.reshape(tf.tile(tf.range(max_grid_w), [max_grid_h]), (1, max_grid_h, max_grid_w, 1, 1)), dtype=tf.float32)
        cell_y = tf.transpose(cell_x, (0,2,1,3,4))
        self.cell_grid = tf.tile(tf.concat([cell_x,cell_y],-1), [batch_size, 1, 1, 3, 1])

        super(YoloLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Delegate to ``Layer.build``; nothing else is created here."""
        super(YoloLayer, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, x):
        """Compute the loss for one batch.

        Args:
            x: sequence ``(input_image, y_pred, y_true, true_boxes)``.

        Returns:
            The sum of the xy, wh, confidence and class losses, each reduced
            over axes 1-4, multiplied by ``self.grid_scale``.
        """
        input_image, y_pred, y_true, true_boxes = x

        # adjust the shape of the y_predict [batch, grid_h, grid_w, 3, 4+1+nb_class]
        y_pred = tf.reshape(y_pred, tf.concat([tf.shape(y_pred)[:3], tf.constant([3, -1])], axis=0))
        
        # initialize the masks
        # object_mask is the ground-truth element at index 4 with a trailing axis added.
        object_mask     = tf.expand_dims(y_true[..., 4], 4)

        # the variable to keep track of number of batches processed
        # NOTE(review): the variable is created inside call(); whether the
        # counter persists across batches depends on how call() is traced -- confirm.
        batch_seen = tf.Variable(0.)        

        # compute grid factor and net factor
        # Both factors are ordered (w, h) and shaped to broadcast over the
        # last axis of the 5-D box tensors.
        grid_h      = tf.shape(y_true)[1]
        grid_w      = tf.shape(y_true)[2]
        grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32), [1,1,1,1,2])

        net_h       = tf.shape(input_image)[1]
        net_w       = tf.shape(input_image)[2]            
        net_factor  = tf.reshape(tf.cast([net_w, net_h], tf.float32), [1,1,1,1,2])
        
        """
        Adjust prediction
        """
        pred_box_xy    = (self.cell_grid[:,:grid_h,:grid_w,:,:] + tf.sigmoid(y_pred[..., :2]))  # sigma(t_xy) + c_xy
        pred_box_wh    = y_pred[..., 2:4]                                                       # t_wh
        pred_box_conf  = tf.expand_dims(tf.sigmoid(y_pred[..., 4]), 4)                          # adjust confidence
        pred_box_class = y_pred[..., 5:]                                                        # adjust class probabilities      

        """
        Adjust ground truth
        """
        true_box_xy    = y_true[..., 0:2] # (sigma(t_xy) + c_xy)
        true_box_wh    = y_true[..., 2:4] # t_wh
        true_box_conf  = tf.expand_dims(y_true[..., 4], 4)
        true_box_class = tf.argmax(y_true[..., 5:], -1)         

        """
        Compare each predicted box to all true boxes
        """        
        # initially, drag all objectness of all boxes to 0
        conf_delta  = pred_box_conf - 0 

        # then, ignore the boxes which have good overlap with some true box
        true_xy = true_boxes[..., 0:2] / grid_factor
        true_wh = true_boxes[..., 2:4] / net_factor
        
        true_wh_half = true_wh / 2.
        true_mins    = true_xy - true_wh_half
        true_maxes   = true_xy + true_wh_half
        
        # An extra axis is added to the predictions so they broadcast against true_boxes.
        pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4)
        pred_wh = tf.expand_dims(tf.exp(pred_box_wh) * self.anchors / net_factor, 4)
        
        pred_wh_half = pred_wh / 2.
        pred_mins    = pred_xy - pred_wh_half
        pred_maxes   = pred_xy + pred_wh_half    

        intersect_mins  = tf.maximum(pred_mins,  true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)

        intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
        
        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores  = tf.truediv(intersect_areas, union_areas)

        # Keep the no-object confidence delta only where the best IoU over
        # axis 4 stays under ignore_thresh.
        best_ious   = tf.reduce_max(iou_scores, axis=4)        
        conf_delta *= tf.expand_dims(tf.cast((best_ious < self.ignore_thresh), dtype=tf.float32), 4)

        """
        Compute some online statistics
        """            
        # IoU between each prediction and the ground truth encoded at the same
        # position in y_true.
        true_xy = true_box_xy / grid_factor
        true_wh = tf.exp(true_box_wh) * self.anchors / net_factor

        true_wh_half = true_wh / 2.
        true_mins    = true_xy - true_wh_half
        true_maxes   = true_xy + true_wh_half

        pred_xy = pred_box_xy / grid_factor
        pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor 
        
        pred_wh_half = pred_wh / 2.
        pred_mins    = pred_xy - pred_wh_half
        pred_maxes   = pred_xy + pred_wh_half      

        intersect_mins  = tf.maximum(pred_mins,  true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
        intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
        
        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores  = tf.truediv(intersect_areas, union_areas)
        iou_scores  = object_mask * tf.expand_dims(iou_scores, 4)
        
        # The statistics below are computed but not returned or used further
        # in this method; 1e-3 keeps the divisions finite when a count is zero.
        count       = tf.reduce_sum(object_mask)
        count_noobj = tf.reduce_sum(1 - object_mask)
        detect_mask = tf.cast((pred_box_conf*object_mask >= 0.5), dtype=tf.float32)
        class_mask  = tf.expand_dims(tf.cast(tf.equal(tf.argmax(pred_box_class, -1), true_box_class), dtype=tf.float32), 4)
        recall50    = tf.reduce_sum(tf.cast((iou_scores >= 0.5), dtype=tf.float32) * detect_mask  * class_mask) / (count + 1e-3)
        recall75    = tf.reduce_sum(tf.cast((iou_scores >= 0.75), dtype=tf.float32) * detect_mask  * class_mask) / (count + 1e-3)    
        avg_iou     = tf.reduce_sum(iou_scores) / (count + 1e-3)
        avg_obj     = tf.reduce_sum(pred_box_conf  * object_mask)  / (count + 1e-3)
        avg_noobj   = tf.reduce_sum(pred_box_conf  * (1-object_mask))  / (count_noobj + 1e-3)
        avg_cat     = tf.reduce_sum(object_mask * class_mask) / (count + 1e-3) 

        """
        Warm-up training
        """
        batch_seen = tf.compat.v1.assign_add(batch_seen, 1.)
        
        # While batch_seen < warmup_batches + 1, cells without an object get
        # the cell centre (index + 0.5) as xy target and every cell
        # contributes to the xy/wh loss; the wh target itself is unchanged
        # because only zeros are added to it. Afterwards only object cells count.
        true_box_xy, true_box_wh, xywh_mask = tf.cond(tf.less(batch_seen, self.warmup_batches+1), 
                              lambda: [true_box_xy + (0.5 + self.cell_grid[:,:grid_h,:grid_w,:,:]) * (1-object_mask), 
                                       true_box_wh + tf.zeros_like(true_box_wh) * (1-object_mask), 
                                       tf.ones_like(object_mask)],
                              lambda: [true_box_xy, 
                                       true_box_wh,
                                       object_mask])

        """
        Compare each true box to all anchor boxes
        """      
        wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor
        wh_scale = tf.expand_dims(2 - wh_scale[..., 0] * wh_scale[..., 1], axis=4) # the smaller the box, the bigger the scale

        xy_delta    = xywh_mask   * (pred_box_xy-true_box_xy) * wh_scale * self.xywh_scale
        wh_delta    = xywh_mask   * (pred_box_wh-true_box_wh) * wh_scale * self.xywh_scale
        conf_delta  = object_mask * (pred_box_conf-true_box_conf) * self.obj_scale + (1-object_mask) * conf_delta * self.noobj_scale
        class_delta = object_mask * \
                      tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \
                      self.class_scale

        # Squared error for xy, wh and confidence; the class term is already a
        # cross-entropy value. Each is summed over axes 1-4, leaving one value
        # per sample.
        loss_xy    = tf.reduce_sum(tf.square(xy_delta),       list(range(1,5)))
        loss_wh    = tf.reduce_sum(tf.square(wh_delta),       list(range(1,5)))
        loss_conf  = tf.reduce_sum(tf.square(conf_delta),     list(range(1,5)))
        loss_class = tf.reduce_sum(class_delta,               list(range(1,5)))

        loss = loss_xy + loss_wh + loss_conf + loss_class


        return loss*self.grid_scale

    def compute_output_shape(self, input_shape):
        """Report the output shape as ``[(None, 1)]`` regardless of ``input_shape``."""
        return [(None, 1)]


def dummy_loss(y_true, y_pred):
    """Return the square root of the sum of ``y_pred``; ``y_true`` is ignored."""
    summed = tf.reduce_sum(y_pred)
    return tf.sqrt(summed)

def NetworkConv2D_BN_Leaky(input, channels, kernel_size, kernel_regularizer = l2(5e-4), strides=(1,1), padding="same", use_bias=False):
    """Apply Conv2D, then BatchNormalization, then LeakyReLU(alpha=0.1) to ``input``.

    Args:
        input: input tensor.
        channels: number of convolution filters.
        kernel_size: convolution kernel size.
        kernel_regularizer: regularizer for the convolution kernel.
        strides: convolution strides.
        padding: convolution padding mode.
        use_bias: whether the convolution uses a bias term.

    Returns:
        The activated output tensor.
    """
    convolved = Conv2D(
        filters=channels,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        kernel_regularizer=kernel_regularizer,
        use_bias=use_bias,
    )(input)
    normalized = BatchNormalization()(convolved)
    return LeakyReLU(alpha=0.1)(normalized)

def residual_block(input, channels, num_blocks):
    """Downsample ``input`` and apply ``num_blocks`` residual units.

    The input is zero-padded on the top and left, then convolved with a
    stride-2, valid-padded 3x3 convolution. Each residual unit is a 1x1
    convolution with ``channels // 2`` filters followed by a 3x3 convolution
    with ``channels`` filters, added back to the unit's input.
    """
    padded = ZeroPadding2D(((1,0), (1,0)))(input)
    trunk = NetworkConv2D_BN_Leaky(input=padded, channels=channels, kernel_size=(3,3), strides=(2,2), padding="valid")

    for _ in range(num_blocks):
        squeezed = NetworkConv2D_BN_Leaky(input=trunk, channels=channels // 2, kernel_size=(1,1))
        expanded = NetworkConv2D_BN_Leaky(input=squeezed, channels=channels, kernel_size=(3,3))
        trunk = Add()([trunk, expanded])

    return trunk

def darknet(input):
    """Build the backbone: a 32-filter 3x3 convolution followed by five residual stages."""
    # (channels, number of residual units) for each stage, in order.
    stage_specs = ((64, 1), (128, 2), (256, 8), (512, 8), (1024, 4))

    network = NetworkConv2D_BN_Leaky(input=input, channels=32, kernel_size=(3,3))
    for stage_channels, stage_blocks in stage_specs:
        network = residual_block(input=network, channels=stage_channels, num_blocks=stage_blocks)

    return network

def last_layers(input, channels_in, channels_out, layer_name=""):
    """Build one detection head.

    Five convolutions alternate between 1x1 with ``channels_in`` filters and
    3x3 with ``channels_in * 2`` filters. A further 3x3 convolution and a
    plain 1x1 ``Conv2D`` named ``layer_name`` produce the prediction.

    Returns:
        ``(features, prediction)``: the output of the five-convolution stack
        and the ``channels_out``-filter prediction tensor.
    """
    doubled = channels_in * 2
    stack_specs = (
        (channels_in, (1, 1)),
        (doubled, (3, 3)),
        (channels_in, (1, 1)),
        (doubled, (3, 3)),
        (channels_in, (1, 1)),
    )

    features = input
    for stack_channels, stack_kernel in stack_specs:
        features = NetworkConv2D_BN_Leaky(input=features, channels=stack_channels, kernel_size=stack_kernel)

    prediction = NetworkConv2D_BN_Leaky(input=features, channels=doubled, kernel_size=(3, 3))
    prediction = Conv2D(filters=channels_out, kernel_size=(1,1), name=layer_name)(prediction)

    return features, prediction

def yolov3_base(input, num_anchors, num_classes):
    """
    Wire the darknet backbone to three detection heads.

    Each head emits `num_anchors * (num_classes + 5)` channels. After the first
    and second heads, the trunk is reduced with a 1x1 block, upsampled by 2 and
    concatenated with an intermediate backbone layer before feeding the next head.

    :param input: image input tensor
    :param num_anchors: anchors predicted per output scale
    :param num_classes: number of object classes
    :return: tuple (input, head_1, head_2, head_3)
    """
    head_filters = num_anchors * (num_classes + 5)

    backbone = Model(input, darknet(input))

    trunk, head_1 = last_layers(backbone.output, 512, head_filters, layer_name="last1")

    trunk = NetworkConv2D_BN_Leaky(input=trunk, channels=256, kernel_size=(1, 1))
    trunk = UpSampling2D(2)(trunk)
    # NOTE(review): index 152 depends on the exact layer order produced by
    # darknet(); presumably the end of the 512-channel stage - verify before
    # touching the backbone.
    trunk = Concatenate()([trunk, backbone.layers[152].output])

    trunk, head_2 = last_layers(trunk, 256, head_filters, layer_name="last2")

    trunk = NetworkConv2D_BN_Leaky(input=trunk, channels=128, kernel_size=(1, 1))
    trunk = UpSampling2D(2)(trunk)
    # NOTE(review): same caveat for index 92 (presumably the 256-channel stage).
    trunk = Concatenate()([trunk, backbone.layers[92].output])

    _, head_3 = last_layers(trunk, 128, head_filters, layer_name="last3")

    return input, head_1, head_2, head_3

def yolov3_main(input, num_anchors, num_classes):
    """
    Create the YOLOv3 inference model.

    :param input: image input tensor
    :param num_anchors: anchors predicted per output scale
    :param num_classes: number of object classes
    :return: Model mapping the input to the three head outputs of yolov3_base()
    """
    model_input, head_1, head_2, head_3 = yolov3_base(input, num_anchors, num_classes)
    heads = [head_1, head_2, head_3]
    return Model(model_input, heads)


def yolov3_train(num_classes,
                anchors,
                max_box_per_image, 
                max_grid, 
                batch_size, 
                warmup_batches,
                ignore_thresh,
                grid_scales,
                obj_scale,
                noobj_scale,
                xywh_scale,
                class_scale):
    """
    Build the YOLOv3 training model and the matching inference model.

    The training model takes the image, the ground-truth boxes and one
    ground-truth tensor per output scale, and outputs one YoloLayer loss per
    scale. The inference model shares the same network and outputs the three
    raw prediction tensors.

    :param num_classes: number of object classes
    :param anchors: flat anchor list; `len(anchors) // 6` anchors are used per scale
    :param max_box_per_image: size of the box axis of the `true_boxes` input
    :param max_grid: base grid size, scaled by 1, 2 and 4 for the three heads
    :param batch_size: forwarded to every YoloLayer
    :param warmup_batches: forwarded to every YoloLayer
    :param ignore_thresh: forwarded to every YoloLayer
    :param grid_scales: one scale value per head (indices 0, 1, 2)
    :param obj_scale: forwarded to every YoloLayer
    :param noobj_scale: forwarded to every YoloLayer
    :param xywh_scale: forwarded to every YoloLayer
    :param class_scale: forwarded to every YoloLayer
    :return: list [train_model, infer_model]
    """
    anchors_per_scale = len(anchors) // 6

    input_image = Input(shape=(None, None, 3))  # net_h, net_w, 3
    true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))
    # One ground-truth tensor per head: grid_h, grid_w, nb_anchor, 5+nb_class
    true_targets = [
        Input(shape=(None, None, anchors_per_scale, 4 + 1 + num_classes))
        for _ in range(3)
    ]

    _, *predictions = yolov3_base(input_image, anchors_per_scale, num_classes)

    # (anchor slice, multiplier applied to max_grid) for heads 1, 2 and 3.
    scale_plan = (
        (anchors[12:], 1),
        (anchors[6:12], 2),
        (anchors[:6], 4),
    )

    losses = []
    for index, (scale_anchors, grid_factor) in enumerate(scale_plan):
        loss_layer = YoloLayer(scale_anchors,
                               [grid_factor * num for num in max_grid],
                               batch_size,
                               warmup_batches,
                               ignore_thresh,
                               grid_scales[index],
                               obj_scale,
                               noobj_scale,
                               xywh_scale,
                               class_scale)
        losses.append(loss_layer([input_image, predictions[index], true_targets[index], true_boxes]))

    train_model = Model([input_image, true_boxes] + true_targets, losses)
    infer_model = Model(input_image, predictions)

    return [train_model, infer_model]


def tiny_yolov3_main(input, num_anchors, num_classes):
    """
    Create the TinyYOLOv3 inference model with two prediction outputs.

    :param input: image input tensor
    :param num_anchors: anchors predicted per output scale
    :param num_classes: number of object classes
    :return: Model mapping the input to [coarse prediction, fine prediction],
             each with `num_anchors * (num_classes + 5)` channels
    """
    head_filters = num_anchors * (num_classes + 5)

    # Four conv + stride-2 max-pool pairs, then a 256-filter conv that is kept
    # as the skip connection for the second output.
    skip = input
    for width in (16, 32, 64, 128):
        skip = NetworkConv2D_BN_Leaky(input=skip, channels=width, kernel_size=(3, 3))
        skip = MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding="same")(skip)
    skip = NetworkConv2D_BN_Leaky(input=skip, channels=256, kernel_size=(3, 3))

    deep = MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding="same")(skip)
    deep = NetworkConv2D_BN_Leaky(input=deep, channels=512, kernel_size=(3, 3))
    deep = MaxPool2D(pool_size=(2, 2), strides=(1, 1), padding="same")(deep)
    deep = NetworkConv2D_BN_Leaky(input=deep, channels=1024, kernel_size=(3, 3))
    deep = NetworkConv2D_BN_Leaky(input=deep, channels=256, kernel_size=(1, 1))

    # First output, taken from the deepest features.
    coarse_out = NetworkConv2D_BN_Leaky(input=deep, channels=512, kernel_size=(3, 3))
    coarse_out = Conv2D(head_filters, kernel_size=(1, 1))(coarse_out)

    # Second output: upsample the deep features and merge with the skip tensor.
    upsampled = NetworkConv2D_BN_Leaky(input=deep, channels=128, kernel_size=(1, 1))
    upsampled = UpSampling2D(2)(upsampled)

    fine_out = Concatenate()([upsampled, skip])
    fine_out = NetworkConv2D_BN_Leaky(input=fine_out, channels=256, kernel_size=(3, 3))
    fine_out = Conv2D(head_filters, kernel_size=(1, 1))(fine_out)

    return Model(input, [coarse_out, fine_out])

def dummy_loss(y_true, y_pred):
    """
    Loss function that ignores `y_true` and returns the square root of the
    sum of all values in `y_pred`.

    :param y_true: unused
    :param y_pred: tensor whose elements are summed
    :return: scalar tensor sqrt(sum(y_pred))
    """
    total = tf.reduce_sum(y_pred)
    return tf.sqrt(total)

================================================
FILE: imageai_tf_deprecated/Detection/__init__.py
================================================
import cv2
from imageai.Detection.keras_retinanet import models as retinanet_models
from imageai.Detection.keras_retinanet.utils.image import read_image_bgr, preprocess_image, resize_image
from imageai.Detection.keras_retinanet.utils.visualization import draw_box, draw_caption
import matplotlib.pyplot as plt
import matplotlib.image as pltimage
import numpy as np
import tensorflow as tf
import os
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input
from PIL import Image
import colorsys
import warnings

from imageai.Detection.YOLO.yolov3 import tiny_yolov3_main, yolov3_main
from imageai.Detection.YOLO.utils import letterbox_image, yolo_eval, preprocess_input, retrieve_yolo_detections, draw_boxes


class ObjectDetection:
    """
    This is the object detection class for images in the ImageAI library. It provides support for the RetinaNet,
    YOLOv3 and TinyYOLOv3 object detection networks. After instantiating this class, you can set its properties and
    make object detections using its pre-defined functions.

    The following functions are required to be called before object detection can be made
    * setModelPath()
    * At least one of the following, and it must correspond to the model set in the setModelPath()
    [setModelTypeAsRetinaNet(), setModelTypeAsYOLOv3(), setModelTypeAsTinyYOLOv3()]
    * loadModel() [This must be called once only before performing object detection]

    Once the above functions have been called, you can call the detectObjectsFromImage() function of
    the object detection instance object at any time to obtain observable objects in any image.
    """

    def __init__(self):
        self.__modelType = ""
        self.modelPath = ""
        self.__modelPathAdded = False
        self.__modelLoaded = False
        self.__model_collection = []

        # Instance variables for RetinaNet Model
        self.__input_image_min = 1333
        self.__input_image_max = 800

        self.numbers_to_names = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus',
                                 6: 'train',
                                 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign',
                                 12: 'parking meter',
                                 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow',
                                 20: 'elephant',
                                 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag',
                                 27: 'tie',
                                 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball',
                                 33: 'kite',
                                 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard',
                                 38: 'tennis racket',
                                 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon',
                                 45: 'bowl',
                                 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot',
                                 52: 'hot dog',
                                 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant',
                                 59: 'bed',
                                 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote',
                                 66: 'keyboard',
                                 67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink',
                                 72: 'refrigerator',
                                 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear',
                                 78: 'hair dryer',
                                 79: 'toothbrush'}

        # Unique instance variables for YOLOv3 and TinyYOLOv3 model
        self.__yolo_iou = 0.45
        self.__yolo_score = 0.1
        self.__nms_thresh = 0.45
        self.__yolo_anchors = [[116,90,  156,198,  373,326],  [30,61, 62,45,  59,119], [10,13,  16,30,  33,23]]
        self.__yolo_model_image_size = (416, 416)
        self.__yolo_boxes, self.__yolo_scores, self.__yolo_classes = "", "", ""
        self.__tiny_yolo_anchors = [[81, 82, 135, 169, 344, 319], [10, 14, 23, 27, 37, 58]]
        self.__box_color = (112, 19, 24)
        

    def setModelTypeAsRetinaNet(self):
        """
        'setModelTypeAsRetinaNet()' is used to set the model type to the RetinaNet model
        for the video object detection instance instance object .
        :return:
        """
        self.__modelType = "retinanet"

    def setModelTypeAsYOLOv3(self):
        """
                'setModelTypeAsYOLOv3()' is used to set the model type to the YOLOv3 model
                for the video object detection instance instance object .
                :return:
                """

        self.__modelType = "yolov3"

    def setModelTypeAsTinyYOLOv3(self):
        """
                        'setModelTypeAsTinyYOLOv3()' is used to set the model type to the TinyYOLOv3 model
                        for the video object detection instance instance object .
                        :return:
                        """

        self.__modelType = "tinyyolov3"

    def setModelPath(self, model_path):
        """
         'setModelPath()' function is required and is used to set the file path to a RetinaNet
          object detection model trained on the COCO dataset.
          :param model_path:
          :return:
        """

        if (self.__modelPathAdded == False):
            self.modelPath = model_path
            self.__modelPathAdded = True

    def loadModel(self, detection_speed="normal"):
        """
                'loadModel()' function is required and is used to load the model structure into the program from the file path defined
                in the setModelPath() function. This function receives an optional value which is "detection_speed".
                The value is used to reduce the time it takes to detect objects in an image, down to about a 10% of the normal time, with
                 with just slight reduction in the number of objects detected.


                * prediction_speed (optional); Acceptable values are "normal", "fast", "faster", "fastest" and "flash"

                :param detection_speed:
                :return:
        """

        if (self.__modelType == "retinanet"):
            if (detection_speed == "normal"):
                self.__input_image_min = 800
                self.__input_image_max = 1333
            elif (detection_speed == "fast"):
                self.__input_image_min = 400
                self.__input_image_max = 700
            elif (detection_speed == "faster"):
                self.__input_image_min = 300
                self.__input_image_max = 500
            elif (detection_speed == "fastest"):
                self.__input_image_min = 200
                self.__input_image_max = 350
            elif (detection_speed == "flash"):
                self.__input_image_min = 100
                self.__input_image_max = 250
        elif (self.__modelType == "yolov3"):
            if (detection_speed == "normal"):
                self.__yolo_model_image_size = (416, 416)
            elif (detection_speed == "fast"):
                self.__yolo_model_image_size = (320, 320)
            elif (detection_speed == "faster"):
                self.__yolo_model_image_size = (208, 208)
            elif (detection_speed == "fastest"):
                self.__yolo_model_image_size = (128, 128)
            elif (detection_speed == "flash"):
                self.__yolo_model_image_size = (96, 96)

        elif (self.__modelType == "tinyyolov3"):
            if (detection_speed == "normal"):
                self.__yolo_model_image_size = (832, 832)
            elif (detection_speed == "fast"):
                self.__yolo_model_image_size = (576, 576)
            elif (detection_speed == "faster"):
                self.__yolo_model_image_size = (416, 416)
            elif (detection_speed == "fastest"):
                self.__yolo_model_image_size = (320, 320)
            elif (detection_speed == "flash"):
                self.__yolo_model_image_size = (272, 272)

        if (self.__modelLoaded == False):
            if (self.__modelType == ""):
                raise ValueError("You must set a valid model type before loading the model.")
            elif (self.__modelType == "retinanet"):
                model = retinanet_models.load_model(self.modelPath, backbone_name='resnet50')
                self.__model_collection.append(model)
                self.__modelLoaded = True
            elif (self.__modelType == "yolov3" or self.__modelType == "tinyyolov3"):

                input_image = Input(shape=(None, None, 3))

                if self.__modelType == "yolov3":
                    model = yolov3_main(input_image, len(self.__yolo_anchors),
                                    len(self.numbers_to_names.keys()))
                else:
                    model = tiny_yolov3_main(input_image, 3,
                                 len(self.numbers_to_names.keys()))

                model.load_weights(self.modelPath)
               
                self.__model_collection.append(model)
                self.__modelLoaded = True

    def detectObjectsFromImage(self, input_image="", output_image_path="", input_type="file", output_type="file",
                               extract_detected_objects=False, minimum_percentage_probability=50,
                               display_percentage_probability=True, display_object_name=True,
                               display_box=True, thread_safe=False, custom_objects=None):
        """
            'detectObjectsFromImage()' function is used to detect objects observable in the given image path:
                    * input_image , which can be a filepath, image numpy array or image file stream
                    * output_image_path (only if output_type = file) , file path to the output image that will contain the detection boxes and label, if output_type="file"
                    * input_type (optional) , file path/numpy array/image file stream of the image. Acceptable values are "file", "array" and "stream"
                    * output_type (optional) , file path/numpy array/image file stream of the image. Acceptable values are "file" and "array"
                    * extract_detected_objects (optional) , option to save each object detected individually as an image and return an array of the objects' image path.
                    * minimum_percentage_probability (optional, 50 by default) , option to set the minimum percentage probability for nominating a detected object for output.
                    * display_percentage_probability (optional, True by default), option to show or hide the percentage probability of each object in the saved/returned detected image
                    * display_display_object_name (optional, True by default), option to show or hide the name of each object in the saved/returned detected image
                    * thread_safe (optional, False by default), enforce the loaded detection model works across all threads if set to true, made possible by forcing all Tensorflow inference to run on the default graph.


            The values returned by this function depends on the parameters parsed. The possible values returnable
            are stated as below
            - If extract_detected_objects = False or at its default value and output_type = 'file' or
                at its default value, you must parse in the 'output_image_path' as a string to the path you want
                the detected image to be saved. Then the function will return:
                1. an array of dictionaries, with each dictionary corresponding to the objects
                    detected in the image. Each dictionary contains the following property:
                    * name (string)
                    * percentage_probability (float)
                    * box_points (list of x1,y1,x2 and y2 coordinates)

            - If extract_detected_objects = False or at its default value and output_type = 'array' ,
              Then the function will return:

                1. a numpy array of the detected image
                2. an array of dictionaries, with each dictionary corresponding to the objects
                    detected in the image. Each dictionary contains the following property:
                    * name (string)
                    * percentage_probability (float)
                    * box_points (list of x1,y1,x2 and y2 coordinates)

            - If extract_detected_objects = True and output_type = 'file' or
                at its default value, you must parse in the 'output_image_path' as a string to the path you want
                the detected image to be saved. Then the function will return:
                1. an array of dictionaries, with each dictionary corresponding to the objects
                    detected in the image. Each dictionary contains the following property:
                    * name (string)
                    * percentage_probability (float)
                    * box_points (list of x1,y1,x2 and y2 coordinates)
                2. an array of string paths to the image of each object extracted from the image

            - If extract_detected_objects = True and output_type = 'array', the the function will return:
                1. a numpy array of the detected image
                2. an array of dictionaries, with each dictionary corresponding to the objects
                    detected in the image. Each dictionary contains the following property:
                    * name (string)
                    * percentage_probability (float)
                    * box_points (list of x1,y1,x2 and y2 coordinates)
                3. an array of numpy arrays of each object detected in the image


            :param input_image:
            :param output_image_path:
            :param input_type:
            :param output_type:
            :param extract_detected_objects:
            :param minimum_percentage_probability:
            :param display_percentage_probability:
            :param display_object_name:
            :param thread_safe:
            :return image_frame:
            :return output_objects_array:
            :return detected_objects_image_array:
        """

        if (self.__modelLoaded == False):
            raise ValueError("You must call the loadModel() function before making object detection.")
        elif (self.__modelLoaded == True):
            try:

                model_detections = list()
                detections = list()
                image_copy = None

                detected_objects_image_array = []
                min_probability = minimum_percentage_probability / 100

                if (input_type == "file"):
                    input_image = cv2.imread(input_image)
                elif (input_type == "array"):
                    input_image = np.array(input_image)

                detected_copy = input_image
                image_copy = input_image

                if (self.__modelType == "yolov3" or self.__modelType == "tinyyolov3"):

                    image_h, image_w, _ = detected_copy.shape
                    detected_copy = preprocess_input(detected_copy, self.__yolo_model_image_size)

                    model = self.__model_collection[0]
                    yolo_result = model.predict(detected_copy)

                    model_detections = retrieve_yolo_detections(yolo_result,
                            self.__yolo_anchors,
                            min_probability,
                            self.__nms_thresh,
                            self.__yolo_model_image_size,
                            (image_w, image_h),
                            self.numbers_to_names)
                            
                elif (self.__modelType == "retinanet"):
                    detected_copy = preprocess_image(detected_copy)
                    detected_copy, scale = resize_image(detected_copy)

                    model = self.__model_collection[0]
                    boxes, scores, labels = model.predict_on_batch(np.expand_dims(detected_copy, axis=0))

                    
                    boxes /= scale

                    for box, score, label in zip(boxes[0], scores[0], labels[0]):
                        # scores are sorted so we can break
                        if score < min_probability:
                            break

                        detection_dict = dict()
                        detection_dict["name"] = self.numbers_to_names[label]
                        detection_dict["percentage_probability"] = score * 100
                        detection_dict["box_points"] = box.astype(int).tolist()
                        model_detections.append(detection_dict)

                counting = 0
                objects_dir = output_image_path + "-objects"

                for detection in model_detections:
                    counting += 1
                    label = detection["name"]
                    percentage_probability = detection["percentage_probability"]
                    box_points = detection["box_points"]

                    if (custom_objects is not None):
                        if (custom_objects[label] != "valid"):
                            continue
                    
                    detections.append(detection)

                    if display_object_name == False:
                        label = None

                    if display_percentage_probability == False:
                        percentage_probability = None

                    
                    image_copy = draw_boxes(image_copy, 
                                    box_points,
                                    display_box,
                                    label, 
                                    percentage_probability, 
                                    self.__box_color)
                    
                    
                    if (extract_detected_objects == True):
                        splitted_copy = image_copy.copy()[box_points[1]:box_points[3],
                                        box_points[0]:box_points[2]]
                        if (output_type == "file"):
                            if (os.path.exists(objects_dir) == False):
                                os.mkdir(objects_dir)
                            splitted_image_path = os.path.join(objects_dir,
                                                                detection["name"] + "-" + str(
                                                                    counting) + ".jpg")
                            cv2.imwrite(splitted_image_path, splitted_copy)
                            detected_objects_image_array.append(splitted_image_path)
                        elif (output_type == "array"):
                            detected_objects_image_array.append(splitted_copy)

                
                if (output_type == "file"):
                    cv2.imwrite(output_image_path, image_copy)

                if (extract_detected_objects == True):
                    if (output_type == "file"):
                        return detections, detected_objects_image_array
                    elif (output_type == "array"):
                        return image_copy, detections, detected_objects_image_array

                else:
                    if (output_type == "file"):
                        return detections
                    elif (output_type == "array"):
                        return image_copy, detections

            except:
                raise ValueError(
                    "Ensure you specified correct input image, input type, output type and/or output image path ")

    def CustomObjects(self, person=False, bicycle=False, car=False, motorcycle=False, airplane=False,
                      bus=False, train=False, truck=False, boat=False, traffic_light=False, fire_hydrant=False,
                      stop_sign=False,
                      parking_meter=False, bench=False, bird=False, cat=False, dog=False, horse=False, sheep=False,
                      cow=False, elephant=False, bear=False, zebra=False,
                      giraffe=False, backpack=False, umbrella=False, handbag=False, tie=False, suitcase=False,
                      frisbee=False, skis=False, snowboard=False,
                      sports_ball=False, kite=False, baseball_bat=False, baseball_glove=False, skateboard=False,
                      surfboard=False, tennis_racket=False,
                      bottle=False, wine_glass=False, cup=False, fork=False, knife=False, spoon=False, bowl=False,
                      banana=False, apple=False, sandwich=False, orange=False,
                      broccoli=False, carrot=False, hot_dog=False, pizza=False, donut=False, cake=False, chair=False,
                      couch=False, potted_plant=False, bed=False,
                      dining_table=False, toilet=False, tv=False, laptop=False, mouse=False, remote=False,
                      keyboard=False, cell_phone=False, microwave=False,
                      oven=False, toaster=False, sink=False, refrigerator=False, book=False, clock=False, vase=False,
                      scissors=False, teddy_bear=False, hair_dryer=False,
                      toothbrush=False):

        """
                         The 'CustomObjects()' function allows you to handpick the type of objects you want to detect
                         from an image. The objects are pre-initiated in the function variables and predefined as 'False',
                         which you can easily set to true for any number of objects available.  This function
                         returns a dictionary which must be parsed into the 'detectCustomObjectsFromImage()'. Detecting
                          custom objects only happens when you call the function 'detectCustomObjectsFromImage()'


                        * true_values_of_objects (array); Acceptable values are 'True' and False  for all object values present

                        :param boolean_values:
                        :return: custom_objects_dict
                """

        custom_objects_dict = {}
        input_values = [person, bicycle, car, motorcycle, airplane,
                        bus, train, truck, boat, traffic_light, fire_hydrant, stop_sign,
                        parking_meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra,
                        giraffe, backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard,
                        sports_ball, kite, baseball_bat, baseball_glove, skateboard, surfboard, tennis_racket,
                        bottle, wine_glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange,
                        broccoli, carrot, hot_dog, pizza, donut, cake, chair, couch, potted_plant, bed,
                        dining_table, toilet, tv, laptop, mouse, remote, keyboard, cell_phone, microwave,
                        oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy_bear, hair_dryer,
                        toothbrush]
        actual_labels = ["person", "bicycle", "car", "motorcycle", "airplane",
                         "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign",
                         "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear",
                         "zebra",
                         "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis",
                         "snowboard",
                         "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
                         "tennis racket",
                         "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
                         "orange",
                         "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant",
                         "bed",
                         "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
                         "microwave",
                         "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
                         "hair dryer",
                         "toothbrush"]

        for input_value, actual_label in zip(input_values, actual_labels):
            if (input_value == True):
                custom_objects_dict[actual_label] = "valid"
            else:
                custom_objects_dict[actual_label] = "invalid"

        return custom_objects_dict

    def detectCustomObjectsFromImage(self, input_image="", output_image_path="", input_type="file", output_type="file",
                               extract_detected_objects=False, minimum_percentage_probability=50,
                               display_percentage_probability=True, display_object_name=True,
                               display_box=True, thread_safe=False, custom_objects=None):
        
        warnings.warn("'detectCustomObjectsFromImage()' function has been deprecated and will be removed in future versions of ImageAI. \n Kindly use 'detectObjectsFromImage()' ",
         DeprecationWarning, stacklevel=2)
        
        return self.detectObjectsFromImage(input_image=input_image,
                                            output_image_path=output_image_path, 
                                            input_type=input_type, 
                                            output_type=output_type,
                                            extract_detected_objects=extract_detected_objects, 
                                            minimum_percentage_probability=minimum_percentage_probability,
                                            display_percentage_probability=display_percentage_probability, 
                                            display_object_name=display_object_name,
                                            display_box=display_box, 
                                            thread_safe=thread_safe, 
                                            custom_objects=custom_objects)
        

class VideoObjectDetection:
    """
    Object detection for video files and live camera streams in the ImageAI library.
    The class wraps an 'ObjectDetection' instance and runs it on the frames of a video,
    with support for the RetinaNet, YOLOv3 and TinyYOLOv3 detection networks.

    The following functions must be called before any detection can be made
    * setModelPath()
    * Exactly one of the following, matching the model given to setModelPath()
      [setModelTypeAsRetinaNet(), setModelTypeAsYOLOv3(), setModelTypeAsTinyYOLOv3()]
    * loadModel() [only the first call has an effect]

    Once the above functions have been called, detectObjectsFromVideo() (or its alias
    detectCustomObjectsFromVideo()) can be called at any time to detect objects in a
    video file or a camera live stream.
    """

    def __init__(self):
        self.__modelType = ""
        self.modelPath = ""
        self.__modelPathAdded = False
        self.__modelLoaded = False
        self.__detector = None
        # NOTE(review): the two size attributes below are not read anywhere in this class and
        # their values look swapped (min > max); kept unchanged - confirm before relying on them.
        self.__input_image_min = 1333
        self.__input_image_max = 800
        self.__detection_storage = None


        self.numbers_to_names = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus',
                                 6: 'train',
                                 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign',
                                 12: 'parking meter',
                                 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow',
                                 20: 'elephant',
                                 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag',
                                 27: 'tie',
                                 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball',
                                 33: 'kite',
                                 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard',
                                 38: 'tennis racket',
                                 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon',
                                 45: 'bowl',
                                 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot',
                                 52: 'hot dog',
                                 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant',
                                 59: 'bed',
                                 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote',
                                 66: 'keyboard',
                                 67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink',
                                 72: 'refrigerator',
                                 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear',
                                 78: 'hair dryer',
                                 79: 'toothbrush'}

    def setModelTypeAsRetinaNet(self):
        """
        'setModelTypeAsRetinaNet()' sets the model type of this video object detection
        instance to the RetinaNet model.
        :return:
        """
        self.__modelType = "retinanet"

    def setModelTypeAsYOLOv3(self):
        """
        'setModelTypeAsYOLOv3()' sets the model type of this video object detection
        instance to the YOLOv3 model.
        :return:
        """
        self.__modelType = "yolov3"

    def setModelTypeAsTinyYOLOv3(self):
        """
        'setModelTypeAsTinyYOLOv3()' sets the model type of this video object detection
        instance to the TinyYOLOv3 model.
        :return:
        """
        self.__modelType = "tinyyolov3"

    def setModelPath(self, model_path):
        """
        'setModelPath()' is required and sets the file path to a RetinaNet, YOLOv3 or
        TinyYOLOv3 object detection model trained on the COCO dataset.

        Only the first call has an effect; later calls leave the stored path untouched.

        :param model_path: path to the model file
        :return:
        """

        if (self.__modelPathAdded == False):
            self.modelPath = model_path
            self.__modelPathAdded = True

    def loadModel(self, detection_speed="normal"):
        """
        'loadModel()' is required. It creates an 'ObjectDetection' instance, configures it with
        the model type and model path previously set on this object, loads the model and keeps
        the instance as the frame detector. Only the first call has an effect.

        * detection_speed (optional); forwarded to 'ObjectDetection.loadModel()'. Acceptable
          values are "normal", "fast", "faster", "fastest" and "flash"

        :param detection_speed:
        :return:
        """

        if (self.__modelLoaded == False):

            frame_detector = ObjectDetection()

            # When no model type has been selected, none is applied to the frame detector.
            if (self.__modelType == "retinanet"):
                frame_detector.setModelTypeAsRetinaNet()
            elif (self.__modelType == "yolov3"):
                frame_detector.setModelTypeAsYOLOv3()
            elif (self.__modelType == "tinyyolov3"):
                frame_detector.setModelTypeAsTinyYOLOv3()
            frame_detector.setModelPath(self.modelPath)
            frame_detector.loadModel(detection_speed)
            self.__detector = frame_detector
            self.__modelLoaded = True

    @staticmethod
    def _count_object_names(detected_objects):
        """Return a {name: number of occurrences} dictionary for the detections of one frame."""
        name_counts = {}
        for detected_object in detected_objects:
            object_name = detected_object["name"]
            name_counts[object_name] = name_counts.get(object_name, 0) + 1
        return name_counts

    @staticmethod
    def _summarise_frames(frames_dict, counts_dict, last_frame, window):
        """
        Collect the per-frame results of the last 'window' frames ending at frame 'last_frame'
        and return (detections per frame, counts per frame, average count per object name).
        """
        # Index of the first frame in the window; rounded up so that a non-integer
        # 'window' selects the same frames as a plain "index >= last_frame - window" test.
        first_index = last_frame - window
        start = int(first_index)
        if start < first_index:
            start += 1
        start = max(start, 0)

        object_arrays = [frames_dict[index + 1] for index in range(start, last_frame)]
        counting_arrays = [counts_dict[index + 1] for index in range(start, last_frame)]

        totals = {}
        for frame_counts in counting_arrays:
            for object_name, occurrences in frame_counts.items():
                totals[object_name] = totals.get(object_name, 0) + occurrences

        # The average is truncated to an integer number of instances per frame.
        averages = {object_name: int(total / window) for object_name, total in totals.items()}
        return object_arrays, counting_arrays, averages

    def detectObjectsFromVideo(self, input_file_path="", camera_input=None, output_file_path="", frames_per_second=20,
                               frame_detection_interval=1, minimum_percentage_probability=50, log_progress=False,
                               display_percentage_probability=True, display_object_name=True, display_box=True, save_detected_video=True,
                               per_frame_function=None, per_second_function=None, per_minute_function=None,
                               video_complete_function=None, return_detected_frame=False, detection_timeout = None,
                               thread_safe=False, custom_objects=None):

        """
        'detectObjectsFromVideo()' detects objects in the frames of a video file or of a camera input.

            * input_file_path , the file path to the input video. Required only if 'camera_input' is not set
            * camera_input , an already opened capture object (it must provide isOpened(), read() and release(),
              plus get() when the video is saved). When given it is used instead of 'input_file_path' and it is
              released when this function finishes
            * output_file_path , the path of the output video, without extension ('.avi' is appended). Required
              unless 'save_detected_video' is set to False
            * frames_per_second , the frame rate of the output video; also the number of frames treated as one
              second for the per-second / per-minute callbacks and for 'detection_timeout'
            * frame_detection_interval (optional, 1 by default) , detection runs on the first frame and on every
              frame whose position is a multiple of this value; other frames are kept without detections
            * minimum_percentage_probability (optional, 50 by default) , forwarded to the frame detector
            * log_progress (optional) , print the position of every processed frame to the console
            * display_percentage_probability (optional), forwarded to the frame detector
            * display_object_name (optional), forwarded to the frame detector
            * display_box (optional), forwarded to the frame detector
            * save_detected_video (optional, True by default), write the processed frames to the output video.
              When False no output file is created
            * per_frame_function (optional), called after every frame on which detection ran, with:
                -- position number of the frame
                -- an array of dictionaries, one per detected object, as returned by the frame detector
                -- a dictionary mapping each unique object name to its number of instances in the frame
                -- if 'return_detected_frame' is True, the detected frame as a fourth value

            * per_second_function (optional), called after every 'frames_per_second' frames, with:
                -- position number of the second
                -- an array holding, for each frame of the past second, the array of detected objects
                -- an array holding, for each frame of the past second, the {name: instances} dictionary
                -- a dictionary mapping each unique object name to its average number of instances per frame
                   over the past second (truncated to an integer)
                -- if 'return_detected_frame' is True, the last detected frame as a fifth value

            * per_minute_function (optional), same as 'per_second_function' but called after every
              'frames_per_second * 60' frames and covering the past minute

            * video_complete_function (optional), called once after the last frame, with the same three
              collections as above covering the entire video (no position number and no frame)

            * return_detected_frame (optional, False by default), pass the detected frame to per_frame_function,
              per_second_function and per_minute_function

            * detection_timeout (optional, None by default), number of seconds of video after which processing stops
            * thread_safe (optional, False by default), forwarded to the frame detector's 'detectObjectsFromImage()'
            * custom_objects (optional, None by default), forwarded to the frame detector; see 'CustomObjects()'

        A failure of the frame detector on a single frame does not stop the video: a RuntimeWarning is
        issued and the frame is kept unmodified with no detections.

        :param input_file_path:
        :param camera_input:
        :param output_file_path:
        :param frames_per_second:
        :param frame_detection_interval:
        :param minimum_percentage_probability:
        :param log_progress:
        :param display_percentage_probability:
        :param display_object_name:
        :param display_box:
        :param save_detected_video:
        :param per_frame_function:
        :param per_second_function:
        :param per_minute_function:
        :param video_complete_function:
        :param return_detected_frame:
        :param detection_timeout:
        :param thread_safe:
        :param custom_objects:
        :return: the output video path ('output_file_path' + '.avi') when 'save_detected_video' is True, otherwise None
        :raises ValueError: when neither an input path nor a camera is given, when the video must be saved but
                            no output path is given, or when any error occurs while processing the video
                            (the original error is attached as the cause)
        """

        if (input_file_path == "" and camera_input is None):
            raise ValueError(
                "You must set 'input_file_path' to a valid video file, or set 'camera_input' to a valid camera")
        if (save_detected_video == True and output_file_path == ""):
            raise ValueError(
                "You must set 'output_file_path' to a valid video file name, in which the detected video will be saved. If you don't intend to save the detected video, set 'save_detected_video=False'")

        saving_video = (save_detected_video == True)
        output_video_filepath = None
        input_video = None
        output_video = None

        try:
            try:
                output_frames_dict = {}
                output_frames_count_dict = {}

                # Open the file only when no camera was supplied, so no unused capture is left open.
                if (camera_input is not None):
                    input_video = camera_input
                else:
                    input_video = cv2.VideoCapture(input_file_path)

                output_video_filepath = output_file_path + '.avi'

                # Create the writer only when the video is saved, so no stray output file is produced.
                if saving_video:
                    # Properties 3 and 4 are read as the frame width and height.
                    frame_width = int(input_video.get(3))
                    frame_height = int(input_video.get(4))
                    output_video = cv2.VideoWriter(output_video_filepath,
                                                   cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                                                   frames_per_second,
                                                   (frame_width, frame_height))

                counting = 0

                detection_timeout_count = 0
                video_frames_count = 0

                while (input_video.isOpened()):
                    ret, frame = input_video.read()

                    if not ret:
                        break

                    video_frames_count += 1
                    if (detection_timeout is not None):
                        if ((video_frames_count % frames_per_second) == 0):
                            detection_timeout_count += 1

                        if (detection_timeout_count >= detection_timeout):
                            break

                    output_objects_array = []

                    counting += 1

                    if (log_progress == True):
                        print("Processing Frame : ", str(counting))

                    detected_copy = frame.copy()

                    check_frame_interval = counting % frame_detection_interval
                    is_detection_frame = (counting == 1 or check_frame_interval == 0)

                    if is_detection_frame:
                        try:
                            detected_copy, output_objects_array = self.__detector.detectObjectsFromImage(
                                input_image=frame, input_type="array", output_type="array",
                                minimum_percentage_probability=minimum_percentage_probability,
                                display_percentage_probability=display_percentage_probability,
                                display_object_name=display_object_name,
                                display_box=display_box,
                                thread_safe=thread_safe,
                                custom_objects=custom_objects)
                        except Exception as frame_error:
                            # Best effort: one bad frame must not abort the whole video, but the
                            # failure is reported instead of being silently discarded.
                            warnings.warn(
                                "detectObjectsFromImage() failed on a video frame; the frame was kept without detections: %r" % (frame_error,),
                                RuntimeWarning)

                    output_frames_dict[counting] = output_objects_array

                    output_objects_count = self._count_object_names(output_objects_array)
                    output_frames_count_dict[counting] = output_objects_count

                    if (output_video is not None):
                        output_video.write(detected_copy)

                    if (is_detection_frame and per_frame_function is not None):
                        if (return_detected_frame == True):
                            per_frame_function(counting, output_objects_array, output_objects_count,
                                               detected_copy)
                        else:
                            per_frame_function(counting, output_objects_array, output_objects_count)

                    if (per_second_function is not None):
                        if (counting != 1 and (counting % frames_per_second) == 0):
                            (this_second_output_object_array, this_second_counting_array,
                             this_second_counting) = self._summarise_frames(
                                output_frames_dict, output_frames_count_dict, counting, frames_per_second)

                            second_position = int(counting / frames_per_second)
                            if (return_detected_frame == True):
                                per_second_function(second_position,
                                                    this_second_output_object_array, this_second_counting_array,
                                                    this_second_counting, detected_copy)
                            else:
                                per_second_function(second_position,
                                                    this_second_output_object_array, this_second_counting_array,
                                                    this_second_counting)

                    if (per_minute_function is not None):
                        frames_per_minute = frames_per_second * 60

                        if (counting != 1 and (counting % frames_per_minute) == 0):
                            (this_minute_output_object_array, this_minute_counting_array,
                             this_minute_counting) = self._summarise_frames(
                                output_frames_dict, output_frames_count_dict, counting, frames_per_minute)

                            minute_position = int(counting / frames_per_minute)
                            if (return_detected_frame == True):
                                per_minute_function(minute_position,
                                                    this_minute_output_object_array, this_minute_counting_array,
                                                    this_minute_counting, detected_copy)
                            else:
                                per_minute_function(minute_position,
                                                    this_minute_output_object_array, this_minute_counting_array,
                                                    this_minute_counting)

                if (video_complete_function is not None):
                    # The window spans every processed frame, so the averages cover the whole video.
                    (this_video_output_object_array, this_video_counting_array,
                     this_video_counting) = self._summarise_frames(
                        output_frames_dict, output_frames_count_dict, counting, counting)

                    video_complete_function(this_video_output_object_array, this_video_counting_array,
                                            this_video_counting)
            finally:
                # Release the capture and the writer on success and on failure alike.
                if (input_video is not None):
                    input_video.release()
                if (output_video is not None):
                    output_video.release()

        except Exception as error:
            # 'Exception' (not a bare except) lets KeyboardInterrupt/SystemExit propagate; the
            # original error is chained so the real cause stays visible in the traceback.
            raise ValueError(
                "An error occured. It may be that your input video is invalid. Ensure you specified a proper string value for 'output_file_path' is 'save_detected_video' is not False. "
                "Also ensure your per_frame, per_second, per_minute or video_complete_analysis function is properly configured to receive the right parameters. ") from error

        if saving_video:
            return output_video_filepath

    def CustomObjects(self, person=False, bicycle=False, car=False, motorcycle=False, airplane=False,
                      bus=False, train=False, truck=False, boat=False, traffic_light=False, fire_hydrant=False,
                      stop_sign=False,
                      parking_meter=False, bench=False, bird=False, cat=False, dog=False, horse=False, sheep=False,
                      cow=False, elephant=False, bear=False, zebra=False,
                      giraffe=False, backpack=False, umbrella=False, handbag=False, tie=False, suitcase=False,
                      frisbee=False, skis=False, snowboard=False,
                      sports_ball=False, kite=False, baseball_bat=False, baseball_glove=False, skateboard=False,
                      surfboard=False, tennis_racket=False,
                      bottle=False, wine_glass=False, cup=False, fork=False, knife=False, spoon=False, bowl=False,
                      banana=False, apple=False, sandwich=False, orange=False,
                      broccoli=False, carrot=False, hot_dog=False, pizza=False, donut=False, cake=False, chair=False,
                      couch=False, potted_plant=False, bed=False,
                      dining_table=False, toilet=False, tv=False, laptop=False, mouse=False, remote=False,
                      keyboard=False, cell_phone=False, microwave=False,
                      oven=False, toaster=False, sink=False, refrigerator=False, book=False, clock=False, vase=False,
                      scissors=False, teddy_bear=False, hair_dryer=False,
                      toothbrush=False):

        """
        The 'CustomObjects()' function lets you handpick the types of objects you want to detect
        in a video. Every object type is a keyword argument that defaults to 'False'; set the ones
        you are interested in to 'True'.

        The returned dictionary is meant to be passed as the 'custom_objects' argument of
        'detectObjectsFromVideo()' (or of its alias 'detectCustomObjectsFromVideo()').

        :param person ... toothbrush: one flag per object type; a value equal to True selects the type
        :return: custom_objects_dict, mapping each of the 80 object names (with spaces, e.g. "hot dog")
                 to "valid" when its flag equals True and to "invalid" otherwise
        """

        input_values = [person, bicycle, car, motorcycle, airplane,
                        bus, train, truck, boat, traffic_light, fire_hydrant, stop_sign,
                        parking_meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra,
                        giraffe, backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard,
                        sports_ball, kite, baseball_bat, baseball_glove, skateboard, surfboard, tennis_racket,
                        bottle, wine_glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange,
                        broccoli, carrot, hot_dog, pizza, donut, cake, chair, couch, potted_plant, bed,
                        dining_table, toilet, tv, laptop, mouse, remote, keyboard, cell_phone, microwave,
                        oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy_bear, hair_dryer,
                        toothbrush]
        actual_labels = ["person", "bicycle", "car", "motorcycle", "airplane",
                         "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign",
                         "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear",
                         "zebra",
                         "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis",
                         "snowboard",
                         "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
                         "tennis racket",
                         "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
                         "orange",
                         "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant",
                         "bed",
                         "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
                         "microwave",
                         "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
                         "hair dryer",
                         "toothbrush"]

        # '== True' is kept on purpose: only values equal to True select an object type.
        custom_objects_dict = {
            actual_label: ("valid" if input_value == True else "invalid")
            for input_value, actual_label in zip(input_values, actual_labels)
        }

        return custom_objects_dict

    def detectCustomObjectsFromVideo(self, input_file_path="", camera_input=None, output_file_path="", frames_per_second=20,
                               frame_detection_interval=1, minimum_percentage_probability=50, log_progress=False,
                               display_percentage_probability=True, display_object_name=True, display_box=True, save_detected_video=True,
                               per_frame_function=None, per_second_function=None, per_minute_function=None,
                               video_complete_function=None, return_detected_frame=False, detection_timeout = None,
                               thread_safe=False, custom_objects=None):
        """
        Alias of 'detectObjectsFromVideo()': every argument is forwarded unchanged, by keyword,
        and the result of 'detectObjectsFromVideo()' is returned as-is. See that function for
        the meaning of the parameters, the return value and the errors raised.
        """

        return self.detectObjectsFromVideo(input_file_path=input_file_path,
                                            camera_input=camera_input,
                                            output_file_path=output_file_path,
                                            frames_per_second=frames_per_second,
                                            frame_detection_interval=frame_detection_interval,
                                            minimum_percentage_probability=minimum_percentage_probability,
                                            log_progress=log_progress,
                                            display_percentage_probability=display_percentage_probability,
                                            display_object_name=display_object_name,
                                            display_box=display_box,
                                            save_detected_video=save_detected_video,
                                            per_frame_function=per_frame_function,
                                            per_second_function=per_second_function,
                                            per_minute_function=per_minute_function,
                                            video_complete_function=video_complete_function,
                                            return_detected_frame=return_detected_frame,
                                            detection_timeout = detection_timeout,
                                            thread_safe=thread_safe,
                                            custom_objects=custom_objects)

================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/__init__.py
================================================


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/backend/__init__.py
================================================
from .backend import *  # noqa: F401,F403


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/backend/backend.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import tensorflow
from tensorflow import keras


def bbox_transform_inv(boxes, deltas, mean=None, std=None):
    """ Decode regression deltas into boxes by applying them to reference boxes (usually anchors).

    The deltas are treated as normalized values: each one is first de-normalized as `delta * std + mean`,
    then scaled by the reference box width (x coordinates) or height (y coordinates) and added to the
    matching corner of the reference box.

    Args
        boxes : Array/tensor indexed as (B, N, 4); the last axis holds (x1, y1, x2, y2).
        deltas: Same layout as boxes; the last axis holds (d_x1, d_y1, d_x2, d_y2) as fractions of width/height.
        mean  : Per-coordinate mean used to normalize the deltas (defaults to [0, 0, 0, 0]).
        std   : Per-coordinate standard deviation used to normalize the deltas (defaults to [0.2, 0.2, 0.2, 0.2]).

    Returns
        The decoded boxes, stacked along axis 2 in (x1, y1, x2, y2) order.
    """
    mean = [0, 0, 0, 0] if mean is None else mean
    std = [0.2, 0.2, 0.2, 0.2] if std is None else std

    width = boxes[:, :, 2] - boxes[:, :, 0]
    height = boxes[:, :, 3] - boxes[:, :, 1]

    # x corners (index 0 and 2) move proportionally to the width, y corners (1 and 3) to the height.
    extents = (width, height, width, height)

    decoded = [
        boxes[:, :, corner] + (deltas[:, :, corner] * std[corner] + mean[corner]) * extents[corner]
        for corner in range(4)
    ]

    return keras.backend.stack(decoded, axis=2)


def shift(shape, stride, anchors):
    """ Replicate a set of base anchors at every cell centre of a feature map.

    Cell centres are placed at `(index + 0.5) * stride`; `shape[1]` cells are laid out along x and
    `shape[0]` cells along y. Every base anchor is translated to every centre.

    Args
        shape  : Shape to shift the anchors over; index 0 is used for the y extent, index 1 for the x extent.
        stride : Stride to shift the anchors with over the shape.
        anchors: The anchors to apply at each location, one (x1, y1, x2, y2) row per anchor.

    Returns
        A tensor of shape (locations * number of anchors, 4) holding all shifted anchors.
    """
    def cell_centers(count):
        # centre of cell `i` along one axis: (i + 0.5) * stride
        ticks = keras.backend.arange(0, count, dtype=keras.backend.floatx())
        half = keras.backend.constant(0.5, dtype=keras.backend.floatx())
        return (ticks + half) * stride

    grid_x, grid_y = tensorflow.meshgrid(cell_centers(shape[1]), cell_centers(shape[0]))
    flat_x = keras.backend.reshape(grid_x, [-1])
    flat_y = keras.backend.reshape(grid_y, [-1])

    # one (x, y, x, y) offset row per location, so both box corners are moved by the same amount
    offsets = keras.backend.transpose(
        keras.backend.stack([flat_x, flat_y, flat_x, flat_y], axis=0)
    )

    anchor_count = keras.backend.shape(anchors)[0]
    location_count = keras.backend.shape(offsets)[0]  # feat_h * feat_w

    # broadcast (1, A, 4) + (K, 1, 4) -> (K, A, 4), then flatten to (K * A, 4)
    per_anchor = keras.backend.reshape(anchors, [1, anchor_count, 4])
    per_location = keras.backend.cast(
        keras.backend.reshape(offsets, [location_count, 1, 4]),
        keras.backend.floatx()
    )

    return keras.backend.reshape(per_anchor + per_location, [location_count * anchor_count, 4])


def map_fn(*args, **kwargs):
    """ Wrapper around tensorflow.map_fn, see https://www.tensorflow.org/api_docs/python/tf/map_fn .

    When a `shapes` keyword is given it is removed together with `dtype`, and both are combined into a list
    of `tensorflow.TensorSpec` passed as `fn_output_signature`. If that call raises a TypeError, the call is
    repeated with the original `dtype` keyword instead. Without `shapes`, all arguments are forwarded as-is.
    """
    if "shapes" in kwargs:
        output_shapes = kwargs.pop("shapes")
        output_dtypes = kwargs.pop("dtype")
        signature = [
            tensorflow.TensorSpec(output_shapes[index], dtype=element_dtype)
            for index, element_dtype in enumerate(output_dtypes)
        ]

        # Prefer fn_output_signature (available from TF 2.3); fall back to plain dtype when it is rejected.
        try:
            return tensorflow.map_fn(*args, **kwargs, fn_output_signature=signature)
        except TypeError:
            kwargs["dtype"] = output_dtypes

    return tensorflow.map_fn(*args, **kwargs)


def resize_images(images, size, method='bilinear', align_corners=False):
    """ See https://www.tensorflow.org/versions/r1.14/api_docs/python/tf/image/resize_images .

    Translates a textual interpolation name into the matching `tensorflow.image.ResizeMethod` member and
    forwards the call to `tensorflow.compat.v1.image.resize_images`.

    Args
        images       : The images to resize (forwarded unchanged).
        size         : The target size (forwarded unchanged).
        method       : The method used for interpolation. One of ('bilinear', 'nearest', 'bicubic', 'area').
                       Any other value raises a KeyError.
        align_corners: Forwarded unchanged.
    """
    resize_method = tensorflow.image.ResizeMethod
    interpolation = dict(
        bilinear=resize_method.BILINEAR,
        nearest=resize_method.NEAREST_NEIGHBOR,
        bicubic=resize_method.BICUBIC,
        area=resize_method.AREA,
    )
    chosen = interpolation[method]
    return tensorflow.compat.v1.image.resize_images(images, size, chosen, align_corners)


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/bin/__init__.py
================================================


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/bin/convert_model.py
================================================
#!/usr/bin/env python

"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import argparse
import os
import sys

# Allow relative imports when being executed as script.
if __name__ == "__main__" and __package__ is None:
    sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
    import keras_retinanet.bin  # noqa: F401
    __package__ = "keras_retinanet.bin"

# Change these to absolute imports if you copy this script outside the keras_retinanet package.
from .. import models
from ..utils.config import read_config_file, parse_anchor_parameters, parse_pyramid_levels
from ..utils.gpu import setup_gpu
from ..utils.tf_version import check_tf_version


def parse_args(args):
    """ Parse the command line of the model conversion script.

    Args
        args: List of argument strings (without the program name).

    Returns
        The argparse namespace with `model_in`, `model_out` and all conversion options.
    """
    parser = argparse.ArgumentParser(description='Script for converting a training model to an inference model.')

    # (flags, keyword options) in the order they should be registered and listed in --help.
    argument_specs = (
        (('model_in',), dict(help='The model to convert.')),
        (('model_out',), dict(help='Path to save the converted model to.')),
        (('--backbone',), dict(help='The backbone of the model to convert.', default='resnet50')),
        (('--no-nms',), dict(help='Disables non maximum suppression.', dest='nms', action='store_false')),
        (('--no-class-specific-filter',), dict(help='Disables class specific filtering.', dest='class_specific_filter', action='store_false')),
        (('--config',), dict(help='Path to a configuration parameters .ini file.')),
        (('--nms-threshold',), dict(help='Value for non maximum suppression threshold.', type=float, default=0.5)),
        (('--score-threshold',), dict(help='Threshold for prefiltering boxes.', type=float, default=0.05)),
        (('--max-detections',), dict(help='Maximum number of detections to keep.', type=int, default=300)),
        (('--parallel-iterations',), dict(help='Number of batch items to process in parallel.', type=int, default=32)),
    )
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)

    return parser.parse_args(args)


def main(args=None):
    """ Convert a training model into an inference model and save it.

    Args
        args: Optional list of command line argument strings; `sys.argv[1:]` is used when omitted.
    """
    options = parse_args(sys.argv[1:] if args is None else args)

    # make sure tensorflow is the minimum required version
    check_tf_version()

    # the conversion is run with the 'cpu' device setting
    setup_gpu('cpu')

    # anchor / pyramid overrides only exist when a config file provides them
    anchor_parameters = None
    pyramid_levels = None
    if options.config:
        options.config = read_config_file(options.config)

        if 'anchor_parameters' in options.config:
            anchor_parameters = parse_anchor_parameters(options.config)

        if 'pyramid_levels' in options.config:
            pyramid_levels = parse_pyramid_levels(options.config)

    # load the model and check that it really is a training model
    training_model = models.load_model(options.model_in, backbone_name=options.backbone)
    models.check_training_model(training_model)

    conversion_options = dict(
        nms=options.nms,
        class_specific_filter=options.class_specific_filter,
        anchor_params=anchor_parameters,
        pyramid_levels=pyramid_levels,
        nms_threshold=options.nms_threshold,
        score_threshold=options.score_threshold,
        max_detections=options.max_detections,
        parallel_iterations=options.parallel_iterations,
    )
    inference_model = models.convert_model(training_model, **conversion_options)

    # save model
    inference_model.save(options.model_out)


if __name__ == '__main__':
    main()


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/bin/debug.py
================================================
#!/usr/bin/env python

"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import argparse
import os
import sys
import cv2

# Key codes used to step through the images; they are compared against the value
# returned by cv2.waitKeyEx() in run().
#   81, 83           : left and right arrows on Linux in ASCII code (probably not needed)
#   65361, 65363     : left and right arrows on Linux
#   2424832, 2555904 : left and right arrows on Windows
#   110, 109         : 'n' and 'm' on macOS, Windows and Linux
#                      (unfortunately the arrow keys are not picked up on macOS)
# leftkeys go to the previous image, rightkeys go to the next image.
leftkeys = (81, 110, 65361, 2424832)
rightkeys = (83, 109, 65363, 2555904)

# Allow relative imports when being executed as script.
if __name__ == "__main__" and __package__ is None:
    sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
    import keras_retinanet.bin  # noqa: F401
    __package__ = "keras_retinanet.bin"

# Change these to absolute imports if you copy this script outside the keras_retinanet package.
from ..preprocessing.pascal_voc import PascalVocGenerator
from ..preprocessing.csv_generator import CSVGenerator
from ..preprocessing.kitti import KittiGenerator
from ..preprocessing.open_images import OpenImagesGenerator
from ..utils.anchors import anchors_for_shape, compute_gt_annotations
from ..utils.config import read_config_file, parse_anchor_parameters, parse_pyramid_levels
from ..utils.image import random_visual_effect_generator
from ..utils.tf_version import check_tf_version
from ..utils.transform import random_transform_generator
from ..utils.visualization import draw_annotations, draw_boxes, draw_caption


def create_generator(args):
    """ Build the data generator selected by `args.dataset_type`.

    Every generator receives the same image-size / grouping settings plus a random transform generator
    and a random visual effect generator.

    Args:
        args: parseargs arguments object.

    Returns:
        The generator for the 'coco', 'pascal', 'csv', 'oid' or 'kitti' dataset type.

    Raises:
        ValueError: if `args.dataset_type` is none of the supported types.
    """
    shared_kwargs = {
        'config'         : args.config,
        'image_min_side' : args.image_min_side,
        'image_max_side' : args.image_max_side,
        'group_method'   : args.group_method,
        # random geometric augmentation of the training data
        'transform_generator': random_transform_generator(
            min_rotation=-0.1,
            max_rotation=0.1,
            min_translation=(-0.1, -0.1),
            max_translation=(0.1, 0.1),
            min_shear=-0.1,
            max_shear=0.1,
            min_scaling=(0.9, 0.9),
            max_scaling=(1.1, 1.1),
            flip_x_chance=0.5,
            flip_y_chance=0.5,
        ),
        # random photometric augmentation
        'visual_effect_generator': random_visual_effect_generator(
            contrast_range=(0.9, 1.1),
            brightness_range=(-.1, .1),
            hue_range=(-0.05, 0.05),
            saturation_range=(0.95, 1.05)
        ),
    }

    dataset_type = args.dataset_type

    if dataset_type == 'coco':
        # import here to prevent unnecessary dependency on cocoapi
        from ..preprocessing.coco import CocoGenerator

        return CocoGenerator(args.coco_path, args.coco_set, **shared_kwargs)

    if dataset_type == 'pascal':
        return PascalVocGenerator(
            args.pascal_path,
            args.pascal_set,
            image_extension=args.image_extension,
            **shared_kwargs
        )

    if dataset_type == 'csv':
        return CSVGenerator(args.annotations, args.classes, **shared_kwargs)

    if dataset_type == 'oid':
        return OpenImagesGenerator(
            args.main_dir,
            subset=args.subset,
            version=args.version,
            labels_filter=args.labels_filter,
            parent_label=args.parent_label,
            annotation_cache_dir=args.annotation_cache_dir,
            **shared_kwargs
        )

    if dataset_type == 'kitti':
        return KittiGenerator(args.kitti_path, subset=args.subset, **shared_kwargs)

    raise ValueError('Invalid data type received: {}'.format(dataset_type))


def parse_args(args):
    """ Parse the command line of the debug script.

    The first positional argument selects the dataset type ('coco', 'pascal', 'kitti', 'oid' or 'csv'),
    each of which has its own sub-parser; the remaining options are shared by all dataset types.

    Args
        args: List of argument strings (without the program name).

    Returns
        The argparse namespace; the selected dataset type is stored in `dataset_type`.
    """
    parser = argparse.ArgumentParser(description='Debug script for a RetinaNet network.')
    subparsers = parser.add_subparsers(help='Arguments for specific dataset types.', dest='dataset_type')
    subparsers.required = True

    def split_on_commas(string):
        # turns "a,b,c" into ['a', 'b', 'c']
        return string.split(',')

    # dataset type -> ordered (flags, keyword options) of its sub-parser
    dataset_specs = (
        ('coco', (
            (('coco_path',), dict(help='Path to dataset directory (ie. /tmp/COCO).')),
            (('--coco-set',), dict(help='Name of the set to show (defaults to val2017).', default='val2017')),
        )),
        ('pascal', (
            (('pascal_path',), dict(help='Path to dataset directory (ie. /tmp/VOCdevkit).')),
            (('--pascal-set',), dict(help='Name of the set to show (defaults to test).', default='test')),
            (('--image-extension',), dict(help="Declares the dataset images' extension.", default='.jpg')),
        )),
        ('kitti', (
            (('kitti_path',), dict(help='Path to dataset directory (ie. /tmp/kitti).')),
            (('subset',), dict(help='Argument for loading a subset from train/val.')),
        )),
        ('oid', (
            (('main_dir',), dict(help='Path to dataset directory.')),
            (('subset',), dict(help='Argument for loading a subset from train/validation/test.')),
            (('--version',), dict(help='The current dataset version is v4.', default='v4')),
            (('--labels-filter',), dict(help='A list of labels to filter.', type=split_on_commas, default=None)),
            (('--annotation-cache-dir',), dict(help='Path to store annotation cache.', default='.')),
            (('--parent-label',), dict(help='Use the hierarchy children of this label.', default=None)),
        )),
        ('csv', (
            (('annotations',), dict(help='Path to CSV file containing annotations for evaluation.')),
            (('classes',), dict(help='Path to a CSV file containing class label mapping.')),
        )),
    )
    for dataset_name, specs in dataset_specs:
        dataset_parser = subparsers.add_parser(dataset_name)
        for flags, options in specs:
            dataset_parser.add_argument(*flags, **options)

    # options shared by every dataset type
    shared_specs = (
        (('--no-resize',), dict(help='Disable image resizing.', dest='resize', action='store_false')),
        (('--anchors',), dict(help='Show positive anchors on the image.', action='store_true')),
        (('--display-name',), dict(help='Display image name on the bottom left corner.', action='store_true')),
        (('--show-annotations',), dict(help="Show annotations on the image. Green annotations have anchors, red annotations don't and therefore don't contribute to training.", action='store_true')),
        (('--random-transform',), dict(help='Randomly transform image and annotations.', action='store_true')),
        (('--image-min-side',), dict(help='Rescale the image so the smallest side is min_side.', type=int, default=800)),
        (('--image-max-side',), dict(help='Rescale the image if the largest side is larger than max_side.', type=int, default=1333)),
        (('--config',), dict(help='Path to a configuration parameters .ini file.')),
        (('--no-gui',), dict(help='Do not open a GUI window. Save images to an output directory instead.', action='store_true')),
        (('--output-dir',), dict(help='The output directory to save images to if --no-gui is specified.', default='.')),
        (('--flatten-output',), dict(help='Flatten the folder structure of saved output images into a single folder.', action='store_true')),
        (('--group-method',), dict(help='Determines how images are grouped together', type=str, default='ratio', choices=['none', 'random', 'ratio'])),
    )
    for flags, options in shared_specs:
        parser.add_argument(*flags, **options)

    return parser.parse_args(args)


def run(generator, args, anchor_params, pyramid_levels):
    """ Main loop: visit the images of the generator one at a time, decorate them and show or save them.

    Args
        generator     : The generator to debug.
        args          : parseargs args object.
        anchor_params : Anchor parameters forwarded to anchors_for_shape (may be None).
        pyramid_levels: Pyramid levels forwarded to anchors_for_shape (may be None).

    Returns
        False when the user quits the GUI with 'q' or Esc, True once all images were written in --no-gui mode.
    """
    index = 0
    while True:
        # load the data
        image = generator.load_image(index)
        annotations = generator.load_annotations(index)

        # images without any label are shown / saved untouched
        if len(annotations['labels']) > 0:
            # apply random transformations
            if args.random_transform:
                image, annotations = generator.random_transform_group_entry(image, annotations)
                image, annotations = generator.random_visual_effect_group_entry(image, annotations)

            # resize the image and annotations
            if args.resize:
                image, image_scale = generator.resize_image(image)
                annotations['bboxes'] *= image_scale

            anchors = anchors_for_shape(image.shape, anchor_params=anchor_params, pyramid_levels=pyramid_levels)
            positive_indices, _, max_indices = compute_gt_annotations(anchors, annotations['bboxes'])

            # draw anchors on the image
            if args.anchors:
                draw_boxes(image, anchors[positive_indices], (255, 255, 0), thickness=1)

            if args.show_annotations:
                # all annotations are first drawn in red ...
                draw_annotations(image, annotations, color=(0, 0, 255), label_to_name=generator.label_to_name)

                # ... then the ones matched by a positive anchor are redrawn in green,
                # so annotations without anchors stay red and the ones with anchors end up green
                matched = annotations['bboxes'][max_indices[positive_indices], :]
                draw_boxes(image, matched, (0, 255, 0))

            # display name on the image
            if args.display_name:
                draw_caption(image, [0, image.shape[0]], os.path.basename(generator.image_path(index)))

        if args.no_gui:
            # write to file and advance; stop once every image has been written
            output_path = make_output_path(args.output_dir, generator.image_path(index), flatten=args.flatten_output)
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            cv2.imwrite(output_path, image)
            index += 1
            if index == generator.size():
                break
            continue

        # GUI mode: show the image and wait for a key
        cv2.imshow('Image', image)
        key = cv2.waitKeyEx()

        # right / left arrows step forward / backward (linux or windows, doesn't work for macOS);
        # "m" / "n" do the same on every platform
        if key in rightkeys:
            index = (index + 1) % generator.size()
        if key in leftkeys:
            index = index - 1 if index > 0 else generator.size() - 1

        # press q or Esc to quit
        if key in (ord('q'), 27):
            return False

    return True


def make_output_path(output_dir, image_path, flatten = False):
    """ Compute the output path for a debug image.

    Args
        output_dir: Directory the debug image is written below.
        image_path: Path of the source image.
        flatten   : If True, only the file name of `image_path` is kept; otherwise its folder structure is
                    reproduced below `output_dir`.

    Returns
        `output_dir` joined with the (possibly flattened) image path, with "_debug" inserted before the extension.
    """
    if flatten:
        # a flat output folder only needs the file name
        relative = os.path.basename(image_path)
    else:
        # drop a Windows drive letter first, otherwise relpath will fail
        relative = os.path.splitdrive(image_path)[1]
        # absolute paths are taken relative to the filesystem root
        if os.path.isabs(relative):
            relative = os.path.relpath(relative, '/')

    # "_debug" goes between the file name and its extension
    stem, suffix = os.path.splitext(relative)

    return os.path.join(output_dir, stem + "_debug" + suffix)


def main(args=None):
    """ Entry point of the debug script: build the generator and run the display / save loop.

    Args
        args: Optional list of command line argument strings; `sys.argv[1:]` is used when omitted.
    """
    # parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # make sure tensorflow is the minimum required version
    check_tf_version()

    # Load the config parameters *before* creating the generator: create_generator forwards
    # args.config to the generator, which must receive the parsed configuration and not the raw
    # path string (this is the same order the evaluation script uses).
    if args.config:
        args.config = read_config_file(args.config)

    # create the generator
    generator = create_generator(args)

    # optionally load anchor parameters
    anchor_params = None
    if args.config and 'anchor_parameters' in args.config:
        anchor_params = parse_anchor_parameters(args.config)

    # optionally load pyramid levels
    pyramid_levels = None
    if args.config and 'pyramid_levels' in args.config:
        pyramid_levels = parse_pyramid_levels(args.config)

    # create the display window if necessary
    if not args.no_gui:
        cv2.namedWindow('Image', cv2.WINDOW_NORMAL)

    run(generator, args, anchor_params=anchor_params, pyramid_levels=pyramid_levels)


if __name__ == '__main__':
    main()


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/bin/evaluate.py
================================================
#!/usr/bin/env python

"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import argparse
import os
import sys

# Allow relative imports when being executed as script.
if __name__ == "__main__" and __package__ is None:
    sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
    import keras_retinanet.bin  # noqa: F401
    __package__ = "keras_retinanet.bin"

# Change these to absolute imports if you copy this script outside the keras_retinanet package.
from .. import models
from ..preprocessing.csv_generator import CSVGenerator
from ..preprocessing.pascal_voc import PascalVocGenerator
from ..utils.anchors import make_shapes_callback
from ..utils.config import read_config_file, parse_anchor_parameters, parse_pyramid_levels
from ..utils.eval import evaluate
from ..utils.gpu import setup_gpu
from ..utils.tf_version import check_tf_version


def create_generator(args, preprocess_image):
    """ Build the (unshuffled) generator used for evaluation.

    Args
        args            : parseargs arguments object; `args.dataset_type` selects 'coco', 'pascal' or 'csv'.
        preprocess_image: Image preprocessing function handed to the generator.

    Returns
        The evaluation generator for the selected dataset type.

    Raises
        ValueError: if `args.dataset_type` is none of the supported types.
    """
    # settings shared by every dataset type; evaluation never shuffles the groups
    generator_kwargs = {
        'shuffle_groups'   : False,
        'config'           : args.config,
        'image_min_side'   : args.image_min_side,
        'image_max_side'   : args.image_max_side,
        'no_resize'        : args.no_resize,
        'preprocess_image' : preprocess_image,
        'group_method'     : args.group_method,
    }

    dataset_type = args.dataset_type

    if dataset_type == 'coco':
        # import here to prevent unnecessary dependency on cocoapi
        from ..preprocessing.coco import CocoGenerator

        return CocoGenerator(args.coco_path, 'val2017', **generator_kwargs)

    if dataset_type == 'pascal':
        return PascalVocGenerator(
            args.pascal_path,
            'test',
            image_extension=args.image_extension,
            **generator_kwargs
        )

    if dataset_type == 'csv':
        return CSVGenerator(args.annotations, args.classes, **generator_kwargs)

    raise ValueError('Invalid data type received: {}'.format(dataset_type))


def parse_args(args):
    """ Parse the command line of the evaluation script.

    The first positional argument selects the dataset type ('coco', 'pascal' or 'csv'), each with its own
    sub-parser; the model path and the remaining options are shared by all dataset types.

    Args
        args: List of argument strings (without the program name).

    Returns
        The argparse namespace; the selected dataset type is stored in `dataset_type`.
    """
    parser     = argparse.ArgumentParser(description='Evaluation script for a RetinaNet network.')
    subparsers = parser.add_subparsers(help='Arguments for specific dataset types.', dest='dataset_type')
    subparsers.required = True

    coco_parser = subparsers.add_parser('coco')
    coco_parser.add_argument('coco_path', help='Path to dataset directory (ie. /tmp/COCO).')

    pascal_parser = subparsers.add_parser('pascal')
    pascal_parser.add_argument('pascal_path', help='Path to dataset directory (ie. /tmp/VOCdevkit).')
    pascal_parser.add_argument('--image-extension',   help='Declares the dataset images\' extension.', default='.jpg')

    csv_parser = subparsers.add_parser('csv')
    csv_parser.add_argument('annotations', help='Path to CSV file containing annotations for evaluation.')
    csv_parser.add_argument('classes', help='Path to a CSV file containing class label mapping.')

    parser.add_argument('model',              help='Path to RetinaNet model.')
    parser.add_argument('--convert-model',    help='Convert the model to an inference model (ie. the input is a training model).', action='store_true')
    parser.add_argument('--backbone',         help='The backbone of the model.', default='resnet50')
    parser.add_argument('--gpu',              help='Id of the GPU to use (as reported by nvidia-smi).')
    parser.add_argument('--score-threshold',  help='Threshold on score to filter detections with (defaults to 0.05).', default=0.05, type=float)
    parser.add_argument('--iou-threshold',    help='IoU Threshold to count for a positive detection (defaults to 0.5).', default=0.5, type=float)
    parser.add_argument('--max-detections',   help='Max Detections per image (defaults to 100).', default=100, type=int)
    parser.add_argument('--save-path',        help='Path for saving images with detections (doesn\'t work for COCO).')
    parser.add_argument('--image-min-side',   help='Rescale the image so the smallest side is min_side.', type=int, default=800)
    parser.add_argument('--image-max-side',   help='Rescale the image if the largest side is larger than max_side.', type=int, default=1333)
    # The apostrophe has to be escaped: 'Don''t ...' is two adjacent literals that concatenate to "Dont ...".
    parser.add_argument('--no-resize',        help='Don\'t rescale the image.', action='store_true')
    parser.add_argument('--config',           help='Path to a configuration parameters .ini file (only used with --convert-model).')
    parser.add_argument('--group-method',     help='Determines how images are grouped together', type=str, default='ratio', choices=['none', 'random', 'ratio'])

    return parser.parse_args(args)


def main(args=None):
    """ Evaluate a RetinaNet model on the selected dataset and print the results.

    For the 'coco' dataset type the evaluation is delegated to `evaluate_coco`; for the other types the
    per-class average precision, the inference time and two mAP figures are printed.

    Args
        args: Optional list of command line argument strings; `sys.argv[1:]` is used when omitted.
    """
    args = parse_args(sys.argv[1:] if args is None else args)

    # make sure tensorflow is the minimum required version
    check_tf_version()

    # optionally choose specific GPU
    if args.gpu:
        setup_gpu(args.gpu)

    # make save path if it doesn't exist
    if args.save_path is not None and not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    # optionally load config parameters
    if args.config:
        args.config = read_config_file(args.config)

    # create the generator
    backbone = models.backbone(args.backbone)
    generator = create_generator(args, backbone.preprocess_image)

    # anchor / pyramid overrides only exist when the config provides them
    anchor_params = None
    pyramid_levels = None
    if args.config:
        if 'anchor_parameters' in args.config:
            anchor_params = parse_anchor_parameters(args.config)
        if 'pyramid_levels' in args.config:
            pyramid_levels = parse_pyramid_levels(args.config)

    # load the model
    print('Loading model, this may take a second...')
    model = models.load_model(args.model, backbone_name=args.backbone)
    generator.compute_shapes = make_shapes_callback(model)

    # optionally convert the model
    if args.convert_model:
        model = models.convert_model(model, anchor_params=anchor_params, pyramid_levels=pyramid_levels)

    # COCO has its own evaluation routine
    if args.dataset_type == 'coco':
        from ..utils.coco_eval import evaluate_coco
        evaluate_coco(generator, model, args.score_threshold)
        return

    average_precisions, inference_time = evaluate(
        generator,
        model,
        iou_threshold=args.iou_threshold,
        score_threshold=args.score_threshold,
        max_detections=args.max_detections,
        save_path=args.save_path
    )

    # report every class and collect the numbers needed for the summary
    instance_counts = []
    class_precisions = []
    for label, (average_precision, num_annotations) in average_precisions.items():
        print('{:.0f} instances of class'.format(num_annotations),
              generator.label_to_name(label), 'with average precision: {:.4f}'.format(average_precision))
        instance_counts.append(num_annotations)
        class_precisions.append(average_precision)

    instance_total = sum(instance_counts)
    if instance_total == 0:
        print('No test instances found.')
        return

    print('Inference time for {:.0f} images: {:.4f}'.format(generator.size(), inference_time))

    # precision weighted by the number of instances of each class
    weighted_precision = sum([count * precision for count, precision in zip(instance_counts, class_precisions)]) / instance_total
    print('mAP using the weighted average of precisions among classes: {:.4f}'.format(weighted_precision))

    # plain mean over the classes that actually have instances
    populated_classes = sum(count > 0 for count in instance_counts)
    print('mAP: {:.4f}'.format(sum(class_precisions) / populated_classes))


if __name__ == '__main__':
    main()


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/bin/train.py
================================================
#!/usr/bin/env python

"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import argparse
import os
import sys
import warnings

from tensorflow import keras
import tensorflow as tf

# Allow relative imports when being executed as script.
# When run directly there is no parent package, so the directory two levels above
# this file is put on sys.path, the package is imported, and __package__ is set
# by hand so that the relative imports below can be resolved.
if __name__ == "__main__" and __package__ is None:
    sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
    import keras_retinanet.bin  # noqa: F401
    __package__ = "keras_retinanet.bin"

# Change these to absolute imports if you copy this script outside the keras_retinanet package.
from .. import layers  # noqa: F401
from .. import losses
from .. import models
from ..callbacks import RedirectModel
from ..callbacks.eval import Evaluate
from ..models.retinanet import retinanet_bbox
from ..preprocessing.csv_generator import CSVGenerator
from ..preprocessing.kitti import KittiGenerator
from ..preprocessing.open_images import OpenImagesGenerator
from ..preprocessing.pascal_voc import PascalVocGenerator
from ..utils.anchors import make_shapes_callback
from ..utils.config import read_config_file, parse_anchor_parameters, parse_pyramid_levels
from ..utils.gpu import setup_gpu
from ..utils.image import random_visual_effect_generator
from ..utils.model import freeze as freeze_model
from ..utils.tf_version import check_tf_version
from ..utils.transform import random_transform_generator


def makedirs(path):
    """ Create a directory (and missing parents), tolerating one that already exists.

    Kept as a small helper for Python 2.7/3.n compatibility.

    Args
        path : The directory to create.

    Raises
        OSError : If creation failed and `path` is not an existing directory.
    """
    try:
        os.makedirs(path)
    except OSError:
        if os.path.isdir(path):
            # the directory is already there, which is all the caller asked for
            return
        raise


def model_with_weights(model, weights, skip_mismatch):
    """ Optionally load weights into a model and hand the model back.

    Args
        model         : The model to load weights for.
        weights       : The weights to load; when None, nothing is loaded.
        skip_mismatch : If True, skips layers whose shape of weights doesn't match with the model.

    Returns
        The same model object that was passed in.
    """
    if weights is None:
        return model

    # weights are matched to layers by name
    model.load_weights(weights, by_name=True, skip_mismatch=skip_mismatch)
    return model


def create_models(backbone_retinanet, num_classes, weights, multi_gpu=0,
                  freeze_backbone=False, lr=1e-5, optimizer_clipnorm=0.001, config=None):
    """ Creates three models (model, training_model, prediction_model).

    Args
        backbone_retinanet : A function to call to create a retinanet model with a given backbone.
        num_classes        : The number of classes to train.
        weights            : The weights to load into the model.
        multi_gpu          : The number of GPUs to use for training.
        freeze_backbone    : If True, disables learning for the backbone.
        lr                 : Learning rate handed to the Adam optimizer.
        optimizer_clipnorm : Clipnorm value handed to the Adam optimizer.
        config             : Config parameters, None indicates the default configuration.

    Returns
        model            : The base model. This is also the model that is saved in snapshots.
        training_model   : The training model. If multi_gpu=0, this is identical to model.
        prediction_model : The model wrapped with utility functions to perform object detection (applies regression values and performs NMS).
    """

    modifier = freeze_model if freeze_backbone else None

    # load anchor parameters, or pass None (so that defaults will be used)
    anchor_params = None
    num_anchors   = None
    pyramid_levels = None
    if config and 'anchor_parameters' in config:
        anchor_params = parse_anchor_parameters(config)
        num_anchors   = anchor_params.num_anchors()
    if config and 'pyramid_levels' in config:
        pyramid_levels = parse_pyramid_levels(config)

    def build_model():
        # both branches below construct the base model in exactly the same way
        return model_with_weights(
            backbone_retinanet(num_classes, num_anchors=num_anchors, modifier=modifier, pyramid_levels=pyramid_levels),
            weights=weights,
            skip_mismatch=True
        )

    # Keras recommends initialising a multi-gpu model on the CPU to ease weight sharing, and to prevent OOM errors.
    # optionally wrap in a parallel model
    if multi_gpu > 1:
        # Import from tensorflow.keras so the wrapper comes from the same Keras
        # implementation as the models created in this file.
        # NOTE(review): multi_gpu_model is not shipped by newer TensorFlow releases -
        # confirm the TensorFlow version in use still provides it.
        from tensorflow.keras.utils import multi_gpu_model
        with tf.device('/cpu:0'):
            model = build_model()
        training_model = multi_gpu_model(model, gpus=multi_gpu)
    else:
        model          = build_model()
        training_model = model

    # make prediction model
    prediction_model = retinanet_bbox(model=model, anchor_params=anchor_params, pyramid_levels=pyramid_levels)

    # compile model
    training_model.compile(
        loss={
            'regression'    : losses.smooth_l1(),
            'classification': losses.focal()
        },
        # 'learning_rate' is the supported keyword; 'lr' is only a deprecated alias
        optimizer=keras.optimizers.Adam(learning_rate=lr, clipnorm=optimizer_clipnorm)
    )

    return model, training_model, prediction_model


def create_callbacks(model, training_model, prediction_model, validation_generator, args):
    """ Assemble the list of Keras callbacks used while training.

    The order of the returned list is: evaluation (optional), snapshot saving
    (optional), learning rate reduction, early stopping on mAP (optional) and
    TensorBoard (optional).

    Args
        model: The base model; this is the model written to snapshots.
        training_model: The model that is used for training (not used by this function).
        prediction_model: The model that should be used for validation.
        validation_generator: The generator for creating validation data.
        args: parseargs args object.

    Returns:
        A list of callbacks used for training.
    """
    tensorboard_callback = None
    if args.tensorboard_dir:
        makedirs(args.tensorboard_dir)

        # anything other than the two keywords is interpreted as an integer
        frequency = args.tensorboard_freq
        if frequency not in ['epoch', 'batch']:
            frequency = int(frequency)

        tensorboard_callback = keras.callbacks.TensorBoard(
            log_dir=args.tensorboard_dir,
            histogram_freq=0,
            batch_size=args.batch_size,
            write_graph=True,
            write_grads=False,
            write_images=False,
            update_freq=frequency,
            embeddings_freq=0,
            embeddings_layer_names=None,
            embeddings_metadata=None
        )

    evaluate_each_epoch = bool(args.evaluation and validation_generator)

    callbacks = []

    if evaluate_each_epoch:
        if args.dataset_type == 'coco':
            from ..callbacks.coco import CocoEval

            evaluator = CocoEval(validation_generator, tensorboard=tensorboard_callback)
        else:
            evaluator = Evaluate(
                validation_generator,
                tensorboard=tensorboard_callback,
                weighted_average=args.weighted_average
            )
        # evaluation has to run on the prediction model rather than the training model
        callbacks.append(RedirectModel(evaluator, prediction_model))

    if args.snapshots:
        # the snapshot directory must exist up front; otherwise h5py will error after the epoch.
        makedirs(args.snapshot_path)
        snapshot_name = '{backbone}_{dataset_type}_{{epoch:02d}}.h5'.format(
            backbone=args.backbone,
            dataset_type=args.dataset_type
        )
        # a snapshot is written every epoch (save_best_only / monitor / mode are left at their defaults)
        saver = keras.callbacks.ModelCheckpoint(
            os.path.join(args.snapshot_path, snapshot_name),
            verbose=1
        )
        # snapshots store the base model
        callbacks.append(RedirectModel(saver, model))

    lr_schedule = keras.callbacks.ReduceLROnPlateau(
        monitor='loss',
        factor=args.reduce_lr_factor,
        patience=args.reduce_lr_patience,
        verbose=1,
        mode='auto',
        min_delta=0.0001,
        cooldown=0,
        min_lr=0
    )
    callbacks.append(lr_schedule)

    if evaluate_each_epoch:
        # 'mAP' is only present in the logs when an evaluation callback is registered
        early_stop = keras.callbacks.EarlyStopping(
            monitor='mAP',
            patience=5,
            mode='max',
            min_delta=0.01
        )
        callbacks.append(early_stop)

    if args.tensorboard_dir:
        callbacks.append(tensorboard_callback)

    return callbacks


def create_generators(args, preprocess_image):
    """ Build the training and validation generators for the selected dataset type.

    Args
        args             : parseargs object containing configuration for generators.
        preprocess_image : Function that preprocesses an image for the network.

    Returns
        A (train_generator, validation_generator) tuple. For the 'csv' dataset type the
        validation generator is None when no validation annotations were given.

    Raises
        ValueError : If args.dataset_type is not one of the handled dataset types.
    """
    shared_kwargs = dict(
        batch_size=args.batch_size,
        config=args.config,
        image_min_side=args.image_min_side,
        image_max_side=args.image_max_side,
        no_resize=args.no_resize,
        preprocess_image=preprocess_image,
        group_method=args.group_method,
    )

    # augmentation applied to the training data only
    if not args.random_transform:
        transform_generator = random_transform_generator(flip_x_chance=0.5)
        visual_effect_generator = None
    else:
        transform_generator = random_transform_generator(
            min_rotation=-0.1,
            max_rotation=0.1,
            min_translation=(-0.1, -0.1),
            max_translation=(0.1, 0.1),
            min_shear=-0.1,
            max_shear=0.1,
            min_scaling=(0.9, 0.9),
            max_scaling=(1.1, 1.1),
            flip_x_chance=0.5,
            flip_y_chance=0.5,
        )
        visual_effect_generator = random_visual_effect_generator(
            contrast_range=(0.9, 1.1),
            brightness_range=(-.1, .1),
            hue_range=(-0.05, 0.05),
            saturation_range=(0.95, 1.05)
        )

    # training generators get the augmentation, validation generators keep a fixed group order
    train_kwargs = dict(
        shared_kwargs,
        transform_generator=transform_generator,
        visual_effect_generator=visual_effect_generator
    )
    validation_kwargs = dict(shared_kwargs, shuffle_groups=False)

    dataset_type = args.dataset_type

    if dataset_type == 'coco':
        # import here to prevent unnecessary dependency on cocoapi
        from ..preprocessing.coco import CocoGenerator

        train_generator = CocoGenerator(args.coco_path, 'train2017', **train_kwargs)
        validation_generator = CocoGenerator(args.coco_path, 'val2017', **validation_kwargs)

    elif dataset_type == 'pascal':
        train_generator = PascalVocGenerator(
            args.pascal_path,
            'train',
            image_extension=args.image_extension,
            **train_kwargs
        )
        validation_generator = PascalVocGenerator(
            args.pascal_path,
            'val',
            image_extension=args.image_extension,
            **validation_kwargs
        )

    elif dataset_type == 'csv':
        train_generator = CSVGenerator(args.annotations, args.classes, **train_kwargs)

        # validation annotations are optional for csv datasets
        validation_generator = None
        if args.val_annotations:
            validation_generator = CSVGenerator(args.val_annotations, args.classes, **validation_kwargs)

    elif dataset_type == 'oid':
        oid_kwargs = dict(
            version=args.version,
            labels_filter=args.labels_filter,
            annotation_cache_dir=args.annotation_cache_dir,
            parent_label=args.parent_label,
        )
        train_generator = OpenImagesGenerator(
            args.main_dir,
            subset='train',
            **dict(train_kwargs, **oid_kwargs)
        )
        validation_generator = OpenImagesGenerator(
            args.main_dir,
            subset='validation',
            **dict(validation_kwargs, **oid_kwargs)
        )

    elif dataset_type == 'kitti':
        train_generator = KittiGenerator(args.kitti_path, subset='train', **train_kwargs)
        validation_generator = KittiGenerator(args.kitti_path, subset='val', **validation_kwargs)

    else:
        raise ValueError('Invalid data type received: {}'.format(args.dataset_type))

    return train_generator, validation_generator


def check_args(parsed_args):
    """ Validate combinations of parsed arguments that contradict each other.
    For example, batch_size < num_gpus
    Intended to raise errors prior to backend initialisation.

    Args
        parsed_args: parser.parse_args()

    Returns
        parsed_args, unchanged.

    Raises
        ValueError : If multi-GPU training is requested with a batch size smaller than the
                     number of GPUs, together with a snapshot, or without the force flag.
    """
    gpu_count = parsed_args.multi_gpu

    # every hard error concerns multi-GPU training
    if gpu_count > 1:
        if parsed_args.batch_size < gpu_count:
            raise ValueError(
                "Batch size ({}) must be equal to or higher than the number of GPUs ({})".format(
                    parsed_args.batch_size, gpu_count))

        if parsed_args.snapshot:
            raise ValueError(
                "Multi GPU training ({}) and resuming from snapshots ({}) is not supported.".format(
                    gpu_count, parsed_args.snapshot))

        if not parsed_args.multi_gpu_force:
            raise ValueError("Multi-GPU support is experimental, use at own risk! Run with --multi-gpu-force if you wish to continue.")

    # backbones without 'resnet' in their name only trigger a warning
    backbone_name = parsed_args.backbone
    if 'resnet' not in backbone_name:
        warnings.warn('Using experimental backbone {}. Only resnet50 has been properly tested.'.format(backbone_name))

    return parsed_args


def parse_args(args):
    """ Parse the command line arguments of the training script.

    The dataset type (coco, pascal, kitti, oid or csv) is a required sub-command that
    carries its own positional arguments; the general options are defined on the
    top-level parser.

    Args
        args : List of argument strings (e.g. sys.argv[1:]).

    Returns
        The parsed arguments after they have been validated by check_args.
    """
    parser     = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    subparsers = parser.add_subparsers(help='Arguments for specific dataset types.', dest='dataset_type')
    subparsers.required = True

    coco_parser = subparsers.add_parser('coco')
    coco_parser.add_argument('coco_path', help='Path to dataset directory (ie. /tmp/COCO).')

    pascal_parser = subparsers.add_parser('pascal')
    pascal_parser.add_argument('pascal_path', help='Path to dataset directory (ie. /tmp/VOCdevkit).')
    pascal_parser.add_argument('--image-extension',   help='Declares the dataset images\' extension.', default='.jpg')

    kitti_parser = subparsers.add_parser('kitti')
    kitti_parser.add_argument('kitti_path', help='Path to dataset directory (ie. /tmp/kitti).')

    def csv_list(string):
        # turns "a,b,c" into ['a', 'b', 'c']
        return string.split(',')

    oid_parser = subparsers.add_parser('oid')
    oid_parser.add_argument('main_dir', help='Path to dataset directory.')
    oid_parser.add_argument('--version',  help='The current dataset version is v4.', default='v4')
    oid_parser.add_argument('--labels-filter',  help='A list of labels to filter.', type=csv_list, default=None)
    oid_parser.add_argument('--annotation-cache-dir', help='Path to store annotation cache.', default='.')
    oid_parser.add_argument('--parent-label', help='Use the hierarchy children of this label.', default=None)

    csv_parser = subparsers.add_parser('csv')
    csv_parser.add_argument('annotations', help='Path to CSV file containing annotations for training.')
    csv_parser.add_argument('classes', help='Path to a CSV file containing class label mapping.')
    csv_parser.add_argument('--val-annotations', help='Path to CSV file containing annotations for validation (optional).')

    # the weight sources are mutually exclusive; --no-weights writes to the same destination as --imagenet-weights
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--snapshot',          help='Resume training from a snapshot.')
    group.add_argument('--imagenet-weights',  help='Initialize the model with pretrained imagenet weights. This is the default behaviour.', action='store_const', const=True, default=True)
    group.add_argument('--weights',           help='Initialize the model with weights from a file.')
    group.add_argument('--no-weights',        help='Don\'t initialize the model with any weights.', dest='imagenet_weights', action='store_const', const=False)
    parser.add_argument('--backbone',         help='Backbone model used by retinanet.', default='resnet50', type=str)
    parser.add_argument('--batch-size',       help='Size of the batches.', default=1, type=int)
    parser.add_argument('--gpu',              help='Id of the GPU to use (as reported by nvidia-smi).')
    parser.add_argument('--multi-gpu',        help='Number of GPUs to use for parallel processing.', type=int, default=0)
    parser.add_argument('--multi-gpu-force',  help='Extra flag needed to enable (experimental) multi-gpu support.', action='store_true')
    parser.add_argument('--initial-epoch',    help='Epoch from which to begin the train, useful if resuming from snapshot.', type=int, default=0)
    parser.add_argument('--epochs',           help='Number of epochs to train.', type=int, default=50)
    parser.add_argument('--steps',            help='Number of steps per epoch.', type=int, default=10000)
    parser.add_argument('--lr',               help='Learning rate.', type=float, default=1e-5)
    parser.add_argument('--optimizer-clipnorm', help='Clipnorm parameter for  optimizer.', type=float, default=0.001)
    parser.add_argument('--snapshot-path',    help='Path to store snapshots of models during training (defaults to \'./snapshots\')', default='./snapshots')
    parser.add_argument('--tensorboard-dir',  help='Log directory for Tensorboard output', default='')  # default='./logs') => https://github.com/tensorflow/tensorflow/pull/34870
    parser.add_argument('--tensorboard-freq', help='Update frequency for Tensorboard output. Values \'epoch\', \'batch\' or int', default='epoch')
    parser.add_argument('--no-snapshots',     help='Disable saving snapshots.', dest='snapshots', action='store_false')
    parser.add_argument('--no-evaluation',    help='Disable per epoch evaluation.', dest='evaluation', action='store_false')
    parser.add_argument('--freeze-backbone',  help='Freeze training of backbone layers.', action='store_true')
    parser.add_argument('--random-transform', help='Randomly transform image and annotations.', action='store_true')
    parser.add_argument('--image-min-side',   help='Rescale the image so the smallest side is min_side.', type=int, default=800)
    parser.add_argument('--image-max-side',   help='Rescale the image if the largest side is larger than max_side.', type=int, default=1333)
    # the previous literal 'Don''t ...' was two adjacent strings and rendered as "Dont"
    parser.add_argument('--no-resize',        help='Don\'t rescale the image.', action='store_true')
    parser.add_argument('--config',           help='Path to a configuration parameters .ini file.')
    parser.add_argument('--weighted-average', help='Compute the mAP using the weighted average of precisions among classes.', action='store_true')
    parser.add_argument('--compute-val-loss', help='Compute validation loss during training', dest='compute_val_loss', action='store_true')
    # the learning rate callback in this script monitors the training loss
    parser.add_argument('--reduce-lr-patience', help='Number of epochs without improvement of the training loss after which the learning rate is reduced', type=int, default=2)
    parser.add_argument('--reduce-lr-factor', help='When learning rate is reduced due to reduce_lr_patience, multiply by reduce_lr_factor', type=float, default=0.1)
    parser.add_argument('--group-method',     help='Determines how images are grouped together', type=str, default='ratio', choices=['none', 'random', 'ratio'])

    # Fit generator arguments
    parser.add_argument('--multiprocessing',  help='Use multiprocessing in fit_generator.', action='store_true')
    parser.add_argument('--workers',          help='Number of generator workers.', type=int, default=1)
    parser.add_argument('--max-queue-size',   help='Queue length for multiprocessing workers in fit_generator.', type=int, default=10)

    return check_args(parser.parse_args(args))


def main(args=None):
    """ Entry point of the training script.

    Parses the arguments, creates the data generators, builds the models (or loads
    them from a snapshot), sets up the callbacks and starts training.

    Args
        args : List of argument strings; when None, sys.argv[1:] is used.

    Returns
        The value returned by training_model.fit_generator.
    """
    # parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # create object that stores backbone information
    backbone = models.backbone(args.backbone)

    # make sure tensorflow is the minimum required version
    check_tf_version()

    # optionally choose specific GPU
    if args.gpu is not None:
        setup_gpu(args.gpu)

    # optionally load config parameters
    if args.config:
        args.config = read_config_file(args.config)

    # create the generators
    train_generator, validation_generator = create_generators(args, backbone.preprocess_image)

    # create the model
    if args.snapshot is not None:
        print('Loading model, this may take a second...')
        model            = models.load_model(args.snapshot, backbone_name=args.backbone)
        training_model   = model
        anchor_params    = None
        pyramid_levels   = None
        if args.config and 'anchor_parameters' in args.config:
            anchor_params = parse_anchor_parameters(args.config)
        if args.config and 'pyramid_levels' in args.config:
            pyramid_levels = parse_pyramid_levels(args.config)

        prediction_model = retinanet_bbox(model=model, anchor_params=anchor_params, pyramid_levels=pyramid_levels)
    else:
        weights = args.weights
        # default to imagenet if nothing else is specified
        if weights is None and args.imagenet_weights:
            weights = backbone.download_imagenet()

        print('Creating model, this may take a second...')
        model, training_model, prediction_model = create_models(
            backbone_retinanet=backbone.retinanet,
            num_classes=train_generator.num_classes(),
            weights=weights,
            multi_gpu=args.multi_gpu,
            freeze_backbone=args.freeze_backbone,
            lr=args.lr,
            optimizer_clipnorm=args.optimizer_clipnorm,
            config=args.config
        )

    # print model summary
    # summary() writes the table itself and returns None, so wrapping it in print()
    # only added a stray "None" line to the output
    model.summary()

    # this lets the generator compute backbone layer shapes using the actual backbone model
    if 'vgg' in args.backbone or 'densenet' in args.backbone:
        train_generator.compute_shapes = make_shapes_callback(model)
        if validation_generator:
            validation_generator.compute_shapes = train_generator.compute_shapes

    # create the callbacks (they receive the validation generator even when no validation loss is computed)
    callbacks = create_callbacks(
        model,
        training_model,
        prediction_model,
        validation_generator,
        args,
    )

    # the validation data is only handed to Keras when a validation loss was requested
    if not args.compute_val_loss:
        validation_generator = None

    # start training
    # NOTE(review): fit_generator is deprecated in newer TensorFlow releases - confirm
    # against the TensorFlow version this package targets before switching to fit().
    return training_model.fit_generator(
        generator=train_generator,
        steps_per_epoch=args.steps,
        epochs=args.epochs,
        verbose=1,
        callbacks=callbacks,
        workers=args.workers,
        use_multiprocessing=args.multiprocessing,
        max_queue_size=args.max_queue_size,
        validation_data=validation_generator,
        initial_epoch=args.initial_epoch
    )


if __name__ == '__main__':
    main()


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/callbacks/__init__.py
================================================
from .common import *  # noqa: F401,F403


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/callbacks/coco.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from tensorflow import keras
from ..utils.coco_eval import evaluate_coco


class CocoEval(keras.callbacks.Callback):
    """ Callback that runs the COCO evaluation at the end of every epoch.
    """
    def __init__(self, generator, tensorboard=None, threshold=0.05):
        """ CocoEval callback initializer.

        Args
            generator   : The generator used for creating validation data.
            tensorboard : If given, the results will be written to tensorboard.
            threshold   : The score threshold to use.
        """
        self.generator = generator
        self.threshold = threshold
        self.tensorboard = tensorboard

        super(CocoEval, self).__init__()

    def on_epoch_end(self, epoch, logs=None):
        """ Evaluate the model and record the resulting statistics.

        Each statistic is stored in `logs` under its tag and, when a tensorboard
        callback was given, written as a scalar summary to that callback's log directory.
        Nothing is recorded when the evaluation returns None.
        """
        logs = logs or {}

        # one tag per statistic, in the order the statistics are returned
        coco_tag = ['AP @[ IoU=0.50:0.95 | area=   all | maxDets=100 ]',
                    'AP @[ IoU=0.50      | area=   all | maxDets=100 ]',
                    'AP @[ IoU=0.75      | area=   all | maxDets=100 ]',
                    'AP @[ IoU=0.50:0.95 | area= small | maxDets=100 ]',
                    'AP @[ IoU=0.50:0.95 | area=medium | maxDets=100 ]',
                    'AP @[ IoU=0.50:0.95 | area= large | maxDets=100 ]',
                    'AR @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ]',
                    'AR @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ]',
                    'AR @[ IoU=0.50:0.95 | area=   all | maxDets=100 ]',
                    'AR @[ IoU=0.50:0.95 | area= small | maxDets=100 ]',
                    'AR @[ IoU=0.50:0.95 | area=medium | maxDets=100 ]',
                    'AR @[ IoU=0.50:0.95 | area= large | maxDets=100 ]']

        stats = evaluate_coco(self.generator, self.model, self.threshold)
        if stats is None:
            return

        for position, value in enumerate(stats):
            logs[coco_tag[position]] = value

        if not self.tensorboard:
            return

        import tensorflow as tf
        writer = tf.summary.create_file_writer(self.tensorboard.log_dir)
        with writer.as_default():
            for position, value in enumerate(stats):
                # summaries are numbered starting at 1
                summary_name = '{}. {}'.format(position + 1, coco_tag[position])
                tf.summary.scalar(summary_name, value, step=epoch)
            writer.flush()


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/callbacks/common.py
================================================
from tensorflow import keras


class RedirectModel(keras.callbacks.Callback):
    """Wrap a callback so that it operates on a different model than the one being trained.

    ```python
    model = keras.models.load_model('model.h5')
    model_checkpoint = ModelCheckpoint(filepath='snapshot.h5')
    parallel_model = multi_gpu_model(model, gpus=2)
    parallel_model.fit(X_train, Y_train, callbacks=[RedirectModel(model_checkpoint, model)])
    ```

    Args
        callback : callback to wrap.
        model    : model to use when executing callbacks.
    """

    def __init__(self, callback, model):
        super(RedirectModel, self).__init__()

        self.callback = callback
        self.redirect_model = model

    # --- training lifecycle ---

    def on_train_begin(self, logs=None):
        # hand the wrapped callback our own model before it sees any event
        self.callback.set_model(self.redirect_model)

        self.callback.on_train_begin(logs=logs)

    def on_train_end(self, logs=None):
        self.callback.on_train_end(logs=logs)

    # --- per-epoch events, forwarded unchanged ---

    def on_epoch_begin(self, epoch, logs=None):
        self.callback.on_epoch_begin(epoch, logs=logs)

    def on_epoch_end(self, epoch, logs=None):
        self.callback.on_epoch_end(epoch, logs=logs)

    # --- per-batch events, forwarded unchanged ---

    def on_batch_begin(self, batch, logs=None):
        self.callback.on_batch_begin(batch, logs=logs)

    def on_batch_end(self, batch, logs=None):
        self.callback.on_batch_end(batch, logs=logs)


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/callbacks/eval.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from tensorflow import keras
from ..utils.eval import evaluate


class Evaluate(keras.callbacks.Callback):
    """ Evaluation callback for arbitrary datasets.
    """

    def __init__(
        self,
        generator,
        iou_threshold=0.5,
        score_threshold=0.05,
        max_detections=100,
        save_path=None,
        tensorboard=None,
        weighted_average=False,
        verbose=1
    ):
        """ Evaluate a given dataset using a given model at the end of every epoch during training.

        # Arguments
            generator        : The generator that represents the dataset to evaluate.
            iou_threshold    : The threshold used to consider when a detection is positive or negative.
            score_threshold  : The score confidence threshold to use for detections.
            max_detections   : The maximum number of detections to use per image.
            save_path        : The path to save images with visualized detections to.
            tensorboard      : Instance of keras.callbacks.TensorBoard used to log the mAP value.
            weighted_average : Compute the mAP using the weighted average of precisions among classes.
            verbose          : Set the verbosity level, by default this is set to 1.
        """
        self.generator       = generator
        self.iou_threshold   = iou_threshold
        self.score_threshold = score_threshold
        self.max_detections  = max_detections
        self.save_path       = save_path
        self.tensorboard     = tensorboard
        self.weighted_average = weighted_average
        self.verbose         = verbose

        super(Evaluate, self).__init__()

    def on_epoch_end(self, epoch, logs=None):
        """ Run the evaluation and store the resulting mAP in `logs['mAP']`.

        The mAP is also kept in `self.mean_ap` and, when a tensorboard callback was
        given, written as a scalar summary. When the evaluation set contains no
        annotations at all the mAP is reported as 0.0.
        """
        logs = logs or {}

        # run evaluation
        average_precisions, _ = evaluate(
            self.generator,
            self.model,
            iou_threshold=self.iou_threshold,
            score_threshold=self.score_threshold,
            max_detections=self.max_detections,
            save_path=self.save_path
        )

        # compute per class average precision
        total_instances = []
        precisions = []
        for label, (average_precision, num_annotations) in average_precisions.items():
            if self.verbose == 1:
                print('{:.0f} instances of class'.format(num_annotations),
                      self.generator.label_to_name(label), 'with average precision: {:.4f}'.format(average_precision))
            total_instances.append(num_annotations)
            precisions.append(average_precision)

        # number of classes that have at least one annotation
        annotated_classes = sum(x > 0 for x in total_instances)
        if annotated_classes == 0:
            # Without any annotation both averages below would divide by zero and
            # abort training at the end of the epoch; report 0.0 instead.
            if self.verbose == 1:
                print('No test instances found.')
            self.mean_ap = 0.0
        elif self.weighted_average:
            self.mean_ap = sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances)
        else:
            self.mean_ap = sum(precisions) / annotated_classes

        if self.tensorboard:
            import tensorflow as tf
            writer = tf.summary.create_file_writer(self.tensorboard.log_dir)
            with writer.as_default():
                tf.summary.scalar("mAP", self.mean_ap, step=epoch)
                # per class values are only written at verbosity level 1
                if self.verbose == 1:
                    for label, (average_precision, num_annotations) in average_precisions.items():
                        tf.summary.scalar("AP_" + self.generator.label_to_name(label), average_precision, step=epoch)
                writer.flush()

        logs['mAP'] = self.mean_ap

        if self.verbose == 1:
            print('mAP: {:.4f}'.format(self.mean_ap))


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/initializers.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from tensorflow import keras

import math


class PriorProbability(keras.initializers.Initializer):
    """ Initializer producing a constant tensor derived from a prior probability.

    Every element is set to -log((1 - p) / p), i.e. the logit of p, where p is
    the configured probability.
    """

    def __init__(self, probability=0.01):
        """ Store the prior probability p used by __call__.
        """
        self.probability = probability

    def get_config(self):
        """ Return the constructor arguments needed to re-create this initializer.
        """
        config = {'probability': self.probability}
        return config

    def __call__(self, shape, dtype=None):
        """ Build a tensor of the requested shape filled with the logit of p.
        """
        prior = self.probability
        # constant value -log((1 - p) / p) shared by every element
        bias_value = -math.log((1 - prior) / prior)

        return keras.backend.ones(shape, dtype=dtype) * bias_value


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/layers/__init__.py
================================================
from ._misc import RegressBoxes, UpsampleLike, Anchors, ClipBoxes  # noqa: F401
from .filter_detections import FilterDetections  # noqa: F401


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/layers/_misc.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import tensorflow
from tensorflow import keras
from .. import backend
from ..utils import anchors as utils_anchors

import numpy as np


class Anchors(keras.layers.Layer):
    """ Keras layer for generating anchors for a given shape.
    """

    def __init__(self, size, stride, ratios=None, scales=None, *args, **kwargs):
        """ Initializer for an Anchors layer.

        Args
            size: The base size of the anchors to generate.
            stride: The stride of the anchors to generate.
            ratios: The ratios of the anchors to generate (defaults to AnchorParameters.default.ratios).
                    May be a np.ndarray, list or tuple.
            scales: The scales of the anchors to generate (defaults to AnchorParameters.default.scales).
                    May be a np.ndarray, list or tuple.
        """
        self.size   = size
        self.stride = stride
        self.ratios = ratios
        self.scales = scales

        # Plain Python sequences are converted to arrays so that get_config(),
        # which calls .tolist(), works for lists and tuples alike (this matches
        # how RegressBoxes treats its mean/std arguments).
        if ratios is None:
            self.ratios  = utils_anchors.AnchorParameters.default.ratios
        elif isinstance(ratios, (list, tuple)):
            self.ratios  = np.array(ratios)
        if scales is None:
            self.scales  = utils_anchors.AnchorParameters.default.scales
        elif isinstance(scales, (list, tuple)):
            self.scales  = np.array(scales)

        # one anchor per (ratio, scale) combination
        self.num_anchors = len(self.ratios) * len(self.scales)
        self.anchors = utils_anchors.generate_anchors(
            base_size=self.size,
            ratios=self.ratios,
            scales=self.scales,
        ).astype(np.float32)

        super(Anchors, self).__init__(*args, **kwargs)

    def call(self, inputs, **kwargs):
        """ Shift the base anchors over the spatial grid of the input feature map.

        Args
            inputs: The feature tensor whose shape determines the anchor grid.

        Returns
            The shifted anchors, tiled once per batch item.
        """
        features = inputs
        features_shape = keras.backend.shape(features)

        # the spatial dimensions sit at a different position depending on the data format
        if keras.backend.image_data_format() == 'channels_first':
            anchors = backend.shift(features_shape[2:4], self.stride, self.anchors)
        else:
            anchors = backend.shift(features_shape[1:3], self.stride, self.anchors)
        # add a leading batch axis and repeat the anchors for every item in the batch
        anchors = keras.backend.tile(keras.backend.expand_dims(anchors, axis=0), (features_shape[0], 1, 1))

        return anchors

    def compute_output_shape(self, input_shape):
        """ Return (batch, total_anchors, 4); total_anchors is None when a non-batch input dimension is unknown.
        """
        if None not in input_shape[1:]:
            if keras.backend.image_data_format() == 'channels_first':
                total = np.prod(input_shape[2:4]) * self.num_anchors
            else:
                total = np.prod(input_shape[1:3]) * self.num_anchors

            return (input_shape[0], total, 4)
        else:
            return (input_shape[0], None, 4)

    def get_config(self):
        """ Return the layer configuration, with ratios and scales serialised as plain lists.
        """
        config = super(Anchors, self).get_config()
        config.update({
            'size'   : self.size,
            'stride' : self.stride,
            'ratios' : self.ratios.tolist(),
            'scales' : self.scales.tolist(),
        })

        return config


class UpsampleLike(keras.layers.Layer):
    """ Keras layer that resizes one tensor to the spatial size of another tensor.
    """

    def call(self, inputs, **kwargs):
        """ Resize `source` (inputs[0]) to the height/width of `target` (inputs[1]).

        Nearest-neighbour resizing is used. For 'channels_first' data the source
        is transposed before resizing and transposed back afterwards.
        """
        source, target = inputs
        target_shape = keras.backend.shape(target)

        if keras.backend.image_data_format() != 'channels_first':
            new_size = (target_shape[1], target_shape[2])
            return backend.resize_images(source, new_size, method='nearest')

        # move channels to the last axis, resize, then move them back
        channels_last = tensorflow.transpose(source, (0, 2, 3, 1))
        new_size = (target_shape[2], target_shape[3])
        resized = backend.resize_images(channels_last, new_size, method='nearest')
        return tensorflow.transpose(resized, (0, 3, 1, 2))

    def compute_output_shape(self, input_shape):
        """ Combine the batch/channel sizes of the source with the spatial sizes of the target.
        """
        source_shape = input_shape[0]
        target_shape = input_shape[1]

        if keras.backend.image_data_format() == 'channels_first':
            return (source_shape[0], source_shape[1]) + target_shape[2:4]
        return (source_shape[0],) + target_shape[1:3] + (source_shape[-1],)


class RegressBoxes(keras.layers.Layer):
    """ Keras layer that applies regression values to boxes.
    """

    def __init__(self, mean=None, std=None, *args, **kwargs):
        """ Initializer for the RegressBoxes layer.

        Args
            mean: The mean of the regression values used for normalization
                  (np.ndarray, list or tuple; defaults to [0, 0, 0, 0]).
            std: The standard deviation of the regression values used for normalization
                 (np.ndarray, list or tuple; defaults to [0.2, 0.2, 0.2, 0.2]).

        Raises
            ValueError: if mean or std is neither a np.ndarray, a list nor a tuple.
        """
        mean = np.array([0, 0, 0, 0]) if mean is None else mean
        std = np.array([0.2, 0.2, 0.2, 0.2]) if std is None else std

        # anything that is not already an array must be a plain sequence
        if not isinstance(mean, np.ndarray):
            if not isinstance(mean, (list, tuple)):
                raise ValueError('Expected mean to be a np.ndarray, list or tuple. Received: {}'.format(type(mean)))
            mean = np.array(mean)

        if not isinstance(std, np.ndarray):
            if not isinstance(std, (list, tuple)):
                raise ValueError('Expected std to be a np.ndarray, list or tuple. Received: {}'.format(type(std)))
            std = np.array(std)

        self.mean = mean
        self.std  = std
        super(RegressBoxes, self).__init__(*args, **kwargs)

    def call(self, inputs, **kwargs):
        """ Apply the regression (inputs[1]) to the anchors (inputs[0]).
        """
        anchors, regression = inputs
        transformed = backend.bbox_transform_inv(anchors, regression, mean=self.mean, std=self.std)
        return transformed

    def compute_output_shape(self, input_shape):
        """ The output has the same shape as the anchors input.
        """
        anchors_shape = input_shape[0]
        return anchors_shape

    def get_config(self):
        """ Return the layer configuration, with mean and std serialised as plain lists.
        """
        config = super(RegressBoxes, self).get_config()
        config['mean'] = self.mean.tolist()
        config['std'] = self.std.tolist()

        return config


class ClipBoxes(keras.layers.Layer):
    """ Keras layer that clips box coordinates to the extent of an image tensor.
    """
    def call(self, inputs, **kwargs):
        """ Clip `boxes` (inputs[1]) to [0, width - 1] x [0, height - 1] of `image` (inputs[0]).
        """
        image, boxes = inputs
        image_shape = keras.backend.cast(keras.backend.shape(image), keras.backend.floatx())

        # pick height/width from the position dictated by the data format
        if keras.backend.image_data_format() == 'channels_first':
            _, _, height, width = tensorflow.unstack(image_shape, axis=0)
        else:
            _, height, width, _ = tensorflow.unstack(image_shape, axis=0)

        max_x = width  - 1
        max_y = height - 1

        x1, y1, x2, y2 = tensorflow.unstack(boxes, axis=-1)
        clipped = [
            tensorflow.clip_by_value(x1, 0, max_x),
            tensorflow.clip_by_value(y1, 0, max_y),
            tensorflow.clip_by_value(x2, 0, max_x),
            tensorflow.clip_by_value(y2, 0, max_y),
        ]

        return keras.backend.stack(clipped, axis=2)

    def compute_output_shape(self, input_shape):
        """ The output has the same shape as the boxes input.
        """
        boxes_shape = input_shape[1]
        return boxes_shape


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/layers/filter_detections.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import tensorflow
from tensorflow import keras
from .. import backend


def filter_detections(
    boxes,
    classification,
    other                 = [],
    class_specific_filter = True,
    nms                   = True,
    score_threshold       = 0.05,
    max_detections        = 300,
    nms_threshold         = 0.5
):
    """ Filter detections using the boxes and classification values.

    The detections are thresholded on score, optionally reduced with non maximum
    suppression, limited to the max_detections best scoring ones and finally
    padded with -1 so that every output has max_detections rows.

    Args
        boxes                 : Tensor of shape (num_boxes, 4) containing the boxes in (x1, y1, x2, y2) format.
        classification        : Tensor of shape (num_boxes, num_classes) containing the classification scores.
        other                 : List of tensors of shape (num_boxes, ...) to filter along with the boxes and classification scores.
                                The default list is only read, never modified, by this function.
        class_specific_filter : Whether to perform filtering per class, or take the best scoring class and filter those.
        nms                   : Flag to enable/disable non maximum suppression.
        score_threshold       : Threshold used to prefilter the boxes with.
        max_detections        : Maximum number of detections to keep.
        nms_threshold         : Threshold for the IoU value to determine when a box should be suppressed.

    Returns
        A list of [boxes, scores, labels, other[0], other[1], ...].
        boxes is shaped (max_detections, 4) and contains the (x1, y1, x2, y2) of the non-suppressed boxes.
        scores is shaped (max_detections,) and contains the scores of the predicted class.
        labels is shaped (max_detections,) and contains the predicted label (cast to int32).
        other[i] is shaped (max_detections, ...) and contains the filtered other[i] data.
        In case there are fewer than max_detections detections, the tensors are padded with -1's.
    """
    def _filter_detections(scores, labels):
        # Returns a tensor of (box index, label) pairs for the detections that
        # pass the score threshold and, when enabled, NMS.

        # threshold based on score
        indices = tensorflow.where(keras.backend.greater(scores, score_threshold))

        if nms:
            filtered_boxes  = tensorflow.gather_nd(boxes, indices)
            filtered_scores = keras.backend.gather(scores, indices)[:, 0]

            # perform NMS
            nms_indices = tensorflow.image.non_max_suppression(filtered_boxes, filtered_scores, max_output_size=max_detections, iou_threshold=nms_threshold)

            # filter indices based on NMS
            indices = keras.backend.gather(indices, nms_indices)

        # pair each surviving box index with its label
        labels = tensorflow.gather_nd(labels, indices)
        indices = keras.backend.stack([indices[:, 0], labels], axis=1)

        return indices

    if class_specific_filter:
        all_indices = []
        # perform per class filtering
        for c in range(int(classification.shape[1])):
            scores = classification[:, c]
            # every box gets the constant label c for this pass
            labels = c * tensorflow.ones((keras.backend.shape(scores)[0],), dtype='int64')
            all_indices.append(_filter_detections(scores, labels))

        # concatenate indices to single tensor
        indices = keras.backend.concatenate(all_indices, axis=0)
    else:
        # only consider the best scoring class of each box
        scores  = keras.backend.max(classification, axis    = 1)
        labels  = keras.backend.argmax(classification, axis = 1)
        indices = _filter_detections(scores, labels)

    # select top k (k is capped by the number of detections that are left)
    scores              = tensorflow.gather_nd(classification, indices)
    labels              = indices[:, 1]
    scores, top_indices = tensorflow.nn.top_k(scores, k=keras.backend.minimum(max_detections, keras.backend.shape(scores)[0]))

    # filter input using the final set of indices
    indices             = keras.backend.gather(indices[:, 0], top_indices)
    boxes               = keras.backend.gather(boxes, indices)
    labels              = keras.backend.gather(labels, top_indices)
    other_              = [keras.backend.gather(o, indices) for o in other]

    # pad the outputs with -1 up to max_detections rows
    pad_size = keras.backend.maximum(0, max_detections - keras.backend.shape(scores)[0])
    boxes    = tensorflow.pad(boxes, [[0, pad_size], [0, 0]], constant_values=-1)
    scores   = tensorflow.pad(scores, [[0, pad_size]], constant_values=-1)
    labels   = tensorflow.pad(labels, [[0, pad_size]], constant_values=-1)
    labels   = keras.backend.cast(labels, 'int32')
    # only the first axis is padded; the remaining axes of each tensor are left untouched
    other_   = [tensorflow.pad(o, [[0, pad_size]] + [[0, 0] for _ in range(1, len(o.shape))], constant_values=-1) for o in other_]

    # set shapes, since we know what they are
    boxes.set_shape([max_detections, 4])
    scores.set_shape([max_detections])
    labels.set_shape([max_detections])
    # the trailing dimensions are taken from the original (unfiltered) tensors
    for o, s in zip(other_, [list(keras.backend.int_shape(o)) for o in other]):
        o.set_shape([max_detections] + s[1:])

    return [boxes, scores, labels] + other_


class FilterDetections(keras.layers.Layer):
    """ Keras layer that filters detections with a score threshold and NMS.
    """

    def __init__(
        self,
        nms                   = True,
        class_specific_filter = True,
        nms_threshold         = 0.5,
        score_threshold       = 0.05,
        max_detections        = 300,
        parallel_iterations   = 32,
        **kwargs
    ):
        """ Filters detections using score threshold, NMS and selecting the top-k detections.

        Args
            nms                   : Flag to enable/disable NMS.
            class_specific_filter : Whether to filter per class, or take the best scoring class and filter those.
            nms_threshold         : IoU threshold above which a box is suppressed.
            score_threshold       : Threshold used to prefilter the boxes with.
            max_detections        : Maximum number of detections to keep.
            parallel_iterations   : Number of batch items to process in parallel.
        """
        self.max_detections        = max_detections
        self.score_threshold       = score_threshold
        self.nms                   = nms
        self.nms_threshold         = nms_threshold
        self.class_specific_filter = class_specific_filter
        self.parallel_iterations   = parallel_iterations
        super(FilterDetections, self).__init__(**kwargs)

    def call(self, inputs, **kwargs):
        """ Constructs the NMS graph.

        Args
            inputs : List of [boxes, classification, other[0], other[1], ...] tensors.

        Returns
            The result of mapping filter_detections over the batch.
        """
        boxes, classification = inputs[0], inputs[1]
        other = inputs[2:]

        def _filter_single_item(args):
            # bind this layer's settings to filter_detections for one batch item
            return filter_detections(
                args[0],
                args[1],
                args[2],
                nms                   = self.nms,
                class_specific_filter = self.class_specific_filter,
                score_threshold       = self.score_threshold,
                max_detections        = self.max_detections,
                nms_threshold         = self.nms_threshold,
            )

        # dtype and shape of every per-item output: boxes, scores, labels, then the extras
        float_type = keras.backend.floatx()
        dtypes = [float_type, float_type, 'int32'] + [o.dtype for o in other]
        shapes = [
            (self.max_detections, 4),
            (self.max_detections,),
            (self.max_detections,),
        ] + [(self.max_detections,) + o.shape[2:] for o in other]

        return backend.map_fn(
            _filter_single_item,
            elems=[boxes, classification, other],
            dtype=dtypes,
            shapes=shapes,
            parallel_iterations=self.parallel_iterations,
        )

    def compute_output_shape(self, input_shape):
        """ Computes the output shapes given the input shapes.

        Args
            input_shape : List of input shapes [boxes, classification, other[0], other[1], ...].

        Returns
            List of tuples representing the output shapes:
            [filtered_boxes.shape, filtered_scores.shape, filtered_labels.shape, filtered_other[0].shape, ...]
        """
        output_shapes = [
            (input_shape[0][0], self.max_detections, 4),
            (input_shape[1][0], self.max_detections),
            (input_shape[1][0], self.max_detections),
        ]
        # every extra input keeps its trailing dimensions
        for extra_shape in input_shape[2:len(input_shape)]:
            output_shapes.append(tuple([extra_shape[0], self.max_detections] + list(extra_shape[2:])))
        return output_shapes

    def compute_mask(self, inputs, mask=None):
        """ This is required in Keras when there is more than 1 output.
        """
        # the layer yields one more output than it has inputs
        return [None] * (len(inputs) + 1)

    def get_config(self):
        """ Gets the configuration of this layer.

        Returns
            Dictionary containing the parameters of this layer.
        """
        own_settings = {
            'nms'                   : self.nms,
            'class_specific_filter' : self.class_specific_filter,
            'nms_threshold'         : self.nms_threshold,
            'score_threshold'       : self.score_threshold,
            'max_detections'        : self.max_detections,
            'parallel_iterations'   : self.parallel_iterations,
        }
        config = super(FilterDetections, self).get_config()
        config.update(own_settings)

        return config


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/losses.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import tensorflow
from tensorflow import keras


def focal(alpha=0.25, gamma=2.0, cutoff=0.5):
    """ Build a focal loss function.

    Args
        alpha: Scale applied to the focal weight.
        gamma: Exponent applied to the focal weight.
        cutoff: Target values above this cutoff are treated as positive (for soft targets).

    Returns
        A function (y_true, y_pred) -> loss that uses the given alpha, gamma and cutoff.
    """
    def _focal(y_true, y_pred):
        """ Compute the focal loss of y_pred w.r.t. y_true.

        As defined in https://arxiv.org/abs/1708.02002

        Args
            y_true: Tensor of target data from the generator with shape (B, N, num_classes).
            y_pred: Tensor of predicted data from the network with shape (B, N, num_classes).

        Returns
            The summed focal loss divided by the number of positive anchors (at least 1).
        """
        # the last channel of y_true is the anchor state: -1 ignore, 0 background, 1 object
        anchor_state   = y_true[:, :, -1]
        labels         = y_true[:, :, :-1]
        classification = y_pred

        # drop the anchors that are marked "ignore"
        kept           = tensorflow.where(keras.backend.not_equal(anchor_state, -1))
        labels         = tensorflow.gather_nd(labels, kept)
        classification = tensorflow.gather_nd(classification, kept)

        # alpha for positive targets, (1 - alpha) for the rest
        is_positive  = keras.backend.greater(labels, cutoff)
        alpha_factor = keras.backend.ones_like(labels) * alpha
        alpha_factor = tensorflow.where(is_positive, alpha_factor, 1 - alpha_factor)
        focal_weight = tensorflow.where(is_positive, 1 - classification, classification)
        focal_weight = alpha_factor * focal_weight ** gamma

        weighted_loss = focal_weight * keras.backend.binary_crossentropy(labels, classification)

        # normalise by the number of positive anchors, but never by less than one
        positives    = tensorflow.where(keras.backend.equal(anchor_state, 1))
        num_positive = keras.backend.cast(keras.backend.shape(positives)[0], keras.backend.floatx())
        num_positive = keras.backend.maximum(keras.backend.cast_to_floatx(1.0), num_positive)

        return keras.backend.sum(weighted_loss) / num_positive

    return _focal


def smooth_l1(sigma=3.0):
    """ Build a smooth L1 loss function.

    Args
        sigma: Defines the point where the loss changes from L2 to L1.

    Returns
        A function (y_true, y_pred) -> loss computing the smooth L1 loss.
    """
    sigma_squared = sigma ** 2

    def _smooth_l1(y_true, y_pred):
        """ Compute the smooth L1 loss of y_pred w.r.t. y_true.

        Args
            y_true: Tensor from the generator of shape (B, N, 5). The last value for each box is the state of the anchor (ignore, negative, positive).
            y_pred: Tensor from the network of shape (B, N, 4).

        Returns
            The summed smooth L1 loss divided by the number of positive anchors (at least 1).
        """
        # split the targets from the anchor state stored in the last channel
        anchor_state      = y_true[:, :, -1]
        regression_target = y_true[:, :, :-1]
        regression        = y_pred

        # keep only the positive anchors (state == 1)
        positive          = tensorflow.where(keras.backend.equal(anchor_state, 1))
        regression        = tensorflow.gather_nd(regression, positive)
        regression_target = tensorflow.gather_nd(regression_target, positive)

        # smooth L1:
        #   0.5 * (sigma * x)^2        if |x| < 1 / sigma^2
        #   |x| - 0.5 / sigma^2        otherwise
        abs_diff  = keras.backend.abs(regression - regression_target)
        quadratic = 0.5 * sigma_squared * keras.backend.pow(abs_diff, 2)
        linear    = abs_diff - 0.5 / sigma_squared
        regression_loss = tensorflow.where(
            keras.backend.less(abs_diff, 1.0 / sigma_squared),
            quadratic,
            linear
        )

        # normalise by the number of positive anchors, but never by less than one
        num_positive = keras.backend.maximum(1, keras.backend.shape(positive)[0])
        num_positive = keras.backend.cast(num_positive, dtype=keras.backend.floatx())
        return keras.backend.sum(regression_loss) / num_positive

    return _smooth_l1


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/models/__init__.py
================================================
from __future__ import print_function
import sys


class Backbone(object):
    """ Base class holding additional information about a backbone.

    Subclasses are expected to override retinanet, download_imagenet, validate
    and preprocess_image; the versions defined here only raise NotImplementedError.
    """
    def __init__(self, backbone):
        """ Register the custom objects, store the backbone name and validate it.
        """
        from .. import initializers
        from .. import layers
        from .. import losses

        # maps the names of custom layers, initializers and losses to their implementations
        registry = {}
        registry['UpsampleLike']     = layers.UpsampleLike
        registry['PriorProbability'] = initializers.PriorProbability
        registry['RegressBoxes']     = layers.RegressBoxes
        registry['FilterDetections'] = layers.FilterDetections
        registry['Anchors']          = layers.Anchors
        registry['ClipBoxes']        = layers.ClipBoxes
        registry['_smooth_l1']       = losses.smooth_l1()
        registry['_focal']           = losses.focal()
        self.custom_objects = registry

        self.backbone = backbone
        self.validate()

    def retinanet(self, *args, **kwargs):
        """ Should return a retinanet model built on this backbone.
        """
        raise NotImplementedError('retinanet method not implemented.')

    def download_imagenet(self):
        """ Should download ImageNet weights and return the path to the weights file.
        """
        raise NotImplementedError('download_imagenet method not implemented.')

    def validate(self):
        """ Should check whether the backbone string is correct.
        """
        raise NotImplementedError('validate method not implemented.')

    def preprocess_image(self, inputs):
        """ Should prepare an image for being passed through the network.
        Keeping this on Backbone lets each backbone define its own preprocessing step.
        """
        raise NotImplementedError('preprocess_image method not implemented.')


def backbone(backbone_name):
    """ Returns a backbone object for the given backbone.

    The backbone class is chosen by substring match on backbone_name and is
    imported lazily, so only the module of the selected family is loaded.

    Args
        backbone_name: Name of the backbone, e.g. 'resnet50' or 'densenet121'.

    Returns
        An instance of the selected backbone class, constructed with backbone_name.

    Raises
        NotImplementedError: if backbone_name matches none of the known families.
    """
    if 'densenet' in backbone_name:
        from .densenet import DenseNetBackbone as b
    # the SE variants are tested before 'resnet' because e.g. 'seresnet' also contains 'resnet'
    elif 'seresnext' in backbone_name or 'seresnet' in backbone_name or 'senet' in backbone_name:
        from .senet import SeBackbone as b
    elif 'resnet' in backbone_name:
        from .resnet import ResNetBackbone as b
    elif 'mobilenet' in backbone_name:
        from .mobilenet import MobileNetBackbone as b
    elif 'vgg' in backbone_name:
        from .vgg import VGGBackbone as b
    elif 'EfficientNet' in backbone_name:
        from .effnet import EfficientNetBackbone as b
    else:
        # report the requested name (not this function object) in the error message
        raise NotImplementedError('Backbone class for  \'{}\' not implemented.'.format(backbone_name))

    return b(backbone_name)


def load_model(filepath, backbone_name='resnet50'):
    """ Load a saved retinanet model together with the custom objects of its backbone.

    Args
        filepath: one of the following:
            - string, path to the saved model, or
            - h5py.File object from which to load the model
        backbone_name: Backbone with which the model was trained.

    Returns
        The object returned by keras.models.load_model.

    Any exception raised while resolving the backbone or loading the model
    propagates to the caller unchanged.
    """
    from tensorflow import keras

    custom_objects = backbone(backbone_name).custom_objects
    return keras.models.load_model(filepath, custom_objects=custom_objects)


def convert_model(model, nms=True, class_specific_filter=True, anchor_params=None, **kwargs):
    """ Convert a training model to an inference model.

    This is a thin wrapper that forwards all arguments to retinanet_bbox.

    Args
        model                 : A retinanet training model.
        nms                   : Boolean, whether to add NMS filtering to the converted model.
        class_specific_filter : Whether to use class specific filtering or filter for the best scoring class only.
        anchor_params         : Anchor parameters object. If omitted, default values are used.
        **kwargs              : Inference and minimal retinanet model settings.

    Returns
        The value returned by retinanet_bbox.
    """
    from .retinanet import retinanet_bbox

    # merge the named settings with the pass-through keyword arguments
    options = dict(
        kwargs,
        model=model,
        nms=nms,
        class_specific_filter=class_specific_filter,
        anchor_params=anchor_params,
    )
    return retinanet_bbox(**options)


def assert_training_model(model):
    """ Assert that the model exposes both a 'regression' and a 'classification' output.
    """
    has_required_outputs = True
    for required in ['regression', 'classification']:
        if required not in model.output_names:
            has_required_outputs = False
            break

    assert has_required_outputs, \
        "Input is not a training model (no 'regression' and 'classification' outputs were found, outputs are: {}).".format(model.output_names)


def check_training_model(model):
    """ Verify that model is a training model; otherwise report on stderr and exit with status 1.
    """
    try:
        assert_training_model(model)
    except AssertionError as failure:
        # print the assertion message, then terminate the process
        print(failure, file=sys.stderr)
        raise SystemExit(1)


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/models/densenet.py
================================================
"""
Copyright 2018 vidosits (https://github.com/vidosits/)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from tensorflow import keras

from . import retinanet
from . import Backbone
from ..utils.image import preprocess_image


# Supported DenseNet variants. Each entry maps the backbone name to a pair
# (blocks, creator): densenet_retinanet uses `blocks` to build the names of the
# 'conv{n}_block{m}_concat' layers it taps, and calls `creator` to build the model.
allowed_backbones = {
    'densenet121': ([6, 12, 24, 16], keras.applications.densenet.DenseNet121),
    'densenet169': ([6, 12, 32, 32], keras.applications.densenet.DenseNet169),
    'densenet201': ([6, 12, 48, 32], keras.applications.densenet.DenseNet201),
}


class DenseNetBackbone(Backbone):
    """ Backbone description and utility functions for the DenseNet family.
    """

    def retinanet(self, *args, **kwargs):
        """ Build a retinanet model on top of this DenseNet backbone.
        """
        return densenet_retinanet(*args, backbone=self.backbone, **kwargs)

    def download_imagenet(self):
        """ Download pre-trained weights for the specified backbone name.
        The file name has the format {backbone}_weights_tf_dim_ordering_tf_kernels_notop
        where backbone is the densenet + number of layers (e.g. densenet121).
        For more info check the explanation from the keras densenet script itself:
            https://github.com/keras-team/keras/blob/master/keras/applications/densenet.py

        Returns
            The value returned by keras.utils.get_file for the weights file.

        Raises
            ValueError: if the image data format is 'channels_first'.
        """
        # no weights are published for the channels_first layout
        if keras.backend.image_data_format() == 'channels_first':
            raise ValueError('Weights for "channels_first" format are not available.')

        weights_file = '{}_weights_tf_dim_ordering_tf_kernels_notop.h5'.format(self.backbone)
        weights_url = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.8/' + weights_file
        return keras.utils.get_file(weights_file, weights_url, cache_subdir='models')

    def validate(self):
        """ Check that the part of the backbone name before the first '_' is a supported DenseNet.

        Raises
            ValueError: if that name is not a key of allowed_backbones.
        """
        base_name = self.backbone.split('_')[0]

        if base_name in allowed_backbones:
            return
        raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(base_name, allowed_backbones.keys()))

    def preprocess_image(self, inputs):
        """ Prepare an image for the network using the 'tf' preprocessing mode.
        """
        return preprocess_image(inputs, mode='tf')


def densenet_retinanet(num_classes, backbone='densenet121', inputs=None, modifier=None, **kwargs):
    """ Builds a retinanet model whose feature extractor is a densenet.

    Args
        num_classes: Number of classes to predict.
        backbone: Which backbone to use (one of ('densenet121', 'densenet169', 'densenet201')).
        inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
        modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).
        **kwargs: Forwarded unchanged to retinanet.retinanet.

    Returns
        RetinaNet model with a DenseNet backbone.
    """
    # fall back to an input of unconstrained spatial size
    if inputs is None:
        inputs = keras.layers.Input((None, None, 3))

    # each entry holds the per-stage block counts and the keras constructor
    block_sizes, build_densenet = allowed_backbones[backbone]
    densenet = build_densenet(input_tensor=inputs, include_top=False, pooling=None, weights=None)

    # the concat layer of the last block in every dense stage (stages are numbered from conv2)
    stage_outputs = []
    for stage, last_block in enumerate(block_sizes, start=2):
        concat_name = 'conv{}_block{}_concat'.format(stage, last_block)
        stage_outputs.append(densenet.get_layer(name=concat_name).output)

    # re-wrap the network so that it exposes the four stage outputs
    densenet = keras.models.Model(inputs=inputs, outputs=stage_outputs, name=densenet.name)

    # give the caller a chance to alter the backbone
    if modifier:
        densenet = modifier(densenet)

    # map the four outputs onto the C2..C5 names expected by retinanet
    backbone_layers = {'C{}'.format(idx + 2): densenet.outputs[idx] for idx in range(4)}

    return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/models/effnet.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from tensorflow import keras

from . import retinanet
from . import Backbone
import efficientnet.keras as efn


class EfficientNetBackbone(Backbone):
    """ Backbone descriptor for the EfficientNet family, bundled with helper utilities.
    """

    def __init__(self, backbone):
        super(EfficientNetBackbone, self).__init__(backbone)
        self.preprocess_image_func = None

    def retinanet(self, *args, **kwargs):
        """ Builds a retinanet model on top of the backbone stored on this object.
        """
        return effnet_retinanet(*args, backbone=self.backbone, **kwargs)

    def download_imagenet(self):
        """ Fetches the ImageNet weights and returns the path reported by keras.utils.get_file.
        """
        from efficientnet.weights import IMAGENET_WEIGHTS_HASHES, IMAGENET_WEIGHTS_PATH

        # the variant is identified by the last character of the backbone name (e.g. '0' for EfficientNetB0)
        variant = 'efficientnet-b' + self.backbone[-1]
        weights_file = variant + '_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5'

        # NOTE(review): index 1 is presumably the hash of the "notop" file - confirm against efficientnet.weights
        expected_hash = IMAGENET_WEIGHTS_HASHES[variant][1]

        return keras.utils.get_file(
            weights_file,
            IMAGENET_WEIGHTS_PATH + weights_file,
            cache_subdir='models',
            file_hash=expected_hash
        )

    def validate(self):
        """ Verifies that the backbone string names a known EfficientNet variant.

        Only the part of the name before the first underscore is checked.
        """
        allowed_backbones = ['EfficientNetB{}'.format(variant) for variant in range(8)]
        name = self.backbone.split('_')[0]

        if name in allowed_backbones:
            return

        raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(name, allowed_backbones))

    def preprocess_image(self, inputs):
        """ Prepares an image for the network by delegating to efn.preprocess_input.
        """
        return efn.preprocess_input(inputs)


def effnet_retinanet(num_classes, backbone='EfficientNetB0', inputs=None, modifier=None, **kwargs):
    """ Builds a retinanet model whose feature extractor is an EfficientNet.

    Args
        num_classes: Number of classes to predict.
        backbone: Which backbone to use (one of 'EfficientNetB0' ... 'EfficientNetB7').
        inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3), or (3, None, None) for 'channels_first').
        modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).
        **kwargs: Forwarded unchanged to retinanet.retinanet.

    Returns
        RetinaNet model with an EfficientNet backbone.

    Raises
        ValueError: if backbone is not one of the supported names.
    """
    # fall back to an input of unconstrained spatial size in the active data format
    if inputs is None:
        if keras.backend.image_data_format() == 'channels_first':
            default_shape = (3, None, None)
        else:
            default_shape = (None, None, 3)
        inputs = keras.layers.Input(shape=default_shape)

    # every supported backbone name is also the name of its constructor in the efficientnet package
    supported = tuple('EfficientNetB{}'.format(variant) for variant in range(8))
    if backbone not in supported:
        raise ValueError('Backbone (\'{}\') is invalid.'.format(backbone))
    effnet = getattr(efn, backbone)(input_tensor=inputs, include_top=False, weights=None)

    # feature taps, ordered from the finest to the coarsest resolution
    # (the original comments list 28x28, 14x14, 7x7 - presumably for a 224x224 input)
    tap_names = ['block4a_expand_activation', 'block6a_expand_activation', 'top_activation']
    taps = [effnet.get_layer(name=tap_name).output for tap_name in tap_names]

    # re-wrap the network so that it exposes the three taps as outputs
    effnet = keras.models.Model(inputs=inputs, outputs=taps, name=effnet.name)

    # give the caller a chance to alter the backbone
    if modifier:
        effnet = modifier(effnet)

    # only C3..C5 are available, C2 is not provided by this backbone
    backbone_layers = {'C{}'.format(idx + 3): effnet.outputs[idx] for idx in range(3)}

    return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)


def EfficientNetB0_retinanet(num_classes, inputs=None, **kwargs):
    """ Shortcut for effnet_retinanet with the backbone fixed to 'EfficientNetB0'.
    """
    return effnet_retinanet(
        backbone='EfficientNetB0',
        num_classes=num_classes,
        inputs=inputs,
        **kwargs
    )


def EfficientNetB1_retinanet(num_classes, inputs=None, **kwargs):
    """ Shortcut for effnet_retinanet with the backbone fixed to 'EfficientNetB1'.
    """
    return effnet_retinanet(
        backbone='EfficientNetB1',
        num_classes=num_classes,
        inputs=inputs,
        **kwargs
    )


def EfficientNetB2_retinanet(num_classes, inputs=None, **kwargs):
    """ Shortcut for effnet_retinanet with the backbone fixed to 'EfficientNetB2'.
    """
    return effnet_retinanet(
        backbone='EfficientNetB2',
        num_classes=num_classes,
        inputs=inputs,
        **kwargs
    )


def EfficientNetB3_retinanet(num_classes, inputs=None, **kwargs):
    """ Shortcut for effnet_retinanet with the backbone fixed to 'EfficientNetB3'.
    """
    return effnet_retinanet(
        backbone='EfficientNetB3',
        num_classes=num_classes,
        inputs=inputs,
        **kwargs
    )


def EfficientNetB4_retinanet(num_classes, inputs=None, **kwargs):
    """ Shortcut for effnet_retinanet with the backbone fixed to 'EfficientNetB4'.
    """
    return effnet_retinanet(
        backbone='EfficientNetB4',
        num_classes=num_classes,
        inputs=inputs,
        **kwargs
    )


def EfficientNetB5_retinanet(num_classes, inputs=None, **kwargs):
    """ Shortcut for effnet_retinanet with the backbone fixed to 'EfficientNetB5'.
    """
    return effnet_retinanet(
        backbone='EfficientNetB5',
        num_classes=num_classes,
        inputs=inputs,
        **kwargs
    )


def EfficientNetB6_retinanet(num_classes, inputs=None, **kwargs):
    """ Shortcut for effnet_retinanet with the backbone fixed to 'EfficientNetB6'.
    """
    return effnet_retinanet(
        backbone='EfficientNetB6',
        num_classes=num_classes,
        inputs=inputs,
        **kwargs
    )


def EfficientNetB7_retinanet(num_classes, inputs=None, **kwargs):
    """ Shortcut for effnet_retinanet with the backbone fixed to 'EfficientNetB7'.
    """
    return effnet_retinanet(
        backbone='EfficientNetB7',
        num_classes=num_classes,
        inputs=inputs,
        **kwargs
    )


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/models/mobilenet.py
================================================
"""
Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from tensorflow import keras
from ..utils.image import preprocess_image

from . import retinanet
from . import Backbone


class MobileNetBackbone(Backbone):
    """ Describes backbone information and provides utility functions.

    Backbone names have the form mobilenet{rows}_{alpha}, e.g. 'mobilenet224_1.0'.
    """

    # accepted values for the part of the backbone name before the first underscore
    allowed_backbones = ['mobilenet128', 'mobilenet160', 'mobilenet192', 'mobilenet224']

    def retinanet(self, *args, **kwargs):
        """ Returns a retinanet model using the correct backbone.
        """
        return mobilenet_retinanet(*args, backbone=self.backbone, **kwargs)

    def download_imagenet(self):
        """ Download pre-trained weights for the specified backbone name.
        This name is in the format mobilenet{rows}_{alpha} where rows is the
        imagenet shape dimension and 'alpha' controls the width of the network.
        For more info check the explanation from the keras mobilenet script itself.

        Returns
            The value returned by keras.utils.get_file for the weight file.

        Raises
            ValueError: if the keras image data format is 'channels_first'.
        """

        alpha = float(self.backbone.split('_')[1])
        rows = int(self.backbone.split('_')[0].replace('mobilenet', ''))

        # load weights
        if keras.backend.image_data_format() == 'channels_first':
            # the message must name the format that is actually rejected here
            raise ValueError('Weights for "channels_first" format '
                             'are not available.')

        # translate alpha into the token used in the published file names
        # NOTE(review): any alpha other than 1.0 / 0.75 / 0.50 falls through to the 0.25 weights
        if alpha == 1.0:
            alpha_text = '1_0'
        elif alpha == 0.75:
            alpha_text = '7_5'
        elif alpha == 0.50:
            alpha_text = '5_0'
        else:
            alpha_text = '2_5'

        model_name = 'mobilenet_{}_{}_tf_no_top.h5'.format(alpha_text, rows)
        weights_url = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.6/' + model_name
        weights_path = keras.utils.get_file(model_name, weights_url, cache_subdir='models')

        return weights_path

    def validate(self):
        """ Checks whether the backbone string is correct.

        Only the part of the name before the first underscore is checked.

        Raises
            ValueError: if that part is not listed in allowed_backbones.
        """
        backbone = self.backbone.split('_')[0]

        if backbone not in MobileNetBackbone.allowed_backbones:
            raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, MobileNetBackbone.allowed_backbones))

    def preprocess_image(self, inputs):
        """ Takes as input an image and prepares it for being passed through the network.
        """
        return preprocess_image(inputs, mode='tf')


def mobilenet_retinanet(num_classes, backbone='mobilenet224_1.0', inputs=None, modifier=None, **kwargs):
    """ Builds a retinanet model whose feature extractor is a mobilenet.

    Args
        num_classes: Number of classes to predict.
        backbone: Which backbone to use, written as {name}_{alpha} where name is one of ('mobilenet128', 'mobilenet160', 'mobilenet192', 'mobilenet224').
        inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
        modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).
        **kwargs: Forwarded unchanged to retinanet.retinanet.

    Returns
        RetinaNet model with a MobileNet backbone.
    """
    # the width multiplier is the part of the name after the underscore
    width_multiplier = float(backbone.split('_')[1])

    # fall back to an input of unconstrained spatial size
    if inputs is None:
        inputs = keras.layers.Input((None, None, 3))

    mobilenet = keras.applications.mobilenet.MobileNet(
        input_tensor=inputs,
        alpha=width_multiplier,
        include_top=False,
        pooling=None,
        weights=None
    )

    # re-wrap the network so that it exposes the three feature taps as outputs
    taps = []
    for tap_name in ('conv_pw_5_relu', 'conv_pw_11_relu', 'conv_pw_13_relu'):
        taps.append(mobilenet.get_layer(tap_name).output)
    mobilenet = keras.models.Model(inputs=inputs, outputs=taps, name=mobilenet.name)

    # give the caller a chance to alter the backbone
    if modifier:
        mobilenet = modifier(mobilenet)

    # only C3..C5 are available, C2 is not provided by this backbone
    backbone_layers = {'C{}'.format(idx + 3): mobilenet.outputs[idx] for idx in range(3)}

    return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/models/resnet.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from tensorflow import keras
import keras_resnet
import keras_resnet.models

from . import retinanet
from . import Backbone
from ..utils.image import preprocess_image


class ResNetBackbone(Backbone):
    """ Describes backbone information and provides utility functions.
    """

    def __init__(self, backbone):
        super(ResNetBackbone, self).__init__(backbone)
        # make the keras_resnet custom layers known to whoever uses custom_objects
        self.custom_objects.update(keras_resnet.custom_objects)

    def retinanet(self, *args, **kwargs):
        """ Returns a retinanet model using the correct backbone.
        """
        return resnet_retinanet(*args, backbone=self.backbone, **kwargs)

    def download_imagenet(self):
        """ Downloads ImageNet weights and returns path to weights file.

        Returns
            The value returned by keras.utils.get_file for the weight file.

        Raises
            ValueError: if the backbone name does not end in an integer depth, or if
                        no weights are published for that depth.
        """
        # md5 checksum of the published weight file for every supported depth
        checksums = {
            50  : '3e9f4e4f77bbe2c9bec13b53ee1c2319',
            101 : '05dc86924389e5b401a9ea0348a3213c',
            152 : '6ee11ef2b135592f8031058820bb9e71',
        }

        depth = int(self.backbone.replace('resnet', ''))

        # fail with a clear message instead of an UnboundLocalError on the checksum
        if depth not in checksums:
            raise ValueError('No ImageNet weights available for ResNet depth {} (expected one of {}).'.format(depth, sorted(checksums)))

        filename = 'ResNet-{}-model.keras.h5'.format(depth)
        resource = 'https://github.com/fizyr/keras-models/releases/download/v0.0.1/{}'.format(filename)

        return keras.utils.get_file(
            filename,
            resource,
            cache_subdir='models',
            md5_hash=checksums[depth]
        )

    def validate(self):
        """ Checks whether the backbone string is correct.

        Only the part of the name before the first underscore is checked.

        Raises
            ValueError: if that part is not one of the supported resnet names.
        """
        allowed_backbones = ['resnet50', 'resnet101', 'resnet152']
        backbone = self.backbone.split('_')[0]

        if backbone not in allowed_backbones:
            raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, allowed_backbones))

    def preprocess_image(self, inputs):
        """ Takes as input an image and prepares it for being passed through the network.
        """
        return preprocess_image(inputs, mode='caffe')


def resnet_retinanet(num_classes, backbone='resnet50', inputs=None, modifier=None, **kwargs):
    """ Builds a retinanet model whose feature extractor is a resnet.

    Args
        num_classes: Number of classes to predict.
        backbone: Which backbone to use (one of ('resnet50', 'resnet101', 'resnet152')).
        inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3), or (3, None, None) for 'channels_first').
        modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).
        **kwargs: Forwarded unchanged to retinanet.retinanet.

    Returns
        RetinaNet model with a ResNet backbone.

    Raises
        ValueError: if backbone is not one of the supported names.
    """
    # fall back to an input of unconstrained spatial size in the active data format
    if inputs is None:
        if keras.backend.image_data_format() == 'channels_first':
            default_shape = (3, None, None)
        else:
            default_shape = (None, None, 3)
        inputs = keras.layers.Input(shape=default_shape)

    # pick the constructor first, then build the backbone in a single place
    if backbone == 'resnet50':
        build_resnet = keras_resnet.models.ResNet50
    elif backbone == 'resnet101':
        build_resnet = keras_resnet.models.ResNet101
    elif backbone == 'resnet152':
        build_resnet = keras_resnet.models.ResNet152
    else:
        raise ValueError('Backbone (\'{}\') is invalid.'.format(backbone))

    feature_extractor = build_resnet(inputs, include_top=False, freeze_bn=True)

    # give the caller a chance to alter the backbone
    if modifier:
        feature_extractor = modifier(feature_extractor)

    # the first four outputs of the backbone are used as C2..C5
    backbone_layers = {'C{}'.format(idx + 2): feature_extractor.outputs[idx] for idx in range(4)}

    return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)


def resnet50_retinanet(num_classes, inputs=None, **kwargs):
    """ Shortcut for resnet_retinanet with the backbone fixed to 'resnet50'.
    """
    return resnet_retinanet(
        backbone='resnet50',
        num_classes=num_classes,
        inputs=inputs,
        **kwargs
    )


def resnet101_retinanet(num_classes, inputs=None, **kwargs):
    """ Shortcut for resnet_retinanet with the backbone fixed to 'resnet101'.
    """
    return resnet_retinanet(
        backbone='resnet101',
        num_classes=num_classes,
        inputs=inputs,
        **kwargs
    )


def resnet152_retinanet(num_classes, inputs=None, **kwargs):
    """ Shortcut for resnet_retinanet with the backbone fixed to 'resnet152'.
    """
    return resnet_retinanet(
        backbone='resnet152',
        num_classes=num_classes,
        inputs=inputs,
        **kwargs
    )


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/models/retinanet.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from tensorflow import keras
from .. import initializers
from .. import layers
from ..utils.anchors import AnchorParameters
from . import assert_training_model


def default_classification_model(
    num_classes,
    num_anchors,
    pyramid_feature_size=256,
    prior_probability=0.01,
    classification_feature_size=256,
    name='classification_submodel'
):
    """ Builds the default classification submodel.

    Args
        num_classes                 : Number of classes to predict a score for at each feature level.
        num_anchors                 : Number of anchors to predict classification scores for at each feature level.
        pyramid_feature_size        : The number of filters to expect from the feature pyramid levels.
        prior_probability           : Probability handed to the PriorProbability bias initializer of the final convolution.
        classification_feature_size : The number of filters to use in the layers in the classification submodel.
        name                        : The name of the submodel.

    Returns
        A keras.models.Model that predicts classes for each anchor.
    """
    def gaussian_init():
        # every convolution receives its own initializer instance
        return keras.initializers.RandomNormal(mean=0.0, stddev=0.01, seed=None)

    conv_kwargs = dict(kernel_size=3, strides=1, padding='same')
    channels_first = keras.backend.image_data_format() == 'channels_first'

    if channels_first:
        input_shape = (pyramid_feature_size, None, None)
    else:
        input_shape = (None, None, pyramid_feature_size)
    inputs = keras.layers.Input(shape=input_shape)

    # tower of four 3x3 relu convolutions
    x = inputs
    for index in range(4):
        tower_conv = keras.layers.Conv2D(
            filters=classification_feature_size,
            activation='relu',
            name='pyramid_classification_{}'.format(index),
            kernel_initializer=gaussian_init(),
            bias_initializer='zeros',
            **conv_kwargs
        )
        x = tower_conv(x)

    # prediction layer: one score per (anchor, class) pair
    score_conv = keras.layers.Conv2D(
        filters=num_classes * num_anchors,
        kernel_initializer=gaussian_init(),
        bias_initializer=initializers.PriorProbability(probability=prior_probability),
        name='pyramid_classification',
        **conv_kwargs
    )
    x = score_conv(x)

    # move channels last when needed, flatten to (anchors, classes) and squash with a sigmoid
    if channels_first:
        x = keras.layers.Permute((2, 3, 1), name='pyramid_classification_permute')(x)
    x = keras.layers.Reshape((-1, num_classes), name='pyramid_classification_reshape')(x)
    x = keras.layers.Activation('sigmoid', name='pyramid_classification_sigmoid')(x)

    return keras.models.Model(inputs=inputs, outputs=x, name=name)


def default_regression_model(num_values, num_anchors, pyramid_feature_size=256, regression_feature_size=256, name='regression_submodel'):
    """ Builds the default regression submodel.

    Args
        num_values              : Number of values to regress.
        num_anchors             : Number of anchors to regress for each feature level.
        pyramid_feature_size    : The number of filters to expect from the feature pyramid levels.
        regression_feature_size : The number of filters to use in the layers in the regression submodel.
        name                    : The name of the submodel.

    Returns
        A keras.models.Model that predicts regression values for each anchor.
    """
    # All convolutions of this submodel use a zero bias and a Gaussian
    # weight fill with stddev = 0.01; a single initializer object is
    # shared by all of them.
    shared_init = keras.initializers.RandomNormal(mean=0.0, stddev=0.01, seed=None)
    conv_kwargs = dict(
        kernel_size=3,
        strides=1,
        padding='same',
        kernel_initializer=shared_init,
        bias_initializer='zeros'
    )

    channels_first = keras.backend.image_data_format() == 'channels_first'

    if channels_first:
        input_shape = (pyramid_feature_size, None, None)
    else:
        input_shape = (None, None, pyramid_feature_size)
    inputs = keras.layers.Input(shape=input_shape)

    # tower of four 3x3 relu convolutions
    x = inputs
    for index in range(4):
        tower_conv = keras.layers.Conv2D(
            filters=regression_feature_size,
            activation='relu',
            name='pyramid_regression_{}'.format(index),
            **conv_kwargs
        )
        x = tower_conv(x)

    # prediction layer: num_values outputs per anchor
    x = keras.layers.Conv2D(num_anchors * num_values, name='pyramid_regression', **conv_kwargs)(x)

    # move channels last when needed and flatten to (anchors, values)
    if channels_first:
        x = keras.layers.Permute((2, 3, 1), name='pyramid_regression_permute')(x)
    x = keras.layers.Reshape((-1, num_values), name='pyramid_regression_reshape')(x)

    return keras.models.Model(inputs=inputs, outputs=x, name=name)


def __create_pyramid_features(backbone_layers, pyramid_levels, feature_size=256):
    """ Creates the FPN layers on top of the backbone features.

    Args
        backbone_layers: a dictionary containing feature stages C3, C4, C5 from the backbone. Also contains C2 if provided.
        pyramid_levels: Pyramid levels in use.
        feature_size : The feature size to use for the resulting feature levels.

    Returns
        output_layers : A dict of feature levels keyed "P2" ... "P7". P3, P4, P5 are always included.
                        P2 is included if C2 is provided and level 2 is in use; P6 and P7 are included if in use.

    Raises
        ValueError: if level 7 is requested without level 6.
    """

    output_layers = {}

    # reduce C5 to feature_size channels; the reduced map is upsampled for the merge with C4
    # (UpsampleLike presumably resizes its first input to the spatial size of the second - confirm in layers),
    # then a 3x3 conv produces the final P5
    P5           = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C5_reduced')(backbone_layers['C5'])
    P5_upsampled = layers.UpsampleLike(name='P5_upsampled')([P5, backbone_layers['C4']])
    P5           = keras.layers.Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name='P5')(P5)
    output_layers["P5"] = P5

    # add the upsampled P5 elementwise to the reduced C4
    P4           = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C4_reduced')(backbone_layers['C4'])
    P4           = keras.layers.Add(name='P4_merged')([P5_upsampled, P4])
    P4_upsampled = layers.UpsampleLike(name='P4_upsampled')([P4, backbone_layers['C3']])
    P4           = keras.layers.Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name='P4')(P4)
    output_layers["P4"] = P4

    # add the upsampled P4 elementwise to the reduced C3
    P3 = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C3_reduced')(backbone_layers['C3'])
    P3 = keras.layers.Add(name='P3_merged')([P4_upsampled, P3])
    # the merged (pre-3x3) P3 is only upsampled when P2 will actually be built below
    if 'C2' in backbone_layers and 2 in pyramid_levels:
        P3_upsampled = layers.UpsampleLike(name='P3_upsampled')([P3, backbone_layers['C2']])
    P3 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name='P3')(P3)
    output_layers["P3"] = P3

    # optional P2: same condition as above, so P3_upsampled is guaranteed to exist here
    if 'C2' in backbone_layers and 2 in pyramid_levels:
        P2 = keras.layers.Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='C2_reduced')(backbone_layers['C2'])
        P2 = keras.layers.Add(name='P2_merged')([P3_upsampled, P2])
        P2 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name='P2')(P2)
        output_layers["P2"] = P2

    # "P6 is obtained via a 3x3 stride-2 conv on C5"
    if 6 in pyramid_levels:
        P6 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name='P6')(backbone_layers['C5'])
        output_layers["P6"] = P6

    # "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6"
    if 7 in pyramid_levels:
        # P6 is only bound when level 6 is in use, so reject the combination explicitly
        if 6 not in pyramid_levels:
            raise ValueError("P6 is required to use P7")
        P7 = keras.layers.Activation('relu', name='C6_relu')(P6)
        P7 = keras.layers.Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name='P7')(P7)
        output_layers["P7"] = P7

    return output_layers


def default_submodels(num_classes, num_anchors):
    """ Builds the default set of submodels used for object detection.

    The default set consists of a regression submodel (4 values per anchor)
    followed by a classification submodel.

    Args
        num_classes : Number of classes to use.
        num_anchors : Number of base anchors.

    Returns
        A list of tuple, where the first element is the name of the submodel and the second element is the submodel itself.
    """
    regression     = default_regression_model(4, num_anchors)
    classification = default_classification_model(num_classes, num_anchors)

    return [
        ('regression', regression),
        ('classification', classification)
    ]


def __build_model_pyramid(name, model, features):
    """ Runs one submodel on every FPN level and joins the results.

    Args
        name     : Name of the submodel.
        model    : The submodel to evaluate.
        features : The FPN features.

    Returns
        A tensor containing the response from the submodel on the FPN features, concatenated along axis 1.
    """
    merge = keras.layers.Concatenate(axis=1, name=name)

    responses = []
    for level_features in features:
        responses.append(model(level_features))

    return merge(responses)


def __build_pyramid(models, features):
    """ Runs every submodel on every FPN level.

    Args
        models   : List of (name, submodel) pairs to run on each pyramid level (by default only regression, classification).
        features : The FPN features.

    Returns
        A list of tensors, one for each submodel.
    """
    pyramid = []
    for submodel_name, submodel in models:
        pyramid.append(__build_model_pyramid(submodel_name, submodel, features))
    return pyramid


def __build_anchors(anchor_parameters, features):
    """ Generates anchors matching the shape of the FPN features.

    Args
        anchor_parameters : Parameters that determine how anchors are generated.
        features          : The FPN features.

    Returns
        A tensor containing the anchors for the FPN features.

        The shape is:
        ```
        (batch_size, num_anchors, 4)
        ```
    """
    per_level = []
    for level, level_features in enumerate(features):
        # size and stride are per level, ratios and scales are shared by all levels
        anchor_layer = layers.Anchors(
            size=anchor_parameters.sizes[level],
            stride=anchor_parameters.strides[level],
            ratios=anchor_parameters.ratios,
            scales=anchor_parameters.scales,
            name='anchors_{}'.format(level)
        )
        per_level.append(anchor_layer(level_features))

    return keras.layers.Concatenate(axis=1, name='anchors')(per_level)


def retinanet(
    inputs,
    backbone_layers,
    num_classes,
    num_anchors             = None,
    create_pyramid_features = __create_pyramid_features,
    pyramid_levels          = None,
    submodels               = None,
    name                    = 'retinanet'
):
    """ Assemble a RetinaNet model on top of a backbone.

    This model is the minimum model necessary for training (with the unfortunate exception of anchors as output).

    Args
        inputs                  : keras.layers.Input (or list of) for the input to the model.
        backbone_layers         : Dictionary with the backbone feature stages C3, C4, C5, and C2 if provided.
        num_classes             : Number of classes to classify.
        num_anchors             : Number of base anchors (defaults to the count of the default anchor parameters).
        create_pyramid_features : Functor for creating pyramid features given the features C3, C4, C5, and possibly C2 from the backbone.
        pyramid_levels          : pyramid levels to use (defaults to [3, 4, 5, 6, 7]).
        submodels               : Submodels to run on each feature map (default is regression and classification submodels).
        name                    : Name of the model.

    Returns
        A keras.models.Model which takes an image as input and outputs the result from each submodel on every pyramid level.

        The order of the outputs is as defined in submodels:
        ```
        [
            regression, classification, other[0], other[1], ...
        ]
        ```

    Raises
        ValueError: if level 2 is requested without a C2 backbone layer, or if any of the levels 3, 4, 5 is missing.
    """

    # resolve the defaults
    if num_anchors is None:
        num_anchors = AnchorParameters.default.num_anchors()

    if submodels is None:
        submodels = default_submodels(num_classes, num_anchors)

    if pyramid_levels is None:
        pyramid_levels = [3, 4, 5, 6, 7]

    # sanity-check the requested pyramid levels
    if 'C2' not in backbone_layers and 2 in pyramid_levels:
        raise ValueError("C2 not provided by backbone model. Cannot create P2 layers.")

    if not all(level in pyramid_levels for level in (3, 4, 5)):
        raise ValueError("pyramid levels 3, 4, and 5 required for functionality")

    # compute pyramid features as per https://arxiv.org/abs/1708.02002
    pyramid_features = create_pyramid_features(backbone_layers, pyramid_levels)

    # keep only the requested levels, in the requested order
    selected_features = []
    for level in pyramid_levels:
        selected_features.append(pyramid_features['P{}'.format(level)])

    # run every submodel on every selected level
    outputs = __build_pyramid(submodels, selected_features)

    return keras.models.Model(inputs=inputs, outputs=outputs, name=name)


def retinanet_bbox(
    model                 = None,
    nms                   = True,
    class_specific_filter = True,
    name                  = 'retinanet-bbox',
    anchor_params         = None,
    pyramid_levels        = None,
    nms_threshold         = 0.5,
    score_threshold       = 0.05,
    max_detections        = 300,
    parallel_iterations   = 32,
    **kwargs
):
    """ Construct a RetinaNet model on top of a backbone and adds convenience functions to output boxes directly.

    This model uses the minimum retinanet model and appends a few layers to compute boxes within the graph.
    These layers include applying the regression values to the anchors and performing NMS.

    Args
        model                 : RetinaNet model to append bbox layers to. If None, it will create a RetinaNet model using **kwargs.
        nms                   : Whether to use non-maximum suppression for the filtering step.
        class_specific_filter : Whether to use class specific filtering or filter for the best scoring class only.
        name                  : Name of the model.
        anchor_params         : Struct containing anchor parameters. If None, default values are used.
        pyramid_levels        : pyramid levels to use. If None, levels [3, 4, 5, 6, 7] are used.
                                When model is None, the same levels are also used to build the training model.
        nms_threshold         : Threshold for the IoU value to determine when a box should be suppressed.
        score_threshold       : Threshold used to prefilter the boxes with.
        max_detections        : Maximum number of detections to keep.
        parallel_iterations   : Number of batch items to process in parallel.
        **kwargs              : Additional kwargs to pass to the minimal retinanet model.

    Returns
        A keras.models.Model which takes an image as input and outputs the detections on the image.

        The order is defined as follows:
        ```
        [
            boxes, scores, labels, other[0], other[1], ...
        ]
        ```

    Raises
        AssertionError: If the number of pyramid levels differs from the number of anchor sizes.
    """

    # if no anchor parameters are passed, use default values
    if anchor_params is None:
        anchor_params = AnchorParameters.default

    # create RetinaNet model
    if model is None:
        # pyramid_levels is a named parameter here, so it never ends up in **kwargs;
        # forward it explicitly so the model contains exactly the P-layers that are looked up below
        # (passing None lets retinanet fall back to its own default levels).
        model = retinanet(num_anchors=anchor_params.num_anchors(), pyramid_levels=pyramid_levels, **kwargs)
    else:
        assert_training_model(model)

    if pyramid_levels is None:
        pyramid_levels = [3, 4, 5, 6, 7]

    assert len(pyramid_levels) == len(anchor_params.sizes), \
        "number of pyramid levels {} should match number of anchor parameter sizes {}".format(len(pyramid_levels),
                                                                                              len(anchor_params.sizes))

    pyramid_layer_names = ['P{}'.format(p) for p in pyramid_levels]
    # compute the anchors from the pyramid feature maps of the training model
    features = [model.get_layer(p_name).output for p_name in pyramid_layer_names]
    anchors  = __build_anchors(anchor_params, features)

    # the training model's first two outputs are expected to be regression and classification, in that order
    regression     = model.outputs[0]
    classification = model.outputs[1]

    # "other" can be any additional output from custom submodels, by default this will be []
    other = model.outputs[2:]

    # apply predicted regression to anchors
    boxes = layers.RegressBoxes(name='boxes')([anchors, regression])
    boxes = layers.ClipBoxes(name='clipped_boxes')([model.inputs[0], boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = layers.FilterDetections(
        nms                   = nms,
        class_specific_filter = class_specific_filter,
        name                  = 'filtered_detections',
        nms_threshold         = nms_threshold,
        score_threshold       = score_threshold,
        max_detections        = max_detections,
        parallel_iterations   = parallel_iterations
    )([boxes, classification] + other)

    # construct the model
    return keras.models.Model(inputs=model.inputs, outputs=detections, name=name)


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/models/senet.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from tensorflow import keras

from . import retinanet
from . import Backbone
from classification_models.keras import Classifiers


class SeBackbone(Backbone):
    """ Backbone description and helper functions for the SE-style classifiers.
    """

    def __init__(self, backbone):
        super(SeBackbone, self).__init__(backbone)
        # Classifiers.get yields a pair; only the second element (the preprocessing function) is stored.
        _classifier, preprocessing = Classifiers.get(self.backbone)
        self.preprocess_image_func = preprocessing

    def retinanet(self, *args, **kwargs):
        """ Builds a retinanet model on top of this backbone.
        """
        return senet_retinanet(*args, backbone=self.backbone, **kwargs)

    def download_imagenet(self):
        """ Fetches the ImageNet weights (without top) for this backbone and returns the local file path.

        Raises a ValueError when the weights collection has no matching entry.
        """
        from classification_models.weights import WEIGHTS_COLLECTION

        weights_path = None
        for entry in WEIGHTS_COLLECTION:
            # skip entries for other models and entries that include the classification top
            if entry['model'] != self.backbone or entry['include_top']:
                continue
            weights_path = keras.utils.get_file(
                entry['name'],
                entry['url'],
                cache_subdir='models',
                file_hash=entry['md5']
            )

        if weights_path is not None:
            return weights_path

        raise ValueError('Unable to find imagenet weights for backbone {}!'.format(self.backbone))

    def validate(self):
        """ Verifies that the backbone name (the part before the first underscore) is supported.
        """
        allowed_backbones = [
            'seresnet18', 'seresnet34', 'seresnet50', 'seresnet101', 'seresnet152',
            'seresnext50', 'seresnext101', 'senet154',
        ]
        backbone = self.backbone.split('_')[0]

        if backbone in allowed_backbones:
            return

        raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, allowed_backbones))

    def preprocess_image(self, inputs):
        """ Runs the classifier-specific preprocessing function on the given image.
        """
        preprocess = self.preprocess_image_func
        return preprocess(inputs)


def senet_retinanet(num_classes, backbone='seresnext50', inputs=None, modifier=None, **kwargs):
    """ Constructs a retinanet model using an SE-style (SE-ResNet / SE-ResNeXt / SENet) backbone.

    Args
        num_classes: Number of classes to predict.
        backbone: Which backbone to use (one of ('seresnet18', 'seresnet34', 'seresnet50', 'seresnet101',
            'seresnet152', 'seresnext50', 'seresnext101', 'senet154')).
        inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3), or (3, None, None) for 'channels_first').
        modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).
        **kwargs: Additional keyword arguments forwarded to retinanet.retinanet.

    Returns
        RetinaNet model with the selected SE-style backbone.

    Raises
        ValueError: If backbone is not one of the supported names.
    """
    # choose default input
    if inputs is None:
        if keras.backend.image_data_format() == 'channels_first':
            inputs = keras.layers.Input(shape=(3, None, None))
        else:
            # inputs = keras.layers.Input(shape=(224, 224, 3))
            inputs = keras.layers.Input(shape=(None, None, 3))

    # Classifiers.get returns a (model constructor, preprocessing function) pair; only the constructor is needed here
    classifier, _ = Classifiers.get(backbone)
    model = classifier(input_tensor=inputs, include_top=False, weights=None)

    # get last conv layer from the end of each block [28x28, 14x14, 7x7]
    # NOTE(review): the 'activation_N' names look like auto-numbered Keras layer names, which would make this
    # lookup depend on how many layers were created before this model — confirm.
    if backbone == 'seresnet18' or backbone == 'seresnet34':
        layer_outputs = ['stage3_unit1_relu1', 'stage4_unit1_relu1', 'relu1']
    elif backbone == 'seresnet50':
        layer_outputs = ['activation_36', 'activation_66', 'activation_81']
    elif backbone == 'seresnet101':
        layer_outputs = ['activation_36', 'activation_151', 'activation_166']
    elif backbone == 'seresnet152':
        layer_outputs = ['activation_56', 'activation_236', 'activation_251']
    elif backbone == 'seresnext50':
        layer_outputs = ['activation_37', 'activation_67', 'activation_81']
    elif backbone == 'seresnext101':
        layer_outputs = ['activation_37', 'activation_152', 'activation_166']
    elif backbone == 'senet154':
        layer_outputs = ['activation_59', 'activation_239', 'activation_253']
    else:
        raise ValueError('Backbone (\'{}\') is invalid.'.format(backbone))

    # replace the layer names by the corresponding output tensors
    layer_outputs = [
        model.get_layer(name=layer_outputs[0]).output,  # 28x28
        model.get_layer(name=layer_outputs[1]).output,  # 14x14
        model.get_layer(name=layer_outputs[2]).output,  # 7x7
    ]
    # create the backbone model that exposes only the three selected feature maps
    model = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=model.name)

    # invoke modifier if given
    if modifier:
        model = modifier(model)

    # C2 not provided
    backbone_layers = {
        'C3': model.outputs[0],
        'C4': model.outputs[1],
        'C5': model.outputs[2]
    }

    # create the full model
    return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)


def seresnet18_retinanet(num_classes, inputs=None, **kwargs):
    """ Shortcut for senet_retinanet with backbone='seresnet18'; extra keyword arguments are passed through.
    """
    return senet_retinanet(num_classes=num_classes, backbone='seresnet18', inputs=inputs, **kwargs)


def seresnet34_retinanet(num_classes, inputs=None, **kwargs):
    """ Shortcut for senet_retinanet with backbone='seresnet34'; extra keyword arguments are passed through.
    """
    return senet_retinanet(num_classes=num_classes, backbone='seresnet34', inputs=inputs, **kwargs)


def seresnet50_retinanet(num_classes, inputs=None, **kwargs):
    """ Shortcut for senet_retinanet with backbone='seresnet50'; extra keyword arguments are passed through.
    """
    return senet_retinanet(num_classes=num_classes, backbone='seresnet50', inputs=inputs, **kwargs)


def seresnet101_retinanet(num_classes, inputs=None, **kwargs):
    """ Shortcut for senet_retinanet with backbone='seresnet101'; extra keyword arguments are passed through.
    """
    return senet_retinanet(num_classes=num_classes, backbone='seresnet101', inputs=inputs, **kwargs)


def seresnet152_retinanet(num_classes, inputs=None, **kwargs):
    """ Shortcut for senet_retinanet with backbone='seresnet152'; extra keyword arguments are passed through.
    """
    return senet_retinanet(num_classes=num_classes, backbone='seresnet152', inputs=inputs, **kwargs)


def seresnext50_retinanet(num_classes, inputs=None, **kwargs):
    """ Shortcut for senet_retinanet with backbone='seresnext50'; extra keyword arguments are passed through.
    """
    return senet_retinanet(num_classes=num_classes, backbone='seresnext50', inputs=inputs, **kwargs)


def seresnext101_retinanet(num_classes, inputs=None, **kwargs):
    """ Shortcut for senet_retinanet with backbone='seresnext101'; extra keyword arguments are passed through.
    """
    return senet_retinanet(num_classes=num_classes, backbone='seresnext101', inputs=inputs, **kwargs)


def senet154_retinanet(num_classes, inputs=None, **kwargs):
    """ Shortcut for senet_retinanet with backbone='senet154'; extra keyword arguments are passed through.
    """
    return senet_retinanet(num_classes=num_classes, backbone='senet154', inputs=inputs, **kwargs)


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/models/vgg.py
================================================
"""
Copyright 2017-2018 cgratie (https://github.com/cgratie/)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""


from tensorflow import keras

from . import retinanet
from . import Backbone
from ..utils.image import preprocess_image


class VGGBackbone(Backbone):
    """ Backbone description and helper functions for the VGG16 / VGG19 networks.
    """

    def retinanet(self, *args, **kwargs):
        """ Builds a retinanet model on top of this backbone.
        """
        return vgg_retinanet(*args, backbone=self.backbone, **kwargs)

    def download_imagenet(self):
        """ Fetches the ImageNet weights (without top) for this backbone and returns the local file path.

        The download location is taken from the WEIGHTS_PATH_NO_TOP constant of the matching
        keras.applications module. Raises a ValueError for any backbone other than 'vgg16' / 'vgg19'.
        """
        backbone_name = self.backbone

        if backbone_name == 'vgg16':
            resource, checksum = (
                keras.applications.vgg16.vgg16.WEIGHTS_PATH_NO_TOP,
                '6d6bbae143d832006294945121d1f1fc',
            )
        elif backbone_name == 'vgg19':
            resource, checksum = (
                keras.applications.vgg19.vgg19.WEIGHTS_PATH_NO_TOP,
                '253f8cb515780f3b799900260a226db6',
            )
        else:
            raise ValueError("Backbone '{}' not recognized.".format(self.backbone))

        file_name = '{}_weights_tf_dim_ordering_tf_kernels_notop.h5'.format(self.backbone)
        return keras.utils.get_file(file_name, resource, cache_subdir='models', file_hash=checksum)

    def validate(self):
        """ Verifies that the backbone name is one of the supported VGG variants.
        """
        allowed_backbones = ['vgg16', 'vgg19']

        if self.backbone in allowed_backbones:
            return

        raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(self.backbone, allowed_backbones))

    def preprocess_image(self, inputs):
        """ Prepares an image for the network using the 'caffe' preprocessing mode.
        """
        return preprocess_image(inputs, mode='caffe')


def vgg_retinanet(num_classes, backbone='vgg16', inputs=None, modifier=None, **kwargs):
    """ Builds a retinanet model whose feature extractor is a VGG network.

    Args
        num_classes: Number of classes to predict.
        backbone: Name of the VGG variant, either 'vgg16' or 'vgg19'.
        inputs: Input tensor of the network; a (None, None, 3) keras Input is created when omitted.
        modifier: Optional callable that receives the VGG model and returns the model to use (e.g. to freeze layers).
        **kwargs: Additional keyword arguments forwarded to retinanet.retinanet.

    Returns
        RetinaNet model with a VGG backbone.

    Raises
        ValueError: If backbone is neither 'vgg16' nor 'vgg19'.
    """
    # fall back to an input of arbitrary spatial size
    if inputs is None:
        inputs = keras.layers.Input(shape=(None, None, 3))

    # pick the constructor of the requested VGG variant
    if backbone not in ('vgg16', 'vgg19'):
        raise ValueError("Backbone '{}' not recognized.".format(backbone))
    build_vgg = keras.applications.VGG16 if backbone == 'vgg16' else keras.applications.VGG19
    vgg = build_vgg(input_tensor=inputs, include_top=False, weights=None)

    if modifier:
        vgg = modifier(vgg)

    # the pooled outputs of blocks 3, 4 and 5 serve as C3, C4 and C5 (C2 is not provided)
    c3, c4, c5 = [
        vgg.get_layer(pool_name).output
        for pool_name in ("block3_pool", "block4_pool", "block5_pool")
    ]
    backbone_layers = {'C3': c3, 'C4': c4, 'C5': c5}

    # create the full model
    return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/__init__.py
================================================


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/coco.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from ..preprocessing.generator import Generator
from ..utils.image import read_image_bgr

import os
import numpy as np

from pycocotools.coco import COCO


class CocoGenerator(Generator):
    """ Generate data from the COCO dataset.

    See https://github.com/cocodataset/cocoapi/tree/master/PythonAPI for more information.
    """

    def __init__(self, data_dir, set_name, **kwargs):
        """ Initialize a COCO data generator.

        Args
            data_dir: Path to where the COCO dataset is stored.
            set_name: Name of the set to parse.
            **kwargs: Additional keyword arguments forwarded to the Generator base class.
        """
        self.data_dir  = data_dir
        self.set_name  = set_name
        self.coco      = COCO(os.path.join(data_dir, 'annotations', 'instances_' + set_name + '.json'))
        self.image_ids = self.coco.getImgIds()

        # the class mappings must exist before the base class is initialised
        self.load_classes()

        super(CocoGenerator, self).__init__(**kwargs)

    def load_classes(self):
        """ Loads the class to label mapping (and inverse) for COCO.
        """
        # load class names (name -> label), ordered by COCO category id
        categories = self.coco.loadCats(self.coco.getCatIds())
        categories.sort(key=lambda x: x['id'])

        self.classes             = {}
        self.coco_labels         = {}
        self.coco_labels_inverse = {}
        for c in categories:
            # the network label is the running count of classes seen so far
            self.coco_labels[len(self.classes)] = c['id']
            self.coco_labels_inverse[c['id']] = len(self.classes)
            self.classes[c['name']] = len(self.classes)

        # also load the reverse (label -> name)
        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key

    def size(self):
        """ Size of the COCO dataset.
        """
        return len(self.image_ids)

    def num_classes(self):
        """ Number of classes in the dataset. For COCO this is 80.
        """
        return len(self.classes)

    def has_label(self, label):
        """ Return True if label is a known label.
        """
        return label in self.labels

    def has_name(self, name):
        """ Returns True if name is a known class.
        """
        return name in self.classes

    def name_to_label(self, name):
        """ Map name to label.
        """
        return self.classes[name]

    def label_to_name(self, label):
        """ Map label to name.
        """
        return self.labels[label]

    def coco_label_to_label(self, coco_label):
        """ Map COCO label to the label as used in the network.
        COCO has some gaps in the order of labels. The highest label is 90, but there are 80 classes.
        """
        return self.coco_labels_inverse[coco_label]

    def coco_label_to_name(self, coco_label):
        """ Map COCO label to name.
        """
        return self.label_to_name(self.coco_label_to_label(coco_label))

    def label_to_coco_label(self, label):
        """ Map label as used by the network to labels as used by COCO.
        """
        return self.coco_labels[label]

    def image_path(self, image_index):
        """ Returns the image path for image_index.
        """
        image_info = self.coco.loadImgs(self.image_ids[image_index])[0]
        path       = os.path.join(self.data_dir, 'images', self.set_name, image_info['file_name'])
        return path

    def image_aspect_ratio(self, image_index):
        """ Compute the aspect ratio (width / height) for an image with image_index.
        """
        image = self.coco.loadImgs(self.image_ids[image_index])[0]
        return float(image['width']) / float(image['height'])

    def load_image(self, image_index):
        """ Load an image at the image_index.
        """
        path  = self.image_path(image_index)
        return read_image_bgr(path)

    def load_annotations(self, image_index):
        """ Load annotations for an image_index.

        Returns
            A dict with 'labels' (float array of shape (N,)) and 'bboxes' (float array of shape (N, 4)
            holding x1, y1, x2, y2), where N is the number of annotations that were kept.
        """
        # get ground truth annotations
        annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False)
        annotations     = {'labels': np.empty((0,)), 'bboxes': np.empty((0, 4))}

        # some images appear to miss annotations (like image with id 257034)
        if len(annotations_ids) == 0:
            return annotations

        # collect into plain lists first; concatenating arrays inside the loop copies them on every iteration
        labels = []
        bboxes = []
        for a in self.coco.loadAnns(annotations_ids):
            # some annotations have basically no width / height, skip them
            if a['bbox'][2] < 1 or a['bbox'][3] < 1:
                continue

            labels.append(self.coco_label_to_label(a['category_id']))
            # COCO stores boxes as [x, y, width, height]; convert to [x1, y1, x2, y2]
            bboxes.append([
                a['bbox'][0],
                a['bbox'][1],
                a['bbox'][0] + a['bbox'][2],
                a['bbox'][1] + a['bbox'][3],
            ])

        # keep the pre-shaped empty arrays when every annotation was skipped
        if labels:
            annotations['labels'] = np.array(labels, dtype=np.float64)
            annotations['bboxes'] = np.array(bboxes, dtype=np.float64)

        return annotations


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/csv_generator.py
================================================
"""
Copyright 2017-2018 yhenon (https://github.com/yhenon/)
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from .generator import Generator
from ..utils.image import read_image_bgr

import numpy as np
from PIL import Image
from six import raise_from

import csv
import sys
import os.path
from collections import OrderedDict


def _parse(value, function, fmt):
    """
    Parse a string into a value, and format a nice ValueError if it fails.

    Returns `function(value)`.
    Any `ValueError` raised is catched and a new `ValueError` is raised
    with message `fmt.format(e)`, where `e` is the caught `ValueError`.
    """
    try:
        return function(value)
    except ValueError as e:
        raise_from(ValueError(fmt.format(e)), None)


def _read_classes(csv_reader):
    """ Parse the classes file given by csv_reader.
    """
    result = OrderedDict()
    for line, row in enumerate(csv_reader):
        line += 1

        try:
            class_name, class_id = row
        except ValueError:
            raise_from(ValueError('line {}: format should be \'class_name,class_id\''.format(line)), None)
        class_id = _parse(class_id, int, 'line {}: malformed class ID: {{}}'.format(line))

        if class_name in result:
            raise ValueError('line {}: duplicate class name: \'{}\''.format(line, class_name))
        result[class_name] = class_id
    return result


def _read_annotations(csv_reader, classes):
    """ Read annotations from the csv_reader.
    """
    result = OrderedDict()
    for line, row in enumerate(csv_reader):
        line += 1

        try:
            img_file, x1, y1, x2, y2, class_name = row[:6]
        except ValueError:
            raise_from(ValueError('line {}: format should be \'img_file,x1,y1,x2,y2,class_name\' or \'img_file,,,,,\''.format(line)), None)

        if img_file not in result:
            result[img_file] = []

        # If a row contains only an image path, it's an image without annotations.
        if (x1, y1, x2, y2, class_name) == ('', '', '', '', ''):
            continue

        x1 = _parse(x1, int, 'line {}: malformed x1: {{}}'.format(line))
        y1 = _parse(y1, int, 'line {}: malformed y1: {{}}'.format(line))
        x2 = _parse(x2, int, 'line {}: malformed x2: {{}}'.format(line))
        y2 = _parse(y2, int, 'line {}: malformed y2: {{}}'.format(line))

        # Check that the bounding box is valid.
        if x2 <= x1:
            raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1))
        if y2 <= y1:
            raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1))

        # check if the current class name is correctly present
        if class_name not in classes:
            raise ValueError('line {}: unknown class name: \'{}\' (classes: {})'.format(line, class_name, classes))

        result[img_file].append({'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': class_name})
    return result


def _open_for_csv(path):
    """ Open a file with flags suitable for csv.reader.

    This is different for python2 it means with mode 'rb',
    for python3 this means 'r' with "universal newlines".
    """
    if sys.version_info[0] < 3:
        return open(path, 'rb')
    else:
        return open(path, 'r', newline='')


class CSVGenerator(Generator):
    """ Generate data for a custom CSV dataset.

    See https://github.com/fizyr/keras-retinanet#csv-datasets for more information.
    """

    def __init__(
        self,
        csv_data_file,
        csv_class_file,
        base_dir=None,
        **kwargs
    ):
        """ Initialize a CSV data generator.

        Args
            csv_data_file: Path to the CSV annotations file.
            csv_class_file: Path to the CSV classes file.
            base_dir: Directory w.r.t. where the files are to be searched (defaults to the directory containing the csv_data_file).
            **kwargs: Additional keyword arguments forwarded to the Generator base class.

        Raises
            ValueError: If the classes file or the annotations file cannot be parsed.
        """
        self.image_names = []
        self.image_data  = {}
        self.base_dir    = base_dir

        # Take base_dir from annotations file if not explicitly specified.
        if self.base_dir is None:
            self.base_dir = os.path.dirname(csv_data_file)

        # parse the provided class file
        try:
            with _open_for_csv(csv_class_file) as file:
                self.classes = _read_classes(csv.reader(file, delimiter=','))
        except ValueError as e:
            raise_from(ValueError('invalid CSV class file: {}: {}'.format(csv_class_file, e)), None)

        # reverse mapping (label -> name)
        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key

        # csv with img_path, x1, y1, x2, y2, class_name
        try:
            with _open_for_csv(csv_data_file) as file:
                self.image_data = _read_annotations(csv.reader(file, delimiter=','), self.classes)
        except ValueError as e:
            raise_from(ValueError('invalid CSV annotations file: {}: {}'.format(csv_data_file, e)), None)
        self.image_names = list(self.image_data.keys())

        super(CSVGenerator, self).__init__(**kwargs)

    def size(self):
        """ Size of the dataset.
        """
        return len(self.image_names)

    def num_classes(self):
        """ Number of classes in the dataset (highest class id plus one).
        """
        return max(self.classes.values()) + 1

    def has_label(self, label):
        """ Return True if label is a known label.
        """
        return label in self.labels

    def has_name(self, name):
        """ Returns True if name is a known class.
        """
        return name in self.classes

    def name_to_label(self, name):
        """ Map name to label.
        """
        return self.classes[name]

    def label_to_name(self, label):
        """ Map label to name.
        """
        return self.labels[label]

    def image_path(self, image_index):
        """ Returns the image path for image_index.
        """
        return os.path.join(self.base_dir, self.image_names[image_index])

    def image_aspect_ratio(self, image_index):
        """ Compute the aspect ratio (width / height) for an image with image_index.
        """
        # PIL is fast for metadata; the context manager makes sure the file handle is released again
        with Image.open(self.image_path(image_index)) as image:
            return float(image.width) / float(image.height)

    def load_image(self, image_index):
        """ Load an image at the image_index.
        """
        return read_image_bgr(self.image_path(image_index))

    def load_annotations(self, image_index):
        """ Load annotations for an image_index.

        Returns
            A dict with 'labels' (float array of shape (N,)) and 'bboxes' (float array of shape (N, 4)
            holding x1, y1, x2, y2), where N is the number of annotations of the image.
        """
        path        = self.image_names[image_index]
        annotations = {'labels': np.empty((0,)), 'bboxes': np.empty((0, 4))}

        # build the arrays in one go; concatenating inside a loop copies them on every iteration
        image_annotations = self.image_data[path]
        if image_annotations:
            annotations['labels'] = np.array(
                [self.name_to_label(annot['class']) for annot in image_annotations],
                dtype=np.float64
            )
            annotations['bboxes'] = np.array(
                [
                    [
                        float(annot['x1']),
                        float(annot['y1']),
                        float(annot['x2']),
                        float(annot['y2']),
                    ]
                    for annot in image_annotations
                ],
                dtype=np.float64
            )

        return annotations


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/generator.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import numpy as np
import random
import warnings

from tensorflow import keras

from ..utils.anchors import (
    anchor_targets_bbox,
    anchors_for_shape,
    guess_shapes
)
from ..utils.config import parse_anchor_parameters, parse_pyramid_levels
from ..utils.image import (
    TransformParameters,
    adjust_transform_for_image,
    apply_transform,
    preprocess_image,
    resize_image,
)
from ..utils.transform import transform_aabb


class Generator(keras.utils.Sequence):
    """ Abstract base class for batch generators.

    Subclasses supply dataset access (size, images, annotations, label mapping);
    this class groups image indices into batches and converts each group into
    network inputs and anchor targets.
    """

    def __init__(
        self,
        transform_generator = None,
        visual_effect_generator=None,
        batch_size=1,
        group_method='ratio',  # one of 'none', 'random', 'ratio'
        shuffle_groups=True,
        image_min_side=800,
        image_max_side=1333,
        no_resize=False,
        transform_parameters=None,
        compute_anchor_targets=anchor_targets_bbox,
        compute_shapes=guess_shapes,
        preprocess_image=preprocess_image,
        config=None
    ):
        """ Initialize Generator object.

        Args
            transform_generator     : A generator used to randomly transform images and annotations.
            visual_effect_generator : A generator yielding callables; each callable receives an image and returns the modified image.
            batch_size              : The size of the batches to generate.
            group_method            : Determines how images are grouped together (defaults to 'ratio', one of ('none', 'random', 'ratio')).
            shuffle_groups          : If True, shuffles the groups each epoch.
            image_min_side          : After resizing the minimum side of an image is equal to image_min_side.
            image_max_side          : If after resizing the maximum side is larger than image_max_side, scales down further so that the max side is equal to image_max_side.
            no_resize               : If True, no image/annotation resizing is performed.
            transform_parameters    : The transform parameters used for data augmentation (a default TransformParameters() is used when falsy).
            compute_anchor_targets  : Function handler for computing the targets of anchors for an image and its annotations.
            compute_shapes          : Function handler for computing the shapes of the pyramid for a given input.
            preprocess_image        : Function handler for preprocessing an image (scaling / normalizing) for passing through a network.
            config                  : Optional configuration; its 'anchor_parameters' and 'pyramid_levels' entries, when present, are parsed for anchor generation.
        """
        self.transform_generator     = transform_generator
        self.visual_effect_generator = visual_effect_generator
        self.batch_size              = int(batch_size)
        self.group_method            = group_method
        self.shuffle_groups          = shuffle_groups
        self.image_min_side          = image_min_side
        self.image_max_side          = image_max_side
        self.no_resize               = no_resize
        self.transform_parameters    = transform_parameters if transform_parameters else TransformParameters()
        self.compute_anchor_targets  = compute_anchor_targets
        self.compute_shapes          = compute_shapes
        self.preprocess_image        = preprocess_image
        self.config                  = config

        # Build the batches (groups of image indices).
        self.group_images()

        # Start from a shuffled order when shuffling is enabled.
        if self.shuffle_groups:
            self.on_epoch_end()

    def on_epoch_end(self):
        """ Reshuffle the groups in place when shuffle_groups is enabled.
        """
        if not self.shuffle_groups:
            return
        random.shuffle(self.groups)

    def size(self):
        """ Return the number of images in the dataset (to be implemented by subclasses).
        """
        raise NotImplementedError('size method not implemented')

    def num_classes(self):
        """ Return the number of classes in the dataset (to be implemented by subclasses).
        """
        raise NotImplementedError('num_classes method not implemented')

    def has_label(self, label):
        """ Return True if label is a known label (to be implemented by subclasses).
        """
        raise NotImplementedError('has_label method not implemented')

    def has_name(self, name):
        """ Return True if name is a known class (to be implemented by subclasses).
        """
        raise NotImplementedError('has_name method not implemented')

    def name_to_label(self, name):
        """ Map a class name to its label (to be implemented by subclasses).
        """
        raise NotImplementedError('name_to_label method not implemented')

    def label_to_name(self, label):
        """ Map a label to its class name (to be implemented by subclasses).
        """
        raise NotImplementedError('label_to_name method not implemented')

    def image_aspect_ratio(self, image_index):
        """ Return the aspect ratio of the image at image_index (to be implemented by subclasses).
        """
        raise NotImplementedError('image_aspect_ratio method not implemented')

    def image_path(self, image_index):
        """ Return the path of the image at image_index (to be implemented by subclasses).
        """
        raise NotImplementedError('image_path method not implemented')

    def load_image(self, image_index):
        """ Load the image at image_index (to be implemented by subclasses).
        """
        raise NotImplementedError('load_image method not implemented')

    def load_annotations(self, image_index):
        """ Load the annotations of the image at image_index (to be implemented by subclasses).
        """
        raise NotImplementedError('load_annotations method not implemented')

    def load_annotations_group(self, group):
        """ Load and sanity-check the annotations of every image in group.
        """
        # load everything first, validate afterwards
        annotations_group = []
        for image_index in group:
            annotations_group.append(self.load_annotations(image_index))

        missing_keys_message = "'load_annotations' should return a list of dictionaries that contain 'labels' and 'bboxes'."
        for annotations in annotations_group:
            assert isinstance(annotations, dict), "'load_annotations' should return a list of dictionaries, received: {}".format(type(annotations))
            assert 'labels' in annotations, missing_keys_message
            assert 'bboxes' in annotations, missing_keys_message

        return annotations_group

    def filter_annotations(self, image_group, annotations_group, group):
        """ Drop annotations whose box is degenerate (x2 <= x1 or y2 <= y1) or reaches outside the image.

        A warning listing the offending boxes is emitted for every image that has any.
        """
        for index, (image, annotations) in enumerate(zip(image_group, annotations_group)):
            boxes = annotations['bboxes']
            x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
            image_height, image_width = image.shape[0], image.shape[1]

            # a box is invalid when it has no positive extent or leaves the image
            is_invalid = (
                (x2 <= x1) |
                (y2 <= y1) |
                (x1 < 0) |
                (y1 < 0) |
                (x2 > image_width) |
                (y2 > image_height)
            )
            invalid_indices = np.where(is_invalid)[0]

            if not len(invalid_indices):
                continue

            warnings.warn('Image {} with id {} (shape {}) contains the following invalid boxes: {}.'.format(
                self.image_path(group[index]),
                group[index],
                image.shape,
                annotations['bboxes'][invalid_indices, :]
            ))

            # remove the invalid rows from every array stored for this image
            for key in annotations_group[index].keys():
                annotations_group[index][key] = np.delete(annotations[key], invalid_indices, axis=0)

        return image_group, annotations_group

    def load_image_group(self, group):
        """ Load the image of every index in group.
        """
        images = []
        for image_index in group:
            images.append(self.load_image(image_index))
        return images

    def random_visual_effect_group_entry(self, image, annotations):
        """ Apply the next visual effect to image; annotations are passed through unchanged.
        """
        effect = next(self.visual_effect_generator)
        return effect(image), annotations

    def random_visual_effect_group(self, image_group, annotations_group):
        """ Apply a visual effect to every image of the group (no-op without a visual_effect_generator).
        """
        assert len(image_group) == len(annotations_group)

        if self.visual_effect_generator is not None:
            for index, (image, annotations) in enumerate(zip(image_group, annotations_group)):
                image_group[index], annotations_group[index] = self.random_visual_effect_group_entry(
                    image, annotations
                )

        return image_group, annotations_group

    def random_transform_group_entry(self, image, annotations, transform=None):
        """ Apply a transform to an image and its boxes.

        Uses the given transform, or draws one from transform_generator; without
        either, image and annotations are returned untouched.
        """
        if transform is None and not self.transform_generator:
            return image, annotations

        if transform is None:
            transform = adjust_transform_for_image(next(self.transform_generator), image, self.transform_parameters.relative_translation)

        # transform the pixels
        image = apply_transform(transform, image, self.transform_parameters)

        # transform the boxes on a copy so the loaded array is not modified
        boxes = annotations['bboxes'].copy()
        annotations['bboxes'] = boxes
        for row in range(boxes.shape[0]):
            boxes[row, :] = transform_aabb(transform, boxes[row, :])

        return image, annotations

    def random_transform_group(self, image_group, annotations_group):
        """ Apply random_transform_group_entry to every entry of the group.
        """
        assert len(image_group) == len(annotations_group)

        for index, (image, annotations) in enumerate(zip(image_group, annotations_group)):
            image_group[index], annotations_group[index] = self.random_transform_group_entry(image, annotations)

        return image_group, annotations_group

    def resize_image(self, image):
        """ Resize an image using image_min_side and image_max_side.

        Returns the (possibly resized) image and the scale that was applied (1 when no_resize is set).
        """
        if not self.no_resize:
            return resize_image(image, min_side=self.image_min_side, max_side=self.image_max_side)
        return image, 1

    def preprocess_group_entry(self, image, annotations):
        """ Resize and preprocess an image and scale its boxes accordingly.
        """
        resized, image_scale = self.resize_image(image)
        processed = self.preprocess_image(resized)

        # boxes follow the image resize (scaled in place)
        annotations['bboxes'] *= image_scale

        # convert to the wanted keras floatx
        return keras.backend.cast_to_floatx(processed), annotations

    def preprocess_group(self, image_group, annotations_group):
        """ Apply preprocess_group_entry to every entry of the group.
        """
        assert len(image_group) == len(annotations_group)

        for index, (image, annotations) in enumerate(zip(image_group, annotations_group)):
            image_group[index], annotations_group[index] = self.preprocess_group_entry(image, annotations)

        return image_group, annotations_group

    def group_images(self):
        """ Order the images according to self.group_method and split them into groups of self.batch_size.

        The last group wraps around to the start of the order so every group has batch_size entries.
        """
        order = list(range(self.size()))
        if self.group_method == 'random':
            random.shuffle(order)
        elif self.group_method == 'ratio':
            order.sort(key=self.image_aspect_ratio)

        # one group = one batch
        total  = len(order)
        groups = []
        for start in range(0, total, self.batch_size):
            groups.append([order[position % total] for position in range(start, start + self.batch_size)])
        self.groups = groups

    def compute_inputs(self, image_group):
        """ Stack the images of a group into one zero-padded batch array.
        """
        # every image is padded up to the largest extent along each of the 3 axes
        max_shape = tuple(max(image.shape[axis] for image in image_group) for axis in range(3))

        image_batch = np.zeros((self.batch_size,) + max_shape, dtype=keras.backend.floatx())

        # images are placed in the upper-left corner of their slot
        for slot, image in enumerate(image_group):
            image_batch[slot, :image.shape[0], :image.shape[1], :image.shape[2]] = image

        if keras.backend.image_data_format() == 'channels_first':
            image_batch = image_batch.transpose((0, 3, 1, 2))

        return image_batch

    def generate_anchors(self, image_shape):
        """ Generate anchors for image_shape, honouring anchor settings found in self.config.
        """
        config = self.config

        anchor_params = parse_anchor_parameters(config) if config and 'anchor_parameters' in config else None
        pyramid_levels = parse_pyramid_levels(config) if config and 'pyramid_levels' in config else None

        return anchors_for_shape(image_shape, anchor_params=anchor_params, pyramid_levels=pyramid_levels, shapes_callback=self.compute_shapes)

    def compute_targets(self, image_group, annotations_group):
        """ Compute the network targets for a group of images and their annotations.
        """
        # anchors are generated for the padded (maximum) image shape
        max_shape = tuple(max(image.shape[axis] for image in image_group) for axis in range(3))
        anchors   = self.generate_anchors(max_shape)

        return list(self.compute_anchor_targets(
            anchors,
            image_group,
            annotations_group,
            self.num_classes()
        ))

    def compute_input_output(self, group):
        """ Run the full pipeline for one group and return (inputs, targets).
        """
        # load
        image_group       = self.load_image_group(group)
        annotations_group = self.load_annotations_group(group)

        # drop invalid boxes
        image_group, annotations_group = self.filter_annotations(image_group, annotations_group, group)

        # augmentation: visual effects first, then geometric transforms
        image_group, annotations_group = self.random_visual_effect_group(image_group, annotations_group)
        image_group, annotations_group = self.random_transform_group(image_group, annotations_group)

        # resize / normalise
        image_group, annotations_group = self.preprocess_group(image_group, annotations_group)

        inputs  = self.compute_inputs(image_group)
        targets = self.compute_targets(image_group, annotations_group)

        return inputs, targets

    def __len__(self):
        """
        Number of batches for generator.
        """
        return len(self.groups)

    def __getitem__(self, index):
        """
        Keras sequence method for generating batches.
        """
        return self.compute_input_output(self.groups[index])


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/kitti.py
================================================
"""
Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import csv
import os.path

import numpy as np
from PIL import Image

from .generator import Generator
from ..utils.image import read_image_bgr

# Mapping from KITTI object type name to the integer class id used as label.
# NOTE: 'Misc' and 'DontCare' share id 7, so the id -> name direction is
# ambiguous for that id.
kitti_classes = {
    'Car': 0,
    'Van': 1,
    'Truck': 2,
    'Pedestrian': 3,
    'Person_sitting': 4,
    'Cyclist': 5,
    'Tram': 6,
    'Misc': 7,
    'DontCare': 7
}


class KittiGenerator(Generator):
    """ Generate data for a KITTI dataset.

    See http://www.cvlibs.net/datasets/kitti/ for more information.
    """

    def __init__(
        self,
        base_dir,
        subset='train',
        **kwargs
    ):
        """ Initialize a KITTI data generator.

        Args
            base_dir: Root directory of the dataset. Labels are read from <base_dir>/<subset>/labels
                      and images are expected in <base_dir>/<subset>/images (same file name, '.png' extension).
            subset: The subset to generate data for (defaults to 'train').
            **kwargs: Forwarded to Generator.__init__.
        """
        self.base_dir = base_dir

        label_dir = os.path.join(self.base_dir, subset, 'labels')
        image_dir = os.path.join(self.base_dir, subset, 'images')

        # Layout of one line of a KITTI label file (number of values, name, meaning):
        #
        # 1    type         Describes the type of object: 'Car', 'Van', 'Truck',
        #                      'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram',
        #                      'Misc' or 'DontCare'
        # 1    truncated    Float from 0 (non-truncated) to 1 (truncated), where
        #                   truncated refers to the object leaving image boundaries
        # 1    occluded     Integer (0,1,2,3) indicating occlusion state:
        #                   0 = fully visible, 1 = partly occluded
        #                   2 = largely occluded, 3 = unknown
        # 1    alpha        Observation angle of object, ranging [-pi..pi]
        # 4    bbox         2D bounding box of object in the image (0-based index):
        #                   contains left, top, right, bottom pixel coordinates
        # 3    dimensions   3D object dimensions: height, width, length (in meters)
        # 3    location     3D object location x,y,z in camera coordinates (in meters)
        # 1    rotation_y   Rotation ry around Y-axis in camera coordinates [-pi..pi]
        fieldnames = ['type', 'truncated', 'occluded', 'alpha', 'left', 'top', 'right', 'bottom', 'dh', 'dw', 'dl',
                      'lx', 'ly', 'lz', 'ry']

        # Inverse mapping id -> name; names sharing an id overwrite each other (the last one wins).
        self.labels = {}
        self.classes = kitti_classes
        for name, label in self.classes.items():
            self.labels[label] = name

        # image_data maps the image index to its list of box dictionaries.
        self.image_data = dict()
        self.images = []
        for i, fn in enumerate(os.listdir(label_dir)):
            label_fp = os.path.join(label_dir, fn)
            image_fp = os.path.join(image_dir, fn.replace('.txt', '.png'))

            self.images.append(image_fp)

            with open(label_fp, 'r') as csv_file:
                reader = csv.DictReader(csv_file, delimiter=' ', fieldnames=fieldnames)
                boxes = []
                for row in reader:
                    label = row['type']
                    cls_id = kitti_classes[label]

                    # coordinates are kept as read (strings); load_annotations converts them
                    annotation = {'cls_id': cls_id, 'x1': row['left'], 'x2': row['right'], 'y2': row['bottom'], 'y1': row['top']}
                    boxes.append(annotation)

                self.image_data[i] = boxes

        super(KittiGenerator, self).__init__(**kwargs)

    def size(self):
        """ Size of the dataset.
        """
        return len(self.images)

    def num_classes(self):
        """ Number of classes in the dataset.
        """
        return max(self.classes.values()) + 1

    def has_label(self, label):
        """ Return True if label is a known label.
        """
        return label in self.labels

    def has_name(self, name):
        """ Returns True if name is a known class.
        """
        return name in self.classes

    def name_to_label(self, name):
        """ Map name to label.

        Raises
            KeyError: if name is not a known class (see has_name).
        """
        return self.classes[name]

    def label_to_name(self, label):
        """ Map label to name.
        """
        return self.labels[label]

    def image_aspect_ratio(self, image_index):
        """ Compute the aspect ratio for an image with image_index.
        """
        # PIL is fast for metadata; the context manager closes the file handle
        # again, which matters because this is called once per image when grouping.
        with Image.open(self.images[image_index]) as image:
            return float(image.width) / float(image.height)

    def image_path(self, image_index):
        """ Get the path to an image.
        """
        return self.images[image_index]

    def load_image(self, image_index):
        """ Load an image at the image_index.
        """
        return read_image_bgr(self.image_path(image_index))

    def load_annotations(self, image_index):
        """ Load annotations for an image_index.

        Returns
            A dict with 'labels' (shape (N,)) and 'bboxes' (shape (N, 4), columns x1, y1, x2, y2).
        """
        image_data = self.image_data[image_index]
        annotations = {'labels': np.empty((len(image_data),)), 'bboxes': np.empty((len(image_data), 4))}

        for idx, ann in enumerate(image_data):
            annotations['bboxes'][idx, 0] = float(ann['x1'])
            annotations['bboxes'][idx, 1] = float(ann['y1'])
            annotations['bboxes'][idx, 2] = float(ann['x2'])
            annotations['bboxes'][idx, 3] = float(ann['y2'])
            annotations['labels'][idx] = int(ann['cls_id'])

        return annotations


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/open_images.py
================================================
"""
Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import csv
import json
import os
import warnings

import numpy as np
from PIL import Image

from .generator import Generator
from ..utils.image import read_image_bgr


def load_hierarchy(metadata_dir, version='v4'):
    """ Read the label hierarchy JSON for the given dataset version from metadata_dir.

    'challenge2018' uses the 500-class hierarchy file, 'v4' and 'v3' the 600-class one.
    """
    if version == 'challenge2018':
        filename = 'bbox_labels_500_hierarchy.json'
    elif version in ('v4', 'v3'):
        filename = 'bbox_labels_600_hierarchy.json'
    else:
        # unknown version: no file name is selected
        filename = None

    with open(os.path.join(metadata_dir, filename)) as hierarchy_file:
        return json.load(hierarchy_file)


def load_hierarchy_children(hierarchy):
    """ Collect the 'LabelName' of a hierarchy node and of all its descendants (pre-order).
    """
    names = [hierarchy['LabelName']]

    for subcategory in hierarchy.get('Subcategory', []):
        names.extend(load_hierarchy_children(subcategory))

    return names


def find_hierarchy_parent(hierarchy, parent_cls):
    """ Return the first node (depth-first) whose 'LabelName' equals parent_cls, or None.
    """
    if hierarchy['LabelName'] == parent_cls:
        return hierarchy

    for child in hierarchy.get('Subcategory', ()):
        match = find_hierarchy_parent(child, parent_cls)
        if match is not None:
            return match

    return None


def get_labels(metadata_dir, version='v4'):
    """ Load the class tables of an Open Images release.

    Args
        metadata_dir : Directory containing the class description files.
        version      : 'v4' and 'challenge2018' read a single boxable-classes csv;
                       any other value reads 'classes-bbox-trainable.txt' together
                       with 'class-descriptions.csv'.

    Returns
        A tuple (id_to_labels, cls_index): id_to_labels maps the integer class id to
        the class description, cls_index maps the class identifier to the integer id.

    Raises
        KeyError: if a trainable class has no entry in 'class-descriptions.csv'.
    """
    def clean(description):
        # descriptions are stored without any quote characters
        return description.replace("\"", "").replace("'", "").replace('`', '')

    if version == 'v4' or version == 'challenge2018':
        csv_file = 'class-descriptions-boxable.csv' if version == 'v4' else 'challenge-2018-class-descriptions-500.csv'

        boxable_classes_descriptions = os.path.join(metadata_dir, csv_file)
        id_to_labels = {}
        cls_index    = {}

        with open(boxable_classes_descriptions) as f:
            # empty rows (usually the last one) are skipped and do not consume an id
            rows = (row for row in csv.reader(f) if len(row))
            for i, row in enumerate(rows):
                id_to_labels[i]   = clean(row[1])
                cls_index[row[0]] = i
    else:
        trainable_classes_path = os.path.join(metadata_dir, 'classes-bbox-trainable.txt')
        description_path = os.path.join(metadata_dir, 'class-descriptions.csv')

        description_table = {}
        with open(description_path) as f:
            for row in csv.reader(f):
                # make sure the csv row is not empty (usually the last one)
                if len(row):
                    description_table[row[0]] = clean(row[1])

        # Read as text: splitting bytes on a str separator raises TypeError on
        # Python 3. Blank lines (e.g. from a trailing newline) are not class ids.
        with open(trainable_classes_path, 'r') as f:
            trainable_classes = [line.strip() for line in f if line.strip()]

        id_to_labels = dict([(i, description_table[c]) for i, c in enumerate(trainable_classes)])
        cls_index = dict([(c, i) for i, c in enumerate(trainable_classes)])

    return id_to_labels, cls_index


def generate_images_annotations_json(main_dir, metadata_dir, subset, cls_index, version='v4'):
    """ Build the per-image annotation dictionary from the bounding-box csv of an Open Images release.

    Args
        main_dir     : Dataset root; images are looked up under <main_dir>/images/.
        metadata_dir : Directory holding the annotation csv files.
        subset       : Subset name (for 'challenge2018' only 'train' and 'validation' are handled).
        cls_index    : Mapping from class identifier to integer class id; rows of other classes are ignored.
        version      : Dataset version ('v4', 'challenge2018', or anything else for the older layout).

    Returns
        A dict mapping image id to {'w': width, 'h': height, 'boxes': [...]}, where every box
        holds 'cls_id' and the x1/x2/y1/y2 values exactly as given in the csv.
    """
    is_challenge = version == 'challenge2018'
    validation_image_ids = {}

    if version == 'v4':
        annotations_path = os.path.join(metadata_dir, subset, '{}-annotations-bbox.csv'.format(subset))
    elif is_challenge:
        validation_image_ids_path = os.path.join(metadata_dir, 'challenge-2018-image-ids-valset-od.csv')

        with open(validation_image_ids_path, 'r') as ids_file:
            ids_reader = csv.DictReader(ids_file, fieldnames=['ImageID'])
            next(ids_reader)  # header row
            for ids_row in ids_reader:
                validation_image_ids[ids_row['ImageID']] = True

        annotations_path = os.path.join(metadata_dir, 'challenge-2018-train-annotations-bbox.csv')
    else:
        annotations_path = os.path.join(metadata_dir, subset, 'annotations-human-bbox.csv')

    fieldnames = ['ImageID', 'Source', 'LabelName', 'Confidence',
                  'XMin', 'XMax', 'YMin', 'YMax',
                  'IsOccluded', 'IsTruncated', 'IsGroupOf', 'IsDepiction', 'IsInside']

    # The challenge validation set is a subset of the training images, so both subsets live in 'train'.
    # (We recommend participants to use the provided subset of the training set as a validation set.
    # This is preferable over using the V4 val/test sets, as the training set is more densely annotated.)
    image_subdir = 'train' if is_challenge else subset

    id_annotations = dict()
    with open(annotations_path, 'r') as csv_file:
        reader = csv.DictReader(csv_file, fieldnames=fieldnames)
        next(reader)  # header row

        images_sizes = {}
        for line, row in enumerate(reader):
            frame = row['ImageID']

            if is_challenge:
                if subset not in ('train', 'validation'):
                    raise NotImplementedError('This generator handles only the train and validation subsets')
                # 'train' skips validation images, 'validation' skips everything else
                if (frame in validation_image_ids) == (subset == 'train'):
                    continue

            class_name = row['LabelName']
            if class_name not in cls_index:
                continue
            cls_id = cls_index[class_name]

            img_path = os.path.join(main_dir, 'images', image_subdir, frame + '.jpg')

            # image sizes are read once per image and cached
            size = images_sizes.get(frame)
            if size is None:
                try:
                    with Image.open(img_path) as img:
                        size = (img.width, img.height)
                        images_sizes[frame] = size
                except Exception as ex:
                    if is_challenge:
                        raise ex
                    continue
            width, height = size

            x1 = float(row['XMin'])
            x2 = float(row['XMax'])
            y1 = float(row['YMin'])
            y2 = float(row['YMax'])

            # pixel coordinates, only used to detect boxes that collapse after rounding
            x1_int = int(round(x1 * width))
            x2_int = int(round(x2 * width))
            y1_int = int(round(y1 * height))
            y2_int = int(round(y2 * height))

            # Check that the bounding box is valid.
            if x2 <= x1:
                raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1))
            if y2 <= y1:
                raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1))

            if y2_int == y1_int:
                warnings.warn('filtering line {}: rounding y2 ({}) and y1 ({}) makes them equal'.format(line, y2, y1))
                continue

            if x2_int == x1_int:
                warnings.warn('filtering line {}: rounding x2 ({}) and x1 ({}) makes them equal'.format(line, x2, x1))
                continue

            annotation = {'cls_id': cls_id, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2}

            # first box of an image creates its entry, later boxes are appended
            entry = id_annotations.setdefault(frame, {'w': width, 'h': height, 'boxes': []})
            entry['boxes'].append(annotation)

    return id_annotations


class OpenImagesGenerator(Generator):
    """ Generate data for an Open Images dataset.

    Annotations are parsed once with generate_images_annotations_json and cached
    as '<annotation_cache_dir>/<subset>.json'; later runs load that cache instead.
    """

    def __init__(
            self, main_dir, subset, version='v4',
            labels_filter=None, annotation_cache_dir='.',
            parent_label=None,
            **kwargs
    ):
        """ Initialize an Open Images data generator.

        Args
            main_dir: Root directory holding the 'images' folder and the metadata folder.
            subset: Name of the subset; used as the image sub-folder (except for
                'challenge2018', which always reads from 'train') and as the cache file name.
            version: One of 'v3', 'v4' or 'challenge2018'.
            labels_filter: Optional list of label names to keep.
            annotation_cache_dir: Directory where the JSON annotation cache is read/written.
            parent_label: Optional label name; keeps that label and its children in the hierarchy.
            **kwargs: Forwarded to the Generator base class.

        Raises
            NotImplementedError: If version is not one of the supported values.
        """
        if version == 'challenge2018':
            metadata = 'challenge2018'
        elif version == 'v4':
            metadata = '2018_04'
        elif version == 'v3':
            metadata = '2017_11'
        else:
            raise NotImplementedError('There is currently no implementation for versions older than v3')

        if version == 'challenge2018':
            self.base_dir     = os.path.join(main_dir, 'images', 'train')
        else:
            self.base_dir     = os.path.join(main_dir, 'images', subset)

        metadata_dir          = os.path.join(main_dir, metadata)
        annotation_cache_json = os.path.join(annotation_cache_dir, subset + '.json')

        self.hierarchy          = load_hierarchy(metadata_dir, version=version)
        id_to_labels, cls_index = get_labels(metadata_dir, version=version)

        if os.path.exists(annotation_cache_json):
            with open(annotation_cache_json, 'r') as f:
                self.annotations = json.load(f)
        else:
            self.annotations = generate_images_annotations_json(main_dir, metadata_dir, subset, cls_index, version=version)
            # Write through a context manager so the cache is flushed and closed
            # deterministically instead of relying on garbage collection.
            with open(annotation_cache_json, 'w') as f:
                json.dump(self.annotations, f)

        if labels_filter is not None or parent_label is not None:
            self.id_to_labels, self.annotations = self.__filter_data(id_to_labels, cls_index, labels_filter, parent_label)
        else:
            self.id_to_labels = id_to_labels

        # Map a dense integer index to the image id used as key in self.annotations.
        self.id_to_image_id = dict(enumerate(self.annotations))

        super(OpenImagesGenerator, self).__init__(**kwargs)

    def __filter_data(self, id_to_labels, cls_index, labels_filter=None, parent_label=None):
        """ Restrict labels and annotations to a subset of classes.

        If you want to work with a subset of the labels just set a list with trainable labels.

        :param id_to_labels: Dictionary mapping class index to label name.
        :param cls_index: Dictionary mapping class identifier to class index.
        :param labels_filter: Ex: labels_filter = ['Helmet', 'Hat', 'Analog television']
        :param parent_label: If parent_label is set this will bring you the parent label
        but also its children in the semantic hierarchy as defined in OID, ex: Animal
        hierarchical tree
        :return: Tuple (id_to_labels, annotations) where the kept classes are re-indexed
        from 0 and images without any remaining box are dropped.
        """

        children_id_to_labels = {}

        if parent_label is None:
            # there is/are no other sublabel(s) other than the labels itself

            for label in labels_filter:
                for i, lb in id_to_labels.items():
                    if lb == label:
                        children_id_to_labels[i] = label
                        break
        else:
            parent_cls = None
            for i, lb in id_to_labels.items():
                if lb == parent_label:
                    parent_id = i
                    for c, index in cls_index.items():
                        if index == parent_id:
                            parent_cls = c
                    break

            if parent_cls is None:
                raise Exception('Couldnt find label {}'.format(parent_label))

            parent_tree = find_hierarchy_parent(self.hierarchy, parent_cls)

            if parent_tree is None:
                raise Exception('Couldnt find parent {} in the semantic hierarchical tree'.format(parent_label))

            children = load_hierarchy_children(parent_tree)

            for cls in children:
                index = cls_index[cls]
                label = id_to_labels[index]
                children_id_to_labels[index] = label

        # Old class index -> new dense class index (0..len-1) for the kept classes.
        id_map = dict([(ind, i) for i, ind in enumerate(children_id_to_labels.keys())])

        filtered_annotations = {}
        for k in self.annotations:
            img_ann = self.annotations[k]

            filtered_boxes = []
            for ann in img_ann['boxes']:
                cls_id = ann['cls_id']
                if cls_id in children_id_to_labels:
                    ann['cls_id'] = id_map[cls_id]
                    filtered_boxes.append(ann)

            if len(filtered_boxes) > 0:
                filtered_annotations[k] = {'w': img_ann['w'], 'h': img_ann['h'], 'boxes': filtered_boxes}

        children_id_to_labels = dict([(id_map[i], l) for (i, l) in children_id_to_labels.items()])

        return children_id_to_labels, filtered_annotations

    def size(self):
        """ Number of annotated images in the dataset.
        """
        return len(self.annotations)

    def num_classes(self):
        """ Number of classes in the dataset.
        """
        return len(self.id_to_labels)

    def has_label(self, label):
        """ Return True if label is a known label.
        """
        return label in self.id_to_labels

    def has_name(self, name):
        """ Returns True if name is a known class.
        """
        raise NotImplementedError()

    def name_to_label(self, name):
        """ Map name to label (not implemented for this generator).
        """
        raise NotImplementedError()

    def label_to_name(self, label):
        """ Map label to name.
        """
        return self.id_to_labels[label]

    def image_aspect_ratio(self, image_index):
        """ Compute the aspect ratio (width / height) from the cached image size.
        """
        img_annotations = self.annotations[self.id_to_image_id[image_index]]
        height, width = img_annotations['h'], img_annotations['w']
        return float(width) / float(height)

    def image_path(self, image_index):
        """ Get the path to an image.
        """
        path = os.path.join(self.base_dir, self.id_to_image_id[image_index] + '.jpg')
        return path

    def load_image(self, image_index):
        """ Load an image at the image_index.
        """
        return read_image_bgr(self.image_path(image_index))

    def load_annotations(self, image_index):
        """ Load annotations for an image_index.

        Stored box coordinates are relative to the image size; they are scaled by the
        cached width/height here. Returns a dict with 'labels' of shape (n,) and
        'bboxes' of shape (n, 4) in (x1, y1, x2, y2) order.
        """
        image_annotations = self.annotations[self.id_to_image_id[image_index]]

        labels = image_annotations['boxes']
        height, width = image_annotations['h'], image_annotations['w']

        annotations = {'labels': np.empty((len(labels),)), 'bboxes': np.empty((len(labels), 4))}
        for idx, ann in enumerate(labels):
            cls_id = ann['cls_id']
            x1 = ann['x1'] * width
            x2 = ann['x2'] * width
            y1 = ann['y1'] * height
            y2 = ann['y2'] * height

            annotations['bboxes'][idx, 0] = x1
            annotations['bboxes'][idx, 1] = y1
            annotations['bboxes'][idx, 2] = x2
            annotations['bboxes'][idx, 3] = y2
            annotations['labels'][idx] = cls_id

        return annotations


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/preprocessing/pascal_voc.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from ..preprocessing.generator import Generator
from ..utils.image import read_image_bgr

import os
import numpy as np
from six import raise_from
from PIL import Image

try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET

# Pascal VOC class names mapped to consecutive integer labels, starting at 0
# in the order listed below.
voc_classes = {
    class_name: class_label
    for class_label, class_name in enumerate((
        'aeroplane',
        'bicycle',
        'bird',
        'boat',
        'bottle',
        'bus',
        'car',
        'cat',
        'chair',
        'cow',
        'diningtable',
        'dog',
        'horse',
        'motorbike',
        'person',
        'pottedplant',
        'sheep',
        'sofa',
        'train',
        'tvmonitor',
    ))
}


def _findNode(parent, name, debug_name=None, parse=None):
    if debug_name is None:
        debug_name = name

    result = parent.find(name)
    if result is None:
        raise ValueError('missing element \'{}\''.format(debug_name))
    if parse is not None:
        try:
            return parse(result.text)
        except ValueError as e:
            raise_from(ValueError('illegal value for \'{}\': {}'.format(debug_name, e)), None)
    return result


class PascalVocGenerator(Generator):
    """ Generate data for a Pascal VOC dataset.

    See http://host.robots.ox.ac.uk/pascal/VOC/ for more information.
    """

    def __init__(
        self,
        data_dir,
        set_name,
        classes=voc_classes,
        image_extension='.jpg',
        skip_truncated=False,
        skip_difficult=False,
        **kwargs
    ):
        """ Initialize a Pascal VOC data generator.

        Args
            data_dir: Dataset root; 'ImageSets/Main', 'JPEGImages' and 'Annotations' are read from it.
            set_name: Name of the image set; '<set_name>.txt' under 'ImageSets/Main' lists the images.
            classes: Dictionary mapping class names to integer labels.
            image_extension: File extension appended to image names under 'JPEGImages'.
            skip_truncated: If True, objects flagged as truncated are left out of the annotations.
            skip_difficult: If True, objects flagged as difficult are left out of the annotations.
            **kwargs: Forwarded to the Generator base class.
        """
        self.data_dir             = data_dir
        self.set_name             = set_name
        self.classes              = classes
        self.image_extension      = image_extension
        self.skip_truncated       = skip_truncated
        self.skip_difficult       = skip_difficult

        # The first whitespace-separated token of each line is the image name.
        # Blank lines are skipped, and the file is closed as soon as it is read.
        image_set_path = os.path.join(data_dir, 'ImageSets', 'Main', set_name + '.txt')
        with open(image_set_path) as image_set_file:
            self.image_names = [line.strip().split(None, 1)[0] for line in image_set_file if line.strip()]

        # Reverse mapping: integer label -> class name.
        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key

        super(PascalVocGenerator, self).__init__(**kwargs)

    def size(self):
        """ Size of the dataset.
        """
        return len(self.image_names)

    def num_classes(self):
        """ Number of classes in the dataset.
        """
        return len(self.classes)

    def has_label(self, label):
        """ Return True if label is a known label.
        """
        return label in self.labels

    def has_name(self, name):
        """ Returns True if name is a known class.
        """
        return name in self.classes

    def name_to_label(self, name):
        """ Map name to label.
        """
        return self.classes[name]

    def label_to_name(self, label):
        """ Map label to name.
        """
        return self.labels[label]

    def image_aspect_ratio(self, image_index):
        """ Compute the aspect ratio for an image with image_index.
        """
        # Use a context manager so the underlying file handle is released right away.
        with Image.open(self.image_path(image_index)) as image:
            return float(image.width) / float(image.height)

    def image_path(self, image_index):
        """ Get the path to an image.
        """
        return os.path.join(self.data_dir, 'JPEGImages', self.image_names[image_index] + self.image_extension)

    def load_image(self, image_index):
        """ Load an image at the image_index.
        """
        return read_image_bgr(self.image_path(image_index))

    def __parse_annotation(self, element):
        """ Parse an annotation given an XML element.

        Returns a tuple (truncated, difficult, box, label) where box is a (4,) array
        in (xmin, ymin, xmax, ymax) order with 1 subtracted from each coordinate.
        """
        truncated = _findNode(element, 'truncated', parse=int)
        difficult = _findNode(element, 'difficult', parse=int)

        class_name = _findNode(element, 'name').text
        if class_name not in self.classes:
            raise ValueError('class name \'{}\' not found in classes: {}'.format(class_name, list(self.classes.keys())))

        box = np.zeros((4,))
        label = self.name_to_label(class_name)

        bndbox    = _findNode(element, 'bndbox')
        box[0] = _findNode(bndbox, 'xmin', 'bndbox.xmin', parse=float) - 1
        box[1] = _findNode(bndbox, 'ymin', 'bndbox.ymin', parse=float) - 1
        box[2] = _findNode(bndbox, 'xmax', 'bndbox.xmax', parse=float) - 1
        box[3] = _findNode(bndbox, 'ymax', 'bndbox.ymax', parse=float) - 1

        return truncated, difficult, box, label

    def __parse_annotations(self, xml_root):
        """ Parse all annotations under the xml_root.

        Returns a dict with 'labels' of shape (n,) and 'bboxes' of shape (n, 4), where
        n counts only the objects that were not skipped.
        """
        # Collect only the kept objects. Pre-allocating one row per object would leave
        # uninitialised rows behind for every skipped (truncated/difficult) object.
        boxes = []
        labels = []
        for i, element in enumerate(xml_root.iter('object')):
            try:
                truncated, difficult, box, label = self.__parse_annotation(element)
            except ValueError as e:
                raise_from(ValueError('could not parse object #{}: {}'.format(i, e)), None)

            if truncated and self.skip_truncated:
                continue
            if difficult and self.skip_difficult:
                continue

            boxes.append(box)
            labels.append(label)

        # Explicit reshape keeps the (0,) / (0, 4) shapes when nothing was kept.
        annotations = {
            'labels': np.array(labels, dtype=np.float64).reshape((len(labels),)),
            'bboxes': np.array(boxes, dtype=np.float64).reshape((len(boxes), 4)),
        }

        return annotations

    def load_annotations(self, image_index):
        """ Load annotations for an image_index.

        Raises
            ValueError: If the annotation XML is malformed or has missing/illegal values.
        """
        filename = self.image_names[image_index] + '.xml'
        try:
            tree = ET.parse(os.path.join(self.data_dir, 'Annotations', filename))
            return self.__parse_annotations(tree.getroot())
        except (ET.ParseError, ValueError) as e:
            raise_from(ValueError('invalid annotations file: {}: {}'.format(filename, e)), None)


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/utils/__init__.py
================================================


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/utils/anchors.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import numpy as np
from tensorflow import keras

#from ..utils.compute_overlap import compute_overlap


class AnchorParameters:
    """ Container for the parameters that define how anchors are generated.

    Args
        sizes   : List of sizes to use. Each size corresponds to one feature level.
        strides : List of strides to use. Each stride corresponds to one feature level.
        ratios  : List of ratios to use per location in a feature map.
        scales  : List of scales to use per location in a feature map.
    """
    def __init__(self, sizes, strides, ratios, scales):
        self.sizes, self.strides = sizes, strides
        self.ratios, self.scales = ratios, scales

    def num_anchors(self):
        """ Number of anchors per location: one for every (ratio, scale) pair.
        """
        ratio_count = len(self.ratios)
        scale_count = len(self.scales)
        return ratio_count * scale_count


"""
The default anchor parameters.
"""
# Five sizes and five strides (one per feature level); three ratios times three
# scales, so num_anchors() is 9 for the defaults. ratios and scales are stored
# as numpy arrays using the Keras backend float type as dtype.
AnchorParameters.default = AnchorParameters(
    sizes   = [32, 64, 128, 256, 512],
    strides = [8, 16, 32, 64, 128],
    ratios  = np.array([0.5, 1, 2], keras.backend.floatx()),
    scales  = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)], keras.backend.floatx()),
)


def anchor_targets_bbox(
    anchors,
    image_group,
    annotations_group,
    num_classes,
    negative_overlap=0.4,
    positive_overlap=0.5
):
    """ Generate anchor targets for bbox detection.

    Args
        anchors: np.array of anchors of shape (N, 4) for (x1, y1, x2, y2).
        image_group: List of BGR images.
        annotations_group: List of annotation dictionaries with each annotation containing 'labels' and 'bboxes' of an image.
        num_classes: Number of classes to predict.
        negative_overlap: IoU overlap for negative anchors (all anchors with overlap < negative_overlap are negative).
        positive_overlap: IoU overlap for positive anchors (all anchors with overlap > positive_overlap are positive).

    Returns
        A tuple (regression_batch, labels_batch), in that order:

        regression_batch: batch that contains bounding-box regression targets for an image & anchor states (np.array of shape (batch_size, N, 4 + 1),
                      where N is the number of anchors for an image, the first 4 columns define regression targets for (x1, y1, x2, y2) and the
                      last column defines anchor states (-1 for ignore, 0 for bg, 1 for fg).
        labels_batch: batch that contains labels & anchor states (np.array of shape (batch_size, N, num_classes + 1),
                      where N is the number of anchors for an image and the last column defines the anchor state (-1 for ignore, 0 for bg, 1 for fg).
    """

    assert(len(image_group) == len(annotations_group)), "The length of the images and annotations need to be equal."
    assert(len(annotations_group) > 0), "No data received to compute anchor targets for."
    for annotations in annotations_group:
        assert('bboxes' in annotations), "Annotations should contain bboxes."
        assert('labels' in annotations), "Annotations should contain labels."

    batch_size = len(image_group)

    # Both batches start as all zeros, i.e. every anchor is background until marked otherwise.
    regression_batch  = np.zeros((batch_size, anchors.shape[0], 4 + 1), dtype=keras.backend.floatx())
    labels_batch      = np.zeros((batch_size, anchors.shape[0], num_classes + 1), dtype=keras.backend.floatx())

    # compute labels and regression targets
    for index, (image, annotations) in enumerate(zip(image_group, annotations_group)):
        if annotations['bboxes'].shape[0]:
            # obtain indices of gt annotations with the greatest overlap
            # NOTE(review): compute_gt_annotations is neither defined nor imported in the
            # visible part of this module (the compute_overlap import above is commented
            # out) -- confirm where it comes from, otherwise this line raises NameError.
            positive_indices, ignore_indices, argmax_overlaps_inds = compute_gt_annotations(anchors, annotations['bboxes'], negative_overlap, positive_overlap)

            # Anchor state lives in the last column; ignore is written first, positive second.
            labels_batch[index, ignore_indices, -1]       = -1
            labels_batch[index, positive_indices, -1]     = 1

            regression_batch[index, ignore_indices, -1]   = -1
            regression_batch[index, positive_indices, -1] = 1

            # compute target class labels
            labels_batch[index, positive_indices, annotations['labels'][argmax_overlaps_inds[positive_indices]].astype(int)] = 1

            regression_batch[index, :, :-1] = bbox_transform(anchors, annotations['bboxes'][argmax_overlaps_inds, :])

        # ignore anchors whose center lies at or beyond the right/bottom edge of the image
        if image.shape:
            anchors_centers = np.vstack([(anchors[:, 0] + anchors[:, 2]) / 2, (anchors[:, 1] + anchors[:, 3]) / 2]).T
            indices = np.logical_or(anchors_centers[:, 0] >= image.shape[1], anchors_centers[:, 1] >= image.shape[0])

            labels_batch[index, indices, -1]     = -1
            regression_batch[index, indices, -1] = -1

    return regression_batch, labels_batch


def layer_shapes(image_shape, model):
    """Propagate an input image shape through the model, layer by layer.

    Args
        image_shape: The shape of the image.
        model: The model to use for computing how the image shape is transformed in the pyramid.

    Returns
        A dictionary mapping layer names to image shapes.
    """
    # The first layer is seeded with the image shape plus a leading batch dimension.
    shapes_by_name = {model.layers[0].name: (None,) + image_shape}

    for current in model.layers[1:]:
        for node in current._inbound_nodes:
            inbound = node.inbound_layers
            if isinstance(inbound, keras.layers.Layer):
                # a single inbound layer is given directly rather than as a list
                input_shapes = [shapes_by_name[inbound.name]]
            else:
                input_shapes = [shapes_by_name[lr.name] for lr in inbound]

            if len(input_shapes) == 0:
                continue

            if len(input_shapes) == 1:
                shape_arg = input_shapes[0]
            else:
                shape_arg = input_shapes
            shapes_by_name[current.name] = current.compute_output_shape(shape_arg)

    return shapes_by_name


def make_shapes_callback(model):
    """ Build a callback, bound to `model`, that returns the shapes of the pyramid levels.

    The returned function takes (image_shape, pyramid_levels) and looks up the
    layers named 'P<level>' in the shapes computed by layer_shapes.
    """
    def get_shapes(image_shape, pyramid_levels):
        shapes_by_layer = layer_shapes(image_shape, model)
        # keep only entries 1 and 2 of every pyramid layer's shape
        return [shapes_by_layer["P{}".format(lvl)][1:3] for lvl in pyramid_levels]

    return get_shapes


def guess_shapes(image_shape, pyramid_levels):
    """Estimate the feature-map shape at each pyramid level.

    Args
         image_shape: The shape of the image.
         pyramid_levels: A list of what pyramid levels are used.

    Returns
        A list of image shapes at each pyramid level.
    """
    spatial = np.array(image_shape[:2])

    level_shapes = []
    for level in pyramid_levels:
        # adding (2 ** level - 1) before the floor division rounds up
        level_shapes.append((spatial + 2 ** level - 1) // (2 ** level))
    return level_shapes


def anchors_for_shape(
    image_shape,
    pyramid_levels=None,
    anchor_params=None,
    shapes_callback=None,
):
    """ Generate the anchors for a given image shape.

    Args
        image_shape: The shape of the image.
        pyramid_levels: List of ints representing which pyramids to use (defaults to [3, 4, 5, 6, 7]).
        anchor_params: Struct containing anchor parameters. If None, default values are used.
        shapes_callback: Function to call for getting the shape of the image at different pyramid levels.

    Returns
        np.array of shape (N, 4) containing the (x1, y1, x2, y2) coordinates for the anchors.
    """
    # fill in defaults for anything the caller left out
    levels = [3, 4, 5, 6, 7] if pyramid_levels is None else pyramid_levels
    params = AnchorParameters.default if anchor_params is None else anchor_params
    get_level_shapes = guess_shapes if shapes_callback is None else shapes_callback

    level_shapes = get_level_shapes(image_shape, levels)

    # stack the shifted anchors of every pyramid level below each other
    result = np.zeros((0, 4))
    for position, _ in enumerate(levels):
        base_anchors = generate_anchors(
            base_size=params.sizes[position],
            ratios=params.ratios,
            scales=params.scales
        )
        level_anchors = shift(level_shapes[position], params.strides[position], base_anchors)
        result = np.append(result, level_anchors, axis=0)

    return result


def shift(shape, stride, anchors):
    """ Replicate the anchors at every cell of a feature map.

    Args
        shape  : Shape to shift the anchors over.
        stride : Stride to shift the anchors with over the shape.
        anchors: The anchors to apply at each location.
    """
    rows, cols = shape[0], shape[1]

    # cell centers: half a stride in from the top left corner, one stride apart
    centers_x = (np.arange(0, cols) + 0.5) * stride
    centers_y = (np.arange(0, rows) + 0.5) * stride
    grid_x, grid_y = np.meshgrid(centers_x, centers_y)
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()

    # one (x, y, x, y) offset per cell
    offsets = np.vstack((flat_x, flat_y, flat_x, flat_y)).T

    # broadcast (1, A, 4) anchors against (K, 1, 4) offsets -> (K, A, 4),
    # then flatten to (K * A, 4)
    num_anchors = anchors.shape[0]
    num_cells = offsets.shape[0]
    shifted = anchors.reshape((1, num_anchors, 4)) + offsets.reshape((num_cells, 1, 4))

    return shifted.reshape((num_cells * num_anchors, 4))


def generate_anchors(base_size=16, ratios=None, scales=None):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X
    scales w.r.t. a reference window.

    Args
        base_size: Side length of the reference window before scaling.
        ratios: Aspect ratios to enumerate (defaults to AnchorParameters.default.ratios).
        scales: Scales to enumerate (defaults to AnchorParameters.default.scales).

    Returns
        np.array of shape (len(ratios) * len(scales), 4) with (x1, y1, x2, y2)
        coordinates centered on the origin. Rows are grouped by ratio, with the
        scales cycling within each ratio.
    """

    if ratios is None:
        ratios = AnchorParameters.default.ratios

    if scales is None:
        scales = AnchorParameters.default.scales

    num_anchors = len(ratios) * len(scales)

    # initialize output anchors
    anchors = np.zeros((num_anchors, 4))

    # scale base_size: columns 2 and 3 temporarily hold width and height,
    # both set to base_size * scale
    anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T

    # compute areas of anchors
    areas = anchors[:, 2] * anchors[:, 3]

    # correct for ratios: keep the area, make height = ratio * width
    anchors[:, 2] = np.sqrt(areas / np.repeat(ratios, len(scales)))
    anchors[:, 3] = anchors[:, 2] * np.repeat(ratios, len(scales))

    # transform from (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2)
    # (columns 0 and 1 are still zero, so this yields (-w/2, -h/2, w/2, h/2))
    anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T
    anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T

    return anchors


def bbox_transform(anchors, gt_boxes, mean=None, std=None):
    """Compute bounding-box regression targets for an image.

    Each target is the offset between a ground-truth box corner and the matching
    anchor corner, divided by the anchor width (x) or height (y), then
    normalized as (target - mean) / std.

    Args
        anchors: np.array of shape (N, 4) for (x1, y1, x2, y2).
        gt_boxes: np.array with one ground-truth box per anchor, same column order.
        mean: np.ndarray, list or tuple (defaults to zeros).
        std: np.ndarray, list or tuple (defaults to 0.2 for every coordinate).

    Returns
        np.array of shape (N, 4) with the normalized regression targets.

    Raises
        ValueError: If mean or std is not a np.ndarray, list or tuple.
    """

    # The Mean and std are calculated from COCO dataset.
    # Bounding box normalization was firstly introduced in the Fast R-CNN paper.
    # See https://github.com/fizyr/keras-retinanet/issues/1273#issuecomment-585828825  for more details
    mean = np.array([0, 0, 0, 0]) if mean is None else mean
    std = np.array([0.2, 0.2, 0.2, 0.2]) if std is None else std

    if isinstance(mean, (list, tuple)):
        mean = np.array(mean)
    if not isinstance(mean, np.ndarray):
        raise ValueError('Expected mean to be a np.ndarray, list or tuple. Received: {}'.format(type(mean)))

    if isinstance(std, (list, tuple)):
        std = np.array(std)
    if not isinstance(std, np.ndarray):
        raise ValueError('Expected std to be a np.ndarray, list or tuple. Received: {}'.format(type(std)))

    widths = anchors[:, 2] - anchors[:, 0]
    heights = anchors[:, 3] - anchors[:, 1]

    # According to the information provided by a keras-retinanet author, they got marginally better results using
    # the following way of bounding box parametrization.
    # See https://github.com/fizyr/keras-retinanet/issues/1273#issuecomment-585828825 for more details
    divisors = (widths, heights, widths, heights)
    per_column = [(gt_boxes[:, col] - anchors[:, col]) / divisors[col] for col in range(4)]

    raw_targets = np.stack(per_column).T

    return (raw_targets - mean) / std


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/utils/coco_eval.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from pycocotools.cocoeval import COCOeval

from tensorflow import keras
import numpy as np
import json

import progressbar
assert(callable(progressbar.progressbar)), "Using wrong progressbar module, install 'progressbar2' instead."


def evaluate_coco(generator, model, threshold=0.05):
    """ Use the pycocotools to evaluate a COCO model on a dataset.

    Detections are written to '<set_name>_bbox_results.json' and the processed image
    ids to '<set_name>_processed_image_ids.json' in the current working directory.

    Args
        generator : The generator for generating the evaluation data.
        model     : The model to evaluate.
        threshold : The score threshold to use.

    Returns
        The COCOeval stats, or None when no detection reached the threshold
        (in which case no files are written).
    """
    # start collecting results
    results = []
    image_ids = []
    for index in progressbar.progressbar(range(generator.size()), prefix='COCO evaluation: '):
        image = generator.load_image(index)
        image = generator.preprocess_image(image)
        image, scale = generator.resize_image(image)

        if keras.backend.image_data_format() == 'channels_first':
            image = image.transpose((2, 0, 1))

        # run network
        boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0))

        # correct boxes for image scale
        boxes /= scale

        # change to (x, y, w, h) (MS COCO standard)
        boxes[:, :, 2] -= boxes[:, :, 0]
        boxes[:, :, 3] -= boxes[:, :, 1]

        # compute predicted labels and scores
        for box, score, label in zip(boxes[0], scores[0], labels[0]):
            # scores are sorted, so we can break
            if score < threshold:
                break

            # append detection for each positively labeled class
            image_result = {
                'image_id'    : generator.image_ids[index],
                'category_id' : generator.label_to_coco_label(label),
                'score'       : float(score),
                'bbox'        : box.tolist(),
            }

            # append detection to results
            results.append(image_result)

        # append image to list of processed images
        image_ids.append(generator.image_ids[index])

    if not len(results):
        return

    # write output; the files are closed (and therefore flushed) before the results
    # file is read back by loadRes below
    results_path = '{}_bbox_results.json'.format(generator.set_name)
    image_ids_path = '{}_processed_image_ids.json'.format(generator.set_name)
    with open(results_path, 'w') as results_file:
        json.dump(results, results_file, indent=4)
    with open(image_ids_path, 'w') as image_ids_file:
        json.dump(image_ids, image_ids_file, indent=4)

    # load results in COCO evaluation tool
    coco_true = generator.coco
    coco_pred = coco_true.loadRes(results_path)

    # run COCO evaluation
    coco_eval = COCOeval(coco_true, coco_pred, 'bbox')
    coco_eval.params.imgIds = image_ids
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    return coco_eval.stats


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/utils/colors.py
================================================
import warnings


def label_color(label):
    """ Look up the predefined color for a label. There are 80 predefined colors in total.

    Args
        label: The label to get the color for.

    Returns
        A list of three values representing a RGB color.

        If no color is defined for a certain label, the color green is returned and a warning is printed.
    """
    if not label < len(colors):
        # no predefined color for this label: warn and fall back to green
        warnings.warn('Label {} has no color, returning default.'.format(label))
        return (0, 255, 0)

    return colors[label]


"""
Generated using:

```
colors = [list((matplotlib.colors.hsv_to_rgb([x, 1.0, 1.0]) * 255).astype(int)) for x in np.arange(0, 1, 1.0 / 80)]
shuffle(colors)
pprint(colors)
```
"""
# Lookup table used by label_color(): 80 RGB triples, indexed by integer label.
colors = [
    [31  , 0   , 255] ,
    [0   , 159 , 255] ,
    [255 , 95  , 0]   ,
    [255 , 19  , 0]   ,
    [255 , 0   , 0]   ,
    [255 , 38  , 0]   ,
    [0   , 255 , 25]  ,
    [255 , 0   , 133] ,
    [255 , 172 , 0]   ,
    [108 , 0   , 255] ,
    [0   , 82  , 255] ,
    [0   , 255 , 6]   ,
    [255 , 0   , 152] ,
    [223 , 0   , 255] ,
    [12  , 0   , 255] ,
    [0   , 255 , 178] ,
    [108 , 255 , 0]   ,
    [184 , 0   , 255] ,
    [255 , 0   , 76]  ,
    [146 , 255 , 0]   ,
    [51  , 0   , 255] ,
    [0   , 197 , 255] ,
    [255 , 248 , 0]   ,
    [255 , 0   , 19]  ,
    [255 , 0   , 38]  ,
    [89  , 255 , 0]   ,
    [127 , 255 , 0]   ,
    [255 , 153 , 0]   ,
    [0   , 255 , 255] ,
    [0   , 255 , 216] ,
    [0   , 255 , 121] ,
    [255 , 0   , 248] ,
    [70  , 0   , 255] ,
    [0   , 255 , 159] ,
    [0   , 216 , 255] ,
    [0   , 6   , 255] ,
    [0   , 63  , 255] ,
    [31  , 255 , 0]   ,
    [255 , 57  , 0]   ,
    [255 , 0   , 210] ,
    [0   , 255 , 102] ,
    [242 , 255 , 0]   ,
    [255 , 191 , 0]   ,
    [0   , 255 , 63]  ,
    [255 , 0   , 95]  ,
    [146 , 0   , 255] ,
    [184 , 255 , 0]   ,
    [255 , 114 , 0]   ,
    [0   , 255 , 235] ,
    [255 , 229 , 0]   ,
    [0   , 178 , 255] ,
    [255 , 0   , 114] ,
    [255 , 0   , 57]  ,
    [0   , 140 , 255] ,
    [0   , 121 , 255] ,
    [12  , 255 , 0]   ,
    [255 , 210 , 0]   ,
    [0   , 255 , 44]  ,
    [165 , 255 , 0]   ,
    [0   , 25  , 255] ,
    [0   , 255 , 140] ,
    [0   , 101 , 255] ,
    [0   , 255 , 82]  ,
    [223 , 255 , 0]   ,
    [242 , 0   , 255] ,
    [89  , 0   , 255] ,
    [165 , 0   , 255] ,
    [70  , 255 , 0]   ,
    [255 , 0   , 172] ,
    [255 , 76  , 0]   ,
    [203 , 255 , 0]   ,
    [204 , 0   , 255] ,
    [255 , 0   , 229] ,
    [255 , 133 , 0]   ,
    [127 , 0   , 255] ,
    [0   , 235 , 255] ,
    [0   , 255 , 197] ,
    [255 , 0   , 191] ,
    [0   , 44  , 255] ,
    [50  , 255 , 0]
]


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/utils/compute_overlap.pyx
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Sergey Karayev
# --------------------------------------------------------

cimport cython
import numpy as np
cimport numpy as np


def compute_overlap(
    np.ndarray[double, ndim=2] boxes,
    np.ndarray[double, ndim=2] query_boxes
):
    """
    Compute the pairwise intersection-over-union between two sets of boxes.

    Only columns 0-3 of each row are read; columns 0/1 are used as the
    minimum corner and columns 2/3 as the maximum corner.

    Args
        boxes:       (N, 4) ndarray of float
        query_boxes: (K, 4) ndarray of float

    Returns
        overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    # Pairs that do not overlap keep the 0.0 they are initialised with here.
    cdef np.ndarray[double, ndim=2] overlaps = np.zeros((N, K), dtype=np.float64)
    cdef double iw, ih, box_area
    cdef double ua
    cdef unsigned int k, n
    for k in range(K):
        # Area of the k-th query box, computed once and reused for every n.
        box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0]) *
            (query_boxes[k, 3] - query_boxes[k, 1])
        )
        for n in range(N):
            # Width of the intersection; not positive means no overlap.
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) 
            )
            if iw > 0:
                # Height of the intersection, only needed once the width overlaps.
                ih = (
                    min(boxes[n, 3], query_boxes[k, 3]) -
                    max(boxes[n, 1], query_boxes[k, 1]) 
                )
                if ih > 0:
                    # Union area = area(box n) + area(query k) - intersection.
                    ua = np.float64(
                        (boxes[n, 2] - boxes[n, 0]) *
                        (boxes[n, 3] - boxes[n, 1]) +
                        box_area - iw * ih
                    )
                    overlaps[n, k] = iw * ih / ua
    return overlaps


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/utils/config.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import configparser
import numpy as np
from tensorflow import keras
from ..utils.anchors import AnchorParameters


def read_config_file(config_path):
    """ Load a configuration file and validate the sections this module understands.

    Args
        config_path: Path of the INI-style file to parse.

    Returns
        The populated configparser.ConfigParser instance.

    Raises
        AssertionError: if the 'anchor_parameters' section is missing, if it holds
            a key that is not an attribute of AnchorParameters.default, or if a
            'pyramid_levels' section is present without a 'levels' key.
    """
    parser = configparser.ConfigParser()
    with open(config_path, 'r') as handle:
        parser.read_file(handle)

    assert 'anchor_parameters' in parser, \
        "Malformed config file. Verify that it contains the anchor_parameters section."

    # Every key in the section must correspond to a known anchor attribute.
    supplied_keys = set(parser['anchor_parameters'])
    allowed_keys = set(AnchorParameters.default.__dict__.keys())
    assert not (supplied_keys - allowed_keys), \
        "Malformed config file. These keys are not valid: {}".format(supplied_keys - allowed_keys)

    # The pyramid section is optional, but when given it needs a 'levels' entry.
    if 'pyramid_levels' in parser:
        assert 'levels' in parser['pyramid_levels'], "pyramid levels specified by levels key"

    return parser


def parse_anchor_parameters(config):
    """ Build an AnchorParameters object from the 'anchor_parameters' section.

    Each of 'ratios', 'scales', 'sizes' and 'strides' is a whitespace-separated
    list of numbers. Values are split on any run of whitespace, so repeated
    spaces, tabs, line breaks and leading/trailing blanks are tolerated.

    Args
        config: Mapping that contains an 'anchor_parameters' section.

    Returns
        AnchorParameters(sizes, strides, ratios, scales), where ratios and scales
        are numpy arrays of dtype keras.backend.floatx() and sizes and strides
        are lists of int.

    Raises
        AssertionError: if sizes and strides differ in length.
    """
    section = config['anchor_parameters']

    # str.split() without a separator never yields empty tokens, unlike split(' ').
    ratios  = np.array([float(value) for value in section['ratios'].split()], keras.backend.floatx())
    scales  = np.array([float(value) for value in section['scales'].split()], keras.backend.floatx())
    sizes   = [int(value) for value in section['sizes'].split()]
    strides = [int(value) for value in section['strides'].split()]
    assert (len(sizes) == len(strides)), "sizes and strides should have an equal number of values"

    return AnchorParameters(sizes, strides, ratios, scales)


def parse_pyramid_levels(config):
    """ Read the pyramid levels from the 'pyramid_levels' section.

    The 'levels' value is a whitespace-separated list of integers. It is split
    on any run of whitespace, so repeated spaces, tabs and leading/trailing
    blanks do not produce empty tokens.

    Args
        config: Mapping that contains a 'pyramid_levels' section with a 'levels' key.

    Returns
        The levels as a list of int.
    """
    return [int(level) for level in config['pyramid_levels']['levels'].split()]


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/utils/eval.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from .anchors import compute_overlap
from .visualization import draw_detections, draw_annotations

from tensorflow import keras
import numpy as np
import os
import time

import cv2
import progressbar
# Fail at import time when the installed `progressbar` module has no callable
# `progressbar.progressbar` helper, which the functions below rely on.
assert(callable(progressbar.progressbar)), "Using wrong progressbar module, install 'progressbar2' instead."


def _compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.

    Code originally from https://github.com/rbgirshick/py-faster-rcnn.

    # Arguments
        recall:    The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # correct AP calculation
    # first append sentinel values at the end
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    # compute the precision envelope
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    i = np.where(mrec[1:] != mrec[:-1])[0]

    # and sum (\Delta recall) * prec
    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap


def _get_detections(generator, model, score_threshold=0.05, max_detections=100, save_path=None):
    """ Get the detections from the model using the generator.

    The result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = detections[num_detections, 4 + num_classes]

    # Arguments
        generator       : The generator used to run images through the model.
        model           : The model to run on the images.
        score_threshold : The score confidence threshold to use.
        max_detections  : The maximum number of detections to use per image.
        save_path       : The path to save the images with visualized detections to.
    # Returns
        A list of lists containing the detections for each image in the generator.
    """
    all_detections = [[None for i in range(generator.num_classes()) if generator.has_label(i)] for j in range(generator.size())]
    all_inferences = [None for i in range(generator.size())]

    for i in progressbar.progressbar(range(generator.size()), prefix='Running network: '):
        raw_image    = generator.load_image(i)
        image, scale = generator.resize_image(raw_image.copy())
        image = generator.preprocess_image(image)

        if keras.backend.image_data_format() == 'channels_first':
            image = image.transpose((2, 0, 1))

        # run network
        start = time.time()
        boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0))[:3]
        inference_time = time.time() - start

        # correct boxes for image scale
        boxes /= scale

        # select indices which have a score above the threshold
        indices = np.where(scores[0, :] > score_threshold)[0]

        # select those scores
        scores = scores[0][indices]

        # find the order with which to sort the scores
        scores_sort = np.argsort(-scores)[:max_detections]

        # select detections
        image_boxes      = boxes[0, indices[scores_sort], :]
        image_scores     = scores[scores_sort]
        image_labels     = labels[0, indices[scores_sort]]
        image_detections = np.concatenate([image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1)

        if save_path is not None:
            draw_annotations(raw_image, generator.load_annotations(i), label_to_name=generator.label_to_name)
            draw_detections(raw_image, image_boxes, image_scores, image_labels, label_to_name=generator.label_to_name, score_threshold=score_threshold)

            cv2.imwrite(os.path.join(save_path, '{}.png'.format(i)), raw_image)

        # copy detections to all_detections
        for label in range(generator.num_classes()):
            if not generator.has_label(label):
                continue

            all_detections[i][label] = image_detections[image_detections[:, -1] == label, :-1]

        all_inferences[i] = inference_time

    return all_detections, all_inferences


def _get_annotations(generator):
    """ Get the ground truth annotations from the generator.

    The result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = annotations[num_detections, 5]

    # Arguments
        generator : The generator used to retrieve ground truth annotations.
    # Returns
        A list of lists containing the annotations for each image in the generator.
    """
    all_annotations = [[None for i in range(generator.num_classes())] for j in range(generator.size())]

    for i in progressbar.progressbar(range(generator.size()), prefix='Parsing annotations: '):
        # load the annotations
        annotations = generator.load_annotations(i)

        # copy detections to all_annotations
        for label in range(generator.num_classes()):
            if not generator.has_label(label):
                continue

            all_annotations[i][label] = annotations['bboxes'][annotations['labels'] == label, :].copy()

    return all_annotations


def evaluate(
    generator,
    model,
    iou_threshold=0.5,
    score_threshold=0.05,
    max_detections=100,
    save_path=None
):
    """ Evaluate a given dataset using a given model.

    # Arguments
        generator       : The generator that represents the dataset to evaluate.
        model           : The model to evaluate.
        iou_threshold   : The threshold used to consider when a detection is positive or negative.
        score_threshold : The score confidence threshold to use for detections.
        max_detections  : The maximum number of detections to use per image.
        save_path       : The path to save images with visualized detections to.
    # Returns
        A tuple (average_precisions, inference_time).
        average_precisions maps every label accepted by generator.has_label() to a
        pair (average precision, number of annotations); labels without any
        annotation map to (0, 0).
        inference_time is the summed per-image prediction time divided by generator.size().
    """
    # gather all detections and annotations
    all_detections, all_inferences = _get_detections(generator, model, score_threshold=score_threshold, max_detections=max_detections, save_path=save_path)
    all_annotations    = _get_annotations(generator)
    average_precisions = {}

    # process detections and annotations
    for label in range(generator.num_classes()):
        if not generator.has_label(label):
            continue

        # Plain lists are used while collecting: np.append copies the whole array on
        # every call, which makes the accumulation quadratic in the detection count.
        false_positives = []
        true_positives  = []
        scores          = []
        num_annotations = 0.0

        for i in range(generator.size()):
            detections           = all_detections[i][label]
            annotations          = all_annotations[i][label]
            num_annotations     += annotations.shape[0]
            # indices of annotations already claimed by an earlier detection of this image
            detected_annotations = set()

            for d in detections:
                scores.append(d[4])

                # nothing to match against: every detection is a false positive
                if annotations.shape[0] == 0:
                    false_positives.append(1)
                    true_positives.append(0)
                    continue

                overlaps            = compute_overlap(np.expand_dims(d, axis=0), annotations)
                assigned_annotation = int(np.argmax(overlaps, axis=1)[0])
                max_overlap         = overlaps[0, assigned_annotation]

                # an annotation can be matched once; later hits on it count as false positives
                if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
                    false_positives.append(0)
                    true_positives.append(1)
                    detected_annotations.add(assigned_annotation)
                else:
                    false_positives.append(1)
                    true_positives.append(0)

        # no annotations -> AP for this class is 0 (is this correct?)
        if num_annotations == 0:
            average_precisions[label] = 0, 0
            continue

        scores          = np.asarray(scores, dtype=np.float64)
        false_positives = np.asarray(false_positives, dtype=np.float64)
        true_positives  = np.asarray(true_positives, dtype=np.float64)

        # sort by score, highest first
        indices         = np.argsort(-scores)
        false_positives = false_positives[indices]
        true_positives  = true_positives[indices]

        # running totals of false positives and true positives
        false_positives = np.cumsum(false_positives)
        true_positives  = np.cumsum(true_positives)

        # compute recall and precision; eps guards the division when both counts are zero
        recall    = true_positives / num_annotations
        precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)

        # compute average precision
        average_precision  = _compute_ap(recall, precision)
        average_precisions[label] = average_precision, num_annotations

    # inference time
    inference_time = np.sum(all_inferences) / generator.size()

    return average_precisions, inference_time


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/utils/gpu.py
================================================
"""
Copyright 2017-2019 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import tensorflow as tf


def setup_gpu(gpu_id):
    """ Restrict TensorFlow to the GPUs named in gpu_id.

    Args
        gpu_id: Comma-separated string of GPU indices, e.g. "0" or "0,2".

    When none of the requested indices matches a physical GPU, or when a
    ValueError is raised while handling the request (for instance because an
    index is not an integer), all GPUs are hidden from TensorFlow.
    """
    try:
        requested = [int(token) for token in gpu_id.split(',')]
        physical = tf.config.list_physical_devices('GPU')
        selected = [device for position, device in enumerate(physical) if position in requested]

        if not selected:
            tf.config.set_visible_devices([], 'GPU')
        else:
            try:
                # Currently, memory growth needs to be the same across GPUs.
                for device in physical:
                    tf.config.experimental.set_memory_growth(device, True)

                # Use only the selected gpu.
                tf.config.set_visible_devices(selected, 'GPU')
            except RuntimeError as error:
                # Visible devices must be set before GPUs have been initialized.
                print(error)

            logical = tf.config.list_logical_devices('GPU')
            print(len(physical), "Physical GPUs,", len(logical), "Logical GPUs")
    except ValueError:
        tf.config.set_visible_devices([], 'GPU')


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/utils/image.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from __future__ import division
import numpy as np
import cv2
from PIL import Image

from .transform import change_transform_origin


def read_image_bgr(path):
    """ Read an image in BGR format.

    Args
        path: Path to the image.

    Returns
        The image as a numpy array of shape (height, width, 3) with the channel
        axis reversed from PIL's RGB order to BGR.
    """
    # We deliberately don't use cv2.imread here, since it gives no feedback on errors while reading the image.
    # The context manager closes the underlying file as soon as the pixels have been
    # copied into the array instead of leaving that to garbage collection.
    with Image.open(path) as source:
        image = np.ascontiguousarray(source.convert('RGB'))
    return image[:, :, ::-1]


def preprocess_image(x, mode='caffe'):
    """ Normalise the pixel values of an image for the network.

    Args
        x: np.array holding the image; for 'caffe' mode the last axis is the
            one the three channel means are broadcast against.
        mode: One of "caffe" or "tf".
            - caffe: subtracts the per-channel ImageNet means (BGR order),
                without scaling.
            - tf: scales pixels to the range -1 to 1.
            Any other value only converts the input to float32.

    Returns
        A new float32 array; the input is left untouched.
    """
    # mostly identical to "https://github.com/keras-team/keras-applications/blob/master/keras_applications/imagenet_utils.py"
    # except for converting RGB -> BGR since we assume BGR already

    # always convert to float32 to keep compatibility with opencv; astype also copies
    pixels = x.astype(np.float32)

    if mode == 'caffe':
        pixels -= [103.939, 116.779, 123.68]
    elif mode == 'tf':
        pixels /= 127.5
        pixels -= 1.

    return pixels


def adjust_transform_for_image(transform, image, relative_translation):
    """ Adjust a transformation for a specific image.

    The translation of the matrix will be scaled with the size of the image.
    The linear part of the transformation will be adjusted so that the origin of the transformation will be at the center of the image.

    Args
        transform:            Homogeneous 3 by 3 transformation matrix. It is not modified.
        image:                Array with shape (height, width, channels).
        relative_translation: If true, the translation column is multiplied by (width, height).

    Returns
        The adjusted matrix as returned by change_transform_origin.
    """
    height, width, channels = image.shape

    result = transform

    # Scale the translation with the image size if specified.
    if relative_translation:
        # Scale a copy: multiplying in place would also rescale the caller's matrix.
        result = transform.copy()
        result[0:2, 2] *= [width, height]

    # Move the origin of transformation to the image center.
    return change_transform_origin(result, (0.5 * width, 0.5 * height))


class TransformParameters:
    """ Plain container describing how a transformation is applied to an image.

    Args
        fill_mode:             One of: 'constant', 'nearest', 'reflect', 'wrap'
        interpolation:         One of: 'nearest', 'linear', 'cubic', 'area', 'lanczos4'
        cval:                  Fill value to use with fill_mode='constant'
        relative_translation:  If true (the default), interpret translation as a factor of the image size.
                               If false, interpret it as absolute pixels.
    """
    def __init__(self, fill_mode='nearest', interpolation='linear', cval=0, relative_translation=True):
        self.fill_mode = fill_mode
        self.interpolation = interpolation
        self.cval = cval
        self.relative_translation = relative_translation

    def cvBorderMode(self):
        """ Return the OpenCV border constant for fill_mode, or None for an unknown mode. """
        border_modes = {
            'constant': cv2.BORDER_CONSTANT,
            'nearest':  cv2.BORDER_REPLICATE,
            'reflect':  cv2.BORDER_REFLECT_101,
            'wrap':     cv2.BORDER_WRAP,
        }
        return border_modes.get(self.fill_mode)

    def cvInterpolation(self):
        """ Return the OpenCV interpolation constant for interpolation, or None for an unknown name. """
        interpolations = {
            'nearest':  cv2.INTER_NEAREST,
            'linear':   cv2.INTER_LINEAR,
            'cubic':    cv2.INTER_CUBIC,
            'area':     cv2.INTER_AREA,
            'lanczos4': cv2.INTER_LANCZOS4,
        }
        return interpolations.get(self.interpolation)


def apply_transform(matrix, image, params):
    """
    Warp an image with an affine transformation.

    The origin of transformation is at the top left corner of the image.

    The matrix is interpreted such that a point (x, y) on the original image is moved to transform * (x, y) in the generated image.
    Mathematically speaking, that means that the matrix is a transformation from the transformed image space to the original image space.

    Args
      matrix: A homogeneous 3 by 3 matrix representing the transformation to apply;
              only its first two rows are handed to OpenCV.
      image:  The image to transform.
      params: The transform parameters (see TransformParameters)

    Returns
      The warped image, with the same width and height as the input.
    """
    height, width = image.shape[0], image.shape[1]
    affine_rows = matrix[:2, :]

    return cv2.warpAffine(
        image,
        affine_rows,
        dsize=(width, height),
        flags=params.cvInterpolation(),
        borderMode=params.cvBorderMode(),
        borderValue=params.cval,
    )


def compute_resize_scale(image_shape, min_side=800, max_side=1333):
    """ Compute the scale factor that constrains an image to min_side and max_side.

    Args
        image_shape: The (rows, cols, channels) shape of the image.
        min_side: The image's min side will be equal to min_side after resizing.
        max_side: If after resizing the image's max side is above max_side, resize until the max side is equal to max_side.

    Returns
        A resizing scale.
    """
    rows, cols, _ = image_shape
    shorter = min(rows, cols)
    longer = max(rows, cols)

    # first aim for the shorter side to land exactly on min_side
    scale = min_side / shorter

    # images with a large aspect ratio would overshoot max_side on the longer
    # side; in that case the longer side dictates the scale instead
    if longer * scale > max_side:
        return max_side / longer

    return scale


def resize_image(img, min_side=800, max_side=1333):
    """ Resize an image so that its size is constrained to min_side and max_side.

    Args
        img: The image to resize.
        min_side: The image's min side will be equal to min_side after resizing.
        max_side: If after resizing the image's max side is above max_side, resize until the max side is equal to max_side.

    Returns
        A tuple (resized image, scale that was applied to both axes).
    """
    # the same factor is applied horizontally and vertically
    factor = compute_resize_scale(img.shape, min_side=min_side, max_side=max_side)
    resized = cv2.resize(img, None, fx=factor, fy=factor)

    return resized, factor


def _uniform(val_range):
    """ Uniformly sample from the given range.

    Args
        val_range: A pair of lower and upper bound.
    """
    return np.random.uniform(val_range[0], val_range[1])


def _check_range(val_range, min_val=None, max_val=None):
    """ Check whether the range is a valid range.

    Args
        val_range: A pair of lower and upper bound.
        min_val: Minimal value for the lower bound.
        max_val: Maximal value for the upper bound.
    """
    if val_range[0] > val_range[1]:
        raise ValueError('interval lower bound > upper bound')
    if min_val is not None and val_range[0] < min_val:
        raise ValueError('invalid interval lower bound')
    if max_val is not None and val_range[1] > max_val:
        raise ValueError('invalid interval upper bound')


def _clip(image):
    """
    Clip and convert an image to np.uint8.

    Args
        image: Image to clip.
    """
    return np.clip(image, 0, 255).astype(np.uint8)


class VisualEffect:
    """ Holds colour adjustment parameters and applies them to an image when called.

    Args
        contrast_factor:   A factor for adjusting contrast. Should be between 0 and 3.
        brightness_delta:  Brightness offset between -1 and 1 added to the pixel values.
        hue_delta:         Hue offset between -1 and 1 added to the hue channel.
        saturation_factor: A factor multiplying the saturation values of each pixel.
    """

    def __init__(self, contrast_factor, brightness_delta, hue_delta, saturation_factor):
        self.contrast_factor = contrast_factor
        self.brightness_delta = brightness_delta
        self.hue_delta = hue_delta
        self.saturation_factor = saturation_factor

    def __call__(self, image):
        """ Apply the configured adjustments to the image.

        Parameters that are falsy (for example 0) are skipped entirely.

        Args
            image: Image to adjust
        """
        if self.contrast_factor:
            image = adjust_contrast(image, self.contrast_factor)
        if self.brightness_delta:
            image = adjust_brightness(image, self.brightness_delta)

        # without a hue or saturation change the HSV round trip is not needed
        if not (self.hue_delta or self.saturation_factor):
            return image

        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        if self.hue_delta:
            hsv = adjust_hue(hsv, self.hue_delta)
        if self.saturation_factor:
            hsv = adjust_saturation(hsv, self.saturation_factor)

        return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)


def random_visual_effect_generator(
    contrast_range=(0.9, 1.1),
    brightness_range=(-.1, .1),
    hue_range=(-0.05, 0.05),
    saturation_range=(0.95, 1.05)
):
    """ Create an endless generator of VisualEffect objects with uniformly sampled parameters.

    All four intervals are validated immediately, before the generator is returned.

    Args
        contrast_range:    A factor interval for adjusting contrast; its lower bound must be at least 0.
        brightness_range:  An interval between -1 and 1 for the amount added to the pixels.
        hue_range:         An interval between -1 and 1 for the amount added to the hue channel.
                           The values are rotated if they exceed 180.
        saturation_range:  An interval for the factor multiplying the saturation values of each
                           pixel; its lower bound must be at least 0.
    """
    constraints = (
        (contrast_range, (0,)),
        (brightness_range, (-1, 1)),
        (hue_range, (-1, 1)),
        (saturation_range, (0,)),
    )
    for bounds, limits in constraints:
        _check_range(bounds, *limits)

    def _effects():
        # never terminates: each iteration samples a fresh set of parameters
        while True:
            effect = VisualEffect(
                contrast_factor=_uniform(contrast_range),
                brightness_delta=_uniform(brightness_range),
                hue_delta=_uniform(hue_range),
                saturation_factor=_uniform(saturation_range),
            )
            yield effect

    return _effects()


def adjust_contrast(image, factor):
    """ Scale the distance of every pixel from the per-channel mean.

    Args
        image: Image to adjust.
        factor: A factor for adjusting contrast.

    Returns
        The adjusted image, clipped to 0..255 and converted to np.uint8.
    """
    # averaging over the first two axes leaves one mean value per channel
    channel_mean = image.mean(axis=0).mean(axis=0)
    stretched = (image - channel_mean) * factor + channel_mean
    return _clip(stretched)


def adjust_brightness(image, delta):
    """ Shift every pixel value by a fraction of the 0..255 range.

    Args
        image: Image to adjust.
        delta: Brightness offset between -1 and 1 added to the pixel values.

    Returns
        The adjusted image, clipped to 0..255 and converted to np.uint8.
    """
    offset = delta * 255
    return _clip(image + offset)


def adjust_hue(image, delta):
    """ Rotate the hue channel (index 0 of the last axis) of an image in place.

    Args
        image: Image to adjust; it is modified and returned.
        delta: An offset between -1 and 1; it is multiplied by 180 before being
               added to the hue channel. The values wrap around modulo 180.
    """
    shifted = image[..., 0] + delta * 180
    image[..., 0] = np.mod(shifted, 180)
    return image


def adjust_saturation(image, factor):
    """ Scale the saturation channel (index 1 of the last axis) of an image in place.

    Args
        image: Image to adjust; it is modified and returned.
        factor: The factor multiplying the saturation values of each pixel; the
                result is clipped to the range 0 to 255.
    """
    scaled = image[..., 1] * factor
    image[..., 1] = np.clip(scaled, 0, 255)
    return image


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/utils/model.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""


def freeze(model):
    """ Mark every layer of a model as non-trainable.

    The model is changed in-place; it is also returned so that the call can be
    chained with other functions.
    """
    for current_layer in model.layers:
        current_layer.trainable = False

    return model


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/utils/tf_version.py
================================================
"""
Copyright 2017-2019 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from __future__ import print_function

import tensorflow as tf
import sys

# Default lower bound used by the version checks below, as a (major, minor, patch) tuple.
MINIMUM_TF_VERSION = 2, 3, 0
# Default list of version tuples that the checks below reject; currently empty.
BLACKLISTED_TF_VERSIONS = []


def tf_version():
    """ Get the Tensorflow version.
        Anything after the first '-' in the version string is ignored.
        Returns
            tuple of ints, one per dot-separated component.
    """
    release = tf.version.VERSION.split('-')[0]
    return tuple(int(component) for component in release.split('.'))


def tf_version_ok(minimum_tf_version=MINIMUM_TF_VERSION, blacklisted=BLACKLISTED_TF_VERSIONS):
    """ Tell whether the current Tensorflow version is at least the minimum version and not blacklisted.
    """
    current = tf_version()
    if current < minimum_tf_version:
        return False
    return current not in blacklisted


def assert_tf_version(minimum_tf_version=MINIMUM_TF_VERSION, blacklisted=BLACKLISTED_TF_VERSIONS):
    """ Raise an AssertionError describing the problem when the Tensorflow version is not acceptable.
    """
    detected = tf.version.VERSION
    required = '.'.join(str(component) for component in minimum_tf_version)
    message = 'You are using tensorflow version {}. The minimum required version is {} (blacklisted: {}).'.format(detected, required, blacklisted)
    assert tf_version_ok(minimum_tf_version, blacklisted), message


def check_tf_version():
    """ Verify the Tensorflow version; on failure print the reason to stderr and exit with status 1.
    """
    try:
        assert_tf_version()
    except AssertionError as problem:
        print(problem, file=sys.stderr)
        raise SystemExit(1)


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/utils/transform.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import numpy as np

# Random source used as the default `prng` argument of the random_* helpers below:
# the numpy.random module itself.
DEFAULT_PRNG = np.random


def colvec(*args):
    """ Build a numpy column vector from the given values. """
    row = np.array([args])
    return np.transpose(row)


def transform_aabb(transform, aabb):
    """ Apply a transformation to an axis aligned bounding box.

    The result is a new AABB in the same coordinate system as the original AABB.
    The new AABB contains all corner points of the original AABB after applying the given transformation.

    Args
        transform: The transformation to apply (homogeneous matrix).
        aabb:      The box as a sequence (x1, y1, x2, y2) holding the minimum
                   and maximum x and y values.
    Returns
        The new AABB as list [x1, y1, x2, y2]
    """
    x1, y1, x2, y2 = aabb

    # All four corners in homogeneous coordinates, one corner per column.
    corners = [
        [x1, x2, x1, x2],
        [y1, y2, y2, y1],
        [1,  1,  1,  1],
    ]
    moved = transform.dot(corners)

    # The smallest and largest coordinate per row span the new box.
    lows = moved.min(axis=1)
    highs = moved.max(axis=1)

    return [lows[0], lows[1], highs[0], highs[1]]


def _random_vector(min, max, prng=DEFAULT_PRNG):
    """ Draw a random vector whose components lie between min and max.
    Args
        min:  the minimum value for each component (must be one-dimensional)
        max:  the maximum value for each component (must have the same shape as min)
        prng: the pseudo-random number generator to use.
    Returns
        the result of `prng.uniform` called with both bounds converted to numpy arrays
    """
    lower = np.array(min)
    upper = np.array(max)

    # Both bounds must be flat vectors of equal length.
    assert lower.shape == upper.shape
    assert lower.ndim == 1

    return prng.uniform(lower, upper)


def rotation(angle):
    """ Build the homogeneous 2D rotation matrix for a given angle.
    Args
        angle: the angle in radians
    Returns
        the rotation matrix as 3 by 3 numpy array
    """
    cos_a = np.cos(angle)
    sin_a = np.sin(angle)

    return np.array([
        [cos_a, -sin_a, 0],
        [sin_a,  cos_a, 0],
        [0,      0,     1],
    ])


def random_rotation(min, max, prng=DEFAULT_PRNG):
    """ Construct a random rotation with an angle drawn between min and max.
    Args
        min:  a scalar for the minimum angle in radians
        max:  a scalar for the maximum angle in radians
        prng: the pseudo-random number generator to use.
    Returns
        a homogeneous 3 by 3 rotation matrix
    """
    # The angle comes from a single `prng.uniform(min, max)` draw.
    angle = prng.uniform(min, max)
    return rotation(angle)


def translation(translation):
    """ Build the homogeneous 2D translation matrix for a given offset.
    # Arguments
        translation: the translation 2D vector (only elements 0 and 1 are read)
    # Returns
        the translation matrix as 3 by 3 numpy array
    """
    offset_x = translation[0]
    offset_y = translation[1]

    return np.array([
        [1, 0, offset_x],
        [0, 1, offset_y],
        [0, 0, 1],
    ])


def random_translation(min, max, prng=DEFAULT_PRNG):
    """ Construct a random 2D translation with an offset drawn between min and max.
    Args
        min:  a 2D vector with the minimum translation for each dimension
        max:  a 2D vector with the maximum translation for each dimension
        prng: the pseudo-random number generator to use.
    Returns
        a homogeneous 3 by 3 translation matrix
    """
    offset = _random_vector(min, max, prng)
    return translation(offset)


def shear(angle):
    """ Build the homogeneous 2D shear matrix for a given angle.
    Args
        angle: the shear angle in radians
    Returns
        the shear matrix as 3 by 3 numpy array
    """
    # Only the second column depends on the angle: (-sin, cos).
    x_from_y = -np.sin(angle)
    y_from_y = np.cos(angle)

    return np.array([
        [1, x_from_y, 0],
        [0, y_from_y, 0],
        [0, 0,        1],
    ])


def random_shear(min, max, prng=DEFAULT_PRNG):
    """ Construct a random 2D shear matrix with a shear angle drawn between min and max.
    Args
        min:  the minimum shear angle in radians.
        max:  the maximum shear angle in radians.
        prng: the pseudo-random number generator to use.
    Returns
        a homogeneous 3 by 3 shear matrix
    """
    # The angle comes from a single `prng.uniform(min, max)` draw.
    angle = prng.uniform(min, max)
    return shear(angle)


def scaling(factor):
    """ Build the homogeneous 2D scaling matrix for the given factors.
    Args
        factor: a 2D vector for X and Y scaling (only elements 0 and 1 are read)
    Returns
        the zoom matrix as 3 by 3 numpy array
    """
    scale_x = factor[0]
    scale_y = factor[1]

    return np.array([
        [scale_x, 0,       0],
        [0,       scale_y, 0],
        [0,       0,       1],
    ])


def random_scaling(min, max, prng=DEFAULT_PRNG):
    """ Construct a random 2D scale matrix with factors drawn between min and max.
    Args
        min:  a 2D vector containing the minimum scaling factor for X and Y.
        max:  a 2D vector containing the maximum scaling factor for X and Y.
        prng: the pseudo-random number generator to use.
    Returns
        a homogeneous 3 by 3 scaling matrix
    """
    factors = _random_vector(min, max, prng)
    return scaling(factors)


def random_flip(flip_x_chance, flip_y_chance, prng=DEFAULT_PRNG):
    """ Build a transformation that randomly includes X and/or Y flips.
    Args
        flip_x_chance: The chance that the result will contain a flip along the X axis.
        flip_y_chance: The chance that the result will contain a flip along the Y axis.
        prng:          The pseudo-random number generator to use.
    Returns
        a homogeneous 3 by 3 transformation matrix
    """
    # Draw for X first and Y second so the generator is consumed in a fixed order.
    mirror_x = prng.uniform(0, 1) < flip_x_chance
    mirror_y = prng.uniform(0, 1) < flip_y_chance

    # A flip is a scaling by -1: `1 - 2 * flag` maps False to 1 and True to -1.
    factors = (1 - 2 * mirror_x, 1 - 2 * mirror_y)
    return scaling(factors)


def change_transform_origin(transform, center):
    """ Re-express a transformation so that its linear part acts around a new origin.
    Args
        transform: the transformation matrix
        center: the new origin of the transformation
    Returns
        translate(center) * transform * translate(-center)
    """
    origin = np.array(center)

    # Move the new origin to (0, 0), apply the transform, then move back.
    move_back = translation(origin)
    move_to_zero = translation(-origin)

    return np.linalg.multi_dot([move_back, transform, move_to_zero])


def random_transform(
    min_rotation=0,
    max_rotation=0,
    min_translation=(0, 0),
    max_translation=(0, 0),
    min_shear=0,
    max_shear=0,
    min_scaling=(1, 1),
    max_scaling=(1, 1),
    flip_x_chance=0,
    flip_y_chance=0,
    prng=DEFAULT_PRNG
):
    """ Create a random transformation.

    The result is the matrix product of the following random operations, in this order (from left to right):
      * rotation
      * translation
      * shear
      * scaling
      * flip x (if applied)
      * flip y (if applied)

    Note that by default, the data generators in `keras_retinanet.preprocessing.generators` interpret the translation
    as factor of the image size. So an X translation of 0.1 would translate the image by 10% of its width.
    Set `relative_translation` to `False` in the `TransformParameters` of a data generator to have it interpret
    the translation directly as pixel distances instead.

    Args
        min_rotation:    The minimum rotation in radians for the transform as scalar.
        max_rotation:    The maximum rotation in radians for the transform as scalar.
        min_translation: The minimum translation for the transform as 2D vector.
        max_translation: The maximum translation for the transform as 2D vector.
        min_shear:       The minimum shear angle for the transform in radians.
        max_shear:       The maximum shear angle for the transform in radians.
        min_scaling:     The minimum scaling for the transform as 2D vector.
        max_scaling:     The maximum scaling for the transform as 2D vector.
        flip_x_chance:   The chance (0 to 1) that a transform will contain a flip along X direction.
        flip_y_chance:   The chance (0 to 1) that a transform will contain a flip along Y direction.
        prng:            The pseudo-random number generator to use.
    Returns
        The combined transformation as a homogeneous 3 by 3 matrix.
    """
    # The parts are drawn in the same order they are multiplied, so the
    # sequence of values taken from `prng` is fixed.
    rotate_part = random_rotation(min_rotation, max_rotation, prng)
    translate_part = random_translation(min_translation, max_translation, prng)
    shear_part = random_shear(min_shear, max_shear, prng)
    scale_part = random_scaling(min_scaling, max_scaling, prng)
    flip_part = random_flip(flip_x_chance, flip_y_chance, prng)

    return np.linalg.multi_dot([
        rotate_part,
        translate_part,
        shear_part,
        scale_part,
        flip_part,
    ])


def random_transform_generator(prng=None, **kwargs):
    """ Create an endless generator of random transformations.

    When no PRNG is given, a dedicated `np.random.RandomState()` is created and used instead of
    the global DEFAULT_PRNG. Every yielded value is one call to `random_transform` with that PRNG
    and the given keyword arguments.

    Each transformation consists of the following operations in this order (from left to right):
      * rotation
      * translation
      * shear
      * scaling
      * flip x (if applied)
      * flip y (if applied)

    Note that by default, the data generators in `keras_retinanet.preprocessing.generators` interpret the translation
    as factor of the image size. So an X translation of 0.1 would translate the image by 10% of its width.
    Set `relative_translation` to `False` in the `TransformParameters` of a data generator to have it interpret
    the translation directly as pixel distances instead.

    Args
        min_rotation:    The minimum rotation in radians for the transform as scalar.
        max_rotation:    The maximum rotation in radians for the transform as scalar.
        min_translation: The minimum translation for the transform as 2D vector.
        max_translation: The maximum translation for the transform as 2D vector.
        min_shear:       The minimum shear angle for the transform in radians.
        max_shear:       The maximum shear angle for the transform in radians.
        min_scaling:     The minimum scaling for the transform as 2D vector.
        max_scaling:     The maximum scaling for the transform as 2D vector.
        flip_x_chance:   The chance (0 to 1) that a transform will contain a flip along X direction.
        flip_y_chance:   The chance (0 to 1) that a transform will contain a flip along Y direction.
        prng:            The pseudo-random number generator to use.
    """
    # RandomState() without an explicit seed seeds itself.
    generator = np.random.RandomState() if prng is None else prng

    while True:
        yield random_transform(prng=generator, **kwargs)


================================================
FILE: imageai_tf_deprecated/Detection/keras_retinanet/utils/visualization.py
================================================
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import cv2
import numpy as np

from .colors import label_color


def draw_box(image, box, color, thickness=2):
    """ Draws a single rectangle onto an image.

    # Arguments
        image     : The image to draw on.
        box       : A list of 4 elements (x1, y1, x2, y2).
        color     : The color of the box.
        thickness : The thickness of the lines to draw a box with.
    """
    # Coordinates are cast to integers before being handed to OpenCV.
    coords = np.array(box).astype(int)
    first_corner = (coords[0], coords[1])
    opposite_corner = (coords[2], coords[3])
    cv2.rectangle(image, first_corner, opposite_corner, color, thickness, cv2.LINE_AA)


def draw_caption(image, box, caption):
    """ Draws a text caption 10 pixels above the (x1, y1) corner of a box.

    # Arguments
        image   : The image to draw on.
        box     : A list of 4 elements (x1, y1, x2, y2).
        caption : String containing the text to draw.
    """
    coords = np.array(box).astype(int)
    anchor = (coords[0], coords[1] - 10)
    font = cv2.FONT_HERSHEY_PLAIN

    # The text is drawn twice at the same spot: black with thickness 2 first,
    # then white with thickness 1 on top of it.
    cv2.putText(image, caption, anchor, font, 1, (0, 0, 0), 2)
    cv2.putText(image, caption, anchor, font, 1, (255, 255, 255), 1)


def draw_boxes(image, boxes, color, thickness=2):
    """ Draws every box of a collection onto an image, all in the same color.

    # Arguments
        image     : The image to draw on.
        boxes     : A [N, 4] matrix (x1, y1, x2, y2).
        color     : The color of the boxes.
        thickness : The thickness of the lines to draw boxes with.
    """
    # Each row is forwarded unchanged to draw_box.
    for single_box in boxes:
        draw_box(image, single_box, color, thickness=thickness)


def draw_detections(image, boxes, scores, labels, color=None, label_to_name=None, score_threshold=0.5):
    """ Draws detections in an image.

    Only detections whose score is strictly greater than `score_threshold` are drawn. Each one gets
    a box plus a caption of the form "<name>: <score with two decimals>".

    # Arguments
        image           : The image to draw on.
        boxes           : A [N, 4] matrix (x1, y1, x2, y2); a numpy array or a nested list.
        scores          : A list or array of N classification scores.
        labels          : A list or array of N labels.
        color           : The color of the boxes. By default the color from keras_retinanet.utils.colors.label_color will be used.
        label_to_name   : (optional) Functor for mapping a label to a name. Without it the raw label is used as the name.
        score_threshold : Threshold used for determining what detections to draw.
    """
    # Plain lists are documented inputs; convert them so that the comparison
    # and the 2D indexing below work for lists as well as arrays.
    boxes = np.asarray(boxes)
    scores = np.asarray(scores)

    selection = np.where(scores > score_threshold)[0]

    for i in selection:
        c = color if color is not None else label_color(labels[i])
        draw_box(image, boxes[i, :], color=c)

        # draw labels
        # Format the name instead of concatenating it, so that a numeric label
        # (no label_to_name given) does not raise a TypeError.
        name = label_to_name(labels[i]) if label_to_name else labels[i]
        caption = '{}: {:.2f}'.format(name, scores[i])
        draw_caption(image, boxes[i, :], caption)


def draw_annotations(image, annotations, color=(0, 255, 0), label_to_name=None):
    """ Draws annotations (box plus label caption) in an image.

    # Arguments
        image         : The image to draw on.
        annotations   : A [N, 5] matrix (x1, y1, x2, y2, label) or dictionary containing bboxes (shaped [N, 4]) and labels (shaped [N]).
        color         : The color of the boxes, (0, 255, 0) by default. Pass None to use the color from keras_retinanet.utils.colors.label_color instead.
        label_to_name : (optional) Functor for mapping a label to a name.
    """
    # A raw matrix is split into the dictionary form used below.
    if isinstance(annotations, np.ndarray):
        annotations = {'bboxes': annotations[:, :4], 'labels': annotations[:, 4]}

    assert 'bboxes' in annotations
    assert 'labels' in annotations

    bboxes = annotations['bboxes']
    labels = annotations['labels']
    assert bboxes.shape[0] == labels.shape[0]

    for index in range(bboxes.shape[0]):
        label = labels[index]
        box = bboxes[index]
        box_color = label_color(label) if color is None else color
        shown = label_to_name(label) if label_to_name else label
        # The caption is drawn before the box.
        draw_caption(image, box, '{}'.format(shown))
        draw_box(image, box, color=box_color)


================================================
FILE: imageai_tf_deprecated/Prediction/Custom/__init__.py
================================================
from ...Classification.Custom import ClassificationModelTrainer, CustomImageClassification


class ModelTraining(ClassificationModelTrainer):
    """
    Deprecated name for 'imageai.Classification.Custom.ClassificationModelTrainer'.
    Use that class directly; this subclass only adds a no-op `__call__`.
    """

    def __call__(self):
        """ Do nothing and return None. """
        return None

class CustomImagePrediction(CustomImageClassification):
    """
    Deprecated name for 'imageai.Classification.Custom.CustomImageClassification'.
    Use that class directly; this subclass only adds a no-op `__call__`.
    """

    def __call__(self):
        """ Do nothing and return None. """
        return None

================================================
FILE: imageai_tf_deprecated/Prediction/Custom/custom_utils.py
================================================
import json

CLASS_INDEX = None


def preprocess_input(x):
    """Rescales a batch of images by a factor of 1/255.

    The multiplication is applied with `*=`, so a Numpy array passed in is
    modified in place and returned.

    # Arguments
        x: input Numpy tensor, 4D.

    # Returns
        Preprocessed tensor.
    """
    # Map pixel values from the 0..255 range onto 0..1.
    scale = 1. / 255
    x *= scale

    return x


def decode_predictions(preds, top=5, model_json=""):
    """Decodes the predictions of a custom model into class names and scores.

    # Arguments
        preds: Numpy tensor encoding a batch of predictions
            (iterated sample by sample; each sample must support `argsort`).
        top: integer, how many top-guesses to return per sample.
        model_json: path to the JSON file mapping a class index (as a string
            key) to its class name. It is only read while the module-level
            `CLASS_INDEX` cache is still empty.

    # Returns
        A flat list of `[class_name, score]` pairs: the top guesses of every
        sample, highest score first, with the samples appended in order.
    """
    global CLASS_INDEX

    if CLASS_INDEX is None:
        # Use a context manager so the file handle is closed right after
        # loading instead of being left open.
        # NOTE: the mapping is cached in CLASS_INDEX, so `model_json` is
        # ignored on every call after the first successful load.
        with open(model_json) as class_file:
            CLASS_INDEX = json.load(class_file)

    results = []
    for pred in preds:
        # argsort is ascending: keep the last `top` indices, best first.
        top_indices = pred.argsort()[-top:][::-1]
        results.extend([CLASS_INDEX[str(i)], pred[i]] for i in top_indices)

    return results

================================================
FILE: imageai_tf_deprecated/Prediction/__init__.py
================================================
from ..Classification import ImageClassification
from matplotlib.cbook import deprecated


class ImagePrediction(ImageClassification):
    """
    Deprecated name for 'imageai.Classification.ImageClassification'.
    Use that class directly; this subclass only adds a no-op `__call__`.
    """

    def __call__(self):
        """ Do nothing and return None. """
        return None

================================================
FILE: imageai_tf_deprecated/Prediction/imagenet_utils.py
================================================
CLASS_INDEX = None


def preprocess_input(x):
    """Reverses the channel order of a batch of images and subtracts fixed per-channel means.

    The subtraction happens in place on the channel-reversed slice of `x`.
    For a Numpy array that slice is a view, so the data of the array passed
    in is changed as well.

    # Arguments
        x: input Numpy tensor, 4D (the last axis is treated as the channel
            axis and its first three entries are shifted).

    # Returns
        Preprocessed tensor.
    """
    # 'RGB'->'BGR'
    x = x[..., ::-1]

    # Zero-center by mean pixel, one constant per channel of the reversed order.
    channel_means = (103.939, 116.779, 123.68)
    for channel, mean in enumerate(channel_means):
        x[..., channel] -= mean

    return x


def decode_predictions(preds, top=5):
    """Decodes the prediction of an ImageNet model.

    # Arguments
        preds: Numpy tensor encoding a batch of predictions.
        top: integer, how many top-guesses to return.

    # Returns
        A list of lists of top class prediction tuples
        `(class_name, class_description, score)`.
        One list of tuples per sample in batch input.

    # Raises
        ValueError: in case of invalid shape of the `pred` array
            (must be 2D).
    """
    global CLASS_INDEX
    if len(preds.shape) != 2 or preds.shape[1] != 1000:
        raise ValueError('`decode_predictions` expects '
                         'a batch of predictions '
                         '(i.e. a 2D array of shape (samples, 1000)). '
                         'Found array with shape: ' + str(preds.shape))
    if CLASS_INDEX is None:
        CLASS_INDEX = {"0": ["n01440764", "tench"], "1": ["n01443537", "goldfish"], "2": ["n01484850", "great_white_shark"], "3": ["n01491361", "tiger_shark"], "4": ["n01494475", "hammerhead"], "5": ["n01496331", "electric_ray"], "6": ["n01498041", "stingray"], "7": ["n01514668", "cock"], "8": ["n01514859", "hen"], "9": ["n01518878", "ostrich"], "10": ["n01530575", "brambling"], "11": ["n01531178", "goldfinch"], "12": ["n01532829", "house_finch"], "13": ["n01534433", "junco"], "14": ["n01537544", "indigo_bunting"], "15": ["n01558993", "robin"], "16": ["n01560419", "bulbul"], "17": ["n01580077", "jay"], "18": ["n01582220", "magpie"], "19": ["n01592084", "chickadee"], "20": ["n01601694", "water_ouzel"], "21": ["n01608432", "kite"], "22": ["n01614925", "bald_eagle"], "23": ["n01616318", "vulture"], "24": ["n01622779", "great_grey_owl"], "25": ["n01629819", "European_fire_salamander"], "26": ["n01630670", "common_newt"], "27": ["n01631663", "eft"], "28": ["n01632458", "spotted_salamander"], "29": ["n01632777", "axolotl"], "30": ["n01641577", "bullfrog"], "31": ["n01644373", "tree_frog"], "32": ["n01644900", "tailed_frog"], "33": ["n01664065", "loggerhead"], "34": ["n01665541", "leatherback_turtle"], "35": ["n01667114", "mud_turtle"], "36": ["n01667778", "terrapin"], "37": ["n01669191", "box_turtle"], "38": ["n01675722", "banded_gecko"], "39": ["n01677366", "common_iguana"], "40": ["n01682714", "American_chameleon"], "41": ["n01685808", "whiptail"], "42": ["n01687978", "agama"], "43": ["n01688243", "frilled_lizard"], "44": ["n01689811", "alligator_lizard"], "45": ["n01692333", "Gila_monster"], "46": ["n01693334", "green_lizard"], "47": ["n01694178", "African_chameleon"], "48": ["n01695060", "Komodo_dragon"], "49": ["n01697457", "African_crocodile"], "50": ["n01698640", "American_alligator"], "51": ["n01704323", "triceratops"], "52": ["n01728572", "thunder_snake"], "53": ["n01728920", "ringneck_snake"], "54": ["n01729322", "hognose_snake"], "55": ["n01729977", 
"green_snake"], "56": ["n01734418", "king_snake"], "57": ["n01735189", "garter_snake"], "58": ["n01737021", "water_snake"], "59": ["n01739381", "vine_snake"], "60": ["n01740131", "night_snake"], "61": ["n01742172", "boa_constrictor"], "62": ["n01744401", "rock_python"], "63": ["n01748264", "Indian_cobra"], "64": ["n01749939", "green_mamba"], "65": ["n01751748", "sea_snake"], "66": ["n01753488", "horned_viper"], "67": ["n01755581", "diamondback"], "68": ["n01756291", "sidewinder"], "69": ["n01768244", "trilobite"], "70": ["n01770081", "harvestman"], "71": ["n01770393", "scorpion"], "72": ["n01773157", "black_and_gold_garden_spider"], "73": ["n01773549", "barn_spider"], "74": ["n01773797", "garden_spider"], "75": ["n01774384", "black_widow"], "76": ["n01774750", "tarantula"], "77": ["n01775062", "wolf_spider"], "78": ["n01776313", "tick"], "79": ["n01784675", "centipede"], "80": ["n01795545", "black_grouse"], "81": ["n01796340", "ptarmigan"], "82": ["n01797886", "ruffed_grouse"], "83": ["n01798484", "prairie_chicken"], "84": ["n01806143", "peacock"], "85": ["n01806567", "quail"], "86": ["n01807496", "partridge"], "87": ["n01817953", "African_grey"], "88": ["n01818515", "macaw"], "89": ["n01819313", "sulphur-crested_cockatoo"], "90": ["n01820546", "lorikeet"], "91": ["n01824575", "coucal"], "92": ["n01828970", "bee_eater"], "93": ["n01829413", "hornbill"], "94": ["n01833805", "hummingbird"], "95": ["n01843065", "jacamar"], "96": ["n01843383", "toucan"], "97": ["n01847000", "drake"], "98": ["n01855032", "red-breasted_merganser"], "99": ["n01855672", "goose"], "100": ["n01860187", "black_swan"], "101": ["n01871265", "tusker"], "102": ["n01872401", "echidna"], "103": ["n01873310", "platypus"], "104": ["n01877812", "wallaby"], "105": ["n01882714", "koala"], "106": ["n01883070", "wombat"], "107": ["n01910747", "jellyfish"], "108": ["n01914609", "sea_anemone"], "109": ["n01917289", "brain_coral"], "110": ["n01924916", "flatworm"], "111": ["n01930112", "nematode"], "112": 
["n01943899", "conch"], "113": ["n01944390", "snail"], "114": ["n01945685", "slug"], "115": ["n01950731", "sea_slug"], "116": ["n01955084", "chiton"], "117": ["n01968897", "chambered_nautilus"], "118": ["n01978287", "Dungeness_crab"], "119": ["n01978455", "rock_crab"], "120": ["n01980166", "fiddler_crab"], "121": ["n01981276", "king_crab"], "122": ["n01983481", "American_lobster"], "123": ["n01984695", "spiny_lobster"], "124": ["n01985128", "crayfish"], "125": ["n01986214", "hermit_crab"], "126": ["n01990800", "isopod"], "127": ["n02002556", "white_stork"], "128": ["n02002724", "black_stork"], "129": ["n02006656", "spoonbill"], "130": ["n02007558", "flamingo"], "131": ["n02009229", "little_blue_heron"], "132": ["n02009912", "American_egret"], "133": ["n02011460", "bittern"], "134": ["n02012849", "crane"], "135": ["n02013706", "limpkin"], "136": ["n02017213", "European_gallinule"], "137": ["n02018207", "American_coot"], "138": ["n02018795", "bustard"], "139": ["n02025239", "ruddy_turnstone"], "140": ["n02027492", "red-backed_sandpiper"], "141": ["n02028035", "redshank"], "142": ["n02033041", "dowitcher"], "143": ["n02037110", "oystercatcher"], "144": ["n02051845", "pelican"], "145": ["n02056570", "king_penguin"], "146": ["n02058221", "albatross"], "147": ["n02066245", "grey_whale"], "148": ["n02071294", "killer_whale"], "149": ["n02074367", "dugong"], "150": ["n02077923", "sea_lion"], "151": ["n02085620", "Chihuahua"], "152": ["n02085782", "Japanese_spaniel"], "153": ["n02085936", "Maltese_dog"], "154": ["n02086079", "Pekinese"], "155": ["n02086240", "Shih-Tzu"], "156": ["n02086646", "Blenheim_spaniel"], "157": ["n02086910", "papillon"], "158": ["n02087046", "toy_terrier"], "159": ["n02087394", "Rhodesian_ridgeback"], "160": ["n02088094", "Afghan_hound"], "161": ["n02088238", "basset"], "162": ["n02088364", "beagle"], "163": ["n02088466", "bloodhound"], "164": ["n02088632", "bluetick"], "165": ["n02089078", "black-and-tan_coonhound"], "166": ["n02089867", 
"Walker_hound"], "167": ["n02089973", "English_foxhound"], "168": ["n02090379", "redbone"], "169": ["n02090622", "borzoi"], "170": ["n02090721", "Irish_wolfhound"], "171": ["n02091032", "Italian_greyhound"], "172": ["n02091134", "whippet"], "173": ["n02091244", "Ibizan_hound"], "174": ["n02091467", "Norwegian_elkhound"], "175": ["n02091635", "otterhound"], "176": ["n02091831", "Saluki"], "177": ["n02092002", "Scottish_deerhound"], "178": ["n02092339", "Weimaraner"], "179": ["n02093256", "Staffordshire_bullterrier"], "180": ["n02093428", "American_Staffordshire_terrier"], "181": ["n02093647", "Bedlington_terrier"], "182": ["n02093754", "Border_terrier"], "183": ["n02093859", "Kerry_blue_terrier"], "184": ["n02093991", "Irish_terrier"], "185": ["n02094114", "Norfolk_terrier"], "186": ["n02094258", "Norwich_terrier"], "187": ["n02094433", "Yorkshire_terrier"], "188": ["n02095314", "wire-haired_fox_terrier"], "189": ["n02095570", "Lakeland_terrier"], "190": ["n02095889", "Sealyham_terrier"], "191": ["n02096051", "Airedale"], "192": ["n02096177", "cairn"], "193": ["n02096294", "Australian_terrier"], "194": ["n02096437", "Dandie_Dinmont"], "195": ["n02096585", "Boston_bull"], "196": ["n02097047", "miniature_schnauzer"], "197": ["n02097130", "giant_schnauzer"], "198": ["n02097209", "standard_schnauzer"], "199": ["n02097298", "Scotch_terrier"], "200": ["n02097474", "Tibetan_terrier"], "201": ["n02097658", "silky_terrier"], "202": ["n02098105", "soft-coated_wheaten_terrier"], "203": ["n02098286", "West_Highland_white_terrier"], "204": ["n02098413", "Lhasa"], "205": ["n02099267", "flat-coated_retriever"], "206": ["n02099429", "curly-coated_retriever"], "207": ["n02099601", "golden_retriever"], "208": ["n02099712", "Labrador_retriever"], "209": ["n02099849", "Chesapeake_Bay_retriever"], "210": ["n02100236", "German_short-haired_pointer"], "211": ["n02100583", "vizsla"], "212": ["n02100735", "English_setter"], "213": ["n02100877", "Irish_setter"], "214": ["n02101006", 
"Gordon_setter"], "215": ["n02101388", "Brittany_spaniel"], "216": ["n02101556", "clumber"], "217": ["n02102040", "English_springer"], "218": ["n02102177", "Welsh_springer_spaniel"], "219": ["n02102318", "cocker_spaniel"], "220": ["n02102480", "Sussex_spaniel"], "221": ["n02102973", "Irish_water_spaniel"], "222": ["n02104029", "kuvasz"], "223": ["n02104365", "schipperke"], "224": ["n02105056", "groenendael"], "225": ["n02105162", "malinois"], "226": ["n02105251", "briard"], "227": ["n02105412", "kelpie"], "228": ["n02105505", "komondor"], "229": ["n02105641", "Old_English_sheepdog"], "230": ["n02105855", "Shetland_sheepdog"], "231": ["n02106030", "collie"], "232": ["n02106166", "Border_collie"], "233": ["n02106382", "Bouvier_des_Flandres"], "234": ["n02106550", "Rottweiler"], "235": ["n02106662", "German_shepherd"], "236": ["n02107142", "Doberman"], "237": ["n02107312", "miniature_pinscher"], "238": ["n02107574", "Greater_Swiss_Mountain_dog"], "239": ["n02107683", "Bernese_mountain_dog"], "240": ["n02107908", "Appenzeller"], "241": ["n02108000", "EntleBucher"], "242": ["n02108089", "boxer"], "243": ["n02108422", "bull_mastiff"], "244": ["n02108551", "Tibetan_mastiff"], "245": ["n02108915", "French_bulldog"], "246": ["n02109047", "Great_Dane"], "247": ["n02109525", "Saint_Bernard"], "248": ["n02109961", "Eskimo_dog"], "249": ["n02110063", "malamute"], "250": ["n02110185", "Siberian_husky"], "251": ["n02110341", "dalmatian"], "252": ["n02110627", "affenpinscher"], "253": ["n02110806", "basenji"], "254": ["n02110958", "pug"], "255": ["n02111129", "Leonberg"], "256": ["n02111277", "Newfoundland"], "257": ["n02111500", "Great_Pyrenees"], "258": ["n02111889", "Samoyed"], "259": ["n02112018", "Pomeranian"], "260": ["n02112137", "chow"], "261": ["n02112350", "keeshond"], "262": ["n02112706", "Brabancon_griffon"], "263": ["n02113023", "Pembroke"], "264": ["n02113186", "Cardigan"], "265": ["n02113624", "toy_poodle"], "266": ["n02113712", "miniature_poodle"], "267": 
["n02113799", "standard_poodle"], "268": ["n02113978", "Mexican_hairless"], "269": ["n02114367", "timber_wolf"], "270": ["n02114548", "white_wolf"], "271": ["n02114712", "red_wolf"], "272": ["n02114855", "coyote"], "273": ["n02115641", "dingo"], "274": ["n02115913", "dhole"], "275": ["n02116738", "African_hunting_dog"], "276": ["n02117135", "hyena"], "277": ["n02119022", "red_fox"], "278": ["n02119789", "kit_fox"], "279": ["n02120079", "Arctic_fox"], "280": ["n02120505", "grey_fox"], "281": ["n02123045", "tabby"], "282": ["n02123159", "tiger_cat"], "283": ["n02123394", "Persian_cat"], "284": ["n02123597", "Siamese_cat"], "285": ["n02124075", "Egyptian_cat"], "286": ["n02125311", "cougar"], "287": ["n02127052", "lynx"], "288": ["n02128385", "leopard"], "289": ["n02128757", "snow_leopard"], "290": ["n02128925", "jaguar"], "291": ["n02129165", "lion"], "292": ["n02129604", "tiger"], "293": ["n02130308", "cheetah"], "294": ["n02132136", "brown_bear"], "295": ["n02133161", "American_black_bear"], "296": ["n02134084", "ice_bear"], "297": ["n02134418", "sloth_bear"], "298": ["n02137549", "mongoose"], "299": ["n02138441", "meerkat"], "300": ["n02165105", "tiger_beetle"], "301": ["n02165456", "ladybug"], "302": ["n02167151", "ground_beetle"], "303": ["n02168699", "long-horned_beetle"], "304": ["n02169497", "leaf_beetle"], "305": ["n02172182", "dung_beetle"], "306": ["n02174001", "rhinoceros_beetle"], "307": ["n02177972", "weevil"], "308": ["n02190166", "fly"], "309": ["n02206856", "bee"], "310": ["n02219486", "ant"], "311": ["n02226429", "grasshopper"], "312": ["n02229544", "cricket"], "313": ["n02231487", "walking_stick"], "314": ["n02233338", "cockroach"], "315": ["n02236044", "mantis"], "316": ["n02256656", "cicada"], "317": ["n02259212", "leafhopper"], "318": ["n02264363", "lacewing"], "319": ["n02268443", "dragonfly"], "320": ["n02268853", "damselfly"], "321": ["n02276258", "admiral"], "322": ["n02277742", "ringlet"], "323": ["n02279972", "monarch"], "324": 
["n02280649", "cabbage_butterfly"], "325": ["n02281406", "sulphur_butterfly"], "326": ["n02281787", "lycaenid"], "327": ["n02317335", "starfish"], "328": ["n02319095", "sea_urchin"], "329": ["n02321529", "sea_cucumber"], "330": ["n02325366", "wood_rabbit"], "331": ["n02326432", "hare"], "332": ["n02328150", "Angora"], "333": ["n02342885", "hamster"], "334": ["n02346627", "porcupine"], "335": ["n02356798", "fox_squirrel"], "336": ["n02361337", "marmot"], "337": ["n02363005", "beaver"], "338": ["n02364673", "guinea_pig"], "339": ["n02389026", "sorrel"], "340": ["n02391049", "zebra"], "341": ["n02395406", "hog"], "342": ["n02396427", "wild_boar"], "343": ["n02397096", "warthog"], "344": ["n02398521", "hippopotamus"], "345": ["n02403003", "ox"], "346": ["n02408429", "water_buffalo"], "347": ["n02410509", "bison"], "348": ["n02412080", "ram"], "349": ["n02415577", "bighorn"], "350": ["n02417914", "ibex"], "351": ["n02422106", "hartebeest"], "352": ["n02422699", "impala"], "353": ["n02423022", "gazelle"], "354": ["n02437312", "Arabian_camel"], "355": ["n02437616", "llama"], "356": ["n02441942", "weasel"], "357": ["n02442845", "mink"], "358": ["n02443114", "polecat"], "359": ["n02443484", "black-footed_ferret"], "360": ["n02444819", "otter"], "361": ["n02445715", "skunk"], "362": ["n02447366", "badger"], "363": ["n02454379", "armadillo"], "364": ["n02457408", "three-toed_sloth"], "365": ["n02480495", "orangutan"], "366": ["n02480855", "gorilla"], "367": ["n02481823", "chimpanzee"], "368": ["n02483362", "gibbon"], "369": ["n02483708", "siamang"], "370": ["n02484975", "guenon"], "371": ["n02486261", "patas"], "372": ["n02486410", "baboon"], "373": ["n02487347", "macaque"], "374": ["n02488291", "langur"], "375": ["n02488702", "colobus"], "376": ["n02489166", "proboscis_monkey"], "377": ["n02490219", "marmoset"], "378": ["n02492035", "capuchin"], "379": ["n02492660", "howler_monkey"], "380": ["n02493509", "titi"], "381": ["n02493793", "spider_monkey"], "382": ["n02494079", 
"squirrel_monkey"], "383": ["n02497673", "Madagascar_cat"], "384": ["n02500267", "indri"], "385": ["n02504013", "Indian_elephant"], "386": ["n02504458", "African_elephant"], "387": ["n02509815", "lesser_panda"], "388": ["n02510455", "giant_panda"], "389": ["n02514041", "barracouta"], "390": ["n02526121", "eel"], "391": ["n02536864", "coho"], "392": ["n02606052", "rock_beauty"], "393": ["n02607072", "anemone_fish"], "394": ["n02640242", "sturgeon"], "395": ["n02641379", "gar"], "396": ["n02643566", "lionfish"], "397": ["n02655020", "puffer"], "398": ["n02666196", "abacus"], "399": ["n02667093", "abaya"], "400": ["n02669723", "academic_gown"], "401": ["n02672831", "accordion"], "402": ["n02676566", "acoustic_guitar"], "403": ["n02687172", "aircraft_carrier"], "404": ["n02690373", "airliner"], "405": ["n02692877", "airship"], "406": ["n02699494", "altar"], "407": ["n02701002", "ambulance"], "408": ["n02704792", "amphibian"], "409": ["n02708093", "analog_clock"], "410": ["n02727426", "apiary"], "411": ["n02730930", "apron"], "412": ["n02747177", "ashcan"], "413": ["n02749479", "assault_rifle"], "414": ["n02769748", "backpack"], "415": ["n02776631", "bakery"], "416": ["n02777292", "balance_beam"], "417": ["n02782093", "balloon"], "418": ["n02783161", "ballpoint"], "419": ["n02786058", "Band_Aid"], "420": ["n02787622", "banjo"], "421": ["n02788148", "bannister"], "422": ["n02790996", "barbell"], "423": ["n02791124", "barber_chair"], "424": ["n02791270", "barbershop"], "425": ["n02793495", "barn"], "426": ["n02794156", "barometer"], "427": ["n02795169", "barrel"], "428": ["n02797295", "barrow"], "429": ["n02799071", "baseball"], "430": ["n02802426", "basketball"], "431": ["n02804414", "bassinet"], "432": ["n02804610", "bassoon"], "433": ["n02807133", "bathing_cap"], "434": ["n02808304", "bath_towel"], "435": ["n02808440", "bathtub"], "436": ["n02814533", "beach_wagon"], "437": ["n02814860", "beacon"], "438": ["n02815834", "beaker"], "439": ["n02817516", "bearskin"], 
"440": ["n02823428", "beer_bottle"], "441": ["n02823750", "beer_glass"], "442": ["n02825657", "bell_cote"], "443": ["n02834397", "bib"], "444": ["n02835271", "bicycle-built-for-two"], "445": ["n02837789", "bikini"], "446": ["n02840245", "binder"], "447": ["n02841315", "binoculars"], "448": ["n02843684", "birdhouse"], "449": ["n02859443", "boathouse"], "450": ["n02860847", "bobsled"], "451": ["n02865351", "bolo_tie"], "452": ["n02869837", "bonnet"], "453": ["n02870880", "bookcase"], "454": ["n02871525", "bookshop"], "455": ["n02877765", "bottlecap"], "456": ["n02879718", "bow"], "457": ["n02883205", "bow_tie"], "458": ["n02892201", "brass"], "459": ["n02892767", "brassiere"], "460": ["n02894605", "breakwater"], "461": ["n02895154", "breastplate"], "462": ["n02906734", "broom"], "463": ["n02909870", "bucket"], "464": ["n02910353", "buckle"], "465": ["n02916936", "bulletproof_vest"], "466": ["n02917067", "bullet_train"], "467": ["n02927161", "butcher_shop"], "468": ["n02930766", "cab"], "469": ["n02939185", "caldron"], "470": ["n02948072", "candle"], "471": ["n02950826", "cannon"], "472": ["n02951358", "canoe"], "473": ["n02951585", "can_opener"], "474": ["n02963159", "cardigan"], "475": ["n02965783", "car_mirror"], "476": ["n02966193", "carousel"], "477": ["n02966687", "carpenter's_kit"], "478": ["n02971356", "carton"], "479": ["n02974003", "car_wheel"], "480": ["n02977058", "cash_machine"], "481": ["n02978881", "cassette"], "482": ["n02979186", "cassette_player"], "483": ["n02980441", "castle"], "484": ["n02981792", "catamaran"], "485": ["n02988304", "CD_player"], "486": ["n02992211", "cello"], "487": ["n02992529", "cellular_telephone"], "488": ["n02999410", "chain"], "489": ["n03000134", "chainlink_fence"], "490": ["n03000247", "chain_mail"], "491": ["n03000684", "chain_saw"], "492": ["n03014705", "chest"], "493": ["n03016953", "chiffonier"], "494": ["n03017168", "chime"], "495": ["n03018349", "china_cabinet"], "496": ["n03026506", "Christmas_stocking"], "497": 
["n03028079", "church"], "498": ["n03032252", "cinema"], "499": ["n03041632", "cleaver"], "500": ["n03042490", "cliff_dwelling"], "501": ["n03045698", "cloak"], "502": ["n03047690", "clog"], "503": ["n03062245", "cocktail_shaker"], "504": ["n03063599", "coffee_mug"], "505": ["n03063689", "coffeepot"], "506": ["n03065424", "coil"], "507": ["n03075370", "combination_lock"], "508": ["n03085013", "computer_keyboard"], "509": ["n03089624", "confectionery"], "510": ["n03095699", "container_ship"], "511": ["n03100240", "convertible"], "512": ["n03109150", "corkscrew"], "513": ["n03110669", "cornet"], "514": ["n03124043", "cowboy_boot"], "515": ["n03124170", "cowboy_hat"], "516": ["n03125729", "cradle"], "517": ["n03126707", "crane"], "518": ["n03127747", "crash_helmet"], "519": ["n03127925", "crate"], "520": ["n03131574", "crib"], "521": ["n03133878", "Crock_Pot"], "522": ["n03134739", "croquet_ball"], "523": ["n03141823", "crutch"], "524": ["n03146219", "cuirass"], "525": ["n03160309", "dam"], "526": ["n03179701", "desk"], "527": ["n03180011", "desktop_computer"], "528": ["n03187595", "dial_telephone"], "529": ["n03188531", "diaper"], "530": ["n03196217", "digital_clock"], "531": ["n03197337", "digital_watch"], "532": ["n03201208", "dining_table"], "533": ["n03207743", "dishrag"], "534": ["n03207941", "dishwasher"], "535": ["n03208938", "disk_brake"], "536": ["n03216828", "dock"], "537": ["n03218198", "dogsled"], "538": ["n03220513", "dome"], "539": ["n03223299", "doormat"], "540": ["n03240683", "drilling_platform"], "541": ["n03249569", "drum"], "542": ["n03250847", "drumstick"], "543": ["n03255030", "dumbbell"], "544": ["n03259280", "Dutch_oven"], "545": ["n03271574", "electric_fan"], "546": ["n03272010", "electric_guitar"], "547": ["n03272562", "electric_locomotive"], "548": ["n03290653", "entertainment_center"], "549": ["n03291819", "envelope"], "550": ["n03297495", "espresso_maker"], "551": ["n03314780", "face_powder"], "552": ["n03325584", "feather_boa"], "553": 
["n03337140", "file"], "554": ["n03344393", "fireboat"], "555": ["n03345487", "fire_engine"], "556": ["n03347037", "fire_screen"], "557": ["n03355925", "flagpole"], "558": ["n03372029", "flute"], "559": ["n03376595", "folding_chair"], "560": ["n03379051", "football_helmet"], "561": ["n03384352", "forklift"], "562": ["n03388043", "fountain"], "563": ["n03388183", "fountain_pen"], "564": ["n03388549", "four-poster"], "565": ["n03393912", "freight_car"], "566": ["n03394916", "French_horn"], "567": ["n03400231", "frying_pan"], "568": ["n03404251", "fur_coat"], "569": ["n03417042", "garbage_truck"], "570": ["n03424325", "gasmask"], "571": ["n03425413", "gas_pump"], "572": ["n03443371", "goblet"], "573": ["n03444034", "go-kart"], "574": ["n03445777", "golf_ball"], "575": ["n03445924", "golfcart"], "576": ["n03447447", "gondola"], "577": ["n03447721", "gong"], "578": ["n03450230", "gown"], "579": ["n03452741", "grand_piano"], "580": ["n03457902", "greenhouse"], "581": ["n03459775", "grille"], "582": ["n03461385", "grocery_store"], "583": ["n03467068", "guillotine"], "584": ["n03476684", "hair_slide"], "585": ["n03476991", "hair_spray"], "586": ["n03478589", "half_track"], "587": ["n03481172", "hammer"], "588": ["n03482405", "hamper"], "589": ["n03483316", "hand_blower"], "590": ["n03485407", "hand-held_computer"], "591": ["n03485794", "handkerchief"], "592": ["n03492542", "hard_disc"], "593": ["n03494278", "harmonica"], "594": ["n03495258", "harp"], "595": ["n03496892", "harvester"], "596": ["n03498962", "hatchet"], "597": ["n03527444", "holster"], "598": ["n03529860", "home_theater"], "599": ["n03530642", "honeycomb"], "600": ["n03532672", "hook"], "601": ["n03534580", "hoopskirt"], "602": ["n03535780", "horizontal_bar"], "603": ["n03538406", "horse_cart"], "604": ["n03544143", "hourglass"], "605": ["n03584254", "iPod"], "606": ["n03584829", "iron"], "607": ["n03590841", "jack-o'-lantern"], "608": ["n03594734", "jean"], "609": ["n03594945", "jeep"], "610": ["n03595614", 
"jersey"], "611": ["n03598930", "jigsaw_puzzle"], "612": ["n03599486", "jinrikisha"], "613": ["n03602883", "joystick"], "614": ["n03617480", "kimono"], "615": ["n03623198", "knee_pad"], "616": ["n03627232", "knot"], "617": ["n03630383", "lab_coat"], "618": ["n03633091", "ladle"], "619": ["n03637318", "lampshade"], "620": ["n03642806", "laptop"], "621": ["n03649909", "lawn_mower"], "622": ["n03657121", "lens_cap"], "623": ["n03658185", "letter_opener"], "624": ["n03661043", "library"], "625": ["n03662601", "lifeboat"], "626": ["n03666591", "lighter"], "627": ["n03670208", "limousine"], "628": ["n03673027", "liner"], "629": ["n03676483", "lipstick"], "630": ["n03680355", "Loafer"], "631": ["n03690938", "lotion"], "632": ["n03691459", "loudspeaker"], "633": ["n03692522", "loupe"], "634": ["n03697007", "lumbermill"], "635": ["n03706229", "magnetic_compass"], "636": ["n03709823", "mailbag"], "637": ["n03710193", "mailbox"], "638": ["n03710637", "maillot"], "639": ["n03710721", "maillot"], "640": ["n03717622", "manhole_cover"], "641": ["n03720891", "maraca"], "642": ["n03721384", "marimba"], "643": ["n03724870", "mask"], "644": ["n03729826", "matchstick"], "645": ["n03733131", "maypole"], "646": ["n03733281", "maze"], "647": ["n03733805", "measuring_cup"], "648": ["n03742115", "medicine_chest"], "649": ["n03743016", "megalith"], "650": ["n03759954", "microphone"], "651": ["n03761084", "microwave"], "652": ["n03763968", "military_uniform"], "653": ["n03764736", "milk_can"], "654": ["n03769881", "minibus"], "655": ["n03770439", "miniskirt"], "656": ["n03770679", "minivan"], "657": ["n03773504", "missile"], "658": ["n03775071", "mitten"], "659": ["n03775546", "mixing_bowl"], "660": ["n03776460", "mobile_home"], "661": ["n03777568", "Model_T"], "662": ["n03777754", "modem"], "663": ["n03781244", "monastery"], "664": ["n03782006", "monitor"], "665": ["n03785016", "moped"], "666": ["n03786901", "mortar"], "667": ["n03787032", "mortarboard"], "668": ["n03788195", "mosque"], 
"669": ["n03788365", "mosquito_net"], "670": ["n03791053", "motor_scooter"], "671": ["n03792782", "mountain_bike"], "672": ["n03792972", "mountain_tent"], "673": ["n03793489", "mouse"], "674": ["n03794056", "mousetrap"], "675": ["n03796401", "moving_van"], "676": ["n03803284", "muzzle"], "677": ["n03804744", "nail"], "678": ["n03814639", "neck_brace"], "679": ["n03814906", "necklace"], "680": ["n03825788", "nipple"], "681": ["n03832673", "notebook"], "682": ["n03837869", "obelisk"], "683": ["n03838899", "oboe"], "684": ["n03840681", "ocarina"], "685": ["n03841143", "odometer"], "686": ["n03843555", "oil_filter"], "687": ["n03854065", "organ"], "688": ["n03857828", "oscilloscope"], "689": ["n03866082", "overskirt"], "690": ["n03868242", "oxcart"], "691": ["n03868863", "oxygen_mask"], "692": ["n03871628", "packet"], "693": ["n03873416", "paddle"], "694": ["n03874293", "paddlewheel"], "695": ["n03874599", "padlock"], "696": ["n03876231", "paintbrush"], "697": ["n03877472", "pajama"], "698": ["n03877845", "palace"], "699": ["n03884397", "panpipe"], "700": ["n03887697", "paper_towel"], "701": ["n03888257", "parachute"], "702": ["n03888605", "parallel_bars"], "703": ["n03891251", "park_bench"], "704": ["n03891332", "parking_meter"], "705": ["n03895866", "passenger_car"], "706": ["n03899768", "patio"], "707": ["n03902125", "pay-phone"], "708": ["n03903868", "pedestal"], "709": ["n03908618", "pencil_box"], "710": ["n03908714", "pencil_sharpener"], "711": ["n03916031", "perfume"], "712": ["n03920288", "Petri_dish"], "713": ["n03924679", "photocopier"], "714": ["n03929660", "pick"], "715": ["n03929855", "pickelhaube"], "716": ["n03930313", "picket_fence"], "717": ["n03930630", "pickup"], "718": ["n03933933", "pier"], "719": ["n03935335", "piggy_bank"], "720": ["n03937543", "pill_bottle"], "721": ["n03938244", "pillow"], "722": ["n03942813", "ping-pong_ball"], "723": ["n03944341", "pinwheel"], "724": ["n03947888", "pirate"], "725": ["n03950228", "pitcher"], "726": 
["n03954731", "plane"], "727": ["n03956157", "planetarium"], "728": ["n03958227", "plastic_bag"], "729": ["n03961711", "plate_rack"], "730": ["n03967562", "plow"], "731": ["n03970156", "plunger"], "732": ["n03976467", "Polaroid_camera"], "733": ["n03976657", "pole"], "734": ["n03977966", "police_van"], "735": ["n03980874", "poncho"], "736": ["n03982430", "pool_table"], "737": ["n03983396", "pop_bottle"], "738": ["n03991062", "pot"], "739": ["n03992509", "potter's_wheel"], "740": ["n03995372", "power_drill"], "741": ["n03998194", "prayer_rug"], "742": ["n04004767", "printer"], "743": ["n04005630", "prison"], "744": ["n04008634", "projectile"], "745": ["n04009552", "projector"], "746": ["n04019541", "puck"], "747": ["n04023962", "punching_bag"], "748": ["n04026417", "purse"], "749": ["n04033901", "quill"], "750": ["n04033995", "quilt"], "751": ["n04037443", "racer"], "752": ["n04039381", "racket"], "753": ["n04040759", "radiator"], "754": ["n04041544", "radio"], "755": ["n04044716", "radio_telescope"], "756": ["n04049303", "rain_barrel"], "757": ["n04065272", "recreational_vehicle"], "758": ["n04067472", "reel"], "759": ["n04069434", "reflex_camera"], "760": ["n04070727", "refrigerator"], "761": ["n04074963", "remote_control"], "762": ["n04081281", "restaurant"], "763": ["n04086273", "revolver"], "764": ["n04090263", "rifle"], "765": ["n04099969", "rocking_chair"], "766": ["n04111531", "rotisserie"], "767": ["n04116512", "rubber_eraser"], "768": ["n04118538", "rugby_ball"], "769": ["n04118776", "rule"], "770": ["n04120489", "running_shoe"], "771": ["n04125021", "safe"], "772": ["n04127249", "safety_pin"], "773": ["n04131690", "saltshaker"], "774": ["n04133789", "sandal"], "775": ["n04136333", "sarong"], "776": ["n04141076", "sax"], "777": ["n04141327", "scabbard"], "778": ["n04141975", "scale"], "779": ["n04146614", "school_bus"], "780": ["n04147183", "schooner"], "781": ["n04149813", "scoreboard"], "782": ["n04152593", "screen"], "783": ["n04153751", "screw"], 
"784": ["n04154565", "screwdriver"], "785": ["n04162706", "seat_belt"], "786": ["n04179913", "sewing_machine"], "787": ["n04192698", "shield"], "788": ["n04200800", "shoe_shop"], "789": ["n04201297", "shoji"], "790": ["n04204238", "shopping_basket"], "791": ["n04204347", "shopping_cart"], "792": ["n04208210", "shovel"], "793": ["n04209133", "shower_cap"], "794": ["n04209239", "shower_curtain"], "795": ["n04228054", "ski"], "796": ["n04229816", "ski_mask"], "797": ["n04235860", "sleeping_bag"], "798": ["n04238763", "slide_rule"], "799": ["n04239074", "sliding_door"], "800": ["n04243546", "slot"], "801": ["n04251144", "snorkel"], "802": ["n04252077", "snowmobile"], "803": ["n04252225", "snowplow"], "804": ["n04254120", "soap_dispenser"], "805": ["n04254680", "soccer_ball"], "806": ["n04254777", "sock"], "807": ["n04258138", "solar_dish"], "808": ["n04259630", "sombrero"], "809": ["n04263257", "soup_bowl"], "810": ["n04264628", "space_bar"], "811": ["n04265275", "space_heater"], "812": ["n04266014", "space_shuttle"], "813": ["n04270147", "spatula"], "814": ["n04273569", "speedboat"], "815": ["n04275548", "spider_web"], "816": ["n04277352", "spindle"], "817": ["n04285008", "sports_car"], "818": ["n04286575", "spotlight"], "819": ["n04296562", "stage"], "820": ["n04310018", "steam_locomotive"], "821": ["n04311004", "steel_arch_bridge"], "822": ["n04311174", "steel_drum"], "823": ["n04317175", "stethoscope"], "824": ["n04325704", "stole"], "825": ["n04326547", "stone_wall"], "826": ["n04328186", "stopwatch"], "827": ["n04330267", "stove"], "828": ["n04332243", "strainer"], "829": ["n04335435", "streetcar"], "830": ["n04336792", "stretcher"], "831": ["n04344873", "studio_couch"], "832": ["n04346328", "stupa"], "833": ["n04347754", "submarine"], "834": ["n04350905", "suit"], "835": ["n04355338", "sundial"], "836": ["n04355933", "sunglass"], "837": ["n04356056", "sunglasses"], "838": ["n04357314", "sunscreen"], "839": ["n04366367", "suspension_bridge"], "840": ["n04367480", 
"swab"], "841": ["n04370456", "sweatshirt"], "842": ["n04371430", "swimming_trunks"], "843": ["n04371774", "swing"], "844": ["n04372370", "switch"], "845": ["n04376876", "syringe"], "846": ["n04380533", "table_lamp"], "847": ["n04389033", "tank"], "848": ["n04392985", "tape_player"], "849": ["n04398044", "teapot"], "850": ["n04399382", "teddy"], "851": ["n04404412", "television"], "852": ["n04409515", "tennis_ball"], "853": ["n04417672", "thatch"], "854": ["n04418357", "theater_curtain"], "855": ["n04423845", "thimble"], "856": ["n04428191", "thresher"], "857": ["n04429376", "throne"], "858": ["n04435653", "tile_roof"], "859": ["n04442312", "toaster"], "860": ["n04443257", "tobacco_shop"], "861": ["n04447861", "toilet_seat"], "862": ["n04456115", "torch"], "863": ["n04458633", "totem_pole"], "864": ["n04461696", "tow_truck"], "865": ["n04462240", "toyshop"], "866": ["n04465501", "tractor"], "867": ["n04467665", "trailer_truck"], "868": ["n04476259", "tray"], "869": ["n04479046", "trench_coat"], "870": ["n04482393", "tricycle"], "871": ["n04483307", "trimaran"], "872": ["n04485082", "tripod"], "873": ["n04486054", "triumphal_arch"], "874": ["n04487081", "trolleybus"], "875": ["n04487394", "trombone"], "876": ["n04493381", "tub"], "877": ["n04501370", "turnstile"], "878": ["n04505470", "typewriter_keyboard"], "879": ["n04507155", "umbrella"], "880": ["n04509417", "unicycle"], "881": ["n04515003", "upright"], "882": ["n04517823", "vacuum"], "883": ["n04522168", "vase"], "884": ["n04523525", "vault"], "885": ["n04525038", "velvet"], "886": ["n04525305", "vending_machine"], "887": ["n04532106", "vestment"], "888": ["n04532670", "viaduct"], "889": ["n04536866", "violin"], "890": ["n04540053", "volleyball"], "891": ["n04542943", "waffle_iron"], "892": ["n04548280", "wall_clock"], "893": ["n04548362", "wallet"], "894": ["n04550184", "wardrobe"], "895": ["n04552348", "warplane"], "896": ["n04553703", "washbasin"], "897": ["n04554684", "washer"], "898": ["n04557648", 
"water_bottle"], "899": ["n04560804", "water_jug"], "900": ["n04562935", "water_tower"], "901": ["n04579145", "whiskey_jug"], "902": ["n04579432", "whistle"], "903": ["n04584207", "wig"], "904": ["n04589890", "window_screen"], "905": ["n04590129", "window_shade"], "906": ["n04591157", "Windsor_tie"], "907": ["n04591713", "wine_bottle"], "908": ["n04592741", "wing"], "909": ["n04596742", "wok"], "910": ["n04597913", "wooden_spoon"], "911": ["n04599235", "wool"], "912": ["n04604644", "worm_fence"], "913": ["n04606251", "wreck"], "914": ["n04612504", "yawl"], "915": ["n04613696", "yurt"], "916": ["n06359193", "web_site"], "917": ["n06596364", "comic_book"], "918": ["n06785654", "crossword_puzzle"], "919": ["n06794110", "street_sign"], "920": ["n06874185", "traffic_light"], "921": ["n07248320", "book_jacket"], "922": ["n07565083", "menu"], "923": ["n07579787", "plate"], "924": ["n07583066", "guacamole"], "925": ["n07584110", "consomme"], "926": ["n07590611", "hot_pot"], "927": ["n07613480", "trifle"], "928": ["n07614500", "ice_cream"], "929": ["n07615774", "ice_lolly"], "930": ["n07684084", "French_loaf"], "931": ["n07693725", "bagel"], "932": ["n07695742", "pretzel"], "933": ["n07697313", "cheeseburger"], "934": ["n07697537", "hotdog"], "935": ["n07711569", "mashed_potato"], "936": ["n07714571", "head_cabbage"], "937": ["n07714990", "broccoli"], "938": ["n07715103", "cauliflower"], "939": ["n07716358", "zucchini"], "940": ["n07716906", "spaghetti_squash"], "941": ["n07717410", "acorn_squash"], "942": ["n07717556", "butternut_squash"], "943": ["n07718472", "cucumber"], "944": ["n07718747", "artichoke"], "945": ["n07720875", "bell_pepper"], "946": ["n07730033", "cardoon"], "947": ["n07734744", "mushroom"], "948": ["n07742313", "Granny_Smith"], "949": ["n07745940", "strawberry"], "950": ["n07747607", "orange"], "951": ["n07749582", "lemon"], "952": ["n07753113", "fig"], "953": ["n07753275", "pineapple"], "954": ["n07753592", "banana"], "955": ["n07754684", "jackfruit"], 
"956": ["n07760859", "custard_apple"], "957": ["n07768694", "pomegranate"], "958": ["n07802026", "hay"], "959": ["n07831146", "carbonara"], "960": ["n07836838", "chocolate_sauce"], "961": ["n07860988", "dough"], "962": ["n07871810", "meat_loaf"], "963": ["n07873807", "pizza"], "964": ["n07875152", "potpie"], "965": ["n07880968", "burrito"], "966": ["n07892512", "red_wine"], "967": ["n07920052", "espresso"], "968": ["n07930864", "cup"], "969": ["n07932039", "eggnog"], "970": ["n09193705", "alp"], "971": ["n09229709", "bubble"], "972": ["n09246464", "cliff"], "973": ["n09256479", "coral_reef"], "974": ["n09288635", "geyser"], "975": ["n09332890", "lakeside"], "976": ["n09399592", "promontory"], "977": ["n09421951", "sandbar"], "978": ["n09428293", "seashore"], "979": ["n09468604", "valley"], "980": ["n09472597", "volcano"], "981": ["n09835506", "ballplayer"], "982": ["n10148035", "groom"], "983": ["n10565667", "scuba_diver"], "984": ["n11879895", "rapeseed"], "985": ["n11939491", "daisy"], "986": ["n12057211", "yellow_lady's_slipper"], "987": ["n12144580", "corn"], "988": ["n12267677", "acorn"], "989": ["n12620546", "hip"], "990": ["n12768682", "buckeye"], "991": ["n12985857", "coral_fungus"], "992": ["n12998815", "agaric"], "993": ["n13037406", "gyromitra"], "994": ["n13040303", "stinkhorn"], "995": ["n13044778", "earthstar"], "996": ["n13052670", "hen-of-the-woods"], "997": ["n13054560", "bolete"], "998": ["n13133613", "ear"], "999": ["n15075141", "toilet_tissue"]}
    results = []
    for pred in preds:
        top_indices = pred.argsort()[-top:][::-1]
        result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices]
        result.sort(key=lambda x: x[2], reverse=True)
        results.append(result)
    return results


================================================
FILE: imageai_tf_deprecated/__init__.py
================================================


================================================
FILE: requirements.txt
================================================
cython
pillow>=7.0.0
numpy>=1.18.1
opencv-python>=4.1.2
torch>=1.9.0 --extra-index-url https://download.pytorch.org/whl/cpu
torchvision>=0.10.0 --extra-index-url https://download.pytorch.org/whl/cpu
pytest==7.1.3
tqdm==4.64.1
scipy>=1.7.3
matplotlib>=3.4.3
mock==4.0.3

================================================
FILE: requirements_extra.txt
================================================
pycocotools@git+https://github.com/gautamchitnis/cocoapi.git@cocodataset-master#subdirectory=PythonAPI

================================================
FILE: requirements_gpu.txt
================================================
cython
pillow>=7.0.0
numpy>=1.18.1
opencv-python>=4.1.2
torch>=1.9.0 --extra-index-url https://download.pytorch.org/whl/cu102
torchvision>=0.10.0 --extra-index-url https://download.pytorch.org/whl/cu102
pytest==7.1.3
tqdm==4.64.1
scipy>=1.7.3
matplotlib>=3.4.3
mock==4.0.3

================================================
FILE: scripts/pascal_voc_to_yolo.py
================================================
import glob
import os
import argparse
import pickle
import xml.etree.ElementTree as ET
from os import listdir, getcwd
from os.path import join
import shutil


# Dataset splits expected under the Pascal VOC dataset root.
dirs = ["train", "validation"]
# Sub-folders expected inside every split.
sub_dirs = ["images", "annotations"]
# Class names seen so far; convert_annotation() appends to this list.
classes = list()

def convert(size, box):
    """Turn a pixel bounding box into a box normalised by the image size.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` -- note that both x values come first.

    Returns:
        ``(x_center, y_center, width, height)`` where every value has been
        multiplied by ``1 / width`` or ``1 / height``. One unit is subtracted
        from each centre coordinate before scaling.
    """
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]
    center_x = (box[0] + box[1]) / 2.0 - 1
    center_y = (box[2] + box[3]) / 2.0 - 1
    box_w = box[1] - box[0]
    box_h = box[3] - box[2]
    return (center_x * scale_x, center_y * scale_y, box_w * scale_x, box_h * scale_y)

def convert_annotation(input_ann_path):
    """Read one Pascal VOC XML file and return its objects as converted boxes.

    Every ``<object>`` element contributes one dict with the keys ``"class"``
    (text of its ``<name>``) and ``"bbox"`` (the result of ``convert`` on the
    ``<bndbox>`` values and the image ``<size>``). Class names not seen before
    are appended to the module-level ``classes`` list.

    Args:
        input_ann_path: Path of the XML annotation file.

    Returns:
        A list of ``{"class": str, "bbox": tuple}`` dicts, one per object.
    """
    root = ET.parse(input_ann_path).getroot()

    size_node = root.find('size')
    image_size = (
        int(size_node.find('width').text),
        int(size_node.find('height').text),
    )

    ann_list = []
    for obj in root.iter('object'):
        label = obj.find('name').text
        if label not in classes:
            classes.append(label)

        bndbox = obj.find('bndbox')
        # convert() expects the x pair first, then the y pair.
        voc_box = tuple(
            float(bndbox.find(tag).text)
            for tag in ('xmin', 'xmax', 'ymin', 'ymax')
        )
        ann_list.append({"class": label, "bbox": convert(image_size, voc_box)})

    return ann_list


def main(dataset_dir: str):
    """Convert a Pascal VOC dataset into a sibling ``<dataset_dir>-yolo`` dataset.

    For every split in ``dirs`` the function pairs each image with the XML
    annotation that has the same file stem, copies the image, and writes a
    ``.txt`` annotation with one ``<class_idx> <x> <y> <w> <h>`` line per
    object. A ``classes.txt`` with the sorted class names is written into the
    ``annotations`` folder of both the train and validation output.

    Images without a matching annotation (and annotations without a matching
    image) are skipped and reported on stdout instead of being paired with an
    unrelated file.

    Args:
        dataset_dir: Root of the Pascal VOC dataset. It must contain
            ``<split>/images`` and ``<split>/annotations`` for every split.
            A trailing path separator is tolerated.
    """
    # A trailing separator would make basename() return "-yolo" and place the
    # output inside the source dataset, so strip it first.
    separators = os.sep + (os.altsep or "")
    dataset_root = dataset_dir.rstrip(separators) or dataset_dir

    yolo_dataset = os.path.join(
        os.path.dirname(dataset_root),
        os.path.basename(f"{dataset_root}-yolo")
    )
    for split in dirs:
        split_path = os.path.join(yolo_dataset, split)
        os.makedirs(split_path, exist_ok=True)
        for sub_dir in sub_dirs:
            os.makedirs(os.path.join(split_path, sub_dir), exist_ok=True)

    anns_by_split = {"train": {}, "validation": {}}

    for split in dirs:
        images_dir = os.path.join(dataset_root, split, "images")
        annotations_dir = os.path.join(dataset_root, split, "annotations")

        images = sorted(
            name for name in os.listdir(images_dir)
            if name.endswith((".png", ".jpg", ".jpeg"))
        )
        # os.listdir() order is arbitrary, so pair files by stem rather than
        # by position in the two listings.
        annotation_by_stem = {
            os.path.splitext(name)[0]: name
            for name in sorted(os.listdir(annotations_dir))
            if name.endswith(".xml")
        }

        matched_stems = set()
        for image in images:
            stem = os.path.splitext(image)[0]
            annotation = annotation_by_stem.get(stem)
            if annotation is None:
                print(f"Skipping {split} image without annotation: {image}")
                continue
            matched_stems.add(stem)

            shutil.copy(
                os.path.join(images_dir, image),
                os.path.join(yolo_dataset, split, "images", image)
            )

            ann_list = convert_annotation(
                os.path.join(annotations_dir, annotation)
            )
            if split in anns_by_split:
                anns_by_split[split][annotation] = ann_list

        for stem in sorted(set(annotation_by_stem) - matched_stems):
            print(
                f"Skipping {split} annotation without image: "
                f"{annotation_by_stem[stem]}"
            )

    # Class indices follow the alphabetical order of every class name seen.
    all_classes = sorted(classes)
    class_index = {name: idx for idx, name in enumerate(all_classes)}

    for split, split_anns in anns_by_split.items():
        for ann_name, ann_list in split_anns.items():
            # Swap only the extension; str.replace would touch every ".xml".
            output_ann_path = os.path.join(
                yolo_dataset,
                split,
                "annotations",
                os.path.splitext(ann_name)[0] + ".txt"
            )
            lines = []
            for ann in ann_list:
                bbox = " ".join(str(value) for value in ann["bbox"])
                lines.append(f"{class_index[ann['class']]} {bbox}\n")

            with open(output_ann_path, "w") as ann_writer:
                ann_writer.write("".join(lines))

        with open(os.path.join(
            yolo_dataset, split, "annotations", "classes.txt"
        ), "w") as classes_writer:
            classes_writer.write("\n".join(all_classes))
    

if __name__ == "__main__":
    # Command-line entry point: a single required --dataset_dir option that is
    # handed straight to main().
    cli = argparse.ArgumentParser(description="Convert Pascal VOC dataset to YOLO format")
    cli.add_argument("--dataset_dir", type=str, required=True, help="Dataset directory")
    cli_args = cli.parse_args()
    main(cli_args.dataset_dir)


================================================
FILE: setup.py
================================================
from setuptools import setup,find_packages

# Distribution metadata for the ``imageai`` package.
setup(
    name="imageai",
    version="3.0.3",
    description="A python library built to empower developers to build applications and systems  with self-contained Computer Vision capabilities",
    url="https://github.com/OlafenwaMoses/ImageAI",
    author="Moses Olafenwa",
    author_email="guymodscientist@gmail.com",
    license="MIT",
    # Every discovered package is shipped except those matching the
    # imageai_tf_deprecated pattern.
    packages=find_packages(exclude=["*imageai_tf_deprecated*"]),
    # No install-time dependencies are declared here.
    install_requires=[],
    include_package_data=True,
    zip_safe=False,
)

================================================
FILE: test/test_custom_classification.py
================================================
import os, sys
import cv2
from PIL import Image
import pytest
from os.path import dirname
sys.path.insert(1, os.path.join(dirname(dirname(os.path.abspath(__file__)))))
from imageai.Classification.Custom import CustomImageClassification

# Absolute path of the directory that contains this test module.
test_folder = os.path.dirname(os.path.abspath(__file__))


@pytest.mark.parametrize(
    "image_input",
    [
        os.path.join(test_folder, "data-images", "1.jpg"),
        cv2.imread(os.path.join(test_folder, "data-images", "1.jpg")),
        Image.open(os.path.join(test_folder, "data-images", "1.jpg")),
    ]
)
def test_recognition_model_mobilenetv2(image_input):
    """Classify one image with the MobileNetV2 custom model.

    The same picture is supplied as a file path, as the result of
    ``cv2.imread`` and as the result of ``Image.open``; the returned
    predictions and probabilities must be lists of ``str`` and ``float``.
    """
    model_file = os.path.join(test_folder, "data-models", "mobilenet_v2-idenprof-test_acc_0.85300_epoch-92.pt")
    classes_file = os.path.join(test_folder, "data-json", "idenprof_model_classes.json")

    classifier = CustomImageClassification()
    classifier.setModelTypeAsMobileNetV2()
    classifier.setModelPath(model_file)
    classifier.setJsonPath(classes_file)
    classifier.loadModel()
    predictions, probabilities = classifier.classifyImage(
        image_input=image_input,
        result_count=5,
    )

    # Container types first, then the type of the leading element of each.
    for outputs in (predictions, probabilities):
        assert isinstance(outputs, list)
    for outputs, element_type in ((predictions, str), (probabilities, float)):
        assert isinstance(outputs[0], element_type)


@pytest.mark.parametrize(
    "image_input",
    [
        os.path.join(test_folder, "data-images", "1.jpg"),
        cv2.imread(os.path.join(test_folder, "data-images", "1.jpg")),
        Image.open(os.path.join(test_folder, "data-images", "1.jpg")),
    ]
)
def test_recognition_model_resnet(image_input):
    """Classify one image with the ResNet50 custom model.

    The same picture is supplied as a file path, as the result of
    ``cv2.imread`` and as the result of ``Image.open``; the returned
    predictions and probabilities must be lists of ``str`` and ``float``.
    """
    model_file = os.path.join(test_folder, "data-models", "resnet50-idenprof-test_acc_0.78200_epoch-91.pt")
    classes_file = os.path.join(test_folder, "data-json", "idenprof_model_classes.json")

    classifier = CustomImageClassification()
    classifier.setModelTypeAsResNet50()
    classifier.setModelPath(model_file)
    classifier.setJsonPath(classes_file)
    classifier.loadModel()
    predictions, probabilities = classifier.classifyImage(
        image_input=image_input,
        result_count=5,
    )

    # Container types first, then the type of the leading element of each.
    for outputs in (predictions, probabilities):
        assert isinstance(outputs, list)
    for outputs, element_type in ((predictions, str), (probabilities, float)):
        assert isinstance(outputs[0], element_type)

@pytest.mark.parametrize(
    "image_input",
    [
        os.path.join(test_folder, "data-images", "1.jpg"),
        cv2.imread(os.path.join(test_folder, "data-images", "1.jpg")),
        Image.open(os.path.join(test_folder, "data-images", "1.jpg")),
    ]
)
def test_recognition_model_inceptionv3(image_input):
    """Classify one image with the InceptionV3 custom model.

    The same picture is supplied as a file path, as the result of
    ``cv2.imread`` and as the result of ``Image.open``; the returned
    predictions and probabilities must be lists of ``str`` and ``float``.
    """
    model_file = os.path.join(test_folder, "data-models", "inception_v3-idenprof-test_acc_0.81050_epoch-92.pt")
    classes_file = os.path.join(test_folder, "data-json", "idenprof_model_classes.json")

    classifier = CustomImageClassification()
    classifier.setModelTypeAsInceptionV3()
    classifier.setModelPath(model_file)
    classifier.setJsonPath(classes_file)
    classifier.loadModel()
    predictions, probabilities = classifier.classifyImage(
        image_input=image_input,
        result_count=5,
    )

    # Container types first, then the type of the leading element of each.
    for outputs in (predictions, probabilities):
        assert isinstance(outputs, list)
    for outputs, element_type in ((predictions, str), (probabilities, float)):
        assert isinstance(outputs[0], element_type)

@pytest.mark.parametrize(
    "image_input",
    [
        os.path.join(test_folder, "data-images", "1.jpg"),
        cv2.imread(os.path.join(test_folder, "data-images", "1.jpg")),
        Image.open(os.path.join(test_folder, "data-images", "1.jpg")),
    ]
)
def test_recognition_model_densenet(image_input):
    """Classify one image with the DenseNet121 custom model.

    The same picture is supplied as a file path, as the result of
    ``cv2.imread`` and as the result of ``Image.open``; the returned
    predictions and probabilities must be lists of ``str`` and ``float``.
    """
    model_file = os.path.join(test_folder, "data-models", "densenet121-idenprof-test_acc_0.82550_epoch-95.pt")
    classes_file = os.path.join(test_folder, "data-json", "idenprof_model_classes.json")

    classifier = CustomImageClassification()
    classifier.setModelTypeAsDenseNet121()
    classifier.setModelPath(model_file)
    classifier.setJsonPath(classes_file)
    classifier.loadModel()
    predictions, probabilities = classifier.classifyImage(
        image_input=image_input,
        result_count=5,
    )

    # Container types first, then the type of the leading element of each.
    for outputs in (predictions, probabilities):
        assert isinstance(outputs, list)
    for outputs, element_type in ((predictions, str), (probabilities, float)):
        assert isinstance(outputs[0], element_type)

================================================
FILE: test/test_custom_classification_training.py
================================================
import os, sys
import cv2
import shutil
from PIL import Image
import pytest
from os.path import dirname
sys.path.insert(1, os.path.join(dirname(dirname(os.path.abspath(__file__)))))
from imageai.Classification.Custom import ClassificationModelTrainer, CustomImageClassification

# Absolute path of the directory that contains this test module.
test_folder = os.path.dirname(os.path.abspath(__file__))

# Dataset the classification trainers are pointed at.
classification_dataset = os.path.join(test_folder, "data-datasets", "idenprof")

# Folder holding the pretrained weights used for transfer learning.
pretrained_models_folder = os.path.join(test_folder, "data-models")


@pytest.mark.parametrize(
    "transfer_learning",
    [
        os.path.join(pretrained_models_folder, "resnet50-19c8e357.pth"),
        None,
    ]
)
def test_resnet50_training(transfer_learning):
    """Train ResNet50 for one experiment, with and without pretrained weights.

    After training, the dataset's ``models`` folder must exist and contain
    ``idenprof_model_classes.json`` plus at least one ``.pt`` file.
    """
    models_dir = os.path.join(classification_dataset, "models")
    # Remove any models folder left over from a previous run before training.
    if os.path.isdir(models_dir):
        shutil.rmtree(models_dir)

    trainer = ClassificationModelTrainer()
    trainer.setModelTypeAsResNet50()
    trainer.setDataDirectory(data_directory=classification_dataset)
    trainer.trainModel(
        num_experiments=1,
        batch_size=2,
        transfer_from_model=transfer_learning,
    )

    assert os.path.isdir(models_dir)
    classes_json = os.path.join(models_dir, "idenprof_model_classes.json")
    assert os.path.isfile(classes_json)
    assert any(name.endswith(".pt") for name in os.listdir(models_dir))


@pytest.mark.parametrize(
    "transfer_learning",
    [
        os.path.join(pretrained_models_folder, "densenet121-a639ec97.pth"),
        None,
    ]
)
def test_densenet121_training(transfer_learning):
    """Train DenseNet121 for one experiment, with and without pretrained weights.

    After training, the dataset's ``models`` folder must exist and contain
    ``idenprof_model_classes.json`` plus at least one ``.pt`` file.
    """
    models_dir = os.path.join(classification_dataset, "models")
    # Remove any models folder left over from a previous run before training.
    if os.path.isdir(models_dir):
        shutil.rmtree(models_dir)

    trainer = ClassificationModelTrainer()
    trainer.setModelTypeAsDenseNet121()
    trainer.setDataDirectory(data_directory=classification_dataset)
    trainer.trainModel(
        num_experiments=1,
        batch_size=2,
        transfer_from_model=transfer_learning,
    )

    assert os.path.isdir(models_dir)
    classes_json = os.path.join(models_dir, "idenprof_model_classes.json")
    assert os.path.isfile(classes_json)
    assert any(name.endswith(".pt") for name in os.listdir(models_dir))


@pytest.mark.parametrize(
    "transfer_learning",
    [
        os.path.join(pretrained_models_folder, "inception_v3_google-1a9a5a14.pth"),
        None,
    ]
)
def test_inceptionv3_training(transfer_learning):
    """Train InceptionV3 for one experiment, with and without pretrained weights.

    After training, the dataset's ``models`` folder must exist and contain
    ``idenprof_model_classes.json`` plus at least one ``.pt`` file.
    """
    models_dir = os.path.join(classification_dataset, "models")
    # Remove any models folder left over from a previous run before training.
    if os.path.isdir(models_dir):
        shutil.rmtree(models_dir)

    trainer = ClassificationModelTrainer()
    trainer.setModelTypeAsInceptionV3()
    trainer.setDataDirectory(data_directory=classification_dataset)
    trainer.trainModel(
        num_experiments=1,
        batch_size=2,
        transfer_from_model=transfer_learning,
    )

    assert os.path.isdir(models_dir)
    classes_json = os.path.join(models_dir, "idenprof_model_classes.json")
    assert os.path.isfile(classes_json)
    assert any(name.endswith(".pt") for name in os.listdir(models_dir))


@pytest.mark.parametrize(
    "transfer_learning",
    [
        os.path.join(pretrained_models_folder, "mobilenet_v2-b0353104.pth"),
        None,
    ]
)
def test_mobilenetv2_training(transfer_learning):
    """Train MobileNetV2 for one experiment, with and without pretrained weights.

    After training, the dataset's ``models`` folder must exist and contain
    ``idenprof_model_classes.json`` plus at least one ``.pt`` file.
    """
    models_dir = os.path.join(classification_dataset, "models")
    # Remove any models folder left over from a previous run before training.
    if os.path.isdir(models_dir):
        shutil.rmtree(models_dir)

    trainer = ClassificationModelTrainer()
    trainer.setModelTypeAsMobileNetV2()
    trainer.setDataDirectory(data_directory=classification_dataset)
    trainer.trainModel(
        num_experiments=1,
        batch_size=2,
        transfer_from_model=transfer_learning,
    )

    assert os.path.isdir(models_dir)
    classes_json = os.path.join(models_dir, "idenprof_model_classes.json")
    assert os.path.isfile(classes_json)
    assert any(name.endswith(".pt") for name in os.listdir(models_dir))


================================================
FILE: test/test_custom_detection_training.py
================================================
import os, sys
import shutil
import pytest
from os.path import dirname
sys.path.insert(1, os.path.join(dirname(dirname(os.path.abspath(__file__)))))
from imageai.Detection.Custom import DetectionModelTrainer

# Absolute path of the directory that contains this test module.
test_folder = os.path.dirname(os.path.abspath(__file__))

# Dataset the detection trainers are pointed at.
detection_dataset = os.path.join(test_folder, "data-datasets", "number-plate")

# Folder holding the pretrained weights used for transfer learning.
pretrained_models_folder = os.path.join(test_folder, "data-models")

def delete_cache(dirs: list):
    """Recursively delete every entry of ``dirs`` that is an existing directory.

    Entries that do not exist, or that are not directories, are left alone.
    """
    # filter() is lazy, so each path is checked right before it is removed.
    for folder in filter(os.path.isdir, dirs):
        shutil.rmtree(folder)

@pytest.mark.parametrize(
    "transfer_learning",
    [
        os.path.join(pretrained_models_folder, "yolov3.pt"),
        None,
    ]
)
def test_yolov3_training(transfer_learning):
    """Train YOLOv3 for two experiments, with and without pretrained weights.

    Training must produce the detection config JSON and at least one ``.pt``
    file in the dataset's ``models`` folder. The ``json`` and ``models``
    folders are removed before and after the run.
    """
    models_dir = os.path.join(detection_dataset, "models")
    json_dir = os.path.join(detection_dataset, "json")
    json_file = os.path.join(json_dir, "number-plate_yolov3_detection_config.json")
    generated_dirs = [json_dir, models_dir]

    delete_cache(generated_dirs)

    trainer = DetectionModelTrainer()
    trainer.setModelTypeAsYOLOv3()
    trainer.setDataDirectory(data_directory=detection_dataset)
    trainer.setTrainConfig(
        object_names_array=["number-plate"],
        batch_size=2,
        num_experiments=2,
        train_from_pretrained_model=transfer_learning,
    )
    trainer.trainModel()

    assert os.path.isfile(json_file)
    checkpoints = [name for name in os.listdir(models_dir) if name.endswith(".pt")]
    assert len(checkpoints) > 0

    delete_cache(generated_dirs)

@pytest.mark.parametrize(
    "transfer_learning",
    [
        os.path.join(pretrained_models_folder, "tiny-yolov3.pt"),
        None,
    ]
)
def test_tiny_yolov3_training(transfer_learning):
    """Train TinyYOLOv3 for two experiments, with and without pretrained weights.

    Training must produce the detection config JSON and at least one ``.pt``
    file in the dataset's ``models`` folder. The ``json`` and ``models``
    folders are removed before and after the run.
    """
    models_dir = os.path.join(detection_dataset, "models")
    json_dir = os.path.join(detection_dataset, "json")
    json_file = os.path.join(json_dir, "number-plate_tiny-yolov3_detection_config.json")
    generated_dirs = [json_dir, models_dir]

    delete_cache(generated_dirs)

    trainer = DetectionModelTrainer()
    trainer.setModelTypeAsTinyYOLOv3()
    trainer.setDataDirectory(data_directory=detection_dataset)
    trainer.setTrainConfig(
        object_names_array=["number-plate"],
        batch_size=2,
        num_experiments=2,
        train_from_pretrained_model=transfer_learning,
    )
    trainer.trainModel()

    assert os.path.isfile(json_file)
    checkpoints = [name for name in os.listdir(models_dir) if name.endswith(".pt")]
    assert len(checkpoints) > 0

    delete_cache(generated_dirs)

================================================
FILE: test/test_custom_object_detection.py
================================================
import os, sys
from typing import List
import shutil
import cv2
import uuid
from PIL import Image
import numpy as np
import pytest
from os.path import dirname
sys.path.insert(1, os.path.join(dirname(dirname(os.path.abspath(__file__)))))
from imageai.Detection.Custom import CustomObjectDetection

# Absolute path of the directory that contains this test module.
test_folder = os.path.dirname(os.path.abspath(__file__))


def delete_cache(paths: List[str]):
    """Delete every entry of ``paths`` that exists.

    Directories are removed recursively and regular files are unlinked;
    anything else (for example a missing path) is ignored.
    """
    for target in paths:
        if os.path.isdir(target):
            shutil.rmtree(target)
        elif os.path.isfile(target):
            os.remove(target)


@pytest.mark.parametrize(
    "input_image, output_type, extract_objects",
    [
        (os.path.join(test_folder, "data-images", "15.jpg"), "file", False),
        (os.path.join(test_folder, "data-images", "15.jpg"), "file", True),
        (cv2.imread(os.path.join(test_folder, "data-images", "15.jpg")), "array", False),
        (cv2.imread(os.path.join(test_folder, "data-images", "15.jpg")), "array", True),
        (Image.open(os.path.join(test_folder, "data-images", "15.jpg")), "array", True),
    ]
)
def test_object_detection_yolov3(input_image, output_type, extract_objects):
    """Run custom YOLOv3 detection on one image and validate the detections.

    Args:
        input_image: The image as a file path, a ``cv2.imread`` result or an
            ``Image.open`` result.
        output_type: ``"file"`` to write an annotated image to disk, or
            ``"array"`` to receive it as a numpy array.
        extract_objects: Whether each detected object is also extracted.

    Every detection must be a dict with a ``str`` name, a ``float``
    percentage probability and four ``int`` box points ordered
    ``x1 < x2`` and ``y1 < y2``. Files written by the run are removed.
    """
    detector = CustomObjectDetection()
    detector.setModelTypeAsYOLOv3()
    detector.setModelPath(os.path.join(test_folder, "data-models", "yolov3_number-plate-dataset-imageai_mAP-0.57145_epoch-11.pt"))
    detector.setJsonPath(os.path.join(test_folder, "data-json", "number-plate-dataset-imageai_yolov3_detection_config.json"))
    detector.loadModel()

    # A unique name keeps parametrized runs from overwriting each other.
    output_img_path = os.path.join(test_folder, "data-images", str(uuid.uuid4()) + ".jpg")

    if output_type == "array":
        if extract_objects:
            output_image_array, detections, extracted_objects = detector.detectObjectsFromImage(input_image=input_image, output_type=output_type, extract_detected_objects=extract_objects)

            assert len(detections) > 0
            assert len(extracted_objects) > 0
            for extracted_obj in extracted_objects:
                assert type(extracted_obj) == np.ndarray
        else:
            output_image_array, detections = detector.detectObjectsFromImage(input_image=input_image, output_type=output_type)
            assert type(output_image_array) == np.ndarray
            assert len(detections) > 0
    else:
        if extract_objects:
            detections, extracted_object_paths = detector.detectObjectsFromImage(input_image=input_image, output_image_path=output_img_path, extract_detected_objects=True)

            assert len(detections) > 0
            assert os.path.isfile(output_img_path)
            assert len(extracted_object_paths) > 0

            extraction_dir = os.path.dirname(extracted_object_paths[0])
            delete_cache(list(extracted_object_paths) + [output_img_path])
            # Previously extracted_object_paths[0] was passed to delete_cache a
            # second time, which did nothing because that file was already gone.
            # NOTE(review): presumably the crops are written to a per-run
            # folder -- confirm against detectObjectsFromImage. The folder is
            # removed only when it is empty, so a shared folder such as
            # data-images is never deleted.
            if os.path.isdir(extraction_dir) and not os.listdir(extraction_dir):
                os.rmdir(extraction_dir)
        else:
            detections = detector.detectObjectsFromImage(input_image=input_image, output_image_path=output_img_path)
            assert len(detections) > 0
            delete_cache([output_img_path])

    assert type(detections) == list

    for eachObject in detections:
        assert type(eachObject) == dict
        assert "name" in eachObject.keys()
        assert type(eachObject["name"]) == str
        assert "percentage_probability" in eachObject.keys()
        assert type(eachObject["percentage_probability"]) == float
        assert "box_points" in eachObject.keys()
        assert type(eachObject["box_points"]) == list
        box_points = eachObject["box_points"]
        for point in box_points:
            assert type(point) == int
        # Box points are ordered (x1, y1, x2, y2).
        assert box_points[0] < box_points[2]
        assert box_points[1] < box_points[3]


@pytest.mark.parametrize(
    "input_image, output_type, extract_objects",
    [
        (os.path.join(test_folder, "data-images", "15.jpg"), "file", False),
        (os.path.join(test_folder, "data-images", "15.jpg"), "file", True),
        (cv2.imread(os.path.join(test_folder, "data-images", "15.jpg")), "array", False),
        (cv2.imread(os.path.join(test_folder, "data-images", "15.jpg")), "array", True),
        (Image.open(os.path.join(test_folder, "data-images", "15.jpg")), "array", True),
    ]
)
def test_object_detection_tiny_yolov3(input_image, output_type, extract_objects):
    """Run custom TinyYOLOv3 detection on one image and validate the detections.

    Args:
        input_image: The image as a file path, a ``cv2.imread`` result or an
            ``Image.open`` result.
        output_type: ``"file"`` to write an annotated image to disk, or
            ``"array"`` to receive it as a numpy array.
        extract_objects: Whether each detected object is also extracted.

    When objects are extracted there must be exactly one per detection.
    Every detection must be a dict with a ``str`` name, a ``float``
    percentage probability and four ``int`` box points ordered
    ``x1 < x2`` and ``y1 < y2``. Files written by the run are removed.
    """
    detector = CustomObjectDetection()
    detector.setModelTypeAsTinyYOLOv3()
    detector.setModelPath(os.path.join(test_folder, "data-models", "tiny_yolov3_number-plate-dataset-imageai_mAP-0.22595_epoch-20.pt"))
    detector.setJsonPath(os.path.join(test_folder, "data-json", "number-plate-dataset-imageai_tiny_yolov3_detection_config.json"))
    detector.loadModel()

    # A unique name keeps parametrized runs from overwriting each other.
    output_img_path = os.path.join(test_folder, "data-images", str(uuid.uuid4()) + ".jpg")

    if output_type == "array":
        if extract_objects:
            output_image_array, detections, extracted_objects = detector.detectObjectsFromImage(input_image=input_image, output_type=output_type, extract_detected_objects=extract_objects)

            assert len(detections) > 0
            assert len(extracted_objects) == len(detections)
            for extracted_obj in extracted_objects:
                assert type(extracted_obj) == np.ndarray
        else:
            output_image_array, detections = detector.detectObjectsFromImage(input_image=input_image, output_type=output_type)
            assert type(output_image_array) == np.ndarray
            assert len(detections) > 0
    else:
        if extract_objects:
            detections, extracted_object_paths = detector.detectObjectsFromImage(input_image=input_image, output_image_path=output_img_path, extract_detected_objects=True)

            assert len(detections) > 0
            assert os.path.isfile(output_img_path)
            assert len(extracted_object_paths) == len(detections)

            extraction_dir = os.path.dirname(extracted_object_paths[0])
            delete_cache(list(extracted_object_paths) + [output_img_path])
            # Previously extracted_object_paths[0] was passed to delete_cache a
            # second time, which did nothing because that file was already gone.
            # NOTE(review): presumably the crops are written to a per-run
            # folder -- confirm against detectObjectsFromImage. The folder is
            # removed only when it is empty, so a shared folder such as
            # data-images is never deleted.
            if os.path.isdir(extraction_dir) and not os.listdir(extraction_dir):
                os.rmdir(extraction_dir)
        else:
            detections = detector.detectObjectsFromImage(input_image=input_image, output_image_path=output_img_path)
            assert len(detections) > 0
            delete_cache([output_img_path])

    assert type(detections) == list

    for eachObject in detections:
        assert type(eachObject) == dict
        assert "name" in eachObject.keys()
        assert type(eachObject["name"]) == str
        assert "percentage_probability" in eachObject.keys()
        assert type(eachObject["percentage_probability"]) == float
        assert "box_points" in eachObject.keys()
        assert type(eachObject["box_points"]) == list
        box_points = eachObject["box_points"]
        for point in box_points:
            assert type(point) == int
        # Box points are ordered (x1, y1, x2, y2).
        assert box_points[0] < box_points[2]
        assert box_points[1] < box_points[3]


================================================
FILE: test/test_custom_video_detection.py
================================================
import os, sys
from typing import List
from numpy import ndarray
from os.path import dirname
from mock import patch
sys.path.insert(1, os.path.join(dirname(dirname(os.path.abspath(__file__)))))

from imageai.Detection.Custom import CustomVideoObjectDetection


# Absolute path of the directory that contains this test module.
test_folder = os.path.dirname(os.path.abspath(__file__))

_videos_folder = os.path.join(test_folder, "data-videos")
# Input clip used by every test in this module.
video_file = os.path.join(_videos_folder, "dashcam.mp4")
# Output path without extension; the tests look for this path plus ".mp4".
video_file_output = os.path.join(_videos_folder, "dashcam-detected")


class CallbackFunctions:
    """Callbacks that type-check the arguments of per-frame / per-second hooks.

    Both callbacks are static: they take no ``self`` and can be called on the
    class or on an instance. Each one raises ``AssertionError`` when an
    argument does not have the expected type, and indexes the first element
    of its list arguments, so those lists must not be empty.
    """

    @staticmethod
    def forFrame(frame_number, output_array, output_count, detected_frame):
        """Validate the arguments passed for a single processed frame.

        Args:
            frame_number: Must be an ``int``.
            output_array: List of detection dicts with ``name`` (``str``),
                ``percentage_probability`` (``float``) and ``box_points``
                (``list``); only the first entry is inspected.
            output_count: Dict mapping ``str`` keys to ``int`` values.
            detected_frame: Must be a numpy ``ndarray``.
        """
        assert isinstance(detected_frame, ndarray)
        assert isinstance(frame_number, int)
        assert isinstance(output_array, list)
        assert isinstance(output_array[0], dict)
        assert isinstance(output_array[0]["name"], str)
        assert isinstance(output_array[0]["percentage_probability"], float)
        assert isinstance(output_array[0]["box_points"], list)

        assert isinstance(output_count, dict)
        for a_key in dict(output_count).keys():
            assert isinstance(a_key, str)
            assert isinstance(output_count[a_key], int)

    @staticmethod
    def forSecond(second_number, output_arrays, count_arrays, average_output_count, detected_frame):
        """Validate the arguments passed for one processed second.

        Args:
            second_number: Must be an ``int``.
            output_arrays: List of per-frame detection lists; only the first
                detection of the first frame is inspected.
            count_arrays: List of per-frame count dicts (``str`` -> ``int``);
                only the first dict is inspected.
            average_output_count: Dict mapping ``str`` keys to ``int`` values.
            detected_frame: Must be a numpy ``ndarray``.
        """
        assert isinstance(detected_frame, ndarray)
        assert isinstance(second_number, int)
        assert isinstance(output_arrays, list)
        assert isinstance(output_arrays[0], list)

        assert isinstance(output_arrays[0][0], dict)
        assert isinstance(output_arrays[0][0]["name"], str)
        assert isinstance(output_arrays[0][0]["percentage_probability"], float)
        assert isinstance(output_arrays[0][0]["box_points"], list)

        assert isinstance(count_arrays, list)
        assert isinstance(count_arrays[0], dict)
        for a_key in dict(count_arrays[0]).keys():
            assert isinstance(a_key, str)
            assert isinstance(count_arrays[0][a_key], int)

        assert isinstance(average_output_count, dict)
        for a_key2 in dict(average_output_count).keys():
            assert isinstance(a_key2, str)
            assert isinstance(average_output_count[a_key2], int)


def delete_cache(files: List[str]):
    """Remove every entry of ``files`` that is an existing regular file.

    Missing paths and directories are ignored.
    """
    # filter() is lazy, so each path is checked right before it is removed.
    for stale_file in filter(os.path.isfile, files):
        os.remove(stale_file)


def test_video_detection_yolov3():
    """Run custom YOLOv3 detection on the sample video and save the result.

    The call must return a ``str`` and the ``.mp4`` output must exist; the
    output file is removed before and after the run.
    """
    expected_output = video_file_output + ".mp4"
    delete_cache([expected_output])

    model_file = os.path.join(test_folder, "data-models", "yolov3_number-plate-dataset-imageai_mAP-0.57145_epoch-11.pt")
    config_file = os.path.join(test_folder, "data-json", "number-plate-dataset-imageai_yolov3_detection_config.json")

    detector = CustomVideoObjectDetection()
    detector.setModelTypeAsYOLOv3()
    detector.setModelPath(model_path=model_file)
    detector.setJsonPath(config_file)
    detector.loadModel()
    video_path = detector.detectObjectsFromVideo(
        input_file_path=video_file,
        output_file_path=video_file_output,
        save_detected_video=True,
        frames_per_second=30,
        log_progress=True,
    )

    assert os.path.exists(expected_output)
    assert isinstance(video_path, str)

    delete_cache([expected_output])


def test_video_detection_tiny_yolov3():
    """Run the custom TinyYOLOv3 video detector on the sample clip and check that an .mp4 is written."""
    detected_video = video_file_output + ".mp4"
    # Start from a clean slate in case an earlier run left its output behind.
    delete_cache([detected_video])

    detector = CustomVideoObjectDetection()
    detector.setModelTypeAsTinyYOLOv3()
    detector.setModelPath(
        model_path=os.path.join(test_folder, "data-models", "tiny_yolov3_number-plate-dataset-imageai_mAP-0.22595_epoch-20.pt")
    )
    detector.setJsonPath(
        os.path.join(test_folder, "data-json", "number-plate-dataset-imageai_tiny_yolov3_detection_config.json")
    )
    detector.loadModel()

    detection_options = {
        "input_file_path": video_file,
        "output_file_path": video_file_output,
        "save_detected_video": True,
        "frames_per_second": 30,
        "log_progress": True,
    }
    video_path = detector.detectObjectsFromVideo(**detection_options)

    assert os.path.exists(detected_video)
    assert isinstance(video_path, str)

    delete_cache([detected_video])


def test_video_detection_yolo_analysis():
    """Check that the custom YOLOv3 video detector invokes the per-frame and per-second callbacks.

    ``patch.object`` swaps ``CallbackFunctions.forFrame`` / ``forSecond`` for
    mocks inside the ``with`` block, so the detector is handed the mocks: this
    test verifies that the callbacks were called, while the type assertions
    written in ``CallbackFunctions`` themselves are not executed here.
    """
    detected_video = video_file_output + ".mp4"
    delete_cache([detected_video])

    weights_path = os.path.join(test_folder, "data-models", "yolov3_number-plate-dataset-imageai_mAP-0.57145_epoch-11.pt")
    config_path = os.path.join(test_folder, "data-json", "number-plate-dataset-imageai_yolov3_detection_config.json")

    detector = CustomVideoObjectDetection()
    detector.setModelTypeAsYOLOv3()
    detector.setModelPath(model_path=weights_path)
    detector.setJsonPath(config_path)
    detector.loadModel()

    with patch.object(CallbackFunctions, 'forFrame') as frameFunc, \
            patch.object(CallbackFunctions, 'forSecond') as secondFunc:

        video_path = detector.detectObjectsFromVideo(
            input_file_path=video_file,
            output_file_path=video_file_output,
            save_detected_video=True,
            frames_per_second=30,
            log_progress=True,
            per_frame_function=frameFunc,
            per_second_function=secondFunc,
            return_detected_frame=True,
        )

        assert os.path.exists(detected_video)
        assert isinstance(video_path, str)

        frameFunc.assert_called()
        secondFunc.assert_called()

    delete_cache([detected_video])


================================================
FILE: test/test_image_classification.py
================================================
import os, sys
import cv2
from PIL import Image
import pytest
from os.path import dirname
sys.path.insert(1, os.path.join(dirname(dirname(os.path.abspath(__file__)))))
from imageai.Classification import ImageClassification

# Absolute path of the directory containing this test module; the model and
# image fixtures used below are resolved relative to it.
test_folder = dirname(os.path.abspath(__file__))


@pytest.mark.parametrize(
    "image_input",
    [
        os.path.join(test_folder, "data-images", "1.jpg"),
        cv2.imread(os.path.join(test_folder, "data-images", "1.jpg")),
        Image.open(os.path.join(test_folder, "data-images", "1.jpg")),
    ],
)
def test_recognition_model_mobilenetv2(image_input):
    """Classify the sample image with MobileNetV2, given as a path, a ``cv2.imread`` result and a PIL image."""
    weights_path = os.path.join(test_folder, "data-models", "mobilenet_v2-b0353104.pth")

    classifier = ImageClassification()
    classifier.setModelTypeAsMobileNetV2()
    classifier.setModelPath(weights_path)
    classifier.loadModel()

    result = classifier.classifyImage(image_input=image_input)
    predictions, probabilities = result

    # Both outputs are lists; the first label is a string, the first score a float.
    assert isinstance(predictions, list)
    assert isinstance(probabilities, list)
    top_label = predictions[0]
    assert isinstance(top_label, str)
    top_score = probabilities[0]
    assert isinstance(top_score, float)


@pytest.mark.parametrize(
    "image_input",
    [
        os.path.join(test_folder, "data-images", "1.jpg"),
        cv2.imread(os.path.join(test_folder, "data-images", "1.jpg")),
        Image.open(os.path.join(test_folder, "data-images", "1.jpg")),
    ],
)
def test_recognition_model_resnet(image_input):
    """Classify the sample image with ResNet50, given as a path, a ``cv2.imread`` result and a PIL image."""
    weights_path = os.path.join(test_folder, "data-models", "resnet50-19c8e357.pth")

    classifier = ImageClassification()
    classifier.setModelTypeAsResNet50()
    classifier.setModelPath(weights_path)
    classifier.loadModel()

    result = classifier.classifyImage(image_input=image_input)
    predictions, probabilities = result

    # Both outputs are lists; the first label is a string, the first score a float.
    assert isinstance(predictions, list)
    assert isinstance(probabilities, list)
    top_label = predictions[0]
    assert isinstance(top_label, str)
    top_score = probabilities[0]
    assert isinstance(top_score, float)

@pytest.mark.parametrize(
    "image_input",
    [
        os.path.join(test_folder, "data-images", "1.jpg"),
        cv2.imread(os.path.join(test_folder, "data-images", "1.jpg")),
        Image.open(os.path.join(test_folder, "data-images", "1.jpg")),
    ],
)
def test_recognition_model_inceptionv3(image_input):
    """Classify the sample image with InceptionV3, given as a path, a ``cv2.imread`` result and a PIL image."""
    weights_path = os.path.join(test_folder, "data-models", "inception_v3_google-1a9a5a14.pth")

    classifier = ImageClassification()
    classifier.setModelTypeAsInceptionV3()
    classifier.setModelPath(weights_path)
    classifier.loadModel()

    result = classifier.classifyImage(image_input=image_input)
    predictions, probabilities = result

    # Both outputs are lists; the first label is a string, the first score a float.
    assert isinstance(predictions, list)
    assert isinstance(probabilities, list)
    top_label = predictions[0]
    assert isinstance(top_label, str)
    top_score = probabilities[0]
    assert isinstance(top_score, float)

@pytest.mark.parametrize(
    "image_input",
    [
        os.path.join(test_folder, "data-images", "1.jpg"),
        cv2.imread(os.path.join(test_folder, "data-images", "1.jpg")),
        Image.open(os.path.join(test_folder, "data-images", "1.jpg")),
    ],
)
def test_recognition_model_densenet(image_input):
    """Classify the sample image with DenseNet121, given as a path, a ``cv2.imread`` result and a PIL image."""
    weights_path = os.path.join(test_folder, "data-models", "densenet121-a639ec97.pth")

    classifier = ImageClassification()
    classifier.setModelTypeAsDenseNet121()
    classifier.setModelPath(weights_path)
    classifier.loadModel()

    result = classifier.classifyImage(image_input=image_input)
    predictions, probabilities = result

    # Both outputs are lists; the first label is a string, the first score a float.
    assert isinstance(predictions, list)
    assert isinstance(probabilities, list)
    top_label = predictions[0]
    assert isinstance(top_label, str)
    top_score = probabilities[0]
    assert isinstance(top_score, float)

================================================
FILE: test/test_object_detection.py
================================================
import os, sys
from typing import List
import shutil
import cv2
import uuid
from PIL import Image
import numpy as np
import pytest
from os.path import dirname
sys.path.insert(1, os.path.join(dirname(dirname(os.path.abspath(__file__)))))
from imageai.Detection import ObjectDetection

# Absolute path of the directory containing this test module; the model and
# image fixtures used below are resolved relative to it.
test_folder = dirname(os.path.abspath(__file__))


def delete_cache(paths: List[str]):
    """Remove every listed path: regular files are unlinked, directories are removed recursively.

    Paths that are neither an existing file nor an existing directory are skipped.
    """
    for target in paths:
        # Checked per iteration so a path listed twice is only removed once.
        if os.path.isfile(target):
            os.remove(target)
            continue
        if os.path.isdir(target):
            shutil.rmtree(target)


@pytest.mark.parametrize(
    "input_image, output_type, extract_objects",
    [
        (os.path.join(test_folder, "data-images", "1.jpg"), "file", False),
        (os.path.join(test_folder, "data-images", "4.jpg"), "file", False),
        (os.path.join(test_folder, "data-images", "1.jpg"), "file", True),
        (cv2.imread(os.path.join(test_folder, "data-images", "1.jpg")), "array", False),
        (cv2.imread(os.path.join(test_folder, "data-images", "1.jpg")), "array", True),
        (Image.open(os.path.join(test_folder, "data-images", "1.jpg")), "array", True),
    ],
)
def test_object_detection_retinanet(input_image, output_type, extract_objects):
    """Exercise RetinaNet detection for file/array output, with and without object extraction.

    After the branch-specific checks, every returned detection is validated:
    it must be a dict with a ``str`` name, a ``float`` percentage probability
    and a list of ``int`` box points whose first two values are smaller than
    the last two.
    """
    weights_path = os.path.join(test_folder, "data-models", "retinanet_resnet50_fpn_coco-eeacb38b.pth")

    detector = ObjectDetection()
    detector.setModelTypeAsRetinaNet()
    detector.setModelPath(weights_path)
    detector.loadModel()

    # Unique output name so parametrized runs do not collide with each other.
    output_img_path = os.path.join(test_folder, "data-images", str(uuid.uuid4()) + ".jpg")

    wants_array = output_type == "array"

    if wants_array and extract_objects:
        output_image_array, detections, extracted_objects = detector.detectObjectsFromImage(
            input_image=input_image,
            output_type=output_type,
            extract_detected_objects=extract_objects,
        )

        assert len(extracted_objects) > 1
        for extracted_obj in extracted_objects:
            assert type(extracted_obj) is np.ndarray
        assert type(detections) is list
    elif wants_array:
        output_image_array, detections = detector.detectObjectsFromImage(
            input_image=input_image,
            output_type=output_type,
        )
        assert type(output_image_array) is np.ndarray
        assert type(detections) is list
    elif extract_objects:
        detections, extracted_object_paths = detector.detectObjectsFromImage(
            input_image=input_image,
            output_image_path=output_img_path,
            extract_detected_objects=True,
        )

        assert type(detections) is list
        assert os.path.isfile(output_img_path)
        assert len(extracted_object_paths) > 3
        delete_cache(extracted_object_paths)
        # NOTE(review): extracted_object_paths[0] was already handed to
        # delete_cache on the line above, so it is a no-op here — confirm
        # whether a different path (e.g. an extraction directory) was intended.
        delete_cache([extracted_object_paths[0], output_img_path])
    else:
        detections = detector.detectObjectsFromImage(
            input_image=input_image,
            output_image_path=output_img_path,
        )
        assert type(detections) is list
        delete_cache([output_img_path])

    expected_fields = (
        ("name", str),
        ("percentage_probability", float),
        ("box_points", list),
    )
    for detection in detections:
        assert type(detection) is dict
        for field, expected_type in expected_fields:
            assert field in detection
            assert type(detection[field]) is expected_type
        box_points = detection["box_points"]
        for coordinate in box_points:
            assert type(coordinate) is int
        assert box_points[0] < box_points[2]
        assert box_points[1] < box_points[3]


@pytest.mark.parametrize(
    "input_image, output_type, extract_objects",
    [
        (os.path.join(test_folder, "data-images", "1.jpg"), "file", False),
        (os.path.join(test_folder, "data-images", "4.jpg"), "file", False),
        (os.path.join(test_folder, "data-images", "1.jpg"), "file", True),
        (cv2.imread(os.path.join(test_folder, "data-images", "1.jpg")), "array", False),
        (cv2.imread(os.path.join(test_folder, "data-images", "1.jpg")), "array", True),
        (Image.open(os.path.join(test_folder, "data-images", "1.jpg")), "array", True),
    ],
)
def test_object_detection_yolov3(input_image, output_type, extract_objects):
    """Exercise YOLOv3 detection for file/array output, with and without object extraction.

    After the branch-specific checks, every returned detection is validated:
    it must be a dict with a ``str`` name, a ``float`` percentage probability
    and a list of ``int`` box points whose first two values are smaller than
    the last two.
    """
    weights_path = os.path.join(test_folder, "data-models", "yolov3.pt")

    detector = ObjectDetection()
    detector.setModelTypeAsYOLOv3()
    detector.setModelPath(weights_path)
    detector.loadModel()

    # Unique output name so parametrized runs do not collide with each other.
    output_img_path = os.path.join(test_folder, "data-images", str(uuid.uuid4()) + ".jpg")

    wants_array = output_type == "array"

    if wants_array and extract_objects:
        output_image_array, detections, extracted_objects = detector.detectObjectsFromImage(
            input_image=input_image,
            output_type=output_type,
            extract_detected_objects=extract_objects,
        )

        assert len(extracted_objects) > 1
        assert type(detections) is list
        for extracted_obj in extracted_objects:
            assert type(extracted_obj) is np.ndarray
    elif wants_array:
        output_image_array, detections = detector.detectObjectsFromImage(
            input_image=input_image,
            output_type=output_type,
        )
        assert type(output_image_array) is np.ndarray
        assert type(detections) is list
    elif extract_objects:
        detections, extracted_object_paths = detector.detectObjectsFromImage(
            input_image=input_image,
            output_image_path=output_img_path,
            extract_detected_objects=True,
        )

        assert os.path.isfile(output_img_path)
        assert len(extracted_object_paths) > 3
        assert type(detections) is list
        delete_cache(extracted_object_paths)
        # NOTE(review): extracted_object_paths[0] was already handed to
        # delete_cache on the line above, so it is a no-op here — confirm
        # whether a different path (e.g. an extraction directory) was intended.
        delete_cache([extracted_object_paths[0], output_img_path])
    else:
        detections = detector.detectObjectsFromImage(
            input_image=input_image,
            output_image_path=output_img_path,
        )
        assert type(detections) is list
        delete_cache([output_img_path])

    expected_fields = (
        ("name", str),
        ("percentage_probability", float),
        ("box_points", list),
    )
    for detection in detections:
        assert type(detection) is dict
        for field, expected_type in expected_fields:
            assert field in detection
            assert type(detection[field]) is expected_type
        box_points = detection["box_points"]
        for coordinate in box_points:
            assert type(coordinate) is int
        assert box_points[0] < box_points[2]
        assert box_points[1] < box_points[3]


@pytest.mark.parametrize(
    "input_image, output_type, extract_objects",
    [
        (os.path.join(test_folder, "data-images", "1.jpg"), "file", False),
        (os.path.join(test_folder, "data-images", "4.jpg"), "file", False),
        (os.path.join(test_folder, "data-images", "1.jpg"), "file", True),
        (cv2.imread(os.path.join(test_folder, "data-images", "1.jpg")), "array", False),
        (cv2.imread(os.path.join(test_folder, "data-images", "1.jpg")), "array", True),
        (Image.open(os.path.join(test_folder, "data-images", "11.jpg")), "array", True),
    ],
)
def test_object_detection_tiny_yolov3(input_image, output_type, extract_objects):
    """Exercise TinyYOLOv3 detection for file/array output, with and without object extraction.

    After the branch-specific checks, every returned detection is validated:
    it must be a dict with a ``str`` name, a ``float`` percentage probability
    and a list of ``int`` box points whose first two values are smaller than
    the last two.
    """
    weights_path = os.path.join(test_folder, "data-models", "tiny-yolov3.pt")

    detector = ObjectDetection()
    detector.setModelTypeAsTinyYOLOv3()
    detector.setModelPath(weights_path)
    detector.loadModel()

    # Unique output name so parametrized runs do not collide with each other.
    output_img_path = os.path.join(test_folder, "data-images", str(uuid.uuid4()) + ".jpg")

    wants_array = output_type == "array"

    if wants_array and extract_objects:
        output_image_array, detections, extracted_objects = detector.detectObjectsFromImage(
            input_image=input_image,
            output_type=output_type,
            extract_detected_objects=extract_objects,
        )

        assert len(extracted_objects) > 1
        assert type(detections) is list
        for extracted_obj in extracted_objects:
            assert type(extracted_obj) is np.ndarray
    elif wants_array:
        output_image_array, detections = detector.detectObjectsFromImage(
            input_image=input_image,
            output_type=output_type,
        )
        assert type(output_image_array) is np.ndarray
        assert type(detections) is list
    elif extract_objects:
        detections, extracted_object_paths = detector.detectObjectsFromImage(
            input_image=input_image,
            output_image_path=output_img_path,
            extract_detected_objects=True,
        )

        assert os.path.isfile(output_img_path)
        assert len(extracted_object_paths) > 1
        assert type(detections) is list
        delete_cache(extracted_object_paths)
        # NOTE(review): extracted_object_paths[0] was already handed to
        # delete_cache on the line above, so it is a no-op here — confirm
        # whether a different path (e.g. an extraction directory) was intended.
        delete_cache([extracted_object_paths[0], output_img_path])
    else:
        detections = detector.detectObjectsFromImage(
            input_image=input_image,
            output_image_path=output_img_path,
        )
        assert type(detections) is list
        delete_cache([output_img_path])

    expected_fields = (
        ("name", str),
        ("percentage_probability", float),
        ("box_points", list),
    )
    for detection in detections:
        assert type(detection) is dict
        for field, expected_type in expected_fields:
            assert field in detection
            assert type(detection[field]) is expected_type
        box_points = detection["box_points"]
        for coordinate in box_points:
            assert type(coordinate) is int
        assert box_points[0] < box_points[2]
        assert box_points[1] < box_points[3]


@pytest.mark.parametrize(
    "input_image",
    [
        os.path.join(test_folder, "data-images", "11.jpg"),
        cv2.imread(os.path.join(test_folder, "data-images", "11.jpg")),
        Image.open(os.path.join(test_folder, "data-images", "11.jpg")),
    ],
)
def test_object_detection_retinanet_custom_objects(input_image):
    """Check that RetinaNet restricted to person/cell phone returns only those names, and fewer results than an unrestricted run."""
    weights_path = os.path.join(test_folder, "data-models", "retinanet_resnet50_fpn_coco-eeacb38b.pth")

    detector = ObjectDetection()
    detector.setModelTypeAsRetinaNet()
    detector.setModelPath(weights_path)
    detector.loadModel()

    custom = detector.CustomObjects(person=True, cell_phone=True)
    custom_detections = detector.detectObjectsFromImage(input_image=input_image, custom_objects=custom)

    allowed_names = ["person", "cell phone"]
    for filtered in custom_detections:
        assert filtered["name"] in allowed_names

    # The unrestricted run on the same input must report strictly more objects.
    detections = detector.detectObjectsFromImage(input_image=input_image)
    filtered_count = len(custom_detections)
    assert len(detections) > filtered_count


@pytest.mark.parametrize(
    "input_image",
    [
        os.path.join(test_folder, "data-images", "11.jpg"),
        cv2.imread(os.path.join(test_folder, "data-images", "11.jpg")),
        Image.open(os.path.join(test_folder, "data-images", "11.jpg")),
    ],
)
def test_object_detection_yolov3_custom_objects(input_image):
    """Check that YOLOv3 restricted to person/cell phone returns only those names, and fewer results than an unrestricted run."""
    weights_path = os.path.join(test_folder, "data-models", "yolov3.pt")

    detector = ObjectDetection()
    detector.setModelTypeAsYOLOv3()
    detector.setModelPath(weights_path)
    detector.loadModel()

    custom = detector.CustomObjects(person=True, cell_phone=True)
    custom_detections = detector.detectObjectsFromImage(input_image=input_image, custom_objects=custom)

    allowed_names = ["person", "cell phone"]
    for filtered in custom_detections:
        assert filtered["name"] in allowed_names

    # The unrestricted run on the same input must report strictly more objects.
    detections = detector.detectObjectsFromImage(input_image=input_image)
    filtered_count = len(custom_detections)
    assert len(detections) > filtered_count


@pytest.mark.parametrize(
    "input_image",
    [
        os.path.join(test_folder, "data-images", "11.jpg"),
        cv2.imread(os.path.join(test_folder, "data-images", "11.jpg")),
        Image.open(os.path.join(test_folder, "data-images", "11.jpg")),
    ],
)
def test_object_detection_tiny_yolov3_custom_objects(input_image):
    """Check that TinyYOLOv3 restricted to person/cell phone returns only those names, and fewer results than an unrestricted run."""
    weights_path = os.path.join(test_folder, "data-models", "tiny-yolov3.pt")

    detector = ObjectDetection()
    detector.setModelTypeAsTinyYOLOv3()
    detector.setModelPath(weights_path)
    detector.loadModel()

    custom = detector.CustomObjects(person=True, cell_phone=True)
    custom_detections = detector.detectObjectsFromImage(input_image=input_image, custom_objects=custom)

    allowed_names = ["person", "cell phone"]
    for filtered in custom_detections:
        assert filtered["name"] in allowed_names

    # The unrestricted run on the same input must report strictly more objects.
    detections = detector.detectObjectsFromImage(input_image=input_image)
    filtered_count = len(custom_detections)
    assert len(detections) > filtered_count


================================================
FILE: test/test_video_object_detection.py
================================================
import os, sys
from typing import List
from numpy import ndarray
from os.path import dirname
from mock import patch
sys.path.insert(1, os.path.join(dirname(dirname(os.path.abspath(__file__)))))

from imageai.Detection import VideoObjectDetection


# Absolute path of the directory containing this test module.
test_folder = dirname(os.path.abspath(__file__))

# Input clip handed to the detectors as ``input_file_path``.
video_file = os.path.join(test_folder, "data-videos", "traffic-micro.mp4")
# Passed as ``output_file_path`` without an extension; the tests below look for
# this path with ".mp4" appended.
video_file_output = os.path.join(test_folder, "data-videos", "traffic-micro-detected")


class CallbackFunctions:
    """Namespace for two callbacks that assert the types of the arguments they receive.

    The functions take no ``self``; they are accessed through the class
    (e.g. ``CallbackFunctions.forFrame``).
    """

    def forFrame(frame_number, output_array, output_count, detected_frame):
        """Assert the expected types of the four callback arguments.

        Only the first detection in ``output_array`` is inspected; every
        key/value of ``output_count`` is checked.
        """
        assert isinstance(detected_frame, ndarray)
        assert isinstance(frame_number, int)
        assert isinstance(output_array, list)

        first_detection = output_array[0]
        assert isinstance(first_detection, dict)
        assert isinstance(first_detection["name"], str)
        assert isinstance(first_detection["percentage_probability"], float)
        assert isinstance(first_detection["box_points"], list)

        assert isinstance(output_count, dict)
        # Iterate over a copy of the keys, look the values up in the original mapping.
        for label in dict(output_count):
            assert isinstance(label, str)
            assert isinstance(output_count[label], int)

    def forSecond(second_number, output_arrays, count_arrays, average_output_count, detected_frame):
        """Assert the expected types of the five callback arguments.

        Only the first element of ``output_arrays`` (and the first detection
        inside it) and the first element of ``count_arrays`` are inspected;
        every key/value of ``average_output_count`` is checked.
        """
        assert isinstance(detected_frame, ndarray)
        assert isinstance(second_number, int)
        assert isinstance(output_arrays, list)

        first_frame_detections = output_arrays[0]
        assert isinstance(first_frame_detections, list)

        first_detection = first_frame_detections[0]
        assert isinstance(first_detection, dict)
        assert isinstance(first_detection["name"], str)
        assert isinstance(first_detection["percentage_probability"], float)
        assert isinstance(first_detection["box_points"], list)

        assert isinstance(count_arrays, list)
        first_frame_counts = count_arrays[0]
        assert isinstance(first_frame_counts, dict)
        for label in dict(first_frame_counts):
            assert isinstance(label, str)
            assert isinstance(first_frame_counts[label], int)

        assert isinstance(average_output_count, dict)
        for label in dict(average_output_count):
            assert isinstance(label, str)
            assert isinstance(average_output_count[label], int)


def delete_cache(files: List[str]):
    """Remove each path in ``files`` that currently is a regular file.

    Paths that do not exist, or that are not regular files, are skipped.
    """
    for candidate in files:
        # Checked per iteration so a path listed twice is only removed once.
        if not os.path.isfile(candidate):
            continue
        os.remove(candidate)


def test_video_detection_retinanet():
    """Run the RetinaNet video detector on the sample clip and check that an .mp4 is written."""
    detected_video = video_file_output + ".mp4"
    # Start from a clean slate in case an earlier run left its output behind.
    delete_cache([detected_video])

    weights_path = os.path.join(test_folder, "data-models", "retinanet_resnet50_fpn_coco-eeacb38b.pth")

    detector = VideoObjectDetection()
    detector.setModelTypeAsRetinaNet()
    detector.setModelPath(model_path=weights_path)
    detector.loadModel()

    video_path = detector.detectObjectsFromVideo(
        input_file_path=video_file,
        output_file_path=video_file_output,
        save_detected_video=True,
        frames_per_second=30,
        log_progress=True,
    )

    assert os.path.exists(detected_video)
    assert isinstance(video_path, str)

    delete_cache([detected_video])


def test_video_detection_retinanet_custom_objects():
    """Run the RetinaNet video detector with a person/bus custom-objects filter and check that an .mp4 is written."""
    detected_video = video_file_output + ".mp4"
    # Start from a clean slate in case an earlier run left its output behind.
    delete_cache([detected_video])

    weights_path = os.path.join(test_folder, "data-models", "retinanet_resnet50_fpn_coco-eeacb38b.pth")

    detector = VideoObjectDetection()
    detector.setModelTypeAsRetinaNet()
    detector.setModelPath(model_path=weights_path)
    detector.loadModel()

    custom_objects = detector.CustomObjects(person=True, bus=True)

    video_path = detector.detectObjectsFromVideo(
        input_file_path=video_file,
        output_file_path=video_file_output,
        save_detected_video=True,
        frames_per_second=30,
        log_progress=True,
        custom_objects=custom_objects,
    )

    assert os.path.exists(detected_video)
    assert isinstance(video_path, str)

    delete_cache([detected_video])


def test_video_detection_yolov3():
    """Run the YOLOv3 video detector on the sample clip and check that an .mp4 is written."""
    detected_video = video_file_output + ".mp4"
    # Start from a clean slate in case an earlier run left its output behind.
    delete_cache([detected_video])

    weights_path = os.path.join(test_folder, "data-models", "yolov3.pt")

    detector = VideoObjectDetection()
    detector.setModelTypeAsYOLOv3()
    detector.setModelPath(model_path=weights_path)
    detector.loadModel()

    video_path = detector.detectObjectsFromVideo(
        input_file_path=video_file,
        output_file_path=video_file_output,
        save_detected_video=True,
        frames_per_second=30,
        log_progress=True,
    )

    assert os.path.exists(detected_video)
    assert isinstance(video_path, str)

    delete_cache([detected_video])


def test_video_detection_tiny_yolov3():
    """Run the TinyYOLOv3 video detector on the sample clip and check that an .mp4 is written."""
    detected_video = video_file_output + ".mp4"
    # Start from a clean slate in case an earlier run left its output behind.
    delete_cache([detected_video])

    detector = VideoObjectDetection()
    detector.setModelTypeAsTinyYOLOv3()
    detector.setModelPath(model_path=os.path.join(test_folder, "data-models", "tiny-yolov3.pt"))
    detector.loadModel()

    detection_options = {
        "input_file_path": video_file,
        "output_file_path": video_file_output,
        "save_detected_video": True,
        "frames_per_second": 30,
        "log_progress": True,
    }
    video_path = detector.detectObjectsFromVideo(**detection_options)

    assert os.path.exists(detected_video)
    assert isinstance(video_path, str)

    delete_cache([detected_video])


def test_video_detection_retinanet_analysis():
    """Check that the RetinaNet video detector invokes the per-frame and per-second callbacks.

    ``patch.object`` swaps ``CallbackFunctions.forFrame`` / ``forSecond`` for
    mocks inside the ``with`` block, so the detector is handed the mocks: this
    test verifies that the callbacks were called, while the type assertions
    written in ``CallbackFunctions`` themselves are not executed here.
    """
    detected_video = video_file_output + ".mp4"
    delete_cache([detected_video])

    weights_path = os.path.join(test_folder, "data-models", "retinanet_resnet50_fpn_coco-eeacb38b.pth")

    detector = VideoObjectDetection()
    detector.setModelTypeAsRetinaNet()
    detector.setModelPath(model_path=weights_path)
    detector.loadModel()

    with patch.object(CallbackFunctions, 'forFrame') as frameFunc, \
            patch.object(CallbackFunctions, 'forSecond') as secondFunc:

        video_path = detector.detectObjectsFromVideo(
            input_file_path=video_file,
            output_file_path=video_file_output,
            save_detected_video=True,
            frames_per_second=30,
            log_progress=True,
            per_frame_function=frameFunc,
            per_second_function=secondFunc,
            return_detected_frame=True,
        )

        assert os.path.exists(detected_video)
        assert isinstance(video_path, str)

        frameFunc.assert_called()
        secondFunc.assert_called()

    delete_cache([detected_video])