[
  {
    "path": ".github/FUNDING.yml",
    "content": "# These are supported funding model platforms\n\ngithub: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]\npatreon: # Replace with a single Patreon username\nopen_collective: # Replace with a single Open Collective username\nko_fi: # Replace with a single Ko-fi username\ntidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel\ncommunity_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry\nliberapay: # Replace with a single Liberapay username\nissuehunt: # Replace with a single IssueHunt username\notechie: # Replace with a single Otechie username\ncustom: ['https://www.buymeacoffee.com/hardwareai']\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug_report.yml",
    "content": "name: Bug Report\ndescription: File a bug report\ntitle: \"[Bug]: \"\nlabels: [bug, triage]\nassignees:\n  - AIWintermuteAI\nbody:\n  - type: markdown\n    attributes:\n      value: |\n        Thanks for taking the time to fill out this bug report! Before you do, however, make sure you have done the following.\n\n  - type: checkboxes\n    id: googled\n    attributes:\n      label: Check if applicable\n      options:\n        - label: I used Google/Bing/other search engines to thoroughly research my question and DID NOT find any suitable answers\n          required: true\n\n        - label: Additionally I went through the issues in this repository/MaixPy/Tensorflow repositories and DID NOT find any suitable answers\n          required: true\n\n  - type: textarea\n    id: what-happened\n    attributes:\n      label: Describe the bug\n      description: A clear and concise description of what the bug is, with screenshots/models/videos if necessary.\n      value: |\n            **To Reproduce**\n            Steps to reproduce the behavior:\n            1. Go to '...'\n            2. Click on '....'\n            3. Scroll down to '....'\n            4. See error\n    validations:\n      required: true\n\n  - type: textarea\n    id: what-expected\n    attributes:\n      label: Expected behavior\n      description: A clear and concise description of what you expected to happen.\n    validations:\n      required: true\n\n  - type: textarea\n    id: platform\n    attributes:\n      label: Platform\n      description: What platform are you running the code on.\n      value: |\n            - Device: [e.g. Raspberry Pi 4 or M5 StickV]\n            - OS/firmware: [e.g. Raspbian OS 32bit kernel version ...]\n            - Version/commit number of aXeleRate: [e.g. d1816f5]\n    validations:\n      required: true\n\n  - type: textarea\n    id: logs\n    attributes:\n      label: Relevant log output\n      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.\n      render: shell\n\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/config.yml",
    "content": "blank_issues_enabled: false\ncontact_links:\n  - name: Google\n    url: https://google.com/\n    about: Please find answers to general questions, e.g. \"what are anchors\", \"how is mAP calculated\", \"my cat is coughing up fur, can you help please\" HERE."
  },
  {
    "path": ".github/ISSUE_TEMPLATE/feature_request.yml",
    "content": "name: Feature request\ndescription: Suggest an idea for this project\ntitle: \"[Feature request]: \"\nlabels: [enhancement, help wanted]\n\nbody:\n  - type: markdown\n    attributes:\n      value: |\n        Thanks for your interest in improving aXeleRate! It is a personal project of mine, which I continually develop with the help of other volunteers.\n\n  - type: checkboxes\n    id: boxes\n    attributes:\n      label: Choose an option\n      options:\n        - label: I'd like to contribute to development by making a PR.\n        - label: Alternatively, I could consider a small beer donation to the developer as a token of my appreciation.\n\n  - type: textarea\n    id: feature\n    attributes:\n      label: Describe the desired feature\n      description: A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]. Add screenshots/models/videos if necessary.\n    validations:\n      required: true\n\n  - type: textarea\n    id: what-expected\n    attributes:\n      label: Describe the solution you'd like\n      description: A clear and concise description of what you want to happen.\n    validations:\n      required: true\n\n  - type: textarea\n    id: logs\n    attributes:\n      label: Relevant log output\n      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.\n      render: shell"
  },
  {
    "path": ".github/workflows/python-publish.yml",
    "content": "# This workflow will upload a Python Package using Twine when a release is created\n# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries\n\nname: Upload Python Package\n\non:\n  release:\n    types: [created]\n\njobs:\n  deploy:\n\n    runs-on: ubuntu-latest\n\n    steps:\n    - uses: actions/checkout@v2\n    - name: Set up Python\n      uses: actions/setup-python@v2\n      with:\n        python-version: '3.x'\n    - name: Install dependencies\n      run: |\n        python -m pip install --upgrade pip\n        pip install setuptools wheel twine\n    - name: Build and publish\n      env:\n        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}\n        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}\n      run: |\n        python setup.py sdist bdist_wheel\n        twine upload dist/*\n"
  },
  {
    "path": ".gitignore",
    "content": "__pycache__/\naxelerate/networks/common_utils/ncc\naxelerate/networks/common_utils/ncc_linux_x86_64.tar.xz\naxelerate.egg-info/\nbuild/\ndist/\n_configs/\nprojects/\nlogs/\n*.tflite\n*.h5\n*.kmodel\n*.txt\n*.pyc\n.vscode/\n"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2020 Dmitry Maslov\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "<h1 align=\"center\">\n  <img src=\"https://raw.githubusercontent.com/AIWintermuteAI/aXeleRate/master/resources/logo.png\" alt=\"aXeleRate\" width=\"350\">\n</h1>\n\n<h3 align=\"center\">Keras-based framework for AI on the Edge</h3>\n\n<hr>\n<p align=\"center\">\naXeleRate streamlines training and converting computer vision models to be run on various platforms with hardware acceleration. It is optimized both for the workflow on a local machine (Ubuntu 18.04/20.04; other Linux distributions might work, but are not tested; macOS/Windows are not supported) and for Google Colab. Currently supports trained model conversion to the .kmodel (K210), .tflite (full integer and dynamic range quantization support available) and .onnx formats. Experimental support: Google Edge TPU.\n</p>\n\n<table>\n  <tr>\n    <td>Stanford Dog Breed Classification Dataset NASNetMobile backend + Classifier <a href=\"https://colab.research.google.com/github/AIWintermuteAI/aXeleRate/blob/master/resources/aXeleRate_standford_dog_classifier.ipynb\">\n  <img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/>\n</a> </td>\n     <td>PASCAL-VOC 2012 Object Detection Dataset MobileNet1_0 backend + YOLOv3 <a href=\"https://colab.research.google.com/github/AIWintermuteAI/aXeleRate/blob/master/resources/aXeleRate_pascal20_detector.ipynb\">\n  <img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/>\n</a> </td>\n     <td>Human parsing Semantic Segmentation MobileNet5_0 backend + Segnet-Basic <a href=\"https://colab.research.google.com/github/AIWintermuteAI/aXeleRate/blob/master/resources/aXeleRate_human_segmentation.ipynb\">\n  <img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/>\n</a> </td>\n  </tr>\n  <tr>\n    <td><img src=\"https://raw.githubusercontent.com/AIWintermuteAI/aXeleRate/master/resources/n02106550_7003.jpg\" width=300 height=300></td>\n    <td><img src=\"https://raw.githubusercontent.com/AIWintermuteAI/aXeleRate/master/resources/2009_001349.jpg\" width=300 height=300></td>\n    <td><img src=\"https://raw.githubusercontent.com/AIWintermuteAI/aXeleRate/master/resources/66.jpg\" width=250 height=350></td>\n  </tr>\n </table>\n\n### aXeleRate\n\nTL;DR\n\naXeleRate is meant for people who need to run computer vision applications (image classification, object detection, semantic segmentation) on edge devices with hardware acceleration. It offers an easy configuration process through a config file or a config dictionary (for Google Colab) and automatic conversion of the best model of a training session into the required file format. You put the properly formatted data in, start the training script and (hopefully) come back to see a converted model that is ready for deployment on your device!\n\n### :wrench: Key Features\n  - Supports multiple computer vision models: object detection (YOLOv3), image classification, semantic segmentation (SegNet-Basic)\n  - Different feature extractors to be used with the above network types: Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, NASNetMobile, ResNet50, and DenseNet121.\n  - Automatic conversion of the best model for the training session. aXeleRate will download the suitable converter automatically.\n  - Currently supports trained model conversion to: .kmodel (K210), .tflite (full integer and dynamic range quantization support available), .tflite (Edge TPU), .onnx (for later on-device optimization with TensorRT).\n  - Model version control made easier. 
Keras model files and converted models are saved in the project folder, grouped by the training date. Training history is saved as a .png graph in the model folder.\n  - Two modes of operation: local, with the train.py script and a .json config file, and remote, tailored for Google Colab, with module import and a dictionary config.\n\n### 💾 Install\n\nStable version:\n\npip install axelerate\n\nDaily development version:\n\npip install git+https://github.com/AIWintermuteAI/aXeleRate\n\nIf installing in an Anaconda environment, make sure you have the necessary CUDA/cuDNN versions installed in that environment to use a GPU for training.\n\n###  :question: F.A.Q.\n\nQ: I trained a YOLO model, but it doesn't run on K210 with MaixPy firmware.\n\nA: While there can be a lot of reasons for that (memory constraints being one of them), the master branch of aXeleRate trains a YOLOv3 model, which shows better convergence, especially for datasets with smaller objects and non-square image sizes. There is a [PR for adding YOLOv3 support](https://github.com/sipeed/MaixPy/pull/451) to MaixPy (where you can also see my comparisons of the two), but it is not merged at the moment. There are two options for training a model that can run on K210 MaixPy:\n- switch to the legacy branch of aXeleRate with ```git switch legacy-yolov2``` (if you are running the training locally, you will also need to re-install aXeleRate after that with ```pip install -e .```). The trained model should be compatible with the current MaixPy.\n- use [this pre-compiled firmware](https://drive.google.com/file/d/1q1BcWA8GiTQ_3Q9vYkSysRvGD62K2zh4/view?usp=sharing) with experimental support for YOLOv3 (examples included) or compile your own from [this PR's branch](https://github.com/sipeed/MaixPy/pull/451).\n\n###  :computer: Project Story\n\naXeleRate started as a personal project of mine for training YOLOv2-based object detection networks and exporting them to the .kmodel format to be run on the K210 chip. I also needed to train image classification networks. And sometimes I needed to run inference with TensorFlow Lite on a Raspberry Pi. As a result, I had a whole bunch of disconnected scripts, each with somewhat overlapping functionality. So, I decided to fix that and share the results with other people who might have similar workflows.\n\naXeleRate is still a work-in-progress project. I will be making some changes from time to time, and if you find it useful and can contribute, PRs are very much welcome!\n\n:ballot_box_with_check: TODO list:\n\nThe TODO list is moving to GitHub Projects!\n\n### Acknowledgements\n\n  - YOLOv2 Keras code by jeongjoonsup and Ngoc Anh Huynh https://github.com/experiencor/keras-yolo2 https://github.com/penny4860/Yolo-digit-detector\n  - SegNet Keras code by Divam Gupta https://github.com/divamgupta/image-segmentation-keras\n  - Big thank you to the creators/maintainers of Keras/TensorFlow\n\n### Donation\nRecently a few people wanted to make a small donation to aXeleRate because it helped them with their work. I was caught off guard by the question about donations :) I didn't have anything set up, so I quickly created a page where they can send money. If aXeleRate was useful in your work, you can donate a pizza or a beer to the project here: https://www.buymeacoffee.com/hardwareai . But times are tough now (and always), so if you don't have much to spare, don't feel guilty! aXeleRate is totally open source and free to use.\n"
  },
  {
    "path": "axelerate/__init__.py",
    "content": "from .train import setup_training\nfrom .infer import setup_inference\nfrom .evaluate import setup_evaluation\n"
  },
  {
    "path": "axelerate/evaluate.py",
    "content": "import os\r\nimport argparse\r\nimport json\r\nimport cv2\r\nimport numpy as np\r\nimport matplotlib\r\nimport matplotlib.pyplot as plt\r\nimport matplotlib.image as mpimg\r\n\r\nfrom tensorflow.keras import backend as K \r\n\r\nfrom axelerate.networks.yolo.frontend import create_yolo\r\nfrom axelerate.networks.yolo.backend.utils.box import draw_boxes\r\nfrom axelerate.networks.yolo.backend.utils.annotation import parse_annotation\r\nfrom axelerate.networks.yolo.backend.utils.eval.fscore import count_true_positives, calc_score\r\nfrom axelerate.networks.segnet.frontend_segnet import create_segnet\r\nfrom axelerate.networks.classifier.frontend_classifier import get_labels, create_classifier\r\n\r\nK.clear_session()\r\n\r\nDEFAULT_THRESHOLD = 0.3\r\n\r\ndef save_report(config, report, report_file):\r\n    with open(report_file, 'w') as outfile:\r\n        outfile.write(\"REPORT\\n\")\r\n        outfile.write(str(report))\r\n        outfile.write(\"\\nCONFIG\\n\")\r\n        outfile.write(json.dumps(config, indent=4, sort_keys=False))\r\n\r\ndef show_image(filename):\r\n    image = mpimg.imread(filename)\r\n    plt.figure()\r\n    plt.imshow(image)\r\n    plt.show(block=False)\r\n    plt.pause(1)\r\n    plt.close()\r\n    print(filename)\r\n\r\ndef prepare_image(img_path, network):\r\n    orig_image = cv2.imread(img_path)\r\n    input_image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB) \r\n    input_image = cv2.resize(input_image, (network.input_size[1], network.input_size[0]))\r\n    input_image = network.norm(input_image)\r\n    input_image = np.expand_dims(input_image, 0)\r\n    return orig_image, input_image\r\n\r\ndef setup_evaluation(config, weights, threshold = None):\r\n    try:\r\n        matplotlib.use('TkAgg')\r\n    except:\r\n        pass\r\n    #added for compatibility with < 0.5.7 versions\r\n    try:\r\n        input_size = config['model']['input_size'][:]\r\n    except:\r\n        input_size = [config['model']['input_size'],config['model']['input_size']]\r\n\r\n    \"\"\"make directory to save inference results \"\"\"\r\n    dirname = os.path.dirname(weights)\r\n\r\n    if config['model']['type']=='Classifier':\r\n        print('Classifier')  \r\n\r\n        if config['model']['labels']:\r\n            labels = config['model']['labels']\r\n        else:\r\n            labels = get_labels(config['train']['train_image_folder'])\r\n\r\n        # 1.Construct the model \r\n        classifier = create_classifier(config['model']['architecture'],\r\n                                       labels,\r\n                                       input_size,\r\n                                       config['model']['fully-connected'],\r\n                                       config['model']['dropout'])\r\n\r\n        # 2. Load the pretrained weights\r\n        classifier.load_weights(weights)\r\n\r\n        report, cm = classifier.evaluate(config['train']['valid_image_folder'], 16)\r\n        save_report(config, report, os.path.join(dirname, 'report.txt'))\r\n\r\n    if config['model']['type']=='SegNet':\r\n        print('Segmentation')           \r\n        # 1. Construct the model \r\n        segnet = create_segnet(config['model']['architecture'],\r\n                                   input_size,\r\n                                   config['model']['n_classes'])   \r\n        # 2. 
Load the pretrained weights (if any) \r\n        segnet.load_weights(weights)\r\n        report = segnet.evaluate(config['train']['valid_image_folder'], config['train']['valid_annot_folder'], 2)\r\n        save_report(config, report, os.path.join(dirname, 'report.txt'))\r\n        print(report)\r\n\r\n    if config['model']['type']=='Detector':\r\n        # 2. create yolo instance & predict\r\n        yolo = create_yolo(config['model']['architecture'],\r\n                           config['model']['labels'],\r\n                           input_size,\r\n                           config['model']['anchors'],\r\n                           config['model']['obj_thresh'],\r\n                           config['model']['iou_thresh'],\r\n                           config['model']['coord_scale'],\r\n                           config['model']['object_scale'],\r\n                           config['model']['no_object_scale'],                           \r\n                           config['weights']['backend'])    \r\n        yolo.load_weights(weights)\r\n\r\n        # 3. read image\r\n        annotations = parse_annotation(config['train']['valid_annot_folder'],\r\n                                       config['train']['valid_image_folder'],\r\n                                       config['model']['labels'],\r\n                                       is_only_detect=config['train']['is_only_detect'])\r\n\r\n        threshold = threshold if threshold else config['model']['obj_thresh']\r\n\r\n        dirname = os.path.join(os.path.dirname(weights), 'Inference_results') #temporary\r\n\r\n        if os.path.isdir(dirname):\r\n            print(\"Folder {} already exists. Image files in the directory might be overwritten\".format(dirname))\r\n        else:\r\n            print(\"Folder {} is created.\".format(dirname))\r\n            os.makedirs(dirname)\r\n\r\n        n_true_positives = 0\r\n        n_truth = 0\r\n        n_pred = 0\r\n        inference_time = []\r\n\r\n        for i in range(len(annotations)):\r\n            img_path = annotations.fname(i)\r\n            img_fname = os.path.basename(img_path)\r\n            true_boxes = annotations.boxes(i)\r\n            true_labels = annotations.code_labels(i)\r\n\r\n            orig_image, input_image = prepare_image(img_path, yolo)\r\n            height, width = orig_image.shape[:2]\r\n            prediction_time, boxes, scores = yolo.predict(input_image, height, width, float(threshold))\r\n            classes = np.argmax(scores, axis=1) if len(scores) > 0 else []\r\n            inference_time.append(prediction_time)\r\n\r\n            # 4. save detection result\r\n            orig_image = draw_boxes(orig_image, boxes, scores, classes, config['model']['labels'])\r\n            output_path = os.path.join(dirname, os.path.split(img_fname)[-1])\r\n            cv2.imwrite(output_path, orig_image)\r\n            print(\"{} boxes are detected. {} saved.\".format(len(boxes), output_path))\r\n            n_true_positives += count_true_positives(boxes, true_boxes, classes, true_labels)\r\n            n_truth += len(true_boxes)\r\n            n_pred += len(boxes)\r\n\r\n        report = calc_score(n_true_positives, n_truth, n_pred)\r\n        save_report(config, report, os.path.join(dirname, 'report.txt'))\r\n        print(report)\r\n\r\n        if len(inference_time)>1:\r\n            print(\"Average prediction time:{} ms\".format(sum(inference_time[1:])/len(inference_time[1:])))\r\n\r\nif __name__ == '__main__':\r\n    # 1. 
extract arguments\r\n\r\n    argparser = argparse.ArgumentParser(\r\n        description='Run evaluation script')\r\n\r\n    argparser.add_argument(\r\n        '-c',\r\n        '--config',\r\n        help='path to configuration file')\r\n\r\n    argparser.add_argument(\r\n        '-t',\r\n        '--threshold',\r\n        help='detection threshold')\r\n\r\n    argparser.add_argument(\r\n        '-w',\r\n        '--weights',\r\n        help='trained weight files')\r\n\r\n    args = argparser.parse_args()\r\n    with open(args.config) as config_buffer:\r\n        config = json.loads(config_buffer.read())\r\n    setup_evaluation(config, args.weights, args.threshold)\r\n"
  },
  {
    "path": "axelerate/infer.py",
    "content": "import glob\r\nimport os\r\nimport argparse\r\nimport json\r\nimport cv2\r\nimport numpy as np\r\nimport matplotlib\r\nimport matplotlib.pyplot as plt\r\nimport matplotlib.image as mpimg\r\n\r\nfrom tensorflow.keras import backend as K \r\n\r\nfrom axelerate.networks.yolo.frontend import create_yolo\r\nfrom axelerate.networks.yolo.backend.utils.box import draw_boxes\r\nfrom axelerate.networks.segnet.frontend_segnet import create_segnet\r\nfrom axelerate.networks.segnet.predict import visualize_segmentation\r\nfrom axelerate.networks.classifier.frontend_classifier import get_labels, create_classifier\r\n\r\nK.clear_session()\r\n    \r\ndef show_image(filename):\r\n    image = mpimg.imread(filename)\r\n    plt.figure()\r\n    plt.imshow(image)\r\n    plt.show(block=False)\r\n    plt.pause(1)\r\n    plt.close()\r\n    print(filename)\r\n\r\ndef prepare_image(img_path, network, input_size):\r\n    orig_image = cv2.imread(img_path)\r\n    input_image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB) \r\n    input_image = cv2.resize(input_image, (input_size[1], input_size[0]))\r\n    input_image = network.norm(input_image)\r\n    input_image = np.expand_dims(input_image, 0)\r\n    return orig_image, input_image\r\n\r\ndef find_imgs(folder):\r\n    ext_list = ['/**/*.jpg', '/**/*.jpeg', '/**/*.png', '/**/*.JPG', '/**/*.JPEG']\r\n    image_files_list = []\r\n    image_search = lambda ext : glob.glob(folder + ext, recursive=True)\r\n    for ext in ext_list: image_files_list.extend(image_search(ext))\r\n    return image_files_list\r\n\r\ndef setup_inference(config, weights, threshold = None, folder = None):\r\n    try:\r\n        matplotlib.use('TkAgg')\r\n    except:\r\n        pass\r\n\r\n    #added for compatibility with < 0.5.7 versions\r\n    try:\r\n        input_size = config['model']['input_size'][:]\r\n    except:\r\n        input_size = [config['model']['input_size'], config['model']['input_size']]\r\n\r\n    \"\"\"make directory to save inference results \"\"\"\r\n    dirname = os.path.join(os.path.dirname(weights), 'Inference_results')\r\n    if os.path.isdir(dirname):\r\n        print(\"Folder {} already exists. Image files in the directory might be overwritten\".format(dirname))\r\n    else:\r\n        print(\"Folder {} is created.\".format(dirname))\r\n        os.makedirs(dirname)\r\n\r\n    if config['model']['type']=='Classifier':\r\n        print('Classifier')    \r\n        if config['model']['labels']:\r\n            labels = config['model']['labels']\r\n        else:\r\n            labels = get_labels(config['train']['train_image_folder'])\r\n            \r\n        # 1. Construct the model \r\n        classifier = create_classifier(config['model']['architecture'],\r\n                                       labels,\r\n                                       input_size,\r\n                                       config['model']['fully-connected'],\r\n                                       config['model']['dropout'])  \r\n                                        \r\n        # 2. 
Load the trained weights\r\n        classifier.load_weights(weights)\r\n        \r\n        font = cv2.FONT_HERSHEY_SIMPLEX\r\n        background_color = (70, 120, 70) # grayish green background for text\r\n        text_color = (255, 255, 255)   # white text\r\n\r\n        file_folder = folder if folder else config['train']['valid_image_folder']\r\n\r\n        image_files_list = find_imgs(file_folder)\r\n        \r\n        inference_time = []\r\n        for filepath in image_files_list:\r\n            output_path = os.path.join(dirname, os.path.basename(filepath))\r\n            orig_image, input_image = prepare_image(filepath, classifier, input_size)\r\n            prediction_time, prob, img_class = classifier.predict(input_image)\r\n            inference_time.append(prediction_time)\r\n            \r\n            text = \"{}:{:.2f}\".format(img_class, prob)\r\n\r\n            # label shape and colorization\r\n            size = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]\r\n            left = 10\r\n            top = 35 - size[1]\r\n            right = left + size[0]\r\n            bottom = top + size[1]\r\n\r\n            # set up the colored rectangle background for text\r\n            cv2.rectangle(orig_image, (left - 1, top - 5),(right + 1, bottom + 1), background_color, -1)\r\n            # set up text\r\n            cv2.putText(orig_image, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color, 1)\r\n            cv2.imwrite(output_path, orig_image)\r\n            show_image(output_path)\r\n            print(\"{}:{}\".format(img_class, prob))\r\n\r\n        if len(inference_time)>1:\r\n            print(\"Average prediction time:{} ms\".format(sum(inference_time[1:])/len(inference_time[1:])))\r\n\r\n    if config['model']['type']=='SegNet':\r\n        print('Segmentation')           \r\n        # 1. Construct the model \r\n        segnet = create_segnet(config['model']['architecture'],\r\n                                   input_size,\r\n                                   config['model']['n_classes'])   \r\n        # 2. Load the trained weights\r\n        segnet.load_weights(weights)\r\n\r\n        file_folder = folder if folder else config['train']['valid_image_folder']\r\n        image_files_list = find_imgs(file_folder)\r\n\r\n        inference_time = []\r\n        for filepath in image_files_list:\r\n\r\n            orig_image, input_image = prepare_image(filepath, segnet, input_size)\r\n            out_fname = os.path.join(dirname, os.path.basename(filepath))\r\n            prediction_time, output_array = segnet.predict(input_image)\r\n            seg_img = visualize_segmentation(output_array, orig_image, segnet.n_classes, overlay_img = True)\r\n            cv2.imwrite(out_fname, seg_img)\r\n            show_image(out_fname)\r\n\r\n    if config['model']['type']=='Detector':\r\n        # 2. 
create yolo instance & predict\r\n        yolo = create_yolo(config['model']['architecture'],\r\n                           config['model']['labels'],\r\n                           input_size,\r\n                           config['model']['anchors'],\r\n                           config['model']['obj_thresh'],\r\n                           config['model']['iou_thresh'],\r\n                           config['model']['coord_scale'],\r\n                           config['model']['object_scale'],\r\n                           config['model']['no_object_scale'],                           \r\n                           config['weights']['backend'])                           \r\n        yolo.load_weights(weights)\r\n        \r\n        file_folder = folder if folder else config['train']['valid_image_folder']\r\n        threshold = threshold if threshold else config['model']['obj_thresh']\r\n        image_files_list = find_imgs(file_folder)\r\n\r\n        inference_time = []\r\n        for filepath in image_files_list:\r\n\r\n            img_fname = os.path.basename(filepath)\r\n            orig_image, input_image = prepare_image(filepath, yolo, input_size)\r\n            height, width = orig_image.shape[:2]\r\n\r\n            prediction_time, boxes, scores = yolo.predict(input_image, height, width, float(threshold))\r\n            classes = np.argmax(scores, axis=1) if len(scores) > 0 else []\r\n            print(classes)\r\n            inference_time.append(prediction_time)\r\n\r\n            # 4. save detection result\r\n            orig_image = draw_boxes(orig_image, boxes, scores, classes, config['model']['labels'])\r\n            output_path = os.path.join(dirname, os.path.basename(filepath))\r\n            cv2.imwrite(output_path, orig_image)\r\n            print(\"{} boxes are detected. {} saved.\".format(len(boxes), output_path))\r\n            show_image(output_path)\r\n\r\n        if len(inference_time)>1:\r\n            print(\"Average prediction time:{} ms\".format(sum(inference_time[1:])/len(inference_time[1:])))\r\n\r\nif __name__ == '__main__':\r\n    # 1. extract arguments\r\n\r\n    argparser = argparse.ArgumentParser(\r\n        description='Run inference script')\r\n\r\n    argparser.add_argument(\r\n        '-c',\r\n        '--config',\r\n        help='path to configuration file')\r\n\r\n    argparser.add_argument(\r\n        '-t',\r\n        '--threshold',\r\n        help='detection threshold')\r\n\r\n    argparser.add_argument(\r\n        '-w',\r\n        '--weights',\r\n        help='trained weight files')\r\n\r\n    argparser.add_argument(\r\n        '-f',\r\n        '--folder',\r\n        help='folder with image files to run inference on')   \r\n\r\n    args = argparser.parse_args()\r\n\r\n    with open(args.config) as config_buffer:\r\n        config = json.loads(config_buffer.read())\r\n    setup_inference(config, args.weights, args.threshold, args.folder)\r\n"
  },
  {
    "path": "axelerate/networks/__init__.py",
    "content": ""
  },
  {
    "path": "axelerate/networks/classifier/__init__.py",
    "content": ""
  },
  {
    "path": "axelerate/networks/classifier/batch_gen.py",
    "content": "## Code heavily adapted from:\n## *https://github.com/keras-team/keras-preprocessing/blob/master/keras_preprocessing/\n\n\"\"\"Utilities for real-time data augmentation on image data. \"\"\"\n\nfrom .directory_iterator import DirectoryIterator\nfrom axelerate.networks.common_utils.augment import process_image_classification\nfrom tensorflow.keras.utils import Sequence\nimport cv2\nimport os\n\ndef create_datagen(img_folder, batch_size, input_size, project_folder, augment, norm):\n\n    datagen = ImageDataAugmentor(preprocess_input = norm,\n                                 process_image = process_image_classification,\n                                 augment = augment)\n    \n    generator = datagen.flow_from_directory(img_folder,\n                                        target_size = input_size,\n                                        color_mode = 'rgb',\n                                        batch_size = batch_size,\n                                        class_mode = 'categorical', \n                                        shuffle = augment)\n    if project_folder:             \n        labels = (generator.class_indices)\n        labels = dict((v,k) for k,v in labels.items())\n        fo = open(os.path.join(project_folder,\"labels.txt\"), \"w\")\n        for k,v in labels.items():\n            print(v)\n            fo.write(v+\"\\n\")\n        fo.close()\n    return generator\n    \n    \nclass ImageDataAugmentor(Sequence):\n    \"\"\"Generate batches of tensor image data with real-time data augmentation.\n    The data will be looped over (in batches).\n    # Arguments\n        preprocessing_input: function that will be implied on each input.\n            The function will run after the image is resized and augmented.\n            The function should take one argument:\n            one image, and should output a Numpy tensor with the same shape.\n        augment: augmentations passed as albumentations or imgaug transformation \n            or sequence of transformations.     \n        data_format: Image data format,\n            either \"channels_first\" or \"channels_last\".\n            \"channels_last\" mode means that the images should have shape\n            `(samples, height, width, channels)`,\n            \"channels_first\" mode means that the images should have shape\n            `(samples, channels, height, width)`.\n            It defaults to the `image_data_format` value found in your\n            Keras config file at `~/.keras/keras.json`.\n            If you never set it, then it will be \"channels_last\".\n    \"\"\"\n\n    def __init__(self,\n                 augment = False,\n                 process_image=None,\n                 preprocess_input=None,\n                 data_format='channels_last'):\n               \n        self.augment = augment\n        self.process_image = process_image\n        self.preprocess_input = preprocess_input\n\n        if data_format not in {'channels_last', 'channels_first'}:\n            raise ValueError(\n                '`data_format` should be `\"channels_last\"` '\n                '(channel after row and column) or '\n                '`\"channels_first\"` (channel before row and column). 
'\n                'Received: %s' % data_format)\n        self.data_format = data_format\n        if data_format == 'channels_first':\n            self.channel_axis = 1\n            self.row_axis = 2\n            self.col_axis = 3\n        if data_format == 'channels_last':\n            self.channel_axis = 3\n            self.row_axis = 1\n            self.col_axis = 2\n\n    def flow_from_directory(self,\n                            directory,\n                            target_size=(256, 256),\n                            color_mode='rgb',\n                            classes=None,\n                            class_mode='categorical',\n                            batch_size=32,\n                            shuffle=True,\n                            seed=None,\n                            save_to_dir=None,\n                            save_prefix='',\n                            save_format='png',\n                            follow_links=False,\n                            subset=None,\n                            interpolation=cv2.INTER_NEAREST):\n        \"\"\"Takes the path to a directory & generates batches of augmented data.\n        # Arguments\n            directory: string, path to the target directory.\n                It should contain one subdirectory per class.\n                Any PNG, JPG, BMP, PPM or TIF images\n                inside each of the subdirectories directory tree\n                will be included in the generator.\n                See [this script](\n                https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d)\n                for more details.\n            target_size: Tuple of integers `(height, width)`,\n                default: `(256, 256)`.\n                The dimensions to which all images found will be resized.\n            color_mode: One of \"gray\", \"rgb\", \"rgba\". Default: \"rgb\".\n                Whether the images will be converted to\n                have 1, 3, or 4 channels.\n            classes: Optional list of class subdirectories\n                (e.g. `['dogs', 'cats']`). Default: None.\n                If not provided, the list of classes will be automatically\n                inferred from the subdirectory names/structure\n                under `directory`, where each subdirectory will\n                be treated as a different class\n                (and the order of the classes, which will map to the label\n                indices, will be alphanumeric).\n                The dictionary containing the mapping from class names to class\n                indices can be obtained via the attribute `class_indices`.\n            class_mode: One of \"categorical\", \"binary\", \"sparse\",\n                \"input\", or None. 
Default: \"categorical\".\n                Determines the type of label arrays that are returned:\n                - \"categorical\" will be 2D one-hot encoded labels,\n                - \"binary\" will be 1D binary labels,\n                    \"sparse\" will be 1D integer labels,\n                - \"input\" will be images identical\n                    to input images (mainly used to work with autoencoders).\n                - If None, no labels are returned\n                  (the generator will only yield batches of image data,\n                  which is useful to use with `model.predict_generator()`).\n                  Please note that in case of class_mode None,\n                  the data still needs to reside in a subdirectory\n                  of `directory` for it to work correctly.\n            batch_size: Size of the batches of data (default: 32).\n            shuffle: Whether to shuffle the data (default: True)\n                If set to False, sorts the data in alphanumeric order.\n            seed: Optional random seed for shuffling and transformations.\n            save_to_dir: None or str (default: None).\n                This allows you to optionally specify\n                a directory to which to save\n                the augmented pictures being generated\n                (useful for visualizing what you are doing).\n            save_prefix: Str. Prefix to use for filenames of saved pictures\n                (only relevant if `save_to_dir` is set).\n            save_format: One of \"png\", \"jpeg\"\n                (only relevant if `save_to_dir` is set). Default: \"png\".\n            follow_links: Whether to follow symlinks inside\n                class subdirectories (default: False).\n            subset: Subset of data (`\"training\"` or `\"validation\"`) if\n                `validation_split` is set in `ImageDataAugmentor`.\n            interpolation: Interpolation method used to\n                resample the image if the\n                target size is different from that of the loaded image.\n                Supported methods are `\"nearest\"`, `\"bilinear\"`,\n                and `\"bicubic\"`.\n                If PIL version 1.1.3 or newer is installed, `\"lanczos\"` is also\n                supported. 
If PIL version 3.4.0 or newer is installed,\n                `\"box\"` and `\"hamming\"` are also supported.\n                By default, `\"nearest\"` is used.\n        # Returns\n            A `DirectoryIterator` yielding tuples of `(x, y)`\n                where `x` is a numpy array containing a batch\n                of images with shape `(batch_size, *target_size, channels)`\n                and `y` is a numpy array of corresponding labels.\n        \"\"\"\n        return DirectoryIterator(\n            directory,\n            self,\n            target_size=target_size,\n            color_mode=color_mode,\n            classes=classes,\n            class_mode=class_mode,\n            data_format=self.data_format,\n            batch_size=batch_size,\n            shuffle=shuffle,\n            seed=seed,\n            save_to_dir=save_to_dir,\n            save_prefix=save_prefix,\n            save_format=save_format,\n            follow_links=follow_links,\n            subset=subset,\n            interpolation=interpolation\n        )\n    \n\n    def transform_image(self, image, desired_w, desired_h):\n        \"\"\"\n        Transforms an image by first augmenting and then standardizing\n        \"\"\"\n        image = self.process_image(image, desired_w, desired_h, self.augment)\n        image = self.preprocess_input(image)\n        \n        return image\n"
  },
  {
    "path": "axelerate/networks/classifier/directory_iterator.py",
    "content": "\"\"\"Utilities for real-time data augmentation on image data.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport multiprocessing.pool\nfrom six.moves import range\n\nimport numpy as np\nimport cv2\n\nfrom .iterator import BatchFromFilesMixin, Iterator\nfrom .utils import _list_valid_filenames_in_directory\n\n\nclass DirectoryIterator(BatchFromFilesMixin, Iterator):\n    \"\"\"Iterator capable of reading images from a directory on disk.\n\n    # Arguments\n        directory: string, path to the directory to read images from.\n            Each subdirectory in this directory will be\n            considered to contain images from one class,\n            or alternatively you could specify class subdirectories\n            via the `classes` argument.\n        image_data_generator: Instance of `ImageDataAugmentor`\n            to use for random transformations and normalization.\n        target_size: tuple of integers, dimensions to resize input images to.\n        color_mode: One of `\"rgb\"`, `\"rgba\"`, `\"gray\"`.\n            Color mode to read images.\n        classes: Optional list of strings, names of subdirectories\n            containing images from each class (e.g. `[\"dogs\", \"cats\"]`).\n            It will be computed automatically if not set.\n        class_mode: Mode for yielding the targets:\n            `\"binary\"`: binary targets (if there are only two classes),\n            `\"categorical\"`: categorical targets,\n            `\"sparse\"`: integer targets,\n            `\"input\"`: targets are images identical to input images (mainly\n                used to work with autoencoders),\n            `None`: no targets get yielded (only input images are yielded).\n        batch_size: Integer, size of a batch.\n        shuffle: Boolean, whether to shuffle the data between epochs.\n            If set to False, sorts the data in alphanumeric order.\n        seed: Random seed for data shuffling.\n        data_format: String, one of `channels_first`, `channels_last`.\n        save_to_dir: Optional directory where to save the pictures\n            being yielded, in a viewable format. 
This is useful\n            for visualizing the random transformations being\n            applied, for debugging purposes.\n        save_prefix: String prefix to use for saving sample\n            images (if `save_to_dir` is set).\n        save_format: Format to use for saving sample images\n            (if `save_to_dir` is set).\n        follow_links: boolean,follow symbolic links to subdirectories\n        subset: Subset of data (`\"training\"` or `\"validation\"`) if\n            validation_split is set in ImageDataAugmentor.\n        interpolation: Interpolation method used to\n            resample the image if the\n            target size is different from that of the loaded image.\n            Supported methods are `\"cv2.INTER_NEAREST\"`, `\"cv2.INTER_LINEAR\"`, `\"cv2.INTER_AREA\"`, `\"cv2.INTER_CUBIC\"`\n            and `\"cv2.INTER_LANCZOS4\"`\n            By default, `\"cv2.INTER_NEAREST\"` is used.\n        dtype: Dtype to use for generated arrays.\n    \"\"\"\n    allowed_class_modes = {'categorical', 'binary', 'sparse', 'input', None}\n\n    def __init__(self,\n                 directory,\n                 image_data_generator,\n                 target_size=(256, 256),\n                 color_mode='rgb',\n                 classes=None,\n                 class_mode='categorical',\n                 batch_size=32,\n                 shuffle=True,\n                 seed=None,\n                 data_format='channels_last',\n                 save_to_dir=None,\n                 save_prefix='',\n                 save_format='png',\n                 follow_links=False,\n                 subset=None,\n                 interpolation=cv2.INTER_NEAREST,\n                 dtype='float32'):\n        super(DirectoryIterator, self).set_processing_attrs(image_data_generator,\n                                                            target_size,\n                                                            color_mode,\n                                                            data_format,\n                                                            save_to_dir,\n                                                            save_prefix,\n                                                            save_format,\n                                                            subset,\n                                                            interpolation)\n        self.directory = directory\n        self.classes = classes\n        if class_mode not in self.allowed_class_modes:\n            raise ValueError('Invalid class_mode: {}; expected one of: {}'\n                             .format(class_mode, self.allowed_class_modes))\n        self.class_mode = class_mode\n        self.dtype = dtype\n        # First, count the number of samples and classes.\n        self.samples = 0\n\n        if not classes:\n            classes = []\n            for subdir in sorted(os.listdir(directory)):\n                if os.path.isdir(os.path.join(directory, subdir)):\n                    classes.append(subdir)\n        self.num_classes = len(classes)\n        self.class_indices = dict(zip(classes, range(len(classes))))\n\n        pool = multiprocessing.pool.ThreadPool()\n\n        # Second, build an index of the images\n        # in the different class subfolders.\n        results = []\n        self.filenames = []\n        i = 0\n        for dirpath in (os.path.join(directory, subdir) for subdir in classes):\n            results.append(\n                pool.apply_async(_list_valid_filenames_in_directory,\n         
                        (dirpath, self.white_list_formats, self.split,\n                                  self.class_indices, follow_links)))\n        classes_list = []\n        for res in results:\n            classes, filenames = res.get()\n            classes_list.append(classes)\n            self.filenames += filenames\n        self.samples = len(self.filenames)\n        self.classes = np.zeros((self.samples,), dtype='int32')\n        for classes in classes_list:\n            self.classes[i:i + len(classes)] = classes\n            i += len(classes)\n\n        print('Found %d images belonging to %d classes.' %\n              (self.samples, self.num_classes))\n        pool.close()\n        pool.join()\n        self._filepaths = [\n            os.path.join(self.directory, fname) for fname in self.filenames\n        ]\n        super(DirectoryIterator, self).__init__(self.samples,\n                                                batch_size,\n                                                shuffle,\n                                                seed)\n\n    @property\n    def filepaths(self):\n        return self._filepaths\n\n    @property\n    def labels(self):\n        return self.classes\n\n    @property  # mixin needs this property to work\n    def sample_weight(self):\n        # no sample weights will be returned\n        return None\n"
  },
  {
    "path": "axelerate/networks/classifier/frontend_classifier.py",
    "content": "import time\nimport os\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay\n\nfrom axelerate.networks.common_utils.feature import create_feature_extractor\nfrom axelerate.networks.classifier.batch_gen import create_datagen\nfrom axelerate.networks.common_utils.fit import train\nfrom tensorflow.keras.models import Model, load_model\nfrom tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout\nfrom tensorflow.keras.applications.mobilenet import preprocess_input\n\ndef get_labels(directory):\n    labels = sorted(os.listdir(directory))\n    return labels\n\ndef create_classifier(architecture, labels, input_size, layers, dropout, weights = None, save_bottleneck = False):\n    base_model = create_feature_extractor(architecture, input_size, weights)\n    x = base_model.feature_extractor.outputs[0]\n    x = GlobalAveragePooling2D()(x)\n    if len(layers) != 0:\n        for layer in layers[0:-1]:\n            x = Dense(layer, activation = 'relu')(x) \n            x = Dropout(dropout)(x)\n        x = Dense(layers[-1], activation = 'relu')(x)\n    preds = Dense(len(labels), activation = 'softmax')(x)\n    model = Model(inputs = base_model.feature_extractor.inputs[0],outputs = preds, name = 'classifier')\n\n    bottleneck_layer = None\n    if save_bottleneck:\n        bottleneck_layer = base_model.feature_extractor.layers[-1].name\n    network = Classifier(model, input_size, labels, base_model.normalize, bottleneck_layer)\n\n    return network\n\nclass Classifier(object):\n    def __init__(self,\n                 network,\n                 input_size,\n                 labels,\n                 norm,\n                 bottleneck_layer):\n        self.network = network       \n        self.labels = labels\n        self.input_size = input_size\n        self.bottleneck_layer = bottleneck_layer\n        self.norm = norm\n\n    def load_weights(self, weight_path, by_name=False):\n        if os.path.exists(weight_path):\n            print(\"Loading pre-trained weights for the whole model: \", weight_path)\n            self.network.load_weights(weight_path)\n        else:\n            print(\"Failed to load pre-trained weights for the whole model. 
It might be because you didn't specify any or the weight file cannot be found\")\n\n    def save_bottleneck(self, model_path, bottleneck_layer):\n        bottleneck_weights_path = os.path.join(os.path.dirname(model_path),'bottleneck_weigths.h5')\n        model = load_model(model_path)\n        for layer in model.layers:\n            if layer.name == bottleneck_layer:\n                output = layer.output\n        bottleneck_model = Model(model.input, output)\n        bottleneck_model.save_weights(bottleneck_weights_path)\n\n    def predict(self, img):\n\n        start_time = time.time()\n        Y_pred = np.squeeze(self.network(img, training = False))\n        elapsed_ms = (time.time() - start_time)  * 1000\n\n        y_pred = np.argmax(Y_pred)\n        prob = Y_pred[y_pred]\n\n        prediction = self.labels[y_pred]\n\n        return elapsed_ms, prob, prediction\n\n    def evaluate(self, img_folder, batch_size):\n\n        self.generator = create_datagen(img_folder, batch_size, self.input_size, None, False, self.norm)\n\n        Y_pred = self.network.predict(self.generator, steps=len(self.generator))\n\n        y_pred = np.argmax(Y_pred, axis=1)\n\n        print('Classification Report')\n        report = classification_report(self.generator.classes, y_pred, target_names = self.labels)\n        print(report)\n\n        print('Confusion Matrix')\n        cm = confusion_matrix(self.generator.classes, y_pred)\n        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels = self.labels)\n        disp.plot(include_values=True, cmap='Blues', ax=None)\n        plt.show()\n\n        return report, cm\n\n    def train(self,\n              img_folder,\n              nb_epoch,\n              project_folder,\n              batch_size = 8,\n              augumentation = False,\n              learning_rate = 1e-4, \n              train_times = 1,\n              valid_times = 1,\n              valid_img_folder = \"\",\n              first_trainable_layer = None,\n              metrics = \"val_loss\"):\n\n        if metrics != \"accuracy\" and metrics != \"loss\":\n            print(\"Unknown metric for Classifier, valid options are: accuracy or loss. Defaulting to loss\")\n            metrics = \"loss\"\n\n        train_generator = create_datagen(img_folder, batch_size, self.input_size, project_folder, augumentation, self.norm)\n        validation_generator = create_datagen(valid_img_folder, batch_size, self.input_size, project_folder, False, self.norm)\n\n        model_layers, model_path = train(self.network,\n                                        'categorical_crossentropy',\n                                        train_generator,\n                                        validation_generator,\n                                        learning_rate, \n                                        nb_epoch, \n                                        project_folder,\n                                        first_trainable_layer, \n                                        metric_name = metrics)\n\n        if self.bottleneck_layer:\n            self.save_bottleneck(model_path, self.bottleneck_layer)\n        return model_layers, model_path\n\n    \n"
  },
  {
    "path": "axelerate/networks/classifier/iterator.py",
    "content": "\"\"\"Utilities for real-time data augmentation on image data.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport threading\nimport numpy as np\nfrom keras_preprocessing import get_keras_submodule\nimport matplotlib.pyplot as plt\n\ntry:\n    IteratorType = get_keras_submodule('utils').Sequence\nexcept ImportError:\n    IteratorType = object\n\nfrom .utils import (array_to_img,\n                    img_to_array,\n                    load_img)\n\n\nclass Iterator(IteratorType):\n    \"\"\"Base class for image data iterators.\n\n    Every `Iterator` must implement the `_get_batch_of_samples`\n    method.\n\n    # Arguments\n        n: Integer, total number of samples in the dataset to loop over.\n        batch_size: Integer, size of a batch.\n        shuffle: Boolean, whether to shuffle the data between epochs.\n        seed: Random seeding for data shuffling.\n    \"\"\"\n    white_list_formats = ('png', 'jpg', 'jpeg', 'bmp', 'ppm', 'tif', 'tiff')\n\n    def __init__(self, n, batch_size, shuffle, seed):\n        self.n = n\n        self.batch_size = batch_size\n        self.seed = seed\n        self.shuffle = shuffle\n        self.batch_index = 0\n        self.total_batches_seen = 0\n        self.lock = threading.Lock()\n        self.index_array = None\n        self.index_generator = self._flow_index()\n\n    def _set_index_array(self):\n        self.index_array = np.arange(self.n)\n        if self.shuffle:\n            self.index_array = np.random.permutation(self.n)\n\n    def __getitem__(self, idx):\n        if idx >= len(self):\n            raise ValueError('Asked to retrieve element {idx}, '\n                             'but the Sequence '\n                             'has length {length}'.format(idx=idx,\n                                                          length=len(self)))\n        if self.seed is not None:\n            np.random.seed(self.seed + self.total_batches_seen)\n        self.total_batches_seen += 1\n        if self.index_array is None:\n            self._set_index_array()\n        index_array = self.index_array[self.batch_size * idx:\n                                       self.batch_size * (idx + 1)]\n        return self._get_batches_of_transformed_samples(index_array)\n\n    def __len__(self):\n        return (self.n + self.batch_size - 1) // self.batch_size  # round up\n\n    def on_epoch_end(self):\n        self._set_index_array()\n\n    def reset(self):\n        self.batch_index = 0\n\n    def _flow_index(self):\n        # Ensure self.batch_index is 0.\n        self.reset()\n        while 1:\n            if self.seed is not None:\n                np.random.seed(self.seed + self.total_batches_seen)\n            if self.batch_index == 0:\n                self._set_index_array()\n\n            if self.n == 0:\n                # Avoiding modulo by zero error\n                current_index = 0\n            else:\n                current_index = (self.batch_index * self.batch_size) % self.n\n            if self.n > current_index + self.batch_size:\n                self.batch_index += 1\n            else:\n                self.batch_index = 0\n            self.total_batches_seen += 1\n            yield self.index_array[current_index:\n                                   current_index + self.batch_size]\n\n    def __iter__(self):\n        # Needed if we want to do something like:\n        # for x, y in data_gen.flow(...):\n        return self\n\n    def 
__next__(self, *args, **kwargs):\n        return self.next(*args, **kwargs)\n\n    def next(self):\n        \"\"\"For python 2.x.\n\n        # Returns\n            The next batch.\n        \"\"\"\n        with self.lock:\n            index_array = next(self.index_generator)\n        # The transformation of images is not under thread lock\n        # so it can be done in parallel\n        return self._get_batches_of_transformed_samples(index_array)\n\n    def _get_batches_of_transformed_samples(self, index_array):\n        \"\"\"Gets a batch of transformed samples.\n\n        # Arguments\n            index_array: Array of sample indices to include in batch.\n\n        # Returns\n            A batch of transformed samples.\n        \"\"\"\n        raise NotImplementedError\n\n\nclass BatchFromFilesMixin():\n    \"\"\"Adds methods related to getting batches from filenames\n\n    It includes the logic to transform image files to batches.\n    \"\"\"\n\n    def set_processing_attrs(self,\n                             image_data_generator,\n                             target_size,\n                             color_mode,\n                             data_format,\n                             save_to_dir,\n                             save_prefix,\n                             save_format,\n                             subset,\n                             interpolation):\n        \"\"\"Sets attributes to use later for processing files into a batch.\n\n        # Arguments\n            image_data_generator: Instance of `ImageDataAugmentor`\n                to use for random transformations and normalization.\n            target_size: tuple of integers, dimensions to resize input images to.\n            color_mode: One of `\"rgb\"`, `\"rgba\"`, `\"gray\"`.\n                Color mode to read images.\n            data_format: String, one of `channels_first`, `channels_last`.\n            save_to_dir: Optional directory where to save the pictures\n                being yielded, in a viewable format. 
This is useful\n                for visualizing the random transformations being\n                applied, for debugging purposes.\n            save_prefix: String prefix to use for saving sample\n                images (if `save_to_dir` is set).\n            save_format: Format to use for saving sample images\n                (if `save_to_dir` is set).\n            subset: Subset of data (`\"training\"` or `\"validation\"`) if\n                validation_split is set in ImageDataAugmentor.\n            interpolation: Interpolation method used to\n                resample the image if the\n                target size is different from that of the loaded image.\n                Supported methods are `\"cv2.INTER_NEAREST\"`, `\"cv2.INTER_LINEAR\"`, `\"cv2.INTER_AREA\"`, `\"cv2.INTER_CUBIC\"`\n                and `\"cv2.INTER_LANCZOS4\"`\n                By default, `\"cv2.INTER_NEAREST\"` is used.\n        \"\"\"\n        self.image_data_generator = image_data_generator\n        self.target_size = tuple(target_size)\n        if color_mode not in {'rgb', 'rgba', 'gray'}:\n            raise ValueError('Invalid color mode:', color_mode,\n                             '; expected \"rgb\", \"rgba\", or \"gray\".')\n        self.color_mode = color_mode\n        self.data_format = data_format\n        if self.color_mode == 'rgba':\n            if self.data_format == 'channels_last':\n                self.image_shape = self.target_size + (4,)\n            else:\n                self.image_shape = (4,) + self.target_size\n        elif self.color_mode == 'rgb':\n            if self.data_format == 'channels_last':\n                self.image_shape = self.target_size + (3,)\n            else:\n                self.image_shape = (3,) + self.target_size\n        else:\n            if self.data_format == 'channels_last':\n                self.image_shape = self.target_size + (1,)\n            else:\n                self.image_shape = (1,) + self.target_size\n        self.save_to_dir = save_to_dir\n        self.save_prefix = save_prefix\n        self.save_format = save_format\n        self.interpolation = interpolation\n        if subset is not None:\n            validation_split = self.image_data_generator._validation_split\n            if subset == 'validation':\n                split = (0, validation_split)\n            elif subset == 'training':\n                split = (validation_split, 1)\n            else:\n                raise ValueError(\n                    'Invalid subset name: %s;'\n                    'expected \"training\" or \"validation\"' % (subset,))\n        else:\n            split = None\n        self.split = split\n        self.subset = subset\n\n    def _get_batch_of_samples(self, index_array, apply_standardization=True):\n        \"\"\"Gets a batch of transformed samples.\n\n        # Arguments\n            index_array: Array of sample indices to include in batch.\n\n        # Returns\n            A batch of transformed samples.\n        \"\"\"\n        # build batch of image data\n        # self.filepaths is dynamic, is better to call it once outside the loop\n        filepaths = self.filepaths\n        \n        # build batch of image data\n        batch_x = np.array([load_img(filepaths[x], \n                                     color_mode=self.color_mode,\n                                     target_size=self.target_size, \n                                     interpolation=self.interpolation) for x in index_array])    \n\n        # apply the augmentations and custom 
transformations to the image data\n        batch_x = np.array([self.image_data_generator.transform_image(x, self.target_size[0], self.target_size[1]) for x in batch_x])\n\n        # transform to `channels_first` format if needed\n        if self.data_format == \"channels_first\":\n            batch_x = np.array([np.swapaxes(x,0,2) for x in batch_x])\n\n        # optionally save augmented images to disk for debugging purposes\n        if self.save_to_dir:\n            for i, j in enumerate(index_array):\n                img = array_to_img(batch_x[i], self.data_format, scale=True)\n                fname = '{prefix}_{index}_{hash}.{format}'.format(\n                    prefix=self.save_prefix,\n                    index=j,\n                    hash=np.random.randint(1e7),\n                    format=self.save_format)\n                img.save(os.path.join(self.save_to_dir, fname))\n        # build batch of labels\n            \n        if self.class_mode == 'input':\n            batch_y = batch_x.copy()\n        elif self.class_mode in {'binary', 'sparse'}:\n            batch_y = np.empty(len(batch_x), dtype=self.dtype)\n            for i, n_observation in enumerate(index_array):\n                batch_y[i] = self.classes[n_observation]\n        elif self.class_mode == 'categorical':\n            batch_y = np.zeros((len(batch_x), len(self.class_indices)),\n                               dtype=self.dtype)\n            for i, n_observation in enumerate(index_array):\n                batch_y[i, self.classes[n_observation]] = 1.\n        elif self.class_mode == 'multi_output':\n            batch_y = [output[index_array] for output in self.labels]\n        elif self.class_mode == 'raw':\n            batch_y = self.labels[index_array]\n        else:\n            return batch_x\n        if self.sample_weight is None:\n            return batch_x, batch_y\n        else:\n            return batch_x, batch_y, self.sample_weight[index_array]\n\n    def _get_batches_of_transformed_samples(self, index_array):\n        return self._get_batch_of_samples(index_array)\n\n\n    def show_batch(self, rows:int=5, apply_standardization:bool=False, **plt_kwargs):\n        img_arr = np.random.choice(range(len(self.classes)), rows**2)\n        if self.class_mode is None:\n            imgs = self._get_batch_of_samples(img_arr, apply_standardization=apply_standardization)\n        else:\n            imgs, _ = self._get_batch_of_samples(img_arr, apply_standardization=apply_standardization)\n            lbls = np.array(self.labels)[img_arr]\n        \n            try:\n                inv_class_indices = {v: k for k, v in self.class_indices.items()}\n                lbls = [inv_class_indices.get(k) for k in lbls]\n            except:\n                pass\n\n        if self.data_format == \"channels_first\":\n            imgs = np.array([np.swapaxes(img,0,2) for img in imgs])\n\n        if not 'figsize' in plt_kwargs:\n            plt_kwargs['figsize'] = (12,12)\n\n        plt.close('all')\n        plt.figure(**plt_kwargs)\n\n        for idx, img in enumerate(imgs):\n            plt.subplot(rows, rows, idx+1)\n            plt.imshow(img.squeeze())\n            if lbls is not None:\n                plt.title(lbls[idx])\n            plt.axis('off')\n        \n        plt.subplots_adjust(hspace=0.5, wspace=0.5)\n        plt.show()\n        \n    @property\n    def filepaths(self):\n        \"\"\"List of absolute paths to image files\"\"\"\n        raise NotImplementedError(\n            '`filepaths` property method has not 
been implemented in {}.'\n            .format(type(self).__name__)\n        )\n\n    @property\n    def labels(self):\n        \"\"\"Class labels of every observation\"\"\"\n        raise NotImplementedError(\n            '`labels` property method has not been implemented in {}.'\n            .format(type(self).__name__)\n        )\n\n    @property\n    def sample_weight(self):\n        raise NotImplementedError(\n            '`sample_weight` property method has not been implemented in {}.'\n            .format(type(self).__name__)\n        )\n"
  },
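  {
    "path": "examples/iterator_usage_sketch.py",
    "content": "\"\"\"Hypothetical usage sketch, not part of the original repository: shows how the\n`Iterator` base class from axelerate/networks/classifier/iterator.py can be subclassed.\nThe class name `ArrayIterator` and the toy data below are assumptions made purely for\nillustration.\n\"\"\"\nimport numpy as np\n\nfrom axelerate.networks.classifier.iterator import Iterator\n\n\nclass ArrayIterator(Iterator):\n    \"\"\"Yields batches from an in-memory numpy array by implementing the single\n    method the base class leaves abstract.\"\"\"\n\n    def __init__(self, x, y, batch_size=8, shuffle=True, seed=None):\n        self.x = np.asarray(x)\n        self.y = np.asarray(y)\n        super(ArrayIterator, self).__init__(len(self.x), batch_size, shuffle, seed)\n\n    def _get_batches_of_transformed_samples(self, index_array):\n        # No augmentation here; a real subclass would transform the images.\n        return self.x[index_array], self.y[index_array]\n\n\nif __name__ == '__main__':\n    # 20 fake RGB images of size 32x32 with binary labels.\n    images = np.random.rand(20, 32, 32, 3)\n    labels = np.random.randint(0, 2, size=20)\n    it = ArrayIterator(images, labels, batch_size=8, shuffle=True, seed=0)\n    print('batches per epoch:', len(it))  # ceil(20 / 8) == 3\n    batch_x, batch_y = it[0]\n    print(batch_x.shape, batch_y.shape)  # (8, 32, 32, 3) (8,)\n"
  },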
  {
    "path": "axelerate/networks/classifier/utils.py",
    "content": "\"\"\"Utilities for real-time data augmentation on image data.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport warnings\n\nimport numpy as np\nimport cv2\ntry:\n    from PIL import ImageEnhance\n    from PIL import Image as pil_image\nexcept ImportError:\n    pil_image = None\n    ImageEnhance = None\n\n\nif pil_image is not None:\n    _PIL_INTERPOLATION_METHODS = {\n        'nearest': pil_image.NEAREST,\n        'bilinear': pil_image.BILINEAR,\n        'bicubic': pil_image.BICUBIC,\n    }\n    # These methods were only introduced in version 3.4.0 (2016).\n    if hasattr(pil_image, 'HAMMING'):\n        _PIL_INTERPOLATION_METHODS['hamming'] = pil_image.HAMMING\n    if hasattr(pil_image, 'BOX'):\n        _PIL_INTERPOLATION_METHODS['box'] = pil_image.BOX\n    # This method is new in version 1.1.3 (2013).\n    if hasattr(pil_image, 'LANCZOS'):\n        _PIL_INTERPOLATION_METHODS['lanczos'] = pil_image.LANCZOS\n\n\ndef validate_filename(filename, white_list_formats):\n    \"\"\"Check if a filename refers to a valid file.\n\n    # Arguments\n        filename: String, absolute path to a file\n        white_list_formats: Set, allowed file extensions\n\n    # Returns\n        A boolean value indicating if the filename is valid or not\n    \"\"\"\n    return (filename.lower().endswith(white_list_formats) and\n            os.path.isfile(filename))\n\n\ndef save_img(path,\n             x,\n             data_format='channels_last',\n             file_format=None,\n             scale=True,\n             **kwargs):\n    \"\"\"Saves an image stored as a Numpy array to a path or file object.\n\n    # Arguments\n        path: Path or file object.\n        x: Numpy array.\n        data_format: Image data format,\n            either \"channels_first\" or \"channels_last\".\n        file_format: Optional file format override. If omitted, the\n            format to use is determined from the filename extension.\n            If a file object was used instead of a filename, this\n            parameter should always be used.\n        scale: Whether to rescale image values to be within `[0, 255]`.\n        **kwargs: Additional keyword arguments passed to `PIL.Image.save()`.\n    \"\"\"\n    img = array_to_img(x, data_format=data_format, scale=scale)\n    if img.mode == 'RGBA' and (file_format == 'jpg' or file_format == 'jpeg'):\n        warnings.warn('The JPG format does not support '\n                      'RGBA images, converting to RGB.')\n        img = img.convert('RGB')\n    img.save(path, format=file_format, **kwargs)\n\n\ndef load_img(fname, color_mode='rgb', target_size=None, interpolation=cv2.INTER_NEAREST):\n    if color_mode == \"rgb\":\n        img = cv2.imread(fname)\n        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n        \n    elif color_mode == \"rgba\":\n        img = cv2.imread(fname,-1) \n        if img.shape[-1]!=4: #Add alpha-channel if not RGBA\n            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGBA)\n            \n    elif color_mode == \"gray\":\n        img = cv2.imread(fname, 0)\n\n    else:\n        img = cv2.imread(fname)\n        \n    if target_size is not None:\n        width_height_tuple = (target_size[1], target_size[0])\n        if img.shape[0:2] != width_height_tuple:\n            img = cv2.resize(img, dsize=width_height_tuple, interpolation = interpolation)\n\n    if color_mode == \"gray\":\n        return img[..., np.newaxis] #Add dummy axis. 
This is done here, cause `cv2.resize` removes the dummy axes\n\n    else:\n        return img\n\n\ndef list_pictures(directory, ext=('jpg', 'jpeg', 'bmp', 'png', 'ppm', 'tif',\n                                  'tiff')):\n    \"\"\"Lists all pictures in a directory, including all subdirectories.\n\n    # Arguments\n        directory: string, absolute path to the directory\n        ext: tuple of strings or single string, extensions of the pictures\n\n    # Returns\n        a list of paths\n    \"\"\"\n    ext = tuple('.%s' % e for e in ((ext,) if isinstance(ext, str) else ext))\n    return [os.path.join(root, f)\n            for root, _, files in os.walk(directory) for f in files\n            if f.lower().endswith(ext)]\n\n\ndef _iter_valid_files(directory, white_list_formats, follow_links):\n    \"\"\"Iterates on files with extension in `white_list_formats` contained in `directory`.\n\n    # Arguments\n        directory: Absolute path to the directory\n            containing files to be counted\n        white_list_formats: Set of strings containing allowed extensions for\n            the files to be counted.\n        follow_links: Boolean, follow symbolic links to subdirectories.\n\n    # Yields\n        Tuple of (root, filename) with extension in `white_list_formats`.\n    \"\"\"\n    def _recursive_list(subpath):\n        return sorted(os.walk(subpath, followlinks=follow_links),\n                      key=lambda x: x[0])\n\n    for root, _, files in _recursive_list(directory):\n        for fname in sorted(files):\n            if fname.lower().endswith('.tiff'):\n                warnings.warn('Using \".tiff\" files with multiple bands '\n                              'will cause distortion. Please verify your output.')\n            if fname.lower().endswith(white_list_formats):\n                yield root, fname\n\n\ndef _list_valid_filenames_in_directory(directory, white_list_formats, split,\n                                       class_indices, follow_links):\n    \"\"\"Lists paths of files in `subdir` with extensions in `white_list_formats`.\n\n    # Arguments\n        directory: absolute path to a directory containing the files to list.\n            The directory name is used as class label\n            and must be a key of `class_indices`.\n        white_list_formats: set of strings containing allowed extensions for\n            the files to be counted.\n        split: tuple of floats (e.g. 
`(0.2, 0.6)`) to only take into\n            account a certain fraction of files in each directory.\n            E.g.: `segment=(0.6, 1.0)` would only account for last 40 percent\n            of images in each directory.\n        class_indices: dictionary mapping a class name to its index.\n        follow_links: boolean, follow symbolic links to subdirectories.\n\n    # Returns\n         classes: a list of class indices\n         filenames: the path of valid files in `directory`, relative from\n             `directory`'s parent (e.g., if `directory` is \"dataset/class1\",\n            the filenames will be\n            `[\"class1/file1.jpg\", \"class1/file2.jpg\", ...]`).\n    \"\"\"\n    dirname = os.path.basename(directory)\n    if split:\n        num_files = len(list(\n            _iter_valid_files(directory, white_list_formats, follow_links)))\n        start, stop = int(split[0] * num_files), int(split[1] * num_files)\n        valid_files = list(\n            _iter_valid_files(\n                directory, white_list_formats, follow_links))[start: stop]\n    else:\n        valid_files = _iter_valid_files(\n            directory, white_list_formats, follow_links)\n    classes = []\n    filenames = []\n    for root, fname in valid_files:\n        classes.append(class_indices[dirname])\n        absolute_path = os.path.join(root, fname)\n        relative_path = os.path.join(\n            dirname, os.path.relpath(absolute_path, directory))\n        filenames.append(relative_path)\n\n    return classes, filenames\n\n\ndef array_to_img(x, data_format='channels_last', scale=True, dtype='float32'):\n    \"\"\"Converts a 3D Numpy array to a PIL Image instance.\n\n    # Arguments\n        x: Input Numpy array.\n        data_format: Image data format.\n            either \"channels_first\" or \"channels_last\".\n        scale: Whether to rescale image values\n            to be within `[0, 255]`.\n        dtype: Dtype to use.\n\n    # Returns\n        A PIL Image instance.\n\n    # Raises\n        ImportError: if PIL is not available.\n        ValueError: if invalid `x` or `data_format` is passed.\n    \"\"\"\n    if pil_image is None:\n        raise ImportError('Could not import PIL.Image. '\n                          'The use of `array_to_img` requires PIL.')\n    x = np.asarray(x, dtype=dtype)\n    if x.ndim != 3:\n        raise ValueError('Expected image array to have rank 3 (single image). 
'\n                         'Got array with shape: %s' % (x.shape,))\n\n    if data_format not in {'channels_first', 'channels_last'}:\n        raise ValueError('Invalid data_format: %s' % data_format)\n\n    # Original Numpy array x has format (height, width, channel)\n    # or (channel, height, width)\n    # but target PIL image has format (width, height, channel)\n    if data_format == 'channels_first':\n        x = x.transpose(1, 2, 0)\n    if scale:\n        x = x + max(-np.min(x), 0)\n        x_max = np.max(x)\n        if x_max != 0:\n            x /= x_max\n        x *= 255\n    if x.shape[2] == 4:\n        # RGBA\n        return pil_image.fromarray(x.astype('uint8'), 'RGBA')\n    elif x.shape[2] == 3:\n        # RGB\n        return pil_image.fromarray(x.astype('uint8'), 'RGB')\n    elif x.shape[2] == 1:\n        # grayscale\n        return pil_image.fromarray(x[:, :, 0].astype('uint8'), 'L')\n    else:\n        raise ValueError('Unsupported channel number: %s' % (x.shape[2],))\n\n\ndef img_to_array(img, data_format='channels_last', dtype='float32'):\n    \"\"\"Converts a PIL Image instance to a Numpy array.\n\n    # Arguments\n        img: PIL Image instance.\n        data_format: Image data format,\n            either \"channels_first\" or \"channels_last\".\n        dtype: Dtype to use for the returned array.\n\n    # Returns\n        A 3D Numpy array.\n\n    # Raises\n        ValueError: if invalid `img` or `data_format` is passed.\n    \"\"\"\n    if data_format not in {'channels_first', 'channels_last'}:\n        raise ValueError('Unknown data_format: %s' % data_format)\n    # Numpy array x has format (height, width, channel)\n    # or (channel, height, width)\n    # but original PIL image has format (width, height, channel)\n    x = np.asarray(img, dtype=dtype)\n    if len(x.shape) == 3:\n        if data_format == 'channels_first':\n            x = x.transpose(2, 0, 1)\n    elif len(x.shape) == 2:\n        if data_format == 'channels_first':\n            x = x.reshape((1, x.shape[0], x.shape[1]))\n        else:\n            x = x.reshape((x.shape[0], x.shape[1], 1))\n    else:\n        raise ValueError('Unsupported image shape: %s' % (x.shape,))\n    return x\n"
  },
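  {
    "path": "examples/image_utils_sketch.py",
    "content": "\"\"\"Hypothetical usage sketch, not part of the original repository: exercises the image\nhelpers from axelerate/networks/classifier/utils.py. The 'dataset' directory and the\n'sample_copy.png' output name are placeholders chosen for illustration.\n\"\"\"\nimport cv2\n\nfrom axelerate.networks.classifier.utils import (array_to_img, img_to_array, list_pictures,\n                                                 load_img, save_img)\n\n# Collect every picture under a (placeholder) dataset directory, recursively.\npaths = list_pictures('dataset')\nprint('found %d images' % len(paths))\n\nif paths:\n    # Load the first image as RGB, resized to 224x224 with nearest-neighbour interpolation.\n    img = load_img(paths[0], color_mode='rgb', target_size=(224, 224),\n                   interpolation=cv2.INTER_NEAREST)\n    print('loaded array shape:', img.shape)  # (224, 224, 3)\n\n    # Round-trip through PIL and write a rescaled copy back to disk.\n    pil_img = array_to_img(img, data_format='channels_last', scale=True)\n    arr = img_to_array(pil_img, data_format='channels_last')\n    save_img('sample_copy.png', arr, data_format='channels_last', scale=True)\n"
  },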
  {
    "path": "axelerate/networks/common_utils/__init__.py",
    "content": ""
  },
  {
    "path": "axelerate/networks/common_utils/augment.py",
    "content": "# -*- coding: utf-8 -*-\r\nimport numpy as np\r\nnp.random.seed(1337)\r\nimport imgaug as ia\r\nfrom imgaug import augmenters as iaa\r\nfrom imgaug.augmentables.segmaps import SegmentationMapsOnImage\r\nfrom imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage\r\nimport cv2\r\nimport os\r\nimport glob\r\nimport random\r\n\r\nclass ImgAugment(object):\r\n    def __init__(self, w, h, jitter):\r\n        \"\"\"\r\n        # Args\r\n            desired_w : int\r\n            desired_h : int\r\n            jitter : bool\r\n        \"\"\"\r\n        self._jitter = jitter\r\n        self._w = w\r\n        self._h = h\r\n\r\n    def imread(self, img_file, boxes, labels):\r\n        \"\"\"\r\n        # Args\r\n            img_file : str\r\n            boxes : array, shape of (N, 4)\r\n        \r\n        # Returns\r\n            image : 3d-array, shape of (h, w, 3)\r\n            boxes_ : array, same shape of boxes\r\n                jittered & resized bounding box\r\n        \"\"\"\r\n        # 1. read image file\r\n        try:\r\n            image = cv2.imread(img_file)\r\n            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\r\n        except:\r\n            print(\"This image has an annotation file, but cannot be open. Check the integrity of your dataset.\", img_file)\r\n            raise\r\n        \r\n        boxes_ = np.copy(boxes)\r\n        labels_ = np.copy(labels)\r\n  \r\n        # 2. resize and augment image     \r\n        image, boxes_, labels_ = process_image_detection(image, boxes_, labels_, self._w, self._h, self._jitter) \r\n\r\n        return image, boxes_, labels_\r\n\r\n\r\ndef _to_bbs(boxes, labels, shape):\r\n    new_boxes = []\r\n    for i in range(len(boxes)):\r\n        x1,y1,x2,y2 = boxes[i]\r\n        new_box = BoundingBox(x1,y1,x2,y2, labels[i])\r\n        new_boxes.append(new_box)\r\n    bbs = BoundingBoxesOnImage(new_boxes, shape)\r\n    return bbs\r\n\r\ndef _to_array(bbs):\r\n    new_boxes = []\r\n    new_labels = []\r\n    for bb in bbs.bounding_boxes:\r\n        x1 = int(bb.x1)\r\n        x2 = int(bb.x2)\r\n        y1 = int(bb.y1)\r\n        y2 = int(bb.y2)\r\n        label = bb.label\r\n        new_boxes.append([x1,y1,x2,y2])\r\n        new_labels.append(label)\r\n    return new_boxes, new_labels\r\n\r\n\r\ndef process_image_detection(image, boxes, labels, desired_w, desired_h, augment):\r\n    \r\n    # resize the image to standard size\r\n    if (desired_w and desired_h) or augment:\r\n        bbs = _to_bbs(boxes, labels, image.shape)\r\n\r\n        if (desired_w and desired_h):\r\n            # Rescale image and bounding boxes\r\n            image = ia.imresize_single_image(image, (desired_w, desired_h))\r\n            bbs = bbs.on(image)\r\n\r\n        if augment:\r\n            aug_pipe = _create_augment_pipeline()\r\n            image, bbs = aug_pipe(image=image, bounding_boxes=bbs)\r\n            bbs = bbs.remove_out_of_image().clip_out_of_image()\r\n\r\n        new_boxes, new_labels = _to_array(bbs)\r\n        #if len(new_boxes) != len(boxes):\r\n        #    print(new_boxes)\r\n        #    print(boxes)\r\n        #    print(\"_________________\")\r\n\r\n        return image, np.array(new_boxes), new_labels\r\n    else:\r\n        return image, np.array(boxes), labels\r\n\r\ndef process_image_classification(image, desired_w, desired_h, augment):\r\n    \r\n    # resize the image to standard size\r\n    if (desired_w and desired_h) or augment:\r\n\r\n        if (desired_w and desired_h):\r\n            # Rescale 
image\r\n            image = ia.imresize_single_image(image, (desired_w, desired_h))\r\n\r\n        if augment:\r\n            aug_pipe = _create_augment_pipeline()\r\n            image = aug_pipe(image=image)\r\n        \r\n    return image\r\n\r\ndef process_image_segmentation(image, segmap, input_w, input_h, output_w, output_h, augment):\r\n    # resize the image to standard size\r\n    if (input_w and input_h) or augment:\r\n        segmap = SegmentationMapsOnImage(segmap, shape=image.shape)\r\n\r\n        if (input_w and input_h):\r\n            # Rescale image and segmaps\r\n            image = ia.imresize_single_image(image, (input_w, input_h))\r\n            segmap = segmap.resize((output_w, output_h), interpolation=\"nearest\")\r\n\r\n        if augment:\r\n            aug_pipe = _create_augment_pipeline()\r\n            image, segmap = aug_pipe(image=image, segmentation_maps=segmap)\r\n\r\n    return image, segmap.get_arr()\r\n\r\n\r\ndef _create_augment_pipeline():\r\n\r\n    sometimes = lambda aug: iaa.Sometimes(0.1, aug)\r\n\r\n    aug_pipe = iaa.Sequential(\r\n        [\r\n            iaa.Fliplr(0.5), \r\n            iaa.Flipud(0.2), \r\n            iaa.Affine(translate_percent={\"x\": (-0.1, 0.1), \"y\": (-0.1, 0.1)}),\r\n            iaa.OneOf([iaa.Affine(scale=(0.8, 1.2)),\r\n                        iaa.Affine(rotate=(-10, 10)),\r\n                        iaa.Affine(shear=(-10, 10))]),\r\n\r\n                        sometimes(iaa.OneOf([\r\n                               iaa.GaussianBlur((0, 3.0)),\r\n                               iaa.AverageBlur(k=(2, 7)),\r\n                               iaa.MedianBlur(k=(3, 11)),\r\n                           ])),\r\n                           sometimes(iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5))),\r\n                           sometimes(iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5)),\r\n                           sometimes(iaa.OneOf([\r\n                               iaa.Dropout((0.01, 0.1), per_channel=0.5),\r\n                               iaa.CoarseDropout((0.03, 0.15), size_percent=(0.02, 0.05), per_channel=0.2),\r\n                           ])),\r\n                           sometimes(iaa.Add((-10, 10), per_channel=0.5)),  \r\n                           sometimes(iaa.Multiply((0.5, 1.5), per_channel=0.5)), \r\n                           sometimes(iaa.LinearContrast((0.5, 2.0), per_channel=0.5)) \r\n        ],\r\n        random_order=True\r\n    )\r\n\r\n    return aug_pipe\r\n\r\n\r\ndef visualize_detection_dataset(img_folder, ann_folder, num_imgs = None, img_size=None, augment=None):\r\n    import matplotlib.pyplot as plt\r\n    import matplotlib\r\n    from axelerate.networks.yolo.backend.utils.annotation import PascalVocXmlParser\r\n    try:\r\n        matplotlib.use('TkAgg')\r\n    except:\r\n        pass\r\n\r\n    parser = PascalVocXmlParser()\r\n    aug = ImgAugment(img_size, img_size, jitter=augment)\r\n    for ann in os.listdir(ann_folder)[:num_imgs]:\r\n        annotation_file = os.path.join(ann_folder, ann)\r\n        fname = parser.get_fname(annotation_file)\r\n        labels = parser.get_labels(annotation_file)\r\n        boxes = parser.get_boxes(annotation_file)\r\n        img_file =  os.path.join(img_folder, fname)\r\n        img, boxes_, labels_ = aug.imread(img_file, boxes, labels)\r\n        \r\n        for i in range(len(boxes_)):\r\n            x1, y1, x2, y2 = boxes_[i]\r\n            cv2.rectangle(img, (x1,y1), (x2,y2), (0,255,0), 3)\r\n            cv2.putText(img, \r\n 
                       '{}'.format(labels_[i]), \r\n                        (x1, y1 - 13), \r\n                        cv2.FONT_HERSHEY_SIMPLEX, \r\n                        1e-3 * img.shape[0], \r\n                        (255,0,0), 1)\r\n\r\n        plt.imshow(img)\r\n        plt.show(block=False)\r\n        plt.pause(1)\r\n        plt.close()\r\n\r\ndef visualize_segmentation_dataset(images_path, segs_path, num_imgs = None, img_size=None, augment=False, n_classes=255):\r\n    import matplotlib.pyplot as plt\r\n    import matplotlib\r\n    from axelerate.networks.segnet.data_utils.data_loader import get_pairs_from_paths, DATA_LOADER_SEED, class_colors, DataLoaderError\r\n\r\n    try:\r\n        matplotlib.use('TkAgg')\r\n    except:\r\n        pass\r\n\r\n    def _get_colored_segmentation_image(img, seg, colors, n_classes, img_size, do_augment=False):\r\n        \"\"\" Return a colored segmented image \"\"\"\r\n\r\n        img, seg = process_image_segmentation(img, seg, img_size, img_size, img_size, img_size, do_augment)\r\n        seg_img = np.zeros_like(seg)\r\n\r\n        for c in range(n_classes):\r\n            seg_img[:, :, 0] += ((seg[:, :, 0] == c) *\r\n                                (colors[c][0])).astype('uint8')\r\n            seg_img[:, :, 1] += ((seg[:, :, 0] == c) *\r\n                                (colors[c][1])).astype('uint8')\r\n            seg_img[:, :, 2] += ((seg[:, :, 0] == c) *\r\n                                (colors[c][2])).astype('uint8')\r\n\r\n        return img, seg_img\r\n\r\n    try:\r\n        # Get image-segmentation pairs\r\n        img_seg_pairs = get_pairs_from_paths(images_path, segs_path, ignore_non_matching=True)\r\n        # Get the colors for the classes\r\n        colors = class_colors\r\n\r\n        print(\"Please press any key to display the next image\")\r\n        for im_fn, seg_fn in img_seg_pairs[:num_imgs]:\r\n            img = cv2.imread(im_fn)[...,::-1]\r\n            seg = cv2.imread(seg_fn)\r\n            print(\"Found the following classes in the segmentation image:\", np.unique(seg))\r\n            img, seg_img = _get_colored_segmentation_image(img, seg, colors, n_classes, img_size, do_augment=augment)\r\n            fig = plt.figure(figsize=(14,7))\r\n            ax1 = fig.add_subplot(1,2,1)\r\n            ax1.imshow(img)\r\n            ax3 = fig.add_subplot(1,2,2)\r\n            ax3.imshow(seg_img)\r\n            plt.show(block=False)\r\n            plt.pause(1)\r\n            plt.close()\r\n    except DataLoaderError as e:\r\n        print(\"Found error during data loading\\n{0}\".format(str(e)))\r\n        return False\r\n\r\ndef visualize_classification_dataset(img_folder, num_imgs = None, img_size=None, augment=None):\r\n    import matplotlib.pyplot as plt\r\n    import matplotlib\r\n    try:\r\n        matplotlib.use('TkAgg')\r\n    except:\r\n        pass\r\n    font = cv2.FONT_HERSHEY_SIMPLEX\r\n    image_files_list = []\r\n    image_search = lambda ext : glob.glob(img_folder + ext, recursive=True)\r\n    for ext in ['/**/*.jpg', '/**/*.jpeg', '/**/*.png']: image_files_list.extend(image_search(ext))\r\n    random.shuffle(image_files_list)\r\n    for filename in image_files_list[0:num_imgs]:\r\n        image = cv2.imread(filename)[...,::-1]\r\n        image = process_image_classification(image, img_size, img_size, augment)\r\n        cv2.putText(image, os.path.dirname(filename).split('/')[-1], (10,30), font, image.shape[1]/700 , (255, 0, 0), 2, True)\r\n        plt.figure()\r\n        plt.imshow(image)\r\n        
plt.show(block=False)\r\n        plt.pause(1)\r\n        plt.close()\r\n        print(filename)\r\n\r\n\r\nif __name__ == '__main__':\r\n    import argparse\r\n    parser = argparse.ArgumentParser()\r\n    parser.add_argument(\"--type\", type=str)\r\n    parser.add_argument(\"--images\", type=str)\r\n    parser.add_argument(\"--annotations\", type=str)\r\n    parser.add_argument(\"--num_imgs\", type=int)\r\n    parser.add_argument(\"--img_size\", type=int)\r\n    parser.add_argument(\"--aug\", type=bool)\r\n    args = parser.parse_args()\r\n    if args.type == 'detection':\r\n        visualize_detection_dataset(args.images, args.annotations, args.num_imgs, args.img_size, args.aug)\r\n    if args.type == 'segmentation':\r\n        visualize_segmentation_dataset(args.images, args.annotations, args.num_imgs, args.img_size, args.aug)\r\n    if args.type == 'classification':\r\n        visualize_classification_dataset(args.images, args.num_imgs, args.img_size, args.aug)\r\n"
  },
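  {
    "path": "examples/augment_usage_sketch.py",
    "content": "\"\"\"Hypothetical usage sketch, not part of the original repository: applies the\nclassification resize/augmentation helper from axelerate/networks/common_utils/augment.py\nto a synthetic image. The synthetic image and the 224x224 target size are assumptions\nmade for illustration.\n\"\"\"\nimport numpy as np\n\nfrom axelerate.networks.common_utils.augment import process_image_classification\n\n# A fake 320x240 RGB image standing in for a real training sample.\nimage = np.random.randint(0, 256, size=(240, 320, 3), dtype=np.uint8)\n\n# Resize only, as done at validation/inference time.\nresized = process_image_classification(image, 224, 224, augment=False)\nprint('resized:', resized.shape)  # (224, 224, 3)\n\n# Resize plus the random imgaug pipeline used during training.\naugmented = process_image_classification(image, 224, 224, augment=True)\nprint('augmented:', augmented.shape)\n"
  },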
  {
    "path": "axelerate/networks/common_utils/callbacks.py",
    "content": "import numpy as np\nfrom tensorflow import keras\nfrom tensorflow.keras import backend as K\n\ndef cosine_decay_with_warmup(global_step,\n                             learning_rate_base,\n                             total_steps,\n                             warmup_learning_rate=0.0,\n                             warmup_steps=0,\n                             hold_base_rate_steps=0):\n    \"\"\"Cosine decay schedule with warm up period.\n    Cosine annealing learning rate as described in:\n      Loshchilov and Hutter, SGDR: Stochastic Gradient Descent with Warm Restarts.\n      ICLR 2017. https://arxiv.org/abs/1608.03983\n    In this schedule, the learning rate grows linearly from warmup_learning_rate\n    to learning_rate_base for warmup_steps, then transitions to a cosine decay\n    schedule.\n    Arguments:\n        global_step {int} -- global step.\n        learning_rate_base {float} -- base learning rate.\n        total_steps {int} -- total number of training steps.\n    Keyword Arguments:\n        warmup_learning_rate {float} -- initial learning rate for warm up. (default: {0.0})\n        warmup_steps {int} -- number of warmup steps. (default: {0})\n        hold_base_rate_steps {int} -- Optional number of steps to hold base learning rate\n                                    before decaying. (default: {0})\n    Returns:\n      a float representing learning rate.\n    Raises:\n      ValueError: if warmup_learning_rate is larger than learning_rate_base,\n        or if warmup_steps is larger than total_steps.\n    \"\"\"\n\n    if total_steps < warmup_steps:\n        raise ValueError('total_steps must be larger or equal to '\n                         'warmup_steps.')\n    learning_rate = 0.5 * learning_rate_base * (1 + np.cos(\n        np.pi *\n        (global_step - warmup_steps - hold_base_rate_steps\n         ) / float(total_steps - warmup_steps - hold_base_rate_steps)))\n    if hold_base_rate_steps > 0:\n        learning_rate = np.where(global_step > warmup_steps + hold_base_rate_steps,\n                                 learning_rate, learning_rate_base)\n    if warmup_steps > 0:\n        if learning_rate_base < warmup_learning_rate:\n            raise ValueError('learning_rate_base must be larger or equal to '\n                             'warmup_learning_rate.')\n        slope = (learning_rate_base - warmup_learning_rate) / warmup_steps\n        warmup_rate = slope * global_step + warmup_learning_rate\n        learning_rate = np.where(global_step < warmup_steps, warmup_rate,\n                                 learning_rate)\n    return np.where(global_step > total_steps, 0.0, learning_rate)\n\n\nclass WarmUpCosineDecayScheduler(keras.callbacks.Callback):\n    \"\"\"Cosine decay with warmup learning rate scheduler\n    \"\"\"\n\n    def __init__(self,\n                 learning_rate_base,\n                 total_steps,\n                 global_step_init=0,\n                 warmup_learning_rate=0.0,\n                 warmup_steps=0,\n                 hold_base_rate_steps=0,\n                 verbose=0):\n        \"\"\"Constructor for cosine decay with warmup learning rate scheduler.\n    Arguments:\n        learning_rate_base {float} -- base learning rate.\n        total_steps {int} -- total number of training steps.\n    Keyword Arguments:\n        global_step_init {int} -- initial global step, e.g. from previous checkpoint.\n        warmup_learning_rate {float} -- initial learning rate for warm up. 
(default: {0.0})\n        warmup_steps {int} -- number of warmup steps. (default: {0})\n        hold_base_rate_steps {int} -- Optional number of steps to hold base learning rate\n                                    before decaying. (default: {0})\n        verbose {int} -- 0: quiet, 1: update messages. (default: {0})\n        \"\"\"\n\n        super(WarmUpCosineDecayScheduler, self).__init__()\n        self.learning_rate_base = learning_rate_base\n        self.total_steps = total_steps\n        self.global_step = global_step_init\n        self.warmup_learning_rate = warmup_learning_rate\n        self.warmup_steps = warmup_steps\n        self.hold_base_rate_steps = hold_base_rate_steps\n        self.verbose = verbose\n        self.learning_rates = []\n        self.current_lr = 0.0\n        \n    def on_epoch_end(self, epoch, logs={}):\n        if self.verbose == 1:\n            print('Epoch %05d: Learning rate is %s.\\n' % (epoch, self.current_lr))        \n\n    def on_batch_end(self, batch, logs=None):\n        self.global_step = self.global_step + 1\n        lr = K.get_value(self.model.optimizer.lr)\n        self.learning_rates.append(lr)\n\n    def on_batch_begin(self, batch, logs=None):\n        self.current_lr = cosine_decay_with_warmup(global_step=self.global_step,\n                                      learning_rate_base=self.learning_rate_base,\n                                      total_steps=self.total_steps,\n                                      warmup_learning_rate=self.warmup_learning_rate,\n                                      warmup_steps=self.warmup_steps,\n                                      hold_base_rate_steps=self.hold_base_rate_steps)\n        K.set_value(self.model.optimizer.lr, self.current_lr)\n        if self.verbose ==2:\n            print('\\nBatch %05d: setting learning rate to %s.' % (self.global_step + 1, self.current_lr))\n\n"
  },
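  {
    "path": "examples/warmup_cosine_sketch.py",
    "content": "\"\"\"Hypothetical usage sketch, not part of the original repository: evaluates the warmup\ncosine schedule from axelerate/networks/common_utils/callbacks.py and attaches the\nscheduler callback to a toy Keras model. The tiny model, the step counts and the\nlearning rates are assumptions made for illustration.\n\"\"\"\nimport numpy as np\nfrom tensorflow import keras\n\nfrom axelerate.networks.common_utils.callbacks import (WarmUpCosineDecayScheduler,\n                                                       cosine_decay_with_warmup)\n\ntotal_steps = 1000\nwarmup_steps = 100\n\n# Inspect the raw schedule: linear warmup from 1e-5 to 1e-3, then cosine decay towards 0.\nlrs = [cosine_decay_with_warmup(step,\n                                learning_rate_base=1e-3,\n                                total_steps=total_steps,\n                                warmup_learning_rate=1e-5,\n                                warmup_steps=warmup_steps) for step in range(total_steps)]\nprint('lr at step 0 / 100 / 999:', lrs[0], lrs[100], lrs[-1])\n\n# The callback recomputes the learning rate before every batch during model.fit().\nmodel = keras.Sequential([keras.layers.Dense(2, input_shape=(4,), activation='softmax')])\nmodel.compile(optimizer='adam', loss='sparse_categorical_crossentropy')\nscheduler = WarmUpCosineDecayScheduler(learning_rate_base=1e-3,\n                                       total_steps=total_steps,\n                                       warmup_learning_rate=1e-5,\n                                       warmup_steps=warmup_steps,\n                                       verbose=1)\nx = np.random.rand(64, 4)\ny = np.random.randint(0, 2, size=64)\nmodel.fit(x, y, batch_size=8, epochs=2, callbacks=[scheduler])\n"
  },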
  {
    "path": "axelerate/networks/common_utils/convert.py",
    "content": "import tensorflow as tf\nimport tensorflow.keras.backend as k\nimport subprocess\nimport os\nimport cv2\nimport argparse\nimport tarfile\nimport glob\nimport shutil\nimport numpy as np\nimport shlex\n\nk210_converter_path=os.path.join(os.path.dirname(__file__),\"ncc\",\"ncc\")\nk210_converter_download_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),'ncc_linux_x86_64.tar.xz')\nnncase_download_url=\"https://github.com/kendryte/nncase/releases/download/v0.2.0-beta4/ncc_linux_x86_64.tar.xz\"\ncwd = os.path.dirname(os.path.realpath(__file__))\n\ndef run_command(cmd, cwd=None):\n    with subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, executable='/bin/bash', universal_newlines=True, cwd=cwd) as p:\n        while True:\n            line = p.stdout.readline()\n            if not line:\n                break\n            print(line)    \n        exit_code = p.poll()\n    return exit_code\n\nclass Converter(object):\n    def __init__(self, converter_type, backend=None, dataset_path=None):\n        if 'tflite' in converter_type:\n            print('Tflite Converter ready')\n\n        if 'k210' in converter_type:\n            if os.path.exists(k210_converter_path):\n                print('K210 Converter ready')\n            else:\n                print('Downloading K210 Converter')\n                _path = tf.keras.utils.get_file(k210_converter_download_path, nncase_download_url)     \n                print(_path)    \n                tar_file = tarfile.open(k210_converter_download_path)\n                tar_file.extractall(os.path.join(os.path.dirname(__file__),\"ncc\"))\n                tar_file.close()\n                os.chmod(k210_converter_path, 0o775)\n\n        if 'edgetpu' in converter_type:\n            rc, out = subprocess.getstatusoutput('dpkg -l edgetpu-compiler')\n            if rc == 0:\n                print('Edge TPU Converter ready')\n            else:\n                print('Installing Edge TPU Converter')\n                cmd = \"bash install_edge_tpu_compiler.sh\"\n                result = run_command(cmd, cwd)\n                print(result)\n                \n        if 'openvino' in converter_type:\n            rc = os.path.isdir('/opt/intel/openvino')\n            if rc:\n                print('OpenVINO Converter ready')\n            else:\n                print('Installing OpenVINO Converter')\n                cmd = \"bash install_openvino.sh\"\n                result = run_command(cmd, cwd)\n                print(result)       \n                \n        if 'onnx' in converter_type:\n            try:\n                import tf2onnx\n            except:\n                cmd = \"pip install tf2onnx\"\n                result = run_command(cmd, cwd)\n                print(result)              \n                \n        self._converter_type = converter_type\n        self._backend = backend\n        self._dataset_path=dataset_path\n\n    def edgetpu_dataset_gen(self):\n        num_imgs = 300\n        image_files_list = []\n        from axelerate.networks.common_utils.feature import create_feature_extractor\n        backend = create_feature_extractor(self._backend, [self._img_size[0], self._img_size[1]])\n        image_search = lambda ext : glob.glob(self._dataset_path + ext, recursive=True)\n        for ext in ['/**/*.jpg', '/**/*.jpeg', '/**/*.png']: image_files_list.extend(image_search(ext))\n\n        for filename in image_files_list[:num_imgs]:\n            image = cv2.imread(filename)\n            image = cv2.cvtColor(image, 
cv2.COLOR_BGR2RGB)\n            image = cv2.resize(image, (self._img_size[0], self._img_size[1]))\n            data = np.array(backend.normalize(image), dtype=np.float32)\n            data = np.expand_dims(data, 0)\n            yield [data]\n\n    def k210_dataset_gen(self):\n        num_imgs = 300\n        image_files_list = []\n        from axelerate.networks.common_utils.feature import create_feature_extractor\n        backend = create_feature_extractor(self._backend, [self._img_size[0], self._img_size[1]])\n        image_search = lambda ext : glob.glob(self._dataset_path + ext, recursive=True)\n        for ext in ['/**/*.jpg', '/**/*.jpeg', '/**/*.png']: image_files_list.extend(image_search(ext))\n        temp_folder = os.path.join(os.path.dirname(__file__),'tmp')\n        os.mkdir(temp_folder)\n        for filename in image_files_list[:num_imgs]:\n            image = cv2.imread(filename)\n            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n            image = cv2.resize(image, (self._img_size[0], self._img_size[1]))\n            data = np.array(backend.normalize(image), dtype=np.float32)\n            data = np.expand_dims(data, 0)\n            bin_filename = os.path.basename(filename).split('.')[0]+'.bin'\n            with open(os.path.join(temp_folder, bin_filename), \"wb\") as f: \n                data = np.transpose(data, [0, 3, 1, 2])\n                data.tofile(f)\n        return temp_folder\n\n    def convert_edgetpu(self, model_path):\n        output_path = os.path.dirname(model_path)\n        print(output_path)\n        cmd = \"edgetpu_compiler --out_dir {} {}\".format(output_path, model_path)\n        print(cmd)\n        result = run_command(cmd)\n        print(result)\n\n    def convert_k210(self, model_path):\n        folder_name = self.k210_dataset_gen()\n        output_name = os.path.basename(model_path).split(\".\")[0]+\".kmodel\"\n        output_path = os.path.join(os.path.dirname(model_path),output_name)\n        print(output_path)\n        cmd = '{} compile \"{}\" \"{}\" -i tflite --weights-quantize-threshold 1000 --dataset-format raw --dataset \"{}\"'.format(k210_converter_path, model_path, output_path, folder_name)\n        print(cmd)\n        result = run_command(cmd)\n        shutil.rmtree(folder_name, ignore_errors=True)\n        print(result)\n\n    def convert_ir(self, model_path, model_layers):\n        input_model = os.path.join(model_path.split(\".\")[0], \"saved_model.pb\")\n        output_dir = os.path.dirname(model_path)\n        output_layer = model_layers[-2].name+'/BiasAdd'\n        cmd = 'source /opt/intel/openvino/bin/setupvars.sh && python3 /opt/intel/openvino/deployment_tools/model_optimizer/mo.py --input_model \"{}\" --output {} --batch 1 --reverse_input_channels --data_type FP16 --mean_values [127.5,127.5,127.5] --scale_values [127.5] --output_dir \"{}\"'.format(input_model, output_layer, output_dir)\n        print(cmd)\n        result = run_command(cmd)\n        print(result)\n\n    def convert_oak(self, model_path):\n        output_name = model_path.split(\".\")[0]+\".blob\"\n        cmd = 'source /opt/intel/openvino/bin/setupvars.sh && /opt/intel/openvino/deployment_tools/inference_engine/lib/intel64/myriad_compile -m \"{}\" -o \"{}\" -ip U8 -VPU_MYRIAD_PLATFORM VPU_MYRIAD_2480 -VPU_NUMBER_OF_SHAVES 4 -VPU_NUMBER_OF_CMX_SLICES 4'.format(model_path.split(\".\")[0] + '.xml', output_name)\n        print(cmd)\n        result = run_command(cmd)\n        print(result)\n\n    def convert_onnx(self, model):\n        spec = 
(tf.TensorSpec((None, *self._img_size, 3), tf.float32, name=\"input\"),)\n        output_path = self.model_path.split(\".\")[0] + '.onnx'\n        model_proto, external_tensor_storage = tf2onnx.convert.from_keras(model, input_signature=spec, output_path = output_path)\n\n    def convert_tflite(self, model, model_layers, target=None):\n        model_type = model.name\n        model.summary()\n\n        if target=='k210': \n            if model_type == 'yolo' or model_type == 'segnet':\n                print(\"Converting to tflite without Reshape for K210 YOLO\")\n                if len(model.outputs) == 2:\n                    output1 = model.get_layer(name=\"detection_layer_1\").output\n                    output2 = model.get_layer(name=\"detection_layer_2\").output\n                    model = tf.keras.Model(inputs=model.input, outputs=[output1, output2])\n                else:\n                    model = tf.keras.Model(inputs=model.input, outputs=model.layers[-2].output)\n                    \n            model.input.set_shape(1 + model.input.shape[1:])\n            converter = tf.lite.TFLiteConverter.from_keras_model(model)\n\n        elif target == 'edgetpu':\n            converter = tf.lite.TFLiteConverter.from_keras_model(model)\n            converter.optimizations = [tf.lite.Optimize.DEFAULT]\n            converter.representative_dataset = self.edgetpu_dataset_gen\n            converter.target_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]\n            converter.inference_input_type = tf.uint8\n            converter.inference_output_type = tf.uint8\n\n        elif target == 'tflite_dynamic':\n            converter = tf.lite.TFLiteConverter.from_keras_model(model)\n            converter.optimizations = [tf.lite.Optimize.DEFAULT]\n            \n        elif target == 'tflite_fullint':\n            converter = tf.lite.TFLiteConverter.from_keras_model(model)\n            converter.optimizations = [tf.lite.Optimize.DEFAULT]            \n            converter.representative_dataset = self.edgetpu_dataset_gen\n            \n        else:\n            converter = tf.lite.TFLiteConverter.from_keras_model(model)\n\n        tflite_model = converter.convert()\n        open(os.path.join (self.model_path.split(\".\")[0] + '.tflite'), \"wb\").write(tflite_model)\n\n    def convert_model(self, model_path):\n        k.clear_session()\n        k.set_learning_phase(0)\n        model = tf.keras.models.load_model(model_path, compile=False)\n        model_layers = model.layers\n        self._img_size = model.input_shape[1:3]\n        self.model_path = os.path.abspath(model_path)\n\n        if 'k210' in self._converter_type:\n            self.convert_tflite(model, model_layers, 'k210')\n            self.convert_k210(self.model_path.split(\".\")[0] + '.tflite')\n\n        if 'edgetpu' in self._converter_type:\n            self.convert_tflite(model, model_layers, 'edgetpu')\n            self.convert_edgetpu(model_path.split(\".\")[0] + '.tflite')\n\n        if 'onnx' in self._converter_type:\n            self.convert_onnx(model)\n            \n        if 'openvino' in self._converter_type:\n            model.save(model_path.split(\".\")[0])\n            self.convert_ir(model_path, model_layers)\n            self.convert_oak(model_path)\n\n        if 'tflite' in self._converter_type:\n            self.convert_tflite(model, model_layers, self._converter_type)\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser(description=\"Keras model conversion to .kmodel, .tflite, or .onnx\")\n    
parser.add_argument(\"--model_path\", \"-m\", type=str, required=True,\n                        help=\"path to keras model\")\n    parser.add_argument(\"--converter_type\", type=str, default='k210',\n                        help=\"batch size\")\n    parser.add_argument(\"--dataset_path\", type=str, required=False,\n                        help=\"path to calibration dataset\")\n    parser.add_argument(\"--backend\", type=str, default='MobileNet7_5',\n                    help=\"network feature extractor, e.g. Mobilenet/YOLO/NASNet/etc\")                    \n    args = parser.parse_args()\n    converter = Converter(args.converter_type, args.backend, args.dataset_path)\n    converter.convert_model(args.model_path)\n"
  },
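  {
    "path": "examples/convert_usage_sketch.py",
    "content": "\"\"\"Hypothetical usage sketch, not part of the original repository: drives the Converter\nclass from axelerate/networks/common_utils/convert.py. The model path\n'projects/classifier/classifier.h5' and the calibration image folder are placeholders\nchosen for illustration.\n\"\"\"\nfrom axelerate.networks.common_utils.convert import Converter\n\n# Dynamic-range tflite quantization needs no calibration images.\ntflite_converter = Converter('tflite_dynamic')\ntflite_converter.convert_model('projects/classifier/classifier.h5')\n\n# K210 conversion calibrates on sample images, so the feature extractor name and a\n# folder of representative images should match the ones used for training.\nk210_converter = Converter('k210', backend='MobileNet7_5',\n                           dataset_path='projects/classifier/imgs')\nk210_converter.convert_model('projects/classifier/classifier.h5')\n"
  },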
  {
    "path": "axelerate/networks/common_utils/feature.py",
    "content": "import tensorflow\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda, ZeroPadding2D\nfrom tensorflow.keras.layers import LeakyReLU\nfrom tensorflow.keras.layers import Concatenate\nfrom tensorflow.keras.applications import DenseNet121\nfrom tensorflow.keras.applications import NASNetMobile\nfrom tensorflow.keras.applications import ResNet50\n\nfrom .mobilenet_sipeed.mobilenet import MobileNet\n\ndef create_feature_extractor(architecture, input_size, weights = None):\n    \"\"\"\n    # Args\n        architecture : str\n        input_size : int\n\n    # Returns\n        feature_extractor : BaseFeatureExtractor instance\n    \"\"\"\n    if architecture == 'DenseNet121':\n        feature_extractor = DenseNet121Feature(input_size, weights)\n    elif architecture == 'SqueezeNet':\n        feature_extractor = SqueezeNetFeature(input_size, weights)\n    elif architecture == 'MobileNet1_0':\n        feature_extractor = MobileNetFeature(input_size, weights, alpha=1)\n    elif architecture == 'MobileNet7_5':\n        feature_extractor = MobileNetFeature(input_size, weights, alpha=0.75)\n    elif architecture == 'MobileNet5_0':\n        feature_extractor = MobileNetFeature(input_size, weights, alpha=0.5)\n    elif architecture == 'MobileNet2_5':\n        feature_extractor = MobileNetFeature(input_size, weights, alpha=0.25)\n    elif architecture == 'Full Yolo':\n        feature_extractor = FullYoloFeature(input_size, weights)\n    elif architecture == 'Tiny Yolo':\n        feature_extractor = TinyYoloFeature(input_size, weights)\n    elif architecture == 'NASNetMobile':\n        feature_extractor = NASNetMobileFeature(input_size, weights)\n    elif architecture == 'ResNet50':\n        feature_extractor = ResNet50Feature(input_size, weights)\n    else:\n        raise Exception('Architecture not supported! 
Name should be Full Yolo, Tiny Yolo, MobileNet1_0, MobileNet7_5, MobileNet5_0, MobileNet2_5, SqueezeNet, NASNetMobile, ResNet50 or DenseNet121')\n    return feature_extractor\n\n\n\nclass BaseFeatureExtractor(object):\n    \"\"\"docstring for ClassName\"\"\"\n\n    # to be defined in each subclass\n    def __init__(self, input_size):\n        raise NotImplementedError(\"error message\")\n\n    # to be defined in each subclass\n    def normalize(self, image):\n        raise NotImplementedError(\"error message\")       \n\n    def get_input_size(self):\n        input_shape = self.feature_extractor.get_input_shape_at(0)\n        assert input_shape[1] == input_shape[2]\n        return input_shape[1]\n\n    def get_output_size(self, layer = None):\n        if not layer:\n            output_shape = self.feature_extractor.outputs[0].shape\n        output_shape = self.feature_extractor.get_layer(layer).output.shape\n        return output_shape[1:3]\n\n    def get_output_tensor(self, layer):\n        return self.feature_extractor.get_layer(layer).output\n\n    def extract(self, input_image):\n        return self.feature_extractor(input_image)\n\nclass FullYoloFeature(BaseFeatureExtractor):\n    \"\"\"docstring for ClassName\"\"\"\n    def __init__(self, input_size, weights=None):\n        input_image = Input(shape=(input_size[0], input_size[1], 3))\n\n        # the function to implement the orgnization layer (thanks to github.com/allanzelener/YAD2K)\n        def space_to_depth_x2(x):\n            return tensorflow.nn.space_to_depth(x, block_size=2)\n\n        # Layer 1\n        x = Conv2D(32, (3,3), strides=(1,1), padding='same', name='conv_1', use_bias=False)(input_image)\n        x = BatchNormalization(name='norm_1')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n        x = MaxPooling2D(pool_size=(2, 2))(x)\n\n        # Layer 2\n        x = Conv2D(64, (3,3), strides=(1,1), padding='same', name='conv_2', use_bias=False)(x)\n        x = BatchNormalization(name='norm_2')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n        x = MaxPooling2D(pool_size=(2, 2))(x)\n\n        # Layer 3\n        x = Conv2D(128, (3,3), strides=(1,1), padding='same', name='conv_3', use_bias=False)(x)\n        x = BatchNormalization(name='norm_3')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n\n        # Layer 4\n        x = Conv2D(64, (1,1), strides=(1,1), padding='same', name='conv_4', use_bias=False)(x)\n        x = BatchNormalization(name='norm_4')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n\n        # Layer 5\n        x = Conv2D(128, (3,3), strides=(1,1), padding='same', name='conv_5', use_bias=False)(x)\n        x = BatchNormalization(name='norm_5')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n        x = MaxPooling2D(pool_size=(2, 2))(x)\n\n        # Layer 6\n        x = Conv2D(256, (3,3), strides=(1,1), padding='same', name='conv_6', use_bias=False)(x)\n        x = BatchNormalization(name='norm_6')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n\n        # Layer 7\n        x = Conv2D(128, (1,1), strides=(1,1), padding='same', name='conv_7', use_bias=False)(x)\n        x = BatchNormalization(name='norm_7')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n\n        # Layer 8\n        x = Conv2D(256, (3,3), strides=(1,1), padding='same', name='conv_8', use_bias=False)(x)\n        x = BatchNormalization(name='norm_8')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n        x = MaxPooling2D(pool_size=(2, 2))(x)\n\n        # Layer 9\n        x = Conv2D(512, (3,3), strides=(1,1), padding='same', name='conv_9', use_bias=False)(x)\n        x = 
BatchNormalization(name='norm_9')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n\n        # Layer 10\n        x = Conv2D(256, (1,1), strides=(1,1), padding='same', name='conv_10', use_bias=False)(x)\n        x = BatchNormalization(name='norm_10')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n\n        # Layer 11\n        x = Conv2D(512, (3,3), strides=(1,1), padding='same', name='conv_11', use_bias=False)(x)\n        x = BatchNormalization(name='norm_11')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n\n        # Layer 12\n        x = Conv2D(256, (1,1), strides=(1,1), padding='same', name='conv_12', use_bias=False)(x)\n        x = BatchNormalization(name='norm_12')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n\n        # Layer 13\n        x = Conv2D(512, (3,3), strides=(1,1), padding='same', name='conv_13', use_bias=False)(x)\n        x = BatchNormalization(name='norm_13')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n\n        skip_connection = x\n\n        x = MaxPooling2D(pool_size=(2, 2))(x)
\n\n        # Layer 14\n        x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_14', use_bias=False)(x)\n        x = BatchNormalization(name='norm_14')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n\n        # Layer 15\n        x = Conv2D(512, (1,1), strides=(1,1), padding='same', name='conv_15', use_bias=False)(x)\n        x = BatchNormalization(name='norm_15')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n\n        # Layer 16\n        x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_16', use_bias=False)(x)\n        x = BatchNormalization(name='norm_16')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n\n        # Layer 17\n        x = Conv2D(512, (1,1), strides=(1,1), padding='same', name='conv_17', use_bias=False)(x)\n        x = BatchNormalization(name='norm_17')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n\n        # Layer 18\n        x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_18', use_bias=False)(x)\n        x = BatchNormalization(name='norm_18')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n\n        # Layer 19\n        x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_19', use_bias=False)(x)\n        x = BatchNormalization(name='norm_19')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n\n        # Layer 20\n        x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_20', use_bias=False)(x)\n        x = BatchNormalization(name='norm_20')(x)\n        x = LeakyReLU(alpha=0.1)(x)
\n\n        # Layer 21\n        skip_connection = Conv2D(64, (1,1), strides=(1,1), padding='same', name='conv_21', use_bias=False)(skip_connection)\n        skip_connection = BatchNormalization(name='norm_21')(skip_connection)\n        skip_connection = LeakyReLU(alpha=0.1)(skip_connection)\n        skip_connection = Lambda(space_to_depth_x2)(skip_connection)\n\n        x = Concatenate()([skip_connection, x])\n\n        # Layer 22\n        x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_22', use_bias=False)(x)\n        x = BatchNormalization(name='norm_22')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n\n        self.feature_extractor = Model(input_image, x)\n\n        if weights == 'imagenet':\n            print('ImageNet weights for the YOLO backend are not available yet, defaulting to random weights')\n        elif weights is None:\n            pass\n        else:\n            print('Loaded backend weights: '+weights)\n            self.feature_extractor.load_weights(weights)\n\n    def normalize(self, image):\n        return image / 255.
\n\nclass TinyYoloFeature(BaseFeatureExtractor):\n    \"\"\"Tiny YOLO v2 feature extractor.\"\"\"\n    def __init__(self, input_size, weights):\n        input_image = Input(shape=(input_size[0], input_size[1], 3))\n\n        # Layer 1\n        x = Conv2D(16, (3,3), strides=(1,1), padding='same', name='conv_1', use_bias=False)(input_image)\n        x = BatchNormalization(name='norm_1')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n        x = MaxPooling2D(pool_size=(2, 2))(x)\n\n        # Layer 2 - 5\n        for i in range(0,4):\n            x = Conv2D(24*(2**i), (3,3), strides=(1,1), padding='same', name='conv_' + str(i+2), use_bias=False)(x)\n            x = BatchNormalization(name='norm_' + str(i+2))(x)\n            x = LeakyReLU(alpha=0.1)(x)\n            x = MaxPooling2D(pool_size=(2, 2))(x)\n\n        # Layer 6\n        x = Conv2D(256, (3,3), strides=(1,1), padding='same', name='conv_6', use_bias=False)(x)\n        x = BatchNormalization(name='norm_6')(x)\n        x = LeakyReLU(alpha=0.1)(x)\n        x = MaxPooling2D(pool_size=(2, 2), strides=(1,1), padding='same')(x)\n\n        # Layer 7 - 8\n        for i in range(0,2):\n            x = Conv2D(312, (3,3), strides=(1,1), padding='same', name='conv_' + str(i+7), use_bias=False)(x)\n            x = BatchNormalization(name='norm_' + str(i+7))(x)\n            x = LeakyReLU(alpha=0.1)(x)\n\n        self.feature_extractor = Model(input_image, x)\n\n        if weights == 'imagenet':\n            print('ImageNet weights for the YOLO backend are not available yet, defaulting to random weights')\n        elif weights is None:\n            pass\n        else:\n            print('Loaded backend weights: '+weights)\n            self.feature_extractor.load_weights(weights)\n\n\n    def normalize(self, image):\n        return image / 255.
\n\nclass MobileNetFeature(BaseFeatureExtractor):\n    \"\"\"MobileNet feature extractor.\"\"\"\n    def __init__(self, input_size, weights, alpha):\n        input_image = Input(shape=(input_size[0], input_size[1], 3))\n        input_shapes_imagenet = [(128, 128,3), (160, 160,3), (192, 192,3), (224, 224,3)]\n        input_shape =(128,128,3)\n        for item in input_shapes_imagenet:\n            if item[0] <= input_size[0]:\n                input_shape = item\n\n        if weights == 'imagenet':\n            mobilenet = MobileNet(input_shape=input_shape, input_tensor=input_image, alpha = alpha, weights = 'imagenet', include_top=False, backend=tensorflow.keras.backend, layers=tensorflow.keras.layers, models=tensorflow.keras.models, utils=tensorflow.keras.utils)\n            print('Successfully loaded imagenet backend weights')\n        else:\n            mobilenet = MobileNet(input_shape=(input_size[0],input_size[1],3),alpha = alpha,depth_multiplier = 1, dropout = 0.001, weights = None, include_top=False, backend=tensorflow.keras.backend, layers=tensorflow.keras.layers,models=tensorflow.keras.models,utils=tensorflow.keras.utils)\n            if weights:\n                print('Loaded backend weights: '+weights)\n                mobilenet.load_weights(weights)\n\n        self.feature_extractor = mobilenet\n\n    def normalize(self, image):\n        image = image / 255.\n        image = image - 0.5\n        image = image * 2.\n\n        return image
\n\nclass SqueezeNetFeature(BaseFeatureExtractor):\n    \"\"\"SqueezeNet feature extractor.\"\"\"\n    def __init__(self, input_size, weights):\n\n        # define some auxiliary variables and the fire module\n        sq1x1  = \"squeeze1x1\"\n        exp1x1 = \"expand1x1\"\n        exp3x3 = \"expand3x3\"\n        relu   = \"relu_\"\n\n        def fire_module(x, fire_id, squeeze=16, expand=64):\n            s_id = 'fire' + str(fire_id) + '/'\n            x = Conv2D(squeeze, (1, 1), padding='valid', name=s_id + sq1x1)(x)\n            x = Activation('relu', name=s_id + relu + sq1x1)(x)\n\n            left = Conv2D(expand,  (1, 1), padding='valid', name=s_id + exp1x1)(x)\n            left = Activation('relu', name=s_id + relu + exp1x1)(left)\n\n            right = Conv2D(expand,  (3, 3), padding='same',  name=s_id + exp3x3)(x)\n            right = Activation('relu', name=s_id + relu + exp3x3)(right)\n\n            x = Concatenate(axis=3, name=s_id + 'concat')([left, right])\n\n            return x
\n\n        # define the model of SqueezeNet\n        input_image = Input(shape=(input_size[0], input_size[1], 3))\n        x = ZeroPadding2D(padding=((1, 1), (1, 1)), name='pad')(input_image)\n        x = Conv2D(64, (3, 3), strides=(2, 2), padding='valid', name='conv1')(x)\n        x = Activation('relu', name='relu_conv1')(x)\n        x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool1')(x)\n\n        x = fire_module(x, fire_id=2, squeeze=16, expand=64)\n        x = fire_module(x, fire_id=3, squeeze=16, expand=64)\n        x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool3')(x)\n\n        x = fire_module(x, fire_id=4, squeeze=32, expand=128)\n        x = fire_module(x, fire_id=5, squeeze=32, expand=128)\n        x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool5')(x)\n\n        x = fire_module(x, fire_id=6, squeeze=48, expand=192)\n        x = fire_module(x, fire_id=7, squeeze=48, expand=192)\n        x = fire_module(x, fire_id=8, squeeze=64, expand=256)\n        x = fire_module(x, fire_id=9, squeeze=64, expand=256)\n\n        self.feature_extractor = Model(input_image, x)\n\n        if weights == 'imagenet':\n            print('ImageNet weights for the SqueezeNet backend are not available yet, defaulting to random weights')\n        elif weights is None:\n            pass\n        else:\n            print('Loaded backend weights: '+ weights)\n            self.feature_extractor.load_weights(weights)\n\n\n    def normalize(self, image):\n        image = image[..., ::-1]\n        image = image.astype('float')\n\n        image[..., 0] -= 103.939\n        image[..., 1] -= 116.779\n        image[..., 2] -= 123.68\n\n        return image
\n\nclass DenseNet121Feature(BaseFeatureExtractor):\n    \"\"\"DenseNet121 feature extractor.\"\"\"\n    def __init__(self, input_size, weights):\n        input_image = Input(shape=(input_size[0], input_size[1], 3))\n\n        if weights == 'imagenet':\n            densenet = DenseNet121(input_tensor=input_image, include_top=False, weights='imagenet', pooling=None)\n            print('Successfully loaded imagenet backend weights')\n        else:\n            densenet = DenseNet121(input_tensor=input_image, include_top=False, weights=None, pooling=None)\n            if weights:\n                densenet.load_weights(weights)\n                print('Loaded backend weights: ' + weights)\n\n        self.feature_extractor = densenet\n\n    def normalize(self, image):\n        from tensorflow.keras.applications.densenet import preprocess_input\n        return preprocess_input(image)\n\nclass NASNetMobileFeature(BaseFeatureExtractor):\n    \"\"\"NASNetMobile feature extractor.\"\"\"\n    def __init__(self, input_size, weights):\n        input_image = Input(shape=(input_size[0], input_size[1], 3))\n\n        if weights == 'imagenet':\n            nasnetmobile = 
NASNetMobile(input_tensor=input_image, include_top=False, weights='imagenet', pooling=None)\n            print('Successfully loaded imagenet backend weights')\n        else:\n            nasnetmobile = NASNetMobile(input_tensor=input_image, include_top=False, weights=None, pooling=None)\n            if weights:\n                nasnetmobile.load_weights(weights)\n                print('Loaded backend weights: ' + weights)\n        self.feature_extractor = nasnetmobile\n\n    def normalize(self, image):\n        from tensorflow.keras.applications.nasnet import preprocess_input\n        return preprocess_input(image)
\n\nclass ResNet50Feature(BaseFeatureExtractor):\n    \"\"\"ResNet50 feature extractor.\"\"\"\n    def __init__(self, input_size, weights):\n        input_image = Input(shape=(input_size[0], input_size[1], 3))\n\n        if weights == 'imagenet':\n            resnet50 = ResNet50(input_tensor=input_image, weights='imagenet', include_top=False, pooling = None)\n            print('Successfully loaded imagenet backend weights')\n        else:\n            # pass weights=None explicitly so Keras does not download ImageNet weights here\n            resnet50 = ResNet50(input_tensor=input_image, weights=None, include_top=False, pooling = None)\n            if weights:\n                resnet50.load_weights(weights)\n                print('Loaded backend weights: ' + weights)\n\n        self.feature_extractor = resnet50\n\n    def normalize(self, image):\n        image = image[..., ::-1]\n        image = image.astype('float')\n\n        image[..., 0] -= 103.939\n        image[..., 1] -= 116.779\n        image[..., 2] -= 123.68\n\n        return image\n"
  },
  {
    "path": "axelerate/networks/common_utils/fit.py",
    "content": "import shutil\nimport os\nimport time\nimport tensorflow as tf\nimport numpy as np\nimport warnings\n\nfrom axelerate.networks.common_utils.callbacks import WarmUpCosineDecayScheduler\nfrom axelerate.networks.yolo.backend.utils.custom import MergeMetrics\nfrom tensorflow.keras.optimizers import SGD\nfrom tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint\nfrom datetime import datetime\n\ndef train(model,\n         loss_func,\n         train_batch_gen,\n         valid_batch_gen,\n         learning_rate = 1e-4,\n         nb_epoch = 300,\n         project_folder = 'project',\n         first_trainable_layer = None,\n         metric=None,\n         metric_name=\"val_loss\"):\n    \"\"\"A function that performs training on a general keras model.\n\n    # Args\n        model : keras.models.Model instance\n        loss_func : function\n            refer to https://keras.io/losses/\n\n        train_batch_gen : keras.utils.Sequence instance\n        valid_batch_gen : keras.utils.Sequence instance\n        learning_rate : float\n        saved_weights_name : str\n    \"\"\"\n\n    # Create project directory\n    train_start = time.time()\n    train_date = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')\n    path = os.path.join(project_folder, train_date)\n    basename = model.name + \"_best_\"+ metric_name\n    print('Current training session folder is {}'.format(path))\n    os.makedirs(path)\n    save_weights_name = os.path.join(path, basename + '.h5')\n    save_weights_name_ctrlc = os.path.join(path, basename + '_ctrlc.h5')\n    print('\\n')\n\n    # 1 Freeze layers\n    layer_names = [layer.name for layer in model.layers]\n    fixed_layers = []\n    if first_trainable_layer in layer_names:\n        for layer in model.layers:\n            if layer.name == first_trainable_layer:\n                break\n            layer.trainable = False\n            fixed_layers.append(layer.name)\n    elif not first_trainable_layer:\n        pass\n    else:\n        print('First trainable layer specified in config file is not in the model. Did you mean one of these?')\n        for i,layer in enumerate(model.layers):\n            print(i,layer.name)\n        raise Exception('First trainable layer specified in config file is not in the model')\n\n    if fixed_layers != []:\n        print(\"The following layers do not update weights!!!\")\n        print(\"    \", fixed_layers)\n\n    # 2 create optimizer\n    optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)\n\n    if not metric:\n        metric = metric_name\n    else:\n        metric = metric[metric_name]\n\n    print(metric)    \n    # 3. 
create loss function\n    model.compile(loss=loss_func, optimizer=optimizer, metrics=metric if metric != 'loss' else None)\n    model.summary()\n\n    #4 create callbacks   \n    \n    tensorboard_callback = tf.keras.callbacks.TensorBoard(\"logs\", histogram_freq=1)\n    \n    warm_up_lr = WarmUpCosineDecayScheduler(learning_rate_base=learning_rate,\n                                            total_steps=len(train_batch_gen)*nb_epoch,\n                                            warmup_learning_rate=0.0,\n                                            warmup_steps=len(train_batch_gen)*min(3, nb_epoch-1),\n                                            hold_base_rate_steps=0,\n                                            verbose=1)\n\n    if metric_name in ['recall', 'precision']:\n        mergedMetric = MergeMetrics(model, metric_name, 1, True, save_weights_name, tensorboard_callback)\n        callbacks = [mergedMetric, warm_up_lr, tensorboard_callback]  \n    else:  \n        early_stop = EarlyStopping(monitor='val_' + metric, \n                                min_delta=0.001, \n                                patience=20, \n                                mode='auto', \n                                verbose=2,\n                                restore_best_weights=True)\n                       \n        checkpoint = ModelCheckpoint(save_weights_name, \n                                 monitor='val_' + metric, \n                                 verbose=2, \n                                 save_best_only=True, \n                                 mode='auto', \n                                 period=1)\n                                 \n        reduce_lr = ReduceLROnPlateau(monitor='val_' + metric,\n                                factor=0.2,\n                                patience=10,\n                                min_lr=1e-6,\n                                mode='auto',\n                                verbose=2)   \n        callbacks = [early_stop, checkpoint, warm_up_lr, tensorboard_callback] \n\n    # 4. training\n    try:\n        model.fit(train_batch_gen,\n                steps_per_epoch  = len(train_batch_gen), \n                epochs           = nb_epoch,\n                validation_data  = valid_batch_gen,\n                validation_steps = len(valid_batch_gen),\n                callbacks        = callbacks,                        \n                verbose          = 1,\n                workers          = 4,\n                max_queue_size   = 10,\n                use_multiprocessing = True)\n    except KeyboardInterrupt:\n        print(\"Saving model and copying logs\")\n        model.save(save_weights_name_ctrlc, overwrite=True, include_optimizer=False)\n        shutil.copytree(\"logs\", os.path.join(path, \"logs\"))  \n        return model.layers, save_weights_name_ctrlc \n        \n    shutil.copytree(\"logs\", os.path.join(path, \"logs\"))\n    _print_time(time.time()-train_start)\n    return model.layers, save_weights_name\n\ndef _print_time(process_time):\n    if process_time < 60:\n        print(\"{:d}-seconds to train\".format(int(process_time)))\n    else:\n        print(\"{:d}-mins to train\".format(int(process_time/60)))\n\n"
  },
  {
    "path": "axelerate/networks/common_utils/install_edge_tpu_compiler.sh",
    "content": "wget https://packages.cloud.google.com/apt/doc/apt-key.gpg \n\nsudo apt-key add apt-key.gpg &&\n\necho \"deb https://packages.cloud.google.com/apt coral-edgetpu-stable main\" | sudo tee /etc/apt/sources.list.d/coral-edgetpu.list\n\nsudo apt-get update && sudo apt-get install -y edgetpu-compiler &&\n\nrm apt-key.gpg\n"
  },
  {
    "path": "axelerate/networks/common_utils/install_openvino.sh",
    "content": "sudo apt-get install -y pciutils cpio &&\nwget http://registrationcenter-download.intel.com/akdlm/irc_nas/16345/l_openvino_toolkit_p_2020.1.023.tgz &&\ntar xf l_openvino_toolkit_p_2020.1.023.tgz &&\ncd l_openvino_toolkit_p_2020.1.023 && \nsudo -E ./install_openvino_dependencies.sh && \nsed -i 's/decline/accept/g' silent.cfg && \nsudo -E ./install.sh --silent silent.cfg\n"
  },
  {
    "path": "axelerate/networks/common_utils/mobilenet_sipeed/__init__.py",
    "content": "\"\"\"Enables dynamic setting of underlying Keras module.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n_KERAS_BACKEND = None\n_KERAS_LAYERS = None\n_KERAS_MODELS = None\n_KERAS_UTILS = None\n\n\ndef set_keras_submodules(backend=None,\n                         layers=None,\n                         models=None,\n                         utils=None,\n                         engine=None):\n    # Deprecated, will be removed in the future.\n    global _KERAS_BACKEND\n    global _KERAS_LAYERS\n    global _KERAS_MODELS\n    global _KERAS_UTILS\n    _KERAS_BACKEND = backend\n    _KERAS_LAYERS = layers\n    _KERAS_MODELS = models\n    _KERAS_UTILS = utils\n\n\ndef get_keras_submodule(name):\n    # Deprecated, will be removed in the future.\n    if name not in {'backend', 'layers', 'models', 'utils'}:\n        raise ImportError(\n            'Can only retrieve one of \"backend\", '\n            '\"layers\", \"models\", or \"utils\". '\n            'Requested: %s' % name)\n    if _KERAS_BACKEND is None:\n        raise ImportError('You need to first `import keras` '\n                          'in order to use `keras_applications`. '\n                          'For instance, you can do:\\n\\n'\n                          '```\\n'\n                          'import keras\\n'\n                          'from keras_applications import vgg16\\n'\n                          '```\\n\\n'\n                          'Or, preferably, this equivalent formulation:\\n\\n'\n                          '```\\n'\n                          'from keras import applications\\n'\n                          '```\\n')\n    if name == 'backend':\n        return _KERAS_BACKEND\n    elif name == 'layers':\n        return _KERAS_LAYERS\n    elif name == 'models':\n        return _KERAS_MODELS\n    elif name == 'utils':\n        return _KERAS_UTILS\n\n\ndef get_submodules_from_kwargs(kwargs):\n    backend = kwargs.get('backend', _KERAS_BACKEND)\n    layers = kwargs.get('layers', _KERAS_LAYERS)\n    models = kwargs.get('models', _KERAS_MODELS)\n    utils = kwargs.get('utils', _KERAS_UTILS)\n    for key in kwargs.keys():\n        if key not in ['backend', 'layers', 'models', 'utils']:\n            raise TypeError('Invalid keyword argument: %s', key)\n    return backend, layers, models, utils\n\n\ndef correct_pad(backend, inputs, kernel_size):\n    \"\"\"Returns a tuple for zero-padding for 2D convolution with downsampling.\n\n    # Arguments\n        input_size: An integer or tuple/list of 2 integers.\n        kernel_size: An integer or tuple/list of 2 integers.\n\n    # Returns\n        A tuple.\n    \"\"\"\n    img_dim = 2 if backend.image_data_format() == 'channels_first' else 1\n    input_size = backend.int_shape(inputs)[img_dim:(img_dim + 2)]\n\n    if isinstance(kernel_size, int):\n        kernel_size = (kernel_size, kernel_size)\n\n    if input_size[0] is None:\n        adjust = (1, 1)\n    else:\n        adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)\n\n    correct = (kernel_size[0] // 2, kernel_size[1] // 2)\n\n    return ((correct[0] - adjust[0], correct[0]),\n            (correct[1] - adjust[1], correct[1]))\n\n__version__ = '1.0.7'\n\n\nfrom . import mobilenet\n\n"
  },
  {
    "path": "axelerate/networks/common_utils/mobilenet_sipeed/imagenet_utils.py",
    "content": "\"\"\"Utilities for ImageNet data preprocessing & prediction decoding.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport json\nimport warnings\nimport numpy as np\n\nfrom . import get_submodules_from_kwargs\n\nCLASS_INDEX = None\nCLASS_INDEX_PATH = ('https://s3.amazonaws.com/deep-learning-models/'\n                    'image-models/imagenet_class_index.json')\n\n# Global tensor of imagenet mean for preprocessing symbolic inputs\n_IMAGENET_MEAN = None\n\n\ndef _preprocess_numpy_input(x, data_format, mode, **kwargs):\n    \"\"\"Preprocesses a Numpy array encoding a batch of images.\n\n    # Arguments\n        x: Input array, 3D or 4D.\n        data_format: Data format of the image array.\n        mode: One of \"caffe\", \"tf\" or \"torch\".\n            - caffe: will convert the images from RGB to BGR,\n                then will zero-center each color channel with\n                respect to the ImageNet dataset,\n                without scaling.\n            - tf: will scale pixels between -1 and 1,\n                sample-wise.\n            - torch: will scale pixels between 0 and 1 and then\n                will normalize each channel with respect to the\n                ImageNet dataset.\n\n    # Returns\n        Preprocessed Numpy array.\n    \"\"\"\n    backend, _, _, _ = get_submodules_from_kwargs(kwargs)\n    if not issubclass(x.dtype.type, np.floating):\n        x = x.astype(backend.floatx(), copy=False)\n\n    if mode == 'tf':\n        x /= 127.5\n        x -= 1.\n        return x\n\n    if mode == 'torch':\n        x /= 255.\n        mean = [0.485, 0.456, 0.406]\n        std = [0.229, 0.224, 0.225]\n    else:\n        if data_format == 'channels_first':\n            # 'RGB'->'BGR'\n            if x.ndim == 3:\n                x = x[::-1, ...]\n            else:\n                x = x[:, ::-1, ...]\n        else:\n            # 'RGB'->'BGR'\n            x = x[..., ::-1]\n        mean = [103.939, 116.779, 123.68]\n        std = None\n\n    # Zero-center by mean pixel\n    if data_format == 'channels_first':\n        if x.ndim == 3:\n            x[0, :, :] -= mean[0]\n            x[1, :, :] -= mean[1]\n            x[2, :, :] -= mean[2]\n            if std is not None:\n                x[0, :, :] /= std[0]\n                x[1, :, :] /= std[1]\n                x[2, :, :] /= std[2]\n        else:\n            x[:, 0, :, :] -= mean[0]\n            x[:, 1, :, :] -= mean[1]\n            x[:, 2, :, :] -= mean[2]\n            if std is not None:\n                x[:, 0, :, :] /= std[0]\n                x[:, 1, :, :] /= std[1]\n                x[:, 2, :, :] /= std[2]\n    else:\n        x[..., 0] -= mean[0]\n        x[..., 1] -= mean[1]\n        x[..., 2] -= mean[2]\n        if std is not None:\n            x[..., 0] /= std[0]\n            x[..., 1] /= std[1]\n            x[..., 2] /= std[2]\n    return x\n\n\ndef _preprocess_symbolic_input(x, data_format, mode, **kwargs):\n    \"\"\"Preprocesses a tensor encoding a batch of images.\n\n    # Arguments\n        x: Input tensor, 3D or 4D.\n        data_format: Data format of the image tensor.\n        mode: One of \"caffe\", \"tf\" or \"torch\".\n            - caffe: will convert the images from RGB to BGR,\n                then will zero-center each color channel with\n                respect to the ImageNet dataset,\n                without scaling.\n            - tf: will scale pixels between -1 and 1,\n                sample-wise.\n            - 
torch: will scale pixels between 0 and 1 and then\n                will normalize each channel with respect to the\n                ImageNet dataset.\n\n    # Returns\n        Preprocessed tensor.\n    \"\"\"\n    global _IMAGENET_MEAN\n\n    backend, _, _, _ = get_submodules_from_kwargs(kwargs)\n\n    if mode == 'tf':\n        x /= 127.5\n        x -= 1.\n        return x\n\n    if mode == 'torch':\n        x /= 255.\n        mean = [0.485, 0.456, 0.406]\n        std = [0.229, 0.224, 0.225]\n    else:\n        if data_format == 'channels_first':\n            # 'RGB'->'BGR'\n            if backend.ndim(x) == 3:\n                x = x[::-1, ...]\n            else:\n                x = x[:, ::-1, ...]\n        else:\n            # 'RGB'->'BGR'\n            x = x[..., ::-1]\n        mean = [103.939, 116.779, 123.68]\n        std = None\n\n    if _IMAGENET_MEAN is None:\n        _IMAGENET_MEAN = backend.constant(-np.array(mean))\n\n    # Zero-center by mean pixel\n    if backend.dtype(x) != backend.dtype(_IMAGENET_MEAN):\n        x = backend.bias_add(\n            x, backend.cast(_IMAGENET_MEAN, backend.dtype(x)),\n            data_format=data_format)\n    else:\n        x = backend.bias_add(x, _IMAGENET_MEAN, data_format)\n    if std is not None:\n        x /= std\n    return x\n\n\ndef preprocess_input(x, data_format=None, mode='caffe', **kwargs):\n    \"\"\"Preprocesses a tensor or Numpy array encoding a batch of images.\n\n    # Arguments\n        x: Input Numpy or symbolic tensor, 3D or 4D.\n            The preprocessed data is written over the input data\n            if the data types are compatible. To avoid this\n            behaviour, `numpy.copy(x)` can be used.\n        data_format: Data format of the image tensor/array.\n        mode: One of \"caffe\", \"tf\" or \"torch\".\n            - caffe: will convert the images from RGB to BGR,\n                then will zero-center each color channel with\n                respect to the ImageNet dataset,\n                without scaling.\n            - tf: will scale pixels between -1 and 1,\n                sample-wise.\n            - torch: will scale pixels between 0 and 1 and then\n                will normalize each channel with respect to the\n                ImageNet dataset.\n\n    # Returns\n        Preprocessed tensor or Numpy array.\n\n    # Raises\n        ValueError: In case of unknown `data_format` argument.\n    \"\"\"\n    backend, _, _, _ = get_submodules_from_kwargs(kwargs)\n\n    if data_format is None:\n        data_format = backend.image_data_format()\n    if data_format not in {'channels_first', 'channels_last'}:\n        raise ValueError('Unknown data_format ' + str(data_format))\n\n    if isinstance(x, np.ndarray):\n        return _preprocess_numpy_input(x, data_format=data_format,\n                                       mode=mode, **kwargs)\n    else:\n        return _preprocess_symbolic_input(x, data_format=data_format,\n                                          mode=mode, **kwargs)\n\n\ndef decode_predictions(preds, top=5, **kwargs):\n    \"\"\"Decodes the prediction of an ImageNet model.\n\n    # Arguments\n        preds: Numpy tensor encoding a batch of predictions.\n        top: Integer, how many top-guesses to return.\n\n    # Returns\n        A list of lists of top class prediction tuples\n        `(class_name, class_description, score)`.\n        One list of tuples per sample in batch input.\n\n    # Raises\n        ValueError: In case of invalid shape of the `pred` array\n            (must be 2D).\n    
\"\"\"\n    global CLASS_INDEX\n\n    backend, _, _, keras_utils = get_submodules_from_kwargs(kwargs)\n\n    if len(preds.shape) != 2 or preds.shape[1] != 1000:\n        raise ValueError('`decode_predictions` expects '\n                         'a batch of predictions '\n                         '(i.e. a 2D array of shape (samples, 1000)). '\n                         'Found array with shape: ' + str(preds.shape))\n    if CLASS_INDEX is None:\n        fpath = keras_utils.get_file(\n            'imagenet_class_index.json',\n            CLASS_INDEX_PATH,\n            cache_subdir='models',\n            file_hash='c2c37ea517e94d9795004a39431a14cb')\n        with open(fpath) as f:\n            CLASS_INDEX = json.load(f)\n    results = []\n    for pred in preds:\n        top_indices = pred.argsort()[-top:][::-1]\n        result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices]\n        result.sort(key=lambda x: x[2], reverse=True)\n        results.append(result)\n    return results\n\n\ndef _obtain_input_shape(input_shape,\n                        default_size,\n                        min_size,\n                        data_format,\n                        require_flatten,\n                        weights=None):\n    \"\"\"Internal utility to compute/validate a model's input shape.\n\n    # Arguments\n        input_shape: Either None (will return the default network input shape),\n            or a user-provided shape to be validated.\n        default_size: Default input width/height for the model.\n        min_size: Minimum input width/height accepted by the model.\n        data_format: Image data format to use.\n        require_flatten: Whether the model is expected to\n            be linked to a classifier via a Flatten layer.\n        weights: One of `None` (random initialization)\n            or 'imagenet' (pre-training on ImageNet).\n            If weights='imagenet' input channels must be equal to 3.\n\n    # Returns\n        An integer shape tuple (may include None entries).\n\n    # Raises\n        ValueError: In case of invalid argument values.\n    \"\"\"\n    if weights != 'imagenet' and input_shape and len(input_shape) == 3:\n        if data_format == 'channels_first':\n            if input_shape[0] not in {1, 3}:\n                warnings.warn(\n                    'This model usually expects 1 or 3 input channels. '\n                    'However, it was passed an input_shape with ' +\n                    str(input_shape[0]) + ' input channels.')\n            default_shape = (input_shape[0], default_size, default_size)\n        else:\n            if input_shape[-1] not in {1, 3}:\n                warnings.warn(\n                    'This model usually expects 1 or 3 input channels. 
'\n                    'However, it was passed an input_shape with ' +\n                    str(input_shape[-1]) + ' input channels.')\n            default_shape = (default_size, default_size, input_shape[-1])\n    else:\n        if data_format == 'channels_first':\n            default_shape = (3, default_size, default_size)\n        else:\n            default_shape = (default_size, default_size, 3)\n    if weights == 'imagenet' and require_flatten:\n        if input_shape is not None:\n            if input_shape != default_shape:\n                raise ValueError('When setting `include_top=True` '\n                                 'and loading `imagenet` weights, '\n                                 '`input_shape` should be ' +\n                                 str(default_shape) + '.')\n        return default_shape\n    if input_shape:\n        if data_format == 'channels_first':\n            if input_shape is not None:\n                if len(input_shape) != 3:\n                    raise ValueError(\n                        '`input_shape` must be a tuple of three integers.')\n                if input_shape[0] != 3 and weights == 'imagenet':\n                    raise ValueError('The input must have 3 channels; got '\n                                     '`input_shape=' + str(input_shape) + '`')\n                if ((input_shape[1] is not None and input_shape[1] < min_size) or\n                   (input_shape[2] is not None and input_shape[2] < min_size)):\n                    raise ValueError('Input size must be at least ' +\n                                     str(min_size) + 'x' + str(min_size) +\n                                     '; got `input_shape=' +\n                                     str(input_shape) + '`')\n        else:\n            if input_shape is not None:\n                if len(input_shape) != 3:\n                    raise ValueError(\n                        '`input_shape` must be a tuple of three integers.')\n                if input_shape[-1] != 3 and weights == 'imagenet':\n                    raise ValueError('The input must have 3 channels; got '\n                                     '`input_shape=' + str(input_shape) + '`')\n                if ((input_shape[0] is not None and input_shape[0] < min_size) or\n                   (input_shape[1] is not None and input_shape[1] < min_size)):\n                    raise ValueError('Input size must be at least ' +\n                                     str(min_size) + 'x' + str(min_size) +\n                                     '; got `input_shape=' +\n                                     str(input_shape) + '`')\n    else:\n        if require_flatten:\n            input_shape = default_shape\n        else:\n            if data_format == 'channels_first':\n                input_shape = (3, None, None)\n            else:\n                input_shape = (None, None, 3)\n    if require_flatten:\n        if None in input_shape:\n            raise ValueError('If `include_top` is True, '\n                             'you should specify a static `input_shape`. '\n                             'Got `input_shape=' + str(input_shape) + '`')\n    return input_shape\n"
  },
  {
    "path": "axelerate/networks/common_utils/mobilenet_sipeed/mobilenet.py",
    "content": "\"\"\"MobileNet v1 models for Keras.\n\nMobileNet is a general architecture and can be used for multiple use cases.\nDepending on the use case, it can use different input layer size and\ndifferent width factors. This allows different width models to reduce\nthe number of multiply-adds and thereby\nreduce inference cost on mobile devices.\n\nMobileNets support any input size greater than 32 x 32, with larger image sizes\noffering better performance.\nThe number of parameters and number of multiply-adds\ncan be modified by using the `alpha` parameter,\nwhich increases/decreases the number of filters in each layer.\nBy altering the image size and `alpha` parameter,\nall 16 models from the paper can be built, with ImageNet weights provided.\n\nThe paper demonstrates the performance of MobileNets using `alpha` values of\n1.0 (also called 100 % MobileNet), 0.75, 0.5 and 0.25.\nFor each of these `alpha` values, weights for 4 different input image sizes\nare provided (224, 192, 160, 128).\n\nThe following table describes the size and accuracy of the 100% MobileNet\non size 224 x 224:\n----------------------------------------------------------------------------\nWidth Multiplier (alpha) | ImageNet Acc |  Multiply-Adds (M) |  Params (M)\n----------------------------------------------------------------------------\n|   1.0 MobileNet-224    |    70.6 %     |        529        |     4.2     |\n|   0.75 MobileNet-224   |    68.4 %     |        325        |     2.6     |\n|   0.50 MobileNet-224   |    63.7 %     |        149        |     1.3     |\n|   0.25 MobileNet-224   |    50.6 %     |        41         |     0.5     |\n----------------------------------------------------------------------------\n\nThe following table describes the performance of\nthe 100 % MobileNet on various input sizes:\n------------------------------------------------------------------------\n      Resolution      | ImageNet Acc | Multiply-Adds (M) | Params (M)\n------------------------------------------------------------------------\n|  1.0 MobileNet-224  |    70.6 %    |        529        |     4.2     |\n|  1.0 MobileNet-192  |    69.1 %    |        529        |     4.2     |\n|  1.0 MobileNet-160  |    67.2 %    |        529        |     4.2     |\n|  1.0 MobileNet-128  |    64.4 %    |        529        |     4.2     |\n------------------------------------------------------------------------\n\nThe weights for all 16 models are obtained and translated\nfrom TensorFlow checkpoints found at\nhttps://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md\n\n# Reference\n\n- [MobileNets: Efficient Convolutional Neural Networks for\n   Mobile Vision Applications](https://arxiv.org/pdf/1704.04861.pdf))\n\"\"\"\nfrom __future__ import print_function\nfrom __future__ import absolute_import\nfrom __future__ import division\n\nimport os\nimport warnings\n\nfrom . import get_submodules_from_kwargs\nfrom . 
import imagenet_utils\nfrom .imagenet_utils import decode_predictions\nfrom .imagenet_utils import _obtain_input_shape\n\n\nBASE_WEIGHT_PATH = ('https://github.com/fchollet/deep-learning-models/'\n                    'releases/download/v0.6/')\n\nbackend = None\nlayers = None\nmodels = None\nkeras_utils = None\n\n\ndef preprocess_input(x, **kwargs):\n    \"\"\"Preprocesses a numpy array encoding a batch of images.\n\n    # Arguments\n        x: a 4D numpy array consists of RGB values within [0, 255].\n\n    # Returns\n        Preprocessed array.\n    \"\"\"\n    return imagenet_utils.preprocess_input(x, mode='tf', **kwargs)\n\n\ndef MobileNet(input_shape=None,\n              alpha=1.0,\n              depth_multiplier=1,\n              dropout=1e-3,\n              include_top=True,\n              weights='imagenet',\n              input_tensor=None,\n              pooling=None,\n              classes=1000,\n              **kwargs):\n    \"\"\"Instantiates the MobileNet architecture.\n\n    # Arguments\n        input_shape: optional shape tuple, only to be specified\n            if `include_top` is False (otherwise the input shape\n            has to be `(224, 224, 3)`\n            (with `channels_last` data format)\n            or (3, 224, 224) (with `channels_first` data format).\n            It should have exactly 3 inputs channels,\n            and width and height should be no smaller than 32.\n            E.g. `(200, 200, 3)` would be one valid value.\n        alpha: controls the width of the network. This is known as the\n            width multiplier in the MobileNet paper.\n            - If `alpha` < 1.0, proportionally decreases the number\n                of filters in each layer.\n            - If `alpha` > 1.0, proportionally increases the number\n                of filters in each layer.\n            - If `alpha` = 1, default number of filters from the paper\n                 are used at each layer.\n        depth_multiplier: depth multiplier for depthwise convolution. This\n            is called the resolution multiplier in the MobileNet paper.\n        dropout: dropout rate\n        include_top: whether to include the fully-connected\n            layer at the top of the network.\n        weights: one of `None` (random initialization),\n              'imagenet' (pre-training on ImageNet),\n              or the path to the weights file to be loaded.\n        input_tensor: optional Keras tensor (i.e. 
output of\n            `layers.Input()`)\n            to use as image input for the model.\n        pooling: Optional pooling mode for feature extraction\n            when `include_top` is `False`.\n            - `None` means that the output of the model\n                will be the 4D tensor output of the\n                last convolutional block.\n            - `avg` means that global average pooling\n                will be applied to the output of the\n                last convolutional block, and thus\n                the output of the model will be a\n                2D tensor.\n            - `max` means that global max pooling will\n                be applied.\n        classes: optional number of classes to classify images\n            into, only to be specified if `include_top` is True, and\n            if no `weights` argument is specified.\n\n    # Returns\n        A Keras model instance.\n\n    # Raises\n        ValueError: in case of invalid argument for `weights`,\n            or invalid input shape.\n        RuntimeError: If attempting to run this model with a\n            backend that does not support separable convolutions.\n    \"\"\"\n    global backend, layers, models, keras_utils\n    backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)\n\n    if not (weights in {'imagenet', None} or os.path.exists(weights)):\n        raise ValueError('The `weights` argument should be either '\n                         '`None` (random initialization), `imagenet` '\n                         '(pre-training on ImageNet), '\n                         'or the path to the weights file to be loaded.')\n\n    if weights == 'imagenet' and include_top and classes != 1000:\n        raise ValueError('If using `weights` as `\"imagenet\"` with `include_top` '\n                         'as true, `classes` should be 1000')\n\n    # Determine proper input shape and default size.\n    if input_shape is None:\n        default_size = 224\n    else:\n        if backend.image_data_format() == 'channels_first':\n            rows = input_shape[1]\n            cols = input_shape[2]\n        else:\n            rows = input_shape[0]\n            cols = input_shape[1]\n\n        if rows == cols and rows in [128, 160, 192, 224]:\n            default_size = rows\n        else:\n            default_size = 224\n\n    input_shape = _obtain_input_shape(input_shape,\n                                      default_size=default_size,\n                                      min_size=32,\n                                      data_format=backend.image_data_format(),\n                                      require_flatten=include_top,\n                                      weights=weights)\n\n    if backend.image_data_format() == 'channels_last':\n        row_axis, col_axis = (0, 1)\n    else:\n        row_axis, col_axis = (1, 2)\n    rows = input_shape[row_axis]\n    cols = input_shape[col_axis]\n\n    if weights == 'imagenet':\n        if depth_multiplier != 1:\n            raise ValueError('If imagenet weights are being loaded, '\n                             'depth multiplier must be 1')\n\n        if alpha not in [0.25, 0.50, 0.75, 1.0]:\n            raise ValueError('If imagenet weights are being loaded, '\n                             'alpha can be one of'\n                             '`0.25`, `0.50`, `0.75` or `1.0` only.')\n\n        if rows != cols or rows not in [128, 160, 192, 224]:\n            if rows is None:\n                rows = 224\n                warnings.warn('MobileNet shape is 
undefined.'\n                              ' Weights for input shape '\n                              '(224, 224) will be loaded.')\n            else:\n                raise ValueError('If imagenet weights are being loaded, '\n                                 'input must have a static square shape '\n                                 '(one of (128, 128), (160, 160), '\n                                 '(192, 192), or (224, 224)). '\n                                 'Input shape provided = %s' % (input_shape,))\n\n    if backend.image_data_format() != 'channels_last':\n        warnings.warn('The MobileNet family of models is only available '\n                      'for the input data format \"channels_last\" '\n                      '(width, height, channels). '\n                      'However your settings specify the default '\n                      'data format \"channels_first\" (channels, width, height).'\n                      ' You should set `image_data_format=\"channels_last\"` '\n                      'in your Keras config located at ~/.keras/keras.json. '\n                      'The model being returned right now will expect inputs '\n                      'to follow the \"channels_last\" data format.')\n        backend.set_image_data_format('channels_last')\n        old_data_format = 'channels_first'\n    else:\n        old_data_format = None\n\n    if input_tensor is None:\n        img_input = layers.Input(shape=input_shape)\n    else:\n        if not backend.is_keras_tensor(input_tensor):\n            img_input = layers.Input(tensor=input_tensor, shape=input_shape)\n        else:\n            img_input = input_tensor\n\n    x = _conv_block(img_input, 32, alpha, strides=(2, 2))\n    x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)\n\n    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier,\n                              strides=(2, 2), block_id=2)\n    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)\n\n    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier,\n                              strides=(2, 2), block_id=4)\n    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)\n\n    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier,\n                              strides=(2, 2), block_id=6)\n    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7)\n    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8)\n    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9)\n    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10)\n    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11)\n\n    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier,\n                              strides=(2, 2), block_id=12)\n    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13)\n\n    if include_top:\n        if backend.image_data_format() == 'channels_first':\n            shape = (int(1024 * alpha), 1, 1)\n        else:\n            shape = (1, 1, int(1024 * alpha))\n\n        x = layers.GlobalAveragePooling2D()(x)\n        x = layers.Reshape(shape, name='reshape_1')(x)\n        x = layers.Dropout(dropout, name='dropout')(x)\n        x = layers.Conv2D(classes, (1, 1),\n                          padding='same',\n                          name='conv_preds')(x)\n        x = layers.Activation('softmax', name='act_softmax')(x)\n        x = layers.Reshape((classes,), name='reshape_2')(x)\n    else:\n        if pooling 
== 'avg':\n            x = layers.GlobalAveragePooling2D()(x)\n        elif pooling == 'max':\n            x = layers.GlobalMaxPooling2D()(x)\n\n    # Ensure that the model takes into account\n    # any potential predecessors of `input_tensor`.\n    if input_tensor is not None:\n        inputs = keras_utils.get_source_inputs(input_tensor)\n    else:\n        inputs = img_input\n\n    # Create model.\n    model = models.Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows))\n\n    # Load weights.\n    if weights == 'imagenet':\n        if backend.image_data_format() == 'channels_first':\n            raise ValueError('Weights for \"channels_first\" format '\n                             'are not available.')\n        if alpha == 1.0:\n            alpha_text = '1_0'\n        elif alpha == 0.75:\n            alpha_text = '7_5'\n        elif alpha == 0.50:\n            alpha_text = '5_0'\n        else:\n            alpha_text = '2_5'\n\n        if include_top:\n            model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows)\n            weight_path = BASE_WEIGHT_PATH + model_name\n            weights_path = keras_utils.get_file(model_name,\n                                                weight_path,\n                                                cache_subdir='models')\n        else:\n            model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows)\n            weight_path = BASE_WEIGHT_PATH + model_name\n            weights_path = keras_utils.get_file(model_name,\n                                                weight_path,\n                                                cache_subdir='models')\n        model.load_weights(weights_path)\n    elif weights is not None:\n        model.load_weights(weights)\n\n    if old_data_format:\n        backend.set_image_data_format(old_data_format)\n    return model\n\n\ndef _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):\n    \"\"\"Adds an initial convolution layer (with batch normalization and relu6).\n\n    # Arguments\n        inputs: Input tensor of shape `(rows, cols, 3)`\n            (with `channels_last` data format) or\n            (3, rows, cols) (with `channels_first` data format).\n            It should have exactly 3 inputs channels,\n            and width and height should be no smaller than 32.\n            E.g. `(224, 224, 3)` would be one valid value.\n        filters: Integer, the dimensionality of the output space\n            (i.e. 
the number of output filters in the convolution).\n        alpha: controls the width of the network.\n            - If `alpha` < 1.0, proportionally decreases the number\n                of filters in each layer.\n            - If `alpha` > 1.0, proportionally increases the number\n                of filters in each layer.\n            - If `alpha` = 1, default number of filters from the paper\n                 are used at each layer.\n        kernel: An integer or tuple/list of 2 integers, specifying the\n            width and height of the 2D convolution window.\n            Can be a single integer to specify the same value for\n            all spatial dimensions.\n        strides: An integer or tuple/list of 2 integers,\n            specifying the strides of the convolution\n            along the width and height.\n            Can be a single integer to specify the same value for\n            all spatial dimensions.\n            Specifying any stride value != 1 is incompatible with specifying\n            any `dilation_rate` value != 1.\n\n    # Input shape\n        4D tensor with shape:\n        `(samples, channels, rows, cols)` if data_format='channels_first'\n        or 4D tensor with shape:\n        `(samples, rows, cols, channels)` if data_format='channels_last'.\n\n    # Output shape\n        4D tensor with shape:\n        `(samples, filters, new_rows, new_cols)`\n        if data_format='channels_first'\n        or 4D tensor with shape:\n        `(samples, new_rows, new_cols, filters)`\n        if data_format='channels_last'.\n        `rows` and `cols` values might have changed due to stride.\n\n    # Returns\n        Output tensor of block.\n    \"\"\"\n    channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1\n    filters = int(filters * alpha)\n    x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name='conv1_pad')(inputs)\n    x = layers.Conv2D(filters, kernel,\n                      padding='valid',\n                      use_bias=False,\n                      strides=strides,\n                      name='conv1')(x)\n    x = layers.BatchNormalization(axis=channel_axis, name='conv1_bn')(x)\n    return layers.ReLU(6., name='conv1_relu')(x)\n\n\ndef _depthwise_conv_block(inputs, pointwise_conv_filters, alpha,\n                          depth_multiplier=1, strides=(1, 1), block_id=1):\n    \"\"\"Adds a depthwise convolution block.\n\n    A depthwise convolution block consists of a depthwise conv,\n    batch normalization, relu6, pointwise convolution,\n    batch normalization and relu6 activation.\n\n    # Arguments\n        inputs: Input tensor of shape `(rows, cols, channels)`\n            (with `channels_last` data format) or\n            (channels, rows, cols) (with `channels_first` data format).\n        pointwise_conv_filters: Integer, the dimensionality of the output space\n            (i.e. 
the number of output filters in the pointwise convolution).\n        alpha: controls the width of the network.\n            - If `alpha` < 1.0, proportionally decreases the number\n                of filters in each layer.\n            - If `alpha` > 1.0, proportionally increases the number\n                of filters in each layer.\n            - If `alpha` = 1, default number of filters from the paper\n                 are used at each layer.\n        depth_multiplier: The number of depthwise convolution output channels\n            for each input channel.\n            The total number of depthwise convolution output\n            channels will be equal to `filters_in * depth_multiplier`.\n        strides: An integer or tuple/list of 2 integers,\n            specifying the strides of the convolution\n            along the width and height.\n            Can be a single integer to specify the same value for\n            all spatial dimensions.\n            Specifying any stride value != 1 is incompatible with specifying\n            any `dilation_rate` value != 1.\n        block_id: Integer, a unique identification designating\n            the block number.\n\n    # Input shape\n        4D tensor with shape:\n        `(batch, channels, rows, cols)` if data_format='channels_first'\n        or 4D tensor with shape:\n        `(batch, rows, cols, channels)` if data_format='channels_last'.\n\n    # Output shape\n        4D tensor with shape:\n        `(batch, filters, new_rows, new_cols)`\n        if data_format='channels_first'\n        or 4D tensor with shape:\n        `(batch, new_rows, new_cols, filters)`\n        if data_format='channels_last'.\n        `rows` and `cols` values might have changed due to stride.\n\n    # Returns\n        Output tensor of block.\n    \"\"\"\n    channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1\n    pointwise_conv_filters = int(pointwise_conv_filters * alpha)\n\n    if strides == (1, 1):\n        x = inputs\n    else:\n        x = layers.ZeroPadding2D(((1, 1), (1, 1)),\n                                 name='conv_pad_%d' % block_id)(inputs)\n    x = layers.DepthwiseConv2D((3, 3),\n                               padding='same' if strides == (1, 1) else 'valid',\n                               depth_multiplier=depth_multiplier,\n                               strides=strides,\n                               use_bias=False,\n                               name='conv_dw_%d' % block_id)(x)\n    x = layers.BatchNormalization(\n        axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x)\n    x = layers.ReLU(6., name='conv_dw_%d_relu' % block_id)(x)\n\n    x = layers.Conv2D(pointwise_conv_filters, (1, 1),\n                      padding='same',\n                      use_bias=False,\n                      strides=(1, 1),\n                      name='conv_pw_%d' % block_id)(x)\n    x = layers.BatchNormalization(axis=channel_axis,\n                                  name='conv_pw_%d_bn' % block_id)(x)\n    return layers.ReLU(6., name='conv_pw_%d_relu' % block_id)(x)\n"
  },
  {
    "path": "axelerate/networks/segnet/__init__.py",
    "content": ""
  },
  {
    "path": "axelerate/networks/segnet/data_utils/__init__.py",
    "content": ""
  },
  {
    "path": "axelerate/networks/segnet/data_utils/data_loader.py",
    "content": "import os\nimport numpy as np\nnp.random.seed(1337)\nfrom tensorflow.keras.utils import Sequence\nfrom axelerate.networks.common_utils.augment import process_image_segmentation\nimport glob\nimport itertools\nimport random\nimport six\nimport cv2\n\ntry:\n    from tqdm import tqdm\nexcept ImportError:\n    print(\"tqdm not found, disabling progress bars\")\n    def tqdm(iter):\n        return iter\n\n\nfrom ..models.config import IMAGE_ORDERING\n\nDATA_LOADER_SEED = 0\n\nrandom.seed(DATA_LOADER_SEED)\nclass_colors = [(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) for _ in range(5000)]\n\nclass DataLoaderError(Exception):\n    pass\n\ndef get_pairs_from_paths(images_path, segs_path, ignore_non_matching=True):\n    \"\"\" Find all the images from the images_path directory and\n        the segmentation images from the segs_path directory\n        while checking integrity of data \"\"\"\n\n    ACCEPTABLE_IMAGE_FORMATS = [\".jpg\", \".jpeg\", \".png\" , \".bmp\"]\n    ACCEPTABLE_SEGMENTATION_FORMATS = [\".png\", \".bmp\"]\n\n    image_files = []\n    segmentation_files = {}\n\n    for dir_entry in os.listdir(images_path):\n        if os.path.isfile(os.path.join(images_path, dir_entry)) and \\\n                os.path.splitext(dir_entry)[1] in ACCEPTABLE_IMAGE_FORMATS:\n            file_name, file_extension = os.path.splitext(dir_entry)\n            image_files.append((file_name, file_extension, os.path.join(images_path, dir_entry)))\n\n    for dir_entry in os.listdir(segs_path):\n        if os.path.isfile(os.path.join(segs_path, dir_entry)) and \\\n                os.path.splitext(dir_entry)[1] in ACCEPTABLE_SEGMENTATION_FORMATS:\n            file_name, file_extension = os.path.splitext(dir_entry)\n            if file_name in segmentation_files:\n                raise DataLoaderError(\"Segmentation file with filename {0} already exists and is ambiguous to resolve with path {1}. 
Please remove or rename the latter.\".format(file_name, os.path.join(segs_path, dir_entry)))\n            segmentation_files[file_name] = (file_extension, os.path.join(segs_path, dir_entry))\n\n    return_value = []\n    # Match the images and segmentations\n    for image_file, _, image_full_path in image_files:\n        if image_file in segmentation_files:\n            return_value.append((image_full_path, segmentation_files[image_file][1]))\n        elif ignore_non_matching:\n            print(\"No corresponding segmentation found for image {0}.\".format(image_full_path))\n            continue\n        else:\n            # Error out\n            raise DataLoaderError(\"No corresponding segmentation found for image {0}.\".format(image_full_path))\n\n    return return_value\n\n\ndef get_image_array(image_input, norm, ordering='channels_first'):\n    \"\"\" Load image array from input \"\"\"\n    if type(image_input) is np.ndarray:\n        # It is already an array, use it as it is\n        img = image_input\n    elif  isinstance(image_input, six.string_types)  :\n        if not os.path.isfile(image_input):\n            raise DataLoaderError(\"get_image_array: path {0} doesn't exist\".format(image_input))\n        img = cv2.imread(image_input, 1)\n    else:\n        raise DataLoaderError(\"get_image_array: Can't process input type {0}\".format(str(type(image_input))))\n        \n    if norm:\n        img = norm(img)\n\n    if ordering == 'channels_first':\n        img = np.rollaxis(img, 2, 0)\n    return img\n\n\ndef get_segmentation_array(image_input, nClasses, no_reshape=True):\n    \"\"\" Load segmentation array from input \"\"\"\n\n    seg_labels = np.zeros((image_input.shape[0], image_input.shape[1], nClasses))\n\n    if type(image_input) is np.ndarray:\n        # It is already an array, use it as it is\n        img = image_input\n    elif isinstance(image_input, six.string_types) :\n        if not os.path.isfile(image_input):\n            raise DataLoaderError(\"get_segmentation_array: path {0} doesn't exist\".format(image_input))\n        img = cv2.imread(image_input, 1)\n    else:\n        raise DataLoaderError(\"get_segmentation_array: Can't process input type {0}\".format(str(type(image_input))))\n\n    img = img[:, :, 0]\n\n    for c in range(nClasses):\n        seg_labels[:, :, c] = (img == c).astype(int)\n\n    if not no_reshape:\n        seg_labels = np.reshape(seg_labels, (width*height, nClasses))\n\n    return seg_labels\n\n\ndef verify_segmentation_dataset(images_path, segs_path, n_classes, show_all_errors=False):\n    try:\n        img_seg_pairs = get_pairs_from_paths(images_path, segs_path)\n        if not len(img_seg_pairs):\n            print(\"Couldn't load any data from images_path: {0} and segmentations path: {1}\".format(images_path, segs_path))\n            return False\n\n        return_value = True\n        for im_fn, seg_fn in tqdm(img_seg_pairs):\n            img = cv2.imread(im_fn)\n            seg = cv2.imread(seg_fn)\n            # Check dimensions match\n            if not img.shape == seg.shape:\n                return_value = False\n                print(\"The size of image {0} and its segmentation {1} doesn't match (possibly the files are corrupt).\".format(im_fn, seg_fn))\n                if not show_all_errors:\n                    break\n            else:\n                max_pixel_value = np.max(seg[:, :, 0])\n                if max_pixel_value >= n_classes:\n                    return_value = False\n                    print(\"The pixel values of 
the segmentation image {0} violating range [0, {1}]. Found maximum pixel value {2}\".format(seg_fn, str(n_classes - 1), max_pixel_value))\n                    if not show_all_errors:\n                        break\n        if return_value:\n            print(\"Dataset verified! \")\n        else:\n            print(\"Dataset not verified!\")\n        return return_value\n    except DataLoaderError as e:\n        print(\"Found error during data loading\\n{0}\".format(str(e)))\n        return False\n        \n        \ndef create_batch_generator(images_path, segs_path, \n                           input_size=224,\n                           output_size=112,\n                           n_classes=51,\n                           batch_size=8,\n                           repeat_times=1,\n                           do_augment=False,\n                           norm=None):\n\n    worker = BatchGenerator(images_path, segs_path, batch_size,\n                 n_classes, input_size, output_size, repeat_times, \n                 do_augment, norm)\n    return worker\n\n\nclass BatchGenerator(Sequence):\n    def __init__(self,\n                 images_path, segs_path, batch_size,\n                 n_classes,input_size, output_size, repeat_times,\n                 do_augment=False, norm=None):\n        self.norm = norm\n        self.n_classes = n_classes\n        self.input_size = input_size\n        self.output_size = output_size\n        self.do_augment = do_augment\n        self._repeat_times = repeat_times\n        self._batch_size = batch_size\n        self.img_seg_pairs = get_pairs_from_paths(images_path, segs_path)\n        random.shuffle(self.img_seg_pairs)\n        self.zipped = itertools.cycle(self.img_seg_pairs)\n        self.counter = 0\n\n    def __len__(self):\n        return int(len(self.img_seg_pairs) * self._repeat_times/self._batch_size)\n\n    def __getitem__(self, idx):\n        \"\"\"\n        # Args\n            idx : batch index\n        \"\"\"\n        x_batch = []\n        y_batch= []\n        for i in range(self._batch_size):\n            img, seg = next(self.zipped)\n            img = cv2.imread(img, 1)[...,::-1]\n            seg = cv2.imread(seg, 1)\n\n            im, seg = process_image_segmentation(img, seg, self.input_size[0], self.input_size[1], self.output_size[0], self.output_size[1], self.do_augment)\n\n            x_batch.append(get_image_array(im, self.norm, ordering=IMAGE_ORDERING))\n            y_batch.append(get_segmentation_array(seg, self.n_classes))\n\n        x_batch = np.array(x_batch)\n        y_batch = np.array(y_batch)\n        self.counter += 1\n        return x_batch, y_batch\n\n    def on_epoch_end(self):\n        self.counter = 0\n        random.shuffle(self.img_seg_pairs)\n"
  },
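  {
    "path": "axelerate/networks/segnet/data_utils/example_dataloader_sketch.py",
    "content": "\"\"\"Hypothetical data-loading sketch for axelerate.networks.segnet.data_utils.data_loader.\n\nIllustrative only, not part of the original aXeleRate code base: the file name and the\n'images/' / 'annotations/' folder layout are assumptions. It shows how verify_segmentation_dataset()\nand create_batch_generator() are meant to be used together: images and single-channel class-index\nmasks share file names, and the generator yields (images, one_hot_masks) batches sized for the\nnetwork input and output resolutions.\n\"\"\"\nfrom axelerate.networks.segnet.data_utils.data_loader import (\n    create_batch_generator, verify_segmentation_dataset)\n\nIMAGES = 'images/'\nMASKS = 'annotations/'\nN_CLASSES = 2\n\nif verify_segmentation_dataset(IMAGES, MASKS, N_CLASSES):\n    gen = create_batch_generator(IMAGES, MASKS,\n                                 input_size=(224, 224),\n                                 output_size=(112, 112),\n                                 n_classes=N_CLASSES,\n                                 batch_size=4,\n                                 repeat_times=1,\n                                 do_augment=False,\n                                 norm=None)\n    x, y = gen[0]\n    print(x.shape, y.shape)   # expected e.g. (4, 224, 224, 3) and (4, 112, 112, 2)\n"
  },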
  {
    "path": "axelerate/networks/segnet/frontend_segnet.py",
    "content": "import os\nimport numpy as np\nimport cv2\nimport time\nfrom tqdm import tqdm\n\nfrom axelerate.networks.segnet.data_utils.data_loader import create_batch_generator, verify_segmentation_dataset\nfrom axelerate.networks.common_utils.feature import create_feature_extractor\nfrom axelerate.networks.common_utils.fit import train\nfrom axelerate.networks.segnet.models.segnet import mobilenet_segnet, squeezenet_segnet, full_yolo_segnet, tiny_yolo_segnet, nasnetmobile_segnet, resnet50_segnet, densenet121_segnet\n\ndef masked_categorical_crossentropy(gt , pr ):\n    from tensorflow.keras.losses import categorical_crossentropy\n    mask = 1 -  gt[: , : , 0] \n    return categorical_crossentropy(gt, pr)*mask\n\ndef create_segnet(architecture, input_size, n_classes, weights = None):\n\n    if architecture == 'NASNetMobile':\n        model = nasnetmobile_segnet(n_classes, input_size, encoder_level=4, weights = weights)\n    elif architecture == 'SqueezeNet':\n        model = squeezenet_segnet(n_classes, input_size, encoder_level=4, weights = weights)\n    elif architecture == 'Full Yolo':\n        model = full_yolo_segnet(n_classes, input_size, encoder_level=4, weights = weights)\n    elif architecture == 'Tiny Yolo':\n        model = tiny_yolo_segnet(n_classes, input_size, encoder_level=4, weights = weights)\n    elif architecture == 'DenseNet121':\n        model = densenet121_segnet(n_classes, input_size, encoder_level=4, weights = weights)\n    elif architecture == 'ResNet50':\n        model = resnet50_segnet(n_classes, input_size, encoder_level=4, weights = weights)\n    elif 'MobileNet' in architecture:\n        model = mobilenet_segnet(n_classes, input_size, encoder_level=4, weights = weights, architecture = architecture)\n\n    output_size = (model.output_height, model.output_width)\n    network = Segnet(model, input_size, n_classes, model.normalize, output_size)\n\n    return network\n\nclass Segnet(object):\n    def __init__(self,\n                 network,\n                 input_size,\n                 n_classes,\n                 norm,\n                 output_size):\n        self.network = network       \n        self.n_classes = n_classes\n        self.input_size = input_size\n        self.output_size = output_size\n        self.norm = norm\n\n    def load_weights(self, weight_path, by_name=False):\n        if os.path.exists(weight_path):\n            print(\"Loading pre-trained weights for the whole model: \", weight_path)\n            self.network.load_weights(weight_path)\n        else:\n            print(\"Failed to load pre-trained weights for the whole model. 
It might be because you didn't specify any or the weight file cannot be found\")\n\n    def predict(self, image):\n\n        start_time = time.time()\n        Y_pred = np.squeeze(self.network.predict(image))\n        elapsed_ms = (time.time() - start_time)  * 1000\n\n        y_pred = np.argmax(Y_pred, axis = 2)\n\n        return elapsed_ms, y_pred\n\n\n    def evaluate(self, img_folder, ann_folder, batch_size):\n\n        self.generator = create_batch_generator(img_folder, ann_folder, self.input_size, \n                                                self.output_size, self.n_classes, \n                                                batch_size, 1, False, self.norm)\n        tp = np.zeros(self.n_classes)\n        fp = np.zeros(self.n_classes)\n        fn = np.zeros(self.n_classes)\n        n_pixels = np.zeros(self.n_classes)\n        \n        for inp, gt in tqdm(list(self.generator)):\n                y_pred = self.network.predict(inp)\n\n                y_pred = np.argmax(y_pred, axis=-1)\n                gt = np.argmax(gt, axis=-1)\n\n                for cl_i in range(self.n_classes):\n                    \n                    tp[cl_i] += np.sum((y_pred == cl_i) * (gt == cl_i))\n                    fp[cl_i] += np.sum((y_pred == cl_i) * ((gt != cl_i)))\n                    fn[cl_i] += np.sum((y_pred != cl_i) * ((gt == cl_i)))\n                    n_pixels[cl_i] += np.sum(gt == cl_i)\n\n        cl_wise_score = tp / (tp + fp + fn + 0.000000000001)\n        n_pixels_norm = n_pixels /  np.sum(n_pixels)\n        frequency_weighted_IU = np.sum(cl_wise_score*n_pixels_norm)\n        mean_IU = np.mean(cl_wise_score)\n        report = {\"frequency_weighted_IU\":frequency_weighted_IU , \"mean_IU\":mean_IU , \"class_wise_IU\":cl_wise_score}\n        return report\n\n    def train(self,\n              img_folder,\n              ann_folder,\n              nb_epoch,\n              project_folder,\n              batch_size=8,\n              do_augment=False,\n              learning_rate=1e-4, \n              train_times=1,\n              valid_times=1,\n              valid_img_folder=\"\",\n              valid_ann_folder=\"\",\n              first_trainable_layer=None,\n              ignore_zero_class=False,\n              metrics='val_loss'):\n        \n        if metrics != \"accuracy\" and metrics != \"loss\":\n            print(\"Unknown metric for SegNet, valid options are: 'accuracy' or 'loss'. Defaulting to 'loss'\")\n            metrics = \"loss\"\n\n        if ignore_zero_class:\n            loss_k = masked_categorical_crossentropy\n        else:\n            loss_k = 'categorical_crossentropy'\n        train_generator = create_batch_generator(img_folder, ann_folder, self.input_size, \n                          self.output_size, self.n_classes,batch_size, train_times, do_augment, self.norm)\n\n        validation_generator = create_batch_generator(valid_img_folder, valid_ann_folder, self.input_size, \n                               self.output_size, self.n_classes, batch_size, valid_times, False, self.norm)\n        \n        return train(self.network,\n                            loss_k,\n                            train_generator, \n                            validation_generator, \n                            learning_rate, \n                            nb_epoch, \n                            project_folder, \n                            first_trainable_layer, \n                            metric_name = metrics)\n    \n"
  },
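  {
    "path": "axelerate/networks/segnet/example_inference_sketch.py",
    "content": "\"\"\"Hypothetical inference sketch for axelerate.networks.segnet.frontend_segnet.\n\nIllustrative only, not part of the original aXeleRate code base: the file name, the 'MobileNet2_5'\nbackbone choice, the 224x224 input size and the image path are assumptions. It shows the intended\ncall order: create_segnet() builds the Keras model together with its normalization function,\nget_image_array() applies that normalization, and Segnet.predict() returns\n(elapsed_ms, class_index_map).\n\"\"\"\nimport cv2\nimport numpy as np\n\nfrom axelerate.networks.segnet.frontend_segnet import create_segnet\nfrom axelerate.networks.segnet.data_utils.data_loader import get_image_array\n\nsegnet = create_segnet('MobileNet2_5', input_size=[224, 224], n_classes=2)\n\nimg = cv2.imread('sample.jpg')[..., ::-1]              # BGR -> RGB, as in BatchGenerator\nimg = cv2.resize(img, (224, 224))\narr = get_image_array(img, segnet.norm, ordering='channels_last')\nbatch = np.expand_dims(arr, axis=0)                    # predict() expects a batch dimension\n\nelapsed_ms, mask = segnet.predict(batch)\nprint('inference took {:.1f} ms, mask shape: {}'.format(elapsed_ms, mask.shape))\n"
  },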
  {
    "path": "axelerate/networks/segnet/metrics.py",
    "content": "import numpy as np\n\nEPS = 1e-12\n\ndef get_iou(gt, pr, n_classes):\n    class_wise = np.zeros(n_classes)\n    for cl in range(n_classes):\n        intersection = np.sum((gt == cl)*(pr == cl))\n        union = np.sum(np.maximum((gt == cl), (pr == cl)))\n        iou = float(intersection)/(union + EPS)\n        class_wise[cl] = iou\n    return class_wise\n"
  },
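  {
    "path": "axelerate/networks/segnet/example_metrics_sketch.py",
    "content": "\"\"\"Hypothetical usage sketch for axelerate.networks.segnet.metrics.get_iou.\n\nIllustrative only, not part of the original aXeleRate code base: the file name and the toy masks\nare assumptions. get_iou() takes integer class-index maps for ground truth and prediction plus the\nnumber of classes, and returns the per-class intersection-over-union as a numpy array.\n\"\"\"\nimport numpy as np\n\nfrom axelerate.networks.segnet.metrics import get_iou\n\n# Toy 3x3 masks with two classes (0 = background, 1 = object)\ngt = np.array([[0, 0, 1],\n               [0, 1, 1],\n               [0, 1, 1]])\npr = np.array([[0, 0, 0],\n               [0, 1, 1],\n               [0, 1, 1]])\n\nclass_wise_iou = get_iou(gt, pr, 2)\nprint('per-class IoU:', class_wise_iou)   # approximately [0.8, 0.8] for this toy case\nprint('mean IoU:', class_wise_iou.mean())\n"
  },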
  {
    "path": "axelerate/networks/segnet/models/__init__.py",
    "content": ""
  },
  {
    "path": "axelerate/networks/segnet/models/_pspnet_2.py",
    "content": "# This code is proveded by Vladkryvoruchko and small modifications done by me .\n\nfrom math import ceil\nfrom sys import exit\nfrom keras import layers\nfrom keras.layers import Conv2D, MaxPooling2D, AveragePooling2D\nfrom keras.layers import BatchNormalization, Activation, Input, Dropout, \\\n    ZeroPadding2D, Lambda\nfrom keras.layers.merge import Concatenate, Add\nfrom keras.models import Model\nfrom keras.optimizers import SGD\nimport tensorflow as tf\n\nfrom .config import IMAGE_ORDERING\nfrom .model_utils import get_segmentation_model, resize_image\n\n\nlearning_rate = 1e-3  # Layer specific learning rate\n# Weight decay not implemented\n\n\ndef BN(name=\"\"):\n    return BatchNormalization(momentum=0.95, name=name, epsilon=1e-5)\n\n\nclass Interp(layers.Layer):\n\n    def __init__(self, new_size, **kwargs):\n        self.new_size = new_size\n        super(Interp, self).__init__(**kwargs)\n\n    def build(self, input_shape):\n        super(Interp, self).build(input_shape)\n\n    def call(self, inputs, **kwargs):\n        new_height, new_width = self.new_size\n        try:\n            resized = tf.image.resize(inputs, [new_height, new_width])\n        except AttributeError:\n            resized = tf.image.resize_images(inputs, [new_height, new_width],\n                                             align_corners=True)\n        return resized\n\n    def compute_output_shape(self, input_shape):\n        return tuple([None,\n                      self.new_size[0],\n                      self.new_size[1],\n                      input_shape[3]])\n\n    def get_config(self):\n        config = super(Interp, self).get_config()\n        config['new_size'] = self.new_size\n        return config\n\n\n# def Interp(x, shape):\n#    new_height, new_width = shape\n#    resized = tf.image.resize_images(x, [new_height, new_width],\n#                                      align_corners=True)\n#    return resized\n\n\ndef residual_conv(prev, level, pad=1, lvl=1, sub_lvl=1, modify_stride=False):\n    lvl = str(lvl)\n    sub_lvl = str(sub_lvl)\n    names = [\"conv\" + lvl + \"_\" + sub_lvl + \"_1x1_reduce\",\n             \"conv\" + lvl + \"_\" + sub_lvl + \"_1x1_reduce_bn\",\n             \"conv\" + lvl + \"_\" + sub_lvl + \"_3x3\",\n             \"conv\" + lvl + \"_\" + sub_lvl + \"_3x3_bn\",\n             \"conv\" + lvl + \"_\" + sub_lvl + \"_1x1_increase\",\n             \"conv\" + lvl + \"_\" + sub_lvl + \"_1x1_increase_bn\"]\n    if modify_stride is False:\n        prev = Conv2D(64 * level, (1, 1), strides=(1, 1), name=names[0],\n                      use_bias=False)(prev)\n    elif modify_stride is True:\n        prev = Conv2D(64 * level, (1, 1), strides=(2, 2), name=names[0],\n                      use_bias=False)(prev)\n\n    prev = BN(name=names[1])(prev)\n    prev = Activation('relu')(prev)\n\n    prev = ZeroPadding2D(padding=(pad, pad))(prev)\n    prev = Conv2D(64 * level, (3, 3), strides=(1, 1), dilation_rate=pad,\n                  name=names[2], use_bias=False)(prev)\n\n    prev = BN(name=names[3])(prev)\n    prev = Activation('relu')(prev)\n    prev = Conv2D(256 * level, (1, 1), strides=(1, 1), name=names[4],\n                  use_bias=False)(prev)\n    prev = BN(name=names[5])(prev)\n    return prev\n\n\ndef short_convolution_branch(prev, level, lvl=1, sub_lvl=1,\n                             modify_stride=False):\n    lvl = str(lvl)\n    sub_lvl = str(sub_lvl)\n    names = [\"conv\" + lvl + \"_\" + sub_lvl + \"_1x1_proj\",\n             \"conv\" + lvl + \"_\" + sub_lvl + 
\"_1x1_proj_bn\"]\n\n    if modify_stride is False:\n        prev = Conv2D(256 * level, (1, 1), strides=(1, 1), name=names[0],\n                      use_bias=False)(prev)\n    elif modify_stride is True:\n        prev = Conv2D(256 * level, (1, 1), strides=(2, 2), name=names[0],\n                      use_bias=False)(prev)\n\n    prev = BN(name=names[1])(prev)\n    return prev\n\n\ndef empty_branch(prev):\n    return prev\n\n\ndef residual_short(prev_layer, level, pad=1, lvl=1, sub_lvl=1,\n                   modify_stride=False):\n    prev_layer = Activation('relu')(prev_layer)\n    block_1 = residual_conv(prev_layer, level,\n                            pad=pad, lvl=lvl, sub_lvl=sub_lvl,\n                            modify_stride=modify_stride)\n\n    block_2 = short_convolution_branch(prev_layer, level,\n                                       lvl=lvl, sub_lvl=sub_lvl,\n                                       modify_stride=modify_stride)\n    added = Add()([block_1, block_2])\n    return added\n\n\ndef residual_empty(prev_layer, level, pad=1, lvl=1, sub_lvl=1):\n    prev_layer = Activation('relu')(prev_layer)\n\n    block_1 = residual_conv(prev_layer, level, pad=pad,\n                            lvl=lvl, sub_lvl=sub_lvl)\n    block_2 = empty_branch(prev_layer)\n    added = Add()([block_1, block_2])\n    return added\n\n\ndef ResNet(inp, layers):\n    # Names for the first couple layers of model\n    names = [\"conv1_1_3x3_s2\",\n             \"conv1_1_3x3_s2_bn\",\n             \"conv1_2_3x3\",\n             \"conv1_2_3x3_bn\",\n             \"conv1_3_3x3\",\n             \"conv1_3_3x3_bn\"]\n\n    # Short branch(only start of network)\n\n    cnv1 = Conv2D(64, (3, 3), strides=(2, 2), padding='same', name=names[0],\n                  use_bias=False)(inp)  # \"conv1_1_3x3_s2\"\n    bn1 = BN(name=names[1])(cnv1)  # \"conv1_1_3x3_s2/bn\"\n    relu1 = Activation('relu')(bn1)  # \"conv1_1_3x3_s2/relu\"\n\n    cnv1 = Conv2D(64, (3, 3), strides=(1, 1), padding='same', name=names[2],\n                  use_bias=False)(relu1)  # \"conv1_2_3x3\"\n    bn1 = BN(name=names[3])(cnv1)  # \"conv1_2_3x3/bn\"\n    relu1 = Activation('relu')(bn1)  # \"conv1_2_3x3/relu\"\n\n    cnv1 = Conv2D(128, (3, 3), strides=(1, 1), padding='same', name=names[4],\n                  use_bias=False)(relu1)  # \"conv1_3_3x3\"\n    bn1 = BN(name=names[5])(cnv1)  # \"conv1_3_3x3/bn\"\n    relu1 = Activation('relu')(bn1)  # \"conv1_3_3x3/relu\"\n\n    res = MaxPooling2D(pool_size=(3, 3), padding='same',\n                       strides=(2, 2))(relu1)  # \"pool1_3x3_s2\"\n\n    # ---Residual layers(body of network)\n\n    \"\"\"\n    Modify_stride --Used only once in first 3_1 convolutions block.\n    changes stride of first convolution from 1 -> 2\n    \"\"\"\n\n    # 2_1- 2_3\n    res = residual_short(res, 1, pad=1, lvl=2, sub_lvl=1)\n    for i in range(2):\n        res = residual_empty(res, 1, pad=1, lvl=2, sub_lvl=i + 2)\n\n    # 3_1 - 3_3\n    res = residual_short(res, 2, pad=1, lvl=3, sub_lvl=1, modify_stride=True)\n    for i in range(3):\n        res = residual_empty(res, 2, pad=1, lvl=3, sub_lvl=i + 2)\n    if layers is 50:\n        # 4_1 - 4_6\n        res = residual_short(res, 4, pad=2, lvl=4, sub_lvl=1)\n        for i in range(5):\n            res = residual_empty(res, 4, pad=2, lvl=4, sub_lvl=i + 2)\n    elif layers is 101:\n        # 4_1 - 4_23\n        res = residual_short(res, 4, pad=2, lvl=4, sub_lvl=1)\n        for i in range(22):\n            res = residual_empty(res, 4, pad=2, lvl=4, sub_lvl=i + 2)\n    else:\n   
     print(\"This ResNet is not implemented\")\n\n    # 5_1 - 5_3\n    res = residual_short(res, 8, pad=4, lvl=5, sub_lvl=1)\n    for i in range(2):\n        res = residual_empty(res, 8, pad=4, lvl=5, sub_lvl=i + 2)\n\n    res = Activation('relu')(res)\n    return res\n\n\ndef interp_block(prev_layer, level, feature_map_shape, input_shape):\n    if input_shape == (473, 473):\n        kernel_strides_map = {1: 60,\n                              2: 30,\n                              3: 20,\n                              6: 10}\n    elif input_shape == (713, 713):\n        kernel_strides_map = {1: 90,\n                              2: 45,\n                              3: 30,\n                              6: 15}\n    else:\n        print(\"Pooling parameters for input shape \",\n              input_shape, \" are not defined.\")\n        exit(1)\n\n    names = [\n        \"conv5_3_pool\" + str(level) + \"_conv\",\n        \"conv5_3_pool\" + str(level) + \"_conv_bn\"\n    ]\n    kernel = (kernel_strides_map[level], kernel_strides_map[level])\n    strides = (kernel_strides_map[level], kernel_strides_map[level])\n    prev_layer = AveragePooling2D(kernel, strides=strides)(prev_layer)\n    prev_layer = Conv2D(512, (1, 1), strides=(1, 1), name=names[0],\n                        use_bias=False)(prev_layer)\n    prev_layer = BN(name=names[1])(prev_layer)\n    prev_layer = Activation('relu')(prev_layer)\n    # prev_layer = Lambda(Interp, arguments={\n    #                    'shape': feature_map_shape})(prev_layer)\n    prev_layer = Interp(feature_map_shape)(prev_layer)\n    return prev_layer\n\n\ndef build_pyramid_pooling_module(res, input_shape):\n    \"\"\"Build the Pyramid Pooling Module.\"\"\"\n    # ---PSPNet concat layers with Interpolation\n    feature_map_size = tuple(int(ceil(input_dim / 8.0))\n                             for input_dim in input_shape)\n\n    interp_block1 = interp_block(res, 1, feature_map_size, input_shape)\n    interp_block2 = interp_block(res, 2, feature_map_size, input_shape)\n    interp_block3 = interp_block(res, 3, feature_map_size, input_shape)\n    interp_block6 = interp_block(res, 6, feature_map_size, input_shape)\n\n    # concat all these layers. resulted\n    # shape=(1,feature_map_size_x,feature_map_size_y,4096)\n    res = Concatenate()([res,\n                         interp_block6,\n                         interp_block3,\n                         interp_block2,\n                         interp_block1])\n    return res\n\n\ndef _build_pspnet(nb_classes, resnet_layers, input_shape,\n                  activation='softmax'):\n\n    assert IMAGE_ORDERING == 'channels_last'\n\n    inp = Input((input_shape[0], input_shape[1], 3))\n\n    res = ResNet(inp, layers=resnet_layers)\n\n    psp = build_pyramid_pooling_module(res, input_shape)\n\n    x = Conv2D(512, (3, 3), strides=(1, 1), padding=\"same\", name=\"conv5_4\",\n               use_bias=False)(psp)\n    x = BN(name=\"conv5_4_bn\")(x)\n    x = Activation('relu')(x)\n    x = Dropout(0.1)(x)\n\n    x = Conv2D(nb_classes, (1, 1), strides=(1, 1), name=\"conv6\")(x)\n    # x = Lambda(Interp, arguments={'shape': (\n    #    input_shape[0], input_shape[1])})(x)\n    x = Interp([input_shape[0], input_shape[1]])(x)\n\n    model = get_segmentation_model(inp, x)\n\n    return model\n"
  },
  {
    "path": "axelerate/networks/segnet/models/all_models.py",
    "content": "from . import pspnet\nfrom . import unet\nfrom . import segnet\nfrom . import fcn\nmodel_from_name = {}\n\n\nmodel_from_name[\"fcn_8\"] = fcn.fcn_8\nmodel_from_name[\"fcn_32\"] = fcn.fcn_32\nmodel_from_name[\"fcn_8_vgg\"] = fcn.fcn_8_vgg\nmodel_from_name[\"fcn_32_vgg\"] = fcn.fcn_32_vgg\nmodel_from_name[\"fcn_8_resnet50\"] = fcn.fcn_8_resnet50\nmodel_from_name[\"fcn_32_resnet50\"] = fcn.fcn_32_resnet50\nmodel_from_name[\"fcn_8_mobilenet\"] = fcn.fcn_8_mobilenet\nmodel_from_name[\"fcn_32_mobilenet\"] = fcn.fcn_32_mobilenet\n\n\nmodel_from_name[\"pspnet\"] = pspnet.pspnet\nmodel_from_name[\"vgg_pspnet\"] = pspnet.vgg_pspnet\nmodel_from_name[\"resnet50_pspnet\"] = pspnet.resnet50_pspnet\n\nmodel_from_name[\"vgg_pspnet\"] = pspnet.vgg_pspnet\nmodel_from_name[\"resnet50_pspnet\"] = pspnet.resnet50_pspnet\n\nmodel_from_name[\"pspnet_50\"] = pspnet.pspnet_50\nmodel_from_name[\"pspnet_101\"] = pspnet.pspnet_101\n\n\n# model_from_name[\"mobilenet_pspnet\"] = pspnet.mobilenet_pspnet\n\n\nmodel_from_name[\"unet_mini\"] = unet.unet_mini\nmodel_from_name[\"unet\"] = unet.unet\nmodel_from_name[\"vgg_unet\"] = unet.vgg_unet\nmodel_from_name[\"resnet50_unet\"] = unet.resnet50_unet\nmodel_from_name[\"mobilenet_unet\"] = unet.mobilenet_unet\n\n\nmodel_from_name[\"segnet\"] = segnet.segnet\nmodel_from_name[\"vgg_segnet\"] = segnet.vgg_segnet\nmodel_from_name[\"resnet50_segnet\"] = segnet.resnet50_segnet\nmodel_from_name[\"mobilenet_segnet\"] = segnet.mobilenet_segnet\n"
  },
  {
    "path": "axelerate/networks/segnet/models/basic_models.py",
    "content": "from keras.models import *\nfrom keras.layers import *\nimport keras.backend as K\n\nfrom .config import IMAGE_ORDERING\n\ndef vanilla_encoder(input_height=224,  input_width=224):\n\n    kernel = 3\n    filter_size = 64\n    pad = 1\n    pool_size = 2\n\n    if IMAGE_ORDERING == 'channels_first':\n        img_input = Input(shape=(3, input_height, input_width))\n    elif IMAGE_ORDERING == 'channels_last':\n        img_input = Input(shape=(input_height, input_width, 3))\n\n    x = img_input\n    levels = []\n\n    x = (ZeroPadding2D((pad, pad), data_format=IMAGE_ORDERING))(x)\n    x = (Conv2D(filter_size, (kernel, kernel),\n                data_format=IMAGE_ORDERING, padding='valid'))(x)\n    x = (BatchNormalization())(x)\n    x = (Activation('relu'))(x)\n    x = (MaxPooling2D((pool_size, pool_size), data_format=IMAGE_ORDERING))(x)\n    levels.append(x)\n\n    x = (ZeroPadding2D((pad, pad), data_format=IMAGE_ORDERING))(x)\n    x = (Conv2D(128, (kernel, kernel), data_format=IMAGE_ORDERING,\n         padding='valid'))(x)\n    x = (BatchNormalization())(x)\n    x = (Activation('relu'))(x)\n    x = (MaxPooling2D((pool_size, pool_size), data_format=IMAGE_ORDERING))(x)\n    levels.append(x)\n\n    for _ in range(3):\n        x = (ZeroPadding2D((pad, pad), data_format=IMAGE_ORDERING))(x)\n        x = (Conv2D(256, (kernel, kernel),\n                    data_format=IMAGE_ORDERING, padding='valid'))(x)\n        x = (BatchNormalization())(x)\n        x = (Activation('relu'))(x)\n        x = (MaxPooling2D((pool_size, pool_size),\n             data_format=IMAGE_ORDERING))(x)\n        levels.append(x)\n\n    return img_input, levels\n"
  },
  {
    "path": "axelerate/networks/segnet/models/config.py",
    "content": "IMAGE_ORDERING_CHANNELS_LAST = \"channels_last\"\nIMAGE_ORDERING_CHANNELS_FIRST = \"channels_first\"\n\n# Default IMAGE_ORDERING = channels_last\nIMAGE_ORDERING = IMAGE_ORDERING_CHANNELS_LAST"
  },
  {
    "path": "axelerate/networks/segnet/models/fcn.py",
    "content": "from keras.models import *\nfrom keras.layers import *\n\nfrom .config import IMAGE_ORDERING\nfrom .model_utils import get_segmentation_model\nfrom .vgg16 import get_vgg_encoder\nfrom .mobilenet import get_mobilenet_encoder\nfrom .basic_models import vanilla_encoder\nfrom .resnet50 import get_resnet50_encoder\n\n\n# crop o1 wrt o2\ndef crop(o1, o2, i):\n    o_shape2 = Model(i, o2).output_shape\n\n    if IMAGE_ORDERING == 'channels_first':\n        output_height2 = o_shape2[2]\n        output_width2 = o_shape2[3]\n    else:\n        output_height2 = o_shape2[1]\n        output_width2 = o_shape2[2]\n\n    o_shape1 = Model(i, o1).output_shape\n    if IMAGE_ORDERING == 'channels_first':\n        output_height1 = o_shape1[2]\n        output_width1 = o_shape1[3]\n    else:\n        output_height1 = o_shape1[1]\n        output_width1 = o_shape1[2]\n\n    cx = abs(output_width1 - output_width2)\n    cy = abs(output_height2 - output_height1)\n\n    if output_width1 > output_width2:\n        o1 = Cropping2D(cropping=((0, 0),  (0, cx)),\n                        data_format=IMAGE_ORDERING)(o1)\n    else:\n        o2 = Cropping2D(cropping=((0, 0),  (0, cx)),\n                        data_format=IMAGE_ORDERING)(o2)\n\n    if output_height1 > output_height2:\n        o1 = Cropping2D(cropping=((0, cy),  (0, 0)),\n                        data_format=IMAGE_ORDERING)(o1)\n    else:\n        o2 = Cropping2D(cropping=((0, cy),  (0, 0)),\n                        data_format=IMAGE_ORDERING)(o2)\n\n    return o1, o2\n\n\ndef fcn_8(n_classes, encoder=vanilla_encoder, input_height=416,\n          input_width=608):\n\n    img_input, levels = encoder(\n        input_height=input_height,  input_width=input_width)\n    [f1, f2, f3, f4, f5] = levels\n\n    o = f5\n\n    o = (Conv2D(4096, (7, 7), activation='relu',\n                padding='same', data_format=IMAGE_ORDERING))(o)\n    o = Dropout(0.5)(o)\n    o = (Conv2D(4096, (1, 1), activation='relu',\n                padding='same', data_format=IMAGE_ORDERING))(o)\n    o = Dropout(0.5)(o)\n\n    o = (Conv2D(n_classes,  (1, 1), kernel_initializer='he_normal',\n                data_format=IMAGE_ORDERING))(o)\n    o = Conv2DTranspose(n_classes, kernel_size=(4, 4),  strides=(\n        2, 2), use_bias=False, data_format=IMAGE_ORDERING)(o)\n\n    o2 = f4\n    o2 = (Conv2D(n_classes,  (1, 1), kernel_initializer='he_normal',\n                 data_format=IMAGE_ORDERING))(o2)\n\n    o, o2 = crop(o, o2, img_input)\n\n    o = Add()([o, o2])\n\n    o = Conv2DTranspose(n_classes, kernel_size=(4, 4),  strides=(\n        2, 2), use_bias=False, data_format=IMAGE_ORDERING)(o)\n    o2 = f3\n    o2 = (Conv2D(n_classes,  (1, 1), kernel_initializer='he_normal',\n                 data_format=IMAGE_ORDERING))(o2)\n    o2, o = crop(o2, o, img_input)\n    o = Add()([o2, o])\n\n    o = Conv2DTranspose(n_classes, kernel_size=(16, 16),  strides=(\n        8, 8), use_bias=False, data_format=IMAGE_ORDERING)(o)\n\n    model = get_segmentation_model(img_input, o)\n    model.model_name = \"fcn_8\"\n    return model\n\n\ndef fcn_32(n_classes, encoder=vanilla_encoder, input_height=416,\n           input_width=608):\n\n    img_input, levels = encoder(\n        input_height=input_height,  input_width=input_width)\n    [f1, f2, f3, f4, f5] = levels\n\n    o = f5\n\n    o = (Conv2D(4096, (7, 7), activation='relu',\n                padding='same', data_format=IMAGE_ORDERING))(o)\n    o = Dropout(0.5)(o)\n    o = (Conv2D(4096, (1, 1), activation='relu',\n                padding='same', 
data_format=IMAGE_ORDERING))(o)\n    o = Dropout(0.5)(o)\n\n    o = (Conv2D(n_classes,  (1, 1), kernel_initializer='he_normal',\n                data_format=IMAGE_ORDERING))(o)\n    o = Conv2DTranspose(n_classes, kernel_size=(64, 64),  strides=(\n        32, 32), use_bias=False,  data_format=IMAGE_ORDERING)(o)\n\n    model = get_segmentation_model(img_input, o)\n    model.model_name = \"fcn_32\"\n    return model\n\n\ndef fcn_8_vgg(n_classes,  input_height=416, input_width=608):\n    model = fcn_8(n_classes, get_vgg_encoder,\n                  input_height=input_height, input_width=input_width)\n    model.model_name = \"fcn_8_vgg\"\n    return model\n\n\ndef fcn_32_vgg(n_classes,  input_height=416, input_width=608):\n    model = fcn_32(n_classes, get_vgg_encoder,\n                   input_height=input_height, input_width=input_width)\n    model.model_name = \"fcn_32_vgg\"\n    return model\n\n\ndef fcn_8_resnet50(n_classes,  input_height=416, input_width=608):\n    model = fcn_8(n_classes, get_resnet50_encoder,\n                  input_height=input_height, input_width=input_width)\n    model.model_name = \"fcn_8_resnet50\"\n    return model\n\n\ndef fcn_32_resnet50(n_classes,  input_height=416, input_width=608):\n    model = fcn_32(n_classes, get_resnet50_encoder,\n                   input_height=input_height, input_width=input_width)\n    model.model_name = \"fcn_32_resnet50\"\n    return model\n\n\ndef fcn_8_mobilenet(n_classes,  input_height=416, input_width=608):\n    model = fcn_8(n_classes, get_mobilenet_encoder,\n                  input_height=input_height, input_width=input_width)\n    model.model_name = \"fcn_8_mobilenet\"\n    return model\n\n\ndef fcn_32_mobilenet(n_classes,  input_height=416, input_width=608):\n    model = fcn_32(n_classes, get_mobilenet_encoder,\n                   input_height=input_height, input_width=input_width)\n    model.model_name = \"fcn_32_mobilenet\"\n    return model\n\n\nif __name__ == '__main__':\n    m = fcn_8(101)\n    m = fcn_32(101)\n"
  },
  {
    "path": "axelerate/networks/segnet/models/model.py",
    "content": "\"\"\" Definition for the generic Model class \"\"\"\n\nclass Model:\n    def __init__(self, n_classes, input_height=None, input_width=None):\n        pass\n\n"
  },
  {
    "path": "axelerate/networks/segnet/models/model_utils.py",
    "content": "from types import MethodType\n\nfrom tensorflow.keras.models import *\nfrom tensorflow.keras.layers import *\nimport tensorflow.keras.backend as K\nfrom tqdm import tqdm\n\nfrom .config import IMAGE_ORDERING\nfrom ..train import train\nfrom ..predict import predict, predict_multiple, evaluate\n\n\n# source m1 , dest m2\ndef transfer_weights(m1, m2, verbose=True):\n\n    assert len(m1.layers) == len(\n        m2.layers), \"Both models should have same number of layers\"\n\n    nSet = 0\n    nNotSet = 0\n\n    if verbose:\n        print(\"Copying weights \")\n        bar = tqdm(zip(m1.layers, m2.layers))\n    else:\n        bar = zip(m1.layers, m2.layers)\n\n    for l, ll in bar:\n\n        if not any([w.shape != ww.shape for w, ww in zip(list(l.weights),\n                                                         list(ll.weights))]):\n            if len(list(l.weights)) > 0:\n                ll.set_weights(l.get_weights())\n                nSet += 1\n        else:\n            nNotSet += 1\n\n    if verbose:\n        print(\"Copied weights of %d layers and skipped %d layers\" %\n              (nSet, nNotSet))\n\n\ndef resize_image(inp,  s, data_format):\n\n    try:\n\n        return Lambda(lambda x: K.resize_images(x,\n                                                height_factor=s[0],\n                                                width_factor=s[1],\n                                                data_format=data_format,\n                                                interpolation='bilinear'))(inp)\n\n    except Exception as e:\n        # if keras is old, then rely on the tf function\n        # Sorry theano/cntk users!!!\n        assert data_format == 'channels_last'\n        assert IMAGE_ORDERING == 'channels_last'\n\n        import tensorflow as tf\n\n        return Lambda(\n            lambda x: tf.image.resize_images(\n                x, (K.int_shape(x)[1]*s[0], K.int_shape(x)[2]*s[1]))\n        )(inp)\n\n\ndef get_segmentation_model(input, output):\n\n    img_input = input\n    o = output\n\n    o_shape = Model(img_input, o).output_shape\n    i_shape = Model(img_input, o).input_shape\n\n    if IMAGE_ORDERING == 'channels_first':\n        output_height = o_shape[2]\n        output_width = o_shape[3]\n        input_height = i_shape[2]\n        input_width = i_shape[3]\n        n_classes = o_shape[1]\n        #o = (Reshape((-1, output_height*output_width)))(o)\n        o = (Permute((2, 1)))(o)\n    elif IMAGE_ORDERING == 'channels_last':\n        output_height = o_shape[1]\n        output_width = o_shape[2]\n        input_height = i_shape[1]\n        input_width = i_shape[2]\n        n_classes = o_shape[3]\n        #o = (Reshape((output_height*output_width, -1)))(o)\n\n    o = (Activation('softmax'))(o)\n    model = Model(img_input, o, name = \"segnet\")\n    model.output_width = output_width\n    model.output_height = output_height\n    model.n_classes = n_classes\n    model.input_height = input_height\n    model.input_width = input_width\n\n    model.train = MethodType(train, model)\n    model.predict_segmentation = MethodType(predict, model)\n    model.predict_multiple = MethodType(predict_multiple, model)\n    model.evaluate_segmentation = MethodType(evaluate, model)\n\n    return model\n"
  },
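  {
    "path": "axelerate/networks/segnet/models/example_transfer_weights_sketch.py",
    "content": "\"\"\"Hypothetical sketch for transfer_weights() in segnet.models.model_utils.\n\nIllustrative only, not part of the original aXeleRate code base: the file name and the two tiny\nSequential models are assumptions. transfer_weights() walks two models with the same number of\nlayers, copies weights layer-by-layer where the shapes match, skips the rest and reports how many\nlayers were copied.\n\"\"\"\nfrom tensorflow.keras.layers import Dense\nfrom tensorflow.keras.models import Sequential\n\nfrom axelerate.networks.segnet.models.model_utils import transfer_weights\n\ndef make_model():\n    return Sequential([Dense(8, input_shape=(4,), activation='relu'), Dense(2)])\n\nsource = make_model()\ntarget = make_model()\ntransfer_weights(source, target)   # prints the number of copied and skipped layers\n"
  },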
  {
    "path": "axelerate/networks/segnet/models/pspnet.py",
    "content": "import numpy as np\nimport keras\nfrom keras.models import *\nfrom keras.layers import *\nimport keras.backend as K\n\nfrom .config import IMAGE_ORDERING\nfrom .model_utils import get_segmentation_model, resize_image\nfrom .vgg16 import get_vgg_encoder\nfrom .mobilenet import get_mobilenet_encoder\nfrom .basic_models import vanilla_encoder\nfrom .resnet50 import get_resnet50_encoder\n\n\nif IMAGE_ORDERING == 'channels_first':\n    MERGE_AXIS = 1\nelif IMAGE_ORDERING == 'channels_last':\n    MERGE_AXIS = -1\n\n\ndef pool_block(feats, pool_factor):\n\n    if IMAGE_ORDERING == 'channels_first':\n        h = K.int_shape(feats)[2]\n        w = K.int_shape(feats)[3]\n    elif IMAGE_ORDERING == 'channels_last':\n        h = K.int_shape(feats)[1]\n        w = K.int_shape(feats)[2]\n\n    pool_size = strides = [\n        int(np.round(float(h) / pool_factor)),\n        int(np.round(float(w) / pool_factor))]\n\n    x = AveragePooling2D(pool_size, data_format=IMAGE_ORDERING,\n                         strides=strides, padding='same')(feats)\n    x = Conv2D(512, (1, 1), data_format=IMAGE_ORDERING,\n               padding='same', use_bias=False)(x)\n    x = BatchNormalization()(x)\n    x = Activation('relu')(x)\n\n    x = resize_image(x, strides, data_format=IMAGE_ORDERING)\n\n    return x\n\n\ndef _pspnet(n_classes, encoder,  input_height=384, input_width=576):\n\n    assert input_height % 192 == 0\n    assert input_width % 192 == 0\n\n    img_input, levels = encoder(\n        input_height=input_height,  input_width=input_width)\n    [f1, f2, f3, f4, f5] = levels\n\n    o = f5\n\n    pool_factors = [1, 2, 3, 6]\n    pool_outs = [o]\n\n    for p in pool_factors:\n        pooled = pool_block(o, p)\n        pool_outs.append(pooled)\n\n    o = Concatenate(axis=MERGE_AXIS)(pool_outs)\n\n    o = Conv2D(512, (1, 1), data_format=IMAGE_ORDERING, use_bias=False)(o)\n    o = BatchNormalization()(o)\n    o = Activation('relu')(o)\n\n    o = Conv2D(n_classes, (3, 3), data_format=IMAGE_ORDERING,\n               padding='same')(o)\n    o = resize_image(o, (8, 8), data_format=IMAGE_ORDERING)\n\n    model = get_segmentation_model(img_input, o)\n    return model\n\n\ndef pspnet(n_classes,  input_height=384, input_width=576):\n\n    model = _pspnet(n_classes, vanilla_encoder,\n                    input_height=input_height, input_width=input_width)\n    model.model_name = \"pspnet\"\n    return model\n\n\ndef vgg_pspnet(n_classes,  input_height=384, input_width=576):\n\n    model = _pspnet(n_classes, get_vgg_encoder,\n                    input_height=input_height, input_width=input_width)\n    model.model_name = \"vgg_pspnet\"\n    return model\n\n\ndef resnet50_pspnet(n_classes,  input_height=384, input_width=576):\n\n    model = _pspnet(n_classes, get_resnet50_encoder,\n                    input_height=input_height, input_width=input_width)\n    model.model_name = \"resnet50_pspnet\"\n    return model\n\n\ndef pspnet_50(n_classes,  input_height=473, input_width=473):\n    from ._pspnet_2 import _build_pspnet\n\n    nb_classes = n_classes\n    resnet_layers = 50\n    input_shape = (input_height, input_width)\n    model = _build_pspnet(nb_classes=nb_classes,\n                          resnet_layers=resnet_layers,\n                          input_shape=input_shape)\n    model.model_name = \"pspnet_50\"\n    return model\n\n\ndef pspnet_101(n_classes,  input_height=473, input_width=473):\n    from ._pspnet_2 import _build_pspnet\n\n    nb_classes = n_classes\n    resnet_layers = 101\n    input_shape = 
(input_height, input_width)\n    model = _build_pspnet(nb_classes=nb_classes,\n                          resnet_layers=resnet_layers,\n                          input_shape=input_shape)\n    model.model_name = \"pspnet_101\"\n    return model\n\n\n# def mobilenet_pspnet( n_classes ,  input_height=224, input_width=224 ):\n\n# \tmodel =  _pspnet(n_classes, get_mobilenet_encoder,\n#                    input_height=input_height, input_width=input_width)\n# \tmodel.model_name = \"mobilenet_pspnet\"\n# \treturn model\n\n\nif __name__ == '__main__':\n\n    m = _pspnet(101, vanilla_encoder)\n    # m = _pspnet( 101 , get_mobilenet_encoder ,True , 224 , 224  )\n    m = _pspnet(101, get_vgg_encoder)\n    m = _pspnet(101, get_resnet50_encoder)\n"
  },
  {
    "path": "axelerate/networks/segnet/models/segnet.py",
    "content": "import os\n\nfrom tensorflow.keras.models import *\nfrom tensorflow.keras.layers import *\n\nfrom .config import IMAGE_ORDERING\nfrom .model_utils import get_segmentation_model\nfrom axelerate.networks.common_utils.feature import create_feature_extractor\n\nmobilenet = {1:10,2:23,3:36,4:73,5:86}\ndensenet121 = {1:8,2:50,3:138,4:310,5:426}\nnasnetmobile = {1:7,2:64,3:295,4:537,5:768}\nsqueezenet = {1:2,2:17,3:32,4:47,5:61}\nfull_yolo = {1:14,2:27,3:40,4:53,5:73}\ntiny_yolo = {1:7,2:15,3:23,4:27,5:30}\nresnet50 = {1:2,2:37,3:80,4:142,5:174}\n\ndef chopper(model, model_name, f):\n    outputs = model.layers[model_name[f]].output\n\ndef segnet_decoder(f, n_classes, n_up=3):\n\n    assert n_up >= 2\n\n    o = f\n    o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o)\n    o = (Conv2D(256, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o)\n    o = (BatchNormalization())(o)\n\n    o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o)\n    o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o)\n    o = (Conv2D(128, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o)\n    o = (BatchNormalization())(o)\n\n    for _ in range(n_up-2):\n        o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o)\n        o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o)\n        o = (Conv2D(64, (3, 3), padding='valid',\n             data_format=IMAGE_ORDERING))(o)\n        o = (BatchNormalization())(o)\n\n    o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o)\n    o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o)\n    o = (Conv2D(32, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o)\n    o = (BatchNormalization())(o)\n\n    o = Conv2D(n_classes, (3, 3), padding='same',\n               data_format=IMAGE_ORDERING)(o)\n\n    return o\n\n\ndef _segnet(n_classes, encoder_input, encoder_output,  input_height=416, input_width=608, encoder_level=3):\n\n    o = segnet_decoder(f=encoder_output, n_classes=n_classes, n_up=encoder_level-1)\n    model = get_segmentation_model(encoder_input, o)\n\n    return model\n\ndef full_yolo_segnet(n_classes, input_size, encoder_level, weights):\n\n    encoder = create_feature_extractor('Full Yolo',input_size, weights)\n    encoder_output = encoder.feature_extractor.layers[full_yolo[encoder_level]].output\n    print(encoder_output)\n    encoder_input = encoder.feature_extractor.inputs[0]\n    encoder_level += 1\n    model = _segnet(n_classes, encoder_input, encoder_output, input_size, encoder_level=encoder_level)\n    model.model_name = \"full_yolo_segnet\"\n    model.normalize = encoder.normalize\n    return model\n\ndef tiny_yolo_segnet(n_classes, input_size, encoder_level, weights):\n\n    encoder = create_feature_extractor('Tiny Yolo',input_size, weights)\n    encoder_output = encoder.feature_extractor.layers[tiny_yolo[encoder_level]].output\n    print(encoder_output)\n    encoder_input = encoder.feature_extractor.inputs[0]\n    encoder_level += 1\n    model = _segnet(n_classes, encoder_input, encoder_output, input_size, encoder_level=encoder_level)\n    model.model_name = \"tiny_yolo_segnet\"\n    model.normalize = encoder.normalize\n    return model\n\ndef squeezenet_segnet(n_classes, input_size, encoder_level, weights):\n\n    encoder = create_feature_extractor('SqueezeNet',input_size, weights)\n    encoder_output = encoder.feature_extractor.layers[squeezenet[encoder_level]].output\n    encoder_input = encoder.feature_extractor.inputs[0]\n\n    model = _segnet(n_classes, encoder_input, encoder_output, 
input_size, encoder_level=encoder_level)\n    model.model_name = \"squeezenet_segnet\"\n    model.normalize = encoder.normalize\n    return model\n\ndef densenet121_segnet(n_classes, input_size, encoder_level, weights):\n\n    encoder = create_feature_extractor('DenseNet121', input_size, weights)\n    encoder_output = encoder.feature_extractor.layers[densenet121[encoder_level]].output\n    encoder_input = encoder.feature_extractor.inputs[0]\n\n    model = _segnet(n_classes, encoder_input, encoder_output, input_size, encoder_level=encoder_level)\n    model.model_name = \"densenet121_segnet\"\n    model.normalize = encoder.normalize\n    return model\n\ndef nasnetmobile_segnet(n_classes, input_size, encoder_level, weights):\n\n    encoder = create_feature_extractor('NASNetMobile', input_size, weights)\n    encoder_output = encoder.feature_extractor.layers[nasnetmobile[encoder_level]].output\n    encoder_input = encoder.feature_extractor.inputs[0]\n\n    model = _segnet(n_classes, encoder_input, encoder_output, input_size, encoder_level=encoder_level)\n    model.model_name = \"nasnetmobile_segnet\"\n    model.normalize = encoder.normalize\n    return model\n\ndef resnet50_segnet(n_classes, input_size, encoder_level, weights):\n\n    encoder = create_feature_extractor('ResNet50',input_size, weights)\n    encoder_output = encoder.feature_extractor.layers[resnet50[encoder_level]].output\n    encoder_input = encoder.feature_extractor.inputs[0]\n\n    model = _segnet(n_classes, encoder_input, encoder_output, input_size, encoder_level=encoder_level)\n    model.model_name = \"resnet50_segnet\"\n    model.normalize = encoder.normalize\n    return model\n\n\ndef mobilenet_segnet(n_classes, input_size, encoder_level, weights, architecture = 'MobileNet2_5'):\n    \n    encoder = create_feature_extractor(architecture, input_size, weights)\n    encoder_output = encoder.feature_extractor.layers[mobilenet[encoder_level]].output\n    encoder_input = encoder.feature_extractor.inputs[0]\n    \n    model = _segnet(n_classes, encoder_input, encoder_output, input_size, encoder_level=encoder_level)\n    model.model_name = \"mobilenet_segnet\"\n    model.normalize = encoder.normalize\n    return model\n\n"
  },
  {
    "path": "axelerate/networks/segnet/models/unet.py",
    "content": "from keras.models import *\nfrom keras.layers import *\n\nfrom .config import IMAGE_ORDERING\nfrom .model_utils import get_segmentation_model\nfrom .vgg16 import get_vgg_encoder\nfrom .mobilenet import get_mobilenet_encoder\nfrom .basic_models import vanilla_encoder\nfrom .resnet50 import get_resnet50_encoder\n\n\nif IMAGE_ORDERING == 'channels_first':\n    MERGE_AXIS = 1\nelif IMAGE_ORDERING == 'channels_last':\n    MERGE_AXIS = -1\n\n\ndef unet_mini(n_classes, input_height=360, input_width=480):\n\n    if IMAGE_ORDERING == 'channels_first':\n        img_input = Input(shape=(3, input_height, input_width))\n    elif IMAGE_ORDERING == 'channels_last':\n        img_input = Input(shape=(input_height, input_width, 3))\n\n    conv1 = Conv2D(32, (3, 3), data_format=IMAGE_ORDERING,\n                   activation='relu', padding='same')(img_input)\n    conv1 = Dropout(0.2)(conv1)\n    conv1 = Conv2D(32, (3, 3), data_format=IMAGE_ORDERING,\n                   activation='relu', padding='same')(conv1)\n    pool1 = MaxPooling2D((2, 2), data_format=IMAGE_ORDERING)(conv1)\n\n    conv2 = Conv2D(64, (3, 3), data_format=IMAGE_ORDERING,\n                   activation='relu', padding='same')(pool1)\n    conv2 = Dropout(0.2)(conv2)\n    conv2 = Conv2D(64, (3, 3), data_format=IMAGE_ORDERING,\n                   activation='relu', padding='same')(conv2)\n    pool2 = MaxPooling2D((2, 2), data_format=IMAGE_ORDERING)(conv2)\n\n    conv3 = Conv2D(128, (3, 3), data_format=IMAGE_ORDERING,\n                   activation='relu', padding='same')(pool2)\n    conv3 = Dropout(0.2)(conv3)\n    conv3 = Conv2D(128, (3, 3), data_format=IMAGE_ORDERING,\n                   activation='relu', padding='same')(conv3)\n\n    up1 = concatenate([UpSampling2D((2, 2), data_format=IMAGE_ORDERING)(\n        conv3), conv2], axis=MERGE_AXIS)\n    conv4 = Conv2D(64, (3, 3), data_format=IMAGE_ORDERING,\n                   activation='relu', padding='same')(up1)\n    conv4 = Dropout(0.2)(conv4)\n    conv4 = Conv2D(64, (3, 3), data_format=IMAGE_ORDERING,\n                   activation='relu', padding='same')(conv4)\n\n    up2 = concatenate([UpSampling2D((2, 2), data_format=IMAGE_ORDERING)(\n        conv4), conv1], axis=MERGE_AXIS)\n    conv5 = Conv2D(32, (3, 3), data_format=IMAGE_ORDERING,\n                   activation='relu', padding='same')(up2)\n    conv5 = Dropout(0.2)(conv5)\n    conv5 = Conv2D(32, (3, 3), data_format=IMAGE_ORDERING,\n                   activation='relu', padding='same')(conv5)\n\n    o = Conv2D(n_classes, (1, 1), data_format=IMAGE_ORDERING,\n               padding='same')(conv5)\n\n    model = get_segmentation_model(img_input, o)\n    model.model_name = \"unet_mini\"\n    return model\n\n\ndef _unet(n_classes, encoder, l1_skip_conn=True, input_height=416,\n          input_width=608):\n\n    img_input, levels = encoder(\n        input_height=input_height, input_width=input_width)\n    [f1, f2, f3, f4, f5] = levels\n\n    o = f4\n\n    o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o)\n    o = (Conv2D(512, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o)\n    o = (BatchNormalization())(o)\n\n    o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o)\n    o = (concatenate([o, f3], axis=MERGE_AXIS))\n    o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o)\n    o = (Conv2D(256, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o)\n    o = (BatchNormalization())(o)\n\n    o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o)\n    o = (concatenate([o, f2], axis=MERGE_AXIS))\n  
  o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o)\n    o = (Conv2D(128, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o)\n    o = (BatchNormalization())(o)\n\n    o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o)\n\n    if l1_skip_conn:\n        o = (concatenate([o, f1], axis=MERGE_AXIS))\n\n    o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o)\n    o = (Conv2D(64, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o)\n    o = (BatchNormalization())(o)\n\n    o = Conv2D(n_classes, (3, 3), padding='same',\n               data_format=IMAGE_ORDERING)(o)\n\n    model = get_segmentation_model(img_input, o)\n\n    return model\n\n\ndef unet(n_classes, input_height=416, input_width=608, encoder_level=3):\n\n    model = _unet(n_classes, vanilla_encoder,\n                  input_height=input_height, input_width=input_width)\n    model.model_name = \"unet\"\n    return model\n\n\ndef vgg_unet(n_classes, input_height=416, input_width=608, encoder_level=3):\n\n    model = _unet(n_classes, get_vgg_encoder,\n                  input_height=input_height, input_width=input_width)\n    model.model_name = \"vgg_unet\"\n    return model\n\n\ndef resnet50_unet(n_classes, input_height=416, input_width=608,\n                  encoder_level=3):\n\n    model = _unet(n_classes, get_resnet50_encoder,\n                  input_height=input_height, input_width=input_width)\n    model.model_name = \"resnet50_unet\"\n    return model\n\n\ndef mobilenet_unet(n_classes, input_height=224, input_width=224,\n                   encoder_level=3):\n\n    model = _unet(n_classes, get_mobilenet_encoder,\n                  input_height=input_height, input_width=input_width)\n    model.model_name = \"mobilenet_unet\"\n    return model\n\n\nif __name__ == '__main__':\n    m = unet_mini(101)\n    m = _unet(101, vanilla_encoder)\n    # m = _unet( 101 , get_mobilenet_encoder ,True , 224 , 224  )\n    m = _unet(101, get_vgg_encoder)\n    m = _unet(101, get_resnet50_encoder)\n"
  },
  {
    "path": "axelerate/networks/segnet/predict.py",
    "content": "import glob\nimport random\nimport json\nimport os\n\nimport cv2\nimport numpy as np\nnp.set_printoptions(threshold=np.inf)\nfrom tqdm import tqdm\nfrom tensorflow.keras.models import load_model\n\nfrom axelerate.networks.segnet.train import find_latest_checkpoint\nfrom axelerate.networks.segnet.data_utils.data_loader import get_image_array, get_segmentation_array, DATA_LOADER_SEED, class_colors, get_pairs_from_paths\nfrom axelerate.networks.segnet.models.config import IMAGE_ORDERING\nfrom . import metrics\nimport six\n\nrandom.seed(DATA_LOADER_SEED)\n\ndef model_from_checkpoint_path(checkpoints_path):\n\n    from .models.all_models import model_from_name\n    assert (os.path.isfile(checkpoints_path+\"_config.json\")\n            ), \"Checkpoint not found.\"\n    model_config = json.loads(\n        open(checkpoints_path+\"_config.json\", \"r\").read())\n    latest_weights = find_latest_checkpoint(checkpoints_path)\n    assert (latest_weights is not None), \"Checkpoint not found.\"\n    model = model_from_name[model_config['model_class']](\n        model_config['n_classes'], input_height=model_config['input_height'],\n        input_width=model_config['input_width'])\n    print(\"loaded weights \", latest_weights)\n    model.load_weights(latest_weights)\n    return model\n\ndef get_colored_segmentation_image(seg_arr, n_classes, colors=class_colors):\n    output_height = seg_arr.shape[0]\n    output_width = seg_arr.shape[1]\n    seg_img = np.zeros((output_height, output_width, 3))\n    for c in range(n_classes):\n        seg_img[:, :, 0] += ((seg_arr[:, :] == c)*(colors[c][0])).astype('uint8')\n        seg_img[:, :, 1] += ((seg_arr[:, :] == c)*(colors[c][1])).astype('uint8')\n        seg_img[:, :, 2] += ((seg_arr[:, :] == c)*(colors[c][2])).astype('uint8')\n    seg_img = seg_img.astype('uint8')\n    return seg_img \n\ndef get_legends(class_names,  colors=class_colors): \n    \n    n_classes = len(class_names)\n    legend = np.zeros(((len(class_names) * 25) + 25, 125, 3), dtype=\"uint8\") + 255\n\n    for (i, (class_name, color)) in enumerate(zip(class_names[:n_classes] , colors[:n_classes])):\n\n        color = [int(c) for c in color]\n        cv2.putText(legend, class_name, (5, (i * 25) + 17),\n            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0), 1)\n        cv2.rectangle(legend, (100, (i * 25)), (125, (i * 25) + 25),\n            tuple(color), -1)\n        \n    return legend    \n\ndef overlay_seg_image(inp_img , seg_img):\n    orininal_h = inp_img.shape[0]\n    orininal_w = inp_img.shape[1]\n    seg_img = cv2.resize(seg_img, (orininal_w, orininal_h))\n\n    fused_img = (inp_img/2 + seg_img/2 ).astype('uint8')\n    return fused_img \n\ndef concat_lenends(  seg_img , legend_img  ):\n    \n    new_h = np.maximum( seg_img.shape[0] , legend_img.shape[0] )\n    new_w = seg_img.shape[1] + legend_img.shape[1]\n\n    out_img = np.zeros((new_h ,new_w , 3  )).astype('uint8') + legend_img[0 , 0 , 0 ]\n\n    out_img[ :legend_img.shape[0] , :  legend_img.shape[1] ] = np.copy(legend_img)\n    out_img[ :seg_img.shape[0] , legend_img.shape[1]: ] = np.copy(seg_img)\n\n    return out_img\n\ndef visualize_segmentation(seg_arr, inp_img=None, n_classes=None, \n    colors=class_colors, class_names=None, overlay_img=False, show_legends=False, \n    prediction_width=None, prediction_height=None):\n    \n    print(\"Found the following classes in the segmentation image:\", np.unique(seg_arr))\n\n    if n_classes is None:\n        n_classes = np.max(seg_arr)\n\n    seg_img = 
get_colored_segmentation_image(seg_arr, n_classes , colors=colors)\n\n    if not inp_img is None:\n        orininal_h = inp_img.shape[0]\n        orininal_w = inp_img.shape[1]\n        seg_img = cv2.resize(seg_img, (orininal_w, orininal_h))\n\n    if (not prediction_height is None) and (not prediction_width is None):\n        seg_img = cv2.resize(seg_img, (prediction_width, prediction_height ))\n        if not inp_img is None:\n            inp_img = cv2.resize(inp_img, (prediction_width, prediction_height))\n            \n    if overlay_img:\n        assert not inp_img is None\n        seg_img = overlay_seg_image(inp_img, seg_img)\n\n    if show_legends:\n        assert not class_names is None\n        legend_img = get_legends(class_names , colors=colors )\n\n        seg_img = concat_lenends(seg_img, legend_img)\n\n    return seg_img\n\ndef predict(model=None, inp=None, out_fname=None, image = None, overlay_img=False,\n    class_names=None, show_legends=False, colors=class_colors, prediction_width=None, prediction_height=None):\n\n    n_classes = model.n_classes\n\n    pr = model.predict(inp)\n    pr = np.squeeze(pr)\n\n    #pr = pr.reshape((output_height,  output_width, n_classes)).argmax(axis=2)\n    pr = pr.argmax(axis=2)\n\n    seg_img = visualize_segmentation(pr, inp_img=image, n_classes=n_classes, overlay_img=True, colors=colors)\n\n    if out_fname is not None:\n        cv2.imwrite(out_fname, seg_img)\n\n    return pr\n\n\ndef predict_multiple(model=None, inps=None, inp_dir=None, out_dir=None,\n                     checkpoints_path=None ,overlay_img=False ,\n    class_names=None , show_legends=False , colors=class_colors , prediction_width=None , prediction_height=None  ):\n\n    if model is None and (checkpoints_path is not None):\n        model = model_from_checkpoint_path(checkpoints_path)\n\n    if inps is None and (inp_dir is not None):\n        inps = glob.glob(os.path.join(inp_dir, \"*.jpg\")) + glob.glob(\n            os.path.join(inp_dir, \"*.png\")) + \\\n            glob.glob(os.path.join(inp_dir, \"*.jpeg\"))\n\n    assert type(inps) is list\n\n    all_prs = []\n\n    for i, inp in enumerate(tqdm(inps)):\n        if out_dir is None:\n            out_fname = None\n        else:\n            if isinstance(inp, six.string_types):\n                out_fname = os.path.join(out_dir, os.path.basename(inp))\n            else:\n                out_fname = os.path.join(out_dir, str(i) + \".jpg\")\n\n        pr = predict(model, inp, out_fname ,\n            overlay_img=overlay_img,class_names=class_names ,show_legends=show_legends , \n            colors=colors , prediction_width=prediction_width , prediction_height=prediction_height  )\n\n        all_prs.append(pr)\n\n    return all_prs\n\ndef evaluate(model=None, inp_images=None, annotations=None, inp_images_dir=None, annotations_dir=None, checkpoints_path=None):\n    \n    if model is None:\n        assert (checkpoints_path is not None) , \"Please provide the model or the checkpoints_path\"\n        model = model_from_checkpoint_path(checkpoints_path)\n        \n    if inp_images is None:\n        assert (inp_images_dir is not None) , \"Please provide inp_images or inp_images_dir\"\n        assert (annotations_dir is not None) , \"Please provide inp_images or inp_images_dir\"\n        \n        paths = get_pairs_from_paths(inp_images_dir, annotations_dir)\n        paths = list(zip(*paths))\n        inp_images = list(paths[0])\n        annotations = list(paths[1])\n        \n    assert type(inp_images) is list\n    assert 
type(annotations) is list\n        \n    tp = np.zeros(model.n_classes)\n    fp = np.zeros(model.n_classes)\n    fn = np.zeros(model.n_classes)\n    n_pixels = np.zeros(model.n_classes)\n    \n    for inp, ann in tqdm(zip(inp_images , annotations)):\n        pr = model.predict(inp)\n        gt = get_segmentation_array(ann, model.n_classes, no_reshape=True)\n        gt = gt.argmax(-1)\n        #pr = pr.flatten()\n        #gt = gt.flatten()\n                \n        for cl_i in range(model.n_classes):\n            \n            tp[ cl_i ] += np.sum( (pr == cl_i) * (gt == cl_i) )\n            fp[ cl_i ] += np.sum( (pr == cl_i) * ((gt != cl_i)) )\n            fn[ cl_i ] += np.sum( (pr != cl_i) * ((gt == cl_i)) )\n            n_pixels[ cl_i ] += np.sum( gt == cl_i  )\n            \n    cl_wise_score = tp / ( tp + fp + fn + 0.000000000001 )\n    n_pixels_norm = n_pixels /  np.sum(n_pixels)\n    frequency_weighted_IU = np.sum(cl_wise_score*n_pixels_norm)\n    mean_IU = np.mean(cl_wise_score)\n    return {\"frequency_weighted_IU\":frequency_weighted_IU , \"mean_IU\":mean_IU , \"class_wise_IU\":cl_wise_score }\n"
  },
  {
    "path": "axelerate/networks/segnet/train.py",
    "content": "import argparse\nimport json\nfrom .data_utils.data_loader import create_batch_generator, verify_segmentation_dataset\nimport os\nimport glob\nimport six\n\ndef find_latest_checkpoint(checkpoints_path, fail_safe=True):\n\n    def get_epoch_number_from_path(path):\n        return path.replace(checkpoints_path, \"\").strip(\".\")\n\n    # Get all matching files\n    all_checkpoint_files = glob.glob(checkpoints_path + \".*\")\n    # Filter out entries where the epoc_number part is pure number\n    all_checkpoint_files = list(filter(lambda f: get_epoch_number_from_path(f).isdigit(), all_checkpoint_files))\n    if not len(all_checkpoint_files):\n        # The glob list is empty, don't have a checkpoints_path\n        if not fail_safe:\n            raise ValueError(\"Checkpoint path {0} invalid\".format(checkpoints_path))\n        else:\n            return None\n\n    # Find the checkpoint file with the maximum epoch\n    latest_epoch_checkpoint = max(all_checkpoint_files, key=lambda f: int(get_epoch_number_from_path(f)))\n    return latest_epoch_checkpoint\n\n\n\ndef masked_categorical_crossentropy(gt , pr ):\n    from keras.losses import categorical_crossentropy\n    mask = 1-  gt[: , : , 0 ] \n    return categorical_crossentropy( gt , pr )*mask\n\n\n\n\ndef train(model,\n          train_images,\n          train_annotations,\n          input_height=None,\n          input_width=None,\n          n_classes=None,\n          verify_dataset=True,\n          checkpoints_path=None,\n          epochs=5,\n          batch_size=2,\n          validate=False,\n          val_images=None,\n          val_annotations=None,\n          val_batch_size=2,\n          auto_resume_checkpoint=False,\n          load_weights=None,\n          steps_per_epoch=512,\n          val_steps_per_epoch=512,\n          gen_use_multiprocessing=False,\n          ignore_zero_class=False , \n          optimizer_name='adadelta' , do_augment=False , augmentation_name=\"aug_all\"\n          ):\n\n    from .models.all_models import model_from_name\n    # check if user gives model name instead of the model object\n    if isinstance(model, six.string_types):\n        # create the model from the name\n        assert (n_classes is not None), \"Please provide the n_classes\"\n        if (input_height is not None) and (input_width is not None):\n            model = model_from_name[model](\n                n_classes, input_height=input_height, input_width=input_width)\n        else:\n            model = model_from_name[model](n_classes)\n\n    n_classes = model.n_classes\n    input_height = model.input_height\n    input_width = model.input_width\n    output_height = model.output_height\n    output_width = model.output_width\n\n    if validate:\n        assert val_images is not None\n        assert val_annotations is not None\n\n    if optimizer_name is not None:\n\n        if ignore_zero_class:\n            loss_k = masked_categorical_crossentropy\n        else:\n            loss_k = 'categorical_crossentropy'\n\n        model.compile(loss= loss_k ,\n                      optimizer=optimizer_name,\n                      metrics=['accuracy'])\n\n    if checkpoints_path is not None:\n        with open(checkpoints_path+\"_config.json\", \"w\") as f:\n            json.dump({\n                \"model_class\": model.model_name,\n                \"n_classes\": n_classes,\n                \"input_height\": input_height,\n                \"input_width\": input_width,\n                \"output_height\": output_height,\n                
\"output_width\": output_width\n            }, f)\n\n    if load_weights is not None and len(load_weights) > 0:\n        print(\"Loading weights from \", load_weights)\n        model.load_weights(load_weights)\n\n    if auto_resume_checkpoint and (checkpoints_path is not None):\n        latest_checkpoint = find_latest_checkpoint(checkpoints_path)\n        if latest_checkpoint is not None:\n            print(\"Loading the weights from latest checkpoint \",\n                  latest_checkpoint)\n            model.load_weights(latest_checkpoint)\n\n    if verify_dataset:\n        print(\"Verifying training dataset\")\n        verified = verify_segmentation_dataset(train_images, train_annotations, n_classes)\n        assert verified\n        if validate:\n            print(\"Verifying validation dataset\")\n            verified = verify_segmentation_dataset(val_images, val_annotations, n_classes)\n            assert verified\n\n    train_gen = image_segmentation_generator(\n        train_images, train_annotations,  batch_size,  n_classes,\n        input_height, input_width, output_height, output_width , do_augment=do_augment ,augmentation_name=augmentation_name )\n\n    if validate:\n        val_gen = image_segmentation_generator(\n            val_images, val_annotations,  val_batch_size,\n            n_classes, input_height, input_width, output_height, output_width)\n\n    if not validate:\n        for ep in range(epochs):\n            print(\"Starting Epoch \", ep)\n            model.fit_generator(train_gen, steps_per_epoch, epochs=1)\n            if checkpoints_path is not None:\n                model.save_weights(checkpoints_path + \".\" + str(ep))\n                print(\"saved \", checkpoints_path + \".model.\" + str(ep))\n            print(\"Finished Epoch\", ep)\n    else:\n        for ep in range(epochs):\n            print(\"Starting Epoch \", ep)\n            model.fit_generator(train_gen, steps_per_epoch,\n                                validation_data=val_gen,\n                                validation_steps=val_steps_per_epoch,  epochs=1 , use_multiprocessing=gen_use_multiprocessing)\n            if checkpoints_path is not None:\n                model.save_weights(checkpoints_path + \".\" + str(ep))\n                print(\"saved \", checkpoints_path + \".model.\" + str(ep))\n            print(\"Finished Epoch\", ep)\n"
  },
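find_latest_checkpoint() in segnet/train.py above relies on the "checkpoints_path.epoch" naming that train() uses when saving weights each epoch: it globs checkpoints_path + ".*", keeps only suffixes that are pure digits, and takes the numeric maximum. A self-contained sketch with hypothetical file names:

```python
import glob
import os
import tempfile

# Hypothetical checkpoint files written as "<checkpoints_path>.<epoch>", as in train().
tmp = tempfile.mkdtemp()
checkpoints_path = os.path.join(tmp, "segnet")
for ep in (0, 1, 2, 10):
    open(checkpoints_path + "." + str(ep), "w").close()
open(checkpoints_path + "_config.json", "w").close()   # not matched by the glob below

def epoch_suffix(path):
    return path.replace(checkpoints_path, "").strip(".")

candidates = [f for f in glob.glob(checkpoints_path + ".*") if epoch_suffix(f).isdigit()]
latest = max(candidates, key=lambda f: int(epoch_suffix(f)))
print(latest)   # ends in ".10": numeric, not lexicographic, comparison
```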
  {
    "path": "axelerate/networks/yolo/__init__.py",
    "content": ""
  },
  {
    "path": "axelerate/networks/yolo/backend/__init__.py",
    "content": ""
  },
  {
    "path": "axelerate/networks/yolo/backend/batch_gen.py",
    "content": "import cv2\nimport os\nimport numpy as np\nnp.random.seed(1337)\n\nfrom tensorflow.keras.utils import Sequence\nfrom axelerate.networks.common_utils.augment import ImgAugment\nfrom axelerate.networks.yolo.backend.utils.box import to_centroid, create_anchor_boxes, find_match_box\nfrom axelerate.networks.common_utils.fit import train\n\n\ndef create_batch_generator(annotations, \n                           input_size,\n                           grid_sizes,\n                           batch_size,\n                           anchors,\n                           repeat_times,\n                           augment, \n                           norm=None):\n    \"\"\"\n    # Args\n        annotations : Annotations instance in utils.annotation module\n    \n    # Return \n        worker : BatchGenerator instance\n    \"\"\"\n\n    img_aug = ImgAugment(input_size[0], input_size[1], augment)\n    yolo_box = _YoloBox(input_size, grid_sizes)\n    netin_gen = _NetinGen(input_size, norm)\n    netout_gen = _NetoutGen(grid_sizes, annotations.n_classes(), anchors)\n    worker = BatchGenerator(netin_gen,\n                            netout_gen,\n                            yolo_box,\n                            img_aug,\n                            annotations,\n                            batch_size,\n                            repeat_times)\n    return worker\n\n\nclass BatchGenerator(Sequence):\n    def __init__(self,\n                 netin_gen,\n                 netout_gen,\n                 yolo_box,\n                 img_aug,\n                 annotations,\n                 batch_size,\n                 repeat_times):\n        \"\"\"\n        # Args\n            annotations : Annotations instance\n\n        \"\"\"\n        self._netin_gen = netin_gen\n        self._netout_gen = netout_gen\n        self.nb_stages = len(netout_gen.anchors)\n        self._img_aug = img_aug\n        self._yolo_box = yolo_box\n\n        self._batch_size = min(batch_size, len(annotations)*repeat_times)\n        self._repeat_times = repeat_times\n        self.annotations = annotations\n        self.counter = 0\n\n    def __len__(self):\n        return int(len(self.annotations) * self._repeat_times /self._batch_size)\n\n    def __getitem__(self, idx):\n        \"\"\"\n        # Args\n            idx : batch index\n        \"\"\"\n        x_batch = []\n        y_batch1 = []\n\n        if self.nb_stages == 2:\n            y_batch2 = []\n\n        for i in range(self._batch_size):\n            # 1. get input file & its annotation\n            fname = self.annotations.fname(self._batch_size*idx + i)\n            boxes = self.annotations.boxes(self._batch_size*idx + i)\n            labels = self.annotations.code_labels(self._batch_size*idx + i)\n\n            # 2. read image in fixed size\n            img, boxes, labels = self._img_aug.imread(fname, boxes, labels)\n\n            # 3. grid scaling centroid boxes\n            if len(boxes) > 0:\n                norm_boxes = self._yolo_box.trans(boxes)\n            else:\n                norm_boxes = []\n                labels = []\n      \n            # 4. 
generate x_batch\n            x_batch.append(self._netin_gen.run(img))\n            processed_labels = self._netout_gen.run(norm_boxes, labels)\n\n            y_batch1.append(processed_labels[0])\n            if self.nb_stages == 2:           \n                y_batch2.append(processed_labels[1])\n\n        x_batch = np.array(x_batch)\n        y_batch1 = np.array(y_batch1)\n        batch = y_batch1\n\n        if self.nb_stages == 2:           \n            y_batch2 = np.array(y_batch2)\n            batch = [y_batch1, y_batch2]\n\n        self.counter += 1\n        return x_batch, batch\n\n    def on_epoch_end(self):\n        self.annotations.shuffle()\n        self.counter = 0\n\nclass _YoloBox(object):\n\n    def __init__(self, input_size, grid_size):\n        self._input_size = input_size\n        self._grid_size = grid_size\n\n    def trans(self, boxes):\n        \"\"\"\n        # Args\n            boxes : array, shape of (N, 4)\n                (x1, y1, x2, y2)-ordered & input image size scale coordinate\n\n        # Returns\n            norm_boxes : array, same shape of boxes\n                (cx, cy, w, h)-ordered & rescaled to grid-size\n        \"\"\"\n        # 1. [[100, 120, 140, 200]] minimax box -> centroid box\n        centroid_boxes = to_centroid(boxes).astype(np.float32)\n        # 2. [[120. 160.  40.  80.]] image scale -> imga scle 0 ~ 1 [[4.        5.        1.3333334 2.5      ]]\n        norm_boxes = np.zeros_like(centroid_boxes)\n        norm_boxes[:,0::2] = centroid_boxes[:,0::2] / self._input_size[1]\n        norm_boxes[:,1::2] = centroid_boxes[:,1::2] / self._input_size[0]\n        #print(\"norm boxes\", norm_boxes)\n        return norm_boxes\n\nclass _NetinGen(object):\n    def __init__(self, input_size, norm):\n        self._input_size = input_size\n        self._norm = self._set_norm(norm)\n\n    def run(self, image):\n        return self._norm(image)\n\n    def _set_norm(self, norm):\n        if norm is None:\n            return lambda x: x\n        else:\n            return norm\n\nclass _NetoutGen(object):\n    def __init__(self,\n                 grid_sizes,\n                 nb_classes,\n                 anchors):\n        self.nb_classes = nb_classes\n        self.anchors = np.asarray(anchors)\n        self._tensor_shape = self._set_tensor_shape(grid_sizes, nb_classes)\n\n    def run(self, norm_boxes, labels):\n        \"\"\"\n        # Args\n            norm_boxes : array, shape of (N, 4)\n                scale normalized boxes\n            labels : list of integers\n            y_shape : tuple (grid_size, grid_size, nb_boxes, 4+1+nb_classes)\n        \"\"\"\n        labels = np.asarray([labels])\n        norm_boxes = np.asarray(norm_boxes)\n        if len(norm_boxes) > 0:\n            norm_boxes= np.concatenate((labels.T, norm_boxes), axis = 1)\n        #print(\"boxes\", boxes)\n        y = self.box_to_label(norm_boxes)\n        #print(y.shape)\n\n        return y\n\n    def _set_tensor_shape(self, grid_size, nb_classes):\n        nb_boxes = len(self.anchors[0])\n        return [(grid_size[i][0], grid_size[i][1], nb_boxes, 4+1+nb_classes) for i in range(len(self.anchors))]\n\n    def _xy_grid_index(self, box_xy: np.ndarray, layer: int):\n        \"\"\" get xy index in grid scale\n\n        Parameters\n        ----------\n        box_xy : np.ndarray\n            value = [x,y]\n        layer  : int\n            layer index\n\n        Returns\n        -------\n        [np.ndarray,np.ndarray]\n\n            index xy : = [idx,idy]\n        \"\"\"\n        out_wh 
= self._tensor_shape[layer][0:2:][::-1]\n        #print(box_xy, out_wh)\n        return np.floor(box_xy * out_wh).astype('int')\n\n    @staticmethod\n    def _fake_iou(a: np.ndarray, b: np.ndarray) -> float:\n        \"\"\"set a,b center to same,then calc the iou value\n\n        Parameters\n        ----------\n        a : np.ndarray\n            array value = [w,h]\n        b : np.ndarray\n            array value = [w,h]\n\n        Returns\n        -------\n        float\n            iou value\n        \"\"\"\n        a_maxes = a / 2.\n        a_mins = -a_maxes\n\n        b_maxes = b / 2.\n        b_mins = -b_maxes\n\n        iner_mins = np.maximum(a_mins, b_mins)\n        iner_maxes = np.minimum(a_maxes, b_maxes)\n        iner_wh = np.maximum(iner_maxes - iner_mins, 0.)\n        iner_area = iner_wh[..., 0] * iner_wh[..., 1]\n\n        s1 = a[..., 0] * a[..., 1]\n        s2 = b[..., 0] * b[..., 1]\n\n        return iner_area / (s1 + s2 - iner_area)\n\n    def _get_anchor_index(self, wh: np.ndarray) -> np.ndarray:\n        \"\"\"get the max iou anchor index\n\n        Parameters\n        ----------\n        wh : np.ndarray\n            value = [w,h]\n\n        Returns\n        -------\n        np.ndarray\n            max iou anchor index\n            value  = [layer index , anchor index]\n        \"\"\"\n        iou = _NetoutGen._fake_iou(wh, self.anchors)\n        return np.unravel_index(np.argmax(iou), iou.shape)\n\n    def box_to_label(self, true_box: np.ndarray) -> tuple:\n        \"\"\"convert the annotation to yolo v3 label~\n\n        Parameters\n        ----------\n        true_box : np.ndarray\n            annotation shape :[n,5] value :[n*[p,x,y,w,h]]\n\n        Returns\n        -------\n        tuple\n            labels list value :[output_number*[out_h,out_w,anchor_num,class+5]]\n        \"\"\"\n        labels = [np.zeros((self._tensor_shape[i][0], self._tensor_shape[i][1], len(self.anchors[i]),\n                            5 + self.nb_classes), dtype='float32') for i in range(len(self.anchors))]\n        for box in true_box:\n            # NOTE box [x y w h] are relative to the size of the entire image [0~1]\n            l, n = self._get_anchor_index(box[3:5])  # [layer index, anchor index]\n            idx, idy = self._xy_grid_index(box[1:3], l)  # [x index , y index]\n            labels[l][idy, idx, n, 0:4] = np.clip(box[1:5], 1e-8, 1.)\n            labels[l][idy, idx, n, 4] = 1.\n            labels[l][idy, idx, n, 5 + int(box[0])] = 1.\n\n        return labels\n"
  },
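_NetoutGen in batch_gen.py above assigns each ground-truth box to one output layer and one anchor by the centre-aligned ("fake") IoU of widths and heights, then writes it into the grid cell that contains the box centre. A NumPy sketch of that assignment with hypothetical anchors and grid sizes (since the centres coincide, the intersection reduces to the elementwise minimum of the widths/heights):

```python
import numpy as np

# Hypothetical (w, h) anchors per output layer, in the 0..1 image-relative units
# that _NetoutGen expects.
anchors = np.array([[[0.10, 0.15], [0.30, 0.40]],     # layer 0
                    [[0.50, 0.60], [0.80, 0.90]]])    # layer 1

def fake_iou(wh, anchors):
    # Centres coincide, so the overlap per dimension is just the minimum.
    inter = np.minimum(wh, anchors).prod(-1)
    return inter / (wh.prod(-1) + anchors.prod(-1) - inter)

box_wh = np.array([0.28, 0.35])                 # normalized box width/height
iou = fake_iou(box_wh, anchors)                 # shape (2, 2): layer x anchor
layer, anchor = np.unravel_index(np.argmax(iou), iou.shape)

# The box centre picks the grid cell in that layer's output map.
out_hw = [(7, 7), (14, 14)]                     # hypothetical grid sizes per layer
cx, cy = 0.62, 0.40
idx = int(cx * out_hw[layer][1])                # column index
idy = int(cy * out_hw[layer][0])                # row index
print(layer, anchor, idx, idy)                  # the label tensor gets written at [idy, idx, anchor]
```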
  {
    "path": "axelerate/networks/yolo/backend/decoder.py",
    "content": "import numpy as np\r\nfrom axelerate.networks.yolo.backend.utils.box import BoundBox\r\nfrom axelerate.networks.yolo.backend.utils.box import BoundBox, nms_boxes, boxes_to_array\r\n\r\nclass YoloDecoder(object):\r\n    \r\n    def __init__(self,\r\n                 anchors,\r\n                 params,\r\n                 nms_threshold,\r\n                 input_size):\r\n\r\n        self.anchors = anchors\r\n        self.nms_threshold = nms_threshold\r\n        self.input_size = input_size\r\n        self.params = params\r\n\r\n    def run(self, netout, obj_threshold):\r\n        boxes = []\r\n\r\n        for l, output in enumerate(netout):\r\n            output = np.squeeze(output)\r\n            grid_h, grid_w, nb_box = output.shape[0:3]\r\n            \r\n            # decode the output by the network\r\n            output[..., 4] = _sigmoid(output[..., 4])\r\n            output[..., 5:] = output[..., 4][..., np.newaxis] * _sigmoid(output[..., 5:])\r\n            output[..., 5:] *= output[..., 5:] > obj_threshold\r\n            \r\n            for row in range(grid_h):\r\n                for col in range(grid_w):\r\n                    for b in range(nb_box):\r\n                        # from 4th element onwards are confidence and class classes\r\n                        classes = output[row, col, b, 5:]\r\n\r\n                        if np.sum(classes) > 0:\r\n                            # first 4 elements are x, y, w, and h\r\n                            x, y, w, h = output[row, col, b, :4]\r\n\r\n                            x = (col + _sigmoid(x)) / grid_w # center position, unit: image width\r\n                            y = (row + _sigmoid(y)) / grid_h # center position, unit: image height\r\n                            w = self.anchors[l][b][0] * np.exp(w) # unit: image width\r\n                            h = self.anchors[l][b][1] * np.exp(h) # unit: image height\r\n                            confidence = output[row, col, b, 4]\r\n                            box = BoundBox(x, y, w, h, confidence, classes)\r\n                            boxes.append(box)\r\n\r\n        boxes = nms_boxes(boxes, len(classes), self.nms_threshold, obj_threshold)\r\n        boxes, probs = boxes_to_array(boxes)\r\n\r\n        return boxes, probs\r\n\r\ndef _sigmoid(x):\r\n    return 1. / (1. + np.exp(-x))\r\n\r\n"
  },
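YoloDecoder.run() above converts the raw network output of each cell and anchor into an image-relative centroid box: sigmoid on the xy offsets and the objectness, exponential on wh scaled by the anchor, and class scores gated by the objectness. A sketch for a single hypothetical cell:

```python
import numpy as np

def sigmoid(x):
    return 1. / (1. + np.exp(-x))

# Hypothetical raw output for one grid cell / one anchor:
# [tx, ty, tw, th, objectness, class logits...]
raw = np.array([0.2, -0.1, 0.3, 0.4, 2.0, 1.5, -1.0])
row, col, grid_h, grid_w = 3, 5, 7, 7
anchor_w, anchor_h = 0.3, 0.4           # hypothetical anchor for this slot

x = (col + sigmoid(raw[0])) / grid_w    # centre x, fraction of image width
y = (row + sigmoid(raw[1])) / grid_h    # centre y, fraction of image height
w = anchor_w * np.exp(raw[2])           # width, fraction of image width
h = anchor_h * np.exp(raw[3])           # height, fraction of image height
confidence = sigmoid(raw[4])
class_scores = confidence * sigmoid(raw[5:])
print(x, y, w, h, confidence, class_scores)
```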
  {
    "path": "axelerate/networks/yolo/backend/loss.py",
    "content": "import tensorflow as tf\r\nimport tensorflow.python.keras.backend as K\r\nfrom tensorflow import map_fn\r\nimport numpy as np\r\nimport os\r\nimport skimage\r\nimport cv2\r\nfrom math import cos, sin\r\n\r\ndef tf_xywh_to_all(grid_pred_xy, grid_pred_wh, layer, params):\r\n    \"\"\" rescale the pred raw [grid_pred_xy,grid_pred_wh] to [0~1]\r\n\r\n    Parameters\r\n    ----------\r\n    grid_pred_xy : tf.Tensor\r\n\r\n    grid_pred_wh : tf.Tensor\r\n\r\n    layer : int\r\n        the output layer\r\n    h : Helper\r\n\r\n\r\n    Returns\r\n    -------\r\n    tuple\r\n\r\n        after process, [all_pred_xy, all_pred_wh] \r\n    \"\"\"\r\n    with tf.name_scope('xywh_to_all_%d' % layer):\r\n        #print('xyoffset', params.xy_offset[layer], 'outhw', params.out_hw[layer][::-1])\r\n        all_pred_xy = (tf.sigmoid(grid_pred_xy[..., :]) + params.xy_offset[layer]) / params.out_hw[layer][::-1]\r\n        all_pred_wh = tf.exp(grid_pred_wh[..., :]) * params.anchors[layer]\r\n    return all_pred_xy, all_pred_wh\r\n\r\n\r\ndef tf_xywh_to_grid(all_true_xy, all_true_wh, layer, params):\r\n    \"\"\"convert true label xy wh to grid scale\r\n\r\n    Parameters\r\n    ----------\r\n    all_true_xy : tf.Tensor\r\n\r\n    all_true_wh : tf.Tensor\r\n\r\n    layer : int\r\n        layer index\r\n    h : Helper\r\n\r\n\r\n    Returns\r\n    -------\r\n    [tf.Tensor, tf.Tensor]\r\n        grid_true_xy, grid_true_wh shape = [out h ,out w,anchor num , 2 ]\r\n    \"\"\"\r\n    with tf.name_scope('xywh_to_grid_%d' % layer):\r\n        grid_true_xy = (all_true_xy * params.out_hw[layer][::-1]) - params.xy_offset[layer]\r\n        grid_true_wh = tf.math.log(all_true_wh / params.anchors[layer])\r\n    return grid_true_xy, grid_true_wh\r\n\r\n\r\ndef tf_reshape_box(true_xy_A: tf.Tensor, true_wh_A: tf.Tensor, p_xy_A: tf.Tensor, p_wh_A: tf.Tensor, layer: int, params) -> tuple:\r\n    \"\"\" reshape the xywh to [?,h,w,anchor_nums,true_box_nums,2]\r\n        NOTE  must use obj mask in atrue xywh !\r\n    Parameters\r\n    ----------\r\n    true_xy_A : tf.Tensor\r\n        shape will be [true_box_nums,2]\r\n\r\n    true_wh_A : tf.Tensor\r\n        shape will be [true_box_nums,2]\r\n\r\n    p_xy_A : tf.Tensor\r\n        shape will be [?,h,w,anhor_nums,2]\r\n\r\n    p_wh_A : tf.Tensor\r\n        shape will be [?,h,w,anhor_nums,2]\r\n\r\n    layer : int\r\n\r\n    helper : Helper\r\n\r\n\r\n    Returns\r\n    -------\r\n    tuple\r\n        true_cent, true_box_wh, pred_cent, pred_box_wh\r\n    \"\"\"\r\n    with tf.name_scope('reshape_box_%d' % layer):\r\n        true_cent = true_xy_A[tf.newaxis, tf.newaxis, tf.newaxis, tf.newaxis, ...]\r\n        true_box_wh = true_wh_A[tf.newaxis, tf.newaxis, tf.newaxis, tf.newaxis, ...]\r\n\r\n        true_cent = tf.tile(true_cent, [helper.batch_size, helper.out_hw[layer][0], helper.out_hw[layer][1], helper.anchor_number, 1, 1])\r\n        true_box_wh = tf.tile(true_box_wh, [helper.batch_size, helper.out_hw[layer][0], helper.out_hw[layer][1], helper.anchor_number, 1, 1])\r\n\r\n        pred_cent = p_xy_A[..., tf.newaxis, :]\r\n        pred_box_wh = p_wh_A[..., tf.newaxis, :]\r\n        pred_cent = tf.tile(pred_cent, [1, 1, 1, 1, tf.shape(true_xy_A)[0], 1])\r\n        pred_box_wh = tf.tile(pred_box_wh, [1, 1, 1, 1, tf.shape(true_wh_A)[0], 1])\r\n\r\n    return true_cent, true_box_wh, pred_cent, pred_box_wh\r\n\r\n\r\ndef tf_iou(pred_xy: tf.Tensor, pred_wh: tf.Tensor, vaild_xy: tf.Tensor, vaild_wh: tf.Tensor) -> tf.Tensor:\r\n    \"\"\" calc the iou form pred box with vaild 
box\r\n\r\n    Parameters\r\n    ----------\r\n    pred_xy : tf.Tensor\r\n        pred box shape = [out h, out w, anchor num, 2]\r\n\r\n    pred_wh : tf.Tensor\r\n        pred box shape = [out h, out w, anchor num, 2]\r\n\r\n    vaild_xy : tf.Tensor\r\n        vaild box shape = [? , 2]\r\n\r\n    vaild_wh : tf.Tensor\r\n        vaild box shape = [? , 2]\r\n\r\n    Returns\r\n    -------\r\n    tf.Tensor\r\n        iou value shape = [out h, out w, anchor num ,?]\r\n    \"\"\"\r\n    b1_xy = tf.expand_dims(pred_xy, -2)\r\n    b1_wh = tf.expand_dims(pred_wh, -2)\r\n    b1_wh_half = b1_wh / 2.\r\n    b1_mins = b1_xy - b1_wh_half\r\n    b1_maxes = b1_xy + b1_wh_half\r\n\r\n    b2_xy = tf.expand_dims(vaild_xy, 0)\r\n    b2_wh = tf.expand_dims(vaild_wh, 0)\r\n    b2_wh_half = b2_wh / 2.\r\n    b2_mins = b2_xy - b2_wh_half\r\n    b2_maxes = b2_xy + b2_wh_half\r\n\r\n    intersect_mins = tf.maximum(b1_mins, b2_mins)\r\n    intersect_maxes = tf.minimum(b1_maxes, b2_maxes)\r\n    intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)\r\n    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]\r\n    b1_area = b1_wh[..., 0] * b1_wh[..., 1]\r\n    b2_area = b2_wh[..., 0] * b2_wh[..., 1]\r\n    iou = intersect_area / (b1_area + b2_area - intersect_area)\r\n\r\n    return iou\r\n\r\n\r\ndef calc_ignore_mask(t_xy_A: tf.Tensor, t_wh_A: tf.Tensor, p_xy: tf.Tensor, p_wh: tf.Tensor, obj_mask: tf.Tensor, iou_thresh: float, layer: int, params) -> tf.Tensor:\r\n    \"\"\"clac the ignore mask\r\n\r\n    Parameters\r\n    ----------\r\n    t_xy_A : tf.Tensor\r\n        raw ture xy,shape = [batch size,h,w,anchors,2]\r\n    t_wh_A : tf.Tensor\r\n        raw true wh,shape = [batch size,h,w,anchors,2]\r\n    p_xy : tf.Tensor\r\n        raw pred xy,shape = [batch size,h,w,anchors,2]\r\n    p_wh : tf.Tensor\r\n        raw pred wh,shape = [batch size,h,w,anchors,2]\r\n    obj_mask : tf.Tensor\r\n        old obj mask,shape = [batch size,h,w,anchors]\r\n    iou_thresh : float\r\n        iou thresh \r\n    helper : Helper\r\n        Helper obj\r\n\r\n    Returns\r\n    -------\r\n    tf.Tensor\r\n    ignore_mask : \r\n        ignore_mask, shape = [batch size, h, w, anchors, 1]\r\n    \"\"\"\r\n    with tf.name_scope('calc_mask_%d' % layer):\r\n        pred_xy, pred_wh = tf_xywh_to_all(p_xy, p_wh, layer, params)\r\n\r\n        ignore_mask = []\r\n        for bc in range(params.batch_size):\r\n            vaild_xy = tf.boolean_mask(t_xy_A[bc], obj_mask[bc])\r\n            vaild_wh = tf.boolean_mask(t_wh_A[bc], obj_mask[bc])\r\n            iou_score = tf_iou(pred_xy[bc], pred_wh[bc], vaild_xy, vaild_wh)\r\n            best_iou = tf.reduce_max(iou_score, axis=-1, keepdims=True)\r\n            ignore_mask.append(tf.cast(best_iou < iou_thresh, tf.float32))\r\n    return tf.stack(ignore_mask)\r\n\r\n\r\nclass Params:\r\n\r\n    def __init__(self, obj_thresh, iou_thresh, obj_weight, noobj_weight, wh_weight, out_hw, anchors, class_num):\r\n        self.obj_thresh = obj_thresh\r\n        self.iou_thresh = iou_thresh\r\n        self.wh_weight = wh_weight\r\n        self.obj_weight = obj_weight\r\n        self.noobj_weight = noobj_weight\r\n        self.class_num = class_num\r\n        self.out_hw = np.reshape(np.array(out_hw), (-1, 2))\r\n        #print(self.out_hw)\r\n        self.anchors = anchors\r\n\r\n        self.grid_wh = (1 / self.out_hw)[:, [1, 0]]\r\n        #print(self.grid_wh)\r\n        self.wh_scale = Params._anchor_scale(self.anchors, self.grid_wh)\r\n        self.xy_offset = 
Params._coordinate_offset(self.anchors, self.out_hw)\r\n\r\n        self.batch_size = None\r\n\r\n    @staticmethod\r\n    def _coordinate_offset(anchors: np.ndarray, out_hw: np.ndarray) -> np.array:\r\n        \"\"\"construct the anchor coordinate offset array , used in convert scale\r\n\r\n        Parameters\r\n        ----------\r\n        anchors : np.ndarray\r\n            anchors shape = [n,] = [ n x [m,2]]\r\n        out_hw : np.ndarray\r\n            output height width shape = [n,2]\r\n\r\n        Returns\r\n        -------\r\n        np.array\r\n            scale shape = [n,] = [n x [h_n,w_n,m,2]]\r\n        \"\"\"\r\n        grid = []\r\n        for l in range(len(anchors)):\r\n            grid_y = np.tile(np.reshape(np.arange(0, stop=out_hw[l][0]), [-1, 1, 1, 1]), [1, out_hw[l][1], 1, 1])\r\n            grid_x = np.tile(np.reshape(np.arange(0, stop=out_hw[l][1]), [1, -1, 1, 1]), [out_hw[l][0], 1, 1, 1])\r\n            grid.append(np.concatenate([grid_x, grid_y], axis=-1))\r\n        return np.array(grid)\r\n\r\n    @staticmethod\r\n    def _anchor_scale(anchors: np.ndarray, grid_wh: np.ndarray) -> np.array:\r\n        \"\"\"construct the anchor scale array , used in convert label to annotation\r\n\r\n        Parameters\r\n        ----------\r\n        anchors : np.ndarray\r\n            anchors shape = [n,] = [ n x [m,2]]\r\n        out_hw : np.ndarray\r\n            output height width shape = [n,2]\r\n\r\n        Returns\r\n        -------\r\n        np.array\r\n            scale shape = [n,] = [n x [m,2]]\r\n        \"\"\"\r\n        return np.array([anchors[i] * grid_wh[i] for i in range(len(anchors))])\r\n\r\n\r\ndef create_loss_fn(params, layer, batch_size):\r\n\r\n    params.batch_size = batch_size\r\n    shapes = [[-1] + list(params.out_hw[layer]) + [len(params.anchors[layer]), params.class_num + 5]]\r\n    #print(shapes)\r\n    # @tf.function\r\n    def loss_fn(y_true: tf.Tensor, y_pred: tf.Tensor):\r\n        #print(y_true, y_pred)\r\n        \"\"\" split the label \"\"\"\r\n        grid_pred_xy = y_pred[..., 0:2]\r\n        grid_pred_wh = y_pred[..., 2:4]\r\n        pred_confidence = y_pred[..., 4:5]\r\n        pred_cls = y_pred[..., 5:]\r\n\r\n        all_true_xy = y_true[..., 0:2]\r\n        all_true_wh = y_true[..., 2:4]\r\n        true_confidence = y_true[..., 4:5]\r\n        true_cls = y_true[..., 5:]\r\n\r\n        obj_mask = true_confidence  # true_confidence[..., 0] > obj_thresh\r\n        obj_mask_bool = y_true[..., 4] > params.obj_thresh\r\n\r\n        \"\"\" calc the ignore mask  \"\"\"\r\n\r\n        ignore_mask = calc_ignore_mask(all_true_xy, all_true_wh, grid_pred_xy,\r\n                                       grid_pred_wh, obj_mask_bool,\r\n                                       params.iou_thresh, layer, params)\r\n\r\n        grid_true_xy, grid_true_wh = tf_xywh_to_grid(all_true_xy, all_true_wh, layer, params)\r\n        # NOTE When wh=0 , tf.log(0) = -inf, so use K.switch to avoid it\r\n        grid_true_wh = K.switch(obj_mask_bool, grid_true_wh, tf.zeros_like(grid_true_wh))\r\n\r\n        \"\"\" define loss \"\"\"\r\n        coord_weight = 2 - all_true_wh[..., 0:1] * all_true_wh[..., 1:2]\r\n\r\n        xy_loss = tf.reduce_sum(\r\n            obj_mask * coord_weight * tf.nn.sigmoid_cross_entropy_with_logits(\r\n                labels=grid_true_xy, logits=grid_pred_xy)) / params.batch_size\r\n\r\n        wh_loss = tf.reduce_sum(\r\n            obj_mask * coord_weight * params.wh_weight * tf.square(tf.subtract(\r\n                x=grid_true_wh, 
y=grid_pred_wh))) / params.batch_size\r\n\r\n        obj_loss = params.obj_weight * tf.reduce_sum(\r\n            obj_mask * tf.nn.sigmoid_cross_entropy_with_logits(\r\n                labels=true_confidence, logits=pred_confidence)) / params.batch_size\r\n\r\n        noobj_loss = params.noobj_weight * tf.reduce_sum(\r\n            (1 - obj_mask) * ignore_mask * tf.nn.sigmoid_cross_entropy_with_logits(\r\n                labels=true_confidence, logits=pred_confidence)) / params.batch_size\r\n\r\n        cls_loss = tf.reduce_sum(\r\n            obj_mask * tf.nn.sigmoid_cross_entropy_with_logits(\r\n                labels=true_cls, logits=pred_cls)) / params.batch_size\r\n\r\n        total_loss = obj_loss + noobj_loss + cls_loss + xy_loss + wh_loss\r\n\r\n        return total_loss\r\n\r\n    return loss_fn"
  },
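The loss above works in grid coordinates: Params._coordinate_offset builds a per-cell (x, y) offset grid, and tf_xywh_to_all adds the sigmoided xy prediction to that offset and divides by the grid width/height to get image-relative centres. A NumPy sketch for one hypothetical 3x3 output layer:

```python
import numpy as np

# Offset grid for a single 3x3 output layer, shaped (h, w, 1, 2) with the
# (x, y) cell index stored per position, as in Params._coordinate_offset.
out_h, out_w = 3, 3
grid_y = np.tile(np.reshape(np.arange(out_h), [-1, 1, 1, 1]), [1, out_w, 1, 1])
grid_x = np.tile(np.reshape(np.arange(out_w), [1, -1, 1, 1]), [out_h, 1, 1, 1])
xy_offset = np.concatenate([grid_x, grid_y], axis=-1)

# Mirroring tf_xywh_to_all: sigmoid(txy) is added to the cell offset and
# divided by (w, h) to land in 0..1 image coordinates.
txy = np.zeros(2)                        # raw xy prediction for cell (row=2, col=1)
cell = xy_offset[2, 1, 0]                # -> [1, 2]
all_xy = (1 / (1 + np.exp(-txy)) + cell) / np.array([out_w, out_h])
print(xy_offset.shape, cell, all_xy)     # (3, 3, 1, 2) [1 2] [0.5 0.8333...]
```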
  {
    "path": "axelerate/networks/yolo/backend/network.py",
    "content": "# -*- coding: utf-8 -*-\nimport numpy as np\nimport tensorflow as tf\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.layers import Reshape, Conv2D, UpSampling2D, Concatenate, ZeroPadding2D\nfrom axelerate.networks.common_utils.feature import create_feature_extractor\nfrom axelerate.networks.common_utils.mobilenet_sipeed.mobilenet import _depthwise_conv_block, _conv_block\n\ndef create_yolo_network(architecture,\n                        input_size,\n                        nb_classes,\n                        nb_box,\n                        nb_stages,\n                        weights):\n    feature_extractor = create_feature_extractor(architecture, input_size, weights)\n    yolo_net = YoloNetwork(feature_extractor,\n                           nb_stages,\n                           nb_classes,\n                           nb_box)\n    return yolo_net\n\n\nclass YoloNetwork(object):\n    \n    def __init__(self,\n                 feature_extractor,\n                 nb_stages,\n                 nb_classes,\n                 nb_box):\n\n        # 1. create full network\n        grid_size_y, grid_size_x = feature_extractor.get_output_size(layer  = 'conv_pw_13_relu')\n        x1 = feature_extractor.get_output_tensor('conv_pw_13_relu')\n        #x1 = _depthwise_conv_block(inputs = x1, alpha = 1, pointwise_conv_filters = 128, block_id=14)\n\n        # make the object detection layer\n        y1 = Conv2D(nb_box * (4 + 1 + nb_classes), (1,1), strides=(1,1),\n                            padding='same', \n                            name='detection_layer_1', \n                            kernel_initializer='lecun_normal')(x1)\n\n        if nb_stages == 2:\n            grid_size_y_2, grid_size_x_2 = feature_extractor.get_output_size(layer = 'conv_pw_11_relu')\n            x2 = feature_extractor.get_output_tensor('conv_pw_11_relu')\n            #x1 = _depthwise_conv_block(inputs = x1, alpha = 1, pointwise_conv_filters = 128, block_id=14)\n            x1 = UpSampling2D(2)(x1)\n\n            if x1.shape[1:3] != x2.shape[1:3]:\n                #print(x1.shape[1:3] - x2.shape[1:3])\n                #pad = tf.math.subtract(x1.shape[1:3], x2.shape[1:3]).numpy().tolist()\n                #print(pad)\n                x2 = ZeroPadding2D(padding=((0,1), (0,0)))(x2)\n                grid_size_y_2, grid_size_x_2 = x2.shape[1:3]\n\n            x2 = Concatenate()([x2, x1])\n            #x2 = _depthwise_conv_block(inputs = x2, alpha = 1, pointwise_conv_filters = 128, block_id=15)\n\n            y2 = Conv2D(nb_box * (4 + 1 + nb_classes), (1,1), strides=(1,1),\n                                padding='same', \n                                name='detection_layer_2', \n                                kernel_initializer='lecun_normal')(x2)\n\n        if nb_stages == 2:\n\n            l1 = Reshape((grid_size_y, grid_size_x, nb_box, 4 + 1 + nb_classes))(y1)\n            l2 = Reshape((grid_size_y_2, grid_size_x_2, nb_box, 4 + 1 + nb_classes))(y2)\n\n            detection_layers = ['detection_layer_1', 'detection_layer_2']\n            output_tensors = [l1, l2]\n        else:\n\n            l1 = Reshape((grid_size_y, grid_size_x, nb_box, 4 + 1 + nb_classes))(y1) \n\n            detection_layers = ['detection_layer_1']\n            output_tensors = [l1]\n\n        model = Model(feature_extractor.feature_extractor.inputs[0], output_tensors, name='yolo')\n        self._norm = feature_extractor.normalize\n        self._model = model\n        self._init_layers(detection_layers)\n\n    def 
_init_layers(self, layers):\n        for layer in layers:\n            layer = self._model.get_layer(layer)\n            weights = layer.get_weights()\n            \n            input_depth = weights[0].shape[-2] # 2048\n            new_kernel = np.random.normal(size=weights[0].shape)/ input_depth\n            new_bias   = np.zeros_like(weights[1])\n\n            layer.set_weights([new_kernel, new_bias])\n\n    def load_weights(self, weight_path, by_name):\n        self._model.load_weights(weight_path, by_name=by_name)\n        \n    def forward(self, image):\n        netout = self._model.predict(image)\n        return netout\n\n    def get_model(self, first_trainable_layer=None):\n        return self._model\n\n    def get_grid_size(self):\n        grid_sizes = []\n        for model_output in self._model.outputs:\n            grid_sizes.append(list(model_output.shape[1:3]))\n        return grid_sizes\n\n    def get_normalize_func(self):\n        return self._norm\n\n\n\n"
  },
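The detection head built in network.py above is a 1x1 convolution with nb_box * (4 + 1 + nb_classes) filters followed by a Reshape to (grid_y, grid_x, nb_box, 4 + 1 + nb_classes). A minimal Keras sketch of the single-stage case, with a stand-in backbone instead of the repo's MobileNet feature extractor (not the repo's builder):

```python
import tensorflow as tf
from tensorflow.keras import layers, Model

nb_classes, nb_box = 2, 3
inputs = tf.keras.Input(shape=(224, 224, 3))
# Stand-in backbone: one strided conv producing a 7x7 feature map.
x = layers.Conv2D(32, 3, strides=32, padding='same')(inputs)
# Detection layer: one prediction vector per anchor per cell.
y = layers.Conv2D(nb_box * (4 + 1 + nb_classes), (1, 1), padding='same',
                  kernel_initializer='lecun_normal', name='detection_layer_1')(x)
out = layers.Reshape((7, 7, nb_box, 4 + 1 + nb_classes))(y)
model = Model(inputs, out, name='yolo_head_sketch')
model.summary()
```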
  {
    "path": "axelerate/networks/yolo/backend/utils/__init__.py",
    "content": "# All modules in utils package can be run independently and have no dependencies on other modules in the project.\r\n# This makes it easy to reuse in other projects.\r\n\r\n\r\n\r\n\r\n"
  },
  {
    "path": "axelerate/networks/yolo/backend/utils/annotation.py",
    "content": "# -*- coding: utf-8 -*-\r\n\r\nimport os\r\nimport numpy as np\r\nfrom xml.etree.ElementTree import parse\r\n\r\n\r\ndef get_unique_labels(files):\r\n    parser = PascalVocXmlParser()\r\n    labels = []\r\n    for fname in files:\r\n        labels += parser.get_labels(fname)\r\n        labels = list(set(labels))\r\n    labels.sort()\r\n    return labels\r\n\r\n\r\ndef get_train_annotations(labels,\r\n                          img_folder,\r\n                          ann_folder,\r\n                          valid_img_folder = \"\",\r\n                          valid_ann_folder = \"\",\r\n                          is_only_detect=False):\r\n    \"\"\"\r\n    # Args\r\n        labels : list of strings\r\n            [\"raccoon\", \"human\", ...]\r\n        img_folder : str\r\n        ann_folder : str\r\n        valid_img_folder : str\r\n        valid_ann_folder : str\r\n    # Returns\r\n        train_anns : Annotations instance\r\n        valid_anns : Annotations instance\r\n    \"\"\"\r\n    # parse annotations of the training set\r\n    train_anns = parse_annotation(ann_folder,\r\n                                     img_folder,\r\n                                     labels,\r\n                                     is_only_detect)\r\n\r\n    # parse annotations of the validation set, if any, otherwise split the training set\r\n    if os.path.exists(valid_ann_folder):\r\n        print(valid_ann_folder)\r\n        valid_anns = parse_annotation(valid_ann_folder,\r\n                                         valid_img_folder,\r\n                                         labels,\r\n                                         is_only_detect)\r\n    else:\r\n        train_valid_split = int(0.8*len(train_anns))\r\n        train_anns.shuffle()\r\n\r\n        # Todo : Hard coding\r\n        valid_anns = Annotations(train_anns._label_namings)\r\n        valid_anns._components = train_anns._components[train_valid_split:]\r\n        train_anns._components = train_anns._components[:train_valid_split]\r\n\r\n    return train_anns, valid_anns\r\n\r\n\r\nclass PascalVocXmlParser(object):\r\n    \"\"\"Parse annotation for 1-annotation file \"\"\"\r\n\r\n    def __init__(self):\r\n        pass\r\n\r\n    def get_fname(self, annotation_file):\r\n        \"\"\"\r\n        # Args\r\n            annotation_file : str\r\n                annotation file including directory path\r\n\r\n        # Returns\r\n            filename : str\r\n        \"\"\"\r\n        root = self._root_tag(annotation_file)\r\n\r\n        return root.find(\"filename\").text\r\n\r\n    def get_path(self, annotation_file):\r\n        \"\"\"\r\n        # Args\r\n            annotation_file : str\r\n                annotation file including directory path\r\n\r\n        # Returns\r\n            pathfilename : str\r\n        \"\"\"\r\n\r\n        root = self._root_tag(annotation_file)\r\n\r\n        path = root.find(\"path\")\r\n\r\n        return path if path is None else path.text\r\n\r\n\r\n    def get_width(self, annotation_file):\r\n        \"\"\"\r\n        # Args\r\n            annotation_file : str\r\n                annotation file including directory path\r\n\r\n        # Returns\r\n            width : int\r\n        \"\"\"\r\n        tree = self._tree(annotation_file)\r\n        for elem in tree.iter():\r\n            if 'width' in elem.tag:\r\n                return int(elem.text)\r\n\r\n    def get_height(self, annotation_file):\r\n        \"\"\"\r\n        # Args\r\n            annotation_file : str\r\n                
annotation file including directory path\r\n\r\n        # Returns\r\n            height : int\r\n        \"\"\"\r\n        tree = self._tree(annotation_file)\r\n        for elem in tree.iter():\r\n            if 'height' in elem.tag:\r\n                return int(elem.text)\r\n\r\n    def get_labels(self, annotation_file):\r\n        \"\"\"\r\n        # Args\r\n            annotation_file : str\r\n                annotation file including directory path\r\n\r\n        # Returns\r\n            labels : list of strs\r\n        \"\"\"\r\n\r\n        root = self._root_tag(annotation_file)\r\n        labels = []\r\n        obj_tags = root.findall(\"object\")\r\n        for t in obj_tags:\r\n            labels.append(t.find(\"name\").text)\r\n        return labels\r\n\r\n    def get_boxes(self, annotation_file):\r\n        \"\"\"\r\n        # Args\r\n            annotation_file : str\r\n                annotation file including directory path\r\n\r\n        # Returns\r\n            bbs : 2d-array, shape of (N, 4)\r\n                (x1, y1, x2, y2)-ordered\r\n        \"\"\"\r\n        root = self._root_tag(annotation_file)\r\n        bbs = []\r\n        obj_tags = root.findall(\"object\")\r\n        for t in obj_tags:\r\n            box_tag = t.find(\"bndbox\")\r\n            x1 = box_tag.find(\"xmin\").text\r\n            y1 = box_tag.find(\"ymin\").text\r\n            x2 = box_tag.find(\"xmax\").text\r\n            y2 = box_tag.find(\"ymax\").text\r\n            box = np.array([int(float(x1)), int(float(y1)), int(float(x2)), int(float(y2))])\r\n            bbs.append(box)\r\n        bbs = np.array(bbs)\r\n        return bbs\r\n\r\n    def _root_tag(self, fname):\r\n        tree = parse(fname)\r\n        root = tree.getroot()\r\n        return root\r\n\r\n    def _tree(self, fname):\r\n        tree = parse(fname)\r\n        return tree\r\n\r\ndef parse_annotation(ann_dir, img_dir, labels_naming=[], is_only_detect=False):\r\n    \"\"\"\r\n    # Args\r\n        ann_dir : str\r\n        img_dir : str\r\n        labels_naming : list of strings\r\n\r\n    # Returns\r\n        all_imgs : list of dict\r\n    \"\"\"\r\n    parser = PascalVocXmlParser()\r\n\r\n    if is_only_detect:\r\n        annotations = Annotations([\"object\"])\r\n    else:\r\n        annotations = Annotations(labels_naming)\r\n    for ann in sorted(os.listdir(ann_dir)):\r\n        annotation_file = os.path.join(ann_dir, ann)\r\n\r\n        fname = parser.get_fname(annotation_file)\r\n        path = parser.get_path(annotation_file)\r\n\r\n        if not path or not os.path.exists(path):\r\n            path = os.path.join(img_dir, fname)\r\n\r\n        annotation = Annotation(path)\r\n\r\n        labels = parser.get_labels(annotation_file)\r\n        boxes = parser.get_boxes(annotation_file)\r\n\r\n        for label, box in zip(labels, boxes):\r\n            x1, y1, x2, y2 = box\r\n            if is_only_detect:\r\n                annotation.add_object(x1, y1, x2, y2, name=\"object\")\r\n            else:\r\n                if label in labels_naming:\r\n                    annotation.add_object(x1, y1, x2, y2, name=label)\r\n\r\n        if annotation.boxes is not None:\r\n            annotations.add(annotation)\r\n\r\n    return annotations\r\n\r\n\r\nclass Annotation(object):\r\n    \"\"\"\r\n    # Attributes\r\n        fname : image file path\r\n        labels : list of strings\r\n        boxes : Boxes instance\r\n    \"\"\"\r\n    def __init__(self, filename):\r\n        self.fname = filename\r\n        self.labels = []\r\n       
 self.boxes = None\r\n\r\n    def add_object(self, x1, y1, x2, y2, name):\r\n        self.labels.append(name)\r\n        if self.boxes is None:\r\n            self.boxes = np.array([x1, y1, x2, y2]).reshape(-1,4)\r\n        else:\r\n            box = np.array([x1, y1, x2, y2]).reshape(-1,4)\r\n            self.boxes = np.concatenate([self.boxes, box])\r\n\r\nclass Annotations(object):\r\n    def __init__(self, label_namings):\r\n        self._components = []\r\n        self._label_namings = label_namings\r\n\r\n    def n_classes(self):\r\n        return len(self._label_namings)\r\n\r\n    def add(self, annotation):\r\n        self._components.append(annotation)\r\n\r\n    def shuffle(self):\r\n        np.random.shuffle(self._components)\r\n\r\n    def fname(self, i):\r\n        index = self._valid_index(i)\r\n        return self._components[index].fname\r\n\r\n    def boxes(self, i):\r\n        index = self._valid_index(i)\r\n        return self._components[index].boxes\r\n\r\n    def labels(self, i):\r\n        \"\"\"\r\n        # Returns\r\n            labels : list of strings\r\n        \"\"\"\r\n        index = self._valid_index(i)\r\n        return self._components[index].labels\r\n\r\n    def code_labels(self, i):\r\n        \"\"\"\r\n        # Returns\r\n            code_labels : list of int\r\n        \"\"\"\r\n        str_labels = self.labels(i)\r\n        labels = []\r\n        for label in str_labels:\r\n            labels.append(self._label_namings.index(label))\r\n        return labels\r\n\r\n    def _valid_index(self, i):\r\n        valid_index = i % len(self._components)\r\n        return valid_index\r\n\r\n    def __len__(self):\r\n        return len(self._components)\r\n\r\n    def __getitem__(self, idx):\r\n        return self._components[idx]\r\n\r\n\r\n"
  },
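PascalVocXmlParser above pulls the filename, object names and bndbox corners out of Pascal VOC XML with ElementTree. A self-contained sketch on a hypothetical in-memory annotation (the parser itself reads annotation files with parse()):

```python
from xml.etree.ElementTree import fromstring

# A hypothetical, minimal Pascal VOC annotation.
xml = """
<annotation>
  <filename>img_001.jpg</filename>
  <size><width>320</width><height>240</height></size>
  <object>
    <name>raccoon</name>
    <bndbox><xmin>48</xmin><ymin>60</ymin><xmax>120</xmax><ymax>180</ymax></bndbox>
  </object>
</annotation>
"""

root = fromstring(xml)
fname = root.find("filename").text
labels = [t.find("name").text for t in root.findall("object")]
boxes = [[int(float(t.find("bndbox").find(k).text))
          for k in ("xmin", "ymin", "xmax", "ymax")]
         for t in root.findall("object")]
print(fname, labels, boxes)   # img_001.jpg ['raccoon'] [[48, 60, 120, 180]]
```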
  {
    "path": "axelerate/networks/yolo/backend/utils/box.py",
    "content": "import numpy as np\r\nimport cv2\r\n\r\nclass BoundBox:\r\n    def __init__(self, x, y, w, h, c = None, classes = None):\r\n        self.x     = x\r\n        self.y     = y\r\n        self.w     = w\r\n        self.h     = h\r\n        \r\n        self.c     = c\r\n        self.classes = classes\r\n\r\n    def get_label(self):\r\n        return np.argmax(self.classes)\r\n    \r\n    def get_score(self):\r\n        return self.classes[self.get_label()]\r\n    \r\n    def iou(self, bound_box):\r\n        b1 = self.as_centroid()\r\n        b2 = bound_box.as_centroid()\r\n        return centroid_box_iou(b1, b2)\r\n\r\n    def as_centroid(self):\r\n        return np.array([self.x, self.y, self.w, self.h])\r\n    \r\n\r\ndef boxes_to_array(bound_boxes):\r\n    \"\"\"\r\n    # Args\r\n        boxes : list of BoundBox instances\r\n    \r\n    # Returns\r\n        centroid_boxes : (N, 4)\r\n        probs : (N, nb_classes)\r\n    \"\"\"\r\n    centroid_boxes = []\r\n    probs = []\r\n    for box in bound_boxes:\r\n        centroid_boxes.append([box.x, box.y, box.w, box.h])\r\n        probs.append(box.classes)\r\n    return np.array(centroid_boxes), np.array(probs)\r\n\r\n\r\ndef nms_boxes(boxes, n_classes, nms_threshold=0.3, obj_threshold=0.3):\r\n    \"\"\"\r\n    # Args\r\n        boxes : list of BoundBox\r\n    \r\n    # Returns\r\n        boxes : list of BoundBox\r\n            non maximum supressed BoundBox instances\r\n    \"\"\"\r\n    # suppress non-maximal boxes\r\n    for c in range(n_classes):\r\n        sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes])))\r\n\r\n        for i in range(len(sorted_indices)):\r\n            index_i = sorted_indices[i]\r\n            \r\n            if boxes[index_i].classes[c] == 0: \r\n                continue\r\n            else:\r\n                for j in range(i+1, len(sorted_indices)):\r\n                    index_j = sorted_indices[j]\r\n\r\n                    if boxes[index_i].iou(boxes[index_j]) >= nms_threshold:\r\n                        boxes[index_j].classes[c] = 0\r\n    # remove the boxes which are less likely than a obj_threshold\r\n    boxes = [box for box in boxes if box.get_score() > obj_threshold]\r\n    return boxes\r\n\r\n\r\ndef draw_scaled_boxes(image, boxes, probs, labels, desired_size=400):\r\n    img_size = min(image.shape[:2])\r\n    if img_size < desired_size:\r\n        scale_factor = float(desired_size) / img_size\r\n    else:\r\n        scale_factor = 1.0\r\n    \r\n    h, w = image.shape[:2]\r\n    img_scaled = cv2.resize(image, (int(w*scale_factor), int(h*scale_factor)))\r\n    if boxes != []:\r\n        boxes_scaled = boxes*scale_factor\r\n        boxes_scaled = boxes_scaled.astype(np.int)\r\n    else:\r\n        boxes_scaled = boxes\r\n    return draw_boxes(img_scaled, boxes_scaled, probs, labels)\r\n        \r\n\r\ndef draw_boxes(image, boxes, scores, classes, labels):\r\n\r\n    color = (0, 125, 0)\r\n\r\n    for i in range(len(boxes)):\r\n\r\n        x_min, y_min, x_max, y_max  = boxes[i]\r\n        obj_class = classes[i]\r\n        score = scores[i]\r\n\r\n        # Draw bounding box around detected object\r\n        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), color, 2)\r\n        #print(labels[obj_class], score)\r\n        # Create label for detected object class\r\n        label = \"{}:{:.2f}%\".format(labels[obj_class], np.max(score))\r\n        label_color = (255, 255, 255)\r\n\r\n        text_size = 0.0015 * min(image.shape[0], image.shape[1])\r\n\r\n        # 
Make sure label always stays on-screen\r\n        x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, text_size, 1)[0][:2]\r\n\r\n        lbl_box_xy_min = (x_min, y_min if y_min < 25 else y_min - y_text)\r\n        lbl_box_xy_max = (x_min + x_text, y_min + y_text if y_min < 25 else y_min)\r\n        lbl_text_pos = (x_min, y_min)\r\n\r\n        # Add label and confidence value\r\n        cv2.rectangle(image, lbl_box_xy_min, lbl_box_xy_max, color, -1)\r\n        cv2.putText(image, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, text_size, label_color, 1, cv2.LINE_AA)\r\n\r\n    return image        \r\n\r\ndef centroid_box_iou(box1, box2):\r\n    def _interval_overlap(interval_a, interval_b):\r\n        x1, x2 = interval_a\r\n        x3, x4 = interval_b\r\n    \r\n        if x3 < x1:\r\n            if x4 < x1:\r\n                return 0\r\n            else:\r\n                return min(x2,x4) - x1\r\n        else:\r\n            if x2 < x3:\r\n                return 0\r\n            else:\r\n                return min(x2,x4) - x3\r\n    \r\n    _, _, w1, h1 = box1.reshape(-1,)\r\n    _, _, w2, h2 = box2.reshape(-1,)\r\n    x1_min, y1_min, x1_max, y1_max = to_minmax(box1.reshape(-1,4)).reshape(-1,)\r\n    x2_min, y2_min, x2_max, y2_max = to_minmax(box2.reshape(-1,4)).reshape(-1,)\r\n            \r\n    intersect_w = _interval_overlap([x1_min, x1_max], [x2_min, x2_max])\r\n    intersect_h = _interval_overlap([y1_min, y1_max], [y2_min, y2_max])\r\n    intersect = intersect_w * intersect_h\r\n    union = w1 * h1 + w2 * h2 - intersect\r\n    \r\n    return float(intersect) / union\r\n\r\n\r\ndef to_centroid(minmax_boxes):\r\n    \"\"\"\r\n    minmax_boxes : (N, 4) [[100, 120, 140, 200]]\r\n    centroid_boxes: [[120. 160.  40.  80.]]\r\n    \"\"\"\r\n    #minmax_boxes = np.asarray([[100, 120, 140, 200]])\r\n    minmax_boxes = minmax_boxes.astype(np.float)\r\n    centroid_boxes = np.zeros_like(minmax_boxes)\r\n    \r\n    x1 = minmax_boxes[:,0]\r\n    y1 = minmax_boxes[:,1]\r\n    x2 = minmax_boxes[:,2]\r\n    y2 = minmax_boxes[:,3]\r\n    \r\n    centroid_boxes[:,0] = (x1 + x2) / 2\r\n    centroid_boxes[:,1] = (y1 + y2) / 2\r\n    centroid_boxes[:,2] = x2 - x1\r\n    centroid_boxes[:,3] = y2 - y1\r\n    return centroid_boxes\r\n\r\ndef to_minmax(centroid_boxes):\r\n    centroid_boxes = centroid_boxes.astype(np.float)\r\n    minmax_boxes = np.zeros_like(centroid_boxes)\r\n    \r\n    cx = centroid_boxes[:,0]\r\n    cy = centroid_boxes[:,1]\r\n    w = centroid_boxes[:,2]\r\n    h = centroid_boxes[:,3]\r\n    \r\n    minmax_boxes[:,0] = cx - w/2\r\n    minmax_boxes[:,1] = cy - h/2\r\n    minmax_boxes[:,2] = cx + w/2\r\n    minmax_boxes[:,3] = cy + h/2\r\n    return minmax_boxes\r\n\r\ndef create_anchor_boxes(anchors):\r\n    \"\"\"\r\n    # Args\r\n        anchors : list of floats\r\n    # Returns\r\n        boxes : array, shape of (len(anchors)/2, 4)\r\n            centroid-type\r\n    \"\"\"\r\n    boxes = []\r\n    n_boxes = int(len(anchors)/2)\r\n    for i in range(n_boxes):\r\n        boxes.append(np.array([0, 0, anchors[2*i], anchors[2*i+1]]))\r\n    return np.array(boxes)\r\n\r\ndef find_match_box(centroid_box, centroid_boxes):\r\n    \"\"\"Find the index of the boxes with the largest overlap among the N-boxes.\r\n\r\n    # Args\r\n        box : array, shape of (1, 4)\r\n        boxes : array, shape of (N, 4)\r\n    \r\n    # Return\r\n        match_index : int\r\n    \"\"\"\r\n    match_index = -1\r\n    max_iou     = -1\r\n    \r\n    for i, box in enumerate(centroid_boxes):\r\n  
      iou = centroid_box_iou(centroid_box, box)\r\n        \r\n        if max_iou < iou:\r\n            match_index = i\r\n            max_iou     = iou\r\n    return match_index\r\n\r\n"
  },
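box.py above converts between (x1, y1, x2, y2) and centroid (cx, cy, w, h) boxes and computes IoU from the minmax corners. A worked NumPy example with hypothetical boxes, reproducing the [[100, 120, 140, 200]] -> [[120, 160, 40, 80]] conversion mentioned in the to_centroid docstring:

```python
import numpy as np

# Two boxes in (x1, y1, x2, y2) form (hypothetical values).
a_minmax = np.array([[100, 120, 140, 200]])
b_minmax = np.array([[110, 130, 150, 210]])

def to_centroid(mm):
    cx = (mm[:, 0] + mm[:, 2]) / 2
    cy = (mm[:, 1] + mm[:, 3]) / 2
    w = mm[:, 2] - mm[:, 0]
    h = mm[:, 3] - mm[:, 1]
    return np.stack([cx, cy, w, h], axis=1)

a = to_centroid(a_minmax)   # [[120., 160., 40., 80.]]
b = to_centroid(b_minmax)

# IoU from the minmax corners, as centroid_box_iou does internally.
ix = max(0, min(a_minmax[0, 2], b_minmax[0, 2]) - max(a_minmax[0, 0], b_minmax[0, 0]))
iy = max(0, min(a_minmax[0, 3], b_minmax[0, 3]) - max(a_minmax[0, 1], b_minmax[0, 1]))
inter = ix * iy
union = a[0, 2] * a[0, 3] + b[0, 2] * b[0, 3] - inter
print(a, inter / union)     # IoU is roughly 0.49 for these boxes
```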
  {
    "path": "axelerate/networks/yolo/backend/utils/custom.py",
    "content": "from tensorflow.python import keras\nfrom tensorflow.python.ops import init_ops\nfrom tensorflow.python.ops import math_ops\nfrom tensorflow.python.keras.utils.generic_utils import to_list\nfrom tensorflow.python.keras.utils import metrics_utils\nfrom tensorflow.python.keras.metrics import Metric\nfrom tensorflow.python.keras import backend as K\nfrom tensorflow.python.ops import state_ops\nfrom tensorflow.python.ops.resource_variable_ops import ResourceVariable\nimport numpy as np\nimport os\nimport tensorflow as tf\nimport tensorflow.keras\n\nclass Yolo_Precision(Metric):\n    def __init__(self, thresholds=None, name=None, dtype=None):\n        super(Yolo_Precision, self).__init__(name=name, dtype=dtype)\n        self.init_thresholds = thresholds\n\n        default_threshold = 0.5\n\n        self.thresholds = default_threshold if thresholds is None else thresholds\n\n        self.true_positives = self.add_weight(\n            'tp', initializer=init_ops.zeros_initializer)  # type: ResourceVariable\n\n        self.false_positives = self.add_weight(\n            'fp', initializer=init_ops.zeros_initializer)  # type: ResourceVariable\n\n    def update_state(self, y_true, y_pred, sample_weight=None):\n        true_confidence = y_true[..., 4:5]\n        pred_confidence = y_pred[..., 4:5]\n        pred_confidence_sigmoid = math_ops.sigmoid(pred_confidence)\n\n        values = math_ops.logical_and(true_confidence > self.thresholds, pred_confidence > self.thresholds)\n        values = math_ops.cast(values, self.dtype)\n        self.true_positives.assign_add(math_ops.reduce_sum(values))\n\n        values = math_ops.logical_and(math_ops.logical_not(true_confidence > self.thresholds),\n                                      pred_confidence > self.thresholds)\n        values = math_ops.cast(values, self.dtype)\n        self.false_positives.assign_add(math_ops.reduce_sum(values))\n\n    def result(self):\n        return math_ops.div_no_nan(self.true_positives, (math_ops.add(self.true_positives, self.false_positives)))\n\n\nclass Yolo_Recall(Metric):\n    def __init__(self, thresholds=None, name=None, dtype=None):\n        super(Yolo_Recall, self).__init__(name=name, dtype=dtype)\n        self.init_thresholds = thresholds\n\n        default_threshold = 0.5\n\n        self.thresholds = default_threshold if thresholds is None else thresholds\n\n        self.true_positives = self.add_weight(\n            'tp', initializer=init_ops.zeros_initializer)\n        self.false_negatives = self.add_weight(\n            'fn', initializer=init_ops.zeros_initializer)\n\n    def update_state(self, y_true, y_pred, sample_weight=None):\n        true_confidence = y_true[..., 4:5]\n        pred_confidence = y_pred[..., 4:5]\n        pred_confidence_sigmoid = math_ops.sigmoid(pred_confidence)\n\n        values = math_ops.logical_and(true_confidence > self.thresholds, pred_confidence > self.thresholds)\n        values = math_ops.cast(values, self.dtype)\n        self.true_positives.assign_add(math_ops.reduce_sum(values))  # type: ResourceVariable\n\n        values = math_ops.logical_and(true_confidence > self.thresholds,\n                                      math_ops.logical_not(pred_confidence > self.thresholds))\n        values = math_ops.cast(values, self.dtype)\n        self.false_negatives.assign_add(math_ops.reduce_sum(values))  # type: ResourceVariable\n\n    def result(self):\n        return math_ops.div_no_nan(self.true_positives, (math_ops.add(self.true_positives, self.false_negatives)))\n\nclass 
MergeMetrics(tensorflow.keras.callbacks.Callback):\n\n    def __init__(self, \n                 model,\n                 type,\n                 period = 1,\n                 save_best=False,\n                 save_name=None,\n                 tensorboard=None):\n                 \n        super().__init__()\n        self.type = type\n        self.name = \"total_val_\" + self.type\n        output_names = []\n\n        for layer in model.layers:\n            if 'reshape' in layer.name:\n                output_names.append(layer.name)\n\n        self.output_names = ['val_' + output_name + \"_\" + self.type if len(output_names) > 1 else 'val_' + self.type for output_name in output_names]\n        print(\"Layers to use in {} callback monitoring: {}\".format(self.name, self.output_names))\n\n        self.num_outputs = len(self.output_names)\n        self._period = period\n        self._save_best = save_best\n        self._save_name = save_name\n        self._tensorboard = tensorboard\n\n        self.best_result = 0\n\n        if not isinstance(self._tensorboard, tensorflow.keras.callbacks.TensorBoard) and self._tensorboard is not None:\n            raise ValueError(\"Tensorboard object must be a instance from keras.callbacks.TensorBoard\")\n\n    def on_epoch_end(self, epoch, logs={}):\n        logs = logs or {}\n        if epoch % self._period == 0 and self._period != 0:\n            result = sum([logs[output_name] for output_name in self.output_names])/self.num_outputs\n            logs[self.name] = result\n\n            print('\\n')\n            print('{}: {:.4f}'.format(self.name, result))\n\n            if epoch == 0:\n                print(\"Saving model on first epoch irrespective of {}\".format(self.name))\n                self.model.save(self._save_name, overwrite=True, include_optimizer=False)\n            else:\n                if self._save_best and self._save_name is not None and result > self.best_result:\n                    print(\"{} improved from {} to {}, saving model to {}.\".format(self.name, self.best_result, result, self._save_name))\n                    self.best_result = result\n                    self.model.save(self._save_name, overwrite=True, include_optimizer=False)\n                else:\n                    print(\"{} did not improve from {}.\".format(self.name, self.best_result))\n\n            if self._tensorboard:\n                writer = tf.summary.create_file_writer(self._tensorboard.log_dir)\n                with writer.as_default():\n                    tf.summary.scalar(self.name, result, step=epoch)\n                    writer.flush()"
  },
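Yolo_Precision and Yolo_Recall above threshold the ground-truth objectness (channel 4) and the raw predicted confidence at the same value (0.5 by default) and accumulate true/false positives and false negatives across batches. A NumPy sketch of one update step with hypothetical flattened confidence maps:

```python
import numpy as np

thresh = 0.5
true_conf = np.array([1.0, 1.0, 0.0, 0.0, 1.0])
pred_conf = np.array([0.8, 0.3, 0.7, 0.1, 0.9])   # raw values, as in the metric's comparison

tp = np.sum((true_conf > thresh) & (pred_conf > thresh))    # 2
fp = np.sum(~(true_conf > thresh) & (pred_conf > thresh))   # 1
fn = np.sum((true_conf > thresh) & ~(pred_conf > thresh))   # 1

precision = tp / (tp + fp) if tp + fp else 0.0
recall = tp / (tp + fn) if tp + fn else 0.0
print(precision, recall)    # 0.666..., 0.666...
```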
  {
    "path": "axelerate/networks/yolo/backend/utils/eval/__init__.py",
    "content": ""
  },
  {
    "path": "axelerate/networks/yolo/backend/utils/eval/_box_match.py",
    "content": "# -*- coding: utf-8 -*-\r\nimport numpy as np\r\nfrom scipy.optimize import linear_sum_assignment as linear_assignment \r\n \r\nclass BoxMatcher(object):\r\n    \"\"\"\r\n    # Args\r\n        boxes1 : ndarray, shape of (N, 4)\r\n            (x1, y1, x2, y2) ordered\r\n\r\n        boxes2 : ndarray, shape of (M, 4)\r\n            (x1, y1, x2, y2) ordered\r\n    \"\"\"\r\n    \r\n    def __init__(self, boxes1, boxes2, labels1=None, labels2=None):\r\n        self._boxes1 = boxes1\r\n        self._boxes2 = boxes2\r\n\r\n        if len(boxes1) == 0 or len(boxes2) == 0:\r\n            pass\r\n        else:\r\n            \r\n            if labels1 is None or labels2 is None:\r\n                self._iou_matrix = self._calc(boxes1,\r\n                                              boxes2,\r\n                                              np.ones((len(boxes1),)),\r\n                                              np.ones((len(boxes2),)))\r\n            else:\r\n                self._iou_matrix = self._calc(boxes1, boxes2, labels1, labels2)\r\n            self._match_pairs = np.asarray(linear_assignment(-1*self._iou_matrix))\r\n            self._match_pairs = np.transpose(self._match_pairs)\r\n    \r\n    def match_idx_of_box1_idx(self, box1_idx):\r\n        \"\"\"\r\n        # Args\r\n            box1_idx : int\r\n        \r\n        # Returns\r\n            box2_idx : int or None\r\n                if matching index does not exist, return None\r\n            iou : float\r\n                IOU (intersection over union) between the box corresponding to the box1 index and the box2 matching it\r\n        \"\"\"\r\n        assert box1_idx < len(self._boxes1)\r\n        if len(self._boxes2) == 0:\r\n            return None, 0\r\n        \r\n        box1_matching_idx_list = self._match_pairs[:, 0]\r\n        box2_matching_idx_list = self._match_pairs[:, 1]\r\n        box2_idx = self._find(box1_idx, box1_matching_idx_list, box2_matching_idx_list)\r\n        if box2_idx is None:\r\n            iou = 0\r\n        else:\r\n            iou = self._iou_matrix[box1_idx, box2_idx]\r\n        return box2_idx, iou\r\n\r\n    def match_idx_of_box2_idx(self, box2_idx):\r\n        \"\"\"\r\n        # Args\r\n            box2_idx : int\r\n         \r\n        # Returns\r\n            box1_idx : int or None\r\n                if matching index does not exist, return None\r\n            iou : float\r\n                IOU (intersection over union) between the box corresponding to the box2 index and the box1 matching it\r\n        \"\"\"\r\n        assert box2_idx < len(self._boxes2)\r\n        if len(self._boxes1) == 0:\r\n            return None, 0\r\n\r\n        box1_matching_idx_list = self._match_pairs[:, 0]\r\n        box2_matching_idx_list = self._match_pairs[:, 1]\r\n        box1_idx = self._find(box2_idx, box2_matching_idx_list, box1_matching_idx_list)\r\n        if box1_idx is None:\r\n            iou = 0\r\n        else:\r\n            iou = self._iou_matrix[box1_idx, box2_idx]\r\n        return box1_idx, iou\r\n\r\n    def _find(self, input_idx, input_idx_list, output_idx_list):\r\n        if input_idx in input_idx_list:\r\n            loc = np.where(input_idx_list == input_idx)[0][0]\r\n            output_idx = int(output_idx_list[loc])\r\n        else:\r\n            output_idx = None\r\n        return output_idx\r\n    \r\n    def _calc_maximun_ious(self):\r\n        ious_for_each_gt = self._calc(self._boxes1, self._boxes2)\r\n        ious = np.max(ious_for_each_gt, axis=0)\r\n        return 
ious\r\n    \r\n    def _calc(self, boxes, true_boxes, labels, true_labels):\r\n        ious_for_each_gt = []\r\n        \r\n        for truth_box, truth_label in zip(true_boxes, true_labels):\r\n            \r\n            x1 = boxes[:, 0]\r\n            y1 = boxes[:, 1]\r\n            x2 = boxes[:, 2]\r\n            y2 = boxes[:, 3]\r\n            \r\n            x1_gt = truth_box[0]\r\n            y1_gt = truth_box[1]\r\n            x2_gt = truth_box[2]\r\n            y2_gt = truth_box[3]\r\n            \r\n            xx1 = np.maximum(x1, x1_gt)\r\n            yy1 = np.maximum(y1, y1_gt)\r\n            xx2 = np.minimum(x2, x2_gt)\r\n            yy2 = np.minimum(y2, y2_gt)\r\n        \r\n            w = np.maximum(0, xx2 - xx1 + 1)\r\n            h = np.maximum(0, yy2 - yy1 + 1)\r\n            \r\n            intersections = w*h\r\n            As = (x2 - x1 + 1) * (y2 - y1 + 1)\r\n            B = (x2_gt - x1_gt + 1) * (y2_gt - y1_gt + 1)\r\n            \r\n            # np.float was removed in recent NumPy releases; use the builtin float instead\r\n            label_score = (labels == truth_label).astype(float)\r\n            \r\n            ious = label_score * intersections.astype(float) / (As + B - intersections)\r\n            ious_for_each_gt.append(ious)\r\n        \r\n        # (n_truth, n_boxes)\r\n        ious_for_each_gt = np.array(ious_for_each_gt)\r\n        return ious_for_each_gt.T\r\n\r\n\r\nif __name__ == \"__main__\":\r\n    labels = np.array([1,2,3,4])\r\n    label = np.array([4])\r\n    expected = np.array([0, 0, 0, 1])\r\n    label_score = (labels == label).astype(float)\r\n    print(label_score)\r\n    \r\n    \r\n    labels = np.array([\"a\",\"bb\",\"a\",\"cc\"])\r\n    label = np.array([\"cc\"])\r\n    label_score = (labels == label).astype(float)\r\n    print(label_score)\r\n    \r\n    \r\n    \r\n"
  },
  {
    "path": "axelerate/networks/yolo/backend/utils/eval/fscore.py",
    "content": "# -*- coding: utf-8 -*-\r\nfrom ._box_match import BoxMatcher\r\n\r\ndef count_true_positives(detect_boxes, true_boxes, detect_labels=None, true_labels=None):\r\n    \"\"\"\r\n    # Args\r\n        detect_boxes : array, shape of (n_detected_boxes, 4)\r\n        true_boxes : array, shape of (n_true_boxes, 4)\r\n        detected_labels : array, shape of (n_detected_boxes,)\r\n        true_labels :\r\n    \"\"\"\r\n    n_true_positives = 0\r\n \r\n    matcher = BoxMatcher(detect_boxes, true_boxes, detect_labels, true_labels)\r\n    for i in range(len(detect_boxes)):\r\n        matching_idx, iou = matcher.match_idx_of_box1_idx(i)\r\n        print(\"detect_idx: {}, true_idx: {}, matching-score: {}\".format(i, matching_idx, iou))\r\n        if matching_idx is not None and iou > 0.5:\r\n            n_true_positives += 1\r\n    return n_true_positives\r\n\r\n\r\ndef calc_score(n_true_positives, n_truth, n_pred):\r\n    \"\"\"\r\n    # Args\r\n        detect_boxes : list of box-arrays\r\n        true_boxes : list of box-arrays\r\n    \"\"\"\r\n    if n_pred > 0:\r\n        precision = n_true_positives / n_pred\r\n    else:\r\n        precision = 0\r\n    if n_truth > 0:\r\n        recall = n_true_positives / n_truth\r\n    elif n_truth == 0 and n_true_positives == 0:\r\n        recall = 1\r\n    else:\r\n        recall = 0\r\n    if precision + recall > 0:\r\n        fscore = 2* precision * recall / (precision + recall)\r\n        score = {\"fscore\": fscore, \"precision\": precision, \"recall\": recall}\r\n    else:\r\n        score = 0\r\n    return score\r\n    \r\n\r\nif __name__ == '__main__':\r\n    pass\r\n"
  },
  {
    "path": "axelerate/networks/yolo/frontend.py",
    "content": "# -*- coding: utf-8 -*-\n# This module is responsible for communicating with the outside of the yolo package.\n# Outside the package, someone can use yolo detector accessing with this module.\n\nimport os\nimport time\nimport numpy as np\nimport tensorflow as tf\nfrom tqdm import tqdm\n\nfrom axelerate.networks.common_utils.fit import train\nfrom axelerate.networks.yolo.backend.decoder import YoloDecoder\nfrom axelerate.networks.yolo.backend.utils.custom import Yolo_Precision, Yolo_Recall\nfrom axelerate.networks.yolo.backend.loss import create_loss_fn, Params\nfrom axelerate.networks.yolo.backend.network import create_yolo_network\nfrom axelerate.networks.yolo.backend.batch_gen import create_batch_generator\nfrom axelerate.networks.yolo.backend.utils.annotation import get_train_annotations, get_unique_labels\nfrom axelerate.networks.yolo.backend.utils.box import to_minmax\n\ndef get_object_labels(ann_directory):\n    files = os.listdir(ann_directory)\n    files = [os.path.join(ann_directory, fname) for fname in files]\n    return get_unique_labels(files)\n\ndef create_yolo(architecture,\n                labels,\n                input_size,\n                anchors,\n                obj_thresh,\n                iou_thresh,\n                coord_scale,\n                object_scale,\n                no_object_scale,\n                weights = None):\n\n    n_classes = len(labels)\n    n_boxes = int(len(anchors[0]))\n    n_branches = len(anchors)\n    yolo_network = create_yolo_network(architecture, input_size, n_classes, n_boxes, n_branches, weights)\n    yolo_params = Params(obj_thresh, iou_thresh, object_scale, no_object_scale, coord_scale, yolo_network.get_grid_size(), anchors, n_classes)\n    yolo_loss = create_loss_fn\n\n    metrics_dict = {'recall': [Yolo_Precision(obj_thresh, name='precision'), Yolo_Recall(obj_thresh, name='recall')],\n                    'precision': [Yolo_Precision(obj_thresh, name='precision'), Yolo_Recall(obj_thresh, name='recall')]}\n\n    yolo_decoder = YoloDecoder(anchors, yolo_params, 0.1, input_size)\n    yolo = YOLO(yolo_network, yolo_loss, yolo_decoder, labels, input_size, yolo_params, metrics_dict)\n    return yolo\n\n\nclass YOLO(object):\n    def __init__(self,\n                 yolo_network,\n                 yolo_loss,\n                 yolo_decoder,\n                 labels,\n                 input_size,\n                 yolo_params,\n                 metrics_dict):\n\n        self.yolo_network = yolo_network\n        self.yolo_loss = yolo_loss\n        self.yolo_decoder = yolo_decoder\n        self.labels = labels\n        self.input_size = input_size\n        self.norm = yolo_network._norm\n        self.yolo_params = yolo_params\n        self.num_branches = len(self.yolo_params.anchors)\n        self.metrics_dict = metrics_dict\n\n    def load_weights(self, weight_path, by_name=True):\n        if os.path.exists(weight_path):\n            print(\"Loading pre-trained weights for the whole model: \", weight_path)\n            self.yolo_network.load_weights(weight_path, by_name=True)\n        else:\n            print(\"Failed to load pre-trained weights for the whole model. 
It might be because you didn't specify any or the weight file cannot be found\")\n\n    def predict(self, image, height, width, threshold=0.3):\n        \"\"\"\n        # Args\n            image : 3d-array (RGB ordered)\n        \n        # Returns\n            boxes : array, shape of (N, 4)\n            probs : array, shape of (N, nb_classes)\n        \"\"\"\n\n        def _to_original_scale(boxes):\n            minmax_boxes = to_minmax(boxes)\n            minmax_boxes[:,0] *= width\n            minmax_boxes[:,2] *= width\n            minmax_boxes[:,1] *= height\n            minmax_boxes[:,3] *= height\n            return minmax_boxes.astype(np.int)\n\n        start_time = time.time()\n        netout = self.yolo_network.forward(image)\n        elapsed_ms = (time.time() - start_time) * 1000\n        boxes, probs= self.yolo_decoder.run(netout, threshold)\n\n        if len(boxes) > 0:\n            boxes = _to_original_scale(boxes)\n            print(boxes, probs)\n            return elapsed_ms, boxes, probs\n        else:\n            return elapsed_ms, [], []\n\n    def evaluate(self, img_folder, ann_folder, batch_size):\n\n        self.generator = create_batch_generator(img_folder, ann_folder, self.input_size, \n                                                self.output_size, self.n_classes, \n                                                batch_size, 1, False, self.norm)\n        tp = np.zeros(self.n_classes)\n        fp = np.zeros(self.n_classes)\n        fn = np.zeros(self.n_classes)\n        n_pixels = np.zeros(self.n_classes)\n        \n        for inp, gt in tqdm(list(self.generator)):\n            y_pred = self.network.predict(inp)        \n\n    def train(self,\n              img_folder,\n              ann_folder,\n              nb_epoch,\n              project_folder,\n              batch_size,\n              jitter,\n              learning_rate, \n              train_times,\n              valid_times,\n              valid_img_folder,\n              valid_ann_folder,\n              first_trainable_layer,\n              metrics):\n\n        # 1. get annotations        \n        train_annotations, valid_annotations = get_train_annotations(self.labels,\n                                                                     img_folder,\n                                                                     ann_folder,\n                                                                     valid_img_folder,\n                                                                     valid_ann_folder,\n                                                                     is_only_detect = False)\n        # 1. get batch generator\n        valid_batch_size = len(valid_annotations)*valid_times\n        if valid_batch_size < batch_size: \n            raise ValueError(\"Not enough validation images: batch size {} is larger than {} validation images. Add more validation images or decrease batch size!\".format(batch_size, valid_batch_size))\n        \n        train_batch_generator = self._get_batch_generator(train_annotations, batch_size, train_times, augment=jitter)\n        valid_batch_generator = self._get_batch_generator(valid_annotations, batch_size, valid_times, augment=False)\n        \n        # 2. To train model get keras model instance & loss function\n        model = self.yolo_network.get_model(first_trainable_layer)\n        loss = self._get_loss_func(batch_size)\n        \n        # 3. 
Run training loop\n        return train(model,\n                loss,\n                train_batch_generator,\n                valid_batch_generator,\n                learning_rate = learning_rate, \n                nb_epoch  = nb_epoch,\n                project_folder = project_folder,\n                first_trainable_layer = first_trainable_layer,\n                metric=self.metrics_dict,\n                metric_name=metrics)\n\n    def _get_loss_func(self, batch_size):\n        return [self.yolo_loss(self.yolo_params, layer, batch_size) for layer in range(self.num_branches)]\n\n    def _get_batch_generator(self, annotations, batch_size, repeat_times, augment):\n        \"\"\"\n        # Args\n            annotations : Annotations instance\n            batch_size : int\n            jitter : bool\n        \n        # Returns\n            batch_generator : BatchGenerator instance\n        \"\"\"\n        batch_generator = create_batch_generator(annotations,\n                                                 self.input_size,\n                                                 self.yolo_network.get_grid_size(),\n                                                 batch_size,\n                                                 self.yolo_params.anchors,\n                                                 repeat_times,\n                                                 augment=augment,\n                                                 norm=self.yolo_network.get_normalize_func())\n        return batch_generator\n    \n"
  },
  {
    "path": "axelerate/train.py",
    "content": "import shutil\nimport numpy as np\nnp.random.seed(111)\nimport argparse\nimport os\nimport time\nimport sys\nimport json\nimport matplotlib\n\nfrom axelerate.networks.yolo.frontend import create_yolo, get_object_labels\nfrom axelerate.networks.classifier.frontend_classifier import create_classifier, get_labels\nfrom axelerate.networks.segnet.frontend_segnet import create_segnet\nfrom axelerate.networks.common_utils.convert import Converter\n\nos.environ['TF_CPP_MIN_LOG_LEVEL'] = '4'\nimport tensorflow as tf\n\ntf.get_logger().setLevel('ERROR')\n\nargparser = argparse.ArgumentParser(\n    description='Train and validate YOLO_v2 model on any dataset')\n\nargparser.add_argument(\n    '-c',\n    '--config',\n    default=\"configs/from_scratch.json\",\n    help='path to configuration file')\n\ndef train_from_config(config,project_folder):\n    try:\n        matplotlib.use('Agg')\n    except:\n        pass\n\n    #added for compatibility with < 0.5.7 versions\n    try:\n        input_size = config['model']['input_size'][:]\n    except:\n        input_size = [config['model']['input_size'],config['model']['input_size']]\n\n    # Create the converter\n    converter = Converter(config['converter']['type'], config['model']['architecture'], config['train']['valid_image_folder'])\n\n    #  Segmentation network\n    if config['model']['type']=='SegNet':\n        print('Segmentation')\n        # 1. Construct the model\n        segnet = create_segnet(config['model']['architecture'],\n                                   input_size,\n                                   config['model']['n_classes'],\n                                   config['weights']['backend'])\n        # 2. Load the pretrained weights (if any)\n        segnet.load_weights(config['weights']['full'], by_name=True)\n        # 3. actual training\n        model_layers, model_path = segnet.train(config['train']['train_image_folder'],\n                                           config['train']['train_annot_folder'],\n                                           config['train']['actual_epoch'],\n                                           project_folder,\n                                           config[\"train\"][\"batch_size\"],\n                                           config[\"train\"][\"augmentation\"],\n                                           config['train']['learning_rate'], \n                                           config['train']['train_times'],\n                                           config['train']['valid_times'],\n                                           config['train']['valid_image_folder'],\n                                           config['train']['valid_annot_folder'],\n                                           config['train']['first_trainable_layer'],\n                                           config['train']['ignore_zero_class'],\n                                           config['train']['valid_metric'])\n\n    #  Classifier\n    if config['model']['type']=='Classifier':\n        print('Classifier')\n        if config['model']['labels']:\n            labels = config['model']['labels']\n        else:\n            labels = get_labels(config['train']['train_image_folder'])\n                 # 1. 
Construct the model\n        classifier = create_classifier(config['model']['architecture'],\n                                       labels,\n                                       input_size,\n                                       config['model']['fully-connected'],\n                                       config['model']['dropout'],\n                                       config['weights']['backend'],\n                                       config['weights']['save_bottleneck'])\n        # 2. Load the pretrained weights (if any)\n        classifier.load_weights(config['weights']['full'], by_name=True)\n\n        # 3. actual training\n        model_layers, model_path = classifier.train(config['train']['train_image_folder'],\n                                               config['train']['actual_epoch'],\n                                               project_folder,\n                                               config[\"train\"][\"batch_size\"],\n                                               config[\"train\"][\"augmentation\"],\n                                               config['train']['learning_rate'], \n                                               config['train']['train_times'],\n                                               config['train']['valid_times'],\n                                               config['train']['valid_image_folder'],\n                                               config['train']['first_trainable_layer'],\n                                               config['train']['valid_metric'])\n\n\n\n    #  Detector\n    if config['model']['type']=='Detector':\n        if config['train']['is_only_detect']:\n            labels = [\"object\"]\n        else:\n            if config['model']['labels']:\n                labels = config['model']['labels']\n            else:\n                labels = get_object_labels(config['train']['train_annot_folder'])\n        print(labels)\n\n        # 1. Construct the model\n        yolo = create_yolo(config['model']['architecture'],\n                           labels,\n                           input_size,\n                           config['model']['anchors'],\n                           config['model']['obj_thresh'],\n                           config['model']['iou_thresh'],\n                           config['model']['coord_scale'],\n                           config['model']['object_scale'],\n                           config['model']['no_object_scale'],                           \n                           config['weights']['backend'])\n\n        # 2. Load the pretrained weights (if any)\n        yolo.load_weights(config['weights']['full'], by_name=True)\n\n        # 3. 
actual training\n        model_layers, model_path = yolo.train(config['train']['train_image_folder'],\n                                           config['train']['train_annot_folder'],\n                                           config['train']['actual_epoch'],\n                                           project_folder,\n                                           config[\"train\"][\"batch_size\"],\n                                           config[\"train\"][\"augmentation\"],\n                                           config['train']['learning_rate'], \n                                           config['train']['train_times'],\n                                           config['train']['valid_times'],\n                                           config['train']['valid_image_folder'],\n                                           config['train']['valid_annot_folder'],\n                                           config['train']['first_trainable_layer'],\n                                           config['train']['valid_metric'])\n    # 4. Convert the model\n    time.sleep(2)\n    converter.convert_model(model_path)\n    return model_path\n\ndef setup_training(config_file=None, config_dict=None):\n    \"\"\"make directory to save weights & its configuration \"\"\"\n    if config_file:\n        with open(config_file) as config_buffer:\n            config = json.loads(config_buffer.read())\n    elif config_dict:\n        config = config_dict\n    else:\n        print('No config found')\n        sys.exit()\n    dirname = os.path.join(\"projects\", config['train']['saved_folder'])\n    if os.path.isdir(dirname):\n        print(\"Project folder {} already exists. Creating a folder for new training session.\".format(dirname))\n    else:\n        print(\"Project folder {} is created.\".format(dirname))\n        os.makedirs(dirname)\n\n    return(train_from_config(config, dirname))\n\n\nif __name__ == '__main__':\n\n    argparser = argparse.ArgumentParser(\n        description='Train and validate YOLO_v2 model on any dataset')\n\n    argparser.add_argument(\n        '-c',\n        '--config',\n        default=\"configs/classifier.json\",\n        help='path to configuration file')\n\n    args = argparser.parse_args()\n    setup_training(config_file=args.config)\n    shutil.rmtree(\"logs\", ignore_errors=True)\n"
  },
  {
    "path": "configs/classifier.json",
    "content": "{\r\n    \"model\" : {\r\n        \"type\":                 \"Classifier\",\r\n        \"architecture\":         \"MobileNet7_5\",\r\n        \"input_size\":           224,\r\n        \"fully-connected\":      [100,50],\r\n        \"labels\":               [],\r\n        \"dropout\" : \t\t0.5\r\n    },\r\n     \"weights\" : {\r\n            \"full\":   \t\t\t\t\"\",\r\n            \"backend\":   \t\t    \"imagenet\",\r\n            \"save_bottleneck\":      false\r\n        \r\n    },\r\n    \"train\" : {\r\n        \"actual_epoch\":         1,\r\n        \"train_image_folder\":   \"sample_datasets/classifier/imgs\",\r\n        \"train_times\":          4,\r\n        \"valid_image_folder\":   \"sample_datasets/classifier/imgs_validation\",\r\n        \"valid_times\":          4,\r\n        \"valid_metric\":         \"val_accuracy\",\r\n        \"batch_size\":           4,\r\n        \"learning_rate\":        1e-4,\r\n        \"saved_folder\":   \t\t\"classifier\",\r\n        \"first_trainable_layer\": \"\",\r\n        \"augmentation\":\t\t     true\r\n    },\r\n    \"converter\" : {\r\n        \"type\":   \t\t\t\t[\"k210\",\"tflite\"]\r\n    }\r\n}\r\n"
  },
  {
    "path": "configs/detector.json",
    "content": "{\r\n    \"model\" : {\r\n        \"type\":                 \"Detector\",\r\n        \"architecture\":         \"MobileNet7_5\",\r\n        \"input_size\":           224,\r\n        \"anchors\":              [[[0.76120044, 0.57155991], [0.6923348, 0.88535553], [0.47163042, 0.34163313]]],\r\n        \"labels\":               [\"aeroplane\",\"person\",\"diningtable\",\"bottle\",\"bird\",\"bus\",\"boat\",\"cow\",\"sheep\",\"train\"],\r\n        \"obj_thresh\" : \t\t    0.5,\r\n        \"iou_thresh\" : \t\t    0.5,\r\n        \"coord_scale\" : \t\t2.0,\r\n        \"object_scale\" : \t\t2.0,            \r\n        \"no_object_scale\" : \t1.0\r\n    },\r\n    \"weights\" : {\r\n        \"full\":   \t\t\t\t\"\",\r\n        \"backend\":              \"imagenet\"\r\n    },\r\n    \"train\" : {\r\n        \"actual_epoch\":         1,\r\n        \"train_image_folder\":   \"sample_datasets/detector/imgs\",\r\n        \"train_annot_folder\":   \"sample_datasets/detector/anns\",\r\n        \"train_times\":          4,\r\n        \"valid_image_folder\":   \"sample_datasets/detector/imgs_validation\",\r\n        \"valid_annot_folder\":   \"sample_datasets/detector/anns_validation\",\r\n        \"valid_times\":          4,\r\n        \"valid_metric\":         \"mAP\",\r\n        \"batch_size\":           4,\r\n        \"learning_rate\":        1e-4,\r\n        \"saved_folder\":   \t\t\"detector\",\r\n        \"first_trainable_layer\": \"\",\r\n        \"augmentation\":\t\t    true,\r\n        \"is_only_detect\" : \t\tfalse\r\n    },\r\n    \"converter\" : {\r\n        \"type\":   \t\t\t\t[\"k210\", \"tflite\"]\r\n    }\r\n}\r\n"
  },
  {
    "path": "configs/dogs_classifier.json",
    "content": "{\r\n    \"model\" : {\r\n        \"type\":                 \"Classifier\",\r\n        \"architecture\":         \"NASNetMobile\",\r\n        \"input_size\":           224,\r\n        \"fully-connected\":      [],\r\n        \"labels\":               [],\r\n        \"dropout\" : \t\t    0.2\r\n    },\r\n     \"weights\" : {\r\n            \"full\":   \t\t\t\t\"\",\r\n            \"backend\":   \t\t    \"imagenet\",\r\n            \"save_bottleneck\":      false\r\n        \r\n    },\r\n    \"train\" : {\r\n        \"actual_epoch\":         100,\r\n        \"train_image_folder\":   \"/home/ubuntu/datasets/dogs_classification/imgs\",\r\n        \"train_times\":          1,\r\n        \"valid_image_folder\":   \"/home/ubuntu/datasets/dogs_classification/imgs_validation\",\r\n        \"valid_times\":          1,\r\n        \"valid_metric\":         \"val_accuracy\",\r\n        \"batch_size\":           16,\r\n        \"learning_rate\":        1e-3,\r\n        \"saved_folder\":   \t\t\"dogs_classifier\",\r\n        \"first_trainable_layer\": \"\",\r\n        \"augmentation\":\t\t     true\r\n    },\r\n    \"converter\" : {\r\n        \"type\":   \t\t\t\t[\"tflite\"]\r\n    }\r\n}\r\n"
  },
  {
    "path": "configs/face_detector.json",
    "content": "{\r\n        \"model\":{\r\n            \"type\":                 \"Detector\",\r\n            \"architecture\":         \"MobileNet2_5\",\r\n            \"input_size\":           [240, 320],\r\n            \"anchors\":              [[[0.51424575, 0.54116074], [0.29523918, 0.45838044], [0.21371929, 0.21518053]]],\r\n            \"labels\":               [\"face\"],\r\n            \"obj_thresh\" : \t\t    0.5,\r\n            \"iou_thresh\" : \t\t    0.5,\r\n            \"coord_scale\" : \t\t2.0,\r\n            \"object_scale\" : \t\t2.0,            \r\n            \"no_object_scale\" : \t1.0\r\n        },\r\n    \"weights\" : {\r\n        \"full\":   \t\t\t\t\"\",\r\n        \"backend\":              \"imagenet\"\r\n    },\r\n        \"train\" : {\r\n            \"actual_epoch\":         30,\r\n            \"train_image_folder\":   \"/home/ubuntu/datasets/WideFace_large/imgs\",\r\n            \"train_annot_folder\":   \"/home/ubuntu/datasets/WideFace_large/anns\",\r\n            \"train_times\":          1,\r\n            \"valid_image_folder\":   \"/home/ubuntu/datasets/WideFace_large/imgs_validation\",\r\n            \"valid_annot_folder\":   \"/home/ubuntu/datasets/WideFace_large/anns_validation\",\r\n            \"valid_times\":          1,\r\n            \"valid_metric\":         \"val_recall\",\r\n            \"batch_size\":           32,\r\n            \"learning_rate\":        1e-3,\r\n            \"saved_folder\":   \t\t\"face_detector\",\r\n            \"first_trainable_layer\": \"\",\r\n            \"augmentation\":\t\t    true,\r\n            \"is_only_detect\" : \t\tfalse\r\n        },\r\n        \"converter\" : {\r\n            \"type\":   \t\t\t\t[\"k210\"]\r\n        }\r\n}\r\n"
  },
  {
    "path": "configs/kangaroo_detector.json",
    "content": "{\r\n    \"model\" : {\r\n        \"type\":                 \"Detector\",\r\n        \"architecture\":         \"MobileNet2_5\",\r\n        \"input_size\":           224,\r\n        \"anchors\":              [[[0.76120044, 0.57155991], [0.6923348, 0.88535553], [0.47163042, 0.34163313]]],\r\n        \"labels\":               [\"kangaroo\"],\r\n        \"obj_thresh\" : \t\t    0.5,\r\n        \"iou_thresh\" : \t\t    0.5,\r\n        \"coord_scale\" : \t\t2.0,\r\n        \"object_scale\" : \t\t2.0,            \r\n        \"no_object_scale\" : \t1.0\r\n    },\r\n    \"weights\" : {\r\n        \"full\":   \t\t\t\t\"\",\r\n        \"backend\":              \"imagenet\"\r\n    },\r\n    \"train\" : {\r\n        \"actual_epoch\":         50,\r\n        \"train_image_folder\":   \"/home/ubuntu/datasets/kangaroo_detection/imgs\",\r\n        \"train_annot_folder\":   \"/home/ubuntu/datasets/kangaroo_detection/anns\",\r\n        \"train_times\":          4,\r\n        \"valid_image_folder\":   \"/home/ubuntu/datasets/kangaroo_detection/imgs_validation\",\r\n        \"valid_annot_folder\":   \"/home/ubuntu/datasets/kangaroo_detection/anns_validation\",\r\n        \"valid_times\":          2,\r\n        \"valid_metric\":         \"mAP\",\r\n        \"batch_size\":           8,\r\n        \"learning_rate\":        1e-3,\r\n        \"saved_folder\":   \t\t\"kangaroo_detector\",\r\n        \"first_trainable_layer\": \"\",\r\n        \"augmentation\":\t\t    true,\r\n        \"is_only_detect\" : \t\tfalse\r\n    },\r\n    \"converter\" : {\r\n        \"type\":   \t\t\t\t[\"openvino\"]\r\n    }\r\n}\r\n"
  },
  {
    "path": "configs/lego_detector.json",
    "content": "{\r\n    \"model\" : {\r\n        \"type\":                 \"Detector\",\r\n        \"architecture\":         \"MobileNet7_5\",\r\n        \"input_size\":           224,\r\n        \"anchors\":              [[[0.76120044, 0.57155991], [0.6923348, 0.88535553], [0.47163042, 0.34163313]]],\r\n        \"labels\":               [\"lego\"],\r\n        \"obj_thresh\" : \t\t    0.5,\r\n        \"iou_thresh\" : \t\t    0.5,\r\n        \"coord_scale\" : \t\t2.0,\r\n        \"object_scale\" : \t\t2.0,            \r\n        \"no_object_scale\" : \t1.0\r\n    },\r\n    \"weights\" : {\r\n        \"full\":   \t\t\t\t\"\",\r\n        \"backend\":              \"imagenet\"\r\n    },\r\n    \"train\" : {\r\n        \"actual_epoch\":         15,\r\n        \"train_image_folder\":   \"../dataset/imgs\",\r\n        \"train_annot_folder\":   \"../dataset/anns\",\r\n        \"train_times\":          2,\r\n        \"valid_image_folder\":   \"../dataset/imgs_validation\",\r\n        \"valid_annot_folder\":   \"../dataset/anns_validation\",\r\n        \"valid_times\":          2,\r\n        \"valid_metric\":         \"mAP\",\r\n        \"batch_size\":           32,\r\n        \"learning_rate\":        1e-3,\r\n        \"saved_folder\":   \t\t\"detector\",\r\n        \"first_trainable_layer\": \"\",\r\n        \"augmentation\":\t\t    true,\r\n        \"is_only_detect\" : \t\tfalse\r\n    },\r\n    \"converter\" : {\r\n        \"type\":   \t\t\t\t[\"edgetpu\"]\r\n    }\r\n}\r\n"
  },
  {
    "path": "configs/pascal_20_detector.json",
    "content": "{\r\n    \"model\" : {\r\n        \"type\":                 \"Detector\",\r\n        \"architecture\":         \"MobileNet7_5\",\r\n        \"input_size\":           224,\r\n        \"anchors\":              [[[0.76120044, 0.57155991], [0.6923348, 0.88535553], [0.47163042, 0.34163313]]],\r\n        \"labels\":               [\"person\", \"bird\", \"cat\", \"cow\", \"dog\", \"horse\", \"sheep\", \"aeroplane\", \"bicycle\", \"boat\", \"bus\", \"car\", \"motorbike\", \"train\",\"bottle\", \"chair\", \"diningtable\", \"pottedplant\", \"sofa\", \"tvmonitor\"],\r\n        \"obj_thresh\" : \t\t    0.5,\r\n        \"iou_thresh\" : \t\t    0.5,\r\n        \"coord_scale\" : \t\t2.0,\r\n        \"object_scale\" : \t\t2.0,            \r\n        \"no_object_scale\" : \t1.0\r\n    },\r\n    \"weights\" : {\r\n        \"full\":   \t\t\t\t\"\",\r\n        \"backend\":              \"imagenet\"\r\n    },\r\n    \"train\" : {\r\n        \"actual_epoch\":         50,\r\n        \"train_image_folder\":   \"/home/ubuntu/datasets/pascal_20_detection/imgs\",\r\n        \"train_annot_folder\":   \"/home/ubuntu/datasets/pascal_20_detection/anns\",\r\n        \"train_times\":          1,\r\n        \"valid_image_folder\":   \"/home/ubuntu/datasets/pascal_20_detection/imgs_validation\",\r\n        \"valid_annot_folder\":   \"/home/ubuntu/datasets/pascal_20_detection/anns_validation\",\r\n        \"valid_times\":          1,\r\n        \"valid_metric\":         \"val_loss\",\r\n        \"batch_size\":           32,\r\n        \"learning_rate\":        1e-3,\r\n        \"saved_folder\":   \t\t\"pascal\",\r\n        \"first_trainable_layer\": \"\",\r\n        \"augmentation\":\t\ttrue,\r\n        \"is_only_detect\" : \t\tfalse\r\n    },\r\n    \"converter\" : {\r\n        \"type\":   \t\t\t\t[\"tflite\"]\r\n    }\r\n}\r\n"
  },
  {
    "path": "configs/pascal_20_detector_2.json",
    "content": "{\r\n    \"model\" : {\r\n        \"type\":                 \"Detector\",\r\n        \"architecture\":         \"MobileNet1_0\",\r\n        \"input_size\":           [224, 320],\r\n        \"anchors\":              [[[0.76120044, 0.57155991], [0.6923348, 0.88535553], [0.47163042, 0.34163313]],\r\n                                 [[0.33340788, 0.70065861], [0.18124964, 0.38986752], [0.08497349, 0.1527057 ]]],\r\n        \"labels\":               [\"person\", \"bird\", \"cat\", \"cow\", \"dog\", \"horse\", \"sheep\", \"aeroplane\", \"bicycle\", \"boat\", \"bus\", \"car\", \"motorbike\", \"train\",\"bottle\", \"chair\", \"diningtable\", \"pottedplant\", \"sofa\", \"tvmonitor\"],\r\n        \"obj_thresh\" : \t\t    0.5,\r\n        \"iou_thresh\" : \t\t    0.5,\r\n        \"coord_scale\" : \t\t1.0,\r\n        \"object_scale\" : \t\t3.0,            \r\n        \"no_object_scale\" : \t1.0\r\n    },\r\n    \"weights\" : {\r\n        \"full\":   \t\t\t\t\"\",\r\n        \"backend\":              \"imagenet\"\r\n    },\r\n    \"train\" : {\r\n        \"actual_epoch\":         50,\r\n        \"train_image_folder\":   \"/home/ubuntu/datasets/pascal_20_detection/imgs\",\r\n        \"train_annot_folder\":   \"/home/ubuntu/datasets/pascal_20_detection/anns\",\r\n        \"train_times\":          1,\r\n        \"valid_image_folder\":   \"/home/ubuntu/datasets/pascal_20_detection/imgs_validation\",\r\n        \"valid_annot_folder\":   \"/home/ubuntu/datasets/pascal_20_detection/anns_validation\",\r\n        \"valid_times\":          1,\r\n        \"valid_metric\":         \"recall\",\r\n        \"batch_size\":           32,\r\n        \"learning_rate\":        1e-3,\r\n        \"saved_folder\":   \t\t\"pascal\",\r\n        \"first_trainable_layer\": \"\",\r\n        \"augmentation\":\t\t    true,\r\n        \"is_only_detect\" : \t\tfalse\r\n    },\r\n    \"converter\" : {\r\n        \"type\":   \t\t\t\t[\"tflite\"]\r\n    }\r\n}\r\n"
  },
  {
    "path": "configs/pascal_20_segnet.json",
    "content": "{\r\n    \"model\" : {\r\n        \"type\":                 \"SegNet\",\r\n        \"architecture\":         \"MobileNet7_5\",\r\n        \"input_size\":           224,\r\n        \"n_classes\" : \t\t    20\r\n    },\r\n    \"weights\" : {\r\n        \"full\":   \t\t\t\t\"\",\r\n        \"backend\":              \"imagenet\"\r\n    },\r\n    \"train\" : {\r\n        \"actual_epoch\":         50,\r\n        \"train_image_folder\":   \"/home/ubuntu/datasets/pascal_20_segmentation/imgs\",\r\n        \"train_annot_folder\":   \"/home/ubuntu/datasets/pascal_20_segmentation/anns\",\r\n        \"train_times\":          1,\r\n        \"valid_image_folder\":   \"/home/ubuntu/datasets/pascal_20_segmentation/imgs_validation\",\r\n        \"valid_annot_folder\":   \"/home/ubuntu/datasets/pascal_20_segmentation/anns_validation\",\r\n        \"valid_times\":          1,\r\n        \"valid_metric\":         \"val_loss\",\r\n        \"batch_size\":           8,\r\n        \"learning_rate\":        1e-3,\r\n        \"saved_folder\":   \t\t\"pascal_20\",\r\n        \"first_trainable_layer\": \"0\",\r\n        \"ignore_zero_class\":    false,\r\n        \"augmentation\":\t\t    true\r\n    },\r\n    \"converter\" : {\r\n        \"type\":   \t\t\t\t[\"tflite\"]\r\n    }\r\n}\r\n"
  },
  {
    "path": "configs/person_detector.json",
    "content": "{\r\n    \"model\" : {\r\n        \"type\":                 \"Detector\",\r\n        \"architecture\":         \"MobileNet7_5\",\r\n        \"input_size\":           [224, 320],\r\n        \"anchors\":              [[[0.76120044, 0.57155991], [0.6923348, 0.88535553], [0.47163042, 0.34163313]],\r\n                                 [[0.33340788, 0.70065861], [0.18124964, 0.38986752], [0.08497349, 0.1527057 ]]],\r\n        \"labels\":               [\"person\"],\r\n        \"obj_thresh\" : \t\t    0.7,\r\n        \"iou_thresh\" : \t\t    0.5,\r\n        \"coord_scale\" : \t\t1.0,\r\n        \"class_scale\" : \t\t1.0,\r\n        \"object_scale\" : \t\t5.0,\r\n        \"no_object_scale\" : \t1.0\r\n    },\r\n    \"weights\" : {\r\n        \"full\":   \t\t\t\t\"\",\r\n        \"backend\":              \"imagenet\"\r\n    },\r\n    \"train\" : {\r\n        \"actual_epoch\":         100,\r\n        \"train_image_folder\":   \"/home/ubuntu/datasets/pascal_20_detection/imgs\",\r\n        \"train_annot_folder\":   \"/home/ubuntu/datasets/pascal_20_detection/anns\",\r\n        \"train_times\":          1,\r\n        \"valid_image_folder\":   \"/home/ubuntu/datasets/pascal_20_detection/imgs_validation\",\r\n        \"valid_annot_folder\":   \"/home/ubuntu/datasets/pascal_20_detection/anns_validation\",\r\n        \"valid_times\":          1,\r\n        \"valid_metric\":         \"recall\",\r\n        \"batch_size\":           32,\r\n        \"learning_rate\":        1e-3,\r\n        \"saved_folder\":   \t\t\"person_detector\",\r\n        \"first_trainable_layer\": \"\",\r\n        \"augmentation\":\t\t    true,\r\n        \"is_only_detect\" : \t\tfalse\r\n    },\r\n    \"converter\" : {\r\n        \"type\":   \t\t\t\t[\"k210\", \"tflite\"]\r\n    }\r\n}\r\n"
  },
  {
    "path": "configs/raccoon_detector.json",
    "content": "{\r\n    \"model\" : {\r\n        \"type\":                 \"Detector\",\r\n        \"architecture\":         \"MobileNet5_0\",\r\n        \"input_size\":           [240, 320],\r\n        \"anchors\":              [[[0.76120044, 0.57155991], [0.6923348, 0.88535553], [0.47163042, 0.34163313]]],\r\n        \"labels\":               [\"raccoon\"],\r\n        \"obj_thresh\" : \t\t    0.5,\r\n        \"iou_thresh\" : \t\t    0.5,\r\n        \"coord_scale\" : \t\t2.0,\r\n        \"object_scale\" : \t\t2.0,            \r\n        \"no_object_scale\" : \t1.0\r\n    },\r\n    \"weights\" : {\r\n        \"full\":   \t\t\t\t\"\",\r\n        \"backend\":              \"imagenet\"\r\n    },\r\n    \"train\" : {\r\n        \"actual_epoch\":         50,\r\n        \"train_image_folder\":   \"/home/ubuntu/datasets/raccoon_detector/imgs\",\r\n        \"train_annot_folder\":   \"/home/ubuntu/datasets/raccoon_detector/anns\",\r\n        \"train_times\":          2,\r\n        \"valid_image_folder\":   \"/home/ubuntu/datasets/raccoon_detector/imgs_validation\",\r\n        \"valid_annot_folder\":   \"/home/ubuntu/datasets/raccoon_detector/anns_validation\",\r\n        \"valid_times\":          2,\r\n        \"valid_metric\":         \"recall\",\r\n        \"batch_size\":           4,\r\n        \"learning_rate\":        1e-4,\r\n        \"saved_folder\":   \t\t\"raccoon_detector\",\r\n        \"first_trainable_layer\": \"\",\r\n        \"augmentation\":\t\t    true,\r\n        \"is_only_detect\" : \t\tfalse\r\n    },\r\n    \"converter\" : {\r\n        \"type\":   \t\t\t\t[\"k210\"]\r\n    }\r\n}\r\n"
  },
  {
    "path": "configs/santa_uno.json",
    "content": "{\r\n    \"model\" : {\r\n        \"type\":                 \"Classifier\",\r\n        \"architecture\":         \"MobileNet7_5\",\r\n        \"input_size\":           224,\r\n        \"fully-connected\":      [],\r\n        \"labels\":               [],\r\n        \"dropout\" : \t\t0.5\r\n    },\r\n     \"weights\" : {\r\n            \"full\":   \t\t\t\t\"\",\r\n            \"backend\":   \t\t    \"imagenet\",\r\n            \"save_bottleneck\":      false\r\n        \r\n    },\r\n    \"train\" : {\r\n        \"actual_epoch\":         3,\r\n        \"train_image_folder\":   \"/home/ubuntu/santa_uno_dataset/imgs\",\r\n        \"train_times\":          1,\r\n        \"valid_image_folder\":   \"/home/ubuntu/santa_uno_dataset/imgs_validation\",\r\n        \"valid_times\":          1,\r\n        \"valid_metric\":         \"val_accuracy\",\r\n        \"batch_size\":           8,\r\n        \"learning_rate\":        1e-4,\r\n        \"saved_folder\":   \t\t\"santa_uno\",\r\n        \"first_trainable_layer\": \"\",\r\n        \"augmentation\":\t\t\t\ttrue\r\n    },\r\n    \"converter\" : {\r\n        \"type\":   \t\t\t\t[\"k210\", \"tflite\"]\r\n    }\r\n}\r\n"
  },
  {
    "path": "configs/segmentation.json",
    "content": "{\r\n    \"model\" : {\r\n        \"type\":                 \"SegNet\",\r\n        \"architecture\":         \"MobileNet7_5\",\r\n        \"input_size\":           224,\r\n        \"n_classes\" : \t\t    20\r\n    },\r\n    \"weights\" : {\r\n        \"full\":   \t\t\t\t\"\",\r\n        \"backend\":              \"imagenet\"\r\n    },\r\n    \"train\" : {\r\n        \"actual_epoch\":         1,\r\n        \"train_image_folder\":   \"sample_datasets/segmentation/imgs\",\r\n        \"train_annot_folder\":   \"sample_datasets/segmentation/anns\",\r\n        \"train_times\":          4,\r\n        \"valid_image_folder\":   \"sample_datasets/segmentation/imgs_validation\",\r\n        \"valid_annot_folder\":   \"sample_datasets/segmentation/anns_validation\",\r\n        \"valid_times\":          4,\r\n        \"valid_metric\":         \"val_loss\",\r\n        \"batch_size\":           8,\r\n        \"learning_rate\":        1e-4,\r\n        \"saved_folder\":   \t\t\"segment\",\r\n        \"first_trainable_layer\": \"\",\r\n        \"ignore_zero_class\":    false,\r\n        \"augmentation\":\t\t    true\r\n    },\r\n    \"converter\" : {\r\n        \"type\":   \t\t\t\t[\"k210\", \"tflite\"]\r\n    }\r\n}\r\n"
  },
  {
    "path": "example_scripts/arm_nn/README.md",
    "content": "# PyArmNN Object Detection Sample Application\n\n## Introduction\nThis sample application guides the user and shows how to perform object detection using PyArmNN API. We assume the user has already built PyArmNN by following the instructions of the README in the main PyArmNN directory.\n\nWe provide example scripts for performing object detection from video file and video stream with `run_video_file.py` and `run_video_stream.py`.\n\nThe application takes a model and video file or camera feed as input, runs inference on each frame, and draws bounding boxes around detected objects, with the corresponding labels and confidence scores overlaid.\n\nA similar implementation of this object detection application is also provided in C++ in the examples for ArmNN.\n\n## Prerequisites\n\n##### PyArmNN\n\nBefore proceeding to the next steps, make sure that you have successfully installed the newest version of PyArmNN on your system by following the instructions in the README of the PyArmNN root directory.\n\nYou can verify that PyArmNN library is installed and check PyArmNN version using:\n```bash\n$ pip show pyarmnn\n```\n\nYou can also verify it by running the following and getting output similar to below:\n```bash\n$ python -c \"import pyarmnn as ann;print(ann.GetVersion())\"\n'24.0.0'\n```\n\n##### Dependencies\n\nInstall the following libraries on your system:\n```bash\n$ sudo apt-get install python3-opencv libqtgui4 libqt4-test\n```\n\nCreate a virtual environment:\n```bash\n$ python3.7 -m venv devenv --system-site-packages\n$ source devenv/bin/activate\n```\n\nInstall the dependencies:\n```bash\n$ pip install -r requirements.txt\n```\n\n---\n\n# Performing Object Detection\n\n## Object Detection from Video File\nThe `run_video_file.py` example takes a video file as input, runs inference on each frame, and produces frames with bounding boxes drawn around detected objects. The processed frames are written to video file.\n\nThe user can specify these arguments at command line:\n\n* `--video_file_path` - <b>Required:</b> Path to the video file to run object detection on\n\n* `--model_file_path` - <b>Required:</b> Path to <b>.tflite, .pb</b> or <b>.onnx</b> object detection model\n\n* `--model_name` - <b>Required:</b> The name of the model being used. Assembles the workflow for the input model. The examples support the model names:\n\n  * `ssd_mobilenet_v1`\n\n  * `yolo_v3_tiny`\n\n* `--label_path` - <b>Required:</b> Path to labels file for the specified model file\n\n* `--output_video_file_path` - Path to the output video file with detections added in\n\n* `--preferred_backends` - You can specify one or more backend in order of preference. Accepted backends include `CpuAcc, GpuAcc, CpuRef`. Arm NN will decide which layers of the network are supported by the backend, falling back to the next if a layer is unsupported. Defaults to `['CpuAcc', 'CpuRef']`\n\n\nRun the sample script:\n```bash\n$ python run_video_file.py --video_file_path <video_file_path> --model_file_path <model_file_path> --model_name <model_name>\n```\n\n## Object Detection from Video Stream\nThe `run_video_stream.py` example captures frames from a video stream of a device, runs inference on each frame, and produces frames with bounding boxes drawn around detected objects. A window is displayed and refreshed with the latest processed frame.\n\nThe user can specify these arguments at command line:\n\n* `--video_source` - Device index to access video stream. 
Defaults to primary device camera at index 0\n\n* `--model_file_path` - <b>Required:</b> Path to <b>.tflite, .pb</b> or <b>.onnx</b> object detection model\n\n* `--model_name` - <b>Required:</b> The name of the model being used. Assembles the workflow for the input model. The examples support the model names:\n\n  * `ssd_mobilenet_v1`\n\n  * `yolo_v3_tiny`\n\n* `--label_path` - <b>Required:</b> Path to labels file for the specified model file\n\n* `--preferred_backends` - You can specify one or more backend in order of preference. Accepted backends include `CpuAcc, GpuAcc, CpuRef`. Arm NN will decide which layers of the network are supported by the backend, falling back to the next if a layer is unsupported. Defaults to `['CpuAcc', 'CpuRef']`\n\n\nRun the sample script:\n```bash\n$ python run_video_stream.py --model_file_path <model_file_path> --model_name <model_name>\n```\n\nThis application has been verified to work against the MobileNet SSD model, which can be downloaded along with it's label set from:\n\n* https://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip\n\n## Implementing Your Own Network\nThe examples provide support for `ssd_mobilenet_v1` and `yolo_v3_tiny` models. However, the user is able to add their own network to the object detection scripts by following the steps:\n\n1. Create a new file for your network, for example `network.py`, to contain functions to process the output of the model\n2. In that file, the user will need to write a function that decodes the output vectors obtained from running inference on their network and return the bounding box positions of detected objects plus their class index and confidence. Additionally, include a function that returns a resize factor that will scale the obtained bounding boxes to their correct positions in the original frame\n3. Import the functions into the main file and, such as with the provided networks, add a conditional statement to the `get_model_processing()` function with the new model name and functions\n4. The labels associated with the model can then be passed in with `--label_path` argument\n\n---\n\n# Application Overview\n\nThis section provides a walkthrough of the application, explaining in detail the steps:\n\n1. Initialisation\n2. Creating a Network\n3. Preparing the Workload Tensors\n4. Executing Inference\n5. Postprocessing\n\n\n### Initialisation\n\n##### Reading from Video Source\nAfter parsing user arguments, the chosen video file or stream is loaded into an OpenCV `cv2.VideoCapture()` object. We use this object to capture frames from the source using the `read()` function.\n\nThe `VideoCapture` object also tells us information about the source, such as the framerate and resolution of the input video. Using this information, we create a `cv2.VideoWriter()` object which will be used at the end of every loop to write the processed frame to an output video file of the same format as the input.\n\n##### Preparing Labels and Model Specific Functions\nIn order to interpret the result of running inference on the loaded network, it is required to load the labels associated with the model. In the provided example code, the `dict_labels()` function creates a dictionary that is keyed on the classification index at the output node of the model, with values of the dictionary corresponding to a label and a randomly generated RGB color. 
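\n\nA rough sketch of what such a helper could look like (the label-file format and exact return structure here are assumptions for illustration, not necessarily the exact `dict_labels()` implementation shipped with the examples):\n\n```python\nimport random\n\ndef dict_labels(label_file_path):\n    # Map each class index to a [label, random color] pair used later when drawing boxes\n    labels = {}\n    with open(label_file_path, 'r') as f:\n        for idx, line in enumerate(f):\n            labels[idx] = [line.rstrip(),\n                           [random.randint(0, 255) for _ in range(3)]]\n    return labels\n```\n\n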
This ensures that each class has a unique color which will prove helpful when plotting the bounding boxes of various detected objects in a frame.\n\nDepending on the model being used, the user-specified model name accesses and returns functions to decode and process the inference output, along with a resize factor used when plotting bounding boxes to ensure they are scaled to their correct position in the original frame.\n\n\n### Creating a Network\n\n##### Creating Parser and Importing Graph\nThe first step with PyArmNN is to import a graph from file by using the appropriate parser.\n\nThe Arm NN SDK provides parsers for reading graphs from a variety of model formats. In our application we specifically focus on `.tflite, .pb, .onnx` models.\n\nBased on the extension of the provided model file, the corresponding parser is created and the network file loaded with the `CreateNetworkFromBinaryFile()` function. The parser will handle the creation of the underlying Arm NN graph.\n\n##### Optimizing Graph for Compute Device\nArm NN supports optimized execution on multiple CPU and GPU devices. Prior to executing a graph, we must select the appropriate device context. We do this by creating a runtime context with default options with `IRuntime()`.\n\nWe can optimize the imported graph by specifying a list of backends in order of preference and implementing backend-specific optimizations. The backends are identified by a string unique to the backend, for example `CpuAcc, GpuAcc, CpuRef`.\n\nInternally and transparently, Arm NN splits the graph into subgraphs based on the backends, calls an optimization function on each of them and, if possible, substitutes the corresponding subgraph in the original graph with its optimized version.\n\nUsing the `Optimize()` function we optimize the graph for inference and load the optimized network onto the compute device with `LoadNetwork()`. This function creates the backend-specific workloads for the layers and a backend-specific workload factory which is called to create the workloads.\n\n##### Creating Input and Output Binding Information\nParsers can also be used to extract the input information for the network. By calling `GetSubgraphInputTensorNames` we extract all the input names and, with `GetNetworkInputBindingInfo`, bind the input points of the graph.\n\nThe input binding information contains all the essential information about the input. It is a tuple consisting of integer identifiers for bindable layers (inputs, outputs) and the tensor info (data type, quantization information, number of dimensions, total number of elements).\n\nSimilarly, we can get the output binding information for an output layer by using the parser to retrieve output tensor names and calling `GetNetworkOutputBindingInfo()`.\n\n\n### Preparing the Workload Tensors\n\n##### Preprocessing the Captured Frame\nEach frame captured from the source is read as an `ndarray` in BGR format and therefore has to be preprocessed before being passed into the network.\n\nThis preprocessing step consists of swapping channels (BGR to RGB in this example), resizing the frame to the required resolution, expanding dimensions of the array and doing data type conversion to match the model input layer. This information about the input tensor can be readily obtained from reading the `input_binding_info`. 
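\n\nA minimal per-frame preprocessing sketch along these lines (the 300x300 `uint8` input used here is only an assumed example; the real scripts read the expected shape and data type from `input_binding_info`):\n\n```python\nimport cv2\nimport numpy as np\n\ndef preprocess(frame, input_height=300, input_width=300, input_dtype=np.uint8):\n    # OpenCV captures frames in BGR order, so swap channels to RGB first\n    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n    # Resize to the resolution expected by the network input layer\n    resized = cv2.resize(rgb, (input_width, input_height))\n    # Add the batch dimension and cast to the expected data type\n    return np.expand_dims(resized, axis=0).astype(input_dtype)\n```\n\n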
For example, SSD MobileNet V1 takes for input a tensor with shape `[1, 300, 300, 3]` and data type `uint8`.\n\n##### Making Input and Output Tensors\nTo produce the workload tensors, calling the functions `make_input_tensors()` and `make_output_tensors()` will return the input and output tensors respectively.\n\n\n### Executing Inference\nAfter making the workload tensors, a compute device performs inference for the loaded network using the `EnqueueWorkload()` function of the runtime context. By calling the `workload_tensors_to_ndarray()` function, we obtain the results from inference as a list of `ndarrays`.\n\n\n### Postprocessing\n\n##### Decoding and Processing Inference Output\nThe output from inference must be decoded to obtain information about detected objects in the frame. In the examples there are implementations for two networks but you may also implement your own network decoding solution here. Please refer to the <i>Implementing Your Own Network</i> section of this document to learn how to do this.\n\nFor SSD MobileNet V1 models, we decode the results to obtain the bounding box positions, classification index, confidence and number of detections in the input frame.\n\nFor YOLO V3 Tiny models, we decode the output and perform non-maximum suppression to filter out any weak detections below a confidence threshold and any redundant bounding boxes above an intersection-over-union threshold.\n\nIt is encouraged to experiment with threshold values for confidence and intersection-over-union (IoU) to achieve the best visual results.\n\nThe detection results are always returned as a list in the form `[class index, [box positions], confidence score]`, with the box positions list containing bounding box coordinates in the form `[x_min, y_min, x_max, y_max]`.\n\n##### Drawing Bounding Boxes\nWith the obtained results and using `draw_bounding_boxes()`, we are able to draw bounding boxes around detected objects and add the associated label and confidence score. The labels dictionary created earlier uses the class index of the detected object as a key to return the associated label and color for that class. The resize factor defined at the beginning scales the bounding box coordinates to their correct positions in the original frame. The processed frames are written to file or displayed in a separate window.\n"
  },
  {
    "path": "example_scripts/arm_nn/box.py",
    "content": "import numpy as np\nimport cv2\n\n\n# Todo : BoundBox & its related method extraction\nclass BoundBox:\n    def __init__(self, x, y, w, h, c = None, classes = None):\n        self.x     = x\n        self.y     = y\n        self.w     = w\n        self.h     = h\n        \n        self.c     = c\n        self.classes = classes\n\n    def get_label(self):\n        return np.argmax(self.classes)\n    \n    def get_score(self):\n        return self.classes[self.get_label()]\n    \n    def iou(self, bound_box):\n        b1 = self.as_centroid()\n        b2 = bound_box.as_centroid()\n        return centroid_box_iou(b1, b2)\n\n    def as_centroid(self):\n        return np.array([self.x, self.y, self.w, self.h])\n    \n\ndef boxes_to_array(bound_boxes):\n    \"\"\"\n    # Args\n        boxes : list of BoundBox instances\n    \n    # Returns\n        centroid_boxes : (N, 4)\n        probs : (N, nb_classes)\n    \"\"\"\n    centroid_boxes = []\n    probs = []\n    for box in bound_boxes:\n        centroid_boxes.append([box.x, box.y, box.w, box.h])\n        probs.append(box.classes)\n    return np.array(centroid_boxes), np.array(probs)\n\n\ndef nms_boxes(boxes, n_classes, nms_threshold=0.3, obj_threshold=0.3):\n    \"\"\"\n    # Args\n        boxes : list of BoundBox\n    \n    # Returns\n        boxes : list of BoundBox\n            non maximum supressed BoundBox instances\n    \"\"\"\n    # suppress non-maximal boxes\n    for c in range(n_classes):\n        sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes])))\n\n        for i in range(len(sorted_indices)):\n            index_i = sorted_indices[i]\n            \n            if boxes[index_i].classes[c] == 0: \n                continue\n            else:\n                for j in range(i+1, len(sorted_indices)):\n                    index_j = sorted_indices[j]\n\n                    if boxes[index_i].iou(boxes[index_j]) >= nms_threshold:\n                        boxes[index_j].classes[c] = 0\n    # remove the boxes which are less likely than a obj_threshold\n    boxes = [box for box in boxes if box.get_score() > obj_threshold]\n    return boxes\n\n\ndef draw_scaled_boxes(image, boxes, probs, labels, desired_size=400):\n    img_size = min(image.shape[:2])\n    if img_size < desired_size:\n        scale_factor = float(desired_size) / img_size\n    else:\n        scale_factor = 1.0\n    \n    h, w = image.shape[:2]\n    img_scaled = cv2.resize(image, (int(w*scale_factor), int(h*scale_factor)))\n    if boxes != []:\n        boxes_scaled = boxes*scale_factor\n        boxes_scaled = boxes_scaled.astype(np.int)\n    else:\n        boxes_scaled = boxes\n    return draw_boxes(img_scaled, boxes_scaled, probs, labels)\n        \n\ndef draw_boxes(image, boxes, probs, labels):\n    for box, classes in zip(boxes, probs):\n        x1, y1, x2, y2 = box\n        cv2.rectangle(image, (x1,y1), (x2,y2), (0,255,0), 3)\n        cv2.putText(image, \n                    '{}:  {:.2f}'.format(labels[np.argmax(classes)], classes.max()), \n                    (x1, y1 - 13), \n                    cv2.FONT_HERSHEY_SIMPLEX, \n                    1e-3 * image.shape[0], \n                    (0,255,0), 2)\n    return image        \n\n\ndef centroid_box_iou(box1, box2):\n    def _interval_overlap(interval_a, interval_b):\n        x1, x2 = interval_a\n        x3, x4 = interval_b\n    \n        if x3 < x1:\n            if x4 < x1:\n                return 0\n            else:\n                return min(x2,x4) - x1\n        else:\n            if 
x2 < x3:\n                return 0\n            else:\n                return min(x2,x4) - x3\n    \n    _, _, w1, h1 = box1.reshape(-1,)\n    _, _, w2, h2 = box2.reshape(-1,)\n    x1_min, y1_min, x1_max, y1_max = to_minmax(box1.reshape(-1,4)).reshape(-1,)\n    x2_min, y2_min, x2_max, y2_max = to_minmax(box2.reshape(-1,4)).reshape(-1,)\n            \n    intersect_w = _interval_overlap([x1_min, x1_max], [x2_min, x2_max])\n    intersect_h = _interval_overlap([y1_min, y1_max], [y2_min, y2_max])\n    intersect = intersect_w * intersect_h\n    union = w1 * h1 + w2 * h2 - intersect\n    \n    return float(intersect) / union\n\n\ndef to_centroid(minmax_boxes):\n    \"\"\"\n    minmax_boxes : (N, 4)\n    \"\"\"\n    minmax_boxes = minmax_boxes.astype(np.float)\n    centroid_boxes = np.zeros_like(minmax_boxes)\n    \n    x1 = minmax_boxes[:,0]\n    y1 = minmax_boxes[:,1]\n    x2 = minmax_boxes[:,2]\n    y2 = minmax_boxes[:,3]\n    \n    centroid_boxes[:,0] = (x1 + x2) / 2\n    centroid_boxes[:,1] = (y1 + y2) / 2\n    centroid_boxes[:,2] = x2 - x1\n    centroid_boxes[:,3] = y2 - y1\n    return centroid_boxes\n\ndef to_minmax(centroid_boxes):\n    centroid_boxes = centroid_boxes.astype(np.float)\n    minmax_boxes = np.zeros_like(centroid_boxes)\n    \n    cx = centroid_boxes[:,0]\n    cy = centroid_boxes[:,1]\n    w = centroid_boxes[:,2]\n    h = centroid_boxes[:,3]\n    \n    minmax_boxes[:,0] = cx - w/2\n    minmax_boxes[:,1] = cy - h/2\n    minmax_boxes[:,2] = cx + w/2\n    minmax_boxes[:,3] = cy + h/2\n    return minmax_boxes\n\ndef create_anchor_boxes(anchors):\n    \"\"\"\n    # Args\n        anchors : list of floats\n    # Returns\n        boxes : array, shape of (len(anchors)/2, 4)\n            centroid-type\n    \"\"\"\n    boxes = []\n    n_boxes = int(len(anchors)/2)\n    for i in range(n_boxes):\n        boxes.append(np.array([0, 0, anchors[2*i], anchors[2*i+1]]))\n    return np.array(boxes)\n\ndef find_match_box(centroid_box, centroid_boxes):\n    \"\"\"Find the index of the boxes with the largest overlap among the N-boxes.\n    # Args\n        box : array, shape of (1, 4)\n        boxes : array, shape of (N, 4)\n    \n    # Return\n        match_index : int\n    \"\"\"\n    match_index = -1\n    max_iou     = -1\n    \n    for i, box in enumerate(centroid_boxes):\n        iou = centroid_box_iou(centroid_box, box)\n        \n        if max_iou < iou:\n            match_index = i\n            max_iou     = iou\n    return match_index\n"
  },
  {
    "path": "example_scripts/arm_nn/cv_utils.py",
    "content": "# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.\n# SPDX-License-Identifier: MIT\n\n\"\"\"\nThis file contains helper functions for reading video/image data and\n pre/postprocessing of video/image data using OpenCV.\n\"\"\"\n\nimport os\n\nimport cv2\nimport numpy as np\n\nimport pyarmnn as ann\n\n\ndef preprocess(frame: np.ndarray, input_binding_info: tuple):\n    \"\"\"\n    Takes a frame, resizes, swaps channels and converts data type to match\n    model input layer. The converted frame is wrapped in a const tensor\n    and bound to the input tensor.\n\n    Args:\n        frame: Captured frame from video.\n        input_binding_info:  Contains shape and data type of model input layer.\n\n    Returns:\n        Input tensor.\n    \"\"\"\n    # Swap channels and resize frame to model resolution\n    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n    resized_frame = resize_with_aspect_ratio(frame, input_binding_info)\n\n    # Expand dimensions and convert data type to match model input\n    data_type = np.float32 if input_binding_info[1].GetDataType() == ann.DataType_Float32 else np.uint8\n    resized_frame = np.expand_dims(np.asarray(resized_frame, dtype=data_type), axis=0)\n    resized_frame /= 255.\n    resized_frame -= 0.5\n    resized_frame *= 2\n    assert resized_frame.shape == tuple(input_binding_info[1].GetShape())\n\n    input_tensors = ann.make_input_tensors([input_binding_info], [resized_frame])\n    return input_tensors\n\n\ndef resize_with_aspect_ratio(frame: np.ndarray, input_binding_info: tuple):\n    \"\"\"\n    Resizes frame while maintaining aspect ratio, padding any empty space.\n\n    Args:\n        frame: Captured frame.\n        input_binding_info: Contains shape of model input layer.\n\n    Returns:\n        Frame resized to the size of model input layer.\n    \"\"\"\n    aspect_ratio = frame.shape[1] / frame.shape[0]\n    model_height, model_width = list(input_binding_info[1].GetShape())[1:3]\n\n    if aspect_ratio >= 1.0:\n        new_height, new_width = int(model_width / aspect_ratio), model_width\n        b_padding, r_padding = model_height - new_height, 0\n    else:\n        new_height, new_width = model_height, int(model_height * aspect_ratio)\n        b_padding, r_padding = 0, model_width - new_width\n\n    # Resize and pad any empty space\n    frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_LINEAR)\n    frame = cv2.copyMakeBorder(frame, top=0, bottom=b_padding, left=0, right=r_padding,\n                               borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0])\n    return frame\n\n\ndef create_video_writer(video: cv2.VideoCapture, video_path: str, output_path: str):\n    \"\"\"\n    Creates a video writer object to write processed frames to file.\n\n    Args:\n        video: Video capture object, contains information about data source.\n        video_path: User-specified video file path.\n        output_path: Optional path to save the processed video.\n\n    Returns:\n        Video writer object.\n    \"\"\"\n    _, ext = os.path.splitext(video_path)\n\n    if output_path is not None:\n        assert os.path.isdir(output_path)\n\n    i, filename = 0, os.path.join(output_path if output_path is not None else str(), f'object_detection_demo{ext}')\n    while os.path.exists(filename):\n        i += 1\n        filename = os.path.join(output_path if output_path is not None else str(), f'object_detection_demo({i}){ext}')\n\n    video_writer = cv2.VideoWriter(filename=filename,\n                         
          fourcc=get_source_encoding_int(video),\n                                   fps=int(video.get(cv2.CAP_PROP_FPS)),\n                                   frameSize=(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),\n                                              int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))))\n    return video_writer\n\n\ndef init_video_file_capture(video_path: str, output_path: str):\n    \"\"\"\n    Creates a video capture object from a video file.\n\n    Args:\n        video_path: User-specified video file path.\n        output_path: Optional path to save the processed video.\n\n    Returns:\n        Video capture object to capture frames, video writer object to write processed\n        frames to file, plus total frame count of video source to iterate through.\n    \"\"\"\n    if not os.path.exists(video_path):\n        raise FileNotFoundError(f'Video file not found for: {video_path}')\n    video = cv2.VideoCapture(video_path)\n    if not video.isOpened:\n        raise RuntimeError(f'Failed to open video capture from file: {video_path}')\n\n    video_writer = create_video_writer(video, video_path, output_path)\n    iter_frame_count = range(int(video.get(cv2.CAP_PROP_FRAME_COUNT)))\n    return video, video_writer, iter_frame_count\n\n\ndef init_video_stream_capture(video_source: int):\n    \"\"\"\n    Creates a video capture object from a device.\n\n    Args:\n        video_source: Device index used to read video stream.\n\n    Returns:\n        Video capture object used to capture frames from a video stream.\n    \"\"\"\n    video = cv2.VideoCapture(video_source)\n    if not video.isOpened:\n        raise RuntimeError(f'Failed to open video capture for device with index: {video_source}')\n    print('Processing video stream. Press \\'Esc\\' key to exit the demo.')\n    return video\n\n\ndef draw_bounding_boxes(frame: np.ndarray, detections: list, resize_factor, labels: dict):\n    \"\"\"\n    Draws bounding boxes around detected objects and adds a label and confidence score.\n\n    Args:\n        frame: The original captured frame from video source.\n        detections: A list of detected objects in the form [class, [box positions], confidence].\n        resize_factor: Resizing factor to scale box coordinates to output frame size.\n        labels: Dictionary of labels and colors keyed on the classification index.\n    \"\"\"\n    for detection in detections:\n        class_idx, box, confidence = [d for d in detection]\n        label, color = labels[class_idx][0].capitalize(), labels[class_idx][1]\n\n        # Obtain frame size and resized bounding box positions\n        frame_height, frame_width = frame.shape[:2]\n        x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box]\n\n        # Ensure box stays within the frame\n        x_min, y_min = max(0, x_min), max(0, y_min)\n        x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)\n\n        # Draw bounding box around detected object\n        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)\n\n        # Create label for detected object class\n        label = f'{label} {confidence * 100:.1f}%'\n        label_color = (0, 0, 0) if sum(color)>200 else (255, 255, 255)\n\n        # Make sure label always stays on-screen\n        x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2]\n\n        lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text)\n        lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min<25 else y_min)\n        
lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5)\n\n        # Add label and confidence value\n        cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1)\n        cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50,\n                    label_color, 1, cv2.LINE_AA)\n\n\ndef get_source_encoding_int(video_capture):\n    return int(video_capture.get(cv2.CAP_PROP_FOURCC))\n"
  },
  {
    "path": "example_scripts/arm_nn/network_executor.py",
    "content": "# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.\n# SPDX-License-Identifier: MIT\n\nimport os\nfrom typing import List, Tuple\n\nimport pyarmnn as ann\nimport numpy as np\n\n\ndef create_network(model_file: str, backends: list, input_names: Tuple[str] = (), output_names: Tuple[str] = ()):\n    \"\"\"\n    Creates a network based on the model file and a list of backends.\n\n    Args:\n        model_file: User-specified model file.\n        backends: List of backends to optimize network.\n        input_names:\n        output_names:\n\n    Returns:\n        net_id: Unique ID of the network to run.\n        runtime: Runtime context for executing inference.\n        input_binding_info: Contains essential information about the model input.\n        output_binding_info: Used to map output tensor and its memory.\n    \"\"\"\n    if not os.path.exists(model_file):\n        raise FileNotFoundError(f'Model file not found for: {model_file}')\n\n    _, ext = os.path.splitext(model_file)\n    if ext == '.tflite':\n        parser = ann.ITfLiteParser()\n    else:\n        raise ValueError(\"Supplied model file type is not supported. Supported types are [ tflite ]\")\n\n    network = parser.CreateNetworkFromBinaryFile(model_file)\n\n    # Specify backends to optimize network\n    preferred_backends = []\n    for b in backends:\n        preferred_backends.append(ann.BackendId(b))\n\n    # Select appropriate device context and optimize the network for that device\n    options = ann.CreationOptions()\n    runtime = ann.IRuntime(options)\n    opt_network, messages = ann.Optimize(network, preferred_backends, runtime.GetDeviceSpec(),\n                                         ann.OptimizerOptions())\n    print(f'Preferred backends: {backends}\\n{runtime.GetDeviceSpec()}\\n'\n          f'Optimization warnings: {messages}')\n\n    # Load the optimized network onto the Runtime device\n    net_id, _ = runtime.LoadNetwork(opt_network)\n\n    # Get input and output binding information\n    graph_id = parser.GetSubgraphCount() - 1\n    input_names = parser.GetSubgraphInputTensorNames(graph_id)\n    input_binding_info = parser.GetNetworkInputBindingInfo(graph_id, input_names[0])\n    output_names = parser.GetSubgraphOutputTensorNames(graph_id)\n    output_binding_info = []\n\n    for output_name in output_names:\n        out_bind_info = parser.GetNetworkOutputBindingInfo(graph_id, output_name)\n        output_binding_info.append(out_bind_info)\n\n    return net_id, runtime, input_binding_info, output_binding_info\n\n\ndef execute_network(input_tensors: list, output_tensors: list, runtime, net_id: int) -> List[np.ndarray]:\n    \"\"\"\n    Executes inference for the loaded network.\n\n    Args:\n        input_tensors: The input frame tensor.\n        output_tensors: The output tensor from output node.\n        runtime: Runtime context for executing inference.\n        net_id: Unique ID of the network to run.\n\n    Returns:\n        list: Inference results as a list of ndarrays.\n    \"\"\"\n    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)\n    output = ann.workload_tensors_to_ndarray(output_tensors)\n    return output\n\n\nclass ArmnnNetworkExecutor:\n\n    def __init__(self, model_file: str, backends: list):\n        \"\"\"\n        Creates an inference executor for a given network and a list of backends.\n\n        Args:\n            model_file: User-specified model file.\n            backends: List of backends to optimize network.\n        \"\"\"\n        
self.network_id, self.runtime, self.input_binding_info, self.output_binding_info = create_network(model_file,\n                                                                                                          backends)\n        self.output_tensors = ann.make_output_tensors(self.output_binding_info)\n\n    def run(self, input_tensors: list) -> List[np.ndarray]:\n        \"\"\"\n        Executes inference for the loaded network.\n\n        Args:\n            input_tensors: The input frame tensor.\n\n        Returns:\n            list: Inference results as a list of ndarrays.\n        \"\"\"\n        return execute_network(input_tensors, self.output_tensors, self.runtime, self.network_id)\n"
  },
  {
    "path": "example_scripts/arm_nn/run_video_file.py",
    "content": "# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.\r\n# SPDX-License-Identifier: MIT\r\n\r\n\"\"\"\r\nObject detection demo that takes a video file, runs inference on each frame producing\r\nbounding boxes and labels around detected objects, and saves the processed video.\r\n\r\npython3 run_video_file.py --fd_model_file_path YOLO_best_mAP.tflite --kp_model_file MobileFaceNet_kpts.tflite --video_file_path test_s.mp4 \r\n\r\n\"\"\"\r\n\r\nimport os\r\nimport sys\r\nimport time\r\nscript_dir = os.path.dirname(__file__)\r\nsys.path.insert(1, os.path.join(script_dir, '..', 'common'))\r\n\r\nimport cv2\r\nimport numpy as np\r\nfrom tqdm import tqdm\r\nfrom argparse import ArgumentParser\r\n\r\nfrom yolov2 import yolo_processing, yolo_resize_factor\r\nfrom utils import dict_labels\r\nfrom cv_utils import init_video_file_capture, resize_with_aspect_ratio\r\nfrom network_executor import ArmnnNetworkExecutor\r\nimport pyarmnn as ann\r\n\r\n\r\ndef preprocess(frame: np.ndarray, input_binding_info: tuple):\r\n    \"\"\"\r\n    Takes a frame, resizes, swaps channels and converts data type to match\r\n    model input layer. The converted frame is wrapped in a const tensor\r\n    and bound to the input tensor.\r\n\r\n    Args:\r\n        frame: Captured frame from video.\r\n        input_binding_info:  Contains shape and data type of model input layer.\r\n\r\n    Returns:\r\n        Input tensor.\r\n    \"\"\"\r\n    # Swap channels and resize frame to model resolution\r\n    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\r\n    resized_frame = resize_with_aspect_ratio(frame, input_binding_info)\r\n\r\n    # Expand dimensions and convert data type to match model input\r\n    data_type = np.float32 if input_binding_info[1].GetDataType() == ann.DataType_Float32 else np.uint8\r\n    resized_frame = np.expand_dims(np.asarray(resized_frame, dtype=data_type), axis=0)\r\n    resized_frame /= 255.\r\n    resized_frame -= 0.5\r\n    resized_frame *= 2\r\n    assert resized_frame.shape == tuple(input_binding_info[1].GetShape())\r\n\r\n    input_tensors = ann.make_input_tensors([input_binding_info], [resized_frame])\r\n    return input_tensors\r\n\r\ndef process_faces(frame, detections, executor_kp, resize_factor):\r\n    kpts_list = []\r\n\r\n    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\r\n\r\n    for detection in detections:\r\n        box = detection[1].copy()\r\n        for i in range(len(box)):\r\n            box[i] = int(box[i] * resize_factor)\r\n        x, y, w, h = box[0], box[1], box[2] - box[0], box[3] - box[1]\r\n\r\n        face_img = frame[box[1]:box[3], box[0]:box[2]]\r\n\r\n        face_img = cv2.resize(face_img, (128, 128)) \r\n\r\n        face_img = face_img.astype(np.float32)\r\n        face_img /= 127.5\r\n        face_img -= 1.\r\n\r\n        input_tensors = ann.make_input_tensors([executor_kp.input_binding_info], [face_img])\r\n\r\n        plist = executor_kp.run(input_tensors)[0][0]\r\n\r\n        le = (x + int(plist[0] * w+5), y + int(plist[1] * h+5))\r\n        re = (x + int(plist[2] * w), y + int(plist[3] * h+5))\r\n        n = (x + int(plist[4] * w), y + int(plist[5] * h))\r\n        lm = (x + int(plist[6] * w), y + int(plist[7] * h))\r\n        rm = (x + int(plist[8] * w), y + int(plist[9] * h))\r\n        kpts = [le, re, n, lm, rm]\r\n\r\n        kpts_list.append(kpts)\r\n\r\n    return kpts_list\r\n\r\ndef draw_bounding_boxes(frame: np.ndarray, detections: list, resize_factor, kpts):\r\n    \"\"\"\r\n    Draws bounding boxes around detected objects 
and adds a label and confidence score.\r\n\r\n    Args:\r\n        frame: The original captured frame from video source.\r\n        detections: A list of detected objects in the form [class, [box positions], confidence].\r\n        resize_factor: Resizing factor to scale box coordinates to output frame size.\r\n        labels: Dictionary of labels and colors keyed on the classification index.\r\n    \"\"\"\r\n    for detection in detections:\r\n        class_idx, box, confidence = [d for d in detection]\r\n        label, color = 'Person', (0, 255, 0)\r\n\r\n        # Obtain frame size and resized bounding box positions\r\n        frame_height, frame_width = frame.shape[:2]\r\n        x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box]\r\n\r\n        # Ensure box stays within the frame\r\n        x_min, y_min = max(0, x_min), max(0, y_min)\r\n        x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)\r\n\r\n        # Draw bounding box around detected object\r\n        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)\r\n\r\n        # Create label for detected object class\r\n        label = f'{label} {confidence * 100:.1f}%'\r\n        label_color = (0, 0, 0) if sum(color)>200 else (255, 255, 255)\r\n\r\n        # Make sure label always stays on-screen\r\n        x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2]\r\n\r\n        lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text)\r\n        lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min<25 else y_min)\r\n        lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5)\r\n\r\n        # Add label and confidence value\r\n        cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1)\r\n        cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50,\r\n                    label_color, 1, cv2.LINE_AA)\r\n\r\n        for kpt_set in kpts:\r\n            for kpt in kpt_set:\r\n                cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 5, (255, 0, 0), 2)\r\n\r\n\r\ndef main(args):\r\n    video, video_writer, frame_count = init_video_file_capture(args.video_file_path, args.output_video_file_path)\r\n    frame_num = len(frame_count)\r\n\r\n    executor_fd = ArmnnNetworkExecutor(args.fd_model_file_path, args.preferred_backends)\r\n    executor_kp = ArmnnNetworkExecutor(args.kp_model_file_path, args.preferred_backends)    \r\n\r\n    process_output, resize_factor = yolo_processing, yolo_resize_factor(video, executor_fd.input_binding_info)\r\n\r\n    times = []\r\n\r\n    for _ in tqdm(frame_count, desc='Processing frames'):\r\n        frame_present, frame = video.read()\r\n        if not frame_present:\r\n            continue\r\n\r\n        input_tensors = preprocess(frame, executor_fd.input_binding_info)\r\n\r\n        start_time = time.time() # start time of the loop\r\n        output_result = executor_fd.run(input_tensors)\r\n\r\n        detections = process_output(output_result)\r\n\r\n        kpts = process_faces(frame, detections, executor_kp, resize_factor)\r\n        draw_bounding_boxes(frame, detections, resize_factor, kpts)\r\n\r\n        end_time = (time.time() - start_time)*1000\r\n        times.append(end_time)\r\n        video_writer.write(frame)\r\n\r\n    print('Finished processing frames')\r\n    video.release(), video_writer.release()\r\n\r\n    print(\"Average time(ms): \", sum(times)//frame_num) \r\n    print(\"FPS: \", 1000.0 / (sum(times)//frame_num)) # FPS = 1 / time to process 
loop\r\n\r\nif __name__ == '__main__':\r\n    parser = ArgumentParser()\r\n    parser.add_argument('--video_file_path', required=True, type=str,\r\n                        help='Path to the video file to run object detection on')\r\n\r\n    parser.add_argument('--fd_model_file_path', required=True, type=str,\r\n                        help='Path to the Object Detection model to use')\r\n    parser.add_argument('--kp_model_file_path', required=True, type=str,\r\n                        help='Path to the Object Detection model to use')\r\n\r\n    parser.add_argument('--output_video_file_path', type=str,\r\n                        help='Path to the output video file with detections added in')\r\n    parser.add_argument('--preferred_backends', type=str, nargs='+', default=['CpuAcc', 'CpuRef'],\r\n                        help='Takes the preferred backends in preference order, separated by whitespace, '\r\n                             'for example: CpuAcc GpuAcc CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]. '\r\n                             'Defaults to [CpuAcc, CpuRef]')\r\n    args = parser.parse_args()\r\n    main(args)\r\n"
  },
  {
    "path": "example_scripts/arm_nn/run_video_stream.py",
    "content": "\"\"\"\r\nObject detection demo that takes a video stream from a device, runs inference\r\non each frame producing bounding boxes and labels around detected objects,\r\nand displays a window with the latest processed frame.\r\n\"\"\"\r\nimport os\r\nimport sys\r\nimport time\r\nscript_dir = os.path.dirname(__file__)\r\nsys.path.insert(1, os.path.join(script_dir, '..', 'common'))\r\n\r\nimport cv2\r\nimport numpy as np\r\nfrom tqdm import tqdm\r\nfrom argparse import ArgumentParser\r\n\r\nfrom yolov2 import yolo_processing, yolo_resize_factor\r\n\r\nfrom cv_utils import init_video_stream_capture, resize_with_aspect_ratio\r\nfrom network_executor import ArmnnNetworkExecutor\r\nimport pyarmnn as ann\r\n\r\n\r\ndef preprocess(frame: np.ndarray, input_binding_info: tuple):\r\n    \"\"\"\r\n    Takes a frame, resizes, swaps channels and converts data type to match\r\n    model input layer. The converted frame is wrapped in a const tensor\r\n    and bound to the input tensor.\r\n\r\n    Args:\r\n        frame: Captured frame from video.\r\n        input_binding_info:  Contains shape and data type of model input layer.\r\n\r\n    Returns:\r\n        Input tensor.\r\n    \"\"\"\r\n    # Swap channels and resize frame to model resolution\r\n    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\r\n    resized_frame = resize_with_aspect_ratio(frame, input_binding_info)\r\n\r\n    # Expand dimensions and convert data type to match model input\r\n    data_type = np.float32 if input_binding_info[1].GetDataType() == ann.DataType_Float32 else np.uint8\r\n    resized_frame = np.expand_dims(np.asarray(resized_frame, dtype=data_type), axis=0)\r\n    resized_frame /= 255.\r\n    resized_frame -= 0.5\r\n    resized_frame *= 2\r\n    assert resized_frame.shape == tuple(input_binding_info[1].GetShape())\r\n\r\n    input_tensors = ann.make_input_tensors([input_binding_info], [resized_frame])\r\n    return input_tensors\r\n\r\ndef process_faces(frame, detections, executor_kp, resize_factor):\r\n    kpts_list = []\r\n\r\n    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\r\n\r\n    for detection in detections:\r\n        box = detection[1].copy()\r\n        for i in range(len(box)):\r\n            box[i] = int(box[i] * resize_factor)\r\n        x, y, w, h = box[0], box[1], box[2] - box[0], box[3] - box[1]\r\n\r\n        face_img = frame[box[1]:box[3], box[0]:box[2]]\r\n\r\n        face_img = cv2.resize(face_img, (128, 128)) \r\n        #cv2.imshow('PyArmNN Object Detection Demo face', face_img)\r\n        face_img = face_img.astype(np.float32)\r\n        face_img /= 127.5\r\n        face_img -= 1.\r\n\r\n        input_tensors = ann.make_input_tensors([executor_kp.input_binding_info], [face_img])\r\n\r\n        plist = executor_kp.run(input_tensors)[0][0]\r\n\r\n        le = (x + int(plist[0] * w+5), y + int(plist[1] * h+5))\r\n        re = (x + int(plist[2] * w), y + int(plist[3] * h+5))\r\n        n = (x + int(plist[4] * w), y + int(plist[5] * h))\r\n        lm = (x + int(plist[6] * w), y + int(plist[7] * h))\r\n        rm = (x + int(plist[8] * w), y + int(plist[9] * h))\r\n        kpts = [le, re, n, lm, rm]\r\n\r\n        kpts_list.append(kpts)\r\n\r\n    return kpts_list\r\n\r\ndef draw_bounding_boxes(frame: np.ndarray, detections: list, resize_factor, kpts):\r\n    \"\"\"\r\n    Draws bounding boxes around detected objects and adds a label and confidence score.\r\n\r\n    Args:\r\n        frame: The original captured frame from video source.\r\n        detections: A list of detected objects in the 
form [class, [box positions], confidence].\r\n        resize_factor: Resizing factor to scale box coordinates to output frame size.\r\n        labels: Dictionary of labels and colors keyed on the classification index.\r\n    \"\"\"\r\n    for detection in detections:\r\n        class_idx, box, confidence = [d for d in detection]\r\n        label, color = 'Person', (0, 255, 0)\r\n\r\n        # Obtain frame size and resized bounding box positions\r\n        frame_height, frame_width = frame.shape[:2]\r\n        x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box]\r\n\r\n        # Ensure box stays within the frame\r\n        x_min, y_min = max(0, x_min), max(0, y_min)\r\n        x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)\r\n\r\n        # Draw bounding box around detected object\r\n        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)\r\n\r\n        # Create label for detected object class\r\n        label = f'{label} {confidence * 100:.1f}%'\r\n        label_color = (0, 0, 0) if sum(color)>200 else (255, 255, 255)\r\n\r\n        # Make sure label always stays on-screen\r\n        x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2]\r\n\r\n        lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text)\r\n        lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min<25 else y_min)\r\n        lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5)\r\n\r\n        # Add label and confidence value\r\n        cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1)\r\n        cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50,\r\n                    label_color, 1, cv2.LINE_AA)\r\n\r\n        for kpt_set in kpts:\r\n            for kpt in kpt_set:\r\n\r\n                cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 5, (255, 0, 0), 2)\r\ndef main(args):\r\n    video = init_video_stream_capture(args.video_source)\r\n\r\n    executor_fd = ArmnnNetworkExecutor(args.fd_model_file_path, args.preferred_backends)\r\n    executor_kp = ArmnnNetworkExecutor(args.kp_model_file_path, args.preferred_backends)    \r\n\r\n    process_output, resize_factor = yolo_processing, yolo_resize_factor(video, executor_fd.input_binding_info)\r\n\r\n    while True:\r\n\r\n        frame_present, frame = video.read()\r\n        frame = cv2.flip(frame, 1)  # Horizontally flip the frame\r\n        if not frame_present:\r\n            raise RuntimeError('Error reading frame from video stream')\r\n        input_tensors = preprocess(frame, executor_fd.input_binding_info)\r\n        print(\"Running inference...\")\r\n\r\n        start_time = time.time() \r\n        output_result = executor_fd.run(input_tensors)\r\n        detections = process_output(output_result)\r\n        kpts = process_faces(frame, detections, executor_kp, resize_factor)\r\n\r\n        print(\"FPS: \", 1.0 / (time.time() - start_time)) # FPS = 1 / time to process loop\r\n        print(\"Time(ms): \", (time.time() - start_time)*1000) \r\n\r\n        draw_bounding_boxes(frame, detections, resize_factor, kpts)\r\n        cv2.imshow('PyArmNN Object Detection Demo', frame)\r\n\r\n        if cv2.waitKey(1) == 27:\r\n            print('\\nExit key activated. 
Closing video...')\r\n            break\r\n    video.release(), cv2.destroyAllWindows()\r\n\r\n\r\nif __name__ == '__main__':\r\n    parser = ArgumentParser()\r\n    parser.add_argument('--video_source', type=int, default=0,\r\n                        help='Device index to access video stream. Defaults to primary device camera at index 0')\r\n\r\n    parser.add_argument('--fd_model_file_path', required=True, type=str,\r\n                        help='Path to the Object Detection model to use')\r\n    parser.add_argument('--kp_model_file_path', required=True, type=str,\r\n                        help='Path to the Object Detection model to use')\r\n\r\n    parser.add_argument('--preferred_backends', type=str, nargs='+', default=['CpuAcc', 'CpuRef'],\r\n                        help='Takes the preferred backends in preference order, separated by whitespace, '\r\n                             'for example: CpuAcc GpuAcc CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]. '\r\n                             'Defaults to [CpuAcc, CpuRef]')\r\n    args = parser.parse_args()\r\n    main(args)\r\n"
  },
  {
    "path": "example_scripts/arm_nn/yolov2.py",
    "content": "# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.\r\n# SPDX-License-Identifier: MIT\r\n\r\n\"\"\"\r\nContains functions specific to decoding and processing inference results for YOLO V3 Tiny models.\r\n\"\"\"\r\n\r\nimport cv2\r\nimport numpy as np\r\nfrom box import BoundBox, nms_boxes, boxes_to_array, to_minmax, draw_boxes\r\n\r\n\r\ndef yolo_processing(netout):\r\n    anchors = [1.889, 2.5245, 2.9465, 3.94056, 3.99987, 5.3658, 5.155437, 6.92275, 6.718375, 9.01025]\r\n    nms_threshold=0.2\r\n    \"\"\"Convert Yolo network output to bounding box\r\n\r\n    # Args\r\n        netout : 4d-array, shape of (grid_h, grid_w, num of boxes per grid, 5 + n_classes)\r\n            YOLO neural network output array\r\n\r\n    # Returns\r\n        boxes : array, shape of (N, 4)\r\n            coordinate scale is normalized [0, 1]\r\n        probs : array, shape of (N, nb_classes)\r\n    \"\"\"\r\n    netout = netout[0].reshape(7,7,5,6)\r\n    grid_h, grid_w, nb_box = netout.shape[:3]\r\n    boxes = []\r\n\r\n    # decode the output by the network\r\n    netout[..., 4]  = _sigmoid(netout[..., 4])\r\n    netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])\r\n    netout[..., 5:] *= netout[..., 5:] > 0.3\r\n\r\n    for row in range(grid_h):\r\n        for col in range(grid_w):\r\n            for b in range(nb_box):\r\n                # from 4th element onwards are confidence and class classes\r\n                classes = netout[row,col,b,5:]\r\n                \r\n                if np.sum(classes) > 0:\r\n                    # first 4 elements are x, y, w, and h\r\n                    x, y, w, h = netout[row,col,b,:4]\r\n\r\n                    x = (col + _sigmoid(x)) / grid_w # center position, unit: image width\r\n                    y = (row + _sigmoid(y)) / grid_h # center position, unit: image height\r\n                    w = anchors[2 * b + 0] * np.exp(w) / grid_w # unit: image width\r\n                    h = anchors[2 * b + 1] * np.exp(h) / grid_h # unit: image height\r\n                    confidence = netout[row,col,b,4]\r\n                    box = BoundBox(x, y, w, h, confidence, classes)\r\n                    boxes.append(box)\r\n\r\n    boxes = nms_boxes(boxes, len(classes), nms_threshold, 0.3)\r\n    boxes, probs = boxes_to_array(boxes)\r\n    #print(boxes)\r\n    predictions = []\r\n    def _to_original_scale(boxes):\r\n        minmax_boxes = to_minmax(boxes)\r\n        minmax_boxes[:,0] *= 224\r\n        minmax_boxes[:,2] *= 224\r\n        minmax_boxes[:,1] *= 224\r\n        minmax_boxes[:,3] *= 224\r\n        return minmax_boxes.astype(np.int)\r\n\r\n    if len(boxes) > 0:\r\n        boxes = _to_original_scale(boxes)\r\n\r\n        for i in range(len(boxes)):\r\n            predictions.append([0, boxes[i], probs[i][0]])\r\n\r\n    return predictions\r\n\r\ndef _sigmoid(x):\r\n    return 1. / (1. 
+ np.exp(-x))\r\n\r\ndef _softmax(x, axis=-1, t=-100.):\r\n    x = x - np.max(x)\r\n    if np.min(x) < t:\r\n        x = x/np.min(x)*t\r\n    e_x = np.exp(x)\r\n    return e_x / e_x.sum(axis, keepdims=True)\r\n\r\ndef yolo_resize_factor(video: cv2.VideoCapture, input_binding_info: tuple):\r\n    \"\"\"\r\n    Gets a multiplier to scale the bounding box positions to\r\n    their correct position in the frame.\r\n\r\n    Args:\r\n        video: Video capture object, contains information about data source.\r\n        input_binding_info: Contains shape of model input layer.\r\n\r\n    Returns:\r\n        Resizing factor to scale box coordinates to output frame size.\r\n    \"\"\"\r\n    frame_height = video.get(cv2.CAP_PROP_FRAME_HEIGHT)\r\n    frame_width = video.get(cv2.CAP_PROP_FRAME_WIDTH)\r\n    model_height, model_width = list(input_binding_info[1].GetShape())[1:3]\r\n    return max(frame_height, frame_width) / max(model_height, model_width)\r\n"
  },
  {
    "path": "example_scripts/edge_tpu/detector/box.py",
    "content": "\r\nimport numpy as np\r\nimport cv2\r\n\r\n\r\n# Todo : BoundBox & its related method extraction\r\nclass BoundBox:\r\n    def __init__(self, x, y, w, h, c = None, classes = None):\r\n        self.x     = x\r\n        self.y     = y\r\n        self.w     = w\r\n        self.h     = h\r\n        \r\n        self.c     = c\r\n        self.classes = classes\r\n\r\n    def get_label(self):\r\n        return np.argmax(self.classes)\r\n    \r\n    def get_score(self):\r\n        return self.classes[self.get_label()]\r\n    \r\n    def iou(self, bound_box):\r\n        b1 = self.as_centroid()\r\n        b2 = bound_box.as_centroid()\r\n        return centroid_box_iou(b1, b2)\r\n\r\n    def as_centroid(self):\r\n        return np.array([self.x, self.y, self.w, self.h])\r\n    \r\n\r\ndef boxes_to_array(bound_boxes):\r\n    \"\"\"\r\n    # Args\r\n        boxes : list of BoundBox instances\r\n    \r\n    # Returns\r\n        centroid_boxes : (N, 4)\r\n        probs : (N, nb_classes)\r\n    \"\"\"\r\n    centroid_boxes = []\r\n    probs = []\r\n    for box in bound_boxes:\r\n        centroid_boxes.append([box.x, box.y, box.w, box.h])\r\n        probs.append(box.classes)\r\n    return np.array(centroid_boxes), np.array(probs)\r\n\r\n\r\ndef nms_boxes(boxes, n_classes, nms_threshold=0.3, obj_threshold=0.3):\r\n    \"\"\"\r\n    # Args\r\n        boxes : list of BoundBox\r\n    \r\n    # Returns\r\n        boxes : list of BoundBox\r\n            non maximum supressed BoundBox instances\r\n    \"\"\"\r\n    # suppress non-maximal boxes\r\n    for c in range(n_classes):\r\n        sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes])))\r\n\r\n        for i in range(len(sorted_indices)):\r\n            index_i = sorted_indices[i]\r\n            \r\n            if boxes[index_i].classes[c] == 0: \r\n                continue\r\n            else:\r\n                for j in range(i+1, len(sorted_indices)):\r\n                    index_j = sorted_indices[j]\r\n\r\n                    if boxes[index_i].iou(boxes[index_j]) >= nms_threshold:\r\n                        boxes[index_j].classes[c] = 0\r\n    # remove the boxes which are less likely than a obj_threshold\r\n    boxes = [box for box in boxes if box.get_score() > obj_threshold]\r\n    return boxes\r\n\r\n\r\ndef draw_scaled_boxes(image, boxes, probs, labels, desired_size=400):\r\n    img_size = min(image.shape[:2])\r\n    if img_size < desired_size:\r\n        scale_factor = float(desired_size) / img_size\r\n    else:\r\n        scale_factor = 1.0\r\n    \r\n    h, w = image.shape[:2]\r\n    img_scaled = cv2.resize(image, (int(w*scale_factor), int(h*scale_factor)))\r\n    if boxes != []:\r\n        boxes_scaled = boxes*scale_factor\r\n        boxes_scaled = boxes_scaled.astype(np.int)\r\n    else:\r\n        boxes_scaled = boxes\r\n    return draw_boxes(img_scaled, boxes_scaled, probs, labels)\r\n        \r\n\r\ndef draw_boxes(image, boxes, probs, labels):\r\n    for box, classes in zip(boxes, probs):\r\n        x1, y1, x2, y2 = box\r\n        cv2.rectangle(image, (x1,y1), (x2,y2), (0,255,0), 3)\r\n        cv2.putText(image, \r\n                    '{}:  {:.2f}'.format(labels[np.argmax(classes)], classes.max()), \r\n                    (x1, y1 - 13), \r\n                    cv2.FONT_HERSHEY_SIMPLEX, \r\n                    1e-3 * image.shape[0], \r\n                    (0,255,0), 2)\r\n    return image        \r\n\r\n\r\ndef centroid_box_iou(box1, box2):\r\n    def _interval_overlap(interval_a, interval_b):\r\n      
  x1, x2 = interval_a\r\n        x3, x4 = interval_b\r\n    \r\n        if x3 < x1:\r\n            if x4 < x1:\r\n                return 0\r\n            else:\r\n                return min(x2,x4) - x1\r\n        else:\r\n            if x2 < x3:\r\n                return 0\r\n            else:\r\n                return min(x2,x4) - x3\r\n    \r\n    _, _, w1, h1 = box1.reshape(-1,)\r\n    _, _, w2, h2 = box2.reshape(-1,)\r\n    x1_min, y1_min, x1_max, y1_max = to_minmax(box1.reshape(-1,4)).reshape(-1,)\r\n    x2_min, y2_min, x2_max, y2_max = to_minmax(box2.reshape(-1,4)).reshape(-1,)\r\n            \r\n    intersect_w = _interval_overlap([x1_min, x1_max], [x2_min, x2_max])\r\n    intersect_h = _interval_overlap([y1_min, y1_max], [y2_min, y2_max])\r\n    intersect = intersect_w * intersect_h\r\n    union = w1 * h1 + w2 * h2 - intersect\r\n    \r\n    return float(intersect) / union\r\n\r\n\r\ndef to_centroid(minmax_boxes):\r\n    \"\"\"\r\n    minmax_boxes : (N, 4)\r\n    \"\"\"\r\n    minmax_boxes = minmax_boxes.astype(np.float)\r\n    centroid_boxes = np.zeros_like(minmax_boxes)\r\n    \r\n    x1 = minmax_boxes[:,0]\r\n    y1 = minmax_boxes[:,1]\r\n    x2 = minmax_boxes[:,2]\r\n    y2 = minmax_boxes[:,3]\r\n    \r\n    centroid_boxes[:,0] = (x1 + x2) / 2\r\n    centroid_boxes[:,1] = (y1 + y2) / 2\r\n    centroid_boxes[:,2] = x2 - x1\r\n    centroid_boxes[:,3] = y2 - y1\r\n    return centroid_boxes\r\n\r\ndef to_minmax(centroid_boxes):\r\n    centroid_boxes = centroid_boxes.astype(np.float)\r\n    minmax_boxes = np.zeros_like(centroid_boxes)\r\n    \r\n    cx = centroid_boxes[:,0]\r\n    cy = centroid_boxes[:,1]\r\n    w = centroid_boxes[:,2]\r\n    h = centroid_boxes[:,3]\r\n    \r\n    minmax_boxes[:,0] = cx - w/2\r\n    minmax_boxes[:,1] = cy - h/2\r\n    minmax_boxes[:,2] = cx + w/2\r\n    minmax_boxes[:,3] = cy + h/2\r\n    return minmax_boxes\r\n\r\ndef create_anchor_boxes(anchors):\r\n    \"\"\"\r\n    # Args\r\n        anchors : list of floats\r\n    # Returns\r\n        boxes : array, shape of (len(anchors)/2, 4)\r\n            centroid-type\r\n    \"\"\"\r\n    boxes = []\r\n    n_boxes = int(len(anchors)/2)\r\n    for i in range(n_boxes):\r\n        boxes.append(np.array([0, 0, anchors[2*i], anchors[2*i+1]]))\r\n    return np.array(boxes)\r\n\r\ndef find_match_box(centroid_box, centroid_boxes):\r\n    \"\"\"Find the index of the boxes with the largest overlap among the N-boxes.\r\n\r\n    # Args\r\n        box : array, shape of (1, 4)\r\n        boxes : array, shape of (N, 4)\r\n    \r\n    # Return\r\n        match_index : int\r\n    \"\"\"\r\n    match_index = -1\r\n    max_iou     = -1\r\n    \r\n    for i, box in enumerate(centroid_boxes):\r\n        iou = centroid_box_iou(centroid_box, box)\r\n        \r\n        if max_iou < iou:\r\n            match_index = i\r\n            max_iou     = iou\r\n    return match_index\r\n\r\n"
  },
  {
    "path": "example_scripts/edge_tpu/detector/detector_video.py",
    "content": "import argparse\nimport io\nimport time\nimport numpy as np\nimport cv2\n\nfrom box import BoundBox, nms_boxes, boxes_to_array, to_minmax, draw_boxes\n#from tflite_runtime.interpreter import Interpreter\nimport tflite_runtime.interpreter as tflite\n\nclass Detector(object):\n\n    def __init__(self, label_file, model_file, threshold):\n        self._threshold = float(threshold)\n        self.labels = self.load_labels(label_file)\n        self.interpreter = tflite.Interpreter(model_file, experimental_delegates=[tflite.load_delegate('libedgetpu.so.1')])\n        self.interpreter.allocate_tensors()\n        _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']\n        self.tensor_index = self.interpreter.get_input_details()[0]['index']\n\n    def load_labels(self, path):\n        with open(path, 'r') as f:\n            return {i: line.strip() for i, line in enumerate(f.read().replace('\"','').split(','))}\n\n    def preprocess(self, img):\n        img = cv2.resize(img, (self.input_width, self.input_height))\n        img = img.astype(np.float32)\n        img = img / 255.\n        img = img - 0.5\n        img = img * 2.\n        img = img[:, :, ::-1]\n        img = np.expand_dims(img, 0)\n        return img\n\n    def get_output_tensor(self, index):\n      \"\"\"Returns the output tensor at the given index.\"\"\"\n      output_details = self.interpreter.get_output_details()[index]\n      tensor = np.squeeze(self.interpreter.get_tensor(output_details['index']))\n      return tensor\n\n    def detect_objects(self, image):\n      \"\"\"Returns a list of detection results, each a dictionary of object info.\"\"\"\n      img = self.preprocess(image)\n      self.interpreter.set_tensor(self.tensor_index, img)\n      self.interpreter.invoke()\n      # Get all output details\n      raw_detections = self.get_output_tensor(0)\n      output_shape = [7, 7, 5, 6]\n      output = np.reshape(raw_detections, output_shape)\n      return output \n\n    def detect(self, original_image):\n        self.output_height, self.output_width = original_image.shape[0:2]\n        start_time = time.time()\n        results = self.detect_objects(original_image)\n        elapsed_ms = (time.time() - start_time) * 1000\n        fps  = 1 / elapsed_ms*1000\n        print(\"Estimated frames per second : {0:.2f} Inference time: {1:.2f}\".format(fps, elapsed_ms))\n\n        def _to_original_scale(boxes):\n            minmax_boxes = to_minmax(boxes)\n            minmax_boxes[:,0] *= self.output_width\n            minmax_boxes[:,2] *= self.output_width\n            minmax_boxes[:,1] *= self.output_height\n            minmax_boxes[:,3] *= self.output_height\n            return minmax_boxes.astype(np.int)\n\n        boxes, probs = self.run(results)\n        print(boxes)\n        if len(boxes) > 0:\n            boxes = _to_original_scale(boxes)\n            original_image = draw_boxes(original_image, boxes, probs, self.labels)\n        return original_image\n\n\n    def run(self, netout):\n        anchors = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828]\n        nms_threshold=0.2\n        \"\"\"Convert Yolo network output to bounding box\n        \n        # Args\n            netout : 4d-array, shape of (grid_h, grid_w, num of boxes per grid, 5 + n_classes)\n                YOLO neural network output array\n        \n        # Returns\n            boxes : array, shape of (N, 4)\n                coordinate scale is normalized [0, 1]\n          
  probs : array, shape of (N, nb_classes)\n        \"\"\"\n        grid_h, grid_w, nb_box = netout.shape[:3]\n        boxes = []\n        \n        # decode the output by the network\n        netout[..., 4]  = _sigmoid(netout[..., 4])\n        netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])\n        netout[..., 5:] *= netout[..., 5:] > self._threshold\n\n        for row in range(grid_h):\n            for col in range(grid_w):\n                for b in range(nb_box):\n                    # from 4th element onwards are confidence and class classes\n                    classes = netout[row,col,b,5:]\n                    \n                    if np.sum(classes) > 0:\n                        # first 4 elements are x, y, w, and h\n                        x, y, w, h = netout[row,col,b,:4]\n\n                        x = (col + _sigmoid(x)) / grid_w # center position, unit: image width\n                        y = (row + _sigmoid(y)) / grid_h # center position, unit: image height\n                        w = anchors[2 * b + 0] * np.exp(w) / grid_w # unit: image width\n                        h = anchors[2 * b + 1] * np.exp(h) / grid_h # unit: image height\n                        confidence = netout[row,col,b,4]\n                        box = BoundBox(x, y, w, h, confidence, classes)\n                        boxes.append(box)\n        \n        boxes = nms_boxes(boxes, len(classes), nms_threshold, self._threshold)\n        boxes, probs = boxes_to_array(boxes)\n        return boxes, probs\n\ndef _sigmoid(x):\n    return 1. / (1. + np.exp(-x))\n\ndef _softmax(x, axis=-1, t=-100.):\n    x = x - np.max(x)\n    if np.min(x) < t:\n        x = x/np.min(x)*t\n    e_x = np.exp(x)\n    return e_x / e_x.sum(axis, keepdims=True)\n\n\nparser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)\nparser.add_argument('--model', help='File path of .tflite file.', required=True)\nparser.add_argument('--labels', help='File path of labels file.', required=True)\nparser.add_argument('--threshold', help='Confidence threshold.', default=0.3)\nargs = parser.parse_args()\n\ndetector = Detector(args.labels, args.model, args.threshold)\ncamera = cv2.VideoCapture(2)\n\nwhile(camera.isOpened()):\n    ret, frame = camera.read()\n    image = detector.detect(frame)\n    if ret == True:\n\n        # Display the resulting frame\n        cv2.imshow('Frame', image)\n\n        # Press Q on keyboard to  exit\n        if cv2.waitKey(25) & 0xFF == ord('q'):\n          break\n\n    # Break the loop\n    else: \n        break\n\n# When everything done, release the video capture object\ncamera.release()\n\n# Closes all the frames\ncv2.destroyAllWindows()\n"
  },
  {
    "path": "example_scripts/k210/classifier/santa_uno.py",
    "content": "# tested with firmware maixpy_v0.6.2_72_g22a8555b5_openmv_kmodel_v4_with_ide_support\r\nimport sensor, image, lcd, time\r\nimport KPU as kpu\r\nlcd.init()\r\nsensor.reset()\r\nsensor.set_pixformat(sensor.RGB565)\r\nsensor.set_framesize(sensor.QVGA)\r\nsensor.set_windowing((224, 224))\r\nsensor.set_vflip(1)\r\nlcd.clear()\r\n\r\nlabels=['arduino_uno','santa_claus'] #number of labels should match the number of labels the model was trained with\r\n\r\ntask = kpu.load(0x200000) #change to \"/sd/name_of_the_model_file.kmodel\" if loading from SD card\r\nkpu.set_outputs(task, 0, 1, 1, 2) #the actual shape needs to match the last layer shape of your model\r\n\r\nwhile(True):\r\n    kpu.memtest()\r\n    img = sensor.snapshot()\r\n    #img = img.rotation_corr(z_rotation=90.0)   uncomment if need rotation correction - only present in full maixpy firmware\r\n    #a = img.pix_to_ai()\r\n    fmap = kpu.forward(task, img)\r\n    plist=fmap[:]\r\n    pmax=max(plist)\r\n    max_index=plist.index(pmax)\r\n    a = img.draw_string(0,0, str(labels[max_index].strip()), color=(255,0,0), scale=2)\r\n    a = img.draw_string(0,20, str(pmax), color=(255,0,0), scale=2)\r\n    print((pmax, labels[max_index].strip()))\r\n    a = lcd.display(img)\r\na = kpu.deinit(task)\r\n\r\n\r\n"
  },
  {
    "path": "example_scripts/k210/detector/yolov2/person_detector_v4.py",
    "content": "#tested with firmware maixpy_v0.6.2_72_g22a8555b5_openmv_kmodel_v4_with_ide_support\r\nimport sensor, image, lcd\r\nimport KPU as kpu\r\n\r\nlcd.init()\r\nsensor.reset()\r\nsensor.set_pixformat(sensor.RGB565)\r\nsensor.set_framesize(sensor.QVGA)\r\nsensor.set_windowing((224, 224))\r\nsensor.set_vflip(1)\r\nsensor.run(1)\r\nclasses = [\"person\"]\r\ntask = kpu.load(0x200000) #change to \"/sd/name_of_the_model_file.kmodel\" if loading from SD card\r\na = kpu.set_outputs(task, 0, 7,7,30) #the actual shape needs to match the last layer shape of your model(before Reshape)\r\nanchor = (0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828)\r\na = kpu.init_yolo2(task, 0.3, 0.3, 5, anchor) #tweak the second parameter if you're getting too many false positives\r\n\r\nwhile(True):\r\n    img = sensor.snapshot().rotation_corr(z_rotation=180.0)\r\n    a = img.pix_to_ai()\r\n    code = kpu.run_yolo2(task, img)\r\n    if code:\r\n        for i in code:\r\n            a = img.draw_rectangle(i.rect(),color = (0, 255, 0))\r\n            a = img.draw_string(i.x(),i.y(), classes[i.classid()], color=(255,0,0), scale=3)\r\n        a = lcd.display(img)\r\n    else:\r\n        a = lcd.display(img)\r\na = kpu.deinit(task)\r\n"
  },
  {
    "path": "example_scripts/k210/detector/yolov2/raccoon_detector.py",
    "content": "# tested with firmware maixpy_v0.6.2_72_g22a8555b5_openmv_kmodel_v4_with_ide_support\r\nimport sensor, image, lcd\r\nimport KPU as kpu\r\n\r\nlcd.init()\r\nsensor.reset()\r\nsensor.set_pixformat(sensor.RGB565)\r\nsensor.set_framesize(sensor.QVGA)\r\nsensor.set_windowing((224, 224))\r\nsensor.set_vflip(1)\r\nsensor.run(1)\r\nclasses = [\"raccoon\"]\r\ntask = kpu.load(0x200000) #change to \"/sd/name_of_the_model_file.kmodel\" if loading from SD card\r\na = kpu.set_outputs(task, 0, 7,7,30)   #the actual shape needs to match the last layer shape of your model(before Reshape)\r\nanchor = (0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828)\r\na = kpu.init_yolo2(task, 0.3, 0.3, 5, anchor) #tweak the second parameter if you're getting too many false positives\r\nwhile(True):\r\n    img = sensor.snapshot().rotation_corr(z_rotation=90.0)\r\n    a = img.pix_to_ai()\r\n    code = kpu.run_yolo2(task, img)\r\n    if code:\r\n        for i in code:\r\n            a = img.draw_rectangle(i.rect(),color = (0, 255, 0))\r\n            a = img.draw_string(i.x(),i.y(), classes[i.classid()], color=(255,0,0), scale=3)\r\n        a = lcd.display(img)\r\n    else:\r\n        a = lcd.display(img)\r\na = kpu.deinit(task)\r\n"
  },
  {
    "path": "example_scripts/k210/detector/yolov2/raccoon_detector_uart.py",
    "content": "# tested with firmware 5-0.22\r\nimport sensor,image,lcd\r\nimport KPU as kpu\r\nfrom fpioa_manager import fm\r\nfrom machine import UART\r\nfrom board import board_info\r\n\r\nlcd.init()\r\nsensor.reset()\r\nsensor.set_pixformat(sensor.RGB565)\r\nsensor.set_framesize(sensor.QVGA)\r\nsensor.set_windowing((224, 224))\r\nsensor.set_vflip(1)\r\nsensor.run(1)\r\nfm.register(board_info.PIN15,fm.fpioa.UART1_TX)\r\nfm.register(board_info.PIN17,fm.fpioa.UART1_RX)\r\nuart_A = UART(UART.UART1, 115200, 8, None, 1, timeout=1000, read_buf_len=4096)\r\n\r\nclasses = [\"raccoon\"]\r\ntask = kpu.load(0x200000) #change to \"/sd/name_of_the_model_file.kmodel\" if loading from SD card\r\na = kpu.set_outputs(task, 0, 7,7,30)   #the actual shape needs to match the last layer shape of your model(before Reshape)\r\nanchor = (0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828)\r\na = kpu.init_yolo2(task, 0.3, 0.3, 5, anchor) #tweak the second parameter if you're getting too many false positives\r\nwhile(True):\r\n    img = sensor.snapshot().rotation_corr(z_rotation=90.0)\r\n    a = img.pix_to_ai()\r\n    code = kpu.run_yolo2(task, img)\r\n    if code:\r\n        for i in code:\r\n            a=img.draw_rectangle(i.rect(),color = (0, 255, 0))\r\n            a = img.draw_string(i.x(),i.y(), classes[i.classid()], color=(255,0,0), scale=3)\r\n            uart_A.write(str(i.rect()))\r\n        a = lcd.display(img)\r\n    else:\r\n        a = lcd.display(img)\r\na = kpu.deinit(task)\r\nuart_A.deinit()\r\ndel uart_A\r\n"
  },
  {
    "path": "example_scripts/k210/detector/yolov3/raccoon_detector.py",
    "content": "# needs firmware from my fork with yolov3 support, see\r\n# https://github.com/sipeed/MaixPy/pull/451\r\n\r\nimport sensor, image, lcd\r\nimport KPU as kpu\r\n\r\nlcd.init()\r\nsensor.reset()\r\nsensor.set_pixformat(sensor.RGB565)\r\nsensor.set_framesize(sensor.QVGA)\r\nsensor.set_vflip(1)\r\nsensor.run(1)\r\n\r\nclasses = [\"raccoon\"]\r\n\r\ntask = kpu.load(0x300000) #change to \"/sd/name_of_the_model_file.kmodel\" if loading from SD card\r\na = kpu.set_outputs(task, 0, 10, 8, 18) #the actual shape needs to match the last layer shape of your model(before Reshape)\r\nanchor = (0.76120044, 0.57155991, 0.6923348, 0.88535553, 0.47163042, 0.34163313)\r\n\r\na = kpu.init_yolo3(task, 0.5, 0.3, 3, 1, anchor) \r\n# second parameter - obj_threshold, tweak if you're getting too many false positives\r\n# third parameter - nms_threshold\r\n# fourth parameter - number of anchors\r\n# fifth parameter - number of branches for YOLOv3, in this case we only use one branch\r\n\r\nwhile(True):\r\n    img = sensor.snapshot()\r\n    #a = img.pix_to_ai() # only necessary if you do opeartions (e.g. resize) on image\r\n    code = kpu.run_yolo3(task, img)\r\n\r\n    if code:\r\n        for i in code:\r\n            a = img.draw_rectangle(i.rect(),color = (0, 255, 0))\r\n            a = img.draw_string(i.x(), i.y(), classes[i.classid()], color=(255,0,0), scale = 1.5)\r\n        a = lcd.display(img)\r\n    else:\r\n        a = lcd.display(img)\r\na = kpu.deinit(task)\r\n"
  },
  {
    "path": "example_scripts/k210/segnet/segnet-support-is-WIP-contributions-welcome",
    "content": ""
  },
  {
    "path": "example_scripts/oak/yolov2/YOLO_best_mAP.json",
    "content": "{\n    \"NN_config\":\n    {\n        \"output_format\" : \"raw\",\n        \"NN_family\" : \"YOLO\",\n        \"NN_specific_metadata\" :\n        { \n            \"classes\" : 1,\n            \"coordinates\" : 4,\n            \"anchors\" : [10,14, 23,27, 37,58, 81,82, 135,169, 344,319],\n            \"anchor_masks\" : \n            {\n                \"side26\" : [1,2,3],\n                \"side13\" : [3,4,5]\n            },\n            \"iou_threshold\" : 0.5,\n            \"confidence_threshold\" : 0.5\n        }\n    },\n    \"mappings\":\n    {\n        \"labels\":\n        [\n            \"person\",\n            \"bicycle\",\n            \"car\",\n            \"motorbike\",\n            \"aeroplane\",\n            \"bus\",\n            \"train\",\n            \"truck\",\n            \"boat\",\n            \"traffic light\",\n            \"fire hydrant\",\n            \"stop sign\",\n            \"parking meter\",\n            \"bench\",\n            \"bird\",\n            \"cat\",\n            \"dog\",\n            \"horse\",\n            \"sheep\",\n            \"cow\",\n            \"elephant\",\n            \"bear\",\n            \"zebra\",\n            \"giraffe\",\n            \"backpack\",\n            \"umbrella\",\n            \"handbag\",\n            \"tie\",\n            \"suitcase\",\n            \"frisbee\",\n            \"skis\",\n            \"snowboard\",\n            \"sports ball\",\n            \"kite\",\n            \"baseball bat\",\n            \"baseball glove\",\n            \"skateboard\",\n            \"surfboard\",\n            \"tennis racket\",\n            \"bottle\",\n            \"wine glass\",\n            \"cup\",\n            \"fork\",\n            \"knife\",\n            \"spoon\",\n            \"bowl\",\n            \"banana\",\n            \"apple\",\n            \"sandwich\",\n            \"orange\",\n            \"broccoli\",\n            \"carrot\",\n            \"hot dog\",\n            \"pizza\",\n            \"donut\",\n            \"cake\",\n            \"chair\",\n            \"sofa\",\n            \"pottedplant\",\n            \"bed\",\n            \"diningtable\",\n            \"toilet\",\n            \"tvmonitor\",\n            \"laptop\",\n            \"mouse\",\n            \"remote\",\n            \"keyboard\",\n            \"cell phone\",\n            \"microwave\",\n            \"oven\",\n            \"toaster\",\n            \"sink\",\n            \"refrigerator\",\n            \"book\",\n            \"clock\",\n            \"vase\",\n            \"scissors\",\n            \"teddy bear\",\n            \"hair drier\",\n            \"toothbrush\"        \n        ]\n    }\n}\n\n"
  },
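  {
    "path": "example_scripts/oak/yolov2/YOLO_config_inspect.py",
    "content": "# Illustrative sketch, NOT part of the original aXeleRate examples: it prints\n# the fields of YOLO_best_mAP.json that the DepthAI pipeline consumes, using\n# only the standard library. The config file is assumed to sit next to this\n# script.\nimport json\n\nwith open('YOLO_best_mAP.json') as f:\n    cfg = json.load(f)\n\nmeta = cfg['NN_config']['NN_specific_metadata']\nprint('classes:', meta['classes'])\nprint('anchors:', meta['anchors'])\nprint('anchor masks:', meta['anchor_masks'])\nprint('iou threshold:', meta['iou_threshold'])\nprint('confidence threshold:', meta['confidence_threshold'])\nprint('number of labels:', len(cfg['mappings']['labels']))\n"
  },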
  {
    "path": "example_scripts/oak/yolov2/box.py",
    "content": "\r\nimport numpy as np\r\nimport cv2\r\n\r\n\r\n# Todo : BoundBox & its related method extraction\r\nclass BoundBox:\r\n    def __init__(self, x, y, w, h, c = None, classes = None):\r\n        self.x     = x\r\n        self.y     = y\r\n        self.w     = w\r\n        self.h     = h\r\n        \r\n        self.c     = c\r\n        self.classes = classes\r\n\r\n    def get_label(self):\r\n        return np.argmax(self.classes)\r\n    \r\n    def get_score(self):\r\n        return self.classes[self.get_label()]\r\n    \r\n    def iou(self, bound_box):\r\n        b1 = self.as_centroid()\r\n        b2 = bound_box.as_centroid()\r\n        return centroid_box_iou(b1, b2)\r\n\r\n    def as_centroid(self):\r\n        return np.array([self.x, self.y, self.w, self.h])\r\n    \r\n\r\ndef boxes_to_array(bound_boxes):\r\n    \"\"\"\r\n    # Args\r\n        boxes : list of BoundBox instances\r\n    \r\n    # Returns\r\n        centroid_boxes : (N, 4)\r\n        probs : (N, nb_classes)\r\n    \"\"\"\r\n    centroid_boxes = []\r\n    probs = []\r\n    for box in bound_boxes:\r\n        centroid_boxes.append([box.x, box.y, box.w, box.h])\r\n        probs.append(box.classes)\r\n    return np.array(centroid_boxes), np.array(probs)\r\n\r\n\r\ndef nms_boxes(boxes, n_classes, nms_threshold=0.3, obj_threshold=0.3):\r\n    \"\"\"\r\n    # Args\r\n        boxes : list of BoundBox\r\n    \r\n    # Returns\r\n        boxes : list of BoundBox\r\n            non maximum supressed BoundBox instances\r\n    \"\"\"\r\n    # suppress non-maximal boxes\r\n    for c in range(n_classes):\r\n        sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes])))\r\n\r\n        for i in range(len(sorted_indices)):\r\n            index_i = sorted_indices[i]\r\n            \r\n            if boxes[index_i].classes[c] == 0: \r\n                continue\r\n            else:\r\n                for j in range(i+1, len(sorted_indices)):\r\n                    index_j = sorted_indices[j]\r\n\r\n                    if boxes[index_i].iou(boxes[index_j]) >= nms_threshold:\r\n                        boxes[index_j].classes[c] = 0\r\n    # remove the boxes which are less likely than a obj_threshold\r\n    boxes = [box for box in boxes if box.get_score() > obj_threshold]\r\n    return boxes\r\n\r\n\r\ndef draw_scaled_boxes(image, boxes, probs, labels, desired_size=400):\r\n    img_size = min(image.shape[:2])\r\n    if img_size < desired_size:\r\n        scale_factor = float(desired_size) / img_size\r\n    else:\r\n        scale_factor = 1.0\r\n    \r\n    h, w = image.shape[:2]\r\n    img_scaled = cv2.resize(image, (int(w*scale_factor), int(h*scale_factor)))\r\n    if boxes != []:\r\n        boxes_scaled = boxes*scale_factor\r\n        boxes_scaled = boxes_scaled.astype(np.int)\r\n    else:\r\n        boxes_scaled = boxes\r\n    return draw_boxes(img_scaled, boxes_scaled, probs, labels)\r\n        \r\n\r\ndef draw_boxes(image, boxes, probs, labels):\r\n    for box, classes in zip(boxes, probs):\r\n        x1, y1, x2, y2 = box\r\n        cv2.rectangle(image, (x1,y1), (x2,y2), (0,255,0), 3)\r\n        cv2.putText(image, \r\n                    '{}:  {:.2f}'.format(labels[np.argmax(classes)], classes.max()), \r\n                    (x1, y1 - 13), \r\n                    cv2.FONT_HERSHEY_SIMPLEX, \r\n                    1e-3 * image.shape[0], \r\n                    (0,255,0), 2)\r\n    return image        \r\n\r\n\r\ndef centroid_box_iou(box1, box2):\r\n    def _interval_overlap(interval_a, interval_b):\r\n      
  x1, x2 = interval_a\r\n        x3, x4 = interval_b\r\n    \r\n        if x3 < x1:\r\n            if x4 < x1:\r\n                return 0\r\n            else:\r\n                return min(x2,x4) - x1\r\n        else:\r\n            if x2 < x3:\r\n                return 0\r\n            else:\r\n                return min(x2,x4) - x3\r\n    \r\n    _, _, w1, h1 = box1.reshape(-1,)\r\n    _, _, w2, h2 = box2.reshape(-1,)\r\n    x1_min, y1_min, x1_max, y1_max = to_minmax(box1.reshape(-1,4)).reshape(-1,)\r\n    x2_min, y2_min, x2_max, y2_max = to_minmax(box2.reshape(-1,4)).reshape(-1,)\r\n            \r\n    intersect_w = _interval_overlap([x1_min, x1_max], [x2_min, x2_max])\r\n    intersect_h = _interval_overlap([y1_min, y1_max], [y2_min, y2_max])\r\n    intersect = intersect_w * intersect_h\r\n    union = w1 * h1 + w2 * h2 - intersect\r\n    \r\n    return float(intersect) / union\r\n\r\n\r\ndef to_centroid(minmax_boxes):\r\n    \"\"\"\r\n    minmax_boxes : (N, 4)\r\n    \"\"\"\r\n    minmax_boxes = minmax_boxes.astype(np.float)\r\n    centroid_boxes = np.zeros_like(minmax_boxes)\r\n    \r\n    x1 = minmax_boxes[:,0]\r\n    y1 = minmax_boxes[:,1]\r\n    x2 = minmax_boxes[:,2]\r\n    y2 = minmax_boxes[:,3]\r\n    \r\n    centroid_boxes[:,0] = (x1 + x2) / 2\r\n    centroid_boxes[:,1] = (y1 + y2) / 2\r\n    centroid_boxes[:,2] = x2 - x1\r\n    centroid_boxes[:,3] = y2 - y1\r\n    return centroid_boxes\r\n\r\ndef to_minmax(centroid_boxes):\r\n    centroid_boxes = centroid_boxes.astype(np.float)\r\n    minmax_boxes = np.zeros_like(centroid_boxes)\r\n    \r\n    cx = centroid_boxes[:,0]\r\n    cy = centroid_boxes[:,1]\r\n    w = centroid_boxes[:,2]\r\n    h = centroid_boxes[:,3]\r\n    \r\n    minmax_boxes[:,0] = cx - w/2\r\n    minmax_boxes[:,1] = cy - h/2\r\n    minmax_boxes[:,2] = cx + w/2\r\n    minmax_boxes[:,3] = cy + h/2\r\n    return minmax_boxes\r\n\r\ndef create_anchor_boxes(anchors):\r\n    \"\"\"\r\n    # Args\r\n        anchors : list of floats\r\n    # Returns\r\n        boxes : array, shape of (len(anchors)/2, 4)\r\n            centroid-type\r\n    \"\"\"\r\n    boxes = []\r\n    n_boxes = int(len(anchors)/2)\r\n    for i in range(n_boxes):\r\n        boxes.append(np.array([0, 0, anchors[2*i], anchors[2*i+1]]))\r\n    return np.array(boxes)\r\n\r\ndef find_match_box(centroid_box, centroid_boxes):\r\n    \"\"\"Find the index of the boxes with the largest overlap among the N-boxes.\r\n\r\n    # Args\r\n        box : array, shape of (1, 4)\r\n        boxes : array, shape of (N, 4)\r\n    \r\n    # Return\r\n        match_index : int\r\n    \"\"\"\r\n    match_index = -1\r\n    max_iou     = -1\r\n    \r\n    for i, box in enumerate(centroid_boxes):\r\n        iou = centroid_box_iou(centroid_box, box)\r\n        \r\n        if max_iou < iou:\r\n            match_index = i\r\n            max_iou     = iou\r\n    return match_index\r\n\r\n"
  },
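  {
    "path": "example_scripts/oak/yolov2/box_demo.py",
    "content": "# Illustrative sketch, NOT part of the original aXeleRate examples: it\n# exercises the BoundBox helpers from box.py on two hand-made detections to\n# show how non-maximum suppression collapses overlapping boxes. All numbers\n# below are made up for the demonstration.\nimport numpy as np\n\nfrom box import BoundBox, nms_boxes, boxes_to_array\n\n# two heavily overlapping detections of the same (single) class\nbox_a = BoundBox(0.50, 0.50, 0.40, 0.40, c=0.9, classes=np.array([0.9]))\nbox_b = BoundBox(0.52, 0.50, 0.40, 0.40, c=0.8, classes=np.array([0.8]))\n\nprint('IoU between the two boxes:', box_a.iou(box_b))\n\nkept = nms_boxes([box_a, box_b], n_classes=1, nms_threshold=0.3, obj_threshold=0.3)\ncentroids, probs = boxes_to_array(kept)\nprint('boxes kept after NMS:', len(kept))\nprint('centroid boxes:', centroids)\nprint('class probabilities:', probs)\n"
  },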
  {
    "path": "example_scripts/oak/yolov2/yolo.py",
    "content": "import consts.resource_paths\nimport cv2\nimport depthai\nimport argparse\nimport time \nimport numpy as np\n\nIOU_THRESHOLD = 0.1\nlabels = ['null', 'kangaroo']\nGREEN = '\\033[1;32m'\nRED = '\\033[1;31m'\nNOCOLOR = '\\033[0m'\nYELLOW = '\\033[1;33m'\nDEVICE = \"MYRIAD\"\n\ndef sigmoid(x):\n    return 1.0 / (1 + np.exp(x * -1.0))\n\n\ndef calculate_overlap(x1, w1, x2, w2):\n    box1_coordinate = max(x1 - w1 / 2.0, x2 - w2 / 2.0)\n    box2_coordinate = min(x1 + w1 / 2.0, x2 + w2 / 2.0)\n    overlap = box2_coordinate - box1_coordinate\n    return overlap\n\n\ndef calculate_iou(box, truth):\n    # calculate the iou intersection over union by first calculating the overlapping height and width\n    width_overlap = calculate_overlap(box[0], box[2], truth[0], truth[2])\n    height_overlap = calculate_overlap(box[1], box[3], truth[1], truth[3])\n    # no overlap\n    if width_overlap < 0 or height_overlap < 0:\n        return 0\n\n    intersection_area = width_overlap * height_overlap\n    union_area = box[2] * box[3] + truth[2] * truth[3] - intersection_area\n    iou = intersection_area / union_area\n    return iou\n\n\ndef apply_nms(boxes):\n    # sort the boxes by score in descending order\n    sorted_boxes = sorted(boxes, key=lambda d: d[7])[::-1]\n    high_iou_objs = dict()\n    # compare the iou for each of the detected objects\n    for current_object in range(len(sorted_boxes)):\n        if current_object in high_iou_objs:\n            continue\n\n        truth = sorted_boxes[current_object]\n        for next_object in range(current_object + 1, len(sorted_boxes)):\n            if next_object in high_iou_objs:\n                continue\n            box = sorted_boxes[next_object]\n            iou = calculate_iou(box, truth)\n            if iou >= IOU_THRESHOLD:\n                high_iou_objs[next_object] = 1\n\n    # filter and sort detected items\n    filtered_result = list()\n    for current_object in range(len(sorted_boxes)):\n        if current_object not in high_iou_objs:\n            filtered_result.append(sorted_boxes[current_object])\n    return filtered_result\n\ndef post_processing(output, label_list, threshold):\n\n    num_classes = 1\n    num_grids = 7\n    num_anchor_boxes = 5\n    original_results = output.astype(np.float32)   \n\n    # Tiny Yolo V2 uses a 13 x 13 grid with 5 anchor boxes for each grid cell.\n    # This specific model was trained with the VOC Pascal data set and is comprised of 20 classes\n\n    original_results = np.reshape(original_results, (num_anchor_boxes, 5+num_classes, num_grids, num_grids))\n    reordered_results = np.transpose(original_results, (2, 3, 0, 1))\n    reordered_results = np.reshape(reordered_results, (num_grids*num_grids, num_anchor_boxes, 5+num_classes))\n\n    # The 125 results need to be re-organized into 5 chunks of 25 values\n    # 20 classes + 1 score + 4 coordinates = 25 values\n    # 25 values for each of the 5 anchor bounding boxes = 125 values\n    #reordered_results = np.zeros((13 * 13, 5, 25))\n\n    index = 0\n    #for row in range( num_grids ):\n    #    for col in range( num_grids ):\n    #        for b_box_voltron in range(125):\n    #            b_box = row * num_grids + col\n    #            b_box_num = int(b_box_voltron / 25)\n    #            b_box_info = b_box_voltron % 25\n    #            reordered_results[b_box][b_box_num][b_box_info] = original_results[row][col][b_box_voltron]\n\n    # shapes for the 5 Tiny Yolo v2 bounding boxes\n    anchor_boxes = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 
5.47434, 7.88282, 3.52778, 9.77052, 9.16828]\n\n    boxes = list()\n    # iterate through the grids and anchor boxes and filter out all scores which do not exceed the DETECTION_THRESHOLD\n    for row in range(num_grids):\n        for col in range(num_grids):\n            for anchor_box_num in range(num_anchor_boxes):\n                box = list()\n                class_list = list()\n                current_score_total = 0\n                # calculate the coordinates for the current anchor box\n                box_x = (col + sigmoid(reordered_results[row * num_grids + col][anchor_box_num][0])) / 7.0\n                box_y = (row + sigmoid(reordered_results[row * num_grids + col][anchor_box_num][1])) / 7.0\n                box_w = (np.exp(reordered_results[row * num_grids + col][anchor_box_num][2]) *\n                         anchor_boxes[2 * anchor_box_num]) / 7.0\n                box_h = (np.exp(reordered_results[row * num_grids + col][anchor_box_num][3]) *\n                         anchor_boxes[2 * anchor_box_num + 1]) / 7.0\n                \n                # find the class with the highest score\n                for class_enum in range(num_classes):\n                    class_list.append(reordered_results[row * num_grids + col][anchor_box_num][5 + class_enum])\n\n                current_score_total = sum(class_list)\n                for current_class in range(len(class_list)):\n                    class_list[current_class] = class_list[current_class] * 1.0 / current_score_total\n\n                # probability that the current anchor box contains an item\n                object_confidence = sigmoid(reordered_results[row * num_grids + col][anchor_box_num][4])\n                # highest class score detected for the object in the current anchor box\n                highest_class_score = max(class_list)\n                # index of the class with the highest score\n                class_w_highest_score = class_list.index(max(class_list)) + 1\n                # the final score for the detected object\n                final_object_score = object_confidence * highest_class_score\n\n                box.append(box_x)\n                box.append(box_y)\n                box.append(box_w)\n                box.append(box_h)\n                box.append(class_w_highest_score)\n                box.append(object_confidence)\n                box.append(highest_class_score)\n                box.append(final_object_score)\n\n                # filter out all detected objects with a score less than the threshold\n                if final_object_score > threshold:\n                    boxes.append(box)\n\n    # gets rid of all duplicate boxes using non-maximal suppression\n    results = apply_nms(boxes)\n    return results\n\ndef show_tiny_yolo(results, original_img, is_depth=0):\n\n    image_width = original_img.shape[1]\n    image_height = original_img.shape[0]\n\n    label_list = labels\n\n    # calculate the actual box coordinates in relation to the input image\n    print('\\n Found this many objects in the image: ' + str(len(results)))\n    for box in results:\n        box_xmin = int((box[0] - box[2] / 2.0) * image_width)\n        box_xmax = int((box[0] + box[2] / 2.0) * image_width)\n        box_ymin = int((box[1] - box[3] / 2.0) * image_height)\n        box_ymax = int((box[1] + box[3] / 2.0) * image_height)\n        # ensure the box is not drawn out of the window resolution\n        if box_xmin < 0:\n            box_xmin = 0\n        if box_xmax > image_width:\n            box_xmax = image_width\n     
   if box_ymin < 0:\n            box_ymin = 0\n        if box_ymax > image_height:\n            box_ymax = image_height\n\n        print(\" - object: \" + YELLOW + label_list[box[4]] + NOCOLOR + \" is at left: \" + str(box_xmin) + \" top: \" + str(box_ymin) + \" right: \" + str(box_xmax) + \" bottom: \" + str(box_ymax))\n\n        # label shape and colorization\n        label_text = label_list[box[4]] + \" \" + str(\"{0:.2f}\".format(box[5]*box[6]))\n        label_background_color = (70, 120, 70) # grayish green background for text\n        label_text_color = (255, 255, 255)   # white text\n\n        label_size = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]\n        label_left = int(box_xmin)\n        label_top = int(box_ymin) - label_size[1]\n        label_right = label_left + label_size[0]\n        label_bottom = label_top + label_size[1]\n\n        # set up the colored rectangle background for text\n        cv2.rectangle(original_img, (label_left - 1, label_top - 5),(label_right + 1, label_bottom + 1),\n                      label_background_color, -1)\n        # set up text\n        cv2.putText(original_img, label_text, (int(box_xmin), int(box_ymin - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.5,\n                    label_text_color, 1)\n        # set up the rectangle around the object\n        cv2.rectangle(original_img, (int(box_xmin), int(box_ymin)), (int(box_xmax), int(box_ymax)), (0, 255, 0), 2)\n\n    return original_img\n\n\nparser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)\nparser.add_argument('--model', help='File path of .tflite file.', required=True)\nparser.add_argument('--config', help='File path of config file.', required=True)\nparser.add_argument('--threshold', help='Confidence threshold.', default=0.4)\nargs = parser.parse_args()\n\nif __name__ == \"__main__\" :\n    \n    if not depthai.init_device(consts.resource_paths.device_cmd_fpath):\n        raise RuntimeError(\"Error initializing device. Try to reset it.\")\n\n    p = depthai.create_pipeline(config={\n    \"streams\": [\"metaout\", \"previewout\"],\n    \"ai\": {\n        \"blob_file\": args.model,\n        \"blob_file_config\": 'YOLO_best_mAP.json'\n          }\n        })\n\n    if p is None:\n        raise RuntimeError(\"Error initializing pipelne\")\n    recv = False\n    while True:\n        nnet_packets, data_packets = p.get_available_nnet_and_data_packets()\n\n        for nnet_packet in nnet_packets:\n            raw_detections = nnet_packet.get_tensor(0)\n            raw_detections.dtype = np.float16  \n            raw_detections = np.squeeze(raw_detections)\n            recv = True\n            \n        for packet in data_packets:\n            if packet.stream_name == 'previewout':\n                data = packet.getData()\n                data0 = data[0, :, :]\n                data1 = data[1, :, :]\n                data2 = data[2, :, :]\n                frame = cv2.merge([data0, data1, data2])\n                if recv:\n                    filtered_objects = post_processing(raw_detections, ['kangaroo'], args.threshold)\n                    frame = show_tiny_yolo(filtered_objects, frame, 0)\n                cv2.imshow('previewout', frame)\n\n        if cv2.waitKey(1) == ord('q'):\n            break\n\ndel p\ndepthai.deinit_device()\n\n"
  },
  {
    "path": "example_scripts/oak/yolov2/yolo_alt.py",
    "content": "import consts.resource_paths\nimport cv2\nimport depthai\nimport argparse\nimport time \nimport numpy as np\nfrom box import BoundBox, nms_boxes, boxes_to_array, to_minmax, draw_boxes\n\nclass Detector(object):\n\n    def __init__(self, label_file, model_file, threshold):\n            \n        self._threshold = float(threshold)\n        self.labels = self.load_labels(label_file)\n\n    def load_labels(self, path):\n        with open(path, 'r') as f:\n            return {i: line.strip() for i, line in enumerate(f.read().replace('\"','').split(','))}\n\n    def parse(self, original_image, tensor):\n        #start_time = time.time()\n        #elapsed_ms = (time.time() - start_time) * 1000\n        #fps  = 1 / elapsed_ms*1000\n        #print(\"Estimated frames per second : {0:.2f} Inference time: {1:.2f}\".format(fps, elapsed_ms))\n        boxes, probs = self.run(tensor)\n\n        \n        def _to_original_scale(boxes):\n            minmax_boxes = to_minmax(boxes)\n            minmax_boxes[:,0] *= 224\n            minmax_boxes[:,2] *= 224\n            minmax_boxes[:,1] *= 224\n            minmax_boxes[:,3] *= 224\n            return minmax_boxes.astype(np.int)\n        \n        if len(boxes) > 0:\n            boxes = _to_original_scale(boxes)\n            #print(boxes)\n            original_image = draw_boxes(original_image, boxes, probs, self.labels)\n        return original_image\n\n\n    def run(self, netout):\n        anchors = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828]\n        nms_threshold=0.2\n        \"\"\"Convert Yolo network output to bounding box\n        \n        # Args\n            netout : 4d-array, shape of (grid_h, grid_w, num of boxes per grid, 5 + n_classes)\n                YOLO neural network output array\n        \n        # Returns\n            boxes : array, shape of (N, 4)\n                coordinate scale is normalized [0, 1]\n            probs : array, shape of (N, nb_classes)\n        \"\"\"\n        grid_h, grid_w, nb_box = netout.shape[:3]\n        boxes = []\n        \n        # decode the output by the network\n        netout[..., 4]  = _sigmoid(netout[..., 4])\n        netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])\n        netout[..., 5:] *= netout[..., 5:] > self._threshold\n        \n        for row in range(grid_h):\n            for col in range(grid_w):\n                for b in range(nb_box):\n                    # from 4th element onwards are confidence and class classes\n                    classes = netout[row,col,b,5:]\n                    \n                    if np.sum(classes) > 0:\n                        # first 4 elements are x, y, w, and h\n                        x, y, w, h = netout[row,col,b,:4]\n\n                        x = (col + _sigmoid(x)) / grid_w # center position, unit: image width\n                        y = (row + _sigmoid(y)) / grid_h # center position, unit: image height\n                        w = anchors[2 * b + 0] * np.exp(w) / grid_w # unit: image width\n                        h = anchors[2 * b + 1] * np.exp(h) / grid_h # unit: image height\n                        confidence = netout[row,col,b,4]\n                        box = BoundBox(x, y, w, h, confidence, classes)\n                        boxes.append(box)\n        \n        boxes = nms_boxes(boxes, len(classes), nms_threshold, self._threshold)\n        boxes, probs = boxes_to_array(boxes)\n        return boxes, probs\n\ndef _sigmoid(x):\n    return 1. / (1. 
+ np.exp(-x))\n\ndef _softmax(x, axis=-1, t=-100.):\n    x = x - np.max(x)\n    if np.min(x) < t:\n        x = x/np.min(x)*t\n    e_x = np.exp(x)\n    return e_x / e_x.sum(axis, keepdims=True)    \n    \n    \nparser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)\nparser.add_argument('--model', help='File path of .tflite file.', required=True)\nparser.add_argument('--labels', help='File path of labels file.', required=True)\nparser.add_argument('--threshold', help='Confidence threshold.', default=0.3)\nargs = parser.parse_args()\n\nif __name__ == \"__main__\" :\n    detector = Detector(args.labels, args.model, args.threshold)\n    \n    \n    if not depthai.init_device(consts.resource_paths.device_cmd_fpath):\n        raise RuntimeError(\"Error initializing device. Try to reset it.\")\n\n    p = depthai.create_pipeline(config={\n    \"streams\": [\"metaout\", \"previewout\"],\n    \"ai\": {\n        \"blob_file\": args.model,\n        \"blob_file_config\": 'yolov2/YOLO_best_mAP_alt.json'\n          }\n        })\n\n    if p is None:\n        raise RuntimeError(\"Error initializing pipelne\")\n    recv = False\n    while True:\n        nnet_packets, data_packets = p.get_available_nnet_and_data_packets()\n\n        for nnet_packet in nnet_packets:\n            raw_detections = nnet_packet.get_tensor(0)\n            raw_detections.dtype = np.float16\n            raw_detections = np.squeeze(raw_detections)\n            output_shape = [5, 6, 7, 7]\n            output = np.reshape(raw_detections, output_shape)\n            output = np.transpose(output, (2, 3, 0, 1))\n            recv = True\n            \n        for packet in data_packets:\n            if packet.stream_name == 'previewout':\n                data = packet.getData()\n                data0 = data[0, :, :]\n                data1 = data[1, :, :]\n                data2 = data[2, :, :]\n                frame = cv2.merge([data0, data1, data2])\n                if recv:\n                    frame = detector.parse(frame, output)\n                cv2.imshow('previewout', frame)\n\n        if cv2.waitKey(1) == ord('q'):\n            break\n\ndel p\ndepthai.deinit_device()\n\n\n"
  },
  {
    "path": "example_scripts/tensorflow_lite/classifier/base_camera.py",
    "content": "import time\nimport threading\ntry:\n    from greenlet import getcurrent as get_ident\nexcept ImportError:\n    try:\n        from thread import get_ident\n    except ImportError:\n        from _thread import get_ident\n\n\nclass CameraEvent(object):\n    \"\"\"An Event-like class that signals all active clients when a new frame is\n    available.\n    \"\"\"\n    def __init__(self):\n        self.events = {}\n\n    def wait(self):\n        \"\"\"Invoked from each client's thread to wait for the next frame.\"\"\"\n        ident = get_ident()\n        if ident not in self.events:\n            # this is a new client\n            # add an entry for it in the self.events dict\n            # each entry has two elements, a threading.Event() and a timestamp\n            self.events[ident] = [threading.Event(), time.time()]\n        return self.events[ident][0].wait()\n\n    def set(self):\n        \"\"\"Invoked by the camera thread when a new frame is available.\"\"\"\n        now = time.time()\n        remove = None\n        for ident, event in self.events.items():\n            if not event[0].isSet():\n                # if this client's event is not set, then set it\n                # also update the last set timestamp to now\n                event[0].set()\n                event[1] = now\n            else:\n                # if the client's event is already set, it means the client\n                # did not process a previous frame\n                # if the event stays set for more than 5 seconds, then assume\n                # the client is gone and remove it\n                if now - event[1] > 5:\n                    remove = ident\n        if remove:\n            del self.events[remove]\n\n    def clear(self):\n        \"\"\"Invoked from each client's thread after a frame was processed.\"\"\"\n        self.events[get_ident()][0].clear()\n\n\nclass BaseCamera(object):\n    thread = None  # background thread that reads frames from camera\n    frame = None  # current frame is stored here by background thread\n    last_access = 0  # time of last client access to the camera\n    event = CameraEvent()\n\n    def __init__(self):\n        \"\"\"Start the background camera thread if it isn't running yet.\"\"\"\n        if BaseCamera.thread is None:\n            BaseCamera.last_access = time.time()\n\n            # start background frame thread\n            BaseCamera.thread = threading.Thread(target=self._thread)\n            BaseCamera.thread.start()\n\n            # wait until frames are available\n            while self.get_frame() is None:\n                time.sleep(0)\n\n    def get_frame(self):\n        \"\"\"Return the current camera frame.\"\"\"\n        BaseCamera.last_access = time.time()\n\n        # wait for a signal from the camera thread\n        BaseCamera.event.wait()\n        BaseCamera.event.clear()\n\n        return BaseCamera.frame\n\n    @staticmethod\n    def frames():\n        \"\"\"\"Generator that returns frames from the camera.\"\"\"\n        raise RuntimeError('Must be implemented by subclasses.')\n\n    @classmethod\n    def _thread(cls):\n        \"\"\"Camera background thread.\"\"\"\n        print('Starting camera thread.')\n        frames_iterator = cls.frames()\n        for frame in frames_iterator:\n            BaseCamera.frame = frame\n            BaseCamera.event.set()  # send signal to clients\n            time.sleep(0)\n\n            # if there hasn't been any clients asking for frames in\n            # the last 10 seconds then stop the 
thread\n            if time.time() - BaseCamera.last_access > 10:\n                frames_iterator.close()\n                print('Stopping camera thread due to inactivity.')\n                break\n        BaseCamera.thread = None\n"
  },
  {
    "path": "example_scripts/tensorflow_lite/classifier/camera_opencv.py",
    "content": "import cv2\nfrom base_camera import BaseCamera\n\n\nclass Camera(BaseCamera):\n    video_source = 0\n\n    @staticmethod\n    def set_video_source(source):\n        Camera.video_source = source\n\n    @staticmethod\n    def frames():\n        camera = cv2.VideoCapture(Camera.video_source)\n        if not camera.isOpened():\n            raise RuntimeError('Could not start camera.')\n\n        while True:\n            # read current frame\n            _, img = camera.read()\n\n            # encode as a jpeg image and return it\n            yield img\n"
  },
  {
    "path": "example_scripts/tensorflow_lite/classifier/camera_pi.py",
    "content": "import io\nimport time\nimport picamera\nimport picamera.array\nimport cv2\nfrom base_camera import BaseCamera\n\n\nclass Camera(BaseCamera):\n    video_source = 0\n\n    @staticmethod\n    def set_video_source(source):\n        pass\n\n    @staticmethod\n    def frames():\n        with picamera.PiCamera(resolution = (1280,720)) as camera:\n            # let camera warm up\n            time.sleep(2)\n\n            with picamera.array.PiRGBArray(camera, size=(1280,720)) as stream:\n                while True:\n                \n                    camera.capture(stream, format='bgr', use_video_port=True)\n                    # At this point the image is available as stream.array\n                    image = stream.array\n                    stream.truncate(0)\n                    yield image\n\n"
  },
  {
    "path": "example_scripts/tensorflow_lite/classifier/classifier_file.py",
    "content": "import time\nimport argparse\nimport os\nimport cv2\nimport numpy as np\nfrom tqdm import tqdm\n\nfrom cv_utils import init_video_file_capture, decode_classifier, draw_classification, preprocess\nfrom tflite_runtime.interpreter import Interpreter\n\ndef load_labels(path):\n    with open(path, 'r') as f:\n        return {i: line.strip() for i, line in enumerate(f.read().replace('\"','').split(','))}\n\nclass NetworkExecutor(object):\n\n    def __init__(self, model_file):\n\n        self.interpreter = Interpreter(model_file, num_threads=3)\n        self.interpreter.allocate_tensors()\n        _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']\n        self.tensor_index = self.interpreter.get_input_details()[0]['index']\n\n    def get_output_tensors(self):\n\n      output_details = self.interpreter.get_output_details()\n      tensor_indices = []\n      tensor_list = []\n\n      for output in output_details:\n            tensor = np.squeeze(self.interpreter.get_tensor(output['index']))\n            tensor_list.append(tensor)\n\n      return tensor_list\n\n    def run(self, image):\n        if image.shape[1:2] != (self.input_height, self.input_width):\n            img = cv2.resize(image, (self.input_width, self.input_height))\n        img = preprocess(img)\n        self.interpreter.set_tensor(self.tensor_index, img)\n        self.interpreter.invoke()\n        return self.get_output_tensors()\n\ndef main(args):\n    video, video_writer, frame_count = init_video_file_capture(args.file, 'classifier_demo')\n\n    if not os.path.exists(args.labels[0]):\n        labels = args.labels\n    else:   \n        labels = load_labels(args.labels[0])\n\n    frame_num = len(frame_count)\n    times = []\n\n    for _ in tqdm(frame_count, desc='Processing frames'):\n        frame_present, frame = video.read()\n        if not frame_present:\n            continue\n\n        start_time = time.time()\n        results = classification_network.run(frame)\n        elapsed_ms = (time.time() - start_time) * 1000\n\n        classification = decode_classifier(netout = results, top_k = args.top_k)\n\n        draw_classification(frame, classification, labels)\n\n        times.append(elapsed_ms)\n        video_writer.write(frame)\n\n    print('Finished processing frames')\n    video.release(), video_writer.release()\n\n    print(\"Average time(ms): \", sum(times)//frame_num) \n    print(\"FPS: \", 1000.0 / (sum(times)//frame_num)) # FPS = 1 / time to process loop\n\nif __name__ == \"__main__\" :\n\n    print(\"OpenCV version: {}\".format(cv2. __version__))\n\n    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)\n    parser.add_argument('--model', help='File path of .tflite file.', required=True)\n    parser.add_argument('--labels', nargs=\"+\", help='File path of labels file.', required=True)\n    parser.add_argument('--top_k', help='How many top results to display', default=3)\n    parser.add_argument('--file', help='File path of video file', default=None)\n    args = parser.parse_args()\n\n    classification_network = NetworkExecutor(args.model)\n\n    main(args)\n    \n"
  },
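  {
    "path": "example_scripts/tensorflow_lite/classifier/classifier_labels_example.py",
    "content": "# Illustrative sketch, NOT part of the original aXeleRate examples: it writes\n# a labels file in the format load_labels() in classifier_file.py and\n# classifier_stream.py expects - a single comma-separated line (double quotes\n# around each name are also accepted and stripped). The class names below are\n# placeholders.\nlabels = ['cat', 'dog', 'raccoon']\n\nwith open('labels.txt', 'w') as f:\n    f.write(','.join(labels))\n\n# load_labels('labels.txt') then returns {0: 'cat', 1: 'dog', 2: 'raccoon'}\n"
  },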
  {
    "path": "example_scripts/tensorflow_lite/classifier/classifier_stream.py",
    "content": "import time\nimport argparse\nimport os\nimport cv2\nimport numpy as np\n\nfrom cv_utils import decode_classifier, draw_classification, preprocess\nfrom tflite_runtime.interpreter import Interpreter\nfrom flask import Flask, render_template, request, Response\n\napp = Flask (__name__, static_url_path = '')\n\ndef load_labels(path):\n    with open(path, 'r') as f:\n        return {i: line.strip() for i, line in enumerate(f.read().replace('\"','').split(','))}\n\nclass NetworkExecutor(object):\n\n    def __init__(self, model_file):\n\n        self.interpreter = Interpreter(model_file, num_threads=3)\n        self.interpreter.allocate_tensors()\n        _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']\n        self.tensor_index = self.interpreter.get_input_details()[0]['index']\n\n    def get_output_tensors(self):\n\n      output_details = self.interpreter.get_output_details()\n      tensor_indices = []\n      tensor_list = []\n\n      for output in output_details:\n            tensor = np.squeeze(self.interpreter.get_tensor(output['index']))\n            tensor_list.append(tensor)\n\n      return tensor_list\n\n    def run(self, image):\n        if image.shape[1:2] != (self.input_height, self.input_width):\n            img = cv2.resize(image, (self.input_width, self.input_height))\n        img = preprocess(img)\n        self.interpreter.set_tensor(self.tensor_index, img)\n        self.interpreter.invoke()\n        return self.get_output_tensors()\n\nclass Classifier(NetworkExecutor):\n\n    def __init__(self, label_file, model_file, top_k):\n        super().__init__(model_file)\n        self.top_k = top_k\n\n        if not os.path.exists(label_file):\n            self.labels = [label_file]\n        else:   \n            self.labels = load_labels(label_file)\n\n    def classify(self, frame):\n        start_time = time.time()\n        results = self.run(frame)\n        elapsed_ms = (time.time() - start_time) * 1000\n\n        classification = decode_classifier(netout = results, top_k = self.top_k)\n\n        draw_classification(frame, classification, self.labels)\n\n        fps  = 1 / elapsed_ms*1000\n        print(\"Estimated frames per second : {0:.2f} Inference time: {1:.2f}\".format(fps, elapsed_ms))\n\n        return cv2.imencode('.jpg', frame)[1].tobytes()\n\n@app.route(\"/\")\ndef index():\n   return render_template('index.html', name = None)\n\ndef gen(camera):\n    while True:\n        frame = camera.get_frame()\n        image = classifier.classify(frame)\n        yield (b'--frame\\r\\n'+b'Content-Type: image/jpeg\\r\\n\\r\\n' + image + b'\\r\\n')\n\n@app.route('/video_feed')\ndef video_feed():\n    return Response(gen(Camera()), mimetype='multipart/x-mixed-replace; boundary=frame')\n\nparser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)\nparser.add_argument('--model', help='File path of .tflite file.', required=True)\nparser.add_argument('--labels', help='File path of labels file.', required=True)\nparser.add_argument('--top_k', help='How many top results to display', default=3)\nparser.add_argument('--source', help='picamera or cv', default='cv')\nargs = parser.parse_args()\n\nif args.source == \"cv\":\n    from camera_opencv import Camera\n    source = 0\nelif args.source == \"picamera\":\n    from camera_pi import Camera\n    source = 0\n    \nCamera.set_video_source(source)\n\nclassifier = Classifier(args.labels, args.model, args.top_k)\n\nif __name__ == \"__main__\" :\n   app.run(host 
= '0.0.0.0', port = 5000, debug = True)\n    \n"
  },
  {
    "path": "example_scripts/tensorflow_lite/classifier/cv_utils.py",
    "content": "# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.\n# SPDX-License-Identifier: MIT\n\n\"\"\"\nThis file contains helper functions for reading video/image data and\n pre/postprocessing of video/image data using OpenCV.\n\"\"\"\n\nimport os\nimport cv2\nimport numpy as np\n\ndef preprocess(img):\n\n    img = img.astype(np.float32)\n    img = img / 255.\n    img = img - 0.5\n    img = img * 2.\n    img = img[:, :, ::-1]\n    img = np.expand_dims(img, 0)\n    return img\n\ndef decode_yolov2(netout, \n                  nms_threshold = 0.2,\n                  threshold = 0.3, \n                  anchors = [1.889, 2.5245, 2.9465, 3.94056, 3.99987, 5.3658, 5.155437, 6.92275, 6.718375, 9.01025]):\n\n    #Convert Yolo network output to bounding box\n\n    netout = netout[0].reshape(7,7,5,6)\n    grid_h, grid_w, nb_box = netout.shape[:3]\n    boxes = []\n    \n    # decode the output by the network\n    netout[..., 4]  = _sigmoid(netout[..., 4])\n    netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])\n    netout[..., 5:] *= netout[..., 5:] > threshold\n    \n    for row in range(grid_h):\n        for col in range(grid_w):\n            for b in range(nb_box):\n                # from 4th element onwards are confidence and class classes\n                classes = netout[row,col,b,5:]\n                \n                if np.sum(classes) > 0:\n                    # first 4 elements are x, y, w, and h\n                    x, y, w, h = netout[row,col,b,:4]\n\n                    x = (col + _sigmoid(x)) / grid_w # center position, unit: image width\n                    y = (row + _sigmoid(y)) / grid_h # center position, unit: image height\n                    w = anchors[2 * b + 0] * np.exp(w) / grid_w # unit: image width\n                    h = anchors[2 * b + 1] * np.exp(h) / grid_h # unit: image height\n                    confidence = netout[row,col,b,4]\n                    box = BoundBox(x, y, w, h, confidence, classes)\n                    boxes.append(box)\n    \n    boxes = nms_boxes(boxes, len(classes), nms_threshold, threshold)\n\n    if len(boxes) > 0:\n        return boxes_to_array(boxes)\n    else:\n        return []\n\ndef decode_yolov3(netout, \n                  nms_threshold = 0.2,\n                  threshold = 0.3, \n                  anchors = [[[0.76120044, 0.57155991], [0.6923348, 0.88535553], [0.47163042, 0.34163313]],\n                                 [[0.33340788, 0.70065861], [0.18124964, 0.38986752], [0.08497349, 0.1527057 ]]]):\n\n    #Convert Yolo network output to bounding box\n\n    boxes = []\n\n    for l, output in enumerate(netout):\n        grid_h, grid_w, nb_box = output.shape[0:3]\n        \n        # decode the output by the network\n        output[..., 4] = _sigmoid(output[..., 4])\n        output[..., 5:] = output[..., 4][..., np.newaxis] * _sigmoid(output[..., 5:])\n        output[..., 5:] *= output[..., 5:] > threshold\n        \n        for row in range(grid_h):\n            for col in range(grid_w):\n                for b in range(nb_box):\n                    # from 4th element onwards are confidence and class classes\n                    classes = output[row, col, b, 5:]\n\n                    if np.sum(classes) > 0:\n                        # first 4 elements are x, y, w, and h\n                        x, y, w, h = output[row, col, b, :4]\n                        x = (col + _sigmoid(x)) / grid_w # center position, unit: image width\n                        y = (row + _sigmoid(y)) / grid_h # center 
position, unit: image height\n                        w = anchors[l][b][0] * np.exp(w) # unit: image width\n                        h = anchors[l][b][1] * np.exp(h) # unit: image height\n                        confidence = output[row, col, b, 4]\n                        box = BoundBox(x, y, w, h, confidence, classes)\n                        boxes.append(box)\n\n    boxes = nms_boxes(boxes, len(classes), nms_threshold, threshold)\n\n    if len(boxes) > 0:\n        return boxes_to_array(boxes)\n    else:\n        return []\n\ndef decode_classifier(netout, top_k=3):\n    netout = netout[0]\n    ordered = np.argsort(netout)\n    results = [(i, netout[i]) for i in ordered[-top_k:][::-1]]\n    return results\n\ndef decode_segnet(netout, labels, class_colors):\n    netout = netout[0] \n\n    seg_arr = netout.argmax(axis=2)\n\n    seg_img = np.zeros((netout.shape[0], netout.shape[1], 3))\n\n    for c in range(len(labels)):\n        seg_img[:, :, 0] += ((seg_arr[:, :] == c)*(class_colors[c][0])).astype('uint8')\n        seg_img[:, :, 1] += ((seg_arr[:, :] == c)*(class_colors[c][1])).astype('uint8')\n        seg_img[:, :, 2] += ((seg_arr[:, :] == c)*(class_colors[c][2])).astype('uint8')\n\n    return seg_img\n\ndef get_legends(class_names, colors):\n\n    n_classes = len(class_names)\n    legend = np.zeros(((len(class_names) * 25), 150, 3), dtype=\"uint8\") + 255\n\n    for (i, (class_name, color)) in enumerate(zip(class_names.values() , colors)):\n        color = [int(c) for c in color]\n        cv2.putText(legend, class_name, (5, (i * 25) + 17),cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0), 1)\n        cv2.rectangle(legend, (125, (i * 25)), (150, (i * 25) + 25), tuple(color), -1)\n\n    return legend \n\ndef overlay_seg_image(inp_img, seg_img):\n    orininal_h = inp_img.shape[0]\n    orininal_w = inp_img.shape[1]\n    seg_img = cv2.resize(seg_img, (orininal_w, orininal_h))\n\n    fused_img = (inp_img/2 + seg_img/2 ).astype('uint8')\n    return fused_img \n\ndef concat_lenends(seg_img, legend_img):\n    \n    seg_img[:legend_img.shape[0],:legend_img.shape[1]] = np.copy(legend_img)\n\n    return seg_img\n\ndef _sigmoid(x):\n    return 1. / (1. 
+ np.exp(-x))\n\ndef _softmax(x, axis=-1, t=-100.):\n    x = x - np.max(x)\n    if np.min(x) < t:\n        x = x/np.min(x)*t\n    e_x = np.exp(x)\n    return e_x / e_x.sum(axis, keepdims=True)\n\ndef resize_with_aspect_ratio(frame: np.ndarray, input_binding_info: tuple):\n    \"\"\"\n    Resizes frame while maintaining aspect ratio, padding any empty space.\n\n    Args:\n        frame: Captured frame.\n        input_binding_info: Contains shape of model input layer.\n\n    Returns:\n        Frame resized to the size of model input layer.\n    \"\"\"\n    aspect_ratio = frame.shape[1] / frame.shape[0]\n    model_height, model_width = list(input_binding_info[1].GetShape())[1:3]\n\n    if aspect_ratio >= 1.0:\n        new_height, new_width = int(model_width / aspect_ratio), model_width\n        b_padding, r_padding = model_height - new_height, 0\n    else:\n        new_height, new_width = model_height, int(model_height * aspect_ratio)\n        b_padding, r_padding = 0, model_width - new_width\n\n    # Resize and pad any empty space\n    frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_LINEAR)\n    frame = cv2.copyMakeBorder(frame, top=0, bottom=b_padding, left=0, right=r_padding,\n                               borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0])\n    return frame\n\n\ndef create_video_writer(video, video_path, output_name):\n    \"\"\"\n    Creates a video writer object to write processed frames to file.\n\n    Args:\n        video: Video capture object, contains information about data source.\n        video_path: User-specified video file path.\n        output_path: Optional path to save the processed video.\n\n    Returns:\n        Video writer object.\n    \"\"\"\n    _, ext = os.path.splitext(video_path)\n\n    i, filename = 0, output_name + ext\n    while os.path.exists(filename):\n        i += 1\n        filename = output_name + str(i) + ext\n\n    video_writer = cv2.VideoWriter(filename=filename,\n                                   fourcc=get_source_encoding_int(video),\n                                   fps=int(video.get(cv2.CAP_PROP_FPS)),\n                                   frameSize=(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),\n                                              int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))))\n    return video_writer\n\n\ndef init_video_file_capture(video_path, output_name):\n    \"\"\"\n    Creates a video capture object from a video file.\n\n    Args:\n        video_path: User-specified video file path.\n        output_path: Optional path to save the processed video.\n\n    Returns:\n        Video capture object to capture frames, video writer object to write processed\n        frames to file, plus total frame count of video source to iterate through.\n    \"\"\"\n    if not os.path.exists(video_path):\n        raise FileNotFoundError(f'Video file not found for: {video_path}')\n    video = cv2.VideoCapture(video_path)\n    if not video.isOpened:\n        raise RuntimeError(f'Failed to open video capture from file: {video_path}')\n\n    video_writer = create_video_writer(video, video_path, output_name)\n    iter_frame_count = range(int(video.get(cv2.CAP_PROP_FRAME_COUNT)))\n\n    return video, video_writer, iter_frame_count\n\ndef draw_bounding_boxes(frame, detections, labels=None, processing_function=None):\n    \"\"\"\n    Draws bounding boxes around detected objects and adds a label and confidence score.\n\n    Args:\n        frame: The original captured frame from video source.\n        detections: A list of 
detected objects in the form [class, [box positions], confidence].\n        resize_factor: Resizing factor to scale box coordinates to output frame size.\n        labels: Dictionary of labels and colors keyed on the classification index.\n    \"\"\"\n    def _to_original_scale(boxes, frame_height, frame_width):\n        minmax_boxes = np.empty(shape=(4, ), dtype=np.int)\n\n        cx = boxes[0] * frame_width\n        cy = boxes[1] * frame_height\n        w = boxes[2] * frame_width\n        h = boxes[3] * frame_height\n        \n        minmax_boxes[0] = cx - w/2\n        minmax_boxes[1] = cy - h/2\n        minmax_boxes[2] = cx + w/2\n        minmax_boxes[3] = cy + h/2\n\n        return minmax_boxes\n\n    color = (0, 255, 0)\n    label_color = (125, 125, 125)\n\n    for i in range(len(detections)):\n        class_idx, box, confidence = [d for d in detections[i]]\n\n        # Obtain frame size and resized bounding box positions\n        frame_height, frame_width = frame.shape[:2]\n\n        x_min, y_min, x_max, y_max = _to_original_scale(box, frame_height, frame_width)\n        # Ensure box stays within the frame\n        x_min, y_min = max(0, x_min), max(0, y_min)\n        x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)\n\n        # Draw bounding box around detected object\n        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)\n\n        if processing_function:\n            roi_img = frame[y_min:y_max, x_min:x_max]\n            label = processing_function(roi_img)\n        else:\n            # Create label for detected object class\n            label = labels[class_idx].capitalize() \n            label = f'{label} {confidence * 100:.1f}%'\n\n        # Make sure label always stays on-screen\n        x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2]\n\n        lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text)\n        lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min<25 else y_min)\n        lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5)\n\n        # Add label and confidence value\n        cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1)\n        cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50, label_color, 1, cv2.LINE_AA)\n\ndef draw_classification(frame, classifications, labels):\n\n    for i in range(len(classifications)):\n        label_id, prob = classifications[i]\n        text = '%s : %.2f' % (labels[label_id], prob)\n        cv2.putText(frame, text, (10, 20*i+20), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2, True)\n\ndef get_source_encoding_int(video_capture):\n    return int(video_capture.get(cv2.CAP_PROP_FOURCC))\n\nclass BoundBox:\n    def __init__(self, x, y, w, h, c = None, classes = None):\n        self.x     = x\n        self.y     = y\n        self.w     = w\n        self.h     = h\n        \n        self.c     = c\n        self.classes = classes\n\n    def get_label(self):\n        return np.argmax(self.classes)\n    \n    def get_score(self):\n        return self.classes[self.get_label()]\n    \n    def iou(self, bound_box):\n        b1 = self.as_centroid()\n        b2 = bound_box.as_centroid()\n        return centroid_box_iou(b1, b2)\n\n    def as_centroid(self):\n        return np.array([self.x, self.y, self.w, self.h])\n    \n\ndef boxes_to_array(bound_boxes):\n    \"\"\"\n    # Args\n        boxes : list of BoundBox instances\n    \n    # Returns\n        centroid_boxes : (N, 4)\n        probs : (N, nb_classes)\n    
\"\"\"\n    temp_list = []\n    for box in bound_boxes:\n        temp_list.append([np.argmax(box.classes), np.asarray([box.x, box.y, box.w, box.h]), np.max(box.classes)])\n\n    return np.array(temp_list)\n\n\ndef nms_boxes(boxes, n_classes, nms_threshold=0.3, obj_threshold=0.3):\n    \"\"\"\n    # Args\n        boxes : list of BoundBox\n    \n    # Returns\n        boxes : list of BoundBox\n            non maximum supressed BoundBox instances\n    \"\"\"\n    # suppress non-maximal boxes\n    for c in range(n_classes):\n        sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes])))\n\n        for i in range(len(sorted_indices)):\n            index_i = sorted_indices[i]\n            \n            if boxes[index_i].classes[c] == 0: \n                continue\n            else:\n                for j in range(i+1, len(sorted_indices)):\n                    index_j = sorted_indices[j]\n\n                    if boxes[index_i].iou(boxes[index_j]) >= nms_threshold:\n                        boxes[index_j].classes[c] = 0\n    # remove the boxes which are less likely than a obj_threshold\n    boxes = [box for box in boxes if box.get_score() > obj_threshold]\n    return boxes\n\ndef centroid_box_iou(box1, box2):\n    def _interval_overlap(interval_a, interval_b):\n        x1, x2 = interval_a\n        x3, x4 = interval_b\n    \n        if x3 < x1:\n            if x4 < x1:\n                return 0\n            else:\n                return min(x2,x4) - x1\n        else:\n            if x2 < x3:\n                return 0\n            else:\n                return min(x2,x4) - x3\n    \n    _, _, w1, h1 = box1.reshape(-1,)\n    _, _, w2, h2 = box2.reshape(-1,)\n    x1_min, y1_min, x1_max, y1_max = to_minmax(box1.reshape(-1,4)).reshape(-1,)\n    x2_min, y2_min, x2_max, y2_max = to_minmax(box2.reshape(-1,4)).reshape(-1,)\n            \n    intersect_w = _interval_overlap([x1_min, x1_max], [x2_min, x2_max])\n    intersect_h = _interval_overlap([y1_min, y1_max], [y2_min, y2_max])\n    intersect = intersect_w * intersect_h\n    union = w1 * h1 + w2 * h2 - intersect\n    \n    return float(intersect) / union\n\ndef to_minmax(centroid_boxes):\n    centroid_boxes = centroid_boxes.astype(np.float)\n    minmax_boxes = np.zeros_like(centroid_boxes)\n    \n    cx = centroid_boxes[:,0]\n    cy = centroid_boxes[:,1]\n    w = centroid_boxes[:,2]\n    h = centroid_boxes[:,3]\n    \n    minmax_boxes[:,0] = cx - w/2\n    minmax_boxes[:,1] = cy - h/2\n    minmax_boxes[:,2] = cx + w/2\n    minmax_boxes[:,3] = cy + h/2\n    return minmax_boxes"
  },
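  {
    "path": "example_scripts/tensorflow_lite/classifier/cv_utils_demo.py",
    "content": "# Illustrative sketch, NOT part of the original aXeleRate examples: it feeds a\n# dummy network output through decode_classifier() and draw_classification()\n# from cv_utils.py to show the expected input/output shapes. Label names are\n# placeholders.\nimport numpy as np\n\nfrom cv_utils import decode_classifier, draw_classification\n\n# pretend the interpreter returned a single tensor with three class scores\nnetout = [np.array([0.1, 0.7, 0.2])]\nlabels = {0: 'cat', 1: 'dog', 2: 'raccoon'}\n\nresults = decode_classifier(netout, top_k=2)  # [(label_id, score), ...]\n\nframe = np.zeros((240, 320, 3), dtype=np.uint8)\ndraw_classification(frame, results, labels)\n\nfor label_id, score in results:\n    print(labels[label_id], score)\n"
  },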
  {
    "path": "example_scripts/tensorflow_lite/classifier/templates/index.html",
    "content": "<html>\n  <head>\n    <title>Video Streaming Demonstration</title>\n  </head>\n  <body>\n    <h1>Tflite Image Classification Demo</h1>\n    <img src=\"{{ url_for('video_feed') }}\">\n  </body>\n</html>\n"
  },
  {
    "path": "example_scripts/tensorflow_lite/detector/base_camera.py",
    "content": "import time\nimport threading\ntry:\n    from greenlet import getcurrent as get_ident\nexcept ImportError:\n    try:\n        from thread import get_ident\n    except ImportError:\n        from _thread import get_ident\n\n\nclass CameraEvent(object):\n    \"\"\"An Event-like class that signals all active clients when a new frame is\n    available.\n    \"\"\"\n    def __init__(self):\n        self.events = {}\n\n    def wait(self):\n        \"\"\"Invoked from each client's thread to wait for the next frame.\"\"\"\n        ident = get_ident()\n        if ident not in self.events:\n            # this is a new client\n            # add an entry for it in the self.events dict\n            # each entry has two elements, a threading.Event() and a timestamp\n            self.events[ident] = [threading.Event(), time.time()]\n        return self.events[ident][0].wait()\n\n    def set(self):\n        \"\"\"Invoked by the camera thread when a new frame is available.\"\"\"\n        now = time.time()\n        remove = None\n        for ident, event in self.events.items():\n            if not event[0].isSet():\n                # if this client's event is not set, then set it\n                # also update the last set timestamp to now\n                event[0].set()\n                event[1] = now\n            else:\n                # if the client's event is already set, it means the client\n                # did not process a previous frame\n                # if the event stays set for more than 5 seconds, then assume\n                # the client is gone and remove it\n                if now - event[1] > 5:\n                    remove = ident\n        if remove:\n            del self.events[remove]\n\n    def clear(self):\n        \"\"\"Invoked from each client's thread after a frame was processed.\"\"\"\n        self.events[get_ident()][0].clear()\n\n\nclass BaseCamera(object):\n    thread = None  # background thread that reads frames from camera\n    frame = None  # current frame is stored here by background thread\n    last_access = 0  # time of last client access to the camera\n    event = CameraEvent()\n\n    def __init__(self):\n        \"\"\"Start the background camera thread if it isn't running yet.\"\"\"\n        if BaseCamera.thread is None:\n            BaseCamera.last_access = time.time()\n\n            # start background frame thread\n            BaseCamera.thread = threading.Thread(target=self._thread)\n            BaseCamera.thread.start()\n\n            # wait until frames are available\n            while self.get_frame() is None:\n                time.sleep(0)\n\n    def get_frame(self):\n        \"\"\"Return the current camera frame.\"\"\"\n        BaseCamera.last_access = time.time()\n\n        # wait for a signal from the camera thread\n        BaseCamera.event.wait()\n        BaseCamera.event.clear()\n\n        return BaseCamera.frame\n\n    @staticmethod\n    def frames():\n        \"\"\"\"Generator that returns frames from the camera.\"\"\"\n        raise RuntimeError('Must be implemented by subclasses.')\n\n    @classmethod\n    def _thread(cls):\n        \"\"\"Camera background thread.\"\"\"\n        print('Starting camera thread.')\n        frames_iterator = cls.frames()\n        for frame in frames_iterator:\n            BaseCamera.frame = frame\n            BaseCamera.event.set()  # send signal to clients\n            time.sleep(0)\n\n            # if there hasn't been any clients asking for frames in\n            # the last 10 seconds then stop the 
thread\n            if time.time() - BaseCamera.last_access > 10:\n                frames_iterator.close()\n                print('Stopping camera thread due to inactivity.')\n                break\n        BaseCamera.thread = None\n"
  },
  {
    "path": "example_scripts/tensorflow_lite/detector/camera_opencv.py",
    "content": "import cv2\nfrom base_camera import BaseCamera\n\n\nclass Camera(BaseCamera):\n    video_source = 0\n\n    @staticmethod\n    def set_video_source(source):\n        Camera.video_source = source\n\n    @staticmethod\n    def frames():\n        camera = cv2.VideoCapture(Camera.video_source)\n        if not camera.isOpened():\n            raise RuntimeError('Could not start camera.')\n\n        while True:\n            # read current frame\n            _, img = camera.read()\n            #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n\n            # return img\n            yield img\n"
  },
  {
    "path": "example_scripts/tensorflow_lite/detector/camera_pi.py",
    "content": "import io\nimport time\nimport picamera\nimport picamera.array\nimport cv2\nfrom base_camera import BaseCamera\n\n\nclass Camera(BaseCamera):\n    video_source = 0\n\n    @staticmethod\n    def set_video_source(source):\n        pass\n\n    @staticmethod\n    def frames():\n        with picamera.PiCamera(resolution = (1280,720)) as camera:\n            # let camera warm up\n            time.sleep(2)\n\n            with picamera.array.PiRGBArray(camera, size=(1280,720)) as stream:\n                while True:\n                \n                    camera.capture(stream, format='bgr', use_video_port=True)\n                    # At this point the image is available as stream.array\n                    image = stream.array\n                    stream.truncate(0)\n                    yield image\n\n"
  },
  {
    "path": "example_scripts/tensorflow_lite/detector/cv_utils.py",
    "content": "# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.\n# SPDX-License-Identifier: MIT\n\n\"\"\"\nThis file contains helper functions for reading video/image data and\n pre/postprocessing of video/image data using OpenCV.\n\"\"\"\n\nimport os\nimport cv2\nimport numpy as np\n\ndef preprocess(img):\n\n    img = img.astype(np.float32)\n    img = img / 255.\n    img = img - 0.5\n    img = img * 2.\n    img = img[:, :, ::-1]\n    img = np.expand_dims(img, 0)\n    return img\n\ndef decode_yolov2(netout, \n                  nms_threshold = 0.2,\n                  threshold = 0.3, \n                  anchors = [1.889, 2.5245, 2.9465, 3.94056, 3.99987, 5.3658, 5.155437, 6.92275, 6.718375, 9.01025]):\n\n    #Convert Yolo network output to bounding box\n\n    netout = netout[0].reshape(7,7,5,6)\n    grid_h, grid_w, nb_box = netout.shape[:3]\n    boxes = []\n    \n    # decode the output by the network\n    netout[..., 4]  = _sigmoid(netout[..., 4])\n    netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])\n    netout[..., 5:] *= netout[..., 5:] > threshold\n    \n    for row in range(grid_h):\n        for col in range(grid_w):\n            for b in range(nb_box):\n                # from 4th element onwards are confidence and class classes\n                classes = netout[row,col,b,5:]\n                \n                if np.sum(classes) > 0:\n                    # first 4 elements are x, y, w, and h\n                    x, y, w, h = netout[row,col,b,:4]\n\n                    x = (col + _sigmoid(x)) / grid_w # center position, unit: image width\n                    y = (row + _sigmoid(y)) / grid_h # center position, unit: image height\n                    w = anchors[2 * b + 0] * np.exp(w) / grid_w # unit: image width\n                    h = anchors[2 * b + 1] * np.exp(h) / grid_h # unit: image height\n                    confidence = netout[row,col,b,4]\n                    box = BoundBox(x, y, w, h, confidence, classes)\n                    boxes.append(box)\n    \n    boxes = nms_boxes(boxes, len(classes), nms_threshold, threshold)\n\n    if len(boxes) > 0:\n        return boxes_to_array(boxes)\n    else:\n        return []\n\ndef decode_yolov3(netout, \n                  nms_threshold = 0.2,\n                  threshold = 0.3, \n                  anchors = [[[0.76120044, 0.57155991], [0.6923348, 0.88535553], [0.47163042, 0.34163313]],\n                                 [[0.33340788, 0.70065861], [0.18124964, 0.38986752], [0.08497349, 0.1527057 ]]]):\n\n    #Convert Yolo network output to bounding box\n\n    boxes = []\n\n    for l, output in enumerate(netout):\n        grid_h, grid_w, nb_box = output.shape[0:3]\n        \n        # decode the output by the network\n        output[..., 4] = _sigmoid(output[..., 4])\n        output[..., 5:] = output[..., 4][..., np.newaxis] * _sigmoid(output[..., 5:])\n        output[..., 5:] *= output[..., 5:] > threshold\n        \n        for row in range(grid_h):\n            for col in range(grid_w):\n                for b in range(nb_box):\n                    # from 4th element onwards are confidence and class classes\n                    classes = output[row, col, b, 5:]\n\n                    if np.sum(classes) > 0:\n                        # first 4 elements are x, y, w, and h\n                        x, y, w, h = output[row, col, b, :4]\n                        x = (col + _sigmoid(x)) / grid_w # center position, unit: image width\n                        y = (row + _sigmoid(y)) / grid_h # center 
position, unit: image height\n                        w = anchors[l][b][0] * np.exp(w) # unit: image width\n                        h = anchors[l][b][1] * np.exp(h) # unit: image height\n                        confidence = output[row, col, b, 4]\n                        box = BoundBox(x, y, w, h, confidence, classes)\n                        boxes.append(box)\n\n    boxes = nms_boxes(boxes, len(classes), nms_threshold, threshold)\n\n    if len(boxes) > 0:\n        return boxes_to_array(boxes)\n    else:\n        return []\n\ndef decode_classifier(netout, top_k=3):\n    netout = netout[0]\n    ordered = np.argsort(netout)\n    results = [(i, netout[i]) for i in ordered[-top_k:][::-1]]\n    return results\n\ndef decode_segnet(netout, labels, class_colors):\n    netout = netout[0] \n\n    seg_arr = netout.argmax(axis=2)\n\n    seg_img = np.zeros((netout.shape[0], netout.shape[1], 3))\n\n    for c in range(len(labels)):\n        seg_img[:, :, 0] += ((seg_arr[:, :] == c)*(class_colors[c][0])).astype('uint8')\n        seg_img[:, :, 1] += ((seg_arr[:, :] == c)*(class_colors[c][1])).astype('uint8')\n        seg_img[:, :, 2] += ((seg_arr[:, :] == c)*(class_colors[c][2])).astype('uint8')\n\n    return seg_img\n\ndef get_legends(class_names, colors):\n\n    n_classes = len(class_names)\n    legend = np.zeros(((len(class_names) * 25), 150, 3), dtype=\"uint8\") + 255\n\n    for (i, (class_name, color)) in enumerate(zip(class_names.values() , colors)):\n        color = [int(c) for c in color]\n        cv2.putText(legend, class_name, (5, (i * 25) + 17),cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0), 1)\n        cv2.rectangle(legend, (125, (i * 25)), (150, (i * 25) + 25), tuple(color), -1)\n\n    return legend \n\ndef overlay_seg_image(inp_img, seg_img):\n    orininal_h = inp_img.shape[0]\n    orininal_w = inp_img.shape[1]\n    seg_img = cv2.resize(seg_img, (orininal_w, orininal_h))\n\n    fused_img = (inp_img/2 + seg_img/2 ).astype('uint8')\n    return fused_img \n\ndef concat_lenends(seg_img, legend_img):\n    \n    seg_img[:legend_img.shape[0],:legend_img.shape[1]] = np.copy(legend_img)\n\n    return seg_img\n\ndef _sigmoid(x):\n    return 1. / (1. 
+ np.exp(-x))\n\ndef _softmax(x, axis=-1, t=-100.):\n    x = x - np.max(x)\n    if np.min(x) < t:\n        x = x/np.min(x)*t\n    e_x = np.exp(x)\n    return e_x / e_x.sum(axis, keepdims=True)\n\ndef resize_with_aspect_ratio(frame: np.ndarray, input_binding_info: tuple):\n    \"\"\"\n    Resizes frame while maintaining aspect ratio, padding any empty space.\n\n    Args:\n        frame: Captured frame.\n        input_binding_info: Contains shape of model input layer.\n\n    Returns:\n        Frame resized to the size of model input layer.\n    \"\"\"\n    aspect_ratio = frame.shape[1] / frame.shape[0]\n    model_height, model_width = list(input_binding_info[1].GetShape())[1:3]\n\n    if aspect_ratio >= 1.0:\n        new_height, new_width = int(model_width / aspect_ratio), model_width\n        b_padding, r_padding = model_height - new_height, 0\n    else:\n        new_height, new_width = model_height, int(model_height * aspect_ratio)\n        b_padding, r_padding = 0, model_width - new_width\n\n    # Resize and pad any empty space\n    frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_LINEAR)\n    frame = cv2.copyMakeBorder(frame, top=0, bottom=b_padding, left=0, right=r_padding,\n                               borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0])\n    return frame\n\n\ndef create_video_writer(video, video_path, output_name):\n    \"\"\"\n    Creates a video writer object to write processed frames to file.\n\n    Args:\n        video: Video capture object, contains information about data source.\n        video_path: User-specified video file path.\n        output_path: Optional path to save the processed video.\n\n    Returns:\n        Video writer object.\n    \"\"\"\n    _, ext = os.path.splitext(video_path)\n\n    i, filename = 0, output_name + ext\n    while os.path.exists(filename):\n        i += 1\n        filename = output_name + str(i) + ext\n\n    video_writer = cv2.VideoWriter(filename=filename,\n                                   fourcc=get_source_encoding_int(video),\n                                   fps=int(video.get(cv2.CAP_PROP_FPS)),\n                                   frameSize=(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),\n                                              int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))))\n    return video_writer\n\n\ndef init_video_file_capture(video_path, output_name):\n    \"\"\"\n    Creates a video capture object from a video file.\n\n    Args:\n        video_path: User-specified video file path.\n        output_path: Optional path to save the processed video.\n\n    Returns:\n        Video capture object to capture frames, video writer object to write processed\n        frames to file, plus total frame count of video source to iterate through.\n    \"\"\"\n    if not os.path.exists(video_path):\n        raise FileNotFoundError(f'Video file not found for: {video_path}')\n    video = cv2.VideoCapture(video_path)\n    if not video.isOpened:\n        raise RuntimeError(f'Failed to open video capture from file: {video_path}')\n\n    video_writer = create_video_writer(video, video_path, output_name)\n    iter_frame_count = range(int(video.get(cv2.CAP_PROP_FRAME_COUNT)))\n\n    return video, video_writer, iter_frame_count\n\ndef draw_bounding_boxes(frame, detections, labels=None, processing_function=None):\n    \"\"\"\n    Draws bounding boxes around detected objects and adds a label and confidence score.\n\n    Args:\n        frame: The original captured frame from video source.\n        detections: A list of 
detected objects in the form [class, [box positions], confidence].\n        resize_factor: Resizing factor to scale box coordinates to output frame size.\n        labels: Dictionary of labels and colors keyed on the classification index.\n    \"\"\"\n    def _to_original_scale(boxes, frame_height, frame_width):\n        minmax_boxes = np.empty(shape=(4, ), dtype=np.int)\n\n        cx = boxes[0] * frame_width\n        cy = boxes[1] * frame_height\n        w = boxes[2] * frame_width\n        h = boxes[3] * frame_height\n        \n        minmax_boxes[0] = cx - w/2\n        minmax_boxes[1] = cy - h/2\n        minmax_boxes[2] = cx + w/2\n        minmax_boxes[3] = cy + h/2\n\n        return minmax_boxes\n\n    color = (0, 255, 0)\n    label_color = (125, 125, 125)\n\n    for i in range(len(detections)):\n        class_idx, box, confidence = [d for d in detections[i]]\n\n        # Obtain frame size and resized bounding box positions\n        frame_height, frame_width = frame.shape[:2]\n\n        x_min, y_min, x_max, y_max = _to_original_scale(box, frame_height, frame_width)\n        # Ensure box stays within the frame\n        x_min, y_min = max(0, x_min), max(0, y_min)\n        x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)\n\n        # Draw bounding box around detected object\n        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)\n\n        if processing_function:\n            roi_img = frame[y_min:y_max, x_min:x_max]\n            label = processing_function(roi_img)\n        else:\n            # Create label for detected object class\n            label = labels[class_idx].capitalize() \n            label = f'{label} {confidence * 100:.1f}%'\n\n        # Make sure label always stays on-screen\n        x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2]\n\n        lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text)\n        lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min<25 else y_min)\n        lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5)\n\n        # Add label and confidence value\n        cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1)\n        cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50, label_color, 1, cv2.LINE_AA)\n\ndef draw_classification(frame, classifications, labels):\n\n    for i in range(len(classifications)):\n        label_id, prob = classifications[i]\n        text = '%s : %.2f' % (labels[label_id], prob)\n        cv2.putText(frame, text, (10, 20*i+20), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2, True)\n\ndef get_source_encoding_int(video_capture):\n    return int(video_capture.get(cv2.CAP_PROP_FOURCC))\n\nclass BoundBox:\n    def __init__(self, x, y, w, h, c = None, classes = None):\n        self.x     = x\n        self.y     = y\n        self.w     = w\n        self.h     = h\n        \n        self.c     = c\n        self.classes = classes\n\n    def get_label(self):\n        return np.argmax(self.classes)\n    \n    def get_score(self):\n        return self.classes[self.get_label()]\n    \n    def iou(self, bound_box):\n        b1 = self.as_centroid()\n        b2 = bound_box.as_centroid()\n        return centroid_box_iou(b1, b2)\n\n    def as_centroid(self):\n        return np.array([self.x, self.y, self.w, self.h])\n    \n\ndef boxes_to_array(bound_boxes):\n    \"\"\"\n    # Args\n        boxes : list of BoundBox instances\n    \n    # Returns\n        centroid_boxes : (N, 4)\n        probs : (N, nb_classes)\n    
\"\"\"\n    temp_list = []\n    for box in bound_boxes:\n        temp_list.append([np.argmax(box.classes), np.asarray([box.x, box.y, box.w, box.h]), np.max(box.classes)])\n\n    return np.array(temp_list)\n\n\ndef nms_boxes(boxes, n_classes, nms_threshold=0.3, obj_threshold=0.3):\n    \"\"\"\n    # Args\n        boxes : list of BoundBox\n    \n    # Returns\n        boxes : list of BoundBox\n            non maximum supressed BoundBox instances\n    \"\"\"\n    # suppress non-maximal boxes\n    for c in range(n_classes):\n        sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes])))\n\n        for i in range(len(sorted_indices)):\n            index_i = sorted_indices[i]\n            \n            if boxes[index_i].classes[c] == 0: \n                continue\n            else:\n                for j in range(i+1, len(sorted_indices)):\n                    index_j = sorted_indices[j]\n\n                    if boxes[index_i].iou(boxes[index_j]) >= nms_threshold:\n                        boxes[index_j].classes[c] = 0\n    # remove the boxes which are less likely than a obj_threshold\n    boxes = [box for box in boxes if box.get_score() > obj_threshold]\n    return boxes\n\ndef centroid_box_iou(box1, box2):\n    def _interval_overlap(interval_a, interval_b):\n        x1, x2 = interval_a\n        x3, x4 = interval_b\n    \n        if x3 < x1:\n            if x4 < x1:\n                return 0\n            else:\n                return min(x2,x4) - x1\n        else:\n            if x2 < x3:\n                return 0\n            else:\n                return min(x2,x4) - x3\n    \n    _, _, w1, h1 = box1.reshape(-1,)\n    _, _, w2, h2 = box2.reshape(-1,)\n    x1_min, y1_min, x1_max, y1_max = to_minmax(box1.reshape(-1,4)).reshape(-1,)\n    x2_min, y2_min, x2_max, y2_max = to_minmax(box2.reshape(-1,4)).reshape(-1,)\n            \n    intersect_w = _interval_overlap([x1_min, x1_max], [x2_min, x2_max])\n    intersect_h = _interval_overlap([y1_min, y1_max], [y2_min, y2_max])\n    intersect = intersect_w * intersect_h\n    union = w1 * h1 + w2 * h2 - intersect\n    \n    return float(intersect) / union\n\ndef to_minmax(centroid_boxes):\n    centroid_boxes = centroid_boxes.astype(np.float)\n    minmax_boxes = np.zeros_like(centroid_boxes)\n    \n    cx = centroid_boxes[:,0]\n    cy = centroid_boxes[:,1]\n    w = centroid_boxes[:,2]\n    h = centroid_boxes[:,3]\n    \n    minmax_boxes[:,0] = cx - w/2\n    minmax_boxes[:,1] = cy - h/2\n    minmax_boxes[:,2] = cx + w/2\n    minmax_boxes[:,3] = cy + h/2\n    return minmax_boxes"
  },
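A note on the box decoding above: the per-cell transform inside decode_yolov2/decode_yolov3 is easy to lose in the nested loops. Below is a minimal, self-contained sketch of the decode_yolov2 variant for a single grid cell, using made-up raw outputs; the names tx, ty, tw, th are illustrative and are not identifiers from cv_utils.py.

```python
import numpy as np

def _sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

# Illustrative raw outputs for one cell of a 7x7 grid (made-up numbers).
grid_w, grid_h = 7, 7
col, row = 3, 2                       # grid cell indices
tx, ty, tw, th = 0.1, -0.2, 0.3, 0.5  # raw box regressions for this cell
anchor_w, anchor_h = 1.889, 2.5245    # first anchor pair from decode_yolov2

# Same transform as decode_yolov2: the centre offset goes through a sigmoid
# and is shifted by the cell index, the size scales the anchor exponentially.
x = (col + _sigmoid(tx)) / grid_w   # box centre, fraction of image width
y = (row + _sigmoid(ty)) / grid_h   # box centre, fraction of image height
w = anchor_w * np.exp(tw) / grid_w  # box width,  fraction of image width
h = anchor_h * np.exp(th) / grid_h  # box height, fraction of image height

print(x, y, w, h)  # normalised centroid box, as consumed by draw_bounding_boxes
```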
  {
    "path": "example_scripts/tensorflow_lite/detector/detector_file.py",
    "content": "import time\nimport argparse\nimport os\nimport cv2\nimport numpy as np\nfrom tqdm import tqdm\n\nfrom cv_utils import init_video_file_capture, decode_yolov3, draw_bounding_boxes, preprocess\nfrom tflite_runtime.interpreter import Interpreter\n\ndef load_labels(path):\n    with open(path, 'r') as f:\n        return {i: line.strip() for i, line in enumerate(f.read().replace('\"','').split(','))}\n\nclass NetworkExecutor(object):\n\n    def __init__(self, model_file):\n\n        self.interpreter = Interpreter(model_file, num_threads=3)\n        self.interpreter.allocate_tensors()\n        _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']\n        self.tensor_index = self.interpreter.get_input_details()[0]['index']\n\n    def get_output_tensors(self):\n\n      output_details = self.interpreter.get_output_details()\n      tensor_indices = []\n      tensor_list = []\n\n      for output in output_details:\n            tensor = np.squeeze(self.interpreter.get_tensor(output['index']))\n            tensor_list.append(tensor)\n\n      return tensor_list\n\n    def run(self, image):\n        if image.shape[1:2] != (self.input_height, self.input_width):\n            img = cv2.resize(image, (self.input_width, self.input_height))\n        img = preprocess(img)\n        self.interpreter.set_tensor(self.tensor_index, img)\n        self.interpreter.invoke()\n        return self.get_output_tensors()\n\ndef main(args, detector):\n    video, video_writer, frame_count = init_video_file_capture(args.file, 'detector_demo')\n\n    if not os.path.exists(args.labels[0]):\n        labels = args.labels\n    else:   \n        labels = load_labels(args.labels[0])\n\n    frame_num = len(frame_count)\n    times = []\n\n    for _ in tqdm(frame_count, desc='Processing frames'):\n        frame_present, frame = video.read()\n        if not frame_present:\n            continue\n\n        start_time = time.time()\n        results = detection_network.run(frame)\n        elapsed_ms = (time.time() - start_time) * 1000\n\n        detections = decode_yolov3(netout = results, threshold = args.threshold)\n\n        draw_bounding_boxes(frame, detections, labels)\n\n        times.append(elapsed_ms)\n        video_writer.write(frame)\n\n    print('Finished processing frames')\n    video.release(), video_writer.release()\n\n    print(\"Average time(ms): \", sum(times)//frame_num) \n    print(\"FPS: \", 1000.0 / (sum(times)//frame_num)) # FPS = 1 / time to process loop\n\nif __name__ == \"__main__\" :\n\n    print(\"OpenCV version: {}\".format(cv2. __version__))\n\n    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)\n    parser.add_argument('--model', help='File path of .tflite file.', required=True)\n    parser.add_argument('--labels', nargs=\"+\", help='File path of labels file.', required=True)\n    parser.add_argument('--threshold', help='Confidence threshold.', default=0.7)\n    parser.add_argument('--file', help='File path of video file', default=None)\n    args = parser.parse_args()\n\n    detection_network = NetworkExecutor(args.model)\n\n    main(args, detection_network)\n    \n"
  },
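The labels file consumed by load_labels() above is not documented in the script itself; judging from the parsing code, it is expected to be a single comma-separated list, with optional double quotes around each label. A minimal sketch of creating and parsing such a file (the file name and labels are made up, and the parsing simply mirrors load_labels()):

```python
# Hypothetical labels file content in the format load_labels() expects.
labels_text = '"person", "car", "dog"'

with open('labels.txt', 'w') as f:
    f.write(labels_text)

# Same logic as load_labels() in detector_file.py / detector_stream.py.
with open('labels.txt', 'r') as f:
    labels = {i: line.strip() for i, line in enumerate(f.read().replace('"', '').split(','))}

print(labels)  # {0: 'person', 1: 'car', 2: 'dog'}
```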
  {
    "path": "example_scripts/tensorflow_lite/detector/detector_stream.py",
    "content": "import time\nimport argparse\nimport os\nimport cv2\nimport numpy as np\n\nfrom cv_utils import decode_yolov3, preprocess, draw_bounding_boxes\nfrom tflite_runtime.interpreter import Interpreter\nfrom flask import Flask, render_template, request, Response\n\napp = Flask (__name__, static_url_path = '')\n\ndef load_labels(path):\n    with open(path, 'r') as f:\n        return {i: line.strip() for i, line in enumerate(f.read().replace('\"','').split(','))}\n\nclass NetworkExecutor(object):\n\n    def __init__(self, model_file):\n\n        self.interpreter = Interpreter(model_file, num_threads=3)\n        self.interpreter.allocate_tensors()\n        _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']\n        self.tensor_index = self.interpreter.get_input_details()[0]['index']\n\n    def get_output_tensors(self):\n\n      output_details = self.interpreter.get_output_details()\n      tensor_indices = []\n      tensor_list = []\n\n      for output in output_details:\n            tensor = np.squeeze(self.interpreter.get_tensor(output['index']))\n            tensor_list.append(tensor)\n\n      return tensor_list\n\n    def run(self, image):\n        if image.shape[1:2] != (self.input_height, self.input_width):\n            img = cv2.resize(image, (self.input_width, self.input_height))\n        img = preprocess(img)\n        self.interpreter.set_tensor(self.tensor_index, img)\n        self.interpreter.invoke()\n        return self.get_output_tensors()\n\nclass Detector(NetworkExecutor):\n\n    def __init__(self, label_file, model_file, threshold):\n        super().__init__(model_file)\n        self._threshold = float(threshold)\n\n        if not os.path.exists(label_file):\n            self.labels = [label_file]\n        else:   \n            self.labels = load_labels(label_file)\n\n    def detect(self, original_image):\n        start_time = time.time()\n        results = self.run(original_image)\n        elapsed_ms = (time.time() - start_time) * 1000\n\n        detections = decode_yolov3(netout = results, threshold = self._threshold)\n        draw_bounding_boxes(original_image, detections, self.labels)\n\n        fps  = 1 / elapsed_ms*1000\n        print(\"Estimated frames per second : {0:.2f} Inference time: {1:.2f}\".format(fps, elapsed_ms))\n\n        return cv2.imencode('.jpg', original_image)[1].tobytes()\n\n@app.route(\"/\")\ndef index():\n   return render_template('index.html', name = None)\n\ndef gen(camera):\n    while True:\n        frame = camera.get_frame()\n        image = detector.detect(frame)\n        yield (b'--frame\\r\\n'+b'Content-Type: image/jpeg\\r\\n\\r\\n' + image + b'\\r\\n')\n\n@app.route('/video_feed')\ndef video_feed():\n    return Response(gen(Camera()), mimetype='multipart/x-mixed-replace; boundary=frame')\n\nparser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)\nparser.add_argument('--model', help='File path of .tflite file.', required=True)\nparser.add_argument('--labels', help='File path of labels file.', required=True)\nparser.add_argument('--threshold', help='Confidence threshold.', default=0.7)\nparser.add_argument('--source', help='picamera or cv', default='cv')\nargs = parser.parse_args()\n\nif args.source == \"cv\":\n    from camera_opencv import Camera\n    source = 0\nelif args.source == \"picamera\":\n    from camera_pi import Camera\n    source = 0\n    \nCamera.set_video_source(source)\n\ndetector = Detector(args.labels, args.model, args.threshold)\n\nif __name__ 
== \"__main__\" :\n   app.run(host = '0.0.0.0', port = 5000, debug = True)\n    \n"
  },
  {
    "path": "example_scripts/tensorflow_lite/detector/templates/index.html",
    "content": "<html>\n  <head>\n    <title>Video Streaming Demonstration</title>\n  </head>\n  <body>\n    <h1>Tflite Object Detection Demo</h1>\n    <img src=\"{{ url_for('video_feed') }}\">\n  </body>\n</html>\n"
  },
  {
    "path": "example_scripts/tensorflow_lite/segnet/base_camera.py",
    "content": "import time\nimport threading\ntry:\n    from greenlet import getcurrent as get_ident\nexcept ImportError:\n    try:\n        from thread import get_ident\n    except ImportError:\n        from _thread import get_ident\n\n\nclass CameraEvent(object):\n    \"\"\"An Event-like class that signals all active clients when a new frame is\n    available.\n    \"\"\"\n    def __init__(self):\n        self.events = {}\n\n    def wait(self):\n        \"\"\"Invoked from each client's thread to wait for the next frame.\"\"\"\n        ident = get_ident()\n        if ident not in self.events:\n            # this is a new client\n            # add an entry for it in the self.events dict\n            # each entry has two elements, a threading.Event() and a timestamp\n            self.events[ident] = [threading.Event(), time.time()]\n        return self.events[ident][0].wait()\n\n    def set(self):\n        \"\"\"Invoked by the camera thread when a new frame is available.\"\"\"\n        now = time.time()\n        remove = None\n        for ident, event in self.events.items():\n            if not event[0].isSet():\n                # if this client's event is not set, then set it\n                # also update the last set timestamp to now\n                event[0].set()\n                event[1] = now\n            else:\n                # if the client's event is already set, it means the client\n                # did not process a previous frame\n                # if the event stays set for more than 5 seconds, then assume\n                # the client is gone and remove it\n                if now - event[1] > 5:\n                    remove = ident\n        if remove:\n            del self.events[remove]\n\n    def clear(self):\n        \"\"\"Invoked from each client's thread after a frame was processed.\"\"\"\n        self.events[get_ident()][0].clear()\n\n\nclass BaseCamera(object):\n    thread = None  # background thread that reads frames from camera\n    frame = None  # current frame is stored here by background thread\n    last_access = 0  # time of last client access to the camera\n    event = CameraEvent()\n\n    def __init__(self):\n        \"\"\"Start the background camera thread if it isn't running yet.\"\"\"\n        if BaseCamera.thread is None:\n            BaseCamera.last_access = time.time()\n\n            # start background frame thread\n            BaseCamera.thread = threading.Thread(target=self._thread)\n            BaseCamera.thread.start()\n\n            # wait until frames are available\n            while self.get_frame() is None:\n                time.sleep(0)\n\n    def get_frame(self):\n        \"\"\"Return the current camera frame.\"\"\"\n        BaseCamera.last_access = time.time()\n\n        # wait for a signal from the camera thread\n        BaseCamera.event.wait()\n        BaseCamera.event.clear()\n\n        return BaseCamera.frame\n\n    @staticmethod\n    def frames():\n        \"\"\"\"Generator that returns frames from the camera.\"\"\"\n        raise RuntimeError('Must be implemented by subclasses.')\n\n    @classmethod\n    def _thread(cls):\n        \"\"\"Camera background thread.\"\"\"\n        print('Starting camera thread.')\n        frames_iterator = cls.frames()\n        for frame in frames_iterator:\n            BaseCamera.frame = frame\n            BaseCamera.event.set()  # send signal to clients\n            time.sleep(0)\n\n            # if there hasn't been any clients asking for frames in\n            # the last 10 seconds then stop the 
thread\n            if time.time() - BaseCamera.last_access > 10:\n                frames_iterator.close()\n                print('Stopping camera thread due to inactivity.')\n                break\n        BaseCamera.thread = None\n"
  },
  {
    "path": "example_scripts/tensorflow_lite/segnet/camera_opencv.py",
    "content": "import cv2\nfrom base_camera import BaseCamera\n\n\nclass Camera(BaseCamera):\n    video_source = 0\n\n    @staticmethod\n    def set_video_source(source):\n        Camera.video_source = source\n\n    @staticmethod\n    def frames():\n        camera = cv2.VideoCapture(Camera.video_source)\n        if not camera.isOpened():\n            raise RuntimeError('Could not start camera.')\n\n        while True:\n            # read current frame\n            _, img = camera.read()\n            #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n\n            # return img\n            yield img\n"
  },
  {
    "path": "example_scripts/tensorflow_lite/segnet/camera_pi.py",
    "content": "import io\nimport time\nimport picamera\nimport picamera.array\nimport cv2\nfrom base_camera import BaseCamera\n\n\nclass Camera(BaseCamera):\n    @staticmethod\n    def frames():\n        with picamera.PiCamera(resolution = (1280,720)) as camera:\n            # let camera warm up\n            time.sleep(2)\n\n            with picamera.array.PiRGBArray(camera, size=(1280,720)) as stream:\n                while True:\n                    camera.capture(stream, format='bgr')\n                    # At this point the image is available as stream.array\n                    image = stream.array\n                    stream.truncate(0)\n                    yield image\n\n"
  },
  {
    "path": "example_scripts/tensorflow_lite/segnet/cv_utils.py",
    "content": "# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.\n# SPDX-License-Identifier: MIT\n\n\"\"\"\nThis file contains helper functions for reading video/image data and\n pre/postprocessing of video/image data using OpenCV.\n\"\"\"\n\nimport os\nimport cv2\nimport numpy as np\n\ndef preprocess(img):\n\n    img = img.astype(np.float32)\n    img = img / 255.\n    img = img - 0.5\n    img = img * 2.\n    img = img[:, :, ::-1]\n    img = np.expand_dims(img, 0)\n    return img\n\ndef decode_yolov2(netout, \n                  nms_threshold = 0.2,\n                  threshold = 0.3, \n                  anchors = [1.889, 2.5245, 2.9465, 3.94056, 3.99987, 5.3658, 5.155437, 6.92275, 6.718375, 9.01025]):\n\n    #Convert Yolo network output to bounding box\n\n    netout = netout[0].reshape(7,7,5,6)\n    grid_h, grid_w, nb_box = netout.shape[:3]\n    boxes = []\n    \n    # decode the output by the network\n    netout[..., 4]  = _sigmoid(netout[..., 4])\n    netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])\n    netout[..., 5:] *= netout[..., 5:] > threshold\n    \n    for row in range(grid_h):\n        for col in range(grid_w):\n            for b in range(nb_box):\n                # from 4th element onwards are confidence and class classes\n                classes = netout[row,col,b,5:]\n                \n                if np.sum(classes) > 0:\n                    # first 4 elements are x, y, w, and h\n                    x, y, w, h = netout[row,col,b,:4]\n\n                    x = (col + _sigmoid(x)) / grid_w # center position, unit: image width\n                    y = (row + _sigmoid(y)) / grid_h # center position, unit: image height\n                    w = anchors[2 * b + 0] * np.exp(w) / grid_w # unit: image width\n                    h = anchors[2 * b + 1] * np.exp(h) / grid_h # unit: image height\n                    confidence = netout[row,col,b,4]\n                    box = BoundBox(x, y, w, h, confidence, classes)\n                    boxes.append(box)\n    \n    boxes = nms_boxes(boxes, len(classes), nms_threshold, threshold)\n\n    if len(boxes) > 0:\n        return boxes_to_array(boxes)\n    else:\n        return []\n\ndef decode_yolov3(netout, \n                  nms_threshold = 0.2,\n                  threshold = 0.3, \n                  anchors = [[[0.76120044, 0.57155991], [0.6923348, 0.88535553], [0.47163042, 0.34163313]],\n                                 [[0.33340788, 0.70065861], [0.18124964, 0.38986752], [0.08497349, 0.1527057 ]]]):\n\n    #Convert Yolo network output to bounding box\n\n    boxes = []\n\n    for l, output in enumerate(netout):\n        grid_h, grid_w, nb_box = output.shape[0:3]\n        \n        # decode the output by the network\n        output[..., 4] = _sigmoid(output[..., 4])\n        output[..., 5:] = output[..., 4][..., np.newaxis] * _sigmoid(output[..., 5:])\n        output[..., 5:] *= output[..., 5:] > threshold\n        \n        for row in range(grid_h):\n            for col in range(grid_w):\n                for b in range(nb_box):\n                    # from 4th element onwards are confidence and class classes\n                    classes = output[row, col, b, 5:]\n\n                    if np.sum(classes) > 0:\n                        # first 4 elements are x, y, w, and h\n                        x, y, w, h = output[row, col, b, :4]\n                        x = (col + _sigmoid(x)) / grid_w # center position, unit: image width\n                        y = (row + _sigmoid(y)) / grid_h # center 
position, unit: image height\n                        w = anchors[l][b][0] * np.exp(w) # unit: image width\n                        h = anchors[l][b][1] * np.exp(h) # unit: image height\n                        confidence = output[row, col, b, 4]\n                        box = BoundBox(x, y, w, h, confidence, classes)\n                        boxes.append(box)\n\n    boxes = nms_boxes(boxes, len(classes), nms_threshold, threshold)\n\n    if len(boxes) > 0:\n        return boxes_to_array(boxes)\n    else:\n        return []\n\ndef decode_classifier(netout, top_k=3):\n    netout = netout[0]\n    ordered = np.argsort(netout)\n    results = [(i, netout[i]) for i in ordered[-top_k:][::-1]]\n    return results\n\ndef decode_segnet(netout, labels, class_colors):\n    netout = netout[0] \n\n    seg_arr = netout.argmax(axis=2)\n\n    seg_img = np.zeros((netout.shape[0], netout.shape[1], 3))\n\n    for c in range(len(labels)):\n        seg_img[:, :, 0] += ((seg_arr[:, :] == c)*(class_colors[c][0])).astype('uint8')\n        seg_img[:, :, 1] += ((seg_arr[:, :] == c)*(class_colors[c][1])).astype('uint8')\n        seg_img[:, :, 2] += ((seg_arr[:, :] == c)*(class_colors[c][2])).astype('uint8')\n\n    return seg_img\n\ndef get_legends(class_names, colors):\n\n    n_classes = len(class_names)\n    legend = np.zeros(((len(class_names) * 25), 150, 3), dtype=\"uint8\") + 255\n\n    for (i, (class_name, color)) in enumerate(zip(class_names.values() , colors)):\n        color = [int(c) for c in color]\n        cv2.putText(legend, class_name, (5, (i * 25) + 17),cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0), 1)\n        cv2.rectangle(legend, (125, (i * 25)), (150, (i * 25) + 25), tuple(color), -1)\n\n    return legend \n\ndef overlay_seg_image(inp_img, seg_img):\n    orininal_h = inp_img.shape[0]\n    orininal_w = inp_img.shape[1]\n    seg_img = cv2.resize(seg_img, (orininal_w, orininal_h))\n\n    fused_img = (inp_img/2 + seg_img/2 ).astype('uint8')\n    return fused_img \n\ndef concat_lenends(seg_img, legend_img):\n    \n    seg_img[:legend_img.shape[0],:legend_img.shape[1]] = np.copy(legend_img)\n\n    return seg_img\n\ndef _sigmoid(x):\n    return 1. / (1. 
+ np.exp(-x))\n\ndef _softmax(x, axis=-1, t=-100.):\n    x = x - np.max(x)\n    if np.min(x) < t:\n        x = x/np.min(x)*t\n    e_x = np.exp(x)\n    return e_x / e_x.sum(axis, keepdims=True)\n\ndef resize_with_aspect_ratio(frame: np.ndarray, input_binding_info: tuple):\n    \"\"\"\n    Resizes frame while maintaining aspect ratio, padding any empty space.\n\n    Args:\n        frame: Captured frame.\n        input_binding_info: Contains shape of model input layer.\n\n    Returns:\n        Frame resized to the size of model input layer.\n    \"\"\"\n    aspect_ratio = frame.shape[1] / frame.shape[0]\n    model_height, model_width = list(input_binding_info[1].GetShape())[1:3]\n\n    if aspect_ratio >= 1.0:\n        new_height, new_width = int(model_width / aspect_ratio), model_width\n        b_padding, r_padding = model_height - new_height, 0\n    else:\n        new_height, new_width = model_height, int(model_height * aspect_ratio)\n        b_padding, r_padding = 0, model_width - new_width\n\n    # Resize and pad any empty space\n    frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_LINEAR)\n    frame = cv2.copyMakeBorder(frame, top=0, bottom=b_padding, left=0, right=r_padding,\n                               borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0])\n    return frame\n\n\ndef create_video_writer(video, video_path, output_name):\n    \"\"\"\n    Creates a video writer object to write processed frames to file.\n\n    Args:\n        video: Video capture object, contains information about data source.\n        video_path: User-specified video file path.\n        output_path: Optional path to save the processed video.\n\n    Returns:\n        Video writer object.\n    \"\"\"\n    _, ext = os.path.splitext(video_path)\n\n    i, filename = 0, output_name + ext\n    while os.path.exists(filename):\n        i += 1\n        filename = output_name + str(i) + ext\n\n    video_writer = cv2.VideoWriter(filename=filename,\n                                   fourcc=get_source_encoding_int(video),\n                                   fps=int(video.get(cv2.CAP_PROP_FPS)),\n                                   frameSize=(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),\n                                              int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))))\n    return video_writer\n\n\ndef init_video_file_capture(video_path, output_name):\n    \"\"\"\n    Creates a video capture object from a video file.\n\n    Args:\n        video_path: User-specified video file path.\n        output_path: Optional path to save the processed video.\n\n    Returns:\n        Video capture object to capture frames, video writer object to write processed\n        frames to file, plus total frame count of video source to iterate through.\n    \"\"\"\n    if not os.path.exists(video_path):\n        raise FileNotFoundError(f'Video file not found for: {video_path}')\n    video = cv2.VideoCapture(video_path)\n    if not video.isOpened:\n        raise RuntimeError(f'Failed to open video capture from file: {video_path}')\n\n    video_writer = create_video_writer(video, video_path, output_name)\n    iter_frame_count = range(int(video.get(cv2.CAP_PROP_FRAME_COUNT)))\n\n    return video, video_writer, iter_frame_count\n\ndef draw_bounding_boxes(frame, detections, labels=None, processing_function=None):\n    \"\"\"\n    Draws bounding boxes around detected objects and adds a label and confidence score.\n\n    Args:\n        frame: The original captured frame from video source.\n        detections: A list of 
detected objects in the form [class, [box positions], confidence].\n        resize_factor: Resizing factor to scale box coordinates to output frame size.\n        labels: Dictionary of labels and colors keyed on the classification index.\n    \"\"\"\n    def _to_original_scale(boxes, frame_height, frame_width):\n        minmax_boxes = np.empty(shape=(4, ), dtype=np.int)\n\n        cx = boxes[0] * frame_width\n        cy = boxes[1] * frame_height\n        w = boxes[2] * frame_width\n        h = boxes[3] * frame_height\n        \n        minmax_boxes[0] = cx - w/2\n        minmax_boxes[1] = cy - h/2\n        minmax_boxes[2] = cx + w/2\n        minmax_boxes[3] = cy + h/2\n\n        return minmax_boxes\n\n    color = (0, 255, 0)\n    label_color = (125, 125, 125)\n\n    for i in range(len(detections)):\n        class_idx, box, confidence = [d for d in detections[i]]\n\n        # Obtain frame size and resized bounding box positions\n        frame_height, frame_width = frame.shape[:2]\n\n        x_min, y_min, x_max, y_max = _to_original_scale(box, frame_height, frame_width)\n        # Ensure box stays within the frame\n        x_min, y_min = max(0, x_min), max(0, y_min)\n        x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)\n\n        # Draw bounding box around detected object\n        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)\n\n        if processing_function:\n            roi_img = frame[y_min:y_max, x_min:x_max]\n            label = processing_function(roi_img)\n        else:\n            # Create label for detected object class\n            label = labels[class_idx].capitalize() \n            label = f'{label} {confidence * 100:.1f}%'\n\n        # Make sure label always stays on-screen\n        x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2]\n\n        lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text)\n        lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min<25 else y_min)\n        lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5)\n\n        # Add label and confidence value\n        cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1)\n        cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50, label_color, 1, cv2.LINE_AA)\n\ndef draw_classification(frame, classifications, labels):\n\n    for i in range(len(classifications)):\n        label_id, prob = classifications[i]\n        text = '%s : %.2f' % (labels[label_id], prob)\n        cv2.putText(frame, text, (10, 20*i+20), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2, True)\n\ndef get_source_encoding_int(video_capture):\n    return int(video_capture.get(cv2.CAP_PROP_FOURCC))\n\nclass BoundBox:\n    def __init__(self, x, y, w, h, c = None, classes = None):\n        self.x     = x\n        self.y     = y\n        self.w     = w\n        self.h     = h\n        \n        self.c     = c\n        self.classes = classes\n\n    def get_label(self):\n        return np.argmax(self.classes)\n    \n    def get_score(self):\n        return self.classes[self.get_label()]\n    \n    def iou(self, bound_box):\n        b1 = self.as_centroid()\n        b2 = bound_box.as_centroid()\n        return centroid_box_iou(b1, b2)\n\n    def as_centroid(self):\n        return np.array([self.x, self.y, self.w, self.h])\n    \n\ndef boxes_to_array(bound_boxes):\n    \"\"\"\n    # Args\n        boxes : list of BoundBox instances\n    \n    # Returns\n        centroid_boxes : (N, 4)\n        probs : (N, nb_classes)\n    
\"\"\"\n    temp_list = []\n    for box in bound_boxes:\n        temp_list.append([np.argmax(box.classes), np.asarray([box.x, box.y, box.w, box.h]), np.max(box.classes)])\n\n    return np.array(temp_list)\n\n\ndef nms_boxes(boxes, n_classes, nms_threshold=0.3, obj_threshold=0.3):\n    \"\"\"\n    # Args\n        boxes : list of BoundBox\n    \n    # Returns\n        boxes : list of BoundBox\n            non maximum supressed BoundBox instances\n    \"\"\"\n    # suppress non-maximal boxes\n    for c in range(n_classes):\n        sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes])))\n\n        for i in range(len(sorted_indices)):\n            index_i = sorted_indices[i]\n            \n            if boxes[index_i].classes[c] == 0: \n                continue\n            else:\n                for j in range(i+1, len(sorted_indices)):\n                    index_j = sorted_indices[j]\n\n                    if boxes[index_i].iou(boxes[index_j]) >= nms_threshold:\n                        boxes[index_j].classes[c] = 0\n    # remove the boxes which are less likely than a obj_threshold\n    boxes = [box for box in boxes if box.get_score() > obj_threshold]\n    return boxes\n\ndef centroid_box_iou(box1, box2):\n    def _interval_overlap(interval_a, interval_b):\n        x1, x2 = interval_a\n        x3, x4 = interval_b\n    \n        if x3 < x1:\n            if x4 < x1:\n                return 0\n            else:\n                return min(x2,x4) - x1\n        else:\n            if x2 < x3:\n                return 0\n            else:\n                return min(x2,x4) - x3\n    \n    _, _, w1, h1 = box1.reshape(-1,)\n    _, _, w2, h2 = box2.reshape(-1,)\n    x1_min, y1_min, x1_max, y1_max = to_minmax(box1.reshape(-1,4)).reshape(-1,)\n    x2_min, y2_min, x2_max, y2_max = to_minmax(box2.reshape(-1,4)).reshape(-1,)\n            \n    intersect_w = _interval_overlap([x1_min, x1_max], [x2_min, x2_max])\n    intersect_h = _interval_overlap([y1_min, y1_max], [y2_min, y2_max])\n    intersect = intersect_w * intersect_h\n    union = w1 * h1 + w2 * h2 - intersect\n    \n    return float(intersect) / union\n\ndef to_minmax(centroid_boxes):\n    centroid_boxes = centroid_boxes.astype(np.float)\n    minmax_boxes = np.zeros_like(centroid_boxes)\n    \n    cx = centroid_boxes[:,0]\n    cy = centroid_boxes[:,1]\n    w = centroid_boxes[:,2]\n    h = centroid_boxes[:,3]\n    \n    minmax_boxes[:,0] = cx - w/2\n    minmax_boxes[:,1] = cy - h/2\n    minmax_boxes[:,2] = cx + w/2\n    minmax_boxes[:,3] = cy + h/2\n    return minmax_boxes"
  },
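As an aside, the argmax-and-colorize step in decode_segnet() above can be illustrated on a tiny fake output. The sketch below uses a made-up 2x2, 3-class network output and arbitrary class colors; it mirrors the logic of decode_segnet() but is not itself part of the example scripts.

```python
import numpy as np

# Fake network output with a batch dimension: shape (1, 2, 2, 3).
netout = np.array([[[[0.9, 0.05, 0.05], [0.1, 0.8, 0.1]],
                    [[0.2, 0.2, 0.6],  [0.7, 0.2, 0.1]]]])
class_colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]   # made-up colors
labels = {0: 'background', 1: 'cat', 2: 'dog'}           # made-up labels

scores = netout[0]
seg_arr = scores.argmax(axis=2)            # per-pixel class index, shape (2, 2)

seg_img = np.zeros((scores.shape[0], scores.shape[1], 3))
for c in range(len(labels)):
    for ch in range(3):                    # paint each channel with the class color
        seg_img[:, :, ch] += ((seg_arr == c) * class_colors[c][ch]).astype('uint8')

print(seg_arr)   # [[0 1]
                 #  [2 0]]
```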
  {
    "path": "example_scripts/tensorflow_lite/segnet/segnet_file.py",
    "content": "import time\nimport argparse\nimport os\nimport cv2\nimport numpy as np\nfrom tqdm import tqdm\n\nimport random\nrandom.seed(0)\n\nfrom cv_utils import init_video_file_capture, decode_segnet, get_legends, overlay_seg_image, concat_lenends, preprocess\nfrom tflite_runtime.interpreter import Interpreter\n\ndef load_labels(path):\n    with open(path, 'r') as f:\n        return {i: line.strip() for i, line in enumerate(f.read().replace('\"','').split(','))}\n\nclass NetworkExecutor(object):\n\n    def __init__(self, model_file):\n\n        self.interpreter = Interpreter(model_file, num_threads=3)\n        self.interpreter.allocate_tensors()\n        _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']\n        self.tensor_index = self.interpreter.get_input_details()[0]['index']\n\n    def get_output_tensors(self):\n\n      output_details = self.interpreter.get_output_details()\n      tensor_indices = []\n      tensor_list = []\n\n      for output in output_details:\n            tensor = np.squeeze(self.interpreter.get_tensor(output['index']))\n            tensor_list.append(tensor)\n\n      return tensor_list\n\n    def run(self, image):\n        if image.shape[1:2] != (self.input_height, self.input_width):\n            img = cv2.resize(image, (self.input_width, self.input_height))\n        img = preprocess(img)\n        self.interpreter.set_tensor(self.tensor_index, img)\n        self.interpreter.invoke()\n        return self.get_output_tensors()\n\ndef main(args):\n    video, video_writer, frame_count = init_video_file_capture(args.file, 'segnet_demo')\n\n    if not os.path.exists(args.labels[0]):\n        labels = args.labels\n    else:   \n        labels = load_labels(args.labels[0])\n\n    class_colors = [(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) for _ in range(256)]\n    legend_img = get_legends(labels, class_colors)\n\n    frame_num = len(frame_count)\n    times = []\n\n    for _ in tqdm(frame_count, desc='Processing frames'):\n        frame_present, frame = video.read()\n        if not frame_present:\n            continue\n\n        start_time = time.time()\n        results = segmentation_network.run(frame)\n        elapsed_ms = (time.time() - start_time) * 1000\n\n        seg_img = decode_segnet(results, labels, class_colors)\n\n        if args.overlay == True:\n            seg_img = overlay_seg_image(frame, seg_img)\n\n        frame = concat_lenends(seg_img, legend_img)\n\n        times.append(elapsed_ms)\n        video_writer.write(frame)\n\n    print('Finished processing frames')\n    video.release(), video_writer.release()\n\n    print(\"Average time(ms): \", sum(times)//frame_num) \n    print(\"FPS: \", 1000.0 / (sum(times)//frame_num)) # FPS = 1 / time to process loop\n\nif __name__ == \"__main__\" :\n\n    print(\"OpenCV version: {}\".format(cv2. __version__))\n\n    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)\n    parser.add_argument('--model', help='File path of .tflite file.', required=True)\n    parser.add_argument('--labels', nargs=\"+\", help='File path of labels file.', required=True)\n    parser.add_argument('--overlay', help='Overlay original image.', default=True)\n    parser.add_argument('--file', help='File path of video file', default=None)\n    args = parser.parse_args()\n\n    segmentation_network = NetworkExecutor(args.model)\n\n    main(args)\n    \n"
  },
  {
    "path": "example_scripts/tensorflow_lite/segnet/segnet_stream.py",
    "content": "import time\nimport argparse\nimport os\nimport cv2\nimport numpy as np\n\nimport random\nrandom.seed(0)\n\nfrom cv_utils import decode_segnet, get_legends, overlay_seg_image, concat_lenends, preprocess\n\nfrom tflite_runtime.interpreter import Interpreter\nfrom flask import Flask, render_template, request, Response\n\napp = Flask (__name__, static_url_path = '')\n\ndef load_labels(path):\n    with open(path, 'r') as f:\n        return {i: line.strip() for i, line in enumerate(f.read().replace('\"','').split(','))}\n\nclass NetworkExecutor(object):\n\n    def __init__(self, model_file):\n\n        self.interpreter = Interpreter(model_file, num_threads=3)\n        self.interpreter.allocate_tensors()\n        _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']\n        self.tensor_index = self.interpreter.get_input_details()[0]['index']\n\n    def get_output_tensors(self):\n\n      output_details = self.interpreter.get_output_details()\n      tensor_indices = []\n      tensor_list = []\n\n      for output in output_details:\n            tensor = np.squeeze(self.interpreter.get_tensor(output['index']))\n            tensor_list.append(tensor)\n\n      return tensor_list\n\n    def run(self, image):\n        if image.shape[1:2] != (self.input_height, self.input_width):\n            img = cv2.resize(image, (self.input_width, self.input_height))\n        img = preprocess(img)\n        self.interpreter.set_tensor(self.tensor_index, img)\n        self.interpreter.invoke()\n        return self.get_output_tensors()\n\nclass Segnet(NetworkExecutor):\n\n    def __init__(self, label_file, model_file, overlay):\n        super().__init__(model_file)\n\n        if not os.path.exists(label_file):\n            self.labels = [label_file]\n        else:   \n            self.labels = load_labels(label_file)\n\n        self.class_colors = [(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) for _ in range(256)]\n        self.legend_img = get_legends(self.labels, self.class_colors)\n        self.overlay = overlay \n\n    def segment(self, frame):\n        start_time = time.time()\n        results = self.run(frame)\n        elapsed_ms = (time.time() - start_time) * 1000\n\n        seg_img = decode_segnet(results, self.labels, self.class_colors)\n\n        if args.overlay == True:\n            seg_img = overlay_seg_image(frame, seg_img)\n\n        frame = concat_lenends(seg_img, self.legend_img)\n\n        fps  = 1 / elapsed_ms*1000\n        print(\"Estimated frames per second : {0:.2f} Inference time: {1:.2f}\".format(fps, elapsed_ms))\n\n        return cv2.imencode('.jpg', frame)[1].tobytes()\n\n@app.route(\"/\")\ndef index():\n   return render_template('index.html', name = None)\n\ndef gen(camera):\n    while True:\n        frame = camera.get_frame()\n        image = segnet.segment(frame)\n        yield (b'--frame\\r\\n'+b'Content-Type: image/jpeg\\r\\n\\r\\n' + image + b'\\r\\n')\n\n@app.route('/video_feed')\ndef video_feed():\n    return Response(gen(Camera()), mimetype='multipart/x-mixed-replace; boundary=frame')\n\nparser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)\nparser.add_argument('--model', help='File path of .tflite file.', required=True)\nparser.add_argument('--labels', help='File path of labels file.', required=True)\nparser.add_argument('--overlay', help='Overlay original image.', default=True)\nparser.add_argument('--source', help='picamera or cv', default='cv')\nargs = 
parser.parse_args()\n\nif args.source == \"cv\":\n    from camera_opencv import Camera\n    source = 0\nelif args.source == \"picamera\":\n    from camera_pi import Camera\n    source = 0\n    \nCamera.set_video_source(source)\n\nsegnet = Segnet(args.labels, args.model, args.overlay)\n\nif __name__ == \"__main__\" :\n   app.run(host = '0.0.0.0', port = 5000, debug = True)\n    \n"
  },
  {
    "path": "example_scripts/tensorflow_lite/segnet/templates/index.html",
    "content": "<html>\n  <head>\n    <title>Video Streaming Demonstration</title>\n  </head>\n  <body>\n    <h1>Tflite Semantic Segmentation Demo</h1>\n    <img src=\"{{ url_for('video_feed') }}\">\n  </body>\n</html>\n"
  },
  {
    "path": "resources/aXeleRate_face_detector.ipynb",
    "content": "{\n  \"nbformat\": 4,\n  \"nbformat_minor\": 0,\n  \"metadata\": {\n    \"colab\": {\n      \"name\": \"aXeleRate_pascal20_detector.ipynb\",\n      \"private_outputs\": true,\n      \"provenance\": [],\n      \"collapsed_sections\": [],\n      \"mount_file_id\": \"1_yhmzOZKns_-h0GwyPu9YAT3K0WQ1PG8\",\n      \"authorship_tag\": \"ABX9TyObcL241uRYx/322b9y47kr\",\n      \"include_colab_link\": true\n    },\n    \"kernelspec\": {\n      \"name\": \"python3\",\n      \"display_name\": \"Python 3\"\n    },\n    \"accelerator\": \"GPU\"\n  },\n  \"cells\": [\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"view-in-github\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"<a href=\\\"https://colab.research.google.com/github/AIWintermuteAI/aXeleRate/blob/dev/resources/aXeleRate_face_detector.ipynb\\\" target=\\\"_parent\\\"><img src=\\\"https://colab.research.google.com/assets/colab-badge.svg\\\" alt=\\\"Open In Colab\\\"/></a>\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"hS9yMrWe02WQ\"\n      },\n      \"source\": [\n        \"## PASCAL-VOC Detection model Training and Inference\\n\",\n        \"\\n\",\n        \"In this notebook we will use axelerate, Keras-based framework for AI on the edge, to quickly setup model training and then after training session is completed convert it to .tflite and .kmodel formats.\\n\",\n        \"\\n\",\n        \"First, let's take care of some administrative details. \\n\",\n        \"\\n\",\n        \"1) Before we do anything, make sure you have choosen GPU as Runtime type (in Runtime - > Change Runtime type).\\n\",\n        \"\\n\",\n        \"2) We need to mount Google Drive for saving our model checkpoints and final converted model(s). Press on Mount Google Drive button in Files tab on your left. \\n\",\n        \"\\n\",\n        \"In the next cell we clone axelerate Github repository and import it. \\n\",\n        \"\\n\",\n        \"**It is possible to use pip install or python setup.py install, but in that case you will need to restart the enironment.** Since I'm trying to make the process as streamlined as possibile I'm using sys.path.append for import.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"y07yAbYbjV2s\"\n      },\n      \"source\": [\n        \"#we need imgaug 0.4 for image augmentations to work properly, see https://stackoverflow.com/questions/62580797/in-colab-doing-image-data-augmentation-with-imgaug-is-not-working-as-intended\\n\",\n        \"!pip uninstall -y imgaug && pip uninstall -y albumentations && pip install imgaug==0.4\\n\",\n        \"!git clone https://github.com/AIWintermuteAI/aXeleRate.git\\n\",\n        \"import sys\\n\",\n        \"sys.path.append('/content/aXeleRate')\\n\",\n        \"from axelerate import setup_training, setup_inference\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"5TBRMPZ83dRL\"\n      },\n      \"source\": [\n        \"At this step you typically need to get the dataset. 
You can use !wget command to download it from somewhere on the Internet or !cp to copy from My Drive as in this example\\n\",\n        \"```\\n\",\n        \"!cp -r /content/drive/'My Drive'/pascal_20_segmentation.zip .\\n\",\n        \"!unzip --qq pascal_20_segmentation.zip\\n\",\n        \"```\\n\",\n        \"For this notebook we will use PASCAL-VOC 2012 object detection dataset, which you can download here:\\n\",\n        \"\\n\",\n        \"http://host.robots.ox.ac.uk:8080/pascal/VOC/voc2012/index.html#devkit\\n\",\n        \"\\n\",\n        \"I split the dataset into training and validation using a simple Python script. Since most of the models trained with aXeleRate are to be run on embedded devices and thus have memory and latency constraints, the validation images are easier than most of the images in training set. The validation images include one(or many) instance of a particular class, no mixed classes in one image.\\n\",\n        \"\\n\",\n        \"Let's visualize our detection model test dataset. We use img_num=10 to show only first 10 images. Feel free to change the number to None to see all 100 images.\\n\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"_tpsgkGj7d79\"\n      },\n      \"source\": [\n        \"%matplotlib inline\\n\",\n        \"!gdown https://drive.google.com/uc?id=1uQtP-Yct0Uiz7bU7cwl9hJU0AVGkMgGZ  #subset of WideFace dataset\\n\",\n        \"\\n\",\n        \"!unzip --qq WideFace_large.zip\\n\",\n        \"\\n\",\n        \"from axelerate.networks.common_utils.augment import visualize_detection_dataset\\n\",\n        \"\\n\",\n        \"visualize_detection_dataset(img_folder='WideFace_large/imgs_validation', ann_folder='WideFace_large/anns_validation', num_imgs=10, img_size=224, augment=True)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"S1oqdtbr7VLB\"\n      },\n      \"source\": [\n        \"Next step is defining a config dictionary. 
Most lines are self-explanatory.\\n\",\n        \"\\n\",\n        \"Type is model frontend - Classifier, Detector or Segnet\\n\",\n        \"\\n\",\n        \"Architecture is model backend (feature extractor) \\n\",\n        \"\\n\",\n        \"- Full Yolo\\n\",\n        \"- Tiny Yolo\\n\",\n        \"- MobileNet1_0\\n\",\n        \"- MobileNet7_5 \\n\",\n        \"- MobileNet5_0 \\n\",\n        \"- MobileNet2_5 \\n\",\n        \"- SqueezeNet\\n\",\n        \"- NASNetMobile\\n\",\n        \"- DenseNet121\\n\",\n        \"- ResNet50\\n\",\n        \"\\n\",\n        \"For more information on anchors, please read here\\n\",\n        \"https://github.com/pjreddie/darknet/issues/568\\n\",\n        \"\\n\",\n        \"Labels are labels present in your dataset.\\n\",\n        \"IMPORTANT: Please, list all the labels present in the dataset.\\n\",\n        \"\\n\",\n        \"object_scale determines how much to penalize wrong prediction of confidence of object predictors\\n\",\n        \"\\n\",\n        \"no_object_scale determines how much to penalize wrong prediction of confidence of non-object predictors\\n\",\n        \"\\n\",\n        \"coord_scale determines how much to penalize wrong position and size predictions (x, y, w, h)\\n\",\n        \"\\n\",\n        \"class_scale determines how much to penalize wrong class prediction\\n\",\n        \"\\n\",\n        \"For converter type you can choose the following:\\n\",\n        \"\\n\",\n        \"'k210', 'tflite_fullint', 'tflite_dynamic', 'edgetpu', 'openvino', 'onnx'\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"uruWpeGRf6Qi\"\n      },\n      \"source\": [\n        \"config = {\\n\",\n        \"        \\\"model\\\":{\\n\",\n        \"            \\\"type\\\":                 \\\"Detector\\\",\\n\",\n        \"            \\\"architecture\\\":         \\\"MobileNet2_5\\\",\\n\",\n        \"            \\\"input_size\\\":           224,\\n\",\n        \"            \\\"anchors\\\":              [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828],\\n\",\n        \"            \\\"labels\\\":               [\\\"face\\\"],\\n\",\n        \"            \\\"coord_scale\\\" : \\t\\t1.0,\\n\",\n        \"            \\\"class_scale\\\" : \\t\\t1.0,\\n\",\n        \"            \\\"object_scale\\\" : \\t\\t5.0,\\n\",\n        \"            \\\"no_object_scale\\\" : \\t1.0\\n\",\n        \"        },\\n\",\n        \"        \\\"weights\\\" : {\\n\",\n        \"            \\\"full\\\":   \\t\\t\\t\\t\\\"\\\",\\n\",\n        \"            \\\"backend\\\":   \\t\\t    \\\"imagenet\\\"\\n\",\n        \"        },\\n\",\n        \"        \\\"train\\\" : {\\n\",\n        \"            \\\"actual_epoch\\\":         30,\\n\",\n        \"            \\\"train_image_folder\\\":   \\\"WideFace_large/imgs\\\",\\n\",\n        \"            \\\"train_annot_folder\\\":   \\\"WideFace_large/anns\\\",\\n\",\n        \"            \\\"train_times\\\":          1,\\n\",\n        \"            \\\"valid_image_folder\\\":   \\\"WideFace_large/imgs_validation\\\",\\n\",\n        \"            \\\"valid_annot_folder\\\":   \\\"WideFace_large/anns_validation\\\",\\n\",\n        \"            \\\"valid_times\\\":          1,\\n\",\n        \"            \\\"valid_metric\\\":         \\\"mAP\\\",\\n\",\n        \"            \\\"batch_size\\\":           32,\\n\",\n        \"            \\\"learning_rate\\\":        1e-3,\\n\",\n        \"            \\\"saved_folder\\\":   
\\t\\tF\\\"/content/drive/MyDrive/WideFace_large\\\",\\n\",\n        \"            \\\"first_trainable_layer\\\": \\\"\\\",\\n\",\n        \"            \\\"augumentation\\\":\\t\\t\\t\\tFalse,\\n\",\n        \"            \\\"is_only_detect\\\" : \\t\\t  False\\n\",\n        \"        },\\n\",\n        \"        \\\"converter\\\" : {\\n\",\n        \"            \\\"type\\\":   \\t\\t\\t\\t[\\\"tflite\\\"]\\n\",\n        \"        }\\n\",\n        \"    }\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"kobC_7gd5mEu\"\n      },\n      \"source\": [\n        \"Let's check what GPU we have been assigned in this Colab session, if any.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"rESho_T70BWq\"\n      },\n      \"source\": [\n        \"from tensorflow.python.client import device_lib\\n\",\n        \"device_lib.list_local_devices()\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"i0Fc61WrTxh1\"\n      },\n      \"source\": [\n        \"Also, let's open Tensorboard, where we will be able to watch model training progress in real time. Training and validation logs also will be saved in project folder.\\n\",\n        \"Since there are no logs before we start the training, tensorboard will be empty. Refresh it after first epoch.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"jsGp9JvjTzzp\"\n      },\n      \"source\": [\n        \"%load_ext tensorboard\\n\",\n        \"%tensorboard --logdir logs\\n\",\n        \"!sleep 10\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"cWyKjw-b5_yp\"\n      },\n      \"source\": [\n        \"Finally we start the training by passing config dictionary we have defined earlier to setup_training function. The function will start the training with  Reduce Learning Rate on Plateau and save on best mAP callbacks. Every epoch mAP of the model predictions is measured on the validation dataset. If you have specified the converter type in the config, after the training has stopped the script will convert the best model into the format you have specified in config and save it to the project folder.\\n\",\n        \"\\n\",\n        \"Let's train for one epoch to see how the whole pipeline works.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"deYD3cwukHsj\"\n      },\n      \"source\": [\n        \"from keras import backend as K \\n\",\n        \"K.clear_session()\\n\",\n        \"model_path = setup_training(config_dict=config)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"ypTe3GZI619O\"\n      },\n      \"source\": [\n        \"After training it is good to check the actual perfomance of your model by doing inference on your validation dataset and visualizing results. 
This is exactly what the next block does.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"jE7pTYmZN7Pi\"\n      },\n      \"source\": [\n        \"%matplotlib inline\\n\",\n        \"from keras import backend as K \\n\",\n        \"K.clear_session()\\n\",\n        \"setup_inference(config, model_path)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"nKsxhdPvzrD8\"\n      },\n      \"source\": [\n        \"If you need to convert the trained model to other formats, for example for inference with Edge TPU or OpenCV AI Kit, you can do it with the following commands. Specify the converter type, backend and folder with calibration images (normally your validation image folder).\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"awR7r4ILzrmb\"\n      },\n      \"source\": [\n        \"from axelerate.networks.common_utils.convert import Converter\\n\",\n        \"converter = Converter('openvino', 'MobileNet2_5', 'WideFace_large/imgs_validation')\\n\",\n        \"converter.convert_model(model_path)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"5YuVe2VD11cd\"\n      },\n      \"source\": [\n        \"Good luck and happy training! Have a look at these articles, which will help you get the most out of Google Colab or connect to a local runtime if no GPUs are available:\\n\",\n        \"\\n\",\n        \"https://medium.com/@oribarel/getting-the-most-out-of-your-google-colab-2b0585f82403\\n\",\n        \"\\n\",\n        \"https://research.google.com/colaboratory/local-runtimes.html\"\n      ]\n    }\n  ]\n}"
  },
  {
    "path": "resources/aXeleRate_human_segmentation.ipynb",
    "content": "{\n  \"nbformat\": 4,\n  \"nbformat_minor\": 0,\n  \"metadata\": {\n    \"colab\": {\n      \"name\": \"aXeleRate_human_segmentation.ipynb\",\n      \"private_outputs\": true,\n      \"provenance\": [],\n      \"collapsed_sections\": [],\n      \"mount_file_id\": \"101-DJzi5oWG7njbiibTdxgmG67ku_62z\",\n      \"authorship_tag\": \"ABX9TyMYA8L5Gv+PoKfxaPtba9us\",\n      \"include_colab_link\": true\n    },\n    \"kernelspec\": {\n      \"name\": \"python3\",\n      \"display_name\": \"Python 3\"\n    },\n    \"accelerator\": \"GPU\"\n  },\n  \"cells\": [\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"view-in-github\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"<a href=\\\"https://colab.research.google.com/github/AIWintermuteAI/aXeleRate/blob/master/resources/aXeleRate_human_segmentation.ipynb\\\" target=\\\"_parent\\\"><img src=\\\"https://colab.research.google.com/assets/colab-badge.svg\\\" alt=\\\"Open In Colab\\\"/></a>\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"hS9yMrWe02WQ\"\n      },\n      \"source\": [\n        \"## Segmentation model Training and Inference\\n\",\n        \"\\n\",\n        \"In this notebook we will use axelerate Keras-based framework for AI on the edge to quickly setup model training and then after training session is completed convert it to .tflite and .kmodel formats.\\n\",\n        \"\\n\",\n        \"First, let's take care of some administrative details. \\n\",\n        \"\\n\",\n        \"1) Before we do anything, make sure you have choosen GPU as Runtime type (in Runtime - > Change Runtime type).\\n\",\n        \"\\n\",\n        \"2) We need to mount Google Drive for saving our model checkpoints and final converted model(s). Press on Mount Google Drive button in Files tab on your left. \\n\",\n        \"\\n\",\n        \"In the next cell we clone axelerate Github repository and import it. \\n\",\n        \"\\n\",\n        \"**It is possible to use pip install or python setup.py install, but in that case you will need to restart the enironment.** Since I'm trying to make the process as streamlined as possibile I'm using sys.path.append for import.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"y07yAbYbjV2s\"\n      },\n      \"source\": [\n        \"#we need imgaug 0.4 for image augmentations to work properly, see https://stackoverflow.com/questions/62580797/in-colab-doing-image-data-augmentation-with-imgaug-is-not-working-as-intended\\n\",\n        \"!pip uninstall -y imgaug && pip uninstall -y albumentations && pip install imgaug==0.4\\n\",\n        \"!git clone https://github.com/AIWintermuteAI/aXeleRate.git\\n\",\n        \"import sys\\n\",\n        \"sys.path.append('/content/aXeleRate')\\n\",\n        \"from axelerate import setup_training, setup_inference\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"5TBRMPZ83dRL\"\n      },\n      \"source\": [\n        \"At this step you typically need to get the dataset. 
You can use !wget command to download it from somewhere on the Internet or !cp to copy from My Drive as in this example\\n\",\n        \"```\\n\",\n        \"!cp -r /content/drive/'My Drive'/pascal_20_segmentation.zip .\\n\",\n        \"!unzip --qq pascal_20_segmentation.zip\\n\",\n        \"```\\n\",\n        \"For this notebook we'll download the dataset I shared on Google Drive - it is a combination of two dataset for human image segmentation:\\n\",\n        \"\\n\",\n        \"[Human Segmentation Dataset by Vikram Shenoy](https://github.com/VikramShenoy97/Human-Segmentation-Dataset)\\n\",\n        \"\\n\",\n        \"[Human Parsing Dataset](https://github.com/lemondan/HumanParsing-Dataset)\\n\",\n        \"\\n\",\n        \"For semantic segmentation the dataset consists of RGB images and segmentation masks. \\n\",\n        \"A few things to keep in mind:\\n\",\n        \"\\n\",\n        \"- The filenames of the annotation images should be same as the filenames of the RGB images.\\n\",\n        \"\\n\",\n        \"- The dimensions of the annotation image for the corresponding RGB image should be same.\\n\",\n        \"\\n\",\n        \"- For each pixel in the RGB image, the class label of that pixel in the annotation image would be the value of the annotation image pixel.\\n\",\n        \"\\n\",\n        \"Let's visualize our semantic segmentation test dataset and see what that means in practice.\\n\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"_tpsgkGj7d79\"\n      },\n      \"source\": [\n        \"%matplotlib inline\\n\",\n        \"!gdown https://drive.google.com/uc?id=1NlKgS_GVusRhEFLqwm0EOP2i74z1JMHX\\n\",\n        \"!gdown https://drive.google.com/uc?id=18z2MLv9M6ARVE1KTHyoAqJQZOfSJWc57\\n\",\n        \"!unzip --qq human_segmentation.zip\\n\",\n        \"\\n\",\n        \"from axelerate.networks.common_utils.augment import visualize_segmentation_dataset\\n\",\n        \"\\n\",\n        \"visualize_segmentation_dataset(images_path = 'human_segmentation/imgs_validation', segs_path = 'human_segmentation/anns_validation', num_imgs = 10, img_size=224, augment=True, n_classes=2)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"S1oqdtbr7VLB\"\n      },\n      \"source\": [\n        \"Next step is defining a config dictionary. 
Most lines are self-explanatory.\\n\",\n        \"\\n\",\n        \"Type is model frontend - Classifier, Detector or Segnet\\n\",\n        \"\\n\",\n        \"Architecture is model backend (feature extractor) \\n\",\n        \"\\n\",\n        \"- Full Yolo\\n\",\n        \"- Tiny Yolo\\n\",\n        \"- MobileNet1_0\\n\",\n        \"- MobileNet7_5 \\n\",\n        \"- MobileNet5_0 \\n\",\n        \"- MobileNet2_5 \\n\",\n        \"- SqueezeNet\\n\",\n        \"- NASNetMobile\\n\",\n        \"- ResNet50\\n\",\n        \"- DenseNet121\\n\",\n        \"\\n\",\n        \"For converter type you can choose the following:\\n\",\n        \"\\n\",\n        \"'k210', 'tflite_fullint', 'tflite_dynamic', 'edgetpu', 'openvino', 'onnx'\\n\",\n        \"\\n\",\n        \"**Since it is an example notebook, we will use pretrained weights and set all layers of the model to be \\\"frozen\\\"(non-trainable).** \\n\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"Jw4q6_MsegD2\"\n      },\n      \"source\": [\n        \"config = {\\n\",\n        \"            \\\"model\\\" : {\\n\",\n        \"                \\\"type\\\":                 \\\"SegNet\\\",\\n\",\n        \"                \\\"architecture\\\":         \\\"MobileNet5_0\\\",\\n\",\n        \"                \\\"input_size\\\":           224,\\n\",\n        \"                \\\"n_classes\\\" : \\t\\t2\\n\",\n        \"            },\\n\",\n        \"            \\\"weights\\\" : {\\n\",\n        \"            \\\"full\\\":   \\t\\t\\t\\t\\\"/content/Segnet_best_val_loss.h5\\\",\\n\",\n        \"            \\\"backend\\\":   \\t\\t    \\\"imagenet\\\"\\n\",\n        \"        },\\n\",\n        \"            \\\"train\\\" : {\\n\",\n        \"                \\\"actual_epoch\\\":         1,\\n\",\n        \"                \\\"train_image_folder\\\":   \\\"human_segmentation/imgs\\\",\\n\",\n        \"                \\\"train_annot_folder\\\":   \\\"human_segmentation/anns\\\",\\n\",\n        \"                \\\"train_times\\\":          1,\\n\",\n        \"                \\\"valid_image_folder\\\":   \\\"human_segmentation/imgs_validation\\\",\\n\",\n        \"                \\\"valid_annot_folder\\\":   \\\"human_segmentation/anns_validation\\\",\\n\",\n        \"                \\\"valid_times\\\":          1,\\n\",\n        \"                \\\"valid_metric\\\":         \\\"val_loss\\\",\\n\",\n        \"                \\\"batch_size\\\":           32,\\n\",\n        \"                \\\"learning_rate\\\":        0.0,\\n\",\n        \"                \\\"saved_folder\\\":   \\t\\tF\\\"/content/drive/MyDrive/projects/human_segmentation\\\",\\n\",\n        \"                \\\"first_trainable_layer\\\": \\\"activation\\\",\\n\",\n        \"                \\\"ignore_zero_class\\\":    False,\\n\",\n        \"                \\\"augmentation\\\":\\t\\t\\t\\tTrue\\n\",\n        \"            },\\n\",\n        \"            \\\"converter\\\" : {\\n\",\n        \"                \\\"type\\\":   \\t\\t\\t\\t[]\\n\",\n        \"            }\\n\",\n        \"        }\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"kobC_7gd5mEu\"\n      },\n      \"source\": [\n        \"Let's check what GPU we have been assigned in this Colab session, if any.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"rESho_T70BWq\"\n      },\n      
\"source\": [\n        \"from tensorflow.python.client import device_lib\\n\",\n        \"device_lib.list_local_devices()\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"WB9096YQUQtb\"\n      },\n      \"source\": [\n        \"Also, let's open Tensorboard, where we will be able to watch model training progress in real time. Training and validation logs also will be saved in project folder.\\n\",\n        \"Since there are no logs before we start the training, tensorboard will be empty. Refresh it after first epoch.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"k6P31xsjUSzi\"\n      },\n      \"source\": [\n        \"%load_ext tensorboard\\n\",\n        \"%tensorboard --logdir logs\\n\",\n        \"!sleep 10\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"cWyKjw-b5_yp\"\n      },\n      \"source\": [\n        \"Finally we start the training by passing config dictionary we have defined earlier to setup_training function. The function will start the training with Checkpoint, Reduce Learning Rate on Plateu and Early Stopping callbacks. If you have specified the converter type in the config, after the training has stopped the script will convert the best model into the format you have specified in config and save it to the project folder.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"deYD3cwukHsj\"\n      },\n      \"source\": [\n        \"from keras import backend as K \\n\",\n        \"K.clear_session()\\n\",\n        \"model_path = setup_training(config_dict = config)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"ypTe3GZI619O\"\n      },\n      \"source\": [\n        \"After training it is good to check the actual perfomance of your model by doing inference on your validation dataset and visualizing results. This is exactly what next block does. Our model used pre-trained weights and since we set learning rate to 0, we are just observing the perfomance of the model that was trained before.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"jE7pTYmZN7Pi\"\n      },\n      \"source\": [\n        \"%matplotlib inline\\n\",\n        \"from keras import backend as K \\n\",\n        \"K.clear_session()\\n\",\n        \"setup_inference(config, model_path)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"23ByTRGE17g-\"\n      },\n      \"source\": [\n        \"If you need to convert trained model to other formats, for example for inference with OpenCV AI Kit or Raspberry Pi(with quantized tflite model), you can do it with following commands. 
Specify the converter type, backend and folder with calbiration images(normally your validation image folder).\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"gXtqAape18K0\"\n      },\n      \"source\": [\n        \"from axelerate.networks.common_utils.convert import Converter\\n\",\n        \"converter = Converter('k210', 'MobileNet5_0', 'human_segmentation/imgs_validation')\\n\",\n        \"converter.convert_model(model_path)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"crJm0Ttw10g1\"\n      },\n      \"source\": [\n        \"To train the model from scratch use the following config and then run the cells with training and (optinally) inference functions again.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"0r9IKzfQ11UJ\"\n      },\n      \"source\": [\n        \"config = {\\n\",\n        \"            \\\"model\\\" : {\\n\",\n        \"                \\\"type\\\":                 \\\"SegNet\\\",\\n\",\n        \"                \\\"architecture\\\":         \\\"MobileNet5_0\\\",\\n\",\n        \"                \\\"input_size\\\":           224,\\n\",\n        \"                \\\"n_classes\\\" : \\t\\t2\\n\",\n        \"            },\\n\",\n        \"            \\\"weights\\\" : {\\n\",\n        \"            \\\"full\\\":   \\t\\t\\t\\t\\\"\\\",\\n\",\n        \"            \\\"backend\\\":   \\t\\t    \\\"imagenet\\\"\\n\",\n        \"        },\\n\",\n        \"            \\\"train\\\" : {\\n\",\n        \"                \\\"actual_epoch\\\":         100,\\n\",\n        \"                \\\"train_image_folder\\\":   \\\"human_segmentation/imgs\\\",\\n\",\n        \"                \\\"train_annot_folder\\\":   \\\"human_segmentation/anns\\\",\\n\",\n        \"                \\\"train_times\\\":          1,\\n\",\n        \"                \\\"valid_image_folder\\\":   \\\"human_segmentation/imgs_validation\\\",\\n\",\n        \"                \\\"valid_annot_folder\\\":   \\\"human_segmentation/anns_validation\\\",\\n\",\n        \"                \\\"valid_times\\\":          1,\\n\",\n        \"                \\\"valid_metric\\\":         \\\"val_loss\\\",\\n\",\n        \"                \\\"batch_size\\\":           32,\\n\",\n        \"                \\\"learning_rate\\\":        1e-3,\\n\",\n        \"                \\\"saved_folder\\\":   \\t\\tF\\\"/content/drive/MyDrive/projects/human_segmentation\\\",\\n\",\n        \"                \\\"first_trainable_layer\\\": \\\"\\\",\\n\",\n        \"                \\\"ignore_zero_class\\\":    False,\\n\",\n        \"                \\\"augumentation\\\":\\t\\t\\t\\tTrue\\n\",\n        \"            },\\n\",\n        \"            \\\"converter\\\" : {\\n\",\n        \"                \\\"type\\\":   \\t\\t\\t\\t[\\\"k210\\\",\\\"tflite\\\"]\\n\",\n        \"            }\\n\",\n        \"        }\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"uxuW0Bh92FA9\"\n      },\n      \"source\": [\n        \"from keras import backend as K \\n\",\n        \"K.clear_session()\\n\",\n        \"model_path = setup_training(config_dict=config)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": 
\"IK8RLSzA2FKZ\"\n      },\n      \"source\": [\n        \"%matplotlib inline\\n\",\n        \"from keras import backend as K \\n\",\n        \"K.clear_session()\\n\",\n        \"setup_inference(config, model_path)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"5YuVe2VD11cd\"\n      },\n      \"source\": [\n        \"Good luck and happy training! Have a look at these articles, that would allow you to get the most of Google Colab or connect to local runtime if there are no GPUs available;\\n\",\n        \"\\n\",\n        \"https://medium.com/@oribarel/getting-the-most-out-of-your-google-colab-2b0585f82403\\n\",\n        \"\\n\",\n        \"https://research.google.com/colaboratory/local-runtimes.html\"\n      ]\n    }\n  ]\n}"
  },
  {
    "path": "resources/aXeleRate_mark_detector.ipynb",
    "content": "{\n  \"nbformat\": 4,\n  \"nbformat_minor\": 0,\n  \"metadata\": {\n    \"colab\": {\n      \"name\": \"aXeleRate_mark_detector.ipynb\",\n      \"private_outputs\": true,\n      \"provenance\": [],\n      \"collapsed_sections\": [],\n      \"mount_file_id\": \"1tDQwRgaEZqe_E-7g2kgi9QQ9FNl6e_2w\",\n      \"authorship_tag\": \"ABX9TyOlFv83Dt6/Ug76a0IqmYTT\",\n      \"include_colab_link\": true\n    },\n    \"kernelspec\": {\n      \"name\": \"python3\",\n      \"display_name\": \"Python 3\"\n    },\n    \"accelerator\": \"GPU\"\n  },\n  \"cells\": [\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"view-in-github\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"<a href=\\\"https://colab.research.google.com/github/AIWintermuteAI/aXeleRate/blob/dev/resources/aXeleRate_mark_detector.ipynb\\\" target=\\\"_parent\\\"><img src=\\\"https://colab.research.google.com/assets/colab-badge.svg\\\" alt=\\\"Open In Colab\\\"/></a>\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"hS9yMrWe02WQ\"\n      },\n      \"source\": [\n        \"## M.A.R.K. Detection model Training and Inference\\n\",\n        \"\\n\",\n        \"In this notebook we will use axelerate, Keras-based framework for AI on the edge, to quickly setup model training and then after training session is completed convert it to .tflite and .kmodel formats.\\n\",\n        \"\\n\",\n        \"First, let's take care of some administrative details. \\n\",\n        \"\\n\",\n        \"1) Before we do anything, make sure you have choosen GPU as Runtime type (in Runtime - > Change Runtime type).\\n\",\n        \"\\n\",\n        \"2) We need to mount Google Drive for saving our model checkpoints and final converted model(s). Press on Mount Google Drive button in Files tab on your left. \\n\",\n        \"\\n\",\n        \"In the next cell we clone axelerate Github repository and import it. \\n\",\n        \"\\n\",\n        \"**It is possible to use pip install or python setup.py install, but in that case you will need to restart the enironment.** Since I'm trying to make the process as streamlined as possibile I'm using sys.path.append for import.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"y07yAbYbjV2s\"\n      },\n      \"source\": [\n        \"%load_ext tensorboard\\n\",\n        \"#we need imgaug 0.4 for image augmentations to work properly, see https://stackoverflow.com/questions/62580797/in-colab-doing-image-data-augmentation-with-imgaug-is-not-working-as-intended\\n\",\n        \"!pip uninstall -y imgaug && pip uninstall -y albumentations && pip install imgaug==0.4\\n\",\n        \"!git clone https://github.com/AIWintermuteAI/aXeleRate.git\\n\",\n        \"import sys\\n\",\n        \"sys.path.append('/content/aXeleRate')\\n\",\n        \"from axelerate import setup_training, setup_inference\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"5TBRMPZ83dRL\"\n      },\n      \"source\": [\n        \"At this step you typically need to get the dataset. 
You can use !wget command to download it from somewhere on the Internet or !cp to copy from My Drive as in this example\\n\",\n        \"```\\n\",\n        \"!cp -r /content/drive/'My Drive'/pascal_20_segmentation.zip .\\n\",\n        \"!unzip --qq pascal_20_segmentation.zip\\n\",\n        \"```\\n\",\n        \"Dataset preparation and postprocessing are discussed in the article here:\\n\",\n        \"\\n\",\n        \"The annotation tool I use is LabelImg\\n\",\n        \"https://github.com/tzutalin/labelImg\\n\",\n        \"\\n\",\n        \"Let's visualize our detection model test dataset. There are images in validation folder with corresponding annotations in PASCAL-VOC format in validation annotations folder.\\n\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"_tpsgkGj7d79\"\n      },\n      \"source\": [\n        \"%matplotlib inline\\n\",\n        \"!gdown https://drive.google.com/uc?id=1s2h6DI_1tHpLoUWRc_SavvMF9jYG8XSi #dataset\\n\",\n        \"!gdown https://drive.google.com/uc?id=1-bDRZ9Z2T81SfwhHEfZIMFG7FtMQ5ZiZ #pre-trained model\\n\",\n        \"\\n\",\n        \"!unzip --qq mark_dataset.zip\\n\",\n        \"\\n\",\n        \"from axelerate.networks.common_utils.augment import visualize_detection_dataset\\n\",\n        \"\\n\",\n        \"visualize_detection_dataset(img_folder='mark_detection/imgs_validation', ann_folder='mark_detection/ann_validation', num_imgs=10, img_size=224, augment=True)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"S1oqdtbr7VLB\"\n      },\n      \"source\": [\n        \"Next step is defining a config dictionary. Most lines are self-explanatory.\\n\",\n        \"\\n\",\n        \"Type is model frontend - Classifier, Detector or Segnet\\n\",\n        \"\\n\",\n        \"Architecture is model backend (feature extractor) \\n\",\n        \"\\n\",\n        \"- Full Yolo\\n\",\n        \"- Tiny Yolo\\n\",\n        \"- MobileNet1_0\\n\",\n        \"- MobileNet7_5 \\n\",\n        \"- MobileNet5_0 \\n\",\n        \"- MobileNet2_5 \\n\",\n        \"- SqueezeNet\\n\",\n        \"- NASNetMobile\\n\",\n        \"- DenseNet121\\n\",\n        \"- ResNet50\\n\",\n        \"\\n\",\n        \"For more information on anchors, please read here\\n\",\n        \"https://github.com/pjreddie/darknet/issues/568\\n\",\n        \"\\n\",\n        \"Labels are labels present in your dataset.\\n\",\n        \"IMPORTANT: Please, list all the labels present in the dataset.\\n\",\n        \"\\n\",\n        \"object_scale determines how much to penalize wrong prediction of confidence of object predictors\\n\",\n        \"\\n\",\n        \"no_object_scale determines how much to penalize wrong prediction of confidence of non-object predictors\\n\",\n        \"\\n\",\n        \"coord_scale determines how much to penalize wrong position and size predictions (x, y, w, h)\\n\",\n        \"\\n\",\n        \"class_scale determines how much to penalize wrong class prediction\\n\",\n        \"\\n\",\n        \"For converter type you can choose the following:\\n\",\n        \"\\n\",\n        \"'k210', 'tflite_fullint', 'tflite_dynamic', 'edgetpu', 'openvino', 'onnx'\\n\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"EkASgMdcj3Nu\"\n      },\n      \"source\": [\n        \"## Parameters for Person Detection\\n\",\n        \"\\n\",\n        \"K210, which is where we will run the 
network, has constrained memory (5.5 RAM) available, so with Micropython firmware, the largest model you can run is about 2 MB, which limits our architecture choice to Tiny Yolo, MobileNet(up to 0.75 alpha) and SqueezeNet. Out of these 3 architectures, only one comes with pre-trained model - MobileNet. So, to save the training time we will use Mobilenet with alpha 0.75, which has ... parameters. For objects that do not have that much variety, you can use MobileNet with lower alpha, down to 0.25.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"Jw4q6_MsegD2\"\n      },\n      \"source\": [\n        \"config = {\\n\",\n        \"        \\\"model\\\":{\\n\",\n        \"            \\\"type\\\":                 \\\"Detector\\\",\\n\",\n        \"            \\\"architecture\\\":         \\\"MobileNet5_0\\\",\\n\",\n        \"            \\\"input_size\\\":           224,\\n\",\n        \"            \\\"anchors\\\":              [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828],\\n\",\n        \"            \\\"labels\\\":               [\\\"mark\\\"],\\n\",\n        \"            \\\"coord_scale\\\" : \\t\\t1.0,\\n\",\n        \"            \\\"class_scale\\\" : \\t\\t1.0,\\n\",\n        \"            \\\"object_scale\\\" : \\t\\t5.0,\\n\",\n        \"            \\\"no_object_scale\\\" : \\t1.0\\n\",\n        \"        },\\n\",\n        \"        \\\"weights\\\" : {\\n\",\n        \"            \\\"full\\\":   \\t\\t\\t\\t\\\"\\\",\\n\",\n        \"            \\\"backend\\\":   \\t\\t    \\\"imagenet\\\"\\n\",\n        \"        },\\n\",\n        \"        \\\"train\\\" : {\\n\",\n        \"            \\\"actual_epoch\\\":         50,\\n\",\n        \"            \\\"train_image_folder\\\":   \\\"mark_detection/imgs\\\",\\n\",\n        \"            \\\"train_annot_folder\\\":   \\\"mark_detection/ann\\\",\\n\",\n        \"            \\\"train_times\\\":          1,\\n\",\n        \"            \\\"valid_image_folder\\\":   \\\"mark_detection/imgs_validation\\\",\\n\",\n        \"            \\\"valid_annot_folder\\\":   \\\"mark_detection/ann_validation\\\",\\n\",\n        \"            \\\"valid_times\\\":          1,\\n\",\n        \"            \\\"valid_metric\\\":         \\\"mAP\\\",\\n\",\n        \"            \\\"batch_size\\\":           32,\\n\",\n        \"            \\\"learning_rate\\\":        1e-3,\\n\",\n        \"            \\\"saved_folder\\\":   \\t\\tF\\\"/content/drive/MyDrive/mark_detector\\\",\\n\",\n        \"            \\\"first_trainable_layer\\\": \\\"\\\",\\n\",\n        \"            \\\"augumentation\\\":\\t\\t\\t\\tTrue,\\n\",\n        \"            \\\"is_only_detect\\\" : \\t\\tFalse\\n\",\n        \"        },\\n\",\n        \"        \\\"converter\\\" : {\\n\",\n        \"            \\\"type\\\":   \\t\\t\\t\\t[\\\"k210\\\",\\\"tflite\\\"]\\n\",\n        \"        }\\n\",\n        \"    }\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"kobC_7gd5mEu\"\n      },\n      \"source\": [\n        \"Let's check what GPU we have been assigned in this Colab session, if any.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"rESho_T70BWq\"\n      },\n      \"source\": [\n        \"from tensorflow.python.client import device_lib\\n\",\n        \"device_lib.list_local_devices()\"\n      ],\n      
\"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"-oJ6i53GG-I0\"\n      },\n      \"source\": [\n        \"Also, let's open Tensorboard, where we will be able to watch model training progress in real time. Training and validation logs also will be saved in project folder.\\n\",\n        \"Since there are no logs before we start the training, tensorboard will be empty. Refresh it after first epoch.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"d8l_DDM4G_aK\"\n      },\n      \"source\": [\n        \"%tensorboard --logdir logs\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"cWyKjw-b5_yp\"\n      },\n      \"source\": [\n        \"Finally we start the training by passing config dictionary we have defined earlier to setup_training function. The function will start the training with Checkpoint, Reduce Learning Rate on Plateau and Early Stopping callbacks. After the training has stopped, it will convert the best model into the format you have specified in config and save it to the project folder.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"deYD3cwukHsj\"\n      },\n      \"source\": [\n        \"from keras import backend as K \\n\",\n        \"K.clear_session()\\n\",\n        \"model_path = setup_training(config_dict=config)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"ypTe3GZI619O\"\n      },\n      \"source\": [\n        \"After training it is good to check the actual perfomance of your model by doing inference on your validation dataset and visualizing results. This is exactly what next block does. Obviously since our model has only trained on a few images the results are far from stellar, but if you have a good dataset, you'll have better results.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"jE7pTYmZN7Pi\"\n      },\n      \"source\": [\n        \"from keras import backend as K \\n\",\n        \"K.clear_session()\\n\",\n        \"setup_inference(config, model_path)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"5YuVe2VD11cd\"\n      },\n      \"source\": [\n        \"My end results are:\\n\",\n        \"\\n\",\n        \"{'fscore': 0.942528735632184, 'precision': 0.9318181818181818, 'recall': 0.9534883720930233}\\n\",\n        \"\\n\",\n        \"**You can obtain these results by loading a pre-trained model.**\\n\",\n        \"\\n\",\n        \"Good luck and happy training! Have a look at these articles, that would allow you to get the most of Google Colab or connect to local runtime if there are no GPUs available;\\n\",\n        \"\\n\",\n        \"https://medium.com/@oribarel/getting-the-most-out-of-your-google-colab-2b0585f82403\\n\",\n        \"\\n\",\n        \"https://research.google.com/colaboratory/local-runtimes.html\"\n      ]\n    }\n  ]\n}"
  },
  {
    "path": "resources/aXeleRate_pascal20_detector.ipynb",
    "content": "{\n  \"nbformat\": 4,\n  \"nbformat_minor\": 0,\n  \"metadata\": {\n    \"colab\": {\n      \"name\": \"aXeleRate_pascal20_detector.ipynb\",\n      \"private_outputs\": true,\n      \"provenance\": [],\n      \"collapsed_sections\": [],\n      \"mount_file_id\": \"1_yhmzOZKns_-h0GwyPu9YAT3K0WQ1PG8\",\n      \"authorship_tag\": \"ABX9TyPUzrsszS4m23mnB7AcN0I9\",\n      \"include_colab_link\": true\n    },\n    \"kernelspec\": {\n      \"name\": \"python3\",\n      \"display_name\": \"Python 3\"\n    },\n    \"accelerator\": \"GPU\"\n  },\n  \"cells\": [\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"view-in-github\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"<a href=\\\"https://colab.research.google.com/github/AIWintermuteAI/aXeleRate/blob/master/resources/aXeleRate_pascal20_detector.ipynb\\\" target=\\\"_parent\\\"><img src=\\\"https://colab.research.google.com/assets/colab-badge.svg\\\" alt=\\\"Open In Colab\\\"/></a>\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"hS9yMrWe02WQ\"\n      },\n      \"source\": [\n        \"## PASCAL-VOC Detection model Training and Inference\\n\",\n        \"\\n\",\n        \"In this notebook we will use axelerate, Keras-based framework for AI on the edge, to quickly setup model training and then after training session is completed convert it to .tflite and .kmodel formats.\\n\",\n        \"\\n\",\n        \"First, let's take care of some administrative details. \\n\",\n        \"\\n\",\n        \"1) Before we do anything, make sure you have choosen GPU as Runtime type (in Runtime - > Change Runtime type).\\n\",\n        \"\\n\",\n        \"2) We need to mount Google Drive for saving our model checkpoints and final converted model(s). Press on Mount Google Drive button in Files tab on your left. \\n\",\n        \"\\n\",\n        \"In the next cell we clone axelerate Github repository and import it. \\n\",\n        \"\\n\",\n        \"**It is possible to use pip install or python setup.py install, but in that case you will need to restart the enironment.** Since I'm trying to make the process as streamlined as possibile I'm using sys.path.append for import.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"y07yAbYbjV2s\"\n      },\n      \"source\": [\n        \"#we need imgaug 0.4 for image augmentations to work properly, see https://stackoverflow.com/questions/62580797/in-colab-doing-image-data-augmentation-with-imgaug-is-not-working-as-intended\\n\",\n        \"!pip uninstall -y imgaug && pip uninstall -y albumentations && pip install imgaug==0.4\\n\",\n        \"!git clone https://github.com/AIWintermuteAI/aXeleRate.git\\n\",\n        \"import sys\\n\",\n        \"sys.path.append('/content/aXeleRate')\\n\",\n        \"from axelerate import setup_training, setup_inference, setup_evaluation\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"5TBRMPZ83dRL\"\n      },\n      \"source\": [\n        \"At this step you typically need to get the dataset. 
You can use !wget command to download it from somewhere on the Internet or !cp to copy from My Drive as in this example\\n\",\n        \"```\\n\",\n        \"!cp -r /content/drive/'My Drive'/pascal_20_segmentation.zip .\\n\",\n        \"!unzip --qq pascal_20_segmentation.zip\\n\",\n        \"```\\n\",\n        \"For this notebook we will use PASCAL-VOC 2012 object detection dataset, which you can download here:\\n\",\n        \"\\n\",\n        \"http://host.robots.ox.ac.uk:8080/pascal/VOC/voc2012/index.html#devkit\\n\",\n        \"\\n\",\n        \"I split the dataset into training and validation using a simple Python script. Since most of the models trained with aXeleRate are to be run on embedded devices and thus have memory and latency constraints, the validation images are easier than most of the images in training set. The validation images include one(or many) instance of a particular class, no mixed classes in one image.\\n\",\n        \"\\n\",\n        \"Let's visualize our detection model test dataset. We use img_num=10 to show only first 10 images. Feel free to change the number to None to see all 100 images.\\n\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"_tpsgkGj7d79\"\n      },\n      \"source\": [\n        \"%matplotlib inline\\n\",\n        \"!gdown https://drive.google.com/uc?id=1xgk7svdjBiEyzyUVoZrCz4PP6dSjVL8S  #pascal-voc dataset\\n\",\n        \"!gdown https://drive.google.com/uc?id=1-2jYfTRPX4kSUTL5SUQVxwHKjBclrBTA  #pre-trained model\\n\",\n        \"!unzip --qq pascal_20_detection.zip\\n\",\n        \"\\n\",\n        \"from axelerate.networks.common_utils.augment import visualize_detection_dataset\\n\",\n        \"\\n\",\n        \"visualize_detection_dataset(img_folder='pascal_20_detection/imgs_validation', ann_folder='pascal_20_detection/anns_validation', num_imgs=10, img_size=320, augment=True)\\n\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"S1oqdtbr7VLB\"\n      },\n      \"source\": [\n        \"Next step is defining a config dictionary. Most lines are self-explanatory.\\n\",\n        \"\\n\",\n        \"Type is model frontend - Classifier, Detector or Segnet\\n\",\n        \"\\n\",\n        \"Architecture is model backend (feature extractor) \\n\",\n        \"\\n\",\n        \"- Full Yolo\\n\",\n        \"- Tiny Yolo\\n\",\n        \"- MobileNet1_0\\n\",\n        \"- MobileNet7_5 \\n\",\n        \"- MobileNet5_0 \\n\",\n        \"- MobileNet2_5 \\n\",\n        \"- SqueezeNet\\n\",\n        \"- NASNetMobile\\n\",\n        \"- DenseNet121\\n\",\n        \"- ResNet50\\n\",\n        \"\\n\",\n        \"Currently only MobileNet backends available for YOLOv3 detector. 
I'm working on backend (feature exctractor) overhaul.\\n\",\n        \"\\n\",\n        \"For more information on anchors, please read here\\n\",\n        \"https://github.com/pjreddie/darknet/issues/568\\n\",\n        \"\\n\",\n        \"Labels are labels present in your dataset.\\n\",\n        \"IMPORTANT: Please, list all the labels present in the dataset.\\n\",\n        \"\\n\",\n        \"object_scale determines how much to penalize wrong prediction of confidence of object predictors\\n\",\n        \"\\n\",\n        \"no_object_scale determines how much to penalize wrong prediction of confidence of non-object predictors\\n\",\n        \"\\n\",\n        \"coord_scale determines how much to penalize wrong position and size predictions (x, y, w, h)\\n\",\n        \"\\n\",\n        \"obj_thresh, nms_threshold set detection confidence threshold and nms thresholds to be used when calcualting precision/recall\\n\",\n        \"\\n\",\n        \"For converter type you can choose the following:\\n\",\n        \"\\n\",\n        \"'k210', 'tflite_fullint', 'tflite_dynamic', 'edgetpu', 'openvino', 'onnx'\\n\",\n        \"\\n\",\n        \"**Since it is an example notebook, we will use pretrained weights and set learning rate to 0.0** \"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"Jw4q6_MsegD2\"\n      },\n      \"source\": [\n        \"config = {\\n\",\n        \"        \\\"model\\\":{\\n\",\n        \"            \\\"type\\\":                 \\\"Detector\\\",\\n\",\n        \"            \\\"architecture\\\":         \\\"MobileNet1_0\\\",\\n\",\n        \"            \\\"input_size\\\":           [224, 320],\\n\",\n        \"            \\\"anchors\\\":              [[[0.76120044, 0.57155991], [0.6923348, 0.88535553], [0.47163042, 0.34163313]],\\n\",\n        \"                                    [[0.33340788, 0.70065861], [0.18124964, 0.38986752], [0.08497349, 0.1527057 ]]],\\n\",\n        \"            \\\"labels\\\":               [\\\"person\\\", \\\"bird\\\", \\\"cat\\\", \\\"cow\\\", \\\"dog\\\", \\\"horse\\\", \\\"sheep\\\", \\\"aeroplane\\\", \\\"bicycle\\\", \\\"boat\\\", \\\"bus\\\", \\\"car\\\", \\\"motorbike\\\", \\\"train\\\",\\\"bottle\\\", \\\"chair\\\", \\\"diningtable\\\", \\\"pottedplant\\\", \\\"sofa\\\", \\\"tvmonitor\\\"],\\n\",\n        \"            \\\"obj_thresh\\\" : \\t\\t    0.7,\\n\",\n        \"            \\\"iou_thresh\\\" : \\t\\t    0.5,\\n\",\n        \"            \\\"coord_scale\\\" : \\t\\t  1.0,\\n\",\n        \"            \\\"object_scale\\\" : \\t\\t  3.0,            \\n\",\n        \"            \\\"no_object_scale\\\" : \\t1.0\\n\",\n        \"        },\\n\",\n        \"        \\\"weights\\\" : {\\n\",\n        \"            \\\"full\\\":   \\t\\t\\t\\t  \\\"/content/yolo_best_recall.h5\\\",\\n\",\n        \"            \\\"backend\\\":   \\t\\t    \\\"imagenet\\\"\\n\",\n        \"        },\\n\",\n        \"        \\\"train\\\" : {\\n\",\n        \"            \\\"actual_epoch\\\":         1,\\n\",\n        \"            \\\"train_image_folder\\\":   \\\"pascal_20_detection/imgs\\\",\\n\",\n        \"            \\\"train_annot_folder\\\":   \\\"pascal_20_detection/anns\\\",\\n\",\n        \"            \\\"train_times\\\":          1,\\n\",\n        \"            \\\"valid_image_folder\\\":   \\\"pascal_20_detection/imgs_validation\\\",\\n\",\n        \"            \\\"valid_annot_folder\\\":   \\\"pascal_20_detection/anns_validation\\\",\\n\",\n        \"            \\\"valid_times\\\":       
   1,\\n\",\n        \"            \\\"valid_metric\\\":         \\\"recall\\\",\\n\",\n        \"            \\\"batch_size\\\":           32,\\n\",\n        \"            \\\"learning_rate\\\":        0.0,\\n\",\n        \"            \\\"saved_folder\\\":   \\t\\tF\\\"/content/drive/MyDrive/projects/pascal20_yolov3\\\",\\n\",\n        \"            \\\"first_trainable_layer\\\": \\\"\\\",\\n\",\n        \"            \\\"augmentation\\\":\\t\\t\\t\\t  True,\\n\",\n        \"            \\\"is_only_detect\\\" : \\t\\t  False\\n\",\n        \"        },\\n\",\n        \"        \\\"converter\\\" : {\\n\",\n        \"            \\\"type\\\":   \\t\\t\\t\\t[]\\n\",\n        \"        }\\n\",\n        \"}\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"kobC_7gd5mEu\"\n      },\n      \"source\": [\n        \"Let's check what GPU we have been assigned in this Colab session, if any.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"rESho_T70BWq\"\n      },\n      \"source\": [\n        \"from tensorflow.python.client import device_lib\\n\",\n        \"device_lib.list_local_devices()\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"i0Fc61WrTxh1\"\n      },\n      \"source\": [\n        \"Also, let's open Tensorboard, where we will be able to watch model training progress in real time. Training and validation logs also will be saved in project folder.\\n\",\n        \"Since there are no logs before we start the training, tensorboard will be empty. Refresh it after first epoch.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"jsGp9JvjTzzp\"\n      },\n      \"source\": [\n        \"%load_ext tensorboard\\n\",\n        \"%tensorboard --logdir logs\\n\",\n        \"!sleep 5\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"cWyKjw-b5_yp\"\n      },\n      \"source\": [\n        \"Finally we start the training by passing config dictionary we have defined earlier to setup_training function. The function will start the training with  Reduce Learning Rate on Plateau and save on best mAP callbacks. Every epoch mAP of the model predictions is measured on the validation dataset. If you have specified the converter type in the config, after the training has stopped the script will convert the best model into the format you have specified in config and save it to the project folder.\\n\",\n        \"\\n\",\n        \"Let's train for one epoch to see how the whole pipeline works.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"deYD3cwukHsj\"\n      },\n      \"source\": [\n        \"from keras import backend as K \\n\",\n        \"K.clear_session()\\n\",\n        \"model_path = setup_training(config_dict=config)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"ypTe3GZI619O\"\n      },\n      \"source\": [\n        \"After training it is good to check the actual perfomance of your model by doing inference on your validation dataset and visualizing results. This is exactly what next block does. 
Our model used pre-trained weights and since all the layers were set as non-trainable, we are just observing the perfomance of the model that was trained before.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"jE7pTYmZN7Pi\"\n      },\n      \"source\": [\n        \"%matplotlib inline\\n\",\n        \"from keras import backend as K \\n\",\n        \"K.clear_session()\\n\",\n        \"setup_inference(config, model_path)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"nKsxhdPvzrD8\"\n      },\n      \"source\": [\n        \"If you need to convert trained model to other formats, for example for inference with Edge TPU or OpenCV AI Kit, you can do it with following commands. Specify the converter type, backend and folder with calbiration images(normally your validation image folder).\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"awR7r4ILzrmb\"\n      },\n      \"source\": [\n        \"from axelerate.networks.common_utils.convert import Converter\\n\",\n        \"converter = Converter('tflite_dynamic', 'MobileNet1_0', 'pascal_20_detection/imgs_validation')\\n\",\n        \"converter.convert_model(model_path)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"JPvYzcRhfs2u\"\n      },\n      \"source\": [\n        \"To train the model from scratch use the following config and then run the cells with training and (optinally) inference functions again.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"uruWpeGRf6Qi\"\n      },\n      \"source\": [\n        \"config = {\\n\",\n        \"        \\\"model\\\":{\\n\",\n        \"            \\\"type\\\":                 \\\"Detector\\\",\\n\",\n        \"            \\\"architecture\\\":         \\\"MobileNet1_0\\\",\\n\",\n        \"            \\\"input_size\\\":           [224, 320],\\n\",\n        \"            \\\"anchors\\\":              [[[0.76120044, 0.57155991], [0.6923348, 0.88535553], [0.47163042, 0.34163313]],\\n\",\n        \"                                    [[0.33340788, 0.70065861], [0.18124964, 0.38986752], [0.08497349, 0.1527057 ]]],\\n\",\n        \"            \\\"labels\\\":               [\\\"person\\\", \\\"bird\\\", \\\"cat\\\", \\\"cow\\\", \\\"dog\\\", \\\"horse\\\", \\\"sheep\\\", \\\"aeroplane\\\", \\\"bicycle\\\", \\\"boat\\\", \\\"bus\\\", \\\"car\\\", \\\"motorbike\\\", \\\"train\\\",\\\"bottle\\\", \\\"chair\\\", \\\"diningtable\\\", \\\"pottedplant\\\", \\\"sofa\\\", \\\"tvmonitor\\\"],\\n\",\n        \"            \\\"obj_thresh\\\" : \\t\\t    0.7,\\n\",\n        \"            \\\"iou_thresh\\\" : \\t\\t    0.5,\\n\",\n        \"            \\\"coord_scale\\\" : \\t\\t  1.0,\\n\",\n        \"            \\\"object_scale\\\" : \\t\\t  3.0,            \\n\",\n        \"            \\\"no_object_scale\\\" : \\t1.0\\n\",\n        \"        },\\n\",\n        \"        \\\"weights\\\" : {\\n\",\n        \"            \\\"full\\\":   \\t\\t\\t\\t  \\\"\\\",\\n\",\n        \"            \\\"backend\\\":   \\t\\t    \\\"imagenet\\\"\\n\",\n        \"        },\\n\",\n        \"        \\\"train\\\" : {\\n\",\n        \"            \\\"actual_epoch\\\":         50,\\n\",\n        \"            \\\"train_image_folder\\\":   
\\\"pascal_20_detection/imgs\\\",\\n\",\n        \"            \\\"train_annot_folder\\\":   \\\"pascal_20_detection/anns\\\",\\n\",\n        \"            \\\"train_times\\\":          1,\\n\",\n        \"            \\\"valid_image_folder\\\":   \\\"pascal_20_detection/imgs_validation\\\",\\n\",\n        \"            \\\"valid_annot_folder\\\":   \\\"pascal_20_detection/anns_validation\\\",\\n\",\n        \"            \\\"valid_times\\\":          1,\\n\",\n        \"            \\\"valid_metric\\\":         \\\"recall\\\",\\n\",\n        \"            \\\"batch_size\\\":           32,\\n\",\n        \"            \\\"learning_rate\\\":        1e-3,\\n\",\n        \"            \\\"saved_folder\\\":   \\t\\tF\\\"/content/drive/MyDrive/projects/pascal20_yolov3\\\",\\n\",\n        \"            \\\"first_trainable_layer\\\": \\\"\\\",\\n\",\n        \"            \\\"augmentation\\\":\\t\\t\\t\\t  True,\\n\",\n        \"            \\\"is_only_detect\\\" : \\t\\t  False\\n\",\n        \"        },\\n\",\n        \"        \\\"converter\\\" : {\\n\",\n        \"            \\\"type\\\":   \\t\\t\\t\\t[]\\n\",\n        \"        }\\n\",\n        \"}\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"1frVrWMcf-k7\"\n      },\n      \"source\": [\n        \"from keras import backend as K \\n\",\n        \"K.clear_session()\\n\",\n        \"model_path = setup_training(config_dict=config)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"Ipv1AGzRgAMA\"\n      },\n      \"source\": [\n        \"%matplotlib inline\\n\",\n        \"from keras import backend as K \\n\",\n        \"K.clear_session()\\n\",\n        \"setup_inference(config, model_path)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"5YuVe2VD11cd\"\n      },\n      \"source\": [\n        \"Good luck and happy training! Have a look at these articles, that would allow you to get the most of Google Colab or connect to local runtime if there are no GPUs available;\\n\",\n        \"\\n\",\n        \"https://medium.com/@oribarel/getting-the-most-out-of-your-google-colab-2b0585f82403\\n\",\n        \"\\n\",\n        \"https://research.google.com/colaboratory/local-runtimes.html\"\n      ]\n    }\n  ]\n}"
  },
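The notebook above ends by converting the trained detector to `tflite_dynamic` format with the `Converter` class. Below is a minimal sketch of how such a converted model could be loaded with the standard TensorFlow Lite interpreter; the `.tflite` file name, the sample image path and the [0, 1] input scaling are illustrative assumptions, and the raw YOLO output still needs decoding and non-maximum suppression, which aXeleRate's own inference code normally handles.

```python
# Minimal sketch: running a converted tflite_dynamic detector with the plain
# TensorFlow Lite interpreter. The file name, image path and [0, 1] input
# scaling are assumptions for illustration, not aXeleRate defaults.
import numpy as np
import tensorflow as tf
from PIL import Image

interpreter = tf.lite.Interpreter(model_path="YOLO_best_mAP.tflite")  # hypothetical Converter output
interpreter.allocate_tensors()
inp = interpreter.get_input_details()[0]
out = interpreter.get_output_details()[0]

# The config above uses input_size [224, 320]; PIL's resize takes (width, height).
img = Image.open("pascal_20_detection/imgs_validation/example.jpg").resize((320, 224))  # hypothetical image
x = np.asarray(img, dtype=np.float32)[None, ...] / 255.0

interpreter.set_tensor(inp["index"], x.astype(inp["dtype"]))
interpreter.invoke()
raw = interpreter.get_tensor(out["index"])
print("raw YOLO output shape:", raw.shape)  # decoding + NMS still required
```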
  {
    "path": "resources/aXeleRate_person_detector.ipynb",
    "content": "{\n  \"nbformat\": 4,\n  \"nbformat_minor\": 0,\n  \"metadata\": {\n    \"colab\": {\n      \"name\": \"aXeleRate_person_detector.ipynb\",\n      \"private_outputs\": true,\n      \"provenance\": [],\n      \"collapsed_sections\": [],\n      \"include_colab_link\": true\n    },\n    \"kernelspec\": {\n      \"name\": \"python3\",\n      \"display_name\": \"Python 3\"\n    },\n    \"accelerator\": \"GPU\"\n  },\n  \"cells\": [\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"view-in-github\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"<a href=\\\"https://colab.research.google.com/github/AIWintermuteAI/aXeleRate/blob/master/resources/aXeleRate_person_detector.ipynb\\\" target=\\\"_parent\\\"><img src=\\\"https://colab.research.google.com/assets/colab-badge.svg\\\" alt=\\\"Open In Colab\\\"/></a>\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"hS9yMrWe02WQ\"\n      },\n      \"source\": [\n        \"## Person Detection model Training and Inference\\n\",\n        \"\\n\",\n        \"In this notebook we will use axelerate, Keras-based framework for AI on the edge, to quickly setup model training and then after training session is completed convert it to .tflite and .kmodel formats.\\n\",\n        \"\\n\",\n        \"First, let's take care of some administrative details. \\n\",\n        \"\\n\",\n        \"1) Before we do anything, make sure you have choosen GPU as Runtime type (in Runtime - > Change Runtime type).\\n\",\n        \"\\n\",\n        \"2) We need to mount Google Drive for saving our model checkpoints and final converted model(s). Press on Mount Google Drive button in Files tab on your left. \\n\",\n        \"\\n\",\n        \"In the next cell we clone axelerate Github repository and import it. \\n\",\n        \"\\n\",\n        \"**It is possible to use pip install or python setup.py install, but in that case you will need to restart the enironment.** Since I'm trying to make the process as streamlined as possibile I'm using sys.path.append for import.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"y07yAbYbjV2s\"\n      },\n      \"source\": [\n        \"%load_ext tensorboard\\n\",\n        \"#we need imgaug 0.4 for image augmentations to work properly, see https://stackoverflow.com/questions/62580797/in-colab-doing-image-data-augmentation-with-imgaug-is-not-working-as-intended\\n\",\n        \"!pip uninstall -y imgaug && pip uninstall -y albumentations && pip install imgaug==0.4\\n\",\n        \"!pip install --upgrade --no-cache-dir gdown\\n\",\n        \"!git clone https://github.com/AIWintermuteAI/aXeleRate.git\\n\",\n        \"import sys\\n\",\n        \"sys.path.append('/content/aXeleRate')\\n\",\n        \"from axelerate import setup_training, setup_inference\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"5TBRMPZ83dRL\"\n      },\n      \"source\": [\n        \"At this step you typically need to get the dataset. 
You can use the !wget command to download it from somewhere on the Internet or !cp to copy it from My Drive, as in this example\\n\",\n        \"```\\n\",\n        \"!cp -r /content/drive/'My Drive'/pascal_20_segmentation.zip .\\n\",\n        \"!unzip --qq pascal_20_segmentation.zip\\n\",\n        \"```\\n\",\n        \"For this notebook we'll use the gdown command line tool to download the person detection dataset I shared on Google Drive and then unzip it with the unzip command. It is based on the INRIA person detection dataset, which I converted to PASCAL-VOC annotation format.\\n\",\n        \"https://dbcollection.readthedocs.io/en/latest/datasets/inria_ped.html\\n\",\n        \"When actually training the model myself I added about 400 pictures of our office staff, which I cannot share online. I recommend you also augment this dataset by taking and annotating pictures of your family/friends. The annotation tool I use is LabelImg\\n\",\n        \"https://github.com/tzutalin/labelImg\\n\",\n        \"\\n\",\n        \"Let's visualize our detection model test dataset. There are images in the validation folder with corresponding annotations in PASCAL-VOC format in the validation annotations folder.\\n\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"_tpsgkGj7d79\"\n      },\n      \"source\": [\n        \"%matplotlib inline\\n\",\n        \"!gdown https://drive.google.com/uc?id=1UWwxlJm5JH_JiBY9PoLgGyHsRDzBqRGU #dataset\\n\",\n        \"!gdown https://drive.google.com/uc?id=1-2fiBxykZVZBRcux9I6mKZaS3yAHq6hk #pre-trained model\\n\",\n        \"\\n\",\n        \"!unzip --qq person_dataset.zip\\n\",\n        \"\\n\",\n        \"from axelerate.networks.common_utils.augment import visualize_detection_dataset\\n\",\n        \"\\n\",\n        \"visualize_detection_dataset(img_folder='person_dataset/imgs_validation', ann_folder='person_dataset/anns_validation', img_size=None, augment=True)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"S1oqdtbr7VLB\"\n      },\n      \"source\": [\n        \"The next step is defining a config dictionary. 
Most lines are self-explanatory.\\n\",\n        \"\\n\",\n        \"Type is model frontend - Classifier, Detector or Segnet\\n\",\n        \"\\n\",\n        \"Architecture is model backend (feature extractor) \\n\",\n        \"\\n\",\n        \"- Full Yolo\\n\",\n        \"- Tiny Yolo\\n\",\n        \"- MobileNet1_0\\n\",\n        \"- MobileNet7_5 \\n\",\n        \"- MobileNet5_0 \\n\",\n        \"- MobileNet2_5 \\n\",\n        \"- SqueezeNet\\n\",\n        \"- NASNetMobile\\n\",\n        \"- DenseNet121\\n\",\n        \"- ResNet50\\n\",\n        \"\\n\",\n        \"For more information on anchors, please read here\\n\",\n        \"https://github.com/pjreddie/darknet/issues/568\\n\",\n        \"\\n\",\n        \"Labels are labels present in your dataset.\\n\",\n        \"IMPORTANT: Please, list all the labels present in the dataset.\\n\",\n        \"\\n\",\n        \"object_scale determines how much to penalize wrong prediction of confidence of object predictors\\n\",\n        \"\\n\",\n        \"no_object_scale determines how much to penalize wrong prediction of confidence of non-object predictors\\n\",\n        \"\\n\",\n        \"coord_scale determines how much to penalize wrong position and size predictions (x, y, w, h)\\n\",\n        \"\\n\",\n        \"class_scale determines how much to penalize wrong class prediction\\n\",\n        \"\\n\",\n        \"For converter type you can choose the following:\\n\",\n        \"\\n\",\n        \"'k210', 'tflite_fullint', 'tflite_dynamic', 'edgetpu', 'openvino', 'onnx'\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"EkASgMdcj3Nu\"\n      },\n      \"source\": [\n        \"## Parameters for Person Detection\\n\",\n        \"\\n\",\n        \"K210, which is where we will run the network, has constrained memory (5.5 RAM) available, so with Micropython firmware, the largest model you can run is about 2 MB, which limits our architecture choice to Tiny Yolo, MobileNet(up to 0.75 alpha) and SqueezeNet. Out of these 3 architectures, only one comes with pre-trained model - MobileNet. So, to save the training time we will use Mobilenet with alpha 0.75, which has ... parameters. 
For objects that do not have that much variety, you can use MobileNet with lower alpha, down to 0.25.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"Jw4q6_MsegD2\"\n      },\n      \"source\": [\n        \"config = {\\n\",\n        \"        \\\"model\\\":{\\n\",\n        \"            \\\"type\\\":                 \\\"Detector\\\",\\n\",\n        \"            \\\"architecture\\\":         \\\"MobileNet5_0\\\",\\n\",\n        \"            \\\"input_size\\\":           [224, 320],\\n\",\n        \"            \\\"anchors\\\":              [[[0.76120044, 0.57155991], [0.6923348, 0.88535553], [0.47163042, 0.34163313]],\\n\",\n        \"                                    [[0.33340788, 0.70065861], [0.18124964, 0.38986752], [0.08497349, 0.1527057 ]]],\\n\",\n        \"            \\\"labels\\\":               [\\\"person\\\"],\\n\",\n        \"            \\\"obj_thresh\\\" : \\t\\t  0.7,\\n\",\n        \"            \\\"iou_thresh\\\" : \\t\\t  0.5,\\n\",\n        \"            \\\"coord_scale\\\" : \\t\\t1.0,\\n\",\n        \"            \\\"class_scale\\\" : \\t\\t1.0,\\n\",\n        \"            \\\"object_scale\\\" : \\t\\t5.0,\\n\",\n        \"            \\\"no_object_scale\\\" : \\t1.0\\n\",\n        \"        },\\n\",\n        \"        \\\"weights\\\" : {\\n\",\n        \"            \\\"full\\\":   \\t\\t\\t\\t\\\"\\\",\\n\",\n        \"            \\\"backend\\\":   \\t\\t    \\\"imagenet\\\"\\n\",\n        \"        },\\n\",\n        \"        \\\"train\\\" : {\\n\",\n        \"            \\\"actual_epoch\\\":         1,\\n\",\n        \"            \\\"train_image_folder\\\":   \\\"person_dataset/imgs\\\",\\n\",\n        \"            \\\"train_annot_folder\\\":   \\\"person_dataset/anns\\\",\\n\",\n        \"            \\\"train_times\\\":          1,\\n\",\n        \"            \\\"valid_image_folder\\\":   \\\"person_dataset/imgs_validation\\\",\\n\",\n        \"            \\\"valid_annot_folder\\\":   \\\"person_dataset/anns_validation\\\",\\n\",\n        \"            \\\"valid_times\\\":          1,\\n\",\n        \"            \\\"valid_metric\\\":         \\\"recall\\\",\\n\",\n        \"            \\\"batch_size\\\":           10,\\n\",\n        \"            \\\"learning_rate\\\":        1e-3,\\n\",\n        \"            \\\"saved_folder\\\":   \\t\\tF\\\"/content/drive/MyDrive/person_detector\\\",\\n\",\n        \"            \\\"first_trainable_layer\\\": \\\"\\\",\\n\",\n        \"            \\\"augmentation\\\":\\t\\t\\t\\tTrue,\\n\",\n        \"            \\\"is_only_detect\\\" : \\t\\tFalse\\n\",\n        \"        },\\n\",\n        \"        \\\"converter\\\" : {\\n\",\n        \"            \\\"type\\\":   \\t\\t\\t\\t[\\\"k210\\\",\\\"tflite\\\"]\\n\",\n        \"        }\\n\",\n        \"    }\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"kobC_7gd5mEu\"\n      },\n      \"source\": [\n        \"Let's check what GPU we have been assigned in this Colab session, if any.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"rESho_T70BWq\"\n      },\n      \"source\": [\n        \"from tensorflow.python.client import device_lib\\n\",\n        \"device_lib.list_local_devices()\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": 
\"gtNVJF3WIYXL\"\n      },\n      \"source\": [\n        \"Also, let's open Tensorboard, where we will be able to watch model training progress in real time. Training and validation logs also will be saved in project folder.\\n\",\n        \"Since there are no logs before we start the training, tensorboard will be empty. Refresh it after first epoch.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"lLUCRqhSIcRP\"\n      },\n      \"source\": [\n        \"%tensorboard --logdir logs\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"cWyKjw-b5_yp\"\n      },\n      \"source\": [\n        \"Finally we start the training by passing config dictionary we have defined earlier to setup_training function. The function will start the training with Checkpoint, Reduce Learning Rate on Plateau and Early Stopping callbacks. After the training has stopped, it will convert the best model into the format you have specified in config and save it to the project folder.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"deYD3cwukHsj\"\n      },\n      \"source\": [\n        \"from keras import backend as K \\n\",\n        \"K.clear_session()\\n\",\n        \"model_path = setup_training(config_dict=config)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"ypTe3GZI619O\"\n      },\n      \"source\": [\n        \"After training it is good to check the actual perfomance of your model by doing inference on your validation dataset and visualizing results. This is exactly what next block does.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"jE7pTYmZN7Pi\"\n      },\n      \"source\": [\n        \"%matplotlib inline\\n\",\n        \"from keras import backend as K \\n\",\n        \"K.clear_session()\\n\",\n        \"setup_inference(config, model_path)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"5YuVe2VD11cd\"\n      },\n      \"source\": [\n        \"The pre-trained weights inference results are: {'fscore': 0.918918918918919, 'precision': 0.8947368421052632, 'recall': 0.9444444444444444}, final validation mAP 0.5657894736842105 \\n\",\n        \"**weights name:  YOLO_best_mAP.h5**\\n\",\n        \"\\n\",\n        \"Good luck and happy training! Have a look at these articles, that would allow you to get the most of Google Colab or connect to local runtime if there are no GPUs available;\\n\",\n        \"\\n\",\n        \"https://medium.com/@oribarel/getting-the-most-out-of-your-google-colab-2b0585f82403\\n\",\n        \"\\n\",\n        \"https://research.google.com/colaboratory/local-runtimes.html\"\n      ]\n    }\n  ]\n}"
  },
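The detector configs in these notebooks list anchors as normalized (width, height) pairs, and the darknet issue they link to describes how such priors are usually obtained by clustering the boxes of your own dataset. The sketch below is a hypothetical helper (not part of aXeleRate) that estimates anchors with a plain k-means over PASCAL-VOC annotations; the folder name points at the sample annotations shipped in this repo and the value of k is an illustrative choice.

```python
# Hypothetical anchor-estimation helper: k-means over normalized box widths/heights
# read from VOC-style XML annotations. Not an aXeleRate API.
import glob
import xml.etree.ElementTree as ET
import numpy as np

def load_box_sizes(ann_folder):
    """Collect (w, h) of every bounding box, normalized by image size."""
    sizes = []
    for xml_path in glob.glob(f"{ann_folder}/*.xml"):
        root = ET.parse(xml_path).getroot()
        img_w = float(root.find("size/width").text)
        img_h = float(root.find("size/height").text)
        for obj in root.findall("object"):
            box = obj.find("bndbox")
            w = (float(box.find("xmax").text) - float(box.find("xmin").text)) / img_w
            h = (float(box.find("ymax").text) - float(box.find("ymin").text)) / img_h
            sizes.append((w, h))
    return np.array(sizes)

def kmeans_anchors(sizes, k=6, iters=100, seed=0):
    """Plain k-means on (w, h) pairs; returns k anchors sorted by area, largest first."""
    rng = np.random.default_rng(seed)
    centers = sizes[rng.choice(len(sizes), k, replace=False)]
    for _ in range(iters):
        # assign every box to the nearest centre (Euclidean distance in w/h space)
        dists = np.linalg.norm(sizes[:, None, :] - centers[None, :, :], axis=2)
        labels = dists.argmin(axis=1)
        for i in range(k):
            if np.any(labels == i):
                centers[i] = sizes[labels == i].mean(axis=0)
    return centers[np.argsort(centers.prod(axis=1))[::-1]]

if __name__ == "__main__":
    boxes = load_box_sizes("sample_datasets/detector/anns")
    print(kmeans_anchors(boxes, k=6).round(4))
```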
  {
    "path": "resources/aXeleRate_standford_dog_classifier.ipynb",
    "content": "{\n  \"nbformat\": 4,\n  \"nbformat_minor\": 0,\n  \"metadata\": {\n    \"colab\": {\n      \"name\": \"aXeleRate_standford_dog_classifier.ipynb\",\n      \"private_outputs\": true,\n      \"provenance\": [],\n      \"collapsed_sections\": [],\n      \"mount_file_id\": \"1rCJbj9BGoDxEt1ERSK3onxShVBv9LS7B\",\n      \"authorship_tag\": \"ABX9TyP3QFJgHG/Wic0bXC60lYCn\",\n      \"include_colab_link\": true\n    },\n    \"kernelspec\": {\n      \"name\": \"python3\",\n      \"display_name\": \"Python 3\"\n    },\n    \"accelerator\": \"GPU\"\n  },\n  \"cells\": [\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"view-in-github\",\n        \"colab_type\": \"text\"\n      },\n      \"source\": [\n        \"<a href=\\\"https://colab.research.google.com/github/AIWintermuteAI/aXeleRate/blob/master/resources/aXeleRate_standford_dog_classifier.ipynb\\\" target=\\\"_parent\\\"><img src=\\\"https://colab.research.google.com/assets/colab-badge.svg\\\" alt=\\\"Open In Colab\\\"/></a>\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"hS9yMrWe02WQ\"\n      },\n      \"source\": [\n        \"## Standford Dog Breed Classification model Training and Inference\\n\",\n        \"\\n\",\n        \"In this notebook we will use axelerate Keras-based framework for AI on the edge to quickly setup model training and then after training session is completed convert it to .tflite and .kmodel formats.\\n\",\n        \"\\n\",\n        \"First, let's take care of some administrative details. \\n\",\n        \"\\n\",\n        \"1) Before we do anything, make sure you have choosen GPU as Runtime type (in Runtime - > Change Runtime type).\\n\",\n        \"\\n\",\n        \"2) We need to mount Google Drive for saving our model checkpoints and final converted model(s). Press on Mount Google Drive button in Files tab on your left. \\n\",\n        \"\\n\",\n        \"In the next cell we clone axelerate Github repository and import it. \\n\",\n        \"\\n\",\n        \"**It is possible to use pip install or python setup.py install, but in that case you will need to restart the enironment.** Since I'm trying to make the process as streamlined as possibile I'm using sys.path.append for import.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"y07yAbYbjV2s\"\n      },\n      \"source\": [\n        \"#we need imgaug 0.4 for image augmentations to work properly, see https://stackoverflow.com/questions/62580797/in-colab-doing-image-data-augmentation-with-imgaug-is-not-working-as-intended\\n\",\n        \"!pip uninstall -y imgaug && pip uninstall -y albumentations && pip install imgaug==0.4\\n\",\n        \"!git clone https://github.com/AIWintermuteAI/aXeleRate.git\\n\",\n        \"import sys\\n\",\n        \"sys.path.append('/content/aXeleRate')\\n\",\n        \"from axelerate import setup_training, setup_inference\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"5TBRMPZ83dRL\"\n      },\n      \"source\": [\n        \"At this step you typically need to get the dataset. 
You can use the !wget command to download it from somewhere on the Internet or !cp to copy it from My Drive, as in this example\\n\",\n        \"```\\n\",\n        \"!cp -r /content/drive/'My Drive'/pascal_20_segmentation.zip .\\n\",\n        \"!unzip --qq pascal_20_segmentation.zip\\n\",\n        \"```\\n\",\n        \"For this notebook we will use the Stanford Dog Breed Classification dataset for fine-grained classification, which you can download here:\\n\",\n        \"http://vision.stanford.edu/aditya86/ImageNetDogs/\\n\",\n        \"\\n\",\n        \"In the next cell we will download the same dataset, but with the training/validation split already done, which I shared on my Google Drive. We will also download a pre-trained model to demonstrate inference results.\\n\",\n        \"\\n\",\n        \"Let's visualize our classification validation dataset with the visualize_classification_dataset function, which will search for all images in the folder and display num_imgs images with the class label overlaid on the image.\\n\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"_tpsgkGj7d79\"\n      },\n      \"source\": [\n        \"%matplotlib inline\\n\",\n        \"!gdown https://drive.google.com/uc?id=1qq758Tjsfm7Euu9ev7hSyLkMj63YC9ST  #dog breed classification dataset\\n\",\n        \"!gdown https://drive.google.com/uc?id=1dFnDCOxws2uX4ZpauSPC6r6jdjHoJw_p  #pre-trained model\\n\",\n        \"!unzip --qq dogs_classification.zip\\n\",\n        \"\\n\",\n        \"from axelerate.networks.common_utils.augment import visualize_classification_dataset\\n\",\n        \"\\n\",\n        \"visualize_classification_dataset('dogs_classification/imgs_validation', num_imgs=10, img_size=224, augment=True)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"S1oqdtbr7VLB\"\n      },\n      \"source\": [\n        \"The next step is defining a config dictionary. Most lines are self-explanatory.\\n\",\n        \"\\n\",\n        \"Type is model frontend - Classifier, Detector or Segnet\\n\",\n        \"\\n\",\n        \"Architecture is model backend (feature extractor) \\n\",\n        \"\\n\",\n        \"- Full Yolo\\n\",\n        \"- Tiny Yolo\\n\",\n        \"- MobileNet1_0\\n\",\n        \"- MobileNet7_5 \\n\",\n        \"- MobileNet5_0 \\n\",\n        \"- MobileNet2_5 \\n\",\n        \"- SqueezeNet\\n\",\n        \"- NASNetMobile\\n\",\n        \"- DenseNet121\\n\",\n        \"- ResNet50\\n\",\n        \"\\n\",\n        \"**Note that while you can train any network type with any backend (Tiny YOLO + Classifier, NASNETMobile +  Detector, DenseNet121 + Segnet and so on), some converters do not support larger networks! E.g. K210 converter only supports MobileNet and TinyYOLO backends.**\\n\",\n        \"\\n\",\n        \"Fully_connected is the number of neurons in the classification layers, as a list.\\n\",\n        \"\\n\",\n        \"Dropout value is the dropout rate in the classification layers.\\n\",\n        \"\\n\",\n        \"actual_epoch is the number of epochs to train; normally a good starting value is 50 - 100\\n\",\n        \"\\n\",\n        \"train_times is a multiplier for the training dataset, i.e. how many times to repeat the dataset during one epoch. Useful when you apply augmentations to images. Normally between 1 and 3 is okay. 
If you have a big dataset, you can leave it at 1.\\n\",\n        \"\\n\",\n        \"For converter type you can choose the following:\\n\",\n        \"\\n\",\n        \"'k210', 'tflite_fullint', 'tflite_dynamic', 'edgetpu', 'openvino', 'onnx'\\n\",\n        \"\\n\",\n        \"**Since it is an example notebook, we will use pretrained weights and set all layers of the model to be \\\"frozen\\\" (non-trainable), except for the last one. Also we set the learning rate to a very low value, which will allow us to see the performance of the pretrained model** \"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"Jw4q6_MsegD2\"\n      },\n      \"source\": [\n        \"config = {\\n\",\n        \"    \\\"model\\\" : {\\n\",\n        \"        \\\"type\\\":                 \\\"Classifier\\\",\\n\",\n        \"        \\\"architecture\\\":         \\\"NASNetMobile\\\",\\n\",\n        \"        \\\"input_size\\\":           224,\\n\",\n        \"        \\\"fully-connected\\\":      [],\\n\",\n        \"        \\\"labels\\\":               [],\\n\",\n        \"        \\\"dropout\\\" : \\t\\t0.2\\n\",\n        \"    },\\n\",\n        \"     \\\"weights\\\" : {\\n\",\n        \"            \\\"full\\\":   \\t\\t\\t\\t\\\"/content/Classifier_best_val_accuracy.h5\\\",\\n\",\n        \"            \\\"backend\\\":   \\t\\t    \\\"imagenet\\\",\\n\",\n        \"            \\\"save_bottleneck\\\":      False\\n\",\n        \"        \\n\",\n        \"    },\\n\",\n        \"    \\\"train\\\" : {\\n\",\n        \"        \\\"actual_epoch\\\":         1,\\n\",\n        \"        \\\"train_image_folder\\\":   \\\"dogs_classification/imgs\\\",\\n\",\n        \"        \\\"train_times\\\":          1,\\n\",\n        \"        \\\"valid_image_folder\\\":   \\\"dogs_classification/imgs_validation\\\",\\n\",\n        \"        \\\"valid_times\\\":          1,\\n\",\n        \"        \\\"valid_metric\\\":         \\\"val_accuracy\\\",\\n\",\n        \"        \\\"batch_size\\\":           16,\\n\",\n        \"        \\\"learning_rate\\\":        0.0,\\n\",\n        \"        \\\"saved_folder\\\":   \\t\\tF\\\"/content/drive/MyDrive/dogs_classifier\\\",\\n\",\n        \"        \\\"first_trainable_layer\\\": \\\"dense\\\",\\n\",\n        \"        \\\"augmentation\\\":\\t\\t\\t\\tTrue\\n\",\n        \"    },\\n\",\n        \"    \\\"converter\\\" : {\\n\",\n        \"        \\\"type\\\":   \\t\\t\\t\\t[]\\n\",\n        \"    }\\n\",\n        \"}\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"kobC_7gd5mEu\"\n      },\n      \"source\": [\n        \"Let's check what GPU we have been assigned in this Colab session, if any.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"rESho_T70BWq\"\n      },\n      \"source\": [\n        \"from tensorflow.python.client import device_lib\\n\",\n        \"device_lib.list_local_devices()\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"vsu5OuxwH58t\"\n      },\n      \"source\": [\n        \"Also, let's open TensorBoard, where we will be able to watch model training progress in real time. Training and validation logs will also be saved in the project folder.\\n\",\n        \"Since there are no logs before we start the training, TensorBoard will be empty. 
Refresh it after the first epoch.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"8H59nl11H6kB\"\n      },\n      \"source\": [\n        \"%load_ext tensorboard\\n\",\n        \"%tensorboard --logdir logs\\n\",\n        \"!sleep 10\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"cWyKjw-b5_yp\"\n      },\n      \"source\": [\n        \"Finally we start the training by passing the config dictionary we defined earlier to the setup_training function. The function will start the training with Checkpoint, Reduce Learning Rate on Plateau and Early Stopping callbacks. Every time our validation metric (in this config set to \\\"val_accuracy\\\") improves, the model is saved with the Checkpoint callback. If you have specified the converter type in the config, after the training has stopped the script will convert the best model into the format you have specified and save it to the project folder.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"deYD3cwukHsj\"\n      },\n      \"source\": [\n        \"from keras import backend as K \\n\",\n        \"K.clear_session()\\n\",\n        \"model_path = setup_training(config_dict=config)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"ypTe3GZI619O\"\n      },\n      \"source\": [\n        \"After training it is good to check the actual performance of your model by doing inference on your validation dataset and visualizing the results. This is exactly what the next block does. Our model used pre-trained weights and since all the layers, except for the last one, were set as non-trainable and we set the learning rate to a very low value, we are just observing the performance of the model that was trained before.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"jE7pTYmZN7Pi\"\n      },\n      \"source\": [\n        \"%matplotlib inline\\n\",\n        \"from keras import backend as K \\n\",\n        \"K.clear_session()\\n\",\n        \"setup_inference(config, model_path)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"PF__ooBsyb58\"\n      },\n      \"source\": [\n        \"If you need to convert the trained model to other formats, for example for inference with Edge TPU or Kendryte K210, you can do it with the following commands. 
Specify the converter type, backend and folder with calibration images (normally your validation image folder).\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"fGNqUf1Gyc4z\"\n      },\n      \"source\": [\n        \"from axelerate.networks.common_utils.convert import Converter\\n\",\n        \"converter = Converter('tflite_dynamic', 'NASNetMobile', 'dogs_classification/imgs_validation')\\n\",\n        \"converter.convert_model(model_path)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"fn7H0V4SEOd_\"\n      },\n      \"source\": [\n        \"To train the model from scratch use the following config and then run the cells with training and (optionally) inference functions again.\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"oT87SwQ6EQB8\"\n      },\n      \"source\": [\n        \"config = {\\n\",\n        \"    \\\"model\\\" : {\\n\",\n        \"        \\\"type\\\":                 \\\"Classifier\\\",\\n\",\n        \"        \\\"architecture\\\":         \\\"NASNetMobile\\\",\\n\",\n        \"        \\\"input_size\\\":           224,\\n\",\n        \"        \\\"fully-connected\\\":      [],\\n\",\n        \"        \\\"labels\\\":               [],\\n\",\n        \"        \\\"dropout\\\" : \\t\\t0.2\\n\",\n        \"    },\\n\",\n        \"     \\\"weights\\\" : {\\n\",\n        \"            \\\"full\\\":   \\t\\t\\t\\t\\\"\\\",\\n\",\n        \"            \\\"backend\\\":   \\t\\t    \\\"imagenet\\\",\\n\",\n        \"            \\\"save_bottleneck\\\":      False\\n\",\n        \"        \\n\",\n        \"    },\\n\",\n        \"    \\\"train\\\" : {\\n\",\n        \"        \\\"actual_epoch\\\":         50,\\n\",\n        \"        \\\"train_image_folder\\\":   \\\"dogs_classification/imgs\\\",\\n\",\n        \"        \\\"train_times\\\":          1,\\n\",\n        \"        \\\"valid_image_folder\\\":   \\\"dogs_classification/imgs_validation\\\",\\n\",\n        \"        \\\"valid_times\\\":          1,\\n\",\n        \"        \\\"valid_metric\\\":         \\\"val_accuracy\\\",\\n\",\n        \"        \\\"batch_size\\\":           16,\\n\",\n        \"        \\\"learning_rate\\\":        1e-3,\\n\",\n        \"        \\\"saved_folder\\\":   \\t\\tF\\\"/content/drive/MyDrive/dogs_classifier\\\",\\n\",\n        \"        \\\"first_trainable_layer\\\": \\\"\\\",\\n\",\n        \"        \\\"augmentation\\\":\\t\\t\\t\\tTrue\\n\",\n        \"    },\\n\",\n        \"    \\\"converter\\\" : {\\n\",\n        \"        \\\"type\\\":   \\t\\t\\t\\t[\\\"tflite_dynamic\\\"]\\n\",\n        \"    }\\n\",\n        \"}\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"NQjvas2UEe8l\"\n      },\n      \"source\": [\n        \"from keras import backend as K \\n\",\n        \"K.clear_session()\\n\",\n        \"model_path = setup_training(config_dict=config)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"id\": \"iJJWjuRaEfkj\"\n      },\n      \"source\": [\n        \"%matplotlib inline\\n\",\n        \"from keras import backend as K \\n\",\n        \"K.clear_session()\\n\",\n        \"setup_inference(config, model_path)\"\n      ],\n      \"execution_count\": null,\n    
  \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"5YuVe2VD11cd\"\n      },\n      \"source\": [\n        \"Good luck and happy training! Have a look at these articles, that would allow you to get the most of Google Colab or connect to local runtime if there are no GPUs available;\\n\",\n        \"\\n\",\n        \"https://medium.com/@oribarel/getting-the-most-out-of-your-google-colab-2b0585f82403\\n\",\n        \"\\n\",\n        \"https://research.google.com/colaboratory/local-runtimes.html\"\n      ]\n    }\n  ]\n}"
  },
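The classifier notebook above saves its best checkpoint as Classifier_best_val_accuracy.h5. Below is a minimal sketch of reusing such a checkpoint outside aXeleRate for a single prediction; it assumes the .h5 file loads as a plain Keras model, that the class order follows the sorted training sub-folder names, and that inputs are scaled to [0, 1], all of which should be checked against aXeleRate's own preprocessing before relying on the results.

```python
# Minimal sketch: single-image prediction with the saved classifier checkpoint.
# Assumptions: the .h5 file loads as a plain Keras model, classes follow the
# sorted training sub-folder names, and inputs are scaled to [0, 1].
import os
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image

labels = sorted(os.listdir("dogs_classification/imgs"))       # one sub-folder per breed
model = load_model("Classifier_best_val_accuracy.h5")         # checkpoint from the notebook

img = image.load_img("some_dog.jpg", target_size=(224, 224))  # hypothetical test image
x = image.img_to_array(img)[None, ...] / 255.0                # assumed [0, 1] scaling
probs = model.predict(x)[0]
print(labels[int(np.argmax(probs))], float(probs.max()))
```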
  {
    "path": "sample_datasets/detector/anns/2007_000032.xml",
    "content": "<annotation>\n\t<folder>VOC2012</folder>\n\t<filename>2007_000032.jpg</filename>\n\t<source>\n\t\t<database>The VOC2007 Database</database>\n\t\t<annotation>PASCAL VOC2007</annotation>\n\t\t<image>flickr</image>\n\t</source>\n\t<size>\n\t\t<width>500</width>\n\t\t<height>281</height>\n\t\t<depth>3</depth>\n\t</size>\n\t<segmented>1</segmented>\n\t<object>\n\t\t<name>aeroplane</name>\n\t\t<pose>Frontal</pose>\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>104</xmin>\n\t\t\t<ymin>78</ymin>\n\t\t\t<xmax>375</xmax>\n\t\t\t<ymax>183</ymax>\n\t\t</bndbox>\n\t</object>\n\t<object>\n\t\t<name>aeroplane</name>\n\t\t<pose>Left</pose>\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>133</xmin>\n\t\t\t<ymin>88</ymin>\n\t\t\t<xmax>197</xmax>\n\t\t\t<ymax>123</ymax>\n\t\t</bndbox>\n\t</object>\n\t<object>\n\t\t<name>person</name>\n\t\t<pose>Rear</pose>\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>195</xmin>\n\t\t\t<ymin>180</ymin>\n\t\t\t<xmax>213</xmax>\n\t\t\t<ymax>229</ymax>\n\t\t</bndbox>\n\t</object>\n\t<object>\n\t\t<name>person</name>\n\t\t<pose>Rear</pose>\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>26</xmin>\n\t\t\t<ymin>189</ymin>\n\t\t\t<xmax>44</xmax>\n\t\t\t<ymax>238</ymax>\n\t\t</bndbox>\n\t</object>\n</annotation>\n"
  },
  {
    "path": "sample_datasets/detector/anns/2007_000033.xml",
    "content": "<annotation>\n\t<folder>VOC2012</folder>\n\t<filename>2007_000033.jpg</filename>\n\t<source>\n\t\t<database>The VOC2007 Database</database>\n\t\t<annotation>PASCAL VOC2007</annotation>\n\t\t<image>flickr</image>\n\t</source>\n\t<size>\n\t\t<width>500</width>\n\t\t<height>366</height>\n\t\t<depth>3</depth>\n\t</size>\n\t<segmented>1</segmented>\n\t<object>\n\t\t<name>aeroplane</name>\n\t\t<pose>Unspecified</pose>\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>9</xmin>\n\t\t\t<ymin>107</ymin>\n\t\t\t<xmax>499</xmax>\n\t\t\t<ymax>263</ymax>\n\t\t</bndbox>\n\t</object>\n\t<object>\n\t\t<name>aeroplane</name>\n\t\t<pose>Left</pose>\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>421</xmin>\n\t\t\t<ymin>200</ymin>\n\t\t\t<xmax>482</xmax>\n\t\t\t<ymax>226</ymax>\n\t\t</bndbox>\n\t</object>\n\t<object>\n\t\t<name>aeroplane</name>\n\t\t<pose>Left</pose>\n\t\t<truncated>1</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>325</xmin>\n\t\t\t<ymin>188</ymin>\n\t\t\t<xmax>411</xmax>\n\t\t\t<ymax>223</ymax>\n\t\t</bndbox>\n\t</object>\n</annotation>\n"
  },
  {
    "path": "sample_datasets/detector/anns_validation/2007_000243.xml",
    "content": "<annotation>\n\t<folder>VOC2012</folder>\n\t<filename>2007_000243.jpg</filename>\n\t<source>\n\t\t<database>The VOC2007 Database</database>\n\t\t<annotation>PASCAL VOC2007</annotation>\n\t\t<image>flickr</image>\n\t</source>\n\t<size>\n\t\t<width>500</width>\n\t\t<height>333</height>\n\t\t<depth>3</depth>\n\t</size>\n\t<segmented>1</segmented>\n\t<object>\n\t\t<name>aeroplane</name>\n\t\t<pose>Unspecified</pose>\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>181</xmin>\n\t\t\t<ymin>127</ymin>\n\t\t\t<xmax>274</xmax>\n\t\t\t<ymax>193</ymax>\n\t\t</bndbox>\n\t</object>\n</annotation>\n"
  },
  {
    "path": "sample_datasets/detector/anns_validation/2007_000250.xml",
    "content": "<annotation>\n\t<folder>VOC2012</folder>\n\t<filename>2007_000250.jpg</filename>\n\t<source>\n\t\t<database>The VOC2007 Database</database>\n\t\t<annotation>PASCAL VOC2007</annotation>\n\t\t<image>flickr</image>\n\t</source>\n\t<size>\n\t\t<width>500</width>\n\t\t<height>375</height>\n\t\t<depth>3</depth>\n\t</size>\n\t<segmented>1</segmented>\n\t<object>\n\t\t<name>diningtable</name>\n\t\t<pose>Unspecified</pose>\n\t\t<truncated>1</truncated>\n\t\t<difficult>1</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>1</xmin>\n\t\t\t<ymin>170</ymin>\n\t\t\t<xmax>474</xmax>\n\t\t\t<ymax>375</ymax>\n\t\t</bndbox>\n\t</object>\n\t<object>\n\t\t<name>bottle</name>\n\t\t<pose>Unspecified</pose>\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>97</xmin>\n\t\t\t<ymin>124</ymin>\n\t\t\t<xmax>150</xmax>\n\t\t\t<ymax>297</ymax>\n\t\t</bndbox>\n\t</object>\n</annotation>\n"
  },
  {
    "path": "sample_datasets/detector/anns_validation/2007_000645.xml",
    "content": "<annotation>\n\t<folder>VOC2012</folder>\n\t<filename>2007_000645.jpg</filename>\n\t<source>\n\t\t<database>The VOC2007 Database</database>\n\t\t<annotation>PASCAL VOC2007</annotation>\n\t\t<image>flickr</image>\n\t</source>\n\t<size>\n\t\t<width>500</width>\n\t\t<height>375</height>\n\t\t<depth>3</depth>\n\t</size>\n\t<segmented>1</segmented>\n\t<object>\n\t\t<name>bird</name>\n\t\t<pose>Left</pose>\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>135</xmin>\n\t\t\t<ymin>46</ymin>\n\t\t\t<xmax>500</xmax>\n\t\t\t<ymax>374</ymax>\n\t\t</bndbox>\n\t</object>\n\t<object>\n\t\t<name>bird</name>\n\t\t<pose>Left</pose>\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>124</xmin>\n\t\t\t<ymin>146</ymin>\n\t\t\t<xmax>365</xmax>\n\t\t\t<ymax>375</ymax>\n\t\t</bndbox>\n\t</object>\n</annotation>\n"
  },
  {
    "path": "sample_datasets/detector/anns_validation/2007_001595.xml",
    "content": "<annotation>\n\t<folder>VOC2012</folder>\n\t<filename>2007_001595.jpg</filename>\n\t<source>\n\t\t<database>The VOC2007 Database</database>\n\t\t<annotation>PASCAL VOC2007</annotation>\n\t\t<image>flickr</image>\n\t</source>\n\t<size>\n\t\t<width>500</width>\n\t\t<height>375</height>\n\t\t<depth>3</depth>\n\t</size>\n\t<segmented>1</segmented>\n\t<object>\n\t\t<name>bus</name>\n\t\t<pose>Unspecified</pose>\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>268</xmin>\n\t\t\t<ymin>162</ymin>\n\t\t\t<xmax>442</xmax>\n\t\t\t<ymax>296</ymax>\n\t\t</bndbox>\n\t</object>\n\t<object>\n\t\t<name>bus</name>\n\t\t<pose>Unspecified</pose>\n\t\t<truncated>1</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>40</xmin>\n\t\t\t<ymin>158</ymin>\n\t\t\t<xmax>275</xmax>\n\t\t\t<ymax>288</ymax>\n\t\t</bndbox>\n\t</object>\n</annotation>\n"
  },
  {
    "path": "sample_datasets/detector/anns_validation/2007_001834.xml",
    "content": "<annotation>\n\t<folder>VOC2012</folder>\n\t<filename>2007_001834.jpg</filename>\n\t<source>\n\t\t<database>The VOC2007 Database</database>\n\t\t<annotation>PASCAL VOC2007</annotation>\n\t\t<image>flickr</image>\n\t</source>\n\t<size>\n\t\t<width>500</width>\n\t\t<height>334</height>\n\t\t<depth>3</depth>\n\t</size>\n\t<segmented>1</segmented>\n\t<object>\n\t\t<name>diningtable</name>\n\t\t<pose>Unspecified</pose>\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>46</xmin>\n\t\t\t<ymin>39</ymin>\n\t\t\t<xmax>456</xmax>\n\t\t\t<ymax>304</ymax>\n\t\t</bndbox>\n\t</object>\n</annotation>\n"
  },
  {
    "path": "sample_datasets/detector/anns_validation/2007_003131.xml",
    "content": "<annotation>\n\t<folder>VOC2012</folder>\n\t<filename>2007_003131.jpg</filename>\n\t<source>\n\t\t<database>The VOC2007 Database</database>\n\t\t<annotation>PASCAL VOC2007</annotation>\n\t\t<image>flickr</image>\n\t</source>\n\t<size>\n\t\t<width>500</width>\n\t\t<height>334</height>\n\t\t<depth>3</depth>\n\t</size>\n\t<segmented>1</segmented>\n\t<object>\n\t\t<name>boat</name>\n\t\t<pose>Right</pose>\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>340</xmin>\n\t\t\t<ymin>214</ymin>\n\t\t\t<xmax>410</xmax>\n\t\t\t<ymax>330</ymax>\n\t\t</bndbox>\n\t</object>\n</annotation>\n"
  },
  {
    "path": "sample_datasets/detector/anns_validation/2007_003201.xml",
    "content": "<annotation>\n\t<folder>VOC2012</folder>\n\t<filename>2007_003201.jpg</filename>\n\t<source>\n\t\t<database>The VOC2007 Database</database>\n\t\t<annotation>PASCAL VOC2007</annotation>\n\t\t<image>flickr</image>\n\t</source>\n\t<size>\n\t\t<width>500</width>\n\t\t<height>315</height>\n\t\t<depth>3</depth>\n\t</size>\n\t<segmented>1</segmented>\n\t<object>\n\t\t<name>cow</name>\n\t\t<pose>Frontal</pose>\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>1</xmin>\n\t\t\t<ymin>53</ymin>\n\t\t\t<xmax>166</xmax>\n\t\t\t<ymax>260</ymax>\n\t\t</bndbox>\n\t</object>\n\t<object>\n\t\t<name>cow</name>\n\t\t<pose>Left</pose>\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>137</xmin>\n\t\t\t<ymin>25</ymin>\n\t\t\t<xmax>416</xmax>\n\t\t\t<ymax>298</ymax>\n\t\t</bndbox>\n\t</object>\n\t<object>\n\t\t<name>cow</name>\n\t\t<pose>Unspecified</pose>\n\t\t<truncated>1</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>320</xmin>\n\t\t\t<ymin>30</ymin>\n\t\t\t<xmax>500</xmax>\n\t\t\t<ymax>261</ymax>\n\t\t</bndbox>\n\t</object>\n</annotation>\n"
  },
  {
    "path": "sample_datasets/detector/anns_validation/2007_003593.xml",
    "content": "<annotation>\n\t<folder>VOC2012</folder>\n\t<filename>2007_003593.jpg</filename>\n\t<source>\n\t\t<database>The VOC2007 Database</database>\n\t\t<annotation>PASCAL VOC2007</annotation>\n\t\t<image>flickr</image>\n\t</source>\n\t<size>\n\t\t<width>500</width>\n\t\t<height>333</height>\n\t\t<depth>3</depth>\n\t</size>\n\t<segmented>1</segmented>\n\t<object>\n\t\t<name>sheep</name>\n\t\t<pose>Left</pose>\n\t\t<truncated>1</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>316</xmin>\n\t\t\t<ymin>135</ymin>\n\t\t\t<xmax>463</xmax>\n\t\t\t<ymax>265</ymax>\n\t\t</bndbox>\n\t</object>\n\t<object>\n\t\t<name>sheep</name>\n\t\t<pose>Left</pose>\n\t\t<truncated>1</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>62</xmin>\n\t\t\t<ymin>119</ymin>\n\t\t\t<xmax>314</xmax>\n\t\t\t<ymax>303</ymax>\n\t\t</bndbox>\n\t</object>\n</annotation>\n"
  },
  {
    "path": "sample_datasets/detector/anns_validation/2007_004627.xml",
    "content": "<annotation>\n\t<folder>VOC2012</folder>\n\t<filename>2007_004627.jpg</filename>\n\t<source>\n\t\t<database>The VOC2007 Database</database>\n\t\t<annotation>PASCAL VOC2007</annotation>\n\t\t<image>flickr</image>\n\t</source>\n\t<size>\n\t\t<width>500</width>\n\t\t<height>375</height>\n\t\t<depth>3</depth>\n\t</size>\n\t<segmented>1</segmented>\n\t<object>\n\t\t<name>train</name>\n\t\t<pose>Unspecified</pose>\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>193</xmin>\n\t\t\t<ymin>202</ymin>\n\t\t\t<xmax>421</xmax>\n\t\t\t<ymax>272</ymax>\n\t\t</bndbox>\n\t</object>\n\t<object>\n\t\t<name>train</name>\n\t\t<pose>Unspecified</pose>\n\t\t<truncated>1</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>417</xmin>\n\t\t\t<ymin>227</ymin>\n\t\t\t<xmax>500</xmax>\n\t\t\t<ymax>284</ymax>\n\t\t</bndbox>\n\t</object>\n</annotation>\n"
  },
  {
    "path": "sample_datasets/detector/anns_validation/2007_005803.xml",
    "content": "<annotation>\n\t<folder>VOC2012</folder>\n\t<filename>2007_005803.jpg</filename>\n\t<source>\n\t\t<database>The VOC2007 Database</database>\n\t\t<annotation>PASCAL VOC2007</annotation>\n\t\t<image>flickr</image>\n\t</source>\n\t<size>\n\t\t<width>500</width>\n\t\t<height>375</height>\n\t\t<depth>3</depth>\n\t</size>\n\t<segmented>1</segmented>\n\t<object>\n\t\t<name>diningtable</name>\n\t\t<pose>Unspecified</pose>\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>67</xmin>\n\t\t\t<ymin>156</ymin>\n\t\t\t<xmax>433</xmax>\n\t\t\t<ymax>273</ymax>\n\t\t</bndbox>\n\t</object>\n</annotation>\n"
  },
  {
    "path": "setup.py",
    "content": "from setuptools import setup, find_packages\r\nfrom os import path\r\nthis_directory = path.abspath(path.dirname(__file__))\r\n\r\nwith open(path.join(this_directory, 'README.md'), encoding='utf-8') as f:\r\n    long_description = f.read()\r\n\r\nwith open('requirements.txt') as f:\r\n    requirements = f.read().splitlines()\r\n\r\nsetup(name='axelerate',\r\n      version=\"0.7.6\",\r\n      description='Keras-based framework for AI on the Edge',\r\n      install_requires=requirements,\r\n      long_description=long_description,\r\n      long_description_content_type=\"text/markdown\",\r\n      author='Dmitry Maslov',\r\n      author_email='dmitrywat@gmail.com',\r\n      url='https://github.com/AIWintermuteAI',\r\n      packages=find_packages(),\r\n     )\r\n"
  },
  {
    "path": "tests_training_and_inference.py",
    "content": "import argparse\r\nimport json\r\nfrom axelerate import setup_training, setup_evaluation\r\nimport tensorflow.keras.backend as K\r\nfrom termcolor import colored\r\nimport traceback\r\nimport time \r\n\r\ndef configs(network_type):\r\n\r\n    classifier = {\r\n        \"model\" : {\r\n            \"type\":                 \"Classifier\",\r\n            \"architecture\":         \"Tiny Yolo\",\r\n            \"input_size\":           [224,224],\r\n            \"fully-connected\":      [],\r\n            \"labels\":               [],\r\n            \"dropout\" : \t\t    0.5\r\n        },\r\n        \"weights\" : {\r\n            \"full\":   \t\t\t\t\"\",\r\n            \"backend\":   \t\t    None,\r\n            \"save_bottleneck\":      True\r\n        \r\n        },\r\n        \"train\" : {\r\n            \"actual_epoch\":         5,\r\n            \"train_image_folder\":   \"sample_datasets/classifier/imgs\",\r\n            \"train_times\":          1,\r\n            \"valid_image_folder\":   \"sample_datasets/classifier/imgs_validation\",\r\n            \"valid_times\":          1,\r\n            \"valid_metric\":         \"accuracy\",\r\n            \"batch_size\":           2,\r\n            \"learning_rate\":        1e-4,\r\n            \"saved_folder\":   \t\t\"classifier\",\r\n            \"first_trainable_layer\": \"\",\r\n            \"augmentation\":\t\tTrue\r\n        },\r\n        \"converter\" : {\r\n            \"type\":   \t\t\t\t[]\r\n        }\r\n    }\r\n\r\n\r\n    detector = {\r\n        \"model\":{\r\n            \"type\":                 \"Detector\",\r\n            \"architecture\":         \"MobileNet7_5\",\r\n            \"input_size\":           [240, 320],\r\n            \"anchors\":              [[[0.51424575, 0.54116074], [0.29523918, 0.45838044], [0.21371929, 0.21518053]]],\r\n            \"labels\":               [\"aeroplane\", \"person\", \"diningtable\",\" bottle\", \"bird\", \"bus\", \"boat\", \"cow\", \"sheep\", \"train\"],\r\n            \"obj_thresh\" : \t\t    0.7,\r\n            \"iou_thresh\" : \t\t    0.3,\r\n            \"coord_scale\" : \t\t0.5,\r\n            \"object_scale\" : \t\t5.0,            \r\n            \"no_object_scale\" : \t0.5\r\n        },\r\n        \"weights\" : {\r\n            \"full\":   \t\t\t\t\"\",\r\n            \"backend\":   \t\t    None\r\n        },\r\n        \"train\" : {\r\n            \"actual_epoch\":         5,\r\n            \"train_image_folder\":   \"sample_datasets/detector/imgs\",\r\n            \"train_annot_folder\":   \"sample_datasets/detector/anns\",\r\n            \"train_times\":          1,\r\n            \"valid_image_folder\":   \"sample_datasets/detector/imgs_validation\",\r\n            \"valid_annot_folder\":   \"sample_datasets/detector/anns_validation\",\r\n            \"valid_times\":          1,\r\n            \"valid_metric\":         \"recall\",\r\n            \"batch_size\":           2,\r\n            \"learning_rate\":        1e-4,\r\n            \"saved_folder\":   \t\t\"detector\",\r\n            \"first_trainable_layer\": \"\",\r\n            \"augmentation\":\t\t    True,\r\n            \"is_only_detect\" : \t\tFalse\r\n        },\r\n        \"converter\" : {\r\n            \"type\":   \t\t\t\t[]\r\n        }\r\n    }\r\n\r\n    segnet = {\r\n            \"model\" : {\r\n                \"type\":                 \"SegNet\",\r\n                \"architecture\":         \"MobileNet5_0\",\r\n                \"input_size\":           [224,224],\r\n                
\"n_classes\" : \t\t     20\r\n            },\r\n        \"weights\" : {\r\n            \"full\":   \t\t\t\t\"\",\r\n            \"backend\":   \t\t    None\r\n        },\r\n            \"train\" : {\r\n                \"actual_epoch\":         5,\r\n                \"train_image_folder\":   \"sample_datasets/segmentation/imgs\",\r\n                \"train_annot_folder\":   \"sample_datasets/segmentation/anns\",\r\n                \"train_times\":          4,\r\n                \"valid_image_folder\":   \"sample_datasets/segmentation/imgs_validation\",\r\n                \"valid_annot_folder\":   \"sample_datasets/segmentation/anns_validation\",\r\n                \"valid_times\":          4,\r\n                \"valid_metric\":         \"loss\",\r\n                \"batch_size\":           2,\r\n                \"learning_rate\":        1e-4,\r\n                \"saved_folder\":   \t\t\"segment\",\r\n                \"first_trainable_layer\": \"\",\r\n                \"ignore_zero_class\":    False,\r\n                \"augmentation\":\t\tTrue\r\n            },\r\n            \"converter\" : {\r\n                \"type\":   \t\t\t\t[]\r\n            }\r\n        }\r\n\r\n    dict = {'all':[classifier,detector,segnet],'classifier':[classifier],'detector':[detector],'segnet':[segnet]}\r\n\r\n    return dict[network_type]\r\n\r\n\r\nargparser = argparse.ArgumentParser(description='Test axelerate on sample datasets')\r\n\r\nargparser.add_argument(\r\n    '-t',\r\n    '--type',\r\n    default=\"all\",\r\n    help='type of network to test:classifier,detector,segnet or all')\r\n    \r\nargparser.add_argument(\r\n    '-a',\r\n    '--arch',\r\n    type=bool,\r\n    default=False,\r\n    help='test all architectures?')\r\n\r\nargparser.add_argument(\r\n    '-c',\r\n    '--conv',\r\n    type=bool,\r\n    default=False,\r\n    help='test all converters?')\r\n\r\nargs = argparser.parse_args()\r\n\r\narchs = ['MobileNet7_5']\r\nconverters = [\"\"]\r\nerrors = []\r\n\r\nif args.arch:\r\n    archs = ['Full Yolo', 'Tiny Yolo', 'MobileNet1_0', 'MobileNet7_5', 'MobileNet5_0', 'MobileNet2_5', 'SqueezeNet', 'NASNetMobile', 'ResNet50', 'DenseNet121']\r\nif args.conv:\r\n    converters = ['k210', 'tflite_fullint', 'tflite_dynamic', 'edgetpu', 'openvino', 'onnx']\r\n\r\nfor item in configs(args.type):\r\n    for arch in archs:\r\n        for converter in converters:\r\n            try:\r\n                item['model']['architecture'] = arch\r\n                item['converter']['type'] = converter\r\n                print(json.dumps(item, indent=4, sort_keys=False))\r\n                model_path = setup_training(config_dict=item)\r\n                K.clear_session()\r\n                setup_evaluation(item, model_path)\r\n            except Exception as e:\r\n                traceback.print_exc()\r\n                print(colored(str(e), 'red'))\r\n                time.sleep(2)\r\n                errors.append(item['model']['type'] + \" \" + arch + \" \" + converter + \" \" + str(e))\r\n\r\nfor error in errors:\r\n    print(error)\r\n\r\n\r\n\r\n"
  }
]