Showing preview only (970K chars total). Download the full file or copy to clipboard to get everything.
Repository: explosion/lightnet
Branch: master
Commit: e7283d95367e
Files: 151
Total size: 921.9 KB
Directory structure:
gitextract_3zntw7v9/
├── .appveyor.yml
├── .gitignore
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.rst
├── bin/
│ ├── cythonize.py
│ └── train.py
├── lightnet/
│ ├── __init__.pxd
│ ├── __init__.py
│ ├── __main__.py
│ ├── _darknet/
│ │ ├── Makefile
│ │ ├── activation_kernels.cu
│ │ ├── activation_layer.c
│ │ ├── activation_layer.h
│ │ ├── activations.c
│ │ ├── activations.h
│ │ ├── avgpool_layer.c
│ │ ├── avgpool_layer.h
│ │ ├── avgpool_layer_kernels.cu
│ │ ├── batchnorm_layer.c
│ │ ├── batchnorm_layer.h
│ │ ├── blas.c
│ │ ├── blas.h
│ │ ├── blas_kernels.cu
│ │ ├── box.c
│ │ ├── box.h
│ │ ├── classifier.h
│ │ ├── col2im.c
│ │ ├── col2im.h
│ │ ├── col2im_kernels.cu
│ │ ├── connected_layer.c
│ │ ├── connected_layer.h
│ │ ├── convolutional_kernels.cu
│ │ ├── convolutional_layer.c
│ │ ├── convolutional_layer.h
│ │ ├── cost_layer.c
│ │ ├── cost_layer.h
│ │ ├── crnn_layer.c
│ │ ├── crnn_layer.h
│ │ ├── crop_layer.c
│ │ ├── crop_layer.h
│ │ ├── crop_layer_kernels.cu
│ │ ├── cuda.c
│ │ ├── cuda.h
│ │ ├── darknet.h
│ │ ├── data.c
│ │ ├── data.h
│ │ ├── deconvolutional_kernels.cu
│ │ ├── deconvolutional_layer.c
│ │ ├── deconvolutional_layer.h
│ │ ├── demo.c
│ │ ├── demo.h
│ │ ├── detection_layer.c
│ │ ├── detection_layer.h
│ │ ├── dropout_layer.c
│ │ ├── dropout_layer.h
│ │ ├── dropout_layer_kernels.cu
│ │ ├── gemm.c
│ │ ├── gemm.h
│ │ ├── gru_layer.c
│ │ ├── gru_layer.h
│ │ ├── im2col.c
│ │ ├── im2col.h
│ │ ├── im2col_kernels.cu
│ │ ├── image.c
│ │ ├── image.h
│ │ ├── layer.c
│ │ ├── layer.h
│ │ ├── list.c
│ │ ├── list.h
│ │ ├── local_layer.c
│ │ ├── local_layer.h
│ │ ├── lstm_layer.c
│ │ ├── lstm_layer.h
│ │ ├── matrix.c
│ │ ├── matrix.h
│ │ ├── maxpool_layer.c
│ │ ├── maxpool_layer.h
│ │ ├── maxpool_layer_kernels.cu
│ │ ├── network.c
│ │ ├── network.h
│ │ ├── normalization_layer.c
│ │ ├── normalization_layer.h
│ │ ├── option_list.c
│ │ ├── option_list.h
│ │ ├── parser.c
│ │ ├── parser.h
│ │ ├── region_layer.c
│ │ ├── region_layer.h
│ │ ├── reorg_layer.c
│ │ ├── reorg_layer.h
│ │ ├── rnn_layer.c
│ │ ├── rnn_layer.h
│ │ ├── route_layer.c
│ │ ├── route_layer.h
│ │ ├── shortcut_layer.c
│ │ ├── shortcut_layer.h
│ │ ├── softmax_layer.c
│ │ ├── softmax_layer.h
│ │ ├── stb_image.h
│ │ ├── stb_image_write.h
│ │ ├── tree.c
│ │ ├── tree.h
│ │ ├── utils.c
│ │ └── utils.h
│ ├── about.py
│ ├── cli.py
│ ├── data/
│ │ ├── alexnet.cfg
│ │ ├── cifar.cfg
│ │ ├── cifar.test.cfg
│ │ ├── coco.names
│ │ ├── coco.template
│ │ ├── darknet.cfg
│ │ ├── darknet19.cfg
│ │ ├── darknet19_448.cfg
│ │ ├── darknet9000.cfg
│ │ ├── densenet201.cfg
│ │ ├── extraction.cfg
│ │ ├── extraction.conv.cfg
│ │ ├── extraction22k.cfg
│ │ ├── go.cfg
│ │ ├── go.test.cfg
│ │ ├── gru.cfg
│ │ ├── jnet-conv.cfg
│ │ ├── resnet152.cfg
│ │ ├── resnet50.cfg
│ │ ├── rnn.cfg
│ │ ├── rnn.train.cfg
│ │ ├── strided.cfg
│ │ ├── t1.test.cfg
│ │ ├── tiny-yolo-voc.cfg
│ │ ├── tiny-yolo.cfg
│ │ ├── tiny.cfg
│ │ ├── vgg-16.cfg
│ │ ├── vgg-conv.cfg
│ │ ├── voc.names
│ │ ├── writing.cfg
│ │ ├── yolo-voc.2.0.cfg
│ │ ├── yolo-voc.cfg
│ │ ├── yolo.2.0.cfg
│ │ ├── yolo.cfg
│ │ └── yolo9000.cfg
│ ├── lightnet.pxd
│ ├── lightnet.pyx
│ └── util.py
├── requirements.txt
├── setup.py
└── tests/
├── test_boxes.py
├── test_image.py
└── test_network.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .appveyor.yml
================================================
environment:
matrix:
# For Python versions available on Appveyor, see
# http://www.appveyor.com/docs/installed-software#python
# The list here is complete (excluding Python 2.6, which
# isn't covered by this document) at the time of writing.
- PYTHON: "C:\\Python27"
- PYTHON: "C:\\Python35"
- PYTHON: "C:\\Python27-x64"
- PYTHON: "C:\\Python35-x64"
- PYTHON: "C:\\Python36-x64"
install:
# We need wheel installed to build wheels
- "%PYTHON%\\python.exe -m pip install wheel"
- "%PYTHON%\\python.exe -m pip install cython"
- "%PYTHON%\\python.exe -m pip install -r requirements.txt"
- "%PYTHON%\\python.exe -m pip install -e ."
build: off
test_script:
# Put your test command here.
# If you don't need to build C extensions on 64-bit Python 3.3 or 3.4,
# you can remove "build.cmd" from the front of the command, as it's
# only needed to support those cases.
# Note that you must use the environment variable %PYTHON% to refer to
# the interpreter you're using - Appveyor does not do anything special
# to put the Python version you want to use on PATH.
- "%PYTHON%\\python.exe -m pytest tests/"
after_test:
# This step builds your wheels.
# Again, you only need build.cmd if you're building C extensions for
# 64-bit Python 3.3/3.4. And you need to use %PYTHON% to get the correct
# interpreter
- "%PYTHON%\\python.exe setup.py bdist_wheel"
artifacts:
# bdist_wheel puts your built wheel in the dist directory
- path: dist\*
#on_success:
# You can use this step to upload your artifacts to a public website.
# See Appveyor's documentation for more details. Or you can simply
# access your wheels from the Appveyor "artifacts" tab for your build.
================================================
FILE: .gitignore
================================================
*.weights
# Cython / C extensions
cythonize.json
spacy/*.html
*.cpp
*.so
# Vim / VSCode / editors
*.swp
*.sw*
Profile.prof
.vscode
.sass-cache
# Python
.Python
.python-version
__pycache__/
*.py[cod]
.env/
.env*
.~env/
.venv
venv/
.dev
.denv
.pypyenv
# Distribution / packaging
env/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
.eggs
MANIFEST
# Temporary files
*.~*
tmp/
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml
# Translations
*.mo
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
# Rope
.ropeproject
# Django stuff:
*.log
*.pot
# Windows
*.bat
Thumbs.db
Desktop.ini
# Mac OS X
*.DS_Store
# Komodo project files
*.komodoproject
# Other
*.tgz
# Pycharm project files
*.idea
================================================
FILE: .travis.yml
================================================
language: python
python:
- "2.7"
- "3.5"
- "3.6"
install:
- if [ "$TRAVIS_OS_NAME" == "linux" ] ; then sudo apt-get install libopenblas-dev ; fi
- pip install -r requirements.txt
- pip install cython
- python setup.py build_ext --inplace
- pip install -e .
- export PYTHONPATH=`pwd`
- python -m lightnet download tiny-yolo
- pip install pytest
script:
- python -m pytest tests
notifications:
email: false
slack:
secure: VSqtxg7u4NTZRfoZqjxPRPVS92KTy/mp62egfDZ9ujTP4VPxNe15QZuTB6r/ICPgEYqBtdhLc/aetuBcemt0bHfentV0F7bz7iDY/AFQC1h1i4G0D0wKMufuqOJFw9MOp2tSpuvCVzhCxR+Ymx/F9SaeYBAiwBawce4wu+qu3lA=
================================================
FILE: LICENSE
================================================
The MIT License (MIT)
Copyright (C) 2017 ExplosionAI UG (haftungsbeschränkt), 2014-2017 Joseph Redmon
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
================================================
FILE: MANIFEST.in
================================================
include LICENSE
include README.rst
include bin/cythonize.py
include lightnet/_darknet/Makefile
recursive-include lightnet/_darknet *.c
recursive-include lightnet/_darknet *.cu
recursive-include lightnet/_darknet *.h
recursive-include lightnet/data *.cfg
recursive-include lightnet/data *.data
recursive-include lightnet/data *.names
================================================
FILE: README.rst
================================================
LightNet: Bringing pjreddie's DarkNet out of the shadows
********************************************************
LightNet provides a simple and efficient Python interface to
`DarkNet <https://github.com/pjreddie/darknet>`_, a neural network library
written by Joseph Redmon that's well known for its state-of-the-art object
detection models, `YOLO and YOLOv2 <https://pjreddie.com/darknet/yolo/>`_.
LightNet's main purpose for now is to power `Prodigy <https://prodi.gy>`_'s
upcoming object detection and image segmentation features. However, it may be
useful to anyone interested in the DarkNet library.
.. image:: https://img.shields.io/travis/explosion/lightnet/master.svg?style=flat-square
:target: https://travis-ci.org/explosion/lightnet
:alt: Build Status
.. image:: https://img.shields.io/github/release/explosion/lightnet.svg?style=flat-square
:target: https://github.com/explosion/lightnet/releases
:alt: Current Release Version
.. image:: https://img.shields.io/pypi/v/lightnet.svg?style=flat-square
:target: https://pypi.python.org/pypi/lightnet
:alt: pypi Version
.. image:: https://img.shields.io/twitter/follow/explosion_ai.svg?style=social&label=Follow
:target: https://twitter.com/explosion_ai
:alt: Explosion AI on Twitter
----
LightNet's features include:
* **State-of-the-art object detection**: YOLOv2 offers unmatched speed/accuracy trade-offs.
* **Easy-to-use via Python**: Pass in byte strings, get back numpy arrays with bounding boxes.
* **Lightweight and self-contained**: No dependency on large frameworks like Tensorflow, PyTorch etc. The DarkNet source is provided in the package.
* **Easy to install**: Just ``pip install lightnet`` and ``python -m lightnet download yolo``.
* **Cross-platform**: Works on OSX and Linux, on Python 2.7, 3.5 and 3.6.
* **10x faster on CPU**: Uses BLAS for its matrix multiplication routines.
* **Not named DarkNet**: Avoids some potentially awkward misunderstandings.
.. image:: https://user-images.githubusercontent.com/13643239/33104476-a31678ce-cf28-11e7-993f-872f3234f4b5.png
:alt: LightNet "logo"
🌓 Installation
===============
==================== ===
**Operating system** macOS / OS X, Linux (Windows coming soon)
**Python version** CPython 2.7, 3.5, 3.6. Only 64 bit.
**Package managers** pip (source packages only)
==================== ===
LightNet requires an installation of `OpenBLAS <https://www.openblas.net/>`_:
.. code:: bash
sudo apt-get install libopenblas-dev
LightNet can be installed via pip:
.. code:: bash
pip install lightnet
Once you've downloaded LightNet, you can install a model using the
``lightnet download`` command. This will save the models in the
``lightnet/data`` directory. If you've installed LightNet system-wide, make
sure to run the command as administrator.
.. code:: bash
python -m lightnet download tiny-yolo
python -m lightnet download yolo
The following models are currently available via the ``download`` command:
===================== ======= ===
``yolo.weights`` 258 MB `Direct download`__
``tiny-yolo.weights`` 44.9 MB `Direct download`__
===================== ======= ===
__ https://pjreddie.com/media/files/yolo.weights
__ https://pjreddie.com/media/files/tiny-yolo.weights
🌓 Usage
========
An object detection system predicts labelled bounding boxes on an image. The
label scheme comes from the training data, so different models will have
different label sets. `YOLOv2 <https://pjreddie.com/darknet/yolo/>`_ can detect
objects in images of any resolution. Smaller images will be faster to predict,
while high resolution images will give you better object detection accuracy.
Images can be loaded by file-path, by JPEG-encoded byte-string, or by numpy
array. If passing in a numpy array, it should be of dtype float32, and shape
``(width, height, colors)``.
.. code:: python
import lightnet
model = lightnet.load('tiny-yolo')
image = lightnet.Image.from_bytes(open('eagle.jpg', 'rb').read())
boxes = model(image)
``METHOD`` lightnet.load
------------------------
Load a pre-trained model. If a ``path`` is provided, it should be a directory
containing two files, named ``{name}.weights`` and ``{name}.cfg``. If a
``path`` is not provided, the built-in data directory is used, which is
located within the LightNet package.
.. code:: python
model = lightnet.load('tiny-yolo')
model = lightnet.load(path='/path/to/yolo')
=========== =========== ===========
Argument Type Description
=========== =========== ===========
``name`` unicode Name of the model located in the data directory, e.g. ``tiny-yolo``.
``path`` unicode Optional path to a model data directory.
**RETURNS** ``Network`` The loaded model.
=========== =========== ===========
----
🌓 Network
==========
The neural network object. Wraps DarkNet's ``network`` struct.
``CLASSMETHOD`` Network.load
----------------------------
Load a pre-trained model. Identical to ``lightnet.load()``.
``METHOD`` Network.__call__
---------------------------
Detect bounding boxes given an ``Image`` object. The bounding boxes are
provided as a list, with each entry
``(class_id, class_name, prob, [(x, y, width, height)])``, where ``x`` and
``y`` are the pixel coordinates of the centre of the box, and
``width`` and ``height`` describe its dimensions. ``class_id`` is the integer
index of the object type, ``class_name`` is a string with the object type, and
``prob`` is a float indicating the detection score. The ``thresh`` parameter
controls the prediction threshold. Objects with a detection probability above
``thresh`` are returned. We don't know what ``hier_thresh`` or ``nms`` do.
.. code:: python
boxes = model(image, thresh=0.5, hier_thresh=0.5, nms=0.45)
=============== =========== ===========
Argument Type Description
=============== =========== ===========
``image`` ``Image`` The image to process.
``thresh`` float Prediction threshold.
``hier_thresh`` float
``path`` unicode Optional path to a model data directory.
**RETURNS** list The bounding boxes, as ``(class_id, class_name, prob, xywh)`` tuples.
=============== =========== ===========
``METHOD`` Network.update
-------------------------
Update the model, on a batch of examples. The images should be provided as a
list of ``Image`` objects. The ``box_labels`` should be a list of ``BoxLabel``
objects. Returns a float, indicating how much the model's prediction differed
from the provided true labels.
.. code:: python
loss = model.update([image1, image2], [box_labels1, box_labels2])
============== =========== ===========
Argument Type Description
============== =========== ===========
``images`` list List of ``Image`` objects.
``box_labels`` list List of ``BoxLabel`` objects.
**RETURNS** float The loss indicating how much the prediction differed from the provided labels.
============== =========== ===========
----
🌓 Image
========
Data container for a single image. Wraps DarkNet's ``image`` struct.
``METHOD`` Image.__init__
-------------------------
Create an image. `data` should be a numpy array of dtype float32, and shape
(width, height, colors).
.. code:: python
image = Image(data)
=========== =========== ===========
Argument Type Description
=========== =========== ===========
``data`` numpy array The image data
**RETURNS** ``Image`` The newly constructed object.
=========== =========== ===========
``CLASSMETHOD`` Image.blank
---------------------------
Create a blank image, of specified dimensions.
.. code:: python
image = Image.blank(width, height, colors)
=========== =========== ===========
Argument Type Description
=========== =========== ===========
``width`` int The image width, in pixels.
``height`` int The image height, in pixels.
``colors`` int The number of color channels (usually ``3``).
**RETURNS** ``Image`` The newly constructed object.
=========== =========== ===========
``CLASSMETHOD`` Image.load
--------------------------
Load an image from a path to a jpeg file, of the specified dimensions.
.. code:: python
image = Image.load(path, width, height, colors)
=========== =========== ===========
Argument Type Description
=========== =========== ===========
``path`` unicode The path to the image file.
``width`` int The image width, in pixels.
``height`` int The image height, in pixels.
``colors`` int The number of color channels (usually ``3``).
**RETURNS** ``Image`` The newly constructed object.
=========== =========== ===========
``CLASSMETHOD`` Image.from_bytes
--------------------------------
Read an image from a byte-string, which should be the contents of a jpeg file.
.. code:: python
image = Image.from_bytes(bytes_data)
============== =========== ===========
Argument Type Description
============== =========== ===========
``bytes_data`` bytes The image contents.
**RETURNS** ``Image`` The newly constructed object.
============== =========== ===========
----
🌓 BoxLabels
============
Data container for labelled bounding boxes for a single image. Wraps an array
of DarkNet's ``box_label`` struct.
``METHOD`` BoxLabels.__init__
-----------------------------
Labelled box annotations for a single image, used to update the model. ``ids``
should be a 1d numpy array of dtype int32, indicating the correct class IDs of
the objects. ``boxes`` should be a 2d array of dtype float32, and shape
``(len(ids), 4)``. The 4 columns of the boxes should provide the **relative**
``x, y, width, height`` of the bounding box, where ``x`` and ``y`` are the
coordinates of the centre, relative to the image size, and ``width`` and
``height`` are the relative dimensions of the box.
.. code:: python
box_labels = BoxLabels(ids, boxes)
============== ============= ===========
Argument Type Description
============== ============= ===========
``ids`` numpy array The class IDs of the objects.
``boxes`` numpy array The boxes providing the relative ``x, y, width, height`` of the bounding box.
**RETURNS** ``BoxLabels`` The newly constructed object.
============== ============= ===========
``CLASSMETHOD`` BoxLabels.load
------------------------------
Load annotations for a single image from a text file. Each box should be
described on a single line, in the format ``class_id x y width height``.
.. code:: python
box_labels = BoxLabels.load(path)
============== ============= ===========
Argument Type Description
============== ============= ===========
``path`` unicode The path to load from.
**RETURNS** ``BoxLabels`` The newly constructed object.
============== ============= ===========
================================================
FILE: bin/cythonize.py
================================================
#!/usr/bin/env python
""" cythonize
Cythonize pyx files into C files as needed.
Usage: cythonize [root_dir]
Default [root_dir] is 'lightnet'.
Checks pyx files to see if they have been changed relative to their
corresponding C files. If they have, then runs cython on these files to
recreate the C files.
The script thinks that the pyx files have changed relative to the C files
by comparing hashes stored in a database file.
Simple script to invoke Cython (and Tempita) on all .pyx (.pyx.in)
files; while waiting for a proper build system. Uses file hashes to
figure out if rebuild is needed.
For now, this script should be run by developers when changing Cython files
only, and the resulting C files checked in, so that end-users (and Python-only
developers) do not get the Cython/Tempita dependencies.
Originally written by Dag Sverre Seljebotn, and copied here from:
https://raw.github.com/dagss/private-scipy-refactor/cythonize/cythonize.py
Note: this script does not check any of the dependent C libraries; it only
operates on the Cython .pyx files.
"""
from __future__ import division, print_function, absolute_import
import os
import re
import sys
import hashlib
import subprocess
# NOTE(review): .gitignore lists 'cythonize.json' while this script writes
# 'cythonize.dat' — confirm which name is current.
HASH_FILE = 'cythonize.dat'      # on-disk hash database of last-built digests
DEFAULT_ROOT = 'lightnet'        # directory scanned when no argv[1] is given
VENDOR = 'Explosion'             # name used in error messages
# WindowsError is not defined on unix systems
# (defined as None so `except WindowsError` style code — presumably later in
# this script or in callers — remains importable on unix; verify usage)
try:
    WindowsError
except NameError:
    WindowsError = None
#
# Rules
#
def process_pyx(fromfile, tofile):
    """Compile a single ``.pyx`` file into C or C++ with Cython.

    Tries the ``cython`` executable first; if it is not on PATH, falls back
    to running the Cython compiler through the current interpreter
    (some installs provide no executable, see gh-2397).

    Raises:
        Exception: if Cython is too old or the compilation fails.
        OSError: if Cython cannot be invoked at all.
    """
    try:
        from Cython.Compiler.Version import version as cython_version
        from distutils.version import LooseVersion
        # Enforce a minimum version when Cython is importable; when it is
        # not, the subprocess invocations below surface the problem instead.
        if LooseVersion(cython_version) < LooseVersion('0.19'):
            raise Exception('Building %s requires Cython >= 0.19' % VENDOR)
    except ImportError:
        pass

    flags = ['--fast-fail']
    if tofile.endswith('.cpp'):
        flags.append('--cplus')
    args = flags + ['-o', tofile, fromfile]

    try:
        try:
            status = subprocess.call(['cython'] + args)
            if status != 0:
                raise Exception('Cython failed')
        except OSError:
            # No `cython` executable; invoke the compiler via the interpreter.
            status = subprocess.call(
                [sys.executable, '-c',
                 'import sys; from Cython.Compiler.Main import '
                 'setuptools_main as main; sys.exit(main())'] + args)
            if status != 0:
                raise Exception('Cython failed')
    except OSError:
        raise OSError('Cython needs to be installed')
def process_tempita_pyx(fromfile, tofile):
    """Render a ``.pyx.in`` Tempita template, then cythonize the result.

    The rendered ``.pyx`` file is written next to the template (same name
    minus the ``.in`` suffix) and handed on to ``process_pyx``.

    Raises:
        Exception: if Tempita is not installed.
    """
    try:
        try:
            from Cython import Tempita as tempita
        except ImportError:
            import tempita
    except ImportError:
        raise Exception('Building %s requires Tempita: '
                        'pip install --user Tempita' % VENDOR)
    with open(fromfile, "r") as f:
        template = f.read()
    rendered = tempita.sub(template)
    assert fromfile.endswith('.pyx.in')
    pyxfile = fromfile[:-len('.pyx.in')] + '.pyx'
    with open(pyxfile, "w") as f:
        f.write(rendered)
    process_pyx(pyxfile, tofile)
# Maps a source-file extension to the processor function that compiles it.
rules = {
    # fromext : function
    '.pyx' : process_pyx,
    '.pyx.in' : process_tempita_pyx
}
#
# Hash db
#
def load_hashes(filename):
    """Load the hash database written by ``save_hashes``.

    Args:
        filename: Path to the database file.

    Returns:
        dict mapping source filename -> ``(sha1 of input, sha1 of output)``.
        An absent database file yields an empty dict.
    """
    hashes = {}
    if os.path.isfile(filename):
        with open(filename, 'r') as f:
            for line in f:
                if not line.strip():
                    continue  # tolerate blank lines instead of raising
                # rsplit so paths containing spaces still parse: the two
                # trailing fields are hex digests and never contain spaces.
                path, inhash, outhash = line.rsplit(None, 2)
                hashes[path] = (inhash, outhash)
    return hashes
def save_hashes(hash_db, filename):
    """Write the hash database to `filename`, one ``path in out`` entry per
    line, sorted by key for stable, diff-friendly output."""
    lines = ["%s %s %s\n" % (key, inhash, outhash)
             for key, (inhash, outhash) in sorted(hash_db.items())]
    with open(filename, 'w') as f:
        f.writelines(lines)
def sha1_of_file(filename):
    """Return the hex SHA-1 digest of the file's contents."""
    with open(filename, "rb") as f:
        contents = f.read()
    return hashlib.sha1(contents).hexdigest()
#
# Main program
#
def normpath(path):
    """Normalise a path to forward slashes with no leading './'."""
    path = path.replace(os.sep, '/')
    return path[2:] if path.startswith('./') else path
def get_hash(frompath, topath):
    """Return ``(sha1 of source, sha1 of target)``; the target hash is
    ``None`` when the target file does not exist yet."""
    from_hash = sha1_of_file(frompath)
    if os.path.exists(topath):
        to_hash = sha1_of_file(topath)
    else:
        to_hash = None
    return (from_hash, to_hash)
def process(path, fromfile, tofile, processor_function, hash_db):
    """Regenerate `tofile` from `fromfile` if the hash db says it changed.

    Runs `processor_function` with the working directory set to `path`
    (restored afterwards), then refreshes the stored (input, output) hash
    pair for this source file.
    """
    fullfrompath = os.path.join(path, fromfile)
    fulltopath = os.path.join(path, tofile)
    key = normpath(fullfrompath)
    if get_hash(fullfrompath, fulltopath) == hash_db.get(key, None):
        print('%s has not changed' % fullfrompath)
        return
    orig_cwd = os.getcwd()
    try:
        os.chdir(path)
        print('Processing %s' % fullfrompath)
        processor_function(fromfile, tofile)
    finally:
        os.chdir(orig_cwd)
    # The target file was (re)generated: recompute and store the hash pair.
    hash_db[key] = get_hash(fullfrompath, fulltopath)
def find_process_files(root_dir):
    """Walk `root_dir` and (re)cythonize every ``.pyx`` / ``.pyx.in`` file
    that changed since the hashes in HASH_FILE were recorded."""
    hash_db = load_hashes(HASH_FILE)
    for cur_dir, dirs, files in os.walk(root_dir):
        for filename in files:
            # A plain .pyx with a matching .pyx.in template is generated
            # output; only the template itself gets processed.
            in_file = os.path.join(cur_dir, filename + ".in")
            if filename.endswith('.pyx') and os.path.isfile(in_file):
                continue
            for fromext, function in rules.items():
                if not filename.endswith(fromext):
                    continue
                with open(os.path.join(cur_dir, filename), 'rb') as f:
                    data = f.read()
                # Honour a "# distutils: language = c++" directive by
                # emitting .cpp instead of .c.
                m = re.search(br"^\s*#\s*distutils:\s*language\s*=\s*c\+\+\s*$",
                              data, re.I | re.M)
                toext = ".cpp" if m else ".c"
                tofile = filename[:-len(fromext)] + toext
                process(cur_dir, filename, tofile, function, hash_db)
    save_hashes(hash_db, HASH_FILE)
def main():
    """Entry point: cythonize files under argv[1] (default: DEFAULT_ROOT)."""
    root_dir = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_ROOT
    find_process_files(root_dir)


if __name__ == '__main__':
    main()
================================================
FILE: bin/train.py
================================================
from lightnet.lightnet import train
import plac
from pathlib import Path
# Py2/Py3 compatibility: `unicode` only exists on Python 2; on Python 3
# `str` already is the unicode type.
try:
    unicode
except NameError:
    unicode = str


def path2bytes(loc):
    """Resolve `loc` to an absolute path and return it UTF-8 encoded, as
    the DarkNet C API expects byte strings."""
    resolved = Path(loc).resolve()
    return unicode(resolved).encode('utf8')
def main(cfg_loc, weight_loc, images_loc):
    """Train a model from a config file, initial weights and a directory of
    training images; checkpoints are written under /tmp/yolo."""
    locations = (cfg_loc, weight_loc, images_loc, '/tmp/yolo')
    train(*[path2bytes(loc) for loc in locations])


if __name__ == '__main__':
    plac.call(main)
================================================
FILE: lightnet/__init__.pxd
================================================
================================================
FILE: lightnet/__init__.py
================================================
# coding: utf8
from __future__ import unicode_literals
from .lightnet import Network, Image, BoxLabels
from .about import __version__
def load(name, path=None):
    """Load a pre-trained model by name (delegates to Network.load).

    name (unicode): Name of the model in the data directory, e.g. 'tiny-yolo'.
    path (unicode): Optional path to a model data directory containing
        {name}.cfg and {name}.weights; defaults to the built-in data dir.
    RETURNS (Network): The loaded model.
    """
    return Network.load(name, path=path)
================================================
FILE: lightnet/__main__.py
================================================
# coding: utf8
from __future__ import print_function
# NB! This breaks in plac on Python 2!!
# from __future__ import unicode_literals
if __name__ == '__main__':
    import plac
    import sys
    try:
        from lightnet.cli import download
    except ImportError:
        from cli import download

    # Registry of available sub-commands.
    commands = {
        'download': download,
    }
    if len(sys.argv) == 1:
        # BUG FIX: print() does not accept `title`/`exits` keyword arguments
        # (that API belongs to helpers such as spacy.util.prints), so the
        # original call raised TypeError and then fell through to an
        # IndexError on argv.pop(1). Print the usage and exit explicitly.
        print('Available commands: %s' % ', '.join(commands))
        sys.exit(1)
    command = sys.argv.pop(1)
    sys.argv[0] = 'lightnet %s' % command
    if command in commands:
        plac.call(commands[command])
    else:
        print('Unknown command: %s' % command)
        print('Available: %s' % ', '.join(commands))
        sys.exit(1)
================================================
FILE: lightnet/_darknet/Makefile
================================================
GPU=0
CUDNN=0
OPENCV=0
OPENMP=0
DEBUG=0
ARCH= -gencode arch=compute_30,code=sm_30 \
-gencode arch=compute_35,code=sm_35 \
-gencode arch=compute_50,code=[sm_50,compute_50] \
-gencode arch=compute_52,code=[sm_52,compute_52]
# -gencode arch=compute_20,code=[sm_20,sm_21] \ This one is deprecated?
# This is what I use, uncomment if you know your arch and want to specify
# ARCH= -gencode arch=compute_52,code=compute_52
VPATH=./
SLIB=libdarknet.so
ALIB=libdarknet.a
EXEC=darknet
OBJDIR=./obj/
CC=gcc
NVCC=nvcc
AR=ar
ARFLAGS=rcs
OPTS=-Ofast
LDFLAGS= -lm -pthread
COMMON= -I.
CFLAGS=-Wall -Wno-unknown-pragmas -Wfatal-errors -fPIC
ifeq ($(OPENMP), 1)
CFLAGS+= -fopenmp
endif
ifeq ($(DEBUG), 1)
OPTS=-O0 -g
endif
CFLAGS+=$(OPTS)
ifeq ($(OPENCV), 1)
COMMON+= -DOPENCV
CFLAGS+= -DOPENCV
LDFLAGS+= `pkg-config --libs opencv`
COMMON+= `pkg-config --cflags opencv`
endif
ifeq ($(GPU), 1)
COMMON+= -DGPU -I/usr/local/cuda/include/
CFLAGS+= -DGPU
LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand
endif
ifeq ($(CUDNN), 1)
COMMON+= -DCUDNN
CFLAGS+= -DCUDNN
LDFLAGS+= -lcudnn
endif
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o lstm_layer.o
EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o attention.o darknet.o
ifeq ($(GPU), 1)
LDFLAGS+= -lstdc++
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o avgpool_layer_kernels.o
endif
EXECOBJ = $(addprefix $(OBJDIR), $(EXECOBJA))
OBJS = $(addprefix $(OBJDIR), $(OBJ))
DEPS = $(wildcard ./*.h) Makefile ./darknet.h
#all: obj backup results $(SLIB) $(ALIB) $(EXEC)
all: obj $(ALIB)
$(EXEC): $(EXECOBJ) $(ALIB)
$(CC) $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS) $(ALIB)
$(ALIB): $(OBJS)
$(AR) $(ARFLAGS) $@ $^
$(SLIB): $(OBJS)
$(CC) $(CFLAGS) -shared $^ -o $@ $(LDFLAGS)
$(OBJDIR)%.o: %.c $(DEPS)
$(CC) $(COMMON) $(CFLAGS) -c $< -o $@
$(OBJDIR)%.o: %.cu $(DEPS)
$(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@
obj:
mkdir -p obj
backup:
mkdir -p backup
results:
mkdir -p results
.PHONY: clean
clean:
rm -rf $(OBJS) $(SLIB) $(ALIB) $(EXEC) $(EXECOBJ)
================================================
FILE: lightnet/_darknet/activation_kernels.cu
================================================
#include "cuda_runtime.h"
#include "curand.h"
#include "cublas_v2.h"
extern "C" {
#include "activations.h"
#include "cuda.h"
}
/* Element-wise activation functions and their gradients, evaluated on the
 * device. The *_gradient_kernel functions appear to take the activation
 * OUTPUT as their argument (e.g. logistic: (1-x)*x is sigma' expressed in
 * terms of sigma) -- NOTE(review): confirm against the CPU versions in
 * activations.c. */

/* "Leaky hard tanh": slope .001 outside [0, 1], identity inside. */
__device__ float lhtan_activate_kernel(float x)
{
    if(x < 0) return .001f*x;
    if(x > 1) return .001f*(x-1.f) + 1.f;
    return x;
}
__device__ float lhtan_gradient_kernel(float x)
{
    if(x > 0 && x < 1) return 1;
    return .001; /* NOTE(review): double literal; .001f would avoid promotion */
}
/* Clamp to [-1, 1]. */
__device__ float hardtan_activate_kernel(float x)
{
    if (x < -1) return -1;
    if (x > 1) return 1;
    return x;
}
__device__ float linear_activate_kernel(float x){return x;}
/* Standard sigmoid. */
__device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));}
/* Sigmoid rescaled to the range (-1, 1). */
__device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;}
__device__ float relu_activate_kernel(float x){return x*(x>0);}
__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);}
/* ReLU variant with slope .01 for negative inputs. */
__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;}
/* ReLU plus a small linear term everywhere. */
__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;}
/* Leaky ReLU, negative-side slope .1. */
__device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1f*x;}
__device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);}
/* Piecewise-linear sigmoid approximation. */
__device__ float plse_activate_kernel(float x)
{
    if(x < -4) return .01f * (x + 4);
    if(x > 4) return .01f * (x - 4) + 1;
    return .125f*x + .5f;
}
/* Staircase: flat integer steps on even intervals, unit-slope ramps on odd. */
__device__ float stair_activate_kernel(float x)
{
    int n = floorf(x);
    if (n%2 == 0) return floorf(x/2);
    else return (x - n) + floorf(x/2);
}

__device__ float hardtan_gradient_kernel(float x)
{
    if (x > -1 && x < 1) return 1;
    return 0;
}
__device__ float linear_gradient_kernel(float x){return 1;}
__device__ float logistic_gradient_kernel(float x){return (1-x)*x;}
__device__ float loggy_gradient_kernel(float x)
{
    /* Map the loggy output x in (-1, 1) back to the underlying sigmoid
     * output y, then apply the sigmoid gradient times 2 (the rescaling). */
    float y = (x+1)/2;
    return 2*(1-y)*y;
}
__device__ float relu_gradient_kernel(float x){return (x>0);}
__device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);}
__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;}
__device__ float ramp_gradient_kernel(float x){return (x>0)+.1f;}
__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1f;}
__device__ float tanh_gradient_kernel(float x){return 1-x*x;}
__device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? .01f : .125f;}
__device__ float stair_gradient_kernel(float x)
{
    /* Zero on the flat steps (integer x), one on the ramps. */
    if (floorf(x) == x) return 0;
    return 1;
}
/* Apply activation `a` to a single value. Falls through to return 0 for
 * enum values not handled by the switch (not expected for valid
 * ACTIVATION values). */
__device__ float activate_kernel(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:
            return linear_activate_kernel(x);
        case LOGISTIC:
            return logistic_activate_kernel(x);
        case LOGGY:
            return loggy_activate_kernel(x);
        case RELU:
            return relu_activate_kernel(x);
        case ELU:
            return elu_activate_kernel(x);
        case RELIE:
            return relie_activate_kernel(x);
        case RAMP:
            return ramp_activate_kernel(x);
        case LEAKY:
            return leaky_activate_kernel(x);
        case TANH:
            return tanh_activate_kernel(x);
        case PLSE:
            return plse_activate_kernel(x);
        case STAIR:
            return stair_activate_kernel(x);
        case HARDTAN:
            return hardtan_activate_kernel(x);
        case LHTAN:
            return lhtan_activate_kernel(x);
    }
    return 0;
}
/* Gradient of activation `a` at a single value (x is presumably the
 * activation output -- see note above the helper functions). Falls through
 * to return 0 for unhandled enum values. */
__device__ float gradient_kernel(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:
            return linear_gradient_kernel(x);
        case LOGISTIC:
            return logistic_gradient_kernel(x);
        case LOGGY:
            return loggy_gradient_kernel(x);
        case RELU:
            return relu_gradient_kernel(x);
        case ELU:
            return elu_gradient_kernel(x);
        case RELIE:
            return relie_gradient_kernel(x);
        case RAMP:
            return ramp_gradient_kernel(x);
        case LEAKY:
            return leaky_gradient_kernel(x);
        case TANH:
            return tanh_gradient_kernel(x);
        case PLSE:
            return plse_gradient_kernel(x);
        case STAIR:
            return stair_gradient_kernel(x);
        case HARDTAN:
            return hardtan_gradient_kernel(x);
        case LHTAN:
            return lhtan_gradient_kernel(x);
    }
    return 0;
}
/* Backward pass for the pairwise product activation: thread `id` pairs
 * element i of batch b with its partner s/2 further on and routes the
 * incoming gradient dy to both inputs (product rule). `a` is currently
 * unused. Launched with n threads (one per pair) by
 * binary_gradient_array_gpu.
 *
 * BUG FIX: the global-memory loads of x1/x2 (and the index math) were
 * performed BEFORE the `id < n` bounds check, so tail threads of the last
 * block read out of bounds. All work now happens only for in-range ids. */
__global__ void binary_gradient_array_kernel(float *x, float *dy, int n, int s, BINARY_ACTIVATION a, float *dx)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(id < n) {
        int i = id % s;
        int b = id / s;
        float x1 = x[b*s + i];
        float x2 = x[b*s + s/2 + i];
        float de = dy[id];
        dx[b*s + i] = x2*de;
        dx[b*s + s/2 + i] = x1*de;
    }
}
/* Launch the binary-activation backward kernel over n/2 pair outputs.
   NOTE(review): in this wrapper `dx` is the incoming gradient and `y`
   receives the propagated gradient (names are swapped relative to the
   kernel's dy/dx parameters) — confirm against callers. */
extern "C" void binary_gradient_array_gpu(float *x, float *dx, int n, int size, BINARY_ACTIVATION a, float *y)
{
    binary_gradient_array_kernel<<<cuda_gridsize(n/2), BLOCK>>>(x, dx, n/2, size, a, y);
    check_error(cudaPeekAtLastError());
}
/* Forward pass of the pairwise "binary" activation: y[id] = x1 * x2 with
   the pair taken at offsets i and s/2 + i within each batch slice.
   Fix: the original read x[] before the id < n bounds check, causing
   out-of-bounds loads in the last (partial) block. */
__global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTIVATION a, float *y)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (id >= n) return;
    int i = id % s;
    int b = id / s;
    float x1 = x[b*s + i];
    float x2 = x[b*s + s/2 + i];
    y[id] = x1*x2;
}
/* Launch the binary-activation forward kernel over n/2 pair outputs. */
extern "C" void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y)
{
    binary_activate_array_kernel<<<cuda_gridsize(n/2), BLOCK>>>(x, n/2, size, a, y);
    check_error(cudaPeekAtLastError());
}
/* In place: x[i] = activation(x[i]) for i in [0, n); one thread per element. */
__global__ void activate_array_kernel(float *x, int n, ACTIVATION a)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n) x[i] = activate_kernel(x[i], a);
}
/* Scale delta[i] by the activation derivative evaluated at x[i]. */
__global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n) delta[i] *= gradient_kernel(x[i], a);
}
/* Host-side launchers: one thread per element, BLOCK threads per block. */
extern "C" void activate_array_gpu(float *x, int n, ACTIVATION a)
{
    activate_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a);
    check_error(cudaPeekAtLastError());
}
extern "C" void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta)
{
    gradient_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a, delta);
    check_error(cudaPeekAtLastError());
}
================================================
FILE: lightnet/_darknet/activation_layer.c
================================================
#include "activation_layer.h"
#include "utils.h"
#include "cuda.h"
#include "blas.h"
#include "gemm.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Build a standalone activation layer that applies `activation`
   elementwise to its input; output shape equals input shape.
   Fix: l.output and l.delta are arrays of float, but were allocated with
   sizeof(float*) — an over-allocation (8 bytes per element on 64-bit
   platforms) inconsistent with every other layer constructor. */
layer make_activation_layer(int batch, int inputs, ACTIVATION activation)
{
    layer l = {0};
    l.type = ACTIVE;
    l.inputs = inputs;
    l.outputs = inputs;
    l.batch = batch;
    l.output = calloc(batch*inputs, sizeof(float));
    l.delta = calloc(batch*inputs, sizeof(float));
    l.forward = forward_activation_layer;
    l.backward = backward_activation_layer;
#ifdef GPU
    l.forward_gpu = forward_activation_layer_gpu;
    l.backward_gpu = backward_activation_layer_gpu;
    l.output_gpu = cuda_make_array(l.output, inputs*batch);
    l.delta_gpu = cuda_make_array(l.delta, inputs*batch);
#endif
    l.activation = activation;
    fprintf(stderr, "Activation Layer: %d inputs\n", inputs);
    return l;
}
/* Forward: copy the input, then activate it in place. */
void forward_activation_layer(layer l, network net)
{
    copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1);
    activate_array(l.output, l.outputs*l.batch, l.activation);
}
/* Backward: scale the incoming delta by the activation derivative
   (evaluated at the layer's output) and pass it to the previous layer. */
void backward_activation_layer(layer l, network net)
{
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
    copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1);
}
#ifdef GPU
/* GPU mirror of forward_activation_layer. */
void forward_activation_layer_gpu(layer l, network net)
{
    copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1);
    activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation);
}
/* GPU mirror of backward_activation_layer. */
void backward_activation_layer_gpu(layer l, network net)
{
    gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
    copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1);
}
#endif
================================================
FILE: lightnet/_darknet/activation_layer.h
================================================
#ifndef ACTIVATION_LAYER_H
#define ACTIVATION_LAYER_H
#include "activations.h"
#include "layer.h"
#include "network.h"
layer make_activation_layer(int batch, int inputs, ACTIVATION activation);
void forward_activation_layer(layer l, network net);
void backward_activation_layer(layer l, network net);
#ifdef GPU
void forward_activation_layer_gpu(layer l, network net);
void backward_activation_layer_gpu(layer l, network net);
#endif
#endif
================================================
FILE: lightnet/_darknet/activations.c
================================================
#include "activations.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Map an ACTIVATION enum value to its configuration-file name.
   Unknown values fall back to "relu". */
char *get_activation_string(ACTIVATION a)
{
    switch (a) {
        case LOGISTIC: return "logistic";
        case LOGGY:    return "loggy";
        case RELU:     return "relu";
        case ELU:      return "elu";
        case RELIE:    return "relie";
        case RAMP:     return "ramp";
        case LINEAR:   return "linear";
        case TANH:     return "tanh";
        case PLSE:     return "plse";
        case LEAKY:    return "leaky";
        case STAIR:    return "stair";
        case HARDTAN:  return "hardtan";
        case LHTAN:    return "lhtan";
        default:       break;
    }
    return "relu";
}
/* Parse a configuration-file activation name; unrecognized names warn on
   stderr and fall back to RELU. */
ACTIVATION get_activation(char *s)
{
    static const struct { const char *name; ACTIVATION a; } table[] = {
        {"logistic", LOGISTIC}, {"loggy", LOGGY}, {"relu", RELU},
        {"elu", ELU}, {"relie", RELIE}, {"plse", PLSE},
        {"hardtan", HARDTAN}, {"lhtan", LHTAN}, {"linear", LINEAR},
        {"ramp", RAMP}, {"leaky", LEAKY}, {"tanh", TANH}, {"stair", STAIR},
    };
    size_t i;
    for (i = 0; i < sizeof(table)/sizeof(table[0]); ++i) {
        if (strcmp(s, table[i].name) == 0) return table[i].a;
    }
    fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s);
    return RELU;
}
/* Apply the activation selected by `a` to a single scalar. */
float activate(float x, ACTIVATION a)
{
    switch (a) {
        case LINEAR:   return linear_activate(x);
        case LOGISTIC: return logistic_activate(x);
        case LOGGY:    return loggy_activate(x);
        case RELU:     return relu_activate(x);
        case ELU:      return elu_activate(x);
        case RELIE:    return relie_activate(x);
        case RAMP:     return ramp_activate(x);
        case LEAKY:    return leaky_activate(x);
        case TANH:     return tanh_activate(x);
        case PLSE:     return plse_activate(x);
        case STAIR:    return stair_activate(x);
        case HARDTAN:  return hardtan_activate(x);
        case LHTAN:    return lhtan_activate(x);
    }
    return 0; /* unreachable for valid ACTIVATION values */
}
/* In place: x[i] = activation(x[i]) for every i in [0, n). */
void activate_array(float *x, const int n, const ACTIVATION a)
{
    float *end = x + n;
    for (; x < end; ++x) {
        *x = activate(*x, a);
    }
}
/* Derivative of activation `a` at x; for output-parameterized derivatives
   (logistic, tanh, ...) x must be the activation OUTPUT. */
float gradient(float x, ACTIVATION a)
{
    switch (a) {
        case LINEAR:   return linear_gradient(x);
        case LOGISTIC: return logistic_gradient(x);
        case LOGGY:    return loggy_gradient(x);
        case RELU:     return relu_gradient(x);
        case ELU:      return elu_gradient(x);
        case RELIE:    return relie_gradient(x);
        case RAMP:     return ramp_gradient(x);
        case LEAKY:    return leaky_gradient(x);
        case TANH:     return tanh_gradient(x);
        case PLSE:     return plse_gradient(x);
        case STAIR:    return stair_gradient(x);
        case HARDTAN:  return hardtan_gradient(x);
        case LHTAN:    return lhtan_gradient(x);
    }
    return 0; /* unreachable for valid ACTIVATION values */
}
/* Chain rule over an array: delta[i] *= activation'(x[i]). */
void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta)
{
    const float *end = x + n;
    while (x < end) {
        *delta++ *= gradient(*x++, a);
    }
}
================================================
FILE: lightnet/_darknet/activations.h
================================================
#ifndef ACTIVATIONS_H
#define ACTIVATIONS_H
#include "darknet.h"
#include "cuda.h"
#include "math.h"
ACTIVATION get_activation(char *s);
char *get_activation_string(ACTIVATION a);
float activate(float x, ACTIVATION a);
float gradient(float x, ACTIVATION a);
void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta);
void activate_array(float *x, const int n, const ACTIVATION a);
#ifdef GPU
void activate_array_gpu(float *x, int n, ACTIVATION a);
void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta);
#endif
static inline float stair_activate(float x)
{
int n = floor(x);
if (n%2 == 0) return floor(x/2.);
else return (x - n) + floor(x/2.);
}
/* Hard tanh: clamp x to [-1, 1]. */
static inline float hardtan_activate(float x)
{
    return (x < -1) ? -1 : (x > 1) ? 1 : x;
}
static inline float linear_activate(float x){return x;}
static inline float logistic_activate(float x){return 1./(1. + exp(-x));}
static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;}
static inline float relu_activate(float x){return x*(x>0);}
static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
static inline float relie_activate(float x){return (x>0) ? x : .01*x;}
static inline float ramp_activate(float x){return x*(x>0)+.1*x;}
static inline float leaky_activate(float x){return (x>0) ? x : .1*x;}
static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
/* Piecewise-linear sigmoid approximation: slope .125 core on [-4, 4],
   slope .01 tails outside. */
static inline float plse_activate(float x)
{
    if (x >= -4 && x <= 4) return .125*x + .5;
    return (x < -4) ? .01 * (x + 4) : .01 * (x - 4) + 1;
}
/* "Leaky hard tanh": identity on [0, 1], slope .001 outside. */
static inline float lhtan_activate(float x)
{
    if (x > 1) return .001*(x-1) + 1;
    return (x < 0) ? .001*x : x;
}
/* Derivative of lhtan, taken at the activation output. */
static inline float lhtan_gradient(float x)
{
    return (x > 0 && x < 1) ? 1 : .001;
}
/* Derivative of hardtan: 1 inside (-1, 1), 0 on the clamped regions. */
static inline float hardtan_gradient(float x)
{
    return (x > -1 && x < 1) ? 1 : 0;
}
static inline float linear_gradient(float x) { return 1; }
/* NOTE: logistic/loggy gradients take the activation OUTPUT, not the input. */
static inline float logistic_gradient(float x) { return (1-x)*x; }
static inline float loggy_gradient(float x)
{
    float y = (x+1.)/2.;   /* map output back to sigmoid range */
    return 2*(1-y)*y;
}
static inline float stair_gradient(float x)
{
if (floor(x) == x) return 0;
return 1;
}
static inline float relu_gradient(float x){return (x>0);}
static inline float elu_gradient(float x){return (x >= 0) + (x < 0)*(x + 1);}
static inline float relie_gradient(float x){return (x>0) ? 1 : .01;}
static inline float ramp_gradient(float x){return (x>0)+.1;}
static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;}
static inline float tanh_gradient(float x){return 1-x*x;}
static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01 : .125;}
#endif
================================================
FILE: lightnet/_darknet/avgpool_layer.c
================================================
#include "avgpool_layer.h"
#include "cuda.h"
#include <stdio.h>
/* Build a global average-pooling layer: each (batch, channel) plane of
   size w x h is reduced to a single value, so output is 1 x 1 x c. */
avgpool_layer make_avgpool_layer(int batch, int w, int h, int c)
{
    fprintf(stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c);
    avgpool_layer l = {0};
    l.type = AVGPOOL;
    l.batch = batch;
    l.h = h;
    l.w = w;
    l.c = c;
    l.out_w = 1;
    l.out_h = 1;
    l.out_c = c;
    l.outputs = l.out_c;   /* one value per channel */
    l.inputs = h*w*c;
    int output_size = l.outputs * batch;
    l.output = calloc(output_size, sizeof(float));
    l.delta = calloc(output_size, sizeof(float));
    l.forward = forward_avgpool_layer;
    l.backward = backward_avgpool_layer;
#ifdef GPU
    l.forward_gpu = forward_avgpool_layer_gpu;
    l.backward_gpu = backward_avgpool_layer_gpu;
    l.output_gpu = cuda_make_array(l.output, output_size);
    l.delta_gpu = cuda_make_array(l.delta, output_size);
#endif
    return l;
}
/* Update spatial dimensions after a network resize; the pooled output
   stays 1 x 1 x c, so only the input size changes. */
void resize_avgpool_layer(avgpool_layer *l, int w, int h)
{
    l->w = w;
    l->h = h;
    l->inputs = h*w*l->c;
}
/* Global average pool: one output per (batch, channel), averaged over
   the h*w spatial positions. */
void forward_avgpool_layer(const avgpool_layer l, network net)
{
    int b, k, i;
    int area = l.h*l.w;
    for (b = 0; b < l.batch; ++b) {
        for (k = 0; k < l.c; ++k) {
            int out_index = k + b*l.c;
            float sum = 0;
            for (i = 0; i < area; ++i) {
                sum += net.input[i + area*(k + b*l.c)];
            }
            l.output[out_index] = sum / area;
        }
    }
}
/* Backward: spread each output delta uniformly over its h*w inputs. */
void backward_avgpool_layer(const avgpool_layer l, network net)
{
    int b, k, i;
    int area = l.h*l.w;
    for (b = 0; b < l.batch; ++b) {
        for (k = 0; k < l.c; ++k) {
            float g = l.delta[k + b*l.c] / area;
            for (i = 0; i < area; ++i) {
                net.delta[i + area*(k + b*l.c)] += g;
            }
        }
    }
}
================================================
FILE: lightnet/_darknet/avgpool_layer.h
================================================
#ifndef AVGPOOL_LAYER_H
#define AVGPOOL_LAYER_H
#include "image.h"
#include "cuda.h"
#include "layer.h"
#include "network.h"
typedef layer avgpool_layer;
image get_avgpool_image(avgpool_layer l);
avgpool_layer make_avgpool_layer(int batch, int w, int h, int c);
void resize_avgpool_layer(avgpool_layer *l, int w, int h);
void forward_avgpool_layer(const avgpool_layer l, network net);
void backward_avgpool_layer(const avgpool_layer l, network net);
#ifdef GPU
void forward_avgpool_layer_gpu(avgpool_layer l, network net);
void backward_avgpool_layer_gpu(avgpool_layer l, network net);
#endif
#endif
================================================
FILE: lightnet/_darknet/avgpool_layer_kernels.cu
================================================
#include "cuda_runtime.h"
#include "curand.h"
#include "cublas_v2.h"
extern "C" {
#include "avgpool_layer.h"
#include "cuda.h"
}
/* One thread per (batch, channel): average the w*h spatial values. */
__global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (id >= n) return;
    int k = id % c;   /* channel */
    int b = id / c;   /* batch   */
    int area = w*h;
    int out_index = k + c*b;
    float sum = 0;
    int i;
    for (i = 0; i < area; ++i) {
        sum += input[i + area*(k + b*c)];
    }
    output[out_index] = sum / area;
}
/* One thread per (batch, channel): spread the output delta uniformly
   back over the w*h input positions. */
__global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float *in_delta, float *out_delta)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (id >= n) return;
    int k = id % c;
    int b = id / c;
    int area = w*h;
    float g = out_delta[k + c*b] / area;
    int i;
    for (i = 0; i < area; ++i) {
        in_delta[i + area*(k + b*c)] += g;
    }
}
/* Launchers: n = one thread per (batch, channel) output. */
extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network net)
{
    size_t n = layer.c*layer.batch;
    forward_avgpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.w, layer.h, layer.c, net.input_gpu, layer.output_gpu);
    check_error(cudaPeekAtLastError());
}
extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network net)
{
    size_t n = layer.c*layer.batch;
    backward_avgpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.w, layer.h, layer.c, net.delta_gpu, layer.delta_gpu);
    check_error(cudaPeekAtLastError());
}
================================================
FILE: lightnet/_darknet/batchnorm_layer.c
================================================
#include "convolutional_layer.h"
#include "batchnorm_layer.h"
#include "blas.h"
#include <stdio.h>
/* Build a standalone batch-normalization layer over a w x h x c volume.
   Scales (gamma) start at 1, biases (beta) at 0; rolling statistics
   start at 0 and are updated with a 0.99/0.01 exponential average. */
layer make_batchnorm_layer(int batch, int w, int h, int c)
{
    fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w,h,c);
    layer l = {0};
    l.type = BATCHNORM;
    l.batch = batch;
    l.h = l.out_h = h;
    l.w = l.out_w = w;
    l.c = l.out_c = c;
    l.output = calloc(h * w * c * batch, sizeof(float));
    l.delta = calloc(h * w * c * batch, sizeof(float));
    l.inputs = w*h*c;
    l.outputs = l.inputs;
    /* Per-channel learned parameters and their gradient accumulators. */
    l.scales = calloc(c, sizeof(float));
    l.scale_updates = calloc(c, sizeof(float));
    l.biases = calloc(c, sizeof(float));
    l.bias_updates = calloc(c, sizeof(float));
    int i;
    for(i = 0; i < c; ++i){
        l.scales[i] = 1;
    }
    /* Per-channel batch and rolling (inference-time) statistics. */
    l.mean = calloc(c, sizeof(float));
    l.variance = calloc(c, sizeof(float));
    l.rolling_mean = calloc(c, sizeof(float));
    l.rolling_variance = calloc(c, sizeof(float));
    l.forward = forward_batchnorm_layer;
    l.backward = backward_batchnorm_layer;
#ifdef GPU
    l.forward_gpu = forward_batchnorm_layer_gpu;
    l.backward_gpu = backward_batchnorm_layer_gpu;
    l.output_gpu = cuda_make_array(l.output, h * w * c * batch);
    l.delta_gpu = cuda_make_array(l.delta, h * w * c * batch);
    l.biases_gpu = cuda_make_array(l.biases, c);
    l.bias_updates_gpu = cuda_make_array(l.bias_updates, c);
    l.scales_gpu = cuda_make_array(l.scales, c);
    l.scale_updates_gpu = cuda_make_array(l.scale_updates, c);
    l.mean_gpu = cuda_make_array(l.mean, c);
    l.variance_gpu = cuda_make_array(l.variance, c);
    /* NOTE(review): rolling/delta buffers are seeded from l.mean/l.variance
       (all zeros at this point), not from the rolling arrays — harmless
       since everything is zero, but worth confirming intent. */
    l.rolling_mean_gpu = cuda_make_array(l.mean, c);
    l.rolling_variance_gpu = cuda_make_array(l.variance, c);
    l.mean_delta_gpu = cuda_make_array(l.mean, c);
    l.variance_delta_gpu = cuda_make_array(l.variance, c);
    /* x_gpu: pre-normalization activations; x_norm_gpu: normalized,
       pre-scale activations (both needed by the backward pass). */
    l.x_gpu = cuda_make_array(l.output, l.batch*l.outputs);
    l.x_norm_gpu = cuda_make_array(l.output, l.batch*l.outputs);
#ifdef CUDNN
    cudnnCreateTensorDescriptor(&l.normTensorDesc);
    cudnnCreateTensorDescriptor(&l.dstTensorDesc);
    cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
    cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1);
#endif
#endif
    return l;
}
/* Gradient of the loss w.r.t. the per-filter scale (gamma):
   scale_updates[f] += sum over batch and spatial of delta * x_norm. */
void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates)
{
    int f, b, i;
    for (f = 0; f < n; ++f) {
        float sum = 0;
        for (b = 0; b < batch; ++b) {
            float *d = delta + size*(f + n*b);
            float *xn = x_norm + size*(f + n*b);
            for (i = 0; i < size; ++i) sum += d[i]*xn[i];
        }
        scale_updates[f] += sum;
    }
}
void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
{
int i,j,k;
for(i = 0; i < filters; ++i){
mean_delta[i] = 0;
for (j = 0; j < batch; ++j) {
for (k = 0; k < spatial; ++k) {
int index = j*filters*spatial + i*spatial + k;
mean_delta[i] += delta[index];
}
}
mean_delta[i] *= (-1./sqrt(variance[i] + .00001f));
}
}
void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
{
int i,j,k;
for(i = 0; i < filters; ++i){
variance_delta[i] = 0;
for(j = 0; j < batch; ++j){
for(k = 0; k < spatial; ++k){
int index = j*filters*spatial + i*spatial + k;
variance_delta[i] += delta[index]*(x[index] - mean[i]);
}
}
variance_delta[i] *= -.5 * pow(variance[i] + .00001f, (float)(-3./2.));
}
}
void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
{
int f, j, k;
for(j = 0; j < batch; ++j){
for(f = 0; f < filters; ++f){
for(k = 0; k < spatial; ++k){
int index = j*filters*spatial + f*spatial + k;
delta[index] = delta[index] * 1./(sqrt(variance[f] + .00001f)) + variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch);
}
}
}
}
/* Stub: batch-norm resize is not supported; callers must avoid resizing
   networks that contain standalone BATCHNORM layers. */
void resize_batchnorm_layer(layer *layer, int w, int h)
{
    fprintf(stderr, "Not implemented\n");
}
/* Forward batch normalization (CPU). Training mode normalizes with batch
   statistics and updates the rolling averages; inference mode uses the
   rolling statistics. Finally applies learned scale (gamma) and bias (beta). */
void forward_batchnorm_layer(layer l, network net)
{
    /* Standalone BATCHNORM layers copy their input first; fused callers
       (e.g. conv layers) already have activations in l.output. */
    if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1);
    copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1);  /* pre-norm copy for backward */
    if(net.train){
        mean_cpu(l.output, l.batch, l.out_c, l.out_h*l.out_w, l.mean);
        variance_cpu(l.output, l.mean, l.batch, l.out_c, l.out_h*l.out_w, l.variance);
        /* rolling = .99*rolling + .01*batch */
        scal_cpu(l.out_c, .99, l.rolling_mean, 1);
        axpy_cpu(l.out_c, .01, l.mean, 1, l.rolling_mean, 1);
        scal_cpu(l.out_c, .99, l.rolling_variance, 1);
        axpy_cpu(l.out_c, .01, l.variance, 1, l.rolling_variance, 1);
        normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, l.out_h*l.out_w);
        copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1);  /* normalized, pre-scale */
    } else {
        normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.out_c, l.out_h*l.out_w);
    }
    scale_bias(l.output, l.scales, l.batch, l.out_c, l.out_h*l.out_w);
    add_bias(l.output, l.biases, l.batch, l.out_c, l.out_h*l.out_w);
}
/* Backward batch normalization (CPU): accumulates bias/scale gradients,
   then transforms l.delta through the normalization's backward pass. */
void backward_batchnorm_layer(layer l, network net)
{
    /* At inference time gradients flow through the rolling statistics. */
    if(!net.train){
        l.mean = l.rolling_mean;
        l.variance = l.rolling_variance;
    }
    backward_bias(l.bias_updates, l.delta, l.batch, l.out_c, l.out_w*l.out_h);
    backward_scale_cpu(l.x_norm, l.delta, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates);
    /* Undo the scale so delta is w.r.t. the normalized activations. */
    scale_bias(l.delta, l.scales, l.batch, l.out_c, l.out_h*l.out_w);
    mean_delta_cpu(l.delta, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta);
    variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta);
    normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.out_c, l.out_w*l.out_h, l.delta);
    if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1);
}
#ifdef GPU
/* Copy the inference-relevant parameters GPU -> host. */
void pull_batchnorm_layer(layer l)
{
    cuda_pull_array(l.scales_gpu, l.scales, l.c);
    cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.c);
    cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.c);
}
/* Copy the inference-relevant parameters host -> GPU. */
void push_batchnorm_layer(layer l)
{
    cuda_push_array(l.scales_gpu, l.scales, l.c);
    cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.c);
    cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.c);
}
/* Forward batch normalization on the GPU. Training uses either cuDNN's
   fused kernel or a hand-rolled path; inference normalizes with rolling
   statistics. */
void forward_batchnorm_layer_gpu(layer l, network net)
{
    /* Standalone BATCHNORM layers copy their input first. */
    if(l.type == BATCHNORM) copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1);
    copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1);  /* pre-norm copy for backward */
    if (net.train) {
#ifdef CUDNN
        float one = 1;
        float zero = 0;
        cudnnBatchNormalizationForwardTraining(cudnn_handle(),
                CUDNN_BATCHNORM_SPATIAL,
                &one,
                &zero,
                l.dstTensorDesc,
                l.x_gpu,
                l.dstTensorDesc,
                l.output_gpu,
                l.normTensorDesc,
                l.scales_gpu,
                l.biases_gpu,
                .01,                 /* exponentialAverageFactor for rolling stats */
                l.rolling_mean_gpu,
                l.rolling_variance_gpu,
                .00001,              /* epsilon */
                l.mean_gpu,
                l.variance_gpu);
#else
        /* Hand-rolled path: batch stats, 0.99/0.01 rolling update, then
           normalize / scale / bias. */
        fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu);
        fast_variance_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.variance_gpu);
        scal_gpu(l.out_c, .99, l.rolling_mean_gpu, 1);
        axpy_gpu(l.out_c, .01, l.mean_gpu, 1, l.rolling_mean_gpu, 1);
        scal_gpu(l.out_c, .99, l.rolling_variance_gpu, 1);
        axpy_gpu(l.out_c, .01, l.variance_gpu, 1, l.rolling_variance_gpu, 1);
        /* NOTE(review): l.x_gpu was already filled above with the same
           values; this second copy appears redundant — kept as-is. */
        copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1);
        normalize_gpu(l.output_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_h*l.out_w);
        copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_norm_gpu, 1);  /* normalized, pre-scale */
        scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w);
        add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h);
#endif
    } else {
        /* Inference: rolling statistics. */
        normalize_gpu(l.output_gpu, l.rolling_mean_gpu, l.rolling_variance_gpu, l.batch, l.out_c, l.out_h*l.out_w);
        scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w);
        add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h);
    }
}
/* Backward batch normalization on the GPU, via cuDNN when available,
   otherwise the hand-rolled sequence mirroring the CPU implementation. */
void backward_batchnorm_layer_gpu(layer l, network net)
{
    /* At inference time gradients flow through the rolling statistics. */
    if(!net.train){
        l.mean_gpu = l.rolling_mean_gpu;
        l.variance_gpu = l.rolling_variance_gpu;
    }
#ifdef CUDNN
    float one = 1;
    float zero = 0;
    /* x_norm_gpu is reused here as the dx output buffer, then copied
       back into delta_gpu below. */
    cudnnBatchNormalizationBackward(cudnn_handle(),
            CUDNN_BATCHNORM_SPATIAL,
            &one,
            &zero,
            &one,
            &one,
            l.dstTensorDesc,
            l.x_gpu,
            l.dstTensorDesc,
            l.delta_gpu,
            l.dstTensorDesc,
            l.x_norm_gpu,
            l.normTensorDesc,
            l.scales_gpu,
            l.scale_updates_gpu,
            l.bias_updates_gpu,
            .00001,              /* epsilon */
            l.mean_gpu,
            l.variance_gpu);
    copy_gpu(l.outputs*l.batch, l.x_norm_gpu, 1, l.delta_gpu, 1);
#else
    backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h);
    backward_scale_gpu(l.x_norm_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates_gpu);
    /* Undo the scale so delta is w.r.t. the normalized activations. */
    scale_bias_gpu(l.delta_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w);
    fast_mean_delta_gpu(l.delta_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta_gpu);
    fast_variance_delta_gpu(l.x_gpu, l.delta_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta_gpu);
    normalize_delta_gpu(l.x_gpu, l.mean_gpu, l.variance_gpu, l.mean_delta_gpu, l.variance_delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu);
#endif
    if(l.type == BATCHNORM) copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1);
}
#endif
================================================
FILE: lightnet/_darknet/batchnorm_layer.h
================================================
#ifndef BATCHNORM_LAYER_H
#define BATCHNORM_LAYER_H
#include "image.h"
#include "layer.h"
#include "network.h"
layer make_batchnorm_layer(int batch, int w, int h, int c);
void forward_batchnorm_layer(layer l, network net);
void backward_batchnorm_layer(layer l, network net);
#ifdef GPU
void forward_batchnorm_layer_gpu(layer l, network net);
void backward_batchnorm_layer_gpu(layer l, network net);
void pull_batchnorm_layer(layer l);
void push_batchnorm_layer(layer l);
#endif
#endif
================================================
FILE: lightnet/_darknet/blas.c
================================================
#include "blas.h"
#include <math.h>
#include <assert.h>
#include <float.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Rearrange a (w x h x c) volume into (w*stride x h*stride x c/stride^2)
   per batch item, or the inverse when forward == 0 (the YOLO "reorg" op).
   NOTE(review): the forward/backward naming here is notoriously confusing
   upstream; verify direction against the calling layer before changing. */
void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
{
    int b,i,j,k;
    int out_c = c/(stride*stride);   /* channels after expansion */
    for(b = 0; b < batch; ++b){
        for(k = 0; k < c; ++k){
            for(j = 0; j < h; ++j){
                for(i = 0; i < w; ++i){
                    int in_index = i + w*(j + h*(k + c*b));
                    int c2 = k % out_c;          /* destination channel */
                    int offset = k / out_c;      /* which stride*stride cell */
                    int w2 = i*stride + offset % stride;
                    int h2 = j*stride + offset / stride;
                    int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b));
                    if(forward) out[out_index] = x[in_index];
                    else out[in_index] = x[out_index];
                }
            }
        }
    }
}
/* In-place transpose between planar [layer][pixel] and interleaved
   [pixel][layer] layouts within each batch slice (direction per `forward`). */
void flatten(float *x, int size, int layers, int batch, int forward)
{
    int total = size*layers*batch;
    float *tmp = calloc(total, sizeof(float));
    int b, c, i;
    for (b = 0; b < batch; ++b) {
        for (c = 0; c < layers; ++c) {
            for (i = 0; i < size; ++i) {
                int planar = b*layers*size + c*size + i;
                int interleaved = b*layers*size + i*layers + c;
                if (forward) tmp[interleaved] = x[planar];
                else         tmp[planar] = x[interleaved];
            }
        }
    }
    memcpy(x, tmp, total*sizeof(float));
    free(tmp);
}
/* c = s*a + (1-s)*b elementwise; b may be NULL (treated as zeros). */
void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c)
{
    int i;
    for (i = 0; i < n; ++i) {
        float bv = b ? b[i] : 0;
        c[i] = s[i]*a[i] + (1-s[i])*bv;
    }
}
/* Backward of weighted_sum_cpu; da/db may be NULL to skip accumulation.
   NOTE(review): b is read unconditionally for ds — unlike the forward
   pass, a NULL b is not tolerated here; confirm callers never pass NULL. */
void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc)
{
    int i;
    for (i = 0; i < n; ++i) {
        float g = dc[i];
        if (da) da[i] += g * s[i];
        if (db) db[i] += g * (1-s[i]);
        ds[i] += g * (a[i] - b[i]);
    }
}
/* Residual/shortcut add: accumulate `add` (w1 x h1 x c1) into `out`
   (w2 x h2 x c2), striding the larger side / upsampling the smaller by
   integer factors; overlapping channels only. */
void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out)
{
    int stride = w1/w2;   /* > 1 when the source is spatially larger  */
    int sample = w2/w1;   /* > 1 when the source is spatially smaller */
    assert(stride == h1/h2);
    assert(sample == h2/h1);
    if (stride < 1) stride = 1;
    if (sample < 1) sample = 1;
    int minw = (w1 < w2) ? w1 : w2;
    int minh = (h1 < h2) ? h1 : h2;
    int minc = (c1 < c2) ? c1 : c2;
    int b, k, j, i;
    for (b = 0; b < batch; ++b) {
        for (k = 0; k < minc; ++k) {
            for (j = 0; j < minh; ++j) {
                for (i = 0; i < minw; ++i) {
                    out[i*sample + w2*(j*sample + h2*(k + c2*b))] +=
                        add[i*stride + w1*(j*stride + h1*(k + c1*b))];
                }
            }
        }
    }
}
/* Per-filter mean over batch and spatial dimensions. */
void mean_cpu(float *x, int batch, int filters, int spatial, float *mean)
{
    float scale = 1./(batch * spatial);
    int f, b, s;
    for (f = 0; f < filters; ++f) {
        float sum = 0;
        for (b = 0; b < batch; ++b) {
            for (s = 0; s < spatial; ++s) {
                sum += x[b*filters*spatial + f*spatial + s];
            }
        }
        mean[f] = sum * scale;
    }
}
void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
{
float scale = 1./(batch * spatial - 1);
int i,j,k;
for(i = 0; i < filters; ++i){
variance[i] = 0;
for(j = 0; j < batch; ++j){
for(k = 0; k < spatial; ++k){
int index = j*filters*spatial + i*spatial + k;
variance[i] += pow((x[index] - mean[i]), 2);
}
}
variance[i] *= scale;
}
}
void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial)
{
int b, f, i;
for(b = 0; b < batch; ++b){
for(f = 0; f < filters; ++f){
for(i = 0; i < spatial; ++i){
int index = b*filters*spatial + f*spatial + i;
x[index] = (x[index] - mean[f])/(sqrt(variance[f]) + .000001f);
}
}
}
}
void const_cpu(int N, float ALPHA, float *X, int INCX)
{
int i;
for(i = 0; i < N; ++i) X[i*INCX] = ALPHA;
}
void mul_cpu(int N, float *X, int INCX, float *Y, int INCY)
{
int i;
for(i = 0; i < N; ++i) Y[i*INCY] *= X[i*INCX];
}
void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
{
int i;
for(i = 0; i < N; ++i) Y[i*INCY] = pow(X[i*INCX], ALPHA);
}
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
{
int i;
for(i = 0; i < N; ++i) Y[i*INCY] += ALPHA*X[i*INCX];
}
void scal_cpu(int N, float ALPHA, float *X, int INCX)
{
int i;
for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA;
}
void fill_cpu(int N, float ALPHA, float *X, int INCX)
{
int i;
for(i = 0; i < N; ++i) X[i*INCX] = ALPHA;
}
/* Un-interleave OUT (B blocks of NX values then NY values) back into X
   and Y, accumulating; X or Y may be NULL to skip that half. */
void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
{
    int b, i;
    float *src = OUT;
    for (b = 0; b < B; ++b) {
        for (i = 0; i < NX; ++i, ++src) {
            if (X) X[b*NX + i] += *src;
        }
        for (i = 0; i < NY; ++i, ++src) {
            if (Y) Y[b*NY + i] += *src;
        }
    }
}
/* Interleave: for each batch b, copy the NX-block of X then the NY-block
   of Y into OUT, back to back. */
void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
{
    int b, i;
    float *dst = OUT;
    for (b = 0; b < B; ++b) {
        for (i = 0; i < NX; ++i) *dst++ = X[b*NX + i];
        for (i = 0; i < NY; ++i) *dst++ = Y[b*NY + i];
    }
}
/* Y = X with independent strides (BLAS copy). */
void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
{
    int i;
    for (i = 0; i < N; ++i, X += INCX, Y += INCY) *Y = *X;
}
/* Z += X*Y elementwise (multiply-accumulate over arrays). */
void mult_add_into_cpu(int N, float *X, float *Y, float *Z)
{
    float *end = X + N;
    while (X < end) *Z++ += (*X++) * (*Y++);
}
/* Huber-style smooth L1 loss: quadratic for |diff| < 1, linear outside.
   NOTE(review): for the linear region delta gets -sign(diff), the opposite
   sign of the quadratic region's delta = diff — matches upstream darknet;
   confirm the intended gradient convention before changing. */
void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
{
    int i;
    for (i = 0; i < n; ++i) {
        float d = truth[i] - pred[i];
        float a = fabs(d);
        int quadratic = (a < 1);
        error[i] = quadratic ? d * d : 2*a - 1;
        delta[i] = quadratic ? d : ((d < 0) ? 1.0f : -1.0f);
    }
}
/* L1 loss per element; delta is the sign of (truth - pred). */
void l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
{
    int i;
    for (i = 0; i < n; ++i) {
        float d = truth[i] - pred[i];
        error[i] = fabs(d);
        delta[i] = (d > 0) ? 1 : -1;
    }
}
/* Squared error per element; delta is the raw difference. */
void l2_cpu(int n, float *pred, float *truth, float *delta, float *error)
{
    int i;
    for (i = 0; i < n; ++i) {
        float d = truth[i] - pred[i];
        error[i] = d * d;
        delta[i] = d;
    }
}
/* Strided dot product (BLAS dot). */
float dot_cpu(int N, float *X, int INCX, float *Y, int INCY)
{
    float acc = 0;
    int i;
    for (i = 0; i < N; ++i) acc += X[i*INCX] * Y[i*INCY];
    return acc;
}
void softmax(float *input, int n, float temp, int stride, float *output)
{
int i;
float sum = 0;
float largest = -FLT_MAX;
for(i = 0; i < n; ++i){
if(input[i*stride] > largest) largest = input[i*stride];
}
for(i = 0; i < n; ++i){
float e = exp(input[i*stride]/temp - largest/temp);
sum += e;
output[i*stride] = e;
}
for(i = 0; i < n; ++i){
output[i*stride] /= sum;
}
}
/* Apply softmax independently to each (batch, group) slice. */
void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
{
    int b, g;
    for (b = 0; b < batch; ++b) {
        for (g = 0; g < groups; ++g) {
            int off = b*batch_offset + g*group_offset;
            softmax(input + off, n, temp, stride, output + off);
        }
    }
}
================================================
FILE: lightnet/_darknet/blas.h
================================================
#ifndef BLAS_H
#define BLAS_H
#include "darknet.h"
void flatten(float *x, int size, int layers, int batch, int forward);
void pm(int M, int N, float *A);
float *random_matrix(int rows, int cols);
void time_random_matrix(int TA, int TB, int m, int k, int n);
void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out);
void test_blas();
void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT);
void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT);
void mult_add_into_cpu(int N, float *X, float *Y, float *Z);
void const_cpu(int N, float ALPHA, float *X, int INCX);
void constrain_gpu(int N, float ALPHA, float * X, int INCX);
void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);
void mul_cpu(int N, float *X, int INCX, float *Y, int INCY);
void fill_cpu(int N, float ALPHA, float * X, int INCX);
float dot_cpu(int N, float *X, int INCX, float *Y, int INCY);
int test_gpu_blas();
void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out);
void mean_cpu(float *x, int batch, int filters, int spatial, float *mean);
void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
void scale_bias(float *output, float *scales, int batch, int n, int size);
void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates);
void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta);
void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta);
void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta);
void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error);
void l2_cpu(int n, float *pred, float *truth, float *delta, float *error);
void l1_cpu(int n, float *pred, float *truth, float *delta, float *error);
void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c);
void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc);
void softmax(float *input, int n, float temp, int stride, float *output);
void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output);
#ifdef GPU
#include "cuda.h"
#include "tree.h"
/* GPU counterparts of the helpers above (implemented in blas_kernels.cu). */
void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY);
void axpy_gpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
void copy_gpu(int N, float * X, int INCX, float * Y, int INCY);
void copy_gpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
void add_gpu(int N, float ALPHA, float * X, int INCX);
void supp_gpu(int N, float ALPHA, float * X, int INCX);
void mask_gpu(int N, float * X, float mask_num, float * mask);
void scale_mask_gpu(int N, float * X, float mask_num, float * mask, float scale);
void const_gpu(int N, float ALPHA, float *X, int INCX);
void pow_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);
void mul_gpu(int N, float *X, int INCX, float *Y, int INCY);
void mean_gpu(float *x, int batch, int filters, int spatial, float *mean);
void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial);
void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta);
void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta);
void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta);
void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean);
void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out);
/* Note: a duplicate scale_bias_gpu declaration was removed here. */
void scale_bias_gpu(float *output, float *biases, int batch, int n, int size);
void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates);
void add_bias_gpu(float *output, float *biases, int batch, int n, int size);
void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size);
void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error);
void l2_gpu(int n, float *pred, float *truth, float *delta, float *error);
void l1_gpu(int n, float *pred, float *truth, float *delta, float *error);
void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc);
void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c);
void mult_add_into_gpu(int num, float *a, float *b, float *c);
void inter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT);
void deinter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT);
void reorg_gpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out);
void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output);
void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t);
void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t);
void flatten_gpu(float *x, int spatial, int layers, int batch, int forward, float *out);
void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier);
#endif
#endif
================================================
FILE: lightnet/_darknet/blas_kernels.cu
================================================
#include "cuda_runtime.h"
#include "curand.h"
#include "cublas_v2.h"
#include <assert.h>
extern "C" {
#include "blas.h"
#include "cuda.h"
#include "utils.h"
}
/* Multiply every output element by its filter's scale.
 * One thread per spatial offset; blockIdx.y = filter, blockIdx.z = batch.
 * Layout: output[(batch*n + filter)*size + offset]. */
__global__ void scale_bias_kernel(float *output, float *biases, int n, int size)
{
    int offset = blockIdx.x * blockDim.x + threadIdx.x;
    int filter = blockIdx.y;
    int batch = blockIdx.z;
    if(offset < size) output[(batch*n+filter)*size + offset] *= biases[filter];
}
/* Launch scale_bias_kernel on a (ceil(size/BLOCK) x n x batch) grid. */
void scale_bias_gpu(float *output, float *biases, int batch, int n, int size)
{
    dim3 dimGrid((size-1)/BLOCK + 1, n, batch);
    dim3 dimBlock(BLOCK, 1, 1);
    scale_bias_kernel<<<dimGrid, dimBlock>>>(output, biases, n, size);
    check_error(cudaPeekAtLastError());
}
/* Gradient of the per-filter scale: one block per filter. Each thread
 * strides over the filter's spatial positions across the whole batch
 * accumulating delta*x_norm, partial sums land in shared memory, and
 * thread 0 serially folds the BLOCK partials into scale_updates[filter]. */
__global__ void backward_scale_kernel(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates)
{
    __shared__ float part[BLOCK];
    int i,b;
    int filter = blockIdx.x;
    int p = threadIdx.x;
    float sum = 0;
    for(b = 0; b < batch; ++b){
        for(i = 0; i < size; i += BLOCK){
            int index = p + i + size*(filter + n*b);
            /* guard keeps out-of-range threads from reading past the row */
            sum += (p+i < size) ? delta[index]*x_norm[index] : 0;
        }
    }
    part[p] = sum;
    __syncthreads();
    if (p == 0) {
        for(i = 0; i < BLOCK; ++i) scale_updates[filter] += part[i];
    }
}
void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates)
{
backward_scale_kernel<<<n, BLOCK>>>(x_norm, delta, batch, n, size, scale_updates);
check_error(cudaPeekAtLastError());
}
/* output[(k*n+j)*size + i] += biases[j]. The flat thread index is
 * decomposed as i = spatial offset, j = filter, k = batch item. */
__global__ void add_bias_kernel(float *output, float *biases, int batch, int n, int size)
{
    int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (index >= n*size*batch) return;
    int i = index % size;
    index /= size;
    int j = index % n;
    index /= n;
    int k = index;
    output[(k*n+j)*size + i] += biases[j];
}
void add_bias_gpu(float *output, float *biases, int batch, int n, int size)
{
int num = n*size*batch;
add_bias_kernel<<<cuda_gridsize(num), BLOCK>>>(output, biases, batch, n, size);
check_error(cudaPeekAtLastError());
}
/* Bias gradient for the size == 1 (fully-connected) case: one thread per
 * output neuron, summing its delta across the batch. */
__global__ void backward_bias_conn_kernel(float *bias_updates, float *delta, int batch, int n)
{
    int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (index >= n) return;
    int b;
    float sum = 0;
    for(b = 0; b < batch; ++b){
        int i = b*n + index;
        sum += delta[i];
    }
    bias_updates[index] += sum;
}
/* Bias gradient for spatial layers: one block per filter, shared-memory
 * partial sums of delta over batch x spatial, folded by thread 0. */
__global__ void backward_bias_kernel(float *bias_updates, float *delta, int batch, int n, int size)
{
    __shared__ float part[BLOCK];
    int i,b;
    int filter = blockIdx.x;
    int p = threadIdx.x;
    float sum = 0;
    for(b = 0; b < batch; ++b){
        for(i = 0; i < size; i += BLOCK){
            int index = p + i + size*(filter + n*b);
            sum += (p+i < size) ? delta[index] : 0;
        }
    }
    part[p] = sum;
    __syncthreads();
    if (p == 0) {
        for(i = 0; i < BLOCK; ++i) bias_updates[filter] += part[i];
    }
}
void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size)
{
if(size == 1){
backward_bias_conn_kernel<<<cuda_gridsize(n), BLOCK>>>(bias_updates, delta, batch, n);
}else{
backward_bias_kernel<<<n, BLOCK>>>(bias_updates, delta, batch, n, size);
}
check_error(cudaPeekAtLastError());
}
/*
__global__ void dot_kernel(float *output, float scale, int batch, int n, int size, float *delta)
{
int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
int f1 = index / n;
int f2 = index % n;
if (f2 <= f1) return;
float sum = 0;
float norm1 = 0;
float norm2 = 0;
int b, i;
for(b = 0; b < batch; ++b){
for(i = 0; i < size; ++i){
int i1 = b * size * n + f1 * size + i;
int i2 = b * size * n + f2 * size + i;
sum += output[i1] * output[i2];
norm1 += output[i1] * output[i1];
norm2 += output[i2] * output[i2];
}
}
norm1 = sqrt(norm1);
norm2 = sqrt(norm2);
float norm = norm1 * norm2;
sum = sum / norm;
for(b = 0; b < batch; ++b){
for(i = 0; i < size; ++i){
int i1 = b * size * n + f1 * size + i;
int i2 = b * size * n + f2 * size + i;
delta[i1] += - scale * sum * output[i2] / norm;
delta[i2] += - scale * sum * output[i1] / norm;
}
}
}
void dot_error_gpu(layer l)
{
dot_kernel<<<cuda_gridsize(l.n*l.n), BLOCK>>>(l.output_gpu, l.dot, l.batch, l.n, l.out_w * l.out_h, l.delta_gpu);
check_error(cudaPeekAtLastError());
}
*/
/* One Adam step per parameter:
 *   x += rate * sqrt(1-B2^t)/(1-B1^t) * m / (sqrt(v)+eps)
 * NOTE(review): '+' (ascent form) — assumes the sign is already folded
 * into the accumulated gradient m; confirm against adam_update_gpu. */
__global__ void adam_kernel(int N, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t)
{
    int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (index >= N) return;
    x[index] = x[index] + (rate * sqrtf(1.f-powf(B2, t)) / (1.f-powf(B1, t)) * m[index] / (sqrtf(v[index]) + eps));
}
extern "C" void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t)
{
adam_kernel<<<cuda_gridsize(n), BLOCK>>>(n, x, m, v, B1, B2, rate, eps, t);
check_error(cudaPeekAtLastError());
}
/* Full Adam update for one weight tensor w with gradient buffer d:
 *   d -= decay*batch*w          (weight decay folded into the gradient)
 *   m  = B1*m + (1-B1)*d        (scal_gpu then axpy_gpu)
 *   d  = d*d                    (squared in place by mul_gpu)
 *   v  = B2*v + (1-B2)*d^2
 * then adam_gpu applies the step and the gradient buffer is zeroed. */
extern "C" void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t)
{
    scal_gpu(n, B1, m, 1);
    scal_gpu(n, B2, v, 1);
    axpy_gpu(n, -decay*batch, w, 1, d, 1);
    axpy_gpu(n, (1-B1), d, 1, m, 1);
    mul_gpu(n, d, 1, d, 1);
    axpy_gpu(n, (1-B2), d, 1, v, 1);
    adam_gpu(n, w, m, v, B1, B2, rate, eps, t);
    fill_gpu(n, 0, d, 1);
}
/* x = (x - mean[f]) / sqrt(var[f] + 1e-5) per element; the filter index f
 * is recovered from the flat index (layout: batch x filters x spatial). */
__global__ void normalize_kernel(int N, float *x, float *mean, float *variance, int batch, int filters, int spatial)
{
    int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (index >= N) return;
    int f = (index/spatial)%filters;
    x[index] = (x[index] - mean[f])/(sqrtf(variance[f] + .00001f));
}
/* Batch-norm backward: combine the direct term (delta/sqrt(var+eps)) with
 * the precomputed variance_delta and mean_delta contributions, each
 * averaged over the batch*spatial elements of the filter. */
__global__ void normalize_delta_kernel(int N, float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
{
    int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (index >= N) return;
    int f = (index/spatial)%filters;
    delta[index] = delta[index] * 1.f/(sqrtf(variance[f] + .00001f)) + variance_delta[f] * 2.f * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch);
}
extern "C" void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
{
size_t N = batch*filters*spatial;
normalize_delta_kernel<<<cuda_gridsize(N), BLOCK>>>(N, x, mean, variance, mean_delta, variance_delta, batch, filters, spatial, delta);
check_error(cudaPeekAtLastError());
}
/* dL/d(variance) per filter: sum of delta*(x-mean) over batch x spatial,
 * scaled by -1/2 * (var+eps)^(-3/2). One thread per filter. */
__global__ void variance_delta_kernel(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (i >= filters) return;
    int j,k;
    variance_delta[i] = 0;
    for(j = 0; j < batch; ++j){
        for(k = 0; k < spatial; ++k){
            int index = j*filters*spatial + i*spatial + k;
            variance_delta[i] += delta[index]*(x[index] - mean[i]);
        }
    }
    variance_delta[i] *= -.5f * powf(variance[i] + .00001f, (float)(-3.f/2.f));
}
/* sum[i] = column sum of x viewed as an (n x groups) matrix;
 * one thread per group. */
__global__ void accumulate_kernel(float *x, int n, int groups, float *sum)
{
    int k;
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (i >= groups) return;
    sum[i] = 0;
    for(k = 0; k < n; ++k){
        sum[i] += x[k*groups + i];
    }
}
/* Shared-memory version of mean_delta: one block per filter. Threads
 * stride over batch x spatial accumulating delta into local[], thread 0
 * folds the partials and applies the -1/sqrt(var+eps) factor. */
__global__ void fast_mean_delta_kernel(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
{
    const int threads = BLOCK;
    __shared__ float local[threads];
    int id = threadIdx.x;
    local[id] = 0;
    int filter = blockIdx.x;
    int i, j;
    for(j = 0; j < batch; ++j){
        for(i = 0; i < spatial; i += threads){
            int index = j*spatial*filters + filter*spatial + i + id;
            local[id] += (i+id < spatial) ? delta[index] : 0;
        }
    }
    __syncthreads();
    if(id == 0){
        mean_delta[filter] = 0;
        for(i = 0; i < threads; ++i){
            mean_delta[filter] += local[i];
        }
        mean_delta[filter] *= (-1.f/sqrtf(variance[filter] + .00001f));
    }
}
/* Shared-memory version of variance_delta: one block per filter,
 * accumulating delta*(x-mean), folded by thread 0 and scaled by
 * -1/2 * (var+eps)^(-3/2). */
__global__ void fast_variance_delta_kernel(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
{
    const int threads = BLOCK;
    __shared__ float local[threads];
    int id = threadIdx.x;
    local[id] = 0;
    int filter = blockIdx.x;
    int i, j;
    for(j = 0; j < batch; ++j){
        for(i = 0; i < spatial; i += threads){
            int index = j*spatial*filters + filter*spatial + i + id;
            local[id] += (i+id < spatial) ? delta[index]*(x[index] - mean[filter]) : 0;
        }
    }
    __syncthreads();
    if(id == 0){
        variance_delta[filter] = 0;
        for(i = 0; i < threads; ++i){
            variance_delta[filter] += local[i];
        }
        variance_delta[filter] *= -.5f * powf(variance[filter] + .00001f, (float)(-3.f/2.f));
    }
}
/* dL/d(mean) per filter: -sum(delta)/sqrt(var+eps) over batch x spatial.
 * One thread per filter (simple, non-shared-memory variant). */
__global__ void mean_delta_kernel(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (i >= filters) return;
    int j,k;
    mean_delta[i] = 0;
    for (j = 0; j < batch; ++j) {
        for (k = 0; k < spatial; ++k) {
            int index = j*filters*spatial + i*spatial + k;
            mean_delta[i] += delta[index];
        }
    }
    mean_delta[i] *= (-1.f/sqrtf(variance[i] + .00001f));
}
extern "C" void mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
{
mean_delta_kernel<<<cuda_gridsize(filters), BLOCK>>>(delta, variance, batch, filters, spatial, mean_delta);
check_error(cudaPeekAtLastError());
}
extern "C" void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
{
fast_mean_delta_kernel<<<filters, BLOCK>>>(delta, variance, batch, filters, spatial, mean_delta);
check_error(cudaPeekAtLastError());
}
extern "C" void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
{
fast_variance_delta_kernel<<<filters, BLOCK>>>(x, delta, mean, variance, batch, filters, spatial, variance_delta);
check_error(cudaPeekAtLastError());
}
/* Per-filter mean over batch x spatial; one thread per filter. */
__global__ void mean_kernel(float *x, int batch, int filters, int spatial, float *mean)
{
    float scale = 1.f/(batch * spatial);
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (i >= filters) return;
    int j,k;
    mean[i] = 0;
    for(j = 0; j < batch; ++j){
        for(k = 0; k < spatial; ++k){
            int index = j*filters*spatial + i*spatial + k;
            mean[i] += x[index];
        }
    }
    mean[i] *= scale;
}
/* Per-filter sample variance over batch x spatial; one thread per filter.
 * Divides by (batch*spatial - 1), i.e. Bessel's correction. */
__global__ void variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance)
{
    float scale = 1.f/(batch * spatial - 1);
    int j,k;
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (i >= filters) return;
    variance[i] = 0;
    for(j = 0; j < batch; ++j){
        for(k = 0; k < spatial; ++k){
            int index = j*filters*spatial + i*spatial + k;
            variance[i] += powf((x[index] - mean[i]), 2);
        }
    }
    variance[i] *= scale;
}
/* Index shuffle for the reorg layer: the flat thread index is unraveled
 * into (b, in_c, in_h, in_w); the paired index maps between a (c,h,w)
 * tensor and a (c/stride^2, h*stride, w*stride) view. 'forward' selects
 * the copy direction. NOTE(review): assumes c is divisible by stride^2 —
 * confirm with callers before changing shapes. */
__global__ void reorg_kernel(int N, float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i >= N) return;
    int in_index = i;
    int in_w = i%w;
    i = i/w;
    int in_h = i%h;
    i = i/h;
    int in_c = i%c;
    i = i/c;
    int b = i%batch;
    int out_c = c/(stride*stride);
    int c2 = in_c % out_c;
    int offset = in_c / out_c;
    int w2 = in_w*stride + offset % stride;
    int h2 = in_h*stride + offset / stride;
    //printf("%d\n", offset);
    int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b));
    // printf("%d %d %d\n", w2, h2, c2);
    //printf("%d %d\n", in_index, out_index);
    //if(out_index >= N || out_index < 0) printf("bad bad bad \n");
    if(forward) out[out_index] = x[in_index];
    else out[in_index] = x[out_index];
    //if(forward) out[1] = x[1];
    //else out[0] = x[0];
}
/* Y[OFFY + i*INCY] += ALPHA * X[OFFX + i*INCX]; one thread per element. */
__global__ void axpy_kernel(int N, float ALPHA, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY)
{
    int tid = (blockIdx.y*gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
    if (tid >= N) return;
    Y[OFFY + tid*INCY] += ALPHA * X[OFFX + tid*INCX];
}
/* Y[i*INCY] = X[i*INCX]^ALPHA, one thread per element.
 * Fix: use powf (single precision) instead of pow, avoiding a possible
 * double-precision path on the device; matches the powf usage in the
 * other kernels in this file. */
__global__ void pow_kernel(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N) Y[i*INCY] = powf(X[i*INCX], ALPHA);
}
/* Set every INCX-strided element of X to the constant ALPHA. */
__global__ void const_kernel(int N, float ALPHA, float *X, int INCX)
{
    int tid = (blockIdx.y*gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
    if (tid >= N) return;
    X[tid*INCX] = ALPHA;
}
/* Clamp each strided element of X into [-ALPHA, ALPHA]. */
__global__ void constrain_kernel(int N, float ALPHA, float *X, int INCX)
{
    int tid = (blockIdx.y*gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
    if (tid >= N) return;
    float v = X[tid*INCX];
    X[tid*INCX] = fminf(ALPHA, fmaxf(-ALPHA, v));
}
/* Zero out strided elements whose magnitude is below ALPHA
 * (compared via squares, so the sign of ALPHA is irrelevant). */
__global__ void supp_kernel(int N, float ALPHA, float *X, int INCX)
{
    int tid = (blockIdx.y*gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
    if (tid >= N) return;
    float v = X[tid*INCX];
    if (v*v < ALPHA*ALPHA) X[tid*INCX] = 0;
}
/* Add the scalar ALPHA to every INCX-strided element of X. */
__global__ void add_kernel(int N, float ALPHA, float *X, int INCX)
{
    int tid = (blockIdx.y*gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
    if (tid >= N) return;
    X[tid*INCX] += ALPHA;
}
/* Scale every INCX-strided element of X by ALPHA (BLAS sscal). */
__global__ void scal_kernel(int N, float ALPHA, float *X, int INCX)
{
    int tid = (blockIdx.y*gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
    if (tid >= N) return;
    X[tid*INCX] *= ALPHA;
}
/* Fill every INCX-strided element of X with ALPHA. */
__global__ void fill_kernel(int N, float ALPHA, float *X, int INCX)
{
    int tid = (blockIdx.y*gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
    if (tid >= N) return;
    X[tid*INCX] = ALPHA;
}
/* Where mask[i] == mask_num, overwrite x[i] with mask_num itself.
 * NOTE(review): writes the sentinel value into x rather than copying the
 * mask — surprising but long-standing darknet behavior; confirm with
 * callers before changing. */
__global__ void mask_kernel(int n, float *x, float mask_num, float *mask)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n && mask[i] == mask_num) x[i] = mask_num;
}
/* Y[OFFY + i*INCY] = X[OFFX + i*INCX]; one thread per element. */
__global__ void copy_kernel(int N, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY)
{
    int tid = (blockIdx.y*gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
    if (tid >= N) return;
    Y[tid*INCY + OFFY] = X[tid*INCX + OFFX];
}
/* Elementwise in-place product: Y[i*INCY] *= X[i*INCX]. */
__global__ void mul_kernel(int N, float *X, int INCX, float *Y, int INCY)
{
    int tid = (blockIdx.y*gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
    if (tid >= N) return;
    Y[tid*INCY] *= X[tid*INCX];
}
extern "C" void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial)
{
size_t N = batch*filters*spatial;
normalize_kernel<<<cuda_gridsize(N), BLOCK>>>(N, x, mean, variance, batch, filters, spatial);
check_error(cudaPeekAtLastError());
}
/* Shared-memory per-filter mean: one block per filter, threads stride
 * over batch x spatial, thread 0 folds the partials and divides by the
 * element count. */
__global__ void fast_mean_kernel(float *x, int batch, int filters, int spatial, float *mean)
{
    const int threads = BLOCK;
    __shared__ float local[threads];
    int id = threadIdx.x;
    local[id] = 0;
    int filter = blockIdx.x;
    int i, j;
    for(j = 0; j < batch; ++j){
        for(i = 0; i < spatial; i += threads){
            int index = j*spatial*filters + filter*spatial + i + id;
            local[id] += (i+id < spatial) ? x[index] : 0;
        }
    }
    __syncthreads();
    if(id == 0){
        mean[filter] = 0;
        for(i = 0; i < threads; ++i){
            mean[filter] += local[i];
        }
        mean[filter] /= spatial * batch;
    }
}
/* Shared-memory per-filter sample variance (divides by n-1, matching
 * variance_kernel); one block per filter. */
__global__ void fast_variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance)
{
    const int threads = BLOCK;
    __shared__ float local[threads];
    int id = threadIdx.x;
    local[id] = 0;
    int filter = blockIdx.x;
    int i, j;
    for(j = 0; j < batch; ++j){
        for(i = 0; i < spatial; i += threads){
            int index = j*spatial*filters + filter*spatial + i + id;
            local[id] += (i+id < spatial) ? powf((x[index] - mean[filter]), 2) : 0;
        }
    }
    __syncthreads();
    if(id == 0){
        variance[filter] = 0;
        for(i = 0; i < threads; ++i){
            variance[filter] += local[i];
        }
        variance[filter] /= (spatial * batch - 1);
    }
}
extern "C" void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean)
{
fast_mean_kernel<<<filters, BLOCK>>>(x, batch, filters, spatial, mean);
check_error(cudaPeekAtLastError());
}
extern "C" void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
{
fast_variance_kernel<<<filters, BLOCK>>>(x, mean, batch, filters, spatial, variance);
check_error(cudaPeekAtLastError());
}
extern "C" void mean_gpu(float *x, int batch, int filters, int spatial, float *mean)
{
mean_kernel<<<cuda_gridsize(filters), BLOCK>>>(x, batch, filters, spatial, mean);
check_error(cudaPeekAtLastError());
}
extern "C" void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
{
variance_kernel<<<cuda_gridsize(filters), BLOCK>>>(x, mean, batch, filters, spatial, variance);
check_error(cudaPeekAtLastError());
}
extern "C" void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY)
{
axpy_gpu_offset(N, ALPHA, X, 0, INCX, Y, 0, INCY);
}
extern "C" void pow_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY)
{
pow_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX, Y, INCY);
check_error(cudaPeekAtLastError());
}
extern "C" void axpy_gpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY)
{
axpy_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, OFFX, INCX, Y, OFFY, INCY);
check_error(cudaPeekAtLastError());
}
extern "C" void copy_gpu(int N, float * X, int INCX, float * Y, int INCY)
{
copy_gpu_offset(N, X, 0, INCX, Y, 0, INCY);
}
extern "C" void mul_gpu(int N, float * X, int INCX, float * Y, int INCY)
{
mul_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, INCX, Y, INCY);
check_error(cudaPeekAtLastError());
}
extern "C" void copy_gpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY)
{
copy_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, OFFX, INCX, Y, OFFY, INCY);
check_error(cudaPeekAtLastError());
}
/* Per batch item, transpose between channel-major (i1: layers x spatial)
 * and interleaved (i2: spatial x layers) layouts; 'forward' picks the
 * direction of the copy. */
__global__ void flatten_kernel(int N, float *x, int spatial, int layers, int batch, int forward, float *out)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i >= N) return;
    int in_s = i%spatial;
    i = i/spatial;
    int in_c = i%layers;
    i = i/layers;
    int b = i;
    int i1 = b*layers*spatial + in_c*spatial + in_s;
    int i2 = b*layers*spatial + in_s*layers + in_c;
    if (forward) out[i2] = x[i1];
    else out[i1] = x[i2];
}
extern "C" void flatten_gpu(float *x, int spatial, int layers, int batch, int forward, float *out)
{
int size = spatial*batch*layers;
flatten_kernel<<<cuda_gridsize(size), BLOCK>>>(size, x, spatial, layers, batch, forward, out);
check_error(cudaPeekAtLastError());
}
extern "C" void reorg_gpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
{
int size = w*h*c*batch;
reorg_kernel<<<cuda_gridsize(size), BLOCK>>>(size, x, w, h, c, batch, stride, forward, out);
check_error(cudaPeekAtLastError());
}
__global__ void scale_mask_kernel(int n, float *x, float mask_num, float *mask, float scale)
{
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
if(i < n && mask[i] == mask_num) x[i] *= scale;
}
extern "C" void scale_mask_gpu(int N, float * X, float mask_num, float * mask, float scale)
{
scale_mask_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, mask_num, mask, scale);
check_error(cudaPeekAtLastError());
}
extern "C" void mask_gpu(int N, float * X, float mask_num, float * mask)
{
mask_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, mask_num, mask);
check_error(cudaPeekAtLastError());
}
extern "C" void const_gpu(int N, float ALPHA, float * X, int INCX)
{
const_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
check_error(cudaPeekAtLastError());
}
extern "C" void constrain_gpu(int N, float ALPHA, float * X, int INCX)
{
constrain_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
check_error(cudaPeekAtLastError());
}
extern "C" void add_gpu(int N, float ALPHA, float * X, int INCX)
{
add_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
check_error(cudaPeekAtLastError());
}
extern "C" void scal_gpu(int N, float ALPHA, float * X, int INCX)
{
scal_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
check_error(cudaPeekAtLastError());
}
extern "C" void supp_gpu(int N, float ALPHA, float * X, int INCX)
{
supp_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
check_error(cudaPeekAtLastError());
}
extern "C" void fill_gpu(int N, float ALPHA, float * X, int INCX)
{
fill_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
check_error(cudaPeekAtLastError());
}
/* Residual/shortcut add over the overlapping (minw, minh, minc) region:
 * 'stride' subsamples the larger input, 'sample' spreads into the larger
 * output. out[b,k,j,i] += add[...]. */
__global__ void shortcut_kernel(int size, int minw, int minh, int minc, int stride, int sample, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (id >= size) return;
    int i = id % minw;
    id /= minw;
    int j = id % minh;
    id /= minh;
    int k = id % minc;
    id /= minc;
    int b = id % batch;
    int out_index = i*sample + w2*(j*sample + h2*(k + c2*b));
    int add_index = i*stride + w1*(j*stride + h1*(k + c1*b));
    out[out_index] += add[add_index];
}
/* Launch shortcut_kernel. stride/sample are integer ratios of the two
 * widths; the asserts require the height ratios to agree, i.e. the two
 * tensors must differ by a uniform integer scale factor. */
extern "C" void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out)
{
    int minw = (w1 < w2) ? w1 : w2;
    int minh = (h1 < h2) ? h1 : h2;
    int minc = (c1 < c2) ? c1 : c2;
    int stride = w1/w2;
    int sample = w2/w1;
    assert(stride == h1/h2);
    assert(sample == h2/h1);
    /* equal sizes give stride == sample == 1 after this clamp */
    if(stride < 1) stride = 1;
    if(sample < 1) sample = 1;
    int size = batch * minw * minh * minc;
    shortcut_kernel<<<cuda_gridsize(size), BLOCK>>>(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out);
    check_error(cudaPeekAtLastError());
}
/* Smooth-L1 (Huber-like) loss per element, diff = truth - pred:
 *   |diff| < 1:  error = diff^2,       delta = diff
 *   otherwise:   error = 2|diff| - 1,  delta = sign(diff) */
__global__ void smooth_l1_kernel(int n, float *pred, float *truth, float *delta, float *error)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        float diff = truth[i] - pred[i];
        float abs_val = fabsf(diff);
        if(abs_val < 1) {
            error[i] = diff * diff;
            delta[i] = diff;
        }
        else {
            error[i] = 2*abs_val - 1;
            delta[i] = (diff > 0) ? 1 : -1;
        }
    }
}
extern "C" void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error)
{
smooth_l1_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
check_error(cudaPeekAtLastError());
}
/* Squared-error loss per element: error = diff^2 (the original author's
 * comment notes the missing 1/2 factor is deliberate), delta = diff. */
__global__ void l2_kernel(int n, float *pred, float *truth, float *delta, float *error)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        float diff = truth[i] - pred[i];
        error[i] = diff * diff; //I know this is technically wrong, deal with it.
        delta[i] = diff;
    }
}
extern "C" void l2_gpu(int n, float *pred, float *truth, float *delta, float *error)
{
l2_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
check_error(cudaPeekAtLastError());
}
/* L1 loss per element: error = |truth - pred|, delta = sign(diff).
 * Fix: use fabsf instead of abs — unqualified abs can resolve to the
 * integer overload and truncate the float difference; fabsf also matches
 * smooth_l1_kernel above. */
__global__ void l1_kernel(int n, float *pred, float *truth, float *delta, float *error)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        float diff = truth[i] - pred[i];
        error[i] = fabsf(diff);
        delta[i] = (diff > 0) ? 1 : -1;
    }
}
extern "C" void l1_gpu(int n, float *pred, float *truth, float *delta, float *error)
{
l1_kernel<<<cuda_gridsize(n), BLOCK>>>(n, pred, truth, delta, error);
check_error(cudaPeekAtLastError());
}
/* c = s*a + (1-s)*b elementwise; b may be NULL (treated as zeros). */
__global__ void weighted_sum_kernel(int n, float *a, float *b, float *s, float *c)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        c[i] = s[i]*a[i] + (1-s[i])*(b ? b[i] : 0);
    }
}
/* Backward of inter_kernel: each OUT row of width NX+NY is split back
 * into its X part (first NX) and Y part (last NY), accumulating into X
 * and Y when those pointers are non-NULL. */
__global__ void deinter_kernel(int NX, float *X, int NY, float *Y, int B, float *OUT)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < (NX+NY)*B){
        int b = i / (NX+NY);
        int j = i % (NX+NY);
        if (j < NX){
            if(X) X[b*NX + j] += OUT[i];
        } else {
            if(Y) Y[b*NY + j - NX] += OUT[i];
        }
    }
}
extern "C" void deinter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
{
deinter_kernel<<<cuda_gridsize((NX+NY)*B), BLOCK>>>(NX, X, NY, Y, B, OUT);
check_error(cudaPeekAtLastError());
}
/* Row-wise concatenation: OUT has B rows of width NX+NY, filled with the
 * corresponding row of X followed by the row of Y. */
__global__ void inter_kernel(int NX, float *X, int NY, float *Y, int B, float *OUT)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < (NX+NY)*B){
        int b = i / (NX+NY);
        int j = i % (NX+NY);
        if (j < NX){
            OUT[i] = X[b*NX + j];
        } else {
            OUT[i] = Y[b*NY + j - NX];
        }
    }
}
extern "C" void inter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
{
inter_kernel<<<cuda_gridsize((NX+NY)*B), BLOCK>>>(NX, X, NY, Y, B, OUT);
check_error(cudaPeekAtLastError());
}
extern "C" void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c)
{
weighted_sum_kernel<<<cuda_gridsize(num), BLOCK>>>(num, a, b, s, c);
check_error(cudaPeekAtLastError());
}
/* Backward of weighted_sum (c = s*a + (1-s)*b): propagate dc into da, db
 * (when non-NULL) and ds. ds always receives dc*(a-b). */
__global__ void weighted_delta_kernel(int n, float *a, float *b, float *s, float *da, float *db, float *ds, float *dc)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n){
        if(da) da[i] += dc[i] * s[i];
        if(db) db[i] += dc[i] * (1-s[i]);
        ds[i] += dc[i] * (a[i] - b[i]);
    }
}
extern "C" void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc)
{
weighted_delta_kernel<<<cuda_gridsize(num), BLOCK>>>(num, a, b, s, da, db, ds, dc);
check_error(cudaPeekAtLastError());
}
__global__ void mult_add_into_kernel(int n, float *a, float *b, float *c)
{
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
if(i < n){
c[i] += a[i]*b[i];
}
}
extern "C" void mult_add_into_gpu(int num, float *a, float *b, float *c)
{
mult_add_into_kernel<<<cuda_gridsize(num), BLOCK>>>(num, a, b, c);
check_error(cudaPeekAtLastError());
}
/* Numerically stable softmax over n elements spaced 'stride' apart, with
 * temperature 'temp'. Subtracts the max before exponentiating.
 * Fix: the running maximum was stored in an int temporary, truncating
 * input values and weakening the max-subtraction stability trick; the
 * temporary is now a float. (The final probabilities were still correct
 * mathematically — any constant shift cancels — but large inputs could
 * overflow expf.) */
__device__ void softmax_device(float *input, int n, float temp, int stride, float *output)
{
    int i;
    float sum = 0;
    float largest = -INFINITY;
    for(i = 0; i < n; ++i){
        float val = input[i*stride];
        largest = (val>largest) ? val : largest;
    }
    for(i = 0; i < n; ++i){
        float e = expf(input[i*stride]/temp - largest/temp);
        sum += e;
        output[i*stride] = e;
    }
    for(i = 0; i < n; ++i){
        output[i*stride] /= sum;
    }
}
/* One softmax per (spatial position, group, batch item): each group is a
 * contiguous run of group_size[g] channels starting at group_offset[g],
 * strided by 'spatial' within the feature map. */
__global__ void softmax_tree_kernel(float *input, int spatial, int batch, int stride, float temp, float *output, int groups, int *group_size, int *group_offset)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (id >= spatial*batch*groups) return;
    int s = id % spatial;
    id = id / spatial;
    int g = id % groups;
    int b = id / groups;
    int goff = group_offset[g]*spatial;
    int boff = b*stride;
    softmax_device(input + goff + boff + s, group_size[g], temp, spatial, output + goff + boff + s);
}
/* Hierarchical softmax over the word-tree groups: copies the per-group
 * size/offset tables to the device on every call and frees them after the
 * launch (the commented block below shows an earlier cached approach). */
extern "C" void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier)
{
    int *tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups);
    int *tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups);
    /*
    static int *tree_groups_size = 0;
    static int *tree_groups_offset = 0;
    if(!tree_groups_size){
        tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups);
        tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups);
    }
    */
    int num = spatial*batch*hier.groups;
    softmax_tree_kernel<<<cuda_gridsize(num), BLOCK>>>(input, spatial, batch, stride, temp, output, hier.groups, tree_groups_size, tree_groups_offset);
    check_error(cudaPeekAtLastError());
    cuda_free((float *)tree_groups_size);
    cuda_free((float *)tree_groups_offset);
}
/* One softmax per (batch item, group); offsets select the group's slice. */
__global__ void softmax_kernel(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (id >= batch*groups) return;
    int b = id / groups;
    int g = id % groups;
    softmax_device(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset);
}
extern "C" void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
{
softmax_kernel<<<cuda_gridsize(batch*groups), BLOCK>>>(input, n, batch, batch_offset, groups, group_offset, stride, temp, output);
check_error(cudaPeekAtLastError());
}
================================================
FILE: lightnet/_darknet/box.c
================================================
#include "box.h"
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
/* Read four INCX-style strided floats (x, y, w, h) into a box struct. */
box float_to_box(float *f, int stride)
{
    box parsed;
    parsed.x = f[0];
    parsed.y = f[1*stride];
    parsed.w = f[2*stride];
    parsed.h = f[3*stride];
    return parsed;
}
/* Piecewise derivative of the 1-D overlap of box a with box b, per
 * coordinate of a. For each axis: the inner-edge comparisons adjust
 * dx/dw (or dy/dh) by ±1 and ±.5, and the fully-disjoint cases override
 * them with a fixed pull toward b. Order of the if-blocks matters: the
 * disjoint checks run last so they win. */
dbox derivative(box a, box b)
{
    dbox d;
    d.dx = 0;
    d.dw = 0;
    float l1 = a.x - a.w/2;
    float l2 = b.x - b.w/2;
    if (l1 > l2){
        d.dx -= 1;
        d.dw += .5;
    }
    float r1 = a.x + a.w/2;
    float r2 = b.x + b.w/2;
    if(r1 < r2){
        d.dx += 1;
        d.dw += .5;
    }
    /* disjoint on x: override with a constant gradient */
    if (l1 > r2) {
        d.dx = -1;
        d.dw = 0;
    }
    if (r1 < l2){
        d.dx = 1;
        d.dw = 0;
    }
    d.dy = 0;
    d.dh = 0;
    float t1 = a.y - a.h/2;
    float t2 = b.y - b.h/2;
    if (t1 > t2){
        d.dy -= 1;
        d.dh += .5;
    }
    float b1 = a.y + a.h/2;
    float b2 = b.y + b.h/2;
    if(b1 < b2){
        d.dy += 1;
        d.dh += .5;
    }
    /* disjoint on y: override with a constant gradient */
    if (t1 > b2) {
        d.dy = -1;
        d.dh = 0;
    }
    if (b1 < t2){
        d.dy = 1;
        d.dh = 0;
    }
    return d;
}
/* Length of the 1-D intersection of two center/width spans
 * (x1 +/- w1/2) and (x2 +/- w2/2); negative when they are disjoint. */
float overlap(float x1, float w1, float x2, float w2)
{
    float lo1 = x1 - w1/2;
    float lo2 = x2 - w2/2;
    float hi1 = x1 + w1/2;
    float hi2 = x2 + w2/2;
    float left  = (lo1 > lo2) ? lo1 : lo2;
    float right = (hi1 < hi2) ? hi1 : hi2;
    return right - left;
}
/* Overlapping area of two boxes; 0 when they do not intersect. */
float box_intersection(box a, box b)
{
    float ow = overlap(a.x, a.w, b.x, b.w);
    float oh = overlap(a.y, a.h, b.y, b.h);
    if (ow < 0 || oh < 0) return 0;
    return ow * oh;
}
/* Union area = sum of the two areas minus their intersection. */
float box_union(box a, box b)
{
    float inter = box_intersection(a, b);
    return a.w*a.h + b.w*b.h - inter;
}
/* Intersection-over-union of two boxes. */
float box_iou(box a, box b)
{
    float i = box_intersection(a, b);
    float u = box_union(a, b);
    return i/u;
}
/* Euclidean distance between two boxes in (x, y, w, h) space. */
float box_rmse(box a, box b)
{
    float dx = a.x - b.x;
    float dy = a.y - b.y;
    float dw = a.w - b.w;
    float dh = a.h - b.h;
    return sqrt(pow(dx, 2) + pow(dy, 2) + pow(dw, 2) + pow(dh, 2));
}
/* Gradient of box_intersection w.r.t. box a, by the product rule:
 * intersection = overlap_x * overlap_y, so each x-term gradient is scaled
 * by the y-overlap and vice versa. */
dbox dintersect(box a, box b)
{
    float w = overlap(a.x, a.w, b.x, b.w);
    float h = overlap(a.y, a.h, b.y, b.h);
    dbox dover = derivative(a, b);
    dbox di;
    di.dw = dover.dw*h;
    di.dx = dover.dx*h;
    di.dh = dover.dh*w;
    di.dy = dover.dy*w;
    return di;
}
/* Gradient of box_union w.r.t. box a:
 * d(union) = d(area_a) - d(intersection).
 * a.h is d(a.w*a.h)/d(a.w) (and a.w for dh); the center coordinates only
 * affect the intersection term, hence the pure sign flips for dx/dy. */
dbox dunion(box a, box b)
{
    dbox du;
    dbox di = dintersect(a, b);
    du.dw = a.h - di.dw;
    du.dh = a.w - di.dh;
    du.dx = -di.dx;
    du.dy = -di.dy;
    return du;
}
/* Debug check: compare the analytic union gradient (dunion) against
 * one-sided finite differences of box_union. */
void test_dunion()
{
    box a = {0, 0, 1, 1};
    box dxa = {0 + .0001, 0, 1, 1};
    box dya = {0, 0 + .0001, 1, 1};
    box dwa = {0, 0, 1 + .0001, 1};
    box dha = {0, 0, 1, 1 + .0001};
    box b = {.5, .5, .2, .2};

    dbox analytic = dunion(a, b);
    printf("Union: %f %f %f %f\n", analytic.dx, analytic.dy, analytic.dw, analytic.dh);

    float base = box_union(a, b);
    float gx = (box_union(dxa, b) - base)/(.0001);
    float gy = (box_union(dya, b) - base)/(.0001);
    float gw = (box_union(dwa, b) - base)/(.0001);
    float gh = (box_union(dha, b) - base)/(.0001);
    printf("Union Manual %f %f %f %f\n", gx, gy, gw, gh);
}
/* Debug check: compare the analytic intersection gradient (dintersect)
 * against one-sided finite differences of box_intersection. */
void test_dintersect()
{
    box a = {0, 0, 1, 1};
    box dxa = {0 + .0001, 0, 1, 1};
    box dya = {0, 0 + .0001, 1, 1};
    box dwa = {0, 0, 1 + .0001, 1};
    box dha = {0, 0, 1, 1 + .0001};
    box b = {.5, .5, .2, .2};

    dbox analytic = dintersect(a, b);
    printf("Inter: %f %f %f %f\n", analytic.dx, analytic.dy, analytic.dw, analytic.dh);

    float base = box_intersection(a, b);
    float gx = (box_intersection(dxa, b) - base)/(.0001);
    float gy = (box_intersection(dya, b) - base)/(.0001);
    float gw = (box_intersection(dwa, b) - base)/(.0001);
    float gh = (box_intersection(dha, b) - base)/(.0001);
    printf("Inter Manual %f %f %f %f\n", gx, gy, gw, gh);
}
/* Smoke test: prints analytic vs finite-difference gradients for the
 * intersection, union and squared-IoU (1-iou)^2 losses.
 * diou is defined below; it is declared in box.h, so this forward use is
 * valid. */
void test_box()
{
    test_dintersect();
    test_dunion();
    box a = {0, 0, 1, 1};
    box dxa= {0+.00001, 0, 1, 1};
    box dya= {0, 0+.00001, 1, 1};
    box dwa= {0, 0, 1+.00001, 1};
    box dha= {0, 0, 1, 1+.00001};
    box b = {.5, 0, .2, .2};
    float iou = box_iou(a,b);
    iou = (1-iou)*(1-iou);
    printf("%f\n", iou);
    dbox d = diou(a, b);
    printf("%f %f %f %f\n", d.dx, d.dy, d.dw, d.dh);
    // finite-difference gradient of the squared IoU loss
    float xiou = box_iou(dxa, b);
    float yiou = box_iou(dya, b);
    float wiou = box_iou(dwa, b);
    float hiou = box_iou(dha, b);
    xiou = ((1-xiou)*(1-xiou) - iou)/(.00001);
    yiou = ((1-yiou)*(1-yiou) - iou)/(.00001);
    wiou = ((1-wiou)*(1-wiou) - iou)/(.00001);
    hiou = ((1-hiou)*(1-hiou) - iou)/(.00001);
    printf("manual %f %f %f %f\n", xiou, yiou, wiou, hiou);
}
/* Gradient of the squared IoU loss (1 - i/u)^2 w.r.t. box a.
 * NOTE(review): the `|| 1` makes the first branch unconditional, so the
 * analytic IoU gradient below it is dead code and a simple box-difference
 * gradient (b - a) is always returned.  This matches upstream darknet;
 * confirm intent before "fixing". */
dbox diou(box a, box b)
{
    float u = box_union(a,b);
    float i = box_intersection(a,b);
    dbox di = dintersect(a,b);
    dbox du = dunion(a,b);
    dbox dd = {0,0,0,0};
    if(i <= 0 || 1) {  // always taken (see note above)
        dd.dx = b.x - a.x;
        dd.dy = b.y - a.y;
        dd.dw = b.w - a.w;
        dd.dh = b.h - a.h;
        return dd;
    }
    // unreachable: quotient-rule gradient of i/u, scaled by 2*(1 - i/u)
    dd.dx = 2*pow((1-(i/u)),1)*(di.dx*u - du.dx*i)/(u*u);
    dd.dy = 2*pow((1-(i/u)),1)*(di.dy*u - du.dy*i)/(u*u);
    dd.dw = 2*pow((1-(i/u)),1)*(di.dw*u - du.dw*i)/(u*u);
    dd.dh = 2*pow((1-(i/u)),1)*(di.dh*u - du.dh*i)/(u*u);
    return dd;
}
/* Sort record for NMS: a box's row in the shared probability table plus
 * the class column the comparator should rank by. */
typedef struct{
    int index;      /* row in boxes[] / probs[] */
    int class;      /* column of probs[][] used for ordering */
    float **probs;  /* shared table: probs[box][class] */
} sortable_bbox;
/* qsort comparator: orders sortable_bbox entries by descending probability
 * in the class column carried by the second element (callers set the same
 * class on every element before sorting). */
int nms_comparator(const void *pa, const void *pb)
{
    sortable_bbox a = *(sortable_bbox *)pa;
    sortable_bbox b = *(sortable_bbox *)pb;
    float diff = a.probs[a.index][b.class] - b.probs[b.index][b.class];
    if (diff > 0) return -1;
    if (diff < 0) return 1;
    return 0;
}
/* NMS ranked by the score in probs[][classes] — the extra column after the
 * per-class columns (presumably objectness; confirm against the caller).
 * Boxes are sorted by that score descending; any box overlapping a
 * higher-scoring box with IoU > thresh has all classes+1 columns zeroed. */
void do_nms_obj(box *boxes, float **probs, int total, int classes, float thresh)
{
    int i, j, k;
    sortable_bbox *s = calloc(total, sizeof(sortable_bbox));
    for(i = 0; i < total; ++i){
        s[i].index = i;
        s[i].class = classes;  // rank by the extra column, not a class column
        s[i].probs = probs;
    }
    qsort(s, total, sizeof(sortable_bbox), nms_comparator);
    for(i = 0; i < total; ++i){
        if(probs[s[i].index][classes] == 0) continue;  // already suppressed
        box a = boxes[s[i].index];
        for(j = i+1; j < total; ++j){
            box b = boxes[s[j].index];
            if (box_iou(a, b) > thresh){
                for(k = 0; k < classes+1; ++k){
                    probs[s[j].index][k] = 0;
                }
            }
        }
    }
    free(s);
}
/* Per-class NMS: for each class k, sort boxes by probs[.][k] descending
 * and zero that class's probability on any box that overlaps a
 * higher-probability box with IoU > thresh.  Other classes of the
 * suppressed box are untouched. */
void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh)
{
    int i, j, k;
    sortable_bbox *s = calloc(total, sizeof(sortable_bbox));
    for(i = 0; i < total; ++i){
        s[i].index = i;
        s[i].class = 0;
        s[i].probs = probs;
    }
    for(k = 0; k < classes; ++k){
        // re-key every record to the current class before sorting
        for(i = 0; i < total; ++i){
            s[i].class = k;
        }
        qsort(s, total, sizeof(sortable_bbox), nms_comparator);
        for(i = 0; i < total; ++i){
            if(probs[s[i].index][k] == 0) continue;  // already suppressed
            box a = boxes[s[i].index];
            for(j = i+1; j < total; ++j){
                box b = boxes[s[j].index];
                if (box_iou(a, b) > thresh){
                    probs[s[j].index][k] = 0;
                }
            }
        }
    }
    free(s);
}
/* Unsorted pairwise NMS: for every overlapping pair (IoU > thresh), keep
 * the higher probability per class and zero the lower one.  Boxes with no
 * positive class probability are skipped as suppressors. */
void do_nms(box *boxes, float **probs, int total, int classes, float thresh)
{
    int i, j, k;
    for(i = 0; i < total; ++i){
        int any = 0;
        for(k = 0; k < classes; ++k) any = any || (probs[i][k] > 0);
        if(!any) {
            continue;
        }
        for(j = i+1; j < total; ++j){
            if (box_iou(boxes[i], boxes[j]) > thresh){
                for(k = 0; k < classes; ++k){
                    // keep whichever of the pair scores higher on class k
                    if (probs[i][k] < probs[j][k]) probs[i][k] = 0;
                    else probs[j][k] = 0;
                }
            }
        }
    }
}
/* Express box b relative to an anchor: center offsets normalized by the
 * anchor size, sizes as log2 ratios.  Inverse of decode_box. */
box encode_box(box b, box anchor)
{
    box e;
    e.x = (b.x - anchor.x) / anchor.w;
    e.y = (b.y - anchor.y) / anchor.h;
    e.w = log2(b.w / anchor.w);
    e.h = log2(b.h / anchor.h);
    return e;
}
/* Recover an absolute box from anchor-relative coordinates.
 * Inverse of encode_box: x/y are de-normalized offsets, w/h are 2^value
 * scalings of the anchor size. */
box decode_box(box b, box anchor)
{
    box d;
    d.x = b.x * anchor.w + anchor.x;
    d.y = b.y * anchor.h + anchor.y;
    d.w = pow(2., b.w) * anchor.w;
    d.h = pow(2., b.h) * anchor.h;
    return d;
}
================================================
FILE: lightnet/_darknet/box.h
================================================
#ifndef BOX_H
#define BOX_H
#include "darknet.h"
/* Gradient of a box overlap/IoU quantity w.r.t. (x, y, w, h) of a box. */
typedef struct{
    float dx, dy, dw, dh;
} dbox;
float box_rmse(box a, box b);
dbox diou(box a, box b);
box decode_box(box b, box anchor);
box encode_box(box b, box anchor);
#endif
================================================
FILE: lightnet/_darknet/classifier.h
================================================
================================================
FILE: lightnet/_darknet/col2im.c
================================================
#include <stdio.h>
#include <math.h>
/* Accumulate val into image element (row, col) of `channel` after removing
 * the padding offset; positions outside the image are silently dropped. */
void col2im_add_pixel(float *im, int height, int width, int channels,
        int row, int col, int channel, int pad, float val)
{
    int r = row - pad;
    int c = col - pad;
    if (r < 0 || c < 0 || r >= height || c >= width) {
        return;
    }
    im[c + width*(r + height*channel)] += val;
}
/* Reverse of im2col: scatter-add every element of the column matrix
 * data_col back into the image data_im, so gradients accumulate where
 * convolution patches overlapped.
 * data_col layout: [channels*ksize*ksize rows][height_col*width_col cols]. */
void col2im_cpu(float* data_col,
        int channels, int height, int width,
        int ksize, int stride, int pad, float* data_im)
{
    int c,h,w;
    int height_col = (height + 2*pad - ksize) / stride + 1;
    int width_col = (width + 2*pad - ksize) / stride + 1;
    int channels_col = channels * ksize * ksize;
    for (c = 0; c < channels_col; ++c) {
        // decompose the column-row index into (kernel x, kernel y, channel)
        int w_offset = c % ksize;
        int h_offset = (c / ksize) % ksize;
        int c_im = c / ksize / ksize;
        for (h = 0; h < height_col; ++h) {
            for (w = 0; w < width_col; ++w) {
                int im_row = h_offset + h * stride;
                int im_col = w_offset + w * stride;
                int col_index = (c * height_col + h) * width_col + w;
                double val = data_col[col_index];
                col2im_add_pixel(data_im, height, width, channels,
                        im_row, im_col, c_im, pad, val);
            }
        }
    }
}
================================================
FILE: lightnet/_darknet/col2im.h
================================================
#ifndef COL2IM_H
#define COL2IM_H
/* Scatter-add a column matrix (im2col layout) back into an image buffer;
 * used to accumulate input gradients in convolution backward passes. */
void col2im_cpu(float* data_col,
        int channels, int height, int width,
        int ksize, int stride, int pad, float* data_im);
#ifdef GPU
void col2im_gpu(float *data_col,
        int channels, int height, int width,
        int ksize, int stride, int pad, float *data_im);
#endif
#endif
================================================
FILE: lightnet/_darknet/col2im_kernels.cu
================================================
#include "cuda_runtime.h"
#include "curand.h"
#include "cublas_v2.h"
extern "C" {
#include "col2im.h"
#include "cuda.h"
}
// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu
// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE
/* One thread per image element, striding by the whole grid until n
 * elements are covered.  Each thread *gathers* every column-matrix entry
 * that was copied from its image position and adds the sum into data_im,
 * so no atomics are needed.  The offset/coeff closed form enumerates the
 * (h_col, w_col) patches containing the element. */
__global__ void col2im_gpu_kernel(const int n, const float* data_col,
        const int height, const int width, const int ksize,
        const int pad,
        const int stride,
        const int height_col, const int width_col,
        float *data_im) {
    int index = blockIdx.x*blockDim.x+threadIdx.x;
    for(; index < n; index += blockDim.x*gridDim.x){
        float val = 0;
        int w = index % width + pad;
        int h = (index / width) % height + pad;
        int c = index / (width * height);
        // compute the start and end of the output
        int w_col_start = (w < ksize) ? 0 : (w - ksize) / stride + 1;
        int w_col_end = min(w / stride + 1, width_col);
        int h_col_start = (h < ksize) ? 0 : (h - ksize) / stride + 1;
        int h_col_end = min(h / stride + 1, height_col);
        // equivalent implementation
        int offset =
            (c * ksize * ksize + h * ksize + w) * height_col * width_col;
        int coeff_h_col = (1 - stride * ksize * height_col) * width_col;
        int coeff_w_col = (1 - stride * height_col * width_col);
        for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
            for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
                val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col];
            }
        }
        data_im[index] += val;
    }
}
/* GPU col2im: launch one thread per input-image element (each gathers all
 * column entries produced from that element; see col2im_gpu_kernel). */
void col2im_gpu(float *data_col,
        int channels, int height, int width,
        int ksize, int stride, int pad, float *data_im){
    // We are going to launch channels * height * width kernels, each
    // kernel responsible for accumulating one image element.
    int height_col = (height + 2 * pad - ksize) / stride + 1;
    int width_col = (width + 2 * pad - ksize) / stride + 1;
    int num_kernels = channels * height * width;
    col2im_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK,
        BLOCK>>>(
                num_kernels, data_col, height, width, ksize, pad,
                stride, height_col,
                width_col, data_im);
    // FIX: surface launch failures, consistent with every other kernel
    // launcher in this codebase (e.g. softmax_gpu, binarize_gpu).
    check_error(cudaPeekAtLastError());
}
================================================
FILE: lightnet/_darknet/connected_layer.c
================================================
#include "connected_layer.h"
#include "convolutional_layer.h"
#include "batchnorm_layer.h"
#include "utils.h"
#include "cuda.h"
#include "blas.h"
#include "gemm.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Build a fully-connected layer mapping (batch x inputs) -> (batch x outputs).
 * activation       element-wise nonlinearity applied after the affine map
 * batch_normalize  non-zero: use batchnorm statistics instead of a plain bias
 * adam             non-zero: allocate Adam first/second-moment buffers
 * Weights are initialized uniform in [-s, s] with s = sqrt(2/inputs);
 * biases start at zero and batchnorm scales at one.  With GPU enabled, all
 * parameter/gradient buffers are mirrored on the device. */
layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam)
{
    int i;
    layer l = {0};
    l.learning_rate_scale = 1;
    l.type = CONNECTED;
    l.inputs = inputs;
    l.outputs = outputs;
    l.batch=batch;
    l.batch_normalize = batch_normalize;
    // modelled as a 1x1 spatial map with `inputs` channels
    l.h = 1;
    l.w = 1;
    l.c = inputs;
    l.out_h = 1;
    l.out_w = 1;
    l.out_c = outputs;
    l.output = calloc(batch*outputs, sizeof(float));
    l.delta = calloc(batch*outputs, sizeof(float));
    l.weight_updates = calloc(inputs*outputs, sizeof(float));
    l.bias_updates = calloc(outputs, sizeof(float));
    l.weights = calloc(outputs*inputs, sizeof(float));
    l.biases = calloc(outputs, sizeof(float));
    l.forward = forward_connected_layer;
    l.backward = backward_connected_layer;
    l.update = update_connected_layer;
    //float scale = 1./sqrt(inputs);
    float scale = sqrt(2./inputs);
    for(i = 0; i < outputs*inputs; ++i){
        l.weights[i] = scale*rand_uniform(-1, 1);
    }
    for(i = 0; i < outputs; ++i){
        l.biases[i] = 0;
    }
    if(adam){
        // Adam moment estimates for weights, biases and batchnorm scales
        l.m = calloc(l.inputs*l.outputs, sizeof(float));
        l.v = calloc(l.inputs*l.outputs, sizeof(float));
        l.bias_m = calloc(l.outputs, sizeof(float));
        l.scale_m = calloc(l.outputs, sizeof(float));
        l.bias_v = calloc(l.outputs, sizeof(float));
        l.scale_v = calloc(l.outputs, sizeof(float));
    }
    if(batch_normalize){
        l.scales = calloc(outputs, sizeof(float));
        l.scale_updates = calloc(outputs, sizeof(float));
        for(i = 0; i < outputs; ++i){
            l.scales[i] = 1;
        }
        // per-batch and running (rolling) normalization statistics
        l.mean = calloc(outputs, sizeof(float));
        l.mean_delta = calloc(outputs, sizeof(float));
        l.variance = calloc(outputs, sizeof(float));
        l.variance_delta = calloc(outputs, sizeof(float));
        l.rolling_mean = calloc(outputs, sizeof(float));
        l.rolling_variance = calloc(outputs, sizeof(float));
        l.x = calloc(batch*outputs, sizeof(float));
        l.x_norm = calloc(batch*outputs, sizeof(float));
    }
#ifdef GPU
    l.forward_gpu = forward_connected_layer_gpu;
    l.backward_gpu = backward_connected_layer_gpu;
    l.update_gpu = update_connected_layer_gpu;
    l.weights_gpu = cuda_make_array(l.weights, outputs*inputs);
    l.biases_gpu = cuda_make_array(l.biases, outputs);
    l.weight_updates_gpu = cuda_make_array(l.weight_updates, outputs*inputs);
    l.bias_updates_gpu = cuda_make_array(l.bias_updates, outputs);
    l.output_gpu = cuda_make_array(l.output, outputs*batch);
    l.delta_gpu = cuda_make_array(l.delta, outputs*batch);
    if (adam) {
        l.m_gpu = cuda_make_array(0, inputs*outputs);
        l.v_gpu = cuda_make_array(0, inputs*outputs);
        l.bias_m_gpu = cuda_make_array(0, outputs);
        l.bias_v_gpu = cuda_make_array(0, outputs);
        l.scale_m_gpu = cuda_make_array(0, outputs);
        l.scale_v_gpu = cuda_make_array(0, outputs);
    }
    if(batch_normalize){
        l.mean_gpu = cuda_make_array(l.mean, outputs);
        l.variance_gpu = cuda_make_array(l.variance, outputs);
        l.rolling_mean_gpu = cuda_make_array(l.mean, outputs);
        l.rolling_variance_gpu = cuda_make_array(l.variance, outputs);
        l.mean_delta_gpu = cuda_make_array(l.mean, outputs);
        l.variance_delta_gpu = cuda_make_array(l.variance, outputs);
        l.scales_gpu = cuda_make_array(l.scales, outputs);
        l.scale_updates_gpu = cuda_make_array(l.scale_updates, outputs);
        l.x_gpu = cuda_make_array(l.output, l.batch*outputs);
        l.x_norm_gpu = cuda_make_array(l.output, l.batch*outputs);
#ifdef CUDNN
        cudnnCreateTensorDescriptor(&l.normTensorDesc);
        cudnnCreateTensorDescriptor(&l.dstTensorDesc);
        cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
        cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1);
#endif
    }
#endif
    l.activation = activation;
    fprintf(stderr, "connected %4d -> %4d\n", inputs, outputs);
    return l;
}
/* One SGD step with momentum and L2 weight decay.
 * Accumulated gradients in *_updates are applied scaled by
 * learning_rate/batch, then damped by momentum for the next iteration.
 * The -decay*batch factor on the raw weight accumulator cancels the later
 * /batch, i.e. it is plain -decay*weights per step. */
void update_connected_layer(layer l, update_args a)
{
    float learning_rate = a.learning_rate*l.learning_rate_scale;
    float momentum = a.momentum;
    float decay = a.decay;
    int batch = a.batch;
    axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.outputs, momentum, l.bias_updates, 1);
    if(l.batch_normalize){
        axpy_cpu(l.outputs, learning_rate/batch, l.scale_updates, 1, l.scales, 1);
        scal_cpu(l.outputs, momentum, l.scale_updates, 1);
    }
    axpy_cpu(l.inputs*l.outputs, -decay*batch, l.weights, 1, l.weight_updates, 1);
    axpy_cpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
    scal_cpu(l.inputs*l.outputs, momentum, l.weight_updates, 1);
}
/* Forward pass: output = input . W^T, then bias (or batchnorm), then the
 * activation.  gemm(0,1,...) treats input as (batch x inputs) and weights
 * as (outputs x inputs), transposing the latter. */
void forward_connected_layer(layer l, network net)
{
    fill_cpu(l.outputs*l.batch, 0, l.output, 1);
    int m = l.batch;
    int k = l.inputs;
    int n = l.outputs;
    float *a = net.input;
    float *b = l.weights;
    float *c = l.output;
    gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
    if(l.batch_normalize){
        forward_batchnorm_layer(l, net);
    } else {
        add_bias(l.output, l.biases, l.batch, l.outputs, 1);
    }
    activate_array(l.output, l.outputs*l.batch, l.activation);
}
/* Backward pass: fold the activation gradient into l.delta, accumulate
 * bias/batchnorm gradients, then:
 *   weight_updates += delta^T . input    (outputs x inputs)
 *   net.delta      += delta . weights    (batch x inputs), when the
 * previous layer requested a gradient (net.delta non-NULL). */
void backward_connected_layer(layer l, network net)
{
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
    if(l.batch_normalize){
        backward_batchnorm_layer(l, net);
    } else {
        backward_bias(l.bias_updates, l.delta, l.batch, l.outputs, 1);
    }
    int m = l.outputs;
    int k = l.batch;
    int n = l.inputs;
    float *a = l.delta;
    float *b = net.input;
    float *c = l.weight_updates;
    gemm(1,0,m,n,k,1,a,m,b,n,1,c,n);
    m = l.batch;
    k = l.outputs;
    n = l.inputs;
    a = l.delta;
    b = l.weights;
    c = net.delta;
    if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
}
/* Fold the batchnorm statistics into the weights and biases so the layer
 * can run without normalization, then reset the statistics to identity
 * (scale 1, mean 0, variance 1). */
void denormalize_connected_layer(layer l)
{
    int out, in;
    for (out = 0; out < l.outputs; ++out) {
        float factor = l.scales[out]/sqrt(l.rolling_variance[out] + .000001);
        for (in = 0; in < l.inputs; ++in) {
            l.weights[out*l.inputs + in] *= factor;
        }
        l.biases[out] -= l.rolling_mean[out] * factor;
        l.scales[out] = 1;
        l.rolling_mean[out] = 0;
        l.rolling_variance[out] = 1;
    }
}
/* Print summary statistics of the layer's learned parameters (debugging).
 * NOTE(review): only the first l.outputs weights are summarized, not the
 * full inputs*outputs matrix — presumably an intentional sample; confirm. */
void statistics_connected_layer(layer l)
{
    if(l.batch_normalize){
        printf("Scales ");
        print_statistics(l.scales, l.outputs);
        /*
           printf("Rolling Mean ");
           print_statistics(l.rolling_mean, l.outputs);
           printf("Rolling Variance ");
           print_statistics(l.rolling_variance, l.outputs);
         */
    }
    printf("Biases ");
    print_statistics(l.biases, l.outputs);
    printf("Weights ");
    print_statistics(l.weights, l.outputs);
}
#ifdef GPU
/* Copy this layer's parameters and gradient accumulators from the GPU
 * back to host memory (device -> host). */
void pull_connected_layer(layer l)
{
    cuda_pull_array(l.weights_gpu, l.weights, l.inputs*l.outputs);
    cuda_pull_array(l.biases_gpu, l.biases, l.outputs);
    cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs);
    cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs);
    if (l.batch_normalize){
        cuda_pull_array(l.scales_gpu, l.scales, l.outputs);
        cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs);
        cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs);
    }
}
/* Copy this layer's parameters and gradient accumulators from host memory
 * to the GPU (host -> device); mirror of pull_connected_layer. */
void push_connected_layer(layer l)
{
    cuda_push_array(l.weights_gpu, l.weights, l.inputs*l.outputs);
    cuda_push_array(l.biases_gpu, l.biases, l.outputs);
    cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs);
    cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.outputs);
    if (l.batch_normalize){
        cuda_push_array(l.scales_gpu, l.scales, l.outputs);
        cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs);
        cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs);
    }
}
/* GPU parameter update: Adam when a.adam is set, otherwise SGD with
 * momentum and L2 decay (mirrors update_connected_layer). */
void update_connected_layer_gpu(layer l, update_args a)
{
    float learning_rate = a.learning_rate*l.learning_rate_scale;
    float momentum = a.momentum;
    float decay = a.decay;
    int batch = a.batch;
    if(a.adam){
        adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.inputs*l.outputs, batch, a.t);
        adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.outputs, batch, a.t);
        if(l.scales_gpu){
            adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.outputs, batch, a.t);
        }
    }else{
        axpy_gpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1);
        scal_gpu(l.outputs, momentum, l.bias_updates_gpu, 1);
        if(l.batch_normalize){
            axpy_gpu(l.outputs, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1);
            scal_gpu(l.outputs, momentum, l.scale_updates_gpu, 1);
        }
        axpy_gpu(l.inputs*l.outputs, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
        axpy_gpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
        scal_gpu(l.inputs*l.outputs, momentum, l.weight_updates_gpu, 1);
    }
}
/* GPU forward pass; mirrors forward_connected_layer on device buffers:
 * output = input . W^T, then bias or batchnorm, then activation. */
void forward_connected_layer_gpu(layer l, network net)
{
    fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1);
    int m = l.batch;
    int k = l.inputs;
    int n = l.outputs;
    float * a = net.input_gpu;
    float * b = l.weights_gpu;
    float * c = l.output_gpu;
    gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n);
    if (l.batch_normalize) {
        forward_batchnorm_layer_gpu(l, net);
    } else {
        add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.outputs, 1);
    }
    activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation);
}
/* GPU backward pass; mirrors backward_connected_layer.
 * constrain_gpu(..., 1, l.delta_gpu, ...) is applied first — presumably
 * clamping delta magnitude to 1 for stability; confirm in blas_kernels. */
void backward_connected_layer_gpu(layer l, network net)
{
    constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1);
    gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
    if(l.batch_normalize){
        backward_batchnorm_layer_gpu(l, net);
    } else {
        backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.outputs, 1);
    }
    // weight_updates += delta^T . input
    int m = l.outputs;
    int k = l.batch;
    int n = l.inputs;
    float * a = l.delta_gpu;
    float * b = net.input_gpu;
    float * c = l.weight_updates_gpu;
    gemm_gpu(1,0,m,n,k,1,a,m,b,n,1,c,n);
    // net.delta += delta . weights (if the previous layer wants gradients)
    m = l.batch;
    k = l.outputs;
    n = l.inputs;
    a = l.delta_gpu;
    b = l.weights_gpu;
    c = net.delta_gpu;
    if(c) gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
}
#endif
================================================
FILE: lightnet/_darknet/connected_layer.h
================================================
#ifndef CONNECTED_LAYER_H
#define CONNECTED_LAYER_H
#include "activations.h"
#include "layer.h"
#include "network.h"
/* Fully-connected (dense) layer: CPU forward/backward/update, plus GPU
 * variants and host<->device parameter transfer when built with GPU. */
layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam);
void forward_connected_layer(layer l, network net);
void backward_connected_layer(layer l, network net);
void update_connected_layer(layer l, update_args a);
#ifdef GPU
void forward_connected_layer_gpu(layer l, network net);
void backward_connected_layer_gpu(layer l, network net);
void update_connected_layer_gpu(layer l, update_args a);
void push_connected_layer(layer l);
void pull_connected_layer(layer l);
#endif
#endif
================================================
FILE: lightnet/_darknet/convolutional_kernels.cu
================================================
#include "cuda_runtime.h"
#include "curand.h"
#include "cublas_v2.h"
extern "C" {
#include "convolutional_layer.h"
#include "batchnorm_layer.h"
#include "gemm.h"
#include "blas.h"
#include "im2col.h"
#include "col2im.h"
#include "utils.h"
#include "cuda.h"
}
/* One thread per element: binary[i] = +1 if x[i] >= 0, else -1
 * (zero maps to +1). */
__global__ void binarize_kernel(float *x, int n, float *binary)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (i >= n) return;
    binary[i] = (x[i] >= 0) ? 1 : -1;
}
/* Launch binarize_kernel over n elements and check for launch errors. */
void binarize_gpu(float *x, int n, float *binary)
{
    binarize_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, binary);
    check_error(cudaPeekAtLastError());
}
/* One thread per spatial position s (of `size` positions): compute the
 * mean |value| over the n rows at s, then write +/-mean by the sign of
 * each input value (zero maps to -mean since the test is strict >). */
__global__ void binarize_input_kernel(float *input, int n, int size, float *binary)
{
    int s = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (s >= size) return;
    int i = 0;
    float mean = 0;
    for(i = 0; i < n; ++i){
        mean += fabsf(input[i*size + s]);
    }
    mean = mean / n;
    for(i = 0; i < n; ++i){
        binary[i*size + s] = (input[i*size + s] > 0) ? mean : -mean;
    }
}
/* Launch binarize_input_kernel: one thread per spatial position. */
void binarize_input_gpu(float *input, int n, int size, float *binary)
{
    binarize_input_kernel<<<cuda_gridsize(size), BLOCK>>>(input, n, size, binary);
    check_error(cudaPeekAtLastError());
}
/* One thread per filter f (of n filters, each `size` weights): compute the
 * filter's mean |weight|, then write +/-mean by the sign of each weight. */
__global__ void binarize_weights_kernel(float *weights, int n, int size, float *binary)
{
    int f = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (f >= n) return;
    int i = 0;
    float mean = 0;
    for(i = 0; i < size; ++i){
        mean += fabsf(weights[f*size + i]);
    }
    mean = mean / size;
    for(i = 0; i < size; ++i){
        binary[f*size + i] = (weights[f*size + i] > 0) ? mean : -mean;
        //binary[f*size + i] = weights[f*size + i];
    }
}
/* Launch binarize_weights_kernel: one thread per filter. */
void binarize_weights_gpu(float *weights, int n, int size, float *binary)
{
    binarize_weights_kernel<<<cuda_gridsize(n), BLOCK>>>(weights, n, size, binary);
    check_error(cudaPeekAtLastError());
}
/* GPU forward pass for a convolutional layer.
 * binary: binarize the weights only; xnor: binarize weights and input
 * (binarized buffers are swapped in, then swapped back at the end).
 * With CUDNN the convolution is a single library call; otherwise it is
 * im2col + GEMM per (batch item, group).  Bias or batchnorm and the
 * activation are then applied in place on the output. */
void forward_convolutional_layer_gpu(convolutional_layer l, network net)
{
    fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1);
    if(l.binary){
        binarize_weights_gpu(l.weights_gpu, l.n, l.c/l.groups*l.size*l.size, l.binary_weights_gpu);
        swap_binary(&l);
    }
    if(l.xnor){
        binarize_weights_gpu(l.weights_gpu, l.n, l.c/l.groups*l.size*l.size, l.binary_weights_gpu);
        swap_binary(&l);
        binarize_gpu(net.input_gpu, l.c*l.h*l.w*l.batch, l.binary_input_gpu);
        net.input_gpu = l.binary_input_gpu;
    }
#ifdef CUDNN
    float one = 1;
    cudnnConvolutionForward(cudnn_handle(),
            &one,
            l.srcTensorDesc,
            net.input_gpu,
            l.weightDesc,
            l.weights_gpu,
            l.convDesc,
            l.fw_algo,
            net.workspace,
            l.workspace_size,
            &one,
            l.dstTensorDesc,
            l.output_gpu);
#else
    // im2col + GEMM per (batch item, group)
    int i, j;
    int m = l.n/l.groups;
    int k = l.size*l.size*l.c/l.groups;
    int n = l.out_w*l.out_h;
    for(i = 0; i < l.batch; ++i){
        for(j = 0; j < l.groups; ++j){
            float *a = l.weights_gpu + j*l.nweights/l.groups;
            float *b = net.workspace;
            float *c = l.output_gpu + (i*l.groups + j)*n*m;
            im2col_gpu(net.input_gpu + (i*l.groups + j)*l.c/l.groups*l.h*l.w,
                    l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b);
            gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
        }
    }
#endif
    if (l.batch_normalize) {
        forward_batchnorm_layer_gpu(l, net);
    } else {
        add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h);
    }
    activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation);
    //if(l.dot > 0) dot_error_gpu(l);
    if(l.binary || l.xnor) swap_binary(&l);
}
/* One thread per output element: add rate*(neighbor - center) over a
 * size x size window into delta — a smoothness penalty on x.
 * NOTE: w_offset/h_offset are ints assigned from -(size/2.f), so the
 * float result is truncated (e.g. size=5 gives -2). */
__global__ void smooth_kernel(float *x, int n, int w, int h, int c, int size, float rate, float *delta)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(id >= n) return;
    // decompose the flat index into (column j, row i, channel k, batch b)
    int j = id % w;
    id /= w;
    int i = id % h;
    id /= h;
    int k = id % c;
    id /= c;
    int b = id;
    int w_offset = -(size/2.f);
    int h_offset = -(size/2.f);
    int out_index = j + w*(i + h*(k + c*b));
    int l, m;
    for(l = 0; l < size; ++l){
        for(m = 0; m < size; ++m){
            int cur_h = h_offset + i + l;
            int cur_w = w_offset + j + m;
            int index = cur_w + w*(cur_h + h*(k + b*c));
            int valid = (cur_h >= 0 && cur_h < h &&
                    cur_w >= 0 && cur_w < w);
            delta[out_index] += valid ? rate*(x[index] - x[out_index]) : 0;
        }
    }
}
/* Launch smooth_kernel over the layer's output buffer.
 * NOTE(review): n is sized from the output dims (out_h/out_w/out_c) but
 * the kernel receives the input dims (l.w, l.h, l.c); these agree only
 * when the layer preserves its spatial shape — confirm for strided or
 * channel-changing layers. */
extern "C" void smooth_layer(layer l, int size, float rate)
{
    int h = l.out_h;
    int w = l.out_w;
    int c = l.out_c;
    size_t n = h*w*c*l.batch;
    smooth_kernel<<<cuda_gridsize(n), BLOCK>>>(l.output_gpu, n, l.w, l.h, l.c, size, rate, l.delta_gpu);
    check_error(cudaPeekAtLastError());
}
/* GPU backward pass for a convolutional layer.
 * Folds the activation gradient into delta, accumulates bias/batchnorm
 * gradients, then computes weight gradients (dW += delta . im2col(input)^T)
 * and, when the previous layer wants gradients (net.delta_gpu non-NULL),
 * input gradients via col2im of W^T . delta.
 *
 * FIX: the non-CUDNN path previously built the im2col buffer from
 * net.input (a HOST pointer) — im2col_gpu runs on the device and must read
 * net.input_gpu, as the forward pass and the CUDNN path already do. */
void backward_convolutional_layer_gpu(convolutional_layer l, network net)
{
    if(l.smooth){
        smooth_layer(l, 5, l.smooth);
    }
    constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1);
    gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
    if(l.batch_normalize){
        backward_batchnorm_layer_gpu(l, net);
    } else {
        backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h);
    }
    float *original_input = net.input_gpu;
    if(l.xnor) net.input_gpu = l.binary_input_gpu;
#ifdef CUDNN
    float one = 1;
    // dW = conv-backward-filter(input, delta)
    cudnnConvolutionBackwardFilter(cudnn_handle(),
            &one,
            l.srcTensorDesc,
            net.input_gpu,
            l.ddstTensorDesc,
            l.delta_gpu,
            l.convDesc,
            l.bf_algo,
            net.workspace,
            l.workspace_size,
            &one,
            l.dweightDesc,
            l.weight_updates_gpu);
    if(net.delta_gpu){
        if(l.binary || l.xnor) swap_binary(&l);
        // dInput = conv-backward-data(weights, delta)
        cudnnConvolutionBackwardData(cudnn_handle(),
                &one,
                l.weightDesc,
                l.weights_gpu,
                l.ddstTensorDesc,
                l.delta_gpu,
                l.convDesc,
                l.bd_algo,
                net.workspace,
                l.workspace_size,
                &one,
                l.dsrcTensorDesc,
                net.delta_gpu);
        if(l.binary || l.xnor) swap_binary(&l);
        if(l.xnor) gradient_array_gpu(original_input, l.batch*l.c*l.h*l.w, HARDTAN, net.delta_gpu);
    }
#else
    int m = l.n/l.groups;
    int n = l.size*l.size*l.c/l.groups;
    int k = l.out_w*l.out_h;
    int i, j;
    for(i = 0; i < l.batch; ++i){
        for(j = 0; j < l.groups; ++j){
            float *a = l.delta_gpu + (i*l.groups + j)*m*k;
            float *b = net.workspace;
            float *c = l.weight_updates_gpu + j*l.nweights/l.groups;
            // FIX: was net.input (host pointer); im2col_gpu needs the device buffer
            float *im = net.input_gpu + (i*l.groups + j)*l.c/l.groups*l.h*l.w;
            im2col_gpu(im, l.c/l.groups, l.h, l.w,
                    l.size, l.stride, l.pad, b);
            gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n);
            if(net.delta_gpu){
                if(l.binary || l.xnor) swap_binary(&l);
                a = l.weights_gpu + j*l.nweights/l.groups;
                b = l.delta_gpu + (i*l.groups + j)*m*k;
                c = net.workspace;
                gemm_gpu(1,0,n,k,m,1,a,n,b,k,0,c,k);
                col2im_gpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride,
                        l.pad, net.delta_gpu + (i*l.groups + j)*l.c/l.groups*l.h*l.w);
                if(l.binary || l.xnor) {
                    swap_binary(&l);
                }
            }
            if(l.xnor) gradient_array_gpu(original_input + i*l.c*l.h*l.w, l.c*l.h*l.w, HARDTAN, net.delta_gpu + i*l.c*l.h*l.w);
        }
    }
#endif
}
/* Copy this layer's parameters and gradient accumulators from the GPU
 * back to host memory (device -> host). */
void pull_convolutional_layer(layer l)
{
    cuda_pull_array(l.weights_gpu, l.weights, l.nweights);
    cuda_pull_array(l.biases_gpu, l.biases, l.n);
    cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.nweights);
    cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n);
    if (l.batch_normalize){
        cuda_pull_array(l.scales_gpu, l.scales, l.n);
        cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.n);
        cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.n);
    }
}
/* Copy this layer's parameters and gradient accumulators from host memory
 * to the GPU (host -> device); mirror of pull_convolutional_layer. */
void push_convolutional_layer(layer l)
{
    cuda_push_array(l.weights_gpu, l.weights, l.nweights);
    cuda_push_array(l.biases_gpu, l.biases, l.n);
    cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights);
    cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n);
    if (l.batch_normalize){
        cuda_push_array(l.scales_gpu, l.scales, l.n);
        cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n);
        cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n);
    }
}
/* GPU parameter update for a convolutional layer: Adam when a.adam is
 * set, otherwise SGD with momentum and L2 decay. */
void update_convolutional_layer_gpu(layer l, update_args a)
{
    float learning_rate = a.learning_rate*l.learning_rate_scale;
    float momentum = a.momentum;
    float decay = a.decay;
    int batch = a.batch;
    if(a.adam){
        adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.nweights, batch, a.t);
        adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t);
        if(l.scales_gpu){
            adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t);
        }
    }else{
        axpy_gpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
        axpy_gpu(l.nweights, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
        scal_gpu(l.nweights, momentum, l.weight_updates_gpu, 1);
        axpy_gpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1);
        scal_gpu(l.n, momentum, l.bias_updates_gpu, 1);
        if(l.scales_gpu){
            axpy_gpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1);
            scal_gpu(l.n, momentum, l.scale_updates_gpu, 1);
        }
    }
}
================================================
FILE: lightnet/_darknet/convolutional_layer.c
================================================
#include "convolutional_layer.h"
#include "utils.h"
#include "batchnorm_layer.h"
#include "im2col.h"
#include "col2im.h"
#include "blas.h"
#include "gemm.h"
#include <stdio.h>
#include <time.h>
#ifdef AI2
#include "xnor_layer.h"
#endif
/* Exchange the layer's real and binarized weight pointers (and their GPU
 * mirrors), so the binarized set is used until the next swap. */
void swap_binary(convolutional_layer *l)
{
    float *tmp;

    tmp = l->weights;
    l->weights = l->binary_weights;
    l->binary_weights = tmp;
#ifdef GPU
    tmp = l->weights_gpu;
    l->weights_gpu = l->binary_weights_gpu;
    l->binary_weights_gpu = tmp;
#endif
}
/* For each of the n filters (each `size` weights), write +/-(mean |w|)
 * into binary, signed by the corresponding weight (zero maps to -mean). */
void binarize_weights(float *weights, int n, int size, float *binary)
{
    int f, i;
    for (f = 0; f < n; ++f) {
        const float *w = weights + f*size;
        float *out = binary + f*size;
        float total = 0;
        for (i = 0; i < size; ++i) {
            total += fabs(w[i]);
        }
        float mean = total / size;
        for (i = 0; i < size; ++i) {
            out[i] = (w[i] > 0) ? mean : -mean;
        }
    }
}
/* Element-wise sign binarization: +1 for positive values, -1 otherwise
 * (zero maps to -1 since the test is strict >). */
void binarize_cpu(float *input, int n, float *binary)
{
    int idx;
    for (idx = 0; idx < n; ++idx) {
        if (input[idx] > 0) {
            binary[idx] = 1;
        } else {
            binary[idx] = -1;
        }
    }
}
/* For each spatial position s (of `size` positions), replace the n values
 * of that column with +/- the column's mean absolute value, signed by the
 * original value (zero maps to -mean). */
void binarize_input(float *input, int n, int size, float *binary)
{
    int row, s;
    for (s = 0; s < size; ++s) {
        float total = 0;
        for (row = 0; row < n; ++row) {
            total += fabs(input[row*size + s]);
        }
        float mean = total / n;
        for (row = 0; row < n; ++row) {
            binary[row*size + s] = (input[row*size + s] > 0) ? mean : -mean;
        }
    }
}
/* Output feature-map height: floor((h + 2*pad - size)/stride) + 1. */
int convolutional_out_height(convolutional_layer l)
{
    return (l.h + 2*l.pad - l.size) / l.stride + 1;
}
/* Output feature-map width: floor((w + 2*pad - size)/stride) + 1. */
int convolutional_out_width(convolutional_layer l)
{
    return (l.w + 2*l.pad - l.size) / l.stride + 1;
}
/* View the layer's CPU output buffer as an image (no copy). */
image get_convolutional_image(convolutional_layer l)
{
    return float_to_image(l.out_w,l.out_h,l.out_c,l.output);
}
/* View the layer's CPU gradient buffer as an image (no copy). */
image get_convolutional_delta(convolutional_layer l)
{
    return float_to_image(l.out_w,l.out_h,l.out_c,l.delta);
}
/* Scratch-workspace bytes the layer's convolution needs.
 * With cuDNN on a GPU: the maximum workspace over the chosen forward,
 * backward-filter and backward-data algorithms; otherwise the size of one
 * im2col column buffer. */
static size_t get_workspace_size(layer l){
#ifdef CUDNN
    if(gpu_index >= 0){
        size_t most = 0;
        size_t s = 0;
        cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle(),
                l.srcTensorDesc,
                l.weightDesc,
                l.convDesc,
                l.dstTensorDesc,
                l.fw_algo,
                &s);
        if (s > most) most = s;
        cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnn_handle(),
                l.srcTensorDesc,
                l.ddstTensorDesc,
                l.convDesc,
                l.dweightDesc,
                l.bf_algo,
                &s);
        if (s > most) most = s;
        cudnnGetConvolutionBackwardDataWorkspaceSize(cudnn_handle(),
                l.weightDesc,
                l.ddstTensorDesc,
                l.convDesc,
                l.dsrcTensorDesc,
                l.bd_algo,
                &s);
        if (s > most) most = s;
        return most;
    }
#endif
    return (size_t)l.out_h*l.out_w*l.size*l.size*l.c/l.groups*sizeof(float);
}
#ifdef GPU
#ifdef CUDNN
/* (Re)bind all cuDNN descriptors to the layer's current geometry and ask
 * cuDNN for the fastest forward/backward algorithms.  Called at creation
 * and whenever the layer is resized.  Grouped convolution needs cuDNN 7+;
 * the 2D-convolution descriptor signature changed in cuDNN 6. */
void cudnn_convolutional_setup(layer *l)
{
    cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
    cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
    cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
    cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
    cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1);
    cudnnSetFilter4dDescriptor(l->dweightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size);
    cudnnSetFilter4dDescriptor(l->weightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size);
#if CUDNN_MAJOR >= 6
    cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT);
#else
    cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
#endif
#if CUDNN_MAJOR >= 7
    cudnnSetConvolutionGroupCount(l->convDesc, l->groups);
#else
    if(l->groups > 1){
        error("CUDNN < 7 doesn't support groups, please upgrade!");
    }
#endif
    cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
            l->srcTensorDesc,
            l->weightDesc,
            l->convDesc,
            l->dstTensorDesc,
            CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
            0,
            &l->fw_algo);
    cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
            l->weightDesc,
            l->ddstTensorDesc,
            l->convDesc,
            l->dsrcTensorDesc,
            CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST,
            0,
            &l->bd_algo);
    cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
            l->srcTensorDesc,
            l->ddstTensorDesc,
            l->convDesc,
            l->dweightDesc,
            CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST,
            0,
            &l->bf_algo);
}
#endif
#endif
/*
 * Builds a convolutional layer and allocates all of its host (and, when
 * compiled with GPU support, device) buffers.
 *
 * batch           images per forward pass
 * h, w, c         input height, width, channels
 * n               number of filters (output channels)
 * groups          grouped-convolution count (1 = ordinary convolution)
 * size, stride    filter size and stride
 * padding         zero-padding on each side
 * activation      activation applied to the output
 * batch_normalize / binary / xnor / adam  feature flags; each one triggers
 *                 its own extra buffer allocations below
 *
 * NOTE(review): calloc/cuda_make_array results are not checked anywhere in
 * this file (darknet convention); allocation failure will crash later.
 */
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam)
{
    int i;
    convolutional_layer l = {0};
    l.type = CONVOLUTIONAL;
    l.groups = groups;
    l.h = h;
    l.w = w;
    l.c = c;
    l.n = n;
    l.binary = binary;
    l.xnor = xnor;
    l.batch = batch;
    l.stride = stride;
    l.size = size;
    l.pad = padding;
    l.batch_normalize = batch_normalize;
    /* Weight tensor is n filters of (c/groups) x size x size each. */
    l.weights = calloc(c/groups*n*size*size, sizeof(float));
    l.weight_updates = calloc(c/groups*n*size*size, sizeof(float));
    l.biases = calloc(n, sizeof(float));
    l.bias_updates = calloc(n, sizeof(float));
    l.nweights = c/groups*n*size*size;
    l.nbiases = n;
    // float scale = 1./sqrt(size*size*c);
    /* He-style init: stddev = sqrt(2 / fan_in). */
    float scale = sqrt(2./(size*size*c/l.groups));
    //scale = .02;
    //for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1);
    for(i = 0; i < l.nweights; ++i) l.weights[i] = scale*rand_normal();
    int out_w = convolutional_out_width(l);
    int out_h = convolutional_out_height(l);
    l.out_h = out_h;
    l.out_w = out_w;
    l.out_c = n;
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.inputs = l.w * l.h * l.c;
    l.output = calloc(l.batch*l.outputs, sizeof(float));
    l.delta = calloc(l.batch*l.outputs, sizeof(float));
    l.forward = forward_convolutional_layer;
    l.backward = backward_convolutional_layer;
    l.update = update_convolutional_layer;
    if(binary){
        l.binary_weights = calloc(l.nweights, sizeof(float));
        l.cweights = calloc(l.nweights, sizeof(char));
        l.scales = calloc(n, sizeof(float));
    }
    if(xnor){
        l.binary_weights = calloc(l.nweights, sizeof(float));
        l.binary_input = calloc(l.inputs*l.batch, sizeof(float));
    }
    if(batch_normalize){
        l.scales = calloc(n, sizeof(float));
        l.scale_updates = calloc(n, sizeof(float));
        /* Batch-norm scales (gamma) start at identity. */
        for(i = 0; i < n; ++i){
            l.scales[i] = 1;
        }
        l.mean = calloc(n, sizeof(float));
        l.variance = calloc(n, sizeof(float));
        l.mean_delta = calloc(n, sizeof(float));
        l.variance_delta = calloc(n, sizeof(float));
        l.rolling_mean = calloc(n, sizeof(float));
        l.rolling_variance = calloc(n, sizeof(float));
        /* x / x_norm cache pre- and post-normalization activations for backprop. */
        l.x = calloc(l.batch*l.outputs, sizeof(float));
        l.x_norm = calloc(l.batch*l.outputs, sizeof(float));
    }
    if(adam){
        /* First/second-moment accumulators for the Adam optimizer. */
        l.m = calloc(l.nweights, sizeof(float));
        l.v = calloc(l.nweights, sizeof(float));
        l.bias_m = calloc(n, sizeof(float));
        l.scale_m = calloc(n, sizeof(float));
        l.bias_v = calloc(n, sizeof(float));
        l.scale_v = calloc(n, sizeof(float));
    }
#ifdef GPU
    l.forward_gpu = forward_convolutional_layer_gpu;
    l.backward_gpu = backward_convolutional_layer_gpu;
    l.update_gpu = update_convolutional_layer_gpu;
    if(gpu_index >= 0){
        if (adam) {
            l.m_gpu = cuda_make_array(l.m, l.nweights);
            l.v_gpu = cuda_make_array(l.v, l.nweights);
            l.bias_m_gpu = cuda_make_array(l.bias_m, n);
            l.bias_v_gpu = cuda_make_array(l.bias_v, n);
            l.scale_m_gpu = cuda_make_array(l.scale_m, n);
            l.scale_v_gpu = cuda_make_array(l.scale_v, n);
        }
        l.weights_gpu = cuda_make_array(l.weights, l.nweights);
        l.weight_updates_gpu = cuda_make_array(l.weight_updates, l.nweights);
        l.biases_gpu = cuda_make_array(l.biases, n);
        l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);
        l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
        l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
        if(binary){
            l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights);
        }
        if(xnor){
            l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights);
            l.binary_input_gpu = cuda_make_array(0, l.inputs*l.batch);
        }
        if(batch_normalize){
            l.mean_gpu = cuda_make_array(l.mean, n);
            l.variance_gpu = cuda_make_array(l.variance, n);
            l.rolling_mean_gpu = cuda_make_array(l.mean, n);
            l.rolling_variance_gpu = cuda_make_array(l.variance, n);
            l.mean_delta_gpu = cuda_make_array(l.mean, n);
            l.variance_delta_gpu = cuda_make_array(l.variance, n);
            l.scales_gpu = cuda_make_array(l.scales, n);
            l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);
            l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
            l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
        }
#ifdef CUDNN
        cudnnCreateTensorDescriptor(&l.normTensorDesc);
        cudnnCreateTensorDescriptor(&l.srcTensorDesc);
        cudnnCreateTensorDescriptor(&l.dstTensorDesc);
        cudnnCreateFilterDescriptor(&l.weightDesc);
        cudnnCreateTensorDescriptor(&l.dsrcTensorDesc);
        cudnnCreateTensorDescriptor(&l.ddstTensorDesc);
        cudnnCreateFilterDescriptor(&l.dweightDesc);
        cudnnCreateConvolutionDescriptor(&l.convDesc);
        cudnn_convolutional_setup(&l);
#endif
    }
#endif
    l.workspace_size = get_workspace_size(l);
    l.activation = activation;
    //fprintf(stderr, "conv  %5d %2d x%2d /%2d  %4d x%4d x%4d   ->  %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
    return l;
}
/*
 * Folds batch-norm statistics into the weights and biases so the layer can
 * run without a normalization step: each filter's weights are multiplied by
 * gamma/sqrt(var + eps), the bias absorbs -mean*scale, and the stored stats
 * are reset to the identity transform (scale 1, mean 0, variance 1).
 */
void denormalize_convolutional_layer(convolutional_layer l)
{
    int i, j;
    for(i = 0; i < l.n; ++i){
        float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001);
        for(j = 0; j < l.c/l.groups*l.size*l.size; ++j){
            l.weights[i*l.c/l.groups*l.size*l.size + j] *= scale;
        }
        l.biases[i] -= l.rolling_mean[i] * scale;
        l.scales[i] = 1;
        l.rolling_mean[i] = 0;
        l.rolling_variance[i] = 1;
    }
}
/*
void test_convolutional_layer()
{
convolutional_layer l = make_convolutional_layer(1, 5, 5, 3, 2, 5, 2, 1, LEAKY, 1, 0, 0, 0);
l.batch_normalize = 1;
float data[] = {1,1,1,1,1,
1,1,1,1,1,
1,1,1,1,1,
1,1,1,1,1,
1,1,1,1,1,
2,2,2,2,2,
2,2,2,2,2,
2,2,2,2,2,
2,2,2,2,2,
2,2,2,2,2,
3,3,3,3,3,
3,3,3,3,3,
3,3,3,3,3,
3,3,3,3,3,
3,3,3,3,3};
//net.input = data;
//forward_convolutional_layer(l);
}
*/
/*
 * Resizes the layer for a new input width/height: recomputes output
 * dimensions, grows/shrinks the host buffers with realloc, reallocates the
 * GPU buffers from scratch, and (with cuDNN) refreshes the descriptors and
 * algorithm choices.  Weights are untouched; only activations change size.
 * NOTE(review): realloc return values are unchecked (darknet convention).
 */
void resize_convolutional_layer(convolutional_layer *l, int w, int h)
{
    l->w = w;
    l->h = h;
    int out_w = convolutional_out_width(*l);
    int out_h = convolutional_out_height(*l);
    l->out_w = out_w;
    l->out_h = out_h;
    l->outputs = l->out_h * l->out_w * l->out_c;
    l->inputs = l->w * l->h * l->c;
    l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
    l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float));
    if(l->batch_normalize){
        l->x = realloc(l->x, l->batch*l->outputs*sizeof(float));
        l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float));
    }
#ifdef GPU
    cuda_free(l->delta_gpu);
    cuda_free(l->output_gpu);
    l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs);
    l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs);
    if(l->batch_normalize){
        cuda_free(l->x_gpu);
        cuda_free(l->x_norm_gpu);
        l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs);
        l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs);
    }
#ifdef CUDNN
    cudnn_convolutional_setup(l);
#endif
#endif
    l->workspace_size = get_workspace_size(*l);
}
/*
 * Adds biases[f] to every one of the `size` spatial elements of feature
 * map f, for each of the `batch` items.  `output` is laid out as
 * [batch][n][size] contiguous floats.
 */
void add_bias(float *output, float *biases, int batch, int n, int size)
{
    int b, f, s;
    for (b = 0; b < batch; ++b) {
        for (f = 0; f < n; ++f) {
            float *dst = output + (b*n + f)*size;
            for (s = 0; s < size; ++s) {
                dst[s] += biases[f];
            }
        }
    }
}
/*
 * Multiplies every spatial element of feature map f by scales[f], for each
 * batch item.  Same [batch][n][size] layout as add_bias.
 */
void scale_bias(float *output, float *scales, int batch, int n, int size)
{
    int b, f, s;
    for (b = 0; b < batch; ++b) {
        for (f = 0; f < n; ++f) {
            float *dst = output + (b*n + f)*size;
            for (s = 0; s < size; ++s) {
                dst[s] *= scales[f];
            }
        }
    }
}
/*
 * Accumulates the bias gradients: bias_updates[f] += sum over the `size`
 * spatial deltas of feature map f, summed across all batch items.
 */
void backward_bias(float *bias_updates, float *delta, int batch, int n, int size)
{
    int b, f;
    for (b = 0; b < batch; ++b) {
        float *d = delta + b*n*size;
        for (f = 0; f < n; ++f) {
            bias_updates[f] += sum_array(d + f*size, size);
        }
    }
}
/*
 * CPU forward pass: for each (batch item, group), unrolls the input patch
 * matrix with im2col and multiplies it by the group's filters via GEMM,
 * then applies batch-norm or bias and the activation function.
 * In xnor mode, weights and inputs are first binarized and the binary
 * weights are swapped in for the duration of the pass.
 */
void forward_convolutional_layer(convolutional_layer l, network net)
{
    int i, j;
    fill_cpu(l.outputs*l.batch, 0, l.output, 1);
    if(l.xnor){
        binarize_weights(l.weights, l.n, l.c/l.groups*l.size*l.size, l.binary_weights);
        swap_binary(&l);
        binarize_cpu(net.input, l.c*l.h*l.w*l.batch, l.binary_input);
        net.input = l.binary_input;
    }
    /* GEMM dims: C[m x n] = A[m x k] * B[k x n] per group. */
    int m = l.n/l.groups;
    int k = l.size*l.size*l.c/l.groups;
    int n = l.out_w*l.out_h;
    for(i = 0; i < l.batch; ++i){
        for(j = 0; j < l.groups; ++j){
            float *a = l.weights + j*l.nweights/l.groups;
            float *b = net.workspace;
            float *c = l.output + (i*l.groups + j)*n*m;
            im2col_cpu(net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w,
                    l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b);
            gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
        }
    }
    if(l.batch_normalize){
        forward_batchnorm_layer(l, net);
    } else {
        add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
    }
    activate_array(l.output, l.outputs*l.batch, l.activation);
    /* Restore the real weights after an xnor/binary pass. */
    if(l.binary || l.xnor) swap_binary(&l);
}
/*
 * CPU backward pass: turns the output delta into (a) weight gradients,
 * accumulated into l.weight_updates via delta * im2col(input)^T, and
 * (b) the input delta, via weights^T * delta followed by col2im, added
 * into net.delta when the previous layer wants gradients.
 */
void backward_convolutional_layer(convolutional_layer l, network net)
{
    int i, j;
    int m = l.n/l.groups;
    int n = l.size*l.size*l.c/l.groups;
    int k = l.out_w*l.out_h;
    /* Fold the activation derivative into l.delta first. */
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
    if(l.batch_normalize){
        backward_batchnorm_layer(l, net);
    } else {
        backward_bias(l.bias_updates, l.delta, l.batch, l.n, k);
    }
    for(i = 0; i < l.batch; ++i){
        for(j = 0; j < l.groups; ++j){
            float *a = l.delta + (i*l.groups + j)*m*k;
            float *b = net.workspace;
            float *c = l.weight_updates + j*l.nweights/l.groups;
            float *im = net.input+(i*l.groups + j)*l.c/l.groups*l.h*l.w;
            im2col_cpu(im, l.c/l.groups, l.h, l.w,
                    l.size, l.stride, l.pad, b);
            /* dW += delta * im2col^T (accumulates across the batch). */
            gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
            if(net.delta){
                a = l.weights + j*l.nweights/l.groups;
                b = l.delta + (i*l.groups + j)*m*k;
                c = net.workspace;
                /* dX_cols = W^T * delta, then scatter back with col2im. */
                gemm(1,0,n,k,m,1,a,n,b,k,0,c,k);
                col2im_cpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride,
                        l.pad, net.delta + (i*l.groups + j)*l.c/l.groups*l.h*l.w);
            }
        }
    }
}
/*
 * SGD-with-momentum update: applies the accumulated bias/scale/weight
 * gradients (divided by batch size), L2 weight decay on the weights only,
 * then scales the gradient accumulators by the momentum coefficient so
 * they carry over to the next step.
 */
void update_convolutional_layer(convolutional_layer l, update_args a)
{
    float learning_rate = a.learning_rate*l.learning_rate_scale;
    float momentum = a.momentum;
    float decay = a.decay;
    int batch = a.batch;
    axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.n, momentum, l.bias_updates, 1);
    /* l.scales is only allocated for batch-norm / binary layers. */
    if(l.scales){
        axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1);
        scal_cpu(l.n, momentum, l.scale_updates, 1);
    }
    /* Weight decay is folded into the gradient before the step. */
    axpy_cpu(l.nweights, -decay*batch, l.weights, 1, l.weight_updates, 1);
    axpy_cpu(l.nweights, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
    scal_cpu(l.nweights, momentum, l.weight_updates, 1);
}
/* Views filter i of the weight tensor as a size x size x (c/groups) image; no copy. */
image get_convolutional_weight(convolutional_layer l, int i)
{
    int channels = l.c/l.groups;
    int filter_vol = l.size*l.size*channels;
    return float_to_image(l.size, l.size, channels, l.weights + i*filter_vol);
}
/* Swaps the R and B weight channels of every 3-channel filter in place. */
void rgbgr_weights(convolutional_layer l)
{
    int filter;
    for (filter = 0; filter < l.n; ++filter) {
        image w = get_convolutional_weight(l, filter);
        if (w.c == 3) rgbgr_image(w);
    }
}
/*
 * For every 3-channel filter: scales its weights by `scale` in place and
 * shifts the filter's bias by trans * (sum of the scaled weights), so a
 * linear rescaling of the input can be absorbed into the layer.
 */
void rescale_weights(convolutional_layer l, float scale, float trans)
{
    int idx;
    for (idx = 0; idx < l.n; ++idx) {
        image w = get_convolutional_weight(l, idx);
        if (w.c != 3) continue;
        scale_image(w, scale);
        float total = sum_array(w.data, w.w*w.h*w.c);
        l.biases[idx] += total*trans;
    }
}
/*
 * Returns a freshly-allocated array of l.n images, each a normalized COPY
 * of one filter's weights (suitable for display).  Caller owns both the
 * array and each image's data.
 */
image *get_weights(convolutional_layer l)
{
    image *copies = calloc(l.n, sizeof(image));
    int idx;
    for (idx = 0; idx < l.n; ++idx) {
        copies[idx] = copy_image(get_convolutional_weight(l, idx));
        normalize_image(copies[idx]);
    }
    return copies;
}
/*
 * Displays the layer's filters in `window` and returns the array of
 * normalized filter images (caller frees; see get_weights).
 * `prev_weights` is accepted for interface compatibility but unused.
 *
 * Cleanup: removed the unused local `buff` (only referenced by the
 * commented-out display code) and renamed the misleading local `delta` —
 * it actually held the layer OUTPUT (get_convolutional_image), not l.delta.
 */
image *visualize_convolutional_layer(convolutional_layer l, char *window, image *prev_weights)
{
    image *single_weights = get_weights(l);
    show_images(single_weights, l.n, window);
    image output = get_convolutional_image(l);
    /* Collapsed output was only used by the commented-out display below. */
    image dc = collapse_image_layers(output, 1);
    //show_image(dc, window);
    //save_image(dc, window);
    free_image(dc);
    return single_weights;
}
================================================
FILE: lightnet/_darknet/convolutional_layer.h
================================================
#ifndef CONVOLUTIONAL_LAYER_H
#define CONVOLUTIONAL_LAYER_H
#include "cuda.h"
#include "image.h"
#include "activations.h"
#include "layer.h"
#include "network.h"

/* A convolutional layer is the generic layer struct; no extra fields. */
typedef layer convolutional_layer;

#ifdef GPU
/* GPU mirrors of the CPU passes, plus host<->device weight transfer. */
void forward_convolutional_layer_gpu(convolutional_layer layer, network net);
void backward_convolutional_layer_gpu(convolutional_layer layer, network net);
void update_convolutional_layer_gpu(convolutional_layer layer, update_args a);
void push_convolutional_layer(convolutional_layer layer);
void pull_convolutional_layer(convolutional_layer layer);
void add_bias_gpu(float *output, float *biases, int batch, int n, int size);
void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size);
void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t);
#ifdef CUDNN
void cudnn_convolutional_setup(layer *l);
#endif
#endif

/* Construction / reshaping. */
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam);
void resize_convolutional_layer(convolutional_layer *layer, int w, int h);

/* CPU passes and parameter update. */
void forward_convolutional_layer(const convolutional_layer layer, network net);
void update_convolutional_layer(convolutional_layer layer, update_args a);
image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_weights);
void binarize_weights(float *weights, int n, int size, float *binary);
void swap_binary(convolutional_layer *l);
void binarize_weights2(float *weights, int n, int size, char *binary, float *scales);
void backward_convolutional_layer(convolutional_layer layer, network net);

/* Bias helpers shared with other layer types. */
void add_bias(float *output, float *biases, int batch, int n, int size);
void backward_bias(float *bias_updates, float *delta, int batch, int n, int size);

/* Zero-copy views and output-shape queries. */
image get_convolutional_image(convolutional_layer layer);
image get_convolutional_delta(convolutional_layer layer);
image get_convolutional_weight(convolutional_layer layer, int i);
int convolutional_out_height(convolutional_layer layer);
int convolutional_out_width(convolutional_layer layer);
#endif
================================================
FILE: lightnet/_darknet/cost_layer.c
================================================
#include "cost_layer.h"
#include "utils.h"
#include "cuda.h"
#include "blas.h"
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
/*
 * Maps a .cfg cost keyword to its COST_TYPE.  Unknown keywords fall back
 * to SSE with a warning on stderr.
 */
COST_TYPE get_cost_type(char *s)
{
    static const struct { const char *name; COST_TYPE type; } known[] = {
        {"seg", SEG}, {"sse", SSE}, {"masked", MASKED}, {"smooth", SMOOTH}, {"L1", L1},
    };
    int i;
    for (i = 0; i < (int)(sizeof(known)/sizeof(known[0])); ++i) {
        if (strcmp(s, known[i].name) == 0) return known[i].type;
    }
    fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s);
    return SSE;
}
/* Inverse of get_cost_type: returns the keyword for a COST_TYPE ("sse" for SSE or anything unrecognized). */
char *get_cost_string(COST_TYPE a)
{
    if (a == SEG)    return "seg";
    if (a == MASKED) return "masked";
    if (a == SMOOTH) return "smooth";
    if (a == L1)     return "L1";
    return "sse";
}
/*
 * Builds a cost (loss) layer: outputs == inputs, with a per-element delta
 * buffer, an output buffer holding per-element losses, and a single-float
 * cost accumulator.
 *
 * Fix: the GPU mirrors were seeded from swapped host buffers
 * (delta_gpu from l.output, output_gpu from l.delta).  Behavior was
 * unchanged only because both are zero-filled by calloc; each mirror now
 * copies its own host buffer for consistency with every other layer.
 */
cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale)
{
    fprintf(stderr, "cost                                           %4d\n", inputs);
    cost_layer l = {0};
    l.type = COST;
    l.scale = scale;
    l.batch = batch;
    l.inputs = inputs;
    l.outputs = inputs;
    l.cost_type = cost_type;
    l.delta = calloc(inputs*batch, sizeof(float));
    l.output = calloc(inputs*batch, sizeof(float));
    l.cost = calloc(1, sizeof(float));
    l.forward = forward_cost_layer;
    l.backward = backward_cost_layer;
#ifdef GPU
    l.forward_gpu = forward_cost_layer_gpu;
    l.backward_gpu = backward_cost_layer_gpu;
    l.delta_gpu = cuda_make_array(l.delta, inputs*batch);
    l.output_gpu = cuda_make_array(l.output, inputs*batch);
#endif
    return l;
}
/*
 * Resizes the per-element delta/output buffers for a new input count;
 * GPU buffers are freed and reallocated (realloc may have moved the host
 * pointers, so they cannot be reused).
 */
void resize_cost_layer(cost_layer *l, int inputs)
{
    l->inputs = inputs;
    l->outputs = inputs;
    l->delta = realloc(l->delta, inputs*l->batch*sizeof(float));
    l->output = realloc(l->output, inputs*l->batch*sizeof(float));
#ifdef GPU
    cuda_free(l->delta_gpu);
    cuda_free(l->output_gpu);
    l->delta_gpu = cuda_make_array(l->delta, inputs*l->batch);
    l->output_gpu = cuda_make_array(l->output, inputs*l->batch);
#endif
}
/*
 * CPU forward pass: computes the per-element loss into l.output and its
 * gradient into l.delta, then sums the losses into l.cost[0].
 * Does nothing when no ground truth is attached (inference).
 * MASKED mode first copies the SECRET_NUM sentinel from truth into the
 * input so those positions contribute zero loss.
 */
void forward_cost_layer(cost_layer l, network net)
{
    if (!net.truth) return;
    if(l.cost_type == MASKED){
        int i;
        for(i = 0; i < l.batch*l.inputs; ++i){
            if(net.truth[i] == SECRET_NUM) net.input[i] = SECRET_NUM;
        }
    }
    if(l.cost_type == SMOOTH){
        smooth_l1_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output);
    }else if(l.cost_type == L1){
        l1_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output);
    } else {
        /* SSE, SEG, MASKED all use squared error on the CPU path. */
        l2_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output);
    }
    l.cost[0] = sum_array(l.output, l.batch*l.inputs);
}
/* Backward pass: adds scale * l.delta into the previous layer's delta. */
void backward_cost_layer(const cost_layer l, network net)
{
    axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, net.delta, 1);
}
#ifdef GPU
/* Copies the loss gradient from device to host. */
void pull_cost_layer(cost_layer l)
{
    cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs);
}
/* Copies the loss gradient from host to device. */
void push_cost_layer(cost_layer l)
{
    cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs);
}
/*
 * qsort comparator ordering floats by ascending absolute value.
 * Returns -1/0/+1; NaNs compare equal to everything (both comparisons false).
 */
int float_abs_compare (const void * a, const void * b)
{
    const float fa = fabsf(*(const float *)a);
    const float fb = fabsf(*(const float *)b);
    if (fa > fb) return 1;
    if (fa < fb) return -1;
    return 0;
}
/*
 * GPU forward pass: same loss selection as the CPU path, plus optional
 * label smoothing, SEG no-object down-weighting, hard-example gradient
 * pruning (l.ratio) and small-gradient suppression (l.thresh).  The final
 * per-element losses are pulled back to the host to compute l.cost[0].
 */
void forward_cost_layer_gpu(cost_layer l, network net)
{
    if (!net.truth_gpu) return;
    if(l.smooth){
        /* Label smoothing: truth = (1-smooth)*truth + smooth/inputs. */
        scal_gpu(l.batch*l.inputs, (1-l.smooth), net.truth_gpu, 1);
        add_gpu(l.batch*l.inputs, l.smooth * 1./l.inputs, net.truth_gpu, 1);
    }
    if (l.cost_type == MASKED) {
        mask_gpu(l.batch*l.inputs, net.input_gpu, SECRET_NUM, net.truth_gpu);
    }
    if(l.cost_type == SMOOTH){
        smooth_l1_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu);
    } else if (l.cost_type == L1){
        l1_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu);
    } else {
        l2_gpu(l.batch*l.inputs, net.input_gpu, net.truth_gpu, l.delta_gpu, l.output_gpu);
    }
    if (l.cost_type == SEG && l.noobject_scale != 1) {
        /* Down-weight positions whose truth is 0 (background). */
        scale_mask_gpu(l.batch*l.inputs, l.delta_gpu, 0, net.truth_gpu, l.noobject_scale);
        scale_mask_gpu(l.batch*l.inputs, l.output_gpu, 0, net.truth_gpu, l.noobject_scale);
    }
    if(l.ratio){
        cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs);
        qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare);
        int n = (1-l.ratio) * l.batch*l.inputs;
        float thresh = l.delta[n];
        /* NOTE(review): the percentile threshold computed above is
           immediately overwritten with 0 and printed — debug leftover,
           so the supp_gpu call below effectively uses threshold 0. */
        thresh = 0;
        printf("%f\n", thresh);
        supp_gpu(l.batch*l.inputs, thresh, l.delta_gpu, 1);
    }
    if(l.thresh){
        supp_gpu(l.batch*l.inputs, l.thresh*1./l.inputs, l.delta_gpu, 1);
    }
    cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs);
    l.cost[0] = sum_array(l.output, l.batch*l.inputs);
}
/* GPU backward pass: adds scale * delta into the previous layer's device delta. */
void backward_cost_layer_gpu(const cost_layer l, network net)
{
    axpy_gpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, net.delta_gpu, 1);
}
#endif
================================================
FILE: lightnet/_darknet/cost_layer.h
================================================
#ifndef COST_LAYER_H
#define COST_LAYER_H
#include "layer.h"
#include "network.h"

/* A cost (loss) layer is the generic layer struct specialized by COST_TYPE. */
typedef layer cost_layer;

/* Parse / print the cost-type keyword used in .cfg files. */
COST_TYPE get_cost_type(char *s);
char *get_cost_string(COST_TYPE a);

cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale);
void forward_cost_layer(const cost_layer l, network net);
void backward_cost_layer(const cost_layer l, network net);
void resize_cost_layer(cost_layer *l, int inputs);

#ifdef GPU
void forward_cost_layer_gpu(cost_layer l, network net);
void backward_cost_layer_gpu(const cost_layer l, network net);
#endif
#endif
================================================
FILE: lightnet/_darknet/crnn_layer.c
================================================
#include "crnn_layer.h"
#include "convolutional_layer.h"
#include "utils.h"
#include "cuda.h"
#include "blas.h"
#include "gemm.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Advances a sub-layer's output/delta (and batch-norm x/x_norm) pointers by
 * `steps` timesteps so one layer struct can walk an unrolled RNN sequence.
 * Negative steps walk backwards.
 * NOTE(review): x/x_norm are offset unconditionally, but they are only
 * allocated when batch_normalize is on — otherwise this is arithmetic on
 * NULL (works in practice here, but technically UB); confirm callers.
 */
static void increment_layer(layer *l, int steps)
{
    int num = l->outputs*l->batch*steps;
    l->output += num;
    l->delta += num;
    l->x += num;
    l->x_norm += num;
#ifdef GPU
    l->output_gpu += num;
    l->delta_gpu += num;
    l->x_gpu += num;
    l->x_norm_gpu += num;
#endif
}
/*
 * Builds a convolutional RNN layer out of three 3x3/stride-1 conv
 * sub-layers: input (x_t -> hidden), self (h_{t-1} -> hidden) and output
 * (hidden -> out).  Each sub-layer is allocated for all batch*steps
 * timesteps at once, then its batch field is shrunk so the per-step loops
 * can walk it with increment_layer.  l.state holds steps+1 hidden states
 * (one extra for the initial state).
 */
layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize)
{
    fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters);
    /* The incoming batch counts timesteps; real batch is batch/steps. */
    batch = batch / steps;
    layer l = {0};
    l.batch = batch;
    l.type = CRNN;
    l.steps = steps;
    l.h = h;
    l.w = w;
    l.c = c;
    l.out_h = h;
    l.out_w = w;
    l.out_c = output_filters;
    l.inputs = h*w*c;
    l.hidden = h * w * hidden_filters;
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.state = calloc(l.hidden*batch*(steps+1), sizeof(float));
    l.input_layer = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.input_layer) = make_convolutional_layer(batch*steps, h, w, c, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0);
    l.input_layer->batch = batch;
    l.self_layer = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.self_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0);
    l.self_layer->batch = batch;
    l.output_layer = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.output_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, output_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0);
    l.output_layer->batch = batch;
    /* The CRNN's own output/delta alias the output sub-layer's buffers. */
    l.output = l.output_layer->output;
    l.delta = l.output_layer->delta;
    l.forward = forward_crnn_layer;
    l.backward = backward_crnn_layer;
    l.update = update_crnn_layer;
#ifdef GPU
    l.forward_gpu = forward_crnn_layer_gpu;
    l.backward_gpu = backward_crnn_layer_gpu;
    l.update_gpu = update_crnn_layer_gpu;
    l.state_gpu = cuda_make_array(l.state, l.hidden*batch*(steps+1));
    l.output_gpu = l.output_layer->output_gpu;
    l.delta_gpu = l.output_layer->delta_gpu;
#endif
    return l;
}
/* Applies the optimizer step to all three convolutional sub-layers. */
void update_crnn_layer(layer l, update_args a)
{
    update_convolutional_layer(*(l.input_layer), a);
    update_convolutional_layer(*(l.self_layer), a);
    update_convolutional_layer(*(l.output_layer), a);
}
/*
 * CPU forward pass over l.steps timesteps:
 *   hidden_t = input_conv(x_t) + self_conv(hidden_{t-1}) [+ hidden_{t-1} if shortcut]
 *   out_t    = output_conv(hidden_t)
 * During training each step's hidden state is written to the next slot of
 * l.state so backprop can revisit them; at inference the same slot is
 * reused.  Sub-layer pointers are advanced one step per iteration.
 */
void forward_crnn_layer(layer l, network net)
{
    network s = net;
    s.train = net.train;
    int i;
    layer input_layer = *(l.input_layer);
    layer self_layer = *(l.self_layer);
    layer output_layer = *(l.output_layer);
    /* Clear all timesteps' gradients up front. */
    fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1);
    fill_cpu(l.hidden * l.batch * l.steps, 0, self_layer.delta, 1);
    fill_cpu(l.hidden * l.batch * l.steps, 0, input_layer.delta, 1);
    if(net.train) fill_cpu(l.hidden * l.batch, 0, l.state, 1);
    for (i = 0; i < l.steps; ++i) {
        s.input = net.input;
        forward_convolutional_layer(input_layer, s);
        s.input = l.state;
        forward_convolutional_layer(self_layer, s);
        float *old_state = l.state;
        /* Training keeps every state; advance to the next slot. */
        if(net.train) l.state += l.hidden*l.batch;
        if(l.shortcut){
            copy_cpu(l.hidden * l.batch, old_state, 1, l.state, 1);
        }else{
            fill_cpu(l.hidden * l.batch, 0, l.state, 1);
        }
        axpy_cpu(l.hidden * l.batch, 1, input_layer.output, 1, l.state, 1);
        axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1);
        s.input = l.state;
        forward_convolutional_layer(output_layer, s);
        net.input += l.inputs*l.batch;
        increment_layer(&input_layer, 1);
        increment_layer(&self_layer, 1);
        increment_layer(&output_layer, 1);
    }
}
/*
 * CPU backward pass (backprop through time): walks the timesteps in
 * reverse, reconstructing each step's hidden state from the sub-layer
 * outputs and routing gradients through the output, self and input conv
 * layers.  The self-layer's delta doubles as the gradient flowing into the
 * previous timestep's hidden state.
 */
void backward_crnn_layer(layer l, network net)
{
    network s = net;
    int i;
    layer input_layer = *(l.input_layer);
    layer self_layer = *(l.self_layer);
    layer output_layer = *(l.output_layer);
    /* Jump all sub-layer pointers to the last timestep. */
    increment_layer(&input_layer, l.steps-1);
    increment_layer(&self_layer, l.steps-1);
    increment_layer(&output_layer, l.steps-1);
    l.state += l.hidden*l.batch*l.steps;
    for (i = l.steps-1; i >= 0; --i) {
        /* Rebuild hidden_i = input_out + self_out for this step. */
        copy_cpu(l.hidden * l.batch, input_layer.output, 1, l.state, 1);
        axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1);
        s.input = l.state;
        s.delta = self_layer.delta;
        backward_convolutional_layer(output_layer, s);
        l.state -= l.hidden*l.batch;
        /*
        if(i > 0){
        copy_cpu(l.hidden * l.batch, input_layer.output - l.hidden*l.batch, 1, l.state, 1);
        axpy_cpu(l.hidden * l.batch, 1, self_layer.output - l.hidden*l.batch, 1, l.state, 1);
        }else{
        fill_cpu(l.hidden * l.batch, 0, l.state, 1);
        }
        */
        s.input = l.state;
        /* Gradient target is the PREVIOUS step's self delta; none at step 0. */
        s.delta = self_layer.delta - l.hidden*l.batch;
        if (i == 0) s.delta = 0;
        backward_convolutional_layer(self_layer, s);
        copy_cpu(l.hidden*l.batch, self_layer.delta, 1, input_layer.delta, 1);
        if (i > 0 && l.shortcut) axpy_cpu(l.hidden*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.hidden*l.batch, 1);
        s.input = net.input + i*l.inputs*l.batch;
        if(net.delta) s.delta = net.delta + i*l.inputs*l.batch;
        else s.delta = 0;
        backward_convolutional_layer(input_layer, s);
        increment_layer(&input_layer, -1);
        increment_layer(&self_layer, -1);
        increment_layer(&output_layer, -1);
    }
}
#ifdef GPU
/* Copies all three sub-layers' weights from device to host. */
void pull_crnn_layer(layer l)
{
    pull_convolutional_layer(*(l.input_layer));
    pull_convolutional_layer(*(l.self_layer));
    pull_convolutional_layer(*(l.output_layer));
}
/* Copies all three sub-layers' weights from host to device. */
void push_crnn_layer(layer l)
{
    push_convolutional_layer(*(l.input_layer));
    push_convolutional_layer(*(l.self_layer));
    push_convolutional_layer(*(l.output_layer));
}
/* Applies the optimizer step to all three sub-layers on the GPU. */
void update_crnn_layer_gpu(layer l, update_args a)
{
    update_convolutional_layer_gpu(*(l.input_layer), a);
    update_convolutional_layer_gpu(*(l.self_layer), a);
    update_convolutional_layer_gpu(*(l.output_layer), a);
}
/*
 * GPU forward pass; mirrors forward_crnn_layer exactly but on device
 * buffers (see the CPU version for the step-by-step recurrence).
 */
void forward_crnn_layer_gpu(layer l, network net)
{
    network s = net;
    int i;
    layer input_layer = *(l.input_layer);
    layer self_layer = *(l.self_layer);
    layer output_layer = *(l.output_layer);
    fill_gpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1);
    fill_gpu(l.hidden * l.batch * l.steps, 0, self_layer.delta_gpu, 1);
    fill_gpu(l.hidden * l.batch * l.steps, 0, input_layer.delta_gpu, 1);
    if(net.train) fill_gpu(l.hidden * l.batch, 0, l.state_gpu, 1);
    for (i = 0; i < l.steps; ++i) {
        s.input_gpu = net.input_gpu;
        forward_convolutional_layer_gpu(input_layer, s);
        s.input_gpu = l.state_gpu;
        forward_convolutional_layer_gpu(self_layer, s);
        float *old_state = l.state_gpu;
        /* Training keeps every hidden state; advance to the next slot. */
        if(net.train) l.state_gpu += l.hidden*l.batch;
        if(l.shortcut){
            copy_gpu(l.hidden * l.batch, old_state, 1, l.state_gpu, 1);
        }else{
            fill_gpu(l.hidden * l.batch, 0, l.state_gpu, 1);
        }
        axpy_gpu(l.hidden * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1);
        axpy_gpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1);
        s.input_gpu = l.state_gpu;
        forward_convolutional_layer_gpu(output_layer, s);
        net.input_gpu += l.inputs*l.batch;
        increment_layer(&input_layer, 1);
        increment_layer(&self_layer, 1);
        increment_layer(&output_layer, 1);
    }
}
/*
 * GPU backward pass (backprop through time); mirrors backward_crnn_layer
 * on device buffers — see the CPU version for the gradient routing.
 */
void backward_crnn_layer_gpu(layer l, network net)
{
    network s = net;
    s.train = net.train;
    int i;
    layer input_layer = *(l.input_layer);
    layer self_layer = *(l.self_layer);
    layer output_layer = *(l.output_layer);
    increment_layer(&input_layer, l.steps - 1);
    increment_layer(&self_layer, l.steps - 1);
    increment_layer(&output_layer, l.steps - 1);
    l.state_gpu += l.hidden*l.batch*l.steps;
    for (i = l.steps-1; i >= 0; --i) {
        /* Rebuild this step's hidden state from the sub-layer outputs. */
        copy_gpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1);
        axpy_gpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1);
        s.input_gpu = l.state_gpu;
        s.delta_gpu = self_layer.delta_gpu;
        backward_convolutional_layer_gpu(output_layer, s);
        l.state_gpu -= l.hidden*l.batch;
        s.input_gpu = l.state_gpu;
        /* Gradient target is the previous step's self delta; none at step 0. */
        s.delta_gpu = self_layer.delta_gpu - l.hidden*l.batch;
        if (i == 0) s.delta_gpu = 0;
        backward_convolutional_layer_gpu(self_layer, s);
        copy_gpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
        if (i > 0 && l.shortcut) axpy_gpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1);
        s.input_gpu = net.input_gpu + i*l.inputs*l.batch;
        if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l.inputs*l.batch;
        else s.delta_gpu = 0;
        backward_convolutional_layer_gpu(input_layer, s);
        increment_layer(&input_layer, -1);
        increment_layer(&self_layer, -1);
        increment_layer(&output_layer, -1);
    }
}
#endif
================================================
FILE: lightnet/_darknet/crnn_layer.h
================================================
#ifndef CRNN_LAYER_H
#define CRNN_LAYER_H
#include "activations.h"
#include "layer.h"
#include "network.h"

/* Convolutional RNN: three conv sub-layers unrolled over `steps` timesteps. */
layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize);
void forward_crnn_layer(layer l, network net);
void backward_crnn_layer(layer l, network net);
void update_crnn_layer(layer l, update_args a);

#ifdef GPU
void forward_crnn_layer_gpu(layer l, network net);
void backward_crnn_layer_gpu(layer l, network net);
void update_crnn_layer_gpu(layer l, update_args a);
void push_crnn_layer(layer l);
void pull_crnn_layer(layer l);
#endif
#endif
================================================
FILE: lightnet/_darknet/crop_layer.c
================================================
#include "crop_layer.h"
#include "cuda.h"
#include <stdio.h>
/* Wraps the crop layer's output buffer as an out_w x out_h x out_c image; no copy. */
image get_crop_image(crop_layer l)
{
    return float_to_image(l.out_w, l.out_h, l.out_c, l.output);
}
/* Crop layers are not differentiable data augmentation; backward is a no-op. */
void backward_crop_layer(const crop_layer l, network net){}
void backward_crop_layer_gpu(const crop_layer l, network net){}
/*
 * Builds a crop/augmentation layer that cuts a crop_height x crop_width
 * window out of each h x w x c input, with optional random horizontal flip
 * and hue/saturation/exposure jitter (angle/saturation/exposure are used by
 * the GPU kernel path).  l.scale records crop_height/h for resizing later.
 */
crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure)
{
    fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c);
    crop_layer l = {0};
    l.type = CROP;
    l.batch = batch;
    l.h = h;
    l.w = w;
    l.c = c;
    l.scale = (float)crop_height / h;
    l.flip = flip;
    l.angle = angle;
    l.saturation = saturation;
    l.exposure = exposure;
    l.out_w = crop_width;
    l.out_h = crop_height;
    l.out_c = c;
    l.inputs = l.w * l.h * l.c;
    l.outputs = l.out_w * l.out_h * l.out_c;
    l.output = calloc(l.outputs*batch, sizeof(float));
    l.forward = forward_crop_layer;
    l.backward = backward_crop_layer;
#ifdef GPU
    l.forward_gpu = forward_crop_layer_gpu;
    l.backward_gpu = backward_crop_layer_gpu;
    l.output_gpu = cuda_make_array(l.output, l.outputs*batch);
    /* 8 random numbers per batch item for the GPU augmentation kernel. */
    l.rand_gpu = cuda_make_array(0, l.batch*8);
#endif
    return l;
}
/*
 * Resizes the crop layer for a new input size, keeping the original
 * crop-to-input ratio (l.scale) to derive the new output size.
 */
void resize_crop_layer(layer *l, int w, int h)
{
    l->w = w;
    l->h = h;
    l->out_w = l->scale*w;
    l->out_h = l->scale*h;
    l->inputs = l->w * l->h * l->c;
    l->outputs = l->out_h * l->out_w * l->out_c;
    l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
#ifdef GPU
    cuda_free(l->output_gpu);
    l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch);
#endif
}
/*
 * CPU forward pass: copies a window of each input image into l.output.
 * Training uses a random crop offset and (optionally) a random horizontal
 * flip per forward call; inference uses a deterministic center crop with no
 * flip.  Unless l.noadjust is set, pixel values are remapped x -> 2x - 1
 * (so inputs in [0,1] become [-1,1]).  Input layout is CHW per batch item,
 * as the index expression below shows.
 */
void forward_crop_layer(const crop_layer l, network net)
{
    int i,j,c,b,row,col;
    int index;
    int count = 0;
    /* One crop offset / flip decision per call, shared by the whole batch. */
    int flip = (l.flip && rand()%2);
    int dh = rand()%(l.h - l.out_h + 1);
    int dw = rand()%(l.w - l.out_w + 1);
    float scale = 2;
    float trans = -1;
    if(l.noadjust){
        scale = 1;
        trans = 0;
    }
    if(!net.train){
        flip = 0;
        dh = (l.h - l.out_h)/2;
        dw = (l.w - l.out_w)/2;
    }
    for(b = 0; b < l.batch; ++b){
        for(c = 0; c < l.c; ++c){
            for(i = 0; i < l.out_h; ++i){
                for(j = 0; j < l.out_w; ++j){
                    if(flip){
                        col = l.w - dw - j - 1;
                    }else{
                        col = j + dw;
                    }
                    row = i + dh;
                    index = col+l.w*(row+l.h*(c + l.c*b));
                    l.output[count++] = net.input[index]*scale + trans;
                }
            }
        }
    }
}
================================================
FILE: lightnet/_darknet/crop_layer.h
================================================
#ifndef CROP_LAYER_H
#define CROP_LAYER_H
#include "image.h"
#include "layer.h"
#include "network.h"

/* A crop (data-augmentation) layer is the generic layer struct. */
typedef layer crop_layer;

image get_crop_image(crop_layer l);
crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure);
void forward_crop_layer(const crop_layer l, network net);
void resize_crop_layer(layer *l, int w, int h);

#ifdef GPU
void forward_crop_layer_gpu(crop_layer l, network net);
#endif
#endif
================================================
FILE: lightnet/_darknet/crop_layer_kernels.cu
================================================
#include "cuda_runtime.h"
#include "curand.h"
#include "cublas_v2.h"
extern "C" {
#include "crop_layer.h"
#include "utils.h"
#include "cuda.h"
#include "image.h"
}
/* Read pixel (x, y) of channel c from a planar w x h image, returning 0
 * for any coordinate outside the image bounds. */
__device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c)
{
    int inside = (x >= 0) && (x < w) && (y >= 0) && (y < h);
    if(!inside) return 0;
    return image[x + w*(y + c*h)];
}
/* Convert one RGB pixel to HSV. Hue is returned on the 0..6 sextant
 * scale (or -1 when the pixel is black and hue is undefined). */
__device__ float3 rgb_to_hsv_kernel(float3 rgb)
{
    float red = rgb.x;
    float green = rgb.y;
    float blue = rgb.z;
    /* Component-wise max/min via nested ternaries. */
    float hi = (red > green) ? ((red > blue) ? red : blue) : ((green > blue) ? green : blue);
    float lo = (red < green) ? ((red < blue) ? red : blue) : ((green < blue) ? green : blue);
    float chroma = hi - lo;
    float hue, sat;
    float val = hi;
    if(hi == 0){
        sat = 0;
        hue = -1;  /* hue undefined for pure black */
    }else{
        sat = chroma/hi;
        /* Hue sextant depends on which channel dominates. */
        if(red == hi){
            hue = (green - blue) / chroma;
        } else if (green == hi) {
            hue = 2 + (blue - red) / chroma;
        } else {
            hue = 4 + (red - green) / chroma;
        }
        if (hue < 0) hue += 6;
    }
    return make_float3(hue, sat, val);
}
/* Convert one HSV pixel (hue on the 0..6 sextant scale) back to RGB,
 * clamping each output channel to [0,1]. */
__device__ float3 hsv_to_rgb_kernel(float3 hsv)
{
    float h = hsv.x;
    float s = hsv.y;
    float v = hsv.z;
    float r, g, b;
    if (s == 0) {
        /* Achromatic: every channel equals the value. */
        r = g = b = v;
    } else {
        int sector = (int) floorf(h);
        float f = h - sector;
        float p = v*(1-s);
        float q = v*(1-s*f);
        float t = v*(1-s*(1-f));
        /* Pick the channel permutation for this hue sextant. */
        switch(sector){
            case 0:  r = v; g = t; b = p; break;
            case 1:  r = q; g = v; b = p; break;
            case 2:  r = p; g = v; b = t; break;
            case 3:  r = p; g = q; b = v; break;
            case 4:  r = t; g = p; b = v; break;
            default: r = v; g = p; b = q; break;
        }
    }
    r = (r < 0) ? 0 : ((r > 1) ? 1 : r);
    g = (g < 0) ? 0 : ((g > 1) ? 1 : g);
    b = (b < 0) ? 0 : ((b > 1) ? 1 : b);
    return make_float3(r, g, b);
}
/* Sample channel c of a planar w x h image at fractional coordinates
 * (x, y) using bilinear interpolation; out-of-bounds neighbours read
 * as 0 via get_pixel_kernel. */
__device__ float bilinear_interpolate_kernel(float *image, int w, int h, float x, float y, int c)
{
    int x0 = (int) floorf(x);
    int y0 = (int) floorf(y);
    float fx = x - x0;
    float fy = y - y0;
    /* Weighted sum over the four surrounding texels (same term order
     * as the classic formulation). */
    float val = (1-fy) * (1-fx) * get_pixel_kernel(image, w, h, x0, y0, c) +
        fy * (1-fx) * get_pixel_kernel(image, w, h, x0, y0+1, c) +
        (1-fy) * fx * get_pixel_kernel(image, w, h, x0+1, y0, c) +
        fy * fx * get_pixel_kernel(image, w, h, x0+1, y0+1, c);
    return val;
}
/*
 * Per-pixel color augmentation kernel. One thread handles one (x, y)
 * pixel of one image; the image is stored planar with exactly 3
 * channels (the h*0 / h*1 / h*2 indexing below hard-codes that).
 * `rand` holds 8 uniform draws per image; draws 0-3 of an image's
 * block drive its saturation/exposure jitter.
 * NOTE(review): rshift/gshift/bshift always read rand[0..2] — i.e.
 * image 0's draws — so the channel shift is shared by the whole batch.
 * When !train, only the scale/translate normalization is applied.
 */
__global__ void levels_image_kernel(float *image, float *rand, int batch, int w, int h, int train, float saturation, float exposure, float translate, float scale, float shift)
{
    int size = batch * w * h;
    /* Linear thread id over a (possibly 2-D) grid of 1-D blocks. */
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(id >= size) return;
    /* Decompose id into (x, y, image); after both divisions `id` is
     * the index of this image within the batch. */
    int x = id % w;
    id /= w;
    int y = id % h;
    id /= h;
    float rshift = rand[0];
    float gshift = rand[1];
    float bshift = rand[2];
    float r0 = rand[8*id + 0];
    float r1 = rand[8*id + 1];
    float r2 = rand[8*id + 2];
    float r3 = rand[8*id + 3];
    /* Jitter factor in [1, saturation]; a second coin-flip decides
     * whether to apply it or its reciprocal. Same for exposure. */
    saturation = r0*(saturation - 1) + 1;
    saturation = (r1 > .5f) ? 1.f/saturation : saturation;
    exposure = r2*(exposure - 1) + 1;
    exposure = (r3 > .5f) ? 1.f/exposure : exposure;
    /* Offset to the start of this image (3 planar channels). */
    size_t offset = id * h * w * 3;
    image += offset;
    float r = image[x + w*(y + h*0)];
    float g = image[x + w*(y + h*1)];
    float b = image[x + w*(y + h*2)];
    float3 rgb = make_float3(r,g,b);
    if(train){
        /* Jitter saturation/exposure in HSV space, then convert back. */
        float3 hsv = rgb_to_hsv_kernel(rgb);
        hsv.y *= saturation;
        hsv.z *= exposure;
        rgb = hsv_to_rgb_kernel(hsv);
    } else {
        shift = 0;
    }
    /* Normalize and apply the per-channel shift (zero-centered). */
    image[x + w*(y + h*0)] = rgb.x*scale + translate + (rshift - .5f)*shift;
    image[x + w*(y + h*1)] = rgb.y*scale + translate + (gshift - .5f)*shift;
    image[x + w*(y + h*2)] = rgb.z*scale + translate + (bshift - .5f)*shift;
}
/*
 * Crop kernel: one thread computes one output element, sampling the
 * (optionally flipped and rotated) input with bilinear interpolation.
 * Random draws 4-7 of each image's 8-draw block choose the crop
 * offset, the flip coin and the rotation angle; at test time the crop
 * is centered with no flip or rotation.
 */
__global__ void forward_crop_layer_kernel(float *input, float *rand, int size, int c, int h, int w, int crop_height, int crop_width, int train, int flip, float angle, float *output)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(id >= size) return;
    /* Rotation pivot: the input image center. */
    float cx = w/2.f;
    float cy = h/2.f;
    int count = id;  /* flat output index, preserved before decomposition */
    /* Decompose id into (column j, row i, channel k, image b). */
    int j = id % crop_width;
    id /= crop_width;
    int i = id % crop_height;
    id /= crop_height;
    int k = id % c;
    id /= c;
    int b = id;
    float r4 = rand[8*b + 4];
    float r5 = rand[8*b + 5];
    float r6 = rand[8*b + 6];
    float r7 = rand[8*b + 7];
    /* Fractional crop offset over the valid range, flip coin-flip,
     * and an angle uniform in [-angle, +angle]. */
    float dw = (w - crop_width)*r4;
    float dh = (h - crop_height)*r5;
    flip = (flip && (r6 > .5f));
    angle = 2*angle*r7 - angle;
    if(!train){
        /* Inference: deterministic centered crop. */
        dw = (w - crop_width)/2.f;
        dh = (h - crop_height)/2.f;
        flip = 0;
        angle = 0;
    }
    /* Advance to this image's data within the batch. */
    input += w*h*c*b;
    float x = (flip) ? w - dw - j - 1 : j + dw;
    float y = i + dh;
    /* Rotate (x, y) about the image center by `angle` radians. */
    float rx = cosf(angle)*(x-cx) - sinf(angle)*(y-cy) + cx;
    float ry = sinf(angle)*(x-cx) + cosf(angle)*(y-cy) + cy;
    output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k);
}
extern "C" void forward_crop_layer_gpu(crop_layer layer, network net)
{
cuda_random(layer.rand_gpu, layer.batch*8);
float radians = layer.angle*3.14159265f/180.f;
float scale = 2;
float translate = -1;
if(layer.noadjust){
scale = 1;
translate = 0;
}
int size = layer.batch * layer.w * layer.h;
levels_image_kernel<<<cuda_gridsize(size), BLOCK>>>(net.input_gpu, layer.rand_gpu, layer.batch, layer.w, layer.h, net.train, layer.saturation, layer.exposure, translate, scale, layer.shift);
check_error(cudaPeekAtLastError());
size = layer.batch*layer.c*layer.out_w*layer.out_h;
forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK>>>(net.input_gpu, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.out_h, layer.out_w, net.train, layer.flip, radians, layer.output_gpu);
check_error(cudaPeekAtLastError());
/*
cuda_pull_array(layer.output_gpu, layer.output, size);
image im = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 0*(size/layer.batch));
image im2 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 1*(size/layer.batch));
image im3 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 2*(size/layer.batch));
translate_image(im, -translate);
scale_image(im, 1/scale);
translate_image(im2, -translate);
scale_image(im2, 1/scale);
translate_image(im3, -translate);
scale_image(im3, 1/scale);
show_image(im, "cropped");
show_image(im2, "cropped2");
show_image(im3, "cropped3");
cvWaitKey(0);
*/
}
================================================
FILE: lightnet/_darknet/cuda.c
================================================
int gpu_index = 0;
#ifdef GPU
#include "cuda.h"
#include "utils.h"
#include "blas.h"
#include <assert.h>
#include <stdlib.h>
#include <time.h>
/* Select CUDA device `n` for this process and record the choice in the
 * global gpu_index; terminates via check_error() on failure. */
void cuda_set_device(int n)
{
    gpu_index = n;
    cudaError_t err = cudaSetDevice(n);
    check_error(err);
}
/* Return the CUDA device currently bound to this host thread. */
int cuda_get_device()
{
    int device = 0;
    cudaError_t err = cudaGetDevice(&device);
    check_error(err);
    return device;
}
/*
 * Terminate the program if a CUDA call reported an error.
 *
 * `status` is the return value of the runtime call just made;
 * cudaGetLastError() is also polled so errors raised asynchronously by
 * earlier kernel launches are surfaced too. In debug builds assert(0)
 * aborts right after printing; with NDEBUG the assert is a no-op and
 * error() terminates with the formatted message instead.
 */
void check_error(cudaError_t status)
{
    //cudaDeviceSynchronize();
    cudaError_t status2 = cudaGetLastError();
    if (status != cudaSuccess)
    {
        const char *s = cudaGetErrorString(status);
        char buffer[256];
        printf("CUDA Error: %s\n", s);
        assert(0);
        snprintf(buffer, 256, "CUDA Error: %s", s);
        error(buffer);
    }
    if (status2 != cudaSuccess)
    {
        /* BUG FIX: this branch previously stringified `status`, so the
         * asynchronous error in `status2` was reported with the wrong
         * (often "no error") message. */
        const char *s = cudaGetErrorString(status2);
        char buffer[256];
        printf("CUDA Error Prev: %s\n", s);
        assert(0);
        snprintf(buffer, 256, "CUDA Error Prev: %s", s);
        error(buffer);
    }
}
/*
 * Compute a launch grid covering `n` threads with BLOCK threads per
 * block. Blocks beyond the 65535 gridDim.x limit are spilled into the
 * y dimension, keeping x*y*BLOCK >= n.
 */
dim3 cuda_gridsize(size_t n){
    /* BUG FIX: n == 0 used to underflow (n-1) as size_t and request an
     * astronomically large grid; clamp to a single block instead. */
    if(n == 0) n = 1;
    size_t k = (n-1) / BLOCK + 1;  /* blocks needed, rounded up */
    size_t x = k;
    size_t y = 1;
    if(x > 65535){
        x = ceil(sqrt(k));
        y = (n-1)/(x*BLOCK) + 1;
    }
    dim3 d = {x, y, 1};
    //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK);
    return d;
}
#ifdef CUDNN
/* Return a lazily created, per-device cuDNN handle (up to 16 devices,
 * keyed by the current device index). */
cudnnHandle_t cudnn_handle()
{
    static cudnnHandle_t handles[16];
    static int ready[16] = {0};
    int dev = cuda_get_device();
    if(ready[dev]) return handles[dev];
    cudnnCreate(&handles[dev]);
    ready[dev] = 1;
    return handles[dev];
}
#endif
/* Return a lazily created, per-device cuBLAS handle (up to 16 devices,
 * keyed by the current device index). */
cublasHandle_t blas_handle()
{
    static cublasHandle_t handles[16];
    static int ready[16] = {0};
    int dev = cuda_get_device();
    if(ready[dev]) return handles[dev];
    cublasCreate(&handles[dev]);
    ready[dev] = 1;
    return handles[dev];
}
/*
 * Allocate n floats on the current device. When `x` is non-NULL its
 * contents are copied up; otherwise the device buffer is zero-filled
 * via fill_gpu. Terminates through check_error()/error() on failure.
 */
float *cuda_make_array(float *x, size_t n)
{
    float *x_gpu = 0;
    size_t size = sizeof(float)*n;
    cudaError_t status = cudaMalloc((void **)&x_gpu, size);
    check_error(status);
    /* BUG FIX: the NULL check used to come only after the buffer had
     * already been written to; validate the allocation before use. */
    if(!x_gpu) error("Cuda malloc failed\n");
    if(x){
        status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice);
        check_error(status);
    } else {
        fill_gpu(n, 0, x_gpu, 1);
    }
    return x_gpu;
}
/* Fill n device floats with uniform random values, using a lazily
 * created, time-seeded cuRAND generator per device (up to 16). */
void cuda_random(float *x_gpu, size_t n)
{
    static curandGenerator_t gens[16];
    static int seeded[16] = {0};
    int dev = cuda_get_device();
    if(!seeded[dev]){
        curandCreateGenerator(&gens[dev], CURAND_RNG_PSEUDO_DEFAULT);
        curandSetPseudoRandomGeneratorSeed(gens[dev], time(0));
        seeded[dev] = 1;
    }
    curandGenerateUniform(gens[dev], x_gpu, n);
    check_error(cudaPeekAtLastError());
}
/* Debug helper: pull a device array, diff it against the host array
 * `x`, print the RMS difference labelled `s`, and return the sum of
 * squared differences. */
float cuda_compare(float *x_gpu, float *x, size_t n, char *s)
{
    float *host = calloc(n, sizeof(float));
    cuda_pull_array(x_gpu, host, n);
    axpy_cpu(n, -1, x, 1, host, 1);           /* host <- host - x */
    float err = dot_cpu(n, host, 1, host, 1); /* sum of squared diffs */
    printf("Error %s: %f\n", s, sqrt(err/n));
    free(host);
    return err;
}
/*
 * Allocate n ints on the current device, optionally copying from the
 * host array `x`. Unlike cuda_make_array, an uninitialized buffer is
 * left as-is (no zero fill). Terminates on failure.
 */
int *cuda_make_int_array(int *x, size_t n)
{
    int *x_gpu = 0;
    size_t size = sizeof(int)*n;
    cudaError_t status = cudaMalloc((void **)&x_gpu, size);
    check_error(status);
    /* BUG FIX: validate the allocation before it is used, not after
     * the copy has already dereferenced it. */
    if(!x_gpu) error("Cuda malloc failed\n");
    if(x){
        status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice);
        check_error(status);
    }
    return x_gpu;
}
/* Release a device buffer, terminating on a CUDA error. */
void cuda_free(float *x_gpu)
{
    check_error(cudaFree(x_gpu));
}
/* Copy n floats host -> device, terminating on a CUDA error. */
void cuda_push_array(float *x_gpu, float *x, size_t n)
{
    size_t nbytes = sizeof(float)*n;
    check_error(cudaMemcpy(x_gpu, x, nbytes, cudaMemcpyHostToDevice));
}
/* Copy n floats device -> host, terminating on a CUDA error. */
void cuda_pull_array(float *x_gpu, float *x, size_t n)
{
    size_t nbytes = sizeof(float)*n;
    check_error(cudaMemcpy(x, x_gpu, nbytes, cudaMemcpyDeviceToHost));
}
/* Pull a device array to a scratch host buffer and return its
 * magnitude as computed by mag_array. */
float cuda_mag_array(float *x_gpu, size_t n)
{
    float *host = calloc(n, sizeof(float));
    cuda_pull_array(x_gpu, host, n);
    float magnitude = mag_array(host, n);
    free(host);
    return magnitude;
}
#else
void cuda_set_device(int n){}
#endif
================================================
FILE: lightnet/_darknet/cuda.h
================================================
#ifndef CUDA_H
#define CUDA_H
#include "darknet.h"
#ifdef GPU
/* Terminate with a message if `status` (or the last async CUDA error)
 * indicates failure. */
void check_error(cudaError_t status);
/* Lazily created, per-device cuBLAS handle. */
cublasHandle_t blas_handle();
/* Allocate n device ints, optionally copying from host `x`. */
int *cuda_make_int_array(int *x, size_t n);
/* Fill n device floats with uniform random values. */
void cuda_random(float *x_gpu, size_t n);
/* Debug: print RMS diff between a device array and host `x`. */
float cuda_compare(float *x_gpu, float *x, size_t n, char *s);
/* Launch grid covering n threads at BLOCK threads per block. */
dim3 cuda_gridsize(size_t n);
#ifdef CUDNN
/* Lazily created, per-device cuDNN handle. */
cudnnHandle_t cudnn_handle();
#endif
#endif
#endif
================================================
FILE: lightnet/_darknet/darknet.h
================================================
#ifndef DARKNET_API
#define DARKNET_API
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <pthread.h>
#define SECRET_NUM -1234
extern int gpu_index;
#ifdef GPU
#define BLOCK 512
#include "cuda_runtime.h"
#include "curand.h"
#include "cublas_v2.h"
#ifdef CUDNN
#include "cudnn.h"
#endif
#endif
#ifndef __cplusplus
#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/core/version.hpp"
#if CV_MAJOR_VERSION == 3
#include "opencv2/videoio/videoio_c.h"
#include "opencv2/imgcodecs/imgcodecs_c.h"
#endif
#endif
#endif
/* Dataset metadata: the class count and the class display names. */
typedef struct{
    int classes;
    char **names;
} metadata;
/* Load metadata from a .data-style file path. */
metadata get_metadata(char *file);
/* Class hierarchy stored as flat parallel arrays over n nodes
 * (leaf flags, parent/child links, group membership and offsets). */
typedef struct{
    int *leaf;
    int n;
    int *parent;
    int *child;
    int *group;
    char **name;
    int groups;
    int *group_size;
    int *group_offset;
} tree;
/* Supported activation function kinds. */
typedef enum{
    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN
} ACTIVATION;
/* Elementwise binary operations. */
typedef enum{
    MULT, ADD, SUB, DIV
} BINARY_ACTIVATION;
/* Every layer kind the framework defines; BLANK is the sentinel. */
typedef enum {
    CONVOLUTIONAL,
    DECONVOLUTIONAL,
    CONNECTED,
    MAXPOOL,
    SOFTMAX,
    DETECTION,
    DROPOUT,
    CROP,
    ROUTE,
    COST,
    NORMALIZATION,
    AVGPOOL,
    LOCAL,
    SHORTCUT,
    ACTIVE,
    RNN,
    GRU,
    LSTM,
    CRNN,
    BATCHNORM,
    NETWORK,
    XNOR,
    REGION,
    REORG,
    BLANK
} LAYER_TYPE;
/* Loss function kinds for cost layers. */
typedef enum{
    SSE, MASKED, L1, SEG, SMOOTH
} COST_TYPE;
/* Optimizer state handed to each layer's update function. The
 * B1/B2/eps names match Adam's beta1/beta2/epsilon; `t` is presumably
 * the Adam timestep — confirm against the update implementations. */
typedef struct{
    int batch;
    float learning_rate;
    float momentum;
    float decay;
    int adam;
    float B1;
    float B2;
    float eps;
    int t;
} update_args;
struct network;
typedef struct network network;
struct layer;
typedef struct layer layer;
/* The one-size-fits-all layer record: every layer kind shares this
 * struct and uses only the fields relevant to it. Unused fields stay
 * zeroed. Comments below group fields by apparent role; semantics of
 * individual flags vary per layer type — check the owning layer's .c
 * file before relying on any of them. */
struct layer{
    /* Identity and per-layer virtual dispatch. */
    LAYER_TYPE type;
    ACTIVATION activation;
    COST_TYPE cost_type;
    void (*forward)   (struct layer, struct network);
    void (*backward)  (struct layer, struct network);
    void (*update)    (struct layer, update_args);
    void (*forward_gpu)   (struct layer, struct network);
    void (*backward_gpu)  (struct layer, struct network);
    void (*update_gpu)    (struct layer, update_args);
    /* Configuration flags and geometry. */
    int batch_normalize;
    int shortcut;
    int batch;
    int forced;
    int flipped;
    int inputs;                 /* elements per input image */
    int outputs;                /* elements per output image */
    int nweights;
    int nbiases;
    int extra;
    int truths;
    int h,w,c;                  /* input height, width, channels */
    int out_h, out_w, out_c;    /* output height, width, channels */
    int n;
    int max_boxes;
    int groups;
    int size;
    int side;
    int stride;
    int reverse;
    int flatten;
    int spatial;
    int pad;
    int sqrt;
    int flip;
    int index;
    int binary;
    int xnor;
    int steps;
    int hidden;
    int truth;
    /* Scalar hyper-parameters (augmentation, scaling, etc.). */
    float smooth;
    float dot;
    float angle;
    float jitter;
    float saturation;
    float exposure;
    float shift;
    float ratio;
    float learning_rate_scale;
    /* Detection / region-layer options and loss weights. */
    int softmax;
    int classes;
    int coords;
    int background;
    int rescore;
    int objectness;
    int does_cost;
    int joint;
    int noadjust;
    int reorg;
    int log;
    int tanh;
    float alpha;
    float beta;
    float kappa;
    float coord_scale;
    float object_scale;
    float noobject_scale;
    float mask_scale;
    float class_scale;
    int bias_match;
    int random;
    float thresh;
    int classfix;
    int absolute;
    int onlyforward;
    int stopbackward;
    int dontload;
    int dontloadscales;
    float temperature;
    float probability;
    float scale;
    /* Host-side buffers: parameters, activations and gradients. */
    char  * cweights;
    int   * indexes;
    int   * input_layers;
    int   * input_sizes;
    int   * map;
    float * rand;
    float * cost;
    float * state;
    float * prev_state;
    float * forgot_state;
    float * forgot_delta;
    float * state_delta;
    float * combine_cpu;
    float * combine_delta_cpu;
    float * concat;
    float * concat_delta;
    float * binary_weights;
    float * biases;
    float * bias_updates;
    float * scales;
    float * scale_updates;
    float * weights;
    float * weight_updates;
    float * delta;
    float * output;
    float * squared;
    float * norms;
    /* Batch-normalization statistics. */
    float * spatial_mean;
    float * mean;
    float * variance;
    float * mean_delta;
    float * variance_delta;
    float * rolling_mean;
    float * rolling_variance;
    float * x;
    float * x_norm;
    /* Adam moment buffers (m/v and their bias/scale variants). */
    float * m;
    float * v;
    float * bias_m;
    float * bias_v;
    float * scale_m;
    float * scale_v;
    /* Recurrent-cell working buffers (names suggest GRU/LSTM gates). */
    float *z_cpu;
    float *r_cpu;
    float *h_cpu;
    float * prev_state_cpu;
    float *temp_cpu;
    float *temp2_cpu;
    float *temp3_cpu;
    float *dh_cpu;
    float *hh_cpu;
    float *prev_cell_cpu;
    float *cell_cpu;
    float *f_cpu;
    float *i_cpu;
    float *g_cpu;
    float *o_cpu;
    float *c_cpu;
    float *dc_cpu;
    float * binary_input;
    /* Composite layers: sub-layers owned by RNN/GRU/LSTM/CRNN wrappers. */
    struct layer *input_layer;
    struct layer *self_layer;
    struct layer *output_layer;
    struct layer *reset_layer;
    struct layer *update_layer;
    struct layer *state_layer;
    struct layer *input_gate_layer;
    struct layer *state_gate_layer;
    struct layer *input_save_layer;
    struct layer *state_save_layer;
    struct layer *input_state_layer;
    struct layer *state_state_layer;
    struct layer *input_z_layer;
    struct layer *state_z_layer;
    struct layer *input_r_layer;
    struct layer *state_r_layer;
    struct layer *input_h_layer;
    struct layer *state_h_layer;
    struct layer *wz;
    struct layer *uz;
    struct layer *wr;
    struct layer *ur;
    struct layer *wh;
    struct layer *uh;
    struct layer *uo;
    struct layer *wo;
    struct layer *uf;
    struct layer *wf;
    struct layer *ui;
    struct layer *wi;
    struct layer *ug;
    struct layer *wg;
    tree *softmax_tree;     /* optional class hierarchy for softmax */
    size_t workspace_size;  /* scratch bytes this layer needs */
#ifdef GPU
    /* Device-side mirrors of the host buffers above. */
    int *indexes_gpu;
    float *z_gpu;
    float *r_gpu;
    float *h_gpu;
    float *temp_gpu;
    float *temp2_gpu;
    float *temp3_gpu;
    float *dh_gpu;
    float *hh_gpu;
    float *prev_cell_gpu;
    float *cell_gpu;
    float *f_gpu;
    float *i_gpu;
    float *g_gpu;
    float *o_gpu;
    float *c_gpu;
    float *dc_gpu;
    float *m_gpu;
    float *v_gpu;
    float *bias_m_gpu;
    float *scale_m_gpu;
    float *bias_v_gpu;
    float *scale_v_gpu;
    float * combine_gpu;
    float * combine_delta_gpu;
    float * prev_state_gpu;
    float * forgot_state_gpu;
    float * forgot_delta_gpu;
    float * state_gpu;
    float * state_delta_gpu;
    float * gate_gpu;
    float * gate_delta_gpu;
    float * save_gpu;
    float * save_delta_gpu;
    float * concat_gpu;
    float * concat_delta_gpu;
    float * binary_input_gpu;
    float * binary_weights_gpu;
    float * mean_gpu;
    float * variance_gpu;
    float * rolling_mean_gpu;
    float * rolling_variance_gpu;
    float * variance_delta_gpu;
    float * mean_delta_gpu;
    float * x_gpu;
    float * x_norm_gpu;
    float * weights_gpu;
    float * weight_updates_gpu;
    float * weight_change_gpu;
    float * biases_gpu;
    float * bias_updates_gpu;
    float * bias_change_gpu;
    float * scales_gpu;
    float * scale_updates_gpu;
    float * scale_change_gpu;
    float * output_gpu;
    float * delta_gpu;
    float * rand_gpu;
    float * squared_gpu;
    float * norms_gpu;
#ifdef CUDNN
    /* cuDNN descriptors for the convolution fast path. */
    cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc;
    cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc;
    cudnnTensorDescriptor_t normTensorDesc;
    cudnnFilterDescriptor_t weightDesc;
    cudnnFilterDescriptor_t dweightDesc;
    cudnnConvolutionDescriptor_t convDesc;
    cudnnConvolutionFwdAlgo_t fw_algo;
    cudnnConvolutionBwdDataAlgo_t bd_algo;
    cudnnConvolutionBwdFilterAlgo_t bf_algo;
#endif
#endif
};
/* Release every buffer owned by a layer. */
void free_layer(layer);
/* Learning-rate schedule kinds. */
typedef enum {
    CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM
} learning_rate_policy;
/* A whole network: the layer array plus training configuration and
 * the shared input/truth/workspace buffers. */
typedef struct network{
    int n;                  /* number of layers */
    int batch;
    size_t *seen;           /* images processed so far */
    int *t;
    float epoch;
    int subdivisions;
    layer *layers;
    float *output;
    /* Learning-rate schedule and optimizer settings. */
    learning_rate_policy policy;
    float learning_rate;
    float momentum;
    float decay;
    float gamma;
    float scale;
    float power;
    int time_steps;
    int step;
    int max_batches;
    float *scales;
    int *steps;
    int num_steps;
    int burn_in;
    int adam;
    float B1;
    float B2;
    float eps;
    /* Input geometry and data-augmentation ranges. */
    int inputs;
    int outputs;
    int truths;
    int notruth;
    int h, w, c;
    int max_crop;
    int min_crop;
    float max_ratio;
    float min_ratio;
    int center;
    float angle;
    float aspect;
    float exposure;
    float saturation;
    float hue;
    int random;
    int gpu_index;
    tree *hierarchy;
    /* Shared buffers threaded through every layer's forward/backward. */
    float *input;
    float *truth;
    float *delta;
    float *workspace;
    int train;              /* nonzero while training (enables augmentation etc.) */
    int index;              /* index of the layer currently executing */
    float *cost;
#ifdef GPU
    /* Device-side mirrors of the shared buffers. */
    float *input_gpu;
    float *truth_gpu;
    float *delta_gpu;
    float *output_gpu;
#endif
} network;
/* Parameters of one random augmentation (crop/rotate/scale). */
typedef struct {
    int w;
    int h;
    float scale;
    float rad;
    float dx;
    float dy;
    float aspect;
} augment_args;
/* An image: w x h pixels, c planar channels of floats. */
typedef struct {
    int w;
    int h;
    int c;
    float *data;
} image;
/* A bounding box: center (x, y) plus width and height. */
typedef struct{
    float x, y, w, h;
} box;
/* Row-major 2-D float matrix with per-row pointers. */
typedef struct matrix{
    int rows, cols;
    float **vals;
} matrix;
/* A loaded batch: inputs X, targets y, and optional per-example boxes.
 * `shallow` presumably marks borrowed (non-owned) storage — confirm
 * against free_data in data.c. */
typedef struct{
    int w, h;
    matrix X;
    matrix y;
    int shallow;
    int *num_boxes;
    box **boxes;
} data;
/* Every data-loading mode the loader threads support. */
typedef enum {
    CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA, INSTANCE_DATA
} data_type;
typedef struct load_args{
int threads;
char **paths;
char *path;
int n;
int m;
char **labels;
int h;
int w;
int out_w;
int out_h;
int nh;
int nw;
int num_boxes;
int min, max, size;
int c
gitextract_3zntw7v9/
├── .appveyor.yml
├── .gitignore
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.rst
├── bin/
│ ├── cythonize.py
│ └── train.py
├── lightnet/
│ ├── __init__.pxd
│ ├── __init__.py
│ ├── __main__.py
│ ├── _darknet/
│ │ ├── Makefile
│ │ ├── activation_kernels.cu
│ │ ├── activation_layer.c
│ │ ├── activation_layer.h
│ │ ├── activations.c
│ │ ├── activations.h
│ │ ├── avgpool_layer.c
│ │ ├── avgpool_layer.h
│ │ ├── avgpool_layer_kernels.cu
│ │ ├── batchnorm_layer.c
│ │ ├── batchnorm_layer.h
│ │ ├── blas.c
│ │ ├── blas.h
│ │ ├── blas_kernels.cu
│ │ ├── box.c
│ │ ├── box.h
│ │ ├── classifier.h
│ │ ├── col2im.c
│ │ ├── col2im.h
│ │ ├── col2im_kernels.cu
│ │ ├── connected_layer.c
│ │ ├── connected_layer.h
│ │ ├── convolutional_kernels.cu
│ │ ├── convolutional_layer.c
│ │ ├── convolutional_layer.h
│ │ ├── cost_layer.c
│ │ ├── cost_layer.h
│ │ ├── crnn_layer.c
│ │ ├── crnn_layer.h
│ │ ├── crop_layer.c
│ │ ├── crop_layer.h
│ │ ├── crop_layer_kernels.cu
│ │ ├── cuda.c
│ │ ├── cuda.h
│ │ ├── darknet.h
│ │ ├── data.c
│ │ ├── data.h
│ │ ├── deconvolutional_kernels.cu
│ │ ├── deconvolutional_layer.c
│ │ ├── deconvolutional_layer.h
│ │ ├── demo.c
│ │ ├── demo.h
│ │ ├── detection_layer.c
│ │ ├── detection_layer.h
│ │ ├── dropout_layer.c
│ │ ├── dropout_layer.h
│ │ ├── dropout_layer_kernels.cu
│ │ ├── gemm.c
│ │ ├── gemm.h
│ │ ├── gru_layer.c
│ │ ├── gru_layer.h
│ │ ├── im2col.c
│ │ ├── im2col.h
│ │ ├── im2col_kernels.cu
│ │ ├── image.c
│ │ ├── image.h
│ │ ├── layer.c
│ │ ├── layer.h
│ │ ├── list.c
│ │ ├── list.h
│ │ ├── local_layer.c
│ │ ├── local_layer.h
│ │ ├── lstm_layer.c
│ │ ├── lstm_layer.h
│ │ ├── matrix.c
│ │ ├── matrix.h
│ │ ├── maxpool_layer.c
│ │ ├── maxpool_layer.h
│ │ ├── maxpool_layer_kernels.cu
│ │ ├── network.c
│ │ ├── network.h
│ │ ├── normalization_layer.c
│ │ ├── normalization_layer.h
│ │ ├── option_list.c
│ │ ├── option_list.h
│ │ ├── parser.c
│ │ ├── parser.h
│ │ ├── region_layer.c
│ │ ├── region_layer.h
│ │ ├── reorg_layer.c
│ │ ├── reorg_layer.h
│ │ ├── rnn_layer.c
│ │ ├── rnn_layer.h
│ │ ├── route_layer.c
│ │ ├── route_layer.h
│ │ ├── shortcut_layer.c
│ │ ├── shortcut_layer.h
│ │ ├── softmax_layer.c
│ │ ├── softmax_layer.h
│ │ ├── stb_image.h
│ │ ├── stb_image_write.h
│ │ ├── tree.c
│ │ ├── tree.h
│ │ ├── utils.c
│ │ └── utils.h
│ ├── about.py
│ ├── cli.py
│ ├── data/
│ │ ├── alexnet.cfg
│ │ ├── cifar.cfg
│ │ ├── cifar.test.cfg
│ │ ├── coco.names
│ │ ├── coco.template
│ │ ├── darknet.cfg
│ │ ├── darknet19.cfg
│ │ ├── darknet19_448.cfg
│ │ ├── darknet9000.cfg
│ │ ├── densenet201.cfg
│ │ ├── extraction.cfg
│ │ ├── extraction.conv.cfg
│ │ ├── extraction22k.cfg
│ │ ├── go.cfg
│ │ ├── go.test.cfg
│ │ ├── gru.cfg
│ │ ├── jnet-conv.cfg
│ │ ├── resnet152.cfg
│ │ ├── resnet50.cfg
│ │ ├── rnn.cfg
│ │ ├── rnn.train.cfg
│ │ ├── strided.cfg
│ │ ├── t1.test.cfg
│ │ ├── tiny-yolo-voc.cfg
│ │ ├── tiny-yolo.cfg
│ │ ├── tiny.cfg
│ │ ├── vgg-16.cfg
│ │ ├── vgg-conv.cfg
│ │ ├── voc.names
│ │ ├── writing.cfg
│ │ ├── yolo-voc.2.0.cfg
│ │ ├── yolo-voc.cfg
│ │ ├── yolo.2.0.cfg
│ │ ├── yolo.cfg
│ │ └── yolo9000.cfg
│ ├── lightnet.pxd
│ ├── lightnet.pyx
│ └── util.py
├── requirements.txt
├── setup.py
└── tests/
├── test_boxes.py
├── test_image.py
└── test_network.py
SYMBOL INDEX (920 symbols across 66 files)
FILE: bin/cythonize.py
function process_pyx (line 54) | def process_pyx(fromfile, tofile):
function process_tempita_pyx (line 85) | def process_tempita_pyx(fromfile, tofile):
function load_hashes (line 111) | def load_hashes(filename):
function save_hashes (line 123) | def save_hashes(hash_db, filename):
function sha1_of_file (line 128) | def sha1_of_file(filename):
function normpath (line 138) | def normpath(path):
function get_hash (line 144) | def get_hash(frompath, topath):
function process (line 149) | def process(path, fromfile, tofile, processor_function, hash_db):
function find_process_files (line 170) | def find_process_files(root_dir):
function main (line 191) | def main():
FILE: bin/train.py
function path2bytes (line 10) | def path2bytes(loc):
function main (line 13) | def main(cfg_loc, weight_loc, images_loc):
FILE: lightnet/__init__.py
function load (line 8) | def load(name, path=None):
FILE: lightnet/_darknet/activation_layer.c
function layer (line 12) | layer make_activation_layer(int batch, int inputs, ACTIVATION activation)
function forward_activation_layer (line 38) | void forward_activation_layer(layer l, network net)
function backward_activation_layer (line 44) | void backward_activation_layer(layer l, network net)
function forward_activation_layer_gpu (line 52) | void forward_activation_layer_gpu(layer l, network net)
function backward_activation_layer_gpu (line 58) | void backward_activation_layer_gpu(layer l, network net)
FILE: lightnet/_darknet/activations.c
function ACTIVATION (line 43) | ACTIVATION get_activation(char *s)
function activate (line 62) | float activate(float x, ACTIVATION a)
function activate_array (line 95) | void activate_array(float *x, const int n, const ACTIVATION a)
function gradient (line 103) | float gradient(float x, ACTIVATION a)
function gradient_array (line 136) | void gradient_array(const float *x, const int n, const ACTIVATION a, flo...
FILE: lightnet/_darknet/activations.h
function stair_activate (line 19) | static inline float stair_activate(float x)
function hardtan_activate (line 25) | static inline float hardtan_activate(float x)
function linear_activate (line 31) | static inline float linear_activate(float x){return x;}
function logistic_activate (line 32) | static inline float logistic_activate(float x){return 1./(1. + exp(-x));}
function loggy_activate (line 33) | static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;}
function relu_activate (line 34) | static inline float relu_activate(float x){return x*(x>0);}
function elu_activate (line 35) | static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(e...
function relie_activate (line 36) | static inline float relie_activate(float x){return (x>0) ? x : .01*x;}
function ramp_activate (line 37) | static inline float ramp_activate(float x){return x*(x>0)+.1*x;}
function leaky_activate (line 38) | static inline float leaky_activate(float x){return (x>0) ? x : .1*x;}
function tanh_activate (line 39) | static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)...
function plse_activate (line 40) | static inline float plse_activate(float x)
function lhtan_activate (line 47) | static inline float lhtan_activate(float x)
function lhtan_gradient (line 53) | static inline float lhtan_gradient(float x)
function hardtan_gradient (line 59) | static inline float hardtan_gradient(float x)
function linear_gradient (line 64) | static inline float linear_gradient(float x){return 1;}
function logistic_gradient (line 65) | static inline float logistic_gradient(float x){return (1-x)*x;}
function loggy_gradient (line 66) | static inline float loggy_gradient(float x)
function stair_gradient (line 71) | static inline float stair_gradient(float x)
function relu_gradient (line 76) | static inline float relu_gradient(float x){return (x>0);}
function elu_gradient (line 77) | static inline float elu_gradient(float x){return (x >= 0) + (x < 0)*(x +...
function relie_gradient (line 78) | static inline float relie_gradient(float x){return (x>0) ? 1 : .01;}
function ramp_gradient (line 79) | static inline float ramp_gradient(float x){return (x>0)+.1;}
function leaky_gradient (line 80) | static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;}
function tanh_gradient (line 81) | static inline float tanh_gradient(float x){return 1-x*x;}
function plse_gradient (line 82) | static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01...
FILE: lightnet/_darknet/avgpool_layer.c
function avgpool_layer (line 5) | avgpool_layer make_avgpool_layer(int batch, int w, int h, int c)
function resize_avgpool_layer (line 33) | void resize_avgpool_layer(avgpool_layer *l, int w, int h)
function forward_avgpool_layer (line 40) | void forward_avgpool_layer(const avgpool_layer l, network net)
function backward_avgpool_layer (line 57) | void backward_avgpool_layer(const avgpool_layer l, network net)
FILE: lightnet/_darknet/avgpool_layer.h
type layer (line 9) | typedef layer avgpool_layer;
FILE: lightnet/_darknet/batchnorm_layer.c
function layer (line 6) | layer make_batchnorm_layer(int batch, int w, int h, int c)
function backward_scale_cpu (line 72) | void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, i...
function mean_delta_cpu (line 87) | void mean_delta_cpu(float *delta, float *variance, int batch, int filter...
function variance_delta_cpu (line 102) | void variance_delta_cpu(float *x, float *delta, float *mean, float *var...
function normalize_delta_cpu (line 117) | void normalize_delta_cpu(float *x, float *mean, float *variance, float *...
function resize_batchnorm_layer (line 130) | void resize_batchnorm_layer(layer *layer, int w, int h)
function forward_batchnorm_layer (line 135) | void forward_batchnorm_layer(layer l, network net)
function backward_batchnorm_layer (line 157) | void backward_batchnorm_layer(layer l, network net)
function pull_batchnorm_layer (line 176) | void pull_batchnorm_layer(layer l)
function push_batchnorm_layer (line 182) | void push_batchnorm_layer(layer l)
function forward_batchnorm_layer_gpu (line 189) | void forward_batchnorm_layer_gpu(layer l, network net)
function backward_batchnorm_layer_gpu (line 238) | void backward_batchnorm_layer_gpu(layer l, network net)
FILE: lightnet/_darknet/blas.c
function reorg_cpu (line 9) | void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int...
function flatten (line 32) | void flatten(float *x, int size, int layers, int batch, int forward)
function weighted_sum_cpu (line 50) | void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c)
function weighted_delta_cpu (line 58) | void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *...
function shortcut_cpu (line 68) | void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2,...
function mean_cpu (line 94) | void mean_cpu(float *x, int batch, int filters, int spatial, float *mean)
function variance_cpu (line 110) | void variance_cpu(float *x, float *mean, int batch, int filters, int spa...
function normalize_cpu (line 126) | void normalize_cpu(float *x, float *mean, float *variance, int batch, in...
function const_cpu (line 139) | void const_cpu(int N, float ALPHA, float *X, int INCX)
function mul_cpu (line 145) | void mul_cpu(int N, float *X, int INCX, float *Y, int INCY)
function pow_cpu (line 151) | void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
function axpy_cpu (line 157) | void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
function scal_cpu (line 163) | void scal_cpu(int N, float ALPHA, float *X, int INCX)
function fill_cpu (line 169) | void fill_cpu(int N, float ALPHA, float *X, int INCX)
function deinter_cpu (line 175) | void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
function inter_cpu (line 191) | void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
function copy_cpu (line 205) | void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
function mult_add_into_cpu (line 211) | void mult_add_into_cpu(int N, float *X, float *Y, float *Z)
function smooth_l1_cpu (line 217) | void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float...
function l1_cpu (line 234) | void l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
function l2_cpu (line 244) | void l2_cpu(int n, float *pred, float *truth, float *delta, float *error)
function dot_cpu (line 254) | float dot_cpu(int N, float *X, int INCX, float *Y, int INCY)
function softmax (line 262) | void softmax(float *input, int n, float temp, int stride, float *output)
function softmax_cpu (line 281) | void softmax_cpu(float *input, int n, int batch, int batch_offset, int g...
FILE: lightnet/_darknet/box.c
function box (line 6) | box float_to_box(float *f, int stride)
function dbox (line 16) | dbox derivative(box a, box b)
function overlap (line 67) | float overlap(float x1, float w1, float x2, float w2)
function box_intersection (line 78) | float box_intersection(box a, box b)
function box_union (line 87) | float box_union(box a, box b)
function box_iou (line 94) | float box_iou(box a, box b)
function box_rmse (line 99) | float box_rmse(box a, box b)
function dbox (line 107) | dbox dintersect(box a, box b)
function dbox (line 122) | dbox dunion(box a, box b)
function test_dunion (line 136) | void test_dunion()
function test_dintersect (line 158) | void test_dintersect()
function test_box (line 181) | void test_box()
function dbox (line 210) | dbox diou(box a, box b)
type sortable_bbox (line 233) | typedef struct{
function nms_comparator (line 239) | int nms_comparator(const void *pa, const void *pb)
function do_nms_obj (line 249) | void do_nms_obj(box *boxes, float **probs, int total, int classes, float...
function do_nms_sort (line 277) | void do_nms_sort(box *boxes, float **probs, int total, int classes, floa...
function do_nms (line 307) | void do_nms(box *boxes, float **probs, int total, int classes, float thr...
function box (line 327) | box encode_box(box b, box anchor)
function box (line 337) | box decode_box(box b, box anchor)
FILE: lightnet/_darknet/box.h
type dbox (line 5) | typedef struct{
FILE: lightnet/_darknet/col2im.c
function col2im_add_pixel (line 3) | void col2im_add_pixel(float *im, int height, int width, int channels,
function col2im_cpu (line 14) | void col2im_cpu(float* data_col,
FILE: lightnet/_darknet/connected_layer.c
function layer (line 14) | layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATIO...
function update_connected_layer (line 132) | void update_connected_layer(layer l, update_args a)
function forward_connected_layer (line 151) | void forward_connected_layer(layer l, network net)
function backward_connected_layer (line 169) | void backward_connected_layer(layer l, network net)
function denormalize_connected_layer (line 199) | void denormalize_connected_layer(layer l)
function statistics_connected_layer (line 215) | void statistics_connected_layer(layer l)
function pull_connected_layer (line 235) | void pull_connected_layer(layer l)
function push_connected_layer (line 248) | void push_connected_layer(layer l)
function update_connected_layer_gpu (line 261) | void update_connected_layer_gpu(layer l, update_args a)
function forward_connected_layer_gpu (line 288) | void forward_connected_layer_gpu(layer l, network net)
function backward_connected_layer_gpu (line 308) | void backward_connected_layer_gpu(layer l, network net)
FILE: lightnet/_darknet/convolutional_layer.c
function swap_binary (line 15) | void swap_binary(convolutional_layer *l)
function binarize_weights (line 28) | void binarize_weights(float *weights, int n, int size, float *binary)
function binarize_cpu (line 43) | void binarize_cpu(float *input, int n, float *binary)
function binarize_input (line 51) | void binarize_input(float *input, int n, int size, float *binary)
function convolutional_out_height (line 66) | int convolutional_out_height(convolutional_layer l)
function convolutional_out_width (line 71) | int convolutional_out_width(convolutional_layer l)
function image (line 76) | image get_convolutional_image(convolutional_layer l)
function image (line 81) | image get_convolutional_delta(convolutional_layer l)
function get_workspace_size (line 86) | static size_t get_workspace_size(layer l){
function cudnn_convolutional_setup (line 123) | void cudnn_convolutional_setup(layer *l)
function convolutional_layer (line 176) | convolutional_layer make_convolutional_layer(int batch, int h, int w, in...
function denormalize_convolutional_layer (line 329) | void denormalize_convolutional_layer(convolutional_layer l)
function resize_convolutional_layer (line 369) | void resize_convolutional_layer(convolutional_layer *l, int w, int h)
function add_bias (line 410) | void add_bias(float *output, float *biases, int batch, int n, int size)
function scale_bias (line 422) | void scale_bias(float *output, float *scales, int batch, int n, int size)
function backward_bias (line 434) | void backward_bias(float *bias_updates, float *delta, int batch, int n, ...
function forward_convolutional_layer (line 444) | void forward_convolutional_layer(convolutional_layer l, network net)
function backward_convolutional_layer (line 482) | void backward_convolutional_layer(convolutional_layer l, network net)
function update_convolutional_layer (line 523) | void update_convolutional_layer(convolutional_layer l, update_args a)
function image (line 544) | image get_convolutional_weight(convolutional_layer l, int i)
function rgbgr_weights (line 552) | void rgbgr_weights(convolutional_layer l)
function rescale_weights (line 563) | void rescale_weights(convolutional_layer l, float scale, float trans)
function image (line 576) | image *get_weights(convolutional_layer l)
function image (line 593) | image *visualize_convolutional_layer(convolutional_layer l, char *window...
FILE: lightnet/_darknet/convolutional_layer.h
type layer (line 10) | typedef layer convolutional_layer;
FILE: lightnet/_darknet/cost_layer.c
function COST_TYPE (line 10) | COST_TYPE get_cost_type(char *s)
function cost_layer (line 38) | cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, f...
function resize_cost_layer (line 65) | void resize_cost_layer(cost_layer *l, int inputs)
function forward_cost_layer (line 79) | void forward_cost_layer(cost_layer l, network net)
function backward_cost_layer (line 98) | void backward_cost_layer(const cost_layer l, network net)
function pull_cost_layer (line 105) | void pull_cost_layer(cost_layer l)
function push_cost_layer (line 110) | void push_cost_layer(cost_layer l)
function float_abs_compare (line 115) | int float_abs_compare (const void * a, const void * b)
function forward_cost_layer_gpu (line 124) | void forward_cost_layer_gpu(cost_layer l, network net)
function backward_cost_layer_gpu (line 166) | void backward_cost_layer_gpu(const cost_layer l, network net)
FILE: lightnet/_darknet/cost_layer.h
type layer (line 6) | typedef layer cost_layer;
FILE: lightnet/_darknet/crnn_layer.c
function increment_layer (line 13) | static void increment_layer(layer *l, int steps)
function layer (line 29) | layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters...
function update_crnn_layer (line 84) | void update_crnn_layer(layer l, update_args a)
function forward_crnn_layer (line 91) | void forward_crnn_layer(layer l, network net)
function backward_crnn_layer (line 132) | void backward_crnn_layer(layer l, network net)
function pull_crnn_layer (line 183) | void pull_crnn_layer(layer l)
function push_crnn_layer (line 190) | void push_crnn_layer(layer l)
function update_crnn_layer_gpu (line 197) | void update_crnn_layer_gpu(layer l, update_args a)
function forward_crnn_layer_gpu (line 204) | void forward_crnn_layer_gpu(layer l, network net)
function backward_crnn_layer_gpu (line 244) | void backward_crnn_layer_gpu(layer l, network net)
FILE: lightnet/_darknet/crop_layer.c
function image (line 5) | image get_crop_image(crop_layer l)
function backward_crop_layer (line 13) | void backward_crop_layer(const crop_layer l, network net){}
function backward_crop_layer_gpu (line 14) | void backward_crop_layer_gpu(const crop_layer l, network net){}
function crop_layer (line 16) | crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_heig...
function resize_crop_layer (line 48) | void resize_crop_layer(layer *l, int w, int h)
function forward_crop_layer (line 67) | void forward_crop_layer(const crop_layer l, network net)
FILE: lightnet/_darknet/crop_layer.h
type layer (line 8) | typedef layer crop_layer;
FILE: lightnet/_darknet/cuda.c
function cuda_set_device (line 12) | void cuda_set_device(int n)
function cuda_get_device (line 19) | int cuda_get_device()
function check_error (line 27) | void check_error(cudaError_t status)
function dim3 (line 51) | dim3 cuda_gridsize(size_t n){
function cudnnHandle_t (line 65) | cudnnHandle_t cudnn_handle()
function cublasHandle_t (line 78) | cublasHandle_t blas_handle()
function cuda_random (line 106) | void cuda_random(float *x_gpu, size_t n)
function cuda_compare (line 120) | float cuda_compare(float *x_gpu, float *x, size_t n, char *s)
function cuda_free (line 147) | void cuda_free(float *x_gpu)
function cuda_push_array (line 153) | void cuda_push_array(float *x_gpu, float *x, size_t n)
function cuda_pull_array (line 160) | void cuda_pull_array(float *x_gpu, float *x, size_t n)
function cuda_mag_array (line 167) | float cuda_mag_array(float *x_gpu, size_t n)
function cuda_set_device (line 176) | void cuda_set_device(int n){}
FILE: lightnet/_darknet/darknet.h
type metadata (line 36) | typedef struct{
type tree (line 43) | typedef struct{
type ACTIVATION (line 56) | typedef enum{
type BINARY_ACTIVATION (line 60) | typedef enum{
type LAYER_TYPE (line 64) | typedef enum {
type COST_TYPE (line 92) | typedef enum{
type update_args (line 96) | typedef struct{
type network (line 108) | struct network
type network (line 109) | typedef struct network network;
type layer (line 111) | struct layer
type layer (line 112) | typedef struct layer layer;
type layer (line 114) | struct layer{
type learning_rate_policy (line 413) | typedef enum {
type network (line 417) | typedef struct network{
type augment_args (line 484) | typedef struct {
type image (line 494) | typedef struct {
type box (line 501) | typedef struct{
type matrix (line 505) | typedef struct matrix{
type data (line 511) | typedef struct{
type data_type (line 520) | typedef enum {
type load_args (line 524) | typedef struct load_args{
type box_label (line 557) | typedef struct{
type node (line 569) | typedef struct node{
type list (line 575) | typedef struct list{
FILE: lightnet/_darknet/data.c
function list (line 12) | list *get_paths(char *filename)
function matrix (line 68) | matrix load_image_paths_gray(char **paths, int n, int w, int h)
function matrix (line 89) | matrix load_image_paths(char **paths, int n, int w, int h)
function matrix (line 105) | matrix load_image_augment_paths(char **paths, int n, int min, int max, i...
function box_label (line 138) | box_label *read_boxes(char *filename, int *n)
function randomize_boxes (line 168) | void randomize_boxes(box_label *b, int n)
function correct_boxes (line 179) | void correct_boxes(box_label *boxes, int n, float dx, float dy, float sx...
function fill_truth_swag (line 216) | void fill_truth_swag(char *path, float *truth, int classes, int flip, fl...
function fill_truth_region (line 254) | void fill_truth_region(char *path, float *truth, int classes, int num_bo...
function load_rle (line 302) | void load_rle(image im, int *rle, int n)
function or_image (line 318) | void or_image(image src, image dest, int c)
function exclusive_image (line 326) | void exclusive_image(image src)
function box (line 341) | box bound_image(image im)
function fill_truth_iseg (line 363) | void fill_truth_iseg(char *path, int num_boxes, float *truth, int classe...
function fill_truth_detection (line 408) | void fill_truth_detection(char *path, int num_boxes, float *truth, int c...
function print_letters (line 448) | void print_letters(float *pred, int n)
function fill_truth_captcha (line 458) | void fill_truth_captcha(char *path, int n, float *truth)
function data (line 473) | data load_data_captcha(char **paths, int n, int m, int k, int w, int h)
function data (line 488) | data load_data_captcha_encode(char **paths, int n, int m, int w, int h)
function fill_truth (line 500) | void fill_truth(char *path, char **labels, int k, float *truth)
function fill_hierarchy (line 514) | void fill_hierarchy(float *truth, int k, tree *hierarchy)
function matrix (line 546) | matrix load_regression_labels_paths(char **paths, int n)
function matrix (line 564) | matrix load_labels_paths(char **paths, int n, char **labels, int k, tree...
function matrix (line 577) | matrix load_tags_paths(char **paths, int n, int k)
function free_data (line 613) | void free_data(data d)
function image (line 624) | image get_segmentation_image(char *path, int w, int h, int classes)
function image (line 651) | image get_segmentation_image2(char *path, int w, int h, int classes)
function data (line 685) | data load_data_seg(int n, char **paths, int m, int w, int h, int classes...
function data (line 733) | data load_data_iseg(int n, char **paths, int m, int w, int h, int classe...
function data (line 773) | data load_data_region(int n, char **paths, int m, int w, int h, int size...
function data (line 826) | data load_data_compare(int n, char **paths, int m, int classes, int w, i...
function data (line 890) | data load_data_swag(char **paths, int n, int classes, float jitter)
function data (line 943) | data load_data_detection(int n, char **paths, int m, int w, int h, int b...
type load_args (line 998) | struct load_args
function pthread_t (line 1038) | pthread_t load_data_in_thread(load_args args)
function load_data_blocking (line 1076) | void load_data_blocking(load_args args)
function pthread_t (line 1083) | pthread_t load_data(load_args args)
function data (line 1092) | data load_data_writing(char **paths, int n, int m, int w, int h, int out...
function data (line 1107) | data load_data_old(char **paths, int n, int m, char **labels, int k, int...
function data (line 1132) | data load_data_super(char **paths, int n, int m, int w, int h, int scale)
function data (line 1162) | data load_data_regression(char **paths, int n, int m, int min, int max, ...
function data (line 1173) | data select_data(data *orig, int *inds)
function data (line 1196) | data *tile_data(data orig, int divs, int size)
function data (line 1223) | data resize_data(data orig, int w, int h)
function data (line 1243) | data load_data_augment(char **paths, int n, int m, char **labels, int k,...
function data (line 1256) | data load_data_tag(char **paths, int n, int m, int k, int min, int max, ...
function matrix (line 1269) | matrix concat_matrix(matrix m1, matrix m2)
function data (line 1285) | data concat_data(data d1, data d2)
function data (line 1296) | data concat_datas(data *d, int n)
function data (line 1308) | data load_categorical_data_csv(char *filename, int target, int k)
function data (line 1325) | data load_cifar10_data(char *filename)
function get_random_batch (line 1352) | void get_random_batch(data d, int n, float *X, float *y)
function get_next_batch (line 1362) | void get_next_batch(data d, int n, int offset, float *X, float *y)
function smooth_data (line 1372) | void smooth_data(data d)
function data (line 1384) | data load_all_cifar10()
function data (line 1417) | data load_go(char *filename)
function randomize_data (line 1464) | void randomize_data(data d)
function scale_data_rows (line 1479) | void scale_data_rows(data d, float s)
function translate_data_rows (line 1487) | void translate_data_rows(data d, float s)
function data (line 1495) | data copy_data(data d)
function normalize_data_rows (line 1508) | void normalize_data_rows(data d)
function data (line 1516) | data get_data_part(data d, int part, int total)
function data (line 1529) | data get_random_data(data d, int num)
function data (line 1552) | data *split_data(data d, int part, int total)
FILE: lightnet/_darknet/data.h
function distance_from_edge (line 11) | static inline float distance_from_edge(int x, int max)
FILE: lightnet/_darknet/deconvolutional_layer.c
function get_workspace_size (line 14) | static size_t get_workspace_size(layer l){
function layer (line 19) | layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, ...
function denormalize_deconvolutional_layer (line 147) | void denormalize_deconvolutional_layer(layer l)
function resize_deconvolutional_layer (line 162) | void resize_deconvolutional_layer(layer *l, int h, int w)
function forward_deconvolutional_layer (line 201) | void forward_deconvolutional_layer(const layer l, network net)
function backward_deconvolutional_layer (line 228) | void backward_deconvolutional_layer(layer l, network net)
function update_deconvolutional_layer (line 269) | void update_deconvolutional_layer(layer l, update_args a)
FILE: lightnet/_darknet/demo.c
function demo (line 118) | void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, ...
function demo_compare (line 206) | void demo_compare(char *cfg1, char *weight1, char *cfg2, char *weight2, ...
function demo (line 294) | void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, ...
FILE: lightnet/_darknet/detection_layer.c
function detection_layer (line 14) | detection_layer make_detection_layer(int batch, int inputs, int n, int s...
function forward_detection_layer (line 50) | void forward_detection_layer(const detection_layer l, network net)
function backward_detection_layer (line 220) | void backward_detection_layer(const detection_layer l, network net)
function get_detection_boxes (line 225) | void get_detection_boxes(layer l, int w, int h, float thresh, float **pr...
function forward_detection_layer_gpu (line 257) | void forward_detection_layer_gpu(const detection_layer l, network net)
function backward_detection_layer_gpu (line 272) | void backward_detection_layer_gpu(detection_layer l, network net)
FILE: lightnet/_darknet/detection_layer.h
type layer (line 7) | typedef layer detection_layer;
FILE: lightnet/_darknet/dropout_layer.c
function dropout_layer (line 7) | dropout_layer make_dropout_layer(int batch, int inputs, float probability)
function resize_dropout_layer (line 28) | void resize_dropout_layer(dropout_layer *l, int inputs)
function forward_dropout_layer (line 38) | void forward_dropout_layer(dropout_layer l, network net)
function backward_dropout_layer (line 50) | void backward_dropout_layer(dropout_layer l, network net)
FILE: lightnet/_darknet/dropout_layer.h
type layer (line 7) | typedef layer dropout_layer;
FILE: lightnet/_darknet/gemm.c
function gemm_bin (line 8) | void gemm_bin(int M, int N, int K, float ALPHA,
function time_random_matrix (line 40) | void time_random_matrix(int TA, int TB, int m, int k, int n)
function gemm (line 65) | void gemm(int TA, int TB, int M, int N, int K, float ALPHA,
function gemm_nn (line 80) | void gemm_nn(int M, int N, int K, float ALPHA,
function gemm_nt (line 97) | void gemm_nt(int M, int N, int K, float ALPHA,
function gemm_tn (line 115) | void gemm_tn(int M, int N, int K, float ALPHA,
function gemm_tt (line 132) | void gemm_tt(int M, int N, int K, float ALPHA,
function gemm_cblas (line 156) | void gemm_cblas(int TA, int TB, int M, int N, int K, float ALPHA,
function gemm_cpu (line 173) | void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA,
function gemm_gpu (line 200) | void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA,
function time_gpu_random_matrix (line 217) | void time_gpu_random_matrix(int TA, int TB, int m, int k, int n)
function time_gpu (line 241) | void time_gpu(int TA, int TB, int m, int k, int n)
function test_gpu_accuracy (line 276) | void test_gpu_accuracy(int TA, int TB, int m, int k, int n)
function test_gpu_blas (line 313) | int test_gpu_blas()
FILE: lightnet/_darknet/gru_layer.c
function increment_layer (line 13) | static void increment_layer(layer *l, int steps)
function layer (line 29) | layer make_gru_layer(int batch, int inputs, int outputs, int steps, int ...
function update_gru_layer (line 118) | void update_gru_layer(layer l, update_args a)
function forward_gru_layer (line 128) | void forward_gru_layer(layer l, network net)
function backward_gru_layer (line 204) | void backward_gru_layer(layer l, network net)
function pull_gru_layer (line 210) | void pull_gru_layer(layer l)
function push_gru_layer (line 214) | void push_gru_layer(layer l)
function update_gru_layer_gpu (line 218) | void update_gru_layer_gpu(layer l, update_args a)
function forward_gru_layer_gpu (line 228) | void forward_gru_layer_gpu(layer l, network net)
function backward_gru_layer_gpu (line 302) | void backward_gru_layer_gpu(layer l, network net)
FILE: lightnet/_darknet/im2col.c
function im2col_get_pixel (line 3) | inline float im2col_get_pixel(float *im, int height, int width, int chan...
function im2col_cpu (line 16) | void im2col_cpu(float* data_im,
FILE: lightnet/_darknet/image.c
function get_color (line 17) | float get_color(int c, int x, int max)
function image (line 28) | image mask_to_rgb(image mask)
function get_pixel (line 47) | static float get_pixel(image m, int x, int y, int c)
function get_pixel_extend (line 52) | static float get_pixel_extend(image m, int x, int y, int c)
function set_pixel (line 64) | static void set_pixel(image m, int x, int y, int c, float val)
function add_pixel (line 70) | static void add_pixel(image m, int x, int y, int c, float val)
function bilinear_interpolate (line 76) | static float bilinear_interpolate(image im, float x, float y, int c)
function composite_image (line 92) | void composite_image(image source, image dest, int dx, int dy)
function image (line 106) | image border_image(image a, int border)
function image (line 122) | image tile_images(image a, image b, int dx)
function image (line 132) | image get_label(image **characters, char *string, int size)
function draw_label (line 148) | void draw_label(image a, int r, int c, image label, const float *rgb)
function draw_box (line 165) | void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g,...
function draw_box_width (line 201) | void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, floa...
function draw_bbox (line 209) | void draw_bbox(image a, box bbox, int w, float r, float g, float b)
function image (line 222) | image **load_alphabet()
function draw_detections (line 238) | void draw_detections(image im, int num, float thresh, box *boxes, float ...
function transpose_image (line 310) | void transpose_image(image im)
function rotate_image_cw (line 326) | void rotate_image_cw(image im, int times)
function flip_image (line 347) | void flip_image(image a)
function image (line 363) | image image_distance(image a, image b)
function ghost_image (line 378) | void ghost_image(image source, image dest, int dx, int dy)
function embed_image (line 397) | void embed_image(image source, image dest, int dx, int dy)
function image (line 410) | image collapse_image_layers(image source, int border)
function constrain_image (line 425) | void constrain_image(image im)
function normalize_image (line 434) | void normalize_image(image p)
function normalize_image2 (line 454) | void normalize_image2(image p)
function copy_image_into (line 483) | void copy_image_into(image src, image dest)
function image (line 488) | image copy_image(image p)
function rgbgr_image (line 496) | void rgbgr_image(image im)
function show_image_cv (line 507) | void show_image_cv(image p, const char *name, IplImage *disp)
function show_image (line 544) | void show_image(image p, const char *name)
function ipl_into_image (line 561) | void ipl_into_image(IplImage* src, image im)
function image (line 579) | image ipl_to_image(IplImage* src)
function image (line 589) | image load_image_cv(char *filename, int channels)
function flush_stream_buffer (line 615) | void flush_stream_buffer(CvCapture *cap, int n)
function image (line 623) | image get_image_from_stream(CvCapture *cap)
function fill_image_from_stream (line 632) | int fill_image_from_stream(CvCapture *cap, image im)
function save_image_jpg (line 641) | void save_image_jpg(image p, const char *name)
function save_image_png (line 665) | void save_image_png(image im, const char *name)
function save_image (line 682) | void save_image(image im, const char *name)
function show_image_layers (line 692) | void show_image_layers(image p, char *name)
function show_image_collapsed (line 704) | void show_image_collapsed(image p, char *name)
function image (line 711) | image make_empty_image(int w, int h, int c)
function image (line 721) | image make_image(int w, int h, int c)
function image (line 728) | image make_random_image(int w, int h, int c)
function image (line 739) | image float_to_image(int w, int h, int c, float *data)
function place_image (line 746) | void place_image(image im, int w, int h, int dx, int dy, image canvas)
function image (line 761) | image center_crop_image(image im, int w, int h)
function image (line 770) | image rotate_crop_image(image im, float rad, float s, int w, int h, floa...
function image (line 789) | image rotate_image(image im, float rad)
function fill_image (line 808) | void fill_image(image m, float s)
function translate_image (line 814) | void translate_image(image m, float s)
function scale_image (line 820) | void scale_image(image m, float s)
function image (line 826) | image crop_image(image im, int dx, int dy, int w, int h)
function best_3d_shift_r (line 846) | int best_3d_shift_r(image a, image b, int min, int max)
function best_3d_shift (line 860) | int best_3d_shift(image a, image b, int min, int max)
function composite_3d (line 878) | void composite_3d(char *f1, char *f2, char *out, int delta)
function letterbox_image_into (line 913) | void letterbox_image_into(image im, int w, int h, image boxed)
function image (line 929) | image letterbox_image(image im, int w, int h)
function image (line 950) | image resize_max(image im, int max)
function image (line 966) | image resize_min(image im, int min)
function image (line 982) | image random_crop_image(image im, int w, int h)
function augment_args (line 990) | augment_args random_augment_args(image im, float angle, float aspect, in...
function image (line 1017) | image random_augment_image(image im, float angle, float aspect, int low,...
function three_way_max (line 1024) | float three_way_max(float a, float b, float c)
function three_way_min (line 1029) | float three_way_min(float a, float b, float c)
function yuv_to_rgb (line 1034) | void yuv_to_rgb(image im)
function rgb_to_yuv (line 1057) | void rgb_to_yuv(image im)
function rgb_to_hsv (line 1081) | void rgb_to_hsv(image im)
function hsv_to_rgb (line 1118) | void hsv_to_rgb(image im)
function grayscale_image_3c (line 1159) | void grayscale_image_3c(image im)
function image (line 1177) | image grayscale_image(image im)
function image (line 1193) | image threshold_image(image im, float thresh)
function image (line 1203) | image blend_image(image fore, image back, float alpha)
function scale_image_channel (line 1220) | void scale_image_channel(image im, int c, float v)
function translate_image_channel (line 1232) | void translate_image_channel(image im, int c, float v)
function image (line 1244) | image binarize_image(image im)
function saturate_image (line 1255) | void saturate_image(image im, float sat)
function hue_image (line 1263) | void hue_image(image im, float hue)
function exposure_image (line 1276) | void exposure_image(image im, float sat)
function distort_image (line 1284) | void distort_image(image im, float hue, float sat, float val)
function random_distort_image (line 1299) | void random_distort_image(image im, float hue, float saturation, float e...
function saturate_exposure_image (line 1307) | void saturate_exposure_image(image im, float sat, float exposure)
function image (line 1316) | image resize_image(image im, int w, int h)
function test_resize (line 1361) | void test_resize(char *filename)
function image (line 1411) | image load_image_stb(char *filename, int channels)
function image (line 1435) | image load_image(char *filename, int w, int h, int c)
function image (line 1451) | image load_image_color(char *filename, int w, int h)
function image (line 1456) | image get_image_layer(image m, int l)
function print_image (line 1465) | void print_image(image m)
function image (line 1482) | image collapse_images_vert(image *ims, int n)
function image (line 1517) | image collapse_images_horz(image *ims, int n)
function show_image_normalized (line 1553) | void show_image_normalized(image im, const char *name)
function show_images (line 1561) | void show_images(image *ims, int n, char *window)
function free_image (line 1579) | void free_image(image m)
FILE: lightnet/_darknet/layer.c
function free_layer (line 6) | void free_layer(layer l)
FILE: lightnet/_darknet/list.c
function list (line 5) | list *make_list()
function list_insert (line 40) | void list_insert(list *l, void *val)
function free_node (line 57) | void free_node(node *n)
function free_list (line 67) | void free_list(list *l)
function free_list_contents (line 73) | void free_list_contents(list *l)
FILE: lightnet/_darknet/local_layer.c
function local_out_height (line 10) | int local_out_height(local_layer l)
function local_out_width (line 18) | int local_out_width(local_layer l)
function local_layer (line 26) | local_layer make_local_layer(int batch, int h, int w, int c, int n, int ...
function forward_local_layer (line 91) | void forward_local_layer(const local_layer l, network net)
function backward_local_layer (line 122) | void backward_local_layer(local_layer l, network net)
function update_local_layer (line 167) | void update_local_layer(local_layer l, update_args a)
function forward_local_layer_gpu (line 186) | void forward_local_layer_gpu(const local_layer l, network net)
function backward_local_layer_gpu (line 217) | void backward_local_layer_gpu(local_layer l, network net)
function update_local_layer_gpu (line 261) | void update_local_layer_gpu(local_layer l, update_args a)
function pull_local_layer (line 278) | void pull_local_layer(local_layer l)
function push_local_layer (line 286) | void push_local_layer(local_layer l)
FILE: lightnet/_darknet/local_layer.h
type layer (line 10) | typedef layer local_layer;
FILE: lightnet/_darknet/lstm_layer.c
function increment_layer (line 13) | static void increment_layer(layer *l, int steps)
function layer (line 29) | layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int...
function update_lstm_layer (line 144) | void update_lstm_layer(layer l, update_args a)
function forward_lstm_layer (line 156) | void forward_lstm_layer(layer l, network state)
function backward_lstm_layer (line 242) | void backward_lstm_layer(layer l, network state)
function update_lstm_layer_gpu (line 386) | void update_lstm_layer_gpu(layer l, update_args a)
function forward_lstm_layer_gpu (line 398) | void forward_lstm_layer_gpu(layer l, network state)
function backward_lstm_layer_gpu (line 484) | void backward_lstm_layer_gpu(layer l, network state)
FILE: lightnet/_darknet/matrix.c
function free_matrix (line 10) | void free_matrix(matrix m)
function matrix_topk_accuracy (line 17) | float matrix_topk_accuracy(matrix truth, matrix guess, int k)
function scale_matrix (line 37) | void scale_matrix(matrix m, float scale)
function matrix (line 47) | matrix resize_matrix(matrix m, int size)
function matrix_add_matrix (line 66) | void matrix_add_matrix(matrix from, matrix to)
function matrix (line 77) | matrix copy_matrix(matrix m)
function matrix (line 91) | matrix make_matrix(int rows, int cols)
function matrix (line 104) | matrix hold_out_matrix(matrix *m, int n)
function matrix (line 133) | matrix csv_to_matrix(char *filename)
function matrix_to_csv (line 161) | void matrix_to_csv(matrix m)
function print_matrix (line 174) | void print_matrix(matrix m)
FILE: lightnet/_darknet/maxpool_layer.c
function image (line 5) | image get_maxpool_image(maxpool_layer l)
function image (line 13) | image get_maxpool_delta(maxpool_layer l)
function maxpool_layer (line 21) | maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int siz...
function resize_maxpool_layer (line 54) | void resize_maxpool_layer(maxpool_layer *l, int w, int h)
function forward_maxpool_layer (line 79) | void forward_maxpool_layer(const maxpool_layer l, network net)
function backward_maxpool_layer (line 116) | void backward_maxpool_layer(const maxpool_layer l, network net)
FILE: lightnet/_darknet/maxpool_layer.h
type layer (line 9) | typedef layer maxpool_layer;
FILE: lightnet/_darknet/network.c
function load_args (line 33) | load_args get_base_args(network *net)
function network (line 51) | network *load_network(char *cfg, char *weights, int clear)
function get_current_batch (line 61) | size_t get_current_batch(network *net)
function reset_network_state (line 67) | void reset_network_state(network *net, int b)
function reset_rnn (line 83) | void reset_rnn(network *net)
function get_current_rate (line 88) | float get_current_rate(network *net)
function network (line 173) | network *make_network(int n)
function forward_network (line 184) | void forward_network(network *netp)
function update_network (line 209) | void update_network(network *netp)
function calc_network_cost (line 239) | void calc_network_cost(network *netp)
function get_predicted_class_network (line 254) | int get_predicted_class_network(network *net)
function backward_network (line 259) | void backward_network(network *netp)
function train_network_datum (line 285) | float train_network_datum(network *net)
function train_network_sgd (line 296) | float train_network_sgd(network *net, data d, int n)
function train_network (line 310) | float train_network(network *net, data d)
function set_temp_network (line 326) | void set_temp_network(network *net, float t)
function set_batch_network (line 335) | void set_batch_network(network *net, int b)
function resize_network (line 354) | int resize_network(network *net, int w, int h)
function layer (line 427) | layer get_network_detection_layer(network *net)
function image (line 440) | image get_network_image_layer(network *net, int i)
function image (line 453) | image get_network_image(network *net)
function visualize_network (line 464) | void visualize_network(network *net)
function top_predictions (line 478) | void top_predictions(network *net, int k, int *index)
function num_boxes (line 497) | int num_boxes(network *net)
function box (line 503) | box *make_boxes(network *net)
function network_detect (line 519) | void network_detect(network *net, image im, float thresh, float hier_thr...
function network_width (line 538) | int network_width(network *net){return net->w;}
function network_height (line 539) | int network_height(network *net){return net->h;}
function matrix (line 541) | matrix network_predict_data_multi(network *net, data test, int n)
function matrix (line 566) | matrix network_predict_data(network *net, data test)
function print_network (line 591) | void print_network(network *net)
function compare_networks (line 608) | void compare_networks(network *n1, network *n2, data test)
function network_accuracy (line 633) | float network_accuracy(network *net, data d)
function layer (line 651) | layer get_network_output_layer(network *net)
function network_accuracy_multi (line 660) | float network_accuracy_multi(network *net, data d, int n)
function free_network (line 668) | void free_network(network *net)
function layer (line 688) | layer network_output_layer(network *net)
function network_inputs (line 697) | int network_inputs(network *net)
function network_outputs (line 702) | int network_outputs(network *net)
function forward_network_gpu (line 714) | void forward_network_gpu(network *netp)
function backward_network_gpu (line 742) | void backward_network_gpu(network *netp)
function update_network_gpu (line 765) | void update_network_gpu(network *netp)
function harmless_update_network_gpu (line 790) | void harmless_update_network_gpu(network *netp)
type train_args (line 803) | typedef struct {
function pthread_t (line 818) | pthread_t train_network_in_thread(network *net, data d, float *err)
function merge_weights (line 829) | void merge_weights(layer l, layer base)
function scale_weights (line 843) | void scale_weights(layer l, float s)
function pull_weights (line 858) | void pull_weights(layer l)
function push_weights (line 870) | void push_weights(layer l)
function distribute_weights (line 882) | void distribute_weights(layer l, layer base)
function sync_layer (line 979) | void sync_layer(network **nets, int n, int j)
type sync_args (line 999) | typedef struct{
function pthread_t (line 1013) | pthread_t sync_layer_in_thread(network **nets, int n, int j)
function sync_nets (line 1024) | void sync_nets(network **nets, int n, int interval)
function train_networks (line 1043) | float train_networks(network **nets, int n, data d, int interval)
function pull_network_output (line 1075) | void pull_network_output(network *net)
FILE: lightnet/_darknet/normalization_layer.c
function layer (line 6) | layer make_normalization_layer(int batch, int w, int h, int c, int size,...
function resize_normalization_layer (line 40) | void resize_normalization_layer(layer *layer, int w, int h)
function forward_normalization_layer (line 66) | void forward_normalization_layer(const layer layer, network net)
function backward_normalization_layer (line 97) | void backward_normalization_layer(const layer layer, network net)
function forward_normalization_layer_gpu (line 110) | void forward_normalization_layer_gpu(const layer layer, network net)
function backward_normalization_layer_gpu (line 141) | void backward_normalization_layer_gpu(const layer layer, network net)
FILE: lightnet/_darknet/option_list.c
function list (line 7) | list *read_data_cfg(char *filename)
function metadata (line 35) | metadata get_metadata(char *file)
function read_option (line 52) | int read_option(char *s, list *options)
function option_insert (line 70) | void option_insert(list *l, char *key, char *val)
function option_unused (line 79) | void option_unused(list *l)
function option_find_int (line 112) | int option_find_int(list *l, char *key, int def)
function option_find_int_quiet (line 120) | int option_find_int_quiet(list *l, char *key, int def)
function option_find_float_quiet (line 127) | float option_find_float_quiet(list *l, char *key, float def)
function option_find_float (line 134) | float option_find_float(list *l, char *key, float def)
FILE: lightnet/_darknet/option_list.h
type kvp (line 5) | typedef struct{
FILE: lightnet/_darknet/parser.c
type section (line 35) | typedef struct{
function LAYER_TYPE (line 42) | LAYER_TYPE string_to_layer_type(char * type)
function free_section (line 79) | void free_section(section *s)
function parse_data (line 95) | void parse_data(char *data, float *a, int n)
type size_params (line 111) | typedef struct size_params{
function local_layer (line 122) | local_layer parse_local(list *options, size_params params)
function layer (line 143) | layer parse_deconvolutional(list *options, size_params params)
function convolutional_layer (line 169) | convolutional_layer parse_convolutional(list *options, size_params params)
function layer (line 199) | layer parse_crnn(list *options, size_params params)
function layer (line 214) | layer parse_rnn(list *options, size_params params)
function layer (line 228) | layer parse_gru(list *options, size_params params)
function layer (line 239) | layer parse_lstm(list *options, size_params params)
function layer (line 249) | layer parse_connected(list *options, size_params params)
function softmax_layer (line 260) | softmax_layer parse_softmax(list *options, size_params params)
function layer (line 274) | layer parse_region(list *options, size_params params)
function detection_layer (line 325) | detection_layer parse_detection(list *options, size_params params)
function cost_layer (line 349) | cost_layer parse_cost(list *options, size_params params)
function crop_layer (line 361) | crop_layer parse_crop(list *options, size_params params)
function layer (line 385) | layer parse_reorg(list *options, size_params params)
function maxpool_layer (line 403) | maxpool_layer parse_maxpool(list *options, size_params params)
function avgpool_layer (line 420) | avgpool_layer parse_avgpool(list *options, size_params params)
function dropout_layer (line 433) | dropout_layer parse_dropout(list *options, size_params params)
function layer (line 443) | layer parse_normalization(list *options, size_params params)
function layer (line 453) | layer parse_batchnorm(list *options, size_params params)
function layer (line 459) | layer parse_shortcut(list *options, size_params params, network *net)
function layer (line 477) | layer parse_activation(list *options, size_params params)
function route_layer (line 494) | route_layer parse_route(list *options, size_params params, network *net)
function learning_rate_policy (line 535) | learning_rate_policy get_policy(char *s)
function parse_net_options (line 548) | void parse_net_options(list *options, network *net)
function is_network (line 628) | int is_network(section *s)
function network (line 634) | network *parse_network_cfg(char *filename)
function list (line 770) | list *read_cfg(char *filename)
function save_convolutional_weights_binary (line 805) | void save_convolutional_weights_binary(layer l, FILE *fp)
function save_convolutional_weights (line 837) | void save_convolutional_weights(layer l, FILE *fp)
function save_batchnorm_weights (line 858) | void save_batchnorm_weights(layer l, FILE *fp)
function save_connected_weights (line 870) | void save_connected_weights(layer l, FILE *fp)
function save_weights_upto (line 886) | void save_weights_upto(network *net, char *filename, int cutoff)
function save_weights (line 958) | void save_weights(network *net, char *filename)
function transpose_matrix (line 963) | void transpose_matrix(float *a, int rows, int cols)
function load_connected_weights (line 976) | void load_connected_weights(layer l, FILE *fp, int transpose)
function load_batchnorm_weights (line 1000) | void load_batchnorm_weights(layer l, FILE *fp)
function load_convolutional_weights_binary (line 1012) | void load_convolutional_weights_binary(layer l, FILE *fp)
function load_convolutional_weights (line 1042) | void load_convolutional_weights(layer l, FILE *fp)
function load_weights_upto (line 1095) | void load_weights_upto(network *net, char *filename, int start, int cutoff)
function load_weights (line 1185) | void load_weights(network *net, char *filename)
FILE: lightnet/_darknet/region_layer.c
function layer (line 13) | layer make_region_layer(int batch, int w, int h, int n, int classes, int...
function resize_region_layer (line 56) | void resize_region_layer(layer *l, int w, int h)
function box (line 76) | box get_region_box(float *x, float *biases, int n, int index, int i, int...
function delta_region_box (line 86) | float delta_region_box(box truth, float *x, float *biases, int n, int in...
function delta_region_mask (line 103) | void delta_region_mask(float *truth, float *x, int n, int index, float *...
function delta_region_class (line 112) | void delta_region_class(float *output, float *delta, int index, int clas...
function logit (line 141) | float logit(float x)
function tisnan (line 146) | float tisnan(float x)
function entry_index (line 151) | int entry_index(layer l, int batch, int location, int entry)
function forward_region_layer (line 158) | void forward_region_layer(const layer l, network net)
function backward_region_layer (line 329) | void backward_region_layer(const layer l, network net)
function correct_region_boxes (line 342) | void correct_region_boxes(box *boxes, int n, int w, int h, int netw, int...
function get_region_boxes (line 370) | void get_region_boxes(layer l, int w, int h, int netw, int neth, float t...
function forward_region_layer_gpu (line 461) | void forward_region_layer_gpu(const layer l, network net)
function backward_region_layer_gpu (line 570) | void backward_region_layer_gpu(const layer l, network net)
function zero_objectness (line 589) | void zero_objectness(layer l)
FILE: lightnet/_darknet/reorg_layer.c
function layer (line 8) | layer make_reorg_layer(int batch, int w, int h, int c, int stride, int r...
function resize_reorg_layer (line 53) | void resize_reorg_layer(layer *l, int w, int h)
function forward_reorg_layer (line 86) | void forward_reorg_layer(const layer l, network net)
function backward_reorg_layer (line 107) | void backward_reorg_layer(const layer l, network net)
function forward_reorg_layer_gpu (line 129) | void forward_reorg_layer_gpu(layer l, network net)
function backward_reorg_layer_gpu (line 149) | void backward_reorg_layer_gpu(layer l, network net)
FILE: lightnet/_darknet/rnn_layer.c
function increment_layer (line 13) | static void increment_layer(layer *l, int steps)
function layer (line 29) | layer make_rnn_layer(int batch, int inputs, int outputs, int steps, ACTI...
function update_rnn_layer (line 82) | void update_rnn_layer(layer l, update_args a)
function forward_rnn_layer (line 89) | void forward_rnn_layer(layer l, network net)
function backward_rnn_layer (line 130) | void backward_rnn_layer(layer l, network net)
function pull_rnn_layer (line 182) | void pull_rnn_layer(layer l)
function push_rnn_layer (line 189) | void push_rnn_layer(layer l)
function update_rnn_layer_gpu (line 196) | void update_rnn_layer_gpu(layer l, update_args a)
function forward_rnn_layer_gpu (line 203) | void forward_rnn_layer_gpu(layer l, network net)
function backward_rnn_layer_gpu (line 242) | void backward_rnn_layer_gpu(layer l, network net)
FILE: lightnet/_darknet/route_layer.c
function route_layer (line 7) | route_layer make_route_layer(int batch, int n, int *input_layers, int *i...
function resize_route_layer (line 40) | void resize_route_layer(route_layer *l, network *net)
function forward_route_layer (line 74) | void forward_route_layer(const route_layer l, network net)
function backward_route_layer (line 89) | void backward_route_layer(const route_layer l, network net)
function forward_route_layer_gpu (line 105) | void forward_route_layer_gpu(const route_layer l, network net)
function backward_route_layer_gpu (line 120) | void backward_route_layer_gpu(const route_layer l, network net)
FILE: lightnet/_darknet/route_layer.h
type layer (line 6) | typedef layer route_layer;
FILE: lightnet/_darknet/shortcut_layer.c
function layer (line 9) | layer make_shortcut_layer(int batch, int index, int w, int h, int c, int...
function forward_shortcut_layer (line 41) | void forward_shortcut_layer(const layer l, network net)
function backward_shortcut_layer (line 48) | void backward_shortcut_layer(const layer l, network net)
function forward_shortcut_layer_gpu (line 56) | void forward_shortcut_layer_gpu(const layer l, network net)
function backward_shortcut_layer_gpu (line 63) | void backward_shortcut_layer_gpu(const layer l, network net)
FILE: lightnet/_darknet/softmax_layer.c
function softmax_layer (line 11) | softmax_layer make_softmax_layer(int batch, int inputs, int groups)
function forward_softmax_layer (line 36) | void forward_softmax_layer(const softmax_layer l, network net)
function backward_softmax_layer (line 51) | void backward_softmax_layer(const softmax_layer l, network net)
function pull_softmax_layer_output (line 58) | void pull_softmax_layer_output(const softmax_layer layer)
function forward_softmax_layer_gpu (line 63) | void forward_softmax_layer_gpu(const softmax_layer l, network net)
function backward_softmax_layer_gpu (line 82) | void backward_softmax_layer_gpu(const softmax_layer layer, network net)
FILE: lightnet/_darknet/softmax_layer.h
type layer (line 6) | typedef layer softmax_layer;
FILE: lightnet/_darknet/stb_image.h
type stbi_uc (line 403) | typedef unsigned char stbi_uc;
type stbi_io_callbacks (line 424) | typedef struct
type stbi__uint16 (line 591) | typedef unsigned short stbi__uint16;
type stbi__int16 (line 592) | typedef signed short stbi__int16;
type stbi__uint32 (line 593) | typedef unsigned int stbi__uint32;
type stbi__int32 (line 594) | typedef signed int stbi__int32;
type stbi__uint16 (line 597) | typedef uint16_t stbi__uint16;
type stbi__int16 (line 598) | typedef int16_t stbi__int16;
type stbi__uint32 (line 599) | typedef uint32_t stbi__uint32;
type stbi__int32 (line 600) | typedef int32_t stbi__int32;
function stbi__cpuid3 (line 675) | static int stbi__cpuid3(void)
function stbi__sse2_available (line 696) | static int stbi__sse2_available()
function stbi__sse2_available (line 704) | static int stbi__sse2_available()
type stbi__context (line 739) | typedef struct
function stbi__start_mem (line 759) | static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int...
function stbi__start_callbacks (line 768) | static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c...
function stbi__stdio_read (line 780) | static int stbi__stdio_read(void *user, char *data, int size)
function stbi__stdio_skip (line 785) | static void stbi__stdio_skip(void *user, int n)
function stbi__stdio_eof (line 790) | static int stbi__stdio_eof(void *user)
function stbi__start_file (line 802) | static void stbi__start_file(stbi__context *s, FILE *f)
function stbi__rewind (line 811) | static void stbi__rewind(stbi__context *s)
function STBIDEF (line 876) | STBIDEF const char *stbi_failure_reason(void)
function stbi__err (line 881) | static int stbi__err(const char *str)
function STBIDEF (line 907) | STBIDEF void stbi_image_free(void *retval_from_stbi_load)
function STBIDEF (line 922) | STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
function stbi__float_postprocess (line 992) | static void stbi__float_postprocess(float *result, int *x, int *y, int *...
function FILE (line 1016) | static FILE *stbi__fopen(char const *filename, char const *mode)
function STBIDEF (line 1029) | STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *co...
function STBIDEF (line 1039) | STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp,...
function STBIDEF (line 1053) | STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, i...
function STBIDEF (line 1060) | STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk,...
function STBIDEF (line 1085) | STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, in...
function STBIDEF (line 1092) | STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, ...
function STBIDEF (line 1100) | STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *com...
function STBIDEF (line 1110) | STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, ...
function STBIDEF (line 1124) | STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
function STBIDEF (line 1138) | STBIDEF int stbi_is_hdr (char const *filename)
function STBIDEF (line 1149) | STBIDEF int stbi_is_hdr_from_file(FILE *f)
function STBIDEF (line 1161) | STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clb...
function STBIDEF (line 1176) | STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = ga...
function STBIDEF (line 1177) | STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = sc...
function STBIDEF (line 1180) | STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = ...
function STBIDEF (line 1181) | STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = ...
function stbi__refill_buffer (line 1196) | static void stbi__refill_buffer(stbi__context *s)
function stbi_inline (line 1212) | stbi_inline static stbi_uc stbi__get8(stbi__context *s)
function stbi_inline (line 1223) | stbi_inline static int stbi__at_eof(stbi__context *s)
function stbi__skip (line 1235) | static void stbi__skip(stbi__context *s, int n)
function stbi__getn (line 1252) | static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
function stbi__get16be (line 1276) | static int stbi__get16be(stbi__context *s)
function stbi__uint32 (line 1282) | static stbi__uint32 stbi__get32be(stbi__context *s)
function stbi__get16le (line 1288) | static int stbi__get16le(stbi__context *s)
function stbi__uint32 (line 1294) | static stbi__uint32 stbi__get32le(stbi__context *s)
function stbi_uc (line 1314) | static stbi_uc stbi__compute_y(int r, int g, int b)
function stbi_uc (line 1384) | static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp)
type stbi__huffman (line 1436) | typedef struct
type stbi__jpeg (line 1447) | typedef struct
function stbi__build_huffman (line 1498) | static int stbi__build_huffman(stbi__huffman *h, int *count)
function stbi__build_fast_ac (line 1541) | static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
function stbi__grow_buffer_unsafe (line 1566) | static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
function stbi_inline (line 1587) | stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffm...
function stbi_inline (line 1641) | stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
function stbi_inline (line 1657) | stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
function stbi_inline (line 1668) | stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
function stbi__jpeg_decode_block (line 1696) | static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__...
function stbi__jpeg_decode_block_prog_dc (line 1748) | static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64]...
function stbi__jpeg_decode_block_prog_ac (line 1775) | static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64]...
function stbi_inline (line 1895) | stbi_inline static stbi_uc stbi__clamp(int x)
function stbi__idct_block (line 1946) | static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
function stbi__idct_simd (line 2009) | static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
function stbi__idct_simd (line 2190) | static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
function stbi_uc (line 2398) | static stbi_uc stbi__get_marker(stbi__jpeg *j)
function stbi__jpeg_reset (line 2415) | static void stbi__jpeg_reset(stbi__jpeg *j)
function stbi__parse_entropy_coded_data (line 2428) | static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
function stbi__jpeg_dequantize (line 2552) | static void stbi__jpeg_dequantize(short *data, stbi_uc *dequant)
function stbi__jpeg_finish (line 2559) | static void stbi__jpeg_finish(stbi__jpeg *z)
function stbi__process_marker (line 2578) | static int stbi__process_marker(stbi__jpeg *z, int m)
function stbi__process_scan_header (line 2642) | static int stbi__process_scan_header(stbi__jpeg *z)
function stbi__process_frame_header (line 2681) | static int stbi__process_frame_header(stbi__jpeg *z, int scan)
function stbi__decode_jpeg_header (line 2772) | static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
function stbi__decode_jpeg_image (line 2795) | static int stbi__decode_jpeg_image(stbi__jpeg *j)
type stbi_uc (line 2834) | typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_u...
function stbi_uc (line 2839) | static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *...
function stbi_uc (line 2848) | static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, s...
function stbi_uc (line 2858) | static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, ...
function stbi_uc (line 2888) | static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, ...
function stbi_uc (line 2913) | static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_n...
function stbi_uc (line 3029) | static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_nea...
function stbi__YCbCr_to_RGB_row (line 3044) | static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const...
function stbi__YCbCr_to_RGB_row (line 3072) | static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const...
function stbi__YCbCr_to_RGB_simd (line 3099) | static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi...
function stbi__setup_jpeg (line 3234) | static void stbi__setup_jpeg(stbi__jpeg *j)
function stbi__cleanup_jpeg (line 3260) | static void stbi__cleanup_jpeg(stbi__jpeg *j)
type stbi__resample (line 3281) | typedef struct
function stbi_uc (line 3291) | static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, i...
function stbi__jpeg_test (line 3396) | static int stbi__jpeg_test(stbi__context *s)
function stbi__jpeg_info_raw (line 3407) | static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
function stbi__jpeg_info (line 3419) | static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
type stbi__zhuffman (line 3442) | typedef struct
function stbi_inline (line 3452) | stbi_inline static int stbi__bitreverse16(int n)
function stbi_inline (line 3461) | stbi_inline static int stbi__bit_reverse(int v, int bits)
function stbi__zbuild_huffman (line 3469) | static int stbi__zbuild_huffman(stbi__zhuffman *z, stbi_uc *sizelist, in...
type stbi__zbuf (line 3522) | typedef struct
function stbi_inline (line 3536) | stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
function stbi__fill_bits (line 3542) | static void stbi__fill_bits(stbi__zbuf *z)
function stbi__zreceive (line 3551) | int stbi__zreceive(stbi__zbuf *z, int n)
function stbi__zhuffman_decode_slowpath (line 3561) | static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
function stbi_inline (line 3579) | stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffm...
function stbi__zexpand (line 3593) | static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to m...
function stbi__parse_huffman_block (line 3625) | static int stbi__parse_huffman_block(stbi__zbuf *a)
function stbi__compute_huffman_codes (line 3667) | static int stbi__compute_huffman_codes(stbi__zbuf *a)
function stbi__parse_uncomperssed_block (line 3713) | static int stbi__parse_uncomperssed_block(stbi__zbuf *a)
function stbi__parse_zlib_header (line 3742) | static int stbi__parse_zlib_header(stbi__zbuf *a)
function stbi__init_zdefaults (line 3757) | static void stbi__init_zdefaults(void)
function stbi__parse_zlib (line 3768) | static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
function stbi__do_zlib (line 3797) | static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, i...
function STBIDEF (line 3807) | STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int ...
function STBIDEF (line 3823) | STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *...
function STBIDEF (line 3828) | STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *b...
function STBIDEF (line 3844) | STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const ...
function STBIDEF (line 3855) | STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int l...
function STBIDEF (line 3871) | STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, co...
type stbi__pngchunk (line 3894) | typedef struct
function stbi__pngchunk (line 3900) | static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
function stbi__check_png_header (line 3908) | static int stbi__check_png_header(stbi__context *s)
type stbi__png (line 3917) | typedef struct
function stbi__paeth (line 3944) | static int stbi__paeth(int a, int b, int c)
function stbi__create_png_image_raw (line 3958) | static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__...
function stbi__create_png_image (line 4136) | static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stb...
function stbi__compute_transparency (line 4178) | static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int o...
function stbi__expand_png_palette (line 4203) | static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int ...
function STBIDEF (line 4243) | STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpr...
function STBIDEF (line 4248) | STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_conv...
function stbi__de_iphone (line 4253) | static void stbi__de_iphone(stbi__png *z)
function stbi__parse_png_file (line 4297) | static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
function stbi__png_test (line 4484) | static int stbi__png_test(stbi__context *s)
function stbi__png_info_raw (line 4492) | static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
function stbi__png_info (line 4504) | static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
function stbi__bmp_test_raw (line 4515) | static int stbi__bmp_test_raw(stbi__context *s)
function stbi__bmp_test (line 4530) | static int stbi__bmp_test(stbi__context *s)
function stbi__high_bit (line 4539) | static int stbi__high_bit(unsigned int z)
function stbi__bitcount (line 4551) | static int stbi__bitcount(unsigned int a)
function stbi__shiftsigned (line 4561) | static int stbi__shiftsigned(int v, int shift, int bits)
function stbi_uc (line 4578) | static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *co...
function stbi__tga_info (line 4784) | static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
function stbi__tga_test (line 4821) | static int stbi__tga_test(stbi__context *s)
function stbi_uc (line 4846) | static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *co...
function stbi__psd_test (line 5045) | static int stbi__psd_test(stbi__context *s)
function stbi_uc (line 5052) | static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *co...
function stbi__pic_is4 (line 5221) | static int stbi__pic_is4(stbi__context *s,const char *str)
function stbi__pic_test_core (line 5231) | static int stbi__pic_test_core(stbi__context *s)
type stbi__pic_packet (line 5247) | typedef struct
function stbi_uc (line 5252) | static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
function stbi__copyval (line 5266) | static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
function stbi_uc (line 5275) | static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int heigh...
function stbi_uc (line 5386) | static stbi_uc *stbi__pic_load(stbi__context *s,int *px,int *py,int *com...
function stbi__pic_test (line 5419) | static int stbi__pic_test(stbi__context *s)
type stbi__gif_lzw (line 5431) | typedef struct
type stbi__gif (line 5438) | typedef struct
function stbi__gif_test_raw (line 5455) | static int stbi__gif_test_raw(stbi__context *s)
function stbi__gif_test (line 5465) | static int stbi__gif_test(stbi__context *s)
function stbi__gif_parse_colortable (line 5472) | static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256...
function stbi__gif_header (line 5483) | static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, i...
function stbi__gif_info_raw (line 5511) | static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
function stbi__out_gif_code (line 5523) | static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
function stbi_uc (line 5557) | static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
function stbi__fill_gif_background (line 5637) | static void stbi__fill_gif_background(stbi__gif *g)
function stbi_uc (line 5652) | static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int ...
function stbi_uc (line 5752) | static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *co...
function stbi__gif_info (line 5768) | static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
function stbi__hdr_test_core (line 5778) | static int stbi__hdr_test_core(stbi__context *s)
function stbi__hdr_test (line 5788) | static int stbi__hdr_test(stbi__context* s)
function stbi__hdr_convert (line 5818) | static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
function stbi__hdr_info (line 5956) | static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
function stbi__bmp_info (line 5997) | static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
function stbi__psd_info (line 6027) | static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
function stbi__pic_info (line 6060) | static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
function stbi__pnm_test (line 6120) | static int stbi__pnm_test(stbi__context *s)
function stbi_uc (line 6132) | static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *co...
function stbi__pnm_isspace (line 6152) | static int stbi__pnm_isspace(char c)
function stbi__pnm_skip_whitespace (line 6157) | static void stbi__pnm_skip_whitespace(stbi__context *s, char *c)
function stbi__pnm_isdigit (line 6163) | static int stbi__pnm_isdigit(char c)
function stbi__pnm_getinteger (line 6168) | static int stbi__pnm_getinteger(stbi__context *s, char *c)
function stbi__pnm_info (line 6180) | static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
function stbi__info_main (line 6215) | static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
function STBIDEF (line 6258) | STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
function STBIDEF (line 6268) | STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
function STBIDEF (line 6280) | STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x...
function STBIDEF (line 6287) | STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *u...
FILE: lightnet/_darknet/stb_image_write.h
type stbiw_uint32 (line 129) | typedef unsigned int stbiw_uint32;
function writefv (line 132) | static void writefv(FILE *f, const char *fmt, va_list v)
function write3 (line 152) | static void write3(FILE *f, unsigned char a, unsigned char b, unsigned c...
function write_pixels (line 159) | static void write_pixels(FILE *f, int rgb_dir, int vdir, int x, int y, i...
function outfile (line 206) | static int outfile(char const *filename, int rgb_dir, int vdir, int x, i...
function stbi_write_bmp (line 222) | int stbi_write_bmp(char const *filename, int x, int y, int comp, const v...
function stbi_write_tga (line 231) | int stbi_write_tga(char const *filename, int x, int y, int comp, const v...
function stbiw__linear_to_rgbe (line 245) | void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear)
function stbiw__write_run_data (line 262) | void stbiw__write_run_data(FILE *f, int length, unsigned char databyte)
function stbiw__write_dump_data (line 270) | void stbiw__write_dump_data(FILE *f, int length, unsigned char *data)
function stbiw__write_hdr_scanline (line 278) | void stbiw__write_hdr_scanline(FILE *f, int width, int comp, unsigned ch...
function stbi_write_hdr (line 367) | int stbi_write_hdr(char const *filename, int x, int y, int comp, const f...
function stbiw__zlib_bitrev (line 425) | static int stbiw__zlib_bitrev(int code, int codebits)
function stbiw__zlib_countm (line 435) | static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *...
function stbiw__zhash (line 443) | static unsigned int stbiw__zhash(unsigned char *data)
function stbiw__crc32 (line 572) | unsigned int stbiw__crc32(unsigned char *buffer, int len)
function stbiw__wpcrc (line 590) | static void stbiw__wpcrc(unsigned char **data, int len)
function stbiw__paeth (line 596) | static unsigned char stbiw__paeth(int a, int b, int c)
function stbi_write_png (line 696) | int stbi_write_png(char const *filename, int x, int y, int comp, const v...
FILE: lightnet/_darknet/tree.c
function change_leaves (line 7) | void change_leaves(tree *t, char *leaf_list)
function get_hierarchy_probability (line 27) | float get_hierarchy_probability(float *x, tree *hier, int c, int stride)
function hierarchy_predictions (line 37) | void hierarchy_predictions(float *predictions, int n, tree *hier, int on...
function hierarchy_top_prediction (line 53) | int hierarchy_top_prediction(float *predictions, tree *hier, float thres...
function tree (line 83) | tree *read_tree(char *filename)
FILE: lightnet/_darknet/utils.c
function what_time_is_it_now (line 26) | double what_time_is_it_now()
function sorta_shuffle (line 71) | void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections)
function shuffle (line 82) | void shuffle(void *arr, size_t n, size_t size)
function del_arg (line 110) | void del_arg(int argc, char **argv, int index)
function find_arg (line 117) | int find_arg(int argc, char* argv[], char *arg)
function find_int_arg (line 130) | int find_int_arg(int argc, char **argv, char *arg, int def)
function find_float_arg (line 145) | float find_float_arg(int argc, char **argv, char *arg, float def)
function alphanum_to_int (line 190) | int alphanum_to_int(char c)
function int_to_alphanum (line 194) | char int_to_alphanum(int i)
function pm (line 200) | void pm(int M, int N, float *A)
function find_replace (line 213) | void find_replace(char *str, char *orig, char *rep, char *output)
function sec (line 229) | float sec(clock_t clocks)
function top_k (line 234) | void top_k(float *a, int n, int k, int *index)
function error (line 250) | void error(const char *s)
function malloc_error (line 272) | void malloc_error()
function file_error (line 278) | void file_error(char *s)
function list (line 284) | list *split_str(char *s, char delim)
function strip (line 299) | void strip(char *s)
function strip_char (line 312) | void strip_char(char *s, char bad)
function free_ptrs (line 325) | void free_ptrs(void **ptrs, int n)
function read_int (line 363) | int read_int(int fd)
function write_int (line 371) | void write_int(int fd, int n)
function read_all_fail (line 377) | int read_all_fail(int fd, char *buffer, size_t bytes)
function write_all_fail (line 388) | int write_all_fail(int fd, char *buffer, size_t bytes)
function read_all (line 399) | void read_all(int fd, char *buffer, size_t bytes)
function write_all (line 409) | void write_all(int fd, char *buffer, size_t bytes)
function list (line 427) | list *parse_csv_line(char *line)
function count_fields (line 444) | int count_fields(char *line)
function sum_array (line 476) | float sum_array(float *a, int n)
function mean_array (line 484) | float mean_array(float *a, int n)
function mean_arrays (line 489) | void mean_arrays(float **a, int n, int els, float *avg)
function print_statistics (line 504) | void print_statistics(float *a, int n)
function variance_array (line 511) | float variance_array(float *a, int n)
function constrain_int (line 521) | int constrain_int(int a, int min, int max)
function constrain (line 528) | float constrain(float min, float max, float a)
function dist_array (line 535) | float dist_array(float *a, float *b, int n, int sub)
function mse_array (line 543) | float mse_array(float *a, int n)
function normalize_array (line 551) | void normalize_array(float *a, int n)
function translate_array (line 563) | void translate_array(float *a, int n, float s)
function mag_array (line 571) | float mag_array(float *a, int n)
function scale_array (line 581) | void scale_array(float *a, int n, float s)
function sample_array (line 589) | int sample_array(float *a, int n)
function max_int_index (line 602) | int max_int_index(int *a, int n)
function max_index (line 616) | int max_index(float *a, int n)
function rand_int (line 630) | int rand_int(int min, int max)
function rand_normal (line 642) | float rand_normal()
function rand_size_t (line 674) | size_t rand_size_t()
function rand_uniform (line 686) | float rand_uniform(float min, float max)
function rand_scale (line 696) | float rand_scale(float s)
FILE: lightnet/cli.py
function download (line 20) | def download(cmd, model, direct=False):
function download_file (line 34) | def download_file(url, path):
FILE: lightnet/util.py
function make_temp_dir (line 7) | def make_temp_dir():
FILE: setup.py
class ExtensionBuilder (line 20) | class ExtensionBuilder(distutils.command.build_ext.build_ext):
method build_extensions (line 21) | def build_extensions(self):
function get_c_sources (line 78) | def get_c_sources(start_dir):
FILE: tests/test_boxes.py
function ids (line 6) | def ids():
function xywh (line 10) | def xywh():
function test_BoxLabels_init (line 16) | def test_BoxLabels_init(ids, xywh):
FILE: tests/test_image.py
function test_make_image (line 6) | def test_make_image():
function test_random_image (line 10) | def test_random_image():
function test_image_from_bytes (line 14) | def test_image_from_bytes():
FILE: tests/test_network.py
function ids_xywh (line 9) | def ids_xywh():
function image (line 14) | def image():
function box_labels (line 18) | def box_labels(ids_xywh):
function test_init (line 22) | def test_init():
function test_load (line 25) | def test_load():
function test_from_disk (line 29) | def test_from_disk(image):
function test_to_from_bytes (line 34) | def test_to_from_bytes(image):
function test_detect (line 43) | def test_detect(image):
function test_box_labels (line 47) | def test_box_labels(box_labels):
function test_detection_data (line 51) | def test_detection_data(image, box_labels):
function test_update (line 58) | def test_update(image, box_labels):
function test_evaluate (line 65) | def test_evaluate(image, box_labels):
Condensed preview — 151 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (993K chars).
[
{
"path": ".appveyor.yml",
"chars": 1737,
"preview": "environment:\n\n matrix:\n\n # For Python versions available on Appveyor, see\n # http://www.appveyor.com/docs/install"
},
{
"path": ".gitignore",
"chars": 855,
"preview": "*.weights\n\n# Cython / C extensions\ncythonize.json\nspacy/*.html\n*.cpp\n*.so\n\n# Vim / VSCode / editors\n*.swp\n*.sw*\nProfile."
},
{
"path": ".travis.yml",
"chars": 630,
"preview": "language: python\n\npython:\n - \"2.7\"\n - \"3.5\"\n - \"3.6\"\n\ninstall:\n - if [ \"$TRAVIS_OS_NAME\" == \"linux\" ] ; then sudo ap"
},
{
"path": "LICENSE",
"chars": 1127,
"preview": "The MIT License (MIT)\n\nCopyright (C) 2017 ExplosionAI UG (haftungsbeschränkt), 2014-2017 Joseph Redmon\n\nPermission is he"
},
{
"path": "MANIFEST.in",
"chars": 333,
"preview": "include LICENSE\ninclude README.rst\ninclude bin/cythonize.py\ninclude lightnet/_darknet/Makefile\nrecursive-include lightne"
},
{
"path": "README.rst",
"chars": 10900,
"preview": "LightNet: Bringing pjreddie's DarkNet out of the shadows\n********************************************************\n\nLight"
},
{
"path": "bin/cythonize.py",
"chars": 6213,
"preview": "#!/usr/bin/env python\n\"\"\" cythonize\n\nCythonize pyx files into C files as needed.\n\nUsage: cythonize [root_dir]\n\nDefault ["
},
{
"path": "bin/train.py",
"chars": 410,
"preview": "from lightnet.lightnet import train\nimport plac\nfrom pathlib import Path\n\ntry:\n unicode\nexcept NameError:\n unicode"
},
{
"path": "lightnet/__init__.pxd",
"chars": 0,
"preview": ""
},
{
"path": "lightnet/__init__.py",
"chars": 205,
"preview": "# coding: utf8\nfrom __future__ import unicode_literals\n\nfrom .lightnet import Network, Image, BoxLabels\nfrom .about impo"
},
{
"path": "lightnet/__main__.py",
"chars": 741,
"preview": "# coding: utf8\nfrom __future__ import print_function\n# NB! This breaks in plac on Python 2!!\n# from __future__ import un"
},
{
"path": "lightnet/_darknet/Makefile",
"chars": 2751,
"preview": "GPU=0\nCUDNN=0\nOPENCV=0\nOPENMP=0\nDEBUG=0\n\nARCH= -gencode arch=compute_30,code=sm_30 \\\n -gencode arch=compute_35,code"
},
{
"path": "lightnet/_darknet/activation_kernels.cu",
"chars": 6284,
"preview": "#include \"cuda_runtime.h\"\n#include \"curand.h\"\n#include \"cublas_v2.h\"\n\nextern \"C\" {\n#include \"activations.h\"\n#include \"cu"
},
{
"path": "lightnet/_darknet/activation_layer.c",
"chars": 1707,
"preview": "#include \"activation_layer.h\"\n#include \"utils.h\"\n#include \"cuda.h\"\n#include \"blas.h\"\n#include \"gemm.h\"\n\n#include <math.h"
},
{
"path": "lightnet/_darknet/activation_layer.h",
"chars": 447,
"preview": "#ifndef ACTIVATION_LAYER_H\n#define ACTIVATION_LAYER_H\n\n#include \"activations.h\"\n#include \"layer.h\"\n#include \"network.h\"\n"
},
{
"path": "lightnet/_darknet/activations.c",
"chars": 3560,
"preview": "#include \"activations.h\"\n\n#include <math.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\nchar *get_activa"
},
{
"path": "lightnet/_darknet/activations.h",
"chars": 2717,
"preview": "#ifndef ACTIVATIONS_H\n#define ACTIVATIONS_H\n#include \"darknet.h\"\n#include \"cuda.h\"\n#include \"math.h\"\n\nACTIVATION get_act"
},
{
"path": "lightnet/_darknet/avgpool_layer.c",
"chars": 1877,
"preview": "#include \"avgpool_layer.h\"\n#include \"cuda.h\"\n#include <stdio.h>\n\navgpool_layer make_avgpool_layer(int batch, int w, int "
},
{
"path": "lightnet/_darknet/avgpool_layer.h",
"chars": 606,
"preview": "#ifndef AVGPOOL_LAYER_H\n#define AVGPOOL_LAYER_H\n\n#include \"image.h\"\n#include \"cuda.h\"\n#include \"layer.h\"\n#include \"netwo"
},
{
"path": "lightnet/_darknet/avgpool_layer_kernels.cu",
"chars": 1628,
"preview": "#include \"cuda_runtime.h\"\n#include \"curand.h\"\n#include \"cublas_v2.h\"\n\nextern \"C\" {\n#include \"avgpool_layer.h\"\n#include \""
},
{
"path": "lightnet/_darknet/batchnorm_layer.c",
"chars": 10366,
"preview": "#include \"convolutional_layer.h\"\n#include \"batchnorm_layer.h\"\n#include \"blas.h\"\n#include <stdio.h>\n\nlayer make_batchnorm"
},
{
"path": "lightnet/_darknet/batchnorm_layer.h",
"chars": 490,
"preview": "#ifndef BATCHNORM_LAYER_H\n#define BATCHNORM_LAYER_H\n\n#include \"image.h\"\n#include \"layer.h\"\n#include \"network.h\"\n\nlayer m"
},
{
"path": "lightnet/_darknet/blas.c",
"chars": 7588,
"preview": "#include \"blas.h\"\n\n#include <math.h>\n#include <assert.h>\n#include <float.h>\n#include <stdio.h>\n#include <stdlib.h>\n#incl"
},
{
"path": "lightnet/_darknet/blas.h",
"chars": 5996,
"preview": "#ifndef BLAS_H\n#define BLAS_H\n#include \"darknet.h\"\n\nvoid flatten(float *x, int size, int layers, int batch, int forward)"
},
{
"path": "lightnet/_darknet/blas_kernels.cu",
"chars": 29741,
"preview": "#include \"cuda_runtime.h\"\n#include \"curand.h\"\n#include \"cublas_v2.h\"\n#include <assert.h>\n\nextern \"C\" {\n#include \"blas.h\""
},
{
"path": "lightnet/_darknet/box.c",
"chars": 8230,
"preview": "#include \"box.h\"\n#include <stdio.h>\n#include <math.h>\n#include <stdlib.h>\n\nbox float_to_box(float *f, int stride)\n{\n "
},
{
"path": "lightnet/_darknet/box.h",
"chars": 234,
"preview": "#ifndef BOX_H\n#define BOX_H\n#include \"darknet.h\"\n\ntypedef struct{\n float dx, dy, dw, dh;\n} dbox;\n\nfloat box_rmse(box "
},
{
"path": "lightnet/_darknet/classifier.h",
"chars": 1,
"preview": "\n"
},
{
"path": "lightnet/_darknet/col2im.c",
"chars": 1340,
"preview": "#include <stdio.h>\n#include <math.h>\nvoid col2im_add_pixel(float *im, int height, int width, int channels,\n "
},
{
"path": "lightnet/_darknet/col2im.h",
"chars": 331,
"preview": "#ifndef COL2IM_H\n#define COL2IM_H\n\nvoid col2im_cpu(float* data_col,\n int channels, int height, int width,\n "
},
{
"path": "lightnet/_darknet/col2im_kernels.cu",
"chars": 2311,
"preview": "#include \"cuda_runtime.h\"\n#include \"curand.h\"\n#include \"cublas_v2.h\"\n\nextern \"C\" {\n#include \"col2im.h\"\n#include \"cuda.h\""
},
{
"path": "lightnet/_darknet/connected_layer.c",
"chars": 11056,
"preview": "#include \"connected_layer.h\"\n#include \"convolutional_layer.h\"\n#include \"batchnorm_layer.h\"\n#include \"utils.h\"\n#include \""
},
{
"path": "lightnet/_darknet/connected_layer.h",
"chars": 666,
"preview": "#ifndef CONNECTED_LAYER_H\n#define CONNECTED_LAYER_H\n\n#include \"activations.h\"\n#include \"layer.h\"\n#include \"network.h\"\n\nl"
},
{
"path": "lightnet/_darknet/convolutional_kernels.cu",
"chars": 10207,
"preview": "#include \"cuda_runtime.h\"\n#include \"curand.h\"\n#include \"cublas_v2.h\"\n\nextern \"C\" {\n#include \"convolutional_layer.h\"\n#inc"
},
{
"path": "lightnet/_darknet/convolutional_layer.c",
"chars": 18445,
"preview": "#include \"convolutional_layer.h\"\n#include \"utils.h\"\n#include \"batchnorm_layer.h\"\n#include \"im2col.h\"\n#include \"col2im.h\""
},
{
"path": "lightnet/_darknet/convolutional_layer.h",
"chars": 2218,
"preview": "#ifndef CONVOLUTIONAL_LAYER_H\n#define CONVOLUTIONAL_LAYER_H\n\n#include \"cuda.h\"\n#include \"image.h\"\n#include \"activations."
},
{
"path": "lightnet/_darknet/cost_layer.c",
"chars": 4956,
"preview": "#include \"cost_layer.h\"\n#include \"utils.h\"\n#include \"cuda.h\"\n#include \"blas.h\"\n#include <math.h>\n#include <string.h>\n#in"
},
{
"path": "lightnet/_darknet/cost_layer.h",
"chars": 574,
"preview": "#ifndef COST_LAYER_H\n#define COST_LAYER_H\n#include \"layer.h\"\n#include \"network.h\"\n\ntypedef layer cost_layer;\n\nCOST_TYPE "
},
{
"path": "lightnet/_darknet/crnn_layer.c",
"chars": 9388,
"preview": "#include \"crnn_layer.h\"\n#include \"convolutional_layer.h\"\n#include \"utils.h\"\n#include \"cuda.h\"\n#include \"blas.h\"\n#include"
},
{
"path": "lightnet/_darknet/crnn_layer.h",
"chars": 649,
"preview": "\n#ifndef CRNN_LAYER_H\n#define CRNN_LAYER_H\n\n#include \"activations.h\"\n#include \"layer.h\"\n#include \"network.h\"\n\nlayer make"
},
{
"path": "lightnet/_darknet/crop_layer.c",
"chars": 2759,
"preview": "#include \"crop_layer.h\"\n#include \"cuda.h\"\n#include <stdio.h>\n\nimage get_crop_image(crop_layer l)\n{\n int h = l.out_h;\n"
},
{
"path": "lightnet/_darknet/crop_layer.h",
"chars": 506,
"preview": "#ifndef CROP_LAYER_H\n#define CROP_LAYER_H\n\n#include \"image.h\"\n#include \"layer.h\"\n#include \"network.h\"\n\ntypedef layer cro"
},
{
"path": "lightnet/_darknet/crop_layer_kernels.cu",
"chars": 6669,
"preview": "#include \"cuda_runtime.h\"\n#include \"curand.h\"\n#include \"cublas_v2.h\"\n\nextern \"C\" {\n#include \"crop_layer.h\"\n#include \"uti"
},
{
"path": "lightnet/_darknet/cuda.c",
"chars": 4095,
"preview": "int gpu_index = 0;\n\n#ifdef GPU\n\n#include \"cuda.h\"\n#include \"utils.h\"\n#include \"blas.h\"\n#include <assert.h>\n#include <std"
},
{
"path": "lightnet/_darknet/cuda.h",
"chars": 378,
"preview": "#ifndef CUDA_H\n#define CUDA_H\n\n#include \"darknet.h\"\n\n#ifdef GPU\n\nvoid check_error(cudaError_t status);\ncublasHandle_t bl"
},
{
"path": "lightnet/_darknet/darknet.h",
"chars": 18737,
"preview": "#ifndef DARKNET_API\n#define DARKNET_API\n#include <stdlib.h>\n#include <stdio.h>\n#include <string.h>\n\n#include <pthread.h>"
},
{
"path": "lightnet/_darknet/data.c",
"chars": 44209,
"preview": "#include \"data.h\"\n#include \"utils.h\"\n#include \"image.h\"\n#include \"cuda.h\"\n\n#include <stdio.h>\n#include <stdlib.h>\n#inclu"
},
{
"path": "lightnet/_darknet/data.h",
"chars": 2430,
"preview": "#ifndef DATA_H\n#define DATA_H\n#include <pthread.h>\n\n#include \"darknet.h\"\n#include \"matrix.h\"\n#include \"list.h\"\n#include "
},
{
"path": "lightnet/_darknet/deconvolutional_kernels.cu",
"chars": 4712,
"preview": "#include \"cuda_runtime.h\"\n#include \"curand.h\"\n#include \"cublas_v2.h\"\n\nextern \"C\" {\n#include \"convolutional_layer.h\"\n#inc"
},
{
"path": "lightnet/_darknet/deconvolutional_layer.c",
"chars": 9160,
"preview": "#include \"deconvolutional_layer.h\"\n#include \"convolutional_layer.h\"\n#include \"batchnorm_layer.h\"\n#include \"utils.h\"\n#inc"
},
{
"path": "lightnet/_darknet/deconvolutional_layer.h",
"chars": 871,
"preview": "#ifndef DECONVOLUTIONAL_LAYER_H\n#define DECONVOLUTIONAL_LAYER_H\n\n#include \"cuda.h\"\n#include \"image.h\"\n#include \"activati"
},
{
"path": "lightnet/_darknet/demo.c",
"chars": 9220,
"preview": "#include \"network.h\"\n#include \"detection_layer.h\"\n#include \"region_layer.h\"\n#include \"cost_layer.h\"\n#include \"utils.h\"\n#"
},
{
"path": "lightnet/_darknet/demo.h",
"chars": 58,
"preview": "#ifndef DEMO_H\n#define DEMO_H\n\n#include \"image.h\"\n\n#endif\n"
},
{
"path": "lightnet/_darknet/detection_layer.c",
"chars": 10333,
"preview": "#include \"detection_layer.h\"\n#include \"activations.h\"\n#include \"softmax_layer.h\"\n#include \"blas.h\"\n#include \"box.h\"\n#inc"
},
{
"path": "lightnet/_darknet/detection_layer.h",
"chars": 545,
"preview": "#ifndef DETECTION_LAYER_H\n#define DETECTION_LAYER_H\n\n#include \"layer.h\"\n#include \"network.h\"\n\ntypedef layer detection_la"
},
{
"path": "lightnet/_darknet/dropout_layer.c",
"chars": 1606,
"preview": "#include \"dropout_layer.h\"\n#include \"utils.h\"\n#include \"cuda.h\"\n#include <stdlib.h>\n#include <stdio.h>\n\ndropout_layer ma"
},
{
"path": "lightnet/_darknet/dropout_layer.h",
"chars": 523,
"preview": "#ifndef DROPOUT_LAYER_H\n#define DROPOUT_LAYER_H\n\n#include \"layer.h\"\n#include \"network.h\"\n\ntypedef layer dropout_layer;\n\n"
},
{
"path": "lightnet/_darknet/dropout_layer_kernels.cu",
"chars": 1239,
"preview": "#include \"cuda_runtime.h\"\n#include \"curand.h\"\n#include \"cublas_v2.h\"\n\nextern \"C\" {\n#include \"dropout_layer.h\"\n#include \""
},
{
"path": "lightnet/_darknet/gemm.c",
"chars": 9099,
"preview": "#include \"gemm.h\"\n#include \"utils.h\"\n#include \"cuda.h\"\n#include <stdlib.h>\n#include <stdio.h>\n#include <math.h>\n\nvoid ge"
},
{
"path": "lightnet/_darknet/gemm.h",
"chars": 1120,
"preview": "#ifndef GEMM_H\n#define GEMM_H\n\nvoid gemm_bin(int M, int N, int K, float ALPHA, \n char *A, int lda, \n floa"
},
{
"path": "lightnet/_darknet/gru_layer.c",
"chars": 13715,
"preview": "#include \"gru_layer.h\"\n#include \"connected_layer.h\"\n#include \"utils.h\"\n#include \"cuda.h\"\n#include \"blas.h\"\n#include \"gem"
},
{
"path": "lightnet/_darknet/gru_layer.h",
"chars": 597,
"preview": "\n#ifndef GRU_LAYER_H\n#define GRU_LAYER_H\n\n#include \"activations.h\"\n#include \"layer.h\"\n#include \"network.h\"\n\nlayer make_g"
},
{
"path": "lightnet/_darknet/im2col.c",
"chars": 1344,
"preview": "#include \"im2col.h\"\n#include <stdio.h>\ninline float im2col_get_pixel(float *im, int height, int width, int channels,\n "
},
{
"path": "lightnet/_darknet/im2col.h",
"chars": 329,
"preview": "#ifndef IM2COL_H\n#define IM2COL_H\n\nvoid im2col_cpu(float* data_im,\n int channels, int height, int width,\n "
},
{
"path": "lightnet/_darknet/im2col_kernels.cu",
"chars": 2278,
"preview": "#include \"cuda_runtime.h\"\n#include \"curand.h\"\n#include \"cublas_v2.h\"\n\nextern \"C\" {\n#include \"im2col.h\"\n#include \"cuda.h\""
},
{
"path": "lightnet/_darknet/image.c",
"chars": 41296,
"preview": "#include \"image.h\"\n#include \"utils.h\"\n#include \"blas.h\"\n#include \"cuda.h\"\n#include <stdio.h>\n#include <math.h>\n\n#define "
},
{
"path": "lightnet/_darknet/image.h",
"chars": 2407,
"preview": "#ifndef IMAGE_H\n#define IMAGE_H\n\n#include <stdlib.h>\n#include <stdio.h>\n#include <float.h>\n#include <string.h>\n#include "
},
{
"path": "lightnet/_darknet/layer.c",
"chars": 4471,
"preview": "#include \"layer.h\"\n#include \"cuda.h\"\n\n#include <stdlib.h>\n\nvoid free_layer(layer l)\n{\n if(l.type == DROPOUT){\n "
},
{
"path": "lightnet/_darknet/layer.h",
"chars": 21,
"preview": "#include \"darknet.h\"\n"
},
{
"path": "lightnet/_darknet/list.c",
"chars": 1370,
"preview": "#include <stdlib.h>\n#include <string.h>\n#include \"list.h\"\n\nlist *make_list()\n{\n\tlist *l = malloc(sizeof(list));\n\tl->size"
},
{
"path": "lightnet/_darknet/list.h",
"chars": 185,
"preview": "#ifndef LIST_H\n#define LIST_H\n#include \"darknet.h\"\n\nlist *make_list();\nint list_find(list *l, void *val);\n\nvoid list_ins"
},
{
"path": "lightnet/_darknet/local_layer.c",
"chars": 8929,
"preview": "#include \"local_layer.h\"\n#include \"utils.h\"\n#include \"im2col.h\"\n#include \"col2im.h\"\n#include \"blas.h\"\n#include \"gemm.h\"\n"
},
{
"path": "lightnet/_darknet/local_layer.h",
"chars": 943,
"preview": "#ifndef LOCAL_LAYER_H\n#define LOCAL_LAYER_H\n\n#include \"cuda.h\"\n#include \"image.h\"\n#include \"activations.h\"\n#include \"lay"
},
{
"path": "lightnet/_darknet/lstm_layer.c",
"chars": 24438,
"preview": "#include \"lstm_layer.h\"\n#include \"connected_layer.h\"\n#include \"utils.h\"\n#include \"cuda.h\"\n#include \"blas.h\"\n#include \"ge"
},
{
"path": "lightnet/_darknet/lstm_layer.h",
"chars": 503,
"preview": "#ifndef LSTM_LAYER_H\n#define LSTM_LAYER_H\n\n#include \"activations.h\"\n#include \"layer.h\"\n#include \"network.h\"\n#define USET"
},
{
"path": "lightnet/_darknet/matrix.c",
"chars": 4262,
"preview": "#include \"matrix.h\"\n#include \"utils.h\"\n#include \"blas.h\"\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#inc"
},
{
"path": "lightnet/_darknet/matrix.h",
"chars": 246,
"preview": "#ifndef MATRIX_H\n#define MATRIX_H\n#include \"darknet.h\"\n\nmatrix copy_matrix(matrix m);\nvoid print_matrix(matrix m);\n\nmatr"
},
{
"path": "lightnet/_darknet/maxpool_layer.c",
"chars": 3942,
"preview": "#include \"maxpool_layer.h\"\n#include \"cuda.h\"\n#include <stdio.h>\n\nimage get_maxpool_image(maxpool_layer l)\n{\n int h = "
},
{
"path": "lightnet/_darknet/maxpool_layer.h",
"chars": 641,
"preview": "#ifndef MAXPOOL_LAYER_H\n#define MAXPOOL_LAYER_H\n\n#include \"image.h\"\n#include \"cuda.h\"\n#include \"layer.h\"\n#include \"netwo"
},
{
"path": "lightnet/_darknet/maxpool_layer_kernels.cu",
"chars": 3180,
"preview": "#include \"cuda_runtime.h\"\n#include \"curand.h\"\n#include \"cublas_v2.h\"\n\nextern \"C\" {\n#include \"maxpool_layer.h\"\n#include \""
},
{
"path": "lightnet/_darknet/network.c",
"chars": 28856,
"preview": "#include <stdio.h>\n#include <time.h>\n#include <assert.h>\n#include \"network.h\"\n#include \"image.h\"\n#include \"data.h\"\n#incl"
},
{
"path": "lightnet/_darknet/network.h",
"chars": 631,
"preview": "// Oh boy, why am I about to do this....\n#ifndef NETWORK_H\n#define NETWORK_H\n#include \"darknet.h\"\n\n#include \"image.h\"\n#i"
},
{
"path": "lightnet/_darknet/normalization_layer.c",
"chars": 5532,
"preview": "#include \"normalization_layer.h\"\n#include \"blas.h\"\n\n#include <stdio.h>\n\nlayer make_normalization_layer(int batch, int w,"
},
{
"path": "lightnet/_darknet/normalization_layer.h",
"chars": 658,
"preview": "#ifndef NORMALIZATION_LAYER_H\n#define NORMALIZATION_LAYER_H\n\n#include \"image.h\"\n#include \"layer.h\"\n#include \"network.h\"\n"
},
{
"path": "lightnet/_darknet/option_list.c",
"chars": 3127,
"preview": "#include <stdlib.h>\n#include <stdio.h>\n#include <string.h>\n#include \"option_list.h\"\n#include \"utils.h\"\n\nlist *read_data_"
},
{
"path": "lightnet/_darknet/option_list.h",
"chars": 474,
"preview": "#ifndef OPTION_LIST_H\n#define OPTION_LIST_H\n#include \"list.h\"\n\ntypedef struct{\n char *key;\n char *val;\n int use"
},
{
"path": "lightnet/_darknet/parser.c",
"chars": 41228,
"preview": "#include <stdio.h>\n#include <string.h>\n#include <stdlib.h>\n#include <assert.h>\n\n#include \"activation_layer.h\"\n#include \""
},
{
"path": "lightnet/_darknet/parser.h",
"chars": 188,
"preview": "#ifndef PARSER_H\n#define PARSER_H\n#include \"darknet.h\"\n#include \"network.h\"\n\nvoid save_network(network net, char *filena"
},
{
"path": "lightnet/_darknet/region_layer.c",
"chars": 23090,
"preview": "#include \"region_layer.h\"\n#include \"activations.h\"\n#include \"blas.h\"\n#include \"box.h\"\n#include \"cuda.h\"\n#include \"utils."
},
{
"path": "lightnet/_darknet/region_layer.h",
"chars": 492,
"preview": "#ifndef REGION_LAYER_H\n#define REGION_LAYER_H\n\n#include \"darknet.h\"\n#include \"layer.h\"\n#include \"network.h\"\n\nlayer make_"
},
{
"path": "lightnet/_darknet/reorg_layer.c",
"chars": 4784,
"preview": "#include \"reorg_layer.h\"\n#include \"cuda.h\"\n#include \"blas.h\"\n\n#include <stdio.h>\n\n\nlayer make_reorg_layer(int batch, int"
},
{
"path": "lightnet/_darknet/reorg_layer.h",
"chars": 519,
"preview": "#ifndef REORG_LAYER_H\n#define REORG_LAYER_H\n\n#include \"image.h\"\n#include \"cuda.h\"\n#include \"layer.h\"\n#include \"network.h"
},
{
"path": "lightnet/_darknet/rnn_layer.c",
"chars": 10093,
"preview": "#include \"rnn_layer.h\"\n#include \"connected_layer.h\"\n#include \"utils.h\"\n#include \"cuda.h\"\n#include \"blas.h\"\n#include \"gem"
},
{
"path": "lightnet/_darknet/rnn_layer.h",
"chars": 625,
"preview": "\n#ifndef RNN_LAYER_H\n#define RNN_LAYER_H\n\n#include \"activations.h\"\n#include \"layer.h\"\n#include \"network.h\"\n#define USET\n"
},
{
"path": "lightnet/_darknet/route_layer.c",
"chars": 3945,
"preview": "#include \"route_layer.h\"\n#include \"cuda.h\"\n#include \"blas.h\"\n\n#include <stdio.h>\n\nroute_layer make_route_layer(int batch"
},
{
"path": "lightnet/_darknet/route_layer.h",
"chars": 529,
"preview": "#ifndef ROUTE_LAYER_H\n#define ROUTE_LAYER_H\n#include \"network.h\"\n#include \"layer.h\"\n\ntypedef layer route_layer;\n\nroute_l"
},
{
"path": "lightnet/_darknet/shortcut_layer.c",
"chars": 2226,
"preview": "#include \"shortcut_layer.h\"\n#include \"cuda.h\"\n#include \"blas.h\"\n#include \"activations.h\"\n\n#include <stdio.h>\n#include <a"
},
{
"path": "lightnet/_darknet/shortcut_layer.h",
"chars": 451,
"preview": "#ifndef SHORTCUT_LAYER_H\n#define SHORTCUT_LAYER_H\n\n#include \"layer.h\"\n#include \"network.h\"\n\nlayer make_shortcut_layer(in"
},
{
"path": "lightnet/_darknet/softmax_layer.c",
"chars": 2666,
"preview": "#include \"softmax_layer.h\"\n#include \"blas.h\"\n#include \"cuda.h\"\n\n#include <float.h>\n#include <math.h>\n#include <stdlib.h>"
},
{
"path": "lightnet/_darknet/softmax_layer.h",
"chars": 604,
"preview": "#ifndef SOFTMAX_LAYER_H\n#define SOFTMAX_LAYER_H\n#include \"layer.h\"\n#include \"network.h\"\n\ntypedef layer softmax_layer;\n\nv"
},
{
"path": "lightnet/_darknet/stb_image.h",
"chars": 220334,
"preview": "/* stb_image - v2.06 - public domain image loader - http://nothings.org/stb_image.h\n "
},
{
"path": "lightnet/_darknet/stb_image_write.h",
"chars": 25652,
"preview": "/* stb_image_write - v0.98 - public domain - http://nothings.org/stb/stb_image_write.h\n writes out PNG/BMP/TGA images "
},
{
"path": "lightnet/_darknet/tree.c",
"chars": 3730,
"preview": "#include <stdio.h>\n#include <stdlib.h>\n#include \"tree.h\"\n#include \"utils.h\"\n#include \"data.h\"\n\nvoid change_leaves(tree *"
},
{
"path": "lightnet/_darknet/tree.h",
"chars": 255,
"preview": "#ifndef TREE_H\n#define TREE_H\n#include \"darknet.h\"\n\ntree *read_tree(char *filename);\nint hierarchy_top_prediction(float "
},
{
"path": "lightnet/_darknet/utils.c",
"chars": 14276,
"preview": "#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <math.h>\n#include <assert.h>\n#include <unistd.h>\n#in"
},
{
"path": "lightnet/_darknet/utils.h",
"chars": 1721,
"preview": "#ifndef UTILS_H\n#define UTILS_H\n#include <stdio.h>\n#include <time.h>\n#include \"darknet.h\"\n#include \"list.h\"\n\n#define TIM"
},
{
"path": "lightnet/about.py",
"chars": 225,
"preview": "__title__ = 'lightnet'\n__version__ = '0.0.13'\n__summary__ = \"Bringing pjreddie's DarkNet out of the shadows\"\n__uri__ = '"
},
{
"path": "lightnet/cli.py",
"chars": 1189,
"preview": "# coding: utf8\nfrom __future__ import unicode_literals\n\nimport plac\nimport requests\nimport os\nimport sys\nfrom tqdm impor"
},
{
"path": "lightnet/data/alexnet.cfg",
"chars": 879,
"preview": "[net]\nbatch=128\nsubdivisions=1\nheight=227\nwidth=227\nchannels=3\nmomentum=0.9\ndecay=0.0005\nmax_crop=256\n\nlearning_rate=0.0"
},
{
"path": "lightnet/data/cifar.cfg",
"chars": 1219,
"preview": "[net]\nbatch=128\nsubdivisions=1\nheight=28\nwidth=28\nchannels=3\nmax_crop=32\nmin_crop=32\n\nhue=.1\nsaturation=.75\nexposure=.75"
},
{
"path": "lightnet/data/cifar.test.cfg",
"chars": 1174,
"preview": "[net]\nbatch=128\nsubdivisions=1\nheight=32\nwidth=32\nchannels=3\nmomentum=0.9\ndecay=0.0005\n\nlearning_rate=0.4\npolicy=poly\npo"
},
{
"path": "lightnet/data/coco.names",
"chars": 625,
"preview": "person\nbicycle\ncar\nmotorbike\naeroplane\nbus\ntrain\ntruck\nboat\ntraffic light\nfire hydrant\nstop sign\nparking meter\nbench\nbir"
},
{
"path": "lightnet/data/coco.template",
"chars": 133,
"preview": "classes= 80\ntrain = $DATA/coco/trainvalno5k.txt\nvalid = $DATA/coco_val_5k.list\nnames = $HERE/coco.names\nbackup = $BACKU"
},
{
"path": "lightnet/data/darknet.cfg",
"chars": 1091,
"preview": "[net]\n# Train\nbatch=1\nsubdivisions=1\n# Test\n# batch=1\n# subdivisions=1\nheight=224\nwidth=224\nchannels=3\nmomentum=0.9\ndeca"
},
{
"path": "lightnet/data/darknet19.cfg",
"chars": 1963,
"preview": "[net]\nbatch=128\nsubdivisions=1\nheight=224\nwidth=224\nchannels=3\nmomentum=0.9\ndecay=0.0005\nmax_crop=448\n\nlearning_rate=0.1"
},
{
"path": "lightnet/data/darknet19_448.cfg",
"chars": 2021,
"preview": "[net]\nbatch=128\nsubdivisions=4\nheight=448\nwidth=448\nmax_crop=512\nchannels=3\nmomentum=0.9\ndecay=0.0005\n\nlearning_rate=0.0"
},
{
"path": "lightnet/data/darknet9000.cfg",
"chars": 2092,
"preview": "[net]\n# Training\n# batch=128\n# subdivisions=4\n# Testing\nbatch = 1\nsubdivisions = 1\nheight=448\nwidth=448\nmax_crop=512\ncha"
},
{
"path": "lightnet/data/densenet201.cfg",
"chars": 19764,
"preview": "[net]\n# Training\n# batch=128\n# subdivisions=4\n\n# Testing\nbatch=1\nsubdivisions=1\n\nheight=256\nwidth=256\nmax_crop=448\nchann"
},
{
"path": "lightnet/data/extraction.cfg",
"chars": 2109,
"preview": "[net]\nbatch=128\nsubdivisions=1\nheight=224\nwidth=224\nmax_crop=320\nchannels=3\nmomentum=0.9\ndecay=0.0005\n\nlearning_rate=0.1"
},
{
"path": "lightnet/data/extraction.conv.cfg",
"chars": 1689,
"preview": "[net]\nbatch=1\nsubdivisions=1\nheight=256\nwidth=256\nchannels=3\nmomentum=0.9\ndecay=0.0005\n\nlearning_rate=0.5\npolicy=poly\npo"
},
{
"path": "lightnet/data/extraction22k.cfg",
"chars": 2182,
"preview": "[net]\nbatch=128\nsubdivisions=1\nheight=224\nwidth=224\nmax_crop=320\nchannels=3\nmomentum=0.9\ndecay=0.0005\n\nlearning_rate=0.0"
},
{
"path": "lightnet/data/go.cfg",
"chars": 1387,
"preview": "[net]\nbatch=512\nsubdivisions=1\nheight=19\nwidth=19\nchannels=1\nmomentum=0.9\ndecay=0.0005\n\nburn_in=1000\nlearning_rate=0.1\np"
},
{
"path": "lightnet/data/go.test.cfg",
"chars": 1371,
"preview": "[net]\nbatch=1\nsubdivisions=1\nheight=19\nwidth=19\nchannels=1\nmomentum=0.9\ndecay=0.0005\n\nlearning_rate=0.01\npolicy=poly\npow"
},
{
"path": "lightnet/data/gru.cfg",
"chars": 287,
"preview": "[net]\nsubdivisions=1\nbatch = 256\ninputs=256\nmomentum=0.9\ndecay=0.0\ntime_steps=128\nlearning_rate=.002\nadam=1\n\npolicy=cons"
},
{
"path": "lightnet/data/jnet-conv.cfg",
"chars": 1081,
"preview": "[net]\nbatch=1\nsubdivisions=1\nheight=10\nwidth=10\nchannels=3\nlearning_rate=0.01\nmomentum=0.9\ndecay=0.0005\n\n[convolutional]"
},
{
"path": "lightnet/data/resnet152.cfg",
"chars": 15366,
"preview": "[net]\n# Training\n# batch=128\n# subdivisions=8\n\n# Testing\nbatch=1\nsubdivisions=1\n\nheight=256\nwidth=256\nmax_crop=448\nchann"
},
{
"path": "lightnet/data/resnet50.cfg",
"chars": 5272,
"preview": "[net]\n# Training\n# batch=128\n# subdivisions=4\n\n# Testing\nbatch=1\nsubdivisions=1\n\nheight=256\nwidth=256\nmax_crop=448\nchann"
},
{
"path": "lightnet/data/rnn.cfg",
"chars": 433,
"preview": "[net]\nsubdivisions=1\ninputs=256\nbatch = 1\nmomentum=0.9\ndecay=0.001\nmax_batches = 2000\ntime_steps=1\nlearning_rate=0.1\npol"
},
{
"path": "lightnet/data/rnn.train.cfg",
"chars": 437,
"preview": "[net]\nsubdivisions=1\ninputs=256\nbatch = 128\nmomentum=0.9\ndecay=0.001\nmax_batches = 2000\ntime_steps=576\nlearning_rate=0.1"
},
{
"path": "lightnet/data/strided.cfg",
"chars": 1778,
"preview": "[net]\nbatch=128\nsubdivisions=4\nheight=256\nwidth=256\nchannels=3\nmomentum=0.9\ndecay=0.0005\n\nlearning_rate=0.01\npolicy=step"
},
{
"path": "lightnet/data/t1.test.cfg",
"chars": 1129,
"preview": "[net]\nbatch=1\nsubdivisions=1\nheight=224\nwidth=224\nchannels=3\nmomentum=0.9\ndecay=0.0005\n\nlearning_rate=0.0005\npolicy=step"
},
{
"path": "lightnet/data/tiny-yolo-voc.cfg",
"chars": 1408,
"preview": "[net]\nbatch=64\nsubdivisions=8\nwidth=416\nheight=416\nchannels=3\nmomentum=0.9\ndecay=0.0005\nangle=0\nsaturation = 1.5\nexposur"
},
{
"path": "lightnet/data/tiny-yolo.cfg",
"chars": 1488,
"preview": "[net]\n# Training\n# batch=64\n# subdivisions=2\n# Testing\nbatch=1\nsubdivisions=1\nwidth=416\nheight=416\nchannels=3\nmomentum=0"
},
{
"path": "lightnet/data/tiny.cfg",
"chars": 1767,
"preview": "[net]\n# Train\nbatch=128\nsubdivisions=1\n# Test\n# batch=1\n# subdivisions=1\nheight=224\nwidth=224\nchannels=3\nmomentum=0.9\nde"
},
{
"path": "lightnet/data/vgg-16.cfg",
"chars": 1408,
"preview": "[net]\nbatch=128\nsubdivisions=4\nheight=256\nwidth=256\nchannels=3\nlearning_rate=0.00001\nmomentum=0.9\ndecay=0.0005\n\n[crop]\nc"
},
{
"path": "lightnet/data/vgg-conv.cfg",
"chars": 1114,
"preview": "[net]\nbatch=1\nsubdivisions=1\nwidth=224\nheight=224\nchannels=3\nlearning_rate=0.00001\nmomentum=0.9\ndecay=0.0005\n\n[convoluti"
},
{
"path": "lightnet/data/voc.names",
"chars": 135,
"preview": "aeroplane\nbicycle\nbird\nboat\nbottle\nbus\ncar\ncat\nchair\ncow\ndiningtable\ndog\nhorse\nmotorbike\nperson\npottedplant\nsheep\nsofa\nt"
},
{
"path": "lightnet/data/writing.cfg",
"chars": 400,
"preview": "[net]\nbatch=128\nsubdivisions=2\nheight=256\nwidth=256\nchannels=3\nlearning_rate=0.00000001\nmomentum=0.9\ndecay=0.0005\nseen=0"
},
{
"path": "lightnet/data/yolo-voc.2.0.cfg",
"chars": 2554,
"preview": "[net]\nbatch=64\nsubdivisions=8\nheight=416\nwidth=416\nchannels=3\nmomentum=0.9\ndecay=0.0005\nangle=0\nsaturation = 1.5\nexposur"
},
{
"path": "lightnet/data/yolo-voc.cfg",
"chars": 2721,
"preview": "[net]\n# Testing\nbatch=1\nsubdivisions=1\n# Training\n# batch=64\n# subdivisions=8\nheight=416\nwidth=416\nchannels=3\nmomentum=0"
},
{
"path": "lightnet/data/yolo.2.0.cfg",
"chars": 2588,
"preview": "[net]\nbatch=1\nsubdivisions=1\nwidth=416\nheight=416\nchannels=3\nmomentum=0.9\ndecay=0.0005\nangle=0\nsaturation = 1.5\nexposure"
},
{
"path": "lightnet/data/yolo.cfg",
"chars": 2724,
"preview": "[net]\n# Testing\n#batch=1\n#subdivisions=1\n# Training\nbatch=64\nsubdivisions=8\nwidth=608\nheight=608\nchannels=3\nmomentum=0.9"
},
{
"path": "lightnet/data/yolo9000.cfg",
"chars": 2329,
"preview": "[net]\n# Testing\nbatch=1\nsubdivisions=1\n# Training\n# batch=64\n# subdivisions=8\nbatch=1\nsubdivisions=1\nheight=544\nwidth=54"
},
{
"path": "lightnet/lightnet.pxd",
"chars": 18041,
"preview": "# Generated by https://github.com/tarruda/python-autopxd\nfrom libc.stdio cimport FILE\nfrom libc.time cimport clock_t\n\ncd"
},
{
"path": "lightnet/lightnet.pyx",
"chars": 20891,
"preview": "# cython: infer_types=True\n# cython: cdivision=True\nfrom __future__ import print_function\nfrom libc.stdlib cimport callo"
},
{
"path": "lightnet/util.py",
"chars": 180,
"preview": "from contextlib import contextmanager\nfrom tempfile import mkdtemp\nimport shutil\n\n\n@contextmanager\ndef make_temp_dir():\n"
},
{
"path": "requirements.txt",
"chars": 43,
"preview": "pathlib\nnumpy\nplac\nrequests\nmsgpack-python\n"
},
{
"path": "setup.py",
"chars": 4766,
"preview": "#!/usr/bin/env python\nimport shutil\nimport io\nimport os\nimport json\nimport distutils.command.build_ext\nimport subprocess"
},
{
"path": "tests/test_boxes.py",
"chars": 585,
"preview": "import pytest\nimport numpy\nfrom lightnet.lightnet import BoxLabels\n\n@pytest.fixture\ndef ids():\n return numpy.asarray("
},
{
"path": "tests/test_image.py",
"chars": 551,
"preview": "from pathlib import Path\nfrom numpy.testing import assert_equal\n\nfrom lightnet.lightnet import Image\n\ndef test_make_imag"
},
{
"path": "tests/test_network.py",
"chars": 1904,
"preview": "from __future__ import unicode_literals\nfrom lightnet import Network, Image, BoxLabels\nfrom lightnet.lightnet import Det"
}
]
About this extraction
This page contains the full source code of the explosion/lightnet GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 151 files (921.9 KB), approximately 294.9k tokens, and a symbol index with 920 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — a free GitHub repo-to-text converter for AI. Built by Nikandr Surkov.