[
  {
    "path": ".github/workflows/ci.yml",
    "content": "# On a push to any branch this workflow:\n# - builds the Docker image,\n# - build wheels for different Python versions,\n# - installs the wheel for each Python version and runs the unit tests\n#   against newest and oldest versions of dependencies.\n# On manual dispatch this workflow:\n# - pushes the previously built Docker image to DockerHub with tag \"dev\".\n\nname: ci and release\n\non:\n  workflow_dispatch:\n  pull_request:\n\njobs:\n  build_docker:\n    name: Build Docker image\n    runs-on: ubuntu-latest\n\n    steps:\n      - name: Checkout\n        uses: actions/checkout@v4\n\n      - name: Set up QEMU\n        uses: docker/setup-qemu-action@v3\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@v3\n\n      - name: Login to DockerHub\n        uses: docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKERHUB_USERNAME }}\n          password: ${{ secrets.DOCKERHUB_TOKEN }}\n\n      - name: Build and export\n        uses: docker/build-push-action@v6\n        with:\n          context: .\n          tags: mv-extractor:local\n          outputs: type=docker,dest=/tmp/image.tar\n          cache-from: type=registry,ref=lubo1994/mv-extractor:buildcache\n          cache-to: type=registry,ref=lubo1994/mv-extractor:buildcache,mode=max\n\n      - name: Upload Docker image as artifact\n        uses: actions/upload-artifact@v4\n        with:\n          name: mv-extractor-docker-image\n          path: /tmp/image.tar\n\n  test_docker:\n    name: Run unit tests in Docker container (only for the Python version used in the Dockerfile command)\n    runs-on: ubuntu-latest\n    needs:\n      - build_docker\n\n    steps:\n      - name: Checkout\n        uses: actions/checkout@v4\n\n      - name: Download artifact containing Docker image\n        uses: actions/download-artifact@v4\n        with:\n          name: mv-extractor-docker-image\n          path: /tmp\n\n      - name: Load Docker image\n        run: |\n          
docker load --input /tmp/image.tar\n\n      - name: Run unit tests\n        run: |\n          docker run -v ${{ github.workspace }}:/home/video_cap \\\n            mv-extractor:local \\\n              /bin/bash -c ' \\\n                yum install -y compat-openssl10 && \\\n                python3.12 -m unittest discover -s tests -p \"*tests.py\"\n              '\n\n  build_and_test_wheels:\n    name: Build wheels for cp${{ matrix.python }}-${{ matrix.platform_id }}\n    runs-on: ${{ matrix.os }}\n    needs: \n      - build_docker\n    strategy:\n      # Ensure that a wheel builder finishes even if another fails\n      fail-fast: false\n      matrix:\n        include:\n        - os: ubuntu-latest\n          python: 39\n          bitness: 64\n          platform_id: manylinux_x86_64\n          manylinux_image: mv-extractor:local\n          numpy_min_version: \"numpy==1.19.3\"\n          opencv_min_version: \"opencv-python==4.4.0.46\"\n        - os: ubuntu-latest\n          python: 310\n          bitness: 64\n          platform_id: manylinux_x86_64\n          manylinux_image: mv-extractor:local\n          numpy_min_version: \"numpy==1.21.2\"\n          opencv_min_version: \"opencv-python==4.5.4.60\"\n        - os: ubuntu-latest\n          python: 311\n          bitness: 64\n          platform_id: manylinux_x86_64\n          manylinux_image: mv-extractor:local\n          numpy_min_version: \"numpy==1.23.3\"\n          opencv_min_version: \"opencv-python==4.7.0.72\"\n        - os: ubuntu-latest\n          python: 312\n          bitness: 64\n          platform_id: manylinux_x86_64\n          manylinux_image: mv-extractor:local\n          numpy_min_version: \"numpy==1.26.0\"\n          opencv_min_version: \"opencv-python==4.9.0.80\"\n        - os: ubuntu-latest\n          python: 313\n          bitness: 64\n          platform_id: manylinux_x86_64\n          manylinux_image: mv-extractor:local\n          numpy_min_version: \"numpy==2.2.6\"\n          opencv_min_version: 
\"opencv-python==4.12.0.88\"\n        - os: ubuntu-latest\n          python: 314\n          bitness: 64\n          platform_id: manylinux_x86_64\n          manylinux_image: mv-extractor:local\n          numpy_min_version: \"numpy==2.2.6\"\n          opencv_min_version: \"opencv-python==4.12.0.88\"\n\n    steps:\n      - name: Checkout\n        uses: actions/checkout@v4\n\n      - name: Download artifact containing Docker image\n        uses: actions/download-artifact@v4\n        with:\n          name: mv-extractor-docker-image\n          path: /tmp\n      \n      - name: Load Docker image\n        run: |\n          docker load --input /tmp/image.tar\n\n      - name: Build and test wheels\n        uses: pypa/cibuildwheel@v3.2.1\n        env:\n          CIBW_PLATFORM: linux\n          CIBW_BUILD: cp${{ matrix.python }}-${{ matrix.platform_id }}\n          # Disable building PyPy wheels on all platforms\n          CIBW_SKIP: pp*\n          CIBW_ARCHS: x86_64\n          CIBW_MANYLINUX_X86_64_IMAGE: ${{ matrix.manylinux_image }}\n          #CIBW_MANYLINUX_I686_IMAGE: ${{ matrix.manylinux_image }}\n          CIBW_BUILD_FRONTEND: build\n          CIBW_TEST_COMMAND: |\n            echo \"Running unit tests\" && \\\n            yum install -y compat-openssl10 && \\\n            PROJECT_ROOT={project} python3 -m unittest discover -s {project}/tests -p \"*tests.py\" && \\\n            echo \"Running unit tests against oldest supported versions of dependencies\" && \\\n            python3 -m pip install ${{ matrix.numpy_min_version }} ${{ matrix.opencv_min_version }} && \\\n            PROJECT_ROOT={project} python3 -m unittest discover -s {project}/tests -p \"*tests.py\"\n          CIBW_BUILD_VERBOSITY: 1\n\n      - uses: actions/upload-artifact@v4\n        with:\n          name: python-wheel-${{ matrix.python }}\n          path: ./wheelhouse/*.whl\n\n  push_docker:\n    name: Push Docker image to DockerHub\n    if: github.event_name == 'workflow_dispatch'\n    runs-on: 
ubuntu-latest\n    needs: \n      - build_docker\n      - test_docker\n      - build_and_test_wheels\n\n    steps:\n      - name: Download artifact containing Docker image\n        uses: actions/download-artifact@v4\n        with:\n          name: mv-extractor-docker-image\n          path: /tmp\n\n      - name: Login to DockerHub\n        uses: docker/login-action@v3\n        with:\n          username: ${{ secrets.DOCKERHUB_USERNAME }}\n          password: ${{ secrets.DOCKERHUB_TOKEN }}\n\n      - name: Load and push Docker image\n        run: |\n          docker load --input /tmp/image.tar\n          docker tag mv-extractor:local lubo1994/mv-extractor:dev\n          docker push lubo1994/mv-extractor:dev"
  },
  {
    "path": ".gitignore",
    "content": "/build/\n/dist/\n/wheelhouse/\n*.egg-info\n*.egg\n.eggs\n__pycache__/\n/venv3.*/\nenv/\nout-*/\n\n*.tar\na.out\n*.so\n"
  },
  {
    "path": "Dockerfile",
    "content": "FROM quay.io/pypa/manylinux_2_28_x86_64 AS builder\n\n# Install build tools\nRUN yum update -y && \\\n  yum install -y \\\n    wget \\\n    unzip \\\n    git \\\n    make \\\n    cmake \\\n    gcc-toolset-10 \\\n    gcc-c++ \\\n    pkgconfig \\\n    libtool && \\\n  yum clean all\n\n# Activate specific version of gcc toolset (newer versions of gcc fail to build old versions of ffmpeg)\nENV PATH=\"/opt/rh/gcc-toolset-10/root/usr/bin:$PATH\"\nENV LD_LIBRARY_PATH=\"/opt/rh/gcc-toolset-10/root/usr/lib64:$LD_LIBRARY_PATH\"\n\n# Install OpenCV\nARG OPENCV_VERSION=\"4.12.0\"\nWORKDIR /opt\nRUN wget -O opencv.zip https://github.com/opencv/opencv/archive/\"$OPENCV_VERSION\".zip && \\\n  unzip opencv.zip && \\\n  mv opencv-\"$OPENCV_VERSION\" opencv && \\\n  mkdir opencv/build && \\\n  cd opencv/build && \\\n  cmake \\\n  -D CMAKE_BUILD_TYPE=RELEASE \\\n  -D OPENCV_GENERATE_PKGCONFIG=YES \\\n  -D CMAKE_INSTALL_PREFIX=/usr/local \\\n  -D OPENCV_ENABLE_NONFREE=OFF \\\n  -D BUILD_LIST=core,imgproc \\\n  .. 
&& \\\n  make -j $(nproc) && \\\n  make install && \\\n  ldconfig && \\\n  rm -rf ../../opencv.zip && \\\n  rm -rf ../../opencv\n\n# Install FFMPEG\nWORKDIR /opt/ffmpeg_sources\nRUN curl -O -L https://www.nasm.us/pub/nasm/releasebuilds/2.15.05/nasm-2.15.05.tar.bz2 && \\\n  tar xjvf nasm-2.15.05.tar.bz2 && \\\n  cd nasm-2.15.05 && \\\n  ./autogen.sh && \\\n  ./configure --disable-shared --enable-static && \\\n  make -j $(nproc) && \\\n  make install && \\\n  rm -rf ../nasm-2.15.05.tar.bz2 && \\\n  rm -rf ../nasm-2.15.05\n\nWORKDIR /opt/ffmpeg_sources\nRUN curl -O -L https://www.tortall.net/projects/yasm/releases/yasm-1.3.0.tar.gz && \\\n  tar xzvf yasm-1.3.0.tar.gz && \\\n  cd yasm-1.3.0 && \\\n  ./configure --disable-shared --enable-static && \\\n  make -j $(nproc) && \\\n  make install && \\\n  rm -rf ../yasm-1.3.0.tar.gz && \\\n  rm -rf ../yasm-1.3.0\n\nWORKDIR /opt/ffmpeg_sources\nRUN git clone --branch stable --depth 1 https://code.videolan.org/videolan/x264.git && \\\n  cd x264 && \\\n  ./configure --disable-shared --enable-static --enable-pic && \\\n  make -j $(nproc) && \\\n  make install && \\\n  rm -rf ../x264\n\nARG FFMPEG_VERSION=\"4.1.3\"\nWORKDIR /opt/ffmpeg_sources\nRUN wget -O ffmpeg-snapshot.tar.bz2 https://ffmpeg.org/releases/ffmpeg-\"$FFMPEG_VERSION\".tar.bz2 && \\\n  mkdir -p ffmpeg && \\\n  tar xjvf ffmpeg-snapshot.tar.bz2 -C ffmpeg --strip-components=1 && \\\n  rm -rf ffmpeg-snapshot.tar.bz2\n\n\nWORKDIR /opt/ffmpeg_sources/ffmpeg\nRUN ./configure \\\n  --pkg-config-flags=\"--static\" \\\n  --extra-cflags=\"-I/usr/local/include\" \\\n  --extra-ldflags=\"-L/usr/local/lib\" \\\n  --extra-libs=-lpthread \\\n  --extra-libs=-lm \\\n  --enable-static \\\n  --disable-shared \\\n  --enable-gpl \\\n  --enable-libx264 \\\n  --enable-nonfree \\\n  --enable-pic && \\\n  make -j $(nproc) && \\\n  make install && \\\n  rm -rf ../ffmpeg\n\nFROM quay.io/pypa/manylinux_2_28_x86_64\n\n# copy libraries\nWORKDIR /usr/local/lib\nCOPY --from=builder /usr/local/lib 
.\nWORKDIR /usr/local/lib64\nCOPY --from=builder /usr/local/lib64 .\nWORKDIR /usr/local/include\nCOPY --from=builder /usr/local/include .\nWORKDIR /usr/local/lib\nCOPY --from=builder /usr/local/lib .\n\n# Set environment variables\nENV PKG_CONFIG_PATH=\"$PKG_CONFIG_PATH:/usr/local/lib64/pkgconfig\"\nENV LD_LIBRARY_PATH=\"$LD_LIBRARY_PATH:/usr/local/lib64\"\n\nWORKDIR /home/video_cap\n\nCOPY pyproject.toml /home/video_cap/\nCOPY setup.py /home/video_cap/\nCOPY src /home/video_cap/src/\nCOPY README.md /home/video_cap/\n\n# Install Python package\nRUN python3.12 -m pip install .\n\n# Location of the \"extract_mvs\" script\nENV PATH=\"$PATH:/opt/python/cp312-cp312/bin\"\n\nCMD [\"sh\", \"-c\", \"tail -f /dev/null\"]\n"
  },
  {
    "path": "LICENSE",
    "content": "The MIT License\n\nCopyright (c) 2010-2024 Lukas Bommes\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in\nall copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\nTHE SOFTWARE.\n"
  },
  {
    "path": "MANIFEST.in",
    "content": "recursive-include ffmpeg_patch *\nrecursive-include src *\nrecursive-include tests *\ninclude LICENSE\ninclude pyproject.toml\ninclude extract_mvs.py\ninclude setup.py\ninclude vid_h264.mp4\ninclude vid_mpeg4_part2.mp4\ninclude vid_h264.264\n"
  },
  {
    "path": "README.md",
    "content": "\n<h1 align=\"center\">\n    <a href=\"https://github.com/LukasBommes/mv-extractor\" target=\"blank_\">\n        <img width=\"250\" alt=\"mvextractor\" src=\"https://raw.githubusercontent.com/LukasBommes/mv-extractor/cb8e08f4c1e161d103d5382ded93134f26e96f05/logo.svg\" />\n    </a>\n    <br>\n    Motion Vector Extractor\n</h1>\n\nThis tool extracts motion vectors, frames, and frame types from H.264 and MPEG-4 Part 2 encoded videos.\n\nA replacement for OpenCV's [VideoCapture](https://docs.opencv.org/4.1.0/d8/dfe/classcv_1_1VideoCapture.html) that returns for each frame:\n- Frame type (I, P, or B)\n- motion vectors\n- Optional decoded frame as BGR image\n\nFrame decoding can be skipped for very fast motion vector extraction, ideal for, e.g., fast visual object tracking. Both a C++ and a Python API is provided.\n\nThe image below shows a video frame with extracted motion vectors overlaid.\n\n![motion_vector_demo_image](https://raw.githubusercontent.com/LukasBommes/mv-extractor/cb8e08f4c1e161d103d5382ded93134f26e96f05/mvs.png)\n\n<details>\n  <summary><strong>Note on Deprecation of Timestamp Extraction</strong></summary>\n\n  Versions 1.x of the motion vector extractor additionally returned the timestamps of video frames. For RTSP streams, the UTC wall time of when the sender transmitted a frame was returned (rather than the more easily retrievable reception timestamp).\n\n  Since this feature required patching FFmpeg internals, it became difficult to maintain and prevented compatibility with newer versions of FFmpeg.\n\n  As a result, timestamp extraction was removed in the 2.0.0 release. 
If you rely on this feature, please use version **1.1.0**.\n</details>\n\n## News\n\n### Recent Changes in Release 2.0.0\n\n- New motion-vectors-only mode, in which frame decoding is skipped for better performance (thanks to [@microa](https://github.com/LukasBommes/mv-extractor/pull/78))\n- Dropped extraction of timestamps as this feature was complex and difficult to maintain. Note the breaking API change to the `read` and `retrieve` methods of the `VideoCapture` class\n\n```diff\n- ret, frame, motion_vectors, frame_type, timestamp = cap.read()\n+ ret, frame, motion_vectors, frame_type = cap.read()\n```\n\n- Added support for Python 3.13 and 3.14\n- Moved installation of FFMPEG and OpenCV from script files directly into Dockerfile\n- Improved quickstart section of the readme\n\n\n## Quickstart\n\n### Step 1: Install\n\n```bash\npip install motion-vector-extractor\n```\nNote, that we currently provide the package only for x86-64 linux, such as Ubuntu or Debian, and Python 3.9 to 3.14. If you are on a different platform, please use the Docker image as described [below](#installation-via-docker).\n\n### Step 2: Extract Motion Vectors\n\nYou can follow along the examples below using the example video [`vid_h264.mp4`](https://github.com/LukasBommes/mv-extractor/blob/master/vid_h264.mp4) from the repo.\n\n#### Command Line\n\n```bash\n# Extract motion vectors and show live preview\nextract_mvs vid_h264.mp4 --preview --verbose\n\n# Extract motion vectors and skip frame decoding (faster)\nextract_mvs vid_h264.mp4 --verbose --skip-decoding-frames\n\n# Extract and store motion vectors and frames to disk without showing live preview\nextract_mvs vid_h264.mp4 --dump\n\n# See all available options\nextract_mvs -h\n```\n\n#### Python API\n```python\nfrom mvextractor.videocap import VideoCap\n\ncap = VideoCap()\ncap.open(\"vid_h264.mp4\")\n\n# (optional) skip decoding frames\ncap.set_decode_frames(False)\n\nwhile True:\n    ret, frame, motion_vectors, frame_type = cap.read()\n    
if not ret:\n        break\n    print(f\"Num. motion vectors: {len(motion_vectors)}\")\n    print(f\"Frame type: {frame_type}\")\n    if frame is not None:\n        print(f\"Frame size: {frame.shape}\")\n\ncap.release()\n```\n\n## Advanced Usage\n\n### Installation via Docker\n\nInstead of installing the motion vector extractor via PyPI you can also use the prebuild Docker image from [DockerHub](https://hub.docker.com/r/lubo1994/mv-extractor). The Docker image contains the motion vector extractor and all its dependencies and comes in handy for quick testing or in case your platform is not compatible with the provided Python package.\n\n#### Prerequisites\n\nTo use the Docker image you need to install [Docker](https://docs.docker.com/). Furthermore, you need to clone the source code with\n```bash\ngit clone https://github.com/LukasBommes/mv-extractor.git mv_extractor\n```\n\n#### Run Motion Vector Extraction in Docker\n\nAfterwards, you can run the extraction script in the `mv_extractor` directory as follows\n```bash\n./run.sh python3.12 extract_mvs.py vid_h264.mp4 --preview --verbose\n```\nThis pulls the prebuild Docker image from DockerHub and runs the extraction script inside the Docker container.\n\n#### Building the Docker Image Locally (Optional)\n \nThis step is not required and for faster installation, we recommend using the prebuilt image.\nIf you still want to build the Docker image locally, you can do so by running the following command in the `mv_extractor` directory\n```bash\ndocker build . --tag=mv-extractor\n```\nNote that building can take more than one hour.\n\nNow, run the docker container with\n```bash\ndocker run -it --ipc=host --env=\"DISPLAY\" -v $(pwd):/home/video_cap -v /tmp/.X11-unix:/tmp/.X11-unix:rw mv-extractor /bin/bash\n```\n\n\n## Python API\n\nThis module provides a Python API which is very similar to that of OpenCV [VideoCapture](https://docs.opencv.org/4.1.0/d8/dfe/classcv_1_1VideoCapture.html). 
Using the Python API is the recommended way of using the H.264 Motion Vector Capture class.\n\n#### Class :: VideoCap()\n\n| Methods | Description |\n| --- | --- |\n| VideoCap() | Constructor |\n| open() | Open a video file or url |\n| grab() | Reads the next video frame and motion vectors from the stream |\n| retrieve() | Decodes and returns the grabbed frame and motion vectors |\n| read() | Convenience function which combines a call of grab() and retrieve() |\n| release() | Close a video file or url and release all ressources |\n| set_decode_frames() | Enable/disable decoding of video frames |\n\n| Attributes | Description |\n| --- | --- |\n| decode_frames | Getter to check if frame decoding is enabled (True) or skipped (False) |\n\n##### Method :: VideoCap()\n\nConstructor. Takes no input arguments and returns nothing.\n\n##### Method :: open()\n\nOpen a video file or url. The stream must be H264 encoded. Otherwise, undesired behaviour is likely.\n\n| Parameter | Type | Description |\n| --- | --- | --- |\n| url | string | Relative or fully specified file path or an url specifying the location of the video stream. Example \"vid.flv\" for a video file located in the same directory as the source files. Or \"rtsp://xxx.xxx.xxx.xxx:554\" for an IP camera streaming via RTSP. |\n\n| Returns | Type | Description |\n| --- | --- | --- |\n| success | bool | True if video file or url could be opened successfully, false otherwise. |\n\n##### Method :: grab()\n\nReads the next video frame and motion vectors from the stream, but does not yet decode it. Thus, grab() is fast. A subsequent call to retrieve() is needed to decode and return the frame and motion vectors. the purpose of splitting up grab() and retrieve() is to provide a means to capture frames in multi-camera scenarios which are as close in time as possible. 
To do so, first call grab() on all cameras and afterwards call retrieve() on all cameras.\n\nTakes no input arguments.\n\n| Returns | Type | Description |\n| --- | --- | --- |\n| success | bool | True if next frame and motion vectors could be grabbed successfully, false otherwise. |\n\n##### Method :: retrieve()\n\nDecodes and returns the grabbed frame and motion vectors. Prior to calling retrieve() on a stream, grab() needs to have been called and returned successfully.\n\nTakes no input arguments and returns a tuple with the elements described in the table below.\n\n| Index | Name | Type | Description |\n| --- | --- | --- | --- |\n| 0 | success | bool | True in case the frame and motion vectors could be retrieved sucessfully, false otherwise or in case the end of stream is reached. When false, the other tuple elements are set to empty numpy arrays or 0. |\n| 1 | frame | numpy array | Array of dtype uint8 shape (h, w, 3) containing the decoded video frame. w and h are the width and height of this frame in pixels. Channels are in BGR order. If no frame could be decoded an empty numpy ndarray of shape (0, 0, 3) and dtype uint8 is returned. If frame decoding is disabled with set_decode_frames(False) None is returned instead. |\n| 2 | motion vectors | numpy array | Array of dtype int32 and shape (N, 10) containing the N motion vectors of the frame. Each row of the array corresponds to one motion vector. If no motion vectors are present in a frame, e.g. if the frame is an `I` frame an empty numpy array of shape (0, 10) and dtype int32 is returned. The columns of each vector have the following meaning (also refer to [AVMotionVector](https://ffmpeg.org/doxygen/4.1/structAVMotionVector.html) in FFMPEG documentation): <br>- 0: `source`: offset of the reference frame from the current frame. The reference frame is the frame where the motion vector points to and where the corresponding macroblock comes from. If `source < 0`, the reference frame is in the past. 
For `source > 0` the it is in the future (in display order).<br>- 1: `w`: width of the vector's macroblock.<br>- 2: `h`: height of the vector's macroblock.<br>- 3: `src_x`: x-location (in pixels) where the motion vector points to in the reference frame.<br>- 4: `src_y`: y-location (in pixels) where the motion vector points to in the reference frame.<br>- 5: `dst_x`: x-location of the vector's origin in the current frame (in pixels). Corresponds to the x-center coordinate of the corresponding macroblock.<br>- 6: `dst_y`: y-location of the vector's origin in the current frame (in pixels). Corresponds to the y-center coordinate of the corresponding macroblock.<br>- 7: `motion_x`: Macroblock displacement in x-direction, multiplied by `motion_scale` to become integer. Used to compute fractional value for `src_x` as `src_x = dst_x + motion_x / motion_scale`.<br>- 8: `motion_y`: Macroblock displacement in y-direction, multiplied by `motion_scale` to become integer. Used to compute fractional value for `src_y` as `src_y = dst_y + motion_y / motion_scale`.<br>- 9: `motion_scale`: see definiton of columns 7 and 8. Used to scale up the motion components to integer values. E.g. if `motion_scale = 4`, motion components can be integer values but encode a float with 1/4 pixel precision.<br><br>Note: `src_x` and `src_y` are only in integer resolution. They are contained in the [AVMotionVector](https://ffmpeg.org/doxygen/4.1/structAVMotionVector.html) struct and exported only for the sake of completeness. Use equations in field 7 and 8 to get more accurate fractional values for `src_x` and `src_y`. |\n| 3 | frame_type | string | Unicode string representing the type of frame. Can be `\"I\"` for a keyframe, `\"P\"` for a frame with references to only past frames and `\"B\"` for a frame with references to both past and future frames. A `\"?\"` string indicates an unknown frame type. 
|\n\n##### Method :: read()\n\nConvenience function which internally calls first grab() and then retrieve(). It takes no arguments and returns the same values as retrieve().\n\n##### Method :: release()\n\nClose a video file or url and release all ressources. Takes no input arguments and returns nothing.\n\n##### Method :: set_decode_frames()\n\nEnable/disable decoding of video frames. May be called anytime, even mid-stream. Returns nothing.\n\n| Parameter | Type | Description |\n| --- | --- | --- |\n| enable | bool | If True (default) RGB frames are decoded and returned in addition to extracted motion vectors. If False, frame decoding is skipped, yielding much higher extraction througput. |\n\n\n## C++ API\n\nThe C++ API differs from the Python API in what parameters the methods expect and what values they return. Refer to the docstrings in `src/video_cap.hpp`.\n\n\n## Theory\n\nWhat follows is a short explanation of the data returned by the `VideoCap` class. Also refer this [excellent book](https://dl.acm.org/citation.cfm?id=1942939) by Iain E. Richardson for more details.\n\n##### Frame\nThe decoded video frame. Nothing special about that.\n\n##### Motion Vectors\n\nH.264 and MPEG-4 Part 2 use different techniques to reduce the size of a raw video frame prior to sending it over a network or storing it into a file. One of those techniques is motion estimation and prediction of future frames based on previous or future frames. Each frame is segmented into macroblocks of e.g. 16 pixel x 16 pixel. During encoding motion estimation matches every macroblock to a similar looking macroblock in a previously encoded frame (note that this frame can also be a future frame since encoding and presentation order might differ). This allows to transmit only those motion vectors and the reference macroblock instead of all macroblocks, effectively reducing the amount of transmitted or stored data. 
<br>\nMotion vectors correlate directly with motion in the video scene and are useful for various computer vision tasks, such as visual object tracking.\n\nIn MPEG-4 Part 2 macroblocks are always 16 pixel x 16 pixel. In H.264 macroblocks can be 16x16, 16x8, 8x16, 8x8, 8x4, 4x8, or 4x4 in size.\n\n##### Frame Types\n\nThe frame type is either \"P\", \"B\" or \"I\" and refers to the H.264 encoding mode of the current frame. An \"I\" frame is send fully over the network and serves as a reference for \"P\" and \"B\" frames for which only differences to previously decoded frames are transmitted. Those differences are encoded via motion vectors. As a consequence, for an \"I\" frame no motion vectors are returned by this library. The difference between \"P\" and \"B\" frames is that \"P\" frames refer only to past frames, whereas \"B\" frames have motion vectors which refer to both past and future frames. References to future frames are possible even with live streams because the decoding order of frames differs from the presentation order.\n\n\n## About\n\nThis software is maintained by [**Lukas Bommes**](https://lukasbommes.de/).\nIt is based on [MV-Tractus](https://github.com/jishnujayakumar/MV-Tractus/tree/master/include) and OpenCV's [videoio module](https://github.com/opencv/opencv/tree/master/modules/videoio).\n\n\n#### License\n\nThis project is licensed under the MIT License - see the [LICENSE](https://github.com/LukasBommes/mv-extractor/blob/master/LICENSE) file for details.\n\n\n#### Citation\n\nIf you use our work for academic research please cite\n\n```\n@INPROCEEDINGS{9248145,\n  author={L. {Bommes} and X. {Lin} and J. {Zhou}},\n  booktitle={2020 15th IEEE Conference on Industrial Electronics and Applications (ICIEA)}, \n  title={MVmed: Fast Multi-Object Tracking in the Compressed Domain}, \n  year={2020},\n  volume={},\n  number={},\n  pages={1419-1424},\n  doi={10.1109/ICIEA48937.2020.9248145}}\n```\n\n\n"
  },
  {
    "path": "dockerhub.md",
    "content": "# Motion Vector Extractor\n\nThe [motion vector extractor](https://github.com/LukasBommes/mv-extractor) is a tool to extract frames, motion vectors and frame types from H.264 and MPEG-4 Part 2 encoded videos. The tool provides a single class, which serves as a replacement for OpenCV's [VideoCapture](https://docs.opencv.org/4.1.0/d8/dfe/classcv_1_1VideoCapture.html) and can be used to read and decode video frames from a H.264 or MPEG-4 Part 2 encoded video stream/file.\n\nThis Docker image is based on the [manylinux_2_28](https://github.com/pypa/manylinux) image and serves two purposes:\n1. It contains all dependencies to run the motion vector extractor and its test suite.\n2. It functions as build environment for building the [Python package](https://pypi.org/project/motion-vector-extractor/) of the motion vector extraction for all supported Python versions.\n\n## Tags with respective Dockerfile links\n\n- [`v1.1.0`, `latest`](https://github.com/LukasBommes/mv-extractor/blob/c56b94b9ec7e96e273e67eb5cf19f0e6b927f68b/Dockerfile)\n- [`v1.0.6`](https://github.com/LukasBommes/mv-extractor/blob/75424afe230f9847f3e86e243f46d3105eeba858/Dockerfile)\n- [`v1.0.5`](https://github.com/LukasBommes/mv-extractor/blob/ac539243f6cd7cc1d9640d8ce52ba1814a3cbc7d/Dockerfile)\n- [`v1.0.4`](https://github.com/LukasBommes/mv-extractor/blob/94a79e0ce72446beb7b3862f8ed04a1cbce0d1a3/Dockerfile)\n- [`v1.0.3`](https://github.com/LukasBommes/mv-extractor/blob/2ccce5b85e1c9cf813271e443490981c5773dc02/Dockerfile)\n- [`v1.0.2`](https://github.com/LukasBommes/mv-extractor/blob/4dc77fe5681d55820b43657c63c81294bf47a0bc/Dockerfile)\n- [`v1.0.1`](https://github.com/LukasBommes/mv-extractor/blob/17ae26680194b49996e01397871bef857064514f/Dockerfile)\n- [`v1.0.0`](https://github.com/LukasBommes/mv-extractor/blob/4b44302a44e78618aeabde95ee02cecee311b456/Dockerfile)\n\nImages tagged with `dev` and `buildcache` are intermediate artefacts generated by the CI and should not be used 
directly.\n\n## Usage\n\nPull and run the motion vector extractor with\n```cmd\ndocker run lubo1994/mv-extractor:latest extract_mvs -h\n```\n\nMap a video file into the container and extract motion vectors (replace <videofile.mp4> with the actual filename)\n```cmd\ndocker run -v ./<videofile.mp4>:/home/video_cap/<videofile.mp4> lubo1994/mv-extractor:latest extract_mvs <videofile.mp4> --verbose\n```\n\nIf you want to use the graphical preview, you have to supply additional arguments to the docker run command\n```cmd\ndocker run -it --ipc=host --env=\"DISPLAY\" -v ./<videofile.mp4>:/home/video_cap/<videofile.mp4> -v /tmp/.X11-unix:/tmp/.X11-unix:rw lubo1994/mv-extractor:latest extract_mvs <videofile.mp4> --preview\n```\n\nFor more details on the usage see the [project homepage](https://github.com/LukasBommes/mv-extractor).\n\n## About\n\nThis software is written by [**Lukas Bommes**](https://lukasbommes.de/) and licensed under the [MIT License](https://github.com/LukasBommes/mv-extractor/blob/master/LICENSE).\n\nIf you use the project for academic research please cite\n\n```text\n@INPROCEEDINGS{9248145,\n  author={L. {Bommes} and X. {Lin} and J. {Zhou}},\n  booktitle={2020 15th IEEE Conference on Industrial Electronics and Applications (ICIEA)}, \n  title={MVmed: Fast Multi-Object Tracking in the Compressed Domain}, \n  year={2020},\n  volume={},\n  number={},\n  pages={1419-1424},\n  doi={10.1109/ICIEA48937.2020.9248145}}\n```\n"
  },
  {
    "path": "extract_mvs.py",
    "content": "from mvextractor.__main__ import main\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[build-system]\nrequires = [\n    \"setuptools>=61.0\",\n    \"pkgconfig>=1.5.1\",\n    \"numpy>=2.0.0\"\n]\nbuild-backend = \"setuptools.build_meta\""
  },
  {
    "path": "release.md",
    "content": "# Create a new release\n\n### Step 1) Bump version\n\nBump the version in `setup.py`\n\n### Step 2) Push code\n\nMake changes, commit and push.\n\n### Step 3) Run build workflow\n\nOn GitHub go to the repo's \"Actions\" tab and manually trigger the \"build\" workflow. The build workflow builds the Docker image and wheels. The Docker image is automatically pushed to Dockerhub. The wheels need to be manually uploaded to PyPI as explained below.\n\n### Step 4) Create tag and release\n\nNow, create a tag with the same version just entered in the `setup.py` and push that tag to the remote.\n```\ngit tag vx.x.x\ngit push origin vx.x.x\n```\n\nThen create a release on GitHub using this tag.\n\n### Step 5) Upload wheels to PyPI\n\nFirst, make sure you have the most recent version of twine installed on the host\n```\npython3 -m pip install --upgrade twine\n```\n\nThen, download and extract the wheels from the (successfully completed) workflow run. Place them inside the \"dist\" folder (create if it does not exist). Then, upload to PyPI with\n```\npython3 -m twine upload dist/*\n```\n\n#### Step 6) Tag Docker image with correct version \n\nWhen pushing changes, a Docker image `lubo1994/mv-extractor:dev` is being build and pushed to DockerHub. Upon a release, this image should be tagged with the correct release version and the `latest` tag. To this end, first pull the `dev` image\n```\ndocker pull lubo1994/mv-extractor:dev\n```\nand then login to the docker registry\n```\ncat docker_registry_password.txt | docker login --username <username> --password-stdin\n```\nand tag and push the image as follows\n```\ndocker tag lubo1994/mv-extractor:dev lubo1994/mv-extractor:vx.x.x\ndocker push lubo1994/mv-extractor:vx.x.x\ndocker tag lubo1994/mv-extractor:vx.x.x lubo1994/mv-extractor:latest\ndocker push lubo1994/mv-extractor:latest\n```\nwhere `vx.x.x` is replaced with the version of the release.\n"
  },
  {
    "path": "run.sh",
    "content": "#!/bin/bash\n\nxhost +\n\ndocker run \\\n    -it \\\n    --ipc=host \\\n    --env=\"DISPLAY\" \\\n    -v \"$(pwd)\":/home/video_cap \\\n    -v /tmp/.X11-unix:/tmp/.X11-unix:rw \\\n    lubo1994/mv-extractor:latest \\\n    \"$@\""
  },
  {
    "path": "setup.py",
    "content": "from setuptools import find_packages, setup, Extension\nimport pkgconfig\nfrom pathlib import Path\nimport numpy as np\n\n\npkgconfig_result = pkgconfig.parse('libavformat libswscale opencv4')\n\nprint(\"Numpy dir: \", np.get_include())\n\nmvextractor = Extension('mvextractor.videocap',\n    include_dirs = [\n        *pkgconfig_result['include_dirs'],\n        np.get_include()\n    ],\n    library_dirs = pkgconfig_result['library_dirs'],\n    libraries = pkgconfig_result['libraries'],\n    sources = [\n        'src/mvextractor/py_video_cap.cpp',\n        'src/mvextractor/video_cap.cpp',\n        'src/mvextractor/mat_to_ndarray.cpp'\n    ],\n    extra_compile_args = ['-std=c++11'],\n    extra_link_args = ['-fPIC', '-Wl,-Bsymbolic'])\n\nsetup(\n    name='motion-vector-extractor',\n    author='Lukas Bommes',\n    author_email=' ',\n    version=\"2.0.0\",\n    license='MIT',\n    url='https://github.com/LukasBommes/mv-extractor',\n    description=('Reads video frames and MPEG-4/H.264 motion vectors.'),\n    long_description=(Path(__file__).parent / \"README.md\").read_text(),\n    long_description_content_type='text/markdown',\n    classifiers=[\n        \"Development Status :: 5 - Production/Stable\",\n        \"Environment :: Console\",\n        \"Environment :: X11 Applications\",\n        \"Intended Audience :: Developers\",\n        \"Intended Audience :: Science/Research\",\n        \"Intended Audience :: Education\",\n        \"Intended Audience :: Information Technology\",\n        \"Topic :: Multimedia :: Video\",\n        \"Topic :: Multimedia :: Video :: Capture\",\n        \"Topic :: Multimedia :: Video :: Display\",\n        \"License :: OSI Approved :: MIT License\",\n        \"Natural Language :: English\",\n        \"Operating System :: POSIX :: Linux\",\n        \"Programming Language :: C\",\n        \"Programming Language :: C++\",\n        \"Programming Language :: Python :: 3\",\n        \"Programming Language :: Python :: 3.9\",\n  
      \"Programming Language :: Python :: 3.10\",\n        \"Programming Language :: Python :: 3.11\",\n        \"Programming Language :: Python :: 3.12\",\n        \"Programming Language :: Python :: 3.13\",\n        \"Programming Language :: Python :: 3.14\",\n    ],\n    keywords=['motion vector', 'video capture', 'mpeg4', 'h.264', 'compressed domain'],\n    ext_modules=[mvextractor],\n    packages=find_packages(where='src'),\n    package_dir={'': 'src'},\n    entry_points={\n        'console_scripts': [\n            'extract_mvs=mvextractor.__main__:main',\n        ],\n    },\n    python_requires='>=3.9, <4',\n    # minimum versions of numpy and opencv are the oldest versions\n    # just supporting the minimum Python version (Python 3.9)\n    install_requires=['numpy>=1.19.3', 'opencv-python>=4.4.0.46']\n)\n"
  },
  {
    "path": "src/mvextractor/__init__.py",
    "content": ""
  },
  {
    "path": "src/mvextractor/__main__.py",
    "content": "import sys\nimport os\nimport time\nfrom datetime import datetime\nimport argparse\n\nimport numpy as np\nimport cv2\n\nfrom mvextractor.videocap import VideoCap\n\n\ndef draw_motion_vectors(frame, motion_vectors):\n    if len(motion_vectors) > 0:\n        num_mvs = np.shape(motion_vectors)[0]\n        shift = 2\n        factor = (1 << shift)\n        for mv in np.split(motion_vectors, num_mvs):\n            start_pt = (int((mv[0, 5] + mv[0, 7] / mv[0, 9]) * factor + 0.5), int((mv[0, 6] + mv[0, 8] / mv[0, 9]) * factor + 0.5))\n            end_pt = (mv[0, 5] * factor, mv[0, 6] * factor)\n            cv2.arrowedLine(frame, start_pt, end_pt, (0, 0, 255), 1, cv2.LINE_AA, shift, 0.1)\n    return frame\n\n\ndef main(args=None):\n    if args is None:\n        args = sys.argv[1:]\n\n    parser = argparse.ArgumentParser(description='Extract motion vectors from video.')\n    parser.add_argument('video_url', type=str, nargs='?', help='file path or url of the video stream')\n    parser.add_argument('-p', '--preview', action='store_true', help='show a preview video with overlaid motion vectors')\n    parser.add_argument('-v', '--verbose', action='store_true', help='show detailed text output')\n    parser.add_argument('-s', '--skip-decoding-frames', action='store_true', help='skip decoding RGB frames and return only motion vectors (faster)')\n    parser.add_argument('-d', '--dump', nargs='?', const=True,\n        help='dump frames, motion vectors and frame types to optionally specified output directory')\n    args = parser.parse_args()\n\n    if args.dump:\n        if isinstance(args.dump, str):\n            dumpdir = args.dump\n        else:\n            dumpdir = f\"out-{datetime.now().strftime('%Y-%m-%dT%H:%M:%S')}\"\n        for child in [\"frames\", \"motion_vectors\"]:\n            os.makedirs(os.path.join(dumpdir, child), exist_ok=True)\n\n    cap = VideoCap()\n\n    # open the video file\n    ret = cap.open(args.video_url)\n\n    if not ret:\n        raise RuntimeError(f\"Could not open {args.video_url}\")\n    \n    if args.verbose:\n        print(\"Successfully opened video file\")\n\n    if args.skip_decoding_frames:\n        cap.set_decode_frames(False)\n\n    step = 0\n    times = []\n\n    # continuously read and display video frames and motion vectors\n    while True:\n        if args.verbose:\n            print(\"Frame: \", step, end=\" \")\n\n        tstart = time.perf_counter()\n\n        # read next video frame and corresponding motion vectors\n        ret, frame, motion_vectors, frame_type = cap.read()\n\n        tend = time.perf_counter()\n        telapsed = tend - tstart\n        times.append(telapsed)\n\n        # if there is an error reading the frame\n        if not ret:\n            if args.verbose:\n                print(\"No frame read. Stopping.\")\n            break\n\n        # print results\n        if args.verbose:\n            print(\"frame type: {} | \".format(frame_type), end=\" \")\n            if frame is not None:\n                print(\"frame size: {} | \".format(np.shape(frame)), end=\" \")\n            else:\n                print(\"frame size: () | \", end=\" \")\n            print(\"motion vectors: {} | \".format(np.shape(motion_vectors)), end=\" \")\n            print(\"elapsed time: {} s\".format(telapsed))\n\n        # draw vectors on frames\n        if not args.skip_decoding_frames and frame is not None:\n            frame = draw_motion_vectors(frame, motion_vectors)\n\n        # store motion vectors, frames, and frame types in output directory\n        if args.dump:\n            np.save(os.path.join(dumpdir, \"motion_vectors\", f\"mvs-{step}.npy\"), motion_vectors)\n            with open(os.path.join(dumpdir, \"frame_types.txt\"), \"a\") as f:\n                f.write(frame_type+\"\\n\")\n            if not args.skip_decoding_frames and frame is not None:\n                cv2.imwrite(os.path.join(dumpdir, \"frames\", f\"frame-{step}.jpg\"), frame)\n\n        step += 1\n\n        if args.preview and not args.skip_decoding_frames:\n            cv2.imshow(\"Frame\", frame)\n\n            # if user presses \"q\" key stop program\n            if cv2.waitKey(1) & 0xFF == ord('q'):\n                break\n    \n    if args.verbose:\n        print(\"average dt: \", np.mean(times))\n\n    cap.release()\n\n    # close the GUI window\n    if args.preview:\n        cv2.destroyAllWindows()\n\n\nif __name__ == \"__main__\":\n    sys.exit(main())\n"
  },
  {
    "path": "src/mvextractor/mat_to_ndarray.cpp",
    "content": "// Taken from OpenCV master commit e2a5a6a05c7ce64911e1e898e986abe8dd26cab6\n// File: opencv/modules/python/cv2.cpp\n\n#include \"mat_to_ndarray.hpp\"\n\nclass PyAllowThreads\n{\npublic:\n    PyAllowThreads() : _state(PyEval_SaveThread()) {}\n    ~PyAllowThreads()\n    {\n        PyEval_RestoreThread(_state);\n    }\nprivate:\n    PyThreadState* _state;\n};\n\nclass PyEnsureGIL\n{\npublic:\n    PyEnsureGIL() : _state(PyGILState_Ensure()) {}\n    ~PyEnsureGIL()\n    {\n        PyGILState_Release(_state);\n    }\nprivate:\n    PyGILState_STATE _state;\n};\n\n#define ERRWRAP2(expr) \\\ntry \\\n{ \\\n    PyAllowThreads allowThreads; \\\n    expr; \\\n} \\\ncatch (const cv::Exception &e) \\\n{ \\\n    PyObject_SetAttrString(opencv_error, \"file\", PyString_FromString(e.file.c_str())); \\\n    PyObject_SetAttrString(opencv_error, \"func\", PyString_FromString(e.func.c_str())); \\\n    PyObject_SetAttrString(opencv_error, \"line\", PyInt_FromLong(e.line)); \\\n    PyObject_SetAttrString(opencv_error, \"code\", PyInt_FromLong(e.code)); \\\n    PyObject_SetAttrString(opencv_error, \"msg\", PyString_FromString(e.msg.c_str())); \\\n    PyObject_SetAttrString(opencv_error, \"err\", PyString_FromString(e.err.c_str())); \\\n    PyErr_SetString(opencv_error, e.what()); \\\n    return 0; \\\n}\n\nusing namespace cv;\n\nclass NumpyAllocator : public MatAllocator\n{\npublic:\n    NumpyAllocator() { stdAllocator = Mat::getStdAllocator(); }\n    ~NumpyAllocator() {}\n\n    UMatData* allocate(PyObject* o, int dims, const int* sizes, int type, size_t* step) const\n    {\n        UMatData* u = new UMatData(this);\n        u->data = u->origdata = (uchar*)PyArray_DATA((PyArrayObject*) o);\n        npy_intp* _strides = PyArray_STRIDES((PyArrayObject*) o);\n        for( int i = 0; i < dims - 1; i++ )\n            step[i] = (size_t)_strides[i];\n        step[dims-1] = CV_ELEM_SIZE(type);\n        u->size = sizes[0]*step[0];\n        u->userdata = o;\n        return u;\n    
}\n\n    UMatData* allocate(int dims0, const int* sizes, int type, void* data, size_t* step, AccessFlag flags, UMatUsageFlags usageFlags) const CV_OVERRIDE\n    {\n        if( data != 0 )\n        {\n            // issue #6969: CV_Error(Error::StsAssert, \"The data should normally be NULL!\");\n            // probably this is safe to do in such extreme case\n            return stdAllocator->allocate(dims0, sizes, type, data, step, flags, usageFlags);\n        }\n        PyEnsureGIL gil;\n\n        int depth = CV_MAT_DEPTH(type);\n        int cn = CV_MAT_CN(type);\n        const int f = (int)(sizeof(size_t)/8);\n        int typenum = depth == CV_8U ? NPY_UBYTE : depth == CV_8S ? NPY_BYTE :\n        depth == CV_16U ? NPY_USHORT : depth == CV_16S ? NPY_SHORT :\n        depth == CV_32S ? NPY_INT : depth == CV_32F ? NPY_FLOAT :\n        depth == CV_64F ? NPY_DOUBLE : f*NPY_ULONGLONG + (f^1)*NPY_UINT;\n        int i, dims = dims0;\n        cv::AutoBuffer<npy_intp> _sizes(dims + 1);\n        for( i = 0; i < dims; i++ )\n            _sizes[i] = sizes[i];\n        if( cn > 1 )\n            _sizes[dims++] = cn;\n        PyObject* o = PyArray_SimpleNew(dims, _sizes.data(), typenum);\n        if(!o)\n            CV_Error_(Error::StsError, (\"The numpy array of typenum=%d, ndims=%d can not be created\", typenum, dims));\n        return allocate(o, dims0, sizes, type, step);\n    }\n\n    bool allocate(UMatData* u, AccessFlag accessFlags, UMatUsageFlags usageFlags) const CV_OVERRIDE\n    {\n        return stdAllocator->allocate(u, accessFlags, usageFlags);\n    }\n\n    void deallocate(UMatData* u) const CV_OVERRIDE\n    {\n        if(!u)\n            return;\n        PyEnsureGIL gil;\n        CV_Assert(u->urefcount >= 0);\n        CV_Assert(u->refcount >= 0);\n        if(u->refcount == 0)\n        {\n            PyObject* o = (PyObject*)u->userdata;\n            Py_XDECREF(o);\n            delete u;\n        }\n    }\n\n    const MatAllocator* 
stdAllocator;\n};\n\nNumpyAllocator g_numpyAllocator;\n\nint* NDArrayConverter::init() { import_array(); return NULL; }\n\nNDArrayConverter::NDArrayConverter() { init(); }\n\nPyObject* NDArrayConverter::toNDArray(const cv::Mat& m) {\n    if( !m.data )\n        Py_RETURN_NONE;\n    Mat temp, *p = (Mat*)&m;\n    if(!p->u || p->allocator != &g_numpyAllocator)\n    {\n        temp.allocator = &g_numpyAllocator;\n        ERRWRAP2(m.copyTo(temp));\n        p = &temp;\n    }\n    PyObject* o = (PyObject*)p->u->userdata;\n    Py_INCREF(o);\n    return o;\n}\n"
  },
  {
    "path": "src/mvextractor/mat_to_ndarray.hpp",
    "content": "// Taken from OpenCV master commit e2a5a6a05c7ce64911e1e898e986abe8dd26cab6\n// File: opencv/modules/python/cv2.cpp\n\n#include <Python.h>\n#include <numpy/ndarrayobject.h>\n#include <opencv2/opencv.hpp>\n#include \"opencv2/core/core.hpp\"\n#include \"opencv2/core/types_c.h\"\n#include \"opencv2/opencv_modules.hpp\"\n#include \"pycompat.hpp\"\n#include <map>\n\nstatic PyObject* opencv_error = NULL;\n\nclass PyAllowThreads;\n\nclass PyEnsureGIL;\n\n#define ERRWRAP2(expr) \\\ntry \\\n{ \\\n    PyAllowThreads allowThreads; \\\n    expr; \\\n} \\\ncatch (const cv::Exception &e) \\\n{ \\\n    PyObject_SetAttrString(opencv_error, \"file\", PyString_FromString(e.file.c_str())); \\\n    PyObject_SetAttrString(opencv_error, \"func\", PyString_FromString(e.func.c_str())); \\\n    PyObject_SetAttrString(opencv_error, \"line\", PyInt_FromLong(e.line)); \\\n    PyObject_SetAttrString(opencv_error, \"code\", PyInt_FromLong(e.code)); \\\n    PyObject_SetAttrString(opencv_error, \"msg\", PyString_FromString(e.msg.c_str())); \\\n    PyObject_SetAttrString(opencv_error, \"err\", PyString_FromString(e.err.c_str())); \\\n    PyErr_SetString(opencv_error, e.what()); \\\n    return 0; \\\n}\n\nclass NumpyAllocator;\n\nenum { ARG_NONE = 0, ARG_MAT = 1, ARG_SCALAR = 2 };\n\nclass NDArrayConverter\n{\nprivate:\n    int* init();\npublic:\n    NDArrayConverter();\n    PyObject* toNDArray(const cv::Mat& m);\n};\n"
  },
  {
    "path": "src/mvextractor/py_video_cap.cpp",
    "content": "#define PY_SSIZE_T_CLEAN\n#include <Python.h>\n#include <numpy/arrayobject.h>\n#include <opencv2/core/core.hpp>\n\n#include \"video_cap.hpp\"\n#include \"mat_to_ndarray.hpp\"\n\ntypedef struct {\n    PyObject_HEAD\n    VideoCap vcap;\n} VideoCapObject;\n\n\nstatic int\nVideoCap_init(VideoCapObject *self, PyObject *args, PyObject *kwds)\n{\n    new(&self->vcap) VideoCap();\n    return 0;\n}\n\n\nstatic void\nVideoCap_dealloc(VideoCapObject *self)\n{\n    self->vcap.release();\n    Py_TYPE(self)->tp_free((PyObject *) self);\n}\n\n\nstatic PyObject *\nVideoCap_open(VideoCapObject *self, PyObject *args)\n{\n    const char *url;\n\n    if (!PyArg_ParseTuple(args, \"s\", &url))\n        Py_RETURN_FALSE;\n\n    if (!self->vcap.open(url))\n        Py_RETURN_FALSE;\n\n    Py_RETURN_TRUE;\n}\n\nstatic PyObject *\nVideoCap_grab(VideoCapObject *self, PyObject *Py_UNUSED(ignored))\n{\n    if (!self->vcap.grab())\n        Py_RETURN_FALSE;\n\n    Py_RETURN_TRUE;\n}\n\n\nstatic PyObject *\nVideoCap_retrieve(VideoCapObject *self, PyObject *Py_UNUSED(ignored))\n{\n    cv::Mat frame_cv;\n    uint8_t *frame = NULL;\n    int width = 0;\n    int height = 0;\n    int step = 0;\n    int cn = 0;\n\n    MVS_DTYPE *motion_vectors = NULL;\n    MVS_DTYPE num_mvs = 0;\n    char frame_type[2] = \"?\";\n\n    PyObject *ret = Py_True;\n\n    if (!self->vcap.retrieve(&frame, &step, &width, &height, &cn, frame_type, &motion_vectors, &num_mvs)) {\n        num_mvs = 0;\n        width = 0;\n        height = 0;\n        step = 0;\n        cn = 0;\n        ret = Py_False;\n    }\n\n    // copy frame buffer into new cv::Mat\n    PyObject* frame_nd = Py_None;\n    if (self->vcap.getDecodeFrames()) {\n        cv::Mat(height, width, CV_MAKETYPE(CV_8U, cn), frame, step).copyTo(frame_cv);\n\n        // convert frame cv::Mat to numpy.ndarray\n        NDArrayConverter cvt;\n        frame_nd = cvt.toNDArray(frame_cv);\n    } else {\n        Py_INCREF(Py_None);\n    }\n\n    // convert motion 
vector buffer into numpy array\n    npy_intp dims_mvs[2] = {(npy_intp)num_mvs, 10};\n    PyObject *motion_vectors_nd = PyArray_SimpleNewFromData(2, dims_mvs, MVS_DTYPE_NP, motion_vectors);\n    PyArray_ENABLEFLAGS((PyArrayObject*)motion_vectors_nd, NPY_ARRAY_OWNDATA);\n\n    return Py_BuildValue(\"(ONNs)\", ret, frame_nd, motion_vectors_nd, (const char*)frame_type);\n}\n\n\nstatic PyObject *\nVideoCap_read(VideoCapObject *self, PyObject *Py_UNUSED(ignored))\n{\n    cv::Mat frame_cv;\n    uint8_t *frame = NULL;\n    int width = 0;\n    int height = 0;\n    int step = 0;\n    int cn = 0;\n\n    MVS_DTYPE *motion_vectors = NULL;\n    MVS_DTYPE num_mvs = 0;\n    char frame_type[2] = \"?\";\n\n    PyObject *ret = Py_True;\n\n    if (!self->vcap.read(&frame, &step, &width, &height, &cn, frame_type, &motion_vectors, &num_mvs)) {\n        num_mvs = 0;\n        width = 0;\n        height = 0;\n        step = 0;\n        cn = 0;\n        ret = Py_False;\n    }\n\n    PyObject* frame_nd = Py_None;\n    if (self->vcap.getDecodeFrames()) {\n        cv::Mat(height, width, CV_MAKETYPE(CV_8U, cn), frame, step).copyTo(frame_cv);\n\n        // convert frame cv::Mat to numpy.ndarray\n        NDArrayConverter cvt;\n        frame_nd = cvt.toNDArray(frame_cv);\n    } else {\n        Py_INCREF(Py_None);\n    }\n\n    // convert motion vector buffer into numpy array\n    npy_intp dims_mvs[2] = {(npy_intp)num_mvs, 10};\n    PyObject *motion_vectors_nd = PyArray_SimpleNewFromData(2, dims_mvs, MVS_DTYPE_NP, motion_vectors);\n    PyArray_ENABLEFLAGS((PyArrayObject*)motion_vectors_nd, NPY_ARRAY_OWNDATA);\n\n    return Py_BuildValue(\"(ONNs)\", ret, frame_nd, motion_vectors_nd, (const char*)frame_type);\n}\n\n\nstatic PyObject *\nVideoCap_release(VideoCapObject *self, PyObject *Py_UNUSED(ignored))\n{\n    self->vcap.release();\n    Py_RETURN_NONE;\n}\n\n\nstatic PyObject *\nVideoCap_set_decode_frames(VideoCapObject *self, PyObject *args)\n{\n    int enable = 0;\n    if (!PyArg_ParseTuple(args, 
\"p\", &enable))\n        Py_RETURN_NONE;\n\n    self->vcap.setDecodeFrames(enable != 0);\n    Py_RETURN_NONE;\n}\n\n\nstatic PyObject *\nVideoCap_get_decode_frames(VideoCapObject *self, PyObject *Py_UNUSED(ignored))\n{\n    if (self->vcap.getDecodeFrames())\n        Py_RETURN_TRUE;\n    else\n        Py_RETURN_FALSE;\n}\n\n\nstatic PyMethodDef VideoCap_methods[] = {\n    {\"open\", (PyCFunction) VideoCap_open, METH_VARARGS, \"Open a video file or device with given filename/url\"},\n    {\"read\", (PyCFunction) VideoCap_read, METH_NOARGS, \"Grab and decode the next frame and motion vectors\"},\n    {\"grab\", (PyCFunction) VideoCap_grab, METH_NOARGS, \"Grab the next frame and motion vectors from the stream\"},\n    {\"retrieve\", (PyCFunction) VideoCap_retrieve, METH_NOARGS, \"Decode the grabbed frame and motion vectors\"},\n    {\"release\", (PyCFunction) VideoCap_release, METH_NOARGS, \"Release the video device and free resources\"},\n    {\"set_decode_frames\", (PyCFunction) VideoCap_set_decode_frames, METH_VARARGS, \"Enable/disable decoding of RGB frames\"},\n    {NULL}  /* Sentinel */\n};\n\n\nstatic PyGetSetDef VideoCap_getset[] = {\n    {\"decode_frames\", (getter)VideoCap_get_decode_frames, NULL,\n     \"Whether RGB frames are decoded (True) or only motion vectors (False)\", NULL},\n    {NULL}  // Sentinel\n};\n\n\nstatic PyTypeObject VideoCapType = {\n    PyVarObject_HEAD_INIT(NULL, 0)\n    .tp_name = \"videocap.VideoCap\",\n    .tp_basicsize = sizeof(VideoCapObject),\n    .tp_itemsize = 0,\n    .tp_dealloc = (destructor) VideoCap_dealloc,\n    .tp_vectorcall_offset = NULL,\n    .tp_getattr = NULL,\n    .tp_setattr = NULL,\n    .tp_as_async = NULL,\n    .tp_repr = NULL,\n    .tp_as_number = NULL,\n    .tp_as_sequence = NULL,\n    .tp_as_mapping = NULL,\n    .tp_hash = NULL,\n    .tp_call = NULL,\n    .tp_str = NULL,\n    .tp_getattro = NULL,\n    .tp_setattro = NULL,\n    .tp_as_buffer = NULL,\n    .tp_flags = Py_TPFLAGS_DEFAULT,\n    .tp_doc = \"Video Capture Object\",\n    .tp_traverse = NULL,\n    .tp_clear = NULL,\n    .tp_richcompare = NULL,\n    .tp_weaklistoffset = 0,\n    .tp_iter = NULL,\n    .tp_iternext = NULL,\n    .tp_methods = VideoCap_methods,\n    .tp_members = NULL,\n    .tp_getset = VideoCap_getset,\n    .tp_base = NULL,\n    .tp_dict = NULL,\n    .tp_descr_get = NULL,\n    .tp_descr_set = NULL,\n    .tp_dictoffset = 0,\n    .tp_init = (initproc) VideoCap_init,\n    .tp_alloc = NULL,\n    .tp_new = PyType_GenericNew,\n    .tp_free = NULL,\n    .tp_is_gc = NULL,\n    .tp_bases = NULL,\n    .tp_mro = NULL,\n    .tp_cache = NULL,\n    .tp_subclasses = NULL,\n    .tp_weaklist = NULL,\n    .tp_del = NULL,\n    .tp_version_tag = 0,\n    .tp_finalize  = NULL,\n};\n\n\nstatic PyModuleDef videocapmodule = {\n    PyModuleDef_HEAD_INIT,\n    .m_name = \"videocap\",\n    .m_doc = \"Capture video frames and motion vectors from a H264 encoded stream.\",\n    .m_size = -1,\n};\n\n\nPyMODINIT_FUNC\nPyInit_videocap(void)\n{\n    Py_Initialize();  // maybe not needed\n    import_array();\n\n    PyObject *m;\n    if (PyType_Ready(&VideoCapType) < 0)\n        return NULL;\n\n    m = PyModule_Create(&videocapmodule);\n    if (m == NULL)\n        return NULL;\n\n    Py_INCREF(&VideoCapType);\n    PyModule_AddObject(m, \"VideoCap\", (PyObject *) &VideoCapType);\n    return m;\n}\n"
  },
  {
    "path": "src/mvextractor/pycompat.hpp",
    "content": "// Taken from OpenCV master commit e2a5a6a05c7ce64911e1e898e986abe8dd26cab6\n// File: opencv/modules/python/pycompat.hpp.cpp\n\n/*M///////////////////////////////////////////////////////////////////////////////////////\n//\n//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\n//\n//  By downloading, copying, installing or using the software you agree to this license.\n//  If you do not agree to this license, do not download, install,\n//  copy or use the software.\n//\n//\n//                           License Agreement\n//                For Open Source Computer Vision Library\n//\n// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\n// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.\n// Third party copyrights are property of their respective owners.\n//\n// Redistribution and use in source and binary forms, with or without modification,\n// are permitted provided that the following conditions are met:\n//\n//   * Redistribution's of source code must retain the above copyright notice,\n//     this list of conditions and the following disclaimer.\n//\n//   * Redistribution's in binary form must reproduce the above copyright notice,\n//     this list of conditions and the following disclaimer in the documentation\n//     and/or other materials provided with the distribution.\n//\n//   * The name of the copyright holders may not be used to endorse or promote products\n//     derived from this software without specific prior written permission.\n//\n// This software is provided by the copyright holders and contributors \"as is\" and\n// any express or implied warranties, including, but not limited to, the implied\n// warranties of merchantability and fitness for a particular purpose are disclaimed.\n// In no event shall the Intel Corporation or contributors be liable for any direct,\n// indirect, incidental, special, exemplary, or consequential damages\n// (including, but not limited to, procurement of 
substitute goods or services;\n// loss of use, data, or profits; or business interruption) however caused\n// and on any theory of liability, whether in contract, strict liability,\n// or tort (including negligence or otherwise) arising in any way out of\n// the use of this software, even if advised of the possibility of such damage.\n//\n//M*/\n\n// Defines for Python 2/3 compatibility.\n#ifndef __PYCOMPAT_HPP__\n#define __PYCOMPAT_HPP__\n\n#if PY_MAJOR_VERSION >= 3\n\n// Python3 treats all ints as longs, PyInt_X functions have been removed.\n#define PyInt_Check PyLong_Check\n#define PyInt_CheckExact PyLong_CheckExact\n#define PyInt_AsLong PyLong_AsLong\n#define PyInt_AS_LONG PyLong_AS_LONG\n#define PyInt_FromLong PyLong_FromLong\n#define PyNumber_Int PyNumber_Long\n\n\n#define PyString_FromString PyUnicode_FromString\n#define PyString_FromStringAndSize PyUnicode_FromStringAndSize\n\n#endif // PY_MAJOR >=3\n\nstatic inline bool getUnicodeString(PyObject * obj, std::string &str)\n{\n    bool res = false;\n    if (PyUnicode_Check(obj))\n    {\n        PyObject * bytes = PyUnicode_AsUTF8String(obj);\n        if (PyBytes_Check(bytes))\n        {\n            const char * raw = PyBytes_AsString(bytes);\n            if (raw)\n            {\n                str = std::string(raw);\n                res = true;\n            }\n        }\n        Py_XDECREF(bytes);\n    }\n#if PY_MAJOR_VERSION < 3\n    else if (PyString_Check(obj))\n    {\n        const char * raw = PyString_AsString(obj);\n        if (raw)\n        {\n            str = std::string(raw);\n            res = true;\n        }\n    }\n#endif\n    return res;\n}\n\n//==================================================================================================\n\n#define CV_PY_FN_WITH_KW_(fn, flags) (PyCFunction)(void*)(PyCFunctionWithKeywords)(fn), (flags) | METH_VARARGS | METH_KEYWORDS\n#define CV_PY_FN_NOARGS_(fn, flags) (PyCFunction)(fn), (flags) | METH_NOARGS\n\n#define CV_PY_FN_WITH_KW(fn) 
CV_PY_FN_WITH_KW_(fn, 0)\n#define CV_PY_FN_NOARGS(fn) CV_PY_FN_NOARGS_(fn, 0)\n\n#define CV_PY_TO_CLASS(TYPE)                                                                          \\\ntemplate<>                                                                                            \\\nbool pyopencv_to(PyObject* dst, TYPE& src, const char* name)                                          \\\n{                                                                                                     \\\n    if (!dst || dst == Py_None)                                                                       \\\n        return true;                                                                                  \\\n    Ptr<TYPE> ptr;                                                                                    \\\n                                                                                                      \\\n    if (!pyopencv_to(dst, ptr, name)) return false;                                                   \\\n    src = *ptr;                                                                                       \\\n    return true;                                                                                      \\\n}\n\n#define CV_PY_FROM_CLASS(TYPE)                                                                        \\\ntemplate<>                                                                                            \\\nPyObject* pyopencv_from(const TYPE& src)                                                              \\\n{                                                                                                     \\\n    Ptr<TYPE> ptr(new TYPE());                                                                        \\\n                                                                                                      \\\n    *ptr = src;                                                                                       \\\n    
return pyopencv_from(ptr);                                                                        \\\n}\n\n#define CV_PY_TO_CLASS_PTR(TYPE)                                                                      \\\ntemplate<>                                                                                            \\\nbool pyopencv_to(PyObject* dst, TYPE*& src, const char* name)                                         \\\n{                                                                                                     \\\n    if (!dst || dst == Py_None)                                                                       \\\n        return true;                                                                                  \\\n    Ptr<TYPE> ptr;                                                                                    \\\n                                                                                                      \\\n    if (!pyopencv_to(dst, ptr, name)) return false;                                                   \\\n    src = ptr;                                                                                        \\\n    return true;                                                                                      \\\n}\n\n#define CV_PY_FROM_CLASS_PTR(TYPE)                                                                    \\\nstatic PyObject* pyopencv_from(TYPE*& src)                                                            \\\n{                                                                                                     \\\n    return pyopencv_from(Ptr<TYPE>(src));                                                             \\\n}\n\n#define CV_PY_TO_ENUM(TYPE)                                                                           \\\ntemplate<>                                                                                            \\\nbool pyopencv_to(PyObject* dst, TYPE& src, const char* name)                     
                     \\\n{                                                                                                     \\\n    if (!dst || dst == Py_None)                                                                       \\\n        return true;                                                                                  \\\n    int underlying = 0;                                                  \\\n                                                                                                      \\\n    if (!pyopencv_to(dst, underlying, name)) return false;                                            \\\n    src = static_cast<TYPE>(underlying);                                                              \\\n    return true;                                                                                      \\\n}\n\n#define CV_PY_FROM_ENUM(TYPE)                                                                         \\\ntemplate<>                                                                                            \\\nPyObject* pyopencv_from(const TYPE& src)                                                              \\\n{                                                                                                     \\\n    return pyopencv_from(static_cast<int>(src));                         \\\n}\n\n//==================================================================================================\n\n#if PY_MAJOR_VERSION >= 3\n#define CVPY_TYPE_HEAD PyVarObject_HEAD_INIT(&PyType_Type, 0)\n#define CVPY_TYPE_INCREF(T) Py_INCREF(T)\n#else\n#define CVPY_TYPE_HEAD PyObject_HEAD_INIT(&PyType_Type) 0,\n#define CVPY_TYPE_INCREF(T) _Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA (T)->ob_refcnt++\n#endif\n\n\n#define CVPY_TYPE_DECLARE(NAME, STORAGE, SNAME) \\\n    struct pyopencv_##NAME##_t \\\n    { \\\n        PyObject_HEAD \\\n        STORAGE v; \\\n    }; \\\n    static PyTypeObject pyopencv_##NAME##_TypeXXX = \\\n    { \\\n        CVPY_TYPE_HEAD 
\\\n        MODULESTR\".\"#NAME, \\\n        sizeof(pyopencv_##NAME##_t), \\\n    }; \\\n    static PyTypeObject * pyopencv_##NAME##_TypePtr = &pyopencv_##NAME##_TypeXXX; \\\n    static bool pyopencv_##NAME##_getp(PyObject * self, STORAGE * & dst) \\\n    { \\\n        if (PyObject_TypeCheck(self, pyopencv_##NAME##_TypePtr)) \\\n        { \\\n            dst = &(((pyopencv_##NAME##_t*)self)->v); \\\n            return true; \\\n        } \\\n        return false; \\\n    } \\\n    static PyObject * pyopencv_##NAME##_Instance(const STORAGE &r) \\\n    { \\\n        pyopencv_##NAME##_t *m = PyObject_NEW(pyopencv_##NAME##_t, pyopencv_##NAME##_TypePtr); \\\n        new (&(m->v)) STORAGE(r); \\\n        return (PyObject*)m; \\\n    } \\\n    static void pyopencv_##NAME##_dealloc(PyObject* self) \\\n    { \\\n        ((pyopencv_##NAME##_t*)self)->v.STORAGE::~SNAME(); \\\n        PyObject_Del(self); \\\n    } \\\n    static PyObject* pyopencv_##NAME##_repr(PyObject* self) \\\n    { \\\n        char str[1000]; \\\n        sprintf(str, \"<\"#NAME\" %p>\", self); \\\n        return PyString_FromString(str); \\\n    }\n\n\n#define CVPY_TYPE_INIT_STATIC(NAME, ERROR_HANDLER, BASE, CONSTRUCTOR) \\\n    { \\\n        pyopencv_##NAME##_TypePtr->tp_base = pyopencv_##BASE##_TypePtr; \\\n        pyopencv_##NAME##_TypePtr->tp_dealloc = pyopencv_##NAME##_dealloc; \\\n        pyopencv_##NAME##_TypePtr->tp_repr = pyopencv_##NAME##_repr; \\\n        pyopencv_##NAME##_TypePtr->tp_getset = pyopencv_##NAME##_getseters; \\\n        pyopencv_##NAME##_TypePtr->tp_init = (initproc) CONSTRUCTOR; \\\n        pyopencv_##NAME##_TypePtr->tp_methods = pyopencv_##NAME##_methods; \\\n        pyopencv_##NAME##_TypePtr->tp_alloc = PyType_GenericAlloc; \\\n        pyopencv_##NAME##_TypePtr->tp_new = PyType_GenericNew; \\\n        pyopencv_##NAME##_TypePtr->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE; \\\n        if (PyType_Ready(pyopencv_##NAME##_TypePtr) != 0) \\\n        { \\\n            
ERROR_HANDLER; \\\n        } \\\n        CVPY_TYPE_INCREF(pyopencv_##NAME##_TypePtr); \\\n        PyModule_AddObject(m, #NAME, (PyObject *)pyopencv_##NAME##_TypePtr); \\\n    }\n\n//==================================================================================================\n\n#define CVPY_TYPE_DECLARE_DYNAMIC(NAME, STORAGE, SNAME) \\\n    struct pyopencv_##NAME##_t \\\n    { \\\n        PyObject_HEAD \\\n        STORAGE v; \\\n    }; \\\n    static PyObject * pyopencv_##NAME##_TypePtr = 0; \\\n    static bool pyopencv_##NAME##_getp(PyObject * self, STORAGE * & dst) \\\n    { \\\n        if (PyObject_TypeCheck(self, (PyTypeObject*)pyopencv_##NAME##_TypePtr)) \\\n        { \\\n            dst = &(((pyopencv_##NAME##_t*)self)->v); \\\n            return true; \\\n        } \\\n        return false; \\\n    } \\\n    static PyObject * pyopencv_##NAME##_Instance(const STORAGE &r) \\\n    { \\\n        pyopencv_##NAME##_t *m = PyObject_New(pyopencv_##NAME##_t, (PyTypeObject*)pyopencv_##NAME##_TypePtr); \\\n        new (&(m->v)) STORAGE(r); \\\n        return (PyObject*)m; \\\n    } \\\n    static void pyopencv_##NAME##_dealloc(PyObject* self) \\\n    { \\\n        ((pyopencv_##NAME##_t*)self)->v.STORAGE::~SNAME(); \\\n        PyObject_Del(self); \\\n    } \\\n    static PyObject* pyopencv_##NAME##_repr(PyObject* self) \\\n    { \\\n        char str[1000]; \\\n        sprintf(str, \"<\"#NAME\" %p>\", self); \\\n        return PyString_FromString(str); \\\n    } \\\n    static PyType_Slot pyopencv_##NAME##_Slots[] =  \\\n    { \\\n        {Py_tp_dealloc, 0}, \\\n        {Py_tp_repr, 0}, \\\n        {Py_tp_getset, 0}, \\\n        {Py_tp_init, 0}, \\\n        {Py_tp_methods, 0}, \\\n        {Py_tp_alloc, 0}, \\\n        {Py_tp_new, 0}, \\\n        {0, 0} \\\n    }; \\\n    static PyType_Spec pyopencv_##NAME##_Spec = \\\n    { \\\n        MODULESTR\".\"#NAME, \\\n        sizeof(pyopencv_##NAME##_t), \\\n        0, \\\n        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, 
\\\n        pyopencv_##NAME##_Slots  \\\n    };\n\n#define CVPY_TYPE_INIT_DYNAMIC(NAME, ERROR_HANDLER, BASE, CONSTRUCTOR) \\\n    { \\\n        pyopencv_##NAME##_Slots[0].pfunc /*tp_dealloc*/ = (void*)pyopencv_##NAME##_dealloc; \\\n        pyopencv_##NAME##_Slots[1].pfunc /*tp_repr*/ = (void*)pyopencv_##NAME##_repr; \\\n        pyopencv_##NAME##_Slots[2].pfunc /*tp_getset*/ = (void*)pyopencv_##NAME##_getseters; \\\n        pyopencv_##NAME##_Slots[3].pfunc /*tp_init*/ = (void*) CONSTRUCTOR; \\\n        pyopencv_##NAME##_Slots[4].pfunc /*tp_methods*/ = pyopencv_##NAME##_methods; \\\n        pyopencv_##NAME##_Slots[5].pfunc /*tp_alloc*/ = (void*)PyType_GenericAlloc; \\\n        pyopencv_##NAME##_Slots[6].pfunc /*tp_new*/ = (void*)PyType_GenericNew; \\\n        PyObject * bases = 0; \\\n        if (pyopencv_##BASE##_TypePtr) \\\n            bases = PyTuple_Pack(1, pyopencv_##BASE##_TypePtr); \\\n        pyopencv_##NAME##_TypePtr = PyType_FromSpecWithBases(&pyopencv_##NAME##_Spec, bases); \\\n        if (!pyopencv_##NAME##_TypePtr) \\\n        { \\\n            printf(\"Failed to init: \" #NAME \", base (\" #BASE \")\" \"\\n\"); \\\n            ERROR_HANDLER; \\\n        } \\\n        PyModule_AddObject(m, #NAME, (PyObject *)pyopencv_##NAME##_TypePtr); \\\n    }\n\n// Debug module load:\n//\n// else \\\n// { \\\n//     printf(\"Init: \" #NAME \", base (\" #BASE \") -> %p\" \"\\n\", pyopencv_##NAME##_TypePtr); \\\n// } \\\n\n\n#endif // END HEADER GUARD\n"
  },
  {
    "path": "src/mvextractor/video_cap.cpp",
    "content": "#include \"video_cap.hpp\"\n\n\nVideoCap::VideoCap() {\n    this->opts = NULL;\n    this->codec = NULL;\n    this->fmt_ctx = NULL;\n    this->video_dec_ctx = NULL;\n    this->video_stream = NULL;\n    this->video_stream_idx = -1;\n    this->frame = NULL;\n    this->img_convert_ctx = NULL;\n    this->frame_number = 0;\n    this->decode_frames = true;\n\n    memset(&(this->rgb_frame), 0, sizeof(this->rgb_frame));\n    memset(&(this->picture), 0, sizeof(this->picture));\n    memset(&(this->packet), 0, sizeof(this->packet));\n    av_init_packet(&(this->packet));\n}\n\n\nvoid VideoCap::release(void) {\n    if (this->img_convert_ctx != NULL) {\n        sws_freeContext(this->img_convert_ctx);\n        this->img_convert_ctx = NULL;\n    }\n\n    if (this->frame != NULL) {\n        av_frame_free(&(this->frame));\n        this->frame = NULL;\n    }\n\n    av_frame_unref(&(this->rgb_frame));\n    memset(&(this->rgb_frame), 0, sizeof(this->rgb_frame));\n    memset(&(this->picture), 0, sizeof(this->picture));\n\n    if (this->video_dec_ctx != NULL) {\n        avcodec_free_context(&(this->video_dec_ctx));\n        this->video_dec_ctx = NULL;\n    }\n\n    if (this->fmt_ctx != NULL) {\n        avformat_close_input(&(this->fmt_ctx));\n        this->fmt_ctx = NULL;\n    }\n\n    if (this->opts != NULL) {\n        av_dict_free(&(this->opts));\n        this->opts = NULL;\n    }\n\n    if (this->packet.data) {\n        av_packet_unref(&(this->packet));\n        this->packet.data = NULL;\n    }\n    memset(&packet, 0, sizeof(packet));\n    av_init_packet(&packet);\n\n    this->codec = NULL;\n    this->video_stream = NULL;\n    this->video_stream_idx = -1;\n    this->frame_number = 0;\n    this->decode_frames = true;\n}\n\n\nbool VideoCap::open(const char *url) {\n\n    bool valid = false;\n    AVStream *st = NULL;\n    int enc_width, enc_height, idx;\n\n    this->release();\n\n    // if another file is already opened\n    if (this->fmt_ctx != NULL)\n        goto 
error;\n\n    this->url = url;\n\n    // open RTSP stream with TCP\n    av_dict_set(&(this->opts), \"rtsp_transport\", \"tcp\", 0);\n    av_dict_set(&(this->opts), \"stimeout\", \"5000000\", 0); // set timeout to 5 seconds\n    if (avformat_open_input(&(this->fmt_ctx), url, NULL, &(this->opts)) < 0)\n        goto error;\n\n    // read packets of a media file to get stream information.\n    if (avformat_find_stream_info(this->fmt_ctx, NULL) < 0)\n        goto error;\n\n    // find the most suitable stream of given type (e.g. video) and set the codec accordingly\n    idx = av_find_best_stream(this->fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &(this->codec), 0);\n    if (idx < 0)\n        goto error;\n\n    // set stream in format context\n    this->video_stream_idx = idx;\n    st = this->fmt_ctx->streams[this->video_stream_idx];\n\n    // allocate an AVCodecContext and set its fields to default values\n    this->video_dec_ctx = avcodec_alloc_context3(this->codec);\n    if (!this->video_dec_ctx)\n        goto error;\n\n    // fill the codec context based on the values from the supplied codec parameters\n    if (avcodec_parameters_to_context(this->video_dec_ctx, st->codecpar) < 0)\n        goto error;\n\n    // ffmpeg recommends no more than 16 threads\n    this->video_dec_ctx->thread_count = std::min(std::thread::hardware_concurrency(), 16u);\n#ifdef DEBUG\n    std::cerr << \"Using parallel processing with \" << this->video_dec_ctx->thread_count << \" threads\" << std::endl;\n#endif\n\n    // backup encoder's width/height\n    enc_width = this->video_dec_ctx->width;\n    enc_height = this->video_dec_ctx->height;\n\n    // Init the video decoder with the codec and set additional option to extract motion vectors\n    av_dict_set(&(this->opts), \"flags2\", \"+export_mvs\", 0);\n    if (avcodec_open2(this->video_dec_ctx, this->codec, &(this->opts)) < 0)\n        goto error;\n\n    this->video_stream = this->fmt_ctx->streams[this->video_stream_idx];\n\n    // checking 
width/height (since decoder can sometimes alter it, eg. vp6f)\n    if (enc_width && (this->video_dec_ctx->width != enc_width))\n        this->video_dec_ctx->width = enc_width;\n    if (enc_height && (this->video_dec_ctx->height != enc_height))\n        this->video_dec_ctx->height = enc_height;\n\n    this->picture.width = this->video_dec_ctx->width;\n    this->picture.height = this->video_dec_ctx->height;\n    this->picture.data = NULL;\n\n    // print info (duration, bitrate, streams, container, programs, metadata, side data, codec, time base)\n#ifdef DEBUG\n    av_dump_format(this->fmt_ctx, 0, url, 0);\n#endif\n\n    this->frame = av_frame_alloc();\n    if (!this->frame)\n        goto error;\n\n    // default: decode frames\n    this->decode_frames = true;\n\n    if (this->video_stream_idx >= 0)\n        valid = true;\n\nerror:\n\n    if (!valid)\n        this->release();\n\n    return valid;\n}\n\nvoid VideoCap::setDecodeFrames(bool enable) {\n    this->decode_frames = enable;\n}\n\nbool VideoCap::getDecodeFrames() {\n    return this->decode_frames;\n}\n\n\nbool VideoCap::grab(void) {\n\n    bool valid = false;\n    int got_frame;\n\n    int count_errs = 0;\n    const int max_number_of_attempts = 512;\n\n    // make sure file is opened\n    if (!this->fmt_ctx || !this->video_stream)\n        return false;\n\n    // check if there is a frame left in the stream\n    if (this->fmt_ctx->streams[this->video_stream_idx]->nb_frames > 0 &&\n        this->frame_number > this->fmt_ctx->streams[this->video_stream_idx]->nb_frames)\n        return false;\n\n    // loop over different streams (video, audio) in the file\n    while(!valid) {\n        av_packet_unref(&(this->packet));\n\n        // read next packet from the stream\n        int ret = av_read_frame(this->fmt_ctx, &(this->packet));\n\n        if (ret == AVERROR(EAGAIN))\n            continue;\n\n        // if the packet is not from the video stream don't do anything and get next packet\n        if 
(this->packet.stream_index != this->video_stream_idx) {\n            av_packet_unref(&(this->packet));\n            count_errs++;\n            if (count_errs > max_number_of_attempts)\n                break;\n            continue;\n        }\n\n        // decode the video frame\n        avcodec_decode_video2(this->video_dec_ctx, this->frame, &got_frame, &(this->packet));\n\n        if(got_frame) {\n            this->frame_number++;\n            valid = true;\n        }\n        else {\n            count_errs++;\n            if (count_errs > max_number_of_attempts)\n                break;\n        }\n\n    }\n\n    return valid;\n}\n\n\nbool VideoCap::retrieve(uint8_t **frame, int *step, int *width, int *height, int *cn, char *frame_type, MVS_DTYPE **motion_vectors, MVS_DTYPE *num_mvs) {\n\n    if (!this->video_stream || !(this->frame->data[0]))\n        return false;\n\n    // perform color conversion and return frame buffer\n    if (this->decode_frames) {\n\n        if (this->img_convert_ctx == NULL ||\n            this->picture.width != this->video_dec_ctx->width ||\n            this->picture.height != this->video_dec_ctx->height ||\n            this->picture.data == NULL) {\n\n            // Some sws_scale optimizations have some assumptions about alignment of data/step/width/height\n            // Also we use coded_width/height to workaround problem with legacy ffmpeg versions (like n0.8)\n            int buffer_width = this->video_dec_ctx->coded_width;\n            int buffer_height = this->video_dec_ctx->coded_height;\n\n            this->img_convert_ctx = sws_getCachedContext(\n                    this->img_convert_ctx,\n                    buffer_width, buffer_height,\n                    this->video_dec_ctx->pix_fmt,\n                    buffer_width, buffer_height,\n                    AV_PIX_FMT_BGR24,\n                    SWS_BICUBIC,\n                    NULL, NULL, NULL\n                    );\n\n            if (this->img_convert_ctx == NULL)\n        
        return false;\n\n            av_frame_unref(&(this->rgb_frame));\n            this->rgb_frame.format = AV_PIX_FMT_BGR24;\n            this->rgb_frame.width = buffer_width;\n            this->rgb_frame.height = buffer_height;\n            if (0 != av_frame_get_buffer(&(this->rgb_frame), 32))\n                return false;\n\n            this->picture.width = this->video_dec_ctx->width;\n            this->picture.height = this->video_dec_ctx->height;\n            this->picture.data = this->rgb_frame.data[0];\n            this->picture.step = this->rgb_frame.linesize[0];\n            this->picture.cn = 3;\n        }\n\n        // change color space of frame\n        sws_scale(\n            this->img_convert_ctx,\n            this->frame->data,\n            this->frame->linesize,\n            0, this->video_dec_ctx->coded_height,\n            this->rgb_frame.data,\n            this->rgb_frame.linesize\n            );\n\n        *frame = this->picture.data;\n        *width = this->picture.width;\n        *height = this->picture.height;\n        *step = this->picture.step;\n        *cn = this->picture.cn;\n\n    } else {\n        // when not decoding frames, don't allocate or return frame buffer\n        *frame = NULL;\n        *width = 0;\n        *height = 0;\n        *step = 0;\n        *cn = 0;\n    }\n\n    // get motion vectors\n    AVFrameSideData *sd = av_frame_get_side_data(this->frame, AV_FRAME_DATA_MOTION_VECTORS);\n    if (sd) {\n        AVMotionVector *mvs = (AVMotionVector *)sd->data;\n\n        *num_mvs = sd->size / sizeof(*mvs);\n\n        if (*num_mvs > 0) {\n\n            // allocate memory for motion vectors as 1D array\n            if (!(*motion_vectors = (MVS_DTYPE *) malloc(*num_mvs * 10 * sizeof(MVS_DTYPE))))\n                return false;\n\n            // store the motion vectors in the allocated memory (C contiguous)\n            for (MVS_DTYPE i = 0; i < *num_mvs; ++i) {\n                *(*motion_vectors + i*10     ) = 
static_cast<MVS_DTYPE>(mvs[i].source);\n                *(*motion_vectors + i*10 +  1) = static_cast<MVS_DTYPE>(mvs[i].w);\n                *(*motion_vectors + i*10 +  2) = static_cast<MVS_DTYPE>(mvs[i].h);\n                *(*motion_vectors + i*10 +  3) = static_cast<MVS_DTYPE>(mvs[i].src_x);\n                *(*motion_vectors + i*10 +  4) = static_cast<MVS_DTYPE>(mvs[i].src_y);\n                *(*motion_vectors + i*10 +  5) = static_cast<MVS_DTYPE>(mvs[i].dst_x);\n                *(*motion_vectors + i*10 +  6) = static_cast<MVS_DTYPE>(mvs[i].dst_y);\n                *(*motion_vectors + i*10 +  7) = static_cast<MVS_DTYPE>(mvs[i].motion_x);\n                *(*motion_vectors + i*10 +  8) = static_cast<MVS_DTYPE>(mvs[i].motion_y);\n                *(*motion_vectors + i*10 +  9) = static_cast<MVS_DTYPE>(mvs[i].motion_scale);\n                //*(*motion_vectors + i*11 + 10) = static_cast<MVS_DTYPE>(mvs[i].flags);\n            }\n        }\n    }\n\n    // get frame type (I, P, B, etc.) and create a null terminated c-string\n    frame_type[0] = av_get_picture_type_char(this->frame->pict_type);\n    frame_type[1] = '\\0';\n\n    return true;\n}\n\n\nbool VideoCap::read(uint8_t **frame, int *step, int *width, int *height, int *cn, char *frame_type, MVS_DTYPE **motion_vectors, MVS_DTYPE *num_mvs) {\n    bool ret = this->grab();\n    if (ret)\n        ret = this->retrieve(frame, step, width, height, cn, frame_type, motion_vectors, num_mvs);\n    return ret;\n}\n"
  },
  {
    "path": "src/mvextractor/video_cap.hpp",
    "content": "#include <thread>\n#include <iostream>\n#include <cstdint>\n#include <chrono>\n#include <ctime>\n#include <math.h>\n\n// FFMPEG\nextern \"C\" {\n#include <libavutil/motion_vector.h>\n#include <libavformat/avformat.h>\n#include <libswscale/swscale.h>\n}\n\n\n// for changing the dtype of motion vector\n#define MVS_DTYPE int32_t\n#define MVS_DTYPE_NP NPY_INT32\n\n// whether or not to print some debug info\n//#define DEBUG\n\n\nstruct Image_FFMPEG\n{\n    unsigned char* data;\n    int width;\n    int height;\n    int step;\n    int cn;\n};\n\n\n/**\n* Decode frames and motion vectors from a H264 encoded video file or RTSP stream.\n*\n* Implements a VideoCap object similar to OpenCV's VideoCapture. For details\n* see (https://docs.opencv.org/4.1.0/d8/dfe/classcv_1_1VideoCapture.html).\n* The class is intended to open a H264 encoded video file or RTSP stream by\n* providing the according file path or stream url to the `open` method.\n* Upon successful opening of the stream, the `read` method can be called in\n* a loop each time yielding the next decoded frame of the stream as well as\n* frame side data, such as motion vectors (as specified per H264 standard).\n* Instead of calling read, the two methods `grab` and `retrieve` can be used.\n* `grab` performs reading of the next frame from the stream and decoding which\n* is fast. `retrieve` performs color space conversion of the frame and motion\n* vector extraction which is slower. 
Splitting up `read` like this allows to\n* generate timestamps which are close to another in case multi-camera setups\n* are used and captured frames should be close to another.\n*\n*/\nclass VideoCap {\n\nprivate:\n    const char *url;\n    AVDictionary *opts;\n    AVCodec *codec;\n    AVFormatContext *fmt_ctx;\n    AVCodecContext *video_dec_ctx;\n    AVStream *video_stream;\n    int video_stream_idx;\n    AVPacket packet;\n    AVFrame *frame;\n    AVFrame rgb_frame;\n    Image_FFMPEG picture;\n    struct SwsContext *img_convert_ctx;\n    int64_t frame_number;\n    // When true, retrieve only motion vectors and skip RGB/color conversion\n    bool decode_frames;\n#if USE_AV_INTERRUPT_CALLBACK\n    AVInterruptCallbackMetadata interrupt_metadata;\n#endif\n\npublic:\n\n    /** Constructor */\n    VideoCap();\n\n    /** Destroy the VideoCap object and free all resources */\n    void release(void);\n\n    /** Open a video file or RTSP url\n    *\n    * The stream must be H264 encoded. Otherwise, undefined behaviour is\n    * likely.\n    *\n    * @param url Relative or fully specified file path or an RTSP url specifying\n    *     the location of the video stream. Example \"vid.flv\" for a video\n    *     file located in the same directory as the source files. Or\n    *     \"rtsp://xxx.xxx.xxx.xxx:554\" for an IP camera streaming via RTSP.\n    *\n    * @retval true if video file or url could be opened successfully, false\n    *     otherwise.\n    */\n    bool open(const char *url);\n\n    /** Reads the next video frame and motion vectors from the stream\n    *\n    * @retval true if a new video frame could be read and decoded, false\n    *    otherwise (e.g. at the end of the stream).\n    */\n    bool grab(void);\n\n    /** Decodes and returns the grabbed frame and motion vectors\n    *\n    * @param frame Pointer to the raw data of the decoded video frame. 
The\n    *    frame is stored as a C contiguous array of shape (height, width, 3) and\n    *    can be converted into a cv::Mat by using the constructor\n    *    `cv::Mat cv_frame(height, width, CV_MAKETYPE(CV_8U, 3), frame)`.\n    *    Note: A subsequent call of `retrieve` will reuse the same memory for\n    *          storing the new frame. If you want a frame to persist for a longer\n    *          period of time, allocate a new array and memcopy the raw frame\n    *          data into it. After usage you have to manually free this copied\n    *          array.\n    *\n    * @param width Width of the returned frame in pixels.\n    *\n    * @param height Height of the returned frame in pixels.\n    *\n    * @param frame_type Either \"P\", \"B\" or \"I\" indicating whether it is an\n    *    intra-coded frame (I), a predicted frame with only references to past\n    *    frames (P) or reference to both past and future frames (B). Motion\n    *    vectors are only returned for \"P\" and \"B\" frames.\n    *\n    * @param motion_vectors Pointer to the raw data of the motion vectors\n    *    belonging to the decoded frame. The motion vectors are stored as a\n    *    C contiguous array of shape (num_mvs, 10). Each row of the array\n    *    corresponds to one motion vector. The columns of each vector have the\n    *    following meaning (also refer to AVMotionVector in FFMPEG\n    *    documentation):\n    *    - 0: source: Where the current macroblock comes from. 
Negative value\n    *                 when it comes from the past, positive value when it comes\n    *                 from the future.\n    *    - 1: w: Width of the vector's macroblock.\n    *    - 2: h: Height of the vector's macroblock.\n    *    - 3: src_x: x-location of the vector's origin in source frame (in pixels).\n    *    - 4: src_y: y-location of the vector's origin in source frame (in pixels).\n    *    - 5: dst_x: x-location of the vector's destination in the current frame\n    *                (in pixels).\n    *    - 6: dst_y: y-location of the vector's destination in the current frame\n    *                (in pixels).\n    *    - 7: motion_x: src_x = dst_x + motion_x / motion_scale\n    *    - 8: motion_y: src_y = dst_y + motion_y / motion_scale\n    *    - 9: motion_scale: see definition of columns 7 and 8\n    *    Note: If no motion vectors are present in a frame, e.g. if the frame is\n    *          an I frame, `num_mvs` will be 0 and no memory is allocated for\n    *          the motion vectors.\n    *    Note: Other than the frame array, new memory for storing motion vectors\n    *          is allocated on every call of `retrieve`, thus memcopying is not\n    *          needed to persist the motion vectors for a longer period of time.\n    *          Note, that the buffer needs to be freed manually by calling\n    *          `free(motion_vectors)` when the motion vectors are not needed\n    *          anymore.\n    *\n    * @param num_mvs The number of motion vectors corresponding to the rows of\n    *    the motion vector array.\n    *\n    * @retval true if the grabbed video frame and motion vectors could be\n    *    decoded and returned successfully, false otherwise.\n    */\n    bool retrieve(uint8_t **frame, int *step, int *width, int *height, int *cn, char *frame_type, MVS_DTYPE **motion_vectors, MVS_DTYPE *num_mvs);\n\n    /** Convenience wrapper which combines a call of `grab` and `retrieve`.\n    *\n    *   The 
parameters and return value correspond to the `retrieve` method.\n    */\n    bool read(uint8_t **frame, int *step, int *width, int *height, int *cn, char *frame_type, MVS_DTYPE **motion_vectors, MVS_DTYPE *num_mvs);\n\n    /** Enable/disable decoding frames in addition to extracting motion vectors. \n    * If decoding is disabled (false), retrieve() will skip color space conversion \n    * and not fill the frame buffer to avoid costly RGB decoding/copying.\n    */\n    void setDecodeFrames(bool enable);\n    bool getDecodeFrames();\n};\n"
  },
  {
    "path": "tests/README.md",
    "content": "# Tests\n\n## Run Tests\n\nYou can run the test suite either directly on your machine or (easier) within the provided Docker container. Both methods require you to first clone the repository. To this end, change into the desired installation directory on your machine and run\n```bash\ngit clone https://github.com/LukasBommes/mv-extractor.git mv_extractor\n```\n\n### In Docker Container\n\nTo run the tests in the Docker container, change into the `mv_extractor` directory, and run\n```bash\n./run.sh /bin/bash -c 'yum install -y compat-openssl10 && python3.12 -m unittest discover -s tests -p \"*tests.py\"'\n```\n\n### On Host\n\nTo run the tests directly on your machine, you need to install the motion vector extractor as explained [above](#step-1-install).\n\nNow, change into the `mv_extractor` directory and run the tests with\n```bash\npython3.12 -m unittest discover -s tests -p \"*tests.py\"\n```\nConfirm that all tests pass.\n\nSome tests run the [LIVE555 Media Server](http://www.live555.com/mediaServer/), which has dependencies on its own, such as OpenSSL. Make sure these dependencies are installed correctly on your machine, or otherwise you will get test failures with messages, such as \"error while loading shared libraries: libssl.so.10: cannot open shared object file: No such file or directory\". E.g. in Alma Linux you could fix this issue by installing OpenSSL with\n```bash\nyum install -y compat-openssl10\n```\nFor other operating systems you may be lacking additional dependencies, and the package names and installation command may differ.\n\n\n## Reference Test Data\n\nThis directory contains reference test data for validating mv-extractor output. The test suite compares current output against this reference data to ensure no regressions. More specifically the test suite verifies that:\n1. Motion vector extraction produces consistent results\n2. Frame decoding works correctly\n3. 
Frame types are correctly identified\n\n### Structure\n\n- `h264/` - H.264 test video reference data\n- `mpeg4_part2/` - MPEG-4 Part 2 test video reference data  \n- `rtsp/` - RTSP stream reference data\n\n### Data Format\n\nEach subdirectory contains:\n- `motion_vectors/` - Motion vector .npy files\n- `frames/` - Frame image .jpg files\n- `frame_types.txt` - Frame type information\n\n### Reference Data Creation\n\nReference datasets for H.264 and MPEG-4 PART 2 were obtained by running the `extract_mvs` command of a manually verified version of the mvextractor on the provided video files `vid_h264.mp4` and `vid_mpeg4_part2.mp4`\n\nRTSP reference data was obtained by streaming one of the video files with the [LIVE555 Media Server](http://www.live555.com/mediaServer/) and then reading the RTSP stream with the motion vector extractor. To reproduce the reference data, follow the steps below.\n\n#### Convert input file into H.264 video elementary stream\n\nFirst, convert the `vid_h264.mp4` file into a H.264 video elementary stream file. To this end, run\n```\nffmpeg -i vid_h264.mp4 -vf scale=640:360 -vcodec libx264 -f h264 vid_h264.264\n```\nin the project's root directory.\n\nThe conversion is needed, because the LIVE555 Media Server cannot directly serve MP4 files. I also tried converting and serving both input videos as Matroska, which did not work well, and WebM, which did not work at all. Hence, I decided to stick with an H.264 video elementary stream.\n\nThe command also scales down the input video from 720p to 360p because the default `OutPacketBuffer::maxSize` in the media server is set too low to handle the 720p video. The server logs warnings like\n```text\nMultiFramedRTPSink::afterGettingFrame1(): The input frame data was too large for our buffer size (100176).  10743 bytes of trailing data was dropped!  Correct this by increasing \"OutPacketBuffer::maxSize\" to at least 110743, *before* creating this 'RTPSink'.  
(Current value is 100000.)\n```\nand the resulting video frame is truncated at the bottom.\n\n#### Serve the video with LIVE555 Media Server\n\nNow, we serve the file `vid_h264.264` with LIVE555 Media Server. Place the file in a folder named `data`\n```\nmkdir -p data\ncp vid_h264.264 ./data/vid_h264.264\n```\nand then run a fresh manylinux Docker container, in which you mount the `data` folder as a volume\n```\ndocker run -it -v $(pwd)/data:/data quay.io/pypa/manylinux_2_28_x86_64 /bin/bash\n```\nIn the container install and start the LIVE555 Media Server\n```\nyum install -y wget compat-openssl10\nwget -qP /usr/local/bin/ http://www.live555.com/mediaServer/linux/live555MediaServer\nchmod +x /usr/local/bin/live555MediaServer\ncd /data\nlive555MediaServer &\n```\nYou may have to hit `CTRL+C` now to dismiss the log of the server. The server will continue running in the background.\n\n#### Consume the RTSP stream with the motion vector extractor\n\nStill in the Docker container, install the motion vector extractor\n```\npython3.12 -m pip install \"motion-vector-extractor==1.1.0\"\n```\nand run it to read and dump the RTSP stream to a folder named `out-reference`\n```\n/opt/python/cp312-cp312/bin/extract_mvs 'rtsp://localhost:554/vid_h264.264' --verbose --dump out-reference\n```\n\n#### Preserve reference data and cleanup\n\nFinally, exit the container with\n```\nexit\n```\nNow, copy the folder `out-reference` into the `tests/reference/rtsp` folder.\n```\ncp -r data/out-reference tests/reference/rtsp\n```\nand cleanup\n```\nrm -rf data\n```\n"
  },
  {
    "path": "tests/end_to_end_tests.py",
    "content": "import os\nimport time\nimport tempfile\nimport unittest\nimport subprocess\n\nimport cv2\nimport numpy as np\n\n\nPROJECT_ROOT = os.getenv(\"PROJECT_ROOT\", \"\")\n\n\ndef motions_vectors_valid(outdir, refdir):\n    equal = []\n    num_mvs = len(os.listdir(os.path.join(refdir, \"motion_vectors\")))\n    for i in range(num_mvs):\n        mvs = np.load(os.path.join(outdir, \"motion_vectors\", f\"mvs-{i}.npy\"))\n        mvs_ref = np.load(os.path.join(refdir, \"motion_vectors\", f\"mvs-{i}.npy\"))\n        equal.append(np.all(mvs == mvs_ref))\n    return all(equal)\n\n\ndef frame_types_valid(outdir, refdir):\n    with open(os.path.join(outdir, \"frame_types.txt\"), \"r\") as file:\n        frame_types = [line.strip() for line in file]\n    with open(os.path.join(refdir, \"frame_types.txt\"), \"r\") as file:\n        frame_types_ref = [line.strip() for line in file]\n    return frame_types == frame_types_ref\n\n\ndef frames_valid(outdir, refdir):\n    equal = []\n    num_frames = len(os.listdir(os.path.join(refdir, \"frames\")))\n    for i in range(num_frames):\n        frame = cv2.imread(os.path.join(outdir, \"frames\", f\"frame-{i}.jpg\"))\n        frame_ref = cv2.imread(os.path.join(refdir, \"frames\", f\"frame-{i}.jpg\"))\n        equal.append(np.all(frame == frame_ref))\n    return all(equal)\n\n\nclass TestEndToEnd(unittest.TestCase):\n\n    def test_end_to_end_h264(self):\n        with tempfile.TemporaryDirectory() as outdir:\n            print(\"Running extraction for H.264\")\n            video_path = os.path.join(PROJECT_ROOT, 'vid_h264.mp4')\n            subprocess.run(f\"extract_mvs {video_path} --dump {outdir}\", shell=True, check=True)\n            refdir = os.path.join(PROJECT_ROOT, \"tests/reference/h264\")\n\n            self.assertTrue(motions_vectors_valid(outdir, refdir), msg=\"motion vectors are invalid\")\n            self.assertTrue(frame_types_valid(outdir, refdir), msg=\"frame types are invalid\")\n            
self.assertTrue(frames_valid(outdir, refdir), msg=\"frames are invalid\")\n\n    \n    def test_end_to_end_motion_vectors_only_h264(self):\n        with tempfile.TemporaryDirectory() as outdir:\n            print(\"Running motion-vectors-only extraction for H.264\")\n            video_path = os.path.join(PROJECT_ROOT, 'vid_h264.mp4')\n            subprocess.run(f\"extract_mvs {video_path} --skip-decoding-frames --dump {outdir}\", shell=True, check=True)\n            refdir = os.path.join(PROJECT_ROOT, \"tests/reference/h264\")\n\n            self.assertTrue(motions_vectors_valid(outdir, refdir), msg=\"motion vectors are invalid\")\n            self.assertTrue(frame_types_valid(outdir, refdir), msg=\"frame types are invalid\")\n\n\n    def test_end_to_end_mpeg4_part2(self):\n        with tempfile.TemporaryDirectory() as outdir:\n            print(\"Running extraction for MPEG-4 Part 2\")\n            video_path = os.path.join(PROJECT_ROOT, 'vid_mpeg4_part2.mp4')\n            subprocess.run(f\"extract_mvs {video_path} --dump {outdir}\", shell=True, check=True)\n            refdir = os.path.join(PROJECT_ROOT, \"tests/reference/mpeg4_part2\")\n\n            self.assertTrue(motions_vectors_valid(outdir, refdir), msg=\"motion vectors are invalid\")\n            self.assertTrue(frame_types_valid(outdir, refdir), msg=\"frame types are invalid\")\n            self.assertTrue(frames_valid(outdir, refdir), msg=\"frames are invalid\")\n\n    \n    def test_end_to_end_motion_vectors_only_mpeg4_part2(self):\n        with tempfile.TemporaryDirectory() as outdir:\n            print(\"Running motion-vectors-only extraction for MPEG-4 Part 2\")\n            video_path = os.path.join(PROJECT_ROOT, 'vid_mpeg4_part2.mp4')\n            subprocess.run(f\"extract_mvs {video_path} --skip-decoding-frames --dump {outdir}\", shell=True, check=True)\n            refdir = os.path.join(PROJECT_ROOT, \"tests/reference/mpeg4_part2\")\n\n            self.assertTrue(motions_vectors_valid(outdir, 
refdir), msg=\"motion vectors are invalid\")\n            self.assertTrue(frame_types_valid(outdir, refdir), msg=\"frame types are invalid\")\n\n    \n    def test_end_to_end_rtsp(self):\n        with tempfile.TemporaryDirectory() as outdir:\n            print(\"Setting up end to end test for RTSP\")\n            media_server_binary = os.path.abspath(os.path.join(PROJECT_ROOT, \"tests/tools/live555MediaServer\"))\n            rtsp_server = subprocess.Popen(media_server_binary, cwd=PROJECT_ROOT if PROJECT_ROOT else None)\n            try:\n                time.sleep(1)\n                print(\"Running extraction for RTSP stream\")\n                rtsp_url = \"rtsp://localhost:554/vid_h264.264\"\n                subprocess.run(f\"extract_mvs {rtsp_url} --dump {outdir}\", shell=True, check=True)\n                refdir = os.path.join(PROJECT_ROOT, \"tests/reference/rtsp\")\n\n                self.assertTrue(motions_vectors_valid(outdir, refdir), msg=\"motion vectors are invalid\")\n                self.assertTrue(frame_types_valid(outdir, refdir), msg=\"frame types are invalid\")\n                self.assertTrue(frames_valid(outdir, refdir), msg=\"frames are invalid\")\n            finally:\n                rtsp_server.terminate()\n\n\nif __name__ == '__main__':\n    unittest.main()\n            "
  },
  {
    "path": "tests/reference/h264/frame_types.txt",
    "content": "I\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\n"
  },
  {
    "path": "tests/reference/mpeg4_part2/frame_types.txt",
    "content": "I\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nI\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nI\n"
  },
  {
    "path": "tests/reference/rtsp/frame_types.txt",
    "content": "I\nB\nB\nB\nP\nP\nP\nP\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nP\nP\nP\nP\nP\nP\nP\nB\nP\nB\nP\nB\nB\nB\nP\nP\nP\nP\nP\nP\nP\nP\nP\nB\nB\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nB\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nB\nB\nP\nP\nB\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nB\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nB\nP\nP\nP\nP\nP\nP\nB\nB\nB\nP\nP\nB\nB\nB\nP\nB\nP\nP\nP\nP\nB\nB\nP\nP\nP\nB\nB\nB\nP\nB\nB\nP\nB\nB\nP\nI\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nP\nP\nP\nP\nP\nB\nP\nB\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nP\nB\nP\nP\nP\nP\nP\nP\nP\nP\nP\nB\nP\nP\nB\nP\nB\nP\nB\nP\nB\nB\nB\nP\nP\nB\nB\nB\nP\nB\nP\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nP\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nB\nB\nP\nB\nP\nB\nP\nP\nI\nB\nB\nB\nP\nB\nB\nP\nB\nB\nB\nP\n"
  },
  {
    "path": "tests/unit_tests.py",
    "content": "import os\nimport unittest\nimport time\n\nimport numpy as np\n\nfrom mvextractor.videocap import VideoCap\n\n\nPROJECT_ROOT = os.getenv(\"PROJECT_ROOT\", \"\")\n\n\nclass TestMotionVectorExtraction(unittest.TestCase):\n\n    def validate_frame(self, frame):\n        self.assertEqual(type(frame), np.ndarray, \"Frame should be numpy array\")\n        self.assertEqual(frame.dtype, np.uint8, \"Frame dtype should be uint8\")\n        self.assertEqual(frame.shape, (720, 1280, 3), \"Frame shape should be (720, 1280, 3)\")\n\n\n    def validate_motion_vectors(self, motion_vectors, shape=(0, 10)):\n        self.assertEqual(type(motion_vectors), np.ndarray, \"Motion vectors should be numpy array\")\n        self.assertEqual(motion_vectors.dtype, np.int32, \"Motion vectors dtype should be int32\")\n        self.assertEqual(motion_vectors.shape, shape, \"Motion vectors shape not matching expected shape\")\n\n\n    # run before every test\n    def setUp(self):\n        self.cap = VideoCap()\n\n\n    # run after every test regardless of success\n    def tearDown(self):\n        self.cap.release()\n\n\n    def open_video(self):\n        return self.cap.open(os.path.join(PROJECT_ROOT, \"vid_h264.mp4\"))\n\n\n    def test_init_cap(self):\n        self.cap = VideoCap()\n        self.assertIn('open', dir(self.cap))\n        self.assertIn('grab', dir(self.cap))\n        self.assertIn('read', dir(self.cap))\n        self.assertIn('release', dir(self.cap))\n        self.assertIn('retrieve', dir(self.cap))\n        self.assertIn('set_decode_frames', dir(self.cap))\n        self.assertIn('decode_frames', dir(self.cap))\n\n\n    def test_decode_frames_mode(self):\n        self.cap = VideoCap()\n        self.assertTrue(self.cap.decode_frames, \"Frame decoding is expected to be active by default\")\n        self.cap.set_decode_frames(True)\n        self.assertTrue(self.cap.decode_frames, \"Frame decoding is expected to be active\")\n        
self.cap.set_decode_frames(False)\n        self.assertFalse(self.cap.decode_frames, \"Frame decoding is expected to be inactive\")\n        self.open_video()\n        self.assertTrue(self.cap.decode_frames, \"Frame decoding is expected to be active after opening a video\")\n        self.cap.set_decode_frames(False)\n        self.assertFalse(self.cap.decode_frames, \"Frame decoding is expected to be inactive\")\n        self.cap.release()\n        self.assertTrue(self.cap.decode_frames, \"Frame decoding is expected to be active\")\n\n\n    def test_open_video(self):\n        ret = self.open_video()\n        self.assertTrue(ret, \"Should open video file successfully\")\n\n    \n    def test_open_invalid_video(self):\n        ret = self.cap.open(\"vid_not_existent.mp4\")\n        self.assertFalse(ret, \"Should fail to open non-existent video file\")\n\n\n    def test_read_not_opened_cap(self):\n        ret = self.cap.open(\"vid_not_existent.mp4\")\n        self.assertFalse(ret, \"Should fail to open non-existent video file\")\n        ret, frame, motion_vectors, frame_type = self.cap.read()\n        self.assertEqual(frame_type, \"?\", \"Frame type should be ?\")\n        self.assertFalse(ret, \"Should fail to read from non-existent video file\")\n        self.assertIsNone(frame, \"Frame read from non-existent video should be None\")\n        self.validate_motion_vectors(motion_vectors)\n\n\n    def test_read_first_I_frame(self):\n        self.open_video()\n        ret, frame, motion_vectors, frame_type = self.cap.read()\n        self.assertTrue(ret, \"Should succeed to read from video file\")\n        self.assertEqual(frame_type, \"I\", \"Frame type of first frame should be I\")      \n        self.validate_frame(frame)\n        self.validate_motion_vectors(motion_vectors)\n\n\n    def test_read_first_P_frame(self):\n        self.open_video()\n        self.cap.read()  # skip first frame (I frame)\n        ret, frame, motion_vectors, frame_type = self.cap.read()\n     
   self.assertTrue(ret, \"Should succeed to read from video file\")\n        self.assertEqual(frame_type, \"P\", \"Frame type of second frame should be P\")      \n        self.validate_frame(frame)\n        self.validate_motion_vectors(motion_vectors, shape=(3665, 10))\n        self.assertTrue(np.all(motion_vectors[:10, :] == np.array([\n            [-1, 16, 16,   8, 8,   8, 8, 0, 0, 4],\n            [-1, 16, 16,  24, 8,  24, 8, 0, 0, 4],\n            [-1, 16, 16,  40, 8,  40, 8, 0, 0, 4],\n            [-1, 16, 16,  56, 8,  56, 8, 0, 0, 4],\n            [-1, 16, 16,  72, 8,  72, 8, 0, 0, 4],\n            [-1, 16, 16,  88, 8,  88, 8, 0, 0, 4],\n            [-1, 16, 16, 104, 8, 104, 8, 0, 0, 4],\n            [-1, 16, 16, 120, 8, 120, 8, 0, 0, 4],\n            [-1, 16, 16, 136, 8, 136, 8, 0, 0, 4],\n            [-1, 16, 16, 152, 8, 152, 8, 0, 0, 4],\n        ])), \"Motion vectors should match the expected values\")\n\n\n    def test_read_first_ten_frames(self):\n        rets = []\n        frames = []\n        motion_vectors = []\n        frame_types = []\n        self.open_video()\n        for _ in range(10):\n            ret, frame, motion_vector, frame_type = self.cap.read()\n            rets.append(ret)\n            frames.append(frame)\n            motion_vectors.append(motion_vector)\n            frame_types.append(frame_type)\n\n        self.assertTrue(all(rets), \"All frames should be read successfully\")\n        self.assertEqual(frame_types, ['I', 'P', 'P', 'P', 'P', 'P', 'P', 'P', 'P', 'P'])\n        [self.validate_frame(frame) for frame in frames]\n        shapes = [\n            (0, 10), (3665, 10), (3696, 10), (3722, 10), (3807, 10), \n            (3953, 10), (4155, 10), (3617, 10), (4115, 10), (4192, 10)\n        ]\n        [self.validate_motion_vectors(motion_vector, shape) for motion_vector, shape in zip(motion_vectors, shapes)]\n\n\n    def test_frame_count(self):\n        self.open_video()\n        frame_count = 0\n        while True:\n            
ret, _, _, _ = self.cap.read()\n            if not ret:\n                break\n            frame_count += 1\n        self.assertEqual(frame_count, 337, \"Video file is expected to have 337 frames\")\n\n\n    def test_timings(self):\n        self.open_video()\n        times = []\n        while True:\n            tstart = time.perf_counter()\n            ret, _, _, _ = self.cap.read()\n            if not ret:\n                break\n            tend = time.perf_counter()\n            telapsed = tend - tstart\n            times.append(telapsed)\n        dt_mean = np.mean(times)\n        dt_std = np.std(times)\n        print(f\"Timings: mean {dt_mean} s -- std: {dt_std} s\")\n        self.assertGreater(dt_mean, 0)\n        self.assertGreater(dt_std, 0)\n        self.assertLess(dt_mean, 0.01, msg=f\"Mean of frame read duration exceeds maximum ({dt_mean} s > {0.01} s)\")\n        self.assertLess(dt_std, 0.003, msg=f\"Standard deviation of frame read duration exceeds maximum ({dt_std} s > {0.003} s)\")\n\n\n    def test_skipping_frame_decoding_does_not_raise(self):\n        self.cap.set_decode_frames(False)\n        self.cap.set_decode_frames(True)\n\n\n    def test_read_first_I_frame_skipping_frame_decoding(self):\n        self.open_video()\n        self.cap.set_decode_frames(False)\n        ret, frame, motion_vectors, frame_type = self.cap.read()\n        self.assertTrue(ret, \"Should succeed to read from video file\")\n        self.assertEqual(frame_type, \"I\", \"Frame type of first frame should be I\")\n        self.assertIsNone(frame, \"Frame should be None when skipping frame decoding\")\n        self.validate_motion_vectors(motion_vectors)\n        \n\n    def test_read_first_P_frame_skipping_frame_decoding(self):\n        self.open_video()\n        self.cap.set_decode_frames(False)\n        self.cap.read()  # skip first frame (I frame)\n        ret, frame, motion_vectors, frame_type = self.cap.read()\n        self.assertTrue(ret, \"Should succeed to read from 
video file\")\n        self.assertEqual(frame_type, \"P\", \"Frame type of second frame should be P\")      \n        self.assertIsNone(frame, \"Frame should be None when skipping frame decoding\")\n        self.validate_motion_vectors(motion_vectors, shape=(3665, 10))\n        self.assertTrue(np.all(motion_vectors[:10, :] == np.array([\n            [-1, 16, 16,   8, 8,   8, 8, 0, 0, 4],\n            [-1, 16, 16,  24, 8,  24, 8, 0, 0, 4],\n            [-1, 16, 16,  40, 8,  40, 8, 0, 0, 4],\n            [-1, 16, 16,  56, 8,  56, 8, 0, 0, 4],\n            [-1, 16, 16,  72, 8,  72, 8, 0, 0, 4],\n            [-1, 16, 16,  88, 8,  88, 8, 0, 0, 4],\n            [-1, 16, 16, 104, 8, 104, 8, 0, 0, 4],\n            [-1, 16, 16, 120, 8, 120, 8, 0, 0, 4],\n            [-1, 16, 16, 136, 8, 136, 8, 0, 0, 4],\n            [-1, 16, 16, 152, 8, 152, 8, 0, 0, 4],\n        ])), \"Motion vectors should match the expected values\")\n\n\n    def test_read_first_ten_frames_skipping_frame_decoding(self):\n        rets = []\n        frames = []\n        motion_vectors = []\n        frame_types = []\n        self.open_video()\n        self.cap.set_decode_frames(False)\n        for _ in range(10):\n            ret, frame, motion_vector, frame_type = self.cap.read()\n            rets.append(ret)\n            frames.append(frame)\n            motion_vectors.append(motion_vector)\n            frame_types.append(frame_type)\n\n        self.assertTrue(all(rets), \"All frames should be read successfully\")\n        self.assertEqual(frame_types, ['I', 'P', 'P', 'P', 'P', 'P', 'P', 'P', 'P', 'P'])\n        [self.assertIsNone(frame) for frame in frames]\n        shapes = [\n            (0, 10), (3665, 10), (3696, 10), (3722, 10), (3807, 10), \n            (3953, 10), (4155, 10), (3617, 10), (4115, 10), (4192, 10)\n        ]\n        [self.validate_motion_vectors(motion_vector, shape) for motion_vector, shape in zip(motion_vectors, shapes)]\n\n\n    def 
test_frame_count_skipping_frame_decoding(self):\n        self.open_video()\n        self.cap.set_decode_frames(False)\n        frame_count = 0\n        while True:\n            ret, _, _, _ = self.cap.read()\n            if not ret:\n                break\n            frame_count += 1\n        self.assertEqual(frame_count, 337, \"Video file is expected to have 337 frames\")\n\n\n    def test_skipping_frame_decoding_is_faster_than_not_skipping(self):\n        self.open_video()\n        # skip frame decoding\n        self.cap.set_decode_frames(False)\n        start_time = time.perf_counter()\n        frame_count = 0\n        for _ in range(50):  # read 50 frames\n            ret, _, _, _ = self.cap.read()\n            if not ret:\n                break\n            frame_count += 1\n        mvo_time = time.perf_counter() - start_time\n        \n        # do not skip frame decoding\n        self.cap.set_decode_frames(True)\n        start_time = time.perf_counter()\n        frame_count_full = 0\n        for i in range(50):  # Read 50 frames\n            ret, _, _, _ = self.cap.read()\n            if not ret:\n                break\n            frame_count_full += 1\n        full_time = time.perf_counter() - start_time\n        \n        self.assertEqual(frame_count, 50, \"Should read 50 frames\")\n        self.assertEqual(frame_count_full, 50, \"Should read 50 frames\")\n        \n        # Performance comparison (skipping decoding should be at least as fast as not skipping decoding mode)\n        if mvo_time > 0 and full_time > 0:\n            speedup = full_time / mvo_time\n            print(f\"Speedup by skipping frame decoding: {speedup:.2f}x\")\n            self.assertGreaterEqual(speedup, 1.0, \"Skipping frame decoding should be reasonably fast\")\n\n\nif __name__ == '__main__':\n    unittest.main()\n"
  }
]