Repository: matham/ffpyplayer
Branch: master
Commit: 58b68c2225f3
Files: 63
Total size: 622.9 KB

Directory structure:
gitextract_izcxaq8m/

├── .ci/
│   ├── apple_arm64_x265.patch
│   ├── apple_libvorbis_cpusubtype.patch
│   ├── build-wheels.sh
│   ├── build_wheels_osx.sh
│   ├── dep_versions.sh
│   ├── fdk.patch
│   ├── libmp3lame-symbols.patch
│   ├── merge_osx_deps.sh
│   ├── x265_51ae8e922bcc4586ad4710812072289af91492a8.patch
│   ├── x265_b354c009a60bcd6d7fc04014e200a1ee9c45c167.patch
│   └── yum_deps.sh
├── .github/
│   └── workflows/
│       └── pythonapp.yml
├── .gitignore
├── COPYING
├── Makefile
├── README.rst
├── doc/
│   ├── Makefile
│   ├── make.bat
│   └── source/
│       ├── api.rst
│       ├── conf.py
│       ├── examples.rst
│       ├── getting_started.rst
│       ├── index.rst
│       ├── installation.rst
│       ├── pic.rst
│       ├── player.rst
│       ├── tools.rst
│       └── writer.rst
├── examples/
│   └── test.py
├── ffpyplayer/
│   ├── __init__.py
│   ├── clib/
│   │   ├── misc.c
│   │   └── misc.h
│   ├── includes/
│   │   ├── ff_consts.pxi
│   │   ├── ffmpeg.pxi
│   │   ├── inline_funcs.pxi
│   │   └── sdl.pxi
│   ├── pic.pxd
│   ├── pic.pyx
│   ├── player/
│   │   ├── __init__.py
│   │   ├── clock.pxd
│   │   ├── clock.pyx
│   │   ├── core.pxd
│   │   ├── core.pyx
│   │   ├── decoder.pxd
│   │   ├── decoder.pyx
│   │   ├── frame_queue.pxd
│   │   ├── frame_queue.pyx
│   │   ├── player.pxd
│   │   ├── player.pyx
│   │   ├── queue.pxd
│   │   └── queue.pyx
│   ├── tests/
│   │   ├── __init__.py
│   │   ├── common.py
│   │   ├── test_pic.py
│   │   ├── test_play.py
│   │   └── test_write.py
│   ├── threading.pxd
│   ├── threading.pyx
│   ├── tools.pyx
│   ├── writer.pxd
│   └── writer.pyx
├── pyproject.toml
└── setup.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .ci/apple_arm64_x265.patch
================================================
diff -Naur ./source/CMakeLists.txt ../x265_apple_patch/source/CMakeLists.txt
--- ./source/CMakeLists.txt	2021-05-08 13:06:22.000000000 +0100
+++ ../x265_apple_patch/source/CMakeLists.txt	2021-05-08 13:08:01.000000000 +0100
@@ -40,9 +40,11 @@
 # System architecture detection
 string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" SYSPROC)
 set(X86_ALIASES x86 i386 i686 x86_64 amd64)
-set(ARM_ALIASES armv6l armv7l aarch64)
+set(ARM_ALIASES armv6l armv7l)  # aarch64 is moved out so 64-bit targets stop matching the 32-bit ARM list
+set(ARM64_ALIASES arm64 arm64e aarch64)  # names compared against the lowercased CMAKE_SYSTEM_PROCESSOR (SYSPROC)
 list(FIND X86_ALIASES "${SYSPROC}" X86MATCH)
 list(FIND ARM_ALIASES "${SYSPROC}" ARMMATCH)
+list(FIND ARM64_ALIASES "${SYSPROC}" ARM64MATCH)  # list(FIND) stores -1 in ARM64MATCH when SYSPROC is not listed
 set(POWER_ALIASES ppc64 ppc64le)
 list(FIND POWER_ALIASES "${SYSPROC}" POWERMATCH)
 if("${SYSPROC}" STREQUAL "" OR X86MATCH GREATER "-1")
@@ -79,6 +81,15 @@
         message(STATUS "Detected ARM target processor")
         add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1)
     endif()
+elseif(ARM64MATCH GREATER "-1")  # SYSPROC was found in ARM64_ALIASES
+    if(CROSS_COMPILE_ARM64)
+        message(STATUS "Cross compiling for ARM64 arch")
+    else()
+        set(CROSS_COMPILE_ARM64 0)  # give the flag an explicit false value for the `ARM64 OR CROSS_COMPILE_ARM64` tests
+    endif()
+    message(STATUS "Detected ARM64 target processor")
+    set(ARM64 1)
+    add_definitions(-DX265_ARCH_ARM64=1 -DHAVE_NEON)  # HAVE_NEON is passed without a value; the added C++ sources test it with `#if HAVE_NEON`
 else()
     message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown")
     message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}")
@@ -259,6 +270,9 @@
             endif()
         endif()
     endif()
+    if(ARM64 OR CROSS_COMPILE_ARM64)  # NEON is enabled for every ARM64 build; no feature probe is performed here
+        add_definitions(-DHAVE_NEON)  # same definition the ARM64 detection branch already adds
+    endif()
     add_definitions(${ARM_ARGS})
     if(FPROFILE_GENERATE)
         if(INTEL_CXX)
@@ -350,7 +364,7 @@
 endif(GCC)
 
 find_package(Nasm)
-if(ARM OR CROSS_COMPILE_ARM)
+if(ARM OR CROSS_COMPILE_ARM OR ARM64 OR CROSS_COMPILE_ARM64)  # ARM64 joins the branch where ENABLE_ASSEMBLY defaults to ON without a NASM check
     option(ENABLE_ASSEMBLY "Enable use of assembly coded primitives" ON)
 elseif(NASM_FOUND AND X86)
     if (NASM_VERSION_STRING VERSION_LESS "2.13.0")
@@ -549,6 +563,19 @@
                 ARGS ${ARM_ARGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
                 DEPENDS ${ASM_SRC})
         endforeach()
+    elseif(ARM64 OR CROSS_COMPILE_ARM64)
+    # compile ARM64 arch asm files here; sources are looked up under common/arm64
+        enable_language(ASM)
+        foreach(ASM ${ARM_ASMS})  # NOTE(review): iterates ARM_ASMS, not an ARM64-specific list - confirm this is intended
+            set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm64/${ASM})
+            list(APPEND ASM_SRCS ${ASM_SRC})
+            list(APPEND ASM_OBJS ${ASM}.${SUFFIX})
+            add_custom_command(
+                OUTPUT ${ASM}.${SUFFIX}
+                COMMAND ${CMAKE_CXX_COMPILER}
+                ARGS ${ARM_ARGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
+                DEPENDS ${ASM_SRC})
+        endforeach()
     elseif(X86)
     # compile X86 arch asm files here
         foreach(ASM ${MSVC_ASMS})
diff -Naur ./source/common/CMakeLists.txt ../x265_apple_patch/source/common/CMakeLists.txt
--- ./source/common/CMakeLists.txt	2021-05-08 13:06:22.000000000 +0100
+++ ../x265_apple_patch/source/common/CMakeLists.txt	2021-05-08 13:08:01.000000000 +0100
@@ -114,6 +114,22 @@
     source_group(Assembly FILES ${ASM_PRIMITIVES})
 endif(ENABLE_ASSEMBLY AND (ARM OR CROSS_COMPILE_ARM))
 
+# ARM64 primitives: the C++ files listed in C_SRCS are appended to ASM_PRIMITIVES with an arm64/ prefix.
+if(ENABLE_ASSEMBLY AND (ARM64 OR CROSS_COMPILE_ARM64))
+    set(C_SRCS asm-primitives.cpp pixel-prim.h pixel-prim.cpp  filter-prim.h filter-prim.cpp dct-prim.h dct-prim.cpp loopfilter-prim.cpp loopfilter-prim.h intrapred-prim.cpp arm64-utils.cpp arm64-utils.h)
+    enable_language(ASM)
+    # add ARM assembly/intrinsic files here
+    #set(A_SRCS )
+    #set(VEC_PRIMITIVES)
+
+    #set(ARM64_ASMS "${A_SRCS}" CACHE INTERNAL "ARM64 Assembly Sources")
+    foreach(SRC ${C_SRCS})  # one entry per file, relative to this directory
+        set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm64/${SRC})
+    endforeach()
+    source_group(Assembly FILES ${ASM_PRIMITIVES})
+endif(ENABLE_ASSEMBLY AND (ARM64 OR CROSS_COMPILE_ARM64))
+
+
 if(POWER)
     set_source_files_properties(version.cpp PROPERTIES COMPILE_FLAGS -DX265_VERSION=${X265_VERSION})
     if(ENABLE_ALTIVEC)
diff -Naur ./source/common/arm64/arm64-utils.cpp ../x265_apple_patch/source/common/arm64/arm64-utils.cpp
--- ./source/common/arm64/arm64-utils.cpp	1970-01-01 01:00:00.000000000 +0100
+++ ../x265_apple_patch/source/common/arm64/arm64-utils.cpp	2021-05-08 13:08:01.000000000 +0100
@@ -0,0 +1,290 @@
+#include "common.h"
+#include "x265.h"
+#include "arm64-utils.h"
+#include <arm_neon.h>
+
+#define COPY_16(d,s) *(uint8x16_t *)(d) = *(uint8x16_t *)(s)
+namespace X265_NS {
+
+// Transposes an 8x8 block of bytes from src into dst; dstride/sstride are byte offsets between rows.
+// Three vtrn1/vtrn2 rounds exchange 32-bit, then 16-bit, then 8-bit groups between pairs of rows.
+void transpose8x8(uint8_t* dst, const uint8_t* src, intptr_t dstride, intptr_t sstride)
+{
+    uint8x8_t a0,a1,a2,a3,a4,a5,a6,a7;
+    uint8x8_t b0,b1,b2,b3,b4,b5,b6,b7;
+
+    a0 = *(uint8x8_t *)(src + 0*sstride);  // each source row is read as one 8-byte vector
+    a1 = *(uint8x8_t *)(src + 1*sstride);
+    a2 = *(uint8x8_t *)(src + 2*sstride);
+    a3 = *(uint8x8_t *)(src + 3*sstride);
+    a4 = *(uint8x8_t *)(src + 4*sstride);
+    a5 = *(uint8x8_t *)(src + 5*sstride);
+    a6 = *(uint8x8_t *)(src + 6*sstride);
+    a7 = *(uint8x8_t *)(src + 7*sstride);
+
+    b0 = vtrn1_u32(a0,a4);  // round 1: 32-bit halves, rows n and n+4
+    b1 = vtrn1_u32(a1,a5);
+    b2 = vtrn1_u32(a2,a6);
+    b3 = vtrn1_u32(a3,a7);
+    b4 = vtrn2_u32(a0,a4);
+    b5 = vtrn2_u32(a1,a5);
+    b6 = vtrn2_u32(a2,a6);
+    b7 = vtrn2_u32(a3,a7);
+
+    a0 = vtrn1_u16(b0,b2);  // round 2: 16-bit pairs, rows n and n+2
+    a1 = vtrn1_u16(b1,b3);
+    a2 = vtrn2_u16(b0,b2);
+    a3 = vtrn2_u16(b1,b3);
+    a4 = vtrn1_u16(b4,b6);
+    a5 = vtrn1_u16(b5,b7);
+    a6 = vtrn2_u16(b4,b6);
+    a7 = vtrn2_u16(b5,b7);
+
+    b0 = vtrn1_u8(a0,a1);  // round 3: single bytes, adjacent rows; b0..b7 are the output rows
+    b1 = vtrn2_u8(a0,a1);
+    b2 = vtrn1_u8(a2,a3);
+    b3 = vtrn2_u8(a2,a3);
+    b4 = vtrn1_u8(a4,a5);
+    b5 = vtrn2_u8(a4,a5);
+    b6 = vtrn1_u8(a6,a7);
+    b7 = vtrn2_u8(a6,a7);
+
+    *(uint8x8_t *)(dst + 0*dstride) = b0;
+    *(uint8x8_t *)(dst + 1*dstride) = b1;
+    *(uint8x8_t *)(dst + 2*dstride) = b2;
+    *(uint8x8_t *)(dst + 3*dstride) = b3;
+    *(uint8x8_t *)(dst + 4*dstride) = b4;
+    *(uint8x8_t *)(dst + 5*dstride) = b5;
+    *(uint8x8_t *)(dst + 6*dstride) = b6;
+    *(uint8x8_t *)(dst + 7*dstride) = b7;
+}
+
+
+
+
+// Transposes a 16x16 block of bytes; every row is moved as one 16-byte vector (declared as uint16x8_t).
+// Four vtrn1q/vtrn2q rounds exchange 64-, 32-, 16- and finally 8-bit groups between pairs of rows.
+void transpose16x16(uint8_t* dst, const uint8_t* src, intptr_t dstride, intptr_t sstride)
+{
+    uint16x8_t a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,aA,aB,aC,aD,aE,aF;
+    uint16x8_t b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,bA,bB,bC,bD,bE,bF;
+    uint16x8_t c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,cA,cB,cC,cD,cE,cF;
+    uint16x8_t d0,d1,d2,d3,d4,d5,d6,d7,d8,d9,dA,dB,dC,dD,dE,dF;
+    // load the 16 source rows; sstride is a byte offset
+    a0 = *(uint16x8_t *)(src + 0*sstride);
+    a1 = *(uint16x8_t *)(src + 1*sstride);
+    a2 = *(uint16x8_t *)(src + 2*sstride);
+    a3 = *(uint16x8_t *)(src + 3*sstride);
+    a4 = *(uint16x8_t *)(src + 4*sstride);
+    a5 = *(uint16x8_t *)(src + 5*sstride);
+    a6 = *(uint16x8_t *)(src + 6*sstride);
+    a7 = *(uint16x8_t *)(src + 7*sstride);
+    a8 = *(uint16x8_t *)(src + 8*sstride);
+    a9 = *(uint16x8_t *)(src + 9*sstride);
+    aA = *(uint16x8_t *)(src + 10*sstride);
+    aB = *(uint16x8_t *)(src + 11*sstride);
+    aC = *(uint16x8_t *)(src + 12*sstride);
+    aD = *(uint16x8_t *)(src + 13*sstride);
+    aE = *(uint16x8_t *)(src + 14*sstride);
+    aF = *(uint16x8_t *)(src + 15*sstride);
+
+    b0 = vtrn1q_u64(a0, a8);  // round 1: 64-bit halves, rows n and n+8
+    b1 = vtrn1q_u64(a1, a9);
+    b2 = vtrn1q_u64(a2, aA);
+    b3 = vtrn1q_u64(a3, aB);
+    b4 = vtrn1q_u64(a4, aC);
+    b5 = vtrn1q_u64(a5, aD);
+    b6 = vtrn1q_u64(a6, aE);
+    b7 = vtrn1q_u64(a7, aF);
+    b8 = vtrn2q_u64(a0, a8);
+    b9 = vtrn2q_u64(a1, a9);
+    bA = vtrn2q_u64(a2, aA);
+    bB = vtrn2q_u64(a3, aB);
+    bC = vtrn2q_u64(a4, aC);
+    bD = vtrn2q_u64(a5, aD);
+    bE = vtrn2q_u64(a6, aE);
+    bF = vtrn2q_u64(a7, aF);
+
+    c0 = vtrn1q_u32(b0, b4);  // round 2: 32-bit groups, vectors n and n+4
+    c1 = vtrn1q_u32(b1, b5);
+    c2 = vtrn1q_u32(b2, b6);
+    c3 = vtrn1q_u32(b3, b7);
+    c4 = vtrn2q_u32(b0, b4);
+    c5 = vtrn2q_u32(b1, b5);
+    c6 = vtrn2q_u32(b2, b6);
+    c7 = vtrn2q_u32(b3, b7);
+    c8 = vtrn1q_u32(b8, bC);
+    c9 = vtrn1q_u32(b9, bD);
+    cA = vtrn1q_u32(bA, bE);
+    cB = vtrn1q_u32(bB, bF);
+    cC = vtrn2q_u32(b8, bC);
+    cD = vtrn2q_u32(b9, bD);
+    cE = vtrn2q_u32(bA, bE);
+    cF = vtrn2q_u32(bB, bF);
+    // round 3: 16-bit groups, vectors n and n+2
+    d0 = vtrn1q_u16(c0, c2);
+    d1 = vtrn1q_u16(c1, c3);
+    d2 = vtrn2q_u16(c0, c2);
+    d3 = vtrn2q_u16(c1, c3);
+    d4 = vtrn1q_u16(c4, c6);
+    d5 = vtrn1q_u16(c5, c7);
+    d6 = vtrn2q_u16(c4, c6);
+    d7 = vtrn2q_u16(c5, c7);
+    d8 = vtrn1q_u16(c8, cA);
+    d9 = vtrn1q_u16(c9, cB);
+    dA = vtrn2q_u16(c8, cA);
+    dB = vtrn2q_u16(c9, cB);
+    dC = vtrn1q_u16(cC, cE);
+    dD = vtrn1q_u16(cD, cF);
+    dE = vtrn2q_u16(cC, cE);
+    dF = vtrn2q_u16(cD, cF);
+    // round 4: single bytes between adjacent vectors, written straight to the destination rows
+    *(uint16x8_t *)(dst + 0*dstride)  = vtrn1q_u8(d0, d1);
+    *(uint16x8_t *)(dst + 1*dstride)  = vtrn2q_u8(d0, d1);
+    *(uint16x8_t *)(dst + 2*dstride)  = vtrn1q_u8(d2, d3);
+    *(uint16x8_t *)(dst + 3*dstride)  = vtrn2q_u8(d2, d3);
+    *(uint16x8_t *)(dst + 4*dstride)  = vtrn1q_u8(d4, d5);
+    *(uint16x8_t *)(dst + 5*dstride)  = vtrn2q_u8(d4, d5);
+    *(uint16x8_t *)(dst + 6*dstride)  = vtrn1q_u8(d6, d7);
+    *(uint16x8_t *)(dst + 7*dstride)  = vtrn2q_u8(d6, d7);
+    *(uint16x8_t *)(dst + 8*dstride)  = vtrn1q_u8(d8, d9);
+    *(uint16x8_t *)(dst + 9*dstride)  = vtrn2q_u8(d8, d9);
+    *(uint16x8_t *)(dst + 10*dstride)  = vtrn1q_u8(dA, dB);
+    *(uint16x8_t *)(dst + 11*dstride)  = vtrn2q_u8(dA, dB);
+    *(uint16x8_t *)(dst + 12*dstride)  = vtrn1q_u8(dC, dD);
+    *(uint16x8_t *)(dst + 13*dstride)  = vtrn2q_u8(dC, dD);
+    *(uint16x8_t *)(dst + 14*dstride)  = vtrn1q_u8(dE, dF);
+    *(uint16x8_t *)(dst + 15*dstride)  = vtrn2q_u8(dE, dF);
+
+    
+}
+
+
+void transpose32x32(uint8_t* dst, const uint8_t* src, intptr_t dstride, intptr_t sstride)
+{
+    // Precondition: dst and src are either the same buffer or do not overlap at all.
+    // The two diagonal 16x16 quadrants transpose onto their own positions.
+    transpose16x16(dst, src, dstride, sstride);
+    transpose16x16(dst + 16 * dstride + 16, src + 16 * sstride + 16, dstride, sstride);
+    if (dst != src)
+    {
+        // Separate buffers: each off-diagonal quadrant is written directly to its mirror position.
+        transpose16x16(dst + 16 * dstride, src + 16, dstride, sstride);
+        transpose16x16(dst + 16, src + 16 * sstride, dstride, sstride);
+        return;
+    }
+    // In place: keep the transposed top-right quadrant in scratch before that area is overwritten.
+    uint8_t scratch[16 * 16] __attribute__((aligned(64)));
+    transpose16x16(scratch, src + 16, 16, sstride);
+    transpose16x16(dst + 16, src + 16 * sstride, dstride, sstride);
+    for (int row = 0; row < 16; row++) COPY_16(dst + (16 + row) * dstride, scratch + 16 * row);
+}
+
+
+// Transposes an 8x8 block of 16-bit values; strides count uint16_t elements. Rounds: 64-, 32-, then 16-bit.
+void transpose8x8(uint16_t* dst, const uint16_t* src, intptr_t dstride, intptr_t sstride)
+{
+    uint16x8_t a0,a1,a2,a3,a4,a5,a6,a7;
+    uint16x8_t b0,b1,b2,b3,b4,b5,b6,b7;
+
+    a0 = *(uint16x8_t *)(src + 0*sstride);  // each source row is read as one 8-lane vector
+    a1 = *(uint16x8_t *)(src + 1*sstride);
+    a2 = *(uint16x8_t *)(src + 2*sstride);
+    a3 = *(uint16x8_t *)(src + 3*sstride);
+    a4 = *(uint16x8_t *)(src + 4*sstride);
+    a5 = *(uint16x8_t *)(src + 5*sstride);
+    a6 = *(uint16x8_t *)(src + 6*sstride);
+    a7 = *(uint16x8_t *)(src + 7*sstride);
+
+    b0 = vtrn1q_u64(a0,a4);  // round 1: 64-bit halves, rows n and n+4
+    b1 = vtrn1q_u64(a1,a5);
+    b2 = vtrn1q_u64(a2,a6);
+    b3 = vtrn1q_u64(a3,a7);
+    b4 = vtrn2q_u64(a0,a4);
+    b5 = vtrn2q_u64(a1,a5);
+    b6 = vtrn2q_u64(a2,a6);
+    b7 = vtrn2q_u64(a3,a7);
+
+    a0 = vtrn1q_u32(b0,b2);  // round 2: 32-bit groups, vectors n and n+2
+    a1 = vtrn1q_u32(b1,b3);
+    a2 = vtrn2q_u32(b0,b2);
+    a3 = vtrn2q_u32(b1,b3);
+    a4 = vtrn1q_u32(b4,b6);
+    a5 = vtrn1q_u32(b5,b7);
+    a6 = vtrn2q_u32(b4,b6);
+    a7 = vtrn2q_u32(b5,b7);
+
+    b0 = vtrn1q_u16(a0,a1);  // round 3: single 16-bit lanes, adjacent vectors; b0..b7 are the output rows
+    b1 = vtrn2q_u16(a0,a1);
+    b2 = vtrn1q_u16(a2,a3);
+    b3 = vtrn2q_u16(a2,a3);
+    b4 = vtrn1q_u16(a4,a5);
+    b5 = vtrn2q_u16(a4,a5);
+    b6 = vtrn1q_u16(a6,a7);
+    b7 = vtrn2q_u16(a6,a7);
+
+    *(uint16x8_t *)(dst + 0*dstride) = b0;
+    *(uint16x8_t *)(dst + 1*dstride) = b1;
+    *(uint16x8_t *)(dst + 2*dstride) = b2;
+    *(uint16x8_t *)(dst + 3*dstride) = b3;
+    *(uint16x8_t *)(dst + 4*dstride) = b4;
+    *(uint16x8_t *)(dst + 5*dstride) = b5;
+    *(uint16x8_t *)(dst + 6*dstride) = b6;
+    *(uint16x8_t *)(dst + 7*dstride) = b7;
+}
+
+void transpose16x16(uint16_t* dst, const uint16_t* src, intptr_t dstride, intptr_t sstride)
+{
+    //assumption: there is no partial overlap (dst == src, or the two blocks do not overlap at all)
+    transpose8x8(dst,src,dstride,sstride);                         // top-left 8x8 quadrant
+    transpose8x8(dst+8*dstride+8,src+8*sstride+8,dstride,sstride); // bottom-right 8x8 quadrant
+    // the off-diagonal quadrants trade places; in place, one of them is staged in a scratch buffer
+    if (dst == src)
+    {
+        uint16_t tmp[8*8] __attribute__((aligned(64))); // written/read through uint16x8_t pointers; aligned like the scratch buffers in both transpose32x32 overloads
+        transpose8x8(tmp,src + 8,8,sstride);
+        transpose8x8(dst + 8, src + 8*sstride,dstride,sstride);
+        for (int i=0;i<8;i++) COPY_16(dst+(8 + i)*dstride,tmp + 8*i); // one 16-byte row (8 values) per copy
+    }
+    else
+    {
+        transpose8x8(dst+8*dstride,src + 8,dstride,sstride);
+        transpose8x8(dst + 8, src + 8*sstride,dstride,sstride);
+    }
+    
+}
+
+
+
+void transpose32x32(uint16_t* dst, const uint16_t* src, intptr_t dstride, intptr_t sstride)
+{
+    // Works on a 4x4 grid of 8x8 tiles; dst and src must be the same buffer or not overlap at all.
+    for (int diag = 0; diag < 4; diag++)
+    {
+        // The tile on the diagonal transposes onto its own position.
+        transpose8x8(dst + diag * 8 * (1 + dstride), src + diag * 8 * (1 + sstride), dstride, sstride);
+        for (int off = diag + 1; off < 4; off++)
+        {
+            if (dst != src)
+            {
+                // Separate buffers: the two mirror tiles are written directly.
+                transpose8x8(dst + 8 * (off + diag * dstride), src + 8 * (diag + off * sstride), dstride, sstride);
+                transpose8x8(dst + 8 * (diag + off * dstride), src + 8 * (off + diag * sstride), dstride, sstride);
+                continue;
+            }
+            // In place: stage one transposed tile in scratch so its mirror can be overwritten first.
+            uint16_t scratch[8 * 8] __attribute__((aligned(64)));
+            transpose8x8(scratch, src + 8 * diag + 8 * off * sstride, 8, sstride);
+            transpose8x8(dst + 8 * diag + 8 * off * dstride, src + 8 * off + 8 * diag * sstride, dstride, sstride);
+            for (int row = 0; row < 8; row++) COPY_16(dst + 8 * off + (8 * diag + row) * dstride, scratch + 8 * row);
+        }
+    }
+}
+
+
+
+
+}
+
+
+
diff -Naur ./source/common/arm64/arm64-utils.h ../x265_apple_patch/source/common/arm64/arm64-utils.h
--- ./source/common/arm64/arm64-utils.h	1970-01-01 01:00:00.000000000 +0100
+++ ../x265_apple_patch/source/common/arm64/arm64-utils.h	2021-05-08 13:08:01.000000000 +0100
@@ -0,0 +1,14 @@
+#ifndef __ARM64_UTILS_H__
+#define __ARM64_UTILS_H__
+// Block-transpose helpers. Strides are counted in elements of the pointed-to type (bytes for uint8_t).
+// This header includes nothing itself; arm64-utils.cpp includes common.h and x265.h before it.
+namespace X265_NS {
+void transpose8x8(uint8_t* dst, const uint8_t* src, intptr_t dstride, intptr_t sstride);
+void transpose16x16(uint8_t* dst, const uint8_t* src, intptr_t dstride, intptr_t sstride);
+void transpose32x32(uint8_t* dst, const uint8_t* src, intptr_t dstride, intptr_t sstride);
+void transpose8x8(uint16_t* dst, const uint16_t* src, intptr_t dstride, intptr_t sstride);
+void transpose16x16(uint16_t* dst, const uint16_t* src, intptr_t dstride, intptr_t sstride);
+void transpose32x32(uint16_t* dst, const uint16_t* src, intptr_t dstride, intptr_t sstride);
+}
+
+#endif
diff -Naur ./source/common/arm64/asm-primitives.cpp ../x265_apple_patch/source/common/arm64/asm-primitives.cpp
--- ./source/common/arm64/asm-primitives.cpp	1970-01-01 01:00:00.000000000 +0100
+++ ../x265_apple_patch/source/common/arm64/asm-primitives.cpp	2021-05-08 13:08:01.000000000 +0100
@@ -0,0 +1,53 @@
+/*****************************************************************************
+ * Copyright (C) 2013-2017 MulticoreWare, Inc
+ *
+ * Authors: Steve Borho <steve@borho.org>
+ *          Praveen Kumar Tiwari <praveen@multicorewareinc.com>
+ *          Min Chen <chenm003@163.com> <min.chen@multicorewareinc.com>
+ *          Dnyaneshwar Gorade <dnyaneshwar@multicorewareinc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#include "common.h"
+#include "primitives.h"
+#include "x265.h"
+#include "cpu.h"
+
+#include "pixel-prim.h"
+#include "filter-prim.h"
+#include "dct-prim.h"
+#include "loopfilter-prim.h"
+#include "intrapred-prim.h"
+
+namespace X265_NS {
+// private x265 namespace
+
+void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask)
+{
+    // Without the NEON bit in cpuMask none of the NEON setup hooks run and p is left untouched.
+    if ((cpuMask & X265_CPU_NEON) == 0)
+        return;
+    setupPixelPrimitives_neon(p);
+    setupFilterPrimitives_neon(p);
+    setupDCTPrimitives_neon(p);
+    setupLoopFilterPrimitives_neon(p);
+    setupIntraPrimitives_neon(p);
+}
+
+} // namespace X265_NS
diff -Naur ./source/common/arm64/dct-prim.cpp ../x265_apple_patch/source/common/arm64/dct-prim.cpp
--- ./source/common/arm64/dct-prim.cpp	1970-01-01 01:00:00.000000000 +0100
+++ ../x265_apple_patch/source/common/arm64/dct-prim.cpp	2021-05-08 13:08:01.000000000 +0100
@@ -0,0 +1,933 @@
+#include "dct-prim.h"
+
+
+#if HAVE_NEON
+
+#include <arm_neon.h>
+
+
+namespace {
+using namespace X265_NS;
+
+// Returns a with its eight 16-bit lanes in reverse order, using a byte-level table lookup.
+static int16x8_t rev16(const int16x8_t a)
+{
+    static const int8x16_t tbl = {14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1};  // byte indices: lane 7 first, lane 0 last
+    return vqtbx1q_u8(a,a,tbl);  // every index is below 16, so each output byte comes from the table operand
+}
+// Returns a with its four 32-bit lanes in reverse order, using a byte-level table lookup.
+static int32x4_t rev32(const int32x4_t a)
+{
+    static const int8x16_t tbl = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};  // byte indices: lane 3 first, lane 0 last
+    return vqtbx1q_u8(a,a,tbl);  // every index is below 16, so each output byte comes from the table operand
+}
+// Transposes, in place, the 4x4 matrix of 16-bit values whose rows are x0..x3.
+static void transpose_4x4x16(int16x4_t& x0,int16x4_t& x1,int16x4_t& x2,int16x4_t& x3)
+{
+    int16x4_t s0,s1,s2,s3;
+    s0 = vtrn1_s32(x0,x2);  // first exchange 32-bit halves between rows n and n+2
+    s1 = vtrn1_s32(x1,x3);
+    s2 = vtrn2_s32(x0,x2);
+    s3 = vtrn2_s32(x1,x3);
+    
+    x0 = vtrn1_s16(s0,s1);  // then exchange 16-bit lanes between adjacent intermediates
+    x1 = vtrn2_s16(s0,s1);
+    x2 = vtrn1_s16(s2,s3);
+    x3 = vtrn2_s16(s2,s3);
+}
+
+
+// Walks coeff in scan order until numSig non-zero values have been seen, filling per-group sign bits, non-zero flags and counts; returns the index of the last scan position examined.
+static int scanPosLast_opt(const uint16_t *scan, const coeff_t *coeff, uint16_t *coeffSign, uint16_t *coeffFlag, uint8_t *coeffNum, int numSig, const uint16_t* /*scanCG4x4*/, const int /*trSize*/)
+{
+
+    // This is an optimized function for scanPosLast, which removes the rmw dependency, once integrated into mainline x265, should replace reference implementation
+    // For clarity, left the original reference code in comments
+    int scanPosLast = 0;
+    
+    uint16_t cSign = 0;  // sign bits of the current group's non-zero coefficients
+    uint16_t cFlag = 0;  // one bit per scanned position in the current group, 1 = non-zero
+    uint8_t cNum = 0;    // number of non-zero coefficients in the current group
+    
+    uint32_t prevcgIdx = 0;
+    do
+    {
+        const uint32_t cgIdx = (uint32_t)scanPosLast >> MLS_CG_SIZE;
+
+        const uint32_t posLast = scan[scanPosLast];
+
+        const int curCoeff = coeff[posLast];
+        const uint32_t isNZCoeff = (curCoeff != 0);
+        /*
+        NOTE: the new algorithm is complicated, so I keep reference code here
+        uint32_t posy   = posLast >> log2TrSize;
+        uint32_t posx   = posLast - (posy << log2TrSize);
+        uint32_t blkIdx0 = ((posy >> MLS_CG_LOG2_SIZE) << codingParameters.log2TrSizeCG) + (posx >> MLS_CG_LOG2_SIZE);
+        const uint32_t blkIdx = ((posLast >> (2 * MLS_CG_LOG2_SIZE)) & ~maskPosXY) + ((posLast >> MLS_CG_LOG2_SIZE) & maskPosXY);
+        sigCoeffGroupFlag64 |= ((uint64_t)isNZCoeff << blkIdx);
+        */
+
+        // get L1 sig map
+        numSig -= isNZCoeff;
+
+        if (scanPosLast % (1<<MLS_CG_SIZE) == 0)  // first position of a group: flush the previous group's accumulators
+        {
+            coeffSign[prevcgIdx] = cSign;
+            coeffFlag[prevcgIdx] = cFlag;
+            coeffNum[prevcgIdx] = cNum;
+            cSign = 0;
+            cFlag = 0;
+            cNum = 0;
+        }
+        // TODO: optimize by instruction BTS
+       cSign += (uint16_t)(((curCoeff < 0) ? 1 : 0) << cNum);
+       cFlag = (cFlag << 1) + (uint16_t)isNZCoeff;
+       cNum += (uint8_t)isNZCoeff;
+       prevcgIdx = cgIdx;
+        scanPosLast++;
+    }
+    while (numSig > 0);
+
+    coeffSign[prevcgIdx] = cSign;  // flush the group that was in progress when the loop ended
+    coeffFlag[prevcgIdx] = cFlag;
+    coeffNum[prevcgIdx] = cNum;
+    return scanPosLast - 1;  // undo the final increment
+}
+
+
+#if (MLS_CG_SIZE == 4)
+template<int log2TrSize>
+static void nonPsyRdoQuant_neon(int16_t *m_resiDctCoeff, int64_t *costUncoded, int64_t *totalUncodedCost, int64_t *totalRdCost, uint32_t blkPos)
+{
+    const int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; /* Represents scaling through forward transform */
+    const int scaleBits = SCALE_BITS - 2 * transformShift;
+    const uint32_t trSize = 1 << log2TrSize;
+
+    int64x2_t vcost_sum_0 = vdupq_n_s64(0);
+    int64x2_t vcost_sum_1 = vdupq_n_s64(0);
+    for (int y = 0; y < MLS_CG_SIZE; y++)  // one row of four coefficients per iteration
+    {
+      int16x4_t in = *(int16x4_t *)&m_resiDctCoeff[blkPos];
+      int32x4_t mul = vmull_s16(in,in);  // coefficient squared, widened to 32 bits
+      int64x2_t cost0, cost1;
+      cost0 = vshll_n_s32(vget_low_s32(mul),scaleBits);  // widen to 64 bits and shift left by scaleBits
+      cost1 = vshll_high_n_s32(mul,scaleBits);
+      *(int64x2_t *)&costUncoded[blkPos+0] = cost0;
+      *(int64x2_t *)&costUncoded[blkPos+2] = cost1;
+      vcost_sum_0 = vaddq_s64(vcost_sum_0,cost0);
+      vcost_sum_1 = vaddq_s64(vcost_sum_1,cost1);
+      blkPos += trSize;  // advance to the next row of the transform block
+    }
+    int64_t sum = vaddvq_s64(vaddq_s64(vcost_sum_0,vcost_sum_1));
+    *totalUncodedCost += sum;  // the same group total is added to both running totals
+    *totalRdCost += sum;
+}
+
+template<int log2TrSize>
+static void psyRdoQuant_neon(int16_t *m_resiDctCoeff, int16_t *m_fencDctCoeff, int64_t *costUncoded, int64_t *totalUncodedCost, int64_t *totalRdCost, int64_t *psyScale, uint32_t blkPos)
+{
+    const int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; /* Represents scaling through forward transform */
+    const int scaleBits = SCALE_BITS - 2 * transformShift;
+    const uint32_t trSize = 1 << log2TrSize;
+    //using preprocessor to bypass clang bug
+    const int max = X265_MAX(0, (2 * transformShift + 1));
+
+    int64x2_t vcost_sum_0 = vdupq_n_s64(0);
+    int64x2_t vcost_sum_1 = vdupq_n_s64(0);
+    int32x4_t vpsy = vdupq_n_s32(*psyScale);  // NOTE(review): the int64_t *psyScale is narrowed to 32 bits here - confirm its range
+    for (int y = 0; y < MLS_CG_SIZE; y++)  // one row of four coefficients per iteration
+    {
+      int32x4_t signCoef = vmovl_s16(*(int16x4_t *)&m_resiDctCoeff[blkPos]);
+      int32x4_t predictedCoef = vsubq_s32(vmovl_s16(*(int16x4_t *)&m_fencDctCoeff[blkPos]),signCoef);
+      int64x2_t cost0, cost1;
+      cost0 = vmull_s32(vget_low_s32(signCoef),vget_low_s32(signCoef));
+      cost1 = vmull_high_s32(signCoef,signCoef);
+      cost0 = vshlq_n_s64(cost0,scaleBits);
+      cost1 = vshlq_n_s64(cost1,scaleBits);
+      int64x2_t neg0 = vmull_s32(vget_low_s32(predictedCoef),vget_low_s32(vpsy));
+      int64x2_t neg1 = vmull_high_s32(predictedCoef,vpsy);
+      if (max > 0) {
+        int64x2_t shift = vdupq_n_s64(-max);  // a negative count makes vshlq_s64 shift right
+        neg0 = vshlq_s64(neg0,shift);
+        neg1 = vshlq_s64(neg1,shift);
+      }
+      cost0 = vsubq_s64(cost0,neg0);  // squared-coefficient cost minus the psy term
+      cost1 = vsubq_s64(cost1,neg1);
+      *(int64x2_t *)&costUncoded[blkPos+0] = cost0;
+      *(int64x2_t *)&costUncoded[blkPos+2] = cost1;
+      vcost_sum_0 = vaddq_s64(vcost_sum_0,cost0);
+      vcost_sum_1 = vaddq_s64(vcost_sum_1,cost1);
+     
+        blkPos += trSize;
+    }
+  int64_t sum = vaddvq_s64(vaddq_s64(vcost_sum_0,vcost_sum_1));
+  *totalUncodedCost += sum;  // the same group total is added to both running totals
+  *totalRdCost += sum;
+}
+
+#else
+      #error "MLS_CG_SIZE must be 4 for neon version"
+#endif
+
+
+// Returns how many of the trSize*trSize values in quantCoeff are non-zero.
+template<int trSize>
+int  count_nonzero_neon(const int16_t* quantCoeff)
+{
+  X265_CHECK(((intptr_t)quantCoeff & 15) == 0, "quant buffer not aligned\n");
+  int count = 0;
+  int16x8_t vcount = vdupq_n_s16(0);
+  const int numCoeff = trSize * trSize;
+  int i = 0;
+  for (; (i + 8) <= numCoeff; i+=8)  // vector part: eight coefficients at a time
+  {
+    int16x8_t in = *(int16x8_t*)&quantCoeff[i];
+    vcount = vaddq_s16(vcount,vtstq_s16(in,in));  // non-zero lanes yield all-ones, so each lane accumulates -1 per hit
+  }
+  for (; i < numCoeff; i++)  // scalar tail for any remainder
+  {
+      count += quantCoeff[i] != 0;
+  }
+
+  return count - vaddvq_s16(vcount);  // the vector tally is negative, hence the subtraction
+}
+// Copies a trSize x trSize block from residual (row stride resiStride) into coeff (packed rows) and returns the number of non-zero values copied.
+template<int trSize>
+uint32_t copy_count_neon(int16_t* coeff, const int16_t* residual, intptr_t resiStride)
+{
+  uint32_t numSig = 0;
+  int16x8_t vcount = vdupq_n_s16(0);
+  for (int k = 0; k < trSize; k++)
+  {
+    int j = 0;
+    for (; (j + 8) <= trSize; j+=8)  // vector part: eight values at a time
+    {
+      int16x8_t in = *(int16x8_t*)&residual[j];
+      *(int16x8_t*)&coeff[j] = in;
+      vcount = vaddq_s16(vcount,vtstq_s16(in,in));  // non-zero lanes yield all-ones, so each lane accumulates -1 per hit
+    }
+    for (; j < trSize; j++)  // scalar tail when trSize is not a multiple of 8
+    {
+      coeff[j] = residual[j];
+      numSig += (residual[j] != 0);
+    }
+    residual += resiStride;
+    coeff += trSize;
+  }
+
+  return numSig - vaddvq_s16(vcount);  // the vector tally is negative, hence the subtraction
+}
+// 16-point partial butterfly: reads `line` rows of 16 values from src and writes each result k to dst[k * line],
+// advancing dst by one per row. Every result is rounded by adding 1 << (shift - 1) and shifted right by shift.
+static void partialButterfly16(const int16_t* src, int16_t* dst, int shift, int line)
+{
+    int j, k;
+    int32x4_t E[2], O[2];
+    int32x4_t EE, EO;
+    int32x2_t EEE, EEO;
+    const int add = 1 << (shift - 1);
+    const int32x4_t _vadd = {add,0};  // rounding term in lane 0 only, so it is counted once when lanes are summed
+    
+    for (j = 0; j < line; j++)
+    {
+        int16x8_t in0 = *(int16x8_t *)src;
+        int16x8_t in1 = rev16(*(int16x8_t *)&src[8]);  // second half reversed so lane k pairs src[k] with src[15 - k]
+        
+        E[0] = vaddl_s16(vget_low_s16(in0),vget_low_s16(in1));
+        O[0] = vsubl_s16(vget_low_s16(in0),vget_low_s16(in1));
+        E[1] = vaddl_high_s16(in0,in1);
+        O[1] = vsubl_high_s16(in0,in1);
+
+        for (k = 1; k < 16; k += 2)  // odd outputs: dot product of g_t16[k][0..7] with O
+        {
+            int32x4_t c0 = vmovl_s16(*(int16x4_t *)&g_t16[k][0]);
+            int32x4_t c1 = vmovl_s16(*(int16x4_t *)&g_t16[k][4]);
+            
+            int32x4_t res = _vadd;
+            res = vmlaq_s32(res,c0,O[0]);
+            res = vmlaq_s32(res,c1,O[1]);
+            dst[k * line] = (int16_t)(vaddvq_s32(res) >> shift);
+        }
+
+        /* EE and EO */
+        EE = vaddq_s32(E[0],rev32(E[1]));
+        EO = vsubq_s32(E[0],rev32(E[1]));
+        
+        for (k = 2; k < 16; k += 4)  // outputs 2, 6, 10, 14: dot product of g_t16[k][0..3] with EO
+        {
+            int32x4_t c0 = vmovl_s16(*(int16x4_t *)&g_t16[k][0]);
+            int32x4_t res = _vadd;
+            res = vmlaq_s32(res,c0,EO);
+            dst[k * line] = (int16_t)(vaddvq_s32(res) >> shift);
+        }
+
+        /* EEE and EEO */
+        EEE[0] = EE[0] + EE[3];
+        EEO[0] = EE[0] - EE[3];
+        EEE[1] = EE[1] + EE[2];
+        EEO[1] = EE[1] - EE[2];
+        
+        dst[0] = (int16_t)((g_t16[0][0] * EEE[0] + g_t16[0][1] * EEE[1] + add) >> shift);
+        dst[8 * line] = (int16_t)((g_t16[8][0] * EEE[0] + g_t16[8][1] * EEE[1] + add) >> shift);
+        dst[4 * line] = (int16_t)((g_t16[4][0] * EEO[0] + g_t16[4][1] * EEO[1] + add) >> shift);
+        dst[12 * line] = (int16_t)((g_t16[12][0] * EEO[0] + g_t16[12][1] * EEO[1] + add) >> shift);
+        
+  
+        src += 16;
+        dst++;
+    }
+}
+// 32-point partial butterfly: reads `line` rows of 32 values from src and writes each result k to dst[k * line],
+// advancing dst by one per row; each result is rounded with 1 << (shift - 1) and shifted right by shift.
+static void partialButterfly32(const int16_t* src, int16_t* dst, int shift, int line)
+{
+    int j, k;
+    const int add = 1 << (shift - 1);
+    
+    
+    for (j = 0; j < line; j++)
+    {
+        int32x4_t VE[4], VO0,VO1,VO2,VO3;
+        int32x4_t VEE[2], VEO[2];
+        int32x4_t VEEE, VEEO;
+        int EEEE[2], EEEO[2];
+
+        int16x8x4_t inputs;
+        inputs = *(int16x8x4_t *)&src[0];  // the 32 inputs as four 8-lane vectors
+        int16x8x4_t in_rev;  // only val[0] and val[1] are assigned and read
+        
+        in_rev.val[1] = rev16(inputs.val[2]);
+        in_rev.val[0] = rev16(inputs.val[3]);
+        
+        VE[0] = vaddl_s16(vget_low_s16(inputs.val[0]),vget_low_s16(in_rev.val[0]));
+        VE[1] = vaddl_high_s16(inputs.val[0],in_rev.val[0]);
+        VO0 = vsubl_s16(vget_low_s16(inputs.val[0]),vget_low_s16(in_rev.val[0]));
+        VO1 = vsubl_high_s16(inputs.val[0],in_rev.val[0]);
+        VE[2] = vaddl_s16(vget_low_s16(inputs.val[1]),vget_low_s16(in_rev.val[1]));
+        VE[3] = vaddl_high_s16(inputs.val[1],in_rev.val[1]);
+        VO2 = vsubl_s16(vget_low_s16(inputs.val[1]),vget_low_s16(in_rev.val[1]));
+        VO3 = vsubl_high_s16(inputs.val[1],in_rev.val[1]);
+
+        for (k = 1; k < 32; k += 2)  // odd outputs: dot product of g_t32[k][0..15] with VO0..VO3
+        {
+            int32x4_t c0 = vmovl_s16(*(int16x4_t *)&g_t32[k][0]);
+            int32x4_t c1 = vmovl_s16(*(int16x4_t *)&g_t32[k][4]);
+            int32x4_t c2 = vmovl_s16(*(int16x4_t *)&g_t32[k][8]);
+            int32x4_t c3 = vmovl_s16(*(int16x4_t *)&g_t32[k][12]);
+            int32x4_t s = vmulq_s32(c0,VO0);
+            s = vmlaq_s32(s,c1,VO1);
+            s = vmlaq_s32(s,c2,VO2);
+            s = vmlaq_s32(s,c3,VO3);
+            
+            dst[k * line] = (int16_t)((vaddvq_s32(s) + add) >> shift);
+            
+        }
+        
+        int32x4_t rev_VE[2];
+        
+        
+        rev_VE[0] = rev32(VE[3]);
+        rev_VE[1] = rev32(VE[2]);
+        
+        /* EE and EO */
+        for (k = 0; k < 2; k++)
+        {
+            VEE[k] = vaddq_s32(VE[k],rev_VE[k]);
+            VEO[k] = vsubq_s32(VE[k],rev_VE[k]);
+        }
+        for (k = 2; k < 32; k += 4)  // outputs 2, 6, ..., 30: dot product of g_t32[k][0..7] with VEO
+        {
+            int32x4_t c0 = vmovl_s16(*(int16x4_t *)&g_t32[k][0]);
+            int32x4_t c1 = vmovl_s16(*(int16x4_t *)&g_t32[k][4]);
+            int32x4_t s = vmulq_s32(c0,VEO[0]);
+            s = vmlaq_s32(s,c1,VEO[1]);
+            
+            dst[k * line] = (int16_t)((vaddvq_s32(s) + add) >> shift);
+            
+        }
+        
+        int32x4_t tmp = rev32(VEE[1]);
+        VEEE = vaddq_s32(VEE[0],tmp);
+        VEEO = vsubq_s32(VEE[0],tmp);
+        for (k = 4; k < 32; k += 8)  // outputs 4, 12, 20, 28: dot product of g_t32[k][0..3] with VEEO
+        {
+            int32x4_t c = vmovl_s16(*(int16x4_t *)&g_t32[k][0]);
+            int32x4_t s = vmulq_s32(c,VEEO);
+            
+            dst[k * line] = (int16_t)((vaddvq_s32(s) + add) >> shift);
+        }
+        
+        /* EEEE and EEEO */
+        EEEE[0] = VEEE[0] + VEEE[3];
+        EEEO[0] = VEEE[0] - VEEE[3];
+        EEEE[1] = VEEE[1] + VEEE[2];
+        EEEO[1] = VEEE[1] - VEEE[2];
+        
+        dst[0] = (int16_t)((g_t32[0][0] * EEEE[0] + g_t32[0][1] * EEEE[1] + add) >> shift);
+        dst[16 * line] = (int16_t)((g_t32[16][0] * EEEE[0] + g_t32[16][1] * EEEE[1] + add) >> shift);
+        dst[8 * line] = (int16_t)((g_t32[8][0] * EEEO[0] + g_t32[8][1] * EEEO[1] + add) >> shift);
+        dst[24 * line] = (int16_t)((g_t32[24][0] * EEEO[0] + g_t32[24][1] * EEEO[1] + add) >> shift);
+        
+        
+        
+        src += 32;
+        dst++;
+    }
+}
+// Scalar 8-point forward partial butterfly: each 8-sample input row yields 8 outputs stored `line` elements apart in dst.
+static void partialButterfly8(const int16_t* src, int16_t* dst, int shift, int line)
+{
+    int j, k;
+    int E[4], O[4];
+    int EE[2], EO[2];
+    int add = 1 << (shift - 1);
+    // add is half of (1 << shift), so (x + add) >> shift rounds to nearest instead of flooring.
+    for (j = 0; j < line; j++)
+    {
+        /* E and O*/
+        for (k = 0; k < 4; k++)
+        {
+            E[k] = src[k] + src[7 - k];
+            O[k] = src[k] - src[7 - k];
+        }
+
+        /* EE and EO */
+        EE[0] = E[0] + E[3];
+        EO[0] = E[0] - E[3];
+        EE[1] = E[1] + E[2];
+        EO[1] = E[1] - E[2];
+        // Outputs 0 and 4 use the EE sums, outputs 2 and 6 the EO differences.
+        dst[0] = (int16_t)((g_t8[0][0] * EE[0] + g_t8[0][1] * EE[1] + add) >> shift);
+        dst[4 * line] = (int16_t)((g_t8[4][0] * EE[0] + g_t8[4][1] * EE[1] + add) >> shift);
+        dst[2 * line] = (int16_t)((g_t8[2][0] * EO[0] + g_t8[2][1] * EO[1] + add) >> shift);
+        dst[6 * line] = (int16_t)((g_t8[6][0] * EO[0] + g_t8[6][1] * EO[1] + add) >> shift);
+        // Odd outputs 1, 3, 5, 7 each combine all four O terms.
+        dst[line] = (int16_t)((g_t8[1][0] * O[0] + g_t8[1][1] * O[1] + g_t8[1][2] * O[2] + g_t8[1][3] * O[3] + add) >> shift);
+        dst[3 * line] = (int16_t)((g_t8[3][0] * O[0] + g_t8[3][1] * O[1] + g_t8[3][2] * O[2] + g_t8[3][3] * O[3] + add) >> shift);
+        dst[5 * line] = (int16_t)((g_t8[5][0] * O[0] + g_t8[5][1] * O[1] + g_t8[5][2] * O[2] + g_t8[5][3] * O[3] + add) >> shift);
+        dst[7 * line] = (int16_t)((g_t8[7][0] * O[0] + g_t8[7][1] * O[1] + g_t8[7][2] * O[2] + g_t8[7][3] * O[3] + add) >> shift);
+
+        src += 8;
+        dst++;
+    }
+}
+// Scalar 4-point inverse partial butterfly: reads 4 inputs `line` elements apart and writes 4 contiguous outputs clipped to int16.
+static void partialButterflyInverse4(const int16_t* src, int16_t* dst, int shift, int line)
+{
+    int j;
+    int E[2], O[2];
+    int add = 1 << (shift - 1);
+    // add is half of (1 << shift), so (x + add) >> shift rounds to nearest instead of flooring.
+    for (j = 0; j < line; j++)
+    {
+        /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
+        O[0] = g_t4[1][0] * src[line] + g_t4[3][0] * src[3 * line];
+        O[1] = g_t4[1][1] * src[line] + g_t4[3][1] * src[3 * line];
+        E[0] = g_t4[0][0] * src[0] + g_t4[2][0] * src[2 * line];
+        E[1] = g_t4[0][1] * src[0] + g_t4[2][1] * src[2 * line];
+
+        /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
+        dst[0] = (int16_t)(x265_clip3(-32768, 32767, (E[0] + O[0] + add) >> shift));
+        dst[1] = (int16_t)(x265_clip3(-32768, 32767, (E[1] + O[1] + add) >> shift));
+        dst[2] = (int16_t)(x265_clip3(-32768, 32767, (E[1] - O[1] + add) >> shift));
+        dst[3] = (int16_t)(x265_clip3(-32768, 32767, (E[0] - O[0] + add) >> shift));
+        // Step to the next input column and the next group of 4 outputs.
+        src++;
+        dst += 4;
+    }
+}
+
+ 
+// NEON 16-point inverse partial butterfly: each outer iteration consumes four src columns and emits four 16-sample output rows.
+static void partialButterflyInverse16_neon(const int16_t* src, int16_t* orig_dst, int shift, int line)
+{
+#define FMAK(x,l) s[l] = vmlal_lane_s16(s[l],*(int16x4_t*)&src[(x)*line],*(int16x4_t *)&g_t16[x][k],l)
+#define MULK(x,l) vmull_lane_s16(*(int16x4_t*)&src[x*line],*(int16x4_t *)&g_t16[x][k],l);
+#define ODD3_15(k) FMAK(3,k);FMAK(5,k);FMAK(7,k);FMAK(9,k);FMAK(11,k);FMAK(13,k);FMAK(15,k);
+#define EVEN6_14_STEP4(k) FMAK(6,k);FMAK(10,k);FMAK(14,k);
+// MULK(x,l) multiplies the four samples at src[x*line] by g_t16[x][k+l]; FMAK(x,l) accumulates that product into s[l].
+// In ODD3_15/EVEN6_14_STEP4 the macro parameter named k is the lane l; FMAK/MULK still read the loop variable k.
+    int j, k;
+    int32x4_t E[8], O[8];
+    int32x4_t EE[4], EO[4];
+    int32x4_t EEE[2], EEO[2];
+    const int add = 1 << (shift - 1);
+
+    // NOTE(review): j and src advance by 4, so line is assumed to be a multiple of 4 (idct16_neon passes 16).
+#pragma unroll(4)
+    for (j = 0; j < line; j+=4)
+    {
+        /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
+        // EEE[k] combines rows 0 and 8, EEO[k] rows 4 and 12, for coefficient columns k = 0, 1.
+#pragma unroll(2)
+        for (k=0;k<2;k++) {
+            int32x4_t s;
+            s = vmull_s16(vdup_n_s16(g_t16[4][k]),*(int16x4_t*)&src[4*line]);;
+            EEO[k] = vmlal_s16(s,vdup_n_s16(g_t16[12][k]),*(int16x4_t*)&src[(12)*line]);
+            s = vmull_s16(vdup_n_s16(g_t16[0][k]),*(int16x4_t*)&src[0*line]);;
+            EEE[k] = vmlal_s16(s,vdup_n_s16(g_t16[8][k]),*(int16x4_t*)&src[(8)*line]);
+        }
+        
+        /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
+        EE[0] = vaddq_s32(EEE[0] , EEO[0]);
+        EE[2] = vsubq_s32(EEE[1] , EEO[1]);
+        EE[1] = vaddq_s32(EEE[1] , EEO[1]);
+        EE[3] = vsubq_s32(EEE[0] , EEO[0]);
+
+        // EO[0..3]: rows 2, 6, 10 and 14 against coefficient columns 0..3; the loop body executes once.
+#pragma unroll(1)
+        for (k = 0; k < 4; k+=4)
+        {
+            int32x4_t s[4];
+            s[0] = MULK(2,0);
+            s[1] = MULK(2,1);
+            s[2] = MULK(2,2);
+            s[3] = MULK(2,3);
+            
+            EVEN6_14_STEP4(0);
+            EVEN6_14_STEP4(1);
+            EVEN6_14_STEP4(2);
+            EVEN6_14_STEP4(3);
+            
+            EO[k] = s[0];
+            EO[k+1] = s[1];
+            EO[k+2] = s[2];
+            EO[k+3] = s[3];
+         }
+        
+
+        // Clamp bounds for the int16 result; vshlq_s32 with the negated count performs the right shift.
+        static const int32x4_t min = vdupq_n_s32(-32768);
+        static const int32x4_t max = vdupq_n_s32(32767);
+        const int32x4_t minus_shift = vdupq_n_s32(-shift);
+
+#pragma unroll(4)
+        for (k = 0; k < 4; k++)
+        {
+            E[k] = vaddq_s32(EE[k] , EO[k]);
+            E[k + 4] = vsubq_s32(EE[3 - k] , EO[3 - k]);
+        }
+        
+#pragma unroll(2)
+        for (k = 0; k < 8; k+=4)
+        {
+            int32x4_t s[4];
+            s[0] = MULK(1,0);
+            s[1] = MULK(1,1);
+            s[2] = MULK(1,2);
+            s[3] = MULK(1,3);
+            ODD3_15(0);
+            ODD3_15(1);
+            ODD3_15(2);
+            ODD3_15(3);
+            O[k] = s[0];
+            O[k+1] = s[1];
+            O[k+2] = s[2];
+            O[k+3] = s[3];
+            int32x4_t t;
+            int16x4_t x0,x1,x2,x3;
+            // Four identical stanzas (index k..k+3): add the rounding term to E, form E + O, shift right, clamp, narrow to int16.
+            E[k] = vaddq_s32(vdupq_n_s32(add),E[k]);
+            t = vaddq_s32(E[k],O[k]);
+            t = vshlq_s32(t,minus_shift);
+            t = vmaxq_s32(t,min);
+            t = vminq_s32(t,max);
+            x0 = vmovn_s32(t);
+
+            E[k+1] = vaddq_s32(vdupq_n_s32(add),E[k+1]);
+            t = vaddq_s32(E[k+1],O[k+1]);
+            t = vshlq_s32(t,minus_shift);
+            t = vmaxq_s32(t,min);
+            t = vminq_s32(t,max);
+            x1 = vmovn_s32(t);
+
+            E[k+2] = vaddq_s32(vdupq_n_s32(add),E[k+2]);
+            t = vaddq_s32(E[k+2],O[k+2]);
+            t = vshlq_s32(t,minus_shift);
+            t = vmaxq_s32(t,min);
+            t = vminq_s32(t,max);
+            x2 = vmovn_s32(t);
+
+            E[k+3] = vaddq_s32(vdupq_n_s32(add),E[k+3]);
+            t = vaddq_s32(E[k+3],O[k+3]);
+            t = vshlq_s32(t,minus_shift);
+            t = vmaxq_s32(t,min);
+            t = vminq_s32(t,max);
+            x3 = vmovn_s32(t);
+            // NOTE(review): transpose_4x4x16 is defined outside this chunk; presumably it transposes x0..x3 in place - confirm.
+            transpose_4x4x16(x0,x1,x2,x3);
+            *(int16x4_t*)&orig_dst[0*16+k] = x0;
+            *(int16x4_t*)&orig_dst[1*16+k] = x1;
+            *(int16x4_t*)&orig_dst[2*16+k] = x2;
+            *(int16x4_t*)&orig_dst[3*16+k] = x3;
+        }
+
+        // Second half of each row: E - O in descending index order; E already holds the rounding term from the loop above.
+#pragma unroll(2)
+        for (k = 0; k < 8; k+=4)
+        {
+            int32x4_t t;
+            int16x4_t x0,x1,x2,x3;
+            
+            t = vsubq_s32(E[7-k],O[7-k]);
+            t = vshlq_s32(t,minus_shift);
+            t = vmaxq_s32(t,min);
+            t = vminq_s32(t,max);
+            x0 = vmovn_s32(t);
+            
+            t = vsubq_s32(E[6-k],O[6-k]);
+            t = vshlq_s32(t,minus_shift);
+            t = vmaxq_s32(t,min);
+            t = vminq_s32(t,max);
+            x1 = vmovn_s32(t);
+            
+            t = vsubq_s32(E[5-k],O[5-k]);
+ 
+            t = vshlq_s32(t,minus_shift);
+            t = vmaxq_s32(t,min);
+            t = vminq_s32(t,max);
+            x2 = vmovn_s32(t);
+            
+            t = vsubq_s32(E[4-k],O[4-k]);
+            t = vshlq_s32(t,minus_shift);
+            t = vmaxq_s32(t,min);
+            t = vminq_s32(t,max);
+            x3 = vmovn_s32(t);
+            
+            transpose_4x4x16(x0,x1,x2,x3);
+            *(int16x4_t*)&orig_dst[0*16+k+8] = x0;
+            *(int16x4_t*)&orig_dst[1*16+k+8] = x1;
+            *(int16x4_t*)&orig_dst[2*16+k+8] = x2;
+            *(int16x4_t*)&orig_dst[3*16+k+8] = x3;
+        }
+        orig_dst += 4*16;
+        src+=4;
+    }
+    
+#undef MUL
+#undef FMA
+#undef FMAK
+#undef MULK
+#undef ODD3_15
+#undef EVEN6_14_STEP4
+
+    
+}
+
+
+// NEON 32-point inverse partial butterfly: each outer iteration consumes four src columns and emits four 32-sample output rows.
+static void partialButterflyInverse32_neon(const int16_t* src, int16_t* orig_dst, int shift, int line)
+{
+#define MUL(x) vmull_s16(vdup_n_s16(g_t32[x][k]),*(int16x4_t*)&src[x*line]);
+#define FMA(x) s = vmlal_s16(s,vdup_n_s16(g_t32[x][k]),*(int16x4_t*)&src[(x)*line])
+#define FMAK(x,l) s[l] = vmlal_lane_s16(s[l],*(int16x4_t*)&src[(x)*line],*(int16x4_t *)&g_t32[x][k],l)
+#define MULK(x,l) vmull_lane_s16(*(int16x4_t*)&src[x*line],*(int16x4_t *)&g_t32[x][k],l);
+#define ODD31(k) FMAK(3,k);FMAK(5,k);FMAK(7,k);FMAK(9,k);FMAK(11,k);FMAK(13,k);FMAK(15,k);FMAK(17,k);FMAK(19,k);FMAK(21,k);FMAK(23,k);FMAK(25,k);FMAK(27,k);FMAK(29,k);FMAK(31,k);
+// ODD31 accumulates rows 3..31 (odd); ODD15 rows 6, 10, ..., 30; ODD7 rows 12, 20, 28. Rows 1, 2 and 4 seed s[] via MULK.
+#define ODD15(k) FMAK(6,k);FMAK(10,k);FMAK(14,k);FMAK(18,k);FMAK(22,k);FMAK(26,k);FMAK(30,k);
+#define ODD7(k) FMAK(12,k);FMAK(20,k);FMAK(28,k);
+// In ODD31/ODD15/ODD7 the macro parameter named k is the lane l; FMAK/MULK still read the loop variable k.
+
+    int j, k;
+    int32x4_t E[16], O[16];
+    int32x4_t EE[8], EO[8];
+    int32x4_t EEE[4], EEO[4];
+    int32x4_t EEEE[2], EEEO[2];
+    int16x4_t dst[32];
+    int add = 1 << (shift - 1);
+    // NOTE(review): j and src advance by 4, so line is assumed to be a multiple of 4 (idct32_neon passes 32).
+#pragma unroll (8)
+    for (j = 0; j < line; j+=4)
+    {
+#pragma unroll (4)
+        for (k = 0; k < 16; k+=4)
+        {
+            int32x4_t s[4];
+            s[0] = MULK(1,0);
+            s[1] = MULK(1,1);
+            s[2] = MULK(1,2);
+            s[3] = MULK(1,3);
+            ODD31(0);
+            ODD31(1);
+            ODD31(2);
+            ODD31(3);
+            O[k] = s[0];
+            O[k+1] = s[1];
+            O[k+2] = s[2];
+            O[k+3] = s[3];
+            
+            
+        }
+        
+        // EO[0..7]: rows 2, 6, ..., 30 against coefficient columns 0..7, four columns per pass.
+#pragma unroll (2)
+        for (k = 0; k < 8; k+=4)
+        {
+            int32x4_t s[4];
+            s[0] = MULK(2,0);
+            s[1] = MULK(2,1);
+            s[2] = MULK(2,2);
+            s[3] = MULK(2,3);
+            
+            ODD15(0);
+            ODD15(1);
+            ODD15(2);
+            ODD15(3);
+            
+            EO[k] = s[0];
+            EO[k+1] = s[1];
+            EO[k+2] = s[2];
+            EO[k+3] = s[3];
+        }
+        
+        // EEO[0..3]: rows 4, 12, 20, 28 against coefficient columns 0..3; the loop body executes once.
+        for (k = 0; k < 4; k+=4)
+        {
+            int32x4_t s[4];
+            s[0] = MULK(4,0);
+            s[1] = MULK(4,1);
+            s[2] = MULK(4,2);
+            s[3] = MULK(4,3);
+            
+            ODD7(0);
+            ODD7(1);
+            ODD7(2);
+            ODD7(3);
+            
+            EEO[k] = s[0];
+            EEO[k+1] = s[1];
+            EEO[k+2] = s[2];
+            EEO[k+3] = s[3];
+        }
+        
+#pragma unroll (2)
+        for (k=0;k<2;k++) {
+            int32x4_t s;
+            s = MUL(8);
+            EEEO[k] = FMA(24);
+            s = MUL(0);
+            EEEE[k] = FMA(16);
+        }
+        /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
+        EEE[0] = vaddq_s32(EEEE[0],EEEO[0]);
+        EEE[3] = vsubq_s32(EEEE[0],EEEO[0]);
+        EEE[1] = vaddq_s32(EEEE[1],EEEO[1]);
+        EEE[2] = vsubq_s32(EEEE[1],EEEO[1]);
+        
+#pragma unroll (4)
+        for (k = 0; k < 4; k++)
+        {
+            EE[k] = vaddq_s32(EEE[k],EEO[k]);
+            EE[k + 4] = vsubq_s32((EEE[3 - k]), (EEO[3 - k]));
+        }
+        
+#pragma unroll (8)
+        for (k = 0; k < 8; k++)
+        {
+            E[k] = vaddq_s32(EE[k],EO[k]);
+            E[k + 8] = vsubq_s32((EE[7 - k]),(EO[7 - k]));
+        }
+        
+        static const int32x4_t min = vdupq_n_s32(-32768);
+        static const int32x4_t max = vdupq_n_s32(32767);
+        
+        
+        // dst[k] = E[k] + O[k] and dst[k+16] = E[15-k] - O[15-k], each rounded, shifted right (negative vshlq count) and clamped to int16.
+#pragma unroll (16)
+        for (k = 0; k < 16; k++)
+        {
+            int32x4_t adde = vaddq_s32(vdupq_n_s32(add),E[k]);
+            int32x4_t s = vaddq_s32(adde,O[k]);
+            s = vshlq_s32(s,vdupq_n_s32(-shift));
+            s = vmaxq_s32(s,min);
+            s = vminq_s32(s,max);
+            
+            
+            
+            dst[k] = vmovn_s32(s);
+            adde = vaddq_s32(vdupq_n_s32(add),(E[15-k]));
+            s  =vsubq_s32(adde,(O[15-k]));
+            s = vshlq_s32(s,vdupq_n_s32(-shift));
+            s = vmaxq_s32(s,min);
+            s = vminq_s32(s,max);
+            
+            dst[k+16] = vmovn_s32(s);
+        }
+        
+        // NOTE(review): transpose_4x4x16 is defined outside this chunk; presumably it transposes x0..x3 in place - confirm.
+#pragma unroll (8)
+        for (k = 0; k < 32; k+=4)
+        {
+            int16x4_t x0 = dst[k+0];
+            int16x4_t x1 = dst[k+1];
+            int16x4_t x2 = dst[k+2];
+            int16x4_t x3 = dst[k+3];
+            transpose_4x4x16(x0,x1,x2,x3);
+            *(int16x4_t*)&orig_dst[0*32+k] = x0;
+            *(int16x4_t*)&orig_dst[1*32+k] = x1;
+            *(int16x4_t*)&orig_dst[2*32+k] = x2;
+            *(int16x4_t*)&orig_dst[3*32+k] = x3;
+        }
+        orig_dst += 4*32;
+        src += 4;
+    }
+#undef MUL
+#undef FMA
+#undef FMAK
+#undef MULK
+#undef ODD31
+#undef ODD15
+#undef ODD7
+
+}
+
+// 8x8 forward transform: copies the strided source into a contiguous block, then runs partialButterfly8 twice.
+static void dct8_neon(const int16_t* src, int16_t* dst, intptr_t srcStride)
+{
+    const int shift_1st = 2 + X265_DEPTH - 8;
+    const int shift_2nd = 9;
+
+    ALIGN_VAR_32(int16_t, coef[8 * 8]);
+    ALIGN_VAR_32(int16_t, block[8 * 8]);
+    // Gather 8 rows of 8 samples (srcStride apart) into block.
+    for (int i = 0; i < 8; i++)
+    {
+        memcpy(&block[i * 8], &src[i * srcStride], 8 * sizeof(int16_t));
+    }
+    // First pass: block -> coef with shift_1st; second pass: coef -> dst with shift_2nd.
+    partialButterfly8(block, coef, shift_1st, 8);
+    partialButterfly8(coef, dst, shift_2nd, 8);
+}
+// 16x16 forward transform: copies the strided source into a contiguous block, then runs partialButterfly16 twice.
+static void dct16_neon(const int16_t* src, int16_t* dst, intptr_t srcStride)
+{
+    const int shift_1st = 3 + X265_DEPTH - 8;
+    const int shift_2nd = 10;
+
+    ALIGN_VAR_32(int16_t, coef[16 * 16]);
+    ALIGN_VAR_32(int16_t, block[16 * 16]);
+    // Gather 16 rows of 16 samples (srcStride apart) into block.
+    for (int i = 0; i < 16; i++)
+    {
+        memcpy(&block[i * 16], &src[i * srcStride], 16 * sizeof(int16_t));
+    }
+    // First pass: block -> coef with shift_1st; second pass: coef -> dst with shift_2nd.
+    partialButterfly16(block, coef, shift_1st, 16);
+    partialButterfly16(coef, dst, shift_2nd, 16);
+}
+// 32x32 forward transform: copies the strided source into a contiguous block, then runs partialButterfly32 twice.
+static void dct32_neon(const int16_t* src, int16_t* dst, intptr_t srcStride)
+{
+    const int shift_1st = 4 + X265_DEPTH - 8;
+    const int shift_2nd = 11;
+
+    ALIGN_VAR_32(int16_t, coef[32 * 32]);
+    ALIGN_VAR_32(int16_t, block[32 * 32]);
+    // Gather 32 rows of 32 samples (srcStride apart) into block.
+    for (int i = 0; i < 32; i++)
+    {
+        memcpy(&block[i * 32], &src[i * srcStride], 32 * sizeof(int16_t));
+    }
+    // First pass: block -> coef with shift_1st; second pass: coef -> dst with shift_2nd.
+    partialButterfly32(block, coef, shift_1st, 32);
+    partialButterfly32(coef, dst, shift_2nd, 32);
+}
+// 4x4 inverse transform: two partialButterflyInverse4 passes, then the contiguous result is copied out with dstStride.
+static void idct4_neon(const int16_t* src, int16_t* dst, intptr_t dstStride)
+{
+    const int shift_1st = 7;
+    const int shift_2nd = 12 - (X265_DEPTH - 8);
+
+    ALIGN_VAR_32(int16_t, coef[4 * 4]);
+    ALIGN_VAR_32(int16_t, block[4 * 4]);
+
+    partialButterflyInverse4(src, coef, shift_1st, 4); // first inverse pass: src input, coef output
+    partialButterflyInverse4(coef, block, shift_2nd, 4); // second inverse pass: coef input, block output
+    // Scatter the 4 contiguous rows of block to dst, dstStride apart.
+    for (int i = 0; i < 4; i++)
+    {
+        memcpy(&dst[i * dstStride], &block[i * 4], 4 * sizeof(int16_t));
+    }
+}
+// 16x16 inverse transform: two partialButterflyInverse16_neon passes, then the contiguous result is copied out with dstStride.
+static void idct16_neon(const int16_t* src, int16_t* dst, intptr_t dstStride)
+{
+    const int shift_1st = 7;
+    const int shift_2nd = 12 - (X265_DEPTH - 8);
+
+    ALIGN_VAR_32(int16_t, coef[16 * 16]);
+    ALIGN_VAR_32(int16_t, block[16 * 16]);
+    // First pass: src -> coef with shift_1st; second pass: coef -> block with shift_2nd.
+    partialButterflyInverse16_neon(src, coef, shift_1st, 16);
+    partialButterflyInverse16_neon(coef, block, shift_2nd, 16);
+    // Scatter the 16 contiguous rows of block to dst, dstStride apart.
+    for (int i = 0; i < 16; i++)
+    {
+        memcpy(&dst[i * dstStride], &block[i * 16], 16 * sizeof(int16_t));
+    }
+}
+// 32x32 inverse transform: two partialButterflyInverse32_neon passes, then the contiguous result is copied out with dstStride.
+static void idct32_neon(const int16_t* src, int16_t* dst, intptr_t dstStride)
+{
+    const int shift_1st = 7;
+    const int shift_2nd = 12 - (X265_DEPTH - 8);
+
+    ALIGN_VAR_32(int16_t, coef[32 * 32]);
+    ALIGN_VAR_32(int16_t, block[32 * 32]);
+    // First pass: src -> coef with shift_1st; second pass: coef -> block with shift_2nd.
+    partialButterflyInverse32_neon(src, coef, shift_1st, 32);
+    partialButterflyInverse32_neon(coef, block, shift_2nd, 32);
+    // Scatter the 32 contiguous rows of block to dst, dstStride apart.
+    for (int i = 0; i < 32; i++)
+    {
+        memcpy(&dst[i * dstStride], &block[i * 32], 32 * sizeof(int16_t));
+    }
+}
+
+
+
+}
+
+namespace X265_NS {
+// x265 private namespace. setupDCTPrimitives_neon stores this file's NEON kernels into the primitive table p.
+void setupDCTPrimitives_neon(EncoderPrimitives& p) {
+    p.cu[BLOCK_4x4].nonPsyRdoQuant   = nonPsyRdoQuant_neon<2>;
+    p.cu[BLOCK_8x8].nonPsyRdoQuant   = nonPsyRdoQuant_neon<3>;
+    p.cu[BLOCK_16x16].nonPsyRdoQuant = nonPsyRdoQuant_neon<4>;
+    p.cu[BLOCK_32x32].nonPsyRdoQuant = nonPsyRdoQuant_neon<5>;
+    p.cu[BLOCK_4x4].psyRdoQuant = psyRdoQuant_neon<2>;
+    p.cu[BLOCK_8x8].psyRdoQuant = psyRdoQuant_neon<3>;
+    p.cu[BLOCK_16x16].psyRdoQuant = psyRdoQuant_neon<4>;
+    p.cu[BLOCK_32x32].psyRdoQuant = psyRdoQuant_neon<5>;
+    p.cu[BLOCK_8x8].dct   = dct8_neon;
+    p.cu[BLOCK_16x16].dct = dct16_neon;
+    p.cu[BLOCK_32x32].dct = dct32_neon;
+    p.cu[BLOCK_4x4].idct   = idct4_neon;
+    p.cu[BLOCK_16x16].idct = idct16_neon;
+    p.cu[BLOCK_32x32].idct = idct32_neon;
+    p.cu[BLOCK_4x4].count_nonzero = count_nonzero_neon<4>;
+    p.cu[BLOCK_8x8].count_nonzero = count_nonzero_neon<8>;
+    p.cu[BLOCK_16x16].count_nonzero = count_nonzero_neon<16>;
+    p.cu[BLOCK_32x32].count_nonzero = count_nonzero_neon<32>;
+    // The 4x4 dct and 8x8 idct slots are not assigned by this function.
+    p.cu[BLOCK_4x4].copy_cnt   = copy_count_neon<4>;
+    p.cu[BLOCK_8x8].copy_cnt   = copy_count_neon<8>;
+    p.cu[BLOCK_16x16].copy_cnt = copy_count_neon<16>;
+    p.cu[BLOCK_32x32].copy_cnt = copy_count_neon<32>;
+    p.cu[BLOCK_4x4].psyRdoQuant_1p = nonPsyRdoQuant_neon<2>;
+    p.cu[BLOCK_4x4].psyRdoQuant_2p = psyRdoQuant_neon<2>;
+    p.cu[BLOCK_8x8].psyRdoQuant_1p = nonPsyRdoQuant_neon<3>;
+    p.cu[BLOCK_8x8].psyRdoQuant_2p = psyRdoQuant_neon<3>;
+    p.cu[BLOCK_16x16].psyRdoQuant_1p = nonPsyRdoQuant_neon<4>;
+    p.cu[BLOCK_16x16].psyRdoQuant_2p = psyRdoQuant_neon<4>;
+    p.cu[BLOCK_32x32].psyRdoQuant_1p = nonPsyRdoQuant_neon<5>;
+    p.cu[BLOCK_32x32].psyRdoQuant_2p = psyRdoQuant_neon<5>;
+    // NOTE(review): the psyRdoQuant_1p slots above receive the nonPsyRdoQuant kernels - confirm this is intended.
+    p.scanPosLast  =scanPosLast_opt;
+
+}
+};
+
+
+
+#endif
diff -Naur ./source/common/arm64/dct-prim.h ../x265_apple_patch/source/common/arm64/dct-prim.h
--- ./source/common/arm64/dct-prim.h	1970-01-01 01:00:00.000000000 +0100
+++ ../x265_apple_patch/source/common/arm64/dct-prim.h	2021-05-08 13:08:01.000000000 +0100
@@ -0,0 +1,18 @@
+#ifndef __DCT_PRIM_NEON_H__
+#define __DCT_PRIM_NEON_H__
+// Declares setupDCTPrimitives_neon(), the function that fills an EncoderPrimitives table with the NEON DCT/quant kernels.
+
+#include "common.h"
+#include "primitives.h"
+#include "contexts.h"   // costCoeffNxN_c
+#include "threading.h"  // CLZ
+
+namespace X265_NS {
+// x265 private namespace
+void setupDCTPrimitives_neon(EncoderPrimitives& p);
+};
+
+
+
+#endif
+
diff -Naur ./source/common/arm64/filter-prim.cpp ../x265_apple_patch/source/common/arm64/filter-prim.cpp
--- ./source/common/arm64/filter-prim.cpp	1970-01-01 01:00:00.000000000 +0100
+++ ../x265_apple_patch/source/common/arm64/filter-prim.cpp	2021-05-08 13:08:01.000000000 +0100
@@ -0,0 +1,797 @@
+
+#if HAVE_NEON
+
+#include "filter-prim.h"
+#include <arm_neon.h>
+
+namespace {
+
+using namespace X265_NS;
+
+// Converts pixels to the 16-bit intermediate format: (pixel << (IF_INTERNAL_PREC - X265_DEPTH)) - IF_INTERNAL_OFFS, 8 lanes at a time.
+template<int width, int height>
+void filterPixelToShort_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride)
+{
+    const int shift = IF_INTERNAL_PREC - X265_DEPTH;
+    int row, col;
+    const int16x8_t off = vdupq_n_s16(IF_INTERNAL_OFFS);
+    for (row = 0; row < height; row++)
+    {
+        // NOTE(review): columns advance 8 at a time, so a width that is not a multiple of 8 touches lanes past width - confirm which sizes are registered.
+        for (col = 0; col < width; col+=8)
+        {
+          int16x8_t in;
+          
+#if HIGH_BIT_DEPTH
+          in = *(int16x8_t *)&src[col];
+#else
+          in = vmovl_u8(*(uint8x8_t *)&src[col]);
+#endif
+          
+          int16x8_t tmp = vshlq_n_s16(in,shift);
+          tmp = vsubq_s16(tmp,off);
+          *(int16x8_t *)&dst[col] = tmp;
+          
+        }
+
+        src += srcStride;
+        dst += dstStride;
+    }
+}
+
+// Horizontal N-tap interpolation, pixel in / pixel out: rounds, shifts right by IF_FILTER_PREC and clamps to [0, (1 << X265_DEPTH) - 1].
+template<int N, int width, int height>
+void interp_horiz_pp_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx)
+{
+    const int16_t* coeff = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
+    int headRoom = IF_FILTER_PREC;
+    int offset =  (1 << (headRoom - 1));
+    uint16_t maxVal = (1 << X265_DEPTH) - 1;
+    int cStride = 1;
+
+    src -= (N / 2 - 1) * cStride;
+    // Load only the taps that exist: taps 4..7 are read solely for N == 8, so a
+    // 4-tap coefficient row is never read as eight lanes (past its own end).
+    const int16x4_t low_vc = vld1_s16(coeff);
+    const int16x4_t high_vc = (N == 8) ? vld1_s16(coeff + 4) : vdup_n_s16(0);
+
+    const int32x4_t voffset = vdupq_n_s32(offset);
+    const int32x4_t vhr = vdupq_n_s32(-headRoom);
+    
+    int row, col;
+    for (row = 0; row < height; row++)
+    {
+        for (col = 0; col < width; col+=8)
+        {
+            int32x4_t vsum1,vsum2;
+            
+            int16x8_t input[N];
+            // input[i] holds the 8 pixels starting at column col + i, widened to 16 bit.
+            for (int i=0;i<N;i++)
+            {
+#if HIGH_BIT_DEPTH
+                input[i] = *(int16x8_t *)&src[col+i];
+#else
+                input[i] = vmovl_u8(*(uint8x8_t *)&src[col+i]);
+#endif
+            }
+            vsum1 = voffset;
+            vsum2 = voffset;
+            // Taps 0..3: the low half of each input accumulates into vsum1, the high half into vsum2.
+            vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[0]),low_vc,0);
+            vsum2 = vmlal_high_lane_s16(vsum2,input[0],low_vc,0);
+            
+            vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[1]),low_vc,1);
+            vsum2 = vmlal_high_lane_s16(vsum2,input[1],low_vc,1);
+
+            vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[2]),low_vc,2);
+            vsum2 = vmlal_high_lane_s16(vsum2,input[2],low_vc,2);
+
+            vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[3]),low_vc,3);
+            vsum2 = vmlal_high_lane_s16(vsum2,input[3],low_vc,3);
+
+            if (N == 8)
+            {
+                vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[4]),high_vc,0);
+                vsum2 = vmlal_high_lane_s16(vsum2,input[4],high_vc,0);
+                vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[5]),high_vc,1);
+                vsum2 = vmlal_high_lane_s16(vsum2,input[5],high_vc,1);
+                vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[6]),high_vc,2);
+                vsum2 = vmlal_high_lane_s16(vsum2,input[6],high_vc,2);
+                vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[7]),high_vc,3);
+                vsum2 = vmlal_high_lane_s16(vsum2,input[7],high_vc,3);
+                
+            }
+            // vhr holds -headRoom: a negative vshlq count is a right shift; voffset supplied the rounding term.
+            vsum1 = vshlq_s32(vsum1, vhr);
+            vsum2 = vshlq_s32(vsum2, vhr);
+            // Keep the low 16 bits of every 32-bit lane, then clamp to the pixel range.
+            int16x8_t vsum = vuzp1q_s16(vsum1,vsum2);
+            vsum = vminq_s16(vsum,vdupq_n_s16(maxVal));
+            vsum = vmaxq_s16(vsum,vdupq_n_s16(0));
+#if HIGH_BIT_DEPTH
+            *(int16x8_t *)&dst[col] = vsum;
+#else
+            uint8x16_t usum = vuzp1q_u8(vsum,vsum);
+            *(uint8x8_t *)&dst[col] = vget_low_u8(usum);
+#endif
+          
+        }
+        
+        src += srcStride;
+        dst += dstStride;
+    }
+}
+
+#if HIGH_BIT_DEPTH
+// Horizontal N-tap interpolation, 16-bit pixel in / int16 intermediate out (HIGH_BIT_DEPTH build), 4 columns per step.
+template<int N, int width, int height>
+void interp_horiz_ps_neon(const uint16_t * src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt)
+{
+    const int16_t* coeff = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
+    const int headRoom = IF_INTERNAL_PREC - X265_DEPTH;
+    const int shift = IF_FILTER_PREC - headRoom;
+    const int offset = (unsigned)-IF_INTERNAL_OFFS << shift;
+    // isRowExt != 0 starts N/2 - 1 rows higher and filters N - 1 additional rows.
+    int blkheight = height;
+    src -= N / 2 - 1;
+
+    if (isRowExt)
+    {
+        src -= (N / 2 - 1) * srcStride;
+        blkheight += N - 1;
+    }
+    int32x4_t vc0 = vmovl_s16(*(int16x4_t *)coeff);
+    int32x4_t vc1;
+    // vc1 (taps 4..7) is loaded, and later read, only when N == 8.
+    if (N ==8) {
+        vc1 = vmovl_s16(*(int16x4_t *)(coeff + 4));
+    }
+    
+    const int32x4_t voffset = vdupq_n_s32(offset);
+    const int32x4_t vhr = vdupq_n_s32(-shift);
+    
+    int row, col;
+    for (row = 0; row < blkheight; row++)
+    {
+        for (col = 0; col < width; col+=4)
+        {
+            int32x4_t vsum;
+            
+            int32x4_t input[N];
+            // Pixels are unsigned: zero-extend them, then reinterpret for the signed multiply-accumulate.
+            for (int i=0;i<N;i++)
+            {
+                input[i] = vreinterpretq_s32_u32(vmovl_u16(*(uint16x4_t *)&src[col+i]));
+            }
+            vsum = voffset;
+            vsum = vmlaq_laneq_s32(vsum,(input[0]),vc0,0);
+            vsum = vmlaq_laneq_s32(vsum,(input[1]),vc0,1);
+            vsum = vmlaq_laneq_s32(vsum,(input[2]),vc0,2);
+            vsum = vmlaq_laneq_s32(vsum,(input[3]),vc0,3);
+
+
+            if (N == 8)
+            {
+                vsum = vmlaq_laneq_s32(vsum,(input[4]),vc1,0);
+                vsum = vmlaq_laneq_s32(vsum,(input[5]),vc1,1);
+                vsum = vmlaq_laneq_s32(vsum,(input[6]),vc1,2);
+                vsum = vmlaq_laneq_s32(vsum,(input[7]),vc1,3);
+                
+            }
+            // Right shift via a negative count, then narrow with the signed intrinsic that matches vsum and dst.
+            vsum = vshlq_s32(vsum, vhr);
+            *(int16x4_t *)&dst[col] = vmovn_s32(vsum);
+        }
+        
+        src += srcStride;
+        dst += dstStride;
+    }
+  }
+
+
+#else
+// Horizontal N-tap interpolation, 8-bit pixel in / int16 intermediate out, 8 columns per step with 16-bit accumulation.
+template<int N, int width, int height>
+void interp_horiz_ps_neon(const uint8_t* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt)
+{
+    const int16_t* coeff = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
+    const int headRoom = IF_INTERNAL_PREC - X265_DEPTH;
+    const int shift = IF_FILTER_PREC - headRoom;
+    const int offset = (unsigned)-IF_INTERNAL_OFFS << shift;
+    // isRowExt != 0 starts N/2 - 1 rows higher and filters N - 1 additional rows.
+    int blkheight = height;
+    src -= N / 2 - 1;
+
+    if (isRowExt)
+    {
+        src -= (N / 2 - 1) * srcStride;
+        blkheight += N - 1;
+    }
+    // Taps 4..7 are read only for N == 8; a 4-tap row gets zero lanes instead of a read past its end.
+    const int16x8_t vc = vcombine_s16(vld1_s16(coeff), (N == 8) ? vld1_s16(coeff + 4) : vdup_n_s16(0));
+
+    const int16x8_t voffset = vdupq_n_s16(offset);
+    const int16x8_t vhr = vdupq_n_s16(-shift);
+    
+    int row, col;
+    for (row = 0; row < blkheight; row++)
+    {
+        for (col = 0; col < width; col+=8)
+        {
+            int16x8_t vsum;
+            
+            int16x8_t input[N];
+            // input[i] holds the 8 pixels starting at column col + i, widened to 16 bit.
+            for (int i=0;i<N;i++)
+            {
+                input[i] = vmovl_u8(*(uint8x8_t *)&src[col+i]);
+            }
+            vsum = voffset;
+            vsum = vmlaq_laneq_s16(vsum,(input[0]),vc,0);
+            vsum = vmlaq_laneq_s16(vsum,(input[1]),vc,1);
+            vsum = vmlaq_laneq_s16(vsum,(input[2]),vc,2);
+            vsum = vmlaq_laneq_s16(vsum,(input[3]),vc,3);
+
+
+            if (N == 8)
+            {
+                vsum = vmlaq_laneq_s16(vsum,(input[4]),vc,4);
+                vsum = vmlaq_laneq_s16(vsum,(input[5]),vc,5);
+                vsum = vmlaq_laneq_s16(vsum,(input[6]),vc,6);
+                vsum = vmlaq_laneq_s16(vsum,(input[7]),vc,7);
+                
+            }
+            // vhr holds -shift: a negative vshlq count is a right shift.
+            vsum = vshlq_s16(vsum, vhr);
+            *(int16x8_t *)&dst[col] = vsum;
+        }
+        
+        src += srcStride;
+        dst += dstStride;
+    }
+  }
+
+#endif
+
+// Vertical N-tap interpolation, int16 in / int16 out: sums N rows srcStride apart and shifts right by IF_FILTER_PREC (no offset, no clamp).
+template<int N, int width, int height>
+void interp_vert_ss_neon(const int16_t* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx)
+{
+    const int16_t* c = (N == 8 ? g_lumaFilter[coeffIdx] : g_chromaFilter[coeffIdx]);
+    int shift = IF_FILTER_PREC;
+    src -= (N / 2 - 1) * srcStride;
+      // Load only the taps that exist: taps 4..7 are read solely for N == 8, so a
+      // 4-tap coefficient row is never read as eight lanes (past its own end).
+      const int16x4_t low_vc = vld1_s16(c);
+      const int16x4_t high_vc = (N == 8) ? vld1_s16(c + 4) : vdup_n_s16(0);
+
+      const int32x4_t vhr = vdupq_n_s32(-shift);
+      
+      int row, col;
+      for (row = 0; row < height; row++)
+      {
+          for (col = 0; col < width; col+=8)
+          {
+              int32x4_t vsum1,vsum2;
+              
+              int16x8_t input[N];
+              // input[i] holds 8 samples from the row i * srcStride below the current one.
+              for (int i=0;i<N;i++)
+              {
+                  input[i] = *(int16x8_t *)&src[col+i*srcStride];
+              }
+              // Tap 0 starts the sums; the low half of each input feeds vsum1, the high half vsum2.
+              vsum1 = vmull_lane_s16(vget_low_s16(input[0]),low_vc,0);
+              vsum2 = vmull_high_lane_s16(input[0],low_vc,0);
+              
+              vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[1]),low_vc,1);
+              vsum2 = vmlal_high_lane_s16(vsum2,input[1],low_vc,1);
+
+              vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[2]),low_vc,2);
+              vsum2 = vmlal_high_lane_s16(vsum2,input[2],low_vc,2);
+
+              vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[3]),low_vc,3);
+              vsum2 = vmlal_high_lane_s16(vsum2,input[3],low_vc,3);
+
+              if (N == 8)
+              {
+                  vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[4]),high_vc,0);
+                  vsum2 = vmlal_high_lane_s16(vsum2,input[4],high_vc,0);
+                  vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[5]),high_vc,1);
+                  vsum2 = vmlal_high_lane_s16(vsum2,input[5],high_vc,1);
+                  vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[6]),high_vc,2);
+                  vsum2 = vmlal_high_lane_s16(vsum2,input[6],high_vc,2);
+                  vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[7]),high_vc,3);
+                  vsum2 = vmlal_high_lane_s16(vsum2,input[7],high_vc,3);
+                  
+              }
+              // vhr holds -shift: a negative vshlq count is a right shift.
+              vsum1 = vshlq_s32(vsum1, vhr);
+              vsum2 = vshlq_s32(vsum2, vhr);
+              // Keep the low 16 bits of every 32-bit lane.
+              int16x8_t vsum = vuzp1q_s16(vsum1,vsum2);
+              *(int16x8_t *)&dst[col] = vsum;
+          }
+          
+          src += srcStride;
+          dst += dstStride;
+      }
+
+}
+
+
+#if HIGH_BIT_DEPTH
+// Vertical N-tap interpolation, 16-bit pixel in / pixel out (HIGH_BIT_DEPTH build): rounds, shifts by IF_FILTER_PREC, clamps to [0, maxVal].
+template<int N, int width, int height>
+void interp_vert_pp_neon(const uint16_t* src, intptr_t srcStride, uint16_t* dst, intptr_t dstStride, int coeffIdx)
+{
+    
+    const int16_t* c = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
+    int shift = IF_FILTER_PREC;
+    int offset = 1 << (shift - 1);
+    const uint16_t maxVal = (1 << X265_DEPTH) - 1;
+
+    src -= (N / 2 - 1) * srcStride;
+    // Load only the taps that exist: taps 4..7 are read solely for N == 8, so a
+    // 4-tap coefficient row is never read as eight lanes (past its own end).
+    const int32x4_t low_vc = vmovl_s16(vld1_s16(c));
+    const int32x4_t high_vc = (N == 8) ? vmovl_s16(vld1_s16(c + 4)) : vdupq_n_s32(0);
+
+    const int32x4_t voffset = vdupq_n_s32(offset);
+    const int32x4_t vhr = vdupq_n_s32(-shift);
+    
+    int row, col;
+    for (row = 0; row < height; row++)
+    {
+        for (col = 0; col < width; col+=4)
+        {
+            int32x4_t vsum;
+            
+            int32x4_t input[N];
+            // input[i] holds 4 pixels from the row i * srcStride below the current one, zero-extended to 32 bit.
+            for (int i=0;i<N;i++)
+            {
+                input[i] = vmovl_u16(*(uint16x4_t *)&src[col+i*srcStride]);
+            }
+            vsum = voffset;
+            
+            vsum = vmlaq_laneq_s32(vsum,(input[0]),low_vc,0);
+            vsum = vmlaq_laneq_s32(vsum,(input[1]),low_vc,1);
+            vsum = vmlaq_laneq_s32(vsum,(input[2]),low_vc,2);
+            vsum = vmlaq_laneq_s32(vsum,(input[3]),low_vc,3);
+
+            if (N == 8)
+            {
+              vsum = vmlaq_laneq_s32(vsum,(input[4]),high_vc,0);
+              vsum = vmlaq_laneq_s32(vsum,(input[5]),high_vc,1);
+              vsum = vmlaq_laneq_s32(vsum,(input[6]),high_vc,2);
+              vsum = vmlaq_laneq_s32(vsum,(input[7]),high_vc,3);
+            }
+            // vhr holds -shift: a negative vshlq count is a right shift; voffset supplied the rounding term.
+            vsum = vshlq_s32(vsum, vhr);
+            vsum = vminq_s32(vsum,vdupq_n_s32(maxVal));
+            vsum = vmaxq_s32(vsum,vdupq_n_s32(0));
+            *(uint16x4_t *)&dst[col] = vmovn_u32(vsum);
+        }
+        src += srcStride;
+        dst += dstStride;
+    }
+}
+
+
+
+
+#else
+// Vertical N-tap interpolation, 8-bit pixel in / pixel out: 16-bit accumulation, rounds, shifts by IF_FILTER_PREC, clamps to [0, maxVal].
+template<int N, int width, int height>
+void interp_vert_pp_neon(const uint8_t* src, intptr_t srcStride, uint8_t* dst, intptr_t dstStride, int coeffIdx)
+{
+    
+    const int16_t* c = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
+    int shift = IF_FILTER_PREC;
+    int offset = 1 << (shift - 1);
+    const uint16_t maxVal = (1 << X265_DEPTH) - 1;
+
+    src -= (N / 2 - 1) * srcStride;
+    // Taps 4..7 are read only for N == 8; a 4-tap row gets zero lanes instead of a read past its end.
+    const int16x8_t vc = vcombine_s16(vld1_s16(c), (N == 8) ? vld1_s16(c + 4) : vdup_n_s16(0));
+
+    const int16x8_t voffset = vdupq_n_s16(offset);
+    const int16x8_t vhr = vdupq_n_s16(-shift);
+    
+    int row, col;
+    for (row = 0; row < height; row++)
+    {
+        for (col = 0; col < width; col+=8)
+        {
+            int16x8_t vsum;
+            
+            int16x8_t input[N];
+            // input[i] holds 8 pixels from the row i * srcStride below the current one, widened to 16 bit.
+            for (int i=0;i<N;i++)
+            {
+                input[i] = vmovl_u8(*(uint8x8_t *)&src[col+i*srcStride]);
+            }
+            vsum = voffset;
+            
+            vsum = vmlaq_laneq_s16(vsum,(input[0]),vc,0);
+            vsum = vmlaq_laneq_s16(vsum,(input[1]),vc,1);
+            vsum = vmlaq_laneq_s16(vsum,(input[2]),vc,2);
+            vsum = vmlaq_laneq_s16(vsum,(input[3]),vc,3);
+
+            if (N == 8)
+            {
+              vsum = vmlaq_laneq_s16(vsum,(input[4]),vc,4);
+              vsum = vmlaq_laneq_s16(vsum,(input[5]),vc,5);
+              vsum = vmlaq_laneq_s16(vsum,(input[6]),vc,6);
+              vsum = vmlaq_laneq_s16(vsum,(input[7]),vc,7);
+
+            }
+            // vhr holds -shift: a negative vshlq count is a right shift; voffset supplied the rounding term.
+            vsum = vshlq_s16(vsum, vhr);
+            // Clamp to the pixel range, then keep the low byte of every 16-bit lane.
+            vsum = vminq_s16(vsum,vdupq_n_s16(maxVal));
+            vsum = vmaxq_s16(vsum,vdupq_n_s16(0));
+            uint8x16_t usum = vuzp1q_u8(vsum,vsum);
+            *(uint8x8_t *)&dst[col] = vget_low_u8(usum);
+          
+        }
+        
+        src += srcStride;
+        dst += dstStride;
+    }
+}
+
+
+#endif
+
+
+#if HIGH_BIT_DEPTH
+
+template<int N, int width, int height>
+void interp_vert_ps_neon(const uint16_t* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx)
+{
+    const int16_t* c = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
+    int headRoom = IF_INTERNAL_PREC - X265_DEPTH;
+    int shift = IF_FILTER_PREC - headRoom;
+    int offset = (unsigned)-IF_INTERNAL_OFFS << shift;
+    src -= (N / 2 - 1) * srcStride;
+
+    int16x8_t vc;
+    vc = *(int16x8_t *)c;
+    int32x4_t low_vc = vmovl_s16(vget_low_s16(vc));
+    int32x4_t high_vc = vmovl_s16(vget_high_s16(vc));
+
+    const int32x4_t voffset = vdupq_n_s32(offset);
+    const int32x4_t vhr = vdupq_n_s32(-shift);
+    
+    int row, col;
+    for (row = 0; row < height; row++)
+    {
+        for (col = 0; col < width; col+=4)
+        {
+            int16x8_t vsum;
+            
+            int16x8_t input[N];
+            
+            for (int i=0;i<N;i++)
+            {
+                input[i] = vmovl_u16(*(uint16x4_t *)&src[col+i*srcStride]);
+            }
+            vsum = voffset;
+            
+            vsum = vmlaq_laneq_s32(vsum,(input[0]),low_vc,0);
+            vsum = vmlaq_laneq_s32(vsum,(input[1]),low_vc,1);
+            vsum = vmlaq_laneq_s32(vsum,(input[2]),low_vc,2);
+            vsum = vmlaq_laneq_s32(vsum,(input[3]),low_vc,3);
+
+            if (N == 8)
+            {
+                int16x8_t  vsum1 = vmulq_laneq_s32((input[4]),high_vc,0);
+                vsum1 = vmlaq_laneq_s32(vsum1,(input[5]),high_vc,1);
+                vsum1 = vmlaq_laneq_s32(vsum1,(input[6]),high_vc,2);
+                vsum1 = vmlaq_laneq_s32(vsum1,(input[7]),high_vc,3);
+                vsum = vaddq_s32(vsum,vsum1);
+            }
+            
+            vsum = vshlq_s32(vsum, vhr);
+            
+            *(uint16x4_t *)&dst[col] = vmovn_s32(vsum);
+        }
+        
+        src += srcStride;
+        dst += dstStride;
+    }
+}
+
+#else
+
+template<int N, int width, int height>
+void interp_vert_ps_neon(const uint8_t* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx)
+{
+    const int16_t* c = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
+    int headRoom = IF_INTERNAL_PREC - X265_DEPTH;
+    int shift = IF_FILTER_PREC - headRoom;
+    int offset = (unsigned)-IF_INTERNAL_OFFS << shift;
+    src -= (N / 2 - 1) * srcStride;
+
+    int16x8_t vc;
+    vc = *(int16x8_t *)c;
+
+    const int16x8_t voffset = vdupq_n_s16(offset);
+    const int16x8_t vhr = vdupq_n_s16(-shift);
+    
+    int row, col;
+    for (row = 0; row < height; row++)
+    {
+        for (col = 0; col < width; col+=8)
+        {
+            int16x8_t vsum;
+            
+            int16x8_t input[N];
+            
+            for (int i=0;i<N;i++)
+            {
+                input[i] = vmovl_u8(*(uint8x8_t *)&src[col+i*srcStride]);
+            }
+            vsum = voffset;
+            
+            vsum = vmlaq_laneq_s16(vsum,(input[0]),vc,0);
+            vsum = vmlaq_laneq_s16(vsum,(input[1]),vc,1);
+            vsum = vmlaq_laneq_s16(vsum,(input[2]),vc,2);
+            vsum = vmlaq_laneq_s16(vsum,(input[3]),vc,3);
+
+            if (N == 8)
+            {
+                int16x8_t  vsum1 = vmulq_laneq_s16((input[4]),vc,4);
+                vsum1 = vmlaq_laneq_s16(vsum1,(input[5]),vc,5);
+                vsum1 = vmlaq_laneq_s16(vsum1,(input[6]),vc,6);
+                vsum1 = vmlaq_laneq_s16(vsum1,(input[7]),vc,7);
+                vsum = vaddq_s16(vsum,vsum1);
+            }
+            
+            vsum = vshlq_s32(vsum, vhr);
+            *(int16x8_t *)&dst[col] = vsum;
+        }
+        
+        src += srcStride;
+        dst += dstStride;
+    }
+}
+
+#endif
+
+
+
+template<int N, int width, int height>
+void interp_vert_sp_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx)
+{
+    int headRoom = IF_INTERNAL_PREC - X265_DEPTH;
+    int shift = IF_FILTER_PREC + headRoom;
+    int offset = (1 << (shift - 1)) + (IF_INTERNAL_OFFS << IF_FILTER_PREC);
+    uint16_t maxVal = (1 << X265_DEPTH) - 1;
+    const int16_t* coeff = (N == 8 ? g_lumaFilter[coeffIdx] : g_chromaFilter[coeffIdx]);
+
+    src -= (N / 2 - 1) * srcStride;
+
+    int16x8_t vc;
+    vc = *(int16x8_t *)coeff;
+    int16x4_t low_vc = vget_low_s16(vc);
+    int16x4_t high_vc = vget_high_s16(vc);
+
+    const int32x4_t voffset = vdupq_n_s32(offset);
+    const int32x4_t vhr = vdupq_n_s32(-shift);
+
+    int row, col;
+    for (row = 0; row < height; row++)
+    {
+        for (col = 0; col < width; col+=8)
+        {
+            int32x4_t vsum1,vsum2;
+
+            int16x8_t input[N];
+
+            for (int i=0;i<N;i++)
+            {
+                input[i] = *(int16x8_t *)&src[col+i*srcStride];
+            }
+            vsum1 = voffset;
+            vsum2 = voffset;
+
+            vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[0]),low_vc,0);
+            vsum2 = vmlal_high_lane_s16(vsum2,input[0],low_vc,0);
+
+            vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[1]),low_vc,1);
+            vsum2 = vmlal_high_lane_s16(vsum2,input[1],low_vc,1);
+
+            vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[2]),low_vc,2);
+            vsum2 = vmlal_high_lane_s16(vsum2,input[2],low_vc,2);
+
+            vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[3]),low_vc,3);
+            vsum2 = vmlal_high_lane_s16(vsum2,input[3],low_vc,3);
+
+            if (N == 8)
+            {
+                vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[4]),high_vc,0);
+                vsum2 = vmlal_high_lane_s16(vsum2,input[4],high_vc,0);
+                vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[5]),high_vc,1);
+                vsum2 = vmlal_high_lane_s16(vsum2,input[5],high_vc,1);
+                vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[6]),high_vc,2);
+                vsum2 = vmlal_high_lane_s16(vsum2,input[6],high_vc,2);
+                vsum1 = vmlal_lane_s16(vsum1,vget_low_s16(input[7]),high_vc,3);
+                vsum2 = vmlal_high_lane_s16(vsum2,input[7],high_vc,3);
+            }
+
+            vsum1 = vshlq_s32(vsum1, vhr);
+            vsum2 = vshlq_s32(vsum2, vhr);
+
+            int16x8_t vsum = vuzp1q_s16(vsum1,vsum2);
+            vsum = vminq_s16(vsum,vdupq_n_s16(maxVal));
+            vsum = vmaxq_s16(vsum,vdupq_n_s16(0));
+#if HIGH_BIT_DEPTH
+            *(int16x8_t *)&dst[col] = vsum;
+#else
+            uint8x16_t usum = vuzp1q_u8(vsum,vsum);
+            *(uint8x8_t *)&dst[col] = vget_low_u8(usum);
+#endif
+
+        }
+          
+        src += srcStride;
+        dst += dstStride;
+    }
+}
+
+
+
+
+
+
+template<int N, int width, int height>
+void interp_hv_pp_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY)
+{
+    ALIGN_VAR_32(int16_t, immed[width * (height + N - 1)]);
+
+    interp_horiz_ps_neon<N, width, height>(src, srcStride, immed, width, idxX, 1);
+    interp_vert_sp_neon<N,width,height>(immed + (N / 2 - 1) * width, width, dst, dstStride, idxY);
+}
+
+
+
+}
+
+
+
+
+namespace X265_NS {
+   #define CHROMA_420(W, H) \
+       p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_hpp = interp_horiz_pp_neon<4, W, H>; \
+       p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_hps = interp_horiz_ps_neon<4, W, H>; \
+       p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vpp = interp_vert_pp_neon<4, W, H>;  \
+       p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vps = interp_vert_ps_neon<4, W, H>;  \
+       p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vsp = interp_vert_sp_neon<4, W, H>;  \
+       p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].filter_vss = interp_vert_ss_neon<4, W, H>; \
+       p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].p2s[NONALIGNED] = filterPixelToShort_neon<W, H>;\
+       p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].p2s[ALIGNED] = filterPixelToShort_neon<W, H>;
+   // CHROMA_422: same 4-tap assignments as CHROMA_420 above, for the X265_CSP_I422 entries.
+   #define CHROMA_422(W, H) \
+       p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_hpp = interp_horiz_pp_neon<4, W, H>; \
+       p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_hps = interp_horiz_ps_neon<4, W, H>; \
+       p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vpp = interp_vert_pp_neon<4, W, H>;  \
+       p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vps = interp_vert_ps_neon<4, W, H>;  \
+       p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vsp = interp_vert_sp_neon<4, W, H>;  \
+       p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].filter_vss = interp_vert_ss_neon<4, W, H>; \
+       p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].p2s[NONALIGNED] = filterPixelToShort_neon<W, H>;\
+       p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].p2s[ALIGNED] = filterPixelToShort_neon<W, H>;
+   // CHROMA_444: same 4-tap assignments, but the pu[] slot is indexed with the LUMA_WxH enumerator.
+   #define CHROMA_444(W, H) \
+       p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hpp = interp_horiz_pp_neon<4, W, H>; \
+       p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_hps = interp_horiz_ps_neon<4, W, H>; \
+       p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vpp = interp_vert_pp_neon<4, W, H>;  \
+       p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vps = interp_vert_ps_neon<4, W, H>;  \
+       p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vsp = interp_vert_sp_neon<4, W, H>;  \
+       p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].filter_vss = interp_vert_ss_neon<4, W, H>; \
+       p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].p2s[NONALIGNED] = filterPixelToShort_neon<W, H>;\
+       p.chroma[X265_CSP_I444].pu[LUMA_ ## W ## x ## H].p2s[ALIGNED] = filterPixelToShort_neon<W, H>;
+   // LUMA: 8-tap variants of the same kernels, plus the combined horizontal+vertical luma_hvpp.
+   #define LUMA(W, H) \
+       p.pu[LUMA_ ## W ## x ## H].luma_hpp     = interp_horiz_pp_neon<8, W, H>; \
+       p.pu[LUMA_ ## W ## x ## H].luma_hps     = interp_horiz_ps_neon<8, W, H>; \
+       p.pu[LUMA_ ## W ## x ## H].luma_vpp     = interp_vert_pp_neon<8, W, H>;  \
+       p.pu[LUMA_ ## W ## x ## H].luma_vps     = interp_vert_ps_neon<8, W, H>;  \
+       p.pu[LUMA_ ## W ## x ## H].luma_vsp     = interp_vert_sp_neon<8, W, H>;  \
+       p.pu[LUMA_ ## W ## x ## H].luma_vss     = interp_vert_ss_neon<8, W, H>;  \
+       p.pu[LUMA_ ## W ## x ## H].luma_hvpp    = interp_hv_pp_neon<8, W, H>; \
+       p.pu[LUMA_ ## W ## x ## H].convert_p2s[NONALIGNED] = filterPixelToShort_neon<W, H>;\
+       p.pu[LUMA_ ## W ## x ## H].convert_p2s[ALIGNED] = filterPixelToShort_neon<W, H>;
+
+  
+void setupFilterPrimitives_neon(EncoderPrimitives &p)
+{
+  // Installs the NEON interpolation kernels into p, one macro expansion per block size.
+  // The kernels step through a row 8 columns at a time (4 in the high-bit-depth vertical filters),
+  // so only widths that are multiples of 8 are assigned here; narrower blocks and width 12 are not.
+  LUMA(8, 8);
+  LUMA(8, 4);
+  LUMA(16, 16);
+  CHROMA_420(8,  8);
+  LUMA(16,  8);
+  CHROMA_420(8,  4);
+  LUMA(8, 16);
+  LUMA(16, 12);
+  CHROMA_420(8,  6);
+  LUMA(16,  4);
+  CHROMA_420(8,  2);
+  LUMA(32, 32);
+  CHROMA_420(16, 16);
+  LUMA(32, 16);
+  CHROMA_420(16, 8);
+  LUMA(16, 32);
+  CHROMA_420(8,  16);
+  LUMA(32, 24);
+  CHROMA_420(16, 12);
+  LUMA(24, 32);
+  LUMA(32,  8);
+  CHROMA_420(16, 4);
+  LUMA(8, 32);
+  LUMA(64, 64);
+  CHROMA_420(32, 32);
+  LUMA(64, 32);
+  CHROMA_420(32, 16);
+  LUMA(32, 64);
+  CHROMA_420(16, 32);
+  LUMA(64, 48);
+  CHROMA_420(32, 24);
+  LUMA(48, 64);
+  CHROMA_420(24, 32);
+  LUMA(64, 16);
+  CHROMA_420(32, 8);
+  LUMA(16, 64);
+  CHROMA_420(8,  32);
+  CHROMA_422(8,  16);
+  CHROMA_422(8,  8);
+  CHROMA_422(8,  12);
+  CHROMA_422(8,  4);
+  CHROMA_422(16, 32);
+  CHROMA_422(16, 16);
+  CHROMA_422(8,  32);
+  CHROMA_422(16, 24);
+  CHROMA_422(16, 8);
+  CHROMA_422(32, 64);
+  CHROMA_422(32, 32);
+  CHROMA_422(16, 64);
+  CHROMA_422(32, 48);
+  CHROMA_422(24, 64);
+  CHROMA_422(32, 16);
+  CHROMA_422(8,  64);
+  CHROMA_444(8,  8);
+  CHROMA_444(8,  4);
+  CHROMA_444(16, 16);
+  CHROMA_444(16, 8);
+  CHROMA_444(8,  16);
+  CHROMA_444(16, 12);
+  CHROMA_444(16, 4);
+  CHROMA_444(32, 32);
+  CHROMA_444(32, 16);
+  CHROMA_444(16, 32);
+  CHROMA_444(32, 24);
+  CHROMA_444(24, 32);
+  CHROMA_444(32, 8);
+  CHROMA_444(8,  32);
+  CHROMA_444(64, 64);
+  CHROMA_444(64, 32);
+  CHROMA_444(32, 64);
+  CHROMA_444(64, 48);
+  CHROMA_444(48, 64);
+  CHROMA_444(64, 16);
+  CHROMA_444(16, 64);
+
+}
+
+};
+
+
+#endif
+
+
diff -Naur ./source/common/arm64/filter-prim.h ../x265_apple_patch/source/common/arm64/filter-prim.h
--- ./source/common/arm64/filter-prim.h	1970-01-01 01:00:00.000000000 +0100
+++ ../x265_apple_patch/source/common/arm64/filter-prim.h	2021-05-08 13:08:01.000000000 +0100
@@ -0,0 +1,20 @@
+#ifndef _FILTER_PRIM_ARM64_H__
+#define _FILTER_PRIM_ARM64_H__
+
+// Declares the setup routine for the ARM64 NEON interpolation-filter primitives.
+#include "common.h"
+#include "slicetype.h"      // LOWRES_COST_MASK
+#include "primitives.h"
+#include "x265.h"
+
+
+namespace X265_NS {
+// Assigns the NEON interpolation-filter kernels into the primitive table p.
+  
+void setupFilterPrimitives_neon(EncoderPrimitives &p);
+
+};
+
+
+#endif // _FILTER_PRIM_ARM64_H__
+
diff -Naur ./source/common/arm64/intrapred-prim.cpp ../x265_apple_patch/source/common/arm64/intrapred-prim.cpp
--- ./source/common/arm64/intrapred-prim.cpp	1970-01-01 01:00:00.000000000 +0100
+++ ../x265_apple_patch/source/common/arm64/intrapred-prim.cpp	2021-05-08 13:08:01.000000000 +0100
@@ -0,0 +1,266 @@
+/*****************************************************************************
+ * Copyright (C) 2013-2017 MulticoreWare, Inc
+ *
+ * Authors: Min Chen <chenm003@163.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+
+#include "common.h"
+#include "primitives.h"
+
+
+#if 1
+#include "arm64-utils.h"
+#include <arm_neon.h>
+
+using namespace X265_NS;
+
+namespace {
+
+
+
+template<int width>
+void intra_pred_ang_neon(pixel* dst, intptr_t dstStride, const pixel *srcPix0, int dirMode, int bFilter)
+{
+    int width2 = width << 1;
+    // Angular intra prediction of one width x width block; modes below 18 are treated as horizontal.
+    int horMode = dirMode < 18;
+    pixel neighbourBuf[129];
+    const pixel *srcPix = srcPix0;
+    // Horizontal modes: swap the two neighbour halves so the vertical code path can be reused.
+    if (horMode)
+    {
+        neighbourBuf[0] = srcPix[0];
+        // Equivalent scalar form of the two copies below:
+        //   for (int i = 0; i < width << 1; i++) {
+        //       neighbourBuf[1 + i] = srcPix[width2 + 1 + i];
+        //       neighbourBuf[width2 + 1 + i] = srcPix[1 + i];
+        //   }
+        memcpy(&neighbourBuf[1],&srcPix[width2+1],sizeof(pixel)*(width << 1));
+        memcpy(&neighbourBuf[width2 + 1],&srcPix[1],sizeof(pixel)*(width << 1));
+        srcPix = neighbourBuf;
+    }
+
+    // Intra prediction angle and inverse angle tables.
+    const int8_t angleTable[17] = { -32, -26, -21, -17, -13, -9, -5, -2, 0, 2, 5, 9, 13, 17, 21, 26, 32 };
+    const int16_t invAngleTable[8] = { 4096, 1638, 910, 630, 482, 390, 315, 256 };
+
+    // Get the prediction angle.
+    int angleOffset = horMode ? 10 - dirMode : dirMode - 26;
+    int angle = angleTable[8 + angleOffset];
+
+    // Vertical Prediction.
+    if (!angle)
+    {
+        for (int y = 0; y < width; y++) {
+            memcpy(&dst[y * dstStride],srcPix + 1,sizeof(pixel)*width);
+        }
+        if (bFilter)
+        {
+            int topLeft = srcPix[0], top = srcPix[1];
+            for (int y = 0; y < width; y++)  // rewrite only the first column of each row
+                dst[y * dstStride] = x265_clip((int16_t)(top + ((srcPix[width2 + 1 + y] - topLeft) >> 1)));
+        }
+    }
+    else // Angular prediction.
+    {
+        // Get the reference pixels. The reference base is the first pixel to the top (neighbourBuf[1]).
+        pixel refBuf[64];
+        const pixel *ref;
+
+        // Use the projected left neighbours and the top neighbours.
+        if (angle < 0)
+        {
+            // Number of neighbours projected.
+            int nbProjected = -((width * angle) >> 5) - 1;
+            pixel *ref_pix = refBuf + nbProjected + 1;
+
+            // Project the neighbours.
+            int invAngle = invAngleTable[- angleOffset - 1];
+            int invAngleSum = 128;
+            for (int i = 0; i < nbProjected; i++)
+            {
+                invAngleSum += invAngle;
+                ref_pix[- 2 - i] = srcPix[width2 + (invAngleSum >> 8)];
+            }
+
+            // Copy the top-left and top pixels.
+            // Scalar form: for (int i = 0; i < width + 1; i++) ref_pix[-1 + i] = srcPix[i];
+            // so ref_pix[-1] receives srcPix[0] and ref_pix[0..width-1] receive srcPix[1..width].
+            
+            memcpy(&ref_pix[-1],srcPix,(width+1)*sizeof(pixel));
+            ref = ref_pix;
+        }
+        else // Use the top and top-right neighbours.
+            ref = srcPix + 1;
+
+        // Pass every row.
+        int angleSum = 0;
+        for (int y = 0; y < width; y++)
+        {
+            angleSum += angle;
+            int offset = angleSum >> 5;
+            int fraction = angleSum & 31;
+            // Each branch below computes ((32 - fraction) * ref[i] + fraction * ref[i + 1] + 16) >> 5.
+            if (fraction) // Interpolate
+            {
+                if (width >= 8 && sizeof(pixel) == 1)  // 8-bit pixels: eight outputs per step
+                {
+                    const int16x8_t f0 = vdupq_n_s16(32-fraction);
+                    const int16x8_t f1 = vdupq_n_s16(fraction);
+                    for (int x = 0;x<width;x+=8) {
+                        uint8x8_t in0 = *(uint8x8_t *)&ref[offset + x];
+                        uint8x8_t in1 = *(uint8x8_t *)&ref[offset+ x + 1];
+                        int16x8_t lo = vmlaq_s16(vdupq_n_s16(16),vmovl_u8(in0),f0);
+                        lo = vmlaq_s16(lo,vmovl_u8(in1),f1);
+                        lo = vshrq_n_s16(lo,5);
+                        *(uint8x8_t *)&dst[y * dstStride + x] = vmovn_u16(lo);
+                    }
+                }
+                else if (width >= 4 && sizeof(pixel) == 2)  // 16-bit pixels: four outputs per step
+                {
+                    const int32x4_t f0 = vdupq_n_s32(32-fraction);
+                    const int32x4_t f1 = vdupq_n_s32(fraction);
+                    for (int x = 0;x<width;x+=4) {
+                        uint16x4_t in0 = *(uint16x4_t *)&ref[offset + x];
+                        uint16x4_t in1 = *(uint16x4_t *)&ref[offset+ x + 1];
+                        int32x4_t lo = vmlaq_s32(vdupq_n_s32(16),vmovl_u16(in0),f0);
+                        lo = vmlaq_s32(lo,vmovl_u16(in1),f1);
+                        lo = vshrq_n_s32(lo,5);
+                        *(uint16x4_t *)&dst[y * dstStride + x] = vmovn_u32(lo);
+                    }
+                }
+                else {  // scalar fallback for the remaining width / pixel-size combinations
+                    for (int x = 0; x < width; x++)
+                        dst[y * dstStride + x] = (pixel)(((32 - fraction) * ref[offset + x] + fraction * ref[offset + x + 1] + 16) >> 5);
+                }
+            }
+            else // Copy.
+            {
+                memcpy(&dst[y * dstStride],&ref[offset],sizeof(pixel)*width);
+            }
+        }
+    }
+
+    // Flip for horizontal.
+    if (horMode)
+    {
+        if (width == 8)  transpose8x8(dst,dst,dstStride,dstStride);
+        else if (width == 16) transpose16x16(dst,dst,dstStride,dstStride);
+        else if (width == 32) transpose32x32(dst,dst,dstStride,dstStride);
+        else {  // other widths: in-place scalar transpose
+            for (int y = 0; y < width - 1; y++)
+            {
+                for (int x = y + 1; x < width; x++)
+                {
+                    pixel tmp              = dst[y * dstStride + x];
+                    dst[y * dstStride + x] = dst[x * dstStride + y];
+                    dst[x * dstStride + y] = tmp;
+                }
+            }
+        }
+    }
+}
+
+template<int log2Size>
+void all_angs_pred_neon(pixel *dest, pixel *refPix, pixel *filtPix, int bLuma)
+{
+    const int size = 1 << log2Size;
+    for (int mode = 2; mode <= 34; mode++)
+    {
+        pixel *srcPix  = (g_intraFilterFlags[mode] & size ? filtPix  : refPix);
+        pixel *out = dest + ((mode - 2) << (log2Size * 2));
+        // Each mode writes its own size * size block of dest; bLuma is passed on as bFilter.
+        intra_pred_ang_neon<size>(out, size, srcPix, mode, bLuma);
+
+        // intra_pred_ang_neon already transposed the horizontal modes; the transpose below
+        bool modeHor = (mode < 18);
+        // undoes that, so horizontal modes are stored un-flipped.
+        // transpose the block if this is a horizontal mode
+        if (modeHor)
+        {
+            if (size == 8) transpose8x8(out,out,size,size);
+            else if (size == 16) transpose16x16(out,out,size,size);
+            else if (size == 32) transpose32x32(out,out,size,size);
+            else {
+                for (int k = 0; k < size - 1; k++)
+                {
+                    for (int l = k + 1; l < size; l++)
+                    {
+                        pixel tmp         = out[k * size + l];
+                        out[k * size + l] = out[l * size + k];
+                        out[l * size + k] = tmp;
+                    }
+                }
+            }
+        }
+    }
+}
+}
+
+namespace X265_NS {
+// x265 private namespace
+
+void setupIntraPrimitives_neon(EncoderPrimitives& p)
+{
+//    p.cu[BLOCK_4x4].intra_filter = intraFilter<4>;
+//    p.cu[BLOCK_8x8].intra_filter = intraFilter<8>;
+//    p.cu[BLOCK_16x16].intra_filter = intraFilter<16>;
+//    p.cu[BLOCK_32x32].intra_filter = intraFilter<32>;
+//    (intra_filter above and planar / DC below are disabled: this function does not assign them.)
+//    p.cu[BLOCK_4x4].intra_pred[PLANAR_IDX] = planar_pred_neon<2>;
+//    p.cu[BLOCK_8x8].intra_pred[PLANAR_IDX] = planar_pred_neon<3>;
+//    p.cu[BLOCK_16x16].intra_pred[PLANAR_IDX] = planar_pred_neon<4>;
+//    p.cu[BLOCK_32x32].intra_pred[PLANAR_IDX] = planar_pred_neon<5>;
+//
+//    p.cu[BLOCK_4x4].intra_pred[DC_IDX] = intra_pred_dc_neon<4>;
+//    p.cu[BLOCK_8x8].intra_pred[DC_IDX] = intra_pred_dc_neon<8>;
+//    p.cu[BLOCK_16x16].intra_pred[DC_IDX] = intra_pred_dc_neon<16>;
+//    p.cu[BLOCK_32x32].intra_pred[DC_IDX] = intra_pred_dc_neon<32>;
+    // Every mode from 2 up to NUM_INTRA_MODE - 1 shares the NEON angular predictor for its block size.
+    for (int i = 2; i < NUM_INTRA_MODE; i++)
+    {
+        p.cu[BLOCK_4x4].intra_pred[i] = intra_pred_ang_neon<4>;
+        p.cu[BLOCK_8x8].intra_pred[i] = intra_pred_ang_neon<8>;
+        p.cu[BLOCK_16x16].intra_pred[i] = intra_pred_ang_neon<16>;
+        p.cu[BLOCK_32x32].intra_pred[i] = intra_pred_ang_neon<32>;
+    }
+    // The all-angles variants take log2 of the block size as their template argument.
+    p.cu[BLOCK_4x4].intra_pred_allangs = all_angs_pred_neon<2>;
+    p.cu[BLOCK_8x8].intra_pred_allangs = all_angs_pred_neon<3>;
+    p.cu[BLOCK_16x16].intra_pred_allangs = all_angs_pred_neon<4>;
+    p.cu[BLOCK_32x32].intra_pred_allangs = all_angs_pred_neon<5>;
+}
+}
+
+
+
+#else
+
+namespace X265_NS {
+// x265 private namespace
+void setupIntraPrimitives_neon(EncoderPrimitives& p)
+{}
+}
+
+#endif
+
+
+
diff -Naur ./source/common/arm64/intrapred-prim.h ../x265_apple_patch/source/common/arm64/intrapred-prim.h
--- ./source/common/arm64/intrapred-prim.h	1970-01-01 01:00:00.000000000 +0100
+++ ../x265_apple_patch/source/common/arm64/intrapred-prim.h	2021-05-08 13:08:01.000000000 +0100
@@ -0,0 +1,14 @@
+#ifndef INTRAPRED_PRIM_H__
+#define INTRAPRED_PRIM_H__
+#if defined(__aarch64__)
+
+namespace X265_NS {
+// x265 private namespace
+
+void setupIntraPrimitives_neon(EncoderPrimitives& p);  // assigns the NEON intra-prediction primitives into p
+}
+
+#endif // defined(__aarch64__)
+
+#endif // INTRAPRED_PRIM_H__
+
diff -Naur ./source/common/arm64/loopfilter-prim.cpp ../x265_apple_patch/source/common/arm64/loopfilter-prim.cpp
--- ./source/common/arm64/loopfilter-prim.cpp	1970-01-01 01:00:00.000000000 +0100
+++ ../x265_apple_patch/source/common/arm64/loopfilter-prim.cpp	2021-05-08 13:08:01.000000000 +0100
@@ -0,0 +1,305 @@
+/*****************************************************************************
+* Copyright (C) 2013-2017 MulticoreWare, Inc
+*
+* Authors: Praveen Kumar Tiwari <praveen@multicorewareinc.com>
+*          Dnyaneshwar Gorade <dnyaneshwar@multicorewareinc.com>
+*          Min Chen <chenm003@163.com>
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+*
+* This program is also available under a commercial proprietary license.
+* For more information, contact us at license @ x265.com.
+*****************************************************************************/
+#include "loopfilter-prim.h"
+
+#define PIXEL_MIN 0
+
+
+
+#if !(HIGH_BIT_DEPTH) && defined(HAVE_NEON)
+#include<arm_neon.h>
+
+namespace {
+
+
+/* Branch-free sign of x: yields -1, 0 or 1 (TODO: this is a dup, make common) */
+static inline int8_t signOf(int x)
+{
+    return (x >> 31) | ((int)((((uint32_t)-x)) >> 31));  // first term is -1 when x < 0, second term is 1 when x > 0
+}
+
+static inline int8x8_t sign_diff_neon(const uint8x8_t in0, const uint8x8_t in1)
+{
+  int16x8_t in = vsubl_u8(in0,in1);  // widening subtract: in0 - in1 per lane, held in 16 bits
+  return vmovn_s16(vmaxq_s16(vminq_s16(in,vdupq_n_s16(1)),vdupq_n_s16(-1)));  // clamp each lane to [-1, 1] and narrow: the per-lane sign
+}
+
+static void calSign_neon(int8_t *dst, const pixel *src1, const pixel *src2, const int endX)
+{
+  int x = 0;
+  for (; (x + 8) <= endX; x += 8) {  // eight signs of (src1 - src2) per step
+    *(int8x8_t *)&dst[x]  = sign_diff_neon(*(uint8x8_t *)&src1[x],*(uint8x8_t *)&src2[x]);
+  }
+  // Scalar tail for the last endX % 8 entries.
+    for (; x < endX; x++)
+        dst[x] = signOf(src1[x] - src2[x]);
+}
+
+static void processSaoCUE0_neon(pixel * rec, int8_t * offsetEo, int width, int8_t* signLeft, intptr_t stride)
+{
+  // Edge-offset pass comparing each pixel with its left and right neighbours, for two rows.
+  // Per pixel: edgeType = sign(rec[x] - rec[x + 1]) + signLeft + 2, then rec[x] += offsetEo[edgeType],
+  // clipped; the negated right sign becomes the next pixel's left sign.
+    int y;
+    int8_t signRight, signLeft0;
+    int8_t edgeType;
+
+    for (y = 0; y < 2; y++)
+    {
+        signLeft0 = signLeft[y];
+        int x = 0;
+
+        if (width >= 8) {
+            int8x8_t vsignRight;
+            int8x8x2_t shifter;
+            shifter.val[1][0] = signLeft0;  // byte 8 of the table: left sign carried into lane 0
+            static const int8x8_t index = {8,0,1,2,3,4,5,6};  // lane 0 <- carried sign, lane i <- negated right sign of lane i - 1
+            int8x8_t tbl = *(int8x8_t *)offsetEo;  // NOTE(review): loads 8 bytes although edgeType spans 0..4 - confirm offsetEo is at least 8 bytes
+            for (; (x+8) <= width; x+=8)
+            {
+                uint8x8_t in = *(uint8x8_t *)&rec[x];
+                vsignRight = sign_diff_neon(in,*(uint8x8_t *)&rec[x+1]);
+                shifter.val[0] = vneg_s8(vsignRight);
+                int8x8_t tmp = shifter.val[0];
+                int8x8_t edge = vtbl2_s8(shifter,index);
+                int8x8_t vedgeType = vadd_s8(vadd_s8(vsignRight,edge),vdup_n_s8(2));
+                shifter.val[1][0] = tmp[7];  // carry the last lane's negated sign to the next group
+                int16x8_t t1 = vmovl_s8(vtbl1_s8(tbl,vedgeType));
+                t1 = vaddw_u8(t1,in);
+                t1 = vmaxq_s16(t1,vdupq_n_s16(0));
+                t1 = vminq_s16(t1,vdupq_n_s16(255));
+                *(uint8x8_t *)&rec[x] = vmovn_u16(t1);
+            }
+            signLeft0 = shifter.val[1][0];
+        }
+        for (; x < width; x++)  // scalar tail
+        {
+            signRight = ((rec[x] - rec[x + 1]) < 0) ? -1 : ((rec[x] - rec[x + 1]) > 0) ? 1 : 0;
+            edgeType = signRight + signLeft0 + 2;
+            signLeft0 = -signRight;
+            rec[x] = x265_clip(rec[x] + offsetEo[edgeType]);
+        }
+        rec += stride;
+    }
+}
+
+static void processSaoCUE1_neon(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width)
+{
+    int x = 0;
+    int8_t signDown;
+    int edgeType;
+    // Edge-offset pass for one row, comparing each pixel with the one a stride below; upBuff1 holds the sign from above.
+    if (width >= 8) {
+        int8x8_t tbl = *(int8x8_t *)offsetEo;
+        for (; (x+8) <= width; x+=8)
+        {
+            uint8x8_t in0 = *(uint8x8_t *)&rec[x];
+            uint8x8_t in1 = *(uint8x8_t *)&rec[x+stride];
+            int8x8_t vsignDown = sign_diff_neon(in0,in1);
+            int8x8_t vedgeType = vadd_s8(vadd_s8(vsignDown,*(int8x8_t *)&upBuff1[x]),vdup_n_s8(2));
+            *(int8x8_t *)&upBuff1[x] = vneg_s8(vsignDown);  // stored as the "up" sign for the next row
+            int16x8_t t1 = vmovl_s8(vtbl1_s8(tbl,vedgeType));
+            t1 = vaddw_u8(t1,in0);
+            *(uint8x8_t *)&rec[x] = vqmovun_s16(t1);  // saturating narrow to unsigned 8-bit
+      }
+    }
+    for (; x < width; x++)  // scalar tail
+    {
+        signDown = signOf(rec[x] - rec[x + stride]);
+        edgeType = signDown + upBuff1[x] + 2;
+        upBuff1[x] = -signDown;
+        rec[x] = x265_clip(rec[x] + offsetEo[edgeType]);
+    }
+}
+
+static void processSaoCUE1_2Rows_neon(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width)
+{
+    int y;
+    int8_t signDown;
+    int edgeType;
+    // Same per-row work as processSaoCUE1_neon, applied to two consecutive rows.
+    for (y = 0; y < 2; y++)
+    {
+      int x=0;
+      if (width >= 8) {
+        int8x8_t tbl = *(int8x8_t *)offsetEo;
+        for (; (x+8) <= width; x+=8)
+        {
+          uint8x8_t in0 = *(uint8x8_t *)&rec[x];
+          uint8x8_t in1 = *(uint8x8_t *)&rec[x+stride];
+          int8x8_t vsignDown = sign_diff_neon(in0,in1);
+          int8x8_t vedgeType = vadd_s8(vadd_s8(vsignDown,*(int8x8_t *)&upBuff1[x]),vdup_n_s8(2));
+          *(int8x8_t *)&upBuff1[x] = vneg_s8(vsignDown);
+          int16x8_t t1 = vmovl_s8(vtbl1_s8(tbl,vedgeType));
+          t1 = vaddw_u8(t1,in0);
+          t1 = vmaxq_s16(t1,vdupq_n_s16(0));
+          t1 = vminq_s16(t1,vdupq_n_s16(255));
+          *(uint8x8_t *)&rec[x] = vmovn_u16(t1);
+          // (clamped to [0, 255] before narrowing)
+        }
+      }
+      for (; x < width; x++)  // scalar tail
+      {
+          signDown = signOf(rec[x] - rec[x + stride]);
+          edgeType = signDown + upBuff1[x] + 2;
+          upBuff1[x] = -signDown;
+          rec[x] = x265_clip(rec[x] + offsetEo[edgeType]);
+      }
+      rec += stride;
+  }
+}
+
+static void processSaoCUE2_neon(pixel * rec, int8_t * bufft, int8_t * buff1, int8_t * offsetEo, int width, intptr_t stride)
+{
+    int x;
+    // Edge-offset pass comparing each pixel with rec[x + stride + 1]; signs are read from buff1 and written to bufft[x + 1].
+    if (abs(buff1-bufft) < 16)  // buffers closer than 16 bytes: scalar only, presumably so the 8-byte store cannot clobber unread buff1 entries
+    {
+      for (x = 0; x < width; x++)
+      {
+          int8_t signDown = signOf(rec[x] - rec[x + stride + 1]);
+          int edgeType = signDown + buff1[x] + 2;
+          bufft[x + 1] = -signDown;
+          rec[x] = x265_clip(rec[x] + offsetEo[edgeType]);;
+      }
+    }
+    else
+    {
+      int8x8_t tbl = *(int8x8_t *)offsetEo;
+      x=0;
+      for (; (x + 8) <= width; x+=8)
+      {
+          uint8x8_t in0 = *(uint8x8_t *)&rec[x];
+          uint8x8_t in1 = *(uint8x8_t *)&rec[x+stride+1];
+          int8x8_t vsignDown = sign_diff_neon(in0,in1);
+          int8x8_t vedgeType = vadd_s8(vadd_s8(vsignDown,*(int8x8_t *)&buff1[x]),vdup_n_s8(2));
+          *(int8x8_t *)&bufft[x+1] = vneg_s8(vsignDown);
+          int16x8_t t1 = vmovl_s8(vtbl1_s8(tbl,vedgeType));
+          t1 = vaddw_u8(t1,in0);
+          t1 = vmaxq_s16(t1,vdupq_n_s16(0));
+          t1 = vminq_s16(t1,vdupq_n_s16(255));
+          *(uint8x8_t *)&rec[x] = vmovn_u16(t1);
+      }
+      for (; x < width; x++)  // scalar tail
+      {
+          int8_t signDown = signOf(rec[x] - rec[x + stride + 1]);
+          int edgeType = signDown + buff1[x] + 2;
+          bufft[x + 1] = -signDown;
+          rec[x] = x265_clip(rec[x] + offsetEo[edgeType]);;
+      }
+
+    }
+}
+ 
+
+static void processSaoCUE3_neon(pixel *rec, int8_t *upBuff1, int8_t *offsetEo, intptr_t stride, int startX, int endX)
+{
+    int8_t signDown;
+    int8_t edgeType;
+  int8x8_t tbl = *(int8x8_t *)offsetEo;
+  // Edge-offset pass over columns startX + 1 .. endX - 1; the new sign is stored one slot to the left (upBuff1[x - 1]).
+    int x = startX + 1;
+  for (; (x+8) <= endX; x+=8 )
+  {
+    uint8x8_t in0 = *(uint8x8_t *)&rec[x];
+    uint8x8_t in1 = *(uint8x8_t *)&rec[x+stride];
+    int8x8_t vsignDown = sign_diff_neon(in0,in1);
+    int8x8_t vedgeType = vadd_s8(vadd_s8(vsignDown,*(int8x8_t *)&upBuff1[x]),vdup_n_s8(2));
+    *(int8x8_t *)&upBuff1[x-1] = vneg_s8(vsignDown);
+    int16x8_t t1 = vmovl_s8(vtbl1_s8(tbl,vedgeType));
+    t1 = vaddw_u8(t1,in0);
+    t1 = vmaxq_s16(t1,vdupq_n_s16(0));
+    t1 = vminq_s16(t1,vdupq_n_s16(255));
+    *(uint8x8_t *)&rec[x] = vmovn_u16(t1);
+    // (upBuff1[x .. x + 7] is read above before upBuff1[x - 1 .. x + 6] is overwritten)
+  }
+    for (; x < endX; x++)  // scalar tail
+    {
+        signDown = signOf(rec[x] - rec[x + stride]);
+        edgeType = signDown + upBuff1[x] + 2;
+        upBuff1[x - 1] = -signDown;
+        rec[x] = x265_clip(rec[x] + offsetEo[edgeType]);
+    }
+}
+
+static void processSaoCUB0_neon(pixel* rec, const int8_t* offset, int ctuWidth, int ctuHeight, intptr_t stride)
+{
+    #define SAO_BO_BITS 5
+    const int boShift = X265_DEPTH - SAO_BO_BITS;
+    int x, y;
+  int8x8x4_t table;
+  table = *(int8x8x4_t *)offset;  // 32 offset bytes, indexed by rec[x] >> boShift
+  // Band-offset pass: adds offset[rec[x] >> boShift] to every pixel of the ctuWidth x ctuHeight area.
+    for (y = 0; y < ctuHeight; y++)
+    {
+      
+        for (x = 0; (x+8) <= ctuWidth; x+=8)
+        {
+          int8x8_t in = *(int8x8_t*)&rec[x];
+          int8x8_t offsets = vtbl4_s8(table,vshr_n_u8(in,boShift));
+          int16x8_t tmp = vmovl_s8(offsets);
+          tmp = vaddw_u8(tmp,in);
+          tmp = vmaxq_s16(tmp,vdupq_n_s16(0));
+          tmp = vminq_s16(tmp,vdupq_n_s16(255));
+          *(uint8x8_t *)&rec[x] = vmovn_u16(tmp);
+        }
+        for (; x < ctuWidth; x++)  // scalar tail
+        {
+            rec[x] = x265_clip(rec[x] + offset[rec[x] >> boShift]);
+        }
+        rec += stride;
+    }
+}
+
+}
+
+
+
+namespace X265_NS {
+void setupLoopFilterPrimitives_neon(EncoderPrimitives &p)
+{
+    p.saoCuOrgE0 = processSaoCUE0_neon;
+    p.saoCuOrgE1 = processSaoCUE1_neon;
+    p.saoCuOrgE1_2Rows = processSaoCUE1_2Rows_neon;
+    p.saoCuOrgE2[0] = processSaoCUE2_neon;
+    p.saoCuOrgE2[1] = processSaoCUE2_neon;
+    p.saoCuOrgE3[0] = processSaoCUE3_neon;
+    p.saoCuOrgE3[1] = processSaoCUE3_neon;
+    p.saoCuOrgB0 = processSaoCUB0_neon;
+    p.sign = calSign_neon;
+    // Both slots of saoCuOrgE2 and saoCuOrgE3 receive the same kernel.
+}
+
+#else // HIGH_BIT_DEPTH is set or HAVE_NEON is not defined
+
+// No NEON SAO kernels in this configuration: the setup routine is an empty stub.
+namespace X265_NS {
+void setupLoopFilterPrimitives_neon(EncoderPrimitives &)
+{
+}
+
+#endif
+
+// Closes whichever `namespace X265_NS {` was opened by the active preprocessor branch above.
+}
diff -Naur ./source/common/arm64/loopfilter-prim.h ../x265_apple_patch/source/common/arm64/loopfilter-prim.h
--- ./source/common/arm64/loopfilter-prim.h	1970-01-01 01:00:00.000000000 +0100
+++ ../x265_apple_patch/source/common/arm64/loopfilter-prim.h	2021-05-08 13:08:01.000000000 +0100
@@ -0,0 +1,43 @@
+#ifndef _LOOPFILTER_NEON_H__
+#define _LOOPFILTER_NEON_H__
+
+
+/*****************************************************************************
+* Copyright (C) 2013-2017 MulticoreWare, Inc
+*
+* Authors: Praveen Kumar Tiwari <praveen@multicorewareinc.com>
+*          Dnyaneshwar Gorade <dnyaneshwar@multicorewareinc.com>
+*          Min Chen <chenm003@163.com>
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+*
+* This program is also available under a commercial proprietary license.
+* For more information, contact us at license @ x265.com.
+*****************************************************************************/
+
+
+
+#include "common.h"
+#include "primitives.h"
+
+#define PIXEL_MIN 0
+
+namespace X265_NS {
+void setupLoopFilterPrimitives_neon(EncoderPrimitives &p);
+// ^ fills the saoCuOrg* / sign entries of p with NEON routines; one build variant defines it as an empty stub
+};
+
+
+#endif
diff -Naur ./source/common/arm64/pixel-prim.cpp ../x265_apple_patch/source/common/arm64/pixel-prim.cpp
--- ./source/common/arm64/pixel-prim.cpp	1970-01-01 01:00:00.000000000 +0100
+++ ../x265_apple_patch/source/common/arm64/pixel-prim.cpp	2021-05-08 13:08:01.000000000 +0100
@@ -0,0 +1,1940 @@
+#include "common.h"
+#include "slicetype.h"      // LOWRES_COST_MASK
+#include "primitives.h"
+#include "x265.h"
+
+#include "pixel-prim.h"
+#include "arm64-utils.h"
+#if HAVE_NEON
+
+#include <arm_neon.h>
+
+using namespace X265_NS;
+
+
+
+namespace  {
+
+/* SATD SA8D variants - based on x264 */
+// Lane-wise 16-bit butterfly: sum = a + b, sub = a - b. a and b are taken by value, so outputs may alias inputs.
+static inline void SUMSUB_AB(int16x8_t& sum, int16x8_t& sub, const int16x8_t a, const int16x8_t b)
+{
+    sum = vaddq_s16(a,b);
+    sub = vsubq_s16(a,b);
+}
+// 16-bit lane transpose step: t1 = even lanes of s1/s2 interleaved (TRN1), t2 = odd lanes interleaved (TRN2).
+static inline void transpose_8h(int16x8_t& t1, int16x8_t& t2, const int16x8_t s1, const int16x8_t s2)
+{
+    t1 = vtrn1q_s16(s1, s2);
+    t2 = vtrn2q_s16(s1, s2);
+}
+// Same as transpose_8h on 32-bit elements; the int16x8_t arguments are implicitly reinterpreted by the s32 intrinsics.
+static inline void transpose_4s(int16x8_t& t1, int16x8_t& t2, const int16x8_t s1, const int16x8_t s2)
+{
+    t1 = vtrn1q_s32(s1, s2);
+    t2 = vtrn2q_s32(s1, s2);
+}
+// 64-bit variant: t1 = {low half of s1, low half of s2}, t2 = {high halves}. Only compiled when X265_DEPTH <= 10.
+#if (X265_DEPTH <= 10)
+static inline void transpose_2d(int16x8_t& t1, int16x8_t& t2, const int16x8_t s1, const int16x8_t s2)
+{
+    t1 = vtrn1q_s64(s1, s2);
+    t2 = vtrn2q_s64(s1, s2);
+}
+#endif
+
+// Two independent butterflies: (s1,d1) = (a+b, a-b) and (s2,d2) = (c+d, c-d).
+static inline void SUMSUB_ABCD(int16x8_t& s1, int16x8_t& d1, int16x8_t& s2, int16x8_t& d2,
+                        int16x8_t a,int16x8_t  b,int16x8_t  c,int16x8_t  d)
+{
+    SUMSUB_AB(s1,d1,a,b);
+    SUMSUB_AB(s2,d2,c,d);
+}
+// 4-point Hadamard across r1..r4 (two butterfly stages); result is left in r1..r4, t1..t4 are overwritten as scratch.
+static inline void HADAMARD4_V(int16x8_t& r1,int16x8_t& r2,int16x8_t& r3,int16x8_t& r4,
+                        int16x8_t& t1,int16x8_t& t2,int16x8_t& t3,int16x8_t& t4)
+{
+    SUMSUB_ABCD(t1, t2, t3, t4, r1, r2, r3, r4);
+    SUMSUB_ABCD(r1, r3, r2, r4, t1, t3, t2, t4);
+}
+
+// Final stage used by the 8x4 SATD kernels: transforms four vectors of 16-bit differences and returns the lane sum.
+static int _satd_4x8_8x4_end_neon(int16x8_t v0,int16x8_t v1,int16x8_t v2, int16x8_t v3)
+                                 
+{
+    
+    int16x8_t v4,v5,v6,v7,v16,v17,v18,v19;
+
+    // two butterfly stages across the four input vectors (v0..v3 are by-value and reused as scratch below)
+    SUMSUB_AB   (v16, v17, v0,  v1);
+    SUMSUB_AB   (v18, v19, v2,  v3);
+
+    SUMSUB_AB   (v4 , v6 , v16, v18);
+    SUMSUB_AB   (v5 , v7 , v17, v19);
+    // interleave 16-bit lanes, then butterfly neighbouring columns
+    v0 = vtrn1q_s16(v4, v5);
+    v1 = vtrn2q_s16(v4, v5);
+    v2 = vtrn1q_s16(v6, v7);
+    v3 = vtrn2q_s16(v6, v7);
+
+    SUMSUB_AB   (v16, v17, v0,  v1);
+    SUMSUB_AB   (v18, v19, v2,  v3);
+    // interleave 32-bit elements so the remaining butterfly partners sit in matching lanes
+    v0 = vtrn1q_s32(v16, v18);
+    v1 = vtrn2q_s32(v16, v18);
+    v2 = vtrn1q_s32(v17, v19);
+    v3 = vtrn2q_s32(v17, v19);
+    // NOTE(review): abs + max presumably replaces the last butterfly (|a+b| + |a-b| == 2*max(|a|,|b|)) - confirm vs x264
+    v0 = vabsq_s16(v0);
+    v1 = vabsq_s16(v1);
+    v2 = vabsq_s16(v2);
+    v3 = vabsq_s16(v3);
+    
+    v0 = vmaxq_u16(v0, v1);
+    v1 = vmaxq_u16(v2, v3);
+
+    v0 = vaddq_u16(v0, v1);
+    return vaddlvq_u16(v0);
+}
+// 4x4 SATD core. Callers pack rows 0 and 2 of the differences into v0 and rows 1 and 3 into v1 (4 lanes per row).
+static inline int _satd_4x4_neon(int16x8_t v0, int16x8_t v1)
+{
+    int16x8_t v2,v3;
+    SUMSUB_AB   (v2,  v3,  v0,  v1);
+    // regroup 64-bit halves so the second butterfly pairs the results of the first
+    v0 = vzip1q_s64(v2,v3);
+    v1 = vzip2q_s64(v2,v3);
+    SUMSUB_AB   (v2,  v3,  v0,  v1);
+    // interleave 16-bit lanes, then butterfly neighbouring columns
+    v0 = vtrn1q_s16(v2,v3);
+    v1 = vtrn2q_s16(v2,v3);
+    SUMSUB_AB   (v2,  v3,  v0,  v1);
+    // interleave 32-bit elements; abs + max then takes the place of a final butterfly (see note in the 8x4 variant)
+    v0 = vtrn1q_s32(v2,v3);
+    v1 = vtrn2q_s32(v2,v3);
+
+    v0 = vabsq_s16(v0);
+    v1 = vabsq_s16(v1);
+    v0 = vmaxq_u16(v0, v1);
+    
+    return vaddlvq_s16(v0);
+}
+// In: v0..v3 = first butterfly of four difference rows (as built by the callers), v20..v23 = four raw difference rows.
+static void _satd_8x4v_8x8h_neon(int16x8_t& v0,int16x8_t& v1, int16x8_t&v2,int16x8_t& v3,int16x8_t& v20,int16x8_t& v21, int16x8_t&v22,int16x8_t& v23)
+{
+    int16x8_t v16,v17,v18,v19,v4,v5,v6,v7;
+    // second butterfly stage for the first row group; v0..v3 are free afterwards and serve as HADAMARD4_V scratch
+    SUMSUB_AB(v16, v18, v0,  v2);
+    SUMSUB_AB(v17, v19, v1,  v3);
+
+    HADAMARD4_V (v20, v21, v22, v23, v0,  v1, v2, v3);
+    // horizontal part: 16-bit interleave + butterfly, then 32-bit interleave
+    transpose_8h(   v0,  v1,  v16, v17);
+    transpose_8h(   v2,  v3,  v18, v19);
+    transpose_8h(   v4,  v5,  v20, v21);
+    transpose_8h(   v6,  v7,  v22, v23);
+
+    SUMSUB_AB   (v16, v17, v0,  v1);
+    SUMSUB_AB   (v18, v19, v2,  v3);
+    SUMSUB_AB   (v20, v21, v4,  v5);
+    SUMSUB_AB   (v22, v23, v6,  v7);
+
+    transpose_4s(   v0,  v2,  v16, v18);
+    transpose_4s(   v1,  v3,  v17, v19);
+    transpose_4s(   v4,  v6,  v20, v22);
+    transpose_4s(   v5,  v7,  v21, v23);
+    // abs + max stands in for the last butterfly; out: v0..v3 hold the per-lane values the caller still has to sum
+    v0 = vabsq_s16(v0);
+    v1 = vabsq_s16(v1);
+    v2 = vabsq_s16(v2);
+    v3 = vabsq_s16(v3);
+    v4 = vabsq_s16(v4);
+    v5 = vabsq_s16(v5);
+    v6 = vabsq_s16(v6);
+    v7 = vabsq_s16(v7);
+
+    v0 = vmaxq_u16(v0,v2);
+    v1 = vmaxq_u16(v1,v3);
+    v2 = vmaxq_u16(v4,v6);
+    v3 = vmaxq_u16(v5,v7);
+
+}
+
+#if HIGH_BIT_DEPTH
+// 32-bit helpers, compiled only for X265_DEPTH > 10. transpose_2d: t1 = {s1 low 64 bits, s2 low}, t2 = {high, high}.
+#if (X265_DEPTH > 10)
+static inline void transpose_2d(int32x4_t& t1, int32x4_t& t2, const int32x4_t s1, const int32x4_t s2)
+{
+    t1 = vtrn1q_s64(s1, s2);
+    t2 = vtrn2q_s64(s1, s2);
+}
+// Lane-wise 32-bit butterfly: sum = a + b, sub = a - b.
+static inline void ISUMSUB_AB(int32x4_t& sum, int32x4_t& sub, const int32x4_t a, const int32x4_t b)
+{
+    sum = vaddq_s32(a,b);
+    sub = vsubq_s32(a,b);
+}
+// Widening butterfly: 16-bit inputs a, b -> 32-bit a+b and a-b, each split into low-half (l) and high-half (h) vectors.
+static inline void ISUMSUB_AB_FROM_INT16(int32x4_t& suml, int32x4_t& sumh, int32x4_t& subl, int32x4_t& subh, const int16x8_t a, const int16x8_t b)
+{
+    suml = vaddl_s16(vget_low_s16(a),vget_low_s16(b));
+    sumh = vaddl_high_s16(a,b);
+    subl = vsubl_s16(vget_low_s16(a),vget_low_s16(b));
+    subh = vsubl_high_s16(a, b);
+}
+
+#endif
+// HBD: loads an 8x8 block from pix1/pix2 (16-bit pixels) and forms differences. Out: v0..v3, v20..v23 (see below).
+static inline void _sub_8x8_fly(const uint16_t* pix1, intptr_t stride_pix1, const uint16_t* pix2, intptr_t stride_pix2,
+                            int16x8_t& v0,int16x8_t& v1, int16x8_t& v2,int16x8_t& v3,
+                            int16x8_t& v20,int16x8_t& v21, int16x8_t& v22,int16x8_t& v23)
+{
+    uint16x8_t r0,r1,r2,r3;
+    uint16x8_t t0,t1,t2,t3;
+    int16x8_t v16,v17;
+    int16x8_t v18,v19;
+    // rows 0..3: v16..v19 = pix1 - pix2 (unsigned subtract, result reinterpreted as signed 16 bit)
+    r0 = *(uint16x8_t*)(pix1 + 0*stride_pix1);
+    r1 = *(uint16x8_t*)(pix1 + 1*stride_pix1);
+    r2 = *(uint16x8_t*)(pix1 + 2*stride_pix1);
+    r3 = *(uint16x8_t*)(pix1 + 3*stride_pix1);
+
+    t0 = *(uint16x8_t*)(pix2 + 0*stride_pix2);
+    t1 = *(uint16x8_t*)(pix2 + 1*stride_pix2);
+    t2 = *(uint16x8_t*)(pix2 + 2*stride_pix2);
+    t3 = *(uint16x8_t*)(pix2 + 3*stride_pix2);
+
+    v16 = vsubq_u16(r0,t0);
+    v17 = vsubq_u16(r1,t1);
+    v18 = vsubq_u16(r2,t2);
+    v19 = vsubq_u16(r3,t3);
+    // rows 4..7: v20..v23 = raw differences, returned without any butterfly applied
+    r0 = *(uint16x8_t*)(pix1 + 4*stride_pix1);
+    r1 = *(uint16x8_t*)(pix1 + 5*stride_pix1);
+    r2 = *(uint16x8_t*)(pix1 + 6*stride_pix1);
+    r3 = *(uint16x8_t*)(pix1 + 7*stride_pix1);
+
+    t0 = *(uint16x8_t*)(pix2 + 4*stride_pix2);
+    t1 = *(uint16x8_t*)(pix2 + 5*stride_pix2);
+    t2 = *(uint16x8_t*)(pix2 + 6*stride_pix2);
+    t3 = *(uint16x8_t*)(pix2 + 7*stride_pix2);
+    
+    v20 = vsubq_u16(r0,t0);
+    v21 = vsubq_u16(r1,t1);
+    v22 = vsubq_u16(r2,t2);
+    v23 = vsubq_u16(r3,t3);
+    // first butterfly stage on rows 0..3: v0/v1 = row0 +/- row1, v2/v3 = row2 +/- row3
+    SUMSUB_AB   (v0,  v1,  v16, v17);
+    SUMSUB_AB   (v2,  v3,  v18, v19);
+ 
+}
+
+
+
+// HBD 16x4 SATD partial: out v0..v3 hold the per-lane values the caller must still sum (see _satd_8x4v_8x8h_neon).
+static void _satd_16x4_neon(const uint16_t* pix1, intptr_t stride_pix1, const uint16_t* pix2, intptr_t stride_pix2,
+                            int16x8_t& v0,int16x8_t&v1, int16x8_t&v2,int16x8_t&v3)
+{
+    uint16x8_t r0,r1,r2,r3;
+    uint16x8_t t0,t1,t2,t3;
+    int16x8_t v16,v17,v20,v21;
+    int16x8_t v18,v19,v22,v23;
+    // left 8 columns of rows 0..3 (16-bit pixels, so 8 per vector): v16..v19 = pix1 - pix2
+    r0 = *(uint16x8_t*)(pix1 + 0*stride_pix1);
+    r1 = *(uint16x8_t*)(pix1 + 1*stride_pix1);
+    r2 = *(uint16x8_t*)(pix1 + 2*stride_pix1);
+    r3 = *(uint16x8_t*)(pix1 + 3*stride_pix1);
+
+    t0 = *(uint16x8_t*)(pix2 + 0*stride_pix2);
+    t1 = *(uint16x8_t*)(pix2 + 1*stride_pix2);
+    t2 = *(uint16x8_t*)(pix2 + 2*stride_pix2);
+    t3 = *(uint16x8_t*)(pix2 + 3*stride_pix2);
+  
+    
+    v16 = vsubq_u16(r0,t0);
+    v17 = vsubq_u16(r1,t1);
+    v18 = vsubq_u16(r2,t2);
+    v19 = vsubq_u16(r3,t3);
+    // right 8 columns of rows 0..3: v20..v23 = pix1 - pix2
+    r0 = *(uint16x8_t*)(pix1 + 0*stride_pix1 + 8);
+    r1 = *(uint16x8_t*)(pix1 + 1*stride_pix1 + 8);
+    r2 = *(uint16x8_t*)(pix1 + 2*stride_pix1 + 8);
+    r3 = *(uint16x8_t*)(pix1 + 3*stride_pix1 + 8);
+
+    t0 = *(uint16x8_t*)(pix2 + 0*stride_pix2 + 8);
+    t1 = *(uint16x8_t*)(pix2 + 1*stride_pix2 + 8);
+    t2 = *(uint16x8_t*)(pix2 + 2*stride_pix2 + 8);
+    t3 = *(uint16x8_t*)(pix2 + 3*stride_pix2 + 8);
+
+    
+    v20 = vsubq_u16(r0,t0);
+    v21 = vsubq_u16(r1,t1);
+    v22 = vsubq_u16(r2,t2);
+    v23 = vsubq_u16(r3,t3);
+
+    SUMSUB_AB   (v0,  v1,  v16, v17);
+    SUMSUB_AB   (v2,  v3,  v18, v19);
+    
+    _satd_8x4v_8x8h_neon(v0,v1,v2,v3,v20,v21,v22,v23);
+    
+}
+// HBD 4x4 SATD of pix1 vs pix2. Each row (4 x 16-bit pixels) is loaded as one 64-bit lane:
+// t0/r0 carry rows 0 and 2, t1/r1 carry rows 1 and 3 of pix1/pix2 respectively.
+int pixel_satd_4x4_neon(const uint16_t* pix1, intptr_t stride_pix1, const uint16_t* pix2, intptr_t stride_pix2)
+{
+    uint64x2_t t0,t1,r0,r1;
+    t0[0] = *(uint64_t *)(pix1 + 0*stride_pix1);
+    t1[0] = *(uint64_t *)(pix1 + 1*stride_pix1);
+    t0[1] = *(uint64_t *)(pix1 + 2*stride_pix1);
+    t1[1] = *(uint64_t *)(pix1 + 3*stride_pix1);
+
+    r0[0] = *(uint64_t *)(pix2 + 0*stride_pix2);
+    r1[0] = *(uint64_t *)(pix2 + 1*stride_pix2);
+    r0[1] = *(uint64_t *)(pix2 + 2*stride_pix2);
+    r1[1] = *(uint64_t *)(pix2 + 3*stride_pix2);
+    // second operand is r1 - t1 (sign flipped): this only swaps the sum/sub outputs of the first butterfly downstream
+    return _satd_4x4_neon(vsubq_u16(t0,r0), vsubq_u16(r1,t1));
+}
+
+
+
+
+
+// HBD 8x4 SATD of pix1 vs pix2 (8 x 16-bit pixels per row, four rows).
+int pixel_satd_8x4_neon(const uint16_t* pix1, intptr_t stride_pix1, const uint16_t* pix2, intptr_t stride_pix2)
+{
+    uint16x8_t i0,i1,i2,i3,i4,i5,i6,i7;
+    // even-numbered temporaries come from pix1, odd-numbered ones from the same row of pix2
+    i0 = *(uint16x8_t *)(pix1 + 0*stride_pix1);
+    i1 = *(uint16x8_t *)(pix2 + 0*stride_pix2);
+    i2 = *(uint16x8_t *)(pix1 + 1*stride_pix1);
+    i3 = *(uint16x8_t *)(pix2 + 1*stride_pix2);
+    i4 = *(uint16x8_t *)(pix1 + 2*stride_pix1);
+    i5 = *(uint16x8_t *)(pix2 + 2*stride_pix2);
+    i6 = *(uint16x8_t *)(pix1 + 3*stride_pix1);
+    i7 = *(uint16x8_t *)(pix2 + 3*stride_pix2);
+
+    int16x8_t v0 = vsubq_u16(i0,i1);
+    int16x8_t v1 = vsubq_u16(i2,i3);
+    int16x8_t v2 = vsubq_u16(i4,i5);
+    int16x8_t v3 = vsubq_u16(i6,i7);
+
+    return _satd_4x8_8x4_end_neon(v0,v1,v2,v3);
+}
+
+// HBD 16x16 SATD: four 16x4 strips, accumulated into two 32-bit accumulators (v30, v31) and summed at the end.
+int pixel_satd_16x16_neon(const uint16_t* pix1, intptr_t stride_pix1, const uint16_t* pix2, intptr_t stride_pix2)
+{
+    int32x4_t v30 = vdupq_n_u32(0),v31= vdupq_n_u32(0);
+    int16x8_t v0,v1,v2,v3;
+    // vpadalq_u16 adds adjacent pairs of 16-bit lanes into the 32-bit accumulator lanes
+    _satd_16x4_neon(pix1,stride_pix1,pix2,stride_pix2,v0,v1,v2,v3);
+    v30 = vpadalq_u16(v30,v0);
+    v30 = vpadalq_u16(v30,v1);
+    v31 = vpadalq_u16(v31,v2);
+    v31 = vpadalq_u16(v31,v3);
+
+    _satd_16x4_neon(pix1 + 4*stride_pix1,stride_pix1,pix2+4*stride_pix2,stride_pix2,v0,v1,v2,v3);
+    v30 = vpadalq_u16(v30,v0);
+    v30 = vpadalq_u16(v30,v1);
+    v31 = vpadalq_u16(v31,v2);
+    v31 = vpadalq_u16(v31,v3);
+
+    _satd_16x4_neon(pix1 + 8*stride_pix1,stride_pix1,pix2+8*stride_pix2,stride_pix2,v0,v1,v2,v3);
+    v30 = vpadalq_u16(v30,v0);
+    v30 = vpadalq_u16(v30,v1);
+    v31 = vpadalq_u16(v31,v2);
+    v31 = vpadalq_u16(v31,v3);
+
+    _satd_16x4_neon(pix1 + 12*stride_pix1,stride_pix1,pix2+12*stride_pix2,stride_pix2,v0,v1,v2,v3);
+    v30 = vpadalq_u16(v30,v0);
+    v30 = vpadalq_u16(v30,v1);
+    v31 = vpadalq_u16(v31,v2);
+    v31 = vpadalq_u16(v31,v3);
+
+    return vaddvq_s32(vaddq_s32(v30,v31));
+    
+}
+
+#else       //HIGH_BIT_DEPTH
+// 8-bit 16x4 SATD partial: out v0..v3 hold the per-lane values the caller must still sum (see _satd_8x4v_8x8h_neon).
+static void _satd_16x4_neon(const uint8_t* pix1, intptr_t stride_pix1, const uint8_t* pix2, intptr_t stride_pix2,
+                            int16x8_t& v0,int16x8_t&v1, int16x8_t&v2,int16x8_t&v3)
+{
+    uint8x16_t r0,r1,r2,r3;
+    uint8x16_t t0,t1,t2,t3;
+    int16x8_t v16,v17,v20,v21;
+    int16x8_t v18,v19,v22,v23;
+    
+    r0 = *(uint8x16_t*)(pix1 + 0*stride_pix1);
+    r1 = *(uint8x16_t*)(pix1 + 1*stride_pix1);
+    r2 = *(uint8x16_t*)(pix1 + 2*stride_pix1);
+    r3 = *(uint8x16_t*)(pix1 + 3*stride_pix1);
+
+    t0 = *(uint8x16_t*)(pix2 + 0*stride_pix2);
+    t1 = *(uint8x16_t*)(pix2 + 1*stride_pix2);
+    t2 = *(uint8x16_t*)(pix2 + 2*stride_pix2);
+    t3 = *(uint8x16_t*)(pix2 + 3*stride_pix2);
+
+    // widening subtract pix1 - pix2 per row: low 8 columns -> v16..v19,
+    // high 8 columns -> v20..v23
+    v16 = vsubl_u8(vget_low_u8(r0),vget_low_u8(t0) );
+    v20 = vsubl_high_u8(r0,t0);
+    v17 = vsubl_u8(vget_low_u8(r1),vget_low_u8(t1) );
+    v21 = vsubl_high_u8(r1,t1);
+    v18 = vsubl_u8(vget_low_u8(r2),vget_low_u8(t2) );
+    v22 = vsubl_high_u8(r2,t2);
+    v19 = vsubl_u8(vget_low_u8(r3),vget_low_u8(t3) );
+    v23 = vsubl_high_u8(r3,t3);
+
+    SUMSUB_AB   (v0,  v1,  v16, v17);
+    SUMSUB_AB   (v2,  v3,  v18, v19);
+    
+    _satd_8x4v_8x8h_neon(v0,v1,v2,v3,v20,v21,v22,v23);
+    
+}
+
+// 8-bit: loads an 8x8 block from pix1/pix2 and forms widened differences. Out: v0..v3, v20..v23 (see below).
+static inline void _sub_8x8_fly(const uint8_t* pix1, intptr_t stride_pix1, const uint8_t* pix2, intptr_t stride_pix2,
+                            int16x8_t& v0,int16x8_t& v1, int16x8_t& v2,int16x8_t& v3,
+                            int16x8_t& v20,int16x8_t& v21, int16x8_t& v22,int16x8_t& v23)
+{
+    uint8x8_t r0,r1,r2,r3;
+    uint8x8_t t0,t1,t2,t3;
+    int16x8_t v16,v17;
+    int16x8_t v18,v19;
+    
+    r0 = *(uint8x8_t*)(pix1 + 0*stride_pix1);
+    r1 = *(uint8x8_t*)(pix1 + 1*stride_pix1);
+    r2 = *(uint8x8_t*)(pix1 + 2*stride_pix1);
+    r3 = *(uint8x8_t*)(pix1 + 3*stride_pix1);
+
+    t0 = *(uint8x8_t*)(pix2 + 0*stride_pix2);
+    t1 = *(uint8x8_t*)(pix2 + 1*stride_pix2);
+    t2 = *(uint8x8_t*)(pix2 + 2*stride_pix2);
+    t3 = *(uint8x8_t*)(pix2 + 3*stride_pix2);
+    // rows 0..3: v16..v19 = pix1 - pix2, widened to 16 bit
+    v16 = vsubl_u8(r0,t0);
+    v17 = vsubl_u8(r1,t1);
+    v18 = vsubl_u8(r2,t2);
+    v19 = vsubl_u8(r3,t3);
+
+    r0 = *(uint8x8_t*)(pix1 + 4*stride_pix1);
+    r1 = *(uint8x8_t*)(pix1 + 5*stride_pix1);
+    r2 = *(uint8x8_t*)(pix1 + 6*stride_pix1);
+    r3 = *(uint8x8_t*)(pix1 + 7*stride_pix1);
+
+    t0 = *(uint8x8_t*)(pix2 + 4*stride_pix2);
+    t1 = *(uint8x8_t*)(pix2 + 5*stride_pix2);
+    t2 = *(uint8x8_t*)(pix2 + 6*stride_pix2);
+    t3 = *(uint8x8_t*)(pix2 + 7*stride_pix2);
+    
+    v20 = vsubl_u8(r0,t0);
+    v21 = vsubl_u8(r1,t1);
+    v22 = vsubl_u8(r2,t2);
+    v23 = vsubl_u8(r3,t3);
+
+    // rows 4..7 stay raw in v20..v23; rows 0..3 get the first butterfly: v0/v1 = row0 +/- row1, v2/v3 = row2 +/- row3
+    SUMSUB_AB   (v0,  v1,  v16, v17);
+    SUMSUB_AB   (v2,  v3,  v18, v19);
+    
+}
+// 8-bit 4x4 SATD of pix1 vs pix2. Each 4-pixel row is one 32-bit lane: t0/r0 = rows 0 and 2, t1/r1 = rows 1 and 3.
+int pixel_satd_4x4_neon(const uint8_t* pix1, intptr_t stride_pix1, const uint8_t* pix2, intptr_t stride_pix2)
+{
+    uint32x2_t t0,t1,r0,r1;
+    t0[0] = *(uint32_t *)(pix1 + 0*stride_pix1);
+    t1[0] = *(uint32_t *)(pix1 + 1*stride_pix1);
+    t0[1] = *(uint32_t *)(pix1 + 2*stride_pix1);
+    t1[1] = *(uint32_t *)(pix1 + 3*stride_pix1);
+
+    r0[0] = *(uint32_t *)(pix2 + 0*stride_pix2);
+    r1[0] = *(uint32_t *)(pix2 + 1*stride_pix2);
+    r0[1] = *(uint32_t *)(pix2 + 2*stride_pix2);
+    r1[1] = *(uint32_t *)(pix2 + 3*stride_pix2);
+    // second operand is r1 - t1 (sign flipped): this only swaps the sum/sub outputs of the first butterfly downstream
+    return _satd_4x4_neon(vsubl_u8(t0,r0), vsubl_u8(r1,t1));
+}
+
+// 8-bit 8x4 SATD of pix1 vs pix2 (8 pixels per row, four rows).
+int pixel_satd_8x4_neon(const uint8_t* pix1, intptr_t stride_pix1, const uint8_t* pix2, intptr_t stride_pix2)
+{
+    uint8x8_t i0,i1,i2,i3,i4,i5,i6,i7;
+    // even-numbered temporaries come from pix1, odd-numbered ones from the same row of pix2
+    i0 = *(uint8x8_t *)(pix1 + 0*stride_pix1);
+    i1 = *(uint8x8_t *)(pix2 + 0*stride_pix2);
+    i2 = *(uint8x8_t *)(pix1 + 1*stride_pix1);
+    i3 = *(uint8x8_t *)(pix2 + 1*stride_pix2);
+    i4 = *(uint8x8_t *)(pix1 + 2*stride_pix1);
+    i5 = *(uint8x8_t *)(pix2 + 2*stride_pix2);
+    i6 = *(uint8x8_t *)(pix1 + 3*stride_pix1);
+    i7 = *(uint8x8_t *)(pix2 + 3*stride_pix2);
+
+    int16x8_t v0 = vsubl_u8(i0,i1);
+    int16x8_t v1 = vsubl_u8(i2,i3);
+    int16x8_t v2 = vsubl_u8(i4,i5);
+    int16x8_t v3 = vsubl_u8(i6,i7);
+
+    return _satd_4x8_8x4_end_neon(v0,v1,v2,v3);
+}
+// 8-bit 16x16 SATD: four 16x4 strips accumulated in 16-bit lanes (v30, v31), widened to 32 bit only at the end.
+int pixel_satd_16x16_neon(const uint8_t* pix1, intptr_t stride_pix1, const uint8_t* pix2, intptr_t stride_pix2)
+{
+    int16x8_t v30,v31;
+    int16x8_t v0,v1,v2,v3;
+    // first strip initialises the accumulators
+    _satd_16x4_neon(pix1,stride_pix1,pix2,stride_pix2,v0,v1,v2,v3);
+    v30 = vaddq_s16(v0,v1);
+    v31 = vaddq_s16(v2,v3);
+    
+    _satd_16x4_neon(pix1 + 4*stride_pix1,stride_pix1,pix2+4*stride_pix2,stride_pix2,v0,v1,v2,v3);
+    v0 = vaddq_s16(v0,v1);
+    v1 = vaddq_s16(v2,v3);
+    v30 = vaddq_s16(v30, v0);
+    v31 = vaddq_s16(v31, v1);
+
+    _satd_16x4_neon(pix1 + 8*stride_pix1,stride_pix1,pix2+8*stride_pix2,stride_pix2,v0,v1,v2,v3);
+    v0 = vaddq_s16(v0,v1);
+    v1 = vaddq_s16(v2,v3);
+    v30 = vaddq_s16(v30, v0);
+    v31 = vaddq_s16(v31, v1);
+
+    _satd_16x4_neon(pix1 + 12*stride_pix1,stride_pix1,pix2+12*stride_pix2,stride_pix2,v0,v1,v2,v3);
+    v0 = vaddq_s16(v0,v1);
+    v1 = vaddq_s16(v2,v3);
+    v30 = vaddq_s16(v30, v0);
+    v31 = vaddq_s16(v31, v1);
+    // the accumulators are read back as unsigned 16-bit lanes when widening to 32 bit
+    int32x4_t sum0 = vpaddlq_u16(v30);
+    int32x4_t sum1 = vpaddlq_u16(v31);
+    sum0 = vaddq_s32(sum0,sum1);
+    return vaddvq_s32(sum0);
+    
+}
+#endif      //HIGH_BIT_DEPTH
+
+// Finishes an 8x8 SA8D from the _sub_8x8_fly outputs. Results are written to v0 and v1 (see the end of each branch).
+static inline void _sa8d_8x8_neon_end(int16x8_t& v0,int16x8_t& v1,int16x8_t v2,int16x8_t v3,
+                                     int16x8_t v20,int16x8_t v21,int16x8_t v22,int16x8_t v23)
+{
+    int16x8_t v16,v17,v18,v19;
+    int16x8_t v4,v5,v6,v7;
+    // complete the 4-point transform of each row group, then butterfly the two groups together
+    SUMSUB_AB   (v16, v18, v0,  v2);
+    SUMSUB_AB   (v17, v19, v1,  v3);
+
+    HADAMARD4_V (v20, v21, v22, v23, v0,  v1, v2, v3);
+
+    SUMSUB_AB   (v0,  v16, v16, v20);
+    SUMSUB_AB   (v1,  v17, v17, v21);
+    SUMSUB_AB   (v2,  v18, v18, v22);
+    SUMSUB_AB   (v3,  v19, v19, v23);
+
+    transpose_8h   (v20, v21, v16, v17);
+    transpose_8h   (v4,  v5,  v0,  v1);
+    transpose_8h   (v22, v23, v18, v19);
+    transpose_8h   (v6,  v7,  v2,  v3);
+    
+#if (X265_DEPTH <= 10)
+    // 16-bit path: all remaining stages stay in int16x8_t
+    int16x8_t v24,v25;
+
+    SUMSUB_AB   (v2,  v3,  v20, v21);
+    SUMSUB_AB   (v24, v25, v4,  v5);
+    SUMSUB_AB   (v0,  v1,  v22, v23);
+    SUMSUB_AB   (v4,  v5,  v6,  v7);
+
+    transpose_4s   (v20, v22, v2,  v0);
+    transpose_4s   (v21, v23, v3,  v1);
+    transpose_4s   (v16, v18, v24, v4);
+    transpose_4s   (v17, v19, v25, v5);
+
+    SUMSUB_AB   (v0,  v2,  v20, v22);
+    SUMSUB_AB   (v1,  v3,  v21, v23);
+    SUMSUB_AB   (v4,  v6,  v16, v18);
+    SUMSUB_AB   (v5,  v7,  v17, v19);
+
+    transpose_2d   (v16, v20,  v0,  v4);
+    transpose_2d   (v17, v21,  v1,  v5);
+    transpose_2d   (v18, v22,  v2,  v6);
+    transpose_2d   (v19, v23,  v3,  v7);
+
+    // abs + max stands in for the last butterfly stage
+    v16 = vabsq_s16(v16);
+    v17 = vabsq_s16(v17);
+    v18 = vabsq_s16(v18);
+    v19 = vabsq_s16(v19);
+    v20 = vabsq_s16(v20);
+    v21 = vabsq_s16(v21);
+    v22 = vabsq_s16(v22);
+    v23 = vabsq_s16(v23);
+
+    v16 = vmaxq_u16(v16,v20);
+    v17 = vmaxq_u16(v17,v21);
+    v18 = vmaxq_u16(v18,v22);
+    v19 = vmaxq_u16(v19,v23);
+
+#if HIGH_BIT_DEPTH
+    v0 = vpaddlq_u16(v16);
+    v1 = vpaddlq_u16(v17);
+    v0 = vpadalq_u16(v0,v18);
+    v1 = vpadalq_u16(v1,v19);
+    // ^ HBD: v0/v1 now carry 32-bit partial sums inside the int16x8_t storage
+#else //HIGH_BIT_DEPTH
+    // 8-bit: v0/v1 carry 16-bit partial sums
+    v0 = vaddq_u16(v16,v17);
+    v1 = vaddq_u16(v18,v19);
+
+#endif //HIGH_BIT_DEPTH
+    
+#else // HIGH_BIT_DEPTH 12 bit only, switching math to int32, each int16x8 is up-converted to 2 int32x4 (low and high)
+
+    int32x4_t v2l,v2h,v3l,v3h,v24l,v24h,v25l,v25h,v0l,v0h,v1l,v1h;
+    int32x4_t v22l,v22h,v23l,v23h;
+    int32x4_t v4l,v4h,v5l,v5h;
+    int32x4_t v6l,v6h,v7l,v7h;
+    int32x4_t v16l,v16h,v17l,v17h;
+    int32x4_t v18l,v18h,v19l,v19h;
+    int32x4_t v20l,v20h,v21l,v21h;
+
+    ISUMSUB_AB_FROM_INT16(v2l, v2h, v3l, v3h, v20, v21);
+    ISUMSUB_AB_FROM_INT16(v24l, v24h, v25l, v25h, v4, v5);
+
+    v22l = vmovl_s16(vget_low_s16(v22));
+    v22h = vmovl_high_s16(v22);
+    v23l = vmovl_s16(vget_low_s16(v23));
+    v23h = vmovl_high_s16(v23);
+    
+    ISUMSUB_AB(v0l,  v1l,  v22l, v23l);
+    ISUMSUB_AB(v0h,  v1h,  v22h, v23h);
+
+    v6l = vmovl_s16(vget_low_s16(v6));
+    v6h = vmovl_high_s16(v6);
+    v7l = vmovl_s16(vget_low_s16(v7));
+    v7h = vmovl_high_s16(v7);
+
+    ISUMSUB_AB   (v4l,  v5l,  v6l,  v7l);
+    ISUMSUB_AB   (v4h,  v5h,  v6h,  v7h);
+    // transpose_2d on the widened data plays the role transpose_4s has in the 16-bit path
+    transpose_2d   (v20l, v22l, v2l,  v0l);
+    transpose_2d   (v21l, v23l, v3l,  v1l);
+    transpose_2d   (v16l, v18l, v24l, v4l);
+    transpose_2d   (v17l, v19l, v25l, v5l);
+
+    transpose_2d   (v20h, v22h, v2h,  v0h);
+    transpose_2d   (v21h, v23h, v3h,  v1h);
+    transpose_2d   (v16h, v18h, v24h, v4h);
+    transpose_2d   (v17h, v19h, v25h, v5h);
+
+    ISUMSUB_AB   (v0l,  v2l,  v20l, v22l);
+    ISUMSUB_AB   (v1l,  v3l,  v21l, v23l);
+    ISUMSUB_AB   (v4l,  v6l,  v16l, v18l);
+    ISUMSUB_AB   (v5l,  v7l,  v17l, v19l);
+
+    ISUMSUB_AB   (v0h,  v2h,  v20h, v22h);
+    ISUMSUB_AB   (v1h,  v3h,  v21h, v23h);
+    ISUMSUB_AB   (v4h,  v6h,  v16h, v18h);
+    ISUMSUB_AB   (v5h,  v7h,  v17h, v19h);
+    // NOTE(review): plain register regrouping; presumably mirrors the final transpose_2d of the 16-bit path - confirm
+    v16l = v0l;
+    v16h = v4l;
+    v20l = v0h;
+    v20h = v4h;
+    
+    v17l = v1l;
+    v17h = v5l;
+    v21l = v1h;
+    v21h = v5h;
+    
+    v18l = v2l;
+    v18h = v6l;
+    v22l = v2h;
+    v22h = v6h;
+    
+    v19l = v3l;
+    v19h = v7l;
+    v23l = v3h;
+    v23h = v7h;
+
+    v16l = vabsq_s32(v16l);
+    v17l = vabsq_s32(v17l);
+    v18l = vabsq_s32(v18l);
+    v19l = vabsq_s32(v19l);
+    v20l = vabsq_s32(v20l);
+    v21l = vabsq_s32(v21l);
+    v22l = vabsq_s32(v22l);
+    v23l = vabsq_s32(v23l);
+
+    v16h = vabsq_s32(v16h);
+    v17h = vabsq_s32(v17h);
+    v18h = vabsq_s32(v18h);
+    v19h = vabsq_s32(v19h);
+    v20h = vabsq_s32(v20h);
+    v21h = vabsq_s32(v21h);
+    v22h = vabsq_s32(v22h);
+    v23h = vabsq_s32(v23h);
+
+    v16l = vmaxq_u32(v16l,v20l);
+    v17l = vmaxq_u32(v17l,v21l);
+    v18l = vmaxq_u32(v18l,v22l);
+    v19l = vmaxq_u32(v19l,v23l);
+
+    v16h = vmaxq_u32(v16h,v20h);
+    v17h = vmaxq_u32(v17h,v21h);
+    v18h = vmaxq_u32(v18h,v22h);
+    v19h = vmaxq_u32(v19h,v23h);
+
+    v16l = vaddq_u32(v16l,v16h);
+    v17l = vaddq_u32(v17l,v17h);
+    v18l = vaddq_u32(v18l,v18h);
+    v19l = vaddq_u32(v19l,v19h);
+    // 12-bit: v0/v1 carry 32-bit partial sums inside the int16x8_t storage
+    v0 = vaddq_u32(v16l, v17l);
+    v1 = vaddq_u32(v18l,v19l);
+    
+    
+#endif
+    
+}
+
+
+// 8x8 SATD partial: builds the differences, runs the transform; out v0..v3 hold lanes the caller still has to sum.
+static inline void _satd_8x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2,
+                            int16x8_t& v0,int16x8_t&v1, int16x8_t&v2,int16x8_t&v3)
+{
+    
+    int16x8_t v20,v21,v22,v23;
+    _sub_8x8_fly(pix1,stride_pix1,pix2,stride_pix2,v0,v1,v2,v3,v20,v21,v22,v23);
+    _satd_8x4v_8x8h_neon(v0,v1,v2,v3,v20,v21,v22,v23);
+    
+}
+
+
+// 8x8 SATD of pix1 vs pix2: reduces the four partial vectors from _satd_8x8_neon to a single int.
+int pixel_satd_8x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
+{
+    int16x8_t v30,v31;
+    int16x8_t v0,v1,v2,v3;
+    
+    _satd_8x8_neon(pix1,stride_pix1,pix2,stride_pix2,v0,v1,v2,v3);
+#if !(HIGH_BIT_DEPTH)
+    v30 = vaddq_u16(v0,v1);
+    v31 = vaddq_u16(v2,v3);
+    
+    uint16x8_t sum = vaddq_u16(v30,v31);
+    return vaddvq_s32(vpaddlq_u16(sum));
+#else
+    // HBD: widen to 32 bit one step earlier than the 8-bit branch (v30 and v31 are widened separately)
+    v30 = vaddq_u16(v0,v1);
+    v31 = vaddq_u16(v2,v3);
+
+    int32x4_t sum = vpaddlq_u16(v30);
+    sum = vpadalq_u16(sum, v31);
+    return vaddvq_s32(sum);
+#endif
+}
+
+// 8x8 SA8D of pix1 vs pix2; returns (sum of the partial results + 1) >> 1.
+int pixel_sa8d_8x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
+{
+    int16x8_t v0,v1,v2,v3;
+    int16x8_t v20,v21,v22,v23;
+    
+    _sub_8x8_fly(pix1, stride_pix1, pix2, stride_pix2, v0, v1, v2, v3, v20, v21, v22, v23);
+    _sa8d_8x8_neon_end(v0, v1, v2, v3, v20, v21, v22, v23);
+
+#if HIGH_BIT_DEPTH
+    // HBD: _sa8d_8x8_neon_end leaves 32-bit partial sums in v0/v1, so reduce them as 32-bit lanes
+    int32x4_t s = vaddq_u32(v0,v1);
+    return (vaddvq_u32(s) + 1) >> 1;
+#else
+    return (vaddlvq_s16(vaddq_u16(v0, v1)) + 1) >> 1;
+#endif
+}
+
+
+
+
+// 16x16 SA8D: four 8x8 sub-blocks accumulated into 32-bit v30/v31; returns (total + 1) >> 1 (rounded once, at the end).
+int pixel_sa8d_16x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
+{
+    int16x8_t v0,v1,v2,v3;
+    int16x8_t v20,v21,v22,v23;
+    int32x4_t v30,v31;
+    // top-left 8x8
+    _sub_8x8_fly(pix1, stride_pix1, pix2, stride_pix2, v0, v1, v2, v3, v20, v21, v22, v23);
+    _sa8d_8x8_neon_end(v0, v1, v2, v3, v20, v21, v22, v23);
+
+#if !(HIGH_BIT_DEPTH)
+    v30 = vpaddlq_u16(v0);
+    v31 = vpaddlq_u16(v1);
+#else
+    v30 = vaddq_s32(v0,v1);
+#endif
+    // top-right 8x8 (in the HBD branch this block is what first initialises v31)
+    _sub_8x8_fly(pix1 + 8, stride_pix1, pix2 + 8, stride_pix2, v0, v1, v2, v3, v20, v21, v22, v23);
+    _sa8d_8x8_neon_end(v0, v1, v2, v3, v20, v21, v22, v23);
+
+#if !(HIGH_BIT_DEPTH)
+     v30 = vpadalq_u16(v30,v0);
+     v31 = vpadalq_u16(v31,v1);
+#else
+     v31 = vaddq_s32(v0,v1);
+#endif
+
+    // bottom-left 8x8
+    _sub_8x8_fly(pix1 + 8*stride_pix1, stride_pix1, pix2 + 8*stride_pix2, stride_pix2, v0, v1, v2, v3, v20, v21, v22, v23);
+    _sa8d_8x8_neon_end(v0, v1, v2, v3, v20, v21, v22, v23);
+
+#if !(HIGH_BIT_DEPTH)
+    v30 = vpadalq_u16(v30,v0);
+    v31 = vpadalq_u16(v31,v1);
+#else
+    v30 = vaddq_s32(v30,v0);
+    v31 = vaddq_s32(v31,v1);
+#endif
+    // bottom-right 8x8
+    _sub_8x8_fly(pix1 + 8*stride_pix1 + 8, stride_pix1, pix2 + 8*stride_pix2 + 8, stride_pix2, v0, v1, v2, v3, v20, v21, v22, v23);
+    _sa8d_8x8_neon_end(v0, v1, v2, v3, v20, v21, v22, v23);
+
+#if !(HIGH_BIT_DEPTH)
+     v30 = vpadalq_u16(v30,v0);
+     v31 = vpadalq_u16(v31,v1);
+#else
+     v30 = vaddq_s32(v30,v0);
+     v31 = vaddq_s32(v31,v1);
+#endif
+
+    v30 = vaddq_u32(v30,v31);
+    
+    return (vaddvq_u32(v30) + 1) >> 1;
+}
+
+
+
+
+
+
+
+// Fills a size x size block of int16_t at dst (row pitch dstride, in elements) with val.
+template<int size>
+void blockfill_s_neon(int16_t* dst, intptr_t dstride, int16_t val)
+{
+  const int16x8_t v = vdupq_n_s16(val);   // loop-invariant splat of val, built once instead of once per row
+  for (int y = 0; y < size; y++) {
+    int x = 0;
+    for (; (x + 8) <= size; x+=8) {       // 8 elements per vector store
+        *(int16x8_t*)&dst[y * dstride + x] = v;
+    }
+    for (; x < size; x++) {               // scalar tail when size is not a multiple of 8
+        dst[y * dstride + x] = val;
+    }
+  }
+}
+// Sum of absolute differences between two lx x ly blocks (pix1 / pix2, each with its own row stride).
+template<int lx, int ly>
+int sad_pp_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
+{
+  int sum = 0;
+
+  // per-row vector accumulators are re-zeroed for every row and folded into sum at the end of the row
+  for (int y = 0; y < ly; y++)
+  {
+#if HIGH_BIT_DEPTH
+      int x=0;
+      uint16x8_t vsum16_1 = vdupq_n_u16(0);
+      for (; (x + 8) <= lx; x+=8) {
+        uint16x8_t p1 = *(uint16x8_t*)&pix1[x];
+        uint16x8_t p2 = *(uint16x8_t*)&pix2[x];
+        vsum16_1 = vabaq_s16(vsum16_1,p1,p2);   // NOTE(review): signed abs-diff on uint16 data; needs values < 32768 - confirm
+              
+      }
+      if (lx & 4) {
+        uint16x4_t p1 = *(uint16x4_t*)&pix1[x];
+        uint16x4_t p2 = *(uint16x4_t*)&pix2[x];
+        sum += vaddlv_s16(vaba_s16(vdup_n_s16(0),p1,p2));
+        x += 4;
+      }
+      if (lx >= 4) {
+        sum += vaddlvq_s16(vsum16_1);
+      }
+
+#else
+
+    int x=0;
+    uint16x8_t vsum16_1 = vdupq_n_u16(0);
+    uint16x8_t vsum16_2 = vdupq_n_u16(0);
+    // 16 pixels per iteration: low half accumulates into vsum16_1, high half into vsum16_2
+    for (; (x + 16) <= lx; x+=16) {
+      uint8x16_t p1 = *(uint8x16_t*)&pix1[x];
+      uint8x16_t p2 = *(uint8x16_t*)&pix2[x];
+      vsum16_1 = vabal_u8(vsum16_1,vget_low_u8(p1),vget_low_u8(p2));
+      vsum16_2 = vabal_high_u8(vsum16_2,p1,p2);
+    }
+    if (lx & 8) {
+      uint8x8_t p1 = *(uint8x8_t*)&pix1[x];
+      uint8x8_t p2 = *(uint8x8_t*)&pix2[x];
+      vsum16_1 = vabal_u8(vsum16_1,p1,p2);
+      x += 8;
+    }
+    if (lx & 4) {
+      uint32x2_t p1 = vdup_n_u32(0);            // 4 pixels in lane 0, lane 1 stays zero in both operands
+      p1[0] = *(uint32_t*)&pix1[x];
+      uint32x2_t p2 = vdup_n_u32(0);
+      p2[0] = *(uint32_t*)&pix2[x];
+      vsum16_1 = vabal_u8(vsum16_1,p1,p2);
+      x += 4;
+    }
+    if (lx >= 16) {
+      vsum16_1 = vaddq_u16(vsum16_1,vsum16_2);
+    }
+    if (lx >= 4) {
+      sum += vaddvq_u16(vsum16_1);
+    }
+
+#endif
+    if (lx & 3) for (; x < lx; x++) {           // scalar tail when lx is not a multiple of 4
+        sum += abs(pix1[x] - pix2[x]);
+    }
+    
+    pix1 += stride_pix1;
+    pix2 += stride_pix2;
+  }
+
+  return sum;
+}
+// SAD of one lx x ly block (pix1, row stride FENC_STRIDE) against three candidates pix2..pix4 (stride frefstride) -> res[0..2].
+template<int lx, int ly>
+void sad_x3_neon(const pixel* pix1, const pixel* pix2, const pixel* pix3, const pixel* pix4, intptr_t frefstride, int32_t* res)
+{
+  res[0] = 0;
+  res[1] = 0;
+  res[2] = 0;
+  for (int y = 0; y < ly; y++)
+  {
+    int x = 0;
+    uint16x8_t vsum16_0 = vdupq_n_u16(0);   // per-row accumulators, one per candidate block
+    uint16x8_t vsum16_1 = vdupq_n_u16(0);
+    uint16x8_t vsum16_2 = vdupq_n_u16(0);
+#if HIGH_BIT_DEPTH
+      for (; (x + 8) <= lx; x+=8) {
+        uint16x8_t p1 = *(uint16x8_t*)&pix1[x];
+        uint16x8_t p2 = *(uint16x8_t*)&pix2[x];
+        uint16x8_t p3 = *(uint16x8_t*)&pix3[x];
+        uint16x8_t p4 = *(uint16x8_t*)&pix4[x];
+        vsum16_0 = vabaq_s16(vsum16_0,p1,p2);
+        vsum16_1 = vabaq_s16(vsum16_1,p1,p3);
+        vsum16_2 = vabaq_s16(vsum16_2,p1,p4);
+        // NOTE(review): signed abs-diff on uint16 data; needs values < 32768 - confirm
+      }
+      if (lx & 4) {
+        uint16x4_t p1 = *(uint16x4_t*)&pix1[x];
+        uint16x4_t p2 = *(uint16x4_t*)&pix2[x];
+        uint16x4_t p3 = *(uint16x4_t*)&pix3[x];
+        uint16x4_t p4 = *(uint16x4_t*)&pix4[x];
+        res[0] += vaddlv_s16(vaba_s16(vdup_n_s16(0),p1,p2));
+        res[1] += vaddlv_s16(vaba_s16(vdup_n_s16(0),p1,p3));
+        res[2] += vaddlv_s16(vaba_s16(vdup_n_s16(0),p1,p4));
+        x += 4;
+      }
+      if (lx >= 4) {
+        res[0] += vaddlvq_s16(vsum16_0);
+        res[1] += vaddlvq_s16(vsum16_1);
+        res[2] += vaddlvq_s16(vsum16_2);
+      }
+#else
+    // 8-bit: 16 pixels per iteration; low and high halves both accumulate into the same per-candidate vector
+    for (; (x + 16) <= lx; x+=16) {
+      uint8x16_t p1 = *(uint8x16_t*)&pix1[x];
+      uint8x16_t p2 = *(uint8x16_t*)&pix2[x];
+      uint8x16_t p3 = *(uint8x16_t*)&pix3[x];
+      uint8x16_t p4 = *(uint8x16_t*)&pix4[x];
+      vsum16_0 = vabal_u8(vsum16_0,vget_low_u8(p1),vget_low_u8(p2));
+      vsum16_0 = vabal_high_u8(vsum16_0,p1,p2);
+      vsum16_1 = vabal_u8(vsum16_1,vget_low_u8(p1),vget_low_u8(p3));
+      vsum16_1 = vabal_high_u8(vsum16_1,p1,p3);
+      vsum16_2 = vabal_u8(vsum16_2,vget_low_u8(p1),vget_low_u8(p4));
+      vsum16_2 = vabal_high_u8(vsum16_2,p1,p4);
+    }
+    if (lx & 8) {
+      uint8x8_t p1 = *(uint8x8_t*)&pix1[x];
+      uint8x8_t p2 = *(uint8x8_t*)&pix2[x];
+      uint8x8_t p3 = *(uint8x8_t*)&pix3[x];
+      uint8x8_t p4 = *(uint8x8_t*)&pix4[x];
+      vsum16_0 = vabal_u8(vsum16_0,p1,p2);
+      vsum16_1 = vabal_u8(vsum16_1,p1,p3);
+      vsum16_2 = vabal_u8(vsum16_2,p1,p4);
+      x += 8;
+    }
+    if (lx & 4) {
+      uint32x2_t p1 = vdup_n_u32(0);        // 4 pixels in lane 0, lane 1 stays zero in every operand
+      p1[0] = *(uint32_t*)&pix1[x];
+      uint32x2_t p2 = vdup_n_u32(0);
+      p2[0] = *(uint32_t*)&pix2[x];
+      uint32x2_t p3 = vdup_n_u32(0);
+      p3[0] = *(uint32_t*)&pix3[x];
+      uint32x2_t p4 = vdup_n_u32(0);
+      p4[0] = *(uint32_t*)&pix4[x];
+      vsum16_0 = vabal_u8(vsum16_0,p1,p2);
+      vsum16_1 = vabal_u8(vsum16_1,p1,p3);
+      vsum16_2 = vabal_u8(vsum16_2,p1,p4);
+      x += 4;
+    }
+    if (lx >= 4) {
+      res[0] += vaddvq_u16(vsum16_0);
+      res[1] += vaddvq_u16(vsum16_1);
+      res[2] += vaddvq_u16(vsum16_2);
+    }
+
+#endif
+    if (lx & 3) for (; x < lx; x++)         // scalar tail when lx is not a multiple of 4
+    {
+      res[0] += abs(pix1[x] - pix2[x]);
+      res[1] += abs(pix1[x] - pix3[x]);
+      res[2] += abs(pix1[x] - pix4[x]);
+    }
+
+    pix1 += FENC_STRIDE;
+    pix2 += frefstride;
+    pix3 += frefstride;
+    pix4 += frefstride;
+  }
+}
+// sad_x4_neon: SAD of the lx-by-ly block at pix1 (row stride FENC_STRIDE) against four candidates pix2..pix5 (row stride frefstride); totals go to res[0..3].
+template<int lx, int ly>
+void sad_x4_neon(const pixel* pix1, const pixel* pix2, const pixel* pix3, const pixel* pix4, const pixel* pix5, intptr_t frefstride, int32_t* res)
+{
+  res[0] = 0;
+  res[1] = 0;
+  res[2] = 0;
+  res[3] = 0;
+  for (int y = 0; y < ly; y++)
+  {
+    int x=0;
+    uint16x8_t vsum16_0 = vdupq_n_u16(0); // per-row 16-bit lane accumulators, one per candidate
+    uint16x8_t vsum16_1 = vdupq_n_u16(0);
+    uint16x8_t vsum16_2 = vdupq_n_u16(0);
+    uint16x8_t vsum16_3 = vdupq_n_u16(0);
+#if HIGH_BIT_DEPTH
+      for (; (x + 8) <= lx; x+=8) {
+        uint16x8_t p1 = *(uint16x8_t*)&pix1[x];
+        uint16x8_t p2 = *(uint16x8_t*)&pix2[x];
+        uint16x8_t p3 = *(uint16x8_t*)&pix3[x];
+        uint16x8_t p4 = *(uint16x8_t*)&pix4[x];
+        uint16x8_t p5 = *(uint16x8_t*)&pix5[x];
+        vsum16_0 = vabaq_s16(vsum16_0,p1,p2);
+        vsum16_1 = vabaq_s16(vsum16_1,p1,p3);
+        vsum16_2 = vabaq_s16(vsum16_2,p1,p4);
+        vsum16_3 = vabaq_s16(vsum16_3,p1,p5);
+
+      }
+      if (lx & 4) { // leftover group of four pixels is summed straight into res[]
+        uint16x4_t p1 = *(uint16x4_t*)&pix1[x];
+        uint16x4_t p2 = *(uint16x4_t*)&pix2[x];
+        uint16x4_t p3 = *(uint16x4_t*)&pix3[x];
+        uint16x4_t p4 = *(uint16x4_t*)&pix4[x];
+        uint16x4_t p5 = *(uint16x4_t*)&pix5[x];
+        res[0] += vaddlv_s16(vaba_s16(vdup_n_s16(0),p1,p2));
+        res[1] += vaddlv_s16(vaba_s16(vdup_n_s16(0),p1,p3));
+        res[2] += vaddlv_s16(vaba_s16(vdup_n_s16(0),p1,p4));
+        res[3] += vaddlv_s16(vaba_s16(vdup_n_s16(0),p1,p5));
+        x += 4;
+      }
+      if (lx >= 4) { // fold this row's lane accumulators into res[]
+        res[0] += vaddlvq_s16(vsum16_0);
+        res[1] += vaddlvq_s16(vsum16_1);
+        res[2] += vaddlvq_s16(vsum16_2);
+        res[3] += vaddlvq_s16(vsum16_3);
+      }
+
+#else
+    // 8-bit pixels: widen |a - b| into the 16-bit accumulators, 16 / 8 / 4 pixels at a time
+    for (; (x + 16) <= lx; x+=16) {
+      uint8x16_t p1 = *(uint8x16_t*)&pix1[x];
+      uint8x16_t p2 = *(uint8x16_t*)&pix2[x];
+      uint8x16_t p3 = *(uint8x16_t*)&pix3[x];
+      uint8x16_t p4 = *(uint8x16_t*)&pix4[x];
+      uint8x16_t p5 = *(uint8x16_t*)&pix5[x];
+      vsum16_0 = vabal_u8(vsum16_0,vget_low_u8(p1),vget_low_u8(p2));
+      vsum16_0 = vabal_high_u8(vsum16_0,p1,p2);
+      vsum16_1 = vabal_u8(vsum16_1,vget_low_u8(p1),vget_low_u8(p3));
+      vsum16_1 = vabal_high_u8(vsum16_1,p1,p3);
+      vsum16_2 = vabal_u8(vsum16_2,vget_low_u8(p1),vget_low_u8(p4));
+      vsum16_2 = vabal_high_u8(vsum16_2,p1,p4);
+      vsum16_3 = vabal_u8(vsum16_3,vget_low_u8(p1),vget_low_u8(p5));
+      vsum16_3 = vabal_high_u8(vsum16_3,p1,p5);
+    }
+    if (lx & 8) {
+      uint8x8_t p1 = *(uint8x8_t*)&pix1[x];
+      uint8x8_t p2 = *(uint8x8_t*)&pix2[x];
+      uint8x8_t p3 = *(uint8x8_t*)&pix3[x];
+      uint8x8_t p4 = *(uint8x8_t*)&pix4[x];
+      uint8x8_t p5 = *(uint8x8_t*)&pix5[x];
+      vsum16_0 = vabal_u8(vsum16_0,p1,p2);
+      vsum16_1 = vabal_u8(vsum16_1,p1,p3);
+      vsum16_2 = vabal_u8(vsum16_2,p1,p4);
+      vsum16_3 = vabal_u8(vsum16_3,p1,p5);
+      x += 8;
+    }
+    if (lx & 4) {
+      uint32x2_t p1 = vdup_n_u32(0); // only lane 0 is filled, so the upper four bytes stay zero in every operand
+      p1[0] = *(uint32_t*)&pix1[x];
+      uint32x2_t p2 = vdup_n_u32(0);
+      p2[0] = *(uint32_t*)&pix2[x];
+      uint32x2_t p3 = vdup_n_u32(0);
+      p3[0] = *(uint32_t*)&pix3[x];
+      uint32x2_t p4 = vdup_n_u32(0);
+      p4[0] = *(uint32_t*)&pix4[x];
+      uint32x2_t p5 = vdup_n_u32(0);
+      p5[0] = *(uint32_t*)&pix5[x];
+      vsum16_0 = vabal_u8(vsum16_0,p1,p2);
+      vsum16_1 = vabal_u8(vsum16_1,p1,p3);
+      vsum16_2 = vabal_u8(vsum16_2,p1,p4);
+      vsum16_3 = vabal_u8(vsum16_3,p1,p5);
+      x += 4;
+    }
+    if (lx >= 4) {
+      res[0] += vaddvq_u16(vsum16_0);
+      res[1] += vaddvq_u16(vsum16_1);
+      res[2] += vaddvq_u16(vsum16_2);
+      res[3] += vaddvq_u16(vsum16_3);
+    }
+
+#endif
+    if (lx & 3) for (; x < lx; x++) // scalar tail, only when lx is not a multiple of 4
+    {
+      res[0] += abs(pix1[x] - pix2[x]);
+      res[1] += abs(pix1[x] - pix3[x]);
+      res[2] += abs(pix1[x] - pix4[x]);
+      res[3] += abs(pix1[x] - pix5[x]);
+    }
+
+    pix1 += FENC_STRIDE;
+    pix2 += frefstride;
+    pix3 += frefstride;
+    pix4 += frefstride;
+    pix5 += frefstride;
+  }
+}
+// sse_neon: sum of squared differences between two lx-by-ly blocks. Same-width 8-bit or 16-bit
+// inputs take the NEON path; any other T1/T2 mix is handled entirely by the scalar loop.
+template<int lx, int ly, class T1, class T2>
+sse_t sse_neon(const T1* pix1, intptr_t stride_pix1, const T2* pix2, intptr_t stride_pix2)
+{
+    sse_t sum = 0;
+    const bool both16 = (sizeof(T1) == 2 && sizeof(T2) == 2);
+    const bool both8 = (sizeof(T1) == 1 && sizeof(T2) == 1);
+
+    int32x4_t vsum1 = vdupq_n_s32(0);
+    int32x4_t vsum2 = vdupq_n_s32(0);
+    for (int y = 0; y < ly; y++)
+    {
+      int x = 0;
+        // Vector path only for supported type pairs, so tmp is always initialised before use.
+        for (; (both16 || both8) && (x+8) <= lx; x+=8)
+        {
+          int16x8_t tmp;
+          if (both16) {
+            tmp = vsubq_s16(*(const int16x8_t *)&pix1[x],*(const int16x8_t *)&pix2[x]);
+          } else {
+            // vsubl_u8 wraps modulo 2^16; reinterpreting the lanes as signed gives the true difference.
+            tmp = vreinterpretq_s16_u16(vsubl_u8(*(const uint8x8_t *)&pix1[x],*(const uint8x8_t *)&pix2[x]));
+          }
+          vsum1 = vmlal_s16(vsum1,vget_low_s16(tmp),vget_low_s16(tmp));
+          vsum2 = vmlal_high_s16(vsum2,tmp,tmp);
+        }
+        for (; x < lx; x++)
+        {
+            int tmp = pix1[x] - pix2[x];
+            sum += (tmp * tmp);
+        }
+        // 16-bit inputs: flush the 32-bit lane sums into the scalar total after every row.
+        if (both16)
+        {
+            sum += vaddvq_u32(vreinterpretq_u32_s32(vaddq_s32(vsum1,vsum2)));
+            vsum1 = vsum2 = vdupq_n_s32(0);
+        }
+
+        pix1 += stride_pix1;
+        pix2 += stride_pix2;
+    }
+
+    // Whatever is still held in the lane accumulators (the 8-bit path) is added once at the end.
+    return sum + vaddvq_u32(vreinterpretq_u32_s32(vaddq_s32(vsum1,vsum2)));
+}
+// blockcopy_ps_neon: copy a bx-by-by block of pixels from b (row stride strideb) into the
+// 16-bit buffer a (row stride stridea), widening each 8-bit pixel when HIGH_BIT_DEPTH is off.
+template<int bx, int by>
+void blockcopy_ps_neon(int16_t* a, intptr_t stridea, const pixel* b, intptr_t strideb)
+{
+    for (int y = 0; y < by; y++)
+    {
+      int x= 0;
+      for (; (x + 8) <= bx; x+=8)
+      {
+#if HIGH_BIT_DEPTH
+        *(int16x8_t *)&a[x] = *(int16x8_t *)&b[x];
+#else
+        *(int16x8_t *)&a[x] = vreinterpretq_s16_u16(vmovl_u8(*(const uint8x8_t *)&b[x])); // load as unsigned bytes, zero-extend
+#endif
+      }
+      for (; x < bx; x++) {
+          a[x] = (int16_t)b[x];
+      }
+      
+      a += stridea;
+      b += strideb;
+    }
+}
+// blockcopy_pp_neon: copy a bx-by-by block of pixels from b (row stride strideb) to a (row stride stridea),
+// using the widest vector/word moves that fit and a scalar loop for what is left of each row.
+template<int bx, int by>
+void blockcopy_pp_neon(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb)
+{
+    for (int y = 0; y < by; y++)
+    {
+      int x = 0;
+#if HIGH_BIT_DEPTH
+      for (; (x + 8) <= bx; x+=8)
+      {
+        *(int16x8_t *)&a[x] = *(int16x8_t *)&b[x];
+      }
+      if (bx & 4)
+      {
+        *(uint64_t *)&a[x] = *(uint64_t *)&b[x]; // four pixels moved as one 64-bit word
+        x += 4;
+      }
+#else
+      for (; (x + 16) <= bx; x+=16)
+      {
+        *(uint8x16_t *)&a[x] = *(uint8x16_t *)&b[x];
+      }
+      if (bx & 8)
+      {
+          *(uint8x8_t *)&a[x] = *(uint8x8_t *)&b[x];
+          x += 8;
+      }
+      if (bx & 4)
+      {
+          *(uint32_t *)&a[x] = *(uint32_t *)&b[x]; // four pixels moved as one 32-bit word
+          x += 4;
+      }
+#endif
+      for (; x < bx; x++) { // remaining pixels (widths that are not a multiple of 4)
+          a[x] = b[x];
+      }
+
+      a += stridea;
+      b += strideb;
+    }
+}
+// pixel_sub_ps_neon: a = b0 - b1 as 16-bit values over a bx-by-by block.
+// Row strides: dstride for a, sstride0 for b0, sstride1 for b1.
+template<int bx, int by>
+void pixel_sub_ps_neon(int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1)
+{
+    for (int y = 0; y < by; y++)
+    {
+      int x = 0;
+      for (; (x + 8) <= bx; x+=8) {
+#if HIGH_BIT_DEPTH
+        *(int16x8_t *)&a[x] = vsubq_s16(*(int16x8_t *)&b0[x], *(int16x8_t *)&b1[x]);
+#else
+        *(int16x8_t *)&a[x] = vsubl_u8(*(uint8x8_t *)&b0[x], *(uint8x8_t *)&b1[x]); // widening subtract, 8 pixels per step
+#endif
+      }
+      for (; x < bx; x++) // scalar tail for widths that are not a multiple of 8
+          a[x] = (int16_t)(b0[x] - b1[x]);
+
+        b0 += sstride0;
+        b1 += sstride1;
+        a += dstride;
+    }
+}
+// pixel_add_ps_neon: a = b0 + b1 clamped to the pixel range, over a bx-by-by block; b0 holds pixels, b1 16-bit values.
+template<int bx, int by>
+void pixel_add_ps_neon(pixel* a, intptr_t dstride, const pixel* b0, const int16_t* b1, intptr_t sstride0, intptr_t sstride1)
+{
+        for (int y = 0; y < by; y++)
+        {
+          int x = 0;
+          for (; (x + 8) <= bx; x+=8) {
+            int16x8_t t;
+            int16x8_t b1e = *(int16x8_t *)&b1[x];
+            int16x8_t b0e;
+#if HIGH_BIT_DEPTH
+            b0e = *(int16x8_t *)&b0[x];
+            t = vaddq_s16(b0e,b1e);
+            t = vminq_s16(t,vdupq_n_s16((1 << X265_DEPTH) - 1)); // clamp to [0, 2^X265_DEPTH - 1]
+            t = vmaxq_s16(t,vdupq_n_s16(0));
+            *(int16x8_t *)&a[x] = t;
+#else
+            b0e = vmovl_u8(*(uint8x8_t *)&b0[x]);
+            t = vaddq_s16(b0e,b1e);
+            *(uint8x8_t *)&a[x] = vqmovun_s16(t); // saturating narrow to unsigned 8-bit does the clamp
+#endif
+          }
+          for (; x < bx; x++) // scalar tail for widths that are not a multiple of 8
+              a[x] = (int16_t)x265_clip(b0[x] + b1[x]);
+
+          b0 += sstride0;
+          b1 += sstride1;
+          a += dstride;
+        }
+}
+// addAvg_neon: dst = clamp((src0 + src1 + offset) >> shiftNum) over a bx-by-by block of 16-bit inputs.
+template<int bx, int by>
+void addAvg_neon(const int16_t* src0, const int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride)
+{
+
+    const int shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
+    const int offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
+
+    const int32x4_t addon = vdupq_n_s32(offset);
+    for (int y = 0; y < by; y++)
+    {
+      int x = 0;
+      // 8 outputs per iteration: widen to 32 bits, add the offset, shift right, narrow back
+        for (; (x + 8) <= bx; x += 8)
+        {
+          int16x8_t in0 = *(int16x8_t*)&src0[x];
+          int16x8_t in1 = *(int16x8_t*)&src1[x];
+          int32x4_t t1 = vaddl_s16(vget_low_s16(in0),vget_low_s16(in1));
+          int32x4_t t2 = vaddl_high_s16(in0,in1);
+          t1 = vaddq_s32(t1,addon);
+          t2 = vaddq_s32(t2,addon);
+          t1 = vshrq_n_s32(t1,shiftNum);
+          t2 = vshrq_n_s32(t2,shiftNum);
+          int16x8_t t = vuzp1q_s16(t1,t2); // even 16-bit elements of t1 then t2, i.e. the low half of each 32-bit lane
+#if HIGH_BIT_DEPTH
+          t = vminq_s16(t,vdupq_n_s16((1 << X265_DEPTH) - 1));
+          t = vmaxq_s16(t,vdupq_n_s16(0));
+          *(int16x8_t *)&dst[x] = t;
+#else
+          *(uint8x8_t *)&dst[x] = vqmovun_s16(t);
+#endif
+        }
+      for (; x < bx; x += 2) // scalar tail, two outputs per step
+      {
+          dst[x + 0] = x265_clip((src0[x + 0] + src1[x + 0] + offset) >> shiftNum);
+          dst[x + 1] = x265_clip((src0[x + 1] + src1[x + 1] + offset) >> shiftNum);
+      }
+
+        src0 += src0Stride;
+        src1 += src1Stride;
+        dst  += dstStride;
+    }
+}
+// pixelavg_pp_neon: dst = (src0 + src1 + 1) >> 1 per pixel over an lx-by-ly block; the trailing unnamed int is ignored.
+template<int lx, int ly>
+void pixelavg_pp_neon(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int)
+{
+    for (int y = 0; y < ly; y++)
+    {
+      int x = 0;
+      for (; (x+8) <= lx; x+=8) {
+#if HIGH_BIT_DEPTH
+        int16x8_t in0 = *(int16x8_t *)&src0[x];
+        int16x8_t in1 = *(int16x8_t *)&src1[x];
+        int16x8_t t = vaddq_s16(in0,in1);
+        t = vaddq_s16(t,vdupq_n_s16(1));
+        t = vshrq_n_s16(t,1);
+        *(int16x8_t *)&dst[x] = t;
+#else
+        int16x8_t in0 = vmovl_u8(*(uint8x8_t *)&src0[x]); // widen to 16 bits so the sum cannot overflow
+        int16x8_t in1 = vmovl_u8(*(uint8x8_t *)&src1[x]);
+        int16x8_t t = vaddq_s16(in0,in1);
+        t = vaddq_s16(t,vdupq_n_s16(1));
+        t = vshrq_n_s16(t,1);
+        *(uint8x8_t *)&dst[x] = vmovn_u16(t);
+#endif
+      }
+      for (; x < lx; x++) // scalar tail for widths that are not a multiple of 8
+          dst[x] = (src0[x] + src1[x] + 1) >> 1;
+
+      src0 += sstride0;
+      src1 += sstride1;
+      dst += dstride;
+    }
+}
+// cpy1Dto2D_shl_neon: copy a packed size-by-size block of 16-bit values from src into dst
+// (row stride dstStride), shifting every value left by shift bits.
+template<int size>
+void cpy1Dto2D_shl_neon(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift)
+{
+    X265_CHECK((((intptr_t)dst | (dstStride * sizeof(*dst))) & 15) == 0 || size == 4, "dst alignment error\n");
+    X265_CHECK(((intptr_t)src & 15) == 0, "src alignment error\n");
+    X265_CHECK(shift >= 0, "invalid shift\n");
+
+    for (int i = 0; i < size; i++)
+    {
+        int j = 0;
+        for (; (j+8) <= size; j+=8)
+        {
+          *(int16x8_t *)&dst[j] = vshlq_s16(*(int16x8_t*)&src[j],vdupq_n_s16(shift)); // shift count broadcast to all lanes
+        }
+        for (; j < size; j++) // scalar tail (the whole row when size is 4)
+        {
+            dst[j] = src[j] << shift;
+        }
+        src += size;
+        dst += dstStride;
+    }
+}
+// pixel_var_neon: for a size-by-size block of 8-bit samples (row stride i_stride), return the sum of the
+// samples in the low 32 bits of the result and the sum of their squares in the high 32 bits.
+template<int size>
+uint64_t pixel_var_neon(const uint8_t* pix, intptr_t i_stride)
+{
+    uint32_t sum = 0, sqr = 0;
+
+    int32x4_t vsqr = vdupq_n_s32(0);
+    for (int y = 0; y < size; y++)
+    {
+      int x = 0;
+      int16x8_t vsum = vdupq_n_s16(0); // per-row lane sums, reset for every row
+      for (; (x + 8) <= size; x+=8)
+      {
+        int16x8_t in;
+        in = vmovl_u8(*(uint8x8_t*)&pix[x]);
+        vsum = vaddq_u16(vsum,in);
+        vsqr = vmlal_s16(vsqr,vget_low_s16(in),vget_low_s16(in));
+        vsqr = vmlal_high_s16(vsqr,in,in);
+      }
+      for (; x < size; x++) // scalar tail (the whole row when size is 4)
+      {
+          sum += pix[x];
+          sqr += pix[x] * pix[x];
+      }
+      sum += vaddvq_s16(vsum); // fold this row's lane sums into the scalar total
+
+      pix += i_stride;
+    }
+    sqr += vaddvq_u32(vsqr);
+    return sum + ((uint64_t)sqr << 32);
+}
+// getResidual_neon: residual = fenc - pred for a blockSize-square block; all three buffers use the same row stride.
+template<int blockSize>
+void getResidual_neon(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride)
+{
+    for (int y = 0; y < blockSize; y++)
+    {
+      int x = 0;
+      for (; (x + 8) <= blockSize; x+=8) { // <= so the final (or only) group of 8 is vectorised as well
+        int16x8_t vfenc,vpred;
+#if HIGH_BIT_DEPTH
+        vfenc = *(int16x8_t *)&fenc[x];
+        vpred = *(int16x8_t *)&pred[x];
+#else
+        vfenc = vmovl_u8(*(uint8x8_t *)&fenc[x]);
+        vpred = vmovl_u8(*(uint8x8_t *)&pred[x]);
+#endif
+        *(int16x8_t*)&residual[x] = vsubq_s16(vfenc,vpred);
+      }
+      for (; x < blockSize; x++) { // scalar tail (the whole row when blockSize is 4)
+            residual[x] = static_cast<int16_t>(fenc[x]) - static_cast<int16_t>(pred[x]);
+      }
+      fenc += stride;
+      residual += stride;
+      pred += stride;
+  }
+}
+
+#if 1//!(HIGH_BIT_DEPTH)
+template<int size>
+int psyCost_pp_neon(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride)
+{
+    // Static storage, hence zero-filled; passed with stride 0 as the second operand of every metric below.
+    static pixel zeroBuf[8] /* = { 0 } */;
+
+    if (!size)
+    {
+        /* 4x4 is too small for sa8d, so satd takes its place */
+        const int srcAc = pixel_satd_4x4_neon(source, sstride, zeroBuf, 0) - (sad_pp_neon<4, 4>(source, sstride, zeroBuf, 0) >> 2);
+        const int recAc = pixel_satd_4x4_neon(recon, rstride, zeroBuf, 0) - (sad_pp_neon<4, 4>(recon, rstride, zeroBuf, 0) >> 2);
+        return abs(srcAc - recAc);
+    }
+
+    // Larger blocks: visit every 8x8 tile and accumulate the per-tile energy gap.
+    const int edge = 1 << (size + 2);
+    uint32_t psyTotal = 0;
+    for (int row = 0; row < edge; row += 8)
+    {
+        for (int col = 0; col < edge; col += 8)
+        {
+            const pixel* srcTile = source + row * sstride + col;
+            const pixel* recTile = recon + row * rstride + col;
+            /* AC energy, measured by sa8d (AC + DC) minus SAD (DC) */
+            const int srcAc = pixel_sa8d_8x8_neon(srcTile, sstride, zeroBuf, 0) - (sad_pp_neon<8, 8>(srcTile, sstride, zeroBuf, 0) >> 2);
+            const int recAc = pixel_sa8d_8x8_neon(recTile, rstride, zeroBuf, 0) - (sad_pp_neon<8, 8>(recTile, rstride, zeroBuf, 0) >> 2);
+            psyTotal += abs(srcAc - recAc);
+        }
+    }
+
+    return psyTotal;
+}
+// sa8d8: total pixel_sa8d_8x8_neon cost over every 8x8 tile of a w-by-h block.
+template<int w, int h>
+int sa8d8(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
+{
+    int total = 0;
+    for (int row = 0; row < h; row += 8) {
+        const pixel* tiles1 = pix1 + i_pix1 * row;
+        const pixel* tiles2 = pix2 + i_pix2 * row;
+        for (int col = 0; col < w; col += 8)
+            total += pixel_sa8d_8x8_neon(tiles1 + col, i_pix1, tiles2 + col, i_pix2);
+    }
+
+    return total;
+}
+// sa8d16: total pixel_sa8d_16x16_neon cost over every 16x16 tile of a w-by-h block.
+template<int w, int h>
+int sa8d16(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
+{
+    int total = 0;
+    for (int row = 0; row < h; row += 16) {
+        const pixel* tiles1 = pix1 + i_pix1 * row;
+        const pixel* tiles2 = pix2 + i_pix2 * row;
+        for (int col = 0; col < w; col += 16)
+            total += pixel_sa8d_16x16_neon(tiles1 + col, i_pix1, tiles2 + col, i_pix2);
+    }
+    return total;
+}
+#endif
+// cpy2Dto1D_shl_neon: gather size rows of size values from src (row stride srcStride) into packed dst, each << shift.
+template<int size>
+void cpy2Dto1D_shl_neon(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift)
+{
+    X265_CHECK(((intptr_t)dst & 15) == 0, "dst alignment error\n");
+    X265_CHECK((((intptr_t)src | (srcStride * sizeof(*src))) & 15) == 0 || size == 4, "src alignment error\n");
+    X265_CHECK(shift >= 0, "invalid shift\n");
+
+    // Source row r lands at dst[r * size] .. dst[r * size + size - 1].
+    for (int row = 0; row < size; row++)
+    {
+        const int16_t* in = src + row * srcStride;
+        int16_t* out = dst + row * size;
+        for (int col = 0; col < size; col++)
+            out[col] = in[col] << shift;
+    }
+}
+
+
+#if 1//!(HIGH_BIT_DEPTH)
+template<int w, int h>
+// satd of a w-by-h block, accumulated one 4x4 tile at a time
+int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
+{
+    int total = 0;
+    for (int row = 0; row < h; row += 4) {
+        const pixel* tiles1 = pix1 + row * stride_pix1;
+        const pixel* tiles2 = pix2 + row * stride_pix2;
+        for (int col = 0; col < w; col += 4)
+            total += pixel_satd_4x4_neon(tiles1 + col, stride_pix1, tiles2 + col, stride_pix2);
+    }
+    return total;
+}
+
+template<int w, int h>
+// satd of a w-by-h block: 16x16 tiles if both sizes are multiples of 16, else 8x8 tiles if both are multiples of 8, else 8x4 tiles
+int satd8(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
+{
+    const int dims = w | h;
+    int total = 0;
+
+    if ((dims & 15) == 0) {
+        for (int row = 0; row < h; row += 16) {
+            const pixel* tiles1 = pix1 + row * stride_pix1;
+            const pixel* tiles2 = pix2 + row * stride_pix2;
+            for (int col = 0; col < w; col += 16)
+                total += pixel_satd_16x16_neon(tiles1 + col, stride_pix1, tiles2 + col, stride_pix2);
+        }
+    } else if ((dims & 7) == 0) {
+        for (int row = 0; row < h; row += 8) {
+            const pixel* tiles1 = pix1 + row * stride_pix1;
+            const pixel* tiles2 = pix2 + row * stride_pix2;
+            for (int col = 0; col < w; col += 8)
+                total += pixel_satd_8x8_neon(tiles1 + col, stride_pix1, tiles2 + col, stride_pix2);
+        }
+    } else {
+        // fallback: tiles 8 wide and 4 high
+        for (int row = 0; row < h; row += 4) {
+            const pixel* tiles1 = pix1 + row * stride_pix1;
+            const pixel* tiles2 = pix2 + row * stride_pix2;
+            for (int col = 0; col < w; col += 8)
+                total += pixel_satd_8x4_neon(tiles1 + col, stride_pix1, tiles2 + col, stride_pix2);
+        }
+    }
+
+    return total;
+}
+#endif
+// transpose_neon (generic, scalar): write the transpose of the blockSize-square block at src (row stride stride) into packed dst.
+template<int blockSize>
+void transpose_neon(pixel* dst, const pixel* src, intptr_t stride)
+{
+    pixel* out = dst;
+    for (int col = 0; col < blockSize; col++)
+        for (int row = 0; row < blockSize; row++)
+            *out++ = src[row * stride + col];
+}
+// Explicit specialisations of transpose_neon for 8, 16, 32 and 64: each forwards to a fixed-size helper,
+// passing the packed destination row length and the source stride (helper signatures are not visible here).
+template<>
+void transpose_neon<8>(pixel* dst, const pixel* src, intptr_t stride)
+{
+    transpose8x8(dst,src,8,stride);
+}
+
+template<>
+void transpose_neon<16>(pixel* dst, const pixel* src, intptr_t stride)
+{
+    transpose16x16(dst,src,16,stride);
+}
+
+template<>
+void transpose_neon<32>(pixel* dst, const pixel* src, intptr_t stride)
+{
+    transpose32x32(dst,src,32,stride);
+}
+
+// 64x64 is built from four 32x32 transposes: diagonal quadrants stay in place, off-diagonal quadrants swap.
+template<>
+void transpose_neon<64>(pixel* dst, const pixel* src, intptr_t stride)
+{
+    transpose32x32(dst,src,64,stride); // top-left -> top-left
+    transpose32x32(dst+32*64+32,src+32*stride+32,64,stride); // bottom-right -> bottom-right
+    transpose32x32(dst+32*64,src+32,64,stride); // source top-right -> destination bottom-left
+    transpose32x32(dst+32,src+32*stride,64,stride); // source bottom-left -> destination top-right
+}
+// pixel_ssd_s_neon: sum of the squares of every value in a size-by-size block of 16-bit values
+// (row stride dstride).
+template<int size>
+sse_t pixel_ssd_s_neon(const int16_t* a, intptr_t dstride)
+{
+    sse_t sum = 0;
+    
+    
+  int32x4_t vsum = vdupq_n_s32(0);
+
+    for (int y = 0; y < size; y++)
+    {
+      int x = 0;
+      
+      for (; (x + 8) <= size; x+=8) {
+        int16x8_t in = *(int16x8_t*)&a[x];
+        vsum = vmlal_s16(vsum,vget_low_s16(in),vget_low_s16(in)); // squares of lanes 0..3
+        vsum = vmlal_high_s16(vsum,(in),(in)); // squares of lanes 4..7
+      }
+      for (; x < size; x++) { // scalar tail (the whole row when size is 4)
+            sum += a[x] * a[x];
+      }
+
+        a += dstride;
+    }
+    return sum + vaddvq_s32(vsum);
+}
+
+
+};
+
+
+
+
+namespace X265_NS {
+// setupPixelPrimitives_neon: store the NEON kernels into the function-pointer slots of p
+// (luma PU/CU entries first, then the 4:2:0 and 4:2:2 chroma tables).
+void setupPixelPrimitives_neon(EncoderPrimitives &p)
+{
+  #define LUMA_PU(W, H) \
+      p.pu[LUMA_ ## W ## x ## H].copy_pp = blockcopy_pp_neon<W, H>; \
+      p.pu[LUMA_ ## W ## x ## H].addAvg[NONALIGNED] = addAvg_neon<W, H>; \
+      p.pu[LUMA_ ## W ## x ## H].addAvg[ALIGNED] = addAvg_neon<W, H>; \
+      p.pu[LUMA_ ## W ## x ## H].sad = sad_pp_neon<W, H>; \
+      p.pu[LUMA_ ## W ## x ## H].sad_x3 = sad_x3_neon<W, H>; \
+      p.pu[LUMA_ ## W ## x ## H].sad_x4 = sad_x4_neon<W, H>; \
+      p.pu[LUMA_ ## W ## x ## H].pixelavg_pp[NONALIGNED] = pixelavg_pp_neon<W, H>; \
+      p.pu[LUMA_ ## W ## x ## H].pixelavg_pp[ALIGNED] = pixelavg_pp_neon<W, H>;
+  
+#if !(HIGH_BIT_DEPTH)
+
+#define LUMA_CU(W, H) \
+      p.cu[BLOCK_ ## W ## x ## H].sub_ps        = pixel_sub_ps_neon<W, H>; \
+      p.cu[BLOCK_ ## W ## x ## H].add_ps[NONALIGNED]    = pixel_add_ps_neon<W, H>; \
+      p.cu[BLOCK_ ## W ## x ## H].add_ps[ALIGNED] = pixel_add_ps_neon<W, H>; \
+      p.cu[BLOCK_ ## W ## x ## H].copy_ps       = blockcopy_ps_neon<W, H>; \
+      p.cu[BLOCK_ ## W ## x ## H].copy_pp       = blockcopy_pp_neon<W, H>; \
+      p.cu[BLOCK_ ## W ## x ## H].blockfill_s[NONALIGNED] = blockfill_s_neon<W>;  \
+      p.cu[BLOCK_ ## W ## x ## H].blockfill_s[ALIGNED]    = blockfill_s_neon<W>;  \
+      p.cu[BLOCK_ ## W ## x ## H].cpy2Dto1D_shl = cpy2Dto1D_shl_neon<W>; \
+      p.cu[BLOCK_ ## W ## x ## H].cpy1Dto2D_shl[NONALIGNED] = cpy1Dto2D_shl_neon<W>; \
+      p.cu[BLOCK_ ## W ## x ## H].cpy1Dto2D_shl[ALIGNED] = cpy1Dto2D_shl_neon<W>; \
+      p.cu[BLOCK_ ## W ## x ## H].psy_cost_pp   = psyCost_pp_neon<BLOCK_ ## W ## x ## H>; \
+      p.cu[BLOCK_ ## W ## x ## H].transpose     = transpose_neon<W>; \
+      p.cu[BLOCK_ ## W ## x ## H].var           = pixel_var_neon<W>; \
+      p.cu[BLOCK_ ## W ## x ## H].calcresidual[NONALIGNED]  = getResidual_neon<W>; \
+      p.cu[BLOCK_ ## W ## x ## H].calcresidual[ALIGNED]     = getResidual_neon<W>; \
+
+#else
+    // High bit depth: the same table without .var (pixel_var_neon only accepts 8-bit samples)
+    #define LUMA_CU(W, H) \
+    p.cu[BLOCK_ ## W ## x ## H].sub_ps        = pixel_sub_ps_neon<W, H>; \
+    p.cu[BLOCK_ ## W ## x ## H].add_ps[NONALIGNED]    = pixel_add_ps_neon<W, H>; \
+    p.cu[BLOCK_ ## W ## x ## H].add_ps[ALIGNED] = pixel_add_ps_neon<W, H>; \
+    p.cu[BLOCK_ ## W ## x ## H].copy_pp       = blockcopy_pp_neon<W, H>; \
+    p.cu[BLOCK_ ## W ## x ## H].copy_ps       = blockcopy_ps_neon<W, H>; \
+    p.cu[BLOCK_ ## W ## x ## H].copy_pp       = blockcopy_pp_neon<W, H>; \
+    p.cu[BLOCK_ ## W ## x ## H].blockfill_s[NONALIGNED] = blockfill_s_neon<W>;  \
+    p.cu[BLOCK_ ## W ## x ## H].blockfill_s[ALIGNED]    = blockfill_s_neon<W>;  \
+    p.cu[BLOCK_ ## W ## x ## H].cpy2Dto1D_shl = cpy2Dto1D_shl_neon<W>; \
+    p.cu[BLOCK_ ## W ## x ## H].cpy1Dto2D_shl[NONALIGNED] = cpy1Dto2D_shl_neon<W>; \
+    p.cu[BLOCK_ ## W ## x ## H].cpy1Dto2D_shl[ALIGNED] = cpy1Dto2D_shl_neon<W>; \
+    p.cu[BLOCK_ ## W ## x ## H].psy_cost_pp   = psyCost_pp_neon<BLOCK_ ## W ## x ## H>; \
+    p.cu[BLOCK_ ## W ## x ## H].transpose     = transpose_neon<W>; \
+    /*p.cu[BLOCK_ ## W ## x ## H].var           = pixel_var_neon<W>;*/ \
+    p.cu[BLOCK_ ## W ## x ## H].calcresidual[NONALIGNED]  = getResidual_neon<W>; \
+    p.cu[BLOCK_ ## W ## x ## H].calcresidual[ALIGNED]     = getResidual_neon<W>; \
+
+    
+    
+#endif
+    // Luma prediction units: copy, addAvg, sad, sad_x3, sad_x4 and pixelavg for every partition size
+    
+      LUMA_PU(4, 4);
+      LUMA_PU(8, 8);
+      LUMA_PU(16, 16);
+      LUMA_PU(32, 32);
+      LUMA_PU(64, 64);
+      LUMA_PU(4, 8);
+      LUMA_PU(8, 4);
+      LUMA_PU(16,  8);
+      LUMA_PU(8, 16);
+      LUMA_PU(16, 12);
+      LUMA_PU(12, 16);
+      LUMA_PU(16,  4);
+      LUMA_PU(4, 16);
+      LUMA_PU(32, 16);
+      LUMA_PU(16, 32);
+      LUMA_PU(32, 24);
+      LUMA_PU(24, 32);
+      LUMA_PU(32,  8);
+      LUMA_PU(8, 32);
+      LUMA_PU(64, 32);
+      LUMA_PU(32, 64);
+      LUMA_PU(64, 48);
+      LUMA_PU(48, 64);
+      LUMA_PU(64, 16);
+      LUMA_PU(16, 64);
+
+      p.pu[LUMA_4x4].satd   = pixel_satd_4x4_neon;
+      p.pu[LUMA_8x8].satd   = satd8<8, 8>;
+      p.pu[LUMA_8x4].satd   = pixel_satd_8x4_neon;
+      p.pu[LUMA_4x8].satd   = satd4<4, 8>;
+      p.pu[LUMA_16x16].satd = satd8<16, 16>;
+      p.pu[LUMA_16x8].satd  = satd8<16, 8>;
+      p.pu[LUMA_8x16].satd  = satd8<8, 16>;
+      p.pu[LUMA_16x12].satd = satd8<16, 12>;
+      p.pu[LUMA_12x16].satd = satd4<12, 16>;
+      p.pu[LUMA_16x4].satd  = satd8<16, 4>;
+      p.pu[LUMA_4x16].satd  = satd4<4, 16>;
+      p.pu[LUMA_32x32].satd = satd8<32, 32>;
+      p.pu[LUMA_32x16].satd = satd8<32, 16>;
+      p.pu[LUMA_16x32].satd = satd8<16, 32>;
+      p.pu[LUMA_32x24].satd = satd8<32, 24>;
+      p.pu[LUMA_24x32].satd = satd8<24, 32>;
+      p.pu[LUMA_32x8].satd  = satd8<32, 8>;
+      p.pu[LUMA_8x32].satd  = satd8<8, 32>;
+      p.pu[LUMA_64x64].satd = satd8<64, 64>;
+      p.pu[LUMA_64x32].satd = satd8<64, 32>;
+      p.pu[LUMA_32x64].satd = satd8<32, 64>;
+      p.pu[LUMA_64x48].satd = satd8<64, 48>;
+      p.pu[LUMA_48x64].satd = satd8<48, 64>;
+      p.pu[LUMA_64x16].satd = satd8<64, 16>;
+      p.pu[LUMA_16x64].satd = satd8<16, 64>;
+
+    // Square luma coding-unit sizes
+      LUMA_CU(4, 4);
+      LUMA_CU(8, 8);
+      LUMA_CU(16, 16);
+      LUMA_CU(32, 32);
+      LUMA_CU(64, 64);
+
+
+      p.cu[BLOCK_4x4].sa8d   = pixel_satd_4x4_neon; // the 4x4 slot is filled with the satd kernel
+      p.cu[BLOCK_8x8].sa8d   = pixel_sa8d_8x8_neon;
+      p.cu[BLOCK_16x16].sa8d = pixel_sa8d_16x16_neon;
+      p.cu[BLOCK_32x32].sa8d = sa8d16<32, 32>;
+      p.cu[BLOCK_64x64].sa8d = sa8d16<64, 64>;
+
+    // 4:2:0 chroma prediction units
+  #define CHROMA_PU_420(W, H) \
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].addAvg[NONALIGNED]  = addAvg_neon<W, H>;         \
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].addAvg[ALIGNED]  = addAvg_neon<W, H>;         \
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_ ## W ## x ## H].copy_pp = blockcopy_pp_neon<W, H>; \
+
+
+      CHROMA_PU_420(4, 4);
+      CHROMA_PU_420(8, 8);
+      CHROMA_PU_420(16, 16);
+      CHROMA_PU_420(32, 32);
+      CHROMA_PU_420(4, 2);
+      CHROMA_PU_420(8, 4);
+      CHROMA_PU_420(4, 8);
+      CHROMA_PU_420(8, 6);
+      CHROMA_PU_420(6, 8);
+      CHROMA_PU_420(8, 2);
+      CHROMA_PU_420(2, 8);
+      CHROMA_PU_420(16, 8);
+      CHROMA_PU_420(8,  16);
+      CHROMA_PU_420(16, 12);
+      CHROMA_PU_420(12, 16);
+      CHROMA_PU_420(16, 4);
+      CHROMA_PU_420(4,  16);
+      CHROMA_PU_420(32, 16);
+      CHROMA_PU_420(16, 32);
+      CHROMA_PU_420(32, 24);
+      CHROMA_PU_420(24, 32);
+      CHROMA_PU_420(32, 8);
+      CHROMA_PU_420(8,  32);
+
+    // satd for 4:2:0 chroma; every size with a 2- or 6-pixel dimension is left NULL
+
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_2x2].satd   = NULL;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].satd   = pixel_satd_4x4_neon;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].satd   = satd8<8, 8>;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].satd = satd8<16, 16>;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].satd = satd8<32, 32>;
+
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].satd   = NULL;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_2x4].satd   = NULL;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].satd   = pixel_satd_8x4_neon;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].satd   = satd4<4, 8>;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].satd  = satd8<16, 8>;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].satd  = satd8<8, 16>;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].satd = satd8<32, 16>;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].satd = satd8<16, 32>;
+
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].satd   = NULL;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].satd   = NULL;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].satd   = NULL;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_2x8].satd   = NULL;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].satd = satd4<16, 12>;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].satd = satd4<12, 16>;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].satd  = satd4<16, 4>;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].satd  = satd4<4, 16>;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].satd = satd8<32, 24>;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].satd = satd8<24, 32>;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].satd  = satd8<32, 8>;
+      p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].satd  = satd8<8, 32>;
+
+    // 4:2:0 chroma coding units
+  #define CHROMA_CU_420(W, H) \
+      p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].sse_pp  = sse_neon<W, H, pixel, pixel>; \
+      p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].copy_pp = blockcopy_pp_neon<W, H>; \
+      p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].copy_ps = blockcopy_ps_neon<W, H>; \
+      p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].sub_ps = pixel_sub_ps_neon<W, H>;  \
+      p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].add_ps[NONALIGNED] = pixel_add_ps_neon<W, H>; \
+      p.chroma[X265_CSP_I420].cu[BLOCK_420_ ## W ## x ## H].add_ps[ALIGNED] = pixel_add_ps_neon<W, H>;
+
+
+      CHROMA_CU_420(4, 4)
+      CHROMA_CU_420(8, 8)
+      CHROMA_CU_420(16, 16)
+      CHROMA_CU_420(32, 32)
+
+
+      p.chroma[X265_CSP_I420].cu[BLOCK_8x8].sa8d   = p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].satd;
+      p.chroma[X265_CSP_I420].cu[BLOCK_16x16].sa8d = sa8d8<8, 8>;
+      p.chroma[X265_CSP_I420].cu[BLOCK_32x32].sa8d = sa8d16<16, 16>;
+      p.chroma[X265_CSP_I420].cu[BLOCK_64x64].sa8d = sa8d16<32, 32>;
+
+    // 4:2:2 chroma prediction units
+  #define CHROMA_PU_422(W, H) \
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].addAvg[NONALIGNED]  = addAvg_neon<W, H>;         \
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].addAvg[ALIGNED]  = addAvg_neon<W, H>;         \
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_ ## W ## x ## H].copy_pp = blockcopy_pp_neon<W, H>; \
+
+
+      CHROMA_PU_422(4, 8);
+      CHROMA_PU_422(8, 16);
+      CHROMA_PU_422(16, 32);
+      CHROMA_PU_422(32, 64);
+      CHROMA_PU_422(4, 4);
+      CHROMA_PU_422(2, 8);
+      CHROMA_PU_422(8, 8);
+      CHROMA_PU_422(4, 16);
+      CHROMA_PU_422(8, 12);
+      CHROMA_PU_422(6, 16);
+      CHROMA_PU_422(8, 4);
+      CHROMA_PU_422(2, 16);
+      CHROMA_PU_422(16, 16);
+      CHROMA_PU_422(8, 32);
+      CHROMA_PU_422(16, 24);
+      CHROMA_PU_422(12, 32);
+      CHROMA_PU_422(16, 8);
+      CHROMA_PU_422(4,  32);
+      CHROMA_PU_422(32, 32);
+      CHROMA_PU_422(16, 64);
+      CHROMA_PU_422(32, 48);
+      CHROMA_PU_422(24, 64);
+      CHROMA_PU_422(32, 16);
+      CHROMA_PU_422(8,  64);
+
+
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_2x4].satd   = NULL;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].satd   = satd4<4, 8>;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].satd  = satd8<8, 16>;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].satd = satd8<16, 32>;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].satd = satd8<32, 64>;
+
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].satd   = pixel_satd_4x4_neon;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_2x8].satd   = NULL;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].satd   = satd8<8, 8>;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].satd  = satd4<4, 16>;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].satd = satd8<16, 16>;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].satd  = satd8<8, 32>;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].satd = satd8<32, 32>;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].satd = satd8<16, 64>;
+
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].satd  = satd4<8, 12>;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].satd  = NULL;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].satd   = satd4<8, 4>;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].satd  = NULL;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].satd = satd8<16, 24>;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].satd = satd4<12, 32>;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].satd  = satd8<16, 8>;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].satd  = satd4<4, 32>;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].satd = satd8<32, 48>;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].satd = satd8<24, 64>;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].satd = satd8<32, 16>;
+      p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].satd  = satd8<8, 64>;
+
+    // 4:2:2 chroma coding units
+  #define CHROMA_CU_422(W, H) \
+      p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].sse_pp  = sse_neon<W, H, pixel, pixel>;  \
+      p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].copy_pp = blockcopy_pp_neon<W, H>; \
+      p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].copy_ps = blockcopy_ps_neon<W, H>; \
+      p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].sub_ps = pixel_sub_ps_neon<W, H>; \
+      p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].add_ps[NONALIGNED] = pixel_add_ps_neon<W, H>; \
+      p.chroma[X265_CSP_I422].cu[BLOCK_422_ ## W ## x ## H].add_ps[ALIGNED] = pixel_add_ps_neon<W, H>;
+
+
+      CHROMA_CU_422(4, 8)
+      CHROMA_CU_422(8, 16)
+      CHROMA_CU_422(16, 32)
+      CHROMA_CU_422(32, 64)
+
+      p.chroma[X265_CSP_I422].cu[BLOCK_8x8].sa8d   = p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].satd;
+      p.chroma[X265_CSP_I422].cu[BLOCK_16x16].sa8d = sa8d8<8, 16>;
+      p.chroma[X265_CSP_I422].cu[BLOCK_32x32].sa8d = sa8d16<16, 32>;
+      p.chroma[X265_CSP_I422].cu[BLOCK_64x64].sa8d = sa8d16<32, 64>;
+
+    
+}
+  
+  
+}
+
+
+#endif
+
diff -Naur ./source/common/arm64/pixel-prim.h ../x265_apple_patch/source/common/arm64/pixel-prim.h
--- ./source/common/arm64/pixel-prim.h	1970-01-01 01:00:00.000000000 +0100
+++ ../x265_apple_patch/source/common/arm64/pixel-prim.h	2021-05-08 13:08:01.000000000 +0100
@@ -0,0 +1,22 @@
+#ifndef PIXEL_PRIM_NEON_H__
+#define PIXEL_PRIM_NEON_H__
+// Declares the entry point that installs the NEON pixel primitives.
+#include "common.h"
+#include "slicetype.h"      // LOWRES_COST_MASK
+#include "primitives.h"
+#include "x265.h"
+
+
+
+namespace X265_NS {
+  
+  // Fills the function-pointer slots of the primitive table p with the NEON pixel kernels.
+  
+void setupPixelPrimitives_neon(EncoderPrimitives &p);
+  
+  
+}
+
+
+#endif
+
diff -Naur ./source/common/arm64/pixel.h ../x265_apple_patch/source/common/arm64/pixel.h
--- ./source/common/arm64/pixel.h	1970-01-01 01:00:00.000000000 +0100
+++ ../x265_apple_patch/source/common/arm64/pixel.h	2021-05-08 13:08:01.000000000 +0100
@@ -0,0 +1,134 @@
+/*****************************************************************************
+ * pixel.h: aarch64 pixel metrics
+ *****************************************************************************
+ * Copyright (C) 2009-2019 x265 project
+ *
+ * Authors: David Conrad <lessen42@gmail.com>
+ *          Janne Grunau <janne-x265@jannau.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x265.com.
+ *****************************************************************************/
+
+#ifndef x265_AARCH64_PIXEL_H
+#define x265_AARCH64_PIXEL_H
+
+#define x265_pixel_sad_16x16_neon x265_template(pixel_sad_16x16_neon)
+#define x265_pixel_sad_16x8_neon x265_template(pixel_sad_16x8_neon)
+#define x265_pixel_sad_4x16_neon x265_template(pixel_sad_4x16_neon)
+#define x265_pixel_sad_4x4_neon x265_template(pixel_sad_4x4_neon)
+#define x265_pixel_sad_4x8_neon x265_template(pixel_sad_4x8_neon)
+#define x265_pixel_sad_8x16_neon x265_template(pixel_sad_8x16_neon)
+#define x265_pixel_sad_8x4_neon x265_template(pixel_sad_8x4_neon)
+#define x265_pixel_sad_8x8_neon x265_template(pixel_sad_8x8_neon)
+#define x265_pixel_sad_x3_16x16_neon x265_template(pixel_sad_x3_16x16_neon)
+#define x265_pixel_sad_x3_16x8_neon x265_template(pixel_sad_x3_16x8_neon)
+#define x265_pixel_sad_x3_4x4_neon x265_template(pixel_sad_x3_4x4_neon)
+#define x265_pixel_sad_x3_4x8_neon x265_template(pixel_sad_x3_4x8_neon)
+#define x265_pixel_sad_x3_8x16_neon x265_template(pixel_sad_x3_8x16_neon)
+#define x265_pixel_sad_x3_8x4_neon x265_template(pixel_sad_x3_8x4_neon)
+#define x265_pixel_sad_x3_8x8_neon x265_template(pixel_sad_x3_8x8_neon)
+#define x265_pixel_sad_x4_16x16_neon x265_template(pixel_sad_x4_16x16_neon)
+#define x265_pixel_sad_x4_16x8_neon x265_template(pixel_sad_x4_16x8_neon)
+#define x265_pixel_sad_x4_4x4_neon x265_template(pixel_sad_x4_4x4_neon)
+#define x265_pixel_sad_x4_4x8_neon x265_template(pixel_sad_x4_4x8_neon)
+#define x265_pixel_sad_x4_8x16_neon x265_template(pixel_sad_x4_8x16_neon)
+#define x265_pixel_sad_x4_8x4_neon x265_template(pixel_sad_x4_8x4_neon)
+#define x265_pixel_sad_x4_8x8_neon x265_template(pixel_sad_x4_8x8_neon)
+#define x265_pixel_satd_16x16_neon x265_template(pixel_satd_16x16_neon)
+#define x265_pixel_satd_16x8_neon x265_template(pixel_satd_16x8_neon)
+#define x265_pixel_satd_4x16_neon x265_template(pixel_satd_4x16_neon)
+#define x265_pixel_satd_4x4_neon x265_template(pixel_satd_4x4_neon)
+#define x265_pixel_satd_4x8_neon x265_template(pixel_satd_4x8_neon)
+#define x265_pixel_satd_8x16_neon x265_template(pixel_satd_8x16_neon)
+#define x265_pixel_satd_8x4_neon x265_template(pixel_satd_8x4_neon)
+#define x265_pixel_satd_8x8_neon x265_template(pixel_satd_8x8_neon)
+#define x265_pixel_ssd_16x16_neon x265_template(pixel_ssd_16x16_neon)
+#define x265_pixel_ssd_16x8_neon x265_template(pixel_ssd_16x8_neon)
+#define x265_pixel_ssd_4x16_neon x265_template(pixel_ssd_4x16_neon)
+#define x265_pixel_ssd_4x4_neon x265_template(pixel_ssd_4x4_neon)
+#define x265_pixel_ssd_4x8_neon x265_template(pixel_ssd_4x8_neon)
+#define x265_pixel_ssd_8x16_neon x265_template(pixel_ssd_8x16_neon)
+#define x265_pixel_ssd_8x4_neon x265_template(pixel_ssd_8x4_neon)
+#define x265_pixel_ssd_8x8_neon x265_template(pixel_ssd_8x8_neon)
+#define DECL_PIXELS( ret, name, suffix, args ) \
+    ret x265_pixel_##name##_16x16_##suffix args;\
+    ret x265_pixel_##name##_16x8_##suffix args;\
+    ret x265_pixel_##name##_8x16_##suffix args;\
+    ret x265_pixel_##name##_8x8_##suffix args;\
+    ret x265_pixel_##name##_8x4_##suffix args;\
+    ret x265_pixel_##name##_4x16_##suffix args;\
+    ret x265_pixel_##name##_4x8_##suffix args;\
+    ret x265_pixel_##name##_4x4_##suffix args;\
+
+#define DECL_X1( name, suffix ) \
+    DECL_PIXELS( int, name, suffix, ( uint8_t *, intptr_t, uint8_t *, intptr_t ) )
+
+#define DECL_X4( name, suffix ) \
+    DECL_PIXELS( void, name##_x3, suffix, ( uint8_t *, uint8_t *, uint8_t *, uint8_t *, intptr_t, int * ) )\
+    DECL_PIXELS( void, name##_x4, suffix, ( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, intptr_t, int * ) )
+
+DECL_X1( sad, neon )
+DECL_X4( sad, neon )
+DECL_X1( satd, neon )
+DECL_X1( ssd, neon )
+
+
+#define x265_pixel_ssd_nv12_core_neon x265_template(pixel_ssd_nv12_core_neon)
+void x265_pixel_ssd_nv12_core_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int, int, uint64_t *, uint64_t * );
+
+#define x265_pixel_vsad_neon x265_template(pixel_vsad_neon)
+int x265_pixel_vsad_neon( uint8_t *, intptr_t, int );
+
+#define x265_pixel_sa8d_8x8_neon x265_template(pixel_sa8d_8x8_neon)
+int x265_pixel_sa8d_8x8_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t );
+#define x265_pixel_sa8d_16x16_neon x265_template(pixel_sa8d_16x16_neon)
+int x265_pixel_sa8d_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t );
+#define x265_pixel_sa8d_satd_16x16_neon x265_template(pixel_sa8d_satd_16x16_neon)
+uint64_t x265_pixel_sa8d_satd_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t );
+
+#define x265_pixel_var_8x8_neon x265_template(pixel_var_8x8_neon)
+uint64_t x265_pixel_var_8x8_neon  ( uint8_t *, intptr_t );
+#define x265_pixel_var_8x16_neon x265_template(pixel_var_8x16_neon)
+uint64_t x265_pixel_var_8x16_neon ( uint8_t *, intptr_t );
+#define x265_pixel_var_16x16_neon x265_template(pixel_var_16x16_neon)
+uint64_t x265_pixel_var_16x16_neon( uint8_t *, intptr_t );
+#define x265_pixel_var2_8x8_neon x265_template(pixel_var2_8x8_neon)
+int x265_pixel_var2_8x8_neon ( uint8_t *, uint8_t *, int * );
+#define x265_pixel_var2_8x16_neon x265_template(pixel_var2_8x16_neon)
+int x265_pixel_var2_8x16_neon( uint8_t *, uint8_t *, int * );
+
+#define x265_pixel_hadamard_ac_8x8_neon x265_template(pixel_hadamard_ac_8x8_neon)
+uint64_t x265_pixel_hadamard_ac_8x8_neon  ( uint8_t *, intptr_t );
+#define x265_pixel_hadamard_ac_8x16_neon x265_template(pixel_hadamard_ac_8x16_neon)
+uint64_t x265_pixel_hadamard_ac_8x16_neon ( uint8_t *, intptr_t );
+#define x265_pixel_hadamard_ac_16x8_neon x265_template(pixel_hadamard_ac_16x8_neon)
+uint64_t x265_pixel_hadamard_ac_16x8_neon ( uint8_t *, intptr_t );
+#define x265_pixel_hadamard_ac_16x16_neon x265_template(pixel_hadamard_ac_16x16_neon)
+uint64_t x265_pixel_hadamard_ac_16x16_neon( uint8_t *, intptr_t );
+
+#define x265_pixel_ssim_4x4x2_core_neon x265_template(pixel_ssim_4x4x2_core_neon)
+void x265_pixel_ssim_4x4x2_core_neon( const uint8_t *, intptr_t,
+                                      const uint8_t *, intptr_t,
+                                      int sums[2][4] );
+#define x265_pixel_ssim_end4_neon x265_template(pixel_ssim_end4_neon)
+float x265_pixel_ssim_end4_neon( int sum0[5][4], int sum1[5][4], int width );
+
+#define x265_pixel_asd8_neon x265_template(pixel_asd8_neon)
+int x265_pixel_asd8_neon( uint8_t *, intptr_t,  uint8_t *, intptr_t, int );
+
+#endif
diff -Naur ./source/common/cpu.cpp ../x265_apple_patch/source/common/cpu.cpp
--- ./source/common/cpu.cpp	2021-05-08 13:06:22.000000000 +0100
+++ ../x265_apple_patch/source/common/cpu.cpp	2021-05-08 13:08:01.000000000 +0100
@@ -104,7 +104,8 @@
     { "ARMv6",           X265_CPU_ARMV6 },
     { "NEON",            X265_CPU_NEON },
     { "FastNeonMRC",     X265_CPU_FAST_NEON_MRC },
-
+#elif X265_ARCH_ARM64
+    { "NEON",            X265_CPU_NEON },
 #elif X265_ARCH_POWER8
     { "Altivec",         X265_CPU_ALTIVEC },
 
@@ -374,6 +375,18 @@
 #endif // if HAVE_ARMV6
     return flags;
 }
+#elif X265_ARCH_ARM64
+
+uint32_t cpu_detect(bool benableavx512)
+{
+    int flags = 0;
+
+#if HAVE_NEON
+    flags |= X265_CPU_NEON;
+#endif
+
+    return flags;
+}
 
 #elif X265_ARCH_POWER8
 
diff -Naur ./source/common/pixel.cpp ../x265_apple_patch/source/common/pixel.cpp
--- ./source/common/pixel.cpp	2021-05-08 13:06:22.000000000 +0100
+++ ../x265_apple_patch/source/common/pixel.cpp	2021-05-08 13:08:01.000000000 +0100
@@ -266,7 +266,7 @@
 {
     int satd = 0;
 
-#if ENABLE_ASSEMBLY && X265_ARCH_ARM64
+#if ENABLE_ASSEMBLY && X265_ARCH_ARM64 && 0
     pixelcmp_t satd_4x4 = x265_pixel_satd_4x4_neon;
 #endif
 
@@ -284,7 +284,7 @@
 {
     int satd = 0;
 
-#if ENABLE_ASSEMBLY && X265_ARCH_ARM64
+#if ENABLE_ASSEMBLY && X265_ARCH_ARM64 && 0
     pixelcmp_t satd_8x4 = x265_pixel_satd_8x4_neon;
 #endif
 
diff -Naur ./source/common/version.cpp ../x265_apple_patch/source/common/version.cpp
--- ./source/common/version.cpp	2021-05-08 13:06:22.000000000 +0100
+++ ../x265_apple_patch/source/common/version.cpp	2021-05-08 13:47:38.000000000 +0100
@@ -31,7 +31,7 @@
 
 #if defined(__clang__)
 #define COMPILEDBY  "[clang " XSTR(__clang_major__) "." XSTR(__clang_minor__) "." XSTR(__clang_patchlevel__) "]"
-#ifdef __IA64__
+#if defined(__IA64__) || defined(__arm64__) || defined(__aarch64__) /* #ifdef would test only __IA64__ */
 #define ONARCH    "[on 64-bit] "
 #else
 #define ONARCH    "[on 32-bit] "
@@ -71,7 +71,7 @@
 #define ONOS    "[Unk-OS]"
 #endif
 
-#if X86_64
+#if X86_64 || __arm64__ || __aarch64__
 #define BITS    "[64 bit]"
 #else
 #define BITS    "[32 bit]"
diff -Naur ./source/test/testharness.h ../x265_apple_patch/source/test/testharness.h
--- ./source/test/testharness.h	2021-05-08 13:06:22.000000000 +0100
+++ ../x265_apple_patch/source/test/testharness.h	2021-05-08 13:08:01.000000000 +0100
@@ -64,7 +64,6 @@
 
     uint64_t m_rand;
 };
-
 #ifdef _MSC_VER
 #include <intrin.h>
 #elif HAVE_RDTSC
@@ -73,7 +72,7 @@
 #include <x86intrin.h>
 #elif ( !defined(__APPLE__) && defined (__GNUC__) && defined(__ARM_NEON__))
 #include <arm_neon.h>
-#elif defined(__GNUC__) && (!defined(__clang__) || __clang_major__ < 4)
+#else
 /* fallback for older GCC/MinGW */
 static inline uint32_t __rdtsc(void)
 {
@@ -90,6 +89,12 @@
 
     // TO-DO: replace clock() function with appropriate ARM cpu instructions
     a = clock();
+#elif X265_ARCH_ARM64
+    // TOD-DO: verify following inline asm to get cpu Timestamp Counter for ARM arch
+    // asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(a));
+
+    // TO-DO: replace clock() function with appropriate ARM cpu instructions
+    a = clock();
 #endif
 #endif
     return a;
@@ -140,7 +145,7 @@
  * needs an explicit asm check because it only sometimes crashes in normal use. */
 intptr_t PFX(checkasm_call)(intptr_t (*func)(), int *ok, ...);
 float PFX(checkasm_call_float)(float (*func)(), int *ok, ...);
-#elif X265_ARCH_ARM == 0
+#elif (X265_ARCH_ARM == 0 && X265_ARCH_ARM64 == 0)
 #define PFX(stack_pagealign)(func, align) func()
 #endif
 
diff -Naur ./source/test/testharness.h.orig ../x265_apple_patch/source/test/testharness.h.orig
--- ./source/test/testharness.h.orig	1970-01-01 01:00:00.000000000 +0100
+++ ../x265_apple_patch/source/test/testharness.h.orig	2021-05-08 13:08:01.000000000 +0100
@@ -0,0 +1,184 @@
+/*****************************************************************************
+ * Copyright (C) 2013-2020 MulticoreWare, Inc
+ *
+ * Authors: Steve Borho <steve@borho.org>
+ *          Min Chen <chenm003@163.com>
+ *          Yimeng Su <yimeng.su@huawei.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#ifndef _TESTHARNESS_H_
+#define _TESTHARNESS_H_ 1
+
+#include "common.h"
+#include "primitives.h"
+
+#if _MSC_VER
+#pragma warning(disable: 4324) // structure was padded due to __declspec(align())
+#endif
+
+#define PIXEL_MIN 0
+#define SHORT_MAX  32767
+#define SHORT_MIN -32767
+#define UNSIGNED_SHORT_MAX 65535
+
+using namespace X265_NS;
+
+extern const char* lumaPartStr[NUM_PU_SIZES];
+extern const char* const* chromaPartStr[X265_CSP_COUNT];
+
+class TestHarness
+{
+public:
+
+    TestHarness() {}
+
+    virtual ~TestHarness() {}
+
+    virtual bool testCorrectness(const EncoderPrimitives& ref, const EncoderPrimitives& opt) = 0;
+
+    virtual void measureSpeed(const EncoderPrimitives& ref, const EncoderPrimitives& opt) = 0;
+
+    virtual const char *getName() const = 0;
+
+protected:
+
+    /* Temporary variables for stack checks */
+    int      m_ok;
+
+    uint64_t m_rand;
+};
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#elif HAVE_RDTSC
+#include <intrin.h>
+#elif (!defined(__APPLE__) && (defined (__GNUC__) && (defined(__x86_64__) || defined(__i386__))))
+#include <x86intrin.h>
+#elif ( !defined(__APPLE__) && defined (__GNUC__) && defined(__ARM_NEON__))
+#include <arm_neon.h>
+#elif defined(__GNUC__) && (!defined(__clang__) || __clang_major__ < 4)
+/* fallback for older GCC/MinGW */
+static inline uint32_t __rdtsc(void)
+{
+    uint32_t a = 0;
+
+#if X265_ARCH_X86
+    asm volatile("rdtsc" : "=a" (a) ::"edx");
+#elif X265_ARCH_ARM
+#if X265_ARCH_ARM64
+    asm volatile("mrs %0, cntvct_el0" : "=r"(a));
+#else
+    // TOD-DO: verify following inline asm to get cpu Timestamp Counter for ARM arch
+    // asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(a));
+
+    // TO-DO: replace clock() function with appropriate ARM cpu instructions
+    a = clock();
+#endif
+#endif
+    return a;
+}
+#endif // ifdef _MSC_VER
+
+#define BENCH_RUNS 2000
+
+/* Adapted from checkasm.c, runs each optimized primitive four times, measures rdtsc
+ * and discards invalid times. Repeats BENCH_RUNS times to get a good average.
+ * Then measures the C reference with BENCH_RUNS / 4 runs and reports X factor and average cycles.*/
+#define REPORT_SPEEDUP(RUNOPT, RUNREF, ...) \
+    { \
+        uint32_t cycles = 0; int runs = 0; \
+        RUNOPT(__VA_ARGS__); \
+        for (int ti = 0; ti < BENCH_RUNS; ti++) { \
+            uint32_t t0 = (uint32_t)__rdtsc(); \
+            RUNOPT(__VA_ARGS__); \
+            RUNOPT(__VA_ARGS__); \
+            RUNOPT(__VA_ARGS__); \
+            RUNOPT(__VA_ARGS__); \
+            uint32_t t1 = (uint32_t)__rdtsc() - t0; \
+            if (t1 * runs <= cycles * 4 && ti > 0) { cycles += t1; runs++; } \
+        } \
+        uint32_t refcycles = 0; int refruns = 0; \
+        RUNREF(__VA_ARGS__); \
+        for (int ti = 0; ti < BENCH_RUNS / 4; ti++) { \
+            uint32_t t0 = (uint32_t)__rdtsc(); \
+            RUNREF(__VA_ARGS__); \
+            RUNREF(__VA_ARGS__); \
+            RUNREF(__VA_ARGS__); \
+            RUNREF(__VA_ARGS__); \
+            uint32_t t1 = (uint32_t)__rdtsc() - t0; \
+            if (t1 * refruns <= refcycles * 4 && ti > 0) { refcycles += t1; refruns++; } \
+        } \
+        x265_emms(); \
+        float optperf = (10.0f * cycles / runs) / 4; \
+        float refperf = (10.0f * refcycles / refruns) / 4; \
+        printf("\t%3.2fx ", refperf / optperf); \
+        printf("\t %-8.2lf \t %-8.2lf\n", optperf, refperf); \
+    }
+
+extern "C" {
+#if X265_ARCH_X86
+int PFX(stack_pagealign)(int (*func)(), int align);
+
+/* detect when callee-saved regs aren't saved
+ * needs an explicit asm check because it only sometimes crashes in normal use. */
+intptr_t PFX(checkasm_call)(intptr_t (*func)(), int *ok, ...);
+float PFX(checkasm_call_float)(float (*func)(), int *ok, ...);
+#elif X265_ARCH_ARM == 0
+#define PFX(stack_pagealign)(func, align) func()
+#endif
+
+#if X86_64
+
+/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
+ * This is done by clobbering the stack with junk around the stack pointer and calling the
+ * assembly function through x265_checkasm_call with added dummy arguments which forces all
+ * real arguments to be passed on the stack and not in registers. For 32-bit argument the
+ * upper half of the 64-bit register location on the stack will now contain junk. Note that
+ * this is dependent on compiler behavior and that interrupts etc. at the wrong time may
+ * overwrite the junk written to the stack so there's no guarantee that it will always
+ * detect all functions that assumes zero-extension.
+ */
+void PFX(checkasm_stack_clobber)(uint64_t clobber, ...);
+#define checked(func, ...) ( \
+        m_ok = 1, m_rand = (rand() & 0xffff) * 0x0001000100010001ULL, \
+        PFX(checkasm_stack_clobber)(m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
+                                    m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
+                                    m_rand, m_rand, m_rand, m_rand, m_rand), /* max_args+6 */ \
+        PFX(checkasm_call)((intptr_t(*)())func, &m_ok, 0, 0, 0, 0, __VA_ARGS__))
+
+#define checked_float(func, ...) ( \
+        m_ok = 1, m_rand = (rand() & 0xffff) * 0x0001000100010001ULL, \
+        PFX(checkasm_stack_clobber)(m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
+                                    m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
+                                    m_rand, m_rand, m_rand, m_rand, m_rand), /* max_args+6 */ \
+        PFX(checkasm_call_float)((float(*)())func, &m_ok, 0, 0, 0, 0, __VA_ARGS__))
+#define reportfail() if (!m_ok) { fflush(stdout); fprintf(stderr, "stack clobber check failed at %s:%d", __FILE__, __LINE__); abort(); }
+#elif ARCH_X86
+#define checked(func, ...) PFX(checkasm_call)((intptr_t(*)())func, &m_ok, __VA_ARGS__);
+#define checked_float(func, ...) PFX(checkasm_call_float)((float(*)())func, &m_ok, __VA_ARGS__);
+
+#else // if X86_64
+#define checked(func, ...) func(__VA_ARGS__)
+#define checked_float(func, ...) func(__VA_ARGS__)
+#define reportfail()
+#endif // if X86_64
+}
+
+#endif // ifndef _TESTHARNESS_H_


================================================
FILE: .ci/apple_libvorbis_cpusubtype.patch
================================================
--- configure.orig	2024-10-09 23:11:57
+++ configure	2024-10-09 23:12:43
@@ -12840,9 +12840,9 @@
 		CFLAGS="-O3 -Wall -Wextra -ffast-math -D__NO_MATH_INLINES -fsigned-char $sparc_cpu"
 		PROFILE="-pg -g -O3 -D__NO_MATH_INLINES -fsigned-char $sparc_cpu" ;;
 	*-*-darwin*)
-		DEBUG="-DDARWIN -fno-common -force_cpusubtype_ALL -Wall -g -O0 -fsigned-char"
-		CFLAGS="-DDARWIN -fno-common -force_cpusubtype_ALL -Wall -g -O3 -ffast-math -fsigned-char"
-		PROFILE="-DDARWIN -fno-common -force_cpusubtype_ALL -Wall -g -pg -O3 -ffast-math -fsigned-char";;
+		DEBUG="-DDARWIN -fno-common -Wall -g -O0 -fsigned-char"
+		CFLAGS="-DDARWIN -fno-common -Wall -g -O3 -ffast-math -fsigned-char"
+		PROFILE="-DDARWIN -fno-common -Wall -g -pg -O3 -ffast-math -fsigned-char";;
 	*-*-os2*)
 		# Use -W instead of -Wextra because gcc on OS/2 is an old version.
 		DEBUG="-g -Wall -W -D_REENTRANT -D__NO_MATH_INLINES -fsigned-char"

================================================
FILE: .ci/build-wheels.sh
================================================
#!/bin/bash
# Builds SDL2, OpenSSL, yasm/nasm, the codec libraries and FFmpeg from source
# into $HOME/ffmpeg_build. Versions come from dep_versions.sh.
# -e aborts on the first failing command, -x echoes every command to the log.
set -e -x

# no permissions in that dir
source /io/.ci/yum_deps.sh
source /io/.ci/dep_versions.sh

BUILD_DIR="$HOME/ffmpeg_build"
# Append the previous value only when it is non-empty: a trailing ':' would add an
# empty entry, which the dynamic loader treats as the current directory.
export LD_LIBRARY_PATH="$BUILD_DIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export PATH="$BUILD_DIR/bin:$PATH"
export PKG_CONFIG_PATH="$BUILD_DIR/lib/pkgconfig:$BUILD_DIR/lib64/pkgconfig:/usr/lib/pkgconfig/"

# -p so that re-running the script does not abort because the directory exists.
mkdir -p ~/ffmpeg_sources


# SDL2 -> $BUILD_DIR.
# curl runs with -f so that an HTTP error (e.g. 404 for a wrong version) fails the
# download itself instead of saving the error page and failing later in tar;
# -S keeps the error message visible although -s silences the progress meter.
cd ~/ffmpeg_sources;
curl -fsSLO "https://github.com/libsdl-org/SDL/releases/download/release-$SDL_VERSION/SDL2-$SDL_VERSION.tar.gz"
tar xzf "SDL2-$SDL_VERSION.tar.gz"
cd "SDL2-$SDL_VERSION"
./configure --prefix="$BUILD_DIR" --bindir="$BUILD_DIR/bin";
make;
make install;
make distclean;

# OpenSSL, built as a shared library.
cd ~/ffmpeg_sources;
curl -fsSLO "https://www.openssl.org/source/openssl-$OPENSSL_VERSION.tar.gz"
tar xzf "openssl-$OPENSSL_VERSION.tar.gz"
cd "openssl-$OPENSSL_VERSION"
./config -fpic shared --prefix="$BUILD_DIR";
make;
make install;

# yasm assembler, installed into $BUILD_DIR/bin (which is on PATH).
cd ~/ffmpeg_sources;
curl -fsSLO "http://www.tortall.net/projects/yasm/releases/yasm-$YASM_VERSION.tar.gz"
tar xzf "yasm-$YASM_VERSION.tar.gz"
cd "yasm-$YASM_VERSION"
./configure --prefix="$BUILD_DIR" --bindir="$BUILD_DIR/bin";
make;
make install;
make distclean;

# nasm assembler, installed into $BUILD_DIR/bin (which is on PATH).
cd ~/ffmpeg_sources;
curl -fsSLO "http://www.nasm.us/pub/nasm/releasebuilds/$NASM_VERSION/nasm-$NASM_VERSION.tar.gz"
tar -xvzf "nasm-$NASM_VERSION.tar.gz"
cd "nasm-$NASM_VERSION"
./configure --prefix="$BUILD_DIR" --bindir="$BUILD_DIR/bin";
make;
make install;
make distclean;

# x264: shallow clone of the "stable" branch, built as a shared library with -fPIC.
cd "$HOME/ffmpeg_sources"
git clone --branch stable --depth 1 https://code.videolan.org/videolan/x264.git
cd x264
./configure \
    --prefix="$BUILD_DIR" \
    --bindir="$BUILD_DIR/bin" \
    --enable-shared \
    --extra-cflags="-fPIC"
make
make install
make distclean

# LAME (libmp3lame), shared, with nasm-assembled routines enabled.
# -f makes curl fail on an HTTP error instead of saving the error page.
# NOTE(review): -k disables TLS certificate verification for this download;
# confirm it is still required for the SourceForge mirror and drop it if not.
cd ~/ffmpeg_sources;
curl -fkLO "https://cfhcable.dl.sourceforge.net/project/lame/lame/$LAME_VERSION/lame-$LAME_VERSION.tar.gz"
tar xzf "lame-$LAME_VERSION.tar.gz"
cd "lame-$LAME_VERSION"
./configure --prefix="$BUILD_DIR" --enable-nasm --enable-shared;
make;
make install;
make distclean;

# FriBidi, shared.
cd ~/ffmpeg_sources
curl -fsSLO "https://github.com/fribidi/fribidi/releases/download/v$FRIBIDI_VERSION/fribidi-$FRIBIDI_VERSION.tar.xz"
tar xf "fribidi-$FRIBIDI_VERSION.tar.xz"
cd "fribidi-$FRIBIDI_VERSION"
./configure --prefix="$BUILD_DIR" --enable-shared;
make
make install

# libass, shared; configured so that a system font provider is not required.
cd ~/ffmpeg_sources
curl -fsSLO "https://github.com/libass/libass/releases/download/$LIBASS_VERSION/libass-$LIBASS_VERSION.tar.gz"
tar xzf "libass-$LIBASS_VERSION.tar.gz"
cd "libass-$LIBASS_VERSION"
./configure --prefix="$BUILD_DIR" --enable-shared --disable-require-system-font-provider;
make
make install

# x265, shared, built with CMake from the release tarball.
# -f makes curl fail on an HTTP error instead of saving the error page.
cd ~/ffmpeg_sources
curl -fsSLO "https://bitbucket.org/multicoreware/x265_git/downloads/x265_$X265_VERSION.tar.gz"
tar xzf "x265_$X265_VERSION.tar.gz"
# The extracted directory name is matched with a glob rather than spelled out.
cd x265_*/
# Backport patches to fix build on cmake >4.0.0
patch -p1 < /io/.ci/x265_b354c009a60bcd6d7fc04014e200a1ee9c45c167.patch
patch -p1 < /io/.ci/x265_51ae8e922bcc4586ad4710812072289af91492a8.patch
cd build/linux
cmake -G "Unix Makefiles" -DCMAKE_INSTALL_PREFIX="$BUILD_DIR" -DENABLE_SHARED:bool=on ../../source
make
make install

# fdk-aac: shallow clone of the release tag, patched with .ci/fdk.patch, then the
# autotools files are regenerated before the usual configure/make/install.
cd "$HOME/ffmpeg_sources"
git clone --branch "v$FDK_VERSION" --depth 1 https://github.com/mstorsjo/fdk-aac.git
cd fdk-aac
git apply /io/.ci/fdk.patch
autoreconf --force --install --verbose
./configure \
    --prefix="$BUILD_DIR" \
    --enable-shared
make
make install

# Opus, shared.
# -f (here and below) makes curl fail on an HTTP error instead of saving the
# error page and letting tar fail on it afterwards.
cd ~/ffmpeg_sources
curl -fLO "https://archive.mozilla.org/pub/opus/opus-$OPUS_VERSION.tar.gz"
tar xzvf "opus-$OPUS_VERSION.tar.gz"
cd "opus-$OPUS_VERSION"
./configure --prefix="$BUILD_DIR" --enable-shared
make
make install

# libogg, shared. Installed before theora and vorbis are configured.
cd ~/ffmpeg_sources
curl -fLO "http://downloads.xiph.org/releases/ogg/libogg-$LIBOGG_VERSION.tar.gz"
tar xzvf "libogg-$LIBOGG_VERSION.tar.gz"
cd "libogg-$LIBOGG_VERSION"
./configure --prefix="$BUILD_DIR" --enable-shared
make
make install

# libtheora, shared.
cd ~/ffmpeg_sources;
curl -fLO "http://downloads.xiph.org/releases/theora/libtheora-$LIBTHEORA_VERSION.tar.gz"
tar xzvf "libtheora-$LIBTHEORA_VERSION.tar.gz"
cd "libtheora-$LIBTHEORA_VERSION"
./configure --prefix="$BUILD_DIR" --enable-shared;
make;
make install

# libvorbis, shared; pointed at the libogg installed in $BUILD_DIR.
cd ~/ffmpeg_sources
curl -fLO "http://downloads.xiph.org/releases/vorbis/libvorbis-$LIBVORBIS_VERSION.tar.gz"
tar xzvf "libvorbis-$LIBVORBIS_VERSION.tar.gz"
cd "libvorbis-$LIBVORBIS_VERSION"
./configure --prefix="$BUILD_DIR" --with-ogg="$BUILD_DIR" --enable-shared
make
make install

# libvpx: shallow clone of the release tag, assembled with yasm, shared library
# only (examples and unit tests are switched off).
cd "$HOME/ffmpeg_sources"
git clone --branch "v$LIBVPX_VERSION" --depth 1 https://chromium.googlesource.com/webm/libvpx.git
cd libvpx
./configure \
    --prefix="$BUILD_DIR" \
    --disable-examples \
    --as=yasm \
    --enable-shared \
    --disable-unit-tests
make
make install

# FFmpeg itself, shared, linked against every library installed into $BUILD_DIR above.
# -f makes curl fail on an HTTP error instead of saving the error page; the
# version expansions are quoted like everywhere else in this script.
cd ~/ffmpeg_sources;
curl -fsSLO "http://ffmpeg.org/releases/ffmpeg-$FFMPEG_VERSION.tar.bz2";
tar xjf "ffmpeg-$FFMPEG_VERSION.tar.bz2";
cd "ffmpeg-$FFMPEG_VERSION";
./configure --prefix="$BUILD_DIR" --extra-cflags="-I$BUILD_DIR/include -fPIC" --extra-ldflags="-L$BUILD_DIR/lib" --bindir="$BUILD_DIR/bin" --enable-gpl --enable-version3 --enable-libmp3lame --enable-libx264 --enable-libx265 --enable-libfdk_aac --enable-nonfree --enable-libass --enable-libvorbis --enable-libtheora --enable-libfreetype --enable-libopus --enable-libvpx --enable-openssl --enable-shared
make;
make install;

# Copy the finished install tree to /io/ffmpeg_build.
cp -r "$BUILD_DIR" "/io/ffmpeg_build"


================================================
FILE: .ci/build_wheels_osx.sh
================================================
#!/bin/bash
# Builds FFmpeg and its dependencies on macOS for a single architecture.
# Usage: build_wheels_osx.sh <arm64|x86_64>   (run from the repository root)
# -e aborts on the first failing command, -x echoes every command to the log.
set -e -x

# can be either arm64 or x86_64
ARCH="$1"
# Reject anything else up front: every later "is it x86_64?" test would otherwise
# silently treat a missing or misspelled argument as arm64.
case "$ARCH" in
  arm64|x86_64) ;;
  *)
    echo "usage: $0 <arm64|x86_64>" >&2
    exit 1
    ;;
esac

SRC_PATH="$HOME/ffmpeg_sources_$ARCH"
# FFMPEG_BUILD_PATH is not assigned in this script; it is read from the environment.
BUILD_PATH="$HOME/${FFMPEG_BUILD_PATH}_$ARCH"
base_dir="$(pwd)"

source "$base_dir/.ci/dep_versions.sh"

# Previous values are appended only when non-empty so that no empty path entry
# (a trailing ':') is created.
export LD_LIBRARY_PATH="$BUILD_PATH/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export PATH="$BUILD_PATH/bin:/usr/local/bin/:$PATH"
export PKG_CONFIG_PATH="$BUILD_PATH/lib/pkgconfig:/usr/lib/pkgconfig/${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}"
export CC="/usr/bin/clang"
# NOTE(review): CXX points at the C driver (clang, not clang++); the x265 and
# libvpx steps below clear CXX explicitly -- confirm this is intentional.
export CXX="/usr/bin/clang"
export MACOSX_DEPLOYMENT_TARGET=10.13

# ARCH2 is the spelling used for --host triplets (aarch64 instead of arm64).
if [ "$ARCH" = "x86_64" ]; then
  ARCH2=x86_64
else
  ARCH2=aarch64
  export CFLAGS="-arch arm64"
  export CXXFLAGS="-arch arm64"
fi


brew install automake meson pkg-config cmake
brew install --cask xquartz
# -p so that re-running the script does not abort because the directory exists.
mkdir -p "$SRC_PATH"


# xz -> $BUILD_PATH.
# curl runs with -f (here and below) so that an HTTP error fails the download
# itself instead of saving the error page; -S keeps the error visible under -s.
cd "$SRC_PATH"
curl -fsSLO "https://tukaani.org/xz/xz-$XZ_VERSION.tar.gz"
tar xzf "xz-$XZ_VERSION.tar.gz"
cd "xz-$XZ_VERSION"
./configure --prefix="$BUILD_PATH" --host=$ARCH2-darwin
make
make install


# zlib (its configure is invoked without a --host argument).
cd "$SRC_PATH"
curl -fsSLO "https://zlib.net/fossils/zlib-$ZLIB_VERSION.tar.gz"
tar xzf "zlib-$ZLIB_VERSION.tar.gz"
cd "zlib-$ZLIB_VERSION"
./configure --prefix="$BUILD_PATH"
make
make install


# SDL2; the exported CXXFLAGS/CFLAGS are also passed on as CPPFLAGS/LDFLAGS.
cd "$SRC_PATH";
curl -fsSLO "https://github.com/libsdl-org/SDL/releases/download/release-$SDL_VERSION/SDL2-$SDL_VERSION.tar.gz"
tar xzf "SDL2-$SDL_VERSION.tar.gz"
cd "SDL2-$SDL_VERSION"
CPPFLAGS="$CXXFLAGS" LDFLAGS="$CFLAGS" ./configure --prefix="$BUILD_PATH" --bindir="$BUILD_PATH/bin" --host=$ARCH2-darwin
make
make install
make distclean


# OpenSSL, shared; the configure target is derived from ARCH (darwin64-<arch>-cc).
cd "$SRC_PATH"
curl -fsSLO "https://www.openssl.org/source/openssl-$OPENSSL_VERSION.tar.gz"
tar xzf "openssl-$OPENSSL_VERSION.tar.gz"
cd "openssl-$OPENSSL_VERSION"
./configure darwin64-$ARCH-cc -fPIC shared --prefix="$BUILD_PATH"
make
make install


# libpng, from the GitHub tag archive.
# -f makes curl fail on an HTTP error instead of saving the error page.
cd "$SRC_PATH"
curl -fsSLO "https://github.com/glennrp/libpng/archive/refs/tags/v$LIBPNG_VERSION.tar.gz"
tar xzf "v$LIBPNG_VERSION.tar.gz"
cd "libpng-$LIBPNG_VERSION"
./configure --prefix="$BUILD_PATH" --bindir="$BUILD_PATH/bin" --host=$ARCH2-darwin
make
make install


# brotli, out-of-tree CMake Release build in ./out, installed via the install target.
cd "$SRC_PATH"
curl -fsSLO "https://github.com/google/brotli/archive/refs/tags/v$BROTLI_VERSION.tar.gz"
tar xzf "v$BROTLI_VERSION.tar.gz"
cd "brotli-$BROTLI_VERSION"
mkdir out
cd out
cmake -DCMAKE_INSTALL_PREFIX="$BUILD_PATH" -DCMAKE_OSX_ARCHITECTURES="$ARCH" -DCMAKE_BUILD_TYPE=Release -DCMAKE_POLICY_VERSION_MINIMUM=3.5 ..
cmake --build . --config Release --target install


# The x86 assemblers are built only for the x86_64 target.
# -f makes curl fail on an HTTP error instead of saving the error page.
if [ "$ARCH" = "x86_64" ]; then
  cd "$SRC_PATH"
  curl -fsSLO "http://www.tortall.net/projects/yasm/releases/yasm-$YASM_VERSION.tar.gz"
  tar xzf "yasm-$YASM_VERSION.tar.gz"
  cd "yasm-$YASM_VERSION"
  ./configure --prefix="$BUILD_PATH" --bindir="$BUILD_PATH/bin"
  make
  make install
  make distclean

  cd "$SRC_PATH"
  curl -fsSLO "http://www.nasm.us/pub/nasm/releasebuilds/$NASM_VERSION/nasm-$NASM_VERSION.tar.gz"
  tar -xvzf "nasm-$NASM_VERSION.tar.gz"
  cd "nasm-$NASM_VERSION"
  ./configure --prefix="$BUILD_PATH" --bindir="$BUILD_PATH/bin"
  make
  make install
  make distclean

fi


# x264: shallow clone of the "stable" branch, shared library with -fPIC.
# For arm64 the hand-written assembly is switched off via --disable-asm.
arg=()
if [[ "$ARCH" == "arm64" ]]; then
  arg+=("--disable-asm")
fi
cd "$SRC_PATH"
git clone --branch stable --depth 1 https://code.videolan.org/videolan/x264.git
cd x264
./configure \
  --prefix="$BUILD_PATH" \
  --bindir="$BUILD_PATH/bin" \
  --enable-shared \
  --extra-cflags="-fPIC" \
  "${arg[@]}" \
  --host=$ARCH2-darwin
make
make install
make distclean


# LAME (libmp3lame), shared; nasm routines are enabled for x86_64 only.
arg=()
if [ "$ARCH" = "x86_64" ]; then
  arg=("--enable-nasm")
fi
cd "$SRC_PATH";
# -f makes curl fail on an HTTP error instead of saving the error page.
# NOTE(review): -k disables TLS certificate verification for this download;
# confirm it is still required for the SourceForge mirror and drop it if not.
curl -fkLO "https://cfhcable.dl.sourceforge.net/project/lame/lame/$LAME_VERSION/lame-$LAME_VERSION.tar.gz"
tar xzf "lame-$LAME_VERSION.tar.gz"
cd "lame-$LAME_VERSION"
git apply "$base_dir/.ci/libmp3lame-symbols.patch"
./configure --prefix="$BUILD_PATH" --enable-shared "${arg[@]}" --host=$ARCH2-darwin
make
make install
make distclean


# FriBidi, shared.
# -f makes curl fail on an HTTP error instead of saving the error page.
cd "$SRC_PATH"
curl -fsSLO "https://github.com/fribidi/fribidi/releases/download/v$FRIBIDI_VERSION/fribidi-$FRIBIDI_VERSION.tar.xz"
tar xf "fribidi-$FRIBIDI_VERSION.tar.xz"
cd "fribidi-$FRIBIDI_VERSION"
./configure --prefix="$BUILD_PATH" --enable-shared --host=$ARCH2-darwin
make
make install


# FreeType, shared, configured without HarfBuzz (HarfBuzz is built after it).
cd "$SRC_PATH"
curl -fsSLO "https://download.savannah.gnu.org/releases/freetype/freetype-$FREETYPE_VERSION.tar.xz"
tar xf "freetype-$FREETYPE_VERSION.tar.xz"
cd "freetype-$FREETYPE_VERSION"
./configure --prefix="$BUILD_PATH" --enable-shared --host=$ARCH2-darwin --with-harfbuzz=no
make
make install


# HarfBuzz, built with meson against the FreeType installed above; glib, gobject,
# cairo, docs, tests, introspection and benchmarks are all disabled.
# -f makes curl fail on an HTTP error instead of saving the error page.
cd "$SRC_PATH"
curl -fsSLO "https://github.com/harfbuzz/harfbuzz/releases/download/$HARFBUZZ_VERSION/harfbuzz-$HARFBUZZ_VERSION.tar.xz"
tar xf "harfbuzz-$HARFBUZZ_VERSION.tar.xz"
cd "harfbuzz-$HARFBUZZ_VERSION"


if [ "$ARCH" = "arm64" ]; then
  # arm64 goes through a meson cross file describing the target machine.
  cat <<EOT > cross_file.txt
[host_machine]
system = 'darwin'
cpu_family = 'aarch64'
cpu = 'arm64'
endian = 'little'
[binaries]
pkgconfig = '/usr/local/bin/pkg-config'
EOT

  LDFLAGS="-arch arm64" meson build --prefix="$BUILD_PATH" -Dglib=disabled -Dgobject=disabled -Dcairo=disabled \
    -Dfreetype=enabled -Ddocs=disabled -Dtests=disabled -Dintrospection=disabled -Dbenchmark=disabled \
    --cross-file cross_file.txt -Dc_args="-arch arm64" -Dc_link_args="-arch arm64" -Dcpp_args="-arch arm64" \
    -Dcpp_link_args="-arch arm64"
  LDFLAGS="-arch arm64" meson compile -C build
else
  meson build --prefix="$BUILD_PATH" -Dglib=disabled -Dgobject=disabled -Dcairo=disabled -Dfreetype=enabled \
    -Ddocs=disabled -Dtests=disabled -Dintrospection=disabled -Dbenchmark=disabled
  meson compile -C build
fi
meson install -C build


# libass, shared, without fontconfig.
# -f makes curl fail on an HTTP error instead of saving the error page.
cd "$SRC_PATH"
curl -fsSLO "https://github.com/libass/libass/releases/download/$LIBASS_VERSION/libass-$LIBASS_VERSION.tar.gz"
tar xzf "libass-$LIBASS_VERSION.tar.gz"
cd "libass-$LIBASS_VERSION"
./configure --prefix="$BUILD_PATH" --enable-shared --disable-fontconfig --host=$ARCH2-darwin
make
make install


# x265: shallow clone of the release tag, patched, then configured in-tree from ./source.
cd "$SRC_PATH"
git clone --depth 1 --branch "Release_$X265_VERSION" https://bitbucket.org/multicoreware/x265_git.git
cd x265_git
# Backport patches to fix build on cmake >4.0.0
patch -p1 < "$base_dir/.ci/x265_b354c009a60bcd6d7fc04014e200a1ee9c45c167.patch"
patch -p1 < "$base_dir/.ci/x265_51ae8e922bcc4586ad4710812072289af91492a8.patch"
# The Apple arm64 patch is applied for the arm64 target only.
if [ "$ARCH" = "arm64" ]; then
  patch -p1 < "$base_dir/.ci/apple_arm64_x265.patch"
fi

# Common to both targets: replace the if(X265_LATEST_TAG) line in CMakeLists.txt with if(1).
cd source
sed -i "" "s/^if(X265_LATEST_TAG)$/if(1)/g" CMakeLists.txt

# CXX is cleared for the configure and build steps (the script exports CXX as clang).
if [ "$ARCH" = "arm64" ]; then
  CXX= LDFLAGS="-arch arm64" cmake -G "Unix Makefiles" \
    -DCMAKE_INSTALL_PREFIX="$BUILD_PATH" \
    -DENABLE_SHARED:bool=on \
    -DCMAKE_OSX_ARCHITECTURES=arm64 \
    -DCROSS_COMPILE_ARM64:bool=on \
    -DCMAKE_HOST_SYSTEM_PROCESSOR=aarch64 \
    -DCMAKE_APPLE_SILICON_PROCESSOR=aarch64 \
    .
  CXX= LDFLAGS="-arch arm64" make
else
  CXX= cmake -G "Unix Makefiles" \
    -DCMAKE_INSTALL_PREFIX="$BUILD_PATH" \
    -DENABLE_SHARED:bool=on \
    .
  CXX= make
fi
make install


# fdk-aac: shallow clone of the release tag, patched with .ci/fdk.patch and built
# in-tree with CMake for the requested architecture.
cd "$SRC_PATH"
git clone --branch "v$FDK_VERSION" --depth 1 https://github.com/mstorsjo/fdk-aac.git
cd fdk-aac
git apply "$base_dir/.ci/fdk.patch"
cmake \
  -DCMAKE_INSTALL_PREFIX="$BUILD_PATH" \
  -DENABLE_SHARED:bool=on \
  -DCMAKE_OSX_ARCHITECTURES="$ARCH" \
  .
make
make install


# Opus, shared.
# -f (here and below) makes curl fail on an HTTP error instead of saving the
# error page and letting tar fail on it afterwards.
cd "$SRC_PATH"
curl -fLO "https://archive.mozilla.org/pub/opus/opus-$OPUS_VERSION.tar.gz"
tar xzvf "opus-$OPUS_VERSION.tar.gz"
cd "opus-$OPUS_VERSION"
./configure --prefix="$BUILD_PATH" --enable-shared --host=$ARCH2-darwin
make
make install


# libogg, shared. Installed before vorbis and theora are configured.
cd "$SRC_PATH"
curl -fLO "http://downloads.xiph.org/releases/ogg/libogg-$LIBOGG_VERSION.tar.gz"
tar xzvf "libogg-$LIBOGG_VERSION.tar.gz"
cd "libogg-$LIBOGG_VERSION"
./configure --prefix="$BUILD_PATH" --enable-shared --host=$ARCH2-darwin
make
make install


# libvorbis, shared; .ci/apple_libvorbis_cpusubtype.patch removes
# -force_cpusubtype_ALL from the Darwin flags in configure.
cd "$SRC_PATH"
curl -fLO "http://downloads.xiph.org/releases/vorbis/libvorbis-$LIBVORBIS_VERSION.tar.gz"
tar xzvf "libvorbis-$LIBVORBIS_VERSION.tar.gz"
cd "libvorbis-$LIBVORBIS_VERSION"
patch -p1 < "$base_dir/.ci/apple_libvorbis_cpusubtype.patch"
./configure --prefix="$BUILD_PATH" --with-ogg="$BUILD_PATH" --enable-shared --host=$ARCH2-darwin
make
make install


# libtheora, shared.
cd "$SRC_PATH";
curl -fLO "http://downloads.xiph.org/releases/theora/libtheora-$LIBTHEORA_VERSION.tar.gz"
tar xzvf "libtheora-$LIBTHEORA_VERSION.tar.gz"
cd "libtheora-$LIBTHEORA_VERSION"
# https://bugs.gentoo.org/465450
sed -i "" 's/png_\(sizeof\)/\1/g' examples/png2theora.c
THEORA_ARCH=$ARCH2
./configure --prefix="$BUILD_PATH" --enable-shared --host=$THEORA_ARCH-apple-darwin
make
make install


# libvpx: shallow clone of the release tag, shared library with VP8/VP9,
# high-bitdepth VP9, postproc and multithreading enabled.
cd "$SRC_PATH"
git clone --branch "v$LIBVPX_VERSION" --depth 1 https://chromium.googlesource.com/webm/libvpx.git
cd libvpx
# Remove every -march=armv8-a from the configure helper (original kept as *.original).
sed -i.original -e 's/-march=armv8-a//g' build/make/configure.sh

# Per-architecture configure argument and linker flags.
if [ "$ARCH" != "x86_64" ]; then
  arg=("--target=$ARCH-darwin20-gcc")
  LDFLAGS_VPX="$LDFLAGS -arch arm64"
else
  arg=("--as=yasm")
  LDFLAGS_VPX="$LDFLAGS"
fi

# CC and CXX are cleared for configure and for the build.
CXX= CC= LDFLAGS="$LDFLAGS_VPX" ./configure \
  --prefix="$BUILD_PATH" \
  --disable-examples \
  --enable-vp9-highbitdepth \
  --enable-vp8 \
  --enable-vp9 \
  --enable-pic \
  --enable-postproc \
  --enable-multithread \
  "${arg[@]}" \
  --enable-shared \
  --disable-unit-tests
CXX= CC= make
make install


# FFmpeg itself, shared, linked against the libraries installed into $BUILD_PATH.
# -f makes curl fail on an HTTP error instead of saving the error page.
cd "$SRC_PATH"
curl -fsSLO "http://ffmpeg.org/releases/ffmpeg-$FFMPEG_VERSION.tar.bz2"
tar xjf "ffmpeg-$FFMPEG_VERSION.tar.bz2"
cd "ffmpeg-$FFMPEG_VERSION"

# arm64 is configured as a cross-compile with -arch arm64 added to the linker
# and Objective-C flags; x86_64 only needs the library search path.
if [ "$ARCH" = "x86_64" ]; then
    arg=("--extra-ldflags=-L$BUILD_PATH/lib")
else
    arg=("--enable-cross-compile" "--arch=arm64" "--target-os=darwin" "--extra-ldflags=-L$BUILD_PATH/lib -arch arm64" \
      "--extra-objcflags=-arch arm64")
fi

./configure --prefix="$BUILD_PATH" --extra-cflags="$CFLAGS" --extra-cxxflags="$CXXFLAGS" --bindir="$BUILD_PATH/bin" \
  --enable-gpl --enable-libmp3lame --enable-libx264 --enable-libx265 --enable-libfdk_aac --enable-nonfree \
  --enable-libass --enable-libvorbis --enable-libtheora --enable-libfreetype --enable-libopus --enable-libvpx \
  --enable-openssl --enable-shared --pkg-config-flags="--static" --disable-libxcb --disable-libxcb-shm \
  --disable-libxcb-xfixes --disable-libxcb-shape --disable-xlib "${arg[@]}"
make
make install
make distclean


# Log what was produced (file types of the libraries/binaries and the full tree).
file "$BUILD_PATH"/lib/*
file "$BUILD_PATH"/bin/*
find "$BUILD_PATH"


================================================
FILE: .ci/dep_versions.sh
================================================
# Pinned versions of FFmpeg and of the libraries it is built against.
# This file is sourced by .ci/build-wheels.sh and .ci/build_wheels_osx.sh; every
# value is exported so it is also visible to child processes. The URL after each
# version is where available releases can be looked up.
# FFMPEG_VERSION and SDL_VERSION are also set in the actions yaml

export FFMPEG_VERSION=6.0  # https://ffmpeg.org/releases/
export SDL_VERSION=2.26.4  # https://github.com/libsdl-org/SDL/releases
# NOTE(review): SDL_MIXER_VERSION is not referenced by the two build scripts above;
# presumably it is consumed elsewhere -- confirm before removing.
export SDL_MIXER_VERSION=2.6.3  # https://github.com/libsdl-org/SDL_mixer/releases
export OPENSSL_VERSION=3.0.8  # https://www.openssl.org/source
export YASM_VERSION=1.3.0  # http://www.tortall.net/projects/yasm/releases
export NASM_VERSION=2.16.01  # https://www.nasm.us/pub/nasm/releasebuilds/
export LAME_VERSION=3.100  # https://sourceforge.net/projects/lame/files/lame/
export FRIBIDI_VERSION=1.0.12  # https://github.com/fribidi/fribidi/releases
export LIBASS_VERSION=0.17.0  # https://github.com/libass/libass/releases
export X265_VERSION=3.5  # https://bitbucket.org/multicoreware/x265_git/downloads
export FDK_VERSION=2.0.2  # https://github.com/mstorsjo/fdk-aac
export OPUS_VERSION=1.3.1  # https://archive.mozilla.org/pub/opus/
export LIBOGG_VERSION=1.3.5  # http://downloads.xiph.org/releases/ogg/
export LIBTHEORA_VERSION=1.2.0  # https://ftp.osuosl.org/pub/xiph/releases/theora/
export LIBVORBIS_VERSION=1.3.7  # http://downloads.xiph.org/releases/vorbis
export LIBVPX_VERSION=1.14.1  # https://chromium.googlesource.com/webm/libvpx
# The versions below are used only by the macOS build script.
export XZ_VERSION=5.4.1  # https://tukaani.org/xz/
export ZLIB_VERSION=1.2.13  # https://zlib.net/
export LIBPNG_VERSION=1.6.39  # https://github.com/glennrp/libpng/tags
export BROTLI_VERSION=1.0.9  # https://github.com/google/brotli/tags
export FREETYPE_VERSION=2.12.1  # https://download.savannah.gnu.org/releases/freetype/
export HARFBUZZ_VERSION=6.0.0  # https://github.com/harfbuzz/harfbuzz/releases


================================================
FILE: .ci/fdk.patch
================================================
diff --git a/Makefile.am b/Makefile.am
index 5b2c65b..728c72a 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -13,7 +13,7 @@ AM_CPPFLAGS = \
     -I$(top_srcdir)/libPCMutils/include
 
 AM_CXXFLAGS = -fno-exceptions -fno-rtti
-libfdk_aac_la_LINK = $(LINK) $(libfdk_aac_la_LDFLAGS)
+#libfdk_aac_la_LINK = $(LINK) $(libfdk_aac_la_LDFLAGS)
 # Mention a dummy pure C file to trigger generation of the $(LINK) variable
 nodist_EXTRA_libfdk_aac_la_SOURCES = dummy.c
 
diff --git a/configure.ac b/configure.ac
index 1485ff7..4bec7a7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -19,7 +19,11 @@ AM_CONDITIONAL(EXAMPLE, test x$example = xyes)
 dnl Checks for programs.
 AC_PROG_CC
 AC_PROG_CXX
-LT_INIT
+
+AM_PROG_CC_C_O
+
+AC_PROG_LIBTOOL
+AC_SUBST(LIBTOOL_DEPS)
 
 AC_SEARCH_LIBS([sin], [m])
 

================================================
FILE: .ci/libmp3lame-symbols.patch
================================================
--- lame-3.100/include/libmp3lame.sym	2017-09-06 14:33:35.000000000 -0500
+++ lame-3.100/include/libmp3lame.sym	2017-10-22 16:18:44.708436200 -0500
@@ -1,5 +1,4 @@
 lame_init
-lame_init_old
 lame_set_num_samples
 lame_get_num_samples
 lame_set_in_samplerate
@@ -188,6 +187,7 @@ hip_decode_exit
 hip_set_errorf
 hip_set_debugf
 hip_set_msgf
+hip_set_pinfo
 hip_decode
 hip_decode_headers
 hip_decode1


================================================
FILE: .ci/merge_osx_deps.sh
================================================
#!/bin/bash
# Merge each x86_64 macOS wheel in the current directory with its arm64 counterpart.
#
# Inputs (environment):
#   MACOSX_DEPLOYMENT_TARGET      deployment target of the x86_64 wheels, e.g. "10.13"
#   MACOSX_DEPLOYMENT_TARGET_ARM  deployment target of the arm64 wheels, e.g. "11.0"
#
# For every "*macosx_<ver>_x86_64.whl" that has a sibling
# "*macosx_<ver_arm>_arm64.whl" with the same leading name, delocate-merge is
# called on the pair. Wheels without a counterpart are left untouched.

set -e -x

# Wheel platform tags spell the deployment target with underscores (10.13 -> 10_13).
py_osx_ver=${MACOSX_DEPLOYMENT_TARGET//./_}
py_osx_ver_arm=${MACOSX_DEPLOYMENT_TARGET_ARM//./_}

x86_suffix="macosx_${py_osx_ver}_x86_64.whl"
arm_suffix="macosx_${py_osx_ver_arm}_arm64.whl"

for whl in *.whl; do
   if [[ "$whl" == *"$x86_suffix" ]]; then
       # Strip the platform suffix itself rather than a fixed number of characters,
       # so the base name stays correct whatever the length of the deployment target.
       whl_base="${whl%"$x86_suffix"}"
       arm_whl="${whl_base}${arm_suffix}"
       if [[ -f "$arm_whl" ]]; then
           delocate-merge "$whl" "$arm_whl"
       fi
   fi
done


================================================
FILE: .ci/x265_51ae8e922bcc4586ad4710812072289af91492a8.patch
================================================
From 51ae8e922bcc4586ad4710812072289af91492a8 Mon Sep 17 00:00:00 2001
From: yaswanthsastry <yaswanth.sastry@multicorewareinc.com>
Date: Mon, 7 Apr 2025 11:27:36 +0530
Subject: [PATCH] Fix for CMake Build Errors in MacOS

---
 source/CMakeLists.txt | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 4f5b3ed82..7183fd3ce 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -6,18 +6,14 @@ if(NOT CMAKE_BUILD_TYPE)
         FORCE)
 endif()
 message(STATUS "cmake version ${CMAKE_VERSION}")
-if(POLICY CMP0025)
-    cmake_policy(SET CMP0025 NEW) # report Apple's Clang as just Clang
-endif()
+
 if(POLICY CMP0042)
     cmake_policy(SET CMP0042 NEW) # MACOSX_RPATH
 endif()
-if(POLICY CMP0054)
-    cmake_policy(SET CMP0054 NEW) # Only interpret if() arguments as variables or keywords when unquoted
-endif()
+
 
 project (x265)
-cmake_minimum_required (VERSION 2.8.8) # OBJECT libraries require 2.8.8
+cmake_minimum_required (VERSION 2.8.8...3.10) # OBJECT libraries require 2.8.8
 include(CheckIncludeFiles)
 include(CheckFunctionExists)
 include(CheckSymbolExists)
@@ -168,7 +164,7 @@ if(APPLE)
   add_definitions(-DMACOS=1)
 endif()
 
-if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
+if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_CXX_COMPILER_ID} STREQUAL "AppleClang")
     set(CLANG 1)
 endif()
 if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Intel")
@@ -740,6 +736,9 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
     if(ARM OR CROSS_COMPILE_ARM)
     # compile ARM arch asm files here
         enable_language(ASM)
+        if(APPLE)
+            set(ARM_ARGS ${ARM_ARGS} -arch ${CMAKE_OSX_ARCHITECTURES})
+        endif()
         foreach(ASM ${ARM_ASMS})
 			set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM})
             list(APPEND ASM_SRCS ${ASM_SRC})
-- 
2.49.0


================================================
FILE: .ci/x265_b354c009a60bcd6d7fc04014e200a1ee9c45c167.patch
================================================
From b354c009a60bcd6d7fc04014e200a1ee9c45c167 Mon Sep 17 00:00:00 2001
From: yaswanthsastry <yaswanth.sastry@multicorewareinc.com>
Date: Mon, 24 Feb 2025 17:07:03 +0530
Subject: [PATCH] Fix CMake build error with latest CMake 4.0 release

---
 source/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 37dbe1a87..4f5b3ed82 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -7,13 +7,13 @@ if(NOT CMAKE_BUILD_TYPE)
 endif()
 message(STATUS "cmake version ${CMAKE_VERSION}")
 if(POLICY CMP0025)
-    cmake_policy(SET CMP0025 OLD) # report Apple's Clang as just Clang
+    cmake_policy(SET CMP0025 NEW) # report Apple's Clang as just Clang
 endif()
 if(POLICY CMP0042)
     cmake_policy(SET CMP0042 NEW) # MACOSX_RPATH
 endif()
 if(POLICY CMP0054)
-    cmake_policy(SET CMP0054 OLD) # Only interpret if() arguments as variables or keywords when unquoted
+    cmake_policy(SET CMP0054 NEW) # Only interpret if() arguments as variables or keywords when unquoted
 endif()
 
 project (x265)
-- 
2.49.0


================================================
FILE: .ci/yum_deps.sh
================================================
#!/bin/bash
# System packages for the manylinux build container. The workflow sources this file
# from CIBW_BEFORE_ALL_LINUX, so it deliberately defines no variables of its own.
set -e -x

# Bring the base image up to date, then enable EPEL before the main install
# (presumably because some of the packages below are only available there -- TODO confirm).
yum -y update
yum -y install epel-release

# Build toolchain, codec/image/audio development headers and misc utilities.
yum -y install \
    libass libass-devel autoconf automake bzip2 cmake freetype-devel \
    gcc gcc-c++ git libtool make mercurial \
    pkgconfig zlib-devel enca-devel fontconfig-devel openssl openssl-devel \
    wget openjpeg openjpeg-devel \
    libpng libpng-devel libtiff libtiff-devel libwebp libwebp-devel \
    dbus-devel dbus ibus-devel ibus libsamplerate-devel \
    libsamplerate libmodplug-devel libmodplug flac-devel flac \
    libjpeg-turbo-devel libjpeg-turbo pulseaudio pulseaudio-libs-devel \
    alsa-lib alsa-lib-devel ca-certificates perl-devel \
    perl perl-IPC-Cmd patch


================================================
FILE: .github/workflows/pythonapp.yml
================================================
name: Python application

# Run on every push and every pull request.
on: [push, pull_request]

# Workflow-wide environment shared by all jobs. The FFmpeg/SDL versions are duplicated
# in .ci/dep_versions.sh, so bump both places together.
env:
  FFMPEG_VERSION: "6.0"  # https://ffmpeg.org/releases/
  SDL_VERSION: "2.26.4"  # https://github.com/libsdl-org/SDL/releases
  SDL_MIXER_VERSION: "2.6.3"  # https://github.com/libsdl-org/SDL_mixer/releases
  # Default for all jobs; the Linux and macOS wheel jobs override it with 0.
  USE_SDL2_MIXER: "1"
  # macOS deployment targets; .ci/merge_osx_deps.sh derives the wheel platform tags
  # it searches for from these two values.
  MACOSX_DEPLOYMENT_TARGET: "10.13"
  MACOSX_DEPLOYMENT_TARGET_ARM: "11.0"

jobs:
  # Builds a Windows wheel for every Python version in the matrix against prebuilt
  # FFmpeg/SDL2 binaries, publishes on tags, and then tests the installed wheel.
  # The Python 3.13 leg additionally produces the sdist.
  windows_wheels_tests:
    runs-on: windows-latest
    env:
      # Locations the dependency archives are unpacked to; SDL_ROOT and FFMPEG_ROOT are
      # resolved to absolute paths in the "Make wheel" step before setup.py runs.
      FF_BUILD_DIR: ~/ff_deps
      SDL_ROOT: ~/ff_deps/SDL2
      FFMPEG_ROOT: ~/ff_deps/ffmpeg
    strategy:
      matrix:
        python: [ '3.9', '3.10', '3.11', '3.12', '3.13']
    steps:
    - uses: actions/checkout@v4.2.2
    - name: Set up Python ${{ matrix.python }}
      uses: actions/setup-python@v5.4.0
      with:
        python-version: ${{ matrix.python }}
    # Downloads the FFmpeg shared build plus the SDL2 and SDL2_mixer VC development
    # packages, then copies the x64 DLLs, import libraries and headers into a single
    # SDL2 tree (bin/, lib/, include/ and include/SDL2/).
    - name: Get dependencies
      run: |
        mkdir "$env:FF_BUILD_DIR"
        cd "$env:FF_BUILD_DIR"

        curl -sLO "https://github.com/GyanD/codexffmpeg/releases/download/$env:FFMPEG_VERSION/ffmpeg-$env:FFMPEG_VERSION-full_build-shared.zip"
        7z x "ffmpeg-$env:FFMPEG_VERSION-full_build-shared.zip"
        ren "ffmpeg-$env:FFMPEG_VERSION-full_build-shared" ffmpeg

        curl -sLO "https://github.com/libsdl-org/SDL/releases/download/release-$env:SDL_VERSION/SDL2-devel-$env:SDL_VERSION-VC.zip"
        7z x "SDL2-devel-$env:SDL_VERSION-VC.zip"
        ren "SDL2-$env:SDL_VERSION" SDL2
        curl -sLO "https://github.com/libsdl-org/SDL_mixer/releases/download/release-$env:SDL_MIXER_VERSION/SDL2_mixer-devel-$env:SDL_MIXER_VERSION-VC.zip"
        7z x "SDL2_mixer-devel-$env:SDL_MIXER_VERSION-VC.zip"

        mkdir "SDL2\bin"
        mkdir "SDL2\include\SDL2"

        Copy-Item "SDL2\COPYING.txt" -destination "SDL2\bin"
        Copy-Item "SDL2\README-SDL.txt" -destination "SDL2\bin"

        Copy-Item "SDL2\lib\x64\*.dll" -destination "SDL2\bin" -Recurse -Force
        Copy-Item "SDL2\lib\x64\*.lib" -destination "SDL2\lib" -Recurse -Force

        Copy-Item "SDL2_mixer-$env:SDL_MIXER_VERSION\lib\x64\*.dll" -destination "SDL2\bin" -Recurse -Force
        Copy-Item "SDL2_mixer-$env:SDL_MIXER_VERSION\lib\x64\*.lib" -destination "SDL2\lib" -Recurse -Force
        Copy-Item "SDL2_mixer-$env:SDL_MIXER_VERSION\include\*" -destination "SDL2\include" -Recurse -Force

        Copy-Item "SDL2\include\*.h" -destination "SDL2\include\SDL2" -Recurse -Force

        echo "Dependency paths are:"
        ls $env:SDL_ROOT
        ls $env:FFMPEG_ROOT
    - name: Install pip deps
      run: |
        python -m pip install --upgrade pip virtualenv wheel setuptools cython~=3.0.11 pytest
    # Only one matrix leg builds the sdist, so a single copy ends up in the artifacts.
    - name: Make sdist
      if: matrix.python == '3.13'
      run: python setup.py sdist --formats=gztar
    # Expands the "~"-relative roots to full paths before invoking the build.
    - name: Make wheel
      run: |
        $env:SDL_ROOT=(get-item $env:SDL_ROOT).FullName
        $env:FFMPEG_ROOT=(get-item $env:FFMPEG_ROOT).FullName
        python setup.py bdist_wheel
    - name: Upload wheel
      uses: actions/upload-artifact@v4.6.2
      with:
        name: py_wheel-win-${{ matrix.python }}
        path: dist
    # The two publishing steps below only run for tag refs.
    - name: Upload to GitHub Release
      uses: softprops/action-gh-release@v2.2.1
      if: startsWith(github.ref, 'refs/tags/')
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      with:
        files: dist/*
    - name: Publish to PyPI
      if: startsWith(github.ref, 'refs/tags/')
      env:
        TWINE_USERNAME: "__token__"
        TWINE_PASSWORD: ${{ secrets.pypi_password }}
      run: |
        python -m pip install twine
        twine upload dist/*
    # Installs the freshly built wheel from dist (no index access) outside the checkout
    # and runs the test suite shipped inside the installed package.
    - name: Test
      run: |
        # see https://social.msdn.microsoft.com/Forums/security/en-US/0c13bd1a-388f-48cf-a190-7259d39a080f/ffmpeg-doesnt-work-from-inside-a-container-but-works-on-the-host?forum=windowscontainers
        # https://trac.ffmpeg.org/ticket/6875, https://stackoverflow.com/questions/46147012/opencv-import-failed-in-windows-container-on-windows-server-2016
        # and https://social.msdn.microsoft.com/Forums/en-US/a95032d2-c469-494a-b3f9-521b1389a6c9/cant-use-opencvpython-package-in-windows-container-windows-server-2016-standard?forum=windowscontainers
        # for the reason we need to manually copy some missing dlls to the PATH
        Invoke-WebRequest  "https://github.com/matham/ffpyplayer/releases/download/v4.1.0/ffmpeg_win_dll_container_deps.zip"  -OutFile "ffmpeg_win_dll_container_deps.zip"
        7z x "ffmpeg_win_dll_container_deps.zip"
        $env:PATH="$env:PATH;$env:GITHUB_WORKSPACE\ffmpeg_win_dll_container_deps\x64"
        ls "$env:GITHUB_WORKSPACE\ffmpeg_win_dll_container_deps\x64"

        $dist_path=(get-item dist).FullName
        $root=(get-item .).FullName
        $env:FFPYPLAYER_TEST_DIRS="$root\ffpyplayer\tests;$root\examples"
        cd ~/

        python -m pip install --no-index --find-links=$dist_path ffpyplayer
        $name = python -c "import ffpyplayer, os.path;print(os.path.dirname(ffpyplayer.__file__))"
        echo $name
        # powershell interprets writing to stderr as an error, so only raise error if the return code is none-zero
        try {
          pytest "$name\tests"
        } catch {
          if ($LastExitCode -ne 0) {
            throw $_
          } else {
            echo $_
          }
        }

  # Installs the sdist on x86_64 and arm64 Ubuntu against the distribution's FFmpeg and
  # SDL2 development packages and runs the tests. It waits for windows_wheels_tests
  # because that job's Python 3.13 leg is the one that builds the sdist.
  linux_test_src:
    strategy:
      matrix:
        include:
          - os: ubuntu-latest
          - os: ubuntu-24.04-arm
    runs-on: ${{ matrix.os }}
    needs: windows_wheels_tests
    steps:
      - uses: actions/checkout@v4.2.2
      - name: Set up Python 3.x
        uses: actions/setup-python@v5.4.0
        with:
          python-version: 3.x
      # Collects every py_wheel-* artifact available so far into dist/.
      - uses: actions/download-artifact@v4.2.1
        with:
          pattern: py_wheel-*
          merge-multiple: true
          path: dist
      # Installs the build dependencies from apt, then pip-installs the sdist tarball
      # from outside the checkout.
      - name: Install
        run: |
          sudo apt update
          sudo apt install -y ffmpeg libavcodec-dev libavdevice-dev libavfilter-dev libavformat-dev
          sudo apt install -y libavutil-dev libswscale-dev libswresample-dev libpostproc-dev libsdl2-dev libsdl2-2.0-0
          sudo apt install -y libsdl2-mixer-2.0-0 libsdl2-mixer-dev python3-dev python3
          python3 -m pip install pytest

          root=`pwd`
          cd ~/
          python3 -m pip install `ls $root/dist/ffpyplayer*.tar.gz`
      # Runs the tests bundled in the installed package; FFPYPLAYER_TEST_DIRS points
      # back at the checkout's tests and examples directories.
      - name: Test
        run: |
          root=`pwd`
          export FFPYPLAYER_TEST_DIRS="$root/ffpyplayer/tests:$root/examples"
          cd ~/

          name=`python3 -c "import ffpyplayer, os.path;print(os.path.dirname(ffpyplayer.__file__))"`
          echo $name
          pytest "$name/tests"

  # Installs a wheel produced by linux_wheels on x86_64 and arm64 Ubuntu and runs the
  # tests. No system FFmpeg/SDL2 packages are installed in this job.
  linux_test_wheel:
    strategy:
      matrix:
        include:
          - os: ubuntu-latest
          - os: ubuntu-24.04-arm
    runs-on: ${{ matrix.os }}
    needs: linux_wheels
    steps:
      - uses: actions/checkout@v4.2.2
      - name: Set up Python 3.x
        uses: actions/setup-python@v5.4.0
        with:
          python-version: 3.x
      # Collects every py_wheel-* artifact available so far into dist/.
      - uses: actions/download-artifact@v4.2.1
        with:
          pattern: py_wheel-*
          merge-multiple: true
          path: dist
      # --no-index restricts pip to the files in dist/, so ffpyplayer itself can only
      # come from the downloaded artifacts.
      - name: Install
        run: |
          python3 -m pip install --upgrade pip pytest
          root=`pwd`
          cd ~/
          python3 -m pip install --no-index --find-links=$root/dist ffpyplayer
      # Runs the tests bundled in the installed package; FFPYPLAYER_TEST_DIRS points
      # back at the checkout's tests and examples directories.
      - name: Test
        run: |
          root=`pwd`
          export FFPYPLAYER_TEST_DIRS="$root/ffpyplayer/tests:$root/examples"
          cd ~/

          name=`python3 -c "import ffpyplayer, os.path;print(os.path.dirname(ffpyplayer.__file__))"`
          echo $name
          pytest "$name/tests"

  # Builds manylinux wheels (x86_64 and aarch64) with cibuildwheel against a dependency
  # tree in ffmpeg_build/ that is built once in a manylinux2014 container and cached.
  linux_wheels:
    env:
      # Inside the cibuildwheel container: SDL2_mixer support off, and pkg-config plus
      # the dynamic loader pointed at the copy of the dependency tree in $HOME.
      CIBW_ENVIRONMENT_LINUX: "USE_SDL2_MIXER=0 PKG_CONFIG_PATH=$HOME/ffmpeg_build/lib/pkgconfig:$HOME/ffmpeg_build/lib64/pkgconfig LD_LIBRARY_PATH=$HOME/ffmpeg_build/lib:$HOME/ffmpeg_build/lib64:$LD_LIBRARY_PATH"
      CIBW_BUILD_VERBOSITY: 3
      CIBW_BUILD: ${{ matrix.cibw_build }}
      CIBW_ARCHS: ${{ matrix.cibw_archs }}
      # Before any wheel is built: copy the dependency tree from the project directory
      # into $HOME and install the system packages listed in .ci/yum_deps.sh.
      CIBW_BEFORE_ALL_LINUX: >
        cp -r `pwd`/ffmpeg_build $HOME/ffmpeg_build &&
        source .ci/yum_deps.sh
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        include:
          - os: ubuntu-latest
            cibw_archs: 'x86_64'
            cibw_build: 'cp39-manylinux_x86_64 cp310-manylinux_x86_64 cp311-manylinux_x86_64 cp312-manylinux_x86_64 cp313-manylinux_x86_64'
          - os: ubuntu-24.04-arm
            cibw_archs: 'aarch64'
            cibw_build: 'cp39-manylinux_aarch64 cp310-manylinux_aarch64 cp311-manylinux_aarch64 cp312-manylinux_aarch64 cp313-manylinux_aarch64'
    steps:
      - uses: actions/checkout@v4.2.2
      - name: Set up Python 3.x
        uses: actions/setup-python@v5.4.0
        with:
          python-version: 3.x
      # The cache key includes the hashes of build-wheels.sh and yum_deps.sh, so
      # editing either script forces the dependencies to be rebuilt.
      - uses: actions/cache@v4.2.3
        id: deps-cache
        with:
          path: ffmpeg_build
          key: ${{ runner.os }}-${{ matrix.cibw_archs }}-deps-cache-${{ hashFiles('**/build-wheels.sh') }}-${{ hashFiles('**/yum_deps.sh') }}
      # On a cache miss, run .ci/build-wheels.sh inside the matching manylinux2014
      # image with the checkout mounted at /io.
      - name: Build dependencies
        if: ${{ steps.deps-cache.outputs.cache-hit != 'true' }}
        run: |
          mkdir dist
          docker run --rm -v `pwd`:/io:rw quay.io/pypa/manylinux2014_${{ matrix.cibw_archs }} /io/.ci/build-wheels.sh
      - name: Install cibuildwheel
        run: |
          python -m pip install cibuildwheel~=2.23.3
      - name: Make wheels
        run: |
          python -m cibuildwheel --output-dir dist
      - name: Upload wheel
        uses: actions/upload-artifact@v4.6.2
        with:
          name: py_wheel-linux-${{ matrix.os }}-${{ matrix.cibw_archs }}
          path: dist
      # The two publishing steps below only run for tag refs.
      - name: Upload to GitHub Release
        uses: softprops/action-gh-release@v2.2.1
        if: startsWith(github.ref, 'refs/tags/')
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          files: dist/*
      - name: Publish to PyPI
        if: startsWith(github.ref, 'refs/tags/')
        env:
          TWINE_USERNAME: "__token__"
          TWINE_PASSWORD: ${{ secrets.pypi_password }}
        run: |
          python -m pip install twine
          twine upload dist/*

  # Builds the macOS wheels, one matrix leg per architecture (x86_64 and arm64), both
  # on a macos-13 runner. The per-architecture wheels are only uploaded as artifacts
  # here; osx_wheels_fuse_test_upload merges, tests and publishes them.
  osx_wheels_create:
    runs-on: macos-13
    env:
      USE_SDL2_MIXER: 0
      # Name prefix of the per-architecture dependency directory under $HOME
      # (the architecture is appended as "_<arch>").
      FFMPEG_BUILD_PATH: "ffmpeg_build"
      CIBW_BUILD_VERBOSITY: 3
      CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-* cp313-*"
      CIBW_ARCHS_MACOS: ${{ matrix.arch }}
      # List the wheel's library dependencies, then run delocate-wheel; both commands
      # get REPAIR_LIBRARY_PATH (exported in the "Build wheels" step) as the dyld
      # fallback library path.
      CIBW_REPAIR_WHEEL_COMMAND_MACOS: >
        DYLD_FALLBACK_LIBRARY_PATH=$REPAIR_LIBRARY_PATH delocate-listdeps {wheel} &&
        DYLD_FALLBACK_LIBRARY_PATH=$REPAIR_LIBRARY_PATH delocate-wheel --require-archs {delocate_archs} -w {dest_dir} {wheel}
    strategy:
      matrix:
        arch: [ "x86_64", "arm64" ]
    steps:
      - uses: actions/checkout@v4.2.2
      - name: Set up Python
        uses: actions/setup-python@v5.4.0
        with:
          python-version: 3.x

      # The cache key covers the architecture, both deployment targets and the hash of
      # the build script, so changing any of them triggers a rebuild.
      - name: Cache ffmpeg
        id: cache-ffmpeg
        uses: actions/cache@v4.2.3
        with:
          path: ~/${{ env.FFMPEG_BUILD_PATH }}_${{ matrix.arch }}
          key: ${{ runner.os }}-ffmpeg-${{ matrix.arch }}-${{ env.MACOSX_DEPLOYMENT_TARGET }}-${{ env.MACOSX_DEPLOYMENT_TARGET_ARM }}-${{ hashFiles('.ci/build_wheels_osx.sh') }}
      # Only runs on a cache miss.
      - name: Build FFmpeg
        if: steps.cache-ffmpeg.outputs.cache-hit != 'true'
        run: bash .ci/build_wheels_osx.sh "${{ matrix.arch }}"

      - name: Install cibuildwheel
        run: |
          python -m pip install cibuildwheel~=2.23.3
      # Points the repair command and pkg-config at the per-architecture dependency
      # tree before running cibuildwheel.
      - name: Build wheels
        run: |
          export REPAIR_LIBRARY_PATH="$HOME/${{ env.FFMPEG_BUILD_PATH }}_${{ matrix.arch }}/lib"
          export PKG_CONFIG_PATH="$HOME/${{ env.FFMPEG_BUILD_PATH }}_${{ matrix.arch }}/lib/pkgconfig:$PKG_CONFIG_PATH"
          python -m cibuildwheel --output-dir dist

      - name: Upload wheel
        uses: actions/upload-artifact@v4.6.2
        with:
          name: py_wheel-osx-${{ matrix.arch }}
          path: dist

  # Merges the per-architecture macOS wheels built by osx_wheels_create, uploads the
  # result as an artifact, publishes on tags, and finally tests the installed wheel.
  osx_wheels_fuse_test_upload:
    runs-on: macos-13
    needs: osx_wheels_create
    steps:
      - uses: actions/checkout@v4.2.2
      - name: Set up Python
        uses: actions/setup-python@v5.4.0
        with:
          python-version: 3.x

      # Only the macOS wheels are needed here. A broader pattern would also pull in
      # whatever Windows/Linux artifacts have already been uploaded by the time this
      # job starts, and the release/PyPI steps below publish everything in dist/.
      - uses: actions/download-artifact@v4.2.1
        with:
          pattern: py_wheel-osx-*
          merge-multiple: true
          path: dist

      # Runs .ci/merge_osx_deps.sh inside dist/, which calls delocate-merge on each
      # x86_64 wheel that has a matching arm64 wheel.
      - name: Fuse FFmpeg arm64/x86
        run: |
          pip install delocate
          cd dist
          bash ../.ci/merge_osx_deps.sh

      - name: Upload wheel
        uses: actions/upload-artifact@v4.6.2
        with:
          name: py_wheel-osx-fused
          path: dist

      # The two publishing steps below only run for tag refs.
      - name: Upload to GitHub Release
        uses: softprops/action-gh-release@v2.2.1
        if: startsWith(github.ref, 'refs/tags/')
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          files: dist/*
      - name: Publish to PyPI
        if: startsWith(github.ref, 'refs/tags/')
        env:
          TWINE_USERNAME: "__token__"
          TWINE_PASSWORD: ${{ secrets.pypi_password }}
        run: |
          python -m pip install twine
          twine upload dist/*
      # Installs ffpyplayer strictly from dist/ (no index access) and runs the tests
      # bundled in the installed package.
      - name: Test
        run: |
          root=`pwd`
          export FFPYPLAYER_TEST_DIRS="$root/ffpyplayer/tests:$root/examples"
          cd ~/

          python -m pip install --upgrade pip virtualenv wheel setuptools pytest
          python -m pip install --no-index --find-links=$root/dist ffpyplayer
          name=`python -c "import ffpyplayer, os.path;print(os.path.dirname(ffpyplayer.__file__))"`
          echo $name
          pytest "$name/tests"

  # Builds the Sphinx documentation on every run and, for pushes to master,
  # force-pushes the generated HTML to the gh-pages branch.
  docs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4.2.2
      - name: Set up Python 3.x
        uses: actions/setup-python@v5.4.0
        with:
          python-version: 3.x
      # The package is installed in editable mode, so the FFmpeg and SDL2 development
      # packages are installed first. -y keeps apt non-interactive, matching the
      # identical install in linux_test_src.
      - name: Install
        run: |
          sudo apt update
          sudo apt install -y ffmpeg libavcodec-dev libavdevice-dev libavfilter-dev libavformat-dev
          sudo apt install -y libavutil-dev libswscale-dev libswresample-dev libpostproc-dev libsdl2-dev libsdl2-2.0-0
          sudo apt install -y libsdl2-mixer-2.0-0 libsdl2-mixer-dev python3-dev

          python -m pip install --upgrade pip virtualenv wheel setuptools sphinx sphinx_rtd_theme
          python -m pip install -e .
      - name: Generate docs
        run: |
          cd doc
          make html
      # Replaces the checkout with a directory holding only the git metadata (on a new
      # orphan gh-pages branch), the built HTML and a .nojekyll marker, then commits
      # and force-pushes it.
      - name: gh-pages upload
        if: github.event_name == 'push' && github.ref == 'refs/heads/master'
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          cp -r doc/build/html ~/docs_temp

          git config --global user.email "moiein2000@gmail.com"
          git config --global user.name "Matthew Einhorn"
          git remote rm origin || true
          git remote add origin "https://x-access-token:${GITHUB_TOKEN}@github.com/matham/ffpyplayer.git"

          git checkout --orphan gh-pages
          cp -r .git ~/docs_git
          cd ..
          rm -rf ffpyplayer
          mkdir ffpyplayer
          cd ffpyplayer
          cp -r ~/docs_git .git
          cp -r ~/docs_temp/* .
          touch .nojekyll

          git add .
          git commit -a -m "Docs for git-$GITHUB_SHA"
          git push origin gh-pages -f


================================================
FILE: .gitignore
================================================
build/
*.pyd
*.pyc
ffpyplayer/*.c
ffpyplayer/*.html
ffpyplayer/player/*.c
ffpyplayer/includes/ffconfig.h
ffpyplayer/includes/ffconfig.pxi
*egg-info


================================================
FILE: COPYING
================================================
                   GNU LESSER GENERAL PUBLIC LICENSE
                       Version 3, 29 June 2007

 Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.


  This version of the GNU Lesser General Public License incorporates
the terms and conditions of version 3 of the GNU General Public
License, supplemented by the additional permissions listed below.

  0. Additional Definitions.

  As used herein, "this License" refers to version 3 of the GNU Lesser
General Public License, and the "GNU GPL" refers to version 3 of the GNU
General Public License.

  "The Library" refers to a covered work governed by this License,
other than an Application or a Combined Work as defined below.

  An "Application" is any work that makes use of an interface provided
by the Library, but which is not otherwise based on the Library.
Defining a subclass of a class defined by the Library is deemed a mode
of using an interface provided by the Library.

  A "Combined Work" is a work produced by combining or linking an
Application with the Library.  The particular version of the Library
with which the Combined Work was made is also called the "Linked
Version".

  The "Minimal Corresponding Source" for a Combined Work means the
Corresponding Source for the Combined Work, excluding any source code
for portions of the Combined Work that, considered in isolation, are
based on the Application, and not on the Linked Version.

  The "Corresponding Application Code" for a Combined Work means the
object code and/or source code for the Application, including any data
and utility programs needed for reproducing the Combined Work from the
Application, but excluding the System Libraries of the Combined Work.

  1. Exception to Section 3 of the GNU GPL.

  You may convey a covered work under sections 3 and 4 of this License
without being bound by section 3 of the GNU GPL.

  2. Conveying Modified Versions.

  If you modify a copy of the Library, and, in your modifications, a
facility refers to a function or data to be supplied by an Application
that uses the facility (other than as an argument passed when the
facility is invoked), then you may convey a copy of the modified
version:

   a) under this License, provided that you make a good faith effort to
   ensure that, in the event an Application does not supply the
   function or data, the facility still operates, and performs
   whatever part of its purpose remains meaningful, or

   b) under the GNU GPL, with none of the additional permissions of
   this License applicable to that copy.

  3. Object Code Incorporating Material from Library Header Files.

  The object code form of an Application may incorporate material from
a header file that is part of the Library.  You may convey such object
code under terms of your choice, provided that, if the incorporated
material is not limited to numerical parameters, data structure
layouts and accessors, or small macros, inline functions and templates
(ten or fewer lines in length), you do both of the following:

   a) Give prominent notice with each copy of the object code that the
   Library is used in it and that the Library and its use are
   covered by this License.

   b) Accompany the object code with a copy of the GNU GPL and this license
   document.

  4. Combined Works.

  You may convey a Combined Work under terms of your choice that,
taken together, effectively do not restrict modification of the
portions of the Library contained in the Combined Work and reverse
engineering for debugging such modifications, if you also do each of
the following:

   a) Give prominent notice with each copy of the Combined Work that
   the Library is used in it and that the Library and its use are
   covered by this License.

   b) Accompany the Combined Work with a copy of the GNU GPL and this license
   document.

   c) For a Combined Work that displays copyright notices during
   execution, include the copyright notice for the Library among
   these notices, as well as a reference directing the user to the
   copies of the GNU GPL and this license document.

   d) Do one of the following:

       0) Convey the Minimal Corresponding Source under the terms of this
       License, and the Corresponding Application Code in a form
       suitable for, and under terms that permit, the user to
       recombine or relink the Application with a modified version of
       the Linked Version to produce a modified Combined Work, in the
       manner specified by section 6 of the GNU GPL for conveying
       Corresponding Source.

       1) Use a suitable shared library mechanism for linking with the
       Library.  A suitable mechanism is one that (a) uses at run time
       a copy of the Library already present on the user's computer
       system, and (b) will operate properly with a modified version
       of the Library that is interface-compatible with the Linked
       Version.

   e) Provide Installation Information, but only if you would otherwise
   be required to provide such information under section 6 of the
   GNU GPL, and only to the extent that such information is
   necessary to install and execute a modified version of the
   Combined Work produced by recombining or relinking the
   Application with a modified version of the Linked Version. (If
   you use option 4d0, the Installation Information must accompany
   the Minimal Corresponding Source and Corresponding Application
   Code. If you use option 4d1, you must provide the Installation
   Information in the manner specified by section 6 of the GNU GPL
   for conveying Corresponding Source.)

  5. Combined Libraries.

  You may place library facilities that are a work based on the
Library side by side in a single library together with other library
facilities that are not Applications and are not covered by this
License, and convey such a combined library under terms of your
choice, if you do both of the following:

   a) Accompany the combined library with a copy of the same work based
   on the Library, uncombined with any other library facilities,
   conveyed under the terms of this License.

   b) Give prominent notice with the combined library that part of it
   is a work based on the Library, and explaining where to find the
   accompanying uncombined form of the same work.

  6. Revised Versions of the GNU Lesser General Public License.

  The Free Software Foundation may publish revised and/or new versions
of the GNU Lesser General Public License from time to time. Such new
versions will be similar in spirit to the present version, but may
differ in detail to address new problems or concerns.

  Each version is given a distinguishing version number. If the
Library as you received it specifies that a certain numbered version
of the GNU Lesser General Public License "or any later version"
applies to it, you have the option of following the terms and
conditions either of that published version or of any later version
published by the Free Software Foundation. If the Library as you
received it does not specify a version number of the GNU Lesser
General Public License, you may choose any version of the GNU Lesser
General Public License ever published by the Free Software Foundation.

  If the Library as you received it specifies that a proxy can decide
whether future versions of the GNU Lesser General Public License shall
apply, that proxy's public statement of acceptance of any version is
permanent authorization for you to choose that version for the
Library.


================================================
FILE: Makefile
================================================
# Developer convenience targets. Override the interpreter on the command line,
# e.g. `make build PYTHON=python3`.
PYTHON = python

.PHONY: build force test html

# Compile the extension modules in place.
build:
	$(PYTHON) setup.py build_ext --inplace

# Same as `build`, but passes -f to build_ext to force a rebuild.
force:
	$(PYTHON) setup.py build_ext --inplace -f

# Run the test suite from the source tree.
test:
	$(PYTHON) -m pytest ffpyplayer/tests

# Build the HTML documentation. $(MAKE) is used instead of a literal `make` so the
# sub-make is the same make program and inherits command-line flags such as -j and -n.
html:
	@cd doc && $(MAKE) html


================================================
FILE: README.rst
================================================
FFPyPlayer is a python binding for the FFmpeg library for playing and writing
media files.

For more information: https://matham.github.io/ffpyplayer/index.html

To install: https://matham.github.io/ffpyplayer/installation.html

.. image:: https://travis-ci.org/matham/ffpyplayer.svg?branch=master
    :target: https://travis-ci.org/matham/ffpyplayer
    :alt: TravisCI status

.. image:: https://ci.appveyor.com/api/projects/status/nfl6tyiwks26ngyu/branch/master?svg=true
    :target: https://ci.appveyor.com/project/matham/ffpyplayer/branch/master
    :alt: Appveyor status

.. image:: https://img.shields.io/pypi/pyversions/ffpyplayer.svg
    :target: https://pypi.python.org/pypi/ffpyplayer/
    :alt: Supported Python versions

.. image:: https://img.shields.io/pypi/v/ffpyplayer.svg
    :target: https://pypi.python.org/pypi/ffpyplayer/
    :alt: Latest Version on PyPI

.. warning::

    Although the ffpyplayer source code is licensed under the LGPL, the ffpyplayer wheels
    for Windows and linux on PYPI are distributed under the GPL because the included FFmpeg binaries
    were compiled with GPL options.

    If you want to use it under the LGPL you need to compile FFmpeg yourself with the correct options.

    Similarly, the wheels bundle openssl for online camera support. However, releases are not made
    for every openssl release, so it is recommended that you compile ffpyplayer yourself if security
    is an issue.

Usage example
-------------

Playing a file:

.. code-block:: python

    >>> from ffpyplayer.player import MediaPlayer
    >>> import time

    >>> player = MediaPlayer(filename)
    >>> val = ''
    >>> while val != 'eof':
    ...     frame, val = player.get_frame()
    ...     if val != 'eof' and frame is not None:
    ...         img, t = frame
    ...         # display img

Writing a video file:

.. code-block:: python

    >>> from ffpyplayer.writer import MediaWriter
    >>> from ffpyplayer.pic import Image

    >>> w, h = 640, 480
    >>> # write at 5 fps.
    >>> out_opts = {'pix_fmt_in':'rgb24', 'width_in':w, 'height_in':h,
    ...     'codec':'rawvideo', 'frame_rate':(5, 1)}
    >>> writer = MediaWriter('output.avi', [out_opts])

    >>> # Construct image
    >>> size = w * h * 3
    >>> buf = bytearray([int(x * 255 / size) for x in range(size)])
    >>> img = Image(plane_buffers=[buf], pix_fmt='rgb24', size=(w, h))

    >>> for i in range(20):
    ...     writer.write_frame(img=img, pts=i / 5., stream=0)

Converting images:

.. code-block:: python

    >>> from ffpyplayer.pic import Image, SWScale
    >>> w, h = 500, 100
    >>> size = w * h * 3
    >>> buf = bytearray([int(x * 255 / size) for x in range(size)])

    >>> img = Image(plane_buffers=[buf], pix_fmt='rgb24', size=(w, h))
    >>> sws = SWScale(w, h, img.get_pixel_format(), ofmt='yuv420p')

    >>> img2 = sws.scale(img)
    >>> img2.get_pixel_format()
    'yuv420p'
    >>> planes = img2.to_bytearray()
    >>> [len(plane) for plane in planes]
    [50000, 12500, 12500, 0]


================================================
FILE: doc/Makefile
================================================
# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
PAPER         =
BUILDDIR      = build

# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif

# Internal variables.
# PAPEROPT_$(PAPER) expands to nothing unless PAPER is "a4" or "letter".
PAPEROPT_a4     = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source

# Every target defined in this file is listed here, so that a file or directory
# that happens to share a target's name can never make that target look up to date.
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf latexpdfja text man texinfo info changes linkcheck doctest gettext xml pseudoxml

# Default target (first in the file): print the list of available targets.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"

# Delete everything inside the build directory.
clean:
	rm -rf $(BUILDDIR)/*

# Build the extension modules in place from the parent directory, then run
# the Sphinx html builder.
# NOTE(review): the trailing `&& cd doc` looks redundant, since make runs each
# recipe line in its own shell - confirm before removing.
html:
	@cd .. && python setup.py build_ext --inplace && cd doc
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

# Each target below runs the Sphinx builder named by -b and writes the result
# to a directory under $(BUILDDIR).

# dirhtml builder.
dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

# singlehtml builder.
singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

# pickle builder.
pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

# json builder.
json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

# htmlhelp builder.
htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

# qthelp builder; afterwards prints the follow-up commands to run by hand.
qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/FFPyPlayer.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/FFPyPlayer.qhc"

# devhelp builder; the mkdir/ln/devhelp lines are only printed, not executed.
devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/FFPyPlayer"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/FFPyPlayer"
	@echo "# devhelp"

# epub builder.
epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

# latex builder (sources only).
latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

# latex builder, followed by the all-pdf target of the generated LaTeX Makefile.
latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

# latex builder, followed by the all-pdf-ja target of the generated LaTeX Makefile.
latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

# text builder.
text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

# man builder.
man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

# texinfo builder (sources only).
texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

# Build the Texinfo sources, then run the `info` target of the generated
# Makefile. $(MAKE) replaces the literal `make` so that command-line flags
# (-n, -j, ...) and the jobserver reach the sub-make, as in the latexpdf targets.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

# gettext builder; uses I18NSPHINXOPTS (no shared doctrees) and writes to locale/.
gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

# changes builder.
changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

# linkcheck builder.
linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

# doctest builder.
doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."

# xml builder.
xml:
	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
	@echo
	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."

# pseudoxml builder.
pseudoxml:
	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
	@echo
	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."


================================================
FILE: doc/make.bat
================================================
@ECHO OFF

REM Command file for Sphinx documentation

REM Use sphinx-build unless SPHINXBUILD is already set.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
REM Output directory and the option strings passed to every builder.
set BUILDDIR=build
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source
set I18NSPHINXOPTS=%SPHINXOPTS% source
REM When PAPER is set, prepend the matching latex_paper_size override.
if NOT "%PAPER%" == "" (
	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)

REM With no argument, jump to the help text.
if "%1" == "" goto help

if "%1" == "help" (
	:help
	echo.Please use `make ^<target^>` where ^<target^> is one of
	echo.  html       to make standalone HTML files
	echo.  dirhtml    to make HTML files named index.html in directories
	echo.  singlehtml to make a single large HTML file
	echo.  pickle     to make pickle files
	echo.  json       to make JSON files
	echo.  htmlhelp   to make HTML files and a HTML help project
	echo.  qthelp     to make HTML files and a qthelp project
	echo.  devhelp    to make HTML files and a Devhelp project
	echo.  epub       to make an epub
	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
	echo.  text       to make text files
	echo.  man        to make manual pages
	echo.  texinfo    to make Texinfo files
	echo.  gettext    to make PO message catalogs
	echo.  changes    to make an overview over all changed/added/deprecated items
	echo.  xml        to make Docutils-native XML files
	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
	echo.  linkcheck  to check all external links for integrity
	echo.  doctest    to run all doctests embedded in the documentation if enabled
	goto end
)

REM Remove every subdirectory and file inside the build directory.
if "%1" == "clean" (
	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
	del /q /s %BUILDDIR%\*
	goto end
)


REM Probe for the Sphinx executable; an errorlevel of 9009 or more is reported
REM as the command not being found.
%SPHINXBUILD% 2> nul
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

REM Build the extension modules in place from the parent directory, then run
REM the html builder.
if "%1" == "html" (
	cd .. & python setup.py build_ext --inplace & cd doc
	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
	goto end
)

REM Each block below runs one Sphinx builder and exits with 1 if it fails.
if "%1" == "dirhtml" (
	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
	goto end
)

if "%1" == "singlehtml" (
	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
	goto end
)

if "%1" == "pickle" (
	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can process the pickle files.
	goto end
)

if "%1" == "json" (
	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can process the JSON files.
	goto end
)

REM The caret at the end of the echo line continues the message on the next line.
if "%1" == "htmlhelp" (
	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
	goto end
)

REM Run the qthelp builder, then print the follow-up commands to run by hand.
REM The collection file shown for "assistant" has the .qhc extension, matching
REM the qthelp target of the Makefile that accompanies this script.
if "%1" == "qthelp" (
	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\FFPyPlayer.qhcp
	echo.To view the help file:
	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\FFPyPlayer.qhc
	goto end
)

REM devhelp builder.
if "%1" == "devhelp" (
	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished.
	goto end
)

REM epub builder.
if "%1" == "epub" (
	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The epub file is in %BUILDDIR%/epub.
	goto end
)

REM latex builder (sources only).
if "%1" == "latex" (
	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
	goto end
)

REM Run the latex builder, then "make all-pdf" inside the LaTeX output directory.
REM Afterwards return to the directory containing this script. A relative cd to
REM the build directory's parent would be resolved from inside the LaTeX
REM directory and fail, leaving the caller in the wrong working directory.
if "%1" == "latexpdf" (
	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
	cd %BUILDDIR%/latex
	make all-pdf
	cd %~dp0
	echo.
	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
	goto end
)

REM Run the latex builder, then "make all-pdf-ja" inside the LaTeX output directory.
REM Afterwards return to the directory containing this script. A relative cd to
REM the build directory's parent would be resolved from inside the LaTeX
REM directory and fail, leaving the caller in the wrong working directory.
if "%1" == "latexpdfja" (
	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
	cd %BUILDDIR%/latex
	make all-pdf-ja
	cd %~dp0
	echo.
	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
	goto end
)

REM Each block below runs one Sphinx builder and exits with 1 if it fails.
if "%1" == "text" (
	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The text files are in %BUILDDIR%/text.
	goto end
)

if "%1" == "man" (
	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The manual pages are in %BUILDDIR%/man.
	goto end
)

if "%1" == "texinfo" (
	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
	goto end
)

REM gettext uses I18NSPHINXOPTS and writes to the locale directory.
if "%1" == "gettext" (
	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
	goto end
)

if "%1" == "changes" (
	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
	if errorlevel 1 exit /b 1
	echo.
	echo.The overview file is in %BUILDDIR%/changes.
	goto end
)

if "%1" == "linkcheck" (
	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
	if errorlevel 1 exit /b 1
	echo.
	echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
	goto end
)

if "%1" == "doctest" (
	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
	if errorlevel 1 exit /b 1
	echo.
	echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
	goto end
)

if "%1" == "xml" (
	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The XML files are in %BUILDDIR%/xml.
	goto end
)

if "%1" == "pseudoxml" (
	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
	goto end
)

REM Common exit point for every handled target.
:end


================================================
FILE: doc/source/api.rst
================================================

####################
  The FFPyPlayer API
####################

.. toctree::
   :maxdepth: 1

   player.rst
   writer.rst
   pic.rst
   tools.rst


================================================
FILE: doc/source/conf.py
================================================
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sphinx_rtd_theme


# -- Project information -----------------------------------------------------

# Name, author and copyright line for the documentation project.
project = 'FFPyPlayer'
author = 'Matthew Einhorn'
copyright = '2013, ' + author


# -- General configuration ---------------------------------------------------

# Sphinx extensions to load: three that ship with Sphinx (the 'sphinx.ext.*'
# namespace), followed by the Read the Docs theme package.
extensions = [
    'sphinx.ext.' + builtin_ext
    for builtin_ext in ('autodoc', 'todo', 'coverage')
] + ['sphinx_rtd_theme']

# Template directories, given relative to this file's directory.
templates_path = ['_templates']

# Patterns (relative to the source directory) for files and directories to
# skip when collecting sources; they also apply to html_static_path and
# html_extra_path. Nothing is excluded.
exclude_patterns = list()


# -- Options for HTML output -------------------------------------------------

# Theme applied to the HTML and HTML Help builders.
html_theme = 'sphinx_rtd_theme'

# Directories with custom static files (e.g. style sheets), relative to this
# file's directory. They are copied after the builtin static files, so a file
# named "default.css" here replaces the builtin "default.css".
html_static_path = ['_static']


================================================
FILE: doc/source/examples.rst
================================================
.. _examples:

********
Examples
********


Converting Image formats
------------------------

.. code-block:: python

    from ffpyplayer.pic import Image, SWScale
    w, h = 500, 100
    size = w * h * 3
    buf = bytearray([int(x * 255 / size) for x in range(size)])

    img = Image(plane_buffers=[buf], pix_fmt='rgb24', size=(w, h))
    sws = SWScale(w, h, img.get_pixel_format(), ofmt='yuv420p')

    img2 = sws.scale(img)
    img2.get_pixel_format()
    'yuv420p'
    planes = img2.to_bytearray()
    [len(plane) for plane in planes]
    [50000, 12500, 12500, 0]

.. _dshow-example:

Playing a webcam with DirectShow on windows
-------------------------------------------

One can use :meth:`~ffpyplayer.tools.list_dshow_devices` to get a list of the
devices and their options for playing. For example:

.. code-block:: python

    # see http://ffmpeg.org/ffmpeg-formats.html#Format-Options for rtbufsize
    # lets use the yuv420p, 320x240, 30fps
    # 27648000 = 320*240*3 at 30fps, for 4 seconds.
    # see http://ffmpeg.org/ffmpeg-devices.html#dshow for video_size, and framerate
    lib_opts = {'framerate':'30', 'video_size':'320x240',
    'pixel_format': 'yuv420p', 'rtbufsize':'27648000'}
    ff_opts = {'f':'dshow'}
    player = MediaPlayer('video=Logitech HD Webcam C525:audio=Microphone (HD Webcam C525)',
                         ff_opts=ff_opts, lib_opts=lib_opts)

    while 1:
        frame, val = player.get_frame()
        if val == 'eof':
            break
        elif frame is None:
            time.sleep(0.01)
        else:
            img, t = frame
            print(val, t, img.get_pixel_format(), img.get_buffer_size())
            time.sleep(val)
    0.0 264107.429 rgb24 (230400, 0, 0, 0)
    0.0 264108.364 rgb24 (230400, 0, 0, 0)
    0.0790016651154 264108.628 rgb24 (230400, 0, 0, 0)
    0.135997533798 264108.764 rgb24 (230400, 0, 0, 0)
    0.274529457092 264108.897 rgb24 (230400, 0, 0, 0)
    0.272421836853 264109.028 rgb24 (230400, 0, 0, 0)
    0.132406949997 264109.164 rgb24 (230400, 0, 0, 0)
    ...

    # NOTE, by default the output was rgb24. To keep the output format the
    # same as the input, do ff_opts['out_fmt'] = 'yuv420p'


Saving an image to disk
-----------------------

.. code-block:: python

    from ffpyplayer.pic import Image, SWScale
    from ffpyplayer.tools import get_supported_pixfmts
    from ffpyplayer.writer import MediaWriter

    # create image
    w, h = 500, 100
    fmt = 'rgb24'
    size = w * h * 3
    buf = bytearray([int(x * 255 / size) for x in range(size)])
    img = Image(plane_buffers=[buf], pix_fmt=fmt, size=(w, h))
    codec = 'tiff'  # we'll encode it using the tiff codec

    # make sure the output codec supports the input pixel format type
    # otherwise, convert it to the best pixel format
    ofmt = get_supported_pixfmts(codec, fmt)[0]
    if ofmt != fmt:
        sws = SWScale(w, h, fmt, ofmt=ofmt)
        img = sws.scale(img)
        fmt = ofmt

    out_opts = {'pix_fmt_in': fmt, 'width_in': w, 'height_in': h,
                'frame_rate': (30, 1), 'codec': codec}
    writer = MediaWriter('myfile.tiff', [out_opts])
    writer.write_frame(img=img, pts=0, stream=0)
    writer.close()

    # to save the file as a compressed tiff using lzw
    writer = MediaWriter('myfile.tiff', [out_opts], lib_opts={'compression_algo': 'lzw'})
    writer.write_frame(img=img, pts=0, stream=0)
    writer.close()

Simple transcoding example
--------------------------

.. code-block:: python

    from ffpyplayer.player import MediaPlayer
    from ffpyplayer.writer import MediaWriter
    import time, weakref

    # only video
    ff_opts={'an':True, 'sync':'video'}
    player = MediaPlayer(filename, ff_opts=ff_opts)
    # wait for size to be initialized (todo: add timeout and check for quitting)
    while player.get_metadata()['src_vid_size'] == (0, 0):
        time.sleep(0.01)

    frame_size = player.get_metadata()['src_vid_size']
    # use the same size as the inputs
    out_opts = {'pix_fmt_in':'rgb24', 'width_in':frame_size[0],
                'height_in':frame_size[1], 'codec':'rawvideo',
                'frame_rate':(30, 1)}

    writer = MediaWriter(filename_out, [out_opts])
    while 1:
        frame, val = player.get_frame()
        if val == 'eof':
            break
        elif frame is None:
            time.sleep(0.01)
        else:
            img, t = frame
            writer.write_frame(img=img, pts=t, stream=0)

More complex transcoding example
--------------------------------

.. code-block:: python

    from ffpyplayer.player import MediaPlayer
    from ffpyplayer.tools import free_frame_ref
    from ffpyplayer.writer import MediaWriter
    import time, weakref

    # only video, output yuv420p frames
    ff_opts={'an':True, 'sync':'video', 'out_fmt':'yuv420p'}
    player = MediaPlayer(filename, ff_opts=ff_opts)
    # wait for size to be initialized
    while player.get_metadata()['src_vid_size'] == (0, 0):
        time.sleep(0.01)

    frame_size = player.get_metadata()['src_vid_size']
    # use half the input size for the output
    out_opts = {'pix_fmt_in':'yuv420p', 'width_in':frame_size[0],
                'height_in':frame_size[1], 'codec':'rawvideo',
                'frame_rate':(30, 1), 'width_out':frame_size[0] / 2,
                'height_out':frame_size[1] / 2}

    writer = MediaWriter(filename_out, [out_opts])
    while 1:
        frame, val = player.get_frame()
        if val == 'eof':
            break
        elif frame is None:
            time.sleep(0.01)
        else:
            img, t = frame
            writer.write_frame(img=img, pts=t, stream=0)

.. _write-simple:

Writing video to file
---------------------

.. code-block:: python

    from ffpyplayer.writer import MediaWriter
    from ffpyplayer.pic import Image

    w, h = 640, 480
    # write at 5 fps.
    out_opts = {'pix_fmt_in':'rgb24', 'width_in':w, 'height_in':h, 'codec':'rawvideo',
                'frame_rate':(5, 1)}
    # write using rgb24 frames into a two stream rawvideo file where the output
    # is half the input size for both streams. Avi format will be used.
    writer = MediaWriter('output.avi', [out_opts] * 2, width_out=w/2,
                         height_out=h/2)

    # Construct images
    size = w * h * 3
    buf = bytearray([int(x * 255 / size) for x in range(size)])
    img = Image(plane_buffers=[buf], pix_fmt='rgb24', size=(w, h))

    buf = bytearray([int((size - x) * 255 / size) for x in range(size)])
    img2 = Image(plane_buffers=[buf], pix_fmt='rgb24', size=(w, h))

    for i in range(20):
        writer.write_frame(img=img, pts=i / 5., stream=0)  # stream 1
        writer.write_frame(img=img2, pts=i / 5., stream=1)  # stream 2

Or force an output format of avi, even though the filename is .mp4:

.. code-block:: python

    writer = MediaWriter('output.mp4', [out_opts] * 2, fmt='avi',
                          width_out=w/2, height_out=h/2)

.. _write-h264:

Compressing video to h264
-------------------------

Or writing compressed h264 files (notice the file is now only 5KB, while
the above results in a 10MB file):

.. code-block:: python

    from ffpyplayer.writer import MediaWriter
    from ffpyplayer.tools import get_supported_pixfmts, get_supported_framerates
    from ffpyplayer.pic import Image

    # make sure the pixel format and rate are supported.
    print(get_supported_pixfmts('libx264', 'rgb24'))
    #['yuv420p', 'yuvj420p', 'yuv422p', 'yuvj422p', 'yuv444p', 'yuvj444p', 'nv12', 'nv16']
    print(get_supported_framerates('libx264', (5, 1)))
    #[]
    w, h = 640, 480
    out_opts = {'pix_fmt_in':'rgb24', 'width_in':w, 'height_in':h, 'codec':'libx264',
                'frame_rate':(5, 1)}

    # use the following libx264 compression options
    lib_opts = {'preset':'slow', 'crf':'22'}
    # set the following metadata (ffmpeg doesn't always support writing metadata)
    metadata = {'title':'Singing in the sun', 'author':'Rat', 'genre':'Animal sounds'}

    # write using yuv420p frames into a two stream h264 codec, mp4 file where the output
    # is half the input size for both streams.
    writer = MediaWriter('output.avi', [out_opts] * 2, fmt='mp4',
                         width_out=w/2, height_out=h/2, pix_fmt_out='yuv420p',
                         lib_opts=lib_opts, metadata=metadata)

    # Construct images
    size = w * h * 3
    buf = bytearray([int(x * 255 / size) for x in range(size)])
    img = Image(plane_buffers=[buf], pix_fmt='rgb24', size=(w, h))

    buf = bytearray([int((size - x) * 255 / size) for x in range(size)])
    img2 = Image(plane_buffers=[buf], pix_fmt='rgb24', size=(w, h))

    for i in range(20):
        writer.write_frame(img=img, pts=i / 5., stream=0)  # stream 1
        writer.write_frame(img=img2, pts=i / 5., stream=1)  # stream 2


================================================
FILE: doc/source/getting_started.rst
================================================
.. _started:

####################
  Getting Started
####################

.. toctree::
   :maxdepth: 2

   installation.rst
   examples.rst


================================================
FILE: doc/source/index.rst
================================================
.. FFPyPlayer documentation master file, created by
   sphinx-quickstart on Mon Dec 23 18:07:03 2013.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

Welcome to FFPyPlayer's documentation!
======================================

Contents:

.. toctree::
   :maxdepth: 2

   getting_started.rst
   api.rst

*  :ref:`genindex`
*  :ref:`modindex`
*  :ref:`search`


================================================
FILE: doc/source/installation.rst
================================================
.. _install:

************
Installation
************

Using binary wheels
-------------------

On windows 7+ (64 or 32 bit) and linux (64 bit), ffpyplayer wheels can be installed for
python 3.5+ using::

    pip install ffpyplayer

.. warning::

    Although the ffpyplayer source code is licensed under the LGPL, the ffpyplayer wheels
    on PYPI are distributed under the GPL because the  FFmpeg binaries
    are GPL'd. For LGPL builds you can compile FFmpeg yourself using LGPL options.

For other OSs or to compile with master see below.

Compiling
---------

Requirements
============

To compile ffpyplayer we need:

    * Cython (``pip install --upgrade cython~=3.0.11``).
    * A c compiler e.g. gcc or MSVC.
    * SDL2 or SDL1.2 (SDL1.2 is not recommended). See :ref:`compille` for how to get it.
    * SDL2_mixer, if you want to play multiple audio files simultaneously (``USE_SDL2_MIXER`` must be set). See :ref:`compille` for how to get it.
    * A recent (2.x+, has been tested with 2.8) FFmpeg compiled with ``--enable-shared``.
      See :ref:`compille` for how to get it.

Compiling ffpyplayer
====================

* Download or compile FFMpeg and SDL2 as shown below and set the appropriate environment variables as needed.
* Install Cython with e.g.::

      pip install --upgrade cython~=3.0.11

* You can select the FFmpeg libraries to be used by defining values for CONFIG_XXX.
  For example, CONFIG_AVFILTER=0 will disable inclusion of the FFmpeg avfilter libraries.
  See setup.py for all the available flags.
* To use SDL2_mixer, which is required when multiple audio files are to be played
  simultaneously (or even when they are open at the same time) environment variable ``USE_SDL2_MIXER``
  must be set to 1 when compiling. SDL2_mixer binaries and headers must also be available.
* Finally, run::

      pip install ffpyplayer

  Or to install master, do::

      pip install https://github.com/matham/ffpyplayer/archive/master.zip

  If you have a local directory with the ffpyplayer source code, you can compile it by running
  one of the following within that directory:

  * ``make`` on linux, or
  * ``python setup.py build_ext --inplace``, or
  * ``pip install -e .`` to also properly install it.

You should now be able to import ffpyplayer with ``import ffpyplayer``.

.. _compille:

SDL and Compiling FFmpeg
------------------------

To use ffpyplayer, the compiled FFmpeg and SDL shared libraries must be available. Following are
instructions for the various OSs.

Windows
=======

You can get pre-compiled FFmpeg libraries from http://ffmpeg.zeranoe.com/builds/. You need
both the dev (which contains the .a files and headers) and the shared (which contains the dlls)
downloads.

You can download SDL2 from https://www.libsdl.org/release/. 2.0.4 is the most recent
`version <https://www.libsdl.org/release/SDL2-devel-2.0.4-mingw.tar.gz>`_.

You can download SDL2_mixer from https://www.libsdl.org/projects/SDL_mixer/. 2.0.1 is the most recent
`version <https://www.libsdl.org/projects/SDL_mixer/release/SDL2_mixer-devel-2.0.1-mingw.tar.gz>`_.

* If there's a root directory containing a ``include`` and ``lib`` directory, each containing the header
  and compiled binaries, respectively, then ``FFMPEG_ROOT`` and ``SDL_ROOT`` can be set to these
  root directories for ffmpeg and sdl, respectively. Otherwise,
* ``SDL_LIB_DIR`` and ``FFMPEG_LIB_DIR`` should point to a folder which contains the
  SDL and FFmpeg compiled shared libraries (``*.dll``), respectively.
* ``FFMPEG_INCLUDE_DIR`` should point to a directory which contains the FFmpeg header files.
* ``SDL_INCLUDE_DIR`` should point to a directory containing the SDL headers. For SDL2,
  this directory contains a SDL2 named directory with all the headers.

In addition, directories containing the SDL and FFmpeg shared libraries (``*.dll``) need to be added to the PATH.

OSX
===

You can get both FFmpeg and SDL2 using brew. You can install them using::

    brew update
    brew install sdl2 sdl2_mixer ffmpeg

Otherwise, follow the Linux instructions.

Linux
======

Ubuntu 18.04
~~~~~~~~~~~~

On Ubuntu 18.04, the following command will install the python, ffmpeg, and sdl2 dependencies::

    sudo apt install ffmpeg libavcodec-dev libavdevice-dev libavfilter-dev libavformat-dev \
    libavutil-dev libswscale-dev libswresample-dev libpostproc-dev libsdl2-dev libsdl2-2.0-0 \
    libsdl2-mixer-2.0-0 libsdl2-mixer-dev python3-dev

Other Linux platforms
~~~~~~~~~~~~~~~~~~~~~~

FFMpeg
^^^^^^^

Follow the instructions at https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu to compile FFMpeg.
However, those instructions detail how to build the static version. We need the shared
version. This means that ``--enable-shared`` and ``--extra-cflags="-fPIC"`` need to be added
when compiling FFmpeg **AND** its dependencies. And if present, ``--disable-shared`` or
``--enable-static`` must be removed.

Following that guide, ``export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/ffmpeg_build/lib`` also needs
to be executed for the compiled binaries to be found.

SDL2
^^^^^

SDL2 can usually be gotten from the package manager, e.g. in Ubuntu 16.04 you can do the following::

    sudo apt-get update
    sudo apt-get -y install libsdl2-dev libsdl2-mixer-dev

Python Headers
^^^^^^^^^^^^^^^

The Python headers are required for compilation, on Ubuntu you can get it with::

    sudo apt-get install python3-dev

For either ffmpeg or sdl2 if manually compiled, ``PKG_CONFIG_PATH`` will need to be set to the path
containing the generated `*.pc` files and ``pkg-config`` will need to be available. *Otherwise,* if
installed to a non-standard location, the paths to the compiled shared libraries and headers will need to be set as follows:

* If there's a root directory containing a ``include`` and ``lib`` directory, each containing the header
  and compiled binaries, respectively, then ``FFMPEG_ROOT`` and ``SDL_ROOT`` can be set to these
  root directories for ffmpeg and sdl, respectively. Otherwise,
* ``SDL_LIB_DIR`` and ``FFMPEG_LIB_DIR`` should point to a folder which contains the
  SDL and FFmpeg compiled shared libraries (*.so), respectively.
* ``FFMPEG_INCLUDE_DIR`` should point to a directory which contains the FFmpeg header files.
* ``SDL_INCLUDE_DIR`` should point to a directory containing the SDL headers. For SDL2,
  this directory contains a SDL2 named directory with all the headers.

In addition, directories containing the SDL and FFmpeg shared libraries (*.so) need to be findable by the
dynamic loader at runtime, e.g. by adding them to ``LD_LIBRARY_PATH``.

You can find a complete minimal example of compiling ffpyplayer on Ubuntu
`here <https://github.com/matham/ffpyplayer/blob/master/.github/workflows/pythonapp.yml>`__.
A more complete example used to build the wheels is
`here <https://github.com/matham/ffpyplayer/blob/master/.ci/build-wheels.sh>`__.


================================================
FILE: doc/source/pic.rst
================================================
.. _pic-api:

******
Images
******

:mod:`ffpyplayer.pic`
=============================

.. automodule:: ffpyplayer.pic
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: doc/source/player.rst
================================================
.. _player-api:

******
Player
******

:mod:`ffpyplayer.player`
=============================

.. automodule:: ffpyplayer.player
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: doc/source/tools.rst
================================================
.. _tools-api:

*****
Tools
*****

:mod:`ffpyplayer.tools`
=============================

.. automodule:: ffpyplayer.tools
   :members:
   :undoc-members:
   :show-inheritance:

.. autoattribute:: ffpyplayer.tools.loglevels

    A dictionary with all the available ffmpeg log levels. The keys are the loglevels
    and the values are their ffmpeg values. The lower the value, the more important
    the log. Note, this is the opposite of Python, where the higher the level, the more
    important the log.

.. autoattribute:: ffpyplayer.tools.codecs_enc

    A list of all the codecs available for encoding video.

.. autoattribute:: ffpyplayer.tools.codecs_dec

    A list of all the codecs available for decoding video and audio.

.. autoattribute:: ffpyplayer.tools.pix_fmts

    A list of all the pixel formats available to ffmpeg.

.. autoattribute:: ffpyplayer.tools.formats_in

    A list of all the formats (e.g. file formats) available for reading.

.. autoattribute:: ffpyplayer.tools.formats_out

    A list of all the formats (e.g. file formats) available for writing.


================================================
FILE: doc/source/writer.rst
================================================
.. _writer-api:

******
Writer
******

:mod:`ffpyplayer.writer`
=============================

.. automodule:: ffpyplayer.writer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: examples/test.py
================================================
'''
To run, please provide a filename on the command line when running the file.
'''


import kivy
from kivy.base import EventLoop
EventLoop.ensure_window()
from ffpyplayer.player import MediaPlayer
from ffpyplayer.tools import set_log_callback, loglevels
from kivy.clock import Clock
from kivy.graphics.texture import Texture
from kivy.app import App
from kivy.core.window import Window
from kivy.lang import Builder
from kivy.uix.relativelayout import RelativeLayout
from kivy.weakmethod import WeakMethod
import sys
import time
from threading import RLock, Thread
import logging
logging.root.setLevel(logging.DEBUG)


# Register the kv rule for ``Root``: an ``Image`` that shows the video, a
# ``ProgressBar`` (id ``seek``) along the bottom whose touches are forwarded to
# ``app.touch_down``, and a vertical ``Slider`` (id ``volume``) on the left
# whose value is forwarded to the player's ``set_volume``.
Builder.load_string('''
<Root>:
    id: rt
    image: img
    volume: volume
    seek: seek
    Image:
        id: img
        size_hint: 0.95, 0.95
        pos: 0.05 * rt.width, 0.05 * rt.height
        allow_stretch: False
        on_size: app.resize()
    ProgressBar:
        id: seek
        size_hint: 0.95, 0.05
        pos: 0.05 * rt.width, 0
        on_touch_down: app.touch_down(args[1])
        value: 0
    Slider:
        id: volume
        orientation: 'vertical'
        size_hint: 0.05, 1.
        pos: 0.0, 0.0
        step: 0.01
        value: 1.
        range: 0., 1.
        on_value: app.ffplayer and app.ffplayer.set_volume(self.value)
''')

class Root(RelativeLayout):
    '''Root widget of the example; its children and the ``image``,
    ``volume`` and ``seek`` attributes are defined by the kv rule above.
    '''
    pass

# Log level name passed to ``MediaPlayer`` as ``loglevel``.
log_level = 'debug'
# Maps an ffpyplayer/FFmpeg log level name to the stdlib ``logging`` function
# used to emit messages of that level.
logger_func = {'quiet': logging.critical, 'panic': logging.critical,
               'fatal': logging.critical, 'error': logging.error,
               'warning': logging.warning, 'info': logging.info,
               'verbose': logging.debug, 'debug': logging.debug,
               'trace': logging.debug}


def log_callback(message, level):
    '''Forward a library log message to the Python ``logging`` module.

    :param message: The log text; surrounding whitespace is stripped and
        empty messages are dropped.
    :param level: The level name used to select the logging function from
        ``logger_func``. Names missing from that table are logged at debug
        level instead of raising ``KeyError`` inside the callback.
    '''
    message = message.strip()
    if message:
        emit = logger_func.get(level, logging.debug)
        emit('ffpyplayer: {}'.format(message))


class PlayerApp(App):
    '''Kivy app that plays the media file named by ``sys.argv[1]``.

    Frames are pulled from the :class:`MediaPlayer` on a background thread
    (:meth:`_next_frame`) and blitted to the ``Image`` widget through a Kivy
    ``Clock`` trigger (:meth:`redraw`).
    '''

    def __init__(self, **kwargs):
        super(PlayerApp, self).__init__(**kwargs)
        # The player is only created in on_start(). Initialise it here so
        # that resize(), redraw(), touch_down(), callback() and the kv
        # ``app.ffplayer and ...`` binding are safe if they run earlier.
        self.ffplayer = None
        self.texture = None
        self.size = (0, 0)
        # Latest ``(image, pts)`` pair waiting to be drawn.
        self.next_frame = None
        # Set to True to make the frame thread leave its loop.
        self._done = False
        self._lock = RLock()
        self._thread = Thread(target=self._next_frame, name='Next frame')
        self._trigger = Clock.create_trigger(self.redraw)
        self._force_refresh = False

    def build(self):
        '''Create and return the root widget.'''
        self.root = Root()
        return self.root

    def on_start(self):
        '''Open the file given on the command line, start the frame thread
        and bind the keyboard handler.
        '''
        self.callback_ref = WeakMethod(self.callback)
        filename = sys.argv[1]
        logging.info('ffpyplayer: Playing file "{}"'.format(filename))
        # try ff_opts = {'vf':'edgedetect'} http://ffmpeg.org/ffmpeg-filters.html
        ff_opts = {}
        self.ffplayer = MediaPlayer(filename, callback=self.callback_ref,
                                    loglevel=log_level, ff_opts=ff_opts)
        self._thread.start()
        self.keyboard = Window.request_keyboard(None, self.root)
        self.keyboard.bind(on_key_down=self.on_keyboard_down)

    def resize(self):
        '''Ask the player to scale its output to fit the image widget while
        keeping the source aspect ratio (one dimension is passed as -1).
        '''
        if self.ffplayer:
            w, h = self.ffplayer.get_metadata()['src_vid_size']
            if not h:
                return
            # ``with`` releases the lock even if set_size() raises.
            with self._lock:
                if self.root.image.width < self.root.image.height * w / float(h):
                    self.ffplayer.set_size(-1, self.root.image.height)
                else:
                    self.ffplayer.set_size(self.root.image.width, -1)
            logging.debug('ffpyplayer: Resized video.')

    def update_pts(self, *args):
        '''Copy the player's current pts to the seek bar.'''
        if self.ffplayer:
            self.root.seek.value = self.ffplayer.get_pts()

    def on_keyboard_down(self, keyboard, keycode, text, modifiers):
        '''Handle the playback keyboard shortcuts.

        p/space toggles pause, r forces a refresh, v/a/t cycle (or with ctrl
        close) the video/audio/subtitle stream, left/right seek by 10s and
        up/down change the volume.

        :returns: False if there is no player, True otherwise.
        '''
        if not self.ffplayer:
            return False
        lock = self._lock
        ctrl = 'ctrl' in modifiers
        if keycode[1] == 'p' or keycode[1] == 'spacebar':
            logging.info('Toggled pause.')
            self.ffplayer.toggle_pause()
        elif keycode[1] == 'r':
            logging.debug('ffpyplayer: Forcing a refresh.')
            self._force_refresh = True
        elif keycode[1] == 'v':
            logging.debug('ffpyplayer: Changing video stream.')
            with lock:
                self.ffplayer.request_channel('video',
                                              'close' if ctrl else 'cycle')
            Clock.unschedule(self.update_pts)
            if ctrl:    # need to continue updating pts, since video is disabled.
                Clock.schedule_interval(self.update_pts, 0.05)
        elif keycode[1] == 'a':
            logging.debug('ffpyplayer: Changing audio stream.')
            with lock:
                self.ffplayer.request_channel('audio',
                                              'close' if ctrl else 'cycle')
        elif keycode[1] == 't':
            logging.debug('ffpyplayer: Changing subtitle stream.')
            with lock:
                self.ffplayer.request_channel('subtitle',
                                              'close' if ctrl else 'cycle')
        elif keycode[1] == 'right':
            logging.debug('ffpyplayer: Seeking forward by 10s.')
            self.ffplayer.seek(10.)
        elif keycode[1] == 'left':
            logging.debug('ffpyplayer: Seeking back by 10s.')
            self.ffplayer.seek(-10.)
        elif keycode[1] == 'up':
            logging.debug('ffpyplayer: Increasing volume.')
            self.ffplayer.set_volume(self.ffplayer.get_volume() + 0.01)
            self.root.volume.value = self.ffplayer.get_volume()
        elif keycode[1] == 'down':
            logging.debug('ffpyplayer: Decreasing volume.')
            self.ffplayer.set_volume(self.ffplayer.get_volume() - 0.01)
            self.root.volume.value = self.ffplayer.get_volume()
        return True

    def touch_down(self, touch):
        '''Seek to the position touched on the seek bar.

        :returns: True if the touch hit the seek bar and a player exists,
            False otherwise.
        '''
        if self.root.seek.collide_point(*touch.pos) and self.ffplayer:
            # The seek bar starts to the right of the volume slider, so the
            # slider's width is subtracted from the touch x position.
            pts = ((touch.pos[0] - self.root.volume.width) /
            self.root.seek.width * self.ffplayer.get_metadata()['duration'])
            logging.debug('ffpyplayer: Seeking to {}.'.format(pts))
            self.ffplayer.seek(pts, relative=False)
            self._force_refresh = True
            return True
        return False

    def callback(self, selector, value):
        '''Callback handed to the ``MediaPlayer`` (through a weak reference).

        On ``'quit'`` the player is dropped from a scheduled clock callback;
        on ``'display_sub'`` the value is forwarded to
        :meth:`display_subtitle`.
        '''
        if self.ffplayer is None:
            return
        if selector == 'quit':
            logging.debug('ffpyplayer: Quitting.')
            def close(*args):
                self._done = True
                self.ffplayer = None
            Clock.schedule_once(close, 0)
        # called from internal thread, it typically reads forward
        elif selector == 'display_sub':
            self.display_subtitle(*value)

    def _next_frame(self):
        '''Body of the "Next frame" thread.

        Repeatedly requests a frame from the player, sleeps for the delay the
        player returned and then triggers :meth:`redraw`. Loops until
        ``self._done`` is set.
        '''
        ffplayer = self.ffplayer
        sleep = time.sleep
        trigger = self._trigger
        while not self._done:
            force = self._force_refresh
            if force:
                self._force_refresh = False
            frame, val = ffplayer.get_frame(force_refresh=force)

            if val == 'eof':
                logging.debug('ffpyplayer: Got eof.')
                sleep(1 / 30.)
            elif val == 'paused':
                logging.debug('ffpyplayer: Got paused.')
                sleep(1 / 30.)
            else:
                if frame:
                    logging.debug('ffpyplayer: Next frame: {}.'.format(val))
                    sleep(val)
                    self.next_frame = frame
                    trigger()
                else:
                    # No frame yet; poll again after ``val`` seconds, or
                    # after 1/30 s if the player gave no delay.
                    val = val if val else (1 / 30.)
                    logging.debug('ffpyplayer: Schedule next frame check: {}.'
                                  .format(val))
                    sleep(val)

    def redraw(self, dt=0, force_refresh=False):
        '''Blit the pending frame into the image texture and update the seek
        bar. (Re)creates the texture whenever the frame size changes.
        '''
        if not self.ffplayer:
            return
        if self.next_frame:
            img, pts = self.next_frame
            if img.get_size() != self.size or self.texture is None:
                self.root.image.canvas.remove_group(str(self)+'_display')
                self.texture = Texture.create(size=img.get_size(),
                                              colorfmt='rgb')
                # by adding 'vf':'vflip' to the player initialization ffmpeg
                # will do the flipping
                self.texture.flip_vertical()
                self.texture.add_reload_observer(self.reload_buffer)
                self.size = img.get_size()
                logging.debug('ffpyplayer: Creating new image texture of '
                              'size: {}.'.format(self.size))
            self.texture.blit_buffer(img.to_memoryview()[0])
            # Re-assign the texture so the Image widget notices the new data.
            self.root.image.texture = None
            self.root.image.texture = self.texture
            self.root.seek.value = pts
            logging.debug('ffpyplayer: Blitted new frame with time: {}.'
                          .format(pts))

        if self.root.seek.value:
            self.root.seek.max = self.ffplayer.get_metadata()['duration']

    def display_subtitle(self, text, fmt, pts, t_start, t_end):
        '''Subtitle hook; intentionally does nothing in this example.'''
        pass # fmt is text (unformatted), or ass (formatted subs)

    def reload_buffer(self, *args):
        '''Texture reload observer: blit the last frame again.'''
        logging.debug('ffpyplayer: Reloading buffer.')
        frame = self.next_frame
        if not frame:
            return
        self.texture.blit_buffer(frame[0].to_memoryview()[0], colorfmt='rgb',
                                 bufferfmt='ubyte')

# Script entry point: install the log callback, run the app, then tear down.
if __name__ == '__main__':
    set_log_callback(log_callback)
    a = PlayerApp()
    a.run()
    # because MediaPlayer runs non-daemon threads, when the main thread exits
    # it'll get stuck waiting for those threads to close, so we manually
    # have to delete these threads by deleting the MediaPlayer object.
    a._done = True
    a.ffplayer = None
    set_log_callback(None)


================================================
FILE: ffpyplayer/__init__.py
================================================
'''
FFPyPlayer library
==================
'''
import sys
import site
import os
from os.path import join
import platform

__all__ = ('dep_bins', )

# Package version; ``version`` is an alias of ``__version__``.
__version__ = '4.5.4.dev0'
version = __version__

# the ffmpeg src git version tested and up to date with,
# and including this commit
_ffmpeg_git = 'c926140558c60786dc577b121df6b3c6b430bd98'
# excludes commits bdf9ed41fe4bdf4e254615b7333ab0feb1977e98,
# 1be3d8a0cb77f8d34c1f39b47bf5328fe10c82d7,
# f1907faab4023517af7d10d746b5684cccc5cfcc, and
# 0995e1f1b31f6e937a1b527407ed3e850f138098 because they require ffmpeg 5.1/5.2
# which is too new as of now

# also skipped all show modes and subtitle display related functionality commits

# TODO:
# * Implement CONFIG_SDL to be able to compile without needing SDL at all.
# * Currently, it only supports text subtitles - bitmap subtitles are ignored.
#   Unless one uses a filter to overlay the subtitle.
# * We can not yet visualize audio to video. Provide a filter chain link between
#   audio to video filters to accomplish this.

dep_bins = []
'''A list of paths to the binaries used by the library. It can be used during
packaging for including required binaries.

It is read only.
'''

# Look for bundled ffmpeg/sdl binaries under ``<prefix>/share/ffpyplayer`` in
# both the interpreter prefix and the per-user base directory.
for d in [sys.prefix, site.USER_BASE]:
    if d is None:
        continue
    for lib in ('ffmpeg', 'sdl'):
        p = join(d, 'share', 'ffpyplayer', lib, 'bin')
        if os.path.isdir(p):
            # PATH may be unset or empty: avoid a KeyError at import time and
            # avoid leaving a trailing separator behind.
            os.environ["PATH"] = os.pathsep.join(
                filter(None, (p, os.environ.get("PATH"))))
            # Available on Windows with Python 3.8+, where PATH is no longer
            # searched for an extension module's dependent DLLs.
            if hasattr(os, 'add_dll_directory'):
                os.add_dll_directory(p)
            dep_bins.append(p)

# Default SDL's audio driver to DirectSound on Windows unless the user has
# already chosen one through the environment.
if 'SDL_AUDIODRIVER' not in os.environ and platform.system() == 'Windows':
    os.environ['SDL_AUDIODRIVER'] = 'DirectSound'


================================================
FILE: ffpyplayer/clib/misc.c
================================================

#include "misc.h"

/*
 * Log version and/or configuration information for every FFmpeg library
 * that is enabled in the build configuration.
 *
 * flags: bitmask of INDENT, SHOW_VERSION and SHOW_CONFIG, interpreted by
 *        PRINT_LIB_INFO.
 * level: av_log level at which the information is emitted.
 *
 * The FLAGS helper macro that used to be defined here is not used by this
 * function; it is defined right above opt_default, its only user.
 */
void print_all_libs_info(int flags, int level)
{
#if CONFIG_AVUTIL
    PRINT_LIB_INFO(avutil,   AVUTIL,   flags, level);
#endif
#if CONFIG_AVCODEC
    PRINT_LIB_INFO(avcodec,  AVCODEC,  flags, level);
#endif
#if CONFIG_AVFORMAT
    PRINT_LIB_INFO(avformat, AVFORMAT, flags, level);
#endif
#if CONFIG_AVDEVICE
    PRINT_LIB_INFO(avdevice, AVDEVICE, flags, level);
#endif
#if CONFIG_AVFILTER
    PRINT_LIB_INFO(avfilter, AVFILTER, flags, level);
#endif
#if CONFIG_SWSCALE
    PRINT_LIB_INFO(swscale,  SWSCALE,  flags, level);
#endif
#if CONFIG_SWRESAMPLE
    PRINT_LIB_INFO(swresample,SWRESAMPLE,  flags, level);
#endif
#if CONFIG_POSTPROC
    PRINT_LIB_INFO(postproc, POSTPROC, flags, level);
#endif
}

/*
 * Look up an option with av_opt_find, but treat an option whose `flags`
 * field is zero as if it had not been found.
 *
 * Returns the matching AVOption, or NULL when there is no match or the
 * match has no flags set.
 */
const AVOption *opt_find(const void * obj, const char *name, const char *unit,
    int opt_flags, int search_flags)
{
    const AVOption *found = av_opt_find(obj, name, unit, opt_flags, search_flags);

    return (found && found->flags) ? found : NULL;
}

/*
 * Dictionary flags for storing the option currently held in the local
 * variable `o`: AV_DICT_APPEND when the option is of flags type, 0 otherwise.
 * Only usable where an `o` is in scope.
 */
#define FLAGS (o->type == AV_OPT_TYPE_FLAGS) ? AV_DICT_APPEND : 0
/*
 * Route one option/value pair to every option dictionary whose library
 * recognises it.
 *
 * opt:           option name; for the codec lookup anything from the first
 *                ':' onwards is ignored.
 * arg:           option value.
 * sws_opts:      optional swscale context that also receives the option
 *                (may be NULL).
 * sws_dict, swr_opts, resample_opts: dictionaries for the swscale,
 *                swresample and avresample options; a NULL pointer disables
 *                the corresponding lookup.
 * format_opts, codec_opts: dictionaries for the format and codec options.
 *
 * Returns 0 when at least one library consumed the option, the negative
 * av_opt_set error when the value was rejected, or
 * AVERROR_OPTION_NOT_FOUND when nothing recognised the option.
 */
int opt_default(const char *opt, const char *arg,
    struct SwsContext *sws_opts, AVDictionary **sws_dict, AVDictionary **swr_opts,
    AVDictionary **resample_opts, AVDictionary **format_opts, AVDictionary **codec_opts)
{
    const AVOption *o;
    int consumed = 0;
    char opt_stripped[128];
    const char *p;
    const AVClass *cc = avcodec_get_class();
    const AVClass *fc = avformat_get_class();
#if CONFIG_AVRESAMPLE
    const AVClass *rc = avresample_get_class();
#endif
#if CONFIG_SWRESAMPLE
    struct SwrContext *swr;
#endif
    const AVClass *sc;
    const AVClass *swr_class;
    int ret;
#if CONFIG_SWSCALE
    struct SwsContext *sws;
#endif


    /* Asking for (f)debug also raises the global log level to debug. */
    if (!strcmp(opt, "debug") || !strcmp(opt, "fdebug"))
        av_log_set_level(AV_LOG_DEBUG);

    /* Copy the part of the name before the first ':' (or the whole name),
     * truncated to the size of opt_stripped. */
    if (!(p = strchr(opt, ':')))
        p = opt + strlen(opt);
    av_strlcpy(opt_stripped, opt, FFMIN(sizeof(opt_stripped), p - opt + 1));

    /* Codec options: match the stripped name, or the name without a leading
     * 'v', 'a' or 's' prefix. The full `opt` is what gets stored. */
    if ((o = opt_find(&cc, opt_stripped, NULL, 0,
                         AV_OPT_SEARCH_CHILDREN | AV_OPT_SEARCH_FAKE_OBJ)) ||
        ((opt[0] == 'v' || opt[0] == 'a' || opt[0] == 's') &&
         (o = opt_find(&cc, opt + 1, NULL, 0, AV_OPT_SEARCH_FAKE_OBJ)))) {
        av_dict_set(codec_opts, opt, arg, FLAGS);
        consumed = 1;
    }
    /* Format options are checked even if the codec layer already took the
     * option, so one option can land in both dictionaries. */
    if ((o = opt_find(&fc, opt, NULL, 0,
                         AV_OPT_SEARCH_CHILDREN | AV_OPT_SEARCH_FAKE_OBJ))) {
        av_dict_set(format_opts, opt, arg, FLAGS);
        if (consumed)
            av_log(NULL, AV_LOG_VERBOSE, "Routing option %s to both codec and muxer layer\n", opt);
        consumed = 1;
    }
#if CONFIG_SWSCALE
    sc = sws_get_class();
    if (sws_dict && !consumed && (o = opt_find(&sc, opt, NULL, 0,
                         AV_OPT_SEARCH_CHILDREN | AV_OPT_SEARCH_FAKE_OBJ))) {
        /* Validate the value on a throw-away context before storing it. */
        sws = sws_alloc_context();
        ret = av_opt_set(sws, opt, arg, 0);
        sws_freeContext(sws);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Error setting option %s.\n", opt);
            return ret;
        }
        /* Also apply it to the caller's context when one was supplied. */
        if (sws_opts){
            ret = av_opt_set(sws_opts, opt, arg, 0);
            if (ret < 0) {
                av_log(NULL, AV_LOG_ERROR, "Error setting option %s for sws_opts.\n", opt);
                return ret;
            }
        }

        av_dict_set(sws_dict, opt, arg, FLAGS);

        consumed = 1;
    }
#else
    if (!consumed && !strcmp(opt, "sws_flags")) {
        av_log(NULL, AV_LOG_WARNING, "Ignoring %s %s, due to disabled swscale\n", opt, arg);
        consumed = 1;
    }
#endif
#if CONFIG_SWRESAMPLE
    swr_class = swr_get_class();
    if (swr_opts && !consumed && (o=opt_find(&swr_class, opt, NULL, 0,
                                    AV_OPT_SEARCH_CHILDREN | AV_OPT_SEARCH_FAKE_OBJ))) {
        /* Validate the value on a throw-away context before storing it. */
        swr = swr_alloc();
        ret = av_opt_set(swr, opt, arg, 0);
        swr_free(&swr);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Error setting option %s.\n", opt);
            return ret;
        }
        av_dict_set(swr_opts, opt, arg, FLAGS);
        consumed = 1;
    }
#endif
#if CONFIG_AVRESAMPLE
    /* Unlike the swscale/swresample branches, this one does not test
     * `consumed` and stores the value without validating it first. */
    if (resample_opts && (o=opt_find(&rc, opt, NULL, 0,
                       AV_OPT_SEARCH_CHILDREN | AV_OPT_SEARCH_FAKE_OBJ))) {
        av_dict_set(resample_opts, opt, arg, FLAGS);
        consumed = 1;
    }
#endif

    if (consumed)
        return 0;
    return AVERROR_OPTION_NOT_FOUND;
}

/*
 * Compute the byte size of each plane of an image.
 *
 * size:           receives the size of each plane (0 for unused planes).
 * required_plane: receives 1 for each plane referenced by a component of the
 *                 pixel format, 0 otherwise. For paletted formats only
 *                 entry 0 is set, although size[1] is filled with 256 * 4.
 * pix_fmt:        pixel format of the image.
 * height:         image height in rows; 0 is rejected.
 * linesizes:      bytes per row of each plane.
 *
 * Returns the sum of the plane sizes, or AVERROR(EINVAL) for a zero height,
 * an unknown or hardware-accelerated pixel format, or a size that does not
 * fit in an int.
 */
int get_plane_sizes(int size[4], int required_plane[4], enum AVPixelFormat pix_fmt,
    int height, const int linesizes[4])
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
    int comp, plane, total;

    /* Both outputs are cleared before any early return. */
    memset(required_plane, 0, 4 * sizeof(required_plane[0]));
    memset(size, 0, 4 * sizeof(size[0]));

    if (!height || !desc || (desc->flags & AV_PIX_FMT_FLAG_HWACCEL))
        return AVERROR(EINVAL);

    if (linesizes[0] > (INT_MAX - 1024) / height)
        return AVERROR(EINVAL);
    size[0] = linesizes[0] * height;

    /* Paletted formats: the image plane plus a 256-entry, 4-byte palette. */
    if (desc->flags & AV_PIX_FMT_FLAG_PAL) {
        size[1] = 256 * 4;
        required_plane[0] = 1;
        return size[0] + size[1];
    }

    /* Mark every plane that some component lives in. */
    for (comp = 0; comp < 4; comp++)
        required_plane[desc->comp[comp].plane] = 1;

    total = size[0];
    for (plane = 1; plane < 4; plane++) {
        int shift, rows;

        /* Stop at the first plane the format does not use. */
        if (!required_plane[plane])
            break;

        /* Planes 1 and 2 are vertically subsampled by log2_chroma_h;
         * the row count is rounded up. */
        shift = (plane == 1 || plane == 2) ? desc->log2_chroma_h : 0;
        rows = (height + (1 << shift) - 1) >> shift;

        if (linesizes[plane] > INT_MAX / rows)
            return AVERROR(EINVAL);
        size[plane] = rows * linesizes[plane];

        if (total > INT_MAX - size[plane])
            return AVERROR(EINVAL);
        total += size[plane];
    }

    return total;
}


================================================
FILE: ffpyplayer/clib/misc.h
================================================

#ifndef _FFINFO_H
#define _FFINFO_H

#include "../includes/ffconfig.h"
#include "libavcodec/avcodec.h"
#include "libavfilter/avfilter.h"
#include "libavformat/avformat.h"
#include "libavdevice/avdevice.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavutil/opt.h"
#include "libavutil/avstring.h"
#include "libavutil/pixdesc.h"


#if CONFIG_POSTPROC
#include "libpostproc/postprocess.h"
#endif

/* Provide AV_LOG_TRACE when the included FFmpeg headers do not define it. */
#ifndef AV_LOG_TRACE
#define AV_LOG_TRACE    56
#endif


/* Bit flags accepted by PRINT_LIB_INFO / print_all_libs_info. */
#define INDENT        1
#define SHOW_VERSION  2
#define SHOW_CONFIG   4

/*
 * Log information about one FFmpeg library at av_log level `level`.
 *
 * libname: lower-case library name, used to build the <libname>_version()
 *          and <libname>_configuration() calls and printed in the output.
 * LIBNAME: upper-case library name, used to build CONFIG_<LIBNAME> and the
 *          LIB<LIBNAME>_VERSION_* macros.
 * flags:   INDENT prefixes each line with two spaces, SHOW_VERSION prints
 *          the compile-time and run-time versions, SHOW_CONFIG prints the
 *          configuration string.
 *
 * NOTE(review): this expands to a bare `if` statement rather than a
 * do { } while (0) block, so take care when using it as the body of an
 * unbraced if/else.
 */
#define PRINT_LIB_INFO(libname, LIBNAME, flags, level)                  \
    if (CONFIG_##LIBNAME) {                                             \
        const char *indent = flags & INDENT? "  " : "";                 \
        if (flags & SHOW_VERSION) {                                     \
            unsigned int version = libname##_version();                 \
            av_log(NULL, level,                                         \
                   "%slib%-11s %2d.%3d.%3d / %2d.%3d.%3d\n",            \
                   indent, #libname,                                    \
                   LIB##LIBNAME##_VERSION_MAJOR,                        \
                   LIB##LIBNAME##_VERSION_MINOR,                        \
                   LIB##LIBNAME##_VERSION_MICRO,                        \
                   version >> 16, version >> 8 & 0xff, version & 0xff); \
        }                                                               \
        if (flags & SHOW_CONFIG) {                                      \
            const char *cfg = libname##_configuration();                \
            av_log(NULL, level, "%s%-11s configuration: %s\n",   	   	\
                    indent, #libname, cfg);                         	\
        }                                                               \
    }

/* Log version/configuration information (selected by `flags`, a mask of
 * INDENT, SHOW_VERSION and SHOW_CONFIG) for each enabled FFmpeg library. */
void print_all_libs_info(int flags, int level);

/* av_opt_find wrapper that returns NULL for options whose flags are zero. */
const AVOption *opt_find(const void * obj, const char *name, const char *unit,
                            int opt_flags, int search_flags);

/* Store option `opt` with value `arg` in each of the given dictionaries whose
 * library recognises it. Returns 0 if it was consumed, a negative error if
 * the value was rejected, or AVERROR_OPTION_NOT_FOUND. */
int opt_default(const char *opt, const char *arg,
    struct SwsContext *sws_opts, AVDictionary **sws_dict, AVDictionary **swr_opts,
    AVDictionary **resample_opts, AVDictionary **format_opts, AVDictionary **codec_opts);

/* Fill `size` with the byte size of each image plane and `required_plane`
 * with a 0/1 flag per plane. Returns the total size or AVERROR(EINVAL). */
int get_plane_sizes(int size[4], int required_plane[4], enum AVPixelFormat pix_fmt,
    int height, const int linesizes[4]);

#endif


================================================
FILE: ffpyplayer/includes/ff_consts.pxi
================================================
# Compile-time (DEF) constants shared by the player code. The bare string
# literals below serve as comments for the constant that follows them.
include "ffconfig.pxi"


''' Minimum SDL audio buffer size, in samples.. '''
DEF SDL_AUDIO_MIN_BUFFER_SIZE = 512
# Alias of SDL_AUDIO_MIN_BUFFER_SIZE.
DEF AUDIO_MIN_BUFFER_SIZE = SDL_AUDIO_MIN_BUFFER_SIZE
' Calculate actual buffer size keeping in mind not cause too frequent audio callbacks. '
DEF AUDIO_MAX_CALLBACKS_PER_SEC = 30

# NOTE(review): presumably a byte limit on queued packets and per-stream
# frame counts, as in ffplay -- confirm against the code that uses them.
DEF MAX_QUEUE_SIZE = (15 * 1024 * 1024)
DEF MIN_FRAMES = 25
DEF EXTERNAL_CLOCK_MIN_FRAMES = 2
DEF EXTERNAL_CLOCK_MAX_FRAMES = 10

'no AV sync correction is done if below the minimum AV sync threshold '
DEF AV_SYNC_THRESHOLD_MIN = 0.04
'AV sync correction is done if above the maximum AV sync threshold '
DEF AV_SYNC_THRESHOLD_MAX = 0.1
'If a frame duration is longer than this, it will not be duplicated to compensate AV sync'
DEF AV_SYNC_FRAMEDUP_THRESHOLD = 0.1
'no AV correction is done if too big error'
DEF AV_NOSYNC_THRESHOLD = 10.0

'maximum audio speed change to get correct sync'
DEF SAMPLE_CORRECTION_PERCENT_MAX = 10

'external clock speed adjustment constants for realtime sources based on buffer fullness'
DEF EXTERNAL_CLOCK_SPEED_MIN = 0.900
DEF EXTERNAL_CLOCK_SPEED_MAX = 1.010
DEF EXTERNAL_CLOCK_SPEED_STEP = 0.001

'we use about AUDIO_DIFF_AVG_NB A-V differences to make the average'
DEF AUDIO_DIFF_AVG_NB = 20

'polls for possible required screen refresh at least this often, should be less than 1/fps'
DEF REFRESH_RATE = 0.0167

'''NOTE: the size must be big enough to compensate the hardware audio buffersize size
TODO: We assume that a decoded and resampled frame fits into this buffer'''
DEF SAMPLE_ARRAY_SIZE = (8 * 65536)

# Per-stream-type queue lengths; FRAME_QUEUE_SIZE is the largest of the three.
DEF VIDEO_PICTURE_QUEUE_SIZE = 3
DEF SUBPICTURE_QUEUE_SIZE = 16
DEF SAMPLE_QUEUE_SIZE = 9
DEF FRAME_QUEUE_SIZE = max(SAMPLE_QUEUE_SIZE, max(VIDEO_PICTURE_QUEUE_SIZE, SUBPICTURE_QUEUE_SIZE))


# Lock operation codes.
# NOTE(review): presumably the operation argument of a ``lockmgr_func``
# callback -- confirm against its users.
DEF FF_LOCK_CREATE = 0
DEF FF_LOCK_OBTAIN = 1
DEF FF_LOCK_RELEASE = 2
DEF FF_LOCK_DESTROY = 3


================================================
FILE: ffpyplayer/includes/ffmpeg.pxi
================================================

from libc.stdint cimport int64_t, uint64_t, int32_t, uint32_t, uint16_t,\
int16_t, uint8_t, int8_t, uintptr_t

# Opaque declaration of C's ``va_list`` so that it can appear in the
# signatures of the av_log callbacks declared further down.
cdef extern from "stdarg.h":
    ctypedef struct va_list:
        pass

# Function-pointer type taking ``(void **, int)`` and returning int.
ctypedef int (*lockmgr_func)(void **, int)
# Function-pointer type taking a single ``void *``; ``except? 1`` makes a
# return value of 1 a possible (to be checked) exception indicator.
# Used as the entry-point type of SDL_CreateThread in sdl.pxi.
ctypedef int (*int_void_func)(void *) except? 1

# Sample type used by the RDFT declarations (see av_rdft_calc below).
ctypedef float FFTSample

include "ff_consts.pxi"
include "sdl.pxi"


# Every FFmpeg declaration below lives inside this single ``cdef`` block.
# Only the struct fields listed here are visible to Cython code.
cdef:
    # ``extern from *`` declares the names without making Cython emit an
    # #include for them. AVRational, used below, is declared later in this
    # same ``cdef`` block.
    extern from * nogil:
        struct AVPacket:
            uint8_t *data
            int64_t pos
            int64_t pts
            int64_t dts
            int size
            int stream_index
            int flags
            int64_t duration
        enum AVMediaType:
            AVMEDIA_TYPE_UNKNOWN = -1,  #///< Usually treated as AVMEDIA_TYPE_DATA
            AVMEDIA_TYPE_VIDEO,
            AVMEDIA_TYPE_AUDIO,
            AVMEDIA_TYPE_DATA,          #///< Opaque data information usually continuous
            AVMEDIA_TYPE_SUBTITLE,
            AVMEDIA_TYPE_ATTACHMENT,    #///< Opaque data information usually sparse
            AVMEDIA_TYPE_NB,
        # Opaque: no fields are accessed from Cython.
        struct AVBufferRef:
            pass
        int av_compare_ts(int64_t, AVRational, int64_t, AVRational)
        const char* av_get_media_type_string(AVMediaType)
        const int av_log2(unsigned int)

    # Byte-IO layer declarations from libavformat/avio.h. AVDictionary, used
    # by avio_open2, is declared later in this ``cdef`` block.
    extern from "libavformat/avio.h" nogil:
        int AVIO_FLAG_WRITE
        int avio_check(const char *, int)
        int avio_open2(AVIOContext **, const char *, int, const AVIOInterruptCB *,
                       AVDictionary **)
        int avio_close(AVIOContext *)
        struct AVIOContext:
            int error
            int eof_reached
        # Callback/opaque pair; also embedded in AVFormatContext as
        # ``interrupt_callback``.
        struct AVIOInterruptCB:
            int (*callback)(void*)
            void *opaque
        int avio_feof(AVIOContext *)
        int64_t avio_tell(AVIOContext *)

    # AVFifoBuffer API from libavutil/fifo.h. The last argument of the
    # generic read/write functions is a function pointer.
    extern from "libavutil/fifo.h" nogil:
        struct AVFifoBuffer:
            uint8_t *buffer
        int av_fifo_space(const AVFifoBuffer *)
        int av_fifo_grow(AVFifoBuffer *, unsigned int)
        int av_fifo_generic_write(AVFifoBuffer *, void *, int, int (*)(void*, void*, int))
        AVFifoBuffer *av_fifo_alloc(unsigned int)
        int av_fifo_size(const AVFifoBuffer *)
        int av_fifo_generic_read(AVFifoBuffer *, void *, int, void (*)(void*, void*, int))
        void av_fifo_freep(AVFifoBuffer **)

    # Number parsing helper from libavutil/eval.h.
    extern from "libavutil/eval.h" nogil:
        double av_strtod(const char *, char **)

    # String helpers from libavutil/avstring.h (the ``...`` functions are
    # C varargs).
    extern from "libavutil/avstring.h" nogil:
         size_t av_strlcpy(char *, const char *, size_t)
         size_t av_strlcatf(char *, size_t, const char *, ...)
         char *av_asprintf(const char *, ...)

    # Takes an ``int32_t`` array and returns a double.
    extern from "libavutil/display.h" nogil:
        double av_display_rotation_get (const int32_t [])

    # Rescales an int64 value between two AVRational arguments.
    extern from "libavutil/mathematics.h" nogil:
        int64_t av_rescale_q(int64_t, AVRational, AVRational)

    # Pixel-format descriptor lookups from libavutil/pixdesc.h. AVPixelFormat
    # is declared later in this ``cdef`` block.
    extern from "libavutil/pixdesc.h" nogil:
        struct AVPixFmtDescriptor:
            const char *name
            uint8_t nb_components
        const char *av_get_pix_fmt_name(AVPixelFormat)
        AVPixelFormat av_get_pix_fmt(const char *)
        const AVPixFmtDescriptor *av_pix_fmt_desc_next(const AVPixFmtDescriptor *)
        AVPixelFormat av_pix_fmt_desc_get_id(const AVPixFmtDescriptor *)
        const AVPixFmtDescriptor *av_pix_fmt_desc_get(AVPixelFormat)

    # Image buffer helpers from libavutil/imgutils.h; the ``uint8_t **`` /
    # ``int *`` arguments are the plane-pointer and linesize arrays.
    extern from "libavutil/imgutils.h" nogil:
        int av_image_alloc(uint8_t **, int *, int, int, AVPixelFormat, int)
        int av_image_fill_linesizes(int *, AVPixelFormat, int)
        void av_image_copy(uint8_t **, int *, const uint8_t **, const int *,
                           AVPixelFormat, int, int)
        int av_image_fill_pointers(uint8_t **, AVPixelFormat, int, uint8_t *,
                                   const int *linesizes)
        int av_image_fill_arrays(uint8_t **, int *, const uint8_t *,
                                 AVPixelFormat, int, int, int)

    # Dictionary flags, entry struct and read/free functions from
    # libavutil/dict.h. The AVDictionary struct itself and the av_dict_set*
    # functions are declared in the ``extern from *`` block further down.
    extern from "libavutil/dict.h" nogil:
        int AV_DICT_MATCH_CASE
        int AV_DICT_DONT_OVERWRITE
        int AV_DICT_IGNORE_SUFFIX
        int AV_DICT_DONT_STRDUP_VAL
        struct AVDictionaryEntry:
            char *key
            char *value
        void av_dict_free(AVDictionary **)
        AVDictionaryEntry * av_dict_get(AVDictionary *, const char *,
                                        const AVDictionaryEntry *, int)

    # Audio sample-format helpers; only the two enumerators named here are
    # declared to Cython.
    extern from "libavutil/samplefmt.h" nogil:
        enum AVSampleFormat:
            AV_SAMPLE_FMT_S16,
            AV_SAMPLE_FMT_NONE,
        AVSampleFormat av_get_packed_sample_fmt(AVSampleFormat)
        const char *av_get_sample_fmt_name(AVSampleFormat)
        int av_samples_get_buffer_size(int *, int, int, AVSampleFormat, int)
        int av_get_bytes_per_sample(AVSampleFormat)

    # Sleep / clock helpers from libavutil/time.h.
    extern from "libavutil/time.h" nogil:
        int av_usleep(unsigned)
        int64_t av_gettime_relative()

    # CPU flag helpers from libavutil/cpu.h.
    extern from "libavutil/cpu.h" nogil:
        int av_get_cpu_flags()
        int av_parse_cpu_caps(unsigned *, const char *)
        void av_force_cpu_flags(int)

    # General utility declarations (memory, channel layouts, rationals,
    # logging, frames, error codes). ``extern from *`` adds no #include of its
    # own, so the C definitions must come from headers pulled in by the other
    # extern blocks. AVFrame, used below, is declared in the avcodec block.
    extern from * nogil:
        void av_free(void *)
        void av_freep(void *)
        void *av_malloc(size_t)
        void *av_realloc_array(void *, size_t, size_t)
        char *av_strdup(const char *)
        int av_get_channel_layout_nb_channels(uint64_t)
        void av_get_channel_layout_string(char *, int, int, uint64_t)
        int64_t av_get_default_channel_layout(int)
        int av_clip(int a, int amin, int amax)
        int64_t AV_CH_LAYOUT_STEREO_DOWNMIX

        struct AVRational:
            int num #///< numerator
            int den #///< denominator
        double av_q2d(AVRational)
        int av_find_nearest_q_idx(AVRational, const AVRational*)

        # Log levels and logging entry points; the callback signatures use the
        # ``va_list`` type declared at the top of this file.
        int AV_LOG_QUIET
        int AV_LOG_PANIC
        int AV_LOG_FATAL
        int AV_LOG_ERROR
        int AV_LOG_WARNING
        int AV_LOG_INFO
        int AV_LOG_VERBOSE
        int AV_LOG_DEBUG
        int AV_LOG_TRACE
        int AV_LOG_SKIP_REPEATED
        void av_log(void *, int, const char *, ...)
        void av_log_set_flags(int)
        void av_log_set_level(int)
        void av_log_set_callback(void (*)(void*, int, const char*, va_list))
        void av_log_default_callback(void*, int, const char*, va_list)
        void av_log_format_line(void *, int, const char *, va_list, char *, int, int *)

        # Only the enumerators referenced by name from Cython are listed.
        enum AVPixelFormat:
            AV_PIX_FMT_YUV420P,
            AV_PIX_FMT_RGB24,
            AV_PIX_FMT_NONE,

        int64_t AV_NOPTS_VALUE

        # Opaque: only ever handled through pointers.
        struct AVDictionary:
            pass
        int av_dict_set(AVDictionary **, const char *, const char *, int)
        int av_dict_set_int(AVDictionary **, const char *, int64_t, int)

        void av_max_alloc(size_t)

        void *av_mallocz(size_t)

        # Declared as functions so Cython can call them.
        # NOTE(review): in FFmpeg these are preprocessor macros - confirm.
        int AVERROR(int)
        int AVUNERROR(int)

        enum AVPictureType:
            AV_PICTURE_TYPE_NONE
        char av_get_picture_type_char(AVPictureType)
        void av_frame_unref(AVFrame *)
        void av_frame_free(AVFrame **)
        void av_frame_move_ref(AVFrame *, AVFrame *)
        AVFrame* av_frame_clone(const AVFrame *)
        int av_frame_copy_props(AVFrame *, const AVFrame *)
        int av_frame_get_buffer(AVFrame *, int)
        # The two declarations below are what av_opt_set_int_list in
        # inline_funcs.pxi is built from.
        unsigned av_int_list_length_for_size(unsigned, const void *, uint64_t)
        int av_opt_set_bin(void *, const char *, const uint8_t *, int, int)

        AVFrame *av_frame_alloc()
        int64_t av_frame_get_pkt_pos(const AVFrame *)
        int av_frame_get_channels(const AVFrame *)

        int AVERROR_EOF
        int AVERROR_OPTION_NOT_FOUND
        int av_strerror(int, char *, size_t)

        void *av_x_if_null(const void *p, const void *x)

        int64_t AV_TIME_BASE
        AVRational AV_TIME_BASE_Q

        # Opaque: only ever handled through pointers.
        struct AVClass:
            pass

    # Container (demuxer / muxer) declarations from libavformat/avformat.h.
    # Only the struct fields listed here are visible to Cython code. Several
    # types used below (AVCodec, AVCodecID, AVCodecParameters, AVDiscard,
    # AVFrame, AVDeviceInfoList, AVDeviceCapabilitiesQuery) are declared in
    # later extern blocks of this ``cdef`` block.
    extern from "libavformat/avformat.h" nogil:
        int AVSEEK_FLAG_BYTE
        int AVFMT_NOBINSEARCH
        int AVFMT_NOGENSEARCH
        int AVFMT_NO_BYTE_SEEK
        int AVFMT_FLAG_GENPTS
        int AVFMT_TS_DISCONT
        int AV_DISPOSITION_ATTACHED_PIC
        int AVFMT_GLOBALHEADER
        int AVFMT_VARIABLE_FPS
        int AVFMT_NOTIMESTAMPS
        int AVFMT_NOFILE
        int AVFMT_RAWPICTURE
        struct AVChapter:
            int id
            AVRational time_base
            int64_t start
            int64_t end
            AVDictionary *metadata
        struct AVInputFormat:
            int (*read_seek)(AVFormatContext *, int, int64_t, int)
            int (*get_device_list)(AVFormatContext *, AVDeviceInfoList *)
            int (*create_device_capabilities)(AVFormatContext *, AVDeviceCapabilitiesQuery *)
            int flags
            const char *name
            const char *long_name
            const char *extensions
        # Opaque: only ever handled through pointers.
        struct AVCodecTag:
            pass
        struct AVOutputFormat:
            const char *name
            const char *long_name
            const char *extensions
            int flags
            AVCodecID video_codec
            const AVCodecTag* const* codec_tag
        struct AVFormatContext:
            AVInputFormat *iformat
            AVOutputFormat *oformat
            AVStream **streams
            AVProgram **programs
            unsigned int nb_streams
            unsigned int nb_programs
            AVIOContext *pb
            AVDictionary *metadata
            AVIOInterruptCB interrupt_callback
            int flags
            int64_t start_time
            # Declared 64-bit to match the FFmpeg struct; with ``int`` any
            # value coerced through the declared type would be narrowed.
            int64_t bit_rate
            int64_t duration
            unsigned int nb_chapters
            AVChapter **chapters
            char *url
        struct AVStream:
            int index
            AVRational time_base
            int64_t start_time
            AVDiscard discard
            AVPacket attached_pic
            int disposition
            AVRational avg_frame_rate
            AVRational r_frame_rate
            AVDictionary *metadata
            AVCodecParameters *codecpar
        struct AVProgram:
            int id
            unsigned int nb_stream_indexes
            unsigned int *stream_index
        enum  AVPacketSideDataType:
            AV_PKT_DATA_DISPLAYMATRIX
        void av_format_inject_global_side_data(AVFormatContext *)
        int avformat_network_init()
        int avformat_network_deinit()
        AVInputFormat *av_find_input_format(const char *)
        AVRational av_guess_sample_aspect_ratio(AVFormatContext *, AVStream *, AVFrame *)
        AVRational av_guess_frame_rate(AVFormatContext *, AVStream *, AVFrame *)
        int avformat_match_stream_specifier(AVFormatContext *, AVStream *,
                                            const char *)
        AVFormatContext *avformat_alloc_context()
        int avformat_open_input(AVFormatContext **, const char *, AVInputFormat *, AVDictionary **)
        void avformat_close_input(AVFormatContext **)
        int avformat_find_stream_info(AVFormatContext *, AVDictionary **)
        int avformat_seek_file(AVFormatContext *, int, int64_t, int64_t, int64_t, int)
        int av_find_best_stream(AVFormatContext *, AVMediaType, int, int, AVCodec **, int)
        void av_dump_format(AVFormatContext *, int, const char *, int)
        int av_read_pause(AVFormatContext *)
        int av_read_play(AVFormatContext *)
        int av_read_frame(AVFormatContext *, AVPacket *)
        AVProgram *av_find_program_from_stream(AVFormatContext *, AVProgram *, int)
        int avformat_write_header(AVFormatContext *, AVDictionary **)
        int av_write_trailer(AVFormatContext *)
        int avformat_alloc_output_context2(AVFormatContext **, AVOutputFormat *,
                                           const char *, const char *)
        AVStream *avformat_new_stream(AVFormatContext *, const AVCodec *)
        int av_interleaved_write_frame(AVFormatContext *, AVPacket *)
        void avformat_free_context(AVFormatContext *)
        uint8_t *av_stream_get_side_data (AVStream *, AVPacketSideDataType, int *)
        # Iterators over the registered muxers / demuxers; the ``void **``
        # argument carries the iteration state between calls.
        const AVOutputFormat *av_muxer_iterate(void **)
        const AVInputFormat *av_demuxer_iterate(void **)

    # Device enumeration types from libavdevice/avdevice.h; referenced by the
    # AVInputFormat callbacks declared in the avformat block.
    extern from "libavdevice/avdevice.h" nogil:
        void avdevice_register_all()
        struct AVDeviceInfo:
            char *device_name
            char *device_description
        struct AVDeviceInfoList:
            AVDeviceInfo **devices
            int nb_devices
            int default_device
        # Opaque: only ever handled through pointers.
        struct AVDeviceCapabilitiesQuery:
            pass

    # Scaling / pixel-format conversion API from libswscale/swscale.h.
    # SwsContext and SwsFilter are opaque to Cython.
    extern from "libswscale/swscale.h" nogil:
        int SWS_BICUBIC
        struct SwsContext:
            pass
        struct SwsFilter:
            pass
        const AVClass *sws_get_class()
        SwsContext *sws_getContext(int, int, AVPixelFormat, int, int, AVPixelFormat,
                                   int, SwsFilter *, SwsFilter *, const double *)
        SwsContext *sws_getCachedContext(SwsContext *, int, int, AVPixelFormat,
                                        int, int, AVPixelFormat, int, SwsFilter *,
                                        SwsFilter *, const double *)
        int sws_scale(SwsContext *, const uint8_t *const [], const int[], int, int,
                      uint8_t *const [], const int[])
        void sws_freeContext(SwsContext *)

    # Frame side-data access from libavutil/frame.h (AVFrame itself is
    # declared in the avcodec block below).
    extern from "libavutil/frame.h" nogil:
        enum AVFrameSideDataType:
            AV_FRAME_DATA_DISPLAYMATRIX,
        struct AVFrameSideData:
            uint8_t *data
        AVFrameSideData *av_frame_get_side_data(const AVFrame *, AVFrameSideDataType)

    # AVOption flags and getters/setters from libavutil/opt.h. AVOption is
    # opaque to Cython.
    extern from "libavutil/opt.h" nogil:
        int AV_OPT_SEARCH_CHILDREN
        int AV_OPT_FLAG_ENCODING_PARAM
        int AV_OPT_FLAG_DECODING_PARAM
        int AV_OPT_FLAG_VIDEO_PARAM
        int AV_OPT_FLAG_AUDIO_PARAM
        int AV_OPT_FLAG_SUBTITLE_PARAM
        int AV_OPT_SEARCH_FAKE_OBJ
        struct AVOption:
            pass
        int av_opt_eval_flags(void *, const AVOption *, const char *, int *)
        int av_opt_get_int(void *, const char *, int, int64_t *)
        int av_opt_set_int(void *, const char *, int64_t, int)
        int av_opt_set_image_size(void *, const char *, int, int, int)
        int av_opt_set(void *, const char *, const char *, int)
        const AVOption *av_opt_find(void *, const char *, const char *, int, int)

    # Packet reference-counting / allocation functions from
    # libavcodec/packet.h (the AVPacket struct is declared at the top of this
    # ``cdef`` block).
    extern from "libavcodec/packet.h" nogil:
        int av_packet_ref(AVPacket *, const AVPacket *)
        void av_packet_unref(AVPacket *)
        void av_packet_move_ref(AVPacket *, AVPacket *)
        AVPacket *av_packet_alloc()
        void av_packet_free(AVPacket **)

    # RDFT API from libavcodec/avfft.h; samples are ``FFTSample`` (float).
    extern from "libavcodec/avfft.h" nogil:
        enum RDFTransformType:
            DFT_R2C,
            IDFT_C2R,
            IDFT_R2C,
            DFT_C2R,
        struct RDFTContext:
            pass
        void av_rdft_end(RDFTContext *)
        RDFTContext *av_rdft_init(int, RDFTransformType)
        void av_rdft_calc(RDFTContext *, FFTSample *)

    # Declares nothing; the block only names the header.
    extern from "libavcodec/version.h" nogil:
        pass

    # Audio resampling API from libswresample/swresample.h. SwrContext is
    # opaque to Cython.
    extern from "libswresample/swresample.h" nogil:
        struct SwrContext:
            pass
        void swr_free(SwrContext **)
        SwrContext *swr_alloc_set_opts(SwrContext *, int64_t, AVSampleFormat,
                                       int, int64_t, AVSampleFormat, int, int, void *)
        int swr_init(SwrContext *)
        int swr_set_compensation(SwrContext *, int, int)
        int swr_convert(SwrContext *, uint8_t **, int, const uint8_t ** , int)

    # Codec layer declarations from libavcodec/avcodec.h: codec/context
    # structs, AVFrame, subtitle types and the encode/decode entry points.
    # Only the struct fields listed here are visible to Cython code.
    extern from "libavcodec/avcodec.h" nogil:
        int AV_CODEC_FLAG2_FAST
        int AV_CODEC_CAP_DR1
        int AV_CODEC_FLAG_GLOBAL_HEADER
        int AV_PKT_FLAG_KEY
        int AV_CODEC_CAP_DELAY
        struct AVCodec:
            const char *name
            int capabilities
            const AVClass *priv_class
            AVCodecID id
            uint8_t max_lowres
            const AVRational *supported_framerates
            const AVPixelFormat *pix_fmts
            AVMediaType type
        struct AVCodecContext:
            int width
            int height
            int64_t pts_correction_num_faulty_pts  # Number of incorrect PTS values so far
            int64_t pts_correction_num_faulty_dts  # Number of incorrect DTS values so far
            AVRational sample_aspect_ratio
            AVRational time_base
            const AVCodec *codec
            AVCodecID codec_id
            AVMediaType codec_type
            int workaround_bugs
            int lowres
            int error_concealment
            int flags
            int flags2
            int sample_rate
            int channels
            uint64_t channel_layout
            AVSampleFormat sample_fmt
            AVPixelFormat pix_fmt
            AVFrame *coded_frame
            AVRational pkt_timebase
        struct AVCodecParameters:
            AVCodecID codec_id
            AVMediaType codec_type
            AVRational sample_aspect_ratio
            int sample_rate
            int channels
        struct AVSubtitle:
            uint16_t format
            uint32_t start_display_time # relative to packet pts, in ms
            uint32_t end_display_time   # relative to packet pts, in ms
            unsigned num_rects
            AVSubtitleRect **rects
            int64_t pts
        # ``data``, ``linesize`` and ``buf`` are declared as plain pointers.
        # NOTE(review): in the C struct these are fixed-size arrays - confirm
        # no code relies on sizeof() of these members.
        struct AVFrame:
            int top_field_first
            int interlaced_frame
            AVPictureType pict_type
            AVRational sample_aspect_ratio
            int width, height
            int format
            int key_frame
            int64_t pts
            int64_t pkt_pts
            int64_t pkt_dts
            int sample_rate
            int nb_samples
            uint64_t channel_layout
            uint8_t **extended_data
            int64_t best_effort_timestamp
            uint8_t **data
            int *linesize
            int channels
            int64_t pkt_pos
            AVBufferRef **buf
        struct AVPicture:
            uint8_t **data
            int *linesize
        struct AVSubtitleRect:
            int x         #///< top left corner  of pict, undefined when pict is not set
            int y         #///< top left corner  of pict, undefined when pict is not set
            int w         #///< width            of pict, undefined when pict is not set
            int h         #///< height           of pict, undefined when pict is not set
            AVPicture pict
            int nb_colors
            char *text
            char *ass
            AVSubtitleType type
        enum AVSubtitleType:
            SUBTITLE_NONE
            SUBTITLE_BITMAP
            SUBTITLE_TEXT
            SUBTITLE_ASS
        AVRational av_codec_get_pkt_timebase(const AVCodecContext *)
        int64_t av_frame_get_best_effort_timestamp(const AVFrame *)
        int av_codec_get_max_lowres(const AVCodec *)
        void av_codec_set_lowres(AVCodecContext *, int)
        int avcodec_parameters_from_context(AVCodecParameters *, const AVCodecContext *)
        int av_dup_packet(AVPacket *)
        # Also declared in the libavcodec/packet.h block above.
        void av_packet_unref(AVPacket *)
        void avsubtitle_free(AVSubtitle *)
        void av_fast_malloc(void *, unsigned int *, size_t)
        void avcodec_register_all()
        int avcodec_close(AVCodecContext *)
        int avcodec_send_packet(AVCodecContext *, const AVPacket *)
        int avcodec_receive_frame(AVCodecContext *, AVFrame *)
        void avcodec_flush_buffers(AVCodecContext *)
        void av_init_packet(AVPacket *)
        int avcodec_parameters_to_context(AVCodecContext *, const AVCodecParameters *)
        void av_codec_set_pkt_timebase(AVCodecContext *, AVRational)
        void av_picture_copy(AVPicture *, const AVPicture *,
                             AVPixelFormat, int, int)
        # Also declared in the ``extern from *`` utility block above.
        AVFrame* av_frame_alloc()
        int avcodec_decode_subtitle2(AVCodecContext *, AVSubtitle *,
                                     int *, AVPacket *)
        int avcodec_decode_audio4(AVCodecContext *, AVFrame *, int *, const AVPacket *)
        # Only the enumerators referenced by name from Cython are listed.
        enum AVCodecID:
            AV_CODEC_ID_NONE
            AV_CODEC_ID_RAWVIDEO
        AVCodec *avcodec_find_decoder(AVCodecID)
        AVCodec *avcodec_find_encoder(AVCodecID)
        AVCodec *avcodec_find_encoder_by_name(const char *)
        AVCodec *avcodec_find_decoder_by_name(const char *)
        const AVClass *avcodec_get_class()
        AVCodecContext *avcodec_alloc_context3(const AVCodec *)
        void avcodec_free_context(AVCodecContext **)
        int avcodec_open2(AVCodecContext *, const AVCodec *, AVDictionary **)
        enum AVDiscard:
            AVDISCARD_DEFAULT,
            AVDISCARD_ALL
        int av_copy_packet(AVPacket *, AVPacket *)
        struct AVCodecDescriptor:
            AVCodecID id
            const char *name
            AVMediaType type
        const AVCodecDescriptor *avcodec_descriptor_get(AVCodecID)
        const AVCodecDescriptor *avcodec_descriptor_next(const AVCodecDescriptor *)
        const AVCodecDescriptor *avcodec_descriptor_get_by_name(const char *)
        AVPixelFormat avcodec_find_best_pix_fmt_of_list(AVPixelFormat *, AVPixelFormat,
                                                        int, int *)
        int avpicture_fill(AVPicture *, const uint8_t *, AVPixelFormat, int, int)
        int avcodec_encode_video2(AVCodecContext *, AVPacket *, const AVFrame *, int *)
        const char *avcodec_get_name(AVCodecID)
        # Iterator over registered codecs; the ``void **`` argument carries
        # the iteration state between calls.
        const AVCodec *av_codec_iterate(void **)
        int av_codec_is_encoder(const AVCodec *)
        int av_codec_is_decoder(const AVCodec *)
        int avcodec_send_frame(AVCodecContext *, const AVFrame *)
        int avcodec_receive_packet(AVCodecContext *, AVPacket *)

    # Filter-graph API from libavfilter/avfilter.h. AVFilter is opaque to
    # Cython; insert_filt in inline_funcs.pxi is built on
    # avfilter_graph_create_filter / avfilter_get_by_name / avfilter_link.
    extern from "libavfilter/avfilter.h" nogil:
        struct AVFilterContext:
            AVFilterLink **inputs
        struct AVFilterLink:
            AVRational time_base
            int sample_rate
            int channels
            uint64_t channel_layout
            AVRational frame_rate
        struct AVFilterGraph:
            char *scale_sws_opts
            unsigned nb_filters
            AVFilterContext **filters
            int nb_threads
        # Singly linked list node (``next``) naming a filter pad.
        struct AVFilterInOut:
            char *name
            AVFilterContext *filter_ctx
            int pad_idx
            AVFilterInOut *next
        struct AVFilter:
            pass
        int avfilter_link_get_channels(AVFilterLink *)
        AVFilterInOut *avfilter_inout_alloc()
        void avfilter_inout_free(AVFilterInOut **)
        int avfilter_graph_parse_ptr(AVFilterGraph *, const char *,
                                     AVFilterInOut **, AVFilterInOut **,
                                     void *)
        int avfilter_link(AVFilterContext *, unsigned,
                          AVFilterContext *, unsigned)
        int avfilter_graph_config(AVFilterGraph *, void *)
        int avfilter_graph_create_filter(AVFilterContext **, const AVFilter *,
                                         const char *, const char *, void *,
                                         AVFilterGraph *)
        AVFilter *avfilter_get_by_name(const char *)
        void avfilter_graph_free(AVFilterGraph **)
        AVFilterGraph *avfilter_graph_alloc()

    # Accessors for the sink end of a filter graph.
    extern from "libavfilter/buffersink.h" nogil:
        int av_buffersink_get_frame_flags(AVFilterContext *, AVFrame *, int)
        AVRational av_buffersink_get_time_base(const AVFilterContext *)
        AVRational av_buffersink_get_frame_rate(const AVFilterContext *)
        int av_buffersink_get_sample_rate(const AVFilterContext *)
        int av_buffersink_get_channels(const AVFilterContext *)
        uint64_t av_buffersink_get_channel_layout(const AVFilterContext *)

    # Feeds a frame into the source end of a filter graph.
    extern from "libavfilter/buffersrc.h" nogil:
        int av_buffersrc_add_frame(AVFilterContext *, AVFrame *)

    # Helpers implemented in the package's own C file (clib/misc.c / misc.h).
    # get_plane_sizes is called by get_image_size in pic.pyx as
    # ``get_plane_sizes(size, req, fmt, h, ls)``.
    extern from "clib/misc.h" nogil:
        uint8_t INDENT
        uint8_t SHOW_VERSION
        uint8_t SHOW_CONFIG
        void print_all_libs_info(int, int)
        int opt_default(
            const char *, const char *, SwsContext *, AVDictionary **, AVDictionary **,
            AVDictionary **, AVDictionary **, AVDictionary **)
        int get_plane_sizes(int *, int *, AVPixelFormat, int, const int *)

# Anonymous enum naming the clock used as the synchronisation master;
# values are implicit (0, 1, 2 in declaration order).
cdef enum:
    AV_SYNC_AUDIO_MASTER, # default choice
    AV_SYNC_VIDEO_MASTER,
    AV_SYNC_EXTERNAL_CLOCK, # synchronize to an external clock


================================================
FILE: ffpyplayer/includes/inline_funcs.pxi
================================================

# C library names needed by the helpers below: strerror for the error-message
# fallback in emsg / fmt_err.
cdef extern from "string.h" nogil:
    char *strerror(int)

# EINVAL is returned (wrapped in AVERROR) by av_opt_set_int_list; the sign of
# EDOM is tested by emsg / fmt_err.
cdef extern from "errno.h" nogil:
    int EINVAL
    int EDOM

# INT_MAX bounds the list size in av_opt_set_int_list.
cdef extern from "limits.h" nogil:
    int INT_MAX

import sys

# Non-zero when ``sys.version_info`` compares greater than ``(3,)``, i.e. on
# Python 3 and later; read by ``tcode`` below.
cdef int PY3 = sys.version_info > (3, )

cdef inline int FFMAX(int a, int b) nogil:
    # Larger of two C ints; ``b`` is returned when they are equal.
    return a if a > b else b
cdef inline double FFMAXD(double a, double b) nogil:
    # Larger of two doubles; ``b`` is returned whenever ``a > b`` is false.
    return a if a > b else b
cdef inline void * FFMAXptr(void *a, void *b) nogil:
    # The pointer that compares greater; ``b`` is returned when they are equal.
    return a if a > b else b
cdef inline int FFMIN(int a, int b) nogil:
    # Smaller of two C ints; ``a`` is returned when they are equal.
    return b if a > b else a
cdef inline double FFMIND(double a, double b) nogil:
    # Smaller of two doubles; ``a`` is returned whenever ``a > b`` is false.
    return b if a > b else a
cdef inline void * FFMINptr(void *a, void *b) nogil:
    # The pointer that compares smaller; ``a`` is returned when they are equal.
    return b if a > b else a
cdef inline int compute_mod(int a, int b) nogil:
    # Non-negative remainder of ``a`` modulo ``b``; for ``b > 0`` the result
    # is always in ``[0, b)``.
    #
    # The remainder is normalised after the fact instead of branching on the
    # sign of ``a``. That keeps the result correct whether ``%`` follows
    # Python semantics (Cython's default, remainder already non-negative) or
    # C semantics (``cdivision=True``, remainder takes the sign of ``a``).
    # Unconditionally adding ``b`` for negative ``a`` would give a value in
    # ``[b, 2b)`` in the first case and ``b`` itself for exact negative
    # multiples in the second.
    cdef int rem = a % b
    if rem < 0:
        rem += b
    return rem

cdef inline int av_opt_set_int_list(void *obj, const char *name, const void *val,
                                    size_t val_deref_size, uint64_t term, int flags) nogil:
    # Set the binary option ``name`` on ``obj`` from the ``term``-terminated
    # list ``val`` whose elements are ``val_deref_size`` bytes each.
    #
    # Returns AVERROR(EINVAL) when the list's byte size would exceed INT_MAX,
    # otherwise whatever av_opt_set_bin returns.
    cdef unsigned n_items = av_int_list_length_for_size(val_deref_size, val, term)

    if n_items > INT_MAX / val_deref_size:
        return AVERROR(EINVAL)
    return av_opt_set_bin(
        obj, name, <const uint8_t *>val, n_items * val_deref_size, flags)

cdef inline int cmp_audio_fmts(AVSampleFormat fmt1, int64_t channel_count1,
                   AVSampleFormat fmt2, int64_t channel_count2) nogil:
    # Return non-zero when the two (sample format, channel count) pairs differ.
    if channel_count1 != 1 or channel_count2 != 1:
        # General case: both the channel count and the exact format must match.
        return channel_count1 != channel_count2 or fmt1 != fmt2
    # Both sides are mono, where planar and packed layouts are the same, so
    # only the packed equivalents of the formats are compared.
    return av_get_packed_sample_fmt(fmt1) != av_get_packed_sample_fmt(fmt2)

cdef inline int64_t get_valid_channel_layout(int64_t channel_layout, int channels) nogil:
    # Return ``channel_layout`` only if it is non-zero and describes exactly
    # ``channels`` channels; otherwise return 0.
    if not channel_layout:
        return 0
    if av_get_channel_layout_nb_channels(channel_layout) != channels:
        return 0
    return channel_layout

cdef inline char * emsg(int code, char *msg, int buff_size) except NULL:
    # Return a C string describing error ``code``.
    #
    # ``msg`` (``buff_size`` bytes) is handed to av_strerror and returned when
    # that call reports success (non-negative result). Otherwise the text
    # comes from C's strerror; ``code`` is negated first when the platform's
    # errno constants are positive (``EDOM > 0``).
    if av_strerror(code, msg, buff_size) >= 0:
        return msg
    return strerror(-code if EDOM > 0 else code)

cdef inline char * fmt_err(int code, char *msg, int buff_size) nogil:
    # GIL-free counterpart of ``emsg``: describe error ``code`` as a C string.
    #
    # Returns ``msg`` when av_strerror fills it successfully, otherwise the
    # strerror text for the code (negated when ``EDOM > 0``).
    cdef bint described = av_strerror(code, msg, buff_size) >= 0

    if described:
        return msg
    if EDOM > 0:
        return strerror(-code)
    return strerror(code)

cdef inline int insert_filt(
        const char *name, const char *arg, AVFilterGraph *graph,
        AVFilterContext **last_filter) nogil:
    # Create filter ``name`` (configured with ``arg``) in ``graph`` and link
    # its output pad 0 to input pad 0 of ``last_filter[0]``.
    #
    # On success ``last_filter[0]`` is replaced by the new filter context and
    # 0 is returned. On failure the negative code from the failing FFmpeg call
    # is returned and ``last_filter[0]`` is left untouched.
    cdef AVFilterContext *new_ctx
    cdef int status

    status = avfilter_graph_create_filter(
        &new_ctx, avfilter_get_by_name(name), name, arg, NULL, graph)
    if status >= 0:
        status = avfilter_link(new_ctx, 0, last_filter[0], 0)
        if status >= 0:
            last_filter[0] = new_ctx
            return 0
    return status

cdef inline object tcode(bytes s):
    # Return ``s`` decoded as UTF-8 when ``PY3`` is set, otherwise ``s``
    # unchanged.
    return s.decode('utf8') if PY3 else s


================================================
FILE: ffpyplayer/includes/sdl.pxi
================================================
from libc.stdint cimport int64_t, uint64_t, int32_t, uint32_t, uint16_t,\
int16_t, uint8_t, int8_t, uintptr_t

# Core SDL declarations: init flags, threads, condition variables and the
# audio spec struct. Everything is ``nogil`` except SDL_Init and
# SDL_InitSubSystem, which are explicitly declared ``with gil``.
cdef extern from "SDL.h" nogil:
    int SDL_INIT_VIDEO
    int SDL_INIT_AUDIO
    int SDL_INIT_TIMER
    int SDL_INIT_EVENTTHREAD

    void SDL_Delay(int)

    void SDL_WaitThread(SDL_Thread *, int *)
    # The three handle types below are opaque to Cython.
    struct SDL_mutex:
        pass
    struct SDL_Thread:
        pass
    struct SDL_cond:
        pass

    char *SDL_GetError()

    SDL_cond *SDL_CreateCond()
    void SDL_DestroyCond(SDL_cond *)
    int SDL_CondSignal(SDL_cond *)
    int SDL_CondWait(SDL_cond *, SDL_mutex *)

    void SDL_Quit()
    int SDL_Init(uint32_t) with gil
    int SDL_InitSubSystem(uint32_t) with gil

    # ``callback`` receives (userdata, buffer, length) per its signature.
    struct SDL_AudioSpec:
        int freq
        uint16_t format
        uint8_t channels
        uint8_t silence
        uint16_t samples
        uint16_t padding
        uint32_t size
        void (*callback)(void *, uint8_t *, int)
        void *userdata


# Thread creation; the entry point has the ``int_void_func`` type declared in
# ffmpeg.pxi, and the function is declared ``with gil``.
cdef extern from "SDL_thread.h" nogil:
    SDL_Thread *SDL_CreateThread(int_void_func, const char *, void *) with gil

# SDL_mixer declarations, compiled in only when the compile-time flag
# USE_SDL2_MIXER is true.
# NOTE(review): the flag is presumably defined in ffconfig.pxi - confirm.
IF USE_SDL2_MIXER:
    cdef extern from "SDL_mixer.h" nogil:
        struct Mix_Chunk:
            int allocated
            uint8_t *abuf
            uint32_t alen
            uint8_t volume

        int Mix_OpenAudio(int, uint16_t, int, int)
        int Mix_QuerySpec(int *, uint16_t *, int *)
        void Mix_CloseAudio()

        Mix_Chunk *Mix_QuickLoad_RAW(uint8_t *, uint32_t)
        void Mix_FreeChunk(Mix_Chunk *)

        int Mix_AllocateChannels(int)
        int Mix_PlayChannel(int, Mix_Chunk *, int)
        int Mix_Volume(int, int)
        # Effect callbacks are passed as function pointers.
        int Mix_RegisterEffect(int, void (*)(int, void *, int, void *), void (*)(int, void *), void *)
        int Mix_UnregisterEffect(int, void (*)(int, void *, int, void *))
        void Mix_Pause(int)
        void Mix_Resume(int)
        int Mix_HaltChannel(int)


# Remaining SDL names, declared without naming a header (``extern from *``):
# audio device control, mutexes, and video/overlay/event types.
# NOTE(review): several names here (SDL_SetVideoMode, SDL_Overlay,
# SDL_WM_SetCaption, ...) look like SDL 1.2-era API - confirm which are still
# referenced; unreferenced extern declarations generate no C code.
cdef extern from * nogil:
    uint32_t SDL_HWACCEL
    uint32_t SDL_ASYNCBLIT
    uint32_t SDL_HWSURFACE
    uint32_t SDL_FULLSCREEN
    uint32_t SDL_RESIZABLE
    uint32_t SDL_YV12_OVERLAY
    uint8_t SDL_MIX_MAXVOLUME

    uint16_t AUDIO_S16SYS
    int SDL_OpenAudio(SDL_AudioSpec *, SDL_AudioSpec *)
    int SDL_AUDIO_ALLOW_ANY_CHANGE
    ctypedef uint32_t SDL_AudioDeviceID
    SDL_AudioDeviceID SDL_OpenAudioDevice(
        const char*, int, const SDL_AudioSpec*, SDL_AudioSpec*, int)
    void SDL_PauseAudioDevice(SDL_AudioDeviceID, int)
    void SDL_CloseAudioDevice(SDL_AudioDeviceID)
    void SDL_MixAudioFormat(
        uint8_t*, const uint8_t*, uint16_t, uint32_t, int)

    void SDL_PauseAudio(int)
    void SDL_CloseAudio()
    void SDL_MixAudio(uint8_t *, const uint8_t *, uint32_t, int)

    SDL_mutex *SDL_CreateMutex()
    void SDL_DestroyMutex(SDL_mutex *)
    int SDL_mutexP(SDL_mutex *) # SDL_LockMutex
    int SDL_mutexV(SDL_mutex *) # SDL_UnlockMutex
    int SDL_CondWaitTimeout(SDL_cond *, SDL_mutex *, uint32_t)

    void SDL_UpdateRect(SDL_Surface *, int32_t, int32_t, uint32_t, uint32_t)
    int SDL_FillRect(SDL_Surface *, SDL_Rect *, uint32_t)
    int SDL_LockYUVOverlay(SDL_Overlay *)
    void SDL_UnlockYUVOverlay(SDL_Overlay *)
    int SDL_DisplayYUVOverlay(SDL_Overlay *, SDL_Rect *)
    void SDL_FreeYUVOverlay(SDL_Overlay *)
    uint32_t SDL_MapRGB(const SDL_PixelFormat * const, const uint8_t,
                        const uint8_t, const uint8_t)
    SDL_Overlay * SDL_CreateYUVOverlay(int, int, uint32_t, SDL_Surface *)

    void SDL_WM_SetCaption(const char *, const char *)
    int SDL_setenv(const char *, const char *, int)
    char * SDL_getenv(const char *)

    SDL_Surface *SDL_SetVideoMode(int, int, int, uint32_t)
    const SDL_VideoInfo *SDL_GetVideoInfo()
    uint8_t SDL_EventState(uint8_t, int)
    void SDL_PumpEvents()

    int SDL_IGNORE
    uint8_t SDL_ACTIVEEVENT
    uint8_t SDL_SYSWMEVENT
    enum:
        SDL_VIDEOEXPOSE,
        SDL_USEREVENT,
        SDL_QUIT,
        SDL_VIDEORESIZE,
    uint32_t SDL_ALLEVENTS

    struct SDL_VideoInfo:
        int current_w
        int current_h
    struct SDL_Overlay:
        int w, h                  #/**< Read-only */
        uint16_t *pitches         #/**< Read-only */
        uint8_t **pixels          #/**< Read-write */
    struct SDL_PixelFormat:
        pass
    struct SDL_Rect:
        int16_t x, y
        uint16_t w, h
    struct SDL_Surface:
        SDL_PixelFormat *format
        int w, h


    # Event structures; SDL_Event is a union whose ``type`` member is shared
    # with the ``type`` field of each event struct.
    struct SDL_UserEvent:
        uint8_t type
        int code
        void *data1
        void *data2
    struct SDL_ResizeEvent:
        uint8_t type
        int w
        int h
    union SDL_Event:
        uint8_t type
        SDL_UserEvent user
        SDL_ResizeEvent resize
    enum SDL_eventaction:
        SDL_ADDEVENT,
        SDL_PEEKEVENT,
        SDL_GETEVENT,
    int SDL_PushEvent(SDL_Event *event)
    int SDL_PeepEvents(SDL_Event *, int, SDL_eventaction, uint32_t)

    int SDL_ShowCursor(int)


================================================
FILE: ffpyplayer/pic.pxd
================================================
include 'includes/ffmpeg.pxi'


# C-level attribute layout of SWScale (implemented in pic.pyx).
cdef class SWScale(object):
    # libswscale context handle.
    cdef SwsContext *sws_ctx
    # NOTE(review): presumably the destination pixel-format name kept both as
    # bytes and as str - confirm in pic.pyx.
    cdef bytes dst_pix_fmt
    cdef str dst_pix_fmt_s
    # Destination height / width.
    cdef int dst_h
    cdef int dst_w
    # Source pixel format and height / width.
    cdef AVPixelFormat src_pix_fmt
    cdef int src_h
    cdef int src_w


# C-level attribute layout and method table of Image (implemented in pic.pyx).
cdef class Image(object):

    # FFmpeg frame held by the image.
    cdef AVFrame *frame
    # NOTE(review): presumably the Python-side plane buffers backing the
    # frame - confirm in pic.pyx.
    cdef list byte_planes
    cdef AVPixelFormat pix_fmt

    # C-only initialiser callable without the GIL; a return value of 1
    # signals that an exception was raised (``except 1``).
    cdef int cython_init(self, AVFrame *frame) nogil except 1
    # ``cpdef`` methods are callable from both Python and Cython;
    # ``keep_align=*`` marks an optional argument whose default value is
    # given in the implementation.
    cpdef is_ref(Image self)
    cpdef is_key_frame(Image self)
    cpdef get_linesizes(Image self, keep_align=*)
    cpdef get_size(Image self)
    cpdef get_pixel_format(Image self)
    cpdef get_buffer_size(Image self, keep_align=*)
    cpdef get_required_buffers(Image self)
    cpdef to_bytearray(Image self, keep_align=*)
    cpdef to_memoryview(Image self, keep_align=*)


# C-level attribute layout and method table of ImageLoader (implemented in
# pic.pyx).
cdef class ImageLoader(object):
    # FFmpeg demuxer / decoder state.
    cdef AVFormatContext *format_ctx
    cdef AVCodec *codec
    cdef AVCodecContext *codec_ctx
    # Packet stored by value inside the object (not a pointer).
    cdef AVPacket pkt
    cdef AVFrame *frame
    cdef bytes filename
    # 256-byte C character buffer.
    # NOTE(review): presumably scratch space for error messages (cf.
    # emsg / fmt_err) - confirm in pic.pyx.
    cdef char msg[256]
    # NOTE(review): presumably an end-of-file flag - confirm in pic.pyx.
    cdef int eof

    cpdef next_frame(self)
    cdef inline object eof_frame(self)


================================================
FILE: ffpyplayer/pic.pyx
================================================
'''
FFmpeg based image storage and conversion tools
===============================================

FFmpeg based classes to store and convert images from / to many different pixel
formats. See :class:`Image` and :class:`SWScale` for details.

Create an image in rgb24 format:

.. code-block:: python

    >>> w, h = 500, 100
    >>> size = w * h * 3
    >>> buf = bytearray([int(x * 255 / size) for x in range(size)])
    >>> img = Image(plane_buffers=[buf], pix_fmt='rgb24', size=(w, h))

Convert the image to a different size:

.. code-block:: python

    >>> sws = SWScale(w, h, img.get_pixel_format(), ow=w/2, oh=h/3)
    >>> img2 = sws.scale(img)
    >>> img2.get_size()
    (250, 33)

Convert the image to YUV420P and get the resulting plane buffers as bytearrays:

.. code-block:: python

    >>> sws = SWScale(w, h, img.get_pixel_format(), ofmt='yuv420p')
    >>> img2 = sws.scale(img)
    >>> img2.get_pixel_format()
    'yuv420p'
    >>> planes = img2.to_bytearray()
    >>> list(map(len, planes))
    [50000, 12500, 12500, 0]

Create an Image using default FFmpeg buffers:

.. code-block:: python

    >>> img = Image(pix_fmt='rgb24', size=(w, h))

Copy the image:

.. code-block:: python

    >>> import copy
    >>> # copy reference without actually copying the buffers
    >>> img2 = copy.copy(img)
    >>> # do deep copy
    >>> img2 = copy.deepcopy(img)
'''

# Public names exported by this module.
__all__ = ('Image', 'SWScale', 'get_image_size', 'ImageLoader')

# Textually includes shared inline helpers. NOTE(review): helpers used below
# such as tcode, emsg and get_plane_sizes presumably come from this include
# (or from the .pxd) - confirm.
include "includes/inline_funcs.pxi"

from cpython.ref cimport PyObject
from cython cimport view as cyview

# C memory helpers, declared nogil so they can be used without the GIL.
cdef extern from "string.h" nogil:
    void *memset(void *, int, size_t)
    void *memcpy(void *, const void *, size_t)

# NOTE(review): PyString_FromStringAndSize is a Python 2 C-API name and is
# not referenced in the part of the file reviewed here - confirm it is still
# needed.
cdef extern from "Python.h":
    PyObject* PyString_FromStringAndSize(const char *, Py_ssize_t)
    void Py_DECREF(PyObject *)

# Imported only for its import-time side effects.
import ffpyplayer.tools  # for initialization purposes

def get_image_size(pix_fmt, width, height):
    '''Computes the number of bytes needed by the buffer of each plane of an
    image with the given pixel format, width, and height.

    :Parameters:

        `pix_fmt`: str
            The name of the pixel format of the image. Can be one of
            :attr:`~ffpyplayer.tools.pix_fmts`.
        `width`: int
            The width of the image.
        `height`: int
            The height of the image.

    :returns:

        `4-tuple of ints`:
            The buffer size in bytes of each of the four planes required to
            store the image. Unused planes are zero.

            If ``pix_fmt``, ``width`` or ``height`` is empty or zero, the
            integer ``0`` is returned instead of a tuple.

    An exception is raised when the pixel format is not known, or when the
    linesizes or plane sizes cannot be computed.

    :

    .. code-block:: python

        >>> print(get_image_size('rgb24', 100, 100))
        (30000, 0, 0, 0)
        >>> print(get_image_size('yuv420p', 100, 100))
        (10000, 2500, 2500, 0)
        >>> print(get_image_size('gray', 100, 100))
        (10000, 1024, 0, 0)
    '''
    cdef int img_w = width, img_h = height
    cdef int status
    cdef AVPixelFormat av_fmt
    cdef bytes fmt_name
    cdef int plane_sizes[4]
    cdef int linesizes[4]
    cdef int required[4]
    cdef char err_buf[256]

    # nothing to compute for an empty format or a zero dimension
    if not (pix_fmt and width and height):
        return 0

    fmt_name = pix_fmt.encode('utf8')
    av_fmt = av_get_pix_fmt(fmt_name)
    if av_fmt == AV_PIX_FMT_NONE:
        raise Exception('Pixel format %s not found.' % pix_fmt)

    # unpadded bytes per line of every plane
    status = av_image_fill_linesizes(linesizes, av_fmt, img_w)
    if status < 0:
        raise Exception(
            'Failed to initialize linesizes: ' +
            tcode(emsg(status, err_buf, sizeof(err_buf))))

    status = get_plane_sizes(plane_sizes, required, av_fmt, img_h, linesizes)
    if status < 0:
        raise Exception(
            'Failed to get planesizes: ' +
            tcode(emsg(status, err_buf, sizeof(err_buf))))

    return (plane_sizes[0], plane_sizes[1], plane_sizes[2], plane_sizes[3])


cdef class SWScale(object):
    '''Converts Images from one format and size to another format and size.

    The class accepts an Image of a given pixel format and size and converts it
    to another Image with a different pixel format and size. Each SWScale instance
    converts only images with parameters specified when creating the instance.

    :Parameters:

        `iw, ih`: int
            The width and height of the source image.
        `ifmt`: str
            The pixel format of the source image. Can be one of
            :attr:`ffpyplayer.tools.pix_fmts`.
        `ow, oh`: int
            The width and height of the output image after converting from the
            source image. A value of 0 will set that parameter to the source
            height/width. A value of -1 for one of the parameters, will result in
            a value of that parameter that maintains the original aspect ratio.
            Defaults to -1.
        `ofmt`: str
            The pixel format of the output image. Can be one of
            :attr:`ffpyplayer.tools.pix_fmts`. If empty, the source pixel format
            will be used. Defaults to empty string.

    :

    .. code-block:: python

        >>> w, h = 500, 100
        >>> size = w * h * 3
        >>> buf = bytearray([int(x * 255 / size) for x in range(size)])
        >>> img = Image(plane_buffers=[buf], pix_fmt='rgb24', size=(w, h))

        >>> # specify output w,h
        >>> sws = SWScale(w, h, img.get_pixel_format(), ow=w // 2, oh=h // 3)
        >>> img2 = sws.scale(img)
        >>> img2.get_size()
        (250, 33)

        >>> # use input height
        >>> sws = SWScale(w, h, img.get_pixel_format(), ow=w // 2, oh=0)
        >>> img2 = sws.scale(img)
        >>> img2.get_size()
        (250, 100)

        >>> # keep aspect ratio
        >>> sws = SWScale(w, h, img.get_pixel_format(), ow=w // 2)
        >>> img2 = sws.scale(img)
        >>> img2.get_size()
        (250, 50)

        >>> # convert rgb24 to yuv420p
        >>> sws = SWScale(w, h, img.get_pixel_format(), ofmt='yuv420p')
        >>> img2 = sws.scale(img)
        >>> img2.get_pixel_format()
        'yuv420p'

        >>> # convert into a previously allocated and aligned image
        >>> import math
        >>> align = lambda x: int(math.ceil(x / 32.) * 32)
        >>> img2 = Image(pix_fmt=img.get_pixel_format(), size=(w // 2, h // 2))
        >>> img2.get_linesizes(keep_align=True)
        (750, 0, 0, 0)
        >>> linesize = list(map(align, img2.get_linesizes()))
        >>> linesize
        [768, 0, 0, 0]
        >>> img2 = Image(pix_fmt=img2.get_pixel_format(), size=img2.get_size(), linesize=linesize)
        >>> img2.get_linesizes(keep_align=True)
        (768, 0, 0, 0)
        >>> # the converter's output size and format must match ``img2``
        >>> sws = SWScale(w, h, img.get_pixel_format(), ow=w // 2, oh=h // 2)
        >>> sws.scale(img, dst=img2)
        <ffpyplayer.pic.Image object at 0x02B44440>
        >>> img2
        <ffpyplayer.pic.Image object at 0x02B44440>

    '''

    def __cinit__(self, int iw, int ih, ifmt, int ow=-1, int oh=-1, ofmt='', **kargs):
        # Resolves the input/output pixel formats and the output size, stores
        # them on the instance and creates the swscale conversion context.
        cdef AVPixelFormat in_fmt, out_fmt

        # until ``ofmt`` says otherwise, the output format is the input format
        self.dst_pix_fmt = ifmt.encode('utf8')
        self.dst_pix_fmt_s = ifmt

        self.sws_ctx = NULL
        in_fmt = av_get_pix_fmt(self.dst_pix_fmt)
        if in_fmt == AV_PIX_FMT_NONE:
            raise Exception('Pixel format %s not found.' % ifmt)

        if ofmt:
            self.dst_pix_fmt = ofmt.encode('utf8')
            self.dst_pix_fmt_s = ofmt
            out_fmt = av_get_pix_fmt(self.dst_pix_fmt)
            if out_fmt == AV_PIX_FMT_NONE:
                raise Exception('Pixel format %s not found.' % ofmt)
        else:
            out_fmt = in_fmt

        # -1 for both dimensions means: keep the source size
        if ow == -1 and oh == -1:
            ow = 0
            oh = 0
        # 0 means: use the corresponding source dimension
        if oh == 0:
            oh = ih
        if ow == 0:
            ow = iw
        # a single -1 means: derive that dimension from the other one so the
        # source aspect ratio is kept
        if ow == -1:
            ow = <int>(oh / <double>ih * iw)
        if oh == -1:
            oh = <int>(ow / <double>iw * ih)

        self.src_pix_fmt = in_fmt
        self.src_w = iw
        self.src_h = ih
        self.dst_w = ow
        self.dst_h = oh

        self.sws_ctx = sws_getCachedContext(
            NULL, iw, ih, in_fmt, ow, oh, out_fmt, SWS_BICUBIC,
            NULL, NULL, NULL)
        if self.sws_ctx == NULL:
            raise Exception('Cannot initialize the conversion context.')

    def __dealloc__(self):
        # Releases the swscale context, if one was created.
        if self.sws_ctx == NULL:
            return
        sws_freeContext(self.sws_ctx)

    def scale(self, Image src, Image dst=None, int _flip=False):
        '''Scales a image into another image format and/or size as specified by the
        instance initialization parameters.

        :Parameters:

            `src`: :class:`Image`
                A image instance with values matching the source image specification
                of this instance. An exception is raised if the Image doesn't match.
                It will be used as the source image.
            `dst`: :class:`Image` or None
                A image instance with values matching the output image specification
                of this instance (pixel format, width and height). An exception
                is raised if the Image doesn't match.
                If specified, the output image will be converted directly into this Image.
                If not specified, a new Image will be created and returned.
            `_flip`: bool, defaults to False
                Whether the image will be flipped before scaling. This only works
                for pixel formats whose color planes are the same size (e.g. rgb), so
                use with caution.

        :returns:

            :class:`Image`:
                The output image. If ``dst`` was not None ``dst`` will be returned,
                otherwise a new image containing the converted image will be returned.
        '''
        cdef int i
        cdef AVPixelFormat dst_pix_fmt

        # a typed argument still accepts None, which must not be dereferenced
        if src is None:
            raise TypeError('src must be an Image, not None.')
        if (<AVPixelFormat>src.frame.format != self.src_pix_fmt or
            self.src_w != src.frame.width or self.src_h != src.frame.height):
            raise Exception("Source image doesn't match the specified input parameters.")

        if dst is None:
            dst = Image.__new__(Image, pix_fmt=self.dst_pix_fmt_s,
                                size=(self.dst_w, self.dst_h))
        else:
            # sws_scale writes an image of the configured output size/format,
            # so a destination that doesn't match must be rejected up front
            dst_pix_fmt = av_get_pix_fmt(self.dst_pix_fmt)
            if (<AVPixelFormat>dst.frame.format != dst_pix_fmt or
                self.dst_w != dst.frame.width or self.dst_h != dst.frame.height):
                raise Exception(
                    "Destination image doesn't match the specified output parameters.")

        with nogil:
            if _flip:
                # point every plane at its last line and negate the linesize
                # so that the source is read bottom-up
                for i in range(4):
                    (<uint8_t * *>src.frame.data)[i] += src.frame.linesize[i] * (src.frame.height - 1)
                    src.frame.linesize[i] = -src.frame.linesize[i]
            sws_scale(self.sws_ctx, <const uint8_t *const *>src.frame.data, src.frame.linesize,
                          0, src.frame.height, dst.frame.data, dst.frame.linesize)
            if _flip:
                # restore the source frame to its original state
                for i in range(4):
                    src.frame.linesize[i] = -src.frame.linesize[i]
                    (<uint8_t * *>src.frame.data)[i] -= src.frame.linesize[i] * (src.frame.height - 1)
        return dst


cdef int raise_exec(object ecls) nogil except 1:
    # Raises an instance of the exception class ``ecls`` (called with no
    # arguments). The function is declared nogil so it can be called from
    # code running without the GIL; the GIL is acquired only for the raise.
    # ``except 1`` declares 1 as the error return value.
    with gil:
        raise ecls()


cdef class Image(object):
    '''Stores a image using a specified pixel format.

    An Image can be represented by many different pixel formats, which determines
    how the buffer representing it is stored. We store the buffers as one to
    four arrays of bytes representing the one to four planes. For example,
    RGB24 has all the data in the first plane in the form of RGBRGB... while
    YUV420P uses the first three planes.

    The Image can be initialized with a list of the plane buffers, or internal
    buffers can be created when none are provided. Depending on how it's initialized
    one or more params need to be specified.

    :Parameters:

        `plane_buffers`: list
            A list of bytes or bytearray type objects representing the 1-4 planes.
            The number of planes is determined by ``pix_fmt`` (e.g. 1 for RGB24,
            3 for yuv). The length of the bytes object in each plane is a function
            of ``size``, and if provided, also ``linesize``. See ``linesize`` for details.
            The buffers are used directly without making any copies therefore, the
            bytes objects are kept alive internally as long as this instance is alive.

            If empty, internal buffers for the image will be created for the image.
        `pix_fmt`: str
            The pixel format of the image. Can be one of :attr:`ffpyplayer.tools.pix_fmts`.
            Must be provided when using ``plane_buffers``.
        `size`: 2-tuple of ints
            The size of the frame in the form of (width, height).
            Must be provided when using ``plane_buffers``.
        `linesize`: list of ints
            The linesize of each provided plane. In addition to the width of the frame,
            a linesize can be provided. The ``linesize`` represent the actual number of
            bytes in each line, and may be padded at the end to satisfy some alignment
            requirement. For example, a RGB24 frame of size ``(100, 10)`` will have
            ``3 * 100 = 300`` bytes in each horizontal line and will be 3000 bytes large.
            But, when 32 byte alignment is required, the buffer will have to be padded at the
            end so that each line is 320 bytes, and the total buffer length is 3200 bytes.
            If ``linesize`` is provided, it must be provided for every valid plane.
            If it's not provided, an alignment of 1 (i.e. no alignment) is assumed.
            See :meth:`get_buffer_size` for more details.
        `no_create`: bool
            A optional argument, which if provided with True will just create the instance
            and not initialize anything. All other parameters are ignored when True.
            This is useful when instantiating later from cython with the ``cython_init`` method.

    **Copying**

    FFmpeg has an internal ref counting system where when used, it frees buffers
    it allocated only when there's no reference to it remaining thereby allowing
    multiple images to use the same buffer without making copies. When the
    Image class allocates the image buffers, e.g. when ``plane_buffers`` is empty
    such reference buffers are created. As a consequence, when copying the Image
    object, the buffers will not have to be copied.

    Using the python copy module you can do a **shallow** or a **deep** copy of
    the object. When doing a **shallow** copy, new buffers will be created if the
    original buffers were not FFmpeg created and referenced, e.g. if provided
    using ``plane_buffers``. This is to ensure the buffers won't
    go out of memory while in use.

    After the copy, the buffers will be "referenced" and additional copies will
    create more references without copying the buffers.
    A **deep** copy, however, will always create a new referenced buffer.
    The function :meth:`is_ref` indicates whether the image buffer is such a
    FFmpeg referenced buffer.

    :

    .. code-block:: python

        >>> w, h = 640, 480
        >>> size = w * h * 3
        >>> buf = bytearray([int(x * 255 / size) for x in range(size)])
        >>> img = Image(plane_buffers=[buf], pix_fmt='rgb24', size=(w, h))
        >>> img2 = Image(pix_fmt='rgb24', size=(w, h))
    '''

    def __cinit__(self, plane_buffers=[], pix_fmt='', size=(), linesize=[], **kwargs):
        # Allocates the AVFrame and either attaches the caller provided plane
        # buffers (kept alive in ``byte_planes``) or lets FFmpeg allocate the
        # buffers. With ``no_create=True`` nothing is initialized and
        # ``frame`` stays NULL until ``cython_init`` is called.
        cdef int i, w, h, res
        cdef object plane = None
        cdef char msg[256]
        cdef int buff_size[4]
        cdef int ls[4]
        cdef int req[4]
        cdef bytes fmt_b

        self.frame = NULL
        self.byte_planes = None

        if kwargs.get('no_create', False):
            return

        fmt_b = pix_fmt.encode('utf8')
        self.pix_fmt = av_get_pix_fmt(fmt_b)
        if self.pix_fmt == AV_PIX_FMT_NONE:
            raise Exception('Pixel format %s not found.' % pix_fmt)
        w, h = size
        self.frame = av_frame_alloc()
        if self.frame == NULL:
            raise MemoryError()

        self.frame.format = self.pix_fmt
        self.frame.width = w
        self.frame.height = h

        # Minimal (unpadded) linesizes of this format. They are the default
        # when no linesize is given and the reference used to validate a
        # caller provided linesize, so the result is always checked.
        res = av_image_fill_linesizes(ls, self.pix_fmt, w)
        if res < 0:
            raise Exception('Failed to initialize linesizes: ' + tcode(emsg(res, msg, sizeof(msg))))
        if linesize:
            for i in range(min(len(linesize), 4)):
                self.frame.linesize[i] = linesize[i]
        else:
            for i in range(4):
                self.frame.linesize[i] = ls[i]
        # every plane the format uses must have a non-zero linesize
        for i in range(4):
            if ls[i] and not self.frame.linesize[i]:
                raise Exception('Incorrect linesize provided.')

        if plane_buffers:
            self.byte_planes = []
            res = get_plane_sizes(buff_size, req, self.pix_fmt, self.frame.height, self.frame.linesize)
            if res < 0:
                raise Exception('Failed to get plane sizes: ' + tcode(emsg(res, msg, sizeof(msg))))
            # first validate which planes were (not) provided
            for i in range(4):
                if req[i] and buff_size[i] and (len(plane_buffers) <= i or not plane_buffers[i]):
                    raise Exception('Required plane %d not provided for %s' % (i, pix_fmt))
                if len(plane_buffers) > i and plane_buffers[i] and not buff_size[i]:
                    raise Exception('Unused plane %d provided for %s' % (i, pix_fmt))
            # then attach the provided buffers without copying them
            for i in range(4):
                if len(plane_buffers) == i:
                    break
                if not plane_buffers[i]:
                    continue
                plane = plane_buffers[i]
                if len(plane) < buff_size[i]:
                    raise Exception('Buffer for plane %d is too small, required buffer size is %d.'\
                                    % (i, buff_size[i]))
                # hold a reference so the memory outlives the frame pointer
                self.byte_planes.append(plane)
                self.frame.data[i] = plane
        else:
            with nogil:
                res = av_frame_get_buffer(self.frame, 32)
            if res < 0:
                raise Exception('Could not allocate avframe buffer of size %dx%d: %s'\
                                % (w, h, tcode(emsg(res, msg, sizeof(msg)))))

    def __dealloc__(self):
        # Frees the AVFrame owned by this image. ``self.frame`` can still be
        # NULL here (e.g. when created with ``no_create=True`` and never
        # initialized); NOTE(review): av_frame_free is expected to accept a
        # pointer to a NULL frame - see the FFmpeg documentation.
        av_frame_free(&self.frame)

    cdef int cython_init(self, AVFrame *frame) nogil except 1:
        '''Initializes this image from ``frame`` by storing a clone of it
        (``av_frame_clone``) and taking over its pixel format.

        Can be called only once after object creation (e.g. after creating the
        instance with ``no_create=True``): a frame already stored in this
        instance is overwritten without being freed.

        Returns 0 on success. If the frame cannot be cloned, ``MemoryError``
        is raised through ``raise_exec``.
        '''
        self.frame = av_frame_clone(frame)
        if self.frame == NULL:
            # raise_exec acquires the GIL to raise; its error return value
            # propagates the exception out of this function
            raise_exec(MemoryError)
        self.pix_fmt = <AVPixelFormat>self.frame.format
        return 0

    def __copy__(self):
        # Shallow copy: an uninitialized Image is created and then set up
        # from a clone of this image's frame.
        cdef Image clone
        clone = Image.__new__(Image, no_create=True)
        with nogil:
            clone.cython_init(self.frame)
        return clone

    def __deepcopy__(self, memo):
        # Deep copy: the image data is copied into newly allocated buffers of
        # a temporary frame, from which the returned Image is initialized.
        # The temporary frame is released in ``finally`` so that it cannot
        # leak when any step (including ``cython_init``) raises.
        cdef AVFrame *frame = av_frame_alloc()
        cdef Image img
        if frame == NULL:
            raise MemoryError()

        try:
            frame.format = self.frame.format
            frame.width = self.frame.width
            frame.height = self.frame.height
            if av_frame_copy_props(frame, self.frame) < 0:
                raise Exception('Cannot copy frame properties.')
            if av_frame_get_buffer(frame, 32) < 0:
                raise Exception('Cannot allocate frame buffers.')

            img = Image.__new__(Image, no_create=True)
            with nogil:
                av_image_copy(frame.data, frame.linesize, <const uint8_t **>self.frame.data,
                              self.frame.linesize, <AVPixelFormat>frame.format,
                              frame.width, frame.height)
                # img stores its own clone of ``frame``
                img.cython_init(frame)
        finally:
            av_frame_free(&frame)
        return img

    cpdef is_ref(Image self):
        '''Returns whether the image buffer is FFmpeg referenced. This can only be
        True when the buffers were allocated internally or by FFmpeg, but not when
        ``plane_buffers`` is provided. See :class:`Image` for details. After a copy,
        it will always return True.

        :returns:

            bool: True if the buffer is FFmpeg referenced.

        For example:

        .. code-block:: python

            >>> w, h = 640, 480
            >>> img = Image(plane_buffers=[b' ' * (w * h * 3)], pix_fmt='rgb24', size=(w, h))
            >>> img.is_ref()
            False
            >>> import copy
            >>> img2 = copy.copy(img)
            >>> img2.is_ref()
            True

        Or if directly allocated internally:

        .. code-block:: python

            >>> img = Image(pix_fmt='rgb24', size=(w, h))
            >>> img.is_ref()
            True
        '''
        # the frame is referenced when its first buffer reference is set
        cdef bint referenced = self.frame.buf[0] != NULL
        return referenced

    cpdef is_key_frame(Image self):
        '''Returns whether the image is a key frame.

        :returns:

            bool: True if the ``key_frame`` field of the underlying frame is 1.
        '''
        cdef bint key = self.frame.key_frame == 1
        return key

    cpdef get_linesizes(Image self, keep_align=False):
        '''Returns the linesize of each plane.

        The linesize is the actual number of bytes in each horizontal line for a given plane,
        which may be padded at the end to satisfy some alignment requirement.
        For example, a RGB24 frame of size ``(100, 10)`` will have ``3 * 100 = 300``
        bytes in each line and will be 3000 bytes large. But, when 32 byte
        alignment is required, the buffer will have to be padded at the end so
        that each line is 320 bytes, and the total buffer length is 3200 bytes.

        :Parameters:

            `keep_align`: bool
                If True, the original linesize alignments of the image will be returned for
                every plane. If False, linesize with an alignment of 1 (i.e. no alignment)
                will be used, returning the minimal linesize required for the image.
                Defaults to False.

        :returns:

            4-tuple of ints:
                A 4 tuple with the linesizes of each plane. If the plane isn't used
                it'll be 0.

        An exception is raised if the unaligned linesizes cannot be computed.

        By default there's no alignment:

        .. code-block:: python

            >>> w, h = 100, 10
            >>> img = Image(plane_buffers=[b' ' * (w * h * 3)],
            ... pix_fmt='rgb24', size=(w, h))
            >>> img.get_linesizes(keep_align=True)
            (300, 0, 0, 0)

        You can force alignment e.g. 32 byte alignment:

        .. code-block:: python

            >>> import math
            >>> linesize = [int(math.ceil(w * 3 / 32.) * 32)]
            >>> linesize
            [320]
            >>> img = Image(plane_buffers=[b' ' * (h * linesize[0])],
            ... pix_fmt='rgb24', size=(w, h), linesize=linesize)
            >>> img.get_linesizes(keep_align=True)
            (320, 0, 0, 0)
            >>> img.get_size()
            (100, 10)

        The linesizes of an unaligned and 32 byte aligned yuv420p image:

        .. code-block:: python

            >>> img = Image(pix_fmt='yuv420p', size=(w, h))
            >>> img.get_linesizes(keep_align=True)
            (100, 50, 50, 0)
            >>> img.get_size()
            (100, 10)

            >>> # now try align to 32 bytes
            >>> linesize = img.get_linesizes(keep_align=True)
            >>> align = lambda x: int(math.ceil(x / 32.) * 32)
            >>> linesize = list(map(align, linesize))
            >>> linesize
            [128, 64, 64, 0]
            >>> img = Image(pix_fmt='yuv420p', size=(w, h), linesize=linesize)
            >>> img.get_linesizes(keep_align=True)
            (128, 64, 64, 0)
            >>> img.get_linesizes()
            (100, 50, 50, 0)
            >>> img.get_size()
            (100, 10)
        '''
        cdef int res
        cdef int lsl[4]
        cdef int *ls = self.frame.linesize
        cdef char msg[256]

        if not keep_align:
            # recompute the unpadded linesizes; check the result like the
            # other methods of this class do
            res = av_image_fill_linesizes(lsl, self.pix_fmt, self.frame.width)
            if res < 0:
                raise Exception('Failed to initialize linesizes: ' + tcode(emsg(res, msg, sizeof(msg))))
            ls = lsl
        return (ls[0], ls[1], ls[2], ls[3])

    cpdef get_size(Image self):
        '''Returns the size of the frame.

        :returns:

            2-tuple of ints: The frame size in the form of ``(width, height)``.

        ::

            >>> img.get_size()
            (640, 480)
        '''
        cdef int width = self.frame.width
        cdef int height = self.frame.height
        return (width, height)

    cpdef get_pixel_format(Image self):
        '''Returns the name of the pixel format of the image. Can be one of
        :attr:`ffpyplayer.tools.pix_fmts`.

        The name is looked up from the pixel format stored in this instance
        with FFmpeg's ``av_get_pix_fmt_name``.

        :returns:

            str: The pixel format of the image.

        ::

            >>> img.get_pixel_format()
            'rgb24'
        '''
        return tcode(av_get_pix_fmt_name(self.pix_fmt))

    cpdef get_buffer_size(Image self, keep_align=False):
        '''Returns the size in bytes of the buffer of each plane.

        :Parameters:

            `keep_align`: bool
                If True, the linesizes of the actual image (including any
                alignment padding) are used to compute the buffer size of each
                plane. If False, an alignment of 1 (i.e. no alignment) is used,
                which gives the minimal buffer size required to store the
                image. Defaults to False.

        :returns:

            4-tuple of ints:
                The buffer sizes for each plane of this pixel format.

        An exception is raised if the linesizes or plane sizes cannot be
        computed.

        A (unaligned) yuv420p image has 3 planes:

        .. code-block:: python

            >>> w, h = 100, 10
            >>> img = Image(pix_fmt='yuv420p', size=(w, h))
            >>> img.get_linesizes(keep_align=True)
            (100, 50, 50, 0)
            >>> img.get_buffer_size()
            (1000, 250, 250, 0)

            >>> # align to 32 bytes
            >>> linesize = img.get_linesizes(keep_align=True)
            >>> align = lambda x: int(math.ceil(x / 32.) * 32)
            >>> linesize = list(map(align, linesize))
            >>> linesize
            [128, 64, 64, 0]
            >>> img = Image(pix_fmt='yuv420p', size=(w, h), linesize=linesize)
            >>> img.get_linesizes(keep_align=True)
            (128, 64, 64, 0)
            >>> img.get_buffer_size(keep_align=True)
            (1280, 320, 320, 0)
            >>> img.get_buffer_size()
            (1000, 250, 250, 0)
        '''
        cdef int status, i
        cdef int plane_sizes[4]
        cdef int linesizes[4]
        cdef int required[4]
        cdef char err_buf[256]

        if not keep_align:
            # unpadded linesizes for this format and width
            status = av_image_fill_linesizes(linesizes, self.pix_fmt, self.frame.width)
            if status < 0:
                raise Exception(
                    'Failed to initialize linesizes: ' +
                    tcode(emsg(status, err_buf, sizeof(err_buf))))
        else:
            # linesizes exactly as stored in the frame
            for i in range(4):
                linesizes[i] = self.frame.linesize[i]

        status = get_plane_sizes(
            plane_sizes, required, <AVPixelFormat>self.frame.format,
            self.frame.height, linesizes)
        if status < 0:
            raise Exception(
                'Failed to get planesizes: ' +
                tcode(emsg(status, err_buf, sizeof(err_buf))))
        return (plane_sizes[0], plane_sizes[1], plane_sizes[2], plane_sizes[3])

    cpdef get_required_buffers(Image self):
        '''Returns a 4 tuple indicating which of the 4 planes are required.

        A plane may be optional even if :meth:`get_buffer_size` is non-zero
        for it. The values are those reported by the plane size computation
        for the linesizes currently stored in the image.

        An exception is raised if the plane sizes cannot be computed.
        '''
        cdef int status, i
        cdef int plane_sizes[4]
        cdef int linesizes[4]
        cdef int required[4]
        cdef char err_buf[256]

        # work on a copy of the frame's own linesizes
        for i in range(4):
            linesizes[i] = self.frame.linesize[i]

        status = get_plane_sizes(
            plane_sizes, required, <AVPixelFormat>self.frame.format,
            self.frame.height, linesizes)
        if status < 0:
            raise Exception(
                'Failed to get planesizes: ' +
                tcode(emsg(status, err_buf, sizeof(err_buf))))
        return (required[0], required[1], required[2], required[3])

    cpdef to_bytearray(Image self, keep_align=False):
        '''Returns a copy of the plane buffers as bytearrays.

        :Parameters:

            `keep_align`: bool
                If True, the buffer of each plane is padded after each
                horizontal line to match the linesize of that plane in this
                image. If False, an alignment of 1 (i.e. no alignment) is used,
                returning the maximally packed buffer of each plane.
                Defaults to False.

        :returns:

            4-element list: A list of bytearray buffers for each plane of this
            pixel format. An empty bytearray is returned for unused planes.

        An exception is raised if the linesizes or plane sizes cannot be
        computed.

        Get the buffer of an RGB image:

        .. code-block:: python

            >>> w, h = 100, 10
            >>> img = Image(pix_fmt='rgb24', size=(w, h))
            >>> img.get_linesizes(keep_align=True)
            (300, 0, 0, 0)
            >>> list(map(len, img.to_bytearray()))
            [3000, 0, 0, 0]

        Get the buffers of a YUV420P image:

        .. code-block:: python

            >>> img = Image(pix_fmt='yuv420p', size=(w, h))
            >>> linesize = img.get_linesizes(keep_align=True)
            >>> linesize
            (100, 50, 50, 0)
            >>> align = lambda x: int(math.ceil(x / 32.) * 32)
            >>> linesize = list(map(align, linesize))
            >>> linesize
            [128, 64, 64, 0]

            >>> img = Image(pix_fmt='yuv420p', size=(w, h), linesize=linesize)
            >>> list(map(len, img.to_bytearray()))
            [1000, 250, 250, 0]
            >>> list(map(len, img.to_bytearray(keep_align=True)))
            [1280, 320, 320, 0]

            >>> # now initialize a new Image with it
            >>> img2 = Image(plane_buffers=img.to_bytearray(),
            ... pix_fmt=img.get_pixel_format(), size=img.get_size())
            >>> img2.get_buffer_size(keep_align=True)
            (1000, 250, 250, 0)

            >>> # keep alignment
            >>> img2 = Image(plane_buffers=img.to_bytearray(keep_align=True),
            ... pix_fmt=img.get_pixel_format(), size=img.get_size(),
            ... linesize=img.get_linesizes(keep_align=True))
            >>> img2.get_buffer_size(keep_align=True)
            (1280, 320, 320, 0)

        '''
        cdef list result = [None, None, None, None]
        cdef object buf
        cdef int i, status
        cdef uint8_t *dst_data[4]
        cdef int plane_sizes[4]
        cdef int linesizes[4]
        cdef int required[4]
        cdef char err_buf[256]
        memset(dst_data, 0, sizeof(dst_data))

        if not keep_align:
            status = av_image_fill_linesizes(linesizes, self.pix_fmt, self.frame.width)
            if status < 0:
                raise Exception(
                    'Failed to initialize linesizes: ' +
                    tcode(emsg(status, err_buf, sizeof(err_buf))))
        else:
            memcpy(linesizes, self.frame.linesize, sizeof(linesizes))

        status = get_plane_sizes(
            plane_sizes, required, <AVPixelFormat>self.frame.format,
            self.frame.height, linesizes)
        if status < 0:
            raise Exception(
                'Failed to get plane sizes: ' +
                tcode(emsg(status, err_buf, sizeof(err_buf))))

        # one zero-filled bytearray per plane; unused planes stay empty and
        # keep a NULL destination pointer
        for i in range(4):
            buf = bytearray(b'\0') * plane_sizes[i]
            result[i] = buf
            if plane_sizes[i]:
                dst_data[i] = buf

        with nogil:
            av_image_copy(
                dst_data, linesizes, <const uint8_t **>self.frame.data,
                self.frame.linesize, <AVPixelFormat>self.frame.format,
                self.frame.width, self.frame.height)
        return result

    cpdef to_memoryview(Image self, keep_align=False):
        '''Returns memoryview compatible arrays of the buffers of the image.

        :Parameters:

            `keep_align`: bool
                If True, the buffers of the original image will be returned
                without making any additional copies. If False, then if the
                image alignment is already 1, the original buffers will be
                returned, otherwise, new buffers will be created with an
                alignment of 1 and the buffers will be copied into them
                and returned. See :meth:`to_bytearray`.

        :Returns:

            4-element list:
                A list of cython arrays for each plane of this
                image's pixel format. If the data didn't have to be copied, the
                arrays point directly to the original image data. The arrays
                can be used where memoryviews are accepted, since cython arrays
                implement the memoryview interface.

                Unused planes are set to None.

        .. warning::
            If the data points to the original image data, you must ensure
            that this :class:`Image` instance stays alive while the returned
            arrays (or memoryviews of them) are in use. Once the
            :class:`Image` is freed, the original data becomes invalid and
            using the returned arrays will crash python.

        Get the buffer of an RGB image:

        .. code-block:: python

            >>> w, h = 100, 10
            >>> img = Image(pix_fmt='rgb24', size=(w, h))
            >>> img.get_linesizes(keep_align=True)
            (300, 0, 0, 0)
            >>> img.to_memoryview()
            [<ffpyplayer.pic.array object at 0x055DCE58>, None, None, None]
            >>> arr = img.to_memoryview()[0]
            >>> # memview is the only attribute of cython arrays
            >>> arr.memview
            <MemoryView of 'array' at 0x55d1468>
            >>> arr.memview.size
            3000
        '''
        cdef list result = [None, None, None, None]
        cdef cyview.array arr
        cdef int i, status
        cdef bint direct
        cdef int plane_sizes[4]
        cdef char *copy_data[4]
        cdef int packed_ls[4]
        cdef int required[4]
        cdef int *frame_ls = self.frame.linesize
        cdef int *used_ls
        cdef char err_buf[256]
        memset(copy_data, 0, sizeof(copy_data))

        status = av_image_fill_linesizes(packed_ls, self.pix_fmt, self.frame.width)
        if status < 0:
            raise Exception('Failed to initialize linesizes: ' +
                            tcode(emsg(status, err_buf, sizeof(err_buf))))

        # the frame's own buffers can be exposed when the caller accepts the
        # frame's alignment or when the frame is already maximally packed
        direct = keep_align or (
            frame_ls[0] == packed_ls[0] and frame_ls[1] == packed_ls[1] and
            frame_ls[2] == packed_ls[2] and frame_ls[3] == packed_ls[3])
        if direct:
            used_ls = self.frame.linesize
        else:
            used_ls = packed_ls

        status = get_plane_sizes(
            plane_sizes, required, <AVPixelFormat>self.frame.format,
            self.frame.height, used_ls)
        if status < 0:
            raise Exception(
                'Failed to get plane sizes: ' +
                tcode(emsg(status, err_buf, sizeof(err_buf))))

        for i in range(4):
            if not plane_sizes[i]:
                continue
            if direct:
                # wrap the frame's memory without allocating or copying
                arr = cyview.array(
                    shape=(plane_sizes[i], ), itemsize=sizeof(char),
                    format="B", mode="c", allocate_buffer=False)
                arr.data = <char *>self.frame.data[i]
            else:
                # new packed buffer, filled by the copy below
                arr = cyview.array(
                    shape=(plane_sizes[i], ), itemsize=sizeof(char),
                    format="B", mode="c", allocate_buffer=True)
                copy_data[i] = arr.data
            result[i] = arr

        if direct:
            return result

        with nogil:
            av_image_copy(
                <uint8_t **>copy_data, packed_ls,
                <const uint8_t **>self.frame.data, self.frame.linesize,
                <AVPixelFormat>self.frame.format, self.frame.width,
                self.frame.height)
        return result


cdef class ImageLoader(object):
    '''Class that reads one or more images from a file and returns them.

    :Parameters:

        `filename`: string type
            The full path to the image file. The string will first be encoded
            using utf8 before passing to FFmpeg.

    For example, reading a simple png using the iterator syntax:

    .. code-block:: python

        >>> img = ImageLoader('file.png')
        >>> images = [m for m in img]
        >>> images
        [(<ffpyplayer.pic.Image object at 0x02B5F5D0>, 0.0)]

    Or reading it directly:

    .. code-block:: python

        >>> img = ImageLoader('file.png')
        >>> img.next_frame()
        (<ffpyplayer.pic.Image object at 0x02B74850>, 0.0)
        >>> img.next_frame()
        (None, 0)
        >>> img.next_frame()
        (None, 0)

    Or reading a gif using the iterator syntax:

    .. code-block:: python

        >>> img = ImageLoader('sapo11.gif')
        >>> images = [m for m in img]
        >>> images
        [(<ffpyplayer.pic.Image object at 0x02B749B8>, 0.0),
        (<ffpyplayer.pic.Image object at 0x02B74918>, 0.08),
        (<ffpyplayer.pic.Image object at 0x02B74990>, 0.22),
        (<ffpyplayer.pic.Image object at 0x02B749E0>, 0.36),
        (<ffpyplayer.pic.Image object at 0x02B74A08>, 0.41000000000000003),
        (<ffpyplayer.pic.Image object at 0x02B74A30>, 0.46),
        (<ffpyplayer.pic.Image object at 0x02B74A58>, 0.51)]

    Or reading it directly:

    .. code-block:: python

        >>> img = ImageLoader('sapo11.gif')
        >>> img.next_frame()
        (<ffpyplayer.pic.Image object at 0x02B74B70>, 0.0)
        >>> img.next_frame()
        (<ffpyplayer.pic.Image object at 0x02B74C60>, 0.08)
        ...
        >>> img.next_frame()
        (<ffpyplayer.pic.Image object at 0x02B74B70>, 0.51)
        >>> img.next_frame()
        (None, 0)
        >>> img.next_frame()
        (None, 0)
    '''

    def __cinit__(self, filename, **kwargs):
        # Opens `filename`, copies the codec parameters of the first stream
        # into a fresh codec context and opens the matching decoder.
        # Raises MemoryError if the codec context cannot be allocated and
        # Exception for every FFmpeg failure. `kwargs` is accepted but unused.

        cdef AVDictionary *opts = NULL
        cdef const AVDictionaryEntry *t = NULL
        cdef int ret = 0
        cdef char *fname

        # self.filename keeps the encoded bytes alive for as long as fname
        # is used below.
        fname = self.filename = filename.encode('utf8')
        self.format_ctx = NULL
        self.codec = NULL
        self.codec_ctx = avcodec_alloc_context3(NULL)
        if self.codec_ctx == NULL:
            raise MemoryError()

        self.frame = NULL
        self.eof = 0
        av_init_packet(&self.pkt)

        with nogil:
            ret = avformat_open_input(&self.format_ctx, fname, NULL, NULL)
        if ret < 0:
            raise Exception("Failed to open input file {}: {}".format(filename,
                            tcode(emsg(ret, self.msg, sizeof(self.msg)))))

        # Only stream 0 is ever used; refuse containers without any stream
        # instead of dereferencing a missing entry below.
        if not self.format_ctx.nb_streams:
            raise Exception("Failed to open input file {}: no streams found"
                            .format(filename))

        ret = avcodec_parameters_to_context(self.codec_ctx, self.format_ctx.streams[0].codecpar)
        if ret < 0:
            raise Exception("Failed to open input file {}: {}".format(filename,
                            tcode(emsg(ret, self.msg, sizeof(self.msg)))))

        self.codec = avcodec_find_decoder(self.codec_ctx.codec_id)
        if self.codec is NULL:
            raise Exception("Failed to find supported codec for file {}"
                            .format(filename))

        with nogil:
            ret = avcodec_open2(self.codec_ctx, self.codec, &opts)
        if ret < 0:
            raise Exception("Failed to open codec for {}: {}".format(filename,
                            tcode(emsg(ret, self.msg, sizeof(self.msg)))))
        # Any entry left in opts after avcodec_open2 was not consumed.
        t = av_dict_get(opts, "", NULL, AV_DICT_IGNORE_SUFFIX)
        if t != NULL:
            raise Exception("Option {} not found.".format(t.key))

    def __dealloc__(self):
        # Releases whatever packet, frame, format and codec contexts are
        # still held by this instance.
        with nogil:
            av_packet_unref(&self.pkt)
            av_frame_free(&self.frame)
            avformat_close_input(&self.format_ctx)
            if self.codec_ctx != NULL:
                avcodec_free_context(&self.codec_ctx)

    def __iter__(self):
        # Yields the results of next_frame() until it reports `(None, 0)`.
        while True:
            res = self.next_frame()
            if res == (None, 0):
                break
            yield res

    cpdef next_frame(self):
        ''' Returns the next available frame, or `(None, 0)` if there are no
        more frames available.

        :returns:
            a 2-tuple of `(:class:`Image`, pts)`:
            Where the first element is the next image to be displayed and `pts`
            is the time, relative to the first frame, when to display it e.g. in
            the case of a gif.

            If we reached the eof of the file and there are no more frames
            to be returned, it returns `(None, 0)`.

        .. warning::

            Both :meth:`next_frame` and the iterator syntax read the frames
            identically. Consequently, calling one, will also advance the frame
            for the other.
        '''

        cdef int ret = 0
        cdef Image image
        cdef double t = 0

        if self.eof:
            return self.eof_frame()

        with nogil:
            ret = av_read_frame(self.format_ctx, &self.pkt)
        if ret < 0:
            if ret == AVERROR_EOF:
                self.eof = 1
                self.pkt.data = NULL
                return self.eof_frame()
            raise Exception("Failed to read frame: {}".format(
                tcode(emsg(ret, self.msg, sizeof(self.msg)))))

        with nogil:
            self.frame = av_frame_alloc()
        if self.frame is NULL:
            av_packet_unref(&self.pkt)
            raise MemoryError("Failed to alloc frame")

        with nogil:
            ret = avcodec_send_packet(self.codec_ctx, &self.pkt)
            if ret >= 0:
                ret = avcodec_receive_frame(self.codec_ctx, self.frame)
        if ret < 0:
            # Release the frame and the packet on every failure path;
            # otherwise the next call (or eof_frame) overwrites self.frame
            # and the allocation is lost.
            av_frame_free(&self.frame)
            av_packet_unref(&self.pkt)
            if ret == AVERROR_EOF:
                self.eof = 1
                # eof_frame sends self.pkt to the decoder with no data.
                self.pkt.data = NULL
                return self.eof_frame()
            raise Exception("Failed to decode image from file")

        self.frame.pts = self.frame.best_effort_timestamp
        if self.frame.pts == AV_NOPTS_VALUE:
            t = 0.
        else:
            t = av_q2d(self.format_ctx.streams[0].time_base) * self.frame.pts

        image = Image(no_create=True)
        image.cython_init(self.frame)

        av_packet_unref(&self.pkt)
        av_frame_free(&self.frame)
        return image, t

    cdef inline object eof_frame(self):
        '''Used to flush the remaining frames until no more cached.

        Returns `(image, pts)` for every frame the decoder still hands out and
        `(None, 0)` once decoding fails; after that `eof` is 2 and the decoder
        is no longer queried.
        '''
        cdef int ret = 0
        cdef Image image
        cdef double t = 0
        if self.eof == 2:
            return None, 0

        with nogil:
            self.frame = av_frame_alloc()
        if self.frame is NULL:
            raise MemoryError("Failed to alloc frame")

        with nogil:
            ret = avcodec_send_packet(self.codec_ctx, &self.pkt)
            if ret >= 0:
                ret = avcodec_receive_frame(self.codec_ctx, self.frame)
        if ret < 0:
            self.eof = 2
            av_frame_free(&self.frame)
            return None, 0

        self.frame.pts = self.frame.best_effort_timestamp
        if self.frame.pts == AV_NOPTS_VALUE:
            t = 0.
        else:
            t = av_q2d(self.format_ctx.streams[0].time_base) * self.frame.pts
        image = Image(no_create=True)
        image.cython_init(self.frame)
        av_frame_free(&self.frame)
        return image, t


================================================
FILE: ffpyplayer/player/__init__.py
================================================
'''
FFmpeg based media player
=========================

A FFmpeg based python media player. See :class:`MediaPlayer` for details.
'''

__all__ = ('MediaPlayer', )

from ffpyplayer.player.player import MediaPlayer


================================================
FILE: ffpyplayer/player/clock.pxd
================================================

include '../includes/ffmpeg.pxi'


# C-level declaration of Clock; the method bodies live in clock.pyx.
cdef class Clock(object):
    cdef:
        double pts           # clock base
        double pts_drift     # clock base minus time at which we updated the clock
        double last_updated  # the `time` value given to the last set_clock_at call
        double speed         # set to 1.0 by cInit; changed through set_clock_speed
        int serial           # clock is based on a packet with this serial
        int paused           # when non-zero, get_clock returns pts unchanged
        int *queue_serial    # pointer to the current packet queue serial, used for obsolete clock detection

    # Resets speed/paused, binds queue_serial (or the clock's own serial when
    # NULL is given) and sets the clock to NAN with serial -1.
    cdef void cInit(Clock self, int *queue_serial) nogil
    # Current clock value; NAN when *queue_serial differs from serial.
    cdef double get_clock(Clock self) nogil
    # Stores pts and serial, using `time` as the update time.
    cdef void set_clock_at(Clock self, double pts, int serial, double time) nogil
    # Same as set_clock_at, with the time taken from av_gettime_relative().
    cdef void set_clock(Clock self, double pts, int serial) nogil
    # Re-bases the clock at its current value, then stores the new speed.
    cdef void set_clock_speed(Clock self, double speed) nogil
    # Copies the slave's clock when ours is NAN or differs from it by more
    # than AV_NOSYNC_THRESHOLD; does nothing when the slave's clock is NAN.
    cdef void sync_clock_to_slave(Clock self, Clock slave) nogil


================================================
FILE: ffpyplayer/player/clock.pyx
================================================
#cython: cdivision=True

__all__ = ('Clock', )

include '../includes/ff_consts.pxi'

cdef extern from "math.h" nogil:
    double NAN
    int isnan(double x)
    double fabs(double x)


cdef class Clock(object):
    # A clock whose value is only valid while the serial it was set with
    # matches the serial pointed to by `queue_serial`.

    def __cinit__(Clock self):
        pass

    cdef void cInit(Clock self, int *queue_serial) nogil:
        # Start un-paused at normal speed.
        self.speed = 1.0
        self.paused = 0
        # Without an external serial the clock tracks its own serial.
        self.queue_serial = &self.serial
        if queue_serial != NULL:
            self.queue_serial = queue_serial
        self.set_clock(NAN, -1)

    def __dealloc__(Clock self):
        pass

    cdef double get_clock(Clock self) nogil:
        # Returns NAN when the watched serial has moved on, the stored pts
        # while paused, and otherwise the drift-corrected current time.
        cdef double now
        if self.queue_serial[0] != self.serial:
            return NAN
        if self.paused:
            return self.pts
        now = av_gettime_relative() / 1000000.0
        return self.pts_drift + now - (now - self.last_updated) * (1.0 - self.speed)

    cdef void set_clock_at(Clock self, double pts, int serial, double time) nogil:
        # Records pts/serial together with the moment they were set.
        self.serial = serial
        self.last_updated = time
        self.pts = pts
        self.pts_drift = pts - time

    cdef void set_clock(Clock self, double pts, int serial) nogil:
        # set_clock_at with the time read from av_gettime_relative().
        self.set_clock_at(pts, serial, av_gettime_relative() / 1000000.0)

    cdef void set_clock_speed(Clock self, double speed) nogil:
        # Re-base on the current value first, then switch to the new speed.
        cdef double current = self.get_clock()
        self.set_clock(current, self.serial)
        self.speed = speed

    cdef void sync_clock_to_slave(Clock self, Clock slave) nogil:
        # Adopts the slave's value when it is valid and ours is either NAN
        # or further away than AV_NOSYNC_THRESHOLD.
        cdef double own_value = self.get_clock()
        cdef double slave_value = slave.get_clock()
        if isnan(slave_value):
            return
        if isnan(own_value) or fabs(own_value - slave_value) > AV_NOSYNC_THRESHOLD:
            self.set_clock(slave_value, slave.serial)


================================================
FILE: ffpyplayer/player/core.pxd
================================================

include '../includes/ffmpeg.pxi'

from ffpyplayer.player.queue cimport FFPacketQueue
from ffpyplayer.player.frame_queue cimport FrameQueue, Frame
from ffpyplayer.player.decoder cimport Decoder
from ffpyplayer.threading cimport MTGenerator, MTThread, MTMutex, MTCond
from ffpyplayer.player.clock cimport Clock
from ffpyplayer.pic cimport Image
from cpython.ref cimport PyObject


# Audio parameter set. VideoState keeps one instance each for audio_src,
# audio_filter_src (CONFIG_AVFILTER only) and audio_tgt, and audio_open
# receives a pointer to one as `audio_hw_params`.
cdef struct AudioParams:
    int freq                # presumably the sample rate in Hz — TODO confirm
    int channels
    int64_t channel_layout
    AVSampleFormat fmt
    int frame_size          # presumably bytes per sample frame — TODO confirm
    int bytes_per_sec


cdef class VideoState(object):
    cdef:
        MTThread read_tid
        const AVInputFormat *iformat
        int abort_request
        int paused
        int last_paused
        int queue_attachments_req
        int seek_req
        int seek_flags
        int64_t seek_pos
        int64_t seek_rel
        int read_pause_return
        AVFormatContext *ic
        int realtime
        int reached_eof
        int eof
        int audio_dev

        Clock audclk
        Clock vidclk
        Clock extclk

        FrameQueue pictq
        FrameQueue subpq
        FrameQueue sampq

        Decoder auddec
        Decoder viddec
        Decoder subdec

        int audio_stream

        int av_sync_type

        double audio_clock
        int audio_clock_serial
        double audio_diff_cum # used for AV difference average computation
        double audio_diff_avg_coef
        double audio_diff_threshold
        int audio_diff_avg_count
        AVStream *audio_st
        FFPacketQueue audioq
        int audio_hw_buf_size

        IF USE_SDL2_MIXER:
            uint8_t chunk_buf[AUDIO_MIN_BUFFER_SIZE]
            Mix_Chunk *chunk
            int audio_count

        uint8_t *audio_buf
        uint8_t *audio_buf1
        unsigned int audio_buf_size # in bytes
        unsigned int audio_buf1_size
        int audio_buf_index # in bytes
        int audio_write_buf_size
        AudioParams audio_src
        IF CONFIG_AVFILTER:
            AudioParams audio_filter_src
        AudioParams audio_tgt
        SwrContext *swr_ctx
        int frame_drops_early
        int frame_drops_late

        int16_t sample_array[SAMPLE_ARRAY_SIZE]
        int sample_array_index

        int subtitle_stream
        AVStream *subtitle_st
        FFPacketQueue subtitleq

        double frame_timer
        double frame_last_returned_time
        double frame_last_filter_delay
        int video_stream
        AVStream *video_st
        FFPacketQueue videoq
        double max_frame_duration      # maximum duration of a frame - above this, we consider the jump a timestamp discontinuity

        IF CONFIG_AVFILTER:
            int vfilter_idx
            AVFilterContext *in_video_filter   # the first filter in the video chain
            AVFilterContext *out_video_filter  # the last filter in the video chain
            AVFilterContext *in_audio_filter   # the first filter in the audio chain
            AVFilterContext *out_audio_filter  # the last filter in the audio chain
            AVFilterContext *split_audio_filter  # NOTE(review): comment was a copy of out_audio_filter's; presumably the split filter of the audio chain - confirm
            AVFilterGraph *agraph              # audio filter graph

        int last_video_stream, last_audio_stream, last_subtitle_stream

        MTCond continue_read_thread
        MTGenerator mt_gen
        VideoSettings *player
        int64_t last_time

        MTCond pause_cond
        double last_clock
        PyObject *self_id

        dict metadata

        object callback
        int is_ref
        AVPixelFormat pix_fmt


    cdef int cInit(self, MTGenerator mt_gen, VideoSettings *player, int paused,
                   AVPixelFormat out_fmt) nogil except 1
    cdef int cquit(VideoState self) nogil except 1
    cdef int request_thread_s(self, char *name, char *msg) nogil except 1
    cdef int request_thread(self, char *name, object msg) nogil except 1
    cdef int request_thread_py(self, object name, object msg) except 1
    cdef object get_out_pix_fmt(self)
    cdef void set_out_pix_fmt(self, AVPixelFormat out_fmt)
    cdef int get_master_sync_type(VideoState self) nogil
    cdef double get_master_clock(VideoState self) nogil except? 0.0
    cdef int check_external_clock_speed(VideoState self) nogil except 1
    cdef int stream_seek(VideoState self, int64_t pos, int64_t rel, int seek_by_bytes, int flush) nogil except 1
    cdef int seek_chapter(VideoState self, int incr, int flush) nogil except 1
    cdef int toggle_pause(VideoState self) nogil except 1
    cdef double compute_target_delay(VideoState self, double delay) nogil except? 0.0
    cdef double vp_duration(VideoState self, Frame *vp, Frame *nextvp) nogil except? 0.0
    cdef void update_video_pts(VideoState self, double pts, int64_t pos, int serial) nogil
    cdef int video_refresh(VideoState self, Image next_image, double *pts, double *remaining_time,
                           int force_refresh) nogil except -1
    cdef int get_video_frame(VideoState self, AVFrame *frame) nogil except 2
    IF CONFIG_AVFILTER:
        cdef int configure_filtergraph(VideoState self, AVFilterGraph *graph, const char *filtergraph,
                                       AVFilterContext *source_ctx, AVFilterContext *sink_ctx) nogil except? 1
        cdef int configure_video_filters(VideoState self, AVFilterGraph *graph,
                                         const char *vfilters, AVFrame *frame,
                                         AVPixelFormat pix_fmt) nogil except? 1
        cdef int configure_audio_filters(VideoState self, const char *afilters,
                                         int force_output_format) nogil except? 1
    cdef int audio_thread(self) nogil except? 1
    cdef int video_thread(VideoState self) nogil except? 1
    cdef int subtitle_thread(VideoState self) nogil except 1
    cdef int subtitle_display(self, AVSubtitle *sub) nogil except 1
    cdef int update_sample_display(VideoState self, int16_t *samples, int samples_size) nogil except 1
    cdef int synchronize_audio(VideoState self, int nb_samples) nogil except -1
    cdef int audio_decode_frame(VideoState self) nogil except? 1
    cdef int sdl_audio_callback(VideoState self, uint8_t *stream, int len) nogil except 1
    cdef inline int open_audio_device(VideoState self, SDL_AudioSpec *wanted_spec, SDL_AudioSpec *spec) nogil except 1
    cdef int audio_open(VideoState self, int64_t wanted_channel_layout, int wanted_nb_channels,
                        int wanted_sample_rate, AudioParams *audio_hw_params) nogil except? 1
    cdef int stream_component_open(VideoState self, int stream_index) nogil except 1
    cdef int stream_component_close(VideoState self, int stream_index) nogil except 1
    cdef int read_thread(VideoState self) nogil except 1
    cdef int stream_has_enough_packets(self, AVStream *st, int stream_id, FFPacketQueue queue) nogil
    cdef inline int failed(VideoState self, int ret, AVFormatContext *ic, AVPacket **pkt) nogil except 1
    cdef int stream_select_program(VideoState self, int requested_program) nogil except 1
    cdef int stream_select_channel(VideoState self, int codec_type, unsigned int requested_stream) nogil except 1
    cdef int stream_cycle_channel(VideoState self, int codec_type) nogil except 1
    cdef int decode_interrupt_cb(VideoState self) nogil


# Settings and shared contexts for a VideoState. A pointer to this struct
# is passed to VideoState.cInit and kept as VideoState.player.
cdef struct VideoSettings:
    unsigned sws_flags
    int loglevel

    const AVInputFormat *file_iformat  # copied to VideoState.iformat by cInit
    char *input_filename
    int screen_width
    int screen_height
    uint8_t audio_volume
    int muted
    int audio_sdl
    int audio_disable
    int video_disable
    int subtitle_disable
    const char* wanted_stream_spec[<int>AVMEDIA_TYPE_NB]  # one entry per AVMEDIA_TYPE_* value
    int seek_by_bytes
    int show_status
    int av_sync_type  # copied to VideoState.av_sync_type by cInit
    int64_t start_time
    int64_t duration
    int fast
    int genpts
    int lowres
    int decoder_reorder_pts
    int autoexit
    int loop
    int framedrop
    int infinite_buffer
    char *audio_codec_name
    char *subtitle_codec_name
    char *video_codec_name
    const char **vfilters_list
    int nb_vfilters  # presumably the number of entries in vfilters_list — TODO confirm
    char *afilters
    char *avfilters

    int autorotate
    int find_stream_info
    int filter_threads

    # current context
    int64_t audio_callback_time

    # When CONFIG_AVFILTER is off, cInit sets this to NULL and cquit frees it
    # with sws_freeContext.
    SwsContext *img_convert_ctx
    AVDictionary *format_opts
    AVDictionary *codec_opts
    AVDictionary *resample_opts
    AVDictionary *sws_dict
    AVDictionary *swr_opts


================================================
FILE: ffpyplayer/player/core.pyx
================================================

__all__ = ('VideoState', )

include '../includes/ff_consts.pxi'
include "../includes/inline_funcs.pxi"

from ffpyplayer.player.queue cimport FFPacketQueue
from ffpyplayer.player.frame_queue cimport FrameQueue
from ffpyplayer.threading cimport MTGenerator, MTThread, MTMutex, MTCond, Py_MT, SDL_MT
from ffpyplayer.player.clock cimport Clock
from ffpyplayer.pic cimport Image
from cpython.ref cimport PyObject

import ffpyplayer.tools  # for init
import traceback
from weakref import ref

# android platform detection
from os import environ
cdef int IS_ANDROID = 0

cdef extern from "ffconfig.h":
    bint WIN_IS_DEFINED

cdef extern from "Python.h":
    PyObject *PyUnicode_FromString(const char *u)
    void Py_DECREF(PyObject *)

if "ANDROID_ARGUMENT" in environ:
    import jnius
    IS_ANDROID = 1

cdef extern from "limits.h" nogil:
    int64_t INT64_MAX
    int64_t INT64_MIN

cdef extern from "math.h" nogil:
    double NAN
    int isnan(double x)
    double fabs(double x)
    double exp(double x)
    double log(double x)
    double floor(double x)
    double round(double x)

cdef extern from "errno.h" nogil:
    int ENOSYS
    int ENOMEM
    int EAGAIN

cdef extern from "stdio.h" nogil:
    int snprintf(char *, size_t, const char *, ... )

cdef extern from "stdlib.h" nogil:
    int atoi(const char *)

cdef extern from "inttypes.h" nogil:
    const char *PRId64
    const char *PRIx64

cdef extern from "string.h" nogil:
    void * memset(void *, int, size_t)
    void * memcpy(void *, const void *, size_t)
    char * strchr (char *, int)
    int strcmp(const char *, const char *)
    int strncmp(const char *, const char *, size_t)
    char * strerror(int)
    size_t strlen(const char *)
    char * strcat(char *, const char *)
    char * strcpy(char *, const char *)

ctypedef enum LoopState:
    retry,
    display

# XXX: const
# Subtitle format names kept as Python objects.
# NOTE(review): on Python 3, str() of a bytes object returns its repr
# ("b'ass'" / "b'text'"), not 'ass' / 'text' — confirm this is intended.
cdef object sub_ass = str(b'ass'), sub_text = str(b'text'), sub_fmt

# Sample format list terminated by AV_SAMPLE_FMT_NONE.
cdef AVSampleFormat *sample_fmts = [AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE]
# NOTE(review): presumably fallback tables tried when the audio device
# rejects the requested channel count / sample rate — confirm in audio_open.
cdef int *next_nb_channels = [0, 0, 1, 6, 2, 6, 4, 6]
cdef int *next_sample_rates = [0, 44100, 48000, 96000, 192000]
# Number of entries in next_sample_rates.
cdef int next_sample_rates_len = 5

# Module-wide audio state. NOTE(review): presumably shared by all players in
# the process, with audio_mutex guarding audio_count and spec_used — confirm.
cdef MTMutex audio_mutex = MTMutex(SDL_MT)
cdef int audio_count = 0
cdef SDL_AudioSpec spec_used


# Forwards `stream` and `len` to VideoState.sdl_audio_callback of `self`;
# `chan` is not used.
cdef void sdl_mixer_callback(int chan, uint8_t *stream, int len, VideoState self) nogil:
    self.sdl_audio_callback(stream, len)


cdef int read_thread_enter(void *obj_id) except? 1 with gil:
    # Thread entry point (passed to create_thread in VideoState.cInit) that
    # runs VideoState.read_thread of the object behind `obj_id`.
    #
    # Returns whatever read_thread returns. If it raises, the error text and
    # the traceback are logged with av_log, the callback is notified with
    # 'read:error', and the exception is re-raised for Python threads or
    # turned into a return value of 1 otherwise. On Android jnius.detach()
    # is called before leaving.
    cdef VideoState vs = <VideoState>obj_id
    cdef bytes msg, tb
    try:
        with nogil:
            return vs.read_thread()
    except Exception as e:
        msg = str(e).encode('utf8')
        av_log(NULL, AV_LOG_FATAL, '%s', msg)
        tb = traceback.format_exc().encode('utf8')
        av_log(NULL, AV_LOG_FATAL, '%s', tb)
        # request_thread_s takes C strings, so hand it the encoded error
        # text; passing the exception object cannot be coerced to char *.
        vs.request_thread_s('read:error', msg)
        if vs.mt_gen.mt_src == Py_MT:
            raise
        else:
            return 1
    finally:
        if IS_ANDROID:
            jnius.detach()

cdef int video_thread_enter(void *obj_id) except? 1 with gil:
    # Thread entry point that runs VideoState.video_thread of the object
    # behind `obj_id`.
    #
    # Returns whatever video_thread returns. If it raises, the error text and
    # the traceback are logged with av_log, the callback is notified with
    # 'video:error', and the exception is re-raised for Python threads or
    # turned into a return value of 1 otherwise. On Android jnius.detach()
    # is called before leaving.
    cdef VideoState vs = <VideoState>obj_id
    cdef bytes msg, tb
    try:
        with nogil:
            return vs.video_thread()
    except Exception as e:
        msg = str(e).encode('utf8')
        av_log(NULL, AV_LOG_FATAL, '%s', msg)
        tb = traceback.format_exc().encode('utf8')
        av_log(NULL, AV_LOG_FATAL, '%s', tb)
        # request_thread_s takes C strings, so hand it the encoded error
        # text; passing the exception object cannot be coerced to char *.
        vs.request_thread_s('video:error', msg)
        if vs.mt_gen.mt_src == Py_MT:
            raise
        else:
            return 1
    finally:
        if IS_ANDROID:
            jnius.detach()

cdef int audio_thread_enter(void *obj_id) except? 1 with gil:
    # Thread entry point that runs VideoState.audio_thread of the object
    # behind `obj_id`.
    #
    # Returns whatever audio_thread returns. If it raises, the error text and
    # the traceback are logged with av_log, the callback is notified with
    # 'audio:error', and the exception is re-raised for Python threads or
    # turned into a return value of 1 otherwise. On Android jnius.detach()
    # is called before leaving.
    cdef VideoState vs = <VideoState>obj_id
    cdef bytes msg, tb
    try:
        with nogil:
            return vs.audio_thread()
    except Exception as e:
        msg = str(e).encode('utf8')
        av_log(NULL, AV_LOG_FATAL, '%s', msg)
        tb = traceback.format_exc().encode('utf8')
        av_log(NULL, AV_LOG_FATAL, '%s', tb)
        # request_thread_s takes C strings, so hand it the encoded error
        # text; passing the exception object cannot be coerced to char *.
        vs.request_thread_s('audio:error', msg)
        if vs.mt_gen.mt_src == Py_MT:
            raise
        else:
            return 1
    finally:
        if IS_ANDROID:
            jnius.detach()

cdef int subtitle_thread_enter(void *obj_id) except? 1 with gil:
    # Thread entry point that runs VideoState.subtitle_thread of the object
    # behind `obj_id`.
    #
    # Returns whatever subtitle_thread returns. If it raises, the error text
    # and the traceback are logged with av_log, the callback is notified with
    # 'subtitle:error', and the exception is re-raised for Python threads or
    # turned into a return value of 1 otherwise. On Android jnius.detach()
    # is called before leaving.
    cdef VideoState vs = <VideoState>obj_id
    cdef bytes msg, tb
    try:
        with nogil:
            return vs.subtitle_thread()
    except Exception as e:
        msg = str(e).encode('utf8')
        av_log(NULL, AV_LOG_FATAL, '%s', msg)
        tb = traceback.format_exc().encode('utf8')
        av_log(NULL, AV_LOG_FATAL, '%s', tb)
        # request_thread_s takes C strings, so hand it the encoded error
        # text; passing the exception object cannot be coerced to char *.
        vs.request_thread_s('subtitle:error', msg)
        if vs.mt_gen.mt_src == Py_MT:
            raise
        else:
            return 1
    finally:
        if IS_ANDROID:
            jnius.detach()

cdef int check_stream_specifier(AVFormatContext *s, AVStream *st, const char *spec) nogil:
    # Returns the result of avformat_match_stream_specifier for `spec`;
    # a negative result is logged as an invalid specifier before returning.
    cdef int match_res = avformat_match_stream_specifier(s, st, spec)
    if match_res >= 0:
        return match_res
    av_log(s, AV_LOG_ERROR, b"Invalid stream specifier: %s.\n", spec)
    return match_res

cdef AVDictionary *filter_codec_opts(AVDictionary *opts, AVCodecID codec_id,
                                     AVFormatContext *s, AVStream *st, AVCodec *codec) nogil:
    # Builds a new dictionary holding the entries of `opts` that apply to
    # stream `st` and are known options of the generic codec class or of the
    # codec's private class.
    #
    # A key may carry a `:specifier` suffix; such an entry is used only when
    # the specifier matches `st`. A key may also start with the media-type
    # letter (v/a/s), in which case the rest of the key is looked up and
    # stored. When `codec` is NULL it is looked up from `codec_id` (encoder
    # if `s` is an output context, decoder otherwise).
    #
    # Returns the new dictionary (NULL if nothing matched), or NULL after an
    # invalid stream specifier; entries collected so far are freed in that
    # case.
    cdef AVDictionary *ret = NULL
    cdef const AVDictionaryEntry *t = NULL
    cdef int flags
    cdef char prefix = 0
    cdef char *p
    cdef const AVClass *cc = avcodec_get_class()
    cdef int res
    if s.oformat != NULL:
        flags = AV_OPT_FLAG_ENCODING_PARAM
    else:
        flags = AV_OPT_FLAG_DECODING_PARAM
    if codec == NULL:
        if s.oformat != NULL:
            codec = avcodec_find_encoder(codec_id)
        else:
            codec = avcodec_find_decoder(codec_id)

    if st.codecpar.codec_type == AVMEDIA_TYPE_VIDEO:
        prefix  = b'v'
        flags  |= AV_OPT_FLAG_VIDEO_PARAM
    elif st.codecpar.codec_type ==  AVMEDIA_TYPE_AUDIO:
        prefix  = b'a'
        flags  |= AV_OPT_FLAG_AUDIO_PARAM
    elif st.codecpar.codec_type ==  AVMEDIA_TYPE_SUBTITLE:
        prefix  = b's'
        flags  |= AV_OPT_FLAG_SUBTITLE_PARAM

    while 1:
        t = av_dict_get(opts, b"", t, AV_DICT_IGNORE_SUFFIX)
        if t == NULL:
            break
        p = strchr(t.key, b':')

        # check stream specification in opt name
        if p != NULL:
            res = check_stream_specifier(s, st, p + 1)
            if res == 1:
                # Cut the key at the ':' for the lookups below; the
                # character is restored at the end of the iteration.
                p[0] = 0
            elif res == 0:
                continue
            else:
                # Invalid specifier: release what was collected so far
                # instead of leaking it. The key was not modified here.
                av_dict_free(&ret)
                return NULL

        if (av_opt_find(&cc, t.key, NULL, flags, AV_OPT_SEARCH_FAKE_OBJ) != NULL or
            (codec != NULL and codec.priv_class != NULL and
             av_opt_find(&codec.priv_class, t.key, NULL, flags, AV_OPT_SEARCH_FAKE_OBJ) != NULL)):
            av_dict_set(&ret, t.key, t.value, 0)
        elif (t.key[0] == prefix and av_opt_find(&cc, t.key + 1, NULL, flags,
                                                 AV_OPT_SEARCH_FAKE_OBJ) != NULL):
            av_dict_set(&ret, t.key + 1, t.value, 0)

        if p != NULL:
            p[0] = b':'
    return ret

cdef int is_realtime(AVFormatContext *s) nogil:
    # Returns 1 when the input format is named rtp, rtsp or sdp, or when the
    # context has an I/O context and its url starts with "rtp:" or "udp:";
    # returns 0 otherwise.
    cdef const char *fmt_name = s.iformat.name
    if strcmp(fmt_name, b"rtp") == 0:
        return 1
    if strcmp(fmt_name, b"rtsp") == 0:
        return 1
    if strcmp(fmt_name, b"sdp") == 0:
        return 1
    if s.pb:
        if strncmp(s.url, b"rtp:", 4) == 0 or strncmp(s.url, b"udp:", 4) == 0:
            return 1
    return 0

cdef AVDictionary **setup_find_stream_info_opts(AVFormatContext *s, AVDictionary *codec_opts) nogil:
    # Allocates an array with one AVDictionary pointer per stream of `s`,
    # each filled by filter_codec_opts from `codec_opts`.
    # Returns NULL when `s` has no streams or the allocation fails (the
    # latter is logged).
    cdef int idx
    cdef AVDictionary **per_stream

    if s.nb_streams == 0:
        return NULL

    per_stream = <AVDictionary **>av_mallocz(s.nb_streams * sizeof(AVDictionary *))
    if per_stream == NULL:
        av_log(NULL, AV_LOG_ERROR, b"Could not alloc memory for stream options.\n")
        return NULL

    for idx in range(s.nb_streams):
        per_stream[idx] = filter_codec_opts(
            codec_opts, s.streams[idx].codecpar.codec_id, s, s.streams[idx], NULL)
    return per_stream

cdef double get_rotation(int32_t *displaymatrix) nogil:
    # Returns the negated, rounded result of av_display_rotation_get for
    # `displaymatrix` (0 when it is NULL), reduced by whole multiples of 360.
    # A warning is logged when the result is more than 2 away from a
    # multiple of 90.
    cdef double angle = 0

    if displaymatrix != NULL:
        angle = -round(av_display_rotation_get(displaymatrix))
    angle -= 360 * floor(angle / 360. + 0.9 / 360.)

    if fabs(angle - 90 * round(angle / 90)) > 2:
        av_log(NULL, AV_LOG_WARNING, "Odd rotation angle")

    return angle

cdef bytes py_pat = bytes(b"%7.2f %s:%7.3f fd=%4d aq=%5dKB vq=%5dKB sq=%5dB f=%" + PRId64 + b"/%" + PRId64 + b"   \r")
cdef char *py_pat_str = py_pat
cdef bytes av_str = b"A-V", mv_str = b"M-V", ma_str = b"M-A", empty_str = b"   "
cdef char *str_av = av_str
cdef char *str_mv = mv_str
cdef char *str_ma = ma_str
cdef char *str_empty = empty_str


cdef class VideoState(object):

    def __cinit__(self, object callback=None):
        self.callback = callback
        self.is_ref = isinstance(callback, ref)
        self.self_id = <PyObject*>self
        self.metadata = {
            'src_vid_size': (0, 0), 'sink_vid_size': (0, 0), 'title': '',
            'duration': None, 'frame_rate': (0, 0), 'src_pix_fmt': '',
            'aspect_ratio':(1, 1)}

    cdef int cInit(self, MTGenerator mt_gen, VideoSettings *player, int paused,
                   AVPixelFormat out_fmt) nogil except 1:
        '''Initializes the state and starts the read thread.

        Stores `player` and `out_fmt`, resets the stream indices to -1,
        creates the packet queues, decoders, frame queues, condition
        variables and clocks, optionally toggles pause when `paused` is
        non-zero, and finally creates the thread that runs
        `read_thread_enter`.

        Returns 0; errors are raised as exceptions.
        '''
        self.player = player
        # vfilter_idx is only declared when CONFIG_AVFILTER is set.
        IF CONFIG_AVFILTER:
            self.vfilter_idx = 0
        self.pix_fmt = out_fmt
        self.last_video_stream = self.video_stream = -1
        self.last_audio_stream = self.audio_stream = -1
        self.last_subtitle_stream = self.subtitle_stream = -1

        IF not CONFIG_AVFILTER:
            self.player.img_convert_ctx = NULL
        self.iformat = player.file_iformat
        with gil:
            self.read_tid = None
            self.mt_gen = mt_gen
            self.audioq = FFPacketQueue.__new__(FFPacketQueue, mt_gen)
            self.subtitleq = FFPacketQueue.__new__(FFPacketQueue, mt_gen)
            self.videoq = FFPacketQueue.__new__(FFPacketQueue, mt_gen)

            self.auddec = Decoder.__new__(Decoder)
            self.viddec = Decoder.__new__(Decoder)
            self.subdec = Decoder.__new__(Decoder)

            self.pictq = FrameQueue.__new__(
                FrameQueue, mt_gen, self.videoq,
                VIDEO_PICTURE_QUEUE_SIZE, 1)
            self.subpq = FrameQueue.__new__(
                FrameQueue, mt_gen, self.subtitleq,
                SUBPICTURE_QUEUE_SIZE, 0)
            self.sampq = FrameQueue.__new__(
                FrameQueue, mt_gen, self.audioq,
                SAMPLE_QUEUE_SIZE, 1)
            self.continue_read_thread = MTCond.__new__(MTCond, mt_gen.mt_src)
            self.pause_cond = MTCond.__new__(MTCond, mt_gen.mt_src)

            self.vidclk = Clock.__new__(Clock)
            self.audclk = Clock.__new__(Clock)
            self.extclk = Clock.__new__(Clock)

        # The video and audio clocks watch their packet queue's serial; the
        # external clock is given NULL.
        self.vidclk.cInit(&self.videoq.serial)
        self.audclk.cInit(&self.audioq.serial)
        self.extclk.cInit(NULL)

        self.audio_clock_serial = -1
        self.av_sync_type = player.av_sync_type
        self.reached_eof = 0
        if paused:
            self.toggle_pause()

        with gil:
            self.read_tid = MTThread.__new__(MTThread, mt_gen.mt_src)
            self.read_tid.create_thread(read_thread_enter, "read_thread", self.self_id)
        return 0

    def __dealloc__(VideoState self):
        with nogil:
            self.cquit()

    cdef int cquit(VideoState self) nogil except 1:
        '''
        Stops the read thread and releases the stream resources.

        Does nothing when there is no read thread (``read_tid`` is None), which
        also makes a second call a no-op. Otherwise it requests an abort, wakes
        anything waiting on ``pause_cond``, waits for the read thread to finish,
        closes every open stream component and finally closes the input context.

        Always returns 0.
        '''
        # XXX: use a special url_shutdown call to abort parse cleanly
        if self.read_tid is None:
            return 0
        self.abort_request = 1
        # wake up a waiter on pause_cond so that it can see the abort request
        self.pause_cond.lock()
        self.pause_cond.cond_signal()
        self.pause_cond.unlock()
        self.read_tid.wait_thread(NULL)

        # dropping the Python reference requires the GIL
        with gil:
            self.read_tid = None

        if self.audio_stream >= 0:
            self.stream_component_close(self.audio_stream)
        if self.video_stream >= 0:
            self.stream_component_close(self.video_stream)
        if self.subtitle_stream >= 0:
            self.stream_component_close(self.subtitle_stream)

        avformat_close_input(&self.ic)
        self.ic = NULL

        # the software scaler context is only released in builds without avfilter
        IF not CONFIG_AVFILTER:
            sws_freeContext(self.player.img_convert_ctx)

        return 0

    cdef int request_thread_s(self, char *name, char *msg) nogil except 1:
        # C-string variant of request_thread: both the event name and the
        # message are converted with tcode() before being forwarded to the
        # callback. Without a callback there is nothing to do.
        if self.callback is not None:
            with gil:
                return self.request_thread_py(tcode(name), tcode(msg))
        return 0

    cdef int request_thread(self, char *name, object msg) nogil except 1:
        # Forwards an event to the callback; only the name is converted with
        # tcode(), the message object is passed through unchanged. Without a
        # callback there is nothing to do.
        if self.callback is not None:
            with gil:
                return self.request_thread_py(tcode(name), msg)
        return 0

    cdef int request_thread_py(self, object name, object msg) except 1:
        '''
        Invokes the user callback with ``(name, msg)``. Must be called with the
        GIL held. Always returns 0; exceptions raised by the callback propagate.
        '''
        cdef object target
        if not self.is_ref:
            # the stored callback is the callable itself
            self.callback(name, msg)
            return 0

        # is_ref: the stored object is called first to obtain the real callable
        # (presumably a weak reference); a None result is silently skipped
        target = self.callback()
        if target is not None:
            target(name, msg)
        return 0

    cdef object get_out_pix_fmt(self):
        # Returns the name of the current output pixel format (self.pix_fmt),
        # as reported by av_get_pix_fmt_name() and converted with tcode().
        return tcode(av_get_pix_fmt_name(self.pix_fmt))

    cdef void set_out_pix_fmt(self, AVPixelFormat out_fmt):
        '''
        Sets the pixel format requested by the user for the output images.

        If avfilter is enabled, the filter is changed when this value changes.
        If it is disabled, this method may only be called before the other
        methods below, and cannot be called once things are running.

        After the pixel format is changed it might take a few frames until the
        user receives images in the new format, in case pictures were already
        queued.
        '''
        self.pix_fmt = out_fmt

    cdef int decode_interrupt_cb(VideoState self) nogil:
        # Reports the abort flag: non-zero once an abort has been requested
        # (cquit() sets abort_request to 1).
        return self.abort_request

    cdef int get_master_sync_type(VideoState self) nogil:
        '''
        Resolves the configured sync type against the streams that are actually
        open: video master falls back to audio master when there is no video
        stream, audio master falls back to the external clock when there is no
        audio stream, and anything else uses the external clock.
        '''
        if self.av_sync_type == AV_SYNC_VIDEO_MASTER:
            return AV_SYNC_VIDEO_MASTER if self.video_st != NULL else AV_SYNC_AUDIO_MASTER
        if self.av_sync_type == AV_SYNC_AUDIO_MASTER:
            return AV_SYNC_AUDIO_MASTER if self.audio_st != NULL else AV_SYNC_EXTERNAL_CLOCK
        return AV_SYNC_EXTERNAL_CLOCK

    # get the current master clock value
    cdef double get_master_clock(VideoState self) nogil except? 0.0:
        '''
        Returns the current value of whichever clock get_master_sync_type()
        selects: the video clock, the audio clock or the external clock.
        '''
        cdef int source = self.get_master_sync_type()

        if source == AV_SYNC_VIDEO_MASTER:
            return self.vidclk.get_clock()
        if source == AV_SYNC_AUDIO_MASTER:
            return self.audclk.get_clock()
        return self.extclk.get_clock()

    cdef int check_external_clock_speed(VideoState self) nogil except 1:
        '''
        Adjusts the external clock speed according to the packet queue levels.

        The speed is lowered (not below EXTERNAL_CLOCK_SPEED_MIN) when an open
        stream's queue holds at most EXTERNAL_CLOCK_MIN_FRAMES packets, raised
        (not above EXTERNAL_CLOCK_SPEED_MAX) when every open stream's queue
        holds more than EXTERNAL_CLOCK_MAX_FRAMES packets, and otherwise moved
        one step back towards 1.0. Always returns 0.
        '''
        cdef double speed = self.extclk.speed
        cdef bint running_low, running_full

        running_low = (
            (self.video_stream >= 0 and self.videoq.nb_packets <= EXTERNAL_CLOCK_MIN_FRAMES) or
            (self.audio_stream >= 0 and self.audioq.nb_packets <= EXTERNAL_CLOCK_MIN_FRAMES))
        if running_low:
            self.extclk.set_clock_speed(FFMAXD(EXTERNAL_CLOCK_SPEED_MIN, speed - EXTERNAL_CLOCK_SPEED_STEP))
            return 0

        running_full = (
            (self.video_stream < 0 or self.videoq.nb_packets > EXTERNAL_CLOCK_MAX_FRAMES) and
            (self.audio_stream < 0 or self.audioq.nb_packets > EXTERNAL_CLOCK_MAX_FRAMES))
        if running_full:
            self.extclk.set_clock_speed(FFMIND(EXTERNAL_CLOCK_SPEED_MAX, speed + EXTERNAL_CLOCK_SPEED_STEP))
            return 0

        if speed != 1.0:
            # (1 - speed) / |1 - speed| is the sign of the correction
            self.extclk.set_clock_speed(speed + EXTERNAL_CLOCK_SPEED_STEP * (1.0 - speed) / fabs(1.0 - speed))
        return 0

    # seek in the stream
    cdef int stream_seek(VideoState self, int64_t pos, int64_t rel, int seek_by_bytes, int flush) nogil except 1:
        '''
        Registers a seek request, unless one is already pending.

        `pos` and `rel` are stored as the seek target and relative amount and
        `seek_by_bytes` selects AVSEEK_FLAG_BYTE. With `flush`, one decoder is
        given `pos / AV_TIME_BASE` as its seek position and the picture queue is
        emptied after the request has been posted. Always returns 0.
        '''
        cdef double target

        if self.seek_req:
            # a seek is already pending; this request is ignored
            return 0

        self.viddec.set_seek_pos(-1)
        self.auddec.set_seek_pos(-1)
        if flush:
            target = pos / <double>AV_TIME_BASE
            if self.get_master_sync_type() == AV_SYNC_VIDEO_MASTER:
                self.auddec.set_seek_pos(target)
            else:
                self.viddec.set_seek_pos(target)

        self.seek_pos = pos
        self.seek_rel = rel
        if seek_by_bytes:
            self.seek_flags |= AVSEEK_FLAG_BYTE
        else:
            self.seek_flags &= ~AVSEEK_FLAG_BYTE
        # the request flag is raised only after its parameters are in place
        self.seek_req = 1

        self.continue_read_thread.lock()
        self.continue_read_thread.cond_signal()
        self.continue_read_thread.unlock()

        if flush:
            while not self.pictq.is_empty():
                self.pictq.frame_queue_next()
        return 0

    cdef int seek_chapter(VideoState self, int incr, int flush) nogil except 1:
        '''
        Seeks to the start of the chapter `incr` chapters away from the current
        one. `flush` is passed on to stream_seek(). Does nothing when the input
        has no chapters or when the resulting index is past the last chapter;
        a negative index is clamped to the first chapter. Always returns 0.
        '''
        # current master clock position expressed in AV_TIME_BASE units
        cdef int64_t pos = <int64_t>(self.get_master_clock() * AV_TIME_BASE)
        cdef int i
        cdef AVChapter *ch

        if not self.ic.nb_chapters:
            return 0

        # find the current chapter
        for i in range(self.ic.nb_chapters):
            ch = self.ic.chapters[i]
            # chapter i starts after pos, so the previous index is the current chapter
            if av_compare_ts(pos, AV_TIME_BASE_Q, ch.start, ch.time_base) < 0:
                i -= 1
                break
        # NOTE(review): when the loop ends without a break, i keeps the last
        # value the range loop assigned to it - confirm this is the intended
        # "current chapter" for a position inside the last chapter.

        i += incr
        i = FFMAX(i, 0)
        if i >= self.ic.nb_chapters:
            return 0

        if self.player.loglevel >= AV_LOG_VERBOSE:
            av_log(NULL, AV_LOG_VERBOSE, b"Seeking to chapter %d.\n", i)
        # the chapter start is rescaled from the chapter time base to AV_TIME_BASE_Q
        self.stream_seek(av_rescale_q(self.ic.chapters[i].start, self.ic.chapters[i].time_base, AV_TIME_BASE_Q),
                         0, 0, flush)
        return 0

    # pause or resume the video
    cdef int toggle_pause(VideoState self) nogil except 1:
        '''
        Flips the paused state of the player and of its three clocks, then
        signals ``pause_cond``. Always returns 0.
        '''
        cdef double now
        cdef bint new_state

        if self.paused:
            # resuming: push the frame timer forward by the time elapsed since
            # the video clock was last updated
            now = av_gettime_relative() / 1000000.0
            self.frame_timer += now - self.vidclk.last_updated
            if self.read_pause_return != AVERROR(ENOSYS):
                self.vidclk.paused = 0
            self.vidclk.set_clock(self.vidclk.get_clock(), self.vidclk.serial)
        self.extclk.set_clock(self.extclk.get_clock(), self.extclk.serial)

        new_state = not self.paused
        self.paused = new_state
        self.audclk.paused = new_state
        self.vidclk.paused = new_state
        self.extclk.paused = new_state

        self.pause_cond.lock()
        self.pause_cond.cond_signal()
        self.pause_cond.unlock()
        return 0

    cdef double compute_target_delay(VideoState self, double delay) nogil except? 0.0:
        '''
        Returns `delay` corrected so that the video follows the master clock.

        When video is the master the delay is returned unchanged. Otherwise the
        difference between the video clock and the master clock is compared to
        a threshold derived from `delay`, and the delay is shortened, extended
        or doubled accordingly.
        '''
        cdef double sync_threshold, diff = 0

        # update delay to follow master synchronisation source
        if self.get_master_sync_type() != AV_SYNC_VIDEO_MASTER:
            # if video is slave, we try to correct big delays by
            # duplicating or deleting a frame
            diff = self.vidclk.get_clock() - self.get_master_clock()
            # skip or repeat frame. We take into account the
            # delay to compute the threshold. I still don't know
            # if it is the best guess
            sync_threshold = FFMAXD(AV_SYNC_THRESHOLD_MIN, FFMIND(AV_SYNC_THRESHOLD_MAX, delay))
            if (not isnan(diff)) and fabs(diff) < self.max_frame_duration:
                if diff <= -sync_threshold:
                    # video is late: shorten the delay, never below zero
                    delay = FFMAXD(0, delay + diff)
                elif diff >= sync_threshold:
                    # video is early
                    if delay > AV_SYNC_FRAMEDUP_THRESHOLD:
                        delay = delay + diff
                    else:
                        delay = 2 * delay

        if self.player.loglevel >= AV_LOG_TRACE:
            av_log(NULL, AV_LOG_TRACE, b"video: delay=%0.3f A-V=%f\n", delay, -diff)
        return delay

    cdef double vp_duration(VideoState self, Frame *vp, Frame *nextvp) nogil except? 0.0:
        '''
        Returns how long `vp` should be displayed given its successor `nextvp`.

        Frames with different serials give 0.0. Otherwise the pts difference is
        used, unless it is NaN, not positive or larger than
        ``max_frame_duration``, in which case ``vp.duration`` is returned.
        '''
        cdef double span

        if vp.serial != nextvp.serial:
            return 0.0

        span = nextvp.pts - vp.pts
        if isnan(span) or span <= 0 or span > self.max_frame_duration:
            return vp.duration
        return span

    cdef void update_video_pts(VideoState self, double pts, int64_t pos, int serial) nogil:
        # Sets the video clock to (pts, serial) and then syncs the external
        # clock to it. `pos` is not used by this method.
        # update current video pts
        self.vidclk.set_clock(pts, serial)
        self.extclk.sync_clock_to_slave(self.vidclk)

    cdef int video_refresh(VideoState self, Image next_image, double *pts, double *remaining_time,
                           int force_refresh) nogil except -1:
        ''' Selects the next video picture to hand out and updates the video clock.

        `next_image`, when not None, is initialized from the selected frame and
        `pts[0]` receives that frame's pts. `remaining_time[0]` is reset to 0 on
        entry and set to the time left until `frame_timer + delay` when the
        picture is not yet due. `force_refresh` allows a refresh while paused
        and steps the picture queue back one frame first.

        Returns: 1 = paused, 2 = eof, 3 = no pic but remaining_time is set, 0 = valid image
        '''
        cdef Frame *vp
        cdef Frame *vp_temp
        cdef Frame *lastvp
        cdef double time
        cdef Frame *sp
        cdef Frame *sp2
        cdef int redisplay
        cdef LoopState state = retry
        cdef double last_duration, duration, delay
        cdef Frame *nextvp
        cdef int64_t cur_time
        cdef int aqsize, vqsize, sqsize
        cdef double av_diff
        cdef const char *pat
        cdef char *m
        cdef int64_t m2, m3
        cdef int result = 3
        remaining_time[0] = 0.

        self.pictq.alloc_picture()
        if self.paused and not force_refresh:
            return 1  # paused
        # the external clock speed is only adjusted for realtime sources
        if (not self.paused) and self.get_master_sync_type() == AV_SYNC_EXTERNAL_CLOCK and self.realtime:
            self.check_external_clock_speed()

        if self.video_st != NULL:
            redisplay = 0
            if force_refresh:
                redisplay = self.pictq.frame_queue_prev()
            # two-state loop: `retry` picks a frame (and may skip frames by
            # continuing), `display` hands the frame out; the loop is left by
            # the break at the bottom or by an early return
            while True:
                if state == retry:
                    if self.pictq.frame_queue_nb_remaining() == 0:
                        if self.reached_eof:
                            return 2  # eof
                        # nothing to do, no picture to display in the queue
                    else:
                        # dequeue the picture
                        lastvp = self.pictq.frame_queue_peek_last()
                        vp = self.pictq.frame_queue_peek()
                        # frame belongs to another serial than the packet queue: skip it
                        if vp.serial != self.videoq.serial:
                            self.pictq.frame_queue_next()
                            redisplay = 0
                            continue

                        # serial changed between the last and the current frame: restart the frame timer
                        if lastvp.serial != vp.serial and not redisplay:
                            self.frame_timer = av_gettime_relative() / 1000000.0

                        if self.paused:
                            state = display
                            continue

                        # compute nominal last_duration
                        last_duration = self.vp_duration(lastvp, vp)
                        if redisplay:
                            delay = 0.0
                        else:
                            delay = self.compute_target_delay(last_duration)

                        time = av_gettime_relative() / 1000000.0
                        # picture not due yet: report how long is left
                        if time < self.frame_timer + delay and not redisplay:
                            remaining_time[0] = self.frame_timer + delay - time

                        self.frame_timer += delay
                        # the timer fell too far behind the wall clock: resynchronise it
                        if delay > 0 and time - self.frame_timer > AV_SYNC_THRESHOLD_MAX:
                            self.frame_timer = time

                        # the video clock is updated while holding the picture queue's lock
                        self.pictq.cond.lock()
                        if (not redisplay) and not isnan(vp.pts):
                            self.update_video_pts(vp.pts, vp.pos, vp.serial)
                        self.pictq.cond.unlock()

                        # late frame dropping: skip this frame when its successor is already due
                        if self.pictq.frame_queue_nb_remaining() > 1:
                            nextvp = self.pictq.frame_queue_peek_next()
                            duration = self.vp_duration(vp, nextvp)
                            if (redisplay or self.player.framedrop > 0 or\
                            (self.player.framedrop and self.get_master_sync_type() != AV_SYNC_VIDEO_MASTER))\
                            and time > self.frame_timer + duration:
                                if not redisplay:
                                    self.frame_drops_late += 1
                                self.pictq.frame_queue_next()
                                redisplay = 0
                                continue

                        # discard queued subtitles that are stale, expired, or
                        # superseded by the next subtitle
                        if self.subtitle_st != NULL:
                            while self.subpq.frame_queue_nb_remaining() > 0:
                                sp = self.subpq.frame_queue_peek()

                                if self.subpq.frame_queue_nb_remaining() > 1:
                                    sp2 = self.subpq.frame_queue_peek_next()
                                else:
                                    sp2 = NULL

                                if sp.serial != self.subtitleq.serial\
                                or (self.vidclk.pts > (sp.pts + <float> sp.sub.end_display_time / 1000.))\
                                or (sp2 != NULL and self.vidclk.pts > (sp2.pts + <float> sp2.sub.start_display_time / 1000.)):
                                    self.subpq.frame_queue_next()
                                else:
                                    break
                        state = display
                        continue
                elif state == display:
                    # display picture
                    if (not self.player.video_disable) and self.video_st != NULL:
                        vp_temp = self.pictq.frame_queue_peek_last()
                        if next_image is not None:
                            next_image.cython_init(vp_temp.frame)
                        pts[0] = vp_temp.pts
                        result = 0
                    self.pictq.frame_queue_next()
                break

        # periodic status line, emitted at most once every 30000 microseconds
        if self.player.show_status:

            cur_time = av_gettime_relative()
            if (not self.last_time) or (cur_time - self.last_time) >= 30000:
                aqsize = 0
                vqsize = 0
                sqsize = 0
                if self.audio_st != NULL:
                    aqsize = self.audioq.size
                if self.video_st != NULL:
                    vqsize = self.videoq.size
                if self.subtitle_st != NULL:
                    sqsize = self.subtitleq.size
                av_diff = 0
                if self.audio_st != NULL and self.video_st != NULL:
                    av_diff = self.audclk.get_clock() - self.vidclk.get_clock()
                elif self.video_st != NULL:
                    av_diff = self.get_master_clock() - self.vidclk.get_clock()
                elif self.audio_st != NULL:
                    av_diff = self.get_master_clock() - self.audclk.get_clock()

                # label describing which difference av_diff holds
                m = (str_av if self.audio_st != NULL and self.video_st != NULL else\
                (str_mv if self.video_st != NULL else (str_ma if self.audio_st != NULL else str_empty)))
                m2 = self.viddec.avctx.pts_correction_num_faulty_dts if self.video_st != NULL else 0
                m3 = self.viddec.avctx.pts_correction_num_faulty_pts if self.video_st != NULL else 0

                if self.player.loglevel >= AV_LOG_INFO:
                    av_log(NULL, AV_LOG_INFO,
                       py_pat_str,
                       self.get_master_clock(),
                       m,
                       av_diff,
                       self.frame_drops_early + self.frame_drops_late,
                       aqsize / 1024,
                       vqsize / 1024,
                       sqsize,
                       m2,
                       m3)
                self.last_time = cur_time
        return result

    cdef int get_video_frame(VideoState self, AVFrame *frame) nogil except 2:
        '''
        Decodes the next video frame into `frame`.

        Returns -1 when the decoder reports a negative result, 0 when no frame
        is handed out (nothing decoded, or the frame was discarded because it
        precedes a pending seek position or was dropped early), and otherwise
        the decoder's result. Discarded frames are unreferenced here.
        '''
        cdef int got_picture = self.viddec.decoder_decode_frame(frame, NULL, self.player.decoder_reorder_pts)
        cdef double dpts = NAN, diff

        if got_picture < 0:
            return -1

        if got_picture:
            # frame pts in seconds; stays NaN when the frame carries no pts
            if frame.pts != AV_NOPTS_VALUE:
                dpts = av_q2d(self.video_st.time_base) * frame.pts

            frame.sample_aspect_ratio = av_guess_sample_aspect_ratio(self.ic, self.video_st, frame)
            if self.viddec.is_seeking() or self.auddec.is_seeking():
                # NaN never compares equal to anything (itself included), so the
                # "no pts" case has to be detected with isnan(); otherwise every
                # frame without a pts would be discarded while a seek is pending.
                if isnan(dpts) or dpts >= self.viddec.seek_req_pos:
                    if self.viddec.is_seeking():
                        self.viddec.set_seek_pos(-1)
                else:
                    # frame lies before the requested seek position: discard it
                    av_frame_unref(frame)
                    return 0

            # early frame drop: the frame is already behind the master clock
            if self.player.framedrop > 0 or (self.player.framedrop and\
            self.get_master_sync_type() != AV_SYNC_VIDEO_MASTER):
                if frame.pts != AV_NOPTS_VALUE:
                    diff = dpts - self.get_master_clock()
                    if (not isnan(diff)) and\
                    fabs(diff) < AV_NOSYNC_THRESHOLD and\
                    diff - self.frame_last_filter_delay < 0 and\
                    self.viddec.pkt_serial == self.vidclk.serial and\
                    self.videoq.nb_packets:
                        self.frame_drops_early += 1
                        av_frame_unref(frame)
                        got_picture = 0
        return got_picture

    IF CONFIG_AVFILTER:
        cdef int configure_filtergraph(VideoState self, AVFilterGraph *graph, const char *filtergraph,
                                       AVFilterContext *source_ctx, AVFilterContext *sink_ctx) nogil except? 1:
            '''
            Connects `source_ctx` to `sink_ctx` inside `graph` and configures
            the graph.

            When `filtergraph` is NULL the two filters are linked directly;
            otherwise the description is parsed between them, with the source
            labelled "in" and the sink labelled "out". Returns 0 or a positive
            value from avfilter_graph_config() on success and a negative
            AVERROR code on failure.
            '''
            cdef int status = 0, idx
            cdef int preexisting = graph.nb_filters
            cdef AVFilterInOut *graph_outputs = NULL
            cdef AVFilterInOut *graph_inputs = NULL
            cdef AVFilterContext *swapped

            if filtergraph == NULL:
                status = avfilter_link(source_ctx, 0, sink_ctx, 0)
                if status > 0:
                    status = 0
            else:
                graph_outputs = avfilter_inout_alloc()
                graph_inputs = avfilter_inout_alloc()
                if graph_outputs == NULL or graph_inputs == NULL:
                    status = AVERROR(ENOMEM)
                else:
                    graph_outputs.name = av_strdup(b"in")
                    graph_outputs.filter_ctx = source_ctx
                    graph_outputs.pad_idx = 0
                    graph_outputs.next = NULL

                    graph_inputs.name = av_strdup(b"out")
                    graph_inputs.filter_ctx = sink_ctx
                    graph_inputs.pad_idx = 0
                    graph_inputs.next = NULL

                    status = avfilter_graph_parse_ptr(graph, filtergraph, &graph_inputs,
                                                      &graph_outputs, NULL)
                    if status > 0:
                        status = 0

            if status == 0:
                # swap the leading entries with the ones `preexisting` slots
                # further on, one swap per filter added by this call
                for idx in range(graph.nb_filters - preexisting):
                    swapped = graph.filters[idx]
                    graph.filters[idx] = graph.filters[idx + preexisting]
                    graph.filters[idx + preexisting] = swapped
                status = avfilter_graph_config(graph, NULL)

            # released on every path, including the error paths
            avfilter_inout_free(&graph_outputs)
            avfilter_inout_free(&graph_inputs)
            return status

        cdef int configure_video_filters(VideoState self, AVFilterGraph *graph,
                                         const char *vfilters, AVFrame *frame,
                                         AVPixelFormat pix_fmt) nogil except? 1:
            '''
            Builds the video filter chain in `graph` for frames shaped like
            `frame`, producing output in `pix_fmt`.

            The chain consists of a buffer source, the optional user filters
            in `vfilters`, an optional scale filter (when a screen width or
            height is set), optional rotation filters (when autorotate is on),
            a crop to even dimensions and a buffer sink. On success the source
            and sink are stored in ``in_video_filter`` / ``out_video_filter``.

            Returns a negative AVERROR code as soon as any step fails,
            including the insertion of a rotation filter.
            '''
            cdef char sws_flags_str[512]
            cdef char buffersrc_args[256]
            cdef char scale_args[256]
            cdef int ret
            cdef int32_t *displaymatrix = NULL
            cdef AVFilterContext *filt_src = NULL
            cdef AVFilterContext *filt_out = NULL
            cdef AVFilterContext *last_filter = NULL
            cdef AVFilterContext *filt_scale = NULL
            cdef AVCodecParameters *codecpar = self.video_st.codecpar
            cdef AVRational fr = av_guess_frame_rate(self.ic, self.video_st, NULL)
            cdef AVPixelFormat *pix_fmts = [pix_fmt, AV_PIX_FMT_NONE]
            cdef double theta = 0
            cdef char rotate_buf[64]
            cdef const AVDictionaryEntry *e = NULL
            cdef AVFrameSideData *sd = NULL
            memset(sws_flags_str, 0, sizeof(sws_flags_str))

            # serialize the scaler options as "key=value:key=value"; the
            # "sws_flags" key is emitted under the name "flags"
            e = av_dict_get(self.player.sws_dict, b"", e, AV_DICT_IGNORE_SUFFIX)
            while e != NULL:
                if not strcmp(e.key, b"sws_flags"):
                    av_strlcatf(sws_flags_str, sizeof(sws_flags_str), b"%s=%s:", b"flags", e.value)
                else:
                    av_strlcatf(sws_flags_str, sizeof(sws_flags_str), b"%s=%s:", e.key, e.value)
                e = av_dict_get(self.player.sws_dict, b"", e, AV_DICT_IGNORE_SUFFIX)
            if strlen(sws_flags_str):
                # strip the trailing ':'
                sws_flags_str[strlen(sws_flags_str) - 1] = b'\0'

            graph.scale_sws_opts = av_strdup(sws_flags_str)

            snprintf(buffersrc_args, sizeof(buffersrc_args),
                     b"video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
                     frame.width, frame.height, frame.format,
                     self.video_st.time_base.num, self.video_st.time_base.den,
                     codecpar.sample_aspect_ratio.num, FFMAX(codecpar.sample_aspect_ratio.den, 1))
            if fr.num and fr.den:
                av_strlcatf(buffersrc_args, sizeof(buffersrc_args), b":frame_rate=%d/%d", fr.num, fr.den)

            ret = avfilter_graph_create_filter(&filt_src, avfilter_get_by_name(b"buffer"),
                                               b"ffpyplayer_buffer", buffersrc_args, NULL, graph)
            if ret < 0:
                return ret

            ret = avfilter_graph_create_filter(&filt_out, avfilter_get_by_name(b"buffersink"),
                                               b"ffpyplayer_buffersink", NULL, NULL, graph)
            if ret < 0:
                return ret

            ret = av_opt_set_int_list(filt_out, b"pix_fmts", pix_fmts,
                                      sizeof(pix_fmts[0]), AV_PIX_FMT_NONE, AV_OPT_SEARCH_CHILDREN)
            if ret < 0:
                return ret

            # the chain is assembled backwards, starting from the sink
            last_filter = filt_out

            # SDL YUV code is not handling odd width/height for some driver
            # combinations, therefore we crop the picture to an even width/height.
            ret = insert_filt(b"crop", b"floor(in_w/2)*2:floor(in_h/2)*2", graph, &last_filter)
            if ret < 0:
                return ret

            if self.player.autorotate:
                # prefer the display matrix attached to the frame, then the stream's
                sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DISPLAYMATRIX)
                if sd != NULL:
                    displaymatrix = <int32_t *>sd.data
                if displaymatrix == NULL:
                    displaymatrix = <int32_t *>av_stream_get_side_data(self.video_st, AV_PKT_DATA_DISPLAYMATRIX, NULL)
                theta = get_rotation(displaymatrix)
                # a failed insertion is reported to the caller, the same way
                # the crop filter above is handled
                if fabs(theta - 90) < 1.0:
                    ret = insert_filt(b"transpose", b"clock", graph, &last_filter)
                elif fabs(theta - 180) < 1.0:
                    ret = insert_filt(b"hflip", NULL, graph, &last_filter)
                    if ret >= 0:
                        ret = insert_filt(b"vflip", NULL, graph, &last_filter)
                elif fabs(theta - 270) < 1.0:
                    ret = insert_filt(b"transpose", b"cclock", graph, &last_filter)
                elif fabs(theta) > 1.0:
                    snprintf(rotate_buf, sizeof(rotate_buf), b"%f*PI/180", theta)
                    ret = insert_filt(b"rotate", rotate_buf, graph, &last_filter)
                if ret < 0:
                    return ret

            if self.player.screen_height or self.player.screen_width:
                snprintf(scale_args, sizeof(scale_args), b"%d:%d", self.player.screen_width,
                         self.player.screen_height)
                ret = avfilter_graph_create_filter(&filt_scale, avfilter_get_by_name(b"scale"),
                                                   b"ffpyplayer_scale", scale_args,
                                                   NULL, graph)
                if ret < 0:
                    return ret

                ret = avfilter_link(filt_scale, 0, last_filter, 0)
                if ret < 0:
                    return ret
                # this needs to be here in case user provided filter at the input
                ret = self.configure_filtergraph(graph, vfilters, filt_src, filt_scale)
                if ret < 0:
                    return ret
            else:
                ret = self.configure_filtergraph(graph, vfilters, filt_src, last_filter)
                if ret < 0:
                    return ret

            self.in_video_filter  = filt_src
            self.out_video_filter = filt_out
            return ret

        cdef int configure_audio_filters(VideoState self, const char *afilters, int force_output_format) nogil except? 1:
            '''
            Rebuilds the audio filter graph (``self.agraph``) from the current
            ``audio_filter_src`` parameters and the user filters in `afilters`.

            With `force_output_format` the sink is restricted to the channel
            layout / channel count and sample rate of ``audio_tgt``. On success
            the source and sink are stored in ``in_audio_filter`` /
            ``out_audio_filter``; on failure the graph is freed. Returns the
            last FFmpeg result, negative on error.
            '''
            # -1 terminated single-entry lists; entry 0 is filled in below
            cdef int *sample_rates = [0, -1]
            cdef int64_t *channel_layouts = [0, -1]
            cdef int *channels = [0, -1]
            cdef AVFilterContext *filt_asrc = NULL
            cdef AVFilterContext *filt_asink = NULL
            cdef char aresample_swr_opts[512]
            cdef const AVDictionaryEntry *e = NULL
            cdef char asrc_args[256]
            cdef char str_flags[64]
            cdef int ret

            # build the format string ":channel_layout=0x%<PRIx64>" at runtime
            memset(str_flags, 0, sizeof(str_flags))
            strcpy(str_flags, b":channel_layout=0x%")
            strcat(str_flags, PRIx64)
            aresample_swr_opts[0] = 0
            # any previous graph is discarded and a fresh one allocated
            avfilter_graph_free(&self.agraph)
            self.agraph = avfilter_graph_alloc()
            if self.agraph == NULL:
                return AVERROR(ENOMEM)
            self.agraph.nb_threads = self.player.filter_threads
            # serialize the resampler options as "key=value:key=value"
            e = av_dict_get(self.player.swr_opts, b"", e, AV_DICT_IGNORE_SUFFIX)
            while e != NULL:
                av_strlcatf(aresample_swr_opts, sizeof(aresample_swr_opts), b"%s=%s:", e.key, e.value)
                e = av_dict_get(self.player.swr_opts, b"", e, AV_DICT_IGNORE_SUFFIX)
            if strlen(aresample_swr_opts):
                # strip the trailing ':'
                aresample_swr_opts[strlen(aresample_swr_opts)-1] = b'\0'
            av_opt_set(self.agraph, b"aresample_swr_opts", aresample_swr_opts, 0)

            # ret holds the number of characters written, used as the append offset below
            ret = snprintf(asrc_args, sizeof(asrc_args),
                           b"sample_rate=%d:sample_fmt=%s:channels=%d:time_base=%d/%d",
                           self.audio_filter_src.freq, av_get_sample_fmt_name(self.audio_filter_src.fmt),
                           self.audio_filter_src.channels, 1, self.audio_filter_src.freq)
            if self.audio_filter_src.channel_layout:
                snprintf(asrc_args + ret, sizeof(asrc_args) - ret, str_flags,
                         self.audio_filter_src.channel_layout)

            # from here on every step only runs while the previous ones succeeded
            ret = avfilter_graph_create_filter(&filt_asrc, avfilter_get_by_name(b"abuffer"),
                                               b"ffpyplayer_abuffer", asrc_args, NULL, self.agraph)
            if ret >= 0:
                ret = avfilter_graph_create_filter(&filt_asink, avfilter_get_by_name(b"abuffersink"),
                                                   b"ffpyplayer_abuffersink", NULL, NULL, self.agraph)
            if ret >= 0:
                ret = av_opt_set_int_list(filt_asink, b"sample_fmts", sample_fmts, sizeof(sample_fmts[0]),
                                          AV_SAMPLE_FMT_NONE, AV_OPT_SEARCH_CHILDREN)
            if ret >= 0:
                ret = av_opt_set_int(filt_asink, b"all_channel_counts", 1, AV_OPT_SEARCH_CHILDREN)
            if ret >= 0 and force_output_format:
                # pin the sink to the target format; the channel count is only
                # given when the target has no channel layout
                channel_layouts[0] = self.audio_tgt.channel_layout
                channels       [0] = -1 if self.audio_tgt.channel_layout else self.audio_tgt.channels
                sample_rates   [0] = self.audio_tgt.freq
                ret = av_opt_set_int(filt_asink, b"all_channel_counts", 0, AV_OPT_SEARCH_CHILDREN)
                if ret >= 0:
                    ret = av_opt_set_int_list(filt_asink, b"channel_layouts", channel_layouts, sizeof(channel_layouts[0]),
                                              -1, AV_OPT_SEARCH_CHILDREN)
                if ret >= 0:
                    ret = av_opt_set_int_list(filt_asink, b"channel_counts", channels, sizeof(channels[0]),
                                              -1, AV_OPT_SEARCH_CHILDREN)
                if ret >= 0:
                    ret = av_opt_set_int_list(filt_asink, b"sample_rates", sample_rates, sizeof(sample_rates[0]),
                                              -1, AV_OPT_SEARCH_CHILDREN)
            if ret >= 0:
                ret = self.configure_filtergraph(self.agraph, afilters, filt_asrc, filt_asink)
            if ret >= 0:
                self.in_audio_filter  = filt_asrc
                self.out_audio_filter = filt_asink
            # on any failure the partially built graph is released
            if ret < 0:
                avfilter_graph_free(&self.agraph)
            return ret

    cdef int audio_thread(self) nogil except? 1:
        cdef AVFrame *frame = av_frame_alloc()
        cdef Frame *af
        cdef int got_frame = 0
        cdef AVRational tb
        cdef int ret = 0
        cdef char err_msg[256]

        IF CONFIG_AVFILTER:
            cdef int last_serial = -1
            cdef int64_t dec_channel_layout
            cdef int reconfigure
            cdef char buf1[1024]
            cdef char buf2[1024]

        if frame == NULL:
            if self.player.loglevel >= AV_LOG_ERROR:
                av_log(NULL, AV_LOG_ERROR, b'Memory error in audio thread\n')
            self.request_thread_s(b'audio:error', fmt_err(AVERROR(ENOMEM), err_msg, sizeof(err_msg)))
            return AVERROR(ENOMEM)

        while True:
            ret = 0
            got_frame = self.auddec.decoder_decode_frame(frame, NULL, self.player.decoder_reorder_pts)
            if got_frame < 0:
                ret = -1
                break

            if got_frame:
                tb.num = 1
                tb.den = frame.sample_rate

                IF CONFIG_AVFILTER:
                    dec_channel_layout = get_valid_channel_layout(frame.channel_layout, frame.channels)
                    reconfigure = (
                        cmp_audio_fmts(self.audio_filter_src.fmt, self.audio_filter_src.channels,
                                       <AVSampleFormat>frame.format, frame.channels) or
                        self.audio_filter_src.channel_layout != dec_channel_layout or
                        self.audio_filter_src.freq != frame.sample_rate or
                        self.auddec.pkt_serial != last_serial)

                    if reconfigure:
                        av_get_channel_layout_string(buf1, sizeof(buf1), -1, self.audio_filter_src.channel_layout)
                        av_get_channel_layout_string(buf2, sizeof(buf2), -1, dec_channel_layout)
                        if self.player.loglevel >= AV_LOG_DEBUG:
                            av_log(NULL, AV_LOG_DEBUG,
                               b"Audio frame changed from rate:%d ch:%d fmt:%s layout:%s serial:%d to rate:%d ch:%d fmt:%s layout:%s serial:%d\n",
                               self.audio_filter_src.freq, self.audio_filter_src.channels,
                               av_get_sample_fmt_name(self.audio_filter_src.fmt), buf1, last_serial,
                               frame.sample_rate, frame.channels,
                               av_get_sample_fmt_name(<AVSampleFormat>frame.format), buf2, self.auddec.pkt_serial)

                        self.audio_filter_src.fmt = <AVSampleFormat>frame.format
                        self.audio_filter_src.channels = frame.channels
                        self.audio_filter_src.channel_layout = dec_channel_layout
                        self.audio_filter_src.freq = frame.sample_rate
                        last_serial = self.auddec.pkt_serial

                        ret = self.configure_audio_filters(self.player.afilters, 1)
                        if ret < 0:
                            break

                    ret = av_buffersrc_add_frame(self.in_audio_filter, frame)
                    if ret < 0:
                        break

                    ret = av_buffersink_get_frame_flags(self.out_audio_filter, frame, 0)
                    while ret >= 0:
                        tb = av_buffersink_get_time_base(self.out_audio_filter)
                        af = self.sampq.frame_queue_peek_writable()
                        if af == NULL:
                            avfilter_graph_free(&self.agraph)
                            av_frame_free(&frame)
                            if self.audioq.abort_request:
                                self.request_thread_s(b'audio:exit', b'')
                            else:
                                if self.player.loglevel >= AV_LOG_ERROR:
                                    av_log(NULL, AV_LOG_ERROR, b'Error getting writable audio frame\n')
                                self.request_thread_s(b'audio:error', fmt_err(ret, err_msg, sizeof(err_msg)))
                            return ret

                        af.pts = NAN if frame.pts == AV_NOPTS_VALUE else frame.pts * av_q2d(tb)
                        af.pos = frame.pkt_pos
                        af.serial = self.auddec.pkt_serial
                        tb.num = frame.nb_samples
                        tb.den = frame.sample_rate
                        af.duration = av_q2d(tb)

                        av_frame_move_ref(af.frame, frame)
                        self.sampq.frame_queue_push()

                        if self.audioq.serial != self.auddec.pkt_serial:
                            break
                        ret = av_buffersink_get_frame_flags(self.out_audio_filter, frame, 0)

                    if ret == AVERROR_EOF:
                        self.auddec.finished = self.auddec.pkt_serial
                ELSE:
                    af = self.sampq.frame_queue_peek_writable()
                    if af == NULL:
                       break

                    af.pts = NAN if frame.pts == AV_NOPTS_VALUE else frame.pts * av_q2d(tb)
                    af.pos = frame.pkt_pos
                    af.serial = self.auddec.pkt_serial
                    tb.num = frame.nb_samples
                    tb.den = frame.sample_rate
                    af.duration = av_q2d(tb)

                    av_frame_move_ref(af.frame, frame)
                    self.sampq.frame_queue_push()

            if ret < 0 and ret != AVERROR(EAGAIN) and ret != AVERROR_EOF:
                break

        IF CONFIG_AVFILTER:
            avfilter_graph_free(&self.agraph)
        av_frame_free(&frame)
        if ret and not self.audioq.abort_request:
            if ret != -1:
                if self.player.loglevel >= AV_LOG_ERROR:
                    av_log(NULL, AV_LOG_ERROR, b'Audio thread error: %s\n', fmt_err(ret, err_msg, sizeof(err_msg)))
                self.request_thread_s(b'audio:error', fmt_err(ret, err_msg, sizeof(err_msg)))
            else:
                if self.player.loglevel >= AV_LOG_ERROR:
                    av_log(NULL, AV_LOG_ERROR, b'Audio thread error\n')
                self.request_thread_s(b'audio:error', b'')
        else:
            self.request_thread_s(b'audio:exit', b'')
        return ret

    # Video worker: repeatedly obtains a frame from get_video_frame(), pushes it
    # through the video filter graph when compiled with CONFIG_AVFILTER, and
    # queues the result on self.pictq.
    #
    # When the loop ends, b'video:error' (with a formatted message unless ret is
    # -1) or b'video:exit' is posted through request_thread_s().
    #
    # Returns AVERROR(ENOMEM) if the scratch frame cannot be allocated; every
    # other path returns 0, the outcome being reported only via the request.
    cdef int video_thread(VideoState self) nogil except? 1:
        cdef AVFrame *frame = av_frame_alloc()
        cdef double pts, duration
        cdef int ret
        cdef AVRational tb = self.video_st.time_base
        cdef AVRational tb_temp
        cdef AVRational frame_rate = av_guess_frame_rate(self.ic, self.video_st, NULL)
        cdef AVRational sar
        cdef char err_msg[256]
        cdef AVPixelFormat last_out_fmt = self.pix_fmt
        IF CONFIG_AVFILTER:
            # State of the most recently configured filter graph; a mismatch with
            # the incoming frame triggers a rebuild further down.
            cdef AVFilterGraph *graph = NULL
            cdef AVFilterContext *filt_out = NULL
            cdef AVFilterContext *filt_in = NULL
            cdef int last_w = 0
            cdef int last_h = 0
            cdef int last_scr_h = 0, last_scr_w = 0
            # NOTE(review): -2 looks like a sentinel that no real pixel format
            # uses, so the first frame always reconfigures - confirm.
            cdef AVPixelFormat last_format = <AVPixelFormat>-2
            cdef AVPixelFormat last_out_fmt_temp
            cdef int last_serial = -1
            cdef int last_vfilter_idx = self.vfilter_idx

        if frame == NULL:
            if self.player.loglevel >= AV_LOG_ERROR:
                av_log(NULL, AV_LOG_ERROR, b'Memory Error in video thread\n')
            self.request_thread_s(b'video:error', fmt_err(AVERROR(ENOMEM), err_msg, sizeof(err_msg)))
            return AVERROR(ENOMEM)

        while 1:
            av_frame_unref(frame)
            ret = self.get_video_frame(frame)
            # Negative: stop the thread.  Zero: nothing to process this round.
            if ret < 0:
                break
            if not ret:
                continue

            IF CONFIG_AVFILTER:
                # Snapshot the requested output format once so the comparison and
                # the later configure call use the same value.
                last_out_fmt_temp = self.pix_fmt
                # Rebuild the graph whenever the frame geometry, screen size,
                # input format, packet serial, selected filter or output format
                # differs from what the current graph was configured with.
                if (last_w != frame.width or last_h != frame.height
                    or last_scr_h != self.player.screen_height
                    or last_scr_w != self.player.screen_width
                    or last_format != frame.format or last_serial != self.viddec.pkt_serial
                    or last_vfilter_idx != self.vfilter_idx
                    or last_out_fmt != last_out_fmt_temp):

                    if self.player.loglevel >= AV_LOG_DEBUG:
                        av_log(NULL, AV_LOG_DEBUG,
                           b"Video frame changed from size:%dx%d format:%s serial:%d to size:%dx%d format:%s serial:%d\n",
                           last_w, last_h,
                           <const char *>av_x_if_null(av_get_pix_fmt_name(last_format), b"none"), last_serial,
                           frame.width, frame.height,
                           <const char *>av_x_if_null(av_get_pix_fmt_name(<AVPixelFormat>frame.format), b"none"),
                           self.viddec.pkt_serial)

                    avfilter_graph_free(&graph)
                    graph = avfilter_graph_alloc()
                    if graph == NULL:
                        ret = AVERROR(ENOMEM)
                        break

                    graph.nb_threads = self.player.filter_threads
                    ret = self.configure_video_filters(
                        graph, self.player.vfilters_list[self.vfilter_idx] if self.player.vfilters_list != NULL else NULL,
                        frame, last_out_fmt_temp)
                    if ret < 0:
                        break

                    # Remember what the new graph was built for.
                    filt_in  = self.in_video_filter
                    filt_out = self.out_video_filter
                    last_w = frame.width
                    last_h = frame.height
                    last_scr_h = self.player.screen_height
                    last_scr_w = self.player.screen_width
                    last_format = <AVPixelFormat>frame.format
                    last_out_fmt = last_out_fmt_temp
                    last_serial = self.viddec.pkt_serial
                    frame_rate = av_buffersink_get_frame_rate(filt_out)
                    last_vfilter_idx = self.vfilter_idx
                    sar = <AVRational>frame.sample_aspect_ratio
                    # Python dict access requires the GIL.
                    with gil:
                        self.metadata['src_vid_size'] = (last_w, last_h)
                        self.metadata['aspect_ratio'] = (sar.num, sar.den)
                        self.metadata['frame_rate'] = (frame_rate.num, frame_rate.den)

                ret = av_buffersrc_add_frame(filt_in, frame)
                if ret < 0:
                    break

                # Drain every frame the filter graph can currently produce.
                while ret >= 0:
                    self.frame_last_returned_time = av_gettime_relative() / 1000000.0
                    ret = av_buffersink_get_frame_flags(filt_out, frame, 0)
                    if ret < 0:
                        # Any sink failure only ends the drain (ret is reset to
                        # 0); EOF additionally marks the decoder as finished for
                        # this serial.
                        if ret == AVERROR_EOF:
                            self.viddec.finished = self.viddec.pkt_serial
                        ret = 0
                        break

                    # Time spent in the filter graph; implausibly large values
                    # are discarded.
                    self.frame_last_filter_delay = av_gettime_relative() / 1000000.0 - self.frame_last_returned_time
                    if fabs(self.frame_last_filter_delay) > AV_NOSYNC_THRESHOLD / 10.0:
                        self.frame_last_filter_delay = 0

                    tb = av_buffersink_get_time_base(filt_out)
                    # Frame duration is the inverse of the frame rate, or 0 when
                    # the rate is unknown.
                    duration = 0
                    if frame_rate.num and frame_rate.den:
                        tb_temp.num = frame_rate.den
                        tb_temp.den = frame_rate.num
                        duration = av_q2d(tb_temp)
                    if frame.pts == AV_NOPTS_VALUE:
                        pts = NAN
                    else:
                        pts = frame.pts * av_q2d(tb)
                    ret = self.pictq.queue_picture(frame, pts, duration, frame.pkt_pos,
                                             self.viddec.pkt_serial, last_out_fmt, &self.abort_request, self.player)
                    #av_frame_unref(frame)
                    # The packet queue moved to a new serial: stop draining.
                    if self.videoq.serial != self.viddec.pkt_serial:
                        break
            ELSE:
                # No filtering: compute timing from the stream time base and
                # queue the decoded frame directly.
                duration = 0
                if frame_rate.num and frame_rate.den:
                    tb_temp.num = frame_rate.den
                    tb_temp.den = frame_rate.num
                    duration = av_q2d(tb_temp)
                if frame.pts == AV_NOPTS_VALUE:
                    pts = NAN
                else:
                    pts = frame.pts * av_q2d(tb)
                sar = <AVRational>frame.sample_aspect_ratio
                # Metadata is refreshed for every frame in this build variant.
                with gil:
                    self.metadata['src_vid_size'] = (frame.width, frame.height)
                    self.metadata['aspect_ratio'] = (sar.num, sar.den)
                    self.metadata['frame_rate'] = (frame_rate.num, frame_rate.den)
                ret = self.pictq.queue_picture(frame, pts, duration, frame.pkt_pos,
                                         self.viddec.pkt_serial, last_out_fmt, &self.abort_request,
                                         self.player)
                #av_frame_unref(frame)

            if ret < 0:
                break

        IF CONFIG_AVFILTER:
            avfilter_graph_free(&graph)
        av_frame_free(&frame)

        # Report how the thread ended: an error unless the queue was aborted.
        # ret == -1 is reported without a formatted message.
        if ret and not self.videoq.abort_request:
            if ret != -1:
                if self.player.loglevel >= AV_LOG_ERROR:
                    av_log(NULL, AV_LOG_ERROR, b'Video thread error: %s\n', fmt_err(ret, err_msg, sizeof(err_msg)))
                self.request_thread_s(b'video:error', fmt_err(ret, err_msg, sizeof(err_msg)))
            else:
                if self.player.loglevel >= AV_LOG_ERROR:
                    av_log(NULL, AV_LOG_ERROR, b'Video thread error\n')
                self.request_thread_s(b'video:error', b'')
        else:
            self.request_thread_s(b'video:exit', b'')
        return 0

    # Subtitle worker: decodes subtitles into a writable slot of self.subpq and
    # forwards them through subtitle_display().
    #
    # The loop only ends when no writable slot is available or the decoder
    # reports a negative result.  It then posts b'subtitle:error' (unless the
    # subtitle queue was aborted) or b'subtitle:exit' via request_thread_s().
    # Always returns 0.
    cdef int subtitle_thread(VideoState self) nogil except 1:
        cdef Frame *sp
        cdef int got_subtitle
        cdef int ret = 0
        cdef char err_msg[256]

        while 1:
            sp = self.subpq.frame_queue_peek_writable()
            if sp == NULL:
                ret = -1
                break

            got_subtitle = self.subdec.decoder_decode_frame(NULL, &sp.sub, self.player.decoder_reorder_pts)
            if got_subtitle < 0:
                ret = -1
                break

            if got_subtitle:
                # Only subtitles with format != 0 are forwarded (FFmpeg documents
                # format 0 as graphics); every decoded subtitle is freed right
                # away, so the slot is never pushed onto the queue.
                if sp.sub.format != 0:
                    self.subtitle_display(&sp.sub)
                avsubtitle_free(&sp.sub)

        # Same reporting shape as the other worker threads: ret == -1 is
        # reported without a formatted message.
        if ret and not self.subtitleq.abort_request:
            if ret != -1:
                if self.player.loglevel >= AV_LOG_ERROR:
                    av_log(NULL, AV_LOG_ERROR, b'Subtitle thread error: %s\n', fmt_err(ret, err_msg, sizeof(err_msg)))
                self.request_thread_s(b'subtitle:error', fmt_err(ret, err_msg, sizeof(err_msg)))
            else:
                if self.player.loglevel >= AV_LOG_ERROR:
                    av_log(NULL, AV_LOG_ERROR, b'Subtitle thread error\n')
                self.request_thread_s(b'subtitle:error', b'')
        else:
            self.request_thread_s(b'subtitle:exit', b'')
        return 0

    # Forwards every ASS / plain-text rectangle of `sub` as a b'display_sub'
    # request carrying (text, format tag, pts in seconds, start offset in
    # seconds, end offset in seconds).  Rectangles of any other type are skipped.
    #
    # The C strings are decoded with errors='replace' into an ordinary managed
    # Python string: malformed UTF-8 therefore yields replacement characters
    # instead of a NULL object being dereferenced, and no manual reference
    # counting is needed if request_thread() raises.
    #
    # Returns 0.
    cdef int subtitle_display(self, AVSubtitle *sub) nogil except 1:
        cdef int i
        cdef double pts
        cdef const char *raw

        # The timestamp belongs to the subtitle, not to a rectangle, so it is
        # computed once.
        if sub.pts != AV_NOPTS_VALUE:
            pts = sub.pts / <double>AV_TIME_BASE
        else:
            pts = 0.0

        with gil:
            for i in range(sub.num_rects):
                if sub.rects[i].type == SUBTITLE_ASS:
                    raw = sub.rects[i].ass
                    sub_fmt = sub_ass
                elif sub.rects[i].type == SUBTITLE_TEXT:
                    raw = sub.rects[i].text
                    sub_fmt = sub_text
                else:
                    continue
                # Defensive: a rectangle without payload has nothing to show.
                if raw == NULL:
                    continue

                text = raw.decode('utf-8', 'replace')
                self.request_thread(b'display_sub', (
                    text, sub_fmt, pts, sub.start_display_time / 1000.,
                    sub.end_display_time / 1000.))
        return 0

    # Append `samples_size` bytes of 16-bit samples to the circular
    # self.sample_array, wrapping the write index back to 0 whenever the end of
    # the array is reached.  Always returns 0.
    cdef int update_sample_display(VideoState self, int16_t *samples, int samples_size) nogil except 1:
        cdef int remaining, chunk

        remaining = samples_size // sizeof(short)
        while remaining > 0:
            # Copy at most up to the end of the ring buffer in one go.
            chunk = SAMPLE_ARRAY_SIZE - self.sample_array_index
            if chunk > remaining:
                chunk = remaining
            memcpy(&self.sample_array[self.sample_array_index], samples, chunk * sizeof(short))
            samples += chunk
            self.sample_array_index += chunk
            if self.sample_array_index >= SAMPLE_ARRAY_SIZE:
                self.sample_array_index = 0
            remaining -= chunk
        return 0

    ''' return the wanted number of samples to get better sync if sync_type is video
    or external master clock '''
    # Returns how many samples should be produced for a frame of `nb_samples`
    # samples.  The count only differs from `nb_samples` when audio is not the
    # master clock and the averaged audio/master difference has reached
    # self.audio_diff_threshold; the correction is clipped to
    # +/- SAMPLE_CORRECTION_PERCENT_MAX percent of `nb_samples`.
    cdef int synchronize_audio(VideoState self, int nb_samples) nogil except -1:
        cdef int wanted_nb_samples = nb_samples
        cdef double diff, avg_diff
        cdef int lower_bound, upper_bound

        # Audio drives the clock: there is nothing to correct against.
        if self.get_master_sync_type() == AV_SYNC_AUDIO_MASTER:
            return wanted_nb_samples

        diff = self.audclk.get_clock() - self.get_master_clock()

        if isnan(diff) or fabs(diff) >= AV_NOSYNC_THRESHOLD:
            # Difference too big (possibly initial PTS errors): reset the
            # A-V difference filter.
            self.audio_diff_avg_count = 0
            self.audio_diff_cum       = 0
            return wanted_nb_samples

        self.audio_diff_cum = diff + self.audio_diff_avg_coef * self.audio_diff_cum
        if self.audio_diff_avg_count < AUDIO_DIFF_AVG_NB:
            # Not enough measurements yet for a reliable estimate.
            self.audio_diff_avg_count += 1
            return wanted_nb_samples

        # Estimate the A-V difference.
        avg_diff = self.audio_diff_cum * (1.0 - self.audio_diff_avg_coef)
        if fabs(avg_diff) >= self.audio_diff_threshold:
            wanted_nb_samples = nb_samples + <int>(diff * self.audio_src.freq)
            lower_bound = nb_samples * (100 - SAMPLE_CORRECTION_PERCENT_MAX) // 100
            upper_bound = nb_samples * (100 + SAMPLE_CORRECTION_PERCENT_MAX) // 100
            wanted_nb_samples = av_clip(wanted_nb_samples, lower_bound, upper_bound)
        if self.player.loglevel >= AV_LOG_TRACE:
            av_log(NULL, AV_LOG_TRACE, b"diff=%f adiff=%f sample_diff=%d apts=%0.3f %f\n",
               diff, avg_diff, wanted_nb_samples - nb_samples,
               self.audio_clock, self.audio_diff_threshold)
        return wanted_nb_samples


    '''
       Decode one audio frame and return its uncompressed size.

       The processed audio frame is decoded, converted if required, and
       stored in is->audio_buf, with size in bytes given by the return
       value.
    '''
    # Pops the next audio frame whose serial matches the packet queue, resamples
    # it to the output parameters (self.audio_tgt) when needed and points
    # self.audio_buf at the result.
    #
    # Returns the number of bytes available in self.audio_buf; -1 when no data
    # can be produced (paused, no readable frame, resampler failure, or the
    # frame lies before a pending seek target); AVERROR(ENOMEM) when the
    # resample buffer cannot be allocated.
    cdef int audio_decode_frame(VideoState self) nogil except? 1:
        cdef int data_size, resampled_data_size
        cdef int64_t dec_channel_layout
        cdef int wanted_nb_samples

        cdef const uint8_t **input
        cdef uint8_t **out
        cdef int out_count
        cdef int out_size
        cdef int len2
        cdef Frame *af

        if self.paused:
            return -1

        # Skip frames that belong to an outdated serial.
        while True:
            if WIN_IS_DEFINED:
                # Poll for a frame, giving up once more than half of the
                # hardware buffer duration has elapsed since the callback began.
                while self.sampq.frame_queue_nb_remaining() == 0:
                    if ((av_gettime_relative() - self.player.audio_callback_time) >
                        1000000LL * self.audio_hw_buf_size / self.audio_tgt.bytes_per_sec / 2.):
                        return -1
                    av_usleep(1000)

            af = self.sampq.frame_queue_peek_readable()
            if af == NULL:
                return -1

            self.sampq.frame_queue_next()
            if af.serial == self.audioq.serial:
                break

        data_size = av_samples_get_buffer_size(NULL, af.frame.channels,
                                               af.frame.nb_samples, <AVSampleFormat>af.frame.format, 1)

        # Trust the frame's channel layout only if it agrees with its channel
        # count; otherwise fall back to the default layout for that count.
        if af.frame.channel_layout and af.frame.channels ==\
        av_get_channel_layout_nb_channels(af.frame.channel_layout):
            dec_channel_layout = af.frame.channel_layout
        else:
            dec_channel_layout = av_get_default_channel_layout(af.frame.channels)
        wanted_nb_samples = self.synchronize_audio(af.frame.nb_samples)

        # (Re)create the resampler when the source parameters changed, or when
        # sample compensation is needed and no resampler exists yet.
        if (af.frame.format != self.audio_src.fmt or
            dec_channel_layout != self.audio_src.channel_layout or
            af.frame.sample_rate != self.audio_src.freq or
            (wanted_nb_samples != af.frame.nb_samples and self.swr_ctx == NULL)):
            swr_free(&self.swr_ctx)
            self.swr_ctx = swr_alloc_set_opts(NULL, self.audio_tgt.channel_layout,
                                              self.audio_tgt.fmt, self.audio_tgt.freq,
                                              dec_channel_layout, <AVSampleFormat>af.frame.format,
                                              af.frame.sample_rate, 0, NULL)
            if self.swr_ctx == NULL or swr_init(self.swr_ctx) < 0:
                if self.player.loglevel >= AV_LOG_ERROR:
                    av_log(NULL, AV_LOG_ERROR,
                           b"Cannot create sample rate converter for conversion of "
                           b"%d Hz %s %d channels to %d Hz %s %d channels!\n",
                           af.frame.sample_rate, av_get_sample_fmt_name(<AVSampleFormat>af.frame.format),
                           af.frame.channels, self.audio_tgt.freq,
                           av_get_sample_fmt_name(self.audio_tgt.fmt), self.audio_tgt.channels)
                # Do not keep a context that failed to initialise: a later call
                # could otherwise try to convert with it.
                swr_free(&self.swr_ctx)
                return -1
            self.audio_src.channel_layout = dec_channel_layout
            self.audio_src.channels = af.frame.channels
            self.audio_src.freq = af.frame.sample_rate
            self.audio_src.fmt = <AVSampleFormat>af.frame.format

        if self.swr_ctx != NULL:
            input = <const uint8_t **>af.frame.extended_data
            out = &self.audio_buf1
            # Output capacity in samples, with 256 samples of headroom.
            out_count = <int64_t>wanted_nb_samples * self.audio_tgt.freq // af.frame.sample_rate + 256
            out_size  = av_samples_get_buffer_size(NULL, self.audio_tgt.channels, out_count, self.audio_tgt.fmt, 0)
            if out_size < 0:
                if self.player.loglevel >= AV_LOG_ERROR:
                    av_log(NULL, AV_LOG_ERROR, b"av_samples_get_buffer_size() failed\n")
                return -1

            if wanted_nb_samples != af.frame.nb_samples:
                if swr_set_compensation(self.swr_ctx, (wanted_nb_samples - af.frame.nb_samples)\
                * self.audio_tgt.freq // af.frame.sample_rate, wanted_nb_samples *\
                self.audio_tgt.freq // af.frame.sample_rate) < 0:
                    if self.player.loglevel >= AV_LOG_ERROR:
                        av_log(NULL, AV_LOG_ERROR, b"swr_set_compensation() failed\n")
                    return -1

            av_fast_malloc(&self.audio_buf1, &self.audio_buf1_size, out_size)
            if self.audio_buf1 == NULL:
                return AVERROR(ENOMEM)
            len2 = swr_convert(self.swr_ctx, out, out_count, input, af.frame.nb_samples)
            if len2 < 0:
                if self.player.loglevel >= AV_LOG_ERROR:
                    av_log(NULL, AV_LOG_ERROR, b"swr_convert() failed\n")
                return -1

            if len2 == out_count:
                # The converter filled the whole buffer, so samples may have been
                # left inside it; re-initialise and drop the context on failure.
                if self.player.loglevel >= AV_LOG_WARNING:
                    av_log(NULL, AV_LOG_WARNING, b"audio buffer is probably too small\n")
                if swr_init(self.swr_ctx) < 0:
                    swr_free(&self.swr_ctx)
            self.audio_buf = self.audio_buf1
            resampled_data_size = len2 * self.audio_tgt.channels * av_get_bytes_per_sample(self.audio_tgt.fmt)
        else:
            # No conversion needed: play straight from the decoded frame.
            self.audio_buf = af.frame.data[0]
            resampled_data_size = data_size

        # Update the audio clock with the pts of the end of this frame.
        if not isnan(af.pts):
            self.audio_clock = af.pts + <double>af.frame.nb_samples / af.frame.sample_rate
        else:
            self.audio_clock = NAN
        self.audio_clock_serial = af.serial

        # While a seek is pending, drop frames that end before the audio seek
        # target.  NaN never compares equal to anything, so isnan() is required
        # to let a frame without timestamp complete the seek.
        if self.auddec.is_seeking() or self.viddec.is_seeking():
            if isnan(self.audio_clock) or self.audio_clock >= self.auddec.seek_req_pos:
                if self.auddec.is_seeking():
                    self.auddec.set_seek_pos(-1)
            else:
                return -1
        return resampled_data_size

    # Fill `stream` with `len` bytes of audio for the audio device, decoding
    # further frames on demand and writing silence whenever decoding fails.
    # Afterwards the audio clock is re-based on what is still buffered.
    # Always returns 0.
    cdef int sdl_audio_callback(VideoState self, uint8_t *stream, int len) nogil except 1:
        cdef int decoded, chunk
        self.player.audio_callback_time = av_gettime_relative()

        memset(stream, 0, len)
        while len > 0:
            # Current buffer exhausted: fetch the next decoded frame.
            if self.audio_buf_index >= self.audio_buf_size:
                decoded = self.audio_decode_frame()

                if decoded >= 0:
                    self.audio_buf_size = decoded
                else:
                    # On error emit a block of silence, sized to a whole number
                    # of output frames.
                    self.audio_buf = NULL
                    self.audio_buf_size = SDL_AUDIO_MIN_BUFFER_SIZE // self.audio_tgt.frame_size * self.audio_tgt.frame_size
                self.audio_buf_index = 0

            chunk = self.audio_buf_size - self.audio_buf_index
            if chunk > len:
                chunk = len

            if USE_SDL2_MIXER:
                # `stream` was already zeroed above, so silence needs no action.
                if self.audio_buf != NULL:
                    memcpy(stream, <uint8_t *>self.audio_buf + self.audio_buf_index, chunk)
            elif self.player.muted or self.player.audio_volume != SDL_MIX_MAXVOLUME:
                # Muted or attenuated: start from silence and mix in at volume.
                memset(stream, 0, chunk)
                if self.audio_buf != NULL and not self.player.muted:
                    SDL_MixAudioFormat(stream, <uint8_t *>self.audio_buf + self.audio_buf_index,
                                       AUDIO_S16SYS, chunk, self.player.audio_volume)
            elif self.audio_buf != NULL:
                # Full volume: plain copy.
                memcpy(stream, <uint8_t *>self.audio_buf + self.audio_buf_index, chunk)
            else:
                memset(stream, 0, chunk)

            len -= chunk
            stream += chunk
            self.audio_buf_index += chunk

        self.audio_write_buf_size = self.audio_buf_size - self.audio_buf_index
        # Assume the audio driver used by SDL keeps two periods buffered.
        if not isnan(self.audio_clock):
            self.audclk.set_clock_at(
                self.audio_clock - <double>(2 * self.audio_hw_buf_size + self.audio_write_buf_size) /
                self.audio_tgt.bytes_per_sec, self.audio_clock_serial, self.player.audio_callback_time / 1000000.0)
            self.extclk.sync_clock_to_slave(self.audclk)
        return 0

    # Opens the audio output described by `wanted_spec` and writes the
    # parameters actually in effect to `spec`.
    #
    # With USE_SDL2_MIXER the SDL_mixer device is shared: it is opened only when
    # the module-level `audio_count` is zero, later callers reuse the stored
    # `spec_used`, and this instance gets its own looping mixer channel with
    # sdl_mixer_callback registered as an effect.  Otherwise a plain SDL audio
    # device is opened.
    #
    # Returns 0 on success and a non-zero error value on failure.
    cdef inline int open_audio_device(VideoState self, SDL_AudioSpec *wanted_spec,
                                      SDL_AudioSpec *spec) nogil except 1:
        cdef int error = 0
        cdef int channels
        global audio_count, spec_used

        IF USE_SDL2_MIXER:
            # -1 marks "no mixer slot assigned" until the open succeeds.
            self.audio_count = -1
            # The shared counter and spec are only touched under audio_mutex.
            audio_mutex.lock()
            if audio_count:
                # Mixer already open: adopt the spec it was opened with.
                memcpy(spec, &spec_used, sizeof(spec_used))
            else:
                memcpy(spec, wanted_spec, sizeof(spec_used))
                # 2 bytes per sample (signed 16-bit) per channel.
                spec.size = spec.samples * 2 * spec.channels
                error = Mix_OpenAudio(spec.freq, AUDIO_S16SYS, spec.channels, spec.size)
                if not error:
                    # Read back what the mixer really opened; a zero result from
                    # Mix_QuerySpec is treated as failure.
                    if not Mix_QuerySpec(&spec.freq, &spec.format, &channels):
                        error = -1
                    spec.channels = channels

                if not error:
                    # Recompute the buffer geometry for the obtained frequency
                    # and remember the spec for subsequent callers.
                    spec.samples = FFMAX(AUDIO_MIN_BUFFER_SIZE, 2 << av_log2(spec.freq // AUDIO_MAX_CALLBACKS_PER_SEC))
                    spec.size = spec.samples * 2 * spec.channels
                    memcpy(&spec_used, spec, sizeof(spec_used))

            if not error:
                self.audio_count = audio_count
                audio_count += 1
                # Mix_AllocateChannels(-1) only queries the current channel
                # count; grow it if there are more users than channels.
                if Mix_AllocateChannels(-1) < audio_count:
                    Mix_AllocateChannels(audio_count)
            audio_mutex.unlock()
            if error:
                return error

            # NOTE(review): the failure returns below happen after audio_count
            # was incremented and do not undo it here - confirm the caller's
            # cleanup path accounts for that.
            memset(self.chunk_buf, 0, sizeof(self.chunk_buf))
            self.chunk = Mix_QuickLoad_RAW(self.chunk_buf, sizeof(self.chunk_buf) // sizeof(uint8_t))
            if self.chunk == NULL:
                return -1

            # Play the zeroed chunk on a free channel with loops=-1; the
            # registered effect callback receives self.self_id as user data.
            self.audio_dev = Mix_PlayChannel(-1, self.chunk, -1)
            if self.audio_dev == -1:
                return -1

            if not Mix_RegisterEffect(self.audio_dev, <void (*)(int, void *, int, void *) noexcept nogil>sdl_mixer_callback, NULL, self.self_id):
                return -1

        ELSE:
            # A zero device id is treated as failure.
            self.audio_dev = <int>SDL_OpenAudioDevice(NULL, 0, wanted_spec, spec, SDL_AUDIO_ALLOW_ANY_CHANGE)
            error = 0 if self.audio_dev else -1
        return error

    # Opens the audio output as close as possible to the wanted layout, channel
    # count and sample rate, falling back through next_nb_channels and
    # next_sample_rates when the device refuses a combination.  The parameters
    # finally in effect are written to `audio_hw_params`.
    #
    # The SDL_AUDIO_CHANNELS environment variable, when set, overrides the
    # wanted channel count.
    #
    # Returns spec.size of the opened device on success, -1 on failure.
    cdef int audio_open(VideoState self, int64_t wanted_channel_layout, int wanted_nb_channels,
                        int wanted_sample_rate, AudioParams *audio_hw_params) nogil except? 1:
        cdef SDL_AudioSpec wanted_spec, spec
        cdef const char *env
        cdef int error
        cdef int next_sample_rate_idx = next_sample_rates_len - 1

        env = SDL_getenv(b"SDL_AUDIO_CHANNELS")
        if env != NULL:
            wanted_nb_channels = atoi(env)
            wanted_channel_layout = av_get_default_channel_layout(wanted_nb_channels)
        # Make layout and channel count agree, preferring the channel count.
        if ((not wanted_channel_layout) or wanted_nb_channels !=
            av_get_channel_layout_nb_channels(wanted_channel_layout)):
            wanted_channel_layout = av_get_default_channel_layout(wanted_nb_channels)
            wanted_channel_layout &= ~AV_CH_LAYOUT_STEREO_DOWNMIX

        wanted_nb_channels = av_get_channel_layout_nb_channels(wanted_channel_layout)
        wanted_spec.channels = wanted_nb_channels
        wanted_spec.freq = wanted_sample_rate
        if wanted_spec.freq <= 0 or wanted_spec.channels <= 0:
            if self.player.loglevel >= AV_LOG_ERROR:
                av_log(NULL, AV_LOG_ERROR, b"Invalid sample rate or channel count!\n")
            return -1

        # Position the fallback index on the highest table rate below the
        # wanted one.
        while next_sample_rate_idx and next_sample_rates[next_sample_rate_idx] >= wanted_spec.freq:
            next_sample_rate_idx -= 1

        wanted_spec.format = AUDIO_S16SYS
        wanted_spec.silence = 0
        wanted_spec.samples = FFMAX(AUDIO_MIN_BUFFER_SIZE, 2 << av_log2(wanted_spec.freq // AUDIO_MAX_CALLBACKS_PER_SEC))
        wanted_spec.callback = <void (*)(void *, uint8_t *, int) noexcept nogil>self.sdl_audio_callback
        wanted_spec.userdata = self.self_id

        error = self.open_audio_device(&wanted_spec, &spec)
        while error:
            if self.player.loglevel >= AV_LOG_WARNING:
                av_log(NULL, AV_LOG_WARNING, b"SDL_OpenAudio (%d channels, %d Hz): %s\n",
                    wanted_spec.channels, wanted_spec.freq, SDL_GetError())

            # Try the next channel count first; once those are exhausted, drop
            # to the next lower sample rate and restart from the wanted count.
            wanted_spec.channels = next_nb_channels[FFMIN(7, wanted_spec.channels)]
            if not wanted_spec.channels:
                wanted_spec.freq = next_sample_rates[next_sample_rate_idx]
                next_sample_rate_idx -= 1
                wanted_spec.channels = wanted_nb_channels
                if not wanted_spec.freq:
                    if self.player.loglevel >= AV_LOG_ERROR:
                        av_log(NULL, AV_LOG_ERROR,
                           b"No more channel combinations to try, audio open failed\n")
                    return -1
            wanted_channel_layout = av_get_default_channel_layout(wanted_spec.channels)

            error = self.open_audio_device(&wanted_spec, &spec)

        if spec.format != AUDIO_S16SYS:
            if self.player.loglevel >= AV_LOG_ERROR:
                av_log(NULL, AV_LOG_ERROR,
                   b"SDL advised audio format %d is not supported!\n", spec.format)
            return -1

        # The device may have picked another channel count than requested.
        if spec.channels != wanted_spec.channels:
            wanted_channel_layout = av_get_default_channel_layout(spec.channels)
            if not wanted_channel_layout:
                if self.player.loglevel >= AV_LOG_ERROR:
                    av_log(NULL, AV_LOG_ERROR,
                       b"SDL advised channel count %d is not supported!\n", spec.channels)
                return -1

        audio_hw_params.fmt = AV_SAMPLE_FMT_S16
        audio_hw_params.freq = spec.freq
        audio_hw_params.channel_layout = wanted_channel_layout
        audio_hw_params.channels =  spec.channels
        audio_hw_params.frame_size = av_samples_get_buffer_size(
            NULL, audio_hw_params.channels, 1, audio_hw_params.fmt, 1)
        audio_hw_params.bytes_per_sec = av_samples_get_buffer_size(
            NULL, audio_hw_params.channels, audio_hw_params.freq, audio_hw_params.fmt, 1)

        if audio_hw_params.bytes_per_sec <= 0 or audio_hw_params.frame_size <= 0:
            if self.player.loglevel >= AV_LOG_ERROR:
                av_log(NULL, AV_LOG_ERROR, b"av_samples_get_buffer_size failed\n")
            return -1
        if self.player.loglevel >= AV_LOG_DEBUG:
            # Variadic arguments must match their conversion specifiers exactly:
            # the layout is a 64-bit value (it is assigned from the int64_t
            # wanted_channel_layout), so every argument is cast explicitly.
            av_log(NULL, AV_LOG_DEBUG,
               b"openaudio with fmt=%d freq=%d channel_layout=%lld channels=%d\n",
               <int>audio_hw_params.fmt, <int>audio_hw_params.freq,
               <long long>audio_hw_params.channel_layout, <int>audio_hw_params.channels)

        return spec.size

    # open a given stream. Return 0 if OK
    cdef int stream_component_open(VideoState self, int stream_index) nogil except 1:
        # Open the decoder for stream `stream_index` of the current input and
        # start the matching decoder thread (audio, video or subtitle).
        #
        # Returns 0 on success and a negative value on failure. On every
        # failure path handled here the codec context and the option
        # dictionary created by this method are released before returning.
        cdef AVFormatContext *ic = self.ic
        cdef AVCodecContext *avctx
        cdef const AVCodec *codec
        cdef const char *forced_codec_name = NULL
        cdef AVDictionary *opts = NULL
        cdef const AVDictionaryEntry *t = NULL
        cdef int sample_rate, nb_channels
        cdef int64_t channel_layout
        cdef int ret = 0
        cdef int stream_lowres = self.player.lowres
        cdef AVFilterContext *sink
        if stream_index < 0 or stream_index >= ic.nb_streams:
            return -1

        avctx = avcodec_alloc_context3(NULL)
        if avctx == NULL:
            return AVERROR(ENOMEM)

        ret = avcodec_parameters_to_context(avctx, ic.streams[stream_index].codecpar)
        if ret < 0:
            avcodec_free_context(&avctx)
            return ret
        avctx.pkt_timebase = ic.streams[stream_index].time_base

        codec = avcodec_find_decoder(avctx.codec_id)

        # Remember the most recently requested stream of each type (used as the
        # starting point by stream_cycle_channel) and pick up the decoder name
        # the user forced for that type, if any.
        if avctx.codec_type == AVMEDIA_TYPE_AUDIO:
            self.last_audio_stream = stream_index
            forced_codec_name = self.player.audio_codec_name
        elif avctx.codec_type == AVMEDIA_TYPE_SUBTITLE:
            self.last_subtitle_stream = stream_index
            forced_codec_name = self.player.subtitle_codec_name
        elif avctx.codec_type == AVMEDIA_TYPE_VIDEO:
            self.last_video_stream = stream_index
            forced_codec_name = self.player.video_codec_name

        # A forced decoder name overrides the decoder found from the codec id.
        if forced_codec_name != NULL:
            codec = avcodec_find_decoder_by_name(forced_codec_name)
        if codec == NULL:
            if forced_codec_name != NULL:
                if self.player.loglevel >= AV_LOG_WARNING:
                    av_log(NULL, AV_LOG_WARNING, b"No codec could be found with name '%s'\n", forced_codec_name)
            else:
                if self.player.loglevel >= AV_LOG_WARNING:
                    av_log(NULL, AV_LOG_WARNING, b"No decoder could be found for codec %s\n", avcodec_get_name(avctx.codec_id))
            avcodec_free_context(&avctx)
            return AVERROR(EINVAL)
        avctx.codec_id = codec.id
        # Clamp the requested lowres factor to what the decoder supports.
        if stream_lowres > codec.max_lowres:
            # Gated on the player's log level like every other message here.
            if self.player.loglevel >= AV_LOG_WARNING:
                av_log(avctx, AV_LOG_WARNING, b"The maximum value for lowres supported by the decoder is %d\n",
                       codec.max_lowres)
            stream_lowres = codec.max_lowres
        avctx.lowres = stream_lowres

        if self.player.fast:
            avctx.flags2 |= AV_CODEC_FLAG2_FAST

        opts = filter_codec_opts(self.player.codec_opts, avctx.codec_id, ic,
                                 ic.streams[stream_index], codec)
        # Default to automatic threading unless the user chose a thread count.
        if av_dict_get(opts, b"threads", NULL, 0) == NULL:
            av_dict_set(&opts, b"threads", b"auto", 0)
        if stream_lowres:
            av_dict_set_int(&opts, b"lowres", stream_lowres, 0)
        # Keep the real error code instead of collapsing it to -1.
        ret = avcodec_open2(avctx, codec, &opts)
        if ret < 0:
            avcodec_free_context(&avctx)
            av_dict_free(&opts)
            return ret
        # Any entry still present in `opts` is reported as an unknown option.
        t = av_dict_get(opts, b"", NULL, AV_DICT_IGNORE_SUFFIX)
        if t != NULL:
            if self.player.loglevel >= AV_LOG_ERROR:
                av_log(NULL, AV_LOG_ERROR, b"Option %s not found.\n", t.key)
            avcodec_free_context(&avctx)
            av_dict_free(&opts)
            return AVERROR_OPTION_NOT_FOUND
        self.eof = 0
        ic.streams[stream_index].discard = AVDISCARD_DEFAULT
        if avctx.codec_type == AVMEDIA_TYPE_AUDIO:
            IF CONFIG_AVFILTER:
                self.audio_filter_src.freq           = avctx.sample_rate
                self.audio_filter_src.channels       = avctx.channels
                self.audio_filter_src.channel_layout = get_valid_channel_layout(avctx.channel_layout, avctx.channels)
                self.audio_filter_src.fmt            = avctx.sample_fmt
                ret = self.configure_audio_filters(self.player.afilters, 0)
                if ret < 0:
                    avcodec_free_context(&avctx)
                    av_dict_free(&opts)
                    return ret
                # The output device is opened with the format produced by
                # the filter graph's sink, not the decoder's native format.
                sink = self.out_audio_filter
                sample_rate    = av_buffersink_get_sample_rate(sink)
                nb_channels    = av_buffersink_get_channels(sink)
                channel_layout = av_buffersink_get_channel_layout(sink)
            ELSE:
                sample_rate    = avctx.sample_rate
                nb_channels    = avctx.channels
                channel_layout = avctx.channel_layout

            # prepare audio output
            ret = self.audio_open(channel_layout, nb_channels, sample_rate, &self.audio_tgt)
            if ret < 0:
                avcodec_free_context(&avctx)
                av_dict_free(&opts)
                return ret
            # audio_open returns the hardware buffer size on success
            self.audio_hw_buf_size = ret
            self.audio_src = self.audio_tgt
            self.audio_buf_size  = 0
            self.audio_buf_index = 0

            # init averaging filter
            self.audio_diff_avg_coef  = exp(log(0.01) / <double>AUDIO_DIFF_AVG_NB)
            self.audio_diff_avg_count = 0
            # since we do not have a precise enough audio fifo fullness,
            # we correct audio sync only if larger than this threshold
            self.audio_diff_threshold = (<double>self.audio_hw_buf_size) / self.audio_tgt.bytes_per_sec

            self.audio_stream = stream_index
            self.audio_st = ic.streams[stream_index]

            self.auddec.decoder_init(self.mt_gen, avctx, self.audioq, self.continue_read_thread)
            if ((self.ic.iformat.flags & (AVFMT_NOBINSEARCH | AVFMT_NOGENSEARCH | AVFMT_NO_BYTE_SEEK)) and
                not self.ic.iformat.read_seek):
                self.auddec.start_pts = self.audio_st.start_time
                self.auddec.start_pts_tb = self.audio_st.time_base
            self.auddec.decoder_start(audio_thread_enter, "audio_decoder", self.self_id)
            # un-pause the audio output now that the decoder is running
            IF USE_SDL2_MIXER:
                Mix_Resume(self.audio_dev)
            ELSE:
                SDL_PauseAudioDevice(<SDL_AudioDeviceID>self.audio_dev, 0)
        elif avctx.codec_type ==  AVMEDIA_TYPE_VIDEO:
            with gil:
                self.metadata['src_pix_fmt'] = <const char *>av_x_if_null(av_get_pix_fmt_name(avctx.pix_fmt), b"none")
            self.video_stream = stream_index
            self.video_st = ic.streams[stream_index]
            self.viddec.decoder_init(self.mt_gen, avctx, self.videoq, self.continue_read_thread)

            self.viddec.decoder_start(video_thread_enter, "video_decoder", self.self_id)
            # ask the read thread to (re)queue an attached picture, if any
            self.queue_attachments_req = 1
        elif avctx.codec_type ==  AVMEDIA_TYPE_SUBTITLE:
            self.subtitle_stream = stream_index
            self.subtitle_st = ic.streams[stream_index]
            self.subdec.decoder_init(self.mt_gen, avctx, self.subtitleq, self.continue_read_thread)
            self.subdec.decoder_start(subtitle_thread_enter, "subtitle_decoder", self.self_id)
        else:
            # No decoder object took ownership of the context for any other
            # media type, so release it here instead of leaking it.
            avcodec_free_context(&avctx)
        av_dict_free(&opts)
        return 0

    cdef int stream_component_close(VideoState self, int stream_index) nogil except 1:
        # Stop and release the decoder attached to stream `stream_index`,
        # mark the stream as discarded and clear the per-type bookkeeping.
        # An out-of-range index is silently ignored. Always returns 0.
        cdef AVFormatContext *fmt_ctx = self.ic
        cdef AVStream *stream
        cdef AVMediaType kind
        global audio_count

        if stream_index < 0 or stream_index >= fmt_ctx.nb_streams:
            return 0

        stream = fmt_ctx.streams[stream_index]
        kind = stream.codecpar.codec_type

        # Step 1: abort the decoder and free what belongs to it.
        if kind == AVMEDIA_TYPE_AUDIO:
            self.auddec.decoder_abort(self.sampq)
            IF USE_SDL2_MIXER:
                Mix_UnregisterEffect(self.audio_dev, <void (*)(int, void *, int, void *) noexcept nogil>sdl_mixer_callback)
                Mix_HaltChannel(self.audio_dev)
                Mix_FreeChunk(self.chunk)
                self.chunk = NULL

                # The mixer is shared: drop this player's claim on it and
                # close the audio device once the global count hits zero.
                audio_mutex.lock()
                if self.audio_count != -1:
                    audio_count -= 1
                self.audio_count = -1
                if not audio_count:
                    Mix_CloseAudio()
                audio_mutex.unlock()
            ELSE:
                SDL_CloseAudioDevice(<SDL_AudioDeviceID>self.audio_dev)

            self.auddec.decoder_destroy()
            swr_free(&self.swr_ctx)
            av_freep(&self.audio_buf1)
            self.audio_buf1_size = 0
            self.audio_buf = NULL
        elif kind == AVMEDIA_TYPE_VIDEO:
            self.viddec.decoder_abort(self.pictq)
            self.viddec.decoder_destroy()
        elif kind == AVMEDIA_TYPE_SUBTITLE:
            self.subdec.decoder_abort(self.subpq)
            self.subdec.decoder_destroy()

        # Step 2: stop the demuxer from delivering packets of this stream.
        stream.discard = AVDISCARD_ALL

        # Step 3: forget the stream in the per-type state.
        if kind == AVMEDIA_TYPE_AUDIO:
            self.audio_st = NULL
            self.audio_stream = -1
        elif kind == AVMEDIA_TYPE_VIDEO:
            self.video_st = NULL
            self.video_stream = -1
        elif kind == AVMEDIA_TYPE_SUBTITLE:
            self.subtitle_st = NULL
            self.subtitle_stream = -1
        return 0

    # this thread gets the stream from the disk or the network
    cdef int read_thread(VideoState self) nogil except 1:
        # Demuxer loop of the player.
        #
        # Opens the input, selects and opens the audio/video/subtitle streams,
        # then keeps reading packets and dispatching them to the per-stream
        # packet queues, handling pause, seek, looping and end-of-file.
        #
        # Every exit path goes through self.failed(), which releases the packet
        # (and the format context when it has not been stored in self.ic yet)
        # and reports the outcome through request_thread_s. The value returned
        # here is whatever failed() returns.
        cdef AVFormatContext *ic = NULL
        cdef int err, i, ret
        cdef int st_index[<int>AVMEDIA_TYPE_NB]
        cdef AVPacket *pkt = NULL
        cdef int64_t stream_start_time
        cdef int pkt_in_play_range = 0
        cdef const AVDictionaryEntry *t
        cdef AVDictionary **opts
        cdef int orig_nb_streams
        cdef int scan_all_pmts_set = 0
        cdef int64_t pkt_ts
        cdef char err_msg[256]
        cdef int64_t timestamp
        cdef int temp
        cdef int64_t seek_target, seek_min, seek_max
        cdef int64_t temp64, temp64_2
        cdef AVStream *st
        cdef AVMediaType media_type
        self.eof = 0
        # all bytes 0xFF: every entry starts out as -1 (no stream selected)
        memset(st_index, -1, sizeof(st_index))

        pkt = av_packet_alloc()
        if pkt == NULL:
            av_log(NULL, AV_LOG_FATAL, "Could not allocate packet.\n")
            return self.failed(AVERROR(ENOMEM), ic, &pkt)

        ic = avformat_alloc_context()
        if ic == NULL:
            if self.player.loglevel >= AV_LOG_FATAL:
                av_log(NULL, AV_LOG_FATAL, b"Could not allocate context.\n");
            return self.failed(AVERROR(ENOMEM), ic, &pkt)
        #av_opt_set_int(ic, b"threads", 1, 0)
        # let blocking I/O inside the demuxer poll decode_interrupt_cb
        ic.interrupt_callback.callback = <int (*)(void *) noexcept>self.decode_interrupt_cb
        ic.interrupt_callback.opaque = self.self_id

        # Set scan_all_pmts for the open call unless the user already gave it;
        # scan_all_pmts_set remembers that it has to be cleared again below.
        if not av_dict_get(self.player.format_opts, b"scan_all_pmts", NULL, AV_DICT_MATCH_CASE):
            av_dict_set(&self.player.format_opts, b"scan_all_pmts", b"1", AV_DICT_DONT_OVERWRITE)
            scan_all_pmts_set = 1

        err = avformat_open_input(&ic, self.player.input_filename, self.iformat, &self.player.format_opts)
        if err < 0:
            if self.player.loglevel >= AV_LOG_ERROR:
                av_log(NULL, AV_LOG_ERROR, b"%s: %s\n", self.player.input_filename, fmt_err(err, err_msg, sizeof(err_msg)))
            return self.failed(-1, ic, &pkt)

        if scan_all_pmts_set:
            av_dict_set(&self.player.format_opts, b"scan_all_pmts", NULL, AV_DICT_MATCH_CASE)
        # any entry still left in format_opts is reported as an unknown option
        t = av_dict_get(self.player.format_opts, b"", NULL, AV_DICT_IGNORE_SUFFIX)
        if t != NULL:
            if self.player.loglevel >= AV_LOG_ERROR:
                av_log(NULL, AV_LOG_ERROR, b"Option %s not found.\n", t.key)
            return self.failed(AVERROR_OPTION_NOT_FOUND, ic, &pkt)
        # From here on failed() no longer closes `ic` itself, because it only
        # does so while self.ic is still NULL.
        self.ic = ic

        if self.player.genpts:
            ic.flags |= AVFMT_FLAG_GENPTS
        av_format_inject_global_side_data(ic)

        if self.player.find_stream_info:
            opts = setup_find_stream_info_opts(ic, self.player.codec_opts)
            # remember the count used to size `opts` before probing
            orig_nb_streams = ic.nb_streams

            err = avformat_find_stream_info(ic, opts)
            for i in range(orig_nb_streams):
                av_dict_free(&opts[i])
            av_freep(&opts)

            if err < 0:
                if self.player.loglevel >= AV_LOG_WARNING:
                    av_log(NULL, AV_LOG_WARNING, b"%s: could not find codec parameters\n", self.player.input_filename)
                return self.failed(-1, ic, &pkt)

        if ic.pb != NULL:
            ic.pb.eof_reached = 0 # FIXME hack, ffplay maybe should not use avio_feof() to test for the end

        # A negative seek_by_bytes means "decide automatically": seek by bytes
        # only for formats that allow byte seeking and have discontinuous
        # timestamps, with ogg excluded.
        if self.player.seek_by_bytes < 0:
            self.player.seek_by_bytes = (ic.iformat.flags & AVFMT_NO_BYTE_SEEK) == 0 \
                and (ic.iformat.flags & AVFMT_TS_DISCONT) != 0 \
                and strcmp(b"ogg", ic.iformat.name) != 0

        self.max_frame_duration = 10.0 if ic.iformat.flags & AVFMT_TS_DISCONT else 3600.0

        t = av_dict_get(ic.metadata, b"title", NULL, 0)
        if t != NULL:
            with gil:
                # NOTE(review): t.value is a C string; confirm that str() gives
                # the intended text (and not a bytes repr) under this module's
                # Cython string directives.
                self.metadata['title'] = str(t.value)

        with gil:
            # duration in seconds; 0. when the container reports a negative one
            self.metadata['duration'] = (ic.duration / <double>AV_TIME_BASE) if ic.duration >= 0 else 0.

        # if seeking requested, we execute it
        if self.player.start_time != AV_NOPTS_VALUE:
            timestamp = self.player.start_time
            # add the stream start time
            if ic.start_time != AV_NOPTS_VALUE:
                timestamp += ic.start_time
            ret = avformat_seek_file(ic, -1, INT64_MIN, timestamp, INT64_MAX, 0)
            # a failed initial seek is only logged; playback still starts
            if ret < 0:
                if self.player.loglevel >= AV_LOG_WARNING:
                    av_log(NULL, AV_LOG_WARNING, b"%s: could not seek to position %0.3f\n",
                       self.player.input_filename, <double>timestamp / <double>AV_TIME_BASE)

        self.realtime = is_realtime(ic)
        if self.player.show_status:
            av_dump_format(ic, 0, self.player.input_filename, 0)
        # discard everything by default; stream_component_open re-enables the
        # streams that actually get opened
        for i in range(ic.nb_streams):
            ic.streams[i].discard = AVDISCARD_ALL

        # for each media type, take the first stream matching the user's
        # stream specifier (if one was given)
        for i in range(ic.nb_streams):
            st = ic.streams[i]
            media_type = st.codecpar.codec_type
            st.discard = AVDISCARD_ALL
            if <int>media_type >= 0 and self.player.wanted_stream_spec[<int>media_type] != NULL and st_index[<int>media_type] == -1:
                if avformat_match_stream_specifier(ic, st, self.player.wanted_stream_spec[<int>media_type]) > 0:
                    st_index[<int>media_type] = i

        # a specifier that matched nothing is logged and replaced by INT_MAX
        # before being passed on to av_find_best_stream below
        for i in range(AVMEDIA_TYPE_NB):
            if self.player.wanted_stream_spec[i] != NULL and st_index[i] == -1:
                if self.player.loglevel >= AV_LOG_ERROR:
                    av_log(NULL, AV_LOG_ERROR, b"Stream specifier %s does not match any %s stream\n",
                       self.player.wanted_stream_spec[i], av_get_media_type_string(<AVMediaType>i))
                st_index[i] = INT_MAX

        # final stream choice: audio is picked relative to the chosen video
        # stream, subtitles relative to the audio stream (or the video stream
        # when no audio stream was found)
        if not self.player.video_disable:
            st_index[<int>AVMEDIA_TYPE_VIDEO] = av_find_best_stream(ic, AVMEDIA_TYPE_VIDEO,\
            st_index[<int>AVMEDIA_TYPE_VIDEO], -1, NULL, 0)
        if not self.player.audio_disable:
            st_index[<int>AVMEDIA_TYPE_AUDIO] = av_find_best_stream(ic, AVMEDIA_TYPE_AUDIO,\
            st_index[<int>AVMEDIA_TYPE_AUDIO], st_index[<int>AVMEDIA_TYPE_VIDEO], NULL, 0)
        if st_index[<int>AVMEDIA_TYPE_AUDIO] >= 0:
            temp = st_index[<int>AVMEDIA_TYPE_AUDIO]
        else:
            temp = st_index[<int>AVMEDIA_TYPE_VIDEO]
        if (not self.player.video_disable) and not self.player.subtitle_disable:
            st_index[<int>AVMEDIA_TYPE_SUBTITLE] = av_find_best_stream(ic, AVMEDIA_TYPE_SUBTITLE,\
            st_index[<int>AVMEDIA_TYPE_SUBTITLE], temp, NULL, 0)

        # open the streams
        if st_index[<int>AVMEDIA_TYPE_AUDIO] >= 0:
            self.stream_component_open(st_index[<int>AVMEDIA_TYPE_AUDIO])

        ret = -1
        if st_index[<int>AVMEDIA_TYPE_VIDEO] >= 0:
            ret = self.stream_component_open(st_index[<int>AVMEDIA_TYPE_VIDEO])
#         if self.show_mode == SHOW_MODE_NONE:
#             if ret >= 0:
#                 self.show_mode = SHOW_MODE_VIDEO
#             else:
#                 self.show_mode = SHOW_MODE_RDFT

        if st_index[<int>AVMEDIA_TYPE_SUBTITLE] >= 0:
            self.stream_component_open(st_index[<int>AVMEDIA_TYPE_SUBTITLE])

        # playback needs at least one of video or audio to have been opened
        if self.video_stream < 0 and self.audio_stream < 0:
            if self.player.loglevel >= AV_LOG_FATAL:
                av_log(NULL, AV_LOG_FATAL, b"Failed to open file '%s' or configure filtergraph\n",
                   self.player.input_filename)
            return self.failed(-1, ic, &pkt)

        # a negative infinite_buffer is resolved to 1 for realtime inputs
        if self.player.infinite_buffer < 0 and self.realtime:
            self.player.infinite_buffer = 1

        # ---- main demux loop ----
        while 1:
            if self.abort_request:
                break
            # forward pause/resume transitions to the demuxer
            if self.paused != self.last_paused:
                self.last_paused = self.paused
                if self.paused:
                    self.read_pause_return = av_read_pause(ic)
                else:
                    av_read_play(ic)
            IF CONFIG_RTSP_DEMUXER or CONFIG_MMSH_PROTOCOL:
                if self.paused and ((not strcmp(ic.iformat.name, b"rtsp")) or\
                ic.pb != NULL and not strncmp(self.player.input_filename, b"mmsh:", 5)):
                    # wait 10 ms to avoid trying to get another packet
                    # XXX: horrible
                    self.pause_cond.lock()
                    self.pause_cond.cond_wait()
                    self.pause_cond.unlock()
                    #self.mt_gen.delay(10)
                    continue
            # ---- pending seek request ----
            if self.seek_req:
                self.reached_eof = 0
                seek_target = self.seek_pos
                if self.seek_rel > 0:
                    seek_min = seek_target - self.seek_rel + 2
                else:
                    seek_min = INT64_MIN
                if self.seek_rel < 0:
                    seek_max = seek_target - self.seek_rel - 2
                else:
                    seek_max = INT64_MAX
                ''' FIXME the +-2 is due to rounding being not done in the correct
                direction in generation of the seek_pos/seek_rel variables'''

                ret = avformat_seek_file(self.ic, -1, seek_min, seek_target,
                                         seek_max, self.seek_flags)
                if ret < 0:
                    if self.player.loglevel >= AV_LOG_ERROR:
                        av_log(NULL, AV_LOG_ERROR, b"%s: error while seeking\n",
                           self.ic.url)
                else:
                    # successful seek: flush the queues of the open streams
                    # and reset the external clock
                    if self.audio_stream >= 0:
                        self.audioq.packet_queue_flush()
                    if self.subtitle_stream >= 0:
                        self.subtitleq.packet_queue_flush()
                    if self.video_stream >= 0:
                        self.videoq.packet_queue_flush()
                    if self.seek_flags & AVSEEK_FLAG_BYTE:
                        self.extclk.set_clock(NAN, 0)
                    else:
                        self.extclk.set_clock(seek_target / <double>AV_TIME_BASE, 0)
                self.seek_req = 0
                self.queue_attachments_req = 1
                self.eof = 0

            # ---- (re)queue an attached picture (e.g. cover art) ----
            if self.queue_attachments_req:
                if self.video_st != NULL and self.video_st.disposition & AV_DISPOSITION_ATTACHED_PIC:
                    ret = av_packet_ref(pkt, &self.video_st.attached_pic)
                    if ret < 0:
                        if self.player.loglevel >= AV_LOG_ERROR:
                            av_log(NULL, AV_LOG_ERROR, b"Failed to copy packet%s\n", fmt_err(ret, err_msg, sizeof(err_msg)))
                        return self.failed(ret, ic, &pkt)
                    self.videoq.packet_queue_put(pkt)
                    self.videoq.packet_queue_put_nullpacket(pkt, self.video_stream)
                self.queue_attachments_req = 0
            # if the queue are full, no need to read more
            if self.player.infinite_buffer < 1 and \
                (self.audioq.size + self.videoq.size + self.subtitleq.size > MAX_QUEUE_SIZE or
                (self.stream_has_enough_packets(self.audio_st, self.audio_stream, self.audioq) and
                self.stream_has_enough_packets(self.video_st, self.video_stream, self.videoq) and
                self.stream_has_enough_packets(self.subtitle_st, self.subtitle_stream, self.subtitleq))):
                # wait 10 ms
                self.continue_read_thread.lock()
                self.continue_read_thread.cond_wait_timeout(10)
                self.continue_read_thread.unlock()
                continue

            # ---- everything has been played: loop, exit, or just report eof ----
            # True when not paused and each open audio/video stream has both a
            # decoder that finished the current queue serial and an empty
            # frame queue.
            if (not self.paused) and (
                self.audio_st == NULL or (self.auddec.finished == self.audioq.serial and
                                        self.sampq.frame_queue_nb_remaining() == 0)) and (
                self.video_st == NULL or (self.viddec.finished == self.videoq.serial and
                                          self.pictq.frame_queue_nb_remaining() == 0)):
                self.auddec.set_seek_pos(-1)
                self.viddec.set_seek_pos(-1)
                if self.player.loop != 1:
                    self.request_thread_s(b'eof', b'')
                    if self.player.start_time != AV_NOPTS_VALUE:
                        temp64 = self.player.start_time
                    else:
                        temp64 = 0
                    # loop == 0 restarts unconditionally; otherwise the counter
                    # is decremented and playback restarts while it is non-zero
                    if not self.player.loop:
                        self.stream_seek(temp64, 0, 0, 0)
                    else:
                        self.player.loop = self.player.loop - 1
                        if self.player.loop:
                            self.stream_seek(temp64, 0, 0, 0)
                elif self.player.autoexit:
                    if self.player.loglevel >= AV_LOG_INFO:
                        av_log(NULL, AV_LOG_INFO, b"Reached eof\n")
                    self.request_thread_s(b'eof', b'')
                    return self.failed(0, ic, &pkt)
                else:
                    # report eof only once until the next seek resets the flag
                    if not self.reached_eof:
                        self.reached_eof = 1
                        self.request_thread_s(b'eof', b'')

            # ---- read the next packet ----
            ret = av_read_frame(ic, pkt)
            if ret < 0:
                # first eof: queue a null packet on every open stream
                if (ret == AVERROR_EOF or avio_feof(ic.pb)) and not self.eof:
                    self.auddec.set_seek_pos(-1)
                    self.viddec.set_seek_pos(-1)
                    if self.video_stream >= 0:
                        self.videoq.packet_queue_put_nullpacket(pkt, self.video_stream)
                    if self.audio_stream >= 0:
                        self.audioq.packet_queue_put_nullpacket(pkt, self.audio_stream)
                    if self.subtitle_stream >= 0:
                        self.subtitleq.packet_queue_put_nullpacket(pkt, self.subtitle_stream)
                    self.eof = 1
                # an I/O error on the byte stream ends the loop
                if ic.pb != NULL and ic.pb.error:
                    if self.player.autoexit:
                        if self.player.loglevel >= AV_LOG_INFO:
                            av_log(NULL, AV_LOG_INFO, b"Reached eof\n")
                        self.request_thread_s(b'eof', b'')
                    break
                # otherwise wait up to 10 ms (or until signalled) and retry
                self.continue_read_thread.lock()
                self.continue_read_thread.cond_wait_timeout(10)
                self.continue_read_thread.unlock()
                continue
            else:
                self.eof = 0

            # check if packet is in play range specified by user, then queue, otherwise discard
            stream_start_time = ic.streams[pkt.stream_index].start_time
            pkt_ts = pkt.dts if pkt.pts == AV_NOPTS_VALUE else pkt.pts
            if stream_start_time != AV_NOPTS_VALUE:
                temp64 = stream_start_time
            else:
                temp64 = 0
            if self.player.start_time != AV_NOPTS_VALUE:
                temp64_2 = self.player.start_time
            else:
                temp64_2 = 0

            # in range when no duration was requested, or when the packet time
            # relative to the stream start, minus the requested start time,
            # does not exceed the requested duration (both divided by 1e6)
            pkt_in_play_range = self.player.duration == AV_NOPTS_VALUE or \
                (pkt_ts - temp64) * av_q2d(ic.streams[pkt.stream_index].time_base) - \
                <double>temp64_2 / 1000000.0 <= (<double>self.player.duration / 1000000.0)
            if pkt.stream_index == self.audio_stream and pkt_in_play_range:
                self.audioq.packet_queue_put(pkt)
            elif (pkt.stream_index == self.video_stream and pkt_in_play_range
                  and not (self.video_st.disposition & AV_DISPOSITION_ATTACHED_PIC)):
                self.videoq.packet_queue_put(pkt)
            elif pkt.stream_index == self.subtitle_stream and pkt_in_play_range:
                self.subtitleq.packet_queue_put(pkt)
            else:
                # not for an open stream, or outside the play range
                av_packet_unref(pkt)

        # loop left through `break`: abort request or I/O error
        ret = 0
        if self.player.loglevel >= AV_LOG_INFO:
            av_log(NULL, AV_LOG_INFO, b"Exiting read thread\n")
        return self.failed(ret, ic, &pkt)

    cdef int stream_has_enough_packets(self, AVStream *st, int stream_id, FFPacketQueue queue) nogil:
        # Non-zero when the read thread does not need to feed `queue` any
        # further: the stream is not open, the queue is aborting, the stream
        # is an attached picture, or the queue already holds more than
        # MIN_FRAMES packets covering more than one second (or packets
        # without any duration information).
        cdef int aborting
        cdef int attached_pic

        if stream_id < 0:
            return 1

        aborting = queue.abort_request
        if aborting:
            return aborting

        attached_pic = st.disposition & AV_DISPOSITION_ATTACHED_PIC
        if attached_pic:
            return attached_pic

        if queue.nb_packets > MIN_FRAMES:
            return not queue.duration or av_q2d(st.time_base) * queue.duration > 1.0
        return 0

    cdef inline int failed(VideoState self, int ret, AVFormatContext *ic, AVPacket **pkt) nogil except 1:
        # Common exit path of the read thread: release what the thread still
        # owns and report how it ended. Always returns 0.
        cdef char err_msg[256]

        # The format context is closed here only while it has not been handed
        # over to self.ic.
        if self.ic == NULL and ic != NULL:
            avformat_close_input(&ic)

        if pkt != NULL:
            av_packet_free(pkt)

        if not ret or self.abort_request:
            # clean exit, or an exit the user asked for
            self.request_thread_s(b'read:exit', b'Done')
        elif ret == -1:
            # generic failure without a usable error code
            self.request_thread_s(b'read:error', b'')
        else:
            self.request_thread_s(b'read:error', fmt_err(ret, err_msg, sizeof(err_msg)))
        return 0

    cdef int stream_select_program(VideoState self,
                                   int requested_program) nogil except 1:
        # Switch playback to the program whose id is `requested_program`.
        #
        # The first video, audio and subtitle stream listed by that program
        # are selected; all currently open streams are closed and the selected
        # ones opened. Returns -1 when no program has the requested id,
        # otherwise 0.
        cdef unsigned int idx
        cdef unsigned int program_count = self.ic.nb_programs
        cdef unsigned int member_count
        cdef unsigned int member
        cdef AVProgram *candidate
        cdef AVProgram *chosen = NULL
        cdef AVStream *stream
        cdef AVMediaType kind
        cdef int video_idx = -1
        cdef int audio_idx = -1
        cdef int subtitle_idx = -1

        # locate the program by id
        for idx in range(program_count):
            candidate = self.ic.programs[idx]
            if candidate.id == requested_program:
                chosen = candidate
                break

        if chosen == NULL:
            return -1

        # remember the first stream of each media type within the program
        member_count = chosen.nb_stream_indexes
        for idx in range(member_count):
            member = chosen.stream_index[idx]
            stream = self.ic.streams[member]
            kind = stream.codecpar.codec_type

            if kind == AVMEDIA_TYPE_VIDEO and video_idx == -1:
                video_idx = <int>member
            elif kind == AVMEDIA_TYPE_AUDIO and audio_idx == -1:
                audio_idx = <int>member
            elif kind == AVMEDIA_TYPE_SUBTITLE and subtitle_idx == -1:
                subtitle_idx = <int>member

        # close whatever is playing now ...
        self.stream_component_close(self.video_stream)
        self.stream_component_close(self.audio_stream)
        self.stream_component_close(self.subtitle_stream)

        # ... and open the program's streams
        if video_idx != -1:
            self.stream_component_open(video_idx)

        if audio_idx != -1:
            self.stream_component_open(audio_idx)

        if subtitle_idx != -1:
            self.stream_component_open(subtitle_idx)

        return 0

    cdef int stream_select_channel(VideoState self, int codec_type,
                                  unsigned int requested_stream) nogil except 1:
        # Switch the open stream of media type `codec_type` to the stream with
        # index `requested_stream`.
        #
        # Returns -1 when the index is out of range, when the stream is of a
        # different media type, or when an audio stream has no sample rate or
        # channels. Returns the negative error of stream_component_open when
        # the new stream cannot be opened, and 0 on success.
        cdef int old_index
        cdef int ret
        cdef AVStream *st
        cdef unsigned int nb_streams = self.ic.nb_streams

        if codec_type == AVMEDIA_TYPE_VIDEO:
            old_index = self.video_stream
        elif codec_type == AVMEDIA_TYPE_AUDIO:
            old_index = self.audio_stream
        else:
            old_index = self.subtitle_stream

        if requested_stream >= nb_streams:
            return -1

        st = self.ic.streams[requested_stream]

        if st.codecpar.codec_type != codec_type:
            return -1

        if codec_type == AVMEDIA_TYPE_AUDIO:
            if st.codecpar.sample_rate == 0 or st.codecpar.channels == 0:
                # gated on the player's log level like the other messages
                if self.player.loglevel >= AV_LOG_ERROR:
                    av_log(NULL, AV_LOG_ERROR, b'Invalid audio stream #%u\n', requested_stream)
                return -1

        if self.player.loglevel >= AV_LOG_INFO:
            av_log(NULL, AV_LOG_INFO, b'Switch %s stream from #%d to #%u\n',
                    av_get_media_type_string(<AVMediaType>codec_type), old_index, requested_stream)

        self.stream_component_close(old_index)
        # Do not report success when the replacement stream failed to open.
        ret = self.stream_component_open(<int>requested_stream)
        if ret < 0:
            return ret

        return 0

    cdef int stream_cycle_channel(VideoState self, int codec_type) nogil except 1:
        # Switch to the next usable stream of media type `codec_type`.
        #
        # The search starts after the last stream of that type that was
        # requested. When no stream of the type is currently open
        # (`was_closed`), the remembered stream itself is the first candidate
        # and, after switching, playback seeks to the current master clock
        # position. For audio and subtitles the search is restricted to the
        # program containing the open video stream, if there is one.
        # Subtitles may cycle to "none" (index -1). Always returns 0.
        cdef AVFormatContext *ic = self.ic
        cdef int start_index, stream_index
        cdef int old_index, was_closed = 0
        cdef int first_pass = 1
        cdef AVStream *st
        cdef AVProgram *p = NULL
        cdef int nb_streams = self.ic.nb_streams
        cdef double pos
        cdef int sync_type = self.get_master_sync_type()

        if codec_type == AVMEDIA_TYPE_VIDEO:
            start_index = self.last_video_stream
            old_index = self.video_stream
        elif codec_type == AVMEDIA_TYPE_AUDIO:
            start_index = self.last_audio_stream
            old_index = self.audio_stream
        else:
            start_index = self.last_subtitle_stream
            old_index = self.subtitle_stream
        was_closed = old_index == -1
        stream_index = start_index
        if codec_type != AVMEDIA_TYPE_VIDEO and self.video_stream != -1:
            p = av_find_program_from_stream(ic, NULL, self.video_stream)
            if p != NULL:
                # From here on start_index/stream_index are positions inside
                # the program's stream list, not global stream indices.
                nb_streams = p.nb_stream_indexes
                start_index = 0
                while start_index < nb_streams:
                    if p.stream_index[start_index] == stream_index:
                        break
                    start_index += 1
                if start_index == nb_streams:
                    start_index = -1
                stream_index = start_index
        while 1:
            # The remembered stream is tried as-is only on the first pass of a
            # closed stream type and only when it is a valid index. In every
            # other case advance; this avoids indexing with -1 and guarantees
            # that the search makes progress.
            if not (first_pass and was_closed and stream_index >= 0):
                stream_index += 1
            if stream_index >= nb_streams:
                if codec_type == AVMEDIA_TYPE_SUBTITLE:
                    stream_index = -1
                    self.last_subtitle_stream = -1
                    break
                if start_index == -1:
                    return 0
                stream_index = 0
            # Back at the starting point: nothing else to switch to.
            if stream_index == start_index and not (first_pass and was_closed):
                return 0
            first_pass = 0
            if p != NULL:
                st = self.ic.streams[p.stream_index[stream_index]]
            else:
                st = self.ic.streams[stream_index]
            if st.codecpar.codec_type == codec_type:
                # check that parameters are OK
                if codec_type == AVMEDIA_TYPE_AUDIO:
                    if st.codecpar.sample_rate != 0 and st.codecpar.channels != 0:
                        break
                elif codec_type == AVMEDIA_TYPE_VIDEO or codec_type == AVMEDIA_TYPE_SUBTITLE:
                    break

        # translate the program-relative position back to a stream index
        if p != NULL and stream_index != -1:
            stream_index = p.stream_index[stream_index]
        if self.player.loglevel >= AV_LOG_INFO:
            av_log(NULL, AV_LOG_INFO, b'Switch %s stream from #%d to #%d\n',
            av_get_media_type_string(<AVMediaType>codec_type), old_index, stream_index)
        self.stream_component_close(old_index)
        self.stream_component_open(stream_index)

        if was_closed:
            # The stream type was not playing: seek to the position of the
            # master clock, using a clock that belongs to a still-open stream.
            if (sync_type == AV_SYNC_VIDEO_MASTER and
                codec_type != AVMEDIA_TYPE_VIDEO and
                self.video_stream != -1):
                pos = self.vidclk.get_clock()
            elif (sync_type == AV_SYNC_AUDIO_MASTER and
                codec_type != AVMEDIA_TYPE_AUDIO and
                self.audio_stream != -1):
                pos = self.audclk.get_clock()
            else:
                pos = self.extclk.get_clock()
            # fall back to the last seek position when the clock is unset
            if isnan(pos):
                pos = <double>self.seek_pos / <double>AV_TIME_BASE
            # never seek before the start of the input
            if self.ic.start_time != AV_NOPTS_VALUE and pos < self.ic.start_time / <double>AV_TIME_BASE:
                pos = self.ic.start_time / <double>AV_TIME_BASE
            self.stream_seek(<int64_t>(pos * AV_TIME_BASE), 0, 0, 1)
        return 0


================================================
FILE: ffpyplayer/player/decoder.pxd
================================================

include '../includes/ffmpeg.pxi'

from ffpyplayer.threading cimport MTGenerator, MTCond, MTMutex, MTThread
from ffpyplayer.player.queue cimport FFPacketQueue
from ffpyplayer.player.frame_queue cimport FrameQueue


cdef class Decoder(object):
    # Per-stream decoder state: pulls packets from a packet queue and feeds
    # them to an FFmpeg codec context (implementation lives in decoder.pyx).
    cdef:
        # Scratch packet; allocated in decoder_init, freed in decoder_destroy.
        AVPacket *pkt
        # Packet queue this decoder reads from.
        FFPacketQueue queue
        # Codec context used for decoding; freed by decoder_destroy.
        AVCodecContext *avctx
        # Serial of the packet most recently fetched from the queue (-1 initially).
        int pkt_serial
        # Set to pkt_serial when the codec reports EOF; reset to 0 on a serial change.
        int finished
        # Non-zero when the current packet must be reused on the next pass
        # instead of fetching a new one from the queue.
        int packet_pending
        # Signalled whenever the packet queue is found to be empty.
        MTCond empty_queue_cond
        # Starting pts and its time base; copied into next_pts on a serial change.
        int64_t start_pts
        AVRational start_pts_tb
        # Predicted pts (and its time base) for the next audio frame without a pts.
        int64_t next_pts
        AVRational next_pts_tb
        # Thread created by decoder_start and joined by decoder_abort.
        MTThread decoder_tid

        # Requested seek position; -1 means no seek is pending.
        double seek_req_pos
        # Latched on a serial change while a seek position is pending.
        int seeking
        # Threading backend used to create decoder_tid.
        MTGenerator mt_gen

    cdef int decoder_init(self, MTGenerator mt_gen, AVCodecContext *avctx, FFPacketQueue queue,
                           MTCond empty_queue_cond) nogil except 1
    cdef void decoder_destroy(self) nogil
    cdef void set_seek_pos(self, double seek_req_pos) nogil
    cdef int is_seeking(self) nogil
    cdef int decoder_abort(self, FrameQueue fq) nogil except 1
    cdef int decoder_start(self, int_void_func func, const char *thread_name, void *arg) nogil except 1
    # Returns 1 when a frame/subtitle was produced, 0 at end of stream and
    # -1 when the queue was aborted (see decoder.pyx).
    cdef int decoder_decode_frame(self, AVFrame *frame, AVSubtitle *sub, int decoder_reorder_pts) nogil except? 2


================================================
FILE: ffpyplayer/player/decoder.pyx
================================================

__all__ = ('Decoder', )

include '../includes/ff_consts.pxi'

cdef extern from "string.h" nogil:
    void * memset(void *, int, size_t)

cdef extern from "errno.h" nogil:
    int ENOSYS
    int ENOMEM
    int EAGAIN


cdef class Decoder(object):

    def __cinit__(Decoder self):
        # Start without a codec context or packet; decoder_init() fills both in.
        self.avctx = NULL
        self.pkt = NULL

    cdef int decoder_init(
            self, MTGenerator mt_gen, AVCodecContext *avctx, FFPacketQueue queue,
            MTCond empty_queue_cond) nogil except 1:
        # Prepare the decoder for a new stream: allocate the scratch packet,
        # remember the collaborating objects and reset all bookkeeping fields.
        # Raises MemoryError if the packet cannot be allocated.
        self.pkt = av_packet_alloc()

        # Python object attributes may only be assigned while holding the GIL.
        with gil:
            self.queue = queue
            self.empty_queue_cond = empty_queue_cond
            self.mt_gen = mt_gen
            if self.pkt == NULL:
                raise MemoryError

        self.avctx = avctx

        # Packet/serial state.
        self.pkt_serial = -1
        self.packet_pending = 0
        self.finished = 0

        # Seek state: -1 means that no seek position has been requested.
        self.seeking = 0
        self.seek_req_pos = -1

        # Timestamp prediction state.
        self.start_pts = AV_NOPTS_VALUE
        self.start_pts_tb.num = 0
        self.start_pts_tb.den = 0
        self.next_pts = 0
        self.next_pts_tb.num = 0
        self.next_pts_tb.den = 0
        return 0

    cdef void decoder_destroy(self) nogil:
        # Release the scratch packet and the codec context held by this decoder.
        av_packet_free(&self.pkt)
        avcodec_free_context(&self.avctx)

    cdef void set_seek_pos(self, double seek_req_pos) nogil:
        # Record the requested seek position. Passing -1 clears the request
        # and also drops the `seeking` flag.
        self.seek_req_pos = seek_req_pos
        if seek_req_pos == -1:
            self.seeking = 0

    cdef int is_seeking(self) nogil:
        # True only while the `seeking` flag is set and a seek position
        # (anything other than -1) is still pending.
        return self.seeking and self.seek_req_pos != -1

    cdef int decoder_abort(self, FrameQueue fq) nogil except 1:
        # Stop the decoder thread: abort the packet queue, wake anything waiting
        # on the frame queue, join the thread and finally flush leftover packets.
        self.queue.packet_queue_abort()
        fq.frame_queue_signal()
        self.decoder_tid.wait_thread(NULL)
        # Dropping the Python thread object requires the GIL.
        with gil:
            self.decoder_tid = None
        self.queue.packet_queue_flush()
        return 0

    cdef int decoder_start(self, int_void_func func, const char *thread_name, void *arg) nogil except 1:
        # Start the packet queue and spawn the decoder thread running `func`
        # with `arg`; `thread_name` is forwarded to create_thread.
        self.queue.packet_queue_start()
        # Creating the MTThread Python object requires the GIL.
        with gil:
            self.decoder_tid = MTThread(self.mt_gen.mt_src)
            self.decoder_tid.create_thread(func, thread_name, arg)
        return 0

    cdef int decoder_decode_frame(self, AVFrame *frame, AVSubtitle *sub, int decoder_reorder_pts) nogil except? 2:
        # Decode the next frame (audio/video, into `frame`) or subtitle (into
        # `sub`) for this stream.
        #
        # Returns 1 when a frame/subtitle was produced, 0 when the codec
        # reported end of stream, and -1 when the packet queue was aborted or
        # fetching a packet failed.
        #
        # `decoder_reorder_pts` selects the video pts source: -1 uses the
        # frame's best_effort_timestamp, 0 uses pkt_dts, any other value keeps
        # the pts set by the decoder.
        cdef int ret = AVERROR(EAGAIN)
        cdef int got_frame
        cdef AVRational tb
        cdef int old_serial

        while True:
            # Phase 1: drain frames from the codec, but only while the packet
            # we last fetched belongs to the queue's current serial.
            if self.queue.serial == self.pkt_serial:
                while True:
                    if self.queue.abort_request:
                        return -1

                    if self.avctx.codec_type == AVMEDIA_TYPE_VIDEO:
                        ret = avcodec_receive_frame(self.avctx, frame)
                        if ret >= 0:
                            if decoder_reorder_pts == -1:
                                frame.pts = frame.best_effort_timestamp
                            elif not decoder_reorder_pts:
                                frame.pts = frame.pkt_dts

                    elif self.avctx.codec_type == AVMEDIA_TYPE_AUDIO:
                        ret = avcodec_receive_frame(self.avctx, frame)
                        if ret >= 0:
                            # Express audio pts in units of 1/sample_rate.
                            tb.num = 1
                            tb.den = frame.sample_rate
                            if frame.pts != AV_NOPTS_VALUE:
                                frame.pts = av_rescale_q(frame.pts, self.avctx.pkt_timebase, tb)
                            elif self.next_pts != AV_NOPTS_VALUE:
                                # No pts on the frame: fall back to the predicted one.
                                frame.pts = av_rescale_q(self.next_pts, self.next_pts_tb, tb)
                            if frame.pts != AV_NOPTS_VALUE:
                                # Predict the pts of the following frame.
                                self.next_pts = frame.pts + frame.nb_samples
                                self.next_pts_tb = tb

                    # For subtitle streams `ret` still holds the result of the
                    # subtitle decode performed in phase 3 below.
                    if ret == AVERROR_EOF:
                        self.finished = self.pkt_serial
                        avcodec_flush_buffers(self.avctx)
                        return 0
                    if ret >= 0:
                        return 1
                    if ret == AVERROR(EAGAIN):
                        # The codec needs more input before it can output a frame.
                        break

            # Phase 2: obtain a packet that matches the queue's current serial.
            while True:
                if not self.queue.nb_packets:
                    # Signal that the packet queue has run empty.
                    self.empty_queue_cond.lock()
                    self.empty_queue_cond.cond_signal()
                    self.empty_queue_cond.unlock()

                if self.packet_pending:
                    # Reuse the packet kept from the previous pass.
                    self.packet_pending = 0
                else:
                    old_serial = self.pkt_serial
                    if self.queue.packet_queue_get(self.pkt, 1, &self.pkt_serial) < 0:
                        return -1

                    if old_serial != self.pkt_serial:
                        # Serial changed: reset the codec and the pts prediction,
                        # and latch `seeking` if a seek position is pending.
                        avcodec_flush_buffers(self.avctx)
                        self.finished = 0
                        self.seeking = self.seek_req_pos != -1
                        self.next_pts = self.start_pts
                        self.next_pts_tb = self.start_pts_tb

                if self.queue.serial == self.pkt_serial:
                    break
                # Stale packet from an older serial: discard it and fetch another.
                av_packet_unref(self.pkt)

            # Phase 3: hand the packet to the codec.
            if self.avctx.codec_type == AVMEDIA_TYPE_SUBTITLE:
                got_frame = 0
                ret = avcodec_decode_subtitle2(self.avctx, sub, &got_frame, self.pkt)
                if ret < 0:
                    # Decode errors are treated as "need more input".
                    ret = AVERROR(EAGAIN)
                else:
                    if got_frame and self.pkt.data == NULL:
                       self.packet_pending = 1
                    if got_frame:
                        ret = 0
                    else:
                        ret = AVERROR(EAGAIN) if self.pkt.data != NULL else AVERROR_EOF
                av_packet_unref(self.pkt)
            else:
                if avcodec_send_packet(self.avctx, self.pkt) == AVERROR(EAGAIN):
                    av_log(self.avctx, AV_LOG_ERROR, "Receive_frame and send_packet both returned EAGAIN, which is an API violation.\n")
                    # Keep the packet so that it is resubmitted on the next pass.
                    self.packet_pending = 1
                else:
                    av_packet_unref(self.pkt)


================================================
FILE: ffpyplayer/player/frame_queue.pxd
================================================

include '../includes/ffmpeg.pxi'

from ffpyplayer.threading cimport MTGenerator, MTCond, MTMutex
from ffpyplayer.player.queue cimport FFPacketQueue
from ffpyplayer.player.core cimport VideoSettings

# One slot of a FrameQueue.
cdef struct Frame:
    AVFrame *frame  # decoded (or converted) picture held by this slot
    int need_conversion  # non-zero when the source must be converted with sws_scale
    AVSubtitle sub  # subtitle payload; released with avsubtitle_free
    int serial  # packet serial the frame was queued with
    double pts  # presentation timestamp for the frame
    double duration  # estimated duration of the frame
    int64_t pos  # byte position of the frame in the input file
    SDL_Overlay *bmp  # NOTE(review): not referenced by FrameQueue - confirm it is still needed
    int allocated  # set once alloc_picture() has prepared the slot
    int reallocate  # forces queue_picture() to request a new allocation
    int width  # width the slot was prepared for
    int height  # height the slot was prepared for
    AVRational sar  # sample aspect ratio copied from the source frame
    AVPixelFormat pix_fmt  # output pixel format the slot was prepared for


cdef class FrameQueue(object):
    # Fixed-size ring buffer of decoded frames shared between a decoder
    # thread (writer) and the consumer (reader); implemented in frame_queue.pyx.
    cdef:
        # Guards `size` and is signalled whenever the queue state changes.
        MTCond cond
        # Packet queue feeding this frame queue; supplies abort_request/serial.
        FFPacketQueue pktq
        # Ring buffer storage; only the first `max_size` slots are used.
        Frame queue[FRAME_QUEUE_SIZE]
        # Index of the oldest queued slot.
        int rindex
        # Index of the next slot to be written.
        int windex
        # Number of slots currently queued.
        int size
        # Number of usable slots (at most FRAME_QUEUE_SIZE).
        int max_size
        # Non-zero to keep the last shown frame in the queue.
        int keep_last
        # 1 once the frame at `rindex` has been shown (only used with keep_last).
        int rindex_shown

        # Guards `requested_alloc`.
        MTMutex alloc_mutex
        # Set by queue_picture() to ask alloc_picture() to prepare the write slot.
        int requested_alloc

    cdef void frame_queue_unref_item(self, Frame *vp) nogil
    cdef int frame_queue_signal(self) nogil except 1
    cdef int is_empty(self) nogil
    cdef Frame *frame_queue_peek(self) nogil
    cdef Frame *frame_queue_peek_next(self) nogil
    cdef Frame *frame_queue_peek_last(self) nogil
    cdef Frame *frame_queue_peek_writable(self) nogil
    cdef Frame *frame_queue_peek_readable(self) nogil
    cdef int frame_queue_push(self) nogil except 1
    cdef int frame_queue_next(self) nogil except 1
    cdef int frame_queue_prev(self) nogil
    cdef int frame_queue_nb_remaining(self) nogil
    cdef int64_t frame_queue_last_pos(self) nogil
    # copy_picture used to be declared a second time at the end of this
    # class; a single declaration is sufficient.
    cdef int copy_picture(self, Frame *vp, AVFrame *src_frame,
                          VideoSettings *player) nogil except 1
    cdef int peep_alloc(self) nogil
    cdef int queue_picture(
        self, AVFrame *src_frame, double pts, double duration, int64_t pos,
        int serial, AVPixelFormat out_fmt, int *abort_request,
        VideoSettings *player) nogil except 1
    cdef int alloc_picture(self) nogil except 1


================================================
FILE: ffpyplayer/player/frame_queue.pyx
================================================

__all__ = ('FrameQueue', )

include '../includes/ff_consts.pxi'
include "../includes/inline_funcs.pxi"

cdef extern from "string.h" nogil:
    void * memset(void *, int, size_t)

# Raise a Python Exception carrying `msg` (passed through tcode()) from code
# that runs without the GIL; the GIL is acquired just for the raise.
cdef void raise_py_exception(msg) nogil except *:
    with gil:
        raise Exception(tcode(msg))


cdef class FrameQueue(object):

    def __cinit__(FrameQueue self, MTGenerator mt_gen, FFPacketQueue pktq, int max_size, int keep_last):
        # Create the synchronisation primitives, clear the ring buffer and
        # pre-allocate one AVFrame per usable slot. `max_size` is clamped to
        # FRAME_QUEUE_SIZE; any non-zero `keep_last` is stored as 1.
        cdef int slot

        self.cond = MTCond.__new__(MTCond, mt_gen.mt_src)
        self.alloc_mutex = MTMutex.__new__(MTMutex, mt_gen.mt_src)
        self.pktq = pktq
        self.max_size = FFMIN(max_size, FRAME_QUEUE_SIZE)

        with nogil:
            memset(self.queue, 0, sizeof(self.queue))
            self.requested_alloc = 0
            self.keep_last = keep_last != 0

            for slot in range(self.max_size):
                self.queue[slot].frame = av_frame_alloc()
                # -1 marks the slot as not yet having a pixel format.
                self.queue[slot].pix_fmt = <AVPixelFormat>-1
                if self.queue[slot].frame == NULL:
                    with gil:
                        raise_py_exception(b'Could not allocate avframe buffer')

    def __dealloc__(self):
        # Release every slot of the ring buffer.
        cdef int i
        cdef Frame *vp

        with nogil:
            for i in range(self.max_size):
                vp = &self.queue[i]
                # Converted pictures own a plain av_image_alloc() buffer that is
                # not reference counted. It has to be freed *before* the frame
                # is unreferenced, because av_frame_unref() resets the frame's
                # data pointers and the buffer would otherwise be leaked.
                # vp.frame may be NULL if __cinit__ failed part way through.
                if vp.need_conversion and vp.frame != NULL:
                    av_freep(&vp.frame.data[0])
                self.frame_queue_unref_item(vp)
                av_frame_free(&vp.frame)

    cdef void frame_queue_unref_item(self, Frame *vp) nogil:
        # Drop the picture and the subtitle data referenced by the slot `vp`;
        # the AVFrame object itself is kept for reuse.
        av_frame_unref(vp.frame)
        avsubtitle_free(&vp.sub)

    cdef int frame_queue_signal(self) nogil except 1:
        # Wake a thread waiting on the queue's condition variable (used e.g.
        # by Decoder.decoder_abort after aborting the packet queue).
        self.cond.lock()
        self.cond.cond_signal()
        self.cond.unlock()
        return 0

    cdef int is_empty(self) nogil:
        # True when there is no frame left that has not been shown yet.
        return self.size - self.rindex_shown <= 0

    cdef Frame *frame_queue_peek(self) nogil:
        # Slot of the next frame to show (skips the already shown frame, if any).
        # Does not check whether the queue actually holds such a frame.
        return &self.queue[(self.rindex + self.rindex_shown) % self.max_size]

    cdef Frame *frame_queue_peek_next(self) nogil:
        # Slot right after the one returned by frame_queue_peek().
        # Does not check whether the queue actually holds such a frame.
        return &self.queue[(self.rindex + self.rindex_shown + 1) % self.max_size]

    cdef Frame *frame_queue_peek_last(self) nogil:
        # Slot at the read index, i.e. the last shown frame once rindex_shown is set.
        return &self.queue[self.rindex]

    cdef Frame *frame_queue_peek_writable(self) nogil:
        # Block until the queue has room for another frame and return the
        # slot to write into, or NULL if the packet queue was aborted.
        cdef Frame *slot = NULL

        self.cond.lock()
        while self.size >= self.max_size and not self.pktq.abort_request:
            self.cond.cond_wait()
        self.cond.unlock()

        if not self.pktq.abort_request:
            slot = &self.queue[self.windex]
        return slot

    cdef Frame *frame_queue_peek_readable(self) nogil:
        # Block until a frame that has not been shown yet is available and
        # return its slot, or NULL if the packet queue was aborted.
        cdef Frame *slot = NULL

        self.cond.lock()
        while self.size - self.rindex_shown <= 0 and not self.pktq.abort_request:
            self.cond.cond_wait()
        self.cond.unlock()

        if not self.pktq.abort_request:
            slot = &self.queue[(self.rindex + self.rindex_shown) % self.max_size]
        return slot

    cdef int frame_queue_push(self) nogil except 1:
        # Publish the slot at the write index: advance the write index
        # (wrapping at max_size), bump the size and wake a waiting reader.
        self.windex += 1
        if self.windex == self.max_size:
            self.windex = 0

        # `size` is only modified while holding the condition's lock.
        self.cond.lock()
        self.size += 1
        self.cond.cond_signal()
        self.cond.unlock()
        return 0

    cdef int frame_queue_next(self) nogil except 1:
        # Advance the reader by one frame.
        if self.keep_last and not self.rindex_shown:
            # First call with keep_last: only mark the current frame as shown
            # and keep it in the queue.
            self.rindex_shown = 1
            return 0

        # Release the oldest slot and move the read index (wrapping at max_size).
        self.frame_queue_unref_item(&self.queue[self.rindex])
        self.rindex += 1
        if self.rindex == self.max_size:
            self.rindex = 0

        # Shrink the queue and wake a writer waiting for free space.
        self.cond.lock()
        self.size -= 1
        self.cond.cond_signal()
        self.cond.unlock()
        return 0

    cdef int frame_queue_prev(self) nogil:
        # TODO: https://github.com/FFmpeg/FFmpeg/commit/37d201aad9f7e7f233955345aee1198421a68f5e
        # jump back to the previous frame if available by resetting rindex_shown
        # Returns the previous value of rindex_shown, i.e. non-zero when a
        # shown frame was available to go back to.
        cdef int ret = self.rindex_shown
        self.rindex_shown = 0
        return ret

    cdef int frame_queue_nb_remaining(self) nogil:
        # return the number of undisplayed frames in the queue
        # (queued frames minus the one already shown, if any)
        return self.size - self.rindex_shown

    cdef int64_t frame_queue_last_pos(self) nogil:
        # Byte position of the last shown frame, or -1 when no frame has been
        # shown yet or the shown frame belongs to an outdated packet serial.
        cdef Frame *last_shown = &self.queue[self.rindex]

        if not self.rindex_shown or last_shown.serial != self.pktq.serial:
            return -1
        return last_shown.pos

    cdef int copy_picture(self, Frame *vp, AVFrame *src_frame,
                           VideoSettings *player) nogil except 1:
        # Transfer `src_frame` into the queue slot `vp`.
        #
        # Without conversion the frame reference is simply moved into the
        # slot. Otherwise the picture is converted with swscale into the
        # slot's own buffer and `src_frame` is unreferenced afterwards.
        # Raises an Exception if the sws flags cannot be evaluated or the
        # conversion context cannot be created.
        cdef const AVDictionaryEntry *flags_entry
        cdef const AVClass *sws_cls
        cdef const AVOption *flags_opt
        cdef int status

        if not vp.need_conversion:
            av_frame_unref(vp.frame)
            av_frame_move_ref(vp.frame, src_frame)
            return 0

        # Honour a user supplied "sws_flags" option, if present.
        flags_entry = av_dict_get(player.sws_dict, b"sws_flags", NULL, 0)
        if flags_entry != NULL:
            sws_cls = sws_get_class()
            flags_opt = av_opt_find(
                &sws_cls, b"sws_flags", NULL, 0, AV_OPT_SEARCH_FAKE_OBJ)
            status = av_opt_eval_flags(
                &sws_cls, flags_opt, flags_entry.value, <int *>&player.sws_flags)
            if status < 0:
                raise_py_exception(b'Could not av_opt_eval_flags')

        # Source and destination share the same dimensions; only the pixel
        # format changes.
        player.img_convert_ctx = sws_getCachedContext(
            player.img_convert_ctx,
            vp.width, vp.height, <AVPixelFormat>src_frame.format,
            vp.width, vp.height, vp.pix_fmt,
            player.sws_flags, NULL, NULL, NULL)
        if player.img_convert_ctx == NULL:
            av_log(NULL, AV_LOG_FATAL, b"Cannot initialize the conversion context\n")
            raise_py_exception(b'Cannot initialize the conversion context.')

        sws_scale(
            player.img_convert_ctx,
            <const unsigned char* const*>src_frame.data, src_frame.linesize,
            0, vp.height, vp.frame.data, vp.frame.linesize)
        av_frame_unref(src_frame)
        return 0

    cdef int alloc_picture(self) nogil except 1:
        ''' allocate a picture (needs to do that in main thread to avoid
        potential locking problems) '''
        # Services a pending allocation request posted by queue_picture():
        # prepares the slot at the write index, marks it as allocated and
        # wakes the thread waiting in queue_picture(). Does nothing when no
        # request is pending. Raises an Exception if the image buffer cannot
        # be allocated.
        cdef Frame *vp
        self.alloc_mutex.lock()
        if self.requested_alloc:
            vp = &self.queue[self.windex]
            self.frame_queue_unref_item(vp)

            if vp.need_conversion:
                # The converted picture lives in a buffer owned by the slot.
                av_freep(&vp.frame.data[0])
                if (av_image_alloc(vp.frame.data, vp.frame.linesize, vp.width,
                                   vp.height, vp.pix_fmt, 1) < 0):
                    av_log(NULL, AV_LOG_FATAL, b"Could not allocate avframe buffer.\n")
                    # Do not leave the mutex locked when the error propagates,
                    # otherwise every later user of alloc_mutex would block.
                    self.alloc_mutex.unlock()
                    raise_py_exception(b'Could not allocate avframe buffer')

                vp.frame.width = vp.width
                vp.frame.height = vp.height
                vp.frame.format = <int>vp.pix_fmt

            # `allocated` is what queue_picture() waits for on the condition.
            self.cond.lock()
            vp.allocated = 1
            self.cond.cond_signal()
            self.cond.unlock()
            self.requested_alloc = 0
        self.alloc_mutex.unlock()
        return 0

    cdef int peep_alloc(self) nogil:
        # Return whether an allocation request is currently pending, reading
        # the flag under alloc_mutex.
        cdef int pending

        self.alloc_mutex.lock()
        pending = self.requested_alloc
        self.alloc_mutex.unlock()
        return pending

    cdef int queue_picture(
            self, AVFrame *src_frame, double pts, double duration, int64_t pos,
            int serial, AVPixelFormat out_fmt, int *abort_request,
            VideoSettings *player) nogil except 1:
        # Append `src_frame` to the queue together with its timing metadata.
        #
        # Blocks until a writable slot is available. If the slot does not
        # match the frame's size / output format yet, an allocation request is
        # posted and this call waits until alloc_picture() has serviced it.
        # Returns 0 on success and -1 when the packet queue was aborted.
        cdef Frame *vp

        IF 0:# and defined(DEBUG_SYNC):
            av_log(NULL, AV_LOG_DEBUG, b"frame_type=%c pts=%0.3f\n",
                   av_get_picture_type_char(src_frame.pict_type), pts)

        vp = self.frame_queue_peek_writable()
        if vp == NULL:
            return -1

        vp.sar = src_frame.sample_aspect_ratio

        # alloc or resize hardware picture buffer
        if (vp.reallocate or (not vp.allocated) or
            vp.width != src_frame.width or vp.height != src_frame.height
            or <int>vp.pix_fmt != <int>out_fmt):
            vp.allocated = 0
            vp.reallocate = 0
            vp.width = src_frame.width
            vp.height = src_frame.height
            vp.pix_fmt = out_fmt
            # Software conversion is only needed when filters are compiled
            # out and the source format differs from the requested one.
            vp.need_conversion = not CONFIG_AVFILTER and out_fmt != <AVPixelFormat>src_frame.format

            # the allocation must be done in the main thread to avoid locking problems.
            self.alloc_mutex.lock()
            self.requested_alloc = 1
            self.alloc_mutex.unlock()

            # wait until the picture is allocated
            # NOTE(review): peep_alloc() below takes alloc_mutex while cond is
            # held, whereas alloc_picture() takes cond while holding
            # alloc_mutex - confirm this lock ordering cannot deadlock on abort.
            self.cond.lock()
            while (not vp.allocated) and not self.pktq.abort_request:
                self.cond.cond_wait()
            ''' if the queue is aborted, we have to pop the pending ALLOC event
            or wait for the allocation to complete '''
            if self.pktq.abort_request and self.peep_alloc():
                while not vp.allocated and not abort_request[0]:
                    self.cond.cond_wait()
            self.cond.unlock()

            if self.pktq.abort_request:
                return -1

        # if the frame is not skipped, then display it
        self.copy_picture(vp, src_frame, player)

        # Record the metadata and publish the slot to the reader.
        vp.pts = pts
        vp.duration = duration
        vp.pos = pos
        vp.serial = serial
        self.frame_queue_push()
        return 0


================================================
FILE: ffpyplayer/player/player.pxd
================================================

include '../includes/ffmpeg.pxi'

from ffpyplayer.threading cimport MTGenerator, MTThread, MTMutex
from ffpyplayer.player.core cimport VideoState, VideoSettings
from ffpyplayer.pic cimport Image


cdef class MediaPlayer(object):
    # C-level attributes and methods of the player implemented in player.pyx.
    cdef:
        # Player-wide settings structure handed to the internals by pointer.
        VideoSettings settings
        # NOTE(review): presumably the threading backend selected via
        # `thread_lib` - confirm against __cinit__.
        MTGenerator mt_gen
        # NOTE(review): presumably the internal playback state - confirm.
        VideoState ivs
        # NOTE(review): presumably the image handed out by get_frame - confirm.
        Image next_image
        # NOTE(review): presumably set once close_player() has run - confirm.
        int is_closed
        # Byte-encoded deep copy of the `ff_opts` passed to the constructor.
        dict ff_opts

    cdef void _seek(self, double pts, int relative, int seek_by_bytes, int accurate) nogil
    cpdef close_player(self)


================================================
FILE: ffpyplayer/player/player.pyx
================================================

__all__ = ('MediaPlayer', )

include '../includes/ff_consts.pxi'
include "../includes/inline_funcs.pxi"

cdef extern from "Python.h":
    void PyEval_InitThreads()

cdef extern from "math.h" nogil:
    double NAN
    int isnan(double x)

cdef extern from "string.h" nogil:
    void * memset(void *, int, size_t)

from ffpyplayer.threading cimport MTGenerator, SDL_MT, Py_MT, MTThread, MTMutex
from ffpyplayer.player.queue cimport FFPacketQueue
from ffpyplayer.player.core cimport VideoState, VideoSettings
from ffpyplayer.pic cimport Image
from libc.stdio cimport printf
from cpython.ref cimport PyObject

import ffpyplayer.tools  # required to init ffmpeg
from ffpyplayer.tools import initialize_sdl_aud, encode_to_bytes, loglevels
from copy import deepcopy


cdef inline void *grow_array(void *array, int elem_size, int *size, int new_size) nogil:
    # Grow `array` (elements of `elem_size` bytes, currently `size[0]`
    # elements) to hold `new_size` elements, zero-filling the new tail.
    #
    # Returns the possibly moved array and updates `size[0]` on success.
    # The array is returned untouched when it is already large enough, and
    # NULL is returned when the request is too large or reallocation fails.
    cdef uint8_t *grown
    cdef int old_count = size[0]

    if new_size >= INT_MAX / elem_size:
        return NULL
    if old_count >= new_size:
        return array

    grown = <uint8_t *>av_realloc_array(array, new_size, elem_size)
    if grown == NULL:
        return NULL

    # Only the newly added elements are cleared; existing ones are preserved.
    memset(grown + old_count * elem_size, 0, (new_size - old_count) * elem_size)
    size[0] = new_size
    return grown


cdef class MediaPlayer(object):
    '''An FFmpeg based media player.

    Was originally ported from FFplay. Most options offered in FFplay are
    also available here.

    The class provides a player interface to a media file. Video components
    of the file are returned with :meth:`get_frame`. Audio is played directly
    using SDL. And subtitles are acquired either through the callback function
    (text subtitles only), or are overlaid directly using the subtitle filter.

    .. note::

        All strings that are passed to the program, e.g. ``filename`` will first be
        internally encoded using utf-8 before handing off to FFmpeg.

    .. note::

        If playing or even opening multiple audio files simultaneously
        SDL2_mixer is required. The audio parameters of the first audio file opened
        will set the audio output parameters for all the subsequent audio files opened
        until they are all closed and a new file is opened.

    :Parameters:

        `filename`: str
            The filename or url of the media object. This can be physical files,
            remote files or even webcam names, e.g. for direct show or Video4Linux
            webcams. The ``f`` specifier in ``ff_opts`` can be used to indicate the
            format needed to open the file (e.g. dshow).
        `callback`: Function or ref to function or None
            A function, which if not None will be called when an internal thread quits,
            when eof is reached (as determined by whichever is the main ``sync`` stream,
            audio or video), or when text subtitles are available. In future version it
            may be extended.

            The function takes two parameters, ``selector``, and ``value``.
            ``selector`` can be one of:

            `eof`:
                When eof is reached. ``value`` is the empty string.

            `display_sub`:
                When a new subtitle string is available. ``value`` will be a
                5-tuple of the form ``(text, fmt, pts, start, end)``. Where

                `text`: is the unicode text
                `fmt`: is the subtitle format e.g. 'ass'
                `pts`: is the timestamp of the text
                `start`: is the time in video time when to start displaying the text
                `end`: is the time in video time when to end displaying the text

            `exceptions or thread exits`:
                In case of an exception by the internal audio, video, subtitle, or read threads,
                or when these threads exit, it is called with a ``value`` of the error message
                or an empty string when an error is not available.

                The ``selector`` will be one of
                ``audio:error``, ``audio:exit``, ``video:error``, ``video:exit``,
                ``subtitle:error``, ``subtitle:exit``, ``read:error``, or ``read:exit``
                indicating which thread called and why.

            .. warning::

                This function gets called from a second internal thread.

        `loglevel`: str
            The level of logs to emit. Defaults to ``'trace'``. Its value is one of the keys
            of :attr:`~ffpyplayer.tools.loglevels`.

            Although log are also filtered globally according to the level of
            :attr:`~ffpyplayer.tools.set_loglevel`, this is applied first to quickly filter
            logs generated by this instance (it's not applied to internal ffmpeg logs).
        `thread_lib`: str
            The threading library to use internally. Can be one of 'SDL' or 'python'.

            .. warning::

                If the python threading library is used, care must be taken to delete
                the player before exiting python, otherwise it may hang. The reason is
                that the internal threads are created as non-daemon, consequently, when the
                python main thread exits, the internal threads will keep python alive.
                By deleting the player directly, the internal threads will be shut down
                before python exits.

        `audio_sink`: str
            Currently it must be 'SDL'. Defaults to 'SDL'.
        `lib_opts`: dict
            A dictionary of options that will be passed to the ffmpeg libraries,
            codecs, sws, swr, and formats when opening them. This accepts most of the
            options that can be passed to FFplay. Examples are "threads":"auto",
            "lowres":"1" etc. Both the keywords and values must be strings.
            See :ref:`examples` for `lib_opts` usage examples.
        `ff_opts`: dict
            A dictionary with options for the player. Following are
            the available options. Note, many options have identical names and meaning
            as in the FFplay options: www.ffmpeg.org/ffplay.html :

            `paused`: bool
                If True, the player will be in a paused state after creation, otherwise,
                it will immediately start playing. Defaults to False.
            `cpuflags`: str
                Similar to ffplay
            `max_alloc`: int
                Set the maximum size that may be allocated in one block.
            `infbuf`: bool
                If True, do not limit the input buffer size and read as much data as possible
                from the input as soon as possible. Enabled by default for realtime streams,
                where data may be dropped if not read in time. Use this option to enable
                infinite buffers for all inputs.
            `framedrop`: bool
                Drop video frames if video is out of sync. Enabled by default if the master
                clock (``sync``) is not set to video. Use this option to enable/disable frame
                dropping for all master clock sources.
            `loop`: int
                Loops movie playback <number> times. 0 means forever. Defaults to 1.
            `autoexit`: bool
                If True, the player stops on eof. Defaults to False.
            `lowres`: int
                low resolution decoding, 1-> 1/2 size, 2->1/4 size, defaults to zero.
            `drp`: int
                let decoder reorder pts 0=off 1=on -1=auto. Defaults to 0.
            `genpts`: bool
                Generate missing pts even if it requires parsing future frames, defaults to False.
            `fast`: bool
                Enable non-spec-compliant optimizations, defaults to False.
            `stats`: bool
                Print several playback statistics, in particular show the stream duration,
                the codec parameters, the current position in the stream and the audio/video
                synchronisation drift. Defaults to False.
            `pixel_format`: str
                Sets the pixel format. Note, this sets the format of the input file. For the output
                format see ``out_fmt``.
            `t`: float
                Play only ``t`` seconds of the audio/video. Defaults to the full audio/video.
            `ss`: float
                Seek to pos ``ss`` into the file when starting. Note that in most formats it is not
                possible to seek exactly, so it will seek to the nearest seek point to ``ss``.
                Defaults to the start of the file.
            `sync`: str
                Set the master clock to audio, video, or external (ext). Default is audio.
                The master clock is used to control audio-video synchronization. Most
                media players use audio as master clock, but in some cases (streaming or
                high quality broadcast) it is necessary to change that. Also, setting
                it to video can ensure the reproducibility of timestamps of video frames.
            `acodec, vcodec, and scodec`: str
                Forces a specific audio, video, and/or subtitle decoder. Defaults to None.
            `ast`: str
                Select the desired audio stream. If this option is not specified, the "best" audio
                stream is selected in the program of the already selected video stream.
                See https://ffmpeg.org/ffplay.html#Stream-specifiers-1 for the format.
            `vst`: str
                Select the desired video stream. If this option is not specified, the "best" video
                stream is selected.
                See https://ffmpeg.org/ffplay.html#Stream-specifiers-1 for the format.
            `sst`: str
                Select the desired subtitle stream. If this option is not specified, the "best" subtitle
                stream is selected in the program of the already selected video or audio stream.
                See https://ffmpeg.org/ffplay.html#Stream-specifiers-1 for the format.
            `an`: bool
                Disable audio. Default to False.
            `vn`: bool
                Disable video. Default to False.
            `sn`: bool
                Disable subtitle. Default to False.
            `f`: str
                Force the format to open the file with. E.g. dshow for webcams on windows.
                See :ref:`dshow-example` for an example. Defaults to none specified.
            `vf`: str or list of strings
                The filtergraph(s) used to filter the video stream. A filtergraph is applied to the
                stream, and must have a single video input and a single video output.
                In the filtergraph, the input is associated to the label in, and the output
                to the label out. See the ffmpeg-filters manual for more information
                about the filtergraph syntax.

                Examples are 'crop=100:100' to crop, 'vflip' to flip vertically, 'subtitles=filename'
                to overlay subtitles from another media or text file etc. If a list of filters is
                specified, :meth:`select_video_filter` can be used to select the desired filter.

                CONFIG_AVFILTER must be True (the default) when compiling in order to use this.
                Defaults to no filters.
            `af`: str
                Similar to ``vf``. However, unlike ``vf``, ``af`` only accepts a single string
                filter and not a list of filters.
            `x`: int
                The desired width of the output frames returned by :meth:`get_frame`. Accepts the
                same values as the width parameter of :meth:`set_size`.
            `y`: int
                The desired height of the output frames returned by :meth:`get_frame`. Accepts the
                same values as the height parameter of :meth:`set_size`.

                CONFIG_AVFILTER must be True (the default) when compiling in order to use this.
                Defaults to 0.
            `out_fmt`: str
                The desired pixel format for the data returned by :meth:`get_frame`. Accepts
                the same value as :meth:`set_output_pix_fmt` and can be
                one of :attr:`ffpyplayer.tools.pix_fmts`. Defaults to rgb24.
            `autorotate`: bool
                Whether to automatically rotate the video according to presentation metadata.
                Defaults to True.
            `volume`: float
                The default volume. A value between 0.0 - 1.0. Defaults to 1.0.
            `find_stream_info`: bool
                Read and decode the streams to fill missing information with heuristics.
                Defaults to True.
            `filter_threads`: int
                The number of filter threads per graph. Defaults to zero
                (determined by the number of available CPUs).

    For example, a simple player:

    .. code-block:: python

        from ffpyplayer.player import MediaPlayer
        player = MediaPlayer(filename)
        while 1:
            frame, val = player.get_frame()
            if val == 'eof':
                break
            elif frame is None:
                time.sleep(0.01)
            else:
                img, t = frame
                print val, t, img.get_pixel_format(), img.get_buffer_size()
                time.sleep(val)
        # which prints
        0.0 0.0 rgb24 (929280, 0, 0, 0)
        0.0 0.0611284 rgb24 (929280, 0, 0, 0)
        0.0411274433136 0.1222568 rgb24 (929280, 0, 0, 0)
        0.122380971909 0.1833852 rgb24 (929280, 0, 0, 0)
        0.121630907059 0.2445136 rgb24 (929280, 0, 0, 0)
        ...

    See also :ref:`examples`.

    .. warning::

        Most of the methods of this class are not thread safe. That is, they
        should not be called from different threads for the same instance
        without protecting them.
    '''

    def __cinit__(self, filename, callback=None, loglevel='trace', ff_opts={},
                  thread_lib='SDL', audio_sink='SDL', lib_opts={}, **kargs):
        # Translates the Python-level options into the VideoSettings struct,
        # validates the thread library / audio sink / output pixel format, and
        # finally creates the VideoState and starts it through cInit().
        #
        # Raises ValueError for an unknown input format or invalid `drp`,
        # `sync` or `cpuflags` values, MemoryError if the filename cannot be
        # duplicated, and Exception for the remaining configuration errors.
        cdef unsigned flags
        cdef VideoSettings *settings = &self.settings
        cdef AVPixelFormat out_fmt
        cdef int res, paused
        # The option dicts are deep-copied before being passed through
        # encode_to_bytes, so the caller's dicts (including the shared mutable
        # defaults) are not the objects used below. The un-encoded ff_opts is
        # kept only so that 'sync' can be compared against str literals.
        ff_opts_orig = ff_opts
        # NOTE(review): several settings fields below are assigned directly from
        # values stored in ff_opts; storing the dict on self presumably keeps
        # those objects alive for as long as the C side uses them - confirm.
        ff_opts = self.ff_opts = encode_to_bytes(deepcopy(ff_opts))
        lib_opts = encode_to_bytes(deepcopy(lib_opts))
        kargs = encode_to_bytes(deepcopy(kargs))
        filename = encode_to_bytes(filename)

        self.is_closed = 0
        # Start from an all-zero settings struct; fields not set below stay 0/NULL.
        memset(&self.settings, 0, sizeof(VideoSettings))
        self.ivs = None
        PyEval_InitThreads()

        settings.loglevel = loglevels[loglevel]
        # Default scaler flags; lib_opts may add further library options below.
        av_dict_set(&settings.sws_dict, b"flags", b"bicubic", 0)
        # set x, or y to -1 to preserve pixel ratio
        settings.screen_width  = ff_opts['x'] if 'x' in ff_opts else 0
        settings.screen_height = ff_opts['y'] if 'y' in ff_opts else 0
        if not CONFIG_AVFILTER and (settings.screen_width or settings.screen_height):
            raise Exception('You can only set the screen size when avfilter is enabled.')
        settings.audio_disable = bool(ff_opts['an']) if 'an' in ff_opts else 0
        settings.video_disable = bool(ff_opts['vn']) if 'vn' in ff_opts else 0
        settings.subtitle_disable = bool(ff_opts['sn']) if 'sn' in ff_opts else 0

        # No stream specifier requested unless 'ast' / 'vst' / 'sst' is given.
        settings.wanted_stream_spec[<int>AVMEDIA_TYPE_AUDIO] = \
        settings.wanted_stream_spec[<int>AVMEDIA_TYPE_VIDEO] = \
        settings.wanted_stream_spec[<int>AVMEDIA_TYPE_SUBTITLE] = NULL

        if 'ast' in ff_opts:
            settings.wanted_stream_spec[<int>AVMEDIA_TYPE_AUDIO] =  ff_opts['ast']
        if 'vst' in ff_opts:
            settings.wanted_stream_spec[<int>AVMEDIA_TYPE_VIDEO] =  ff_opts['vst']
        if 'sst' in ff_opts:
            settings.wanted_stream_spec[<int>AVMEDIA_TYPE_SUBTITLE] =  ff_opts['sst']
        # 'ss' and 't' are scaled by 1e6 (presumably seconds -> AV_TIME_BASE
        # units - TODO confirm); AV_NOPTS_VALUE marks them as unset.
        settings.start_time = ff_opts['ss'] * 1000000 if 'ss' in ff_opts else AV_NOPTS_VALUE
        settings.duration = ff_opts['t'] * 1000000 if 't' in ff_opts else AV_NOPTS_VALUE
        settings.autorotate = bool(ff_opts.get('autorotate', 1))
        settings.find_stream_info = bool(ff_opts.get('find_stream_info', 1))
        settings.filter_threads = int(ff_opts.get('filter_threads', 0))
        # -1 here; seek() treats any value <= 0 as "do not seek by bytes" when
        # its seek_by_bytes argument is 'auto'.
        settings.seek_by_bytes = -1
        settings.file_iformat = NULL
        if 'f' in ff_opts:
            settings.file_iformat = av_find_input_format(ff_opts['f'])
            if settings.file_iformat == NULL:
                raise ValueError('Unknown input format: %s.' % ff_opts['f'])
        if 'pixel_format' in ff_opts:
            av_dict_set(<AVDictionary **>&settings.format_opts, "pixel_format", ff_opts['pixel_format'], 0)
        settings.show_status = bool(ff_opts['stats']) if 'stats' in ff_opts else 0
        settings.fast = bool(ff_opts['fast']) if 'fast' in ff_opts else 0
        settings.genpts = bool(ff_opts['genpts']) if 'genpts' in ff_opts else 0
        # 'drp' only accepts -1, 0 or 1; -1 is the default.
        settings.decoder_reorder_pts = -1
        if 'drp' in ff_opts:
            val = ff_opts['drp']
            if val != 1 and val != 0 and val != -1:
                raise ValueError('Invalid drp option value.')
            settings.decoder_reorder_pts = val
        settings.lowres = ff_opts['lowres'] if 'lowres' in ff_opts else 0
        settings.av_sync_type = AV_SYNC_AUDIO_MASTER
        # The 0.0 - 1.0 volume is scaled to the SDL range and clipped to it.
        settings.audio_volume = av_clip(ff_opts.get('volume', 1) * SDL_MIX_MAXVOLUME, 0, SDL_MIX_MAXVOLUME)
        # Uses the un-encoded dict so the value can be compared with str literals.
        if 'sync' in ff_opts_orig:
            val = ff_opts_orig['sync']
            if val == 'audio':
                settings.av_sync_type = AV_SYNC_AUDIO_MASTER
            elif val == 'video':
                settings.av_sync_type = AV_SYNC_VIDEO_MASTER
            elif val == 'ext':
                settings.av_sync_type = AV_SYNC_EXTERNAL_CLOCK
            else:
                raise ValueError('Invalid sync option value.')
        settings.autoexit = bool(ff_opts['autoexit']) if 'autoexit' in ff_opts else 0
        settings.loop = ff_opts['loop'] if 'loop' in ff_opts else 1
        settings.framedrop = bool(ff_opts['framedrop']) if 'framedrop' in ff_opts else -1
        # -1 means not infinite, not respected if real time.
        settings.infinite_buffer = 1 if 'infbuf' in ff_opts and ff_opts['infbuf'] else -1

        # Filter options are only compiled in when CONFIG_AVFILTER is set.
        IF CONFIG_AVFILTER:
            if 'vf' in ff_opts:
                vfilters = ff_opts['vf']
                # A single filter string is normalised to a one-element list.
                if isinstance(vfilters, basestring):
                    vfilters = ff_opts['vf'] = [vfilters]
                # Grow the C array by one slot per filter and store the filter
                # in the newly added last slot.
                for vfilt in vfilters:
                    settings.vfilters_list = <const char **>grow_array(
                        settings.vfilters_list, sizeof(settings.vfilters_list[0]),
                        &settings.nb_vfilters, settings.nb_vfilters + 1)
                    settings.vfilters_list[settings.nb_vfilters - 1] = vfilt

            settings.afilters = NULL
            if 'af' in ff_opts:
                settings.afilters = ff_opts['af']
            settings.avfilters = NULL
            if 'avf' in ff_opts:
                settings.avfilters = ff_opts['avf']
        settings.audio_codec_name = NULL
        if 'acodec' in ff_opts:
            settings.audio_codec_name = ff_opts['acodec']
        settings.video_codec_name = NULL
        if 'vcodec' in ff_opts:
            settings.video_codec_name = ff_opts['vcodec']
        settings.subtitle_codec_name = NULL
        if 'scodec' in ff_opts:
            settings.subtitle_codec_name = ff_opts['scodec']
        if 'max_alloc' in ff_opts:
            av_max_alloc(ff_opts['max_alloc'])
        if 'cpuflags' in ff_opts:
            # Start from the detected flags and let the option string modify them.
            flags = av_get_cpu_flags()
            if av_parse_cpu_caps(&flags, ff_opts['cpuflags']) < 0:
                raise ValueError('Invalid cpuflags option value.')
            av_force_cpu_flags(flags)

        # Each library option is handed to opt_default together with the option
        # dictionaries of this player; a negative result is reported as an
        # unknown option.
        for k, v in lib_opts.iteritems():
            k_new = k.encode('utf8')
            if opt_default(
                    k_new, v, NULL, &settings.sws_dict, &settings.swr_opts,
                    &settings.resample_opts, &settings.format_opts,
                    &self.settings.codec_opts) < 0:
                raise Exception('library option %s: %s not found' % (k, v))

        # filename can start with pipe:
        # The name is duplicated with av_strdup; close_player() releases it
        # with av_free.
        settings.input_filename = av_strdup(<char *>filename)
        if settings.input_filename == NULL:
            raise MemoryError()
        # Select the threading backend used for the MTGenerator.
        if thread_lib == 'SDL':
            if not CONFIG_SDL:
                raise Exception('FFPyPlayer extension not compiled with SDL support.')
            self.mt_gen = MTGenerator(SDL_MT)
        elif thread_lib == 'python':
            self.mt_gen = MTGenerator(Py_MT)
        else:
            raise Exception('Thread library parameter not recognized.')

        # 'SDL' is the only audio sink accepted here.
        settings.audio_sdl = audio_sink == 'SDL'
        if audio_sink != 'SDL':
            raise Exception('Audio sink "{}" not recognized'.format(audio_sink))
        if callback is not None and not callable(callback):
            raise Exception('Video sink parameter not recognized.')

        # Output pixel format; rgb24 when 'out_fmt' is not given.
        if 'out_fmt' in ff_opts:
            out_fmt = av_get_pix_fmt(ff_opts['out_fmt'])
        else:
            out_fmt = av_get_pix_fmt(b'rgb24')
        if out_fmt == AV_PIX_FMT_NONE:
            raise Exception('Unrecognized output pixel format.')

        if not settings.audio_disable:
            initialize_sdl_aud()

        # Image created with no_create=True; get_frame() passes it to
        # video_refresh and replaces it after each frame it returns.
        self.next_image = Image.__new__(Image, no_create=True)
        self.ivs = VideoState(callback)
        paused = ff_opts.get('paused', False)
        # cInit is called with the GIL released.
        with nogil:
            self.ivs.cInit(self.mt_gen, settings, paused, out_fmt)

    def __dealloc__(self):
        # Make sure the player is shut down and its C resources are released
        # when the object is destroyed; close_player() does nothing if the
        # player has already been closed.
        self.close_player()

    cpdef close_player(self):
        '''Shuts the player down and releases every resource it holds.

        Calling this more than once is harmless: only the first call does any
        work.

        .. warning::

            After calling this method, calling any other class method on this instance may
            result in a crash or program corruption.
        '''
        if not self.is_closed:
            self.is_closed = 1

            # cquit has to be called explicitly, otherwise the read thread
            # never exits (the original author suspected a circular reference
            # in the event loop).
            if self.ivs:
                with nogil:
                    self.ivs.cquit()
            self.ivs = None

            # Release the option dictionaries owned by the settings struct.
            av_dict_free(&self.settings.format_opts)
            av_dict_free(&self.settings.resample_opts)
            av_dict_free(&self.settings.codec_opts)
            av_dict_free(&self.settings.swr_opts)
            av_dict_free(&self.settings.sws_dict)
            IF CONFIG_AVFILTER:
                av_freep(&self.settings.vfilters_list)
            # The filename was duplicated with av_strdup in __cinit__.
            # avformat_network_deinit() and SDL_Quit() are not called here.
            av_free(self.settings.input_filename)

    def get_frame(self, force_refresh=False, show=True, *args):
        '''Returns the next video frame, if one is ready.

        Frames are returned as :class:`ffpyplayer.pic.Image` instances. If
        CONFIG_AVFILTER is True when compiling, or if the video pixel format is
        the same as the output pixel format, the Image returned is just a new
        reference to the internal buffers and no copying occurs (see
        :class:`ffpyplayer.pic.Image`), otherwise the buffers are newly created
        and copied.

        :Parameters:

            `force_refresh`: bool
                If True, a new instance of the last frame will be returned again.
                Defaults to False.
            `show`: bool
                If True an image is returned as normal. If False, no image is
                returned even when one is available; this is useful when only
                the timestamps are needed, possibly together with
                ``force_refresh``. Defaults to True.

        Any additional positional arguments are accepted and ignored.

        :returns:

            `A 2-tuple of (frame, val)` where
                `frame`: is None or a 2-tuple
                `val`: is either 'paused', 'eof', or a float

            If ``val`` is either ``'paused'`` or ``'eof'`` then ``frame`` is None.

            Otherwise, if ``frame`` is not None, ``val`` is the realtime time from now
            one should wait before displaying this frame to the user to achieve a play
            rate of 1.0.

            Finally, if ``frame`` is not None then it's a 2-tuple of ``(image, pts)`` where:

                `image`: The :class:`ffpyplayer.pic.Image` instance containing
                    the frame. The size of the image can change because the output
                    can be resized dynamically (see :meth:`set_size`). If `show` was
                    False, it will be None.
                `pts`: The presentation timestamp of this frame. This is the time
                    when the frame should be displayed to the user in video time (i.e.
                    not realtime).

        .. note::

            The audio plays at a normal play rate, independent of when and if
            this function is called. Therefore, 'eof' will only be received when
            the audio is complete, even if all the frames have been read (unless
            audio is disabled or sync is set to video). I.e. a None frame will
            be sent after all the frames have been read until eof.

        For example, playing as soon as frames are read:

        .. code-block:: python

            >>> while 1:
            ...     frame, val = player.get_frame()
            ...     if val == 'eof':
            ...         break
            ...     elif frame is None:
            ...         time.sleep(0.01)
            ...         print('not ready')
            ...     else:
            ...         img, t = frame
            ...         print(val, t, img)
            not ready
            0.0 0.0 <ffpyplayer.pic.Image object at 0x023D17B0>
            not ready
            0.0351264476776 0.0611284 <ffpyplayer.pic.Image object at 0x023D1828>
            0.096254825592 0.1222568 <ffpyplayer.pic.Image object at 0x02411800>
            not ready
            0.208511352539 0.1833852 <ffpyplayer.pic.Image object at 0x02411B70>

        vs displaying frames at their proper times:

        .. code-block:: python

            >>> while 1:
            ...     frame, val = player.get_frame()
            ...     if val == 'eof':
            ...         break
            ...     elif frame is None:
            ...         time.sleep(0.01)
            ...         print('not ready')
            ...     else:
            ...         img, t = frame
            ...         print(val, t, img)
            ...         time.sleep(val)
            not ready
            0.0 0.0 <ffpyplayer.pic.Image object at 0x02411800>
            not ready
            0.0351274013519 0.0611284 <ffpyplayer.pic.Image object at 0x02411878>
            0.0602538585663 0.1222568 <ffpyplayer.pic.Image object at 0x024118A0>
            0.122507572174 0.1833852 <ffpyplayer.pic.Image object at 0x024118C8>
            ...
            0.0607514381409 1.222568 <ffpyplayer.pic.Image object at 0x02411B70>
            0.0618767738342 1.2836964 <ffpyplayer.pic.Image object at 0x02411B98>
            0.0610010623932 1.3448248 <ffpyplayer.pic.Image object at 0x02411BC0>
            0.0611264705658 1.4059532 <ffpyplayer.pic.Image object at 0x02411BE8>

        Or when the output format is yuv420p:

        .. code-block:: python

            >>> player = MediaPlayer(filename, callback=weakref.ref(callback),
            ... ff_opts={'out_fmt':'yuv420p'})
            >>> while 1:
            ...     frame, val = player.get_frame()
            ...     if val == 'eof':
            ...         break
            ...     elif frame is None:
            ...         time.sleep(0.01)
            ...         print('not ready')
            ...     else:
            ...         img, t = frame
            ...         print(val, t, img.get_pixel_format(), img.get_buffer_size())
            ...         time.sleep(val)
            ...
            0.0 0.0 yuv420p (309760, 77440, 77440, 0)
            0.0361273288727 0.0611284 yuv420p (309760, 77440, 77440, 0)
            0.0502526760101 0.1222568 yuv420p (309760, 77440, 77440, 0)
            0.12150645256 0.1833852 yuv420p (309760, 77440, 77440, 0)
            0.122756242752 0.2445136 yuv420p (309760, 77440, 77440, 0)
        '''
        cdef Image frame_img = self.next_image
        cdef int status, refresh = force_refresh
        cdef int want_image = show
        cdef double pts, remaining_time

        # Without `show` no image is handed to video_refresh, so only the
        # timing information is produced.
        if not want_image:
            frame_img = None
        with nogil:
            status = self.ivs.video_refresh(frame_img, &pts, &remaining_time, refresh)

        # Status codes 1, 2 and 3 all mean that no frame is returned.
        if status == 1:
            return (None, 'paused')
        if status == 2:
            return (None, 'eof')
        if status == 3:
            return (None, remaining_time)

        # The image that was just handed out now belongs to the caller, so a
        # fresh one is prepared for the next call.
        if want_image:
            self.next_image = Image.__new__(Image, no_create=True)
        return ((frame_img, pts), remaining_time)

    def get_metadata(self):
        '''Returns metadata of the file being played.

        :returns:

            dict:
                Media file metadata. e.g. `frame_rate` is reported as a
                numerator and denominator. src and sink video sizes correspond to
                the frame size of the original video, and the frames returned by
                :meth:`get_frame`, respectively. `src_pix_fmt` is the pixel format
                of the original input stream. 'aspect_ratio' is the source to
                display aspect ratio as a numerator and denominator. Duration
                is the file duration and defaults to None until updated.

        For example:

        .. code-block:: python

            >>> print player.get_metadata()
            {'duration': 71.972, 'sink_vid_size': (0, 0), 'src_vid_size':
             (704, 480), 'frame_rate': (13978, 583),
             'title': 'The Melancholy of Haruhi Suzumiya: Special Ending',
             'src_pix_fmt': 'yuv420p'}

        .. warning::

            The dictionary returned will have default values until the file is
            open and read. Because a second thread is created and used to read
            the file, when the constructor returns the dict might still have
            the default values.

            After the first frame is read, the dictionary entries are correct
            with respect to the file metadata. Alternatively, you can wait
            until the desired parameter is updated from its default value.
            Note, the metadata dict will be updated even if the video is
            paused.

        .. note::

            Some parameters can change as the streams are manipulated (e.g. the
            frame size and source format parameters).
        '''
        # The metadata object held by the VideoState is returned directly,
        # not a copy.
        return self.ivs.metadata

    def select_video_filter(self, index=0):
        '''Chooses which of the video filters given through the ff_opts `vf`
        option is applied.

        :Parameters:

            `index`: int
                Position of the filter in the `vf` list. Defaults to 0.

        Raises a ValueError carrying ``index`` if no video filters were
        configured or if ``index`` is outside the list.
        '''
        # No filter list was set up at all.
        if self.settings.vfilters_list == NULL:
            raise ValueError(index)
        # Past the end of the list.
        if index >= self.settings.nb_vfilters:
            raise ValueError(index)
        # Negative indices are not supported.
        if index < 0:
            raise ValueError(index)
        self.ivs.vfilter_idx = index

    def set_volume(self, volume):
        '''Sets the volume of the audio.

        The value is scaled by ``SDL_MIX_MAXVOLUME`` and clipped to the range
        ``0`` to ``SDL_MIX_MAXVOLUME`` before being stored.

        :Parameters:

            `volume`: float
                A value between 0.0 - 1.0.
        '''
        self.settings.audio_volume = av_clip(volume * SDL_MIX_MAXVOLUME, 0, SDL_MIX_MAXVOLUME)
        # Only when compiled with USE_SDL2_MIXER is the new volume also pushed
        # to the mixer for the player's audio device.
        IF USE_SDL2_MIXER:
            with nogil:
                Mix_Volume(self.ivs.audio_dev, self.settings.audio_volume)

    def get_volume(self):
        '''Returns the volume of the audio.

        :returns:

            `float`: A value between 0.0 - 1.0.
        '''
        # The stored volume is in SDL units; divide by the SDL maximum as a
        # double to get back the 0.0 - 1.0 fraction.
        return self.settings.audio_volume / <double>SDL_MIX_MAXVOLUME

    def set_mute(self, state):
        '''Mutes or un-mutes the audio.

        :Parameters:

            `state`: bool
                Whether to mute or unmute the audio.
        '''
        # Only the flag in the settings struct is changed here; the stored
        # volume is left untouched.
        self.settings.muted = state

    def get_mute(self):
        '''Returns whether the player is muted.

        :returns:

            `bool`: The mute flag stored in the player settings.
        '''
        return bool(self.settings.muted)

    def toggle_pause(self):
        '''Toggles the player's pause state.

        See :meth:`set_pause` to request a specific state instead.
        '''
        # The toggle is performed by the VideoState with the GIL released.
        with nogil:
            self.ivs.toggle_pause()

    def set_pause(self, state):
        '''Puts the player into the requested pause state.

        Nothing happens when the player is already in that state.

        :Parameters:

            `state`: bool
                Whether to pause or un-pause the player.
        '''
        # Already in the requested state: both truthy or both falsy.
        if bool(self.ivs.paused) == bool(state):
            return
        with nogil:
            self.ivs.toggle_pause()

    def get_pause(self):
        '''Returns whether the player is paused.

        :returns:

            `bool`: The pause flag of the underlying video state.
        '''
        return bool(self.ivs.paused)

    def get_pts(self):
        '''Returns the elapsed play time.

        :returns:

            `float`:
                The amount of the time that the file has been playing.
                The time is from the clock used for the player (default is audio,
                see ``sync`` options). If the clock is based on video, it should correspond
                with the pts from get_frame.

        If the selected clock has no valid value yet, the last seek position
        is used instead. The result is never smaller than the start time of
        the media when that start time is known.
        '''
        cdef double pos
        cdef int sync_type = self.ivs.get_master_sync_type()
        # Use the clock matching the master sync type, but only if the
        # corresponding stream is open; otherwise use the external clock.
        if (sync_type == AV_SYNC_VIDEO_MASTER and
            self.ivs.video_stream != -1):
            pos = self.ivs.vidclk.get_clock()
        elif (sync_type == AV_SYNC_AUDIO_MASTER and
            self.ivs.audio_stream != -1):
            pos = self.ivs.audclk.get_clock()
        else:
            pos = self.ivs.extclk.get_clock()
        # The clock reports NaN when it has no value; use the last seek
        # position converted from AV_TIME_BASE units.
        if isnan(pos):
            pos = <double>self.ivs.seek_pos / <double>AV_TIME_BASE
        # Clamp to the start time of the media when it is known.
        if (self.ivs.ic.start_time != AV_NOPTS_VALUE and
            pos < self.ivs.ic.start_time / <double>AV_TIME_BASE):
            pos = self.ivs.ic.start_time / <double>AV_TIME_BASE
        return pos

    def set_size(self, int width=-1, int height=-1):
        '''Dynamically sets the size of the frames returned by :meth:`get_frame`.

        :Parameters:

            `width, height`: int
                The width and height of the output frames.
                A value of 0 will set that parameter to the source width/height.
                A value of -1 for one of the parameters, will result in a value of that
                parameter that maintains the original aspect ratio.
                Both default to -1.

        For example:

        .. code-block:: python

            >>> print player.get_frame()[0][0].get_size()
            (704, 480)

            >>> player.set_size(200, 200)
            >>> print player.get_frame()[0][0].get_size()
            (704, 480)
            >>> print player.get_frame()[0][0].get_size()
            (704, 480)
            >>> print player.get_frame()[0][0].get_size()
            (704, 480)
            >>> print player.get_frame()[0][0].get_size()
            (200, 200)

            >>> player.set_size(200, 0)
            >>> print player.get_frame()[0][0].get_size()
            (200, 200)
            >>> print player.get_frame()[0][0].get_size()
            (200, 200)
            >>> print player.get_frame()[0][0].get_size()
            (200, 480)

            >>> player.set_size(200, -1)
            >>> print player.get_frame()[0][0].get_size()
            (200, 480)
            >>> print player.get_frame()[0][0].get_size()
            (200, 480)
            >>> print player.get_frame()[0][0].get_size()
            (200, 136)

        Note, that it takes a few calls to flush the old frames.

        .. note::

            if CONFIG_AVFILTER was False when compiling, this function will raise
            an error when either value is non-zero.
        '''
        # Without avfilter only (0, 0), i.e. the source size, is accepted.
        if not CONFIG_AVFILTER and (width or height):
            raise Exception('You can only set the screen size when avfilter is enabled.')
        self.settings.screen_width = width
        self.settings.screen_height = height

    def get_output_pix_fmt(self):
        '''Returns the pixel fmt in which output images are returned when calling
        :meth:`get_frame`.

        You can set the output format by specifying ``out_fmt`` in ``ff_opts``
        when creating this instance. Also, if avfilter is enabled, you can
        change it dynamically with :meth:`set_output_pix_fmt`.

        ::

            >>> print(player.get_output_pix_fmt())
            rgb24
        '''
        return self.ivs.get_out_pix_fmt()

    def set_output_pix_fmt(self, pix_fmt):
        '''Changes the pixel format of the images returned by
        :meth:`get_frame`.

        For example::

            >>> player.set_output_pix_fmt('yuv420p')

        selects the output format to use. Only images that have not been queued
        yet are affected, so it may take a few calls to :meth:`get_frame`
        before the new pixel format shows up.

        :Parameters:

            `pix_fmt`: str
                The name of the pixel format.

        An exception is raised if the name is not a recognized pixel format.

        .. note::

            if CONFIG_AVFILTER was False when compiling, this function will raise
            an exception.
        '''
        cdef AVPixelFormat requested
        cdef bytes encoded_name
        if not CONFIG_AVFILTER:
            raise Exception('You can only change the fmt when avfilter is enabled.')

        # The lookup function needs the name as bytes.
        encoded_name = pix_fmt.encode('utf8')
        requested = av_get_pix_fmt(encoded_name)
        if requested != AV_PIX_FMT_NONE:
            self.ivs.set_out_pix_fmt(requested)
        else:
            raise Exception('Unrecognized output pixel format {}.'.format(pix_fmt))

    # Currently, if a stream is re-opened when the stream was not open before
    # it'll cause some seeking. We can probably remove it by setting a seek flag
    # only for this stream and not for all, provided it is not the master clock stream.
    def request_channel(self, stream_type, action='cycle', int requested_stream=-1):
        '''Opens or closes a stream dynamically.

        This function may result in seeking when opening a new stream.

        :Parameters:

            `stream_type`: str
                The stream group on which to operate. Can be one of ``'audio'``,
                ``'video'``, or ``'subtitle'``.
            `action`: str
                The action to perform. Can be one of ``'open'``, ``'close'``, or
                ``'cycle'``. A value of 'cycle' will close the current stream and
                open the next stream in this group. A value of 'close' closes
                the currently open stream of this group.
            `requested_stream`: int
                The stream to open when ``action`` is ``'open'``. If ``-1``,
                the default, the next stream of the group is opened, exactly
                as for ``'cycle'``. It is ignored for ``'cycle'`` and
                ``'close'``.

        An exception is raised if ``stream_type`` is not recognized, or if
        ``action`` is ``'open'`` and ``requested_stream`` is not a valid stream
        index of the file.
        '''

        cdef int stream, old_index
        if stream_type == 'audio':
            stream = AVMEDIA_TYPE_AUDIO
            old_index = self.ivs.audio_stream
        elif stream_type == 'video':
            stream = AVMEDIA_TYPE_VIDEO
            old_index = self.ivs.video_stream
        elif stream_type == 'subtitle':
            stream = AVMEDIA_TYPE_SUBTITLE
            old_index = self.ivs.subtitle_stream
        else:
            raise Exception('Invalid stream type')

        # 'close' must be tested before the `requested_stream == -1` shortcut:
        # -1 is the default value, so otherwise a plain close request would
        # cycle to the next stream instead of closing the current one.
        if action == 'close':
            with nogil:
                self.ivs.stream_component_close(old_index)
        elif action == 'cycle' or requested_stream == -1:
            # Also reached for any other action when no explicit stream was
            # requested, as before.
            with nogil:
                self.ivs.stream_cycle_channel(stream)
        elif action == 'open':
            if requested_stream < 0 or <unsigned int>requested_stream >= self.ivs.ic.nb_streams:
                raise Exception('Stream number out of range')

            with nogil:
                self.ivs.stream_select_channel(stream, <unsigned int>requested_stream)

    def get_programs(self):
        '''Lists the IDs of the programs found in the media.

        ::

            >>> print(player.get_programs())
            [0, 1, 2, 3, 4]

        :returns:

            `list`: One program ID per program of the format context, in order.
        '''

        cdef unsigned int idx
        cdef list program_ids = []

        # Walk the program array of the format context and collect each id.
        for idx in range(self.ivs.ic.nb_programs):
            program_ids.append(self.ivs.ic.programs[idx].id)

        return program_ids

    def request_program(self, int requested_program):
        '''Opens video, audio and subtitle streams associated with a program.

        This closes all current streams and opens the first video, audio and
        subtitle streams found in the program.

        :Parameters:

            `requested_program`: int
                The program ID. See :meth:`get_programs` for the available IDs.
        '''

        # The selection is done by the VideoState with the GIL released.
        with nogil:
            self.ivs.stream_select_program(requested_program)

    def seek(self, pts, relative=True, seek_by_bytes='auto', accurate=True):
        '''Seeks within the streams that are currently open.

        The seek lands as close as possible to the requested time point
        without going past it.

        :Parameters:

            `pts`: float
                The timestamp to seek to (in seconds).
            `relative`: bool
                Whether the pts parameter is interpreted as the
                time offset from the current stream position (can be negative if True).
                Defaults to True.
            `seek_by_bytes`: bool or ``'auto'``
                Whether we seek based on the position in bytes or in time. In some
                instances seeking by bytes may be more accurate (don't ask me which).
                If ``'auto'``, the default, it is automatically decided based on
                the media.
            `accurate`: bool
                Whether to do finer seeking if we didn't seek directly to the requested
                frame. This is likely to be slower because after the coarser seek,
                we have to walk through the frames until the requested frame is
                reached. If paused or we reached eof this is ignored. Defaults to True.

        For example:

        .. code-block:: python

            >>> print(player.get_frame()[0][1])
            1016.392

            >>> player.seek(200., accurate=False)
            >>> player.get_frame()
            >>> print(player.get_frame()[0][1])
            1249.876

            >>> player.seek(200, relative=False, accurate=False)
            >>> player.get_frame()
            >>> print(player.get_frame()[0][1])
            198.49

        Note that it may take a few calls to get new frames after seeking.
        '''
        # Convert everything to C values first so that the actual seek can
        # run without the GIL.
        cdef int is_relative = relative
        cdef int is_accurate = accurate
        cdef int by_bytes
        cdef double target = pts
        if seek_by_bytes != 'auto':
            by_bytes = seek_by_bytes
        else:
            # 'auto': seek by bytes only if the settings say so.
            by_bytes = self.settings.seek_by_bytes > 0

        with nogil:
            self._seek(target, is_relative, by_bytes, is_accurate)

    def seek_to_chapter(self, increment, accurate=True):
        '''Seeks forwards or backwards (if negative) by ``increment`` chapters.

        :Parameters:

            `increment`: int
                The number of chapters to seek forwards or backwards to.
            `accurate`: bool
                Whether to do finer seeking if we didn't seek directly to the requested
                frame. This is likely to be slower because after the coarser seek,
                we have to walk through the frames until the requested frame is
                reached. Defaults to True.
        '''
        # Convert to C ints first so the call can be made without the GIL.
        cdef int c_increment = increment
        cdef int c_accurate = accurate
        with nogil:
            self.ivs.seek_chapter(c_increment, c_accurate)

    cdef void _seek(self, double pts, int relative, int seek_by_bytes, int accurate) nogil:
        '''Computes the seek target and requests the seek from the video state.

        Nothing is returned. ``pts`` is in seconds; it is an offset from the
        current position when ``relative`` is non-zero and an absolute position
        otherwise. When ``seek_by_bytes`` is non-zero the target passed on is a
        byte position, otherwise it is a time in ``AV_TIME_BASE`` units.
        ``accurate`` is passed through unchanged.
        '''
        cdef double incr, pos
        # t_rel stays 0 for absolute seeks.
        cdef int64_t t_pos = 0, t_rel = 0

        if relative:
            incr = pts
            if seek_by_bytes:
                # Current byte position: last video frame, else last audio
                # frame, else the position of the I/O context.
                pos = -1
                if self.ivs.video_stream >= 0:
                    pos = self.ivs.pictq.frame_queue_last_pos()
                if pos < 0 and self.ivs.audio_stream >= 0:
                    pos = self.ivs.sampq.frame_queue_last_pos()
                if pos < 0:
                    pos = avio_tell(self.ivs.ic.pb)
                # Turn seconds into bytes using the bit rate; 180000.0 bytes
                # per second is used when the bit rate is zero.
                if self.ivs.ic.bit_rate:
                    incr *= self.ivs.ic.bit_rate / 8.0
                else:
                    incr *= 180000.0
                pos += incr
                t_pos = <int64_t>pos
                t_rel = <int64_t>incr
            else:
                pos = self.ivs.get_master_clock()
                if isnan(pos):
                    # seek_pos might never have been set
                    pos = <double>self.ivs.seek_pos / <double>AV_TIME_BASE
                pos += incr
                # Do not seek to before the start of the media.
                if self.ivs.ic.start_time != AV_NOPTS_VALUE and pos < self.ivs.ic.start_time / <double>AV_TIME_BASE:
                    pos = self.ivs.ic.start_time / <double>AV_TIME_BASE
                t_pos = <int64_t>(pos * AV_TIME_BASE)
                t_rel = <int64_t>(incr * AV_TIME_BASE)
        else:
            pos = pts
            if seek_by_bytes:
                # Same seconds-to-bytes conversion as for relative seeks.
                if self.ivs.ic.bit_rate:
                    pos *= self.ivs.ic.bit_rate / 8.0
                else:
                    pos *= 180000.0
                t_pos = <int64_t>pos
            else:
                t_pos = <int64_t>(pos * AV_TIME_BASE)
                # Do not seek to before the start of the media.
                if self.ivs.ic.start_time != AV_NOPTS_VALUE and t_pos < self.ivs.ic.start_time:
                    t_pos = self.ivs.ic.start_time
        self.ivs.stream_seek(t_pos, t_rel, seek_by_bytes, accurate)


================================================
FILE: ffpyplayer/player/queue.pxd
================================================

include '../includes/ffmpeg.pxi'

from ffpyplayer.threading cimport MTGenerator, MTCond

# Entry stored in the packet FIFO: a pointer to a queued packet together with
# the queue serial that was current when the packet was added.
cdef struct MyAVPacketList:
    AVPacket *pkt
    int serial


# Packet queue built on an FFmpeg FIFO of MyAVPacketList entries and guarded
# by an MTCond (mutex + condition variable).
cdef class FFPacketQueue(object):
    cdef:
        # Threading backend selector; its ``mt_src`` is used to create ``cond``.
        MTGenerator mt_gen
        # FIFO holding MyAVPacketList entries.
        AVFifoBuffer *pkt_list
        # Number of packets currently queued.
        int nb_packets
        # Running total of packet sizes plus the per-entry FIFO overhead.
        int size
        # Running total of the ``duration`` fields of the queued packets.
        int64_t duration
        # Non-zero while the queue is aborted; put and get then return -1.
        int abort_request
        # Incremented by flush and start; stamped onto every queued entry.
        int serial
        # Lock/condition protecting all of the fields above.
        MTCond cond

    # Adds ``pkt`` to the FIFO; the caller is expected to hold ``cond``'s lock.
    cdef int packet_queue_put_private(FFPacketQueue self, AVPacket *pkt) nogil except 1
    # Sets ``pkt.stream_index`` and queues the packet via packet_queue_put.
    cdef int packet_queue_put_nullpacket(FFPacketQueue self, AVPacket *pkt, int stream_index) nogil except 1
    # Moves the contents of ``pkt`` into a newly allocated packet and queues it.
    cdef int packet_queue_put(FFPacketQueue self, AVPacket *pkt) nogil except 1
    # Frees every queued packet, resets the counters and bumps ``serial``.
    cdef int packet_queue_flush(FFPacketQueue self) nogil except 1
    # Marks the queue as aborted and signals the condition.
    cdef int packet_queue_abort(FFPacketQueue self) nogil except 1
    # Clears the abort flag and bumps ``serial``.
    cdef int packet_queue_start(FFPacketQueue self) nogil except 1
    # Returns 1 when a packet was dequeued and a negative value otherwise
    # (queue aborted, queue empty with ``block`` == 0, or FIFO read failure).
    # 0 is never returned as a result: it is the declared exception value.
    cdef int packet_queue_get(FFPacketQueue self, AVPacket *pkt, int block, int *serial) nogil except 0


================================================
FILE: ffpyplayer/player/queue.pyx
================================================

__all__ = ('FFPacketQueue', )

include '../includes/ff_consts.pxi'

from ffpyplayer.threading cimport MTGenerator, MTMutex, MTCond


cdef class FFPacketQueue(object):
    '''Queue of ``AVPacket`` pointers stored in an FFmpeg FIFO and protected by
    a :class:`MTCond`.

    The queue is created in the aborted state (``abort_request`` is 1), so
    ``packet_queue_start`` has to be called before packets are accepted.
    '''

    def __cinit__(FFPacketQueue self, MTGenerator mt_gen):
        self.mt_gen = mt_gen
        self.pkt_list = NULL
        self.nb_packets = self.size = self.serial = 0
        self.duration = 0

        # The FIFO starts with room for a single entry and is grown on demand
        # by packet_queue_put_private.
        self.pkt_list = av_fifo_alloc(sizeof(MyAVPacketList))
        if self.pkt_list == NULL:
            raise MemoryError

        self.cond = MTCond.__new__(MTCond, mt_gen.mt_src)
        self.abort_request = 1

    def __dealloc__(self):
        # ``cond`` is None when __cinit__ failed before creating it. In that
        # case nothing can have been queued (and there is no lock to take), so
        # the flush is skipped, but the FIFO itself may already have been
        # allocated and must still be released.
        if self.cond is not None:
            with nogil:
                self.packet_queue_flush()
        with nogil:
            av_fifo_freep(&self.pkt_list)

    cdef int packet_queue_put_private(FFPacketQueue self, AVPacket *pkt) nogil except 1:
        '''Appends ``pkt`` to the FIFO and updates the counters.

        Does not take the lock itself; packet_queue_put calls it with the lock
        held. Returns 0 on success and a negative value if the queue is
        aborted or the FIFO could not be grown/written.
        '''
        cdef MyAVPacketList pkt1
        cdef int ret

        if self.abort_request:
            return -1

        # Make room for one more entry when the FIFO is full.
        if av_fifo_space(self.pkt_list) < sizeof(pkt1):
            ret = av_fifo_grow(self.pkt_list, sizeof(pkt1))
            if ret < 0:
                return ret

        pkt1.pkt = pkt
        pkt1.serial = self.serial

        ret = av_fifo_generic_write(self.pkt_list, &pkt1, sizeof(pkt1), NULL)
        if ret < 0:
            return ret
        self.nb_packets += 1
        self.size += pkt1.pkt.size + sizeof(pkt1)
        self.duration += pkt1.pkt.duration
        #/* XXX: should duplicate packet data in DV case */
        # Wake up a consumer that may be blocked in packet_queue_get.
        self.cond.cond_signal()
        return 0

    cdef int packet_queue_put(FFPacketQueue self, AVPacket *pkt) nogil except 1:
        '''Moves the contents of ``pkt`` into a newly allocated packet and
        queues it.

        ``pkt`` is always left empty: it is unreferenced if the allocation
        fails and moved-from otherwise. Returns 0 on success and a negative
        value on failure, in which case the new packet is freed again.
        '''
        cdef AVPacket *pkt1 = av_packet_alloc()
        cdef int ret = -1

        if pkt1 == NULL:
            av_packet_unref(pkt)
            return -1
        av_packet_move_ref(pkt1, pkt)

        self.cond.lock()
        ret = self.packet_queue_put_private(pkt1)
        self.cond.unlock()

        if ret < 0:
            av_packet_free(&pkt1)

        return ret

    cdef int packet_queue_put_nullpacket(FFPacketQueue self, AVPacket *pkt, int stream_index) nogil except 1:
        '''Sets ``pkt.stream_index`` to ``stream_index`` and queues ``pkt``
        with packet_queue_put, returning its result.
        '''
        pkt.stream_index = stream_index
        return self.packet_queue_put(pkt)

    cdef int packet_queue_flush(FFPacketQueue self) nogil except 1:
        '''Frees every queued packet, zeroes the counters and increments
        ``serial``.

        Returns the result of the last FIFO read (0 if the queue was already
        empty); draining stops early if a read reports an error.
        '''
        cdef MyAVPacketList pkt1
        cdef int ret = 0

        self.cond.lock()
        while av_fifo_size(self.pkt_list) >= sizeof(pkt1):
            ret = av_fifo_generic_read(self.pkt_list, &pkt1, sizeof(pkt1), NULL)
            if ret < 0:
                break
            av_packet_free(&pkt1.pkt)

        self.nb_packets = 0
        self.size = 0
        self.duration = 0
        self.serial += 1
        self.cond.unlock()
        return ret

    cdef int packet_queue_abort(FFPacketQueue self) nogil except 1:
        '''Puts the queue in the aborted state and signals the condition so a
        blocked packet_queue_get can notice it. Always returns 0.
        '''
        self.cond.lock()
        self.abort_request = 1
        self.cond.cond_signal()
        self.cond.unlock()
        return 0

    cdef int packet_queue_start(FFPacketQueue self) nogil except 1:
        '''Clears the aborted state and increments ``serial``. Always returns 0.
        '''
        self.cond.lock()
        self.abort_request = 0
        self.serial += 1
        self.cond.unlock()
        return 0

    # Returns 1 if a packet was dequeued and a negative value otherwise: the
    # queue is aborted, the queue is empty and ``block`` is 0, or the FIFO read
    # failed. 0 is never returned as a result because it is the declared
    # exception value of this method.
    cdef int packet_queue_get(FFPacketQueue self, AVPacket *pkt, int block, int *serial) nogil except 0:
        cdef MyAVPacketList pkt1
        cdef int ret = 0

        self.cond.lock()

        while True:
            if self.abort_request:
                ret = -1
                break

            if av_fifo_size(self.pkt_list) >= sizeof(pkt1):
                ret = av_fifo_generic_read(self.pkt_list, &pkt1, sizeof(pkt1), NULL)
                if ret < 0:
                    break
                self.nb_packets -= 1
                self.size -= pkt1.pkt.size + sizeof(pkt1)
                self.duration -= pkt1.pkt.duration

                # Hand the payload to the caller, then release the now-empty
                # packet shell that was allocated by packet_queue_put.
                av_packet_move_ref(pkt, pkt1.pkt)
                if serial != NULL:
                    serial[0] = pkt1.serial
                av_packet_free(&pkt1.pkt)
                ret = 1
                break
            elif not block:
                ret = -1
                break
            else:
                # Sleep until a producer (or abort) signals the condition.
                self.cond.cond_wait()
        self.cond.unlock()
        return ret


================================================
FILE: ffpyplayer/tests/__init__.py
================================================


================================================
FILE: ffpyplayer/tests/common.py
================================================

__all__ = ('get_media', )

from os import environ
from os.path import join, abspath, dirname, exists, pathsep

from ffpyplayer.tools import set_loglevel, set_log_callback
import logging

# Forward FFmpeg log output to the standard ``logging`` module (only if no
# callback has been installed yet) and let every level down to trace through.
set_log_callback(logger=logging, default_only=True)
set_loglevel('trace')


def get_media(fname):
    """Locate the media file ``fname`` and return a path to it.

    The lookup order is: ``fname`` as given (returned as an absolute path),
    the directory containing this module, the ``examples`` directory two
    levels above it, and finally each directory listed in the
    ``FFPYPLAYER_TEST_DIRS`` environment variable (separated by the platform's
    path separator; blank entries are ignored).

    :raises IOError: if the file is not found in any of these places.
    """
    if exists(fname):
        return abspath(fname)

    here = dirname(__file__)

    beside_module = join(here, fname)
    if exists(beside_module):
        return beside_module

    in_examples = abspath(join(here, '../../examples', fname))
    if exists(in_examples):
        return in_examples

    # An unset variable behaves like an empty list of directories.
    for entry in environ.get('FFPYPLAYER_TEST_DIRS', '').split(pathsep):
        entry = entry.strip()
        if entry:
            candidate = join(entry, fname)
            if exists(candidate):
                return candidate

    raise IOError("{} doesn't exist".format(fname))


================================================
FILE: ffpyplayer/tests/test_pic.py
================================================

def create_image(size):
    """Build an ``rgb24`` :class:`ffpyplayer.pic.Image` of the given
    ``(width, height)`` whose bytes ramp linearly from 0 towards 255.
    """
    from ffpyplayer.pic import Image

    width, height = size
    n_bytes = width * height * 3
    pixels = bytearray(int(idx * 255 / n_bytes) for idx in range(n_bytes))
    return Image(plane_buffers=[pixels], pix_fmt='rgb24', size=(width, height))


def test_pic():
    """Convert a generated rgb24 image to yuv420p and check the plane sizes."""
    from ffpyplayer.pic import SWScale

    w, h = 500, 100
    source = create_image((w, h))

    assert not source.is_ref()
    assert source.get_size() == (w, h)

    scaler = SWScale(w, h, source.get_pixel_format(), ofmt='yuv420p')

    converted = scaler.scale(source)
    assert converted.get_pixel_format() == 'yuv420p'

    # One full-size plane, two quarter-size planes and an empty fourth plane.
    luma = w * h
    chroma = w * h / 4
    plane_sizes = [len(plane) for plane in converted.to_bytearray()]
    assert plane_sizes == [luma, chroma, chroma, 0]


================================================
FILE: ffpyplayer/tests/test_play.py
================================================

def test_play():
    """Play ``dw11222.mp4`` video-only to the end and check the frame count.

    Errors reported through the player callback abort the loop and are
    re-raised as an exception once the player has been closed.
    """
    from .common import get_media
    from ffpyplayer.player import MediaPlayer
    import time

    # One-element list so the nested callback can record the first error.
    error = [None, ]

    def callback(selector, value):
        if selector.endswith('error'):
            error[0] = selector, value

    # only video
    ff_opts = {'an': True, 'sync': 'video'}
    player = MediaPlayer(
        get_media('dw11222.mp4'), callback=callback, ff_opts=ff_opts)

    i = 0
    # Close the player even if get_frame raises, so a failing test does not
    # leave the player open.
    try:
        while not error[0]:
            frame, val = player.get_frame()
            if val == 'eof':
                break
            elif frame is None:
                # No frame ready yet; back off briefly instead of spinning.
                time.sleep(0.001)
            else:
                img, t = frame
                i += 1
    finally:
        player.close_player()

    if error[0]:
        raise Exception('{}: {}'.format(*error[0]))

    assert i == 6077


================================================
FILE: ffpyplayer/tests/test_write.py
================================================
import time
import math
import pytest


def get_image(w, h):
    """Return a ``w`` x ``h`` ``rgb24`` image whose bytes ramp linearly from 0
    towards 255.
    """
    from ffpyplayer.pic import Image

    n_bytes = w * h * 3
    pixels = bytearray(int(idx * 255 / n_bytes) for idx in range(n_bytes))
    return Image(plane_buffers=[pixels], pix_fmt='rgb24', size=(w, h))


def get_gray_image_with_val(w, h, val):
    """Return a ``w`` x ``h`` ``gray`` image in which every byte is
    ``int(val)``.
    """
    from ffpyplayer.pic import Image

    n_bytes = w * h
    level = int(val)
    pixels = bytearray(level for _ in range(n_bytes))
    return Image(plane_buffers=[pixels], pix_fmt='gray', size=(w, h))


def verify_frames(filename, timestamps, frame_vals=None):
    """Play ``filename`` and compare the decoded frames with expectations.

    The first decoded frame is skipped. Every following frame's timestamp must
    be close (10% relative tolerance) to ``timestamps[i]`` and, when
    ``frame_vals`` is given, the first byte of the frame's first plane must
    equal ``frame_vals[i]``, where ``i`` counts the frames after the skipped
    one.

    :raises ValueError: if the player reports ``'paused'``.
    :raises Exception: if the player callback reported an error.
    """
    from ffpyplayer.player import MediaPlayer
    # One-element list so the nested callback can record the first error.
    error = [None, ]

    def callback(selector, value):
        if selector.endswith('error'):
            error[0] = selector, value

    player = MediaPlayer(filename, callback=callback)

    # Distinct timestamps seen, used below to detect repeated timestamps.
    read_timestamps = set()
    try:
        # -1 marks "first frame not seen yet"; that frame is skipped below.
        i = -1
        while not error[0]:
            frame, val = player.get_frame()
            if val == 'eof':
                break
            if val == 'paused':
                raise ValueError('Got paused')
            elif frame is None:
                # No frame ready yet; wait a little before polling again.
                time.sleep(0.01)
            else:
                img, t = frame
                print(i, t)
                if i < 0:
                    i += 1
                    continue

                print(i, t, timestamps[i])
                read_timestamps.add(t)
                assert math.isclose(t, timestamps[i], rel_tol=.1)

                if frame_vals:
                    assert frame_vals[i] == img.to_bytearray()[0][0]

                i += 1
    finally:
        player.close_player()

    if error[0] is not None:
        raise Exception('{}: {}'.format(*error[0]))

    # One frame was skipped, so exactly len(timestamps) - 1 frames must have
    # been checked, each with a distinct timestamp.
    assert len(timestamps) - 1 == i
    assert len(read_timestamps) == i


def test_write_streams(tmp_path):
    """Write two video streams of 20 frames each into a single mp4 file.

    Uses ``libx264`` when that encoder is available and ``rawvideo``
    otherwise. The frames are scaled to half size and converted to yuv420p.
    """
    from ffpyplayer.writer import MediaWriter
    from ffpyplayer.pic import Image
    from ffpyplayer.tools import get_codecs
    fname = str(tmp_path / 'test_video.avi')

    lib_opts = {}
    codec = 'rawvideo'
    if 'libx264' in get_codecs(encode=True, video=True):
        codec = 'libx264'
        lib_opts = {'preset': 'slow', 'crf': '22'}

    w, h = 640, 480
    out_opts = {
        'pix_fmt_in': 'rgb24', 'width_in': w, 'height_in': h,
        'codec': codec, 'frame_rate': (5, 1)}

    metadata = {
        'title': 'Singing in the sun', 'author': 'Rat',
        'genre': 'Animal sounds'}
    # Pixel dimensions are integers, so use floor division rather than true
    # division (which would hand the writer 320.0 / 240.0).
    writer = MediaWriter(fname, [out_opts] * 2, fmt='mp4',
                         width_out=w // 2, height_out=h // 2,
                         pix_fmt_out='yuv420p',
                         lib_opts=lib_opts, metadata=metadata)

    # Construct images: an ascending byte ramp and its mirror image.
    size = w * h * 3
    buf = bytearray([int(x * 255 / size) for x in range(size)])
    img = Image(plane_buffers=[buf], pix_fmt='rgb24', size=(w, h))

    buf = bytearray([int((size - x) * 255 / size) for x in range(size)])
    img2 = Image(plane_buffers=[buf], pix_fmt='rgb24', size=(w, h))

    for i in range(20):
        writer.write_frame(img=img, pts=i / 5., stream=0)  # stream 1
        writer.write_frame(img=img2, pts=i / 5., stream=1)  # stream 2
    writer.close()


@pytest.mark.parametrize('fmt', [('mkv', 'matroska'), ('avi', 'avi')])
def test_write_correct_frame_rate(tmp_path, fmt):
    """Write 20 gray frames spaced exactly one 29.97 fps period apart and
    verify that they read back with the same timestamps and pixel values.
    """
    from ffpyplayer.writer import MediaWriter
    extension, container = fmt
    fname = str(tmp_path / 'test_frame.') + extension

    w, h = 64, 64
    out_opts = {
        'pix_fmt_in': 'gray', 'width_in': w, 'height_in': h,
        'codec': 'rawvideo', 'frame_rate': (2997, 100)}

    timestamps = [i / 29.97 for i in range(20)]
    image_vals = [i * 5 for i in range(20)]

    writer = MediaWriter(fname, [out_opts], fmt=container)
    for pts, level in zip(timestamps, image_vals):
        frame = get_gray_image_with_val(w, h, level)
        writer.write_frame(img=frame, pts=pts, stream=0)
    writer.close()

    verify_frames(fname, timestamps, image_vals)


@pytest.mark.parametrize('fmt', [('mkv', 'matroska'), ('avi', 'avi')])
def test_write_larger_than_frame_rate(tmp_path, fmt):
    """Write 20 gray frames one second apart into a 15 fps stream and verify
    that they read back with the same timestamps and pixel values.
    """
    from ffpyplayer.writer import MediaWriter
    extension, container = fmt
    fname = str(tmp_path / 'test_frame.') + extension

    w, h = 64, 64
    out_opts = {
        'pix_fmt_in': 'gray', 'width_in': w, 'height_in': h,
        'codec': 'rawvideo', 'frame_rate': (15, 1)}

    timestamps = list(range(20))
    image_vals = [i * 5 for i in range(20)]

    writer = MediaWriter(fname, [out_opts], fmt=container)
    for pts, level in zip(timestamps, image_vals):
        frame = get_gray_image_with_val(w, h, level)
        writer.write_frame(img=frame, pts=pts, stream=0)
    writer.close()

    verify_frames(fname, timestamps, image_vals)


@pytest.mark.parametrize('fmt', [('mkv', 'matroska'), ('avi', 'avi')])
def test_write_smaller_than_frame_rate(tmp_path, fmt):
    """Write frames spaced 1/300 s apart into a 30 fps stream.

    For the avi container the writes are expected to raise; for matroska they
    are expected to succeed.
    """
    from ffpyplayer.writer import MediaWriter
    extension, container = fmt
    fname = str(tmp_path / 'test_frame.') + extension

    w, h = 64, 64
    out_opts = {
        'pix_fmt_in': 'rgb24', 'width_in': w, 'height_in': h,
        'codec': 'rawvideo', 'pix_fmt_out': 'yuv420p',
        'frame_rate': (30, 1)}

    writer = MediaWriter(fname, [out_opts], fmt=container)
    img = get_image(w, h)

    def write_all_frames():
        for i in range(20):
            writer.write_frame(img=img, pts=i / 300, stream=0)

    if extension == 'avi':
        with pytest.raises(Exception):
            write_all_frames()
    else:
        write_all_frames()
    writer.close()


================================================
FILE: ffpyplayer/threading.pxd
================================================

include "includes/ffmpeg.pxi"


# Selects which backend the MT* wrappers use: SDL primitives (SDL_MT) or
# Python's ``threading`` module (Py_MT).
cdef enum MT_lib:
    SDL_MT,
    Py_MT

# Mutex wrapper over either an SDL mutex or a ``threading.Lock``.
cdef class MTMutex(object):
    # Backend this mutex was created for.
    cdef MT_lib lib
    # Backend handle: an ``SDL_mutex *`` for SDL_MT, or an owned reference to a
    # Python lock object for Py_MT.
    cdef void* mutex

    # Acquire the mutex; dispatches to SDL or to _lock_py.
    cdef int lock(MTMutex self) nogil except 2
    # Py_MT implementation of lock(); takes the GIL to call ``acquire()``.
    cdef int _lock_py(MTMutex self) nogil except 2
    # Release the mutex; dispatches to SDL or to _unlock_py.
    cdef int unlock(MTMutex self) nogil except 2
    # Py_MT implementation of unlock(); takes the GIL to call ``release()``.
    cdef int _unlock_py(MTMutex self) nogil except 2

# Condition variable plus its associated mutex, backed by SDL or by
# ``threading.Condition``.
cdef class MTCond(object):
    # Backend this condition was created for.
    cdef MT_lib lib
    # Mutex associated with the condition; lock()/unlock() operate on it.
    cdef MTMutex mutex
    # Backend handle: an ``SDL_cond *`` for SDL_MT, or an owned reference to a
    # Python condition object for Py_MT.
    cdef void *cond

    # Lock / unlock the associated mutex.
    cdef int lock(MTCond self) nogil except 2
    cdef int unlock(MTCond self) nogil except 2
    # Signal the condition; the ``_py`` variant is the Py_MT implementation
    # (calls ``notify()`` with the GIL held).
    cdef int cond_signal(MTCond self) nogil except 2
    cdef int _cond_signal_py(MTCond self) nogil except 2
    # Wait on the condition; the ``_py`` variant is the Py_MT implementation.
    cdef int cond_wait(MTCond self) nogil except 2
    cdef int _cond_wait_py(MTCond self) nogil except 2
    # Wait on the condition with a timeout. The Py_MT implementation divides
    # ``val`` by 1000 before passing it to ``wait()``.
    cdef int cond_wait_timeout(MTCond self, uint32_t val) nogil except 2
    cdef int _cond_wait_timeout_py(MTCond self, uint32_t val) nogil except 2

# Thread wrapper over either an SDL thread or a ``threading.Thread``.
cdef class MTThread(object):
    # Backend this thread was created for.
    cdef MT_lib lib
    # Backend handle: an ``SDL_Thread *`` for SDL_MT, or an owned reference to
    # a Python thread object for Py_MT. NULL until create_thread is called.
    cdef void* thread

    # Start a thread running ``func(arg)``. ``thread_name`` is passed to SDL;
    # the Py_MT backend does not use it.
    cdef int create_thread(MTThread self, int_void_func func, const char *thread_name, void *arg) nogil except 2
    # Wait for the thread to finish. The Py_MT backend always stores 0 in
    # ``status`` when it is not NULL.
    cdef int wait_thread(MTThread self, int *status) nogil except 2


# Holds the selected threading backend and exposes backend-specific helpers.
cdef class MTGenerator(object):
    # Backend used by objects created from this generator.
    cdef MT_lib mt_src

    # Sleep for ``delay``; the Py_MT backend sleeps ``delay / 1000.`` seconds.
    cdef int delay(MTGenerator self, int delay) nogil except 2
    # Return the lock manager callback for this generator's backend.
    cdef lockmgr_func get_lockmgr(MTGenerator self) nogil

# Return the lock manager callback (SDL_lockmgr or Py_lockmgr) for ``lib``.
cdef lockmgr_func get_lib_lockmgr(MT_lib lib) nogil


================================================
FILE: ffpyplayer/threading.pyx
================================================

__all__ = ('MTGenerator', )

include "includes/ff_consts.pxi"
include "includes/inline_funcs.pxi"

from cpython.ref cimport PyObject

cdef extern from "Python.h":
    void Py_INCREF(PyObject *)
    void Py_XINCREF(PyObject *)
    void Py_DECREF(PyObject *)

ctypedef int (*int_cls_method)(void *) nogil

import traceback

cdef int sdl_initialized = 0
def initialize_sdl():
    '''Initializes SDL on the first call; later calls do nothing.

    SDL has to be initialized before anything else here can be used. The
    modules that rely on SDL call this automatically.

    :raises ValueError: if ``SDL_Init`` reports a failure.
    '''
    global sdl_initialized
    if not sdl_initialized:
        if SDL_Init(0):
            raise ValueError('Could not initialize SDL - %s' % SDL_GetError())
        sdl_initialized = 1
initialize_sdl()


cdef class MTMutex(object):
    '''Mutex backed by either an SDL mutex (``SDL_MT``) or a
    ``threading.Lock`` (``Py_MT``).

    For the Python backend ``self.mutex`` holds an owned reference to the lock
    object, taken in ``__cinit__`` and dropped in ``__dealloc__``.
    '''

    def __cinit__(MTMutex self, MT_lib lib):
        self.lib = lib
        self.mutex = NULL
        if lib == SDL_MT:
            self.mutex = SDL_CreateMutex()
            if self.mutex == NULL:
                raise Exception('Cannot create mutex.')
        elif lib == Py_MT:
            import threading
            mutex = threading.Lock()
            self.mutex = <PyObject *>mutex
            # Keep the lock alive for as long as this wrapper exists.
            Py_INCREF(<PyObject *>self.mutex)

    def __dealloc__(MTMutex self):
        # ``mutex`` is still NULL if __cinit__ raised before creating the
        # backend object; there is nothing to release in that case (and
        # Py_DECREF must not be called on NULL).
        if self.mutex == NULL:
            return
        if self.lib == SDL_MT:
            SDL_DestroyMutex(<SDL_mutex *>self.mutex)
        elif self.lib == Py_MT:
            Py_DECREF(<PyObject *>self.mutex)

    cdef int lock(MTMutex self) nogil except 2:
        '''Acquires the mutex and returns the backend's result (0 for the
        Python backend once the lock has been acquired).
        '''
        if self.lib == SDL_MT:
            return SDL_mutexP(<SDL_mutex *>self.mutex)
        elif self.lib == Py_MT:
            return self._lock_py()

    cdef int _lock_py(MTMutex self) nogil except 2:
        # Python objects may only be touched with the GIL held.
        with gil:
            return not (<object>self.mutex).acquire()

    cdef int unlock(MTMutex self) nogil except 2:
        '''Releases the mutex and returns the backend's result (always 0 for
        the Python backend).
        '''
        if self.lib == SDL_MT:
            return SDL_mutexV(<SDL_mutex *>self.mutex)
        elif self.lib == Py_MT:
            return self._unlock_py()

    cdef int _unlock_py(MTMutex self) nogil except 2:
        with gil:
            (<object>self.mutex).release()
        return 0

cdef class MTCond(object):
    '''Condition variable with its own :class:`MTMutex`, backed by either SDL
    (``SDL_MT``) or ``threading.Condition`` (``Py_MT``).

    For the Python backend the condition is built on the mutex's lock object
    and ``self.cond`` holds an owned reference to it.
    '''

    def __cinit__(MTCond self, MT_lib lib):
        self.lib = lib
        self.mutex = MTMutex.__new__(MTMutex, lib)
        self.cond = NULL
        if self.lib == SDL_MT:
            self.cond = SDL_CreateCond()
            if self.cond == NULL:
                raise Exception('Cannot create condition.')
        elif self.lib == Py_MT:
            import threading
            cond = threading.Condition(<object>self.mutex.mutex)
            self.cond = <PyObject *>cond
            # Keep the condition alive for as long as this wrapper exists.
            Py_INCREF(<PyObject *>self.cond)

    def __dealloc__(MTCond self):
        # ``cond`` is still NULL if __cinit__ raised before creating the
        # backend object; there is nothing to release in that case (and
        # Py_DECREF must not be called on NULL).
        if self.cond == NULL:
            return
        if self.lib == SDL_MT:
            SDL_DestroyCond(<SDL_cond *>self.cond)
        elif self.lib == Py_MT:
            Py_DECREF(<PyObject *>self.cond)

    cdef int lock(MTCond self) nogil except 2:
        '''Locks the associated mutex and returns the mutex's result, so a
        backend failure is reported to the caller instead of being dropped.
        '''
        return self.mutex.lock()

    cdef int unlock(MTCond self) nogil except 2:
        '''Unlocks the associated mutex and returns the mutex's result.'''
        return self.mutex.unlock()

    cdef int cond_signal(MTCond self) nogil except 2:
        '''Signals the condition and returns the backend's result (always 0
        for the Python backend).
        '''
        if self.lib == SDL_MT:
            return SDL_CondSignal(<SDL_cond *>self.cond)
        elif self.lib == Py_MT:
            return self._cond_signal_py()

    cdef int _cond_signal_py(MTCond self) nogil except 2:
        # Python objects may only be touched with the GIL held.
        with gil:
            (<object>self.cond).notify()
        return 0

    cdef int cond_wait(MTCond self) nogil except 2:
        '''Waits on the condition and returns the backend's result (always 0
        for the Python backend).
        '''
        if self.lib == SDL_MT:
            return SDL_CondWait(<SDL_cond *>self.cond, <SDL_mutex *>self.mutex.mutex)
        elif self.lib == Py_MT:
            return self._cond_wait_py()

    cdef int _cond_wait_py(MTCond self) nogil except 2:
        with gil:
            (<object>self.cond).wait()
        return 0

    cdef int cond_wait_timeout(MTCond self, uint32_t val) nogil except 2:
        '''Waits on the condition for at most ``val`` and returns the
        backend's result.

        The Python backend treats ``val`` as milliseconds (it waits
        ``val / 1000.`` seconds) and always returns 0, whether or not the
        wait timed out.
        '''
        if self.lib == SDL_MT:
            return SDL_CondWaitTimeout(<SDL_cond *>self.cond, <SDL_mutex *>self.mutex.mutex, val)
        elif self.lib == Py_MT:
            return self._cond_wait_timeout_py(val)

    cdef int _cond_wait_timeout_py(MTCond self, uint32_t val) nogil except 2:
        with gil:
            (<object>self.cond).wait(val / 1000.)
        return 0

def enterance_func(target_func, target_arg):
    '''Thread entry point used by the Python threading backend.

    ``target_func`` and ``target_arg`` are integers holding the address of a
    C function and of its argument; the function is called with the argument
    and its result is returned.
    '''
    cdef int_void_func func = <int_void_func><uintptr_t>target_func
    cdef void *arg = <void *><uintptr_t>target_arg
    return func(arg)

cdef class MTThread(object):
    '''Thread wrapper backed by either an SDL thread (``SDL_MT``) or a
    ``threading.Thread`` (``Py_MT``).

    ``self.thread`` is NULL until ``create_thread`` has been called. For the
    Python backend it holds an owned reference to the thread object.
    '''

    def __cinit__(MTThread self, MT_lib lib):
        self.lib = lib
        self.thread = NULL

    def __dealloc__(MTThread self):
        if self.lib == Py_MT and self.thread != NULL:
            Py_DECREF(<PyObject *>self.thread)

    cdef int create_thread(MTThread self, int_void_func func, const char *thread_name, void *arg) nogil except 2:
        '''Starts a thread that runs ``func(arg)``.

        ``thread_name`` is only used by the SDL backend. Raises an exception
        if SDL cannot create the thread. Returns 0 on success.
        '''
        if self.lib == SDL_MT:
            with gil:
                self.thread = SDL_CreateThread(func, thread_name, arg)
                if self.thread == NULL:
                    raise Exception('Cannot create thread.')
        elif self.lib == Py_MT:
            with gil:
                import threading
                # The C function and its argument are passed as integers and
                # turned back into pointers by enterance_func.
                thread = threading.Thread(group=None, target=enterance_func,
                                          name=None, args=(<uintptr_t>func, <uintptr_t>arg), kwargs={})
                self.thread = <PyObject *>thread
                Py_INCREF(<PyObject *>self.thread)
                thread.start()
        return 0

    cdef int wait_thread(MTThread self, int *status) nogil except 2:
        '''Waits for the thread to finish; does not wait if no thread was
        created.

        The SDL backend stores the thread's return value in ``status``. The
        Python backend stores 0 in ``status`` when it is not NULL. Always
        returns 0.
        '''
        if self.lib == SDL_MT:
            if self.thread != NULL:
                SDL_WaitThread(<SDL_Thread *>self.thread, status)
        elif self.lib == Py_MT:
            with gil:
                # Same guard as the SDL branch: without it a wait before
                # create_thread would dereference a NULL object pointer.
                if self.thread != NULL:
                    (<object>self.thread).join()
                if status != NULL:
                    status[0] = 0
        return 0


# MTMutex.lock / MTMutex.unlock cast to plain ``int (*)(void *)`` pointers so
# that SDL_lockmgr can call them on the raw ``void *`` handle without the GIL.
cdef int_cls_method mutex_lock = <int_cls_method>MTMutex.lock
cdef int_cls_method mutex_release = <int_cls_method>MTMutex.unlock

# GIL-holding part of the SDL lock manager: handles creation and destruction
# of the SDL-backed MTMutex stored in ``mtx[0]``.
# Returns 0 on success and 1 if ``op`` is neither create nor destroy or if an
# exception occurred (the traceback is then sent to av_log).
cdef int _SDL_lockmgr_py(void ** mtx, int op) with gil:
    cdef bytes msg
    cdef int res = 1
    cdef MTMutex mutex

    try:
        if op == FF_LOCK_CREATE:
            mutex = MTMutex.__new__(MTMutex, SDL_MT)
            # The raw pointer handed out through ``mtx`` owns one reference.
            Py_INCREF(<PyObject *>mutex)
            mtx[0] = <PyObject *>mutex
            res = 0
        elif op == FF_LOCK_DESTROY:
            if mtx[0] != NULL:
                # Drop the reference taken on creation.
                Py_DECREF(<PyObject *>mtx[0])
            res = 0
    except:
        msg = traceback.format_exc().encode('utf8')
        av_log(NULL, AV_LOG_ERROR, '%s', msg)
    return res

# Lock manager callback for the SDL backend. Obtain/release run without the
# GIL through the mutex_lock / mutex_release pointers and return 0 or 1; every
# other operation is forwarded to _SDL_lockmgr_py.
cdef int SDL_lockmgr(void ** mtx, int op) nogil:
    if op != FF_LOCK_OBTAIN and op != FF_LOCK_RELEASE:
        return _SDL_lockmgr_py(mtx, op)
    if op == FF_LOCK_OBTAIN:
        return not not mutex_lock(mtx[0])
    return not not mutex_release(mtx[0])

# Lock manager callback for the Python backend; every operation runs with the
# GIL held. ``mtx[0]`` holds a Py_MT-backed MTMutex.
# Returns 0 on success and 1 on failure, for an unknown ``op`` or if an
# exception occurred (the traceback is then sent to av_log).
cdef int Py_lockmgr(void ** mtx, int op) with gil:
    cdef int res = 1
    cdef bytes msg
    cdef MTMutex mutex

    try:
        if op == FF_LOCK_CREATE:
            mutex = MTMutex.__new__(MTMutex, Py_MT)
            # The raw pointer handed out through ``mtx`` owns one reference.
            Py_INCREF(<PyObject *>mutex)
            mtx[0] = <PyObject *>mutex
            res = 0
        elif op == FF_LOCK_OBTAIN:
            mutex = <MTMutex>mtx[0]
            res = not not mutex.lock() # force it to 0, or 1
        elif op == FF_LOCK_RELEASE:
            mutex = <MTMutex>mtx[0]
            res = not not mutex.unlock()
        elif op == FF_LOCK_DESTROY:
            if mtx[0] != NULL:
                # Drop the reference taken on creation.
                Py_DECREF(<PyObject *>mtx[0])
            res = 0
    except:
        msg = traceback.format_exc().encode('utf8')
        av_log(NULL, AV_LOG_ERROR, '%s', msg)
        res = 1
    return res


# Returns the lock manager callback matching ``lib``: SDL_lockmgr for SDL_MT,
# Py_lockmgr for Py_MT.
cdef lockmgr_func get_lib_lockmgr(MT_lib lib) nogil:
    cdef lockmgr_func manager = NULL
    if lib == SDL_MT:
        manager = SDL_lockmgr
    elif lib == Py_MT:
        manager = Py_lockmgr
    return manager


cdef class MTGenerator(object):
    '''Remembers which threading backend (``SDL_MT`` or ``Py_MT``) is in use
    and provides the backend-specific delay and lock manager.
    '''

    def __cinit__(MTGenerator self, MT_lib mt_src, **kwargs):
        self.mt_src = mt_src

    cdef int delay(MTGenerator self, int delay) nogil except 2:
        '''Sleeps for ``delay``: via ``SDL_Delay`` for the SDL backend, or via
        ``time.sleep(delay / 1000.)`` for the Python backend. Returns 0.
        '''
        if self.mt_src == Py_MT:
            # time.sleep is a Python call, so the GIL is needed here.
            with gil:
                import time
                time.sleep(delay / 1000.)
        elif self.mt_src == SDL_MT:
            SDL_Delay(delay)
        return 0

    cdef lockmgr_func get_lockmgr(MTGenerator self) nogil:
        '''Returns the lock manager callback for this generator's backend.'''
        return get_lib_lockmgr(self.mt_src)


================================================
FILE: ffpyplayer/tools.pyx
================================================
'''
FFmpeg tools
============

Module for manipulating and finding information of FFmpeg formats, codecs,
devices, pixel formats and more.
'''

__all__ = (
    'initialize_sdl_aud', 'loglevels', 'codecs_enc', 'codecs_dec', 'pix_fmts',
    'formats_in', 'formats_out', 'set_log_callback', 'get_log_callback',
    'set_loglevel', 'get_loglevel', 'get_codecs', 'get_fmts',
    'get_format_codec',
    'get_supported_framerates', 'get_supported_pixfmts', 'get_best_pix_fmt',
    'emit_library_info',
    'list_dshow_devices', 'encode_to_bytes', 'decode_to_unicode',
    'convert_to_str', 'list_dshow_opts')

include "includes/ffmpeg.pxi"
include "includes/inline_funcs.pxi"

cdef extern from "stdlib.h" nogil:
    void *malloc(size_t)
    void free(void *)

from ffpyplayer.threading cimport Py_MT, MTMutex, get_lib_lockmgr, SDL_MT
import ffpyplayer.threading  # for sdl init
import re
import sys
from functools import partial

cdef int sdl_aud_initialized = 0
def initialize_sdl_aud():
    '''Initializes the SDL audio subsystem on the first call; later calls do
    nothing.

    This has to happen before audio can be used. The modules that use SDL
    audio call it automatically.

    :raises ValueError: if the audio subsystem cannot be initialized.
    '''
    global sdl_aud_initialized
    if not sdl_aud_initialized:
        # Try to work around an occasional ALSA buffer underflow issue when
        # the period size is NPOT due to ALSA resampling by forcing the buffer
        # size. A value already present in the environment is left untouched.
        if not SDL_getenv("SDL_AUDIO_ALSA_SET_BUFFER_SIZE"):
            SDL_setenv("SDL_AUDIO_ALSA_SET_BUFFER_SIZE", "1", 0)

        if SDL_InitSubSystem(SDL_INIT_AUDIO):
            raise ValueError('Could not initialize SDL audio - %s' % SDL_GetError())
        sdl_aud_initialized = 1


cdef int ffmpeg_initialized = 0
def _initialize_ffmpeg():
    '''Performs the one-time FFmpeg setup: log flags, device registration
    (when built with avdevice) and network initialization.

    Runs automatically when this module is imported; repeated calls do
    nothing.
    '''
    global ffmpeg_initialized
    if ffmpeg_initialized:
        return
    av_log_set_flags(AV_LOG_SKIP_REPEATED)
    IF CONFIG_AVDEVICE:
        avdevice_register_all()
    avformat_network_init()
    ffmpeg_initialized = 1
_initialize_ffmpeg()


def _get_item0(x):
    '''Returns the first element of ``x``.'''
    return x[0]

# See http://ffmpeg.org/ffmpeg.html for log levels.
loglevels = {
    "quiet": AV_LOG_QUIET, "panic": AV_LOG_PANIC, "fatal": AV_LOG_FATAL,
    "error": AV_LOG_ERROR, "warning": AV_LOG_WARNING, "info": AV_LOG_INFO,
    "verbose": AV_LOG_VERBOSE, "debug": AV_LOG_DEBUG, "trace": AV_LOG_TRACE}
'''A dictionary with all the available ffmpeg log levels. The keys are the
log level names and the values are their ffmpeg values. The lower the value,
the more important the log. Note, this is the opposite of Python, where the
higher the level, the more important the log.
'''
# Reverse mapping from ffmpeg's numeric level to its name. ``items()`` is used
# instead of the Python-2-only ``iteritems()``.
_loglevel_inverse = {v: k for k, v in loglevels.items()}

# Python callable that receives FFmpeg log lines, or None when unset.
cdef object _log_callback = None
# Mutex taken by the functions below while they read or change the log state.
cdef MTMutex _log_mutex= MTMutex(SDL_MT)
# Current log level; messages with a higher numeric level are dropped by
# _log_callback_func.
cdef int log_level = AV_LOG_WARNING
# Prefix state passed by address to av_log_format_line.
cdef int print_prefix = 1

# Passes one formatted log line, together with the name of its level, to the
# registered Python callback. Does nothing when no callback is set. Must be
# called with the GIL held.
cdef void gil_call_callback(char *line, int level):
    cdef object handler = _log_callback
    if handler is not None:
        handler(tcode(line), _loglevel_inverse[level])

# nogil wrapper around gil_call_callback: acquires the GIL and forwards the
# formatted line and its level to the Python callback.
cdef void call_callback(char *line, int level) nogil:
    with gil:
        gil_call_callback(line, level)

# Log callback registered with FFmpeg by set_log_callback. Messages without a
# format string or with a level above ``log_level`` are dropped; the rest are
# formatted into a fixed 2048-byte buffer and handed to call_callback.
cdef void _log_callback_func(void* ptr, int level, const char* fmt, va_list vl) noexcept nogil:
    cdef char line[2048]
    if fmt != NULL and level <= log_level:
        av_log_format_line(ptr, level, fmt, vl, line, sizeof(line), &print_prefix)
        call_callback(line, level)

def _logger_callback(logger_dict, message, level):
    message = message.strip()
    if message:
        logger_dict[level]('FFPyPlayer: {}'.format(message))

def set_log_callback(object callback=None, logger=None, int default_only=False):
    '''Sets a callback to be used by ffmpeg when emitting logs.
    This function is thread safe.

    See also :func:`set_loglevel`.

    :Parameters:

        `callback`: callable or None
            A function which will be called with strings to be printed. It takes
            two parameters: ``message`` and ``level``. ``message`` is the string
            to be printed. ``level`` is the log level of the string and is one
            of the keys of the :attr:`loglevels` dict. If ``callback`` and ``logger``
            are None, the default ffmpeg log callback will be set, which prints to stderr.
            Defaults to None.
        `logger`: a python logger object or None
            If ``callback`` is None and this is not None, this logger object's
            ``critical``, ``error``, ``warning``, ``info``, ``debug``, and ``trace``
            methods will be called directly to forward ffmpeg's log outputs.

            .. note::

                If the logger doesn't have a trace method, the trace output will be
                redirected to debug. However, the trace level outputs a lot of logs.

        `default_only`: bool
            If True, when ``callback`` or ``logger`` are not ``None``, they
            will only be set if a callback or logger has not already been set.

    :returns:

        The previous callback set (None, if it has not been set).

    :raises Exception: if ``callback`` is neither None nor callable.

    >>> from ffpyplayer.tools import set_log_callback, loglevels
    >>> loglevel_emit = 'error' # This and worse errors will be emitted.
    >>> def log_callback(message, level):
    ...     message = message.strip()
    ...     if message and loglevels[level] <= loglevels[loglevel_emit]:
    ...         print('%s: %s' % (level, message.strip()))
    >>> set_log_callback(log_callback)
    ...
    >>> set_log_callback(None)
    '''
    global _log_callback
    if callback is not None and not callable(callback):
        raise Exception('Log callback needs to be callable.')

    if callback is None and logger is not None:
        # Map every ffmpeg level name onto a logger method and wrap the
        # mapping in a callback with the usual (message, level) signature.
        logger_dict = {
            'quiet': logger.critical, 'panic': logger.critical,
            'fatal': logger.critical, 'error': logger.error, 'warning': logger.warning,
            'info': logger.info, 'verbose': logger.debug, 'debug': logger.debug,
            'trace': getattr(logger, 'trace', logger.debug)}
        callback = partial(_logger_callback, logger_dict)

    _log_mutex.lock()
    old_callback = _log_callback
    if callback is None:
        # Neither a callback nor a logger: restore ffmpeg's default callback.
        av_log_set_callback(&av_log_default_callback)
        _log_callback = None
    elif not default_only or old_callback is None:
        # With default_only set, an already installed callback is kept.
        av_log_set_callback(&_log_callback_func)
        _log_callback = callback
    _log_mutex.unlock()
    return old_callback

def get_log_callback():
    '''Returns the log callback currently set, or None if there is none.
    See :func:`set_log_callback`.
    '''
    _log_mutex.lock()
    current = _log_callback
    _log_mutex.unlock()
    return current


def set_loglevel(loglevel):
    '''This sets the global FFmpeg log level. Less important log levels are
    filtered and not passed on to the logger or callback set by
    :func:`set_log_callback`. It also sets the log level of FFmpeg itself,
    which applies when no callback or logger is set.

    The global log level, if not set, defaults to ``'warning'``.

    :Parameters:

        `loglevel`: str
            The log level. Can be one of the keys of :attr:`loglevels`.

    :raises ValueError: if ``loglevel`` is not a key of :attr:`loglevels`.
    '''
    cdef int level
    global log_level
    if loglevel not in loglevels:
        raise ValueError('Invalid loglevel {}'.format(loglevel))
    level = loglevels[loglevel]
    _log_mutex.lock()
    av_log_set_level(level)
    log_level = level
    _log_mutex.unlock()
# Apply the default level on import so FFmpeg's own level matches ``log_level``.
set_loglevel(_loglevel_inverse[log_level])

def get_loglevel():
    '''Returns the name of the current global log level, i.e. the level last set
    with :func:`set_loglevel` or the default level if it was never called.
    The result is one of the keys of :attr:`loglevels`.
    '''
    cdef int current
    # read the cached numeric level under the mutex, translate it afterwards
    _log_mutex.lock()
    current = log_level
    _log_mutex.unlock()
    return _loglevel_inverse[current]


cpdef get_codecs(
        int encode=False, int decode=False, int video=False, int audio=False,
        int data=False, int subtitle=False, int attachment=False, other=False):
    '''Lists the names of the codecs (e.g. h264) known to ffpyplayer that can
    encode and/or decode the requested media types, e.g. video or audio.

    A codec is included when it matches at least one of the selected roles
    (``encode`` / ``decode``) **and** at least one of the selected media types.
    All the parameters default to False, so by default the result is empty.

    :Parameters:

        `encode`: bool
            If True, includes the encoding codecs in the result. Defaults to False.
        `decode`: bool
            If True, includes the decoding codecs in the result. Defaults to False.
        `video`: bool
            If True, includes the video codecs in the result. Defaults to False.
        `audio`: bool
            If True, includes the audio codecs in the result. Defaults to False.
        `data`: bool
            If True, includes the (continuous) side data codecs in the result. Defaults to False.
        `subtitle`: bool
            If True, includes the subtitle codecs in the result. Defaults to False.
        `attachment`: bool
            If True, includes the (sparse) data attachment codecs in the result. Defaults to False.
        `other`: bool
            If True, codecs of every media type are included, regardless of the
            other media type flags. Defaults to False.

    :returns:

        A sorted list of the matching codec names.
    '''
    cdef list codecs = []
    cdef AVCodec *codec = NULL
    cdef void *iter_codec = NULL

    codec = av_codec_iterate(&iter_codec)
    while codec != NULL:
        # first filter on the codec's role, then on its media type
        if ((encode and av_codec_is_encoder(codec)) or
                (decode and av_codec_is_decoder(codec))):
            if ((video and codec.type == AVMEDIA_TYPE_VIDEO) or
                    (audio and codec.type == AVMEDIA_TYPE_AUDIO) or
                    (data and codec.type == AVMEDIA_TYPE_DATA) or
                    (subtitle and codec.type == AVMEDIA_TYPE_SUBTITLE) or
                    (attachment and codec.type == AVMEDIA_TYPE_ATTACHMENT) or
                    other):
                codecs.append(tcode(codec.name))
        codec = av_codec_iterate(&iter_codec)

    codecs.sort()
    return codecs

codecs_enc = get_codecs(encode=True, video=True)
'''A list of all the codecs available for encoding video. '''
codecs_dec = get_codecs(decode=True, video=True, audio=True)
'''A list of all the codecs available for decoding video and audio. '''

cdef list list_pixfmts():
    # Collects the name of every pixel format descriptor FFmpeg enumerates and
    # returns the names sorted.
    cdef list names = []
    # passing NULL asks for the first descriptor
    cdef const AVPixFmtDescriptor *desc = av_pix_fmt_desc_next(NULL)

    while desc != NULL:
        names.append(tcode(desc.name))
        desc = av_pix_fmt_desc_next(desc)

    names.sort()
    return names

pix_fmts = list_pixfmts()
'''A list of all the pixel formats available to ffmpeg. '''

cpdef get_fmts(int input=False, int output=False):
    '''Returns the (container) formats available in FFmpeg.

    :Parameters:

        `input`: bool
            If True, also includes input formats in the result. Defaults to False
        `output`: bool
            If True, also includes output formats in the result. Defaults to False

    :returns:

        A 3-tuple of 3 lists, ``formats``, ``full_names``, and ``extensions``.
        The three lists have identical length and are ordered by format name.

        `formats`: list
            A list of the names of the formats. A format registered under
            several comma separated names contributes one entry per name.
        `full_names`: list
            A list of the corresponding human readable names for each of the
            formats. Can be the empty string if none is available.
        `extensions`: list
            A list of the extensions associated with the corresponding formats.
            Each item is a (possibly empty) list of extensions names.
    '''
    cdef list fmts = [], full_names = [], exts = []
    cdef AVOutputFormat *ofmt = NULL
    cdef AVInputFormat *ifmt = NULL
    cdef void *ifmt_opaque = NULL
    cdef void *ofmt_opaque = NULL
    cdef object aliases, long_name, ext_list, alias, rows, row

    if output:
        ofmt = av_muxer_iterate(&ofmt_opaque)
        while ofmt != NULL:
            if ofmt.name != NULL:
                aliases = tcode(ofmt.name).split(',')
                long_name = '' if ofmt.long_name == NULL else tcode(ofmt.long_name)
                ext_list = [] if ofmt.extensions == NULL else tcode(ofmt.extensions).split(',')

                # every alias gets the same long name and the same extension list
                for alias in aliases:
                    fmts.append(alias)
                    full_names.append(long_name)
                    exts.append(ext_list)
            ofmt = av_muxer_iterate(&ofmt_opaque)

    if input:
        ifmt = av_demuxer_iterate(&ifmt_opaque)
        while ifmt != NULL:
            if ifmt.name != NULL:
                aliases = tcode(ifmt.name).split(',')
                long_name = '' if ifmt.long_name == NULL else tcode(ifmt.long_name)
                ext_list = [] if ifmt.extensions == NULL else tcode(ifmt.extensions).split(',')

                for alias in aliases:
                    fmts.append(alias)
                    full_names.append(long_name)
                    exts.append(ext_list)
            ifmt = av_demuxer_iterate(&ifmt_opaque)

    # one stable sort keyed on the format name keeps the three lists aligned
    rows = sorted(zip(fmts, full_names, exts), key=_get_item0)
    fmts = [row[0] for row in rows]
    full_names = [row[1] for row in rows]
    exts = [row[2] for row in rows]
    return fmts, full_names, exts

formats_in = get_fmts(input=True)[0]
'''A list of all the formats (e.g. file formats) available for reading. '''
formats_out = get_fmts(output=True)[0]
'''A list of all the formats (e.g. file formats) available for writing. '''

def get_format_codec(filename=None, fmt=None):
    '''Returns the best codec associated with the file format. The format
    can be provided using either ``filename`` or ``fmt``.

    :Parameters:
        `filename`: str or None
            The output filename. If provided, the extension of the filename
            is used to guess the format.
        `fmt`: str or None.
            The format to use. Can be one of :attr:`ffpyplayer.tools.formats_out`.

    :returns:

        str:
            The name from :attr:`ffpyplayer.tools.codecs_enc`
            of the best codec that can be used with this format.

    :raises:

        `Exception`: if no output format matches the arguments, or if no
        encoder is available for the format's default video codec.

    For example:

    .. code-block:: python

        >>> get_format_codec('test.png')
        'mjpeg'
        >>> get_format_codec('test.jpg')
        'mjpeg'
        >>> get_format_codec('test.mkv')
        'libx264'
        >>> get_format_codec(fmt='h264')
        'libx264'
    '''
    cdef int res
    cdef char *format_name = NULL
    cdef char *name = NULL
    cdef const AVCodec *codec_desc = NULL
    cdef AVFormatContext *fmt_ctx = NULL
    cdef char msg[256]
    cdef AVCodecID codec_id

    # accept both text and bytes, like the other helpers in this module
    if fmt:
        if not isinstance(fmt, bytes):
            fmt = fmt.encode('utf8')
        format_name = fmt
    if filename:
        if not isinstance(filename, bytes):
            filename = filename.encode('utf8')
        name = filename

    res = avformat_alloc_output_context2(&fmt_ctx, NULL, format_name, name)
    if res < 0 or fmt_ctx == NULL or fmt_ctx.oformat == NULL:
        # don't leak a context that was allocated but is unusable
        if fmt_ctx != NULL:
            avformat_free_context(fmt_ctx)
            fmt_ctx = NULL
        raise Exception('Failed to find format: ' + tcode(emsg(res, msg, sizeof(msg))))

    # the context was only needed to resolve the output format; copy the codec
    # id out and release the context before anything below can raise
    codec_id = fmt_ctx.oformat.video_codec
    avformat_free_context(fmt_ctx)
    fmt_ctx = NULL

    codec_desc = avcodec_find_encoder(codec_id)
    if codec_desc == NULL:
        raise Exception('Default codec not found for format')
    return tcode(codec_desc.name)


def get_supported_framerates(codec_name, rate=()):
    '''Lists the frame rates an encoding codec accepts. When a desired rate is
    given, the accepted rate closest to it is moved to the front of the list.

    :Parameters:

        `codec_name`: str
            The name of a encoding codec.
        `rate`: 2-tuple of ints, or empty tuple.
            If provided, a 2-tuple where the first element is the numerator,
            and the second the denominator of the frame rate we wish to use. E.g.
            (2997, 100) means a frame rate of 29.97.

    :returns:

        (list of 2-tuples, or empty list):
            If there are no restrictions on the frame rate (i.e. all rates are valid)
            it returns a empty list, otherwise it returns a list with the valid
            frame rates. If `rate` is provided and there are restrictions on the frame
            rates, the closest frame rate is the zero'th element in the list.

    For example:

    .. code-block:: python

        >>> print(get_supported_framerates('mpeg1video'))
        [(24000, 1001), (24, 1), (25, 1), (30000, 1001), (30, 1), (50, 1),
        (60000, 1001), (60, 1), (15, 1), (5, 1), (10, 1), (12, 1), (15, 1)]

        >>> print(get_supported_framerates('mpeg1video', (2997, 100)))
        [(30000, 1001), (24000, 1001), (24, 1), (25, 1), (30, 1), (50, 1),
        (60000, 1001), (60, 1), (15, 1), (5, 1), (10, 1), (12, 1), (15, 1)]
    '''
    cdef AVRational wanted
    cdef list rates = []
    cdef int idx = 0
    cdef bytes name
    cdef AVCodec *codec = NULL

    if isinstance(codec_name, bytes):
        name = codec_name
    else:
        name = codec_name.encode('utf8')

    codec = avcodec_find_encoder_by_name(name)
    if codec == NULL:
        raise Exception('Encoder codec %s not available.' % codec_name)
    if codec.supported_framerates == NULL:
        # no table means the codec does not restrict the frame rate
        return rates

    # the table ends at the first entry with a zero denominator
    while codec.supported_framerates[idx].den:
        rates.append((codec.supported_framerates[idx].num,
                      codec.supported_framerates[idx].den))
        idx += 1

    if rate:
        wanted.num, wanted.den = rate
        idx = av_find_nearest_q_idx(wanted, codec.supported_framerates)
        # promote the nearest rate to the front, keeping the rest in order
        rates.insert(0, rates.pop(idx))
    return rates

def get_supported_pixfmts(codec_name, pix_fmt=''):
    '''Returns the supported pixel formats for encoding codecs. If a desired format
    is provided, it also returns the closest format (i.e. the format with minimum
    conversion loss).

    :Parameters:

        `codec_name`: str
            The name of a encoding codec.
        `pix_fmt`: str
            If not empty, the name of a pixel format we wish to use with this codec,
            e.g. 'rgb24'.

    :returns:

        (list of pixel formats, or empty list):
            If there are no restrictions on the pixel formats (i.e. all the formats
            are valid) it returns a empty list, otherwise it returns a list with the
            valid formats. If pix_fmt is not empty and there are restrictions to the
            formats, the closest format which results in the minimum loss when converting
            will be returned as the zero'th element in the list.

    :raises:

        `Exception`: if the codec is not an available encoder, or if ``pix_fmt``
        is given but is not a pixel format known to FFmpeg.

    For example:

    .. code-block:: python

        >>> print(get_supported_pixfmts('ffv1'))
        ['yuv420p', 'yuva420p', 'yuva422p', 'yuv444p', 'yuva444p', 'yuv440p', ...
        'gray16le', 'gray', 'gbrp9le', 'gbrp10le', 'gbrp12le', 'gbrp14le']

        >>> print(get_supported_pixfmts('ffv1', 'gray'))
        ['gray', 'yuv420p', 'yuva420p', 'yuva422p', 'yuv444p', 'yuva444p', ...
        'gray16le', 'gbrp9le', 'gbrp10le', 'gbrp12le', 'gbrp14le']
    '''
    cdef AVPixelFormat fmt
    cdef AVPixelFormat fmt_src = AV_PIX_FMT_NONE
    cdef const AVPixFmtDescriptor *desc = NULL
    cdef bytes name = codec_name if isinstance(codec_name, bytes) else codec_name.encode('utf8')
    cdef bytes fmt_b = pix_fmt if isinstance(pix_fmt, bytes) else pix_fmt.encode('utf8')
    cdef list fmt_list = []
    cdef int i = 0, loss = 0, has_alpha = 0
    cdef AVCodec *codec = avcodec_find_encoder_by_name(name)
    if codec == NULL:
        raise Exception('Encoder codec %s not available.' % codec_name)

    # resolve the requested format once and reuse the result below
    if pix_fmt:
        fmt_src = av_get_pix_fmt(fmt_b)
        if fmt_src == AV_PIX_FMT_NONE:
            raise Exception('Pixel format not recognized.')
    if codec.pix_fmts == NULL:
        return fmt_list

    # the codec's table is terminated by AV_PIX_FMT_NONE
    while codec.pix_fmts[i] != AV_PIX_FMT_NONE:
        fmt_list.append(tcode(av_get_pix_fmt_name(codec.pix_fmts[i])))
        i += 1

    if pix_fmt and fmt_list:
        desc = av_pix_fmt_desc_get(fmt_src)
        if desc == NULL:
            # never dereference a missing descriptor
            raise Exception('Pixel format not recognized.')
        # an even component count is used as the "has alpha" heuristic
        has_alpha = desc.nb_components % 2 == 0
        fmt = avcodec_find_best_pix_fmt_of_list(codec.pix_fmts, fmt_src,
                                                has_alpha, &loss)
        # only reorder when FFmpeg actually selected a format; its name would
        # otherwise be NULL
        if fmt != AV_PIX_FMT_NONE:
            i = fmt_list.index(tcode(av_get_pix_fmt_name(fmt)))
            fmt_list.insert(0, fmt_list.pop(i))
    return fmt_list

def get_best_pix_fmt(pix_fmt, pix_fmts):
    '''Picks, from a list of candidate pixel formats, the one that loses the
    least when converting from the original pixel format.

    :Parameters:

        `pix_fmt`: str
            The name of a original pixel format.
        `pix_fmts`: list-type of strings
            A list of possible pixel formats from which the best will be chosen.

    :returns:

        The pixel format with the least conversion loss.

    .. note::

        The returned pixel format seems to be somewhat sensitive to the order
        of the input pixel formats. Higher quality pixel formats should therefore
        be at the beginning of the list.


    For example:

    .. code-block:: python

        >>> get_best_pix_fmt('yuv420p', ['rgb24', 'rgba', 'yuv444p', 'gray'])
        'rgb24'
        >>> get_best_pix_fmt('gray', ['rgb24', 'rgba', 'yuv444p', 'gray'])
        'gray'
        >>> get_best_pix_fmt('rgb8', ['rgb24', 'yuv420p', 'rgba', 'yuv444p', 'gray'])
        'rgb24'
    '''
    cdef AVPixelFormat fmt, fmt_src
    cdef bytes fmt_src_b = pix_fmt if isinstance(pix_fmt, bytes) else pix_fmt.encode('utf8')
    cdef bytes fmt_b
    cdef int i = 0, loss = 0, has_alpha = 0
    cdef AVPixelFormat *fmts = NULL
    cdef Py_ssize_t n_fmts

    if not (pix_fmt and pix_fmts):
        raise ValueError('Invalid arguments {}, {}'.format(pix_fmt, pix_fmts))
    fmt_src = av_get_pix_fmt(fmt_src_b)
    if fmt_src == AV_PIX_FMT_NONE:
        raise Exception('Pixel format {} not recognized.'.format(pix_fmt))

    # C array of the candidates, with one extra slot for the terminator
    n_fmts = len(pix_fmts)
    fmts = <AVPixelFormat *>malloc(sizeof(AVPixelFormat) * (n_fmts + 1))
    if fmts == NULL:
        raise MemoryError()

    try:
        fmts[n_fmts] = AV_PIX_FMT_NONE

        for i, fmt_s in enumerate(pix_fmts):
            if isinstance(fmt_s, bytes):
                fmt_b = fmt_s
            else:
                fmt_b = fmt_s.encode('utf8')
            fmts[i] = av_get_pix_fmt(fmt_b)
            if fmts[i] == AV_PIX_FMT_NONE:
                raise Exception('Pixel format {} not recognized.'.format(fmt_s))

        # an even component count is used as the "has alpha" heuristic
        has_alpha = av_pix_fmt_desc_get(fmt_src).nb_components % 2 == 0
        fmt = avcodec_find_best_pix_fmt_of_list(fmts, fmt_src, has_alpha, &loss)
    finally:
        # the array is released on success and on every error path
        free(fmts)

    return tcode(av_get_pix_fmt_name(fmt))

def emit_library_info():
    '''Prints to the ffmpeg log all the ffmpeg library's versions and configure
    options.

    The information is emitted at the ``AV_LOG_INFO`` level: first the
    configuration of the libraries, then their versions.
    '''
    print_all_libs_info(INDENT|SHOW_CONFIG,  AV_LOG_INFO)
    print_all_libs_info(INDENT|SHOW_VERSION, AV_LOG_INFO)

def _dshow_log_callback(log, message, level):
    # Log callback used while probing dshow: accumulates the utf8 encoded
    # messages in ``log`` as ``(bytes, level)`` tuples. A message with the same
    # level as the newest entry is appended to that entry, any other message
    # starts a new entry.
    data = message.encode('utf8')

    if log and log[-1][1] == level:
        log[-1] = (log[-1][0] + data, level)
    else:
        log.append((data, level))


cpdef int list_dshow_opts(list log, bytes stream, bytes option) except 1:
    '''Opens ``stream`` with the ``dshow`` input format while ``option`` is set
    to ``"true"``, and collects everything FFmpeg logs during the attempt.

    :Parameters:

        `log`: list
            The captured output is appended to this list as ``(line, level)``
            tuples, one per line of logged text. ``line`` is a bytes object and
            ``level`` is whatever level value the log callback received.
        `stream`: bytes
            The name passed to ``avformat_open_input``.
        `option`: bytes
            The name of the dshow option that is set to ``"true"``.

    :returns:

        0 on success. Raises an ``Exception`` if the dshow input format is
        not available.
    '''
    cdef AVFormatContext *fmt = NULL
    cdef AVDictionary* opts = NULL
    cdef AVInputFormat *ifmt
    cdef object old_callback
    cdef int level
    cdef list temp_log = []
    cdef bytes item
    global log_level

    ifmt = av_find_input_format(b"dshow")
    if ifmt == NULL:
        raise Exception('Direct show not found.')

    av_dict_set(&opts, option, b"true", 0)
    # The mutex is held while the callback and the log level are temporarily
    # replaced, so the capture and the restore happen as one unit.
    # NOTE(review): set_log_callback locks _log_mutex as well, so this relies
    # on the mutex being re-entrant - confirm.
    _log_mutex.lock()
    old_callback = set_log_callback(partial(_dshow_log_callback, temp_log))
    level = log_level

    # log everything while the device is opened; the return value of the open
    # call is ignored, only the text it logs is used
    av_log_set_level(AV_LOG_TRACE)
    log_level = AV_LOG_TRACE
    avformat_open_input(&fmt, stream, ifmt, &opts)

    # restore the previous log level and callback
    av_log_set_level(level)
    log_level = level
    set_log_callback(old_callback)

    _log_mutex.unlock()
    avformat_close_input(&fmt)
    av_dict_free(&opts)

    # the callback merges consecutive messages of the same level; split them
    # back into individual lines for the caller
    for item, l in temp_log:
        for line in item.splitlines():
            log.append((line, l))
    return 0

def list_dshow_devices():
    '''Returns a list of the dshow devices available.

    The devices and their options are discovered by parsing the text that
    FFmpeg's dshow input logs for its ``list_devices`` and ``list_options``
    options. Logged lines that are not recognized are forwarded to the FFmpeg
    log at their original level.

    :returns:

        `3-tuple`: A 3-tuple, of (`video`, `audio`, `names`)

            `video`: dict
                A dict of all the direct show **video** devices. The keys
                of the dict are the unique names of the available direct show devices. The values
                are a list of the available configurations for that device. Each
                element in the list has the following format:
                ``(pix_fmt, codec_fmt, (frame_width, frame_height), (min_framerate, max_framerate))``
            `audio`: dict
                A dict of all the direct show **audio** devices. The keys
                of the dict are the unique names of the available direct show devices. The values
                are a list of the available configurations for that device. Each
                element in the list is built by this function as a 3-tuple of ints:
                ``(num_channels, bits, rate)``.
            `names`: dict
                A dict mapping the unique names of the video and audio devices to
                a more human friendly (possibly non-unique) name. Either of these
                names can be used when opening the device. However, if using the non-unique
                name, it's not guaranteed which of the devices sharing the name will be opened.


    For example (note that the audio entries in this sample output show nested
    ``(min, max)`` pairs, which does not match the flat 3-tuple of ints built
    by the current code):

    .. code-block:: python

        >>> from ffpyplayer.player import MediaPlayer
        >>> from ffpyplayer.tools import list_dshow_devices
        >>> import time, weakref
        >>> dev = list_dshow_devices()
        >>> print dev
        ({'@device_pnp_...223196\\global': [('bgr24', '', (160, 120), (5, 30)),
        ('bgr24', '', (176, 144), (5, 30)), ('bgr24', '', (320, 176), (5, 30)),
        ('bgr24', '', (320, 240), (5, 30)), ('bgr24', '', (352, 288), (5, 30)),
        ...
        ('yuv420p', '', (320, 240), (5, 30)), ('yuv420p', '', (352, 288), (5, 30))],
        '@device_pnp_...223196\\global': [('bgr24', '', (160, 120), (30, 30)),
        ...
        ('yuyv422', '', (352, 288), (30, 30)),
        ('yuyv422', '', (640, 480), (30, 30))]},
        {'@device_cm_...2- HD Webcam C615)': [((1, 2), (8, 16), (11025, 44100))],
        '@device_cm_...HD Webcam C615)': [((1, 2), (8, 16), (11025, 44100))]},
        {'@device_cm_...- HD Webcam C615)': 'Microphone (2- HD Webcam C615)',
         '@device_cm_...2- HD Webcam C615)': 'Microphone (3- HD Webcam C615)',
        ...
         '@device_pnp...223196\\global': 'HD Webcam C615',
         '@device_pnp...223196\\global': 'Laptop Integrated Webcam'})

    See :ref:`dshow-example` for a full example.
    '''
    cdef list res = []
    cdef dict video = {}, audio = {}, curr = None
    cdef object last
    cdef bytes msg, msg2
    cdef dict name_map = {}

    # list devices
    list_dshow_opts(res, b'dummy', b'list_devices')
    # primary dev name
    pname = re.compile(' *\[dshow *@ *[\w]+\] *"(.+)" *\\((video|audio)\\) *')
    # alternate dev name
    apname = re.compile(' *\[dshow *@ *[\w]+\] *Alternative name *"(.+)" *')
    # m holds the match of the primary name seen on the previous line, if any
    m = None
    for msg, level in res:
        message = msg.decode('utf8')

        # do we match a primary name - i.e. next device
        m_temp = pname.match(message)
        if m_temp:
            m = m_temp
            curr = audio if m.group(2) == 'audio' else video
            # until an alternative name shows up, the primary name is the key
            curr[m.group(1)] = []
            name_map[m.group(1)] = m.group(1)
            continue

        m_temp = apname.match(message)
        # if we match alternate name and already have primary, then we're adding it
        if m_temp and m:
            # re-key the device under its alternative (unique) name and map
            # that name back to the primary, human friendly one
            curr[m_temp.group(1)] = []
            name_map[m_temp.group(1)] = m.group(1)
            del curr[m.group(1)]
            del name_map[m.group(1)]
        else:
            # not a line we understand, pass it on to the FFmpeg log
            msg2 = message.encode('utf8')
            av_log(NULL, loglevels[level], '%s', msg2)

        # an alternative name is only accepted on the line right after its
        # primary name
        m = None

    # list video devices options
    vid_opts = re.compile(' *\[dshow *@ *[\w]+\] +(pixel_format|vcodec)=([\w]+) +min +s=\d+x\d+ +fps=(\d+)\
 +max +s=(\d+)x(\d+) +fps=(\d+).*')
    # header lines printed by dshow that carry no option data; they are dropped
    # silently instead of being forwarded to the log
    pheader1 = re.compile(' *\[dshow *@ *[\w]+\] *(?:Pin|Selecting pin) (?:"Capture"|"Output"|Capture|Output).*')
    pheader2 = re.compile(' *\[dshow *@ *[\w]+\] *DirectShow (?:video|audio) (?:only )?device options.*')
    for video_stream in video:
        res = []
        list_dshow_opts(res, ("video=%s" % video_stream).encode('utf8'), b'list_options')

        for msg, level in res:
            message = msg.decode('utf8')
            opts = vid_opts.match(message)

            if not opts:
                if not pheader1.match(message) and not pheader2.match(message):
                    av_log(NULL, loglevels[level], '%s', msg)
                continue

            # g1: option kind, g2: its value, g3: min fps, g4 x g5: max frame
            # size, g6: max fps (the min frame size is matched but not captured)
            g1, g2, g3, g4, g5, g6 = opts.groups()
            if g1 == 'pixel_format':
                item = g2, "", (int(g4), int(g5)), (int(g3), int(g6))
            else:
                item = "", g2, (int(g4), int(g5)), (int(g3), int(g6))

            # skip configurations that were already recorded
            if item not in video[video_stream]:
                video[video_stream].append(item)

        video[video_stream] = sorted(video[video_stream])

    # list audio devices options
    paud_opts = re.compile(' *\[dshow *@ *[\w]+\] +ch= *(\d+), +bits= *(\d+),\
 +rate= *(\d+).*')
    for audio_stream in audio:
        res = []
        list_dshow_opts(res, ("audio=%s" % audio_stream).encode('utf8'), b'list_options')
        for msg, level in res:
            message = msg.decode('utf8')
            mopts = paud_opts.match(message)

            if mopts:
                # (channels, bits, rate) of this configuration
                opts = (int(mopts.group(1)), int(mopts.group(2)), int(mopts.group(3)))
                if opts not in audio[audio_stream]:
                    audio[audio_stream].append(opts)
            elif (not pheader1.match(message)) and (not pheader2.match(message)):
                av_log(NULL, loglevels[level], '%s', msg)
        audio[audio_stream] = sorted(audio[audio_stream])

    return video, audio, name_map


cdef object encode_text(object item, int encode):
    # Recursively walks ``item`` and converts the strings found in it to or
    # from utf-8: ``encode`` non-zero encodes them, zero decodes them.
    #
    # Dicts are updated in place (only their values are converted) and
    # returned. Any other iterable is rebuilt by calling its own class with the
    # converted elements. Non-iterable objects are returned unchanged.
    if isinstance(item, basestring):
        if encode:
            return item.encode('utf8')
        if isinstance(item, bytes):
            return item.decode('utf8')
        # Already text: there is nothing to decode. Python 3 text strings have
        # no ``decode`` method, so calling it here would raise AttributeError.
        return item

    if isinstance(item, dict):
        for k, v in item.items():
            item[k] = encode_text(v, encode)
        return item

    try:
        iter(item)
    except TypeError:
        # not a container, nothing to convert
        return item

    return item.__class__((encode_text(i, encode) for i in item))

def encode_to_bytes(item):
    '''Takes the item and walks it recursively whether it's a string, int, iterable,
    etc. and encodes all the strings to utf-8.

    Dicts are updated in place (only their values are converted); other
    iterables are rebuilt from their converted elements.

    :Parameters:

        `item`: anything
            The object to be walked and encoded.

    :returns:

        An object identical to the ``item``, but with all strings encoded to utf-8.
    '''
    return encode_text(item, 1)

def decode_to_unicode(item):
    '''Takes the item and walks it recursively whether it's a string, int, iterable,
    etc. and decodes all the strings from utf-8.

    Dicts are updated in place (only their values are converted); other
    iterables are rebuilt from their converted elements.

    :Parameters:

        `item`: anything
            The object to be walked and decoded.

    :returns:

        An object identical to the ``item``, but with all strings decoded from utf-8.
    '''
    return encode_text(item, 0)

def convert_to_str(item):
    '''Takes the item and walks it recursively whether it's a string, int, iterable,
    etc. and decodes all the strings from utf-8.

    This performs the same conversion as :func:`decode_to_unicode`.

    :Parameters:

        `item`: anything
            The object to be walked and decoded.

    :returns:

        An object identical to the ``item``, but with all strings decoded from utf-8.
    '''
    return encode_text(item, False)


================================================
FILE: ffpyplayer/writer.pxd
================================================
include 'includes/ffmpeg.pxi'


# C-level declaration of the media writer extension type implemented in
# writer.pyx.
cdef class MediaWriter(object):
    # the FFmpeg format (container) context of the writer
    cdef AVFormatContext *fmt_ctx
    # pointer to the MediaStream structs of the writer and, presumably, how
    # many of them there are - confirm against writer.pyx
    cdef MediaStream *streams
    cdef int n_streams
    cdef list config
    # FFmpeg option dictionary for the format
    cdef AVDictionary *format_opts
    cdef int64_t total_size
    # NOTE(review): looks like a flag recording that close() has run - confirm
    cdef int closed

    cpdef close(self)
    # declared nogil, i.e. callable without holding the GIL
    cdef void clean_up(MediaWriter self) nogil


# Per-stream state of a MediaWriter.
cdef struct MediaStream:
    # pointer to the stream to which we're adding frames.
    AVStream *av_stream
    int index
    AVCodec *codec
    AVCodecContext *codec_ctx
    # codec used to encode video
    AVCodecID codec_id
    # the width of the frame passed in (width_in) and of the frame actually
    # written to disk (width_out)
    int width_in
    int width_out
    # the height of the frame passed in (height_in) and of the frame actually
    # written to disk (height_out)
    int height_in
    int height_out
    # The denominator of the frame rate of the stream
    int den
    # The numerator of the frame rate of the stream
    int num
    # the pixel format of the frame passed in
    AVPixelFormat pix_fmt_in
    # the pixel format of the frame actually written to disk
    # if it's -1 (AV_PIX_FMT_NONE) then input will be used.
    AVPixelFormat pix_fmt_out

    # The frame in which the final image to be written to disk is held, when we
    # need to convert.
    AVFrame *av_frame
    SwsContext *sws_ctx
    int count
    int64_t pts
    int sync_fmt

    # FFmpeg option dictionary for the codec of this stream
    AVDictionary *codec_opts


================================================
FILE: ffpyplayer/writer.pyx
================================================
'''
FFmpeg based media writer
=========================

A FFmpeg based python media writer. See :class:`MediaWriter` for details.
Currently writes only video.
'''

__all__ = ('MediaWriter', )

include "includes/inline_funcs.pxi"

cdef extern from "string.h" nogil:
    void *memset(void *, int, size_t)

cdef extern from "stdlib.h" nogil:
    void *malloc(size_t)
    void free(void *)

cdef extern from "math.h" nogil:
    double floor(double)

cdef extern from "errno.h" nogil:
    int ENOENT
    int EAGAIN

from ffpyplayer.pic cimport Image

import ffpyplayer.tools  # required to init ffmpeg
from ffpyplayer.tools import encode_to_bytes, convert_to_str
from copy import deepcopy
from ffpyplayer.tools import get_supported_framerates, get_supported_pixfmts

# Video sync modes stored in MediaStream.sync_fmt. The writer picks one per
# stream from the AVFMT_VARIABLE_FPS / AVFMT_NOTIMESTAMPS output format flags.
DEF VSYNC_PASSTHROUGH = 0
DEF VSYNC_CFR = 1
DEF VSYNC_VFR = 2
DEF VSYNC_DROP = 0xff

# errno values normalized to be negative regardless of the platform's sign
# convention, so they can be compared directly against the (negative) error
# codes returned by the FFmpeg calls in this module.
cdef int AV_ENOENT = ENOENT if ENOENT < 0 else -ENOENT
cdef int AV_EAGAIN = EAGAIN if EAGAIN < 0 else -EAGAIN


cdef class MediaWriter(object):
    '''An FFmpeg based media writer class. Currently only supports video.

    With this class one can write images frames stored in many different pixel
    formats into a multi-stream video file using :meth:`write_frame`. All FFmpeg
    codecs and pixel formats are supported.

    :Parameters:

        `filename`: str
            The filename of the media file to create. Will be encoded using utf8
            before passing to FFmpeg.
        `streams`: list of dicts
            A list of streams to create in the file. ``streams``
            is a list of dicts, where each dict configures the corresponding stream.
            The keywords listed below are available. One can also specify default
            values for the keywords for all streams using ``kwargs``. Keywords also
            found in ``streams`` will overwrite those in ``kwargs``:

                `pix_fmt_in`: str
                    The pixel format of the :class:`~ffpyplayer.pic.Image`
                    to be passed to :meth:`write_frame` for this stream. Can be one of
                    :attr:`ffpyplayer.tools.pix_fmts`.
                `width_in`: int
                    The width of the :class:`ffpyplayer.pic.Image` that will be
                    passed to :meth:`write_frame` for this stream.
                `height_in`: int
                    The height of the :class:`ffpyplayer.pic.Image` that will be
                    passed to :meth:`write_frame` for this stream.
                `pix_fmt_out`: str
                    The pixel format in which frames will be
                    written to the file for this stream. Can be one of
                    :attr:`ffpyplayer.tools.pix_fmts`. Defaults to ``pix_fmt_in``
                    if not provided. Not every pixel format is supported for each
                    encoding codec, see :func:`~ffpyplayer.tools.get_supported_pixfmts`
                    for which pixel formats are supported for ``codec``.
                `width_out`: int
                    The width at which frames will be written to the file for this
                    stream. Defaults to ``width_in`` if not provided.
                `height_out`: int
                    The height at which frames will be written to the file for this
                    stream. Defaults to ``height_in`` if not provided.
                `codec`: str
                    The codec used to write the frames to the file. Can be one of
                    the encoding codecs in :attr:`ffpyplayer.tools.codecs_enc`.

                    If not provided, it defaults to the default best codec for the format
                    provided in ``fmt`` or guessed from the ``filename``.
                    See :func:`ffpyplayer.tools.get_format_codecs`
                `frame_rate`: 2-tuple of ints
                    A 2-tuple of ints representing the frame rate to be used when writing
                    the file. The first element is the numerator, while the second is the
                    denominator of a ratio describing the rate. E.g. (2997, 100) describes
                    29.97 fps.

                    The timestamps of the frames written using :meth:`write_frame` do
                    not necessarily need to be multiples of the frame rate because they might be
                    forced to matching timestamps if required. Not every frame rate is
                    supported for each encoding codec, see
                    :func:`ffpyplayer.tools.get_supported_framerates` for which frame
                    rates are supported for ``codec``.

        `fmt`: str
            The format to use for the output. Can be one of
            :attr:`ffpyplayer.tools.formats_out`. Defaults to empty string.
            If not provided, ``filename`` will be used to determine the format,
            otherwise this arg will be used.
        `lib_opts`: dict or list of dicts
            A dictionary of options that will be passed
            to the ffmpeg libraries, codecs, sws, and formats when opening them.
            This accepts most of the options that can be passed to ffmpeg libraries.
            See below for examples. Both the keywords and values must be strings.
            It can be passed a dict in which case it'll be applied to all the streams
            or a list containing a dict for each stream.
        `metadata`: dict or list of dicts
            Metadata that will be written to the streams, if
            supported by the stream. See below for examples. Both the keywords and
            values must be strings. It can be passed a dict in which case it'll be
            applied to all the streams or a list containing a dict for each stream.
            If (these) metadata is not supported, it will silently fail to write them.
        `overwrite`: bool
            Whether we should overwrite an existing file.
            If False, an error will be raised if the file already exists. If True,
            the file will be overwritten if it exists.
        `**kwargs`:
            Accepts default values for all ``streams`` which will be used if these
            keywords are not provided for any stream.

    See :ref:`write-simple` and :ref:`write-h264` for examples.
    '''

    def __cinit__(self, filename, streams, fmt='', lib_opts={}, metadata={},
                  overwrite=False, **kwargs):
        # Builds the complete output pipeline: allocates the format context,
        # configures and opens one encoder per entry in ``streams``, opens the
        # output file (unless the format needs none) and writes the header.
        # Each explicit error check releases what has been allocated so far
        # through clean_up() before raising.
        cdef int res = 0, n = len(streams), r
        cdef char *format_name = NULL
        cdef char msg[256]
        cdef MediaStream *s
        cdef AVDictionaryEntry *dict_temp = NULL
        cdef bytes msg2
        cdef const AVCodec *codec_desc

        # Work on deep copies so the caller's containers (and the shared
        # mutable defaults of lib_opts/metadata) are never modified.
        filename = encode_to_bytes(filename)
        streams = encode_to_bytes(deepcopy(streams))
        if fmt:
            fmt = fmt.encode('utf8')
        lib_opts = encode_to_bytes(deepcopy(lib_opts))
        metadata = encode_to_bytes(deepcopy(metadata))
        kwargs = encode_to_bytes(deepcopy(kwargs))

        self.total_size = 0
        self.closed = 0
        self.format_opts = NULL
        if fmt:
            format_name = fmt
        if not n:
            raise Exception('Streams parameters not provided.')
        # Per-stream configuration: the kwargs defaults overlaid with the
        # stream specific values.
        conf = [deepcopy(kwargs) for i in streams]
        for r in range(n):
            conf[r].update(streams[r])
        self.config = conf

        self.fmt_ctx = NULL
        res = avformat_alloc_output_context2(&self.fmt_ctx, NULL, format_name, filename)
        if res < 0 or self.fmt_ctx == NULL:
            raise Exception('Failed to create format context: ' + tcode(emsg(res, msg, sizeof(msg))))
        self.streams = <MediaStream *>malloc(n * sizeof(MediaStream))
        if self.streams == NULL:
            self.clean_up()
            raise MemoryError()
        s = self.streams
        self.n_streams = n
        # Zero everything so clean_up() can tell which members were allocated.
        memset(s, 0, n * sizeof(MediaStream))
        # A single dict (or a one element list) applies to every stream.
        if isinstance(lib_opts, dict):
            lib_opts = [lib_opts, ] * n
        elif len(lib_opts) == 1:
            lib_opts = lib_opts * n
        if isinstance(metadata, dict):
            metadata = [metadata, ] * n
        elif len(metadata) == 1:
            metadata = metadata * n

        for r in range(n):
            s[r].codec_opts = NULL
            config = conf[r]
            # Output parameters default to the corresponding input parameters.
            if 'pix_fmt_out' not in config or not config['pix_fmt_out']:
                config['pix_fmt_out'] = config['pix_fmt_in']
            if 'width_out' not in config or not config['width_out']:
                config['width_out'] = config['width_in']
            if 'height_out' not in config or not config['height_out']:
                config['height_out'] = config['height_in']
            if 'codec' not in config or not config['codec']:
                # No codec requested: use the output format's default video codec.
                s[r].codec_id = self.fmt_ctx.oformat.video_codec
                codec_desc = avcodec_find_encoder(s[r].codec_id)
                if codec_desc == NULL:
                    self.clean_up()
                    raise Exception('Default codec not found for output file.')
                config['codec'] = codec_desc.name
            else:
                codec_desc = avcodec_find_encoder_by_name(config['codec'])
                if codec_desc == NULL:
                    self.clean_up()
                    raise Exception('Encoder codec %s not available.' % config['codec'])
                s[r].codec_id = codec_desc.id
            s[r].width_in = config['width_in']
            s[r].width_out = config['width_out']
            s[r].height_in = config['height_in']
            s[r].height_out = config['height_out']
            s[r].num, s[r].den = config['frame_rate']
            if av_get_pix_fmt(config['pix_fmt_in']) == AV_PIX_FMT_NONE:
                self.clean_up()
                raise Exception('Pixel format %s not found.' % config['pix_fmt_in'])
            if av_get_pix_fmt(config['pix_fmt_out']) == AV_PIX_FMT_NONE:
                self.clean_up()
                raise Exception('Pixel format %s not found.' % config['pix_fmt_out'])
            s[r].pix_fmt_in = av_get_pix_fmt(config['pix_fmt_in'])
            s[r].pix_fmt_out = av_get_pix_fmt(config['pix_fmt_out'])

            s[r].codec = avcodec_find_encoder(s[r].codec_id)
            if s[r].codec == NULL:
                self.clean_up()
                raise Exception('Codec %s not found.' % config['codec'])
            s[r].av_stream = avformat_new_stream(self.fmt_ctx, NULL)
            if s[r].av_stream == NULL:
                self.clean_up()
                raise Exception("Couldn't create stream %d." % r)
            s[r].index = s[r].av_stream.index

            s[r].codec_ctx = avcodec_alloc_context3(s[r].codec)
            if s[r].codec_ctx == NULL:
                self.clean_up()
                raise MemoryError("Couldn't create stream %d." % r)

            s[r].codec_ctx.width = s[r].width_out
            s[r].codec_ctx.height = s[r].height_out
            supported_rates = get_supported_framerates(config['codec'], (s[r].num, s[r].den))
            if supported_rates and supported_rates[0] != (s[r].num, s[r].den):
                self.clean_up()
                raise Exception('%d/%d is not a supported frame rate for codec %s, the '
                                'closest valid rate is %d/%d' % (
                                    s[r].num, s[r].den, config['codec'],
                                    supported_rates[0][0], supported_rates[0][1]))
            s[r].av_stream.avg_frame_rate.num = s[r].num
            s[r].av_stream.avg_frame_rate.den = s[r].den
            s[r].av_stream.r_frame_rate.num = s[r].num
            s[r].av_stream.r_frame_rate.den = s[r].den
            # The codec time base is the inverse of the frame rate.
            s[r].codec_ctx.time_base.den = s[r].num
            s[r].codec_ctx.time_base.num = s[r].den
            s[r].codec_ctx.pix_fmt = s[r].pix_fmt_out

            for k, v in metadata[r].items():
                k_b = k.encode('utf8')
                res = av_dict_set(&s[r].av_stream.metadata, k_b, v, 0)
                if res < 0:
                    av_dict_free(&s[r].av_stream.metadata)
                    self.clean_up()
                    raise Exception('Failed to set option %s: %s for stream %d; %s'
                                    % (k, v, r, tcode(emsg(res, msg, sizeof(msg)))))
            # Some formats want stream headers to be separate
            if self.fmt_ctx.oformat.flags & AVFMT_GLOBALHEADER:
                s[r].codec_ctx.flags |= AV_CODEC_FLAG_GLOBAL_HEADER

            supported_fmts = get_supported_pixfmts(config['codec'], config['pix_fmt_out'])
            if supported_fmts and supported_fmts[0] != config['pix_fmt_out'].decode('utf8'):
                self.clean_up()
                raise Exception('%s is not a supported pixel format for codec %s, the '
                'best valid format is %s' % (config['pix_fmt_out'], config['codec'],
                                            supported_fmts[0]))

            # A conversion frame and scaler are only needed when the input
            # differs from the output in size or pixel format.
            if (s[r].codec_ctx.pix_fmt != s[r].pix_fmt_in or s[r].codec_ctx.width != s[r].width_in or
                s[r].codec_ctx.height != s[r].height_in):
                s[r].av_frame = av_frame_alloc()
                if s[r].av_frame == NULL:
                    self.clean_up()
                    raise MemoryError()
                s[r].av_frame.format = s[r].pix_fmt_out
                s[r].av_frame.width = s[r].width_out
                s[r].av_frame.height = s[r].height_out
                if av_frame_get_buffer(s[r].av_frame, 32) < 0:
                    self.clean_up()
                    raise Exception('Cannot allocate frame buffers.')

                s[r].sws_ctx = sws_getCachedContext(
                    NULL, s[r].width_in, s[r].height_in, s[r].pix_fmt_in,
                    s[r].codec_ctx.width, s[r].codec_ctx.height,
                    s[r].codec_ctx.pix_fmt, SWS_BICUBIC, NULL, NULL, NULL)
                if s[r].sws_ctx == NULL:
                    self.clean_up()
                    raise Exception('Cannot find conversion context.')

            for k, v in lib_opts[r].items():
                k_b = k.encode('utf8')
                if opt_default(k_b, v, s[r].sws_ctx, NULL, NULL, NULL, &self.format_opts, &s[r].codec_opts) < 0:
                    self.clean_up()
                    raise Exception('library option %s: %s not found' % (k, v))

            res = avcodec_open2(s[r].codec_ctx, s[r].codec, &s[r].codec_opts)
            # Whatever is still in codec_opts after opening was not consumed
            # by the codec; report it through the FFmpeg log.
            bad_vals = ''
            dict_temp = av_dict_get(s[r].codec_opts, b"", dict_temp, AV_DICT_IGNORE_SUFFIX)
            while dict_temp != NULL:
                bad_vals += '%s: %s, ' % (dict_temp.key, dict_temp.value)
                dict_temp = av_dict_get(s[r].codec_opts, b"", dict_temp, AV_DICT_IGNORE_SUFFIX)
            av_dict_free(&s[r].codec_opts)
            if bad_vals:
                msg2 = ("The following options were not recognized: %s.\n" % bad_vals).encode('utf8')
                av_log(NULL, AV_LOG_ERROR, '%s', msg2)
            if res < 0:
                self.clean_up()
                raise Exception('Failed to open codec for stream %d; %s' % (r, tcode(emsg(res, msg, sizeof(msg)))))

            res = avcodec_parameters_from_context(s[r].av_stream.codecpar, s[r].codec_ctx)
            if res < 0:
                self.clean_up()
                raise Exception('Failed to initialize stream parameters for stream %d; %s' % (r, tcode(emsg(res, msg, sizeof(msg)))))

            s[r].pts = 0
            if self.fmt_ctx.oformat.flags & AVFMT_VARIABLE_FPS:
                if self.fmt_ctx.oformat.flags & AVFMT_NOTIMESTAMPS:
                    s[r].sync_fmt = VSYNC_PASSTHROUGH
                else:
                    s[r].sync_fmt = VSYNC_VFR
            else:
                s[r].sync_fmt = VSYNC_CFR

        if not (self.fmt_ctx.oformat.flags & AVFMT_NOFILE):
            # avio_check returning zero means the file is already there.
            res = avio_check(filename, 0)
            if (not res) and not overwrite:
                self.clean_up()
                raise Exception('File %s already exists.' % filename)
            elif res < 0 and res != AV_ENOENT:
                self.clean_up()
                raise Exception('File error: ' + tcode(emsg(res, msg, sizeof(msg))))
            res = avio_open2(&self.fmt_ctx.pb, filename, AVIO_FLAG_WRITE, NULL, NULL)
            if res < 0:
                self.clean_up()
                raise Exception('File error: ' + tcode(emsg(res, msg, sizeof(msg))))
        res = avformat_write_header(self.fmt_ctx, &self.format_opts)
        # Same reporting as for the codec options: leftovers were not consumed.
        bad_vals = ''
        dict_temp = av_dict_get(self.format_opts, b"", dict_temp, AV_DICT_IGNORE_SUFFIX)
        while dict_temp != NULL:
            bad_vals += '%s: %s, ' % (dict_temp.key, dict_temp.value)
            dict_temp = av_dict_get(self.format_opts, b"", dict_temp, AV_DICT_IGNORE_SUFFIX)
        av_dict_free(&self.format_opts)
        if bad_vals:
            msg2 = ("The following options were not recognized: %s.\n" % bad_vals).encode('utf8')
            av_log(NULL, AV_LOG_ERROR, '%s', msg2)
        if res < 0:
            self.clean_up()
            raise Exception('Error writing header: ' + tcode(emsg(res, msg, sizeof(msg))))

    def __dealloc__(self):
        # Finalize the output when the instance is destroyed. close() returns
        # immediately if the writer has already been closed.
        self.close()

    cpdef close(self):
        '''Closes the writer and writes any frames cached and not yet written.

        Until called, or until the instance is deleted (and this is implicitly called)
        the file is not fully written.

        Every stream that received at least one frame is flushed, the trailer
        is written if anything was flushed, and all FFmpeg resources held by
        the instance are released. Calling it again is a no-op.

        An exception is raised if flushing an encoder or writing a packet
        fails. The resources are released in that case as well, but the file
        is left without a trailer.

        .. warning::

            After calling this method, calling any other class method on this instance may
            result in a crash or program corruption.
        '''
        cdef int r, res, wrote = 0
        cdef char msg[256]
        cdef AVPacket pkt
        if self.closed:
            return
        # Mark as closed first so a failure below can never lead to a second
        # flush attempt (e.g. from __dealloc__).
        self.closed = 1

        with nogil:
            # Nothing was (fully) set up: just release whatever exists.
            if self.fmt_ctx == NULL or (not self.n_streams) or self.streams[0].codec_ctx == NULL:
                self.clean_up()
                with gil:
                    return

            for r in range(self.n_streams):
                # Streams that never received a frame have nothing to flush.
                if not self.streams[r].count:
                    continue
                wrote = 1

                av_init_packet(&pkt)
                pkt.data = NULL
                pkt.size = 0

                # A NULL frame puts the encoder into draining mode.
                res = avcodec_send_frame(self.streams[r].codec_ctx, NULL)
                if res < 0:
                    # closed is already set, so this is the only chance to
                    # release the contexts and the output file handle.
                    self.clean_up()
                    with gil:
                        raise Exception('Error sending NULL frame: ' + tcode(emsg(res, msg, sizeof(msg))))

                while True:
                    res = avcodec_receive_packet(self.streams[r].codec_ctx, &pkt)
                    if res < 0:
                        # AVERROR_EOF signals the encoder is fully drained.
                        if res != AVERROR_EOF:
                            self.clean_up()
                            with gil:
                                raise Exception('Error getting encoded packet: ' + tcode(emsg(res, msg, sizeof(msg))))
                        break

                    # Convert timestamps from the codec to the stream time base.
                    if pkt.pts != AV_NOPTS_VALUE:
                        pkt.pts = av_rescale_q(pkt.pts, self.streams[r].codec_ctx.time_base, self.streams[r].av_stream.time_base)
                    if pkt.dts != AV_NOPTS_VALUE:
                        pkt.dts = av_rescale_q(pkt.dts, self.streams[r].codec_ctx.time_base, self.streams[r].av_stream.time_base)
                    pkt.stream_index = self.streams[r].av_stream.index
                    self.total_size += pkt.size

                    res = av_interleaved_write_frame(self.fmt_ctx, &pkt)
                    if res < 0:
                        self.clean_up()
                        with gil:
                            raise Exception('Error writing packet: ' + tcode(emsg(res, msg, sizeof(msg))))
            if wrote:
                av_write_trailer(self.fmt_ctx)
            self.clean_up()

    def write_frame(MediaWriter self, Image img, double pts, int stream=0):
        '''Writes a :class:`ffpyplayer.pic.Image` frame to the specified stream.

        If the input data is different than the frame written to disk in either
        size or pixel format as specified when creating the stream, the frame
        is converted before writing. But the input image must match the size and
        format as that specified when creating this stream.

        :Parameters:

            `img`: :class:`ffpyplayer.pic.Image`
                The :class:`ffpyplayer.pic.Image` instance containing the frame
                to be written to disk. Must not be None.
            `pts`: float
                The timestamp of this frame in video time. E.g. 0.5
                means the frame should be displayed by a player at 0.5 seconds after
                the video started playing. In a sense, the frame rate defines which
                timestamps are valid timestamps. However, this is not always the
                case, so if timestamps are invalid for a particular format, they are
                forced to valid values, if possible.
            `stream`: int
                The stream number to which to write this frame. Defaults to 0.
                Must be in the range ``[0, number of streams)``.

        :returns:

            (int): The approximate number of bytes written to disk so far for this file.

            .. note::

                This is not the same as the number of bytes passed to this function
                so far, because the encoders cache data before writing to disk.
                So although some frames may have been passed, the return value
                may not represent this.

                An extreme example is where the same frame is passed many times
                to h264; the encoder will only write this frame once when the Writer
                object is closed and encoders are flushed, so this function
                will only return 0.

        :raises:

            `TypeError`: if ``img`` is None.
            `Exception`: if ``stream`` is out of range, if the image does not
            match the stream's input size or pixel format, or if encoding or
            writing fails.
            `MemoryError`: if the frame cannot be cloned.

        See :ref:`examples` for its usage.
        '''
        cdef int res = 0
        cdef int frame_cloned = 0
        cdef AVFrame *frame_in
        cdef AVFrame *frame_out
        cdef MediaStream *s
        cdef int64_t rounded_pts
        cdef AVPacket pkt
        cdef char msg[256]

        # A typed argument still accepts None; reject it before touching its
        # C level members.
        if img is None:
            raise TypeError('img must be an Image instance, not None.')
        # Negative indices would address memory in front of the streams array.
        if stream < 0 or stream >= self.n_streams:
            raise Exception('Invalid stream number %d' % stream)
        frame_in = img.frame
        s = self.streams + stream
        if (frame_in.width != s.width_in or frame_in.height != s.height_in or
            frame_in.format != <AVPixelFormat>s.pix_fmt_in):
            raise Exception("Input image doesn't match stream specified parameters.")

        with nogil:
            if s.av_frame != NULL:
                # Input and output differ: convert into the stream's own frame.
                frame_out = s.av_frame
                sws_scale(s.sws_ctx, <const uint8_t *const *>frame_in.data, frame_in.linesize,
                          0, frame_in.height, frame_out.data, frame_out.linesize)
            else:
                # Work on a new reference so the pts/pict_type changes below
                # do not modify the caller's image.
                frame_out = av_frame_clone(frame_in)
                frame_cloned = 1
                if frame_out == NULL:
                    with gil:
                        raise MemoryError

            # Seconds -> codec time base ticks, rounded to the nearest tick.
            rounded_pts = <int64_t>floor(pts / av_q2d(s.codec_ctx.time_base) + 0.5)
            frame_out.pict_type = AV_PICTURE_TYPE_NONE
            frame_out.pts = rounded_pts

            av_init_packet(&pkt)
            pkt.data = NULL
            pkt.size = 0

            res = avcodec_send_frame(s.codec_ctx, frame_out)
            # The clone is not used after this call, whatever the result, so
            # it is released here once instead of on every exit path.
            if frame_cloned:
                av_frame_free(&frame_out)
            if res < 0:
                with gil:
                    raise Exception('Error sending frame: ' + tcode(emsg(res, msg, sizeof(msg))))

            while True:
                res = avcodec_receive_packet(s.codec_ctx, &pkt)
                if res < 0:
                    # EAGAIN/EOF just mean no packet is available right now.
                    if res != AVERROR_EOF and res != AV_EAGAIN:
                        with gil:
                            raise Exception('Error getting encoded packet: ' + tcode(emsg(res, msg, sizeof(msg))))
                    break

                # Convert timestamps from the codec to the stream time base.
                if pkt.pts != AV_NOPTS_VALUE:
                    pkt.pts = av_rescale_q(pkt.pts, s.codec_ctx.time_base, s.av_stream.time_base)
                if pkt.dts != AV_NOPTS_VALUE:
                    pkt.dts = av_rescale_q(pkt.dts, s.codec_ctx.time_base, s.av_stream.time_base)
                pkt.stream_index = s.av_stream.index
                self.total_size += pkt.size

                res = av_interleaved_write_frame(self.fmt_ctx, &pkt)
                if res < 0:
                    with gil:
                        raise Exception('Error writing packet: ' + tcode(emsg(res, msg, sizeof(msg))))

            s.pts += 1
            # close() only flushes streams whose count is non-zero.
            s.count += 1
        return self.total_size

    def get_configuration(self):
        '''Returns a copy of the per-stream configuration in effect for this
        writer.

        The result can differ from the dicts given to the constructor: it
        holds the parameters that were actually used, i.e. with the keyword
        defaults merged in and with omitted output parameters and codec
        filled in. Changing the returned list does not affect the writer.

        :returns:

            list: One dict per stream, in stream order.

        For example:

        .. code-block:: python

            from ffpyplayer.writer import MediaWriter

            w, h = 640, 480
            out_opts = {'pix_fmt_in': 'rgb24', 'width_in': w, 'height_in': h,
                        'codec': 'rawvideo', 'frame_rate': (5, 1)}
            writer = MediaWriter(
                'output.avi', [out_opts] * 2, width_out=w // 2, height_out=h // 2)

            print(writer.get_configuration())
            [{'height_in': 480, 'codec': 'rawvideo', 'width_in': 640, 'frame_rate': (5, 1),
            'pix_fmt_in': 'rgb24', 'width_out': 320, 'height_out': 240, 'pix_fmt_out': 'rgb24'},
            {'height_in': 480, 'codec': 'rawvideo', 'width_in': 640, 'frame_rate': (5, 1),
            'pix_fmt_in': 'rgb24', 'width_out': 320, 'height_out': 240, 'pix_fmt_out': 'rgb24'}]
        '''
        # Copy first so the stored configuration itself is never converted.
        snapshot = deepcopy(self.config)
        return convert_to_str(snapshot)

    cdef void clean_up(MediaWriter self) nogil:
        # Releases every FFmpeg resource owned by the writer and resets the
        # instance to an empty state. Members are checked/reset as they are
        # freed, so calling this repeatedly is harmless.
        cdef int idx
        cdef MediaStream *stream

        for idx in range(self.n_streams):
            stream = self.streams + idx
            # The conversion frame only exists when in and out formats differ.
            if stream.av_frame != NULL:
                av_frame_free(&stream.av_frame)
                stream.av_frame = NULL
            if stream.sws_ctx != NULL:
                sws_freeContext(stream.sws_ctx)
                stream.sws_ctx = NULL
            if stream.codec_opts != NULL:
                av_dict_free(&stream.codec_opts)
            if stream.codec_ctx != NULL:
                avcodec_free_context(&stream.codec_ctx)
        free(self.streams)
        self.n_streams = 0
        self.streams = NULL

        if self.fmt_ctx != NULL:
            # Only formats that write through an AVIO handle have one to close.
            if not (self.fmt_ctx.oformat.flags & AVFMT_NOFILE):
                if self.fmt_ctx.pb != NULL:
                    avio_close(self.fmt_ctx.pb)
            avformat_free_context(self.fmt_ctx)
            self.fmt_ctx = NULL
        av_dict_free(&self.format_opts)
        self.total_size = 0


================================================
FILE: pyproject.toml
================================================
# Build-time requirements (PEP 518): the packages needed to run setup.py,
# including the Cython release used to compile the extension modules.
[build-system]
requires = [
    "setuptools", "wheel", "cython~=3.0.11",
]


================================================
FILE: setup.py
================================================
from setuptools import setup, Extension
from os.path import join, exists, isdir, dirname, abspath
from os import environ, listdir, mkdir
from distutils.command.build_ext import build_ext
import sys
import ffpyplayer


# Determine on which platform we are
platform = sys.platform

# detect Python for android project (http://github.com/kivy/python-for-android)
# or kivy-ios (http://github.com/kivy/kivy-ios)
ndkplatform = environ.get('NDKPLATFORM')
if ndkplatform is not None and environ.get('LIBLINK'):
    platform = 'android'
kivy_ios_root = environ.get('KIVYIOSROOT', None)
if kivy_ios_root is not None:
    platform = 'ios'


# There are issues with using cython at all on some platforms;
# exclude them from using or declaring cython.

# This determines whether Cython specific functionality may be used.
can_use_cython = True
# This sets whether or not Cython gets added to setup_requires.
declare_cython = False

if platform in ('ios', 'android'):
    # NEVER use or declare cython on these platforms
    print('Not using cython on %s' % platform)
    can_use_cython = False
else:
    declare_cython = True

# Both start out as the directory holding this file; build_path may later be
# redirected by the build_ext command.
src_path = build_path = dirname(__file__)
print(f'Source/build path: {src_path}')

# select which ffmpeg libraries will be available
c_options = {
    # If true, filters will be used
    'config_avfilter': True,
    'config_avdevice': True,
    'config_swscale': True,
    'config_rtsp_demuxer': True,
    'config_mmsh_protocol': True,
    'config_postproc': platform != 'win32',
    # whether sdl is included as an option
    'config_sdl': True, # not implemented yet
    'has_sdl2': True,
    'use_sdl2_mixer': False,
    # these should be true
    'config_avutil':True,
    'config_avcodec':True,
    'config_avformat':True,
    'config_swresample':True
}

# Every option except has_sdl2 can be overridden with an environment variable
# named after the upper-cased key, holding an integer (0 disables).
for key in list(c_options.keys()):
    if key == 'has_sdl2':
        continue

    ukey = key.upper()
    if ukey in environ:
        value = bool(int(environ[ukey]))
        print('Environ change {0} -> {1}'.format(key, value))
        c_options[key] = value

if (not c_options['config_avfilter']) and not c_options['config_swscale']:
    raise Exception(
        'At least one of config_avfilter and config_swscale must be enabled.')

# if c_options['config_avfilter'] and ((not c_options['config_postproc']) or \
#     not c_options['config_swscale']):
#     raise Exception(
#         'config_avfilter requires the postproc and swscale binaries.')

# The four core libraries are mandatory: force them back on even if an
# environment override tried to disable one of them.
c_options['config_avutil'] = c_options['config_avcodec'] = True
c_options['config_avformat'] = c_options['config_swresample'] = True


class FFBuildExt(build_ext, object):
    """``build_ext`` command used to build the ffpyplayer extensions.

    It mixes in Cython's ``build_ext`` when Cython may be used, records the
    build directory in the module level ``build_path`` and applies the
    project's compiler/linker flags to every extension.
    """

    def __new__(cls, *a, **kw):
        # Note how this class is declared as a subclass of distutils
        # build_ext as the Cython version may not be available in the
        # environment it is initially started in. However, if Cython
        # can be used, setuptools will bring Cython into the environment
        # thus its version of build_ext will become available.
        # The reason why this is done as a __new__ rather than through a
        # factory function is because there are distutils functions that check
        # the values provided by cmdclass with issubclass, and so it would
        # result in an exception.
        # The following essentially supply a dynamically generated subclass
        # that mix in the cython version of build_ext so that the
        # functionality provided will also be executed.
        if can_use_cython:
            from Cython.Distutils import build_ext as cython_build_ext
            build_ext_cls = type(
                'FFBuildExt', (FFBuildExt, cython_build_ext), {})
            return super(FFBuildExt, cls).__new__(build_ext_cls)
        else:
            return super(FFBuildExt, cls).__new__(cls)

    def finalize_options(self):
        """Finalize the command options and update the global ``build_path``.

        ``build_path`` is switched to ``self.build_lib`` when that directory
        exists and the build is not an in-place build.

        :returns: whatever the parent ``finalize_options`` returns.
        """
        global build_path
        retval = super(FFBuildExt, self).finalize_options()
        if (self.build_lib is not None and exists(self.build_lib) and
                not self.inplace):
            build_path = self.build_lib
            # Report the new location, not the (unchanged) source path.
            print(f'Build path changed to: {build_path}')
        return retval

    def build_extensions(self):
        """Set the compile and link arguments of all extensions and build them.

        For every compiler other than MSVC the extensions are compiled with
        ``-O3 -fno-strict-aliasing -Wno-error``; on macOS the linker
        additionally gets ``-headerpad_max_install_names``. MSVC builds get no
        extra arguments.
        """
        compiler = self.compiler.compiler_type
        print('Using compiler "{}"'.format(compiler))

        args = []
        link_args = []
        if compiler != 'msvc':
            args += ["-O3", '-fno-strict-aliasing', '-Wno-error']

            if platform == 'darwin':
                link_args.append('-headerpad_max_install_names')
        print('Using compiler args: {}'.format(args))
        print('Using linker args: {}'.format(link_args))

        # Replaces any arguments that were previously set on the extensions.
        for ext in self.extensions:
            ext.extra_compile_args = args
            ext.extra_link_args = link_args
        super(FFBuildExt, self).build_extensions()


# Maps the build_ext command name to the customized FFBuildExt command class.
cmdclass = {'build_ext': FFBuildExt}


def getoutput(cmd):
    """Run ``cmd`` through the shell and return what it wrote to stdout.

    :param cmd: the command line to execute (passed to the shell as is).
    :returns: the raw stdout of the command as ``bytes`` when it exits with
        status zero. If the exit status is non-zero a warning (and the
        command's stderr, if any) is printed and the empty string ``""`` is
        returned instead.
    """
    import subprocess
    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    # communicate() drains both pipes while waiting for the process. A plain
    # wait() can deadlock once the child fills one of the pipe buffers.
    stdout_content, stderr_content = p.communicate()
    if p.returncode:  # if not returncode == 0
        print('WARNING: A problem occured while running {0} (code {1})\n'
              .format(cmd, p.returncode))
        if stderr_content:
            print('{0}\n'.format(stderr_content))
        return ""
    return stdout_content


def pkgconfig(*packages, **kw):
    """Collect build settings for ``packages`` from ``pkg-config``.

    Runs ``pkg-config --libs --cflags`` for the given packages and sorts the
    ``-I``, ``-L`` and ``-l`` tokens of its output into the ``include_dirs``,
    ``library_dirs`` and ``libraries`` lists of ``kw``. Tokens with any other
    prefix are ignored.

    :param packages: names of the pkg-config packages to query.
    :param kw: initial values; lists already present are appended to.
    :returns: ``kw`` with the discovered entries added.
    """
    prefix_to_key = {
        '-I': 'include_dirs',
        '-L': 'library_dirs',
        '-l': 'libraries',
    }
    command = 'pkg-config --libs --cflags {}'.format(' '.join(packages))
    for raw in getoutput(command).split():
        # The first two characters select the kind of setting, the remainder
        # is its value.
        key = prefix_to_key.get(raw[:2].decode('utf-8'))
        if key:
            kw.setdefault(key, []).append(raw[2:].decode('utf-8'))
    return kw


def get_paths(name):
    """Resolve the library and include directories for dependency ``name``.

    Reads ``<name>_ROOT``, ``<name>_INCLUDE_DIR`` and ``<name>_LIB_DIR`` from
    the environment. When a valid root directory is given, its ``include``
    and ``lib`` sub-directories are the defaults for the other two
    variables. Any resulting path that is not an existing directory is
    reported and replaced by ``None``.

    :param name: environment variable prefix, e.g. ``'FFMPEG'``.
    :returns: tuple ``(lib, include)``; either item may be ``None``.
    """
    root = environ.get('{}_ROOT'.format(name))
    print('{}_ROOT: "{}"'.format(name, root))
    if root is not None and not isdir(root):
        print('Root directory "{}" is not valid'.format(root))
        root = None

    # Without a usable root there is no fallback for the specific variables.
    default_include = default_lib = None
    if root is not None:
        default_include = join(root, 'include')
        default_lib = join(root, 'lib')
    include = environ.get('{}_INCLUDE_DIR'.format(name), default_include)
    lib = environ.get('{}_LIB_DIR'.format(name), default_lib)

    if not (include is None or isdir(include)):
        print('Include directory "{}" is not valid'.format(include))
        include = None
    if not (lib is None or isdir(lib)):
        print('Lib directory "{}" is not valid'.format(lib))
        lib = None
    return lib, include


# Link/include settings shared by every extension module. They are filled in
# by exactly one of the three branches below, selected from the environment.
libraries = []
library_dirs = []
include_dirs = []

if "KIVYIOSROOT" in environ:
    # enable kivy-ios compilation
    # Only include directories are configured here; libraries and
    # library_dirs stay empty.
    # NOTE(review): environ.get() yields None for an unset variable, so this
    # list may contain None entries - confirm the toolchain always sets both.
    include_dirs = [
        environ.get("SDL_INCLUDE_DIR"),
        environ.get("FFMPEG_INCLUDE_DIR")]

elif "NDKPLATFORM" in environ:
    # enable python-for-android/py4a compilation

    # ffmpeg:
    ffmpeg_lib, ffmpeg_include = get_paths('FFMPEG')
    libraries.extend([
        'avcodec', 'avdevice', 'avfilter', 'avformat',
        'avutil', 'swscale', 'swresample', 'm'
    ])
    if c_options['config_postproc']:
        libraries.append('postproc')
    # NOTE(review): get_paths() may return None for either value and both are
    # appended unchecked (unlike the SDL paths below) - confirm intended.
    library_dirs.append(ffmpeg_lib)
    include_dirs.append(ffmpeg_include)

    # sdl:
    # Both the SDL lib and include directories are required in this branch.
    sdl_lib, sdl_include = get_paths('SDL')
    if sdl_lib and sdl_include:
        libraries.append('SDL2')
        library_dirs.append(sdl_lib)
        include_dirs.append(sdl_include)
    else:  # old toolchain
        raise ValueError('SDL2 not found')

    # sdl2 mixer:
    # NOTE(review): the assignment below stores the value back unchanged.
    c_options['use_sdl2_mixer'] = c_options['use_sdl2_mixer']
    if c_options['use_sdl2_mixer']:
        # Only the include directory is taken from SDL2_MIXER_*; no extra
        # library directory is added for the mixer.
        # NOTE(review): mixer_include may be None - confirm.
        _, mixer_include = get_paths('SDL2_MIXER')
        libraries.append('SDL2_mixer')
        include_dirs.append(mixer_include)

else:
    # Default build: explicit *_ROOT / *_INCLUDE_DIR / *_LIB_DIR settings win,
    # pkg-config is queried only when neither path is available.

    # ffmpeg
    objects = [
        'avcodec', 'avdevice', 'avfilter', 'avformat',
        'avutil', 'swscale', 'swresample'
    ]
    if c_options['config_postproc']:
        objects.append('postproc')
    # Drop every library for which some c_options key ending in the library
    # name has a falsy value. Iterates over a copy because objects is mutated.
    for libname in objects[:]:
        for key, val in c_options.items():
            if key.endswith(libname) and not val:
                objects.remove(libname)
                break

    ffmpeg_lib, ffmpeg_include = get_paths('FFMPEG')
    flags = {'include_dirs': [], 'library_dirs': [], 'libraries': []}
    # pkg-config is consulted only when neither FFmpeg path was resolved.
    if ffmpeg_lib is None and ffmpeg_include is None:
        flags = pkgconfig(*['lib' + l for l in objects])

    # These assignments rebind the module-level lists created above.
    library_dirs = flags.get('library_dirs', []) if ffmpeg_lib is None \
        else [ffmpeg_lib]
    include_dirs = flags.get('include_dirs', []) if ffmpeg_include is None \
        else [ffmpeg_include]
    # The library names always come from objects, not from pkg-config.
    libraries = objects[:]

    # sdl
    sdl_lib, sdl_include = get_paths('SDL')

    flags = {}
    if sdl_lib is None and sdl_include is None:
        flags = pkgconfig('sdl2')

    sdl_libs = flags.get('library_dirs', []) if sdl_lib is None \
        else [sdl_lib]
    # With an explicit include dir, both its 'SDL2' sub-directory and the
    # directory itself are added.
    sdl_includes = flags.get('include_dirs', []) if sdl_include is None \
        else [join(sdl_include, 'SDL2'), sdl_include]

    library_dirs.extend(sdl_libs)
    include_dirs.extend(sdl_includes)
    # Falls back to plain 'SDL2' when flags carries no 'libraries' entry.
    libraries.extend(flags.get('libraries', ['SDL2']))

    # NOTE(review): the assignment below stores the value back unchanged.
    c_options['use_sdl2_mixer'] = c_options['use_sdl2_mixer']
    if c_options['use_sdl2_mixer']:
        flags = {}
        # pkg-config is skipped when either SDL path was resolved; in that
        # case only the default 'SDL2_mixer' library name is added.
        if sdl_lib is None and sdl_include is None:
            flags = pkgconfig('SDL2_mixer')

        library_dirs.extend(flags.get('library_dirs', []))
        include_dirs.extend(flags.get('include_dirs', []))
        libraries.extend(flags.get('libraries', ['SDL2_mixer']))


def get_wheel_data():
    """Collect the ``data_files`` entries for bundled FFmpeg/SDL files.

    When ``FFMPEG_ROOT`` is set, the contents of its ``bin`` and ``licenses``
    directories and its ``README.txt`` are listed (each only if present).
    When ``SDL_ROOT`` is set, the contents of its ``bin`` directory are
    listed (if present).

    :returns: list of ``(target_directory, [source_paths])`` tuples; empty
        when neither variable is set or nothing was found.
    """
    entries = []

    ffmpeg_root = environ.get('FFMPEG_ROOT')
    if ffmpeg_root:
        ffmpeg_folders = (
            ('bin', 'share/ffpyplayer/ffmpeg/bin'),
            ('licenses', 'share/ffpyplayer/ffmpeg/licenses'),
        )
        for subdir, target in ffmpeg_folders:
            folder = join(ffmpeg_root, subdir)
            if not isdir(folder):
                continue
            sources = [
                join(ffmpeg_root, subdir, item) for item in listdir(folder)]
            entries.append((target, sources))

        readme = join(ffmpeg_root, 'README.txt')
        if exists(readme):
            entries.append(('share/ffpyplayer/ffmpeg', [readme]))

    sdl_root = environ.get('SDL_ROOT')
    if sdl_root:
        sdl_bin = join(sdl_root, 'bin')
        if isdir(sdl_bin):
            sources = [
                join(sdl_root, 'bin', item) for item in listdir(sdl_bin)]
            entries.append(('share/ffpyplayer/sdl/bin', sources))
    return entries


# Extension modules to build, as '/'-separated paths below the ffpyplayer
# package and without a file suffix.
mods = [
    'pic',
    'threading',
    'tools',
    'writer',
    'player/clock',
    'player/core',
    'player/decoder',
    'player/frame_queue',
    'player/player',
    'player/queue',
]
# Self-assignment: reads the option and stores it back unchanged.
c_options['use_sdl2_mixer'] = c_options['use_sdl2_mixer']


# Source suffix for the modules above: Cython sources when Cython can be
# used, otherwise C sources.
mod_suffix = '.pyx' if can_use_cython else '.c'

# Write the build options in c_options out twice: as C preprocessor defines
# (ffconfig.h) and as Cython compile-time DEFs (ffconfig.pxi).
print('Generating ffconfig.h')
includes_dir = join(src_path, 'ffpyplayer', 'includes')
if not exists(includes_dir):
    mkdir(includes_dir)

with open(join(includes_dir, 'ffconfig.h'), 'w') as header:
    # Fixed prologue: include guard plus platform/SDL-version switches.
    header.write('''
#ifndef _FFCONFIG_H
#define _FFCONFIG_H

#include "SDL_version.h"
#define SDL_VERSIONNUM(X, Y, Z) ((X)*1000 + (Y)*100 + (Z))
#define SDL_VERSION_ATLEAST(X, Y, Z) (SDL_COMPILEDVERSION >= SDL_VERSIONNUM(X, Y, Z))
#if defined(__APPLE__) && SDL_VERSION_ATLEAST(1, 2, 14)
#define MAC_REALLOC 1
#else
#define MAC_REALLOC 0
#endif

#if !defined(_WIN32) && !defined(__APPLE__)
#define NOT_WIN_MAC 1
#else
#define NOT_WIN_MAC 0
#endif

#if defined(_WIN32)
#define WIN_IS_DEFINED 1
#else
#define WIN_IS_DEFINED 0
#endif

''')
    # One define per option: upper-cased name, value coerced to an integer.
    header.writelines(
        '#define %s %d\n' % (option.upper(), int(value))
        for option, value in c_options.items())
    header.write('''
#endif
''')

print('Generating ffconfig.pxi')
with open(join(includes_dir, 'ffconfig.pxi'), 'w') as pxi:
    pxi.writelines(
        'DEF %s = %d\n' % (option.upper(), int(value))
        for option, value in c_options.items())

# The package directory and its generated-config directory are always on the
# include path.
include_dirs.extend([
    join(src_path, 'ffpyplayer'),
    join(src_path, 'ffpyplayer', 'includes'),
])
print('Include directories: {}'.format(include_dirs))
print('Library directories: {}'.format(library_dirs))


def _make_extension(src_file):
    """Create the Extension for one entry of ``mods``.

    The module source (with ``mod_suffix`` appended) is compiled together
    with ``clib/misc.c``; all extensions share the module-level
    ``libraries``, ``include_dirs`` and ``library_dirs`` lists.
    """
    module_source = join(
        src_path, 'ffpyplayer', *(src_file + mod_suffix).split('/'))
    helper_source = join(src_path, 'ffpyplayer', 'clib', 'misc.c')
    extension = Extension(
        'ffpyplayer.' + src_file.replace('/', '.'),
        sources=[module_source, helper_source],
        libraries=libraries,
        include_dirs=include_dirs,
        library_dirs=library_dirs)
    extension.cython_directives = {
        "embedsignature": True, 'language_level': 3}
    return extension


ext_modules = [_make_extension(src_file) for src_file in mods]

# Read the long description from the README next to the package sources.
# The path is anchored to src_path so it does not depend on the current
# working directory, and the encoding is explicit so the result does not
# depend on the locale's default encoding.
with open(join(src_path, 'README.rst'), encoding='utf-8') as fh:
    long_description = fh.read()

# Cython is requested at setup time only when declare_cython is set.
setup_requires = []
if declare_cython:
    setup_requires.append('cython~=3.0.11')

# Trove classifiers published with the package.
classifiers = [
    'License :: OSI Approved :: GNU Lesser General Public License v3 '
    '(LGPLv3)',
    'Topic :: Multimedia :: Video',
    'Topic :: Multimedia :: Video :: Display',
    'Topic :: Multimedia :: Sound/Audio :: Players',
    'Topic :: Multimedia :: Sound/Audio :: Players :: MP3',
    'Programming Language :: Python :: 3.7',
    'Programming Language :: Python :: 3.8',
    'Programming Language :: Python :: 3.9',
    'Programming Language :: Python :: 3.10',
    'Programming Language :: Python :: 3.11',
    'Programming Language :: Python :: 3.12',
    'Programming Language :: Python :: 3.13',
    'Operating System :: MacOS :: MacOS X',
    'Operating System :: Microsoft :: Windows',
    'Operating System :: POSIX :: BSD :: FreeBSD',
    'Operating System :: POSIX :: Linux',
    'Intended Audience :: Developers',
]

# Non-Python files shipped inside the ffpyplayer package.
package_data = {
    'ffpyplayer': [
        'player/*.pxd',
        'clib/misc.h',
        'includes/*.pxi',
        'includes/*.h',
        '*.pxd',
        'player/*.pyx',
        'clib/misc.c',
        '*.pyx',
    ],
}

setup(
    name='ffpyplayer',
    version=ffpyplayer.__version__,
    author='Matthew Einhorn',
    author_email='matt@einhorn.dev',
    license='LGPL3',
    description='A cython implementation of an ffmpeg based player.',
    url='https://matham.github.io/ffpyplayer/',
    long_description=long_description,
    classifiers=classifiers,
    packages=['ffpyplayer', 'ffpyplayer.player', 'ffpyplayer.tests'],
    package_data=package_data,
    data_files=get_wheel_data(),
    cmdclass=cmdclass,
    ext_modules=ext_modules,
    setup_requires=setup_requires,
)