Repository: szpajder/RTLSDR-Airband Branch: main Commit: f8a17d7f0e5a Files: 109 Total size: 919.5 KB Directory structure: gitextract_bz4e8om4/ ├── .clang-format ├── .devcontainer/ │ ├── Dockerfile │ ├── devcontainer.json │ └── shell ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ ├── config.yml │ │ └── feature_request.md │ ├── install_dependencies │ ├── platform_build │ └── workflows/ │ ├── build_docker_containers.yml │ ├── ci_build.yml │ ├── code_formatting.yml │ ├── platform_build.yml │ └── version_bump.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .vscode/ │ ├── c_cpp_properties.json │ ├── launch.json │ └── settings.json ├── CMakeLists.txt ├── Dockerfile ├── LICENSE ├── NEWS.md ├── README.md ├── config/ │ ├── basic_multichannel.conf │ ├── basic_scanning.conf │ ├── big_mixer.conf │ ├── mixers.conf │ ├── noaa.conf │ └── two_dongles_multiple_outputs.conf ├── init.d/ │ ├── rtl_airband-debian.sh │ ├── rtl_airband-freebsd.sh │ ├── rtl_airband-gentoo.sh │ └── rtl_airband.service ├── scripts/ │ ├── find_version │ └── reformat_code └── src/ ├── .gitignore ├── CMakeLists.txt ├── CMakeModules/ │ ├── FindBCM_VC.cmake │ ├── FindLame.cmake │ ├── FindMiriSDR.cmake │ ├── FindRTLSDR.cmake │ └── version.cmake ├── config.cpp ├── config.h.in ├── ctcss.cpp ├── ctcss.h ├── filters.cpp ├── filters.h ├── generate_signal.cpp ├── generate_signal.h ├── hello_fft/ │ ├── CMakeLists.txt │ ├── gpu_fft.c │ ├── gpu_fft.h │ ├── gpu_fft.txt │ ├── gpu_fft_base.c │ ├── gpu_fft_shaders.c │ ├── gpu_fft_trans.h │ ├── gpu_fft_twiddles.c │ ├── hex/ │ │ ├── shader_1024k.hex │ │ ├── shader_128k.hex │ │ ├── shader_16k.hex │ │ ├── shader_1k.hex │ │ ├── shader_2048k.hex │ │ ├── shader_256.hex │ │ ├── shader_256k.hex │ │ ├── shader_2k.hex │ │ ├── shader_32k.hex │ │ ├── shader_4k.hex │ │ ├── shader_512.hex │ │ ├── shader_512k.hex │ │ ├── shader_64k.hex │ │ ├── shader_8k.hex │ │ └── shader_trans.hex │ ├── mailbox.c │ └── mailbox.h ├── helper_functions.cpp ├── helper_functions.h ├── 
input-common.cpp ├── input-common.h ├── input-file.cpp ├── input-file.h ├── input-helpers.cpp ├── input-helpers.h ├── input-mirisdr.cpp ├── input-mirisdr.h ├── input-rtlsdr.cpp ├── input-rtlsdr.h ├── input-soapysdr.cpp ├── input-soapysdr.h ├── logging.cpp ├── logging.h ├── mixer.cpp ├── output.cpp ├── pulse.cpp ├── rtl_airband.cpp ├── rtl_airband.h ├── rtl_airband_neon.s ├── squelch.cpp ├── squelch.h ├── test_base_class.cpp ├── test_base_class.h ├── test_ctcss.cpp ├── test_filters.cpp ├── test_generate_signal.cpp ├── test_helper_functions.cpp ├── test_squelch.cpp ├── udp_stream.cpp └── util.cpp ================================================ FILE CONTENTS ================================================ ================================================ FILE: .clang-format ================================================ --- BasedOnStyle: Chromium IndentWidth: 4 ObjCBlockIndentWidth: 4 ColumnLimit: 200 ================================================ FILE: .devcontainer/Dockerfile ================================================ FROM ubuntu:latest RUN sed -i 's/^# \(.*export LS_OPTIONS.*$\)/\1/g' ~/.bashrc && \ sed -i 's/^# \(.*alias ll.*$\)/\1/g' ~/.bashrc RUN ln -fs /usr/share/zoneinfo/America/Los_Angeles /etc/localtime RUN DEBIAN_FRONTEND=noninteractive \ apt-get update && \ apt-get install -y \ tzdata\ git \ sudo \ gdb \ clang-format-14 \ python3-pip \ pre-commit \ vim WORKDIR /app COPY .github/install_dependencies /app/ RUN /app/install_dependencies RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* ================================================ FILE: .devcontainer/devcontainer.json ================================================ // For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: // https://github.com/microsoft/vscode-dev-containers/tree/v0.177.0/containers/docker-existing-dockerfile { "name": "Existing Dockerfile", // Sets the run context to one level up instead of the .devcontainer folder. 
"context": "..", "dockerFile": "Dockerfile", "updateContentCommand" : "apt-get install git", "postCreateCommand" : "cmake -B /app/build -DCMAKE_BUILD_TYPE=Debug -DNFM=TRUE -DBUILD_UNITTESTS=true ; pre-commit install", // vs code extensions to install in the dev container "customizations": { "vscode": { "extensions": [ "ms-vscode.cpptools", "ms-vscode.cmake-tools", "ms-vscode.cpptools-extension-pack", "twxs.cmake", "streetsidesoftware.code-spell-checker", "ms-azuretools.vscode-docker", "GitHub.vscode-github-actions", "xaver.clang-format" ] } }, // Use 'forwardPorts' to make a list of ports inside the container available locally. // "forwardPorts": [], "runArgs": [ "--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined" ] } ================================================ FILE: .devcontainer/shell ================================================ #!/bin/bash -e cd `dirname $0`/../ # build container docker build -t rtl_airband-dev -f .devcontainer/Dockerfile . # run bash in container docker run --rm -v $(pwd):/app/ -it --entrypoint bash rtl_airband-dev ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Report a bug you found when using RTLSDR-Airband title: "[BUG]" labels: '' assignees: '' --- **Describe your environment** - RTLSDR-Airband version you are using (stable release number or branch/commit): - `make` options used to build the program: - Hardware platform (eg. x86_64, Raspberry Pi v4): - Operating system name and version: **What happened?** **What you expected to happen?** **Steps to Reproduce** **Additional context** Add any other relevant information about the problem here. **Your rtl_airband.conf file** Remove passwords, server addresses and other private information. 
================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ blank_issues_enabled: false contact_links: - name: Questions & Help url: https://github.com/rtl-airband/RTLSDR-Airband/discussions/categories/q-a about: Please ask and answer questions here. ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project title: "[FEATURE]" labels: '' assignees: '' --- **Is your feature request related to a problem? If so, please describe.** A description of what the problem is. Ex. I'm always frustrated when [...] **Describe the solution you'd like** **Describe alternative solutions or features you've considered** **Additional context** Add any other relevant information about the feature request here. ================================================ FILE: .github/install_dependencies ================================================ #!/bin/bash unameOut="$(uname -s)" echo "Running on ${unameOut} as ${USER}" case "${unameOut}" in Linux*) echo "Installing Linux dependencies" sudo apt-get update -y sudo apt-get install -y \ build-essential \ cmake \ libmp3lame-dev \ libshout3-dev \ libconfig++-dev \ libfftw3-dev \ librtlsdr-dev \ libsoapysdr-dev \ libpulse-dev ( git clone https://github.com/f4exb/libmirisdr-4 cd libmirisdr-4 mkdir build cd build cmake ../ sudo make install sudo ldconfig ) ;; Darwin*) echo "Installing MacOS dependencies" # detect when running in github workflow and skip `brew update` (rely on fresh OS image) if [ -n "${GITHUB_ACTION}" ] ; then echo "running in GitHub Workflow, skipping brew update" export HOMEBREW_NO_AUTO_UPDATE=1 export HOMEBREW_NO_INSTALL_UPGRADE=1 export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 echo "running ${ImageOS} vsersion ${ImageVersion}" else brew update fi brew install \ lame \ libshout \ libconfig \ fftw \
librtlsdr \ soapysdr \ pulseaudio \ pkg-config ;; *) echo "Error: Machine not supported" exit -1 esac ================================================ FILE: .github/platform_build ================================================ #!/bin/bash -e platform="${1}" if [ -z "${platform}" ]; then echo "Error: platform not set" exit -1 fi echo "running build for ${platform} on $(source /etc/os-release ; echo ${VERSION})" case "${platform}" in rpi3b) CMAKE_ARGS="-DPLATFORM=rpiv2 -DCMAKE_BUILD_TYPE=Release -DNFM=TRUE -DBUILD_UNITTESTS=TRUE" ;; ubuntu-22.04-arm) CMAKE_ARGS="-DPLATFORM=native -DCMAKE_BUILD_TYPE=Release -DNFM=TRUE -DBUILD_UNITTESTS=TRUE" ;; *) echo "Error: Platform '${platform}' not supported" exit -1 esac # make a build dir rm -rf build || true ; mkdir build cd build # configure and build cmake ${CMAKE_ARGS} ../ VERBOSE=1 make -j # run unit tests src/unittests # run rtl_airband to get version string and exit src/rtl_airband -v ================================================ FILE: .github/workflows/build_docker_containers.yml ================================================ name: Build and Publish Containers on: push: branches: [main, unstable] tags: ['v*'] pull_request: workflow_dispatch: schedule: - cron: '29 13 * * *' # run daily jobs: build: strategy: fail-fast: false matrix: include: - os: ubuntu-latest platform: linux/amd64 - os: ubuntu-latest platform: linux/386 - os: ubuntu-24.04-arm platform: linux/arm64 - os: ubuntu-24.04-arm platform: linux/arm/v6 - os: ubuntu-24.04-arm platform: linux/arm/v7 runs-on: ${{ matrix.os }} permissions: contents: read packages: write attestations: write id-token: write steps: - name: Runner Info run: printenv | sort - name: Prepare id: prep run: | echo "platform_pair=${platform//\//-}" >> $GITHUB_OUTPUT echo "repo_lowercase=$(echo '${{ github.repository }}' | tr '[:upper:]' '[:lower:]')" >> $GITHUB_OUTPUT env: platform: ${{ matrix.platform }} - name: Checkout uses: actions/checkout@v4 with: fetch-depth: '0' # need full 
history to get version from git tag - name: Container metadata id: metadata uses: docker/metadata-action@v5 with: images: ghcr.io/${{ steps.prep.outputs.repo_lowercase }} - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Login to GitHub Container Registry uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Build and push by digest id: build uses: docker/build-push-action@v6 with: platforms: ${{ matrix.platform }} cache-from: type=gha,scope=build-${{ steps.prep.outputs.platform_pair }} cache-to: type=gha,mode=max,scope=build-${{ steps.prep.outputs.platform_pair }} context: . outputs: type=image,name=ghcr.io/${{ steps.prep.outputs.repo_lowercase }},push-by-digest=true,name-canonical=true,push=true - name: Export digest run: | mkdir -p /tmp/digests digest="${{ steps.build.outputs.digest }}" touch "/tmp/digests/${digest#sha256:}" - name: Upload digest uses: actions/upload-artifact@v4 with: name: digests-${{ steps.prep.outputs.platform_pair }} path: /tmp/digests/* if-no-files-found: error retention-days: 1 merge: runs-on: ubuntu-latest needs: build permissions: contents: read packages: write steps: - name: Runner Info run: printenv | sort - name: Prepare id: prep run: | echo "repo_lowercase=$(echo '${{ github.repository }}' | tr '[:upper:]' '[:lower:]')" >> $GITHUB_OUTPUT - name: Download digests uses: actions/download-artifact@v4 with: path: /tmp/digests pattern: digests-* merge-multiple: true - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Container metadata id: metadata uses: docker/metadata-action@v5 with: images: ghcr.io/${{ steps.prep.outputs.repo_lowercase }} - name: Login to GitHub Container Registry uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Create manifest list and push working-directory: 
/tmp/digests run: | docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ $(printf 'ghcr.io/${{ steps.prep.outputs.repo_lowercase }}@sha256:%s ' *) - name: Inspect image run: | docker buildx imagetools inspect ghcr.io/${{ steps.prep.outputs.repo_lowercase }}:${{ steps.metadata.outputs.version }} ================================================ FILE: .github/workflows/ci_build.yml ================================================ name: Run CI on: push: branches: [main] tags: ['v*'] pull_request: workflow_dispatch: schedule: - cron: '39 13 * * *' # run daily jobs: ci_build: strategy: matrix: os: [ ubuntu-22.04, macos-14, ubuntu-22.04-arm ] runs-on: ${{ matrix.os }} timeout-minutes: 35 # runtime across all OSs, runs can get queued steps: - name: Runner Info run: printenv | sort - name: Checkout uses: actions/checkout@v4 with: fetch-depth: '0' # need full history to get version from git tag - name: Install packaged dependencies run: .github/install_dependencies - name: Configure run: | cmake -B ${{github.workspace}}/build_Debug -DCMAKE_BUILD_TYPE=Debug -DBUILD_UNITTESTS=TRUE cmake -B ${{github.workspace}}/build_Debug_NFM -DCMAKE_BUILD_TYPE=Debug -DNFM=TRUE -DBUILD_UNITTESTS=TRUE cmake -B ${{github.workspace}}/build_Release -DCMAKE_BUILD_TYPE=Release -DBUILD_UNITTESTS=TRUE cmake -B ${{github.workspace}}/build_Release_NFM -DCMAKE_BUILD_TYPE=Release -DNFM=TRUE -DBUILD_UNITTESTS=TRUE - name: Build run: | VERBOSE=1 cmake --build ${{github.workspace}}/build_Debug -j4 VERBOSE=1 cmake --build ${{github.workspace}}/build_Debug_NFM -j4 VERBOSE=1 cmake --build ${{github.workspace}}/build_Release -j4 VERBOSE=1 cmake --build ${{github.workspace}}/build_Release_NFM -j4 - name: Unit Tests run: | ${{github.workspace}}/build_Debug/src/unittests ${{github.workspace}}/build_Debug_NFM/src/unittests ${{github.workspace}}/build_Release/src/unittests ${{github.workspace}}/build_Release_NFM/src/unittests - name: Install run: sudo 
cmake --install ${{github.workspace}}/build_Release_NFM - name: Test run run: /usr/local/bin/rtl_airband -v ================================================ FILE: .github/workflows/code_formatting.yml ================================================ name: Code Formatting on: pull_request: schedule: - cron: '39 13 * * *' # run daily jobs: code_formatting: runs-on: ubuntu-latest steps: - name: Runner Info run: printenv | sort - name: Checkout uses: actions/checkout@v4 - name: Install Clang Format run: sudo apt-get install clang-format-14 - name: Run Clang Format run: | ./scripts/reformat_code git diff --exit-code ================================================ FILE: .github/workflows/platform_build.yml ================================================ name: Platform Build on: push: branches: [main] tags: ['v*'] pull_request: workflow_dispatch: schedule: - cron: '39 13 * * *' # run daily jobs: platform_build: strategy: matrix: # os: [ rpi3b ] os: [ ubuntu-22.04-arm ] runs-on: ${{ matrix.os }} timeout-minutes: 35 # runtime across all OSs, runs can get queued steps: - name: Runner Info run: printenv | sort - name: Checkout uses: actions/checkout@v4 with: fetch-depth: '0' # need full history to get version from git tag - name: Install packaged dependencies run: .github/install_dependencies - name: Configure Build and Test run: .github/platform_build ${{ matrix.os }} ================================================ FILE: .github/workflows/version_bump.yml ================================================ name: Bump version on: pull_request: types: - closed branches: - main jobs: version_bump: if: github.event.pull_request.merged == true runs-on: ubuntu-22.04 permissions: contents: write actions: write steps: - uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.merge_commit_sha }} fetch-depth: '0' - name: Bump version and push tag id: tag uses: anothrNick/github-tag-action@1.64.0 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} WITH_V: true DEFAULT_BUMP: patch 
- name: Create release for ${{ steps.tag.outputs.new_tag }} if: steps.tag.outputs.part != 'patch' env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} tag: ${{ steps.tag.outputs.new_tag }} run: | gh release create "$tag" \ --repo="$GITHUB_REPOSITORY" \ --title="Version ${tag#v}" \ --generate-notes - name: Run CI on ${{ steps.tag.outputs.new_tag }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | gh workflow run ci_build.yml --ref ${{ steps.tag.outputs.new_tag }} - name: Run Platform Build ${{ steps.tag.outputs.new_tag }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | gh workflow run platform_build.yml --ref ${{ steps.tag.outputs.new_tag }} - name: Build and Publish Containers for ${{ steps.tag.outputs.new_tag }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | gh workflow run build_docker_containers.yml --ref ${{ steps.tag.outputs.new_tag }} ================================================ FILE: .gitignore ================================================ build*/ .DS_Store .cache compile_commands.json rtl_airband*.log ================================================ FILE: .pre-commit-config.yaml ================================================ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.5.0 hooks: - id: check-yaml - id: end-of-file-fixer - id: trailing-whitespace - id: check-shebang-scripts-are-executable - repo: https://github.com/pre-commit/mirrors-clang-format rev: v14.0.6 hooks: - id: clang-format files: src/.*\.cpp|src/.*\.h ================================================ FILE: .vscode/c_cpp_properties.json ================================================ { "configurations": [ { "name": "Linux", "includePath": [ "${workspaceFolder}/**", "${workspaceFolder}/build/_deps/googletest-src/googletest/include/", "${workspaceFolder}/build/src/" ], "defines": [], "compilerPath": "/usr/bin/gcc", "cStandard": "c17", "cppStandard": "gnu++17", "intelliSenseMode": "linux-gcc-arm64", "configurationProvider": "ms-vscode.cmake-tools" } ], 
"version": 4 } ================================================ FILE: .vscode/launch.json ================================================ { // Use IntelliSense to learn about possible attributes. // Hover to view descriptions of existing attributes. // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ { "name": "(gdb) Launch Unit Test", "type": "cppdbg", "request": "launch", "program": "${workspaceFolder}/build/src/unittests", "args": [], "stopAtEntry": false, "cwd": "${fileDirname}", "environment": [], "externalConsole": false, "MIMode": "gdb", "setupCommands": [ { "description": "Enable pretty-printing for gdb", "text": "-enable-pretty-printing", "ignoreFailures": true }, { "description": "Set Disassembly Flavor to Intel", "text": "-gdb-set disassembly-flavor intel", "ignoreFailures": true } ] } ] } ================================================ FILE: .vscode/settings.json ================================================ { "editor.formatOnPaste": true, "editor.formatOnSave": true, "editor.formatOnType": true, "editor.defaultFormatter": "xaver.clang-format", "clang-format.executable": "clang-format-14" } ================================================ FILE: CMakeLists.txt ================================================ cmake_minimum_required (VERSION 3.1...3.18 FATAL_ERROR) project (RTLSDR-Airband CXX) execute_process(COMMAND ${PROJECT_SOURCE_DIR}/scripts/find_version OUTPUT_VARIABLE RTL_AIRBAND_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_VARIABLE RTL_AIRBAND_VERSION_ERROR ERROR_STRIP_TRAILING_WHITESPACE) string(COMPARE EQUAL "${RTL_AIRBAND_VERSION}" "" RTL_AIRBAND_VERSION_UNSET) if(RTL_AIRBAND_VERSION_UNSET) message(FATAL_ERROR "Failed to detect RTL_AIRBAND_VERSION - \"${RTL_AIRBAND_VERSION_ERROR}\"") endif() set (CMAKE_CXX_STANDARD 11) set (CXX_STANDARD_REQUIRED ON) set (CMAKE_CXX_EXTENSIONS OFF) set (CMAKE_COMPILE_WARNING_AS_ERROR ON) if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) 
message(STATUS "Build type not specified: defaulting to Release") endif(NOT CMAKE_BUILD_TYPE) # TODO: flags to add: -Wfloat-equal -Wconversion -Wstrict-overflow=5 -Waggregate-return -Wpedantic -Wcast-align # TODO: these could be added except for gtest: -Wswitch-enum -Wundef -Wswitch-default set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wshadow -Wdate-time -Wpointer-arith -Wwrite-strings -Wcast-qual -Wunreachable-code -Werror") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og -DDEBUG") if(DEBUG_SQUELCH) set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDEBUG_SQUELCH") endif() add_subdirectory (src) ================================================ FILE: Dockerfile ================================================ # build container FROM debian:bookworm-slim AS build # install build dependencies RUN apt-get update && \ apt-get upgrade -y && \ apt-get install -y --no-install-recommends \ build-essential \ cmake \ libmp3lame-dev \ libshout3-dev \ libconfig++-dev \ libfftw3-dev \ libsoapysdr-dev \ libpulse-dev \ \ git \ ca-certificates \ libusb-1.0-0-dev \ debhelper \ pkg-config \ && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* # set working dir for compiling dependencies WORKDIR /build_dependencies # compile / install rtl-sdr-blog version of rtl-sdr for v4 support RUN git clone https://github.com/rtlsdrblog/rtl-sdr-blog && \ cd rtl-sdr-blog/ && \ dpkg-buildpackage -b --no-sign && \ cd .. && \ dpkg -i librtlsdr0_*.deb && \ dpkg -i librtlsdr-dev_*.deb && \ dpkg -i rtl-sdr_*.deb # compile / install libmirisdr-4 RUN git clone https://github.com/f4exb/libmirisdr-4 && \ cd libmirisdr-4 && \ mkdir build && \ cd build && \ cmake ../ && \ VERBOSE=1 make install && \ ldconfig # TODO: build anything from source? # set working dir for project build WORKDIR /rtl_airband_build # copy in the rtl_airband source, copying in the full repo so find_version will be correct COPY ./ .
# configure and build # TODO: detect platforms RUN cmake -B build_dir -DPLATFORM=generic -DCMAKE_BUILD_TYPE=Release -DNFM=TRUE -DBUILD_UNITTESTS=TRUE && \ VERBOSE=1 cmake --build build_dir -j4 # make sure unit tests pass RUN ./build_dir/src/unittests # application container FROM debian:bookworm-slim # install runtime dependencies RUN apt-get update && \ apt-get upgrade -y && \ apt-get install -y --no-install-recommends \ tini \ libc6 \ libmp3lame0 \ libshout3 \ libconfig++9v5 \ libfftw3-single3 \ libsoapysdr0.8 \ libpulse0 \ libusb-1.0-0-dev \ ca-certificates \ && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* # install (from build container) rtl-sdr-blog version of rtl-sdr for v4 support COPY --from=build /build_dependencies/librtlsdr0_*.deb /build_dependencies/librtlsdr-dev_*.deb /build_dependencies/rtl-sdr_*.deb /tmp/ RUN dpkg -i /tmp/librtlsdr0_*.deb && \ dpkg -i /tmp/librtlsdr-dev_*.deb && \ dpkg -i /tmp/rtl-sdr_*.deb && \ rm -rf /tmp/*.deb && \ echo '' | tee --append /etc/modprobe.d/rtl_sdr.conf && \ echo 'blacklist dvb_usb_rtl28xxun' | tee --append /etc/modprobe.d/rtl_sdr.conf && \ echo 'blacklist rtl2832' | tee --append /etc/modprobe.d/rtl_sdr.conf && \ echo 'blacklist rtl2830' | tee --append /etc/modprobe.d/rtl_sdr.conf # copy (from build container) libmirisdr-4 library COPY --from=build /usr/local/lib/libmirisdr.so.4 /usr/local/lib/ # Copy rtl_airband from the build container COPY LICENSE /app/ COPY --from=build /rtl_airband_build/build_dir/src/unittests /app/ COPY --from=build /rtl_airband_build/build_dir/src/rtl_airband /app/ RUN chmod a+x /app/unittests /app/rtl_airband # make sure unit tests pass RUN /app/unittests # Use tini as init and run rtl_airband from /app/ ENTRYPOINT ["/usr/bin/tini", "--"] WORKDIR /app/ CMD ["/app/rtl_airband", "-F", "-e", "-c", "/app/rtl_airband.conf"] ================================================ FILE: LICENSE ================================================ GNU GENERAL PUBLIC LICENSE Version 2, June 
1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. 
Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. 
You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. 
If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. 
(This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. 
Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. 
Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. 
Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS ================================================ FILE: NEWS.md ================================================ # NEWS This file will no longer be updated with each release, for changes between releases, see PRs merged to the repo Version 5.0.0 (Jan 21, 2024): * NOTE: Going forward a release tag will be automatically created on each merge to `main`, and changes will not be reflected in this file. For changes between versions see the repo's [release history](https://github.com/rtl-airband/RTLSDR-Airband/releases). 
* NOTE: Going forward PRs will be opened directly against `main` and the `unstable` branch will no longer be used. * NOTE: This repo has significantly diverged from the original project [microtony/RTLSDR-Airband](https://github.com/microtony/RTLSDR-Airband) so it has been detached (ie no longer a fork). * Changes in this release, see [#444](https://github.com/rtl-airband/RTLSDR-Airband/pull/444): * build and publish docker containers * changes to supported `cmake` platforms: * deprecate `rpiv1`, `armv7-generic`, and `armv8-generic` build platforms * change default build platform to `native` * rename `default` to `generic` * enable a series of compile warnings and cleanup code * remove `SSE` specific code - let the compiler "do the right thing" * remove some no longer supported windows `ifdef`'s * fix CTCSS bug that could miss a tone when multiple tones have the same power (happens with less accurate floating point operations, ie i386) Version 4.2.0 (Oct 13, 2023): * Changes in this release: * Add support for building with libshout v2.4.6, see [#382](https://github.com/rtl-airband/RTLSDR-Airband/pull/382) and [#422](https://github.com/rtl-airband/RTLSDR-Airband/pull/422) * Add error checking for lowpass <= highpass, see [#399](https://github.com/rtl-airband/RTLSDR-Airband/pull/399) and [#412](https://github.com/rtl-airband/RTLSDR-Airband/pull/412) * Remove limit on count of mixer inputs (thanks @cdknox), see [#408](https://github.com/rtl-airband/RTLSDR-Airband/pull/408) * Add `dated_subdirectories` config option for output files (thanks, @marcin-osowski), see [#413](https://github.com/rtl-airband/RTLSDR-Airband/pull/413) Version 4.1.1 (May 1, 2023): * Changes in this release: * Fix build issues when using VideoCore GPU, see [#378](https://github.com/rtl-airband/RTLSDR-Airband/pull/378) Version 4.1.0 (April 23, 2023): * Changes in this release: * Add `channel_dbfs_noise_level` and `channel_dbfs_signal_level` to the stats file, see
[#355](https://github.com/rtl-airband/RTLSDR-Airband/pull/355) * Add squelch support for CTCSS, add `channel_ctcss_counter` and `channel_no_ctcss_counter` to the stats file, see [#368](https://github.com/rtl-airband/RTLSDR-Airband/pull/368) * Support `ampfactor` on a per-channel basis (in addition to mixer inputs), see [#369](https://github.com/rtl-airband/RTLSDR-Airband/pull/369) * Fix config error messages, see [#371](https://github.com/rtl-airband/RTLSDR-Airband/pull/371) * Multiple CI / workflow improvements, including: * Addition of Dockerfiles and shell scripts for multiple build environments * Addition of vscode devcontainer configuration * Addition of gtest, code refactoring, addition of unit tests, running unit tests on each pull request * Running more combinations of OSs, build types, and build options on each pull request Version 4.0.3 (Jan 10, 2023): * Changes in this release: * Add `channel_squelch_level` to stats file, see [#332](https://github.com/rtl-airband/RTLSDR-Airband/pull/332) * Support "default" values in lists for `squelch_snr_threshold` and `notch_q`, see [#334](https://github.com/rtl-airband/RTLSDR-Airband/pull/334) * Set cmake `ENABLE_EXPORTS` property, see [#339](https://github.com/rtl-airband/RTLSDR-Airband/pull/339) * Other items to note: * Repo maintainer has changed, see [#342](https://github.com/rtl-airband/RTLSDR-Airband/discussions/342) * Repo URL has moved to https://github.com/rtl-airband/RTLSDR-Airband * Default branch / Top of Tree has been renamed to `main` Version 4.0.2 (Dec 26, 2021): * Added a new `PLATFORM` value `default` (which, as the name says, is the new default). It results in a portable binary without any architecture-specific optimizations. This also allows the program to be built with compilers that do not support `-march=native` option (notably Clang on Apple M1) (#303). 
Version 4.0.1 (Nov 14, 2021): * Fixed compilation error on RaspberryPi OS 11 (Bullseye) Version 4.0.0 (Oct 19, 2021): * RTLSDR-Airband is now built with CMake. Refer to the wiki for updated compilation instructions. * When compiling the program, a new `PLATFORM` value `native` can now be specified. It enables `-march=native -mtune=native` compilation options. This causes the compiler to apply the most appropriate optimizations for the hardware on which the app is being built (thx @charlie-foxtrot). * BACKWARDS-INCOMPATIBLE CHANGE: Signal level and noise level estimates displayed in the textual waterfalls are now expressed in dBFS (decibels relative to the full scale of the analog-to-digital converter). The main benefit of the new approach is that these values do not depend on the `fft_size` value (thx @charlie-foxtrot). * BACKWARDS-INCOMPATIBLE CHANGE: Improved squelch algorithm with new configuration parameters. `squelch` keyword has been replaced with `squelch_threshold` which takes an absolute signal value in dBFS as an argument. Alternatively, a minimum signal-to-noise ratio (in dB) that should trigger the squelch might be configured using `squelch_snr_threshold` option (thx @charlie-foxtrot). * BACKWARDS-INCOMPATIBLE CHANGE: `include_freq` config option for file outputs now causes the frequency to be appended after the timestamp rather than before it. This feature now works correctly in scan mode, when `split_on_transmission` feature is enabled (thx @charlie-foxtrot). * BACKWARDS-INCOMPATIBLE CHANGE: sample format in files produced by `rawfile` outputs has been changed from CS16 to CF32. File name suffix is now `.cf32`. * Improved squelch indicator in the textual waterfalls. In addition to the `*` character indicating that the squelch is open, there is also a `~` character indicating that the channel has a signal that is being suppressed because it is outside the band of the channel filter (thx @charlie-foxtrot).
* New output type `udp_stream` for sending uncompressed audio to another host via UDP/IP (thx @charlie-foxtrot). * Added `multiple_output_threads` global option. When set to `true`, a separate output thread is spawned for each device (thx @charlie-foxtrot). * Modulation in scan mode is now configurable per channel (thx @charlie-foxtrot). * SoapySDR errors like TIMEOUT or OVERFLOW are no longer treated as fatal. They often appear intermittently, especially when the CPU usage is high. There is no point in failing the input in this case. * Added `.tmp` suffix to the names of the output files currently being written to. The suffix is removed when the file is closed. External applications that consume recorded files can now figure out which files are not yet complete. * Added logging and statistics for output thread overruns and mixer input/output overruns (thx @charlie-foxtrot). * The program can now be built on MacOS. * Miscellaneous bug fixes and code cleanups. Version 3.2.1 (Nov 13, 2020): * Fixed a compile error when using libshout older than 2.4.0 Version 3.2.0 (Nov 08, 2020): * Added `split_on_transmission` output file option which allows creating a new file for every transmission on the channel (thx @charlie-foxtrot). * Added `include_freq` output file option, which causes the channel frequency to be appended to the file name (thx @charlie-foxtrot). * Added support for notch filters for eliminating narrowband interference, like CTCSS tones (thx @charlie-foxtrot). * Added `bandwidth` channel option which causes the channelized I/Q signal to be lowpass-filtered before demodulation. This might help in situations where neighboring channels are closely spaced and interfere with the channel of interest. It also reduces the bandwidth of the resulting audio signal, and thus eliminates the high-frequency noise (thx @charlie-foxtrot). * Added support for multithreaded demodulation. Each device can now have its own demodulation thread. 
This allows spreading the demodulation work across multiple CPU cores. Enable with `multiple_demod_threads` global option (thx @charlie-foxtrot). * Added support for highpass/lowpass MP3 filters for mixers (thx @charlie-foxtrot) * Added support for frequency usage statistics (thx @charlie-foxtrot). * Workaround for Fitipower tuner problem of not honoring the first gain setting when the device is first used (thx @eshaz). * Finalize the MP3 file properly before opening a new one (thx @jratke). * Close the RTL device properly on program exit (thx @jratke). * Updated the SoapySDR input driver to reflect changes in SoapySDR library API. * Minor cleanups. Version 3.1.0 (Jan 19, 2020): * SoapySDR: added support for complex float 32-bit samples * SoapySDR: allow using AGC if the device supports it. Gain setting for soapy devices is now optional - if it's not specified, the program will try to enable AGC. * Use lowpass/highpass filters provided by LAME library to improve audio quality of MP3 streams. Filter cutoff frequencies may be configured per output, using `highpass` and `lowpass` config options. Credit: clydebarrow. * Added `log_scan_activity` global config option. When set to `true`, a log message is written whenever a squelch opens on a scanned channel, effectively producing a channel activity log. Credit: clam-i-am. * Improved squelch behaviour in some corner cases. * Fix for incorrect naming of pulseaudio context. Name set in the config was not used as it should. Credit: Darryl Pogue. * Don't fail when the configured gain value is negative. Some SDRs support this (eg. FC0012-based dongles). * Fix a bug which in some cases could prevent the icecast output from reconnecting with the Icecast server after the connection has failed. 
Version 3.0.1 (Feb 16, 2018): * Fix for squelch staying constantly open when configured manually with NFM=off (#84) Version 3.0.0 (Feb 10, 2018): * Major overhaul of the SDR input code - now it's modular and hardware-agnostic (no longer tightly coupled with librtlsdr). * Support for SoapySDR vendor-neutral SDR library - any SDR which has a plugin for SoapySDR shall now work in RTLSDR-Airband. * Support for Mirics DVB-T dongles via libmirisdr-4 library. * Support for RTLSDR is now optional and can be disabled at compilation stage. * Removed the 8-channels-per-device limit in multichannel mode. * Configurable per-device sampling rate. * Configurable FFT size. * Support for multibyte input samples. * Support for rawfile outputs (ie. writing raw I/Q data from a narrowband channel to a file for processing with other programs, like GNUradio or csdr). * INCOMPATIBLE CHANGE: removed `rtlsdr_buffers` global configuration option; buffer count can now be adjusted with a per-device "buffers" option. * INCOMPATIBLE CHANGE: removed `syslog` global configuration option; syslog logging is now enabled by default, both in foreground and background mode. To force logging to standard error, use -e command line option. * Added -F command line option for better cooperation with systemd. Runs the program in foreground, but without textual waterfalls. Together with -e it allows running rtl_airband as a service of type "simple" under systemd. Example rtl_airband.service file has been adjusted to reflect this change. * Added `type` device configuration option. It sets the device type (ie. the input driver which shall be used to talk to the device). "rtlsdr" is assumed as a default type for backward compatibility. If RTLSDR support has been disabled at compilation stage, then there is no default type - it must be set manually, or the program will throw an error on startup. * Frequencies in the config can now be expressed in Hz, kHz, MHz or GHz for improved readability. * Lots of bugfixes.
* Rewritten documentation on [Github Wiki](https://github.com/rtl-airband/RTLSDR-Airband/wiki). Version 2.4.0 (Oct 15, 2017): * Support for PulseAudio output via new output type `pulse`. With this feature you can eg. play the sound via the soundcard of the Raspberry Pi you run RTLSDR-Airband on (you need to install and run pulseaudio daemon on it, though). Or you can stream the audio from a Pi located near the antenna (eg. in the attic) to speakers connected to the desktop PC you are sitting at, without launching a local Icecast server, as before. Because the audio stream is sent uncompressed, it is not recommended to run it across the Internet - jitter or packet loss will easily cause the audio to become choppy. However in a local network PulseAudio is a good choice. And it gives much lower latency as compared to Icecast (typically under 0.5 seconds). Thanks to Marcus Ströbel for the idea and initial implementation. * Support for referring to RTL devices by their serial numbers in the config file. Instead of `index = ` parameter, use `serial = ` to get consistent behavior across reboots and hardware reconfigurations. * Set RTL gain to the nearest gain value supported by the device. This is required for E4000 tuners, which do not round the given gain value to the nearest supported setting, which causes the gain setting operation to fail. * Improved squelch operation in scan mode. All squelch-related variables (noise floor, AGC coefficients, etc) are now calculated and stored separately for each scanned channel. Earlier their values were common to all channels, which caused squelch problems in case when noise floor varied considerably between channels. Thanks to @strix-technica. * Added build target for FreeBSD on x86. Use `PLATFORM=x86-freebsd` to compile and `PLATFORM=x86-freebsd gmake install` to install. Thanks to @nyammy. * Display squelch setting in waterfall in place of noise floor value when squelch is set manually. * Bug fixes, performance improvements. 
* Decluttered and more understandable documentation. Version 2.3.0 (Jan 2, 2017): * Added support for mixers. It is now possible to produce audio streams combined from several input channels. Both mono and stereo mixing is supported. Usage example is provided in config/mixers.conf. All mixer-related parameters are documented in config/reference.conf. * Added build options for 64-bit ARM architectures, like Odroid C2. Please use PLATFORM=armv8-generic when compiling. * Fixed a long-standing bug in RTL sample processing, which caused some samples to be processed twice. If you were annoyed by these regular clicks in NFM audio every 125 ms, they are now gone. * Reduced CPU usage on x86 * Some code restructuring and cleanups * Added several configuration file examples for typical real-life scenarios. They are placed in config/ subdirectory. rtl_airband.conf.example file has been moved to config/reference.conf. It is meant to be a reference for all supported config knobs together with their description. This is still an interim solution before some more readable and understandable documentation gets written. Version 2.2.0 (Oct 8, 2016): * Support for Icecast stream metadata updates in scanning mode. When enabled, every time the scanner stops on a channel, current frequency is written into Icecast song title, which in turn is displayed in the player. Alternatively, textual labels can be configured for each frequency. It is possible to configure the amount of delay between the stream and metadata updates to synchronize them with the audio. There are some caveats however - read comments in rtl_airband.conf.example for details. * Added global option 'localtime'. When enabled, rtl_airband uses local time instead of UTC time for output file names. (Credit: ScanOC). * Auto gain feature removed. RTL auto gain does not work well for narrowband channels. Most often it sets the gain too high which causes problems for auto squelch and audio bleeding between adjacent channels. 
Gain must be configured manually from now on. * Dropped unmaintained Windows build. * Reverted to power level calculation algorithm from version 2.0.2. The new algo didn't really do much to sensitivity, but introduced annoying clicks on squelch open/close. * Improved DC offset estimator for AM mode. This one hardly ever clicks on squelch opening. * Boosted AM audio volume. * Reduced squelch flapping in NFM mode. Version 2.1.0 (Aug 11, 2016): * Narrowband FM demodulation support * Automatic Frequency Control * Append mode for recording (enabled by default) * Dongles, channels and outputs can be individually enabled and disabled by a simple config flag (no need to comment out or delete large configuration sections) * Use VBR for MP3 encoding * Modified power level calculation algorithm (better sensitivity) * Support for manual squelch setting * Bug fixes Version 2.0.2 (Mar 26, 2016): * Fixed a problem with running three dongles or more, simultaneously Version 2.0.1 (Jan 24, 2016): * Fixed crash on output initialization Version 2.0.0 (Dec 27, 2015): * util/convert_cfg: can be used to convert old-style config.txt to the new format * Syslog logging (enabled by default) * Daemon mode * Reworked makefiles, added install rule * /dev/vcio is now used to access GPU on RPi; creating char_dev no longer necessary * Startup scripts for Debian and Gentoo * Support for auto gain setting * Support for multiple outputs per channel * Support for recording streams to local MP3 files * Support for ARMv7-based platforms other than RPi (eg. 
Cubieboard) * Updated documentation * Numerous bugfixes and stability improvements Version 1.0.0 (May 12, 2015): * Linux x86/x86_64 support (Windows build is currently unmaintained and might not work) * Raspberry Pi V2 support * Bundled hello_fft code (v2.0) * More robust interaction with Icecast servers * Important stability fixes ================================================ FILE: README.md ================================================ # RTLSDR-Airband ![main](https://github.com/rtl-airband/RTLSDR-Airband/actions/workflows/ci_build.yml/badge.svg?branch=main) ![main](https://github.com/rtl-airband/RTLSDR-Airband/actions/workflows/platform_build.yml/badge.svg?branch=main) ![main](https://github.com/rtl-airband/RTLSDR-Airband/actions/workflows/build_docker_containers.yml/badge.svg?branch=main) ![main](https://github.com/rtl-airband/RTLSDR-Airband/actions/workflows/code_formatting.yml/badge.svg?branch=main) Changes as of v5.1.0: - License is now GPLv2 [#503](https://github.com/rtl-airband/RTLSDR-Airband/discussions/503) NOTE: Repo URL has moved to https://github.com/rtl-airband/RTLSDR-Airband see [#502](https://github.com/rtl-airband/RTLSDR-Airband/discussions/502) for info Changes as of v5.0.0: - PRs will be opened directly against `main` and the `unstable` branch will no longer be used - Version tags will be automatically created on each merge to `main` - A release will be created on each `major` or `minor` version tag but not `patch` tags - Checking out `main` is recommended over using a release artifact to stay on the latest version - This repo has significantly diverged from the original project [microtony/RTLSDR-Airband](https://github.com/microtony/RTLSDR-Airband) so it has been detached (ie no longer a fork).
- Specific build support for `rpiv1`, `armv7-generic`, and `armv8-generic` has been deprecated in favor of the new default `native`, see [#447](https://github.com/rtl-airband/RTLSDR-Airband/discussions/447) ## Overview RTLSDR-Airband receives analog radio voice channels and produces audio streams which can be routed to various outputs, such as online streaming services like LiveATC.net. Originally the only SDR type supported by the program was Realtek DVB-T dongle (hence the project's name). However, thanks to SoapySDR vendor-neutral SDR library, other radios are now supported as well. ## Documentation User's manual is now on the [wiki](https://github.com/rtl-airband/RTLSDR-Airband/wiki). ## Credits and thanks I hereby express my gratitude to everybody who helped with the development and testing of RTLSDR-Airband. Special thanks go to: * Dave Pascoe * SDR Guru * Marcus Ströbel * strix-technica * charlie-foxtrot ## License Copyright (C) 2022-2025 charlie-foxtrot Copyright (C) 2015-2022 Tomasz Lemiech Based on original work by Wong Man Hang This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see <https://www.gnu.org/licenses/>. ## Open Source Licenses of bundled code ### gpu_fft BCM2835 "GPU_FFT" release 2.0 Copyright (c) 2014, Andrew Holme. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ### rtl-sdr * Copyright (C) 2012 by Steve Markgraf * Copyright (C) 2015 by Kyle Keen * GNU General Public License Version 2 ================================================ FILE: config/basic_multichannel.conf ================================================ # This is a minimalistic configuration file for RTLSDR-Airband. # Just a single RTL dongle with two AM channels in multichannel mode. # Each channel is sent to a single Icecast output. # Refer to https://github.com/rtl-airband/RTLSDR-Airband/wiki # for description of keywords and config syntax. 
devices: ({ type = "rtlsdr"; index = 0; gain = 25; centerfreq = 120.0; correction = 80; channels: ( { freq = 119.5; outputs: ( { type = "icecast"; server = "icecast.server.example.org"; port = 8080; mountpoint = "TWR.mp3"; name = "Tower"; genre = "ATC"; username = "source"; password = "mypassword"; } ); }, { freq = 120.225; outputs: ( { type = "icecast"; server = "icecast.server.example.org"; port = 8080; mountpoint = "GND.mp3"; name = "Ground"; genre = "ATC"; description = "My local airport - ground feed"; username = "source"; password = "mypassword"; } ); } ); } ); ================================================ FILE: config/basic_scanning.conf ================================================ # Scanning mode example # Single dongle, three frequencies, output to Icecast server and to a file. # Refer to https://github.com/rtl-airband/RTLSDR-Airband/wiki # for description of keywords and config syntax. devices: ({ type = "rtlsdr"; index = 0; gain = 25; correction = 80; mode = "scan"; channels: ( { freqs = ( 118.15, 124.7, 132.1 ); labels = ( "Tower", "Ground", "Approach" ); outputs: ( { type = "icecast"; server = "icecast.server.example.org"; port = 8080; mountpoint = "stream.mp3"; name = "Tower + Ground + Approach"; genre = "ATC"; description = "My local airport - aggregated feed"; username = "source"; password = "mypassword"; send_scan_freq_tags = false; }, { type = "file"; directory = "/home/pi/recordings"; filename_template = "TWR+GND+APP"; } ); } ); } ); ================================================ FILE: config/big_mixer.conf ================================================ mixers: { big_mixer: { outputs: ( { type = "file"; directory = "./"; filename_template = "big_mixer"; } ); } }; devices: ({ type = "rtlsdr"; index = 0; gain = 25; centerfreq = 156.7375; channels: ( { freq = 156.050; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.175; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.250; outputs: ( { type = 
"mixer"; name = "big_mixer"; } ); }, { freq = 156.275; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.300; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.325; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.350; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.375; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.400; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.425; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.450; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.475; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.500; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.525; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.550; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.575; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.600; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.625; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.650; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.675; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.700; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.725; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.750; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.800; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.850; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.875; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.900; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.925; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 156.950; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 
156.975; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 157.000; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 157.025; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 157.050; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 157.075; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 157.100; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 157.125; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 157.150; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 157.175; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 157.200; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 157.225; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 157.250; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 157.275; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 157.300; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 157.325; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 157.350; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 157.375; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 157.400; outputs: ( { type = "mixer"; name = "big_mixer"; } ); }, { freq = 157.425; outputs: ( { type = "mixer"; name = "big_mixer"; } ); } ) }); ================================================ FILE: config/mixers.conf ================================================ # This config file demonstrates the usage of mixers. 
# First, two mixers are defined: # # - mixer1: sends the mixed stream to Icecast and saves it to a file # - mixer2: sends the mixed stream to Icecast # # Two dongles are used, both in AM, multichannel mode: # # - dongle 1: 3 channels: # - channel 1 goes to mixer1 (center, volume decreased to 30%) # - channel 2 goes to mixer1 (full left) # - channel 3 goes to mixer2 (85% right) # # - dongle 2: 2 channels: # - channel 1 goes to mixer1 (full right) # - channel 2 goes to mixer2 (85% left, volume set to 200%) # # Refer to https://github.com/rtl-airband/RTLSDR-Airband/wiki # for description of keywords and config syntax. mixers: { mixer1: { outputs: ( { type = "icecast"; server = "icecast.server.example.org"; port = 8080; mountpoint = "mixer1.mp3"; name = "VOLMET + Approach + Director" genre = "ATC"; username = "source"; password = "mypassword"; }, { type = "file"; directory = "/home/pi/recordings"; filename_template = "mixer1"; } ); }, mixer2: { outputs: ( { type = "icecast"; server = "icecast.server.example.org"; port = 8080; mountpoint = "mixer2.mp3"; name = "Ground + Delivery" genre = "ATC"; username = "source"; password = "mypassword"; } ); } }; devices: ({ type = "rtlsdr"; index = 0; gain = 25; centerfreq = 121.2; correction = 81; channels: ( # VOLMET { freq = 120.875; # VOLMET/ATIS/AWOS channels often transmit continuously. # Auto squelch does not perform well in such cases, so it's best to set the # squelch threshold manually. squelch_threshold defines an absolute signal # level (in dBFS). 
squelch_threshold = -40; lowpass = 5; highpass = 5; outputs: ( { type = "mixer"; name = "mixer1"; ampfactor = 0.3; } ); }, # Approach { freq = 121.8; outputs: ( { type = "mixer"; name = "mixer1"; balance = -1.0; } ); }, # Director { freq = 121.925; outputs: ( { type = "mixer"; name = "mixer2"; balance = 0.85; } ); } ); }, { type = "rtlsdr"; index = 1; gain = 33; centerfreq = 131.2; correction = 48; channels: ( # Ground { freq = 130.925; # Another way of tweaking the squelch is to specify custom SNR threshold (in dB) squelch_snr_threshold = 5.0; outputs: ( { type = "mixer"; name = "mixer1"; balance = 1.0; } ); }, # Delivery { freq = 131.4; outputs: ( { type = "mixer"; name = "mixer2"; balance = -0.85; ampfactor = 2.0; } ); } ); } ); ================================================ FILE: config/noaa.conf ================================================ fft_size = 1024; localtime = true; multiple_demod_threads = true; multiple_output_threads = true; devices: ( { type = "rtlsdr"; index = 0; gain = 19.7; centerfreq = 162.48200; correction = 0; sample_rate = 2.40; channels: ( { freq = 162.40000; label = "NOAA 162.400"; modulation = "nfm"; lowpass = -1; highpass = -1; bandwidth = 5000; ampfactor = 2.00; squelch_snr_threshold = 0.00; outputs: ( { type = "file"; directory = "/recordings"; filename_template = "NOAA_162.400"; } ); }, { freq = 162.42500; label = "NOAA 162.425"; modulation = "nfm"; lowpass = -1; highpass = -1; bandwidth = 5000; ampfactor = 2.00; squelch_snr_threshold = 0.00; outputs: ( { type = "file"; directory = "/recordings"; filename_template = "NOAA_162.425"; } ); }, { freq = 162.45000; label = "NOAA 162.450"; modulation = "nfm"; lowpass = -1; highpass = -1; bandwidth = 5000; ampfactor = 2.00; squelch_snr_threshold = 0.00; outputs: ( { type = "file"; directory = "/recordings"; filename_template = "NOAA_162.450"; } ); }, { freq = 162.47500; label = "NOAA 162.475"; modulation = "nfm"; lowpass = -1; highpass = -1; bandwidth = 5000; ampfactor = 2.00; 
squelch_snr_threshold = 0.00; outputs: ( { type = "file"; directory = "/recordings"; filename_template = "NOAA_162.475"; } ); }, { freq = 162.50000; label = "NOAA 162.500"; modulation = "nfm"; lowpass = -1; highpass = -1; bandwidth = 5000; ampfactor = 2.00; squelch_snr_threshold = 0.00; outputs: ( { type = "file"; directory = "/recordings"; filename_template = "NOAA_162.500"; } ); }, { freq = 162.52500; label = "NOAA 162.525"; modulation = "nfm"; lowpass = -1; highpass = -1; bandwidth = 5000; ampfactor = 2.00; squelch_snr_threshold = 0.00; outputs: ( { type = "file"; directory = "/recordings"; filename_template = "NOAA_162.525"; } ); }, { freq = 162.55000; label = "NOAA 162.550"; modulation = "nfm"; lowpass = -1; highpass = -1; bandwidth = 5000; ampfactor = 2.00; squelch_snr_threshold = 0.00; outputs: ( { type = "file"; directory = "/recordings"; filename_template = "NOAA_162.550"; } ); } ); } ); ================================================ FILE: config/two_dongles_multiple_outputs.conf ================================================ # Example configuration file for 2 dongles. # First dongle - scanning mode, NFM modulation, three frequencies, # output to Icecast stream, to a file and to PulseAudio server # on a local network. # Second dongle - multichannel mode, three channels: # # - channel 1: AM, goes to Icecast stream # - channel 2: AM, goes to two Icecast streams # - channel 3: NFM, goes to two files # # Dongles are specified with their serial numbers instead of # indexes, because the latter can change when devices are # reconnected into different USB ports. # # Refer to https://github.com/rtl-airband/RTLSDR-Airband/wiki # for description of keywords and config syntax. 
devices: ({ type = "rtlsdr"; serial = "777755221"; gain = 25; correction = 80; mode = "scan"; channels: ( { modulation = "nfm"; freqs = ( 152.1, 168.25, 168.375 ); outputs: ( { type = "icecast"; server = "icecast.server.example.org"; port = 8080; mountpoint = "utility.mp3"; name = "Utility channels"; username = "source"; password = "mypassword"; }, { type = "file"; directory = "/home/pi/recordings"; filename_template = "utility"; }, { type = "pulse"; server = "192.168.11.10"; stream_name = "Utility channels"; continuous = false; } ); } ); }, { type = "rtlsdr"; serial = "33433123"; gain = 20; centerfreq = 118.5; correction = 43; mode = "multichannel"; channels: ( { freq = 118.15; outputs: ( { type = "icecast"; server = "icecast.server.example.org"; port = 8080; mountpoint = "TWR.mp3"; name = "Tower"; genre = "ATC"; username = "source"; password = "mypassword"; } ); }, { freq = 119.425; outputs: ( { type = "icecast"; server = "icecast.server.example.org"; port = 8080; mountpoint = "ACC.mp3"; name = "Radar"; genre = "ATC"; username = "source"; password = "mypassword"; }, { type = "icecast"; server = "other.server.example.org"; port = 9999; mountpoint = "feed.mp3"; username = "user"; password = "secretpass"; } ); }, { freq = 119.6; modulation = "nfm"; outputs: ( { type = "file"; directory = "/home/pi/recordings"; filename_template = "somechannel"; }, { type = "file"; directory = "/home/pi/recordings"; filename_template = "somechannel_full"; continuous = true; } ); } ); } ); ================================================ FILE: init.d/rtl_airband-debian.sh ================================================ #! 
/bin/sh ### BEGIN INIT INFO # Provides: rtl_airband # Required-Start: $remote_fs $syslog # Required-Stop: $remote_fs $syslog # Default-Start: 2 3 4 5 # Default-Stop: 0 1 6 # Short-Description: rtl_airband initscript ### END INIT INFO # Author: Tomasz Lemiech PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin DESC="RTLSDR airband receiver" NAME=rtl_airband DAEMON=/usr/local/bin/$NAME DAEMON_ARGS="" PIDFILE=/run/$NAME.pid SCRIPTNAME=/etc/init.d/$NAME # Exit if the package is not installed [ -x "$DAEMON" ] || exit 0 # Read configuration variable file if it is present [ -r /etc/default/$NAME ] && . /etc/default/$NAME # Load the VERBOSE setting and other rcS variables . /lib/init/vars.sh # Define LSB log_* functions. # Depend on lsb-base (>= 3.2-14) to ensure that this file is present # and status_of_proc is working. . /lib/lsb/init-functions # # Function that starts the daemon/service # do_start() { # Return # 0 if daemon has been started # 1 if daemon was already running # 2 if daemon could not be started start-stop-daemon --start --quiet --pidfile $PIDFILE --exec $DAEMON --test > /dev/null \ || return 1 start-stop-daemon --start --quiet --pidfile $PIDFILE --exec $DAEMON -- \ $DAEMON_ARGS \ || return 2 # Add code here, if necessary, that waits for the process to be ready # to handle requests from services started subsequently which depend # on this one. As a last resort, sleep for some time. } do_stop() { # Return # 0 if daemon has been stopped # 1 if daemon was already stopped # 2 if daemon could not be stopped # other if a failure occurred start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --pidfile $PIDFILE --name $NAME RETVAL="$?" [ "$RETVAL" = 2 ] && return 2 # Wait for children to finish too if this is a daemon that forks # and if the daemon is only ever run from this initscript. # If the above conditions are not satisfied then add some other code # that waits for the process to drop all resources that could be # needed by services started subsequently. A last resort is to # sleep for some time. start-stop-daemon --stop --quiet --oknodo --retry=0/30/KILL/5 --exec $DAEMON [ "$?"
= 2 ] && return 2 rm -f $PIDFILE return "$RETVAL" } case "$1" in start) [ "$VERBOSE" != no ] && log_daemon_msg "Starting $DESC" "$NAME" do_start case "$?" in 0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;; 2) [ "$VERBOSE" != no ] && log_end_msg 1 ;; esac ;; stop) [ "$VERBOSE" != no ] && log_daemon_msg "Stopping $DESC" "$NAME" do_stop case "$?" in 0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;; 2) [ "$VERBOSE" != no ] && log_end_msg 1 ;; esac ;; status) status_of_proc "$DAEMON" "$NAME" && exit 0 || exit $? ;; restart|force-reload) log_daemon_msg "Restarting $DESC" "$NAME" do_stop case "$?" in 0|1) do_start case "$?" in 0) log_end_msg 0 ;; 1) log_end_msg 1 ;; # Old process is still running *) log_end_msg 1 ;; # Failed to start esac ;; *) # Failed to stop log_end_msg 1 ;; esac ;; *) echo "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" >&2 exit 3 ;; esac : ================================================ FILE: init.d/rtl_airband-freebsd.sh ================================================ #!/bin/sh # PROVIDE: rtl_airband # REQUIRE: DAEMON # BEFORE: LOGIN # KEYWORD: nojail shutdown . /etc/rc.subr name=rtl_airband rcvar=rtl_airband_enable command="/usr/local/bin/rtl_airband" load_rc_config ${name} run_rc_command "$1" ================================================ FILE: init.d/rtl_airband-gentoo.sh ================================================ #!/sbin/runscript # rtl_airband Gentoo startup script # (c) 2015 Tomasz Lemiech RTLAIRBAND_CONFDIR=${RTLAIRBAND_CONFDIR:-/usr/local/etc} RTLAIRBAND_CONFIG=${RTLAIRBAND_CONFIG:-${RTLAIRBAND_CONFDIR}/rtl_airband.conf} RTLAIRBAND_PIDFILE=${RTLAIRBAND_PIDFILE:-/run/${SVCNAME}.pid} RTLAIRBAND_BINARY=${RTLAIRBAND_BINARY:-/usr/local/bin/rtl_airband} depend() { use logger dns } checkconfig() { if [ ! 
-e "${RTLAIRBAND_CONFIG}" ] ; then eerror "You need an ${RTLAIRBAND_CONFIG} file to run rtl_airband" return 1 fi } start() { checkconfig || return 1 ebegin "Starting ${SVCNAME}" start-stop-daemon --start --exec "${RTLAIRBAND_BINARY}" \ --pidfile "${RTLAIRBAND_PIDFILE}" \ -- ${RTLAIRBAND_OPTS} eend $? } stop() { if [ "${RC_CMD}" = "restart" ] ; then checkconfig || return 1 fi ebegin "Stopping ${SVCNAME}" start-stop-daemon --stop --exec "${RTLAIRBAND_BINARY}" \ --pidfile "${RTLAIRBAND_PIDFILE}" --quiet eend $? } ================================================ FILE: init.d/rtl_airband.service ================================================ [Unit] Description=SDR AM/NFM demodulator Documentation=https://github.com/rtl-airband/RTLSDR-Airband/wiki Wants=network.target # NOTE: `network-online.target` may be better for some use cases After=network.target # NOTE: `network-online.target` may be better for some use cases [Service] Type=simple ExecStart=/usr/local/bin/rtl_airband -Fe # The program may exit only due to startup failure (eg. misconfiguration) # or due to failure of all SDR devices (eg. disconnection). In either case, # there is no point to restart it, because it would fail once again. 
Restart=no [Install] WantedBy=multi-user.target ================================================ FILE: scripts/find_version ================================================ #!/bin/bash PROJECT_ROOT_PATH="$(cd $(dirname "$0")/../ ; pwd)" PROJECT_GIT_DIR_PATH="${PROJECT_ROOT_PATH}/.git" PROJECT_DIR_NAME="$(basename ${PROJECT_ROOT_PATH})" # if there is a .git directory at the project root then rely on git for the version string if [ -r "${PROJECT_GIT_DIR_PATH}" ] ; then git describe --tags --abbrev --dirty --always exit 0 fi # if the project root directory matches the naming convention of an extracted archive then # get the version number out of that if [[ "${PROJECT_DIR_NAME}" =~ ^RTLSDR-Airband-[0-9]*\.[0-9]*\.[0-9]*$ ]]; then echo ${PROJECT_DIR_NAME} | cut -d '-' -f 3 exit 0 fi # print an error string to stderr (any output to stdout is considered success) >&2 echo "did not find a git root directory at ${PROJECT_GIT_DIR_PATH} and failed to extract a version from ${PROJECT_DIR_NAME}" ================================================ FILE: scripts/reformat_code ================================================ #!/bin/bash find src/*.h src/*.cpp src/hello_fft/*.h src/hello_fft/*.c | xargs clang-format-14 -i ================================================ FILE: src/.gitignore ================================================ config.h ================================================ FILE: src/CMakeLists.txt ================================================ include(CheckCXXCompilerFlag) include(CheckCXXSymbolExists) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules") set(CMAKE_EXPORT_COMPILE_COMMANDS 1) if(UNIX OR MINGW) add_definitions(-D_FILE_OFFSET_BITS=64) # isnormal() add_definitions(-D_POSIX_C_SOURCE=200112L) endif() CHECK_CXX_COMPILER_FLAG(-pthread CXX_HAS_PTHREAD) if(CXX_HAS_PTHREAD) add_compile_options(-pthread) endif() CHECK_CXX_COMPILER_FLAG(-ffast-math CXX_HAS_FFAST_MATH) if(CXX_HAS_FFAST_MATH) add_compile_options(-ffast-math) endif() #
asprintf on MacOS if(APPLE) add_definitions(-D_DARWIN_C_SOURCE) endif() # sincosf on linux vs __sincosf on MacOS set(CMAKE_REQUIRED_DEFINITIONS_ORIG ${CMAKE_REQUIRED_DEFINITIONS}) list(APPEND CMAKE_REQUIRED_DEFINITIONS "-D_GNU_SOURCE") set(CMAKE_REQUIRED_LIBRARIES_ORIG ${CMAKE_REQUIRED_LIBRARIES}) list(APPEND CMAKE_REQUIRED_LIBRARIES m) CHECK_SYMBOL_EXISTS(sincosf math.h HAVE_SINCOSF) if(HAVE_SINCOSF) set(SINCOSF "sincosf") else() CHECK_SYMBOL_EXISTS(__sincosf math.h HAVE___SINCOSF) if(HAVE___SINCOSF) set(SINCOSF "__sincosf") endif() endif() if(NOT HAVE_SINCOSF AND NOT HAVE___SINCOSF) message(FATAL_ERROR "Required function sincosf() is unavailable") endif() set(CMAKE_REQUIRED_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS_ORIG}) set(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES_ORIG}) find_library(LIBM m REQUIRED) find_library(LIBDL dl REQUIRED) find_library(LIBPTHREAD pthread REQUIRED) find_package(PkgConfig REQUIRED) pkg_check_modules(CONFIG REQUIRED libconfig++) list(APPEND rtl_airband_extra_libs ${CONFIG_LIBRARIES}) list(APPEND rtl_airband_include_dirs ${CONFIG_INCLUDE_DIRS}) list(APPEND link_dirs ${CONFIG_LIBRARY_DIRS}) # Can't use pkg_check_modules here, as some distros do not install lame.pc file find_package(Lame REQUIRED) list(APPEND rtl_airband_extra_libs ${LAME_LIBRARIES}) list(APPEND rtl_airband_include_dirs ${LAME_INCLUDE_DIR}) pkg_check_modules(SHOUT REQUIRED shout) list(APPEND rtl_airband_extra_libs ${SHOUT_LIBRARIES}) list(APPEND rtl_airband_include_dirs ${SHOUT_INCLUDE_DIRS}) list(APPEND link_dirs ${SHOUT_LIBRARY_DIRS}) set(CMAKE_REQUIRED_INCLUDES_SAVE ${CMAKE_REQUIRED_INCLUDES}) set(CMAKE_REQUIRED_LIBRARIES_SAVE ${CMAKE_REQUIRED_LIBRARIES}) set(CMAKE_REQUIRED_LINK_OPTIONS_SAVE ${CMAKE_REQUIRED_LINK_OPTIONS}) set(CMAKE_REQUIRED_INCLUDES "${CMAKE_REQUIRED_INCLUDES} ${SHOUT_INCLUDE_DIRS}") set(CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES} ${SHOUT_LIBRARIES}") if ( NOT "${SHOUT_LIBRARY_DIRS}" STREQUAL "" ) set(CMAKE_REQUIRED_LINK_OPTIONS 
"-L${SHOUT_LIBRARY_DIRS}") endif() set(LIBSHOUT_HEADER "shout/shout.h") CHECK_CXX_SYMBOL_EXISTS("SHOUT_TLS_AUTO" ${LIBSHOUT_HEADER} HAVE_SHOUT_TLS_AUTO) CHECK_CXX_SYMBOL_EXISTS("SHOUT_TLS_AUTO_NO_PLAIN" ${LIBSHOUT_HEADER} HAVE_SHOUT_TLS_AUTO_NO_PLAIN) CHECK_CXX_SYMBOL_EXISTS("SHOUT_TLS_RFC2818" ${LIBSHOUT_HEADER} HAVE_SHOUT_TLS_RFC2818) CHECK_CXX_SYMBOL_EXISTS("SHOUT_TLS_RFC2817" ${LIBSHOUT_HEADER} HAVE_SHOUT_TLS_RFC2817) CHECK_CXX_SYMBOL_EXISTS("SHOUT_TLS_DISABLED" ${LIBSHOUT_HEADER} HAVE_SHOUT_TLS_DISABLED) CHECK_CXX_SYMBOL_EXISTS("shout_set_tls" ${LIBSHOUT_HEADER} HAVE_SHOUT_SET_TLS) CHECK_CXX_SYMBOL_EXISTS("shout_set_content_format" ${LIBSHOUT_HEADER} LIBSHOUT_HAS_CONTENT_FORMAT) if(HAVE_SHOUT_TLS_AUTO AND HAVE_SHOUT_TLS_AUTO_NO_PLAIN AND HAVE_SHOUT_TLS_RFC2818 AND HAVE_SHOUT_TLS_RFC2817 AND HAVE_SHOUT_TLS_DISABLED AND HAVE_SHOUT_SET_TLS) set(LIBSHOUT_HAS_TLS TRUE) else() set(LIBSHOUT_HAS_TLS FALSE) endif() # check for shout_set_metadata_utf8() - introduced in libshout v2.4.6 CHECK_CXX_SYMBOL_EXISTS("shout_set_metadata_utf8" ${LIBSHOUT_HEADER} HAVE_SHOUT_SET_METADATA_UTF8) if(HAVE_SHOUT_SET_METADATA_UTF8) set(SHOUT_SET_METADATA "shout_set_metadata_utf8") else() set(SHOUT_SET_METADATA "shout_set_metadata") endif() set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES_SAVE}) set(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES_SAVE}) set(CMAKE_REQUIRED_LINK_OPTIONS ${CMAKE_REQUIRED_LINK_OPTIONS_SAVE}) option(NFM "Enable support for narrow FM channels" OFF) set(PLATFORM "native" CACHE STRING "Optimize the build for the given hardware platform") option(RTLSDR "Enable RTL-SDR support" ON) set(WITH_RTLSDR FALSE) option(MIRISDR "Enable Mirics support" ON) set(WITH_MIRISDR FALSE) option(SOAPYSDR "Enable SoapySDR support" ON) set(WITH_SOAPYSDR FALSE) option(PULSEAUDIO "Enable PulseAudio support" ON) set(WITH_PULSEAUDIO FALSE) option(PROFILING "Enable profiling with gperftools") set(WITH_PROFILING FALSE) if(RTLSDR) find_package(RTLSDR) if(RTLSDR_FOUND) list(APPEND 
rtl_airband_extra_sources input-rtlsdr.cpp) list(APPEND rtl_airband_extra_libs ${RTLSDR_LIBRARIES}) list(APPEND rtl_airband_include_dirs ${RTLSDR_INCLUDE_DIRS}) list(APPEND link_dirs ${RTLSDR_LIBRARY_DIRS}) set(WITH_RTLSDR TRUE) endif() endif() if(MIRISDR) find_package(MiriSDR) if(MIRISDR_FOUND) set(WITH_MIRISDR TRUE) list(APPEND rtl_airband_extra_sources input-mirisdr.cpp) list(APPEND rtl_airband_extra_libs ${MIRISDR_LIBRARIES}) list(APPEND rtl_airband_include_dirs ${MIRISDR_INCLUDE_DIRS}) list(APPEND link_dirs ${MIRISDR_LIBRARY_DIRS}) endif() endif() if(SOAPYSDR) message(STATUS "Checking for SoapySDR") find_package(SoapySDR NO_MODULE) if(SoapySDR_FOUND) list(APPEND rtl_airband_extra_sources input-soapysdr.cpp) message(STATUS " SoapySDR found, ${SoapySDR_INCLUDE_DIRS}, ${SoapySDR_LIBRARIES}") list(APPEND rtl_airband_extra_libs ${SoapySDR_LIBRARIES}) list(APPEND rtl_airband_include_dirs ${SoapySDR_INCLUDE_DIRS}) set(WITH_SOAPYSDR TRUE) else() message(STATUS " SoapySDR not found") endif() endif() if(PULSEAUDIO) pkg_check_modules(PULSEAUDIO libpulse) if(PULSEAUDIO_FOUND) list(APPEND rtl_airband_extra_sources pulse.cpp) list(APPEND rtl_airband_extra_libs ${PULSEAUDIO_LIBRARIES}) list(APPEND rtl_airband_include_dirs ${PULSEAUDIO_INCLUDE_DIRS}) list(APPEND link_dirs ${PULSEAUDIO_LIBRARY_DIRS}) set(WITH_PULSEAUDIO TRUE) endif() endif() if(PROFILING) pkg_check_modules(PROFILING libprofiler) if(PROFILING_FOUND) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g") list(APPEND rtl_airband_extra_libs ${PROFILING_LIBRARIES}) list(APPEND rtl_airband_include_dirs ${PROFILING_INCLUDE_DIRS}) list(APPEND link_dirs ${PROFILING_LIBRARY_DIRS}) set(WITH_PROFILING TRUE) endif() endif() option(BCM_VC "Enable Broadcom Videocore 3 support" OFF) set(WITH_BCM_VC FALSE) # error out on deprecated PLATFORM values if(PLATFORM STREQUAL "rpiv1" OR PLATFORM STREQUAL "armv7-generic" OR PLATFORM STREQUAL "armv8-generic") message(FATAL_ERROR "platform '${PLATFORM}' has been deprecated, see
https://github.com/rtl-airband/RTLSDR-Airband/discussions/447") # rpiv2 - Raspberry Pi 2 or Raspberry Pi 3 using Broadcom VideoCore IV GPU for FFT # NOTE: use 'native' to not use the GPU for FFT elseif(PLATFORM STREQUAL "rpiv2") set(BCM_VC ON) add_compile_options(-march=armv7-a -mfpu=neon-vfpv4 -mfloat-abi=hard) enable_language(ASM) list(APPEND rtl_airband_extra_sources rtl_airband_neon.s) # native - let the compiler optimize to run on local hardware (default) elseif(PLATFORM STREQUAL "native") CHECK_CXX_COMPILER_FLAG(-march=native CXX_HAS_MARCH_NATIVE) if(CXX_HAS_MARCH_NATIVE) add_compile_options(-march=native) else() message(FATAL_ERROR "Cannot build with PLATFORM=native: the compiler does not support -march=native option") endif() # generic - don't add any hardware related flags, used to build a "portable" binary elseif(PLATFORM STREQUAL "generic") # NO-OP # error out on unrecognized PLATFORM value else() message(FATAL_ERROR "Unknown platform '${PLATFORM}'. Valid options are: rpiv2, native, and generic") endif() # Try using VC GPU if enabled.
Fallback to fftw3f if disabled or if VC lib not found if(BCM_VC) find_package(BCM_VC) if(BCM_VC_FOUND) add_subdirectory(hello_fft) list(APPEND rtl_airband_obj_files $<TARGET_OBJECTS:hello_fft>) list(APPEND rtl_airband_extra_libs ${BCM_VC_LIBRARIES}) set(WITH_BCM_VC TRUE) endif() endif() if(NOT BCM_VC_FOUND) pkg_check_modules(FFTW3F REQUIRED fftw3f) if(FFTW3F_FOUND) list(APPEND rtl_airband_extra_libs ${FFTW3F_LIBRARIES}) list(APPEND rtl_airband_include_dirs ${FFTW3F_INCLUDE_DIRS}) list(APPEND link_dirs ${FFTW3F_LIBRARY_DIRS}) endif() endif() if(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD") list(APPEND rtl_airband_extra_libs c++) endif() if(BUILD_UNITTESTS) set(BUILD_UNITTESTS TRUE) else() set(BUILD_UNITTESTS FALSE) endif() message(STATUS "RTLSDR-Airband configuration summary:\n") message(STATUS "- Version string:\t\t${RTL_AIRBAND_VERSION}") message(STATUS "- Build type:\t\t${CMAKE_BUILD_TYPE}") message(STATUS "- Operating system:\t\t${CMAKE_SYSTEM_NAME}") message(STATUS "- SDR drivers:") message(STATUS " - librtlsdr:\t\trequested: ${RTLSDR}, enabled: ${WITH_RTLSDR}") message(STATUS " - mirisdr:\t\t\trequested: ${MIRISDR}, enabled: ${WITH_MIRISDR}") message(STATUS " - soapysdr:\t\trequested: ${SOAPYSDR}, enabled: ${WITH_SOAPYSDR}") message(STATUS "- Other options:") message(STATUS " - Platform:\t\t${PLATFORM}") message(STATUS " - Build Unit Tests:\t${BUILD_UNITTESTS}") message(STATUS " - Broadcom VideoCore GPU:\t${WITH_BCM_VC}") message(STATUS " - NFM support:\t\t${NFM}") message(STATUS " - PulseAudio:\t\trequested: ${PULSEAUDIO}, enabled: ${WITH_PULSEAUDIO}") message(STATUS " - Profiling:\t\trequested: ${PROFILING}, enabled: ${WITH_PROFILING}") message(STATUS " - Icecast TLS support:\t${LIBSHOUT_HAS_TLS}") if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/config.h) message(FATAL_ERROR "${CMAKE_CURRENT_SOURCE_DIR}/config.h nolonger used, delete before continuing") endif() configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/config.h.in" "${CMAKE_CURRENT_BINARY_DIR}/config.h" @ONLY ) add_custom_command( OUTPUT
${CMAKE_CURRENT_BINARY_DIR}/version.cpp ${CMAKE_CURRENT_BINARY_DIR}/_version.cpp COMMAND ${CMAKE_COMMAND} -DRTL_AIRBAND_VERSION=${RTL_AIRBAND_VERSION} -P ${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/version.cmake ) add_library (rtl_airband_base OBJECT config.cpp input-common.cpp input-file.cpp input-helpers.cpp mixer.cpp output.cpp rtl_airband.cpp squelch.cpp ctcss.cpp util.cpp udp_stream.cpp logging.cpp filters.cpp helper_functions.cpp ${CMAKE_CURRENT_BINARY_DIR}/version.cpp ${rtl_airband_extra_sources} ) target_include_directories (rtl_airband_base PUBLIC ${CMAKE_CURRENT_BINARY_DIR} # needed for config.h ${rtl_airband_include_dirs} ) # can't do this per target with cmake <3.13 link_directories(${link_dirs}) list(APPEND rtl_airband_obj_files $<TARGET_OBJECTS:rtl_airband_base>) add_executable (rtl_airband ${rtl_airband_obj_files}) set_property(TARGET rtl_airband PROPERTY ENABLE_EXPORTS 1) # add include for config.h target_include_directories (rtl_airband PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ) target_link_libraries (rtl_airband dl m pthread ${rtl_airband_extra_libs} ) install(TARGETS rtl_airband RUNTIME DESTINATION bin ) # TODO: install config if not present if(BUILD_UNITTESTS) cmake_minimum_required(VERSION 3.1...3.18 FATAL_ERROR) # GoogleTest requires at least C++14 set(CMAKE_CXX_STANDARD 14) # set timestamps of URL extracted files to the extraction time if(POLICY CMP0135) cmake_policy(SET CMP0135 NEW) endif() # pull in GoogleTest as a dependency include(FetchContent) FetchContent_Declare( googletest URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip ) FetchContent_MakeAvailable(googletest) enable_testing() file(GLOB_RECURSE TEST_FILES "test_*.cpp") list(APPEND TEST_FILES squelch.cpp logging.cpp filters.cpp ctcss.cpp generate_signal.cpp helper_functions.cpp ) add_executable( unittests ${TEST_FILES} ) target_link_libraries( unittests GTest::gtest_main dl ${rtl_airband_extra_libs} ) # add include for config.h target_include_directories (unittests PUBLIC
${CMAKE_CURRENT_BINARY_DIR} ) include(GoogleTest) gtest_discover_tests(unittests) endif() ================================================ FILE: src/CMakeModules/FindBCM_VC.cmake ================================================ if(NOT BCM_VC_FOUND) set(BCM_VC_PATH "/opt/vc" CACHE STRING "List of paths to search for Broadcom VideoCore library") find_path(BCM_VC_INCLUDE_DIR bcm_host.h PATHS ${BCM_VC_PATH}/include) find_library(BCM_VC_LIBRARY NAMES bcm_host PATHS ${BCM_VC_PATH}/lib) set(BCM_VC_LIBRARIES ${BCM_VC_LIBRARY} ) set(BCM_VC_INCLUDE_DIRS ${BCM_VC_INCLUDE_DIR} ) include(FindPackageHandleStandardArgs) # handle the QUIETLY and REQUIRED arguments and set BCM_VC_FOUND to TRUE # if all listed variables are TRUE find_package_handle_standard_args(BCM_VC DEFAULT_MSG BCM_VC_LIBRARY BCM_VC_INCLUDE_DIR) mark_as_advanced(BCM_VC_INCLUDE_DIR BCM_VC_LIBRARY) endif() ================================================ FILE: src/CMakeModules/FindLame.cmake ================================================ FIND_PATH(LAME_INCLUDE_DIR lame/lame.h) FIND_LIBRARY(LAME_LIBRARIES NAMES mp3lame) IF(LAME_INCLUDE_DIR AND LAME_LIBRARIES) SET(LAME_FOUND TRUE) ENDIF(LAME_INCLUDE_DIR AND LAME_LIBRARIES) IF(LAME_FOUND) IF (NOT Lame_FIND_QUIETLY) MESSAGE(STATUS "Found lame includes: ${LAME_INCLUDE_DIR}/lame/lame.h") MESSAGE(STATUS "Found lame library: ${LAME_LIBRARIES}") ENDIF (NOT Lame_FIND_QUIETLY) ELSE(LAME_FOUND) IF (Lame_FIND_REQUIRED) MESSAGE(FATAL_ERROR "lame library required but not found") ENDIF (Lame_FIND_REQUIRED) ENDIF(LAME_FOUND) ================================================ FILE: src/CMakeModules/FindMiriSDR.cmake ================================================ # - Try to find mirisdr - the hardware driver for Mirics chip in the dvb receivers # Once done this will define # MIRISDR_FOUND - System has mirisdr # MIRISDR_LIBRARIES - The mirisdr libraries # MIRISDR_INCLUDE_DIRS - The mirisdr include directories # MIRISDR_LIB_DIRS - The mirisdr library directories if(NOT MIRISDR_FOUND) 
find_package(PkgConfig) pkg_check_modules (MIRISDR_PKG libmirisdr) set(MIRISDR_DEFINITIONS ${PC_MIRISDR_CFLAGS_OTHER}) find_path(MIRISDR_INCLUDE_DIR NAMES mirisdr.h HINTS ${MIRISDR_PKG_INCLUDE_DIRS} $ENV{MIRISDR_DIR}/include PATHS /usr/local/include /usr/include /opt/include /opt/local/include) find_library(MIRISDR_LIBRARY NAMES mirisdr HINTS ${MIRISDR_PKG_LIBRARY_DIRS} $ENV{MIRISDR_DIR}/include PATHS /usr/local/lib /usr/lib /opt/lib /opt/local/lib) set(MIRISDR_LIBRARIES ${MIRISDR_LIBRARY} ) set(MIRISDR_INCLUDE_DIRS ${MIRISDR_INCLUDE_DIR} ) include(FindPackageHandleStandardArgs) # handle the QUIETLY and REQUIRED arguments and set LibMIRISDR_FOUND to TRUE # if all listed variables are TRUE find_package_handle_standard_args(MiriSDR DEFAULT_MSG MIRISDR_LIBRARY MIRISDR_INCLUDE_DIR) mark_as_advanced(MIRISDR_INCLUDE_DIR MIRISDR_LIBRARY) endif(NOT MIRISDR_FOUND) ================================================ FILE: src/CMakeModules/FindRTLSDR.cmake ================================================ # # Copyright 2012-2013 The Iris Project Developers. See the # COPYRIGHT file at the top-level directory of this distribution # and at http://www.softwareradiosystems.com/iris/copyright.html. # # This file is part of the Iris Project. # # Iris is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as # published by the Free Software Foundation, either version 3 of # the License, or (at your option) any later version. # # Iris is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # A copy of the GNU Lesser General Public License can be found in # the LICENSE file in the top-level directory of this distribution # and at http://www.gnu.org/licenses/. 
# # - Try to find rtlsdr - the hardware driver for the realtek chip in the dvb receivers # Once done this will define # RTLSDR_FOUND - System has rtlsdr # RTLSDR_LIBRARIES - The rtlsdr libraries # RTLSDR_INCLUDE_DIRS - The rtlsdr include directories # RTLSDR_LIB_DIRS - The rtlsdr library directories if(NOT RTLSDR_FOUND) find_package(PkgConfig) pkg_check_modules (RTLSDR_PKG librtlsdr) set(RTLSDR_DEFINITIONS ${PC_RTLSDR_CFLAGS_OTHER}) find_path(RTLSDR_INCLUDE_DIR NAMES rtl-sdr.h HINTS ${RTLSDR_PKG_INCLUDE_DIRS} $ENV{RTLSDR_DIR}/include PATHS /usr/local/include /usr/include /opt/include /opt/local/include) find_library(RTLSDR_LIBRARY NAMES rtlsdr HINTS ${RTLSDR_PKG_LIBRARY_DIRS} $ENV{RTLSDR_DIR}/include PATHS /usr/local/lib /usr/lib /opt/lib /opt/local/lib) set(RTLSDR_LIBRARIES ${RTLSDR_LIBRARY} ) set(RTLSDR_INCLUDE_DIRS ${RTLSDR_INCLUDE_DIR} ) include(FindPackageHandleStandardArgs) # handle the QUIETLY and REQUIRED arguments and set LibRTLSDR_FOUND to TRUE # if all listed variables are TRUE find_package_handle_standard_args(RTLSDR DEFAULT_MSG RTLSDR_LIBRARY RTLSDR_INCLUDE_DIR) mark_as_advanced(RTLSDR_INCLUDE_DIR RTLSDR_LIBRARY) endif(NOT RTLSDR_FOUND) ================================================ FILE: src/CMakeModules/version.cmake ================================================ set (VERSION "char const *RTL_AIRBAND_VERSION=\"${RTL_AIRBAND_VERSION}\";\n") if(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/version.cpp) file(READ ${CMAKE_CURRENT_BINARY_DIR}/version.cpp VERSION_) else() set(VERSION_ "") endif() if (NOT "${VERSION}" STREQUAL "${VERSION_}") file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/version.cpp "${VERSION}") endif() ================================================ FILE: src/config.cpp ================================================ /* * config.cpp * Configuration parsing routines * * Copyright (c) 2015-2021 Tomasz Lemiech * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by 
the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . */ #include #include // uint32_t #include #include #include #include #include #include #include "input-common.h" // input_t #include "rtl_airband.h" using namespace std; static int parse_outputs(libconfig::Setting& outs, channel_t* channel, int i, int j, bool parsing_mixers) { int oo = 0; for (int o = 0; o < channel->output_count; o++) { channel->outputs[oo].has_mp3_output = false; channel->outputs[oo].lame = NULL; channel->outputs[oo].lamebuf = NULL; if (outs[o].exists("disable") && (bool)outs[o]["disable"] == true) { continue; } if (!strncmp(outs[o]["type"], "icecast", 7)) { channel->outputs[oo].data = XCALLOC(1, sizeof(struct icecast_data)); channel->outputs[oo].type = O_ICECAST; icecast_data* idata = (icecast_data*)(channel->outputs[oo].data); idata->hostname = strdup(outs[o]["server"]); idata->port = outs[o]["port"]; idata->mountpoint = strdup(outs[o]["mountpoint"]); idata->username = strdup(outs[o]["username"]); idata->password = strdup(outs[o]["password"]); if (outs[o].exists("name")) idata->name = strdup(outs[o]["name"]); if (outs[o].exists("genre")) idata->genre = strdup(outs[o]["genre"]); if (outs[o].exists("description")) idata->description = strdup(outs[o]["description"]); if (outs[o].exists("send_scan_freq_tags")) idata->send_scan_freq_tags = (bool)outs[o]["send_scan_freq_tags"]; else idata->send_scan_freq_tags = 0; #ifdef LIBSHOUT_HAS_TLS if (outs[o].exists("tls")) { if (outs[o]["tls"].getType() == libconfig::Setting::TypeString) { if (!strcmp(outs[o]["tls"], "auto")) { idata->tls_mode = 
SHOUT_TLS_AUTO; } else if (!strcmp(outs[o]["tls"], "auto_no_plain")) { idata->tls_mode = SHOUT_TLS_AUTO_NO_PLAIN; } else if (!strcmp(outs[o]["tls"], "transport")) { idata->tls_mode = SHOUT_TLS_RFC2818; } else if (!strcmp(outs[o]["tls"], "upgrade")) { idata->tls_mode = SHOUT_TLS_RFC2817; } else if (!strcmp(outs[o]["tls"], "disabled")) { idata->tls_mode = SHOUT_TLS_DISABLED; } else { if (parsing_mixers) { cerr << "Configuration error: mixers.[" << i << "] outputs.[" << o << "]: "; } else { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: "; } cerr << "invalid value for tls; must be one of: auto, auto_no_plain, transport, upgrade, disabled\n"; error(); } } else { if (parsing_mixers) { cerr << "Configuration error: mixers.[" << i << "] outputs.[" << o << "]: "; } else { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: "; } cerr << "tls value must be a string\n"; error(); } } else { idata->tls_mode = SHOUT_TLS_DISABLED; } #endif /* LIBSHOUT_HAS_TLS */ channel->outputs[oo].has_mp3_output = true; } else if (!strncmp(outs[o]["type"], "file", 4)) { channel->outputs[oo].data = XCALLOC(1, sizeof(struct file_data)); channel->outputs[oo].type = O_FILE; file_data* fdata = (file_data*)(channel->outputs[oo].data); fdata->type = O_FILE; if (!outs[o].exists("directory") || !outs[o].exists("filename_template")) { if (parsing_mixers) { cerr << "Configuration error: mixers.[" << i << "] outputs.[" << o << "]: "; } else { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: "; } cerr << "both directory and filename_template required for file\n"; error(); } fdata->basedir = outs[o]["directory"].c_str(); fdata->basename = outs[o]["filename_template"].c_str(); fdata->dated_subdirectories = outs[o].exists("dated_subdirectories") ? 
(bool)(outs[o]["dated_subdirectories"]) : false; fdata->suffix = ".mp3"; fdata->continuous = outs[o].exists("continuous") ? (bool)(outs[o]["continuous"]) : false; fdata->append = (!outs[o].exists("append")) || (bool)(outs[o]["append"]); fdata->split_on_transmission = outs[o].exists("split_on_transmission") ? (bool)(outs[o]["split_on_transmission"]) : false; fdata->include_freq = outs[o].exists("include_freq") ? (bool)(outs[o]["include_freq"]) : false; channel->outputs[oo].has_mp3_output = true; if (fdata->split_on_transmission) { if (parsing_mixers) { cerr << "Configuration error: mixers.[" << i << "] outputs.[" << o << "]: split_on_transmission is not allowed for mixers\n"; error(); } if (fdata->continuous) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: can't have both continuous and split_on_transmission\n"; error(); } } } else if (!strncmp(outs[o]["type"], "rawfile", 7)) { if (parsing_mixers) { // rawfile outputs not allowed for mixers cerr << "Configuration error: mixers.[" << i << "] outputs[" << o << "]: rawfile output is not allowed for mixers\n"; error(); } channel->outputs[oo].data = XCALLOC(1, sizeof(struct file_data)); channel->outputs[oo].type = O_RAWFILE; file_data* fdata = (file_data*)(channel->outputs[oo].data); fdata->type = O_RAWFILE; if (!outs[o].exists("directory") || !outs[o].exists("filename_template")) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: both directory and filename_template required for file\n"; error(); } fdata->basedir = outs[o]["directory"].c_str(); fdata->basename = outs[o]["filename_template"].c_str(); fdata->dated_subdirectories = outs[o].exists("dated_subdirectories") ? (bool)(outs[o]["dated_subdirectories"]) : false; fdata->suffix = ".cf32"; fdata->continuous = outs[o].exists("continuous") ? 
(bool)(outs[o]["continuous"]) : false; fdata->append = (!outs[o].exists("append")) || (bool)(outs[o]["append"]); fdata->split_on_transmission = outs[o].exists("split_on_transmission") ? (bool)(outs[o]["split_on_transmission"]) : false; fdata->include_freq = outs[o].exists("include_freq") ? (bool)(outs[o]["include_freq"]) : false; channel->needs_raw_iq = channel->has_iq_outputs = 1; if (fdata->continuous && fdata->split_on_transmission) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: can't have both continuous and split_on_transmission\n"; error(); } } else if (!strncmp(outs[o]["type"], "mixer", 5)) { if (parsing_mixers) { // mixer outputs not allowed for mixers cerr << "Configuration error: mixers.[" << i << "] outputs.[" << o << "]: mixer output is not allowed for mixers\n"; error(); } channel->outputs[oo].data = XCALLOC(1, sizeof(struct mixer_data)); channel->outputs[oo].type = O_MIXER; mixer_data* mdata = (mixer_data*)(channel->outputs[oo].data); const char* name = (const char*)outs[o]["name"]; if ((mdata->mixer = getmixerbyname(name)) == NULL) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: unknown mixer \"" << name << "\"\n"; error(); } float ampfactor = outs[o].exists("ampfactor") ? (float)outs[o]["ampfactor"] : 1.0f; float balance = outs[o].exists("balance") ? 
(float)outs[o]["balance"] : 0.0f; if (balance < -1.0f || balance > 1.0f) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: balance out of allowed range <-1.0;1.0>\n"; error(); } if ((mdata->input = mixer_connect_input(mdata->mixer, ampfactor, balance)) < 0) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: " "could not connect to mixer " << name << ": " << mixer_get_error() << "\n"; error(); } debug_print("dev[%d].chan[%d].out[%d] connected to mixer %s as input %d (ampfactor=%.1f balance=%.1f)\n", i, j, o, name, mdata->input, ampfactor, balance); } else if (!strncmp(outs[o]["type"], "udp_stream", 6)) { channel->outputs[oo].data = XCALLOC(1, sizeof(struct udp_stream_data)); channel->outputs[oo].type = O_UDP_STREAM; udp_stream_data* sdata = (udp_stream_data*)channel->outputs[oo].data; sdata->continuous = outs[o].exists("continuous") ? (bool)(outs[o]["continuous"]) : false; if (outs[o].exists("dest_address")) { sdata->dest_address = strdup(outs[o]["dest_address"]); } else { if (parsing_mixers) { cerr << "Configuration error: mixers.[" << i << "] outputs.[" << o << "]: "; } else { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: "; } cerr << "missing dest_address\n"; error(); } if (outs[o].exists("dest_port")) { if (outs[o]["dest_port"].getType() == libconfig::Setting::TypeInt) { char buffer[12]; sprintf(buffer, "%d", (int)outs[o]["dest_port"]); sdata->dest_port = strdup(buffer); } else { sdata->dest_port = strdup(outs[o]["dest_port"]); } } else { if (parsing_mixers) { cerr << "Configuration error: mixers.[" << i << "] outputs.[" << o << "]: "; } else { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: "; } cerr << "missing dest_port\n"; error(); } #ifdef WITH_PULSEAUDIO } else if (!strncmp(outs[o]["type"], "pulse", 5)) { channel->outputs[oo].data = XCALLOC(1, sizeof(struct 
pulse_data)); channel->outputs[oo].type = O_PULSE; pulse_data* pdata = (pulse_data*)(channel->outputs[oo].data); pdata->continuous = outs[o].exists("continuous") ? (bool)(outs[o]["continuous"]) : false; pdata->server = outs[o].exists("server") ? strdup(outs[o]["server"]) : NULL; pdata->name = outs[o].exists("name") ? strdup(outs[o]["name"]) : "rtl_airband"; pdata->sink = outs[o].exists("sink") ? strdup(outs[o]["sink"]) : NULL; if (outs[o].exists("stream_name")) { pdata->stream_name = strdup(outs[o]["stream_name"]); } else { if (parsing_mixers) { cerr << "Configuration error: mixers.[" << i << "] outputs.[" << o << "]: PulseAudio outputs of mixers must have stream_name defined\n"; error(); } char buf[1024]; snprintf(buf, sizeof(buf), "%.3f MHz", (float)channel->freqlist[0].frequency / 1000000.0f); pdata->stream_name = strdup(buf); } #endif /* WITH_PULSEAUDIO */ } else { if (parsing_mixers) { cerr << "Configuration error: mixers.[" << i << "] outputs.[" << o << "]: "; } else { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: "; } cerr << "unknown output type\n"; error(); } channel->outputs[oo].enabled = true; channel->outputs[oo].active = false; oo++; } return oo; } static struct freq_t* mk_freqlist(int n) { if (n < 1) { cerr << "mk_freqlist: invalid list length " << n << "\n"; error(); } struct freq_t* fl = (struct freq_t*)XCALLOC(n, sizeof(struct freq_t)); for (int i = 0; i < n; i++) { fl[i].frequency = 0; fl[i].label = NULL; fl[i].agcavgfast = 0.5f; fl[i].ampfactor = 1.0f; fl[i].squelch = Squelch(); fl[i].active_counter = 0; fl[i].modulation = MOD_AM; } return fl; } static void warn_if_freq_not_in_range(int devidx, int chanidx, int freq, int centerfreq, int sample_rate) { static const float soft_bw_threshold = 0.9f; float bw_limit = (float)sample_rate / 2.f * soft_bw_threshold; if ((float)abs(freq - centerfreq) >= bw_limit) { log(LOG_WARNING, "Warning: dev[%d].channel[%d]: frequency %.3f MHz is outside of SDR 
operating bandwidth (%.3f-%.3f MHz)\n", devidx, chanidx, (double)freq / 1e6, (double)(centerfreq - bw_limit) / 1e6, (double)(centerfreq + bw_limit) / 1e6); } } static int parse_anynum2int(libconfig::Setting& f) { int ret = 0; if (f.getType() == libconfig::Setting::TypeInt) { ret = (int)f; } else if (f.getType() == libconfig::Setting::TypeFloat) { ret = (int)((double)f * 1e6); } else if (f.getType() == libconfig::Setting::TypeString) { char* s = strdup((char const*)f); ret = (int)atofs(s); free(s); } return ret; } static int parse_channels(libconfig::Setting& chans, device_t* dev, int i) { int jj = 0; for (int j = 0; j < chans.getLength(); j++) { if (chans[j].exists("disable") && (bool)chans[j]["disable"] == true) { continue; } channel_t* channel = dev->channels + jj; for (int k = 0; k < AGC_EXTRA; k++) { channel->wavein[k] = 20; channel->waveout[k] = 0.5; } channel->axcindicate = NO_SIGNAL; channel->mode = MM_MONO; channel->freq_count = 1; channel->freq_idx = 0; channel->highpass = chans[j].exists("highpass") ? (int)chans[j]["highpass"] : 100; channel->lowpass = chans[j].exists("lowpass") ? (int)chans[j]["lowpass"] : 2500; #ifdef NFM channel->pr = 0; channel->pj = 0; channel->prev_waveout = 0.5; channel->alpha = dev->alpha; #endif /* NFM */ // Make sure lowpass / highpass aren't flipped. 
// If lowpass is enabled (greater than zero) it must be larger than highpass if (channel->lowpass > 0 && channel->lowpass < channel->highpass) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: lowpass (" << channel->lowpass << ") must be greater than or equal to highpass (" << channel->highpass << ")\n"; error(); } modulations channel_modulation = MOD_AM; if (chans[j].exists("modulation")) { #ifdef NFM if (strncmp(chans[j]["modulation"], "nfm", 3) == 0) { channel_modulation = MOD_NFM; } else #endif /* NFM */ if (strncmp(chans[j]["modulation"], "am", 2) != 0) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: unknown modulation\n"; error(); } } channel->afc = chans[j].exists("afc") ? (unsigned char)(unsigned int)chans[j]["afc"] : 0; if (dev->mode == R_MULTICHANNEL) { channel->freqlist = mk_freqlist(1); channel->freqlist[0].frequency = parse_anynum2int(chans[j]["freq"]); warn_if_freq_not_in_range(i, j, channel->freqlist[0].frequency, dev->input->centerfreq, dev->input->sample_rate); if (chans[j].exists("label")) { channel->freqlist[0].label = strdup(chans[j]["label"]); } channel->freqlist[0].modulation = channel_modulation; } else { /* R_SCAN */ channel->freq_count = chans[j]["freqs"].getLength(); if (channel->freq_count < 1) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: freqs should be a list with at least one element\n"; error(); } channel->freqlist = mk_freqlist(channel->freq_count); if (chans[j].exists("labels") && chans[j]["labels"].getLength() < channel->freq_count) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: labels should be a list with at least " << channel->freq_count << " elements\n"; error(); } if (chans[j].exists("squelch_threshold") && libconfig::Setting::TypeList == chans[j]["squelch_threshold"].getType() && chans[j]["squelch_threshold"].getLength() < channel->freq_count) { cerr << "Configuration error: devices.[" << i << "] 
channels.[" << j << "]: squelch_threshold should be an int or a list of ints with at least " << channel->freq_count << " elements\n"; error(); } if (chans[j].exists("squelch_snr_threshold") && libconfig::Setting::TypeList == chans[j]["squelch_snr_threshold"].getType() && chans[j]["squelch_snr_threshold"].getLength() < channel->freq_count) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: squelch_snr_threshold should be an int, a float or a list of " "ints or floats with at least " << channel->freq_count << " elements\n"; error(); } if (chans[j].exists("notch") && libconfig::Setting::TypeList == chans[j]["notch"].getType() && chans[j]["notch"].getLength() < channel->freq_count) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: notch should be an float or a list of floats with at least " << channel->freq_count << " elements\n"; error(); } if (chans[j].exists("notch_q") && libconfig::Setting::TypeList == chans[j]["notch_q"].getType() && chans[j]["notch_q"].getLength() < channel->freq_count) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: notch_q should be a float or a list of floats with at least " << channel->freq_count << " elements\n"; error(); } if (chans[j].exists("ctcss") && libconfig::Setting::TypeList == chans[j]["ctcss"].getType() && chans[j]["ctcss"].getLength() < channel->freq_count) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: ctcss should be an float or a list of floats with at least " << channel->freq_count << " elements\n"; error(); } if (chans[j].exists("modulation") && chans[j].exists("modulations")) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: can't set both modulation and modulations\n"; error(); } if (chans[j].exists("modulations") && chans[j]["modulations"].getLength() < channel->freq_count) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: modulations should be a list 
with at least " << channel->freq_count << " elements\n"; error(); } for (int f = 0; f < channel->freq_count; f++) { channel->freqlist[f].frequency = parse_anynum2int((chans[j]["freqs"][f])); if (chans[j].exists("labels")) { channel->freqlist[f].label = strdup(chans[j]["labels"][f]); } if (chans[j].exists("modulations")) { #ifdef NFM if (strncmp(chans[j]["modulations"][f], "nfm", 3) == 0) { channel->freqlist[f].modulation = MOD_NFM; } else #endif /* NFM */ if (strncmp(chans[j]["modulations"][f], "am", 2) == 0) { channel->freqlist[f].modulation = MOD_AM; } else { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] modulations.[" << f << "]: unknown modulation\n"; error(); } } else { channel->freqlist[f].modulation = channel_modulation; } } // Set initial frequency for scanning // We tune 20 FFT bins higher to avoid DC spike dev->input->centerfreq = channel->freqlist[0].frequency + 20 * (double)(dev->input->sample_rate / fft_size); } if (chans[j].exists("squelch")) { cerr << "Warning: 'squelch' no longer supported and will be ignored, use 'squelch_threshold' or 'squelch_snr_threshold' instead\n"; } if (chans[j].exists("squelch_threshold") && chans[j].exists("squelch_snr_threshold")) { cerr << "Warning: Both 'squelch_threshold' and 'squelch_snr_threshold' are set and may conflict\n"; } if (chans[j].exists("squelch_threshold")) { // Value is dBFS, zero disables manual threshold (ie use auto squelch), negative is valid, positive is invalid if (libconfig::Setting::TypeList == chans[j]["squelch_threshold"].getType()) { // New-style array of per-frequency squelch settings for (int f = 0; f < channel->freq_count; f++) { int threshold_dBFS = (int)chans[j]["squelch_threshold"][f]; if (threshold_dBFS > 0) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: squelch_threshold must be less than or equal to 0\n"; error(); } else if (threshold_dBFS == 0) { channel->freqlist[f].squelch.set_squelch_level_threshold(0); } else { 
channel->freqlist[f].squelch.set_squelch_level_threshold(dBFS_to_level(threshold_dBFS)); } } } else if (libconfig::Setting::TypeInt == chans[j]["squelch_threshold"].getType()) { // Legacy (single squelch for all frequencies) int threshold_dBFS = (int)chans[j]["squelch_threshold"]; float level; if (threshold_dBFS > 0) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: squelch_threshold must be less than or equal to 0\n"; error(); } else if (threshold_dBFS == 0) { level = 0; } else { level = dBFS_to_level(threshold_dBFS); } for (int f = 0; f < channel->freq_count; f++) { channel->freqlist[f].squelch.set_squelch_level_threshold(level); } } else { cerr << "Invalid value for squelch_threshold (should be int or list - use parentheses)\n"; error(); } } if (chans[j].exists("squelch_snr_threshold")) { // Value is SNR in dB, zero disables squelch (ie always open), -1 uses default value, positive is valid, other negative values are invalid if (libconfig::Setting::TypeList == chans[j]["squelch_snr_threshold"].getType()) { // New-style array of per-frequency squelch settings for (int f = 0; f < channel->freq_count; f++) { float snr = 0.f; if (libconfig::Setting::TypeFloat == chans[j]["squelch_snr_threshold"][f].getType()) { snr = (float)chans[j]["squelch_snr_threshold"][f]; } else if (libconfig::Setting::TypeInt == chans[j]["squelch_snr_threshold"][f].getType()) { snr = (int)chans[j]["squelch_snr_threshold"][f]; } else { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: squelch_snr_threshold list must be of int or float\n"; error(); } if (snr == -1.0) { continue; // "disable" for this channel in list } else if (snr < 0) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: squelch_snr_threshold must be greater than or equal to 0\n"; error(); } else { channel->freqlist[f].squelch.set_squelch_snr_threshold(snr); } } } else if (libconfig::Setting::TypeFloat == chans[j]["squelch_snr_threshold"].getType() 
|| libconfig::Setting::TypeInt == chans[j]["squelch_snr_threshold"].getType()) { // Legacy (single squelch for all frequencies) float snr = (libconfig::Setting::TypeFloat == chans[j]["squelch_snr_threshold"].getType()) ? (float)chans[j]["squelch_snr_threshold"] : (int)chans[j]["squelch_snr_threshold"]; if (snr == -1.0) { continue; // "disable" so use the default without error message } else if (snr < 0) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: squelch_snr_threshold must be greater than or equal to 0\n"; error(); } for (int f = 0; f < channel->freq_count; f++) { channel->freqlist[f].squelch.set_squelch_snr_threshold(snr); } } else { cerr << "Invalid value for squelch_snr_threshold (should be float, int, or list of int/float - use parentheses)\n"; error(); } } if (chans[j].exists("notch")) { static const float default_q = 10.0; if (chans[j].exists("notch_q") && chans[j]["notch"].getType() != chans[j]["notch_q"].getType()) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: notch_q (if set) must be the same type as notch - " << "float or a list of floats with at least " << channel->freq_count << " elements\n"; error(); } if (libconfig::Setting::TypeList == chans[j]["notch"].getType()) { for (int f = 0; f < channel->freq_count; f++) { float freq = (float)chans[j]["notch"][f]; float q = chans[j].exists("notch_q") ? 
(float)chans[j]["notch_q"][f] : default_q; if (q == 0.0) { q = default_q; } else if (q <= 0.0) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] freq.[" << f << "]: invalid value for notch_q: " << q << " (must be greater than 0.0)\n"; error(); } if (freq == 0) { continue; // "disable" for this channel in list } else if (freq < 0) { cerr << "devices.[" << i << "] channels.[" << j << "] freq.[" << f << "]: invalid value for notch: " << freq << ", ignoring\n"; } else { channel->freqlist[f].notch_filter = NotchFilter(freq, WAVE_RATE, q); } } } else if (libconfig::Setting::TypeFloat == chans[j]["notch"].getType()) { float freq = (float)chans[j]["notch"]; float q = chans[j].exists("notch_q") ? (float)chans[j]["notch_q"] : default_q; if (q <= 0.0) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: invalid value for notch_q: " << q << " (must be greater than 0.0)\n"; error(); } for (int f = 0; f < channel->freq_count; f++) { if (freq == 0) { continue; // "disable" is default so ignore without error message } else if (freq < 0) { cerr << "devices.[" << i << "] channels.[" << j << "]: notch value '" << freq << "' invalid, ignoring\n"; } else { channel->freqlist[f].notch_filter = NotchFilter(freq, WAVE_RATE, q); } } } else { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: notch should be an float or a list of floats with at least " << channel->freq_count << " elements\n"; error(); } } if (chans[j].exists("ctcss")) { if (libconfig::Setting::TypeList == chans[j]["ctcss"].getType()) { for (int f = 0; f < channel->freq_count; f++) { float freq = (float)chans[j]["ctcss"][f]; if (freq == 0) { continue; // "disable" for this channel in list } else if (freq < 0) { cerr << "devices.[" << i << "] channels.[" << j << "] freq.[" << f << "]: invalid value for ctcss: " << freq << ", ignoring\n"; } else { channel->freqlist[f].squelch.set_ctcss_freq(freq, WAVE_RATE); } } } else if (libconfig::Setting::TypeFloat 
== chans[j]["ctcss"].getType()) { float freq = (float)chans[j]["ctcss"]; for (int f = 0; f < channel->freq_count; f++) { if (freq <= 0) { cerr << "devices.[" << i << "] channels.[" << j << "]: ctcss value '" << freq << "' invalid, ignoring\n"; } else { channel->freqlist[f].squelch.set_ctcss_freq(freq, WAVE_RATE); } } } else { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: ctcss should be an float or a list of floats with at least " << channel->freq_count << " elements\n"; error(); } } if (chans[j].exists("bandwidth")) { channel->needs_raw_iq = 1; if (libconfig::Setting::TypeList == chans[j]["bandwidth"].getType()) { for (int f = 0; f < channel->freq_count; f++) { int bandwidth = parse_anynum2int(chans[j]["bandwidth"][f]); if (bandwidth == 0) { continue; // "disable" for this channel in list } else if (bandwidth < 0) { cerr << "devices.[" << i << "] channels.[" << j << "] freq.[" << f << "]: bandwidth value '" << bandwidth << "' invalid, ignoring\n"; } else { channel->freqlist[f].lowpass_filter = LowpassFilter((float)bandwidth / 2, WAVE_RATE); } } } else { int bandwidth = parse_anynum2int(chans[j]["bandwidth"]); if (bandwidth == 0) { continue; // "disable" is default so ignore without error message } else if (bandwidth < 0) { cerr << "devices.[" << i << "] channels.[" << j << "]: bandwidth value '" << bandwidth << "' invalid, ignoring\n"; } else { for (int f = 0; f < channel->freq_count; f++) { channel->freqlist[f].lowpass_filter = LowpassFilter((float)bandwidth / 2, WAVE_RATE); } } } } if (chans[j].exists("ampfactor")) { if (libconfig::Setting::TypeList == chans[j]["ampfactor"].getType()) { for (int f = 0; f < channel->freq_count; f++) { float ampfactor = (float)chans[j]["ampfactor"][f]; if (ampfactor < 0) { cerr << "devices.[" << i << "] channels.[" << j << "] freq.[" << f << "]: ampfactor '" << ampfactor << "' must not be negative\n"; error(); } channel->freqlist[f].ampfactor = ampfactor; } } else { float ampfactor = 
(float)chans[j]["ampfactor"]; if (ampfactor < 0) { cerr << "devices.[" << i << "] channels.[" << j << "]: ampfactor '" << ampfactor << "' must not be negative\n"; error(); } for (int f = 0; f < channel->freq_count; f++) { channel->freqlist[f].ampfactor = ampfactor; } } } #ifdef NFM if (chans[j].exists("tau")) { channel->alpha = ((int)chans[j]["tau"] == 0 ? 0.0f : exp(-1.0f / (WAVE_RATE * 1e-6 * (int)chans[j]["tau"]))); } #endif /* NFM */ libconfig::Setting& outputs = chans[j]["outputs"]; channel->output_count = outputs.getLength(); if (channel->output_count < 1) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: no outputs defined\n"; error(); } channel->outputs = (output_t*)XCALLOC(channel->output_count, sizeof(struct output_t)); int outputs_enabled = parse_outputs(outputs, channel, i, j, false); if (outputs_enabled < 1) { cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: no outputs defined\n"; error(); } channel->outputs = (output_t*)XREALLOC(channel->outputs, outputs_enabled * sizeof(struct output_t)); channel->output_count = outputs_enabled; dev->base_bins[jj] = dev->bins[jj] = (size_t)ceil((channel->freqlist[0].frequency + dev->input->sample_rate - dev->input->centerfreq) / (double)(dev->input->sample_rate / fft_size) - 1.0) % fft_size; debug_print("bins[%d]: %zu\n", jj, dev->bins[jj]); #ifdef NFM for (int f = 0; f < channel->freq_count; f++) { if (channel->freqlist[f].modulation == MOD_NFM) { channel->needs_raw_iq = 1; break; } } #endif /* NFM */ if (channel->needs_raw_iq) { // Downmixing is done only for NFM and raw IQ outputs. It's not critical to have some residual // freq offset in AM, as it doesn't affect sound quality significantly. double dm_dphi = (double)(channel->freqlist[0].frequency - dev->input->centerfreq); // downmix freq in Hz // In general, sample_rate is not required to be an integer multiple of WAVE_RATE. // However the FFT window may only slide by an integer number of input samples. 
A non-zero rounding error // introduces additional phase rotation which we have to compensate in order to shift the channel of interest // to the center of the spectrum of the output I/Q stream. This is important for correct NFM demodulation. // The error value (in Hz): // - has an absolute value 0..WAVE_RATE/2 // - is linear with the error introduced by rounding the value of sample_rate/WAVE_RATE to the nearest integer // (range of -0.5..0.5) // - is linear with the distance between center frequency and the channel frequency, normalized to 0..1 double decimation_factor = ((double)dev->input->sample_rate / (double)WAVE_RATE); double dm_dphi_correction = (double)WAVE_RATE / 2.0; dm_dphi_correction *= (decimation_factor - round(decimation_factor)); dm_dphi_correction *= (double)(channel->freqlist[0].frequency - dev->input->centerfreq) / ((double)dev->input->sample_rate / 2.0); debug_print("dev[%d].chan[%d]: dm_dphi: %f Hz dm_dphi_correction: %f Hz\n", i, jj, dm_dphi, dm_dphi_correction); dm_dphi -= dm_dphi_correction; debug_print("dev[%d].chan[%d]: dm_dphi_corrected: %f Hz\n", i, jj, dm_dphi); // Normalize dm_dphi /= (double)WAVE_RATE; // Unalias it, to prevent overflow of int during cast dm_dphi -= trunc(dm_dphi); debug_print("dev[%d].chan[%d]: dm_dphi_normalized=%f\n", i, jj, dm_dphi); // Translate this to uint32_t range 0x00000000-0x00ffffff dm_dphi *= 256.0 * 65536.0; // Cast it to signed int first, because casting negative float to uint is not portable channel->dm_dphi = (uint32_t)((int)dm_dphi); debug_print("dev[%d].chan[%d]: dm_dphi_scaled=%f cast=0x%x\n", i, jj, dm_dphi, channel->dm_dphi); channel->dm_phi = 0.f; } #ifdef DEBUG_SQUELCH // Setup squelch debug file, if enabled char tmp_filepath[1024]; for (int f = 0; f < channel->freq_count; f++) { snprintf(tmp_filepath, sizeof(tmp_filepath), "./squelch_debug-%d-%d.dat", j, f); channel->freqlist[f].squelch.set_debug_file(tmp_filepath); } #endif /* DEBUG_SQUELCH */ jj++; } return jj; } int 
parse_devices(libconfig::Setting& devs) { int devcnt = 0; for (int i = 0; i < devs.getLength(); i++) { if (devs[i].exists("disable") && (bool)devs[i]["disable"] == true) continue; device_t* dev = devices + devcnt; if (devs[i].exists("type")) { dev->input = input_new(devs[i]["type"]); if (dev->input == NULL) { cerr << "Configuration error: devices.[" << i << "]: unsupported device type\n"; error(); } } else { #ifdef WITH_RTLSDR cerr << "Warning: devices.[" << i << "]: assuming device type \"rtlsdr\", please set \"type\" in the device section.\n"; dev->input = input_new("rtlsdr"); #else cerr << "Configuration error: devices.[" << i << "]: mandatory parameter missing: type\n"; error(); #endif /* WITH_RTLSDR */ } assert(dev->input != NULL); if (devs[i].exists("sample_rate")) { int sample_rate = parse_anynum2int(devs[i]["sample_rate"]); if (sample_rate < WAVE_RATE) { cerr << "Configuration error: devices.[" << i << "]: sample_rate must be greater than " << WAVE_RATE << "\n"; error(); } dev->input->sample_rate = sample_rate; } if (devs[i].exists("mode")) { if (!strncmp(devs[i]["mode"], "multichannel", 12)) { dev->mode = R_MULTICHANNEL; } else if (!strncmp(devs[i]["mode"], "scan", 4)) { dev->mode = R_SCAN; } else { cerr << "Configuration error: devices.[" << i << "]: invalid mode (must be one of: \"scan\", \"multichannel\")\n"; error(); } } else { dev->mode = R_MULTICHANNEL; } if (dev->mode == R_MULTICHANNEL) { dev->input->centerfreq = parse_anynum2int(devs[i]["centerfreq"]); } // centerfreq for R_SCAN will be set by parse_channels() after frequency list has been read #ifdef NFM if (devs[i].exists("tau")) { dev->alpha = ((int)devs[i]["tau"] == 0 ? 0.0f : exp(-1.0f / (WAVE_RATE * 1e-6 * (int)devs[i]["tau"]))); } else { dev->alpha = alpha; } #endif /* NFM */ // Parse hardware-dependent configuration parameters if (input_parse_config(dev->input, devs[i]) < 0) { // FIXME: get and display error string from input_parse_config // Right now it exits the program on failure. 
} // Some basic sanity checks for crucial parameters which have to be set // (or can be modified) by the input driver assert(dev->input->sfmt != SFMT_UNDEF); assert(dev->input->fullscale > 0); assert(dev->input->bytes_per_sample > 0); assert(dev->input->sample_rate > WAVE_RATE); // For the input buffer size use a base value and round it up to the nearest multiple // of FFT_BATCH blocks of input samples. // ceil is required here because sample rate is not guaranteed to be an integer multiple of WAVE_RATE. size_t fft_batch_len = FFT_BATCH * (2 * dev->input->bytes_per_sample * (size_t)ceil((double)dev->input->sample_rate / (double)WAVE_RATE)); dev->input->buf_size = MIN_BUF_SIZE; if (dev->input->buf_size % fft_batch_len != 0) dev->input->buf_size += fft_batch_len - dev->input->buf_size % fft_batch_len; debug_print("dev->input->buf_size: %zu\n", dev->input->buf_size); dev->input->buffer = (unsigned char*)XCALLOC(sizeof(unsigned char), dev->input->buf_size + 2 * dev->input->bytes_per_sample * fft_size); dev->input->bufs = dev->input->bufe = 0; dev->input->overflow_count = 0; dev->output_overrun_count = 0; dev->waveend = dev->waveavail = dev->row = dev->tq_head = dev->tq_tail = 0; dev->last_frequency = -1; libconfig::Setting& chans = devs[i]["channels"]; if (chans.getLength() < 1) { cerr << "Configuration error: devices.[" << i << "]: no channels configured\n"; error(); } dev->channels = (channel_t*)XCALLOC(chans.getLength(), sizeof(channel_t)); dev->bins = (size_t*)XCALLOC(chans.getLength(), sizeof(size_t)); dev->base_bins = (size_t*)XCALLOC(chans.getLength(), sizeof(size_t)); dev->channel_count = 0; int channel_count = parse_channels(chans, dev, i); if (channel_count < 1) { cerr << "Configuration error: devices.[" << i << "]: no channels enabled\n"; error(); } if (dev->mode == R_SCAN && channel_count > 1) { cerr << "Configuration error: devices.[" << i << "]: only one channel is allowed in scan mode\n"; error(); } dev->channels = (channel_t*)XREALLOC(dev->channels, 
channel_count * sizeof(channel_t)); dev->bins = (size_t*)XREALLOC(dev->bins, channel_count * sizeof(size_t)); dev->base_bins = (size_t*)XREALLOC(dev->base_bins, channel_count * sizeof(size_t)); dev->channel_count = channel_count; devcnt++; } return devcnt; } int parse_mixers(libconfig::Setting& mx) { const char* name; int mm = 0; for (int i = 0; i < mx.getLength(); i++) { if (mx[i].exists("disable") && (bool)mx[i]["disable"] == true) continue; if ((name = mx[i].getName()) == NULL) { cerr << "Configuration error: mixers.[" << i << "]: undefined mixer name\n"; error(); } debug_print("mm=%d name=%s\n", mm, name); mixer_t* mixer = &mixers[mm]; mixer->name = strdup(name); mixer->enabled = false; mixer->interval = MIX_DIVISOR; mixer->output_overrun_count = 0; mixer->input_count = 0; mixer->inputs = NULL; mixer->inputs_todo = NULL; mixer->input_mask = NULL; channel_t* channel = &mixer->channel; channel->highpass = mx[i].exists("highpass") ? (int)mx[i]["highpass"] : 100; channel->lowpass = mx[i].exists("lowpass") ? (int)mx[i]["lowpass"] : 2500; channel->mode = MM_MONO; // Make sure lowpass / highpass aren't flipped. 
// If lowpass is enabled (greater than zero) it must be larger than highpass if (channel->lowpass > 0 && channel->lowpass < channel->highpass) { cerr << "Configuration error: mixers.[" << i << "]: lowpass (" << channel->lowpass << ") must be greater than or equal to highpass (" << channel->highpass << ")\n"; error(); } libconfig::Setting& outputs = mx[i]["outputs"]; channel->output_count = outputs.getLength(); if (channel->output_count < 1) { cerr << "Configuration error: mixers.[" << i << "]: no outputs defined\n"; error(); } channel->outputs = (output_t*)XCALLOC(channel->output_count, sizeof(struct output_t)); int outputs_enabled = parse_outputs(outputs, channel, i, 0, true); if (outputs_enabled < 1) { cerr << "Configuration error: mixers.[" << i << "]: no outputs defined\n"; error(); } channel->outputs = (output_t*)XREALLOC(channel->outputs, outputs_enabled * sizeof(struct output_t)); channel->output_count = outputs_enabled; mm++; } return mm; } // vim: ts=4 ================================================ FILE: src/config.h.in ================================================ /* * config.h.in * Template for cmake-generated config.h * * Copyright (c) 2015-2021 Tomasz Lemiech * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . 
*/ #ifndef _CONFIG_H #define _CONFIG_H #cmakedefine WITH_RTLSDR #cmakedefine WITH_MIRISDR #cmakedefine WITH_SOAPYSDR #cmakedefine WITH_PROFILING #cmakedefine WITH_PULSEAUDIO #cmakedefine NFM #cmakedefine WITH_BCM_VC #cmakedefine LIBSHOUT_HAS_TLS #cmakedefine LIBSHOUT_HAS_CONTENT_FORMAT #define SINCOSF @SINCOSF@ #define SHOUT_SET_METADATA @SHOUT_SET_METADATA@ #endif /* _CONFIG_H */ ================================================ FILE: src/ctcss.cpp ================================================ /* * ctcss.cpp * * Copyright (C) 2022-2023 charlie-foxtrot * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see <https://www.gnu.org/licenses/>. 
*/ #include // M_PI #include // sort #include "logging.h" // debug_print() #include "ctcss.h" using namespace std; // Implementation of https://www.embedded.com/detecting-ctcss-tones-with-goertzels-algorithm/ // also https://www.embedded.com/the-goertzel-algorithm/ ToneDetector::ToneDetector(float tone_freq, float sample_rate, int window_size) { tone_freq_ = tone_freq; magnitude_ = 0.0; window_size_ = window_size; int k = (0.5 + window_size * tone_freq / sample_rate); float omega = (2.0 * M_PI * k) / window_size; coeff_ = 2.0 * cos(omega); reset(); } void ToneDetector::process_sample(const float& sample) { q0_ = coeff_ * q1_ - q2_ + sample; q2_ = q1_; q1_ = q0_; count_++; if (count_ == window_size_) { magnitude_ = q1_ * q1_ + q2_ * q2_ - q1_ * q2_ * coeff_; count_ = 0; } } void ToneDetector::reset(void) { count_ = 0; q0_ = q1_ = q2_ = 0.0; } bool ToneDetectorSet::add(const float& tone_freq, const float& sample_rate, int window_size) { ToneDetector new_tone = ToneDetector(tone_freq, sample_rate, window_size); for (const auto tone : tones_) { if (new_tone.coefficient() == tone.coefficient()) { debug_print("Skipping tone %f, too close to other tones\n", tone_freq); return false; } } tones_.push_back(new_tone); return true; } void ToneDetectorSet::process_sample(const float& sample) { for (vector::iterator it = tones_.begin(); it != tones_.end(); ++it) { it->process_sample(sample); } } void ToneDetectorSet::reset(void) { for (vector::iterator it = tones_.begin(); it != tones_.end(); ++it) { it->reset(); } } float ToneDetectorSet::sorted_powers(vector& powers) { powers.clear(); float total_power = 0.0; for (size_t i = 0; i < tones_.size(); ++i) { powers.push_back({tones_[i].relative_power(), tones_[i].freq()}); total_power += tones_[i].relative_power(); } sort(powers.begin(), powers.end(), [](PowerIndex a, PowerIndex b) { return a.power > b.power; }); return total_power / tones_.size(); } vector CTCSS::standard_tones = {67.0, 69.3, 71.9, 74.4, 77.0, 79.7, 82.5, 85.4, 
88.5, 91.5, 94.8, 97.4, 100.0, 103.5, 107.2, 110.9, 114.8, 118.8, 123.0, 127.3, 131.8, 136.5, 141.3, 146.2, 150.0, 151.4, 156.7, 159.8, 162.2, 165.5, 167.9, 171.3, 173.8, 177.3, 179.9, 183.5, 186.2, 189.9, 192.8, 196.6, 199.5, 203.5, 206.5, 210.7, 218.1, 225.7, 229.1, 233.6, 241.8, 250.3, 254.1}; CTCSS::CTCSS(const float& ctcss_freq, const float& sample_rate, int window_size) : enabled_(true), ctcss_freq_(ctcss_freq), window_size_(window_size), found_count_(0), not_found_count_(0) { debug_print("Adding CTCSS detector for %f Hz with a sample rate of %f and window %d\n", ctcss_freq, sample_rate, window_size_); // Add the target CTCSS frequency first followed by the other "standard tones", except those // within +/- 5 Hz powers_.add(ctcss_freq, sample_rate, window_size_); for (const auto tone : standard_tones) { if (abs(ctcss_freq - tone) < 5) { debug_print("Skipping tone %f, too close to other tones\n", tone); continue; } powers_.add(tone, sample_rate, window_size_); } // clear all values to start NOTE: has_tone_ will be true until the first window count of samples are processed reset(); } void CTCSS::process_audio_sample(const float& sample) { if (!enabled_) { return; } powers_.process_sample(sample); sample_count_++; if (sample_count_ < window_size_) { return; } enough_samples_ = true; // if this is sample fills out the window then check if one of the "strongest" // tones is the CTCSS tone we are looking for. 
NOTE: there can be multiple "strongest" // tones based on floating point math vector tone_powers; float avg_power = powers_.sorted_powers(tone_powers); float ctcss_tone_power = 0.0; for (const auto i : tone_powers) { if (i.freq == ctcss_freq_) { ctcss_tone_power = i.power; break; } } if (ctcss_tone_power == tone_powers[0].power && ctcss_tone_power > avg_power) { debug_print("CTCSS tone of %f Hz detected\n", ctcss_freq_); has_tone_ = true; found_count_++; } else { debug_print("CTCSS tone of %f Hz not detected - highest power was %f Hz at %f vs %f\n", ctcss_freq_, tone_powers[0].freq, tone_powers[0].power, ctcss_tone_power); has_tone_ = false; not_found_count_++; } // reset everything for the next window's worth of samples powers_.reset(); sample_count_ = 0; } void CTCSS::reset(void) { if (enabled_) { powers_.reset(); enough_samples_ = false; sample_count_ = 0; has_tone_ = false; } } ================================================ FILE: src/ctcss.h ================================================ /* * ctcss.h * * Copyright (C) 2022-2023 charlie-foxtrot * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . 
#ifndef _CTCSS_H
#define _CTCSS_H 1

#include <cstddef>  // size_t
#include <vector>

// Detector for a single tone, evaluated over windows of window_size samples.
class ToneDetector {
   public:
    ToneDetector(float tone_freq, float sample_freq, int window_size);
    // Feed one audio sample.
    void process_sample(const float& sample);
    // Clear the per-window state.
    void reset(void);

    // Magnitude computed at the end of the most recent complete window.
    const float& relative_power(void) const { return magnitude_; }
    // Tone frequency this detector was created for.
    const float& freq(void) const { return tone_freq_; }
    // Precomputed recurrence coefficient.
    const float& coefficient(void) const { return coeff_; }

   private:
    float tone_freq_ = 0.0f;
    float magnitude_ = 0.0f;

    int window_size_ = 0;
    float coeff_ = 0.0f;

    int count_ = 0;
    float q0_ = 0.0f;
    float q1_ = 0.0f;
    float q2_ = 0.0f;
};

// A collection of ToneDetector objects that are fed the same samples.
class ToneDetectorSet {
   public:
    // Power / frequency pair produced by sorted_powers().
    struct PowerIndex {
        float power;
        float freq;
    };

    ToneDetectorSet() {}

    // Returns true when a detector for tone_freq was added.
    bool add(const float& tone_freq, const float& sample_freq, int window_size);
    void process_sample(const float& sample);
    void reset(void);

    // Fills `powers` sorted by descending power and returns the average power.
    float sorted_powers(std::vector<PowerIndex>& powers);

   private:
    std::vector<ToneDetector> tones_;
};

// CTCSS tone detection for one target frequency.
// A default-constructed object is disabled: has_tone() is always true and
// enough_samples() is false.
class CTCSS {
   public:
    CTCSS(void) : enabled_(false), found_count_(0), not_found_count_(0) {}
    CTCSS(const float& ctcss_freq, const float& sample_rate, int window_size);
    void process_audio_sample(const float& sample);
    void reset(void);

    const size_t& found_count(void) const { return found_count_; }
    const size_t& not_found_count(void) const { return not_found_count_; }

    bool is_enabled(void) const { return enabled_; }
    bool enough_samples(void) const { return enough_samples_; }
    bool has_tone(void) const { return !enabled_ || has_tone_; }

    static std::vector<float> standard_tones;

   private:
    // Default member initializers keep the disabled object fully defined; the
    // default constructor does not set the fields that only matter when enabled.
    bool enabled_ = false;
    float ctcss_freq_ = 0.0f;
    int window_size_ = 0;
    size_t found_count_ = 0;
    size_t not_found_count_ = 0;

    ToneDetectorSet powers_;

    bool enough_samples_ = false;
    int sample_count_ = 0;
    bool has_tone_ = false;
};

#endif /* _CTCSS_H */
the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . */ #include "logging.h" // debug_print() #include "filters.h" using namespace std; // Default constructor is no filter NotchFilter::NotchFilter(void) : enabled_(false) {} // Notch Filter based on https://www.dsprelated.com/showcode/173.php NotchFilter::NotchFilter(float notch_freq, float sample_freq, float q) : enabled_(true), x{0.0}, y{0.0} { if (notch_freq <= 0.0) { debug_print("Invalid frequency %f Hz, disabling notch filter\n", notch_freq); enabled_ = false; return; } debug_print("Adding notch filter for %f Hz with parameters {%f, %f}\n", notch_freq, sample_freq, q); float wo = 2 * M_PI * (notch_freq / sample_freq); e = 1 / (1 + tan(wo / (q * 2))); p = cos(wo); d[0] = e; d[1] = 2 * e * p; d[2] = (2 * e - 1); debug_print("wo:%f e:%f p:%f d:{%f,%f,%f}\n", wo, e, p, d[0], d[1], d[2]); } void NotchFilter::apply(float& value) { if (!enabled_) { return; } x[0] = x[1]; x[1] = x[2]; x[2] = value; y[0] = y[1]; y[1] = y[2]; y[2] = d[0] * x[2] - d[1] * x[1] + d[0] * x[0] + d[1] * y[1] - d[2] * y[0]; value = y[2]; } // Default constructor is no filter LowpassFilter::LowpassFilter(void) : enabled_(false) {} // 2nd order lowpass Bessel filter, based entirely on a simplification of https://www-users.cs.york.ac.uk/~fisher/mkfilter/ LowpassFilter::LowpassFilter(float freq, float sample_freq) : enabled_(true) { if (freq <= 0.0) { debug_print("Invalid frequency %f Hz, disabling lowpass filter\n", freq); enabled_ = false; return; } debug_print("Adding lowpass filter at %f Hz with a sample rate of %f\n", freq, sample_freq); double raw_alpha = (double)freq / sample_freq; double 
warped_alpha = tan(M_PI * raw_alpha) / M_PI; complex zeros[2] = {-1.0, -1.0}; complex poles[2]; poles[0] = blt(M_PI * 2 * warped_alpha * complex(-1.10160133059e+00, 6.36009824757e-01)); poles[1] = blt(M_PI * 2 * warped_alpha * conj(complex(-1.10160133059e+00, 6.36009824757e-01))); complex topcoeffs[3]; complex botcoeffs[3]; expand(zeros, 2, topcoeffs); expand(poles, 2, botcoeffs); complex gain_complex = evaluate(topcoeffs, 2, botcoeffs, 2, 1.0); gain = hypot(gain_complex.imag(), gain_complex.real()); for (int i = 0; i <= 2; i++) { ycoeffs[i] = -(botcoeffs[i].real() / botcoeffs[2].real()); } debug_print("gain: %f, ycoeffs: {%f, %f}\n", gain, ycoeffs[0], ycoeffs[1]); } complex LowpassFilter::blt(complex pz) { return (2.0 + pz) / (2.0 - pz); } /* evaluate response, substituting for z */ complex LowpassFilter::evaluate(complex topco[], int nz, complex botco[], int np, complex z) { return eval(topco, nz, z) / eval(botco, np, z); } /* evaluate polynomial in z, substituting for z */ complex LowpassFilter::eval(complex coeffs[], int npz, complex z) { complex sum(0.0); for (int i = npz; i >= 0; i--) { sum = (sum * z) + coeffs[i]; } return sum; } /* compute product of poles or zeros as a polynomial of z */ void LowpassFilter::expand(complex pz[], int npz, complex coeffs[]) { coeffs[0] = 1.0; for (int i = 0; i < npz; i++) { coeffs[i + 1] = 0.0; } for (int i = 0; i < npz; i++) { multin(pz[i], npz, coeffs); } /* check computed coeffs of z^k are all real */ for (int i = 0; i < npz + 1; i++) { if (fabs(coeffs[i].imag()) > 1e-10) { log(LOG_ERR, "coeff of z^%d is not real; poles/zeros are not complex conjugates\n", i); error(); } } } void LowpassFilter::multin(complex w, int npz, complex coeffs[]) { /* multiply factor (z-w) into coeffs */ complex nw = -w; for (int i = npz; i >= 1; i--) { coeffs[i] = (nw * coeffs[i]) + coeffs[i - 1]; } coeffs[0] = nw * coeffs[0]; } void LowpassFilter::apply(float& r, float& j) { if (!enabled_) { return; } complex input(r, j); xv[0] = xv[1]; xv[1] = 
#ifndef _FILTERS_H
#define _FILTERS_H 1

#include <complex>

// Second-order notch filter for a single real-valued sample stream.
// A default-constructed filter is disabled and apply() is a no-op.
class NotchFilter {
   public:
    NotchFilter(void);
    NotchFilter(float notch_freq, float sample_freq, float q);
    // Filter one sample in place.
    void apply(float& value);
    // const so it can be queried through const references, like LowpassFilter::enabled()
    bool enabled(void) const { return enabled_; }

   private:
    bool enabled_ = false;
    float e = 0.0f;
    float p = 0.0f;
    float d[3] = {};
    float x[3] = {};
    float y[3] = {};
};

// Second-order lowpass filter applied to a complex (r, j) sample stream.
// A default-constructed filter is disabled and apply() is a no-op.
class LowpassFilter {
   public:
    LowpassFilter(void);
    LowpassFilter(float freq, float sample_freq);
    // Filter one complex sample in place (r = real part, j = imaginary part).
    void apply(float& r, float& j);
    bool enabled(void) const { return enabled_; }

   private:
    // helpers used while deriving the coefficients in the constructor
    static std::complex<double> blt(std::complex<double> pz);
    static void expand(std::complex<double> pz[], int npz, std::complex<double> coeffs[]);
    static void multin(std::complex<double> w, int npz, std::complex<double> coeffs[]);
    static std::complex<double> evaluate(std::complex<double> topco[], int nz, std::complex<double> botco[], int np, std::complex<double> z);
    static std::complex<double> eval(std::complex<double> coeffs[], int npz, std::complex<double> z);

    bool enabled_ = false;
    float ycoeffs[3] = {};
    float gain = 1.0f;  // neutral divisor until the constructor computes the real gain

    std::complex<float> xv[3];
    std::complex<float> yv[3];
};

#endif /* _FILTERS_H */
src/generate_signal.cpp ================================================ /* * generate_signal.cpp * * Copyright (C) 2023 charlie-foxtrot * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . */ #include #include "generate_signal.h" using namespace std; float Tone::WEAK = 0.05; float Tone::NORMAL = 0.2; float Tone::STRONG = 0.4; Tone::Tone(int sample_rate, const float& freq, const float& ampl) : sample_rate_(sample_rate), freq_(freq), ampl_(ampl), sample_count_(0) {} float Tone::get_sample(void) { sample_count_++; return ampl_ * sin(2 * M_PI * sample_count_ * freq_ / sample_rate_); } float Noise::WEAK = 0.05; float Noise::NORMAL = 0.2; float Noise::STRONG = 0.5; Noise::Noise(const float& ampl) : ampl_(ampl) { // create a seeded generator std::random_device r; std::seed_seq s{r(), r(), r(), r(), r(), r(), r(), r()}; generator = std::mt19937(s); // centered at 0.0, standard deviation of 0.1 distribution = normal_distribution(0.0, 0.1); } float Noise::get_sample(void) { return ampl_ * distribution(generator); } GenerateSignal::GenerateSignal(int sample_rate) : sample_rate_(sample_rate) {} void GenerateSignal::add_tone(const float& freq, const float& ampl) { tones_.push_back(Tone(sample_rate_, freq, ampl)); } void GenerateSignal::add_noise(const float& ampl) { noises_.push_back(Noise(ampl)); } float GenerateSignal::get_sample(void) { float value = 0.0; for (vector::iterator tone = tones_.begin(); tone != tones_.end(); ++tone) { 
value += tone->get_sample(); } for (vector::iterator noise = noises_.begin(); noise != noises_.end(); ++noise) { value += noise->get_sample(); } return value; } void GenerateSignal::write_file(const string& filepath, const float& seconds) { FILE* fp = fopen(filepath.c_str(), "wb"); for (int i = 0; i < sample_rate_ * seconds; ++i) { float sample = get_sample(); fwrite(&sample, sizeof(float), 1, fp); } fclose(fp); } ================================================ FILE: src/generate_signal.h ================================================ /* * generate_signal.h * * Copyright (C) 2023 charlie-foxtrot * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . 
#ifndef _GENERATE_SIGNAL_H
#define _GENERATE_SIGNAL_H

#include <random>
#include <string>
#include <vector>

// Sine tone source producing one sample per get_sample() call.
class Tone {
   public:
    // preset amplitudes
    static float WEAK;
    static float NORMAL;
    static float STRONG;

    Tone(int sample_rate, const float& freq, const float& ampl);
    float get_sample(void);

   private:
    int sample_rate_;
    float freq_;
    float ampl_;
    size_t sample_count_;
};

// Random noise source producing one sample per get_sample() call.
class Noise {
   public:
    // preset amplitudes
    static float WEAK;
    static float NORMAL;
    static float STRONG;

    Noise(const float& ampl);
    float get_sample(void);

   private:
    float ampl_;
    std::mt19937 generator;
    std::normal_distribution<float> distribution;
};

// Test signal made of any number of tones and noise sources.
class GenerateSignal {
   public:
    GenerateSignal(int sample_rate);

    void add_tone(const float& freq, const float& ampl);
    void add_noise(const float& ampl);

    // Next sample of the combined signal.
    float get_sample(void);

    // Write `seconds` worth of samples to `filepath`.
    void write_file(const std::string& filepath, const float& seconds);

   private:
    int sample_rate_;
    std::vector<Tone> tones_;
    std::vector<Noise> noises_;
};

#endif /* _GENERATE_SIGNAL_H */
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// Batches with at most this many points in total (jobs << log2_N) are run by
// direct register poking with a busy wait instead of a mailbox message.
#define GPU_FFT_BUSY_WAIT_LIMIT (5 << 12)  // ~1ms

typedef struct GPU_FFT_COMPLEX COMPLEX;

// Allocate and lay out one block of memory holding everything needed for a
// batch of `jobs` transforms of length 2^log2_N: header, ping-pong data
// buffers, shader code, twiddles, per-QPU uniforms and the mailbox message.
//
// On success *fft points at the header of that block and 0 is returned.
// Returns -2 when gpu_fft_twiddle_size() reports failure for log2_N, or the
// non-zero result of gpu_fft_alloc() when the allocation fails.
//
// NOTE(review): the parameter comment below says log2_N = 8...20 while
// gpu_fft.txt documents 8 to 21 - confirm which range is current.
int gpu_fft_prepare(int mb,         // mailbox file_desc
                    int log2_N,     // log2(FFT_length) = 8...20
                    int direction,  // GPU_FFT_FWD: fft(); GPU_FFT_REV: ifft()
                    int jobs,       // number of transforms in batch
                    struct GPU_FFT** fft) {
    unsigned info_bytes, twid_bytes, data_bytes, code_bytes, unif_bytes, mail_bytes;
    unsigned size, *uptr, vc_tw, vc_data;
    int i, q, shared, unique, passes, ret;

    struct GPU_FFT_BASE* base;
    struct GPU_FFT_PTR ptr;
    struct GPU_FFT* info;

    if (gpu_fft_twiddle_size(log2_N, &shared, &unique, &passes))
        return -2;

    // Size of each region; data_bytes is one transform's buffer rounded up past
    // the next 4096-byte boundary.
    info_bytes = 4096;
    data_bytes = (1 + ((sizeof(COMPLEX) << log2_N) | 4095));
    code_bytes = gpu_fft_shader_size(log2_N);
    twid_bytes = sizeof(COMPLEX) * 16 * (shared + GPU_FFT_QPUS * unique);
    unif_bytes = sizeof(int) * GPU_FFT_QPUS * (5 + jobs * 2);
    mail_bytes = sizeof(int) * GPU_FFT_QPUS * 2;

    size = info_bytes +             // header
           data_bytes * jobs * 2 +  // ping-pong data, aligned
           code_bytes +             // shader, aligned
           twid_bytes +             // twiddles
           unif_bytes +             // uniforms
           mail_bytes;              // mailbox message

    ret = gpu_fft_alloc(mb, size, &ptr);
    if (ret)
        return ret;

    // Header
    // (struct GPU_FFT starts with its GPU_FFT_BASE member, so both views alias)
    info = (struct GPU_FFT*)ptr.arm.vptr;
    base = (struct GPU_FFT_BASE*)info;
    gpu_fft_ptr_inc(&ptr, info_bytes);

    // For transpose
    info->x = 1 << log2_N;
    info->y = jobs;

    // Ping-pong buffers leave results in or out of place
    info->in = info->out = ptr.arm.cptr;
    info->step = data_bytes / sizeof(COMPLEX);
    if (passes & 1)
        info->out += info->step * jobs;  // odd => out of place
    vc_data = gpu_fft_ptr_inc(&ptr, data_bytes * jobs * 2);

    // Shader code
    memcpy(ptr.arm.vptr, gpu_fft_shader_code(log2_N), code_bytes);
    base->vc_code = gpu_fft_ptr_inc(&ptr, code_bytes);

    // Twiddles
    gpu_fft_twiddle_data(log2_N, direction, ptr.arm.fptr);
    vc_tw = gpu_fft_ptr_inc(&ptr, twid_bytes);

    uptr = ptr.arm.uptr;

    // Uniforms
    // Each QPU gets 5 + 2 * jobs words: two twiddle addresses, its index, an
    // address pair per job, a zero word and the IRQ flag.
    for (q = 0; q < GPU_FFT_QPUS; q++) {
        *uptr++ = vc_tw;
        *uptr++ = vc_tw + sizeof(COMPLEX) * 16 * (shared + q * unique);
        *uptr++ = q;
        for (i = 0; i < jobs; i++) {
            *uptr++ = vc_data + data_bytes * i;
            *uptr++ = vc_data + data_bytes * i + data_bytes * jobs;
        }
        *uptr++ = 0;
        *uptr++ = (q == 0);  // For mailbox: IRQ enable, master only

        base->vc_unifs[q] = gpu_fft_ptr_inc(&ptr, sizeof(int) * (5 + jobs * 2));
    }

    if ((jobs << log2_N) <= GPU_FFT_BUSY_WAIT_LIMIT) {
        // Direct register poking with busy wait
        base->vc_msg = 0;
    } else {
        // Mailbox message
        // (uptr has advanced past all uniforms, so these words follow them)
        for (q = 0; q < GPU_FFT_QPUS; q++) {
            *uptr++ = base->vc_unifs[q];
            *uptr++ = base->vc_code;
        }

        base->vc_msg = ptr.vc;
    }

    *fft = info;
    return 0;
}

// Run a batch prepared by gpu_fft_prepare() on all GPU_FFT_QPUS QPUs and
// return the result of gpu_fft_base_exec().
unsigned gpu_fft_execute(struct GPU_FFT* info) {
    return gpu_fft_base_exec(&info->base, GPU_FFT_QPUS);
}

// Release the resources held by a batch prepared by gpu_fft_prepare().
void gpu_fft_release(struct GPU_FFT* info) {
    gpu_fft_base_release(&info->base);
}
#ifndef __GPU_FFT__
#define __GPU_FFT__

// Number of QPUs a batch is spread over.
#define GPU_FFT_QPUS 8

#define GPU_FFT_PI 3.14159265358979323846

// Values for the `direction` argument of gpu_fft_prepare().
#define GPU_FFT_FWD 0  // forward FFT
#define GPU_FFT_REV 1  // inverse FFT

// One complex sample: real part followed by imaginary part.
struct GPU_FFT_COMPLEX {
    float re, im;
};

// Cursor into the allocated block: `vc` is the address handed to the GPU
// (presumably the VideoCore-side address - confirm in gpu_fft_base.c), `arm`
// is the same position as seen by the ARM, viewed through differently typed
// pointers.
struct GPU_FFT_PTR {
    unsigned vc;
    union {
        struct GPU_FFT_COMPLEX* cptr;
        void* vptr;
        char* bptr;
        float* fptr;
        unsigned* uptr;
    } arm;
};

// Bookkeeping shared by all GPU_FFT operations.
struct GPU_FFT_BASE {
    int mb;  // mailbox file descriptor
    // vc_msg: 0 selects direct register poking, otherwise the mailbox message address
    // vc_code: address of the shader code; vc_unifs: address of each QPU's uniforms
    unsigned handle, size, vc_msg, vc_code, vc_unifs[GPU_FFT_QPUS];
    volatile unsigned* peri;
};

// Control structure created by gpu_fft_prepare().
struct GPU_FFT {
    struct GPU_FFT_BASE base;
    struct GPU_FFT_COMPLEX *in, *out;  // input / output arrays of the batch
    // x, y: transform length and number of jobs (used for transpose);
    // step: distance in complex samples between the buffers of consecutive jobs
    int x, y, step;
};

// Allocate memory and initialise data structures; returns 0 for success.
int gpu_fft_prepare(int mb,         // mailbox file_desc
                    int log2_N,     // log2(FFT_length) = 8...20
                    int direction,  // GPU_FFT_FWD: fft(); GPU_FFT_REV: ifft()
                    int jobs,       // number of transforms in batch
                    struct GPU_FFT** fft);

// Execute a previously prepared batch.
unsigned gpu_fft_execute(struct GPU_FFT* info);
// Release the resources of a prepared batch.
void gpu_fft_release(struct GPU_FFT* info);

// private
int gpu_fft_twiddle_size(int, int*, int*, int*);
void gpu_fft_twiddle_data(int, int, float*);

unsigned int gpu_fft_shader_size(int);
unsigned int* gpu_fft_shader_code(int);

// gpu_fft_base:

unsigned gpu_fft_base_exec(struct GPU_FFT_BASE* base, unsigned num_qpus);

int gpu_fft_alloc(int mb, unsigned size, struct GPU_FFT_PTR* ptr);

void gpu_fft_base_release(struct GPU_FFT_BASE* base);

unsigned gpu_fft_ptr_inc(struct GPU_FFT_PTR* ptr, int bytes);

#endif  // __GPU_FFT__
================================================ BCM2835 "GPU_FFT" release 2.0 by Andrew Holme, 2014. GPU_FFT is an FFT library for the Raspberry Pi which exploits the BCM2835 SoC 3D hardware to deliver ten times more data throughput than is possible on the 700 MHz ARM. Kernels are provided for all power-of-2 FFT lengths between 256 and 2,097,152 points inclusive. A transpose function, which also uses the 3D hardware, is provided to support 2-dimensional transforms. *** Accuracy *** GPU_FFT uses single-precision floats for data and twiddle factors. The output is not scaled. The relative root-mean-square (rms) error in parts-per-million (ppm) for different transform lengths (N) is typically: log2(N) | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 ppm rms | 0.27 | 0.42 | 0.50 | 0.70 | 2.3 | 4.4 | 7.6 | 9.2 | 18 | 70 log2(N) | 18 | 19 | 20 | 21 | 8...17 batch of 10 ppm rms | 100 | 180 | 360 | 720 | 18...21 batch of 1 *** Throughput *** GPU_FFT 1.0 had to be invoked through a "mailbox" which added a 100us overhead on every call. To mitigate this, batches of transforms could be submitted via a single call. GPU_FFT 2.0 avoids this 100us overhead by poking GPU registers directly from the ARM if total batch runtime will be short; but still uses the mailbox for longer jobs to avoid busy waiting at 100% CPU for too long. Typical per-transform runtimes for batch sizes of 1 and 10; and comparative figures for FFTW (FFTW_MEASURE mode) are: log2(N) | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 1 | 0.036 | 0.051 | 0.070 | 0.11 | 0.24 | 0.58 | 1.2 | 3.3 | 10 | 0.016 | 0.027 | 0.045 | 0.095 | 0.25 | 0.61 | 1.2 | 3.2 | FFTW | 0.092 | 0.22 | 0.48 | 0.95 | 3.0 | 5.1 | 12 | 31 | log2(N) | 16 | 17 | 18 | 19 | 20 | 21 | All times in 1 | 6.8 | 16 | 42 | 95 | 190 | 380 | milliseconds FFTW | 83 | 180 | 560 | 670 | 1600 | 3400 | 2 sig. figs. *** API functions *** gpu_fft_prepare() Call once to allocate memory and initialise data structures. Returns 0 for success. 
gpu_fft_execute() Call one or more times to execute a previously prepared FFT batch. Returns 0 for success. gpu_fft_release() Call once to release resources after use. GPU memory is permanently lost if not freed. *** Parameters *** int mb Mailbox file descriptor obtained by calling mbox_open() int log2_N log2(FFT length) = 8 to 21 int direction FFT direction: GPU_FFT_FWD for forward FFT GPU_FFT_REV for inverse FFT int jobs Number of transforms in batch = 1 or more GPU_FFT ** Output parameter from prepare: control structure. GPU_FFT * Input parameter to execute and release *** Data format *** Complex data arrays are stored as alternate real and imaginary parts: struct GPU_FFT_COMPLEX { float re, im; }; The GPU_FFT struct created by gpu_fft_prepare() contains pointers to the input and output arrays: struct GPU_FFT { struct GPU_FFT_COMPLEX *in, *out; When executing a batch of transforms, buffer pointers are obtained as follows: struct GPU_FFT *fft = gpu_fft_prepare( ... , jobs); for (int j=0; j<jobs; j++) { struct GPU_FFT_COMPLEX *in = fft->in + j*fft->step; struct GPU_FFT_COMPLEX *out = fft->out + j*fft->step; GPU_FFT.step is greater than FFT length because a guard space is left between buffers for caching and alignment reasons. GPU_FFT performs multiple passes between ping-pong buffers. The final output lands in the same buffer as input after an even number of passes. Transforms where log2_N=12...16 use an odd number of passes and the final result is left out-of-place. The input data is never preserved. *** Example program *** The code that produced the above accuracy and performance figures is included as a demo with the latest Raspbian distro. Build and run it as follows: cd /opt/vc/src/hello_pi/hello_fft make sudo mknod char_dev c 100 0 sudo ./hello_fft.bin 12 It accepts three optional command-line arguments: <log2_N> <batch> <loops> The special character device is required for the ioctl mailbox through which the ARM communicates with the Videocore GPU. 
*** With Open GL *** GPU_FFT and Open GL will run concurrently if the GPU_FFT_MEM_* defines in file gpu_fft.c are changed as follows: #define GPU_FFT_MEM_FLG 0x4 // cached=0xC; direct=0x4 #define GPU_FFT_MEM_MAP 0x20000000 // cached=0x0; direct=0x20000000 Overall performance will probably be higher if GPU_FFT and Open GL take turns at using the 3D hardware. Since eglSwapBuffers() returns immediately without waiting for rendering, call glFlush() and glFinish() afterwards as follows: for (;;) { .... eglSwapBuffers(....); // non-blocking call returns immediately glFlush(); glFinish(); // wait until V3D hardware is idle .... gpu_fft_execute(....); // blocking call .... } *** 2-dimensional FFT *** Please study the hello_fft_2d demo source, which is built and executed thus: make hello_fft_2d.bin sudo ./hello_fft_2d.bin This generates a Windows BMP file: "hello_fft_2d.bmp" The demo uses a square 512x512 array; however, rectangular arrays are allowed. The following lines in gpu_fft_trans.c will do what is safe: ptr.arm.uptr[6] = src->x < dst->y? src->x : dst->y; ptr.arm.uptr[7] = src->y < dst->x? src->y : dst->x; One may transpose the output from the second FFT pass back into the first pass input buffer, by preparing and executing a second transposition; however, this is probably unnecessary. It depends on how the final output will be accessed. ================================================ FILE: src/hello_fft/gpu_fft_base.c ================================================ /* BCM2835 "GPU_FFT" release 2.0 Copyright (c) 2014, Andrew Holme. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "bcm_host.h" #include "gpu_fft.h" #include "mailbox.h" #define BUS_TO_PHYS(x) ((x) & ~0xC0000000) // V3D spec: http://www.broadcom.com/docs/support/videocore/VideoCoreIV-AG100-R.pdf #define V3D_L2CACTL (0xC00020 >> 2) #define V3D_SLCACTL (0xC00024 >> 2) #define V3D_SRQPC (0xC00430 >> 2) #define V3D_SRQUA (0xC00434 >> 2) #define V3D_SRQCS (0xC0043c >> 2) #define V3D_DBCFG (0xC00e00 >> 2) #define V3D_DBQITE (0xC00e2c >> 2) #define V3D_DBQITC (0xC00e30 >> 2) #define GPU_FFT_MEM_MAP 0x0 // cached=0x0; direct=0x20000000 #define GPU_FFT_NO_FLUSH 1 #define GPU_FFT_TIMEOUT 2000 // ms unsigned gpu_fft_base_exec_direct(struct GPU_FFT_BASE* base, unsigned num_qpus) { unsigned q; base->peri[V3D_DBCFG] = 0; // Disallow IRQ base->peri[V3D_DBQITE] = 0; // Disable IRQ base->peri[V3D_DBQITC] = -1; // Resets IRQ flags base->peri[V3D_L2CACTL] = 1 << 2; // Clear L2 cache base->peri[V3D_SLCACTL] = -1; // Clear other caches base->peri[V3D_SRQCS] = (1 << 7) | (1 << 8) | (1 << 16); // Reset error bit and counts for (q = 0; q < num_qpus; q++) { // Launch shader(s) base->peri[V3D_SRQUA] = base->vc_unifs[q]; base->peri[V3D_SRQPC] = base->vc_code; } // Busy wait polling for (;;) { if (((base->peri[V3D_SRQCS] >> 16) & 0xff) == num_qpus) break; // All done? } return 0; } unsigned gpu_fft_base_exec(struct GPU_FFT_BASE* base, unsigned num_qpus) { if (base->vc_msg) { // Use mailbox // Returns: 0x0 for success; 0x80000000 for timeout return execute_qpu(base->mb, num_qpus, base->vc_msg, GPU_FFT_NO_FLUSH, GPU_FFT_TIMEOUT); } else { // Direct register poking return gpu_fft_base_exec_direct(base, num_qpus); } } int gpu_fft_alloc(int mb, unsigned size, struct GPU_FFT_PTR* ptr) { struct GPU_FFT_BASE* base; volatile unsigned* peri; unsigned handle; if (qpu_enable(mb, 1)) return -1; // Shared memory : cached=0xC; direct=0x4 unsigned mem_flg = bcm_host_get_sdram_address() == 0x40000000 ? 
0xC : 0x4; handle = mem_alloc(mb, size, 4096, mem_flg); if (!handle) { qpu_enable(mb, 0); return -3; } peri = (volatile unsigned*)mapmem(bcm_host_get_peripheral_address(), bcm_host_get_peripheral_size()); if (!peri) { mem_free(mb, handle); qpu_enable(mb, 0); return -4; } ptr->vc = mem_lock(mb, handle); ptr->arm.vptr = mapmem(BUS_TO_PHYS(ptr->vc + GPU_FFT_MEM_MAP), size); base = (struct GPU_FFT_BASE*)ptr->arm.vptr; base->peri = peri; base->mb = mb; base->handle = handle; base->size = size; return 0; } void gpu_fft_base_release(struct GPU_FFT_BASE* base) { int mb = base->mb; unsigned handle = base->handle, size = base->size; unmapmem((void*)base->peri, bcm_host_get_peripheral_size()); unmapmem((void*)base, size); mem_unlock(mb, handle); mem_free(mb, handle); qpu_enable(mb, 0); } unsigned gpu_fft_ptr_inc(struct GPU_FFT_PTR* ptr, int bytes) { unsigned vc = ptr->vc; ptr->vc += bytes; ptr->arm.bptr += bytes; return vc; } ================================================ FILE: src/hello_fft/gpu_fft_shaders.c ================================================ /* BCM2835 "GPU_FFT" release 2.0 Copyright (c) 2014, Andrew Holme. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ static unsigned int shader_256[] = { #include "hex/shader_256.hex" }; static unsigned int shader_512[] = { #include "hex/shader_512.hex" }; static unsigned int shader_1k[] = { #include "hex/shader_1k.hex" }; static unsigned int shader_2k[] = { #include "hex/shader_2k.hex" }; static unsigned int shader_4k[] = { #include "hex/shader_4k.hex" }; static unsigned int shader_8k[] = { #include "hex/shader_8k.hex" }; static unsigned int shader_16k[] = { #include "hex/shader_16k.hex" }; static unsigned int shader_32k[] = { #include "hex/shader_32k.hex" }; static unsigned int shader_64k[] = { #include "hex/shader_64k.hex" }; static unsigned int shader_128k[] = { #include "hex/shader_128k.hex" }; static unsigned int shader_256k[] = { #include "hex/shader_256k.hex" }; static unsigned int shader_512k[] = { #include "hex/shader_512k.hex" }; static unsigned int shader_1024k[] = { #include "hex/shader_1024k.hex" }; static unsigned int shader_2048k[] = { #include "hex/shader_2048k.hex" }; static struct { unsigned int size, *code; } shaders[] = {{sizeof(shader_256), shader_256}, {sizeof(shader_512), shader_512}, {sizeof(shader_1k), shader_1k}, {sizeof(shader_2k), shader_2k}, {sizeof(shader_4k), shader_4k}, {sizeof(shader_8k), shader_8k}, 
{sizeof(shader_16k), shader_16k}, {sizeof(shader_32k), shader_32k}, {sizeof(shader_64k), shader_64k}, {sizeof(shader_128k), shader_128k}, {sizeof(shader_256k), shader_256k}, {sizeof(shader_512k), shader_512k}, {sizeof(shader_1024k), shader_1024k}, {sizeof(shader_2048k), shader_2048k}}; unsigned int gpu_fft_shader_size(int log2_N) { return shaders[log2_N - 8].size; } unsigned int* gpu_fft_shader_code(int log2_N) { return shaders[log2_N - 8].code; } ================================================ FILE: src/hello_fft/gpu_fft_trans.h ================================================ /* BCM2835 "GPU_FFT" release 2.0 Copyright (c) 2014, Andrew Holme. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "gpu_fft.h" struct GPU_FFT_TRANS { struct GPU_FFT_BASE base; }; int gpu_fft_trans_prepare(int mb, struct GPU_FFT* src, struct GPU_FFT* dst, struct GPU_FFT_TRANS** out); unsigned gpu_fft_trans_execute( // src->out ==> T ==> dst->in struct GPU_FFT_TRANS* info); void gpu_fft_trans_release(struct GPU_FFT_TRANS* info); ================================================ FILE: src/hello_fft/gpu_fft_twiddles.c ================================================ /* BCM2835 "GPU_FFT" release 2.0 Copyright (c) 2014, Andrew Holme. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "gpu_fft.h" #define ALPHA(dx) (2 * pow(sin((dx) / 2), 2)) #define BETA(dx) (sin(dx)) static double k[16] = {0, 8, 4, 4, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1}; static double m[16] = {0, 0, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7}; /****************************************************************************/ static float* twiddles_base_16(double two_pi, float* out, double theta) { int i; for (i = 0; i < 16; i++) { *out++ = cos(two_pi / 16 * k[i] * m[i] + theta * k[i]); *out++ = sin(two_pi / 16 * k[i] * m[i] + theta * k[i]); } return out; } static float* twiddles_base_32(double two_pi, float* out, double theta) { int i; for (i = 0; i < 16; i++) { *out++ = cos(two_pi / 32 * i + theta); *out++ = sin(two_pi / 32 * i + theta); } return twiddles_base_16(two_pi, out, 2 * theta); } static float* twiddles_base_64(double two_pi, float* out) { int i; for (i = 0; i < 32; i++) { *out++ = cos(two_pi / 64 * i); *out++ = sin(two_pi / 64 * i); } return twiddles_base_32(two_pi, out, 0); } /****************************************************************************/ static float* twiddles_step_16(double /*two_pi*/, float* out, double theta) { int i; for (i = 0; i < 16; i++) { *out++ = ALPHA(theta * k[i]); *out++ = BETA(theta * k[i]); } return out; } static float* twiddles_step_32(double two_pi, float* out, double theta) { int i; for (i = 0; i < 16; i++) { *out++ = ALPHA(theta); *out++ = BETA(theta); } return twiddles_step_16(two_pi, out, 2 * theta); } 
/****************************************************************************/ static void twiddles_256(double two_pi, float* out) { double N = 256; int q; out = twiddles_base_16(two_pi, out, 0); out = twiddles_step_16(two_pi, out, two_pi / N * GPU_FFT_QPUS); for (q = 0; q < GPU_FFT_QPUS; q++) out = twiddles_base_16(two_pi, out, two_pi / N * q); } static void twiddles_512(double two_pi, float* out) { double N = 512; int q; out = twiddles_base_32(two_pi, out, 0); out = twiddles_step_16(two_pi, out, two_pi / N * GPU_FFT_QPUS); for (q = 0; q < GPU_FFT_QPUS; q++) out = twiddles_base_16(two_pi, out, two_pi / N * q); } static void twiddles_1k(double two_pi, float* out) { double N = 1024; int q; out = twiddles_base_32(two_pi, out, 0); out = twiddles_step_32(two_pi, out, two_pi / N * GPU_FFT_QPUS); for (q = 0; q < GPU_FFT_QPUS; q++) out = twiddles_base_32(two_pi, out, two_pi / N * q); } static void twiddles_2k(double two_pi, float* out) { double N = 2048; int q; out = twiddles_base_64(two_pi, out); out = twiddles_step_32(two_pi, out, two_pi / N * GPU_FFT_QPUS); for (q = 0; q < GPU_FFT_QPUS; q++) out = twiddles_base_32(two_pi, out, two_pi / N * q); } static void twiddles_4k(double two_pi, float* out) { double N = 4096; int q; out = twiddles_base_16(two_pi, out, 0); out = twiddles_step_16(two_pi, out, two_pi / N * 16); out = twiddles_step_16(two_pi, out, two_pi / N * GPU_FFT_QPUS); for (q = 0; q < GPU_FFT_QPUS; q++) out = twiddles_base_16(two_pi, out, two_pi / N * q); } static void twiddles_8k(double two_pi, float* out) { double N = 8192; int q; out = twiddles_base_32(two_pi, out, 0); out = twiddles_step_16(two_pi, out, two_pi / N * 16); out = twiddles_step_16(two_pi, out, two_pi / N * GPU_FFT_QPUS); for (q = 0; q < GPU_FFT_QPUS; q++) out = twiddles_base_16(two_pi, out, two_pi / N * q); } static void twiddles_16k(double two_pi, float* out) { double N = 16384; int q; out = twiddles_base_32(two_pi, out, 0); out = twiddles_step_32(two_pi, out, two_pi / N * 16); out = 
twiddles_step_16(two_pi, out, two_pi / N * GPU_FFT_QPUS); for (q = 0; q < GPU_FFT_QPUS; q++) out = twiddles_base_16(two_pi, out, two_pi / N * q); } static void twiddles_32k(double two_pi, float* out) { double N = 32768; int q; out = twiddles_base_32(two_pi, out, 0); out = twiddles_step_32(two_pi, out, two_pi / N * 32); out = twiddles_step_32(two_pi, out, two_pi / N * GPU_FFT_QPUS); for (q = 0; q < GPU_FFT_QPUS; q++) out = twiddles_base_32(two_pi, out, two_pi / N * q); } static void twiddles_64k(double two_pi, float* out) { double N = 65536; int q; out = twiddles_base_64(two_pi, out); out = twiddles_step_32(two_pi, out, two_pi / N * 32); out = twiddles_step_32(two_pi, out, two_pi / N * GPU_FFT_QPUS); for (q = 0; q < GPU_FFT_QPUS; q++) out = twiddles_base_32(two_pi, out, two_pi / N * q); } static void twiddles_128k(double two_pi, float* out) { double N = 128 * 1024; int q; out = twiddles_base_32(two_pi, out, 0); out = twiddles_step_16(two_pi, out, two_pi / N * 16 * 16); out = twiddles_step_16(two_pi, out, two_pi / N * 16); out = twiddles_step_16(two_pi, out, two_pi / N * GPU_FFT_QPUS); for (q = 0; q < GPU_FFT_QPUS; q++) out = twiddles_base_16(two_pi, out, two_pi / N * q); } static void twiddles_256k(double two_pi, float* out) { double N = 256 * 1024; int q; out = twiddles_base_32(two_pi, out, 0); out = twiddles_step_16(two_pi, out, two_pi / N * 32 * 16); out = twiddles_step_16(two_pi, out, two_pi / N * 32); out = twiddles_step_32(two_pi, out, two_pi / N * GPU_FFT_QPUS); for (q = 0; q < GPU_FFT_QPUS; q++) out = twiddles_base_32(two_pi, out, two_pi / N * q); } static void twiddles_512k(double two_pi, float* out) { double N = 512 * 1024; int q; out = twiddles_base_32(two_pi, out, 0); out = twiddles_step_16(two_pi, out, two_pi / N * 32 * 32); out = twiddles_step_32(two_pi, out, two_pi / N * 32); out = twiddles_step_32(two_pi, out, two_pi / N * GPU_FFT_QPUS); for (q = 0; q < GPU_FFT_QPUS; q++) out = twiddles_base_32(two_pi, out, two_pi / N * q); } static void 
twiddles_1024k(double two_pi, float* out) { double N = 1024 * 1024; int q; out = twiddles_base_32(two_pi, out, 0); out = twiddles_step_32(two_pi, out, two_pi / N * 32 * 32); out = twiddles_step_32(two_pi, out, two_pi / N * 32); out = twiddles_step_32(two_pi, out, two_pi / N * GPU_FFT_QPUS); for (q = 0; q < GPU_FFT_QPUS; q++) out = twiddles_base_32(two_pi, out, two_pi / N * q); } static void twiddles_2048k(double two_pi, float* out) { double N = 2048 * 1024; int q; out = twiddles_base_64(two_pi, out); out = twiddles_step_32(two_pi, out, two_pi / N * 32 * 32); out = twiddles_step_32(two_pi, out, two_pi / N * 32); out = twiddles_step_32(two_pi, out, two_pi / N * GPU_FFT_QPUS); for (q = 0; q < GPU_FFT_QPUS; q++) out = twiddles_base_32(two_pi, out, two_pi / N * q); } /****************************************************************************/ static struct { int passes, shared, unique; void (*twiddles)(double, float*); } shaders[] = {{2, 2, 1, twiddles_256}, {2, 3, 1, twiddles_512}, {2, 4, 2, twiddles_1k}, {2, 6, 2, twiddles_2k}, {3, 3, 1, twiddles_4k}, {3, 4, 1, twiddles_8k}, {3, 5, 1, twiddles_16k}, {3, 6, 2, twiddles_32k}, {3, 8, 2, twiddles_64k}, {4, 5, 1, twiddles_128k}, {4, 6, 2, twiddles_256k}, {4, 7, 2, twiddles_512k}, {4, 8, 2, twiddles_1024k}, {4, 10, 2, twiddles_2048k}}; int gpu_fft_twiddle_size(int log2_N, int* shared, int* unique, int* passes) { if (log2_N < 8 || log2_N > 21) return -1; *shared = shaders[log2_N - 8].shared; *unique = shaders[log2_N - 8].unique; *passes = shaders[log2_N - 8].passes; return 0; } void gpu_fft_twiddle_data(int log2_N, int direction, float* out) { shaders[log2_N - 8].twiddles((direction == GPU_FFT_FWD ? 
-2 : 2) * GPU_FFT_PI, out); } ================================================ FILE: src/hello_fft/hex/shader_1024k.hex ================================================ 0x00000014, 0xe0021227, // mov rb_STAGES, STAGES 0x00000010, 0xe00216e7, // mov rb_0x10, 0x10 0x00000040, 0xe0021727, // mov rb_0x40, 0x40 0x00000080, 0xe0021767, // mov rb_0x80, 0x80 0x000000f0, 0xe00217a7, // mov rb_0xF0, 0xF0 0x00000100, 0xe00217e7, // mov rb_0x100, 0x100 0x55555555, 0xe0020767, // mov rx_0x55555555, 0x55555555 0x33333333, 0xe00207a7, // mov rx_0x33333333, 0x33333333 0x0f0f0f0f, 0xe00207e7, // mov rx_0x0F0F0F0F, 0x0F0F0F0F 0x00ff00ff, 0xe0021027, // mov rx_0x00FF00FF, 0x00FF00FF 0x0000ffff, 0xe00216a7, // mov rx_0x0000FFFF, 0x0000FFFF 0x80904000, 0xe0020727, // mov ra_vdw_32, vdw_setup_0(1, 16, dma_h32( 0,0)) 0x80905000, 0xe0021067, // mov rb_vdw_32, vdw_setup_0(1, 16, dma_h32(32,0)) 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020327, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020367, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203a7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020427, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 
0x159e7900, 0x10020467, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100204a7, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021327, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021367, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100213a7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021427, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021467, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100214a7, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100204e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020527, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100214e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 
0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021527, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10021167, // mov rb_inst, unif 0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0)) 0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0)) 0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0)) 0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst 0xcc9e7081, 0x100246a0, // add out_0, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100246e0, // add out_1, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1 0x0c9e7080, 0x100211e7, // add out_3, r0, r2 0x000002e8, 0xf0f802a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x156e7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x15727d80, 0x10020827, // mov r0, ra_vdw_32 0x8c05cdf6, 0x10024061, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov r1, ra_save_ptr 0x00000080, 0xe00208a7, // mov r2, vdw_setup_0(1, 16, dma_h32(1,0)) - vdw_setup_0(1, 16, 
dma_h32(0,0)) 0x00040000, 0xe00208e7, // mov r3, PASS32_STRIDE 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add 
r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 
0x100009e7, // nop 0x00000050, 0xf0f812a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x156e7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x156a7d80, 0x10020c67, // mov vr_setup, ra_vpm_lo 0x15c27d80, 0x100009e7, // mov -, vpm 0x00000080, 0xf0f801a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x000005d8, 0xf0f811a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000009, 0xe80009e7, // mov -, srel(i+9) 0x00000011, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000a, 0xe80009e7, // mov -, srel(i+9) 0x00000012, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000b, 0xe80009e7, // mov 
-, srel(i+9) 0x00000013, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000c, 0xe80009e7, // mov -, srel(i+9) 0x00000014, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000d, 0xe80009e7, // mov -, srel(i+9) 0x00000015, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000e, 0xe80009e7, // mov -, srel(i+9) 0x00000016, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000f, 0xe80009e7, // mov -, srel(i+9) 0x00000017, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffd78, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149c01c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149c01c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149da1c0, 0x10020867, // and r1, r0, mask 0x0e9db1c0, 0x10020827, // shr r0, r0, shift 0x149da1c0, 0x10020827, // and r0, r0, mask 0x119db3c0, 0x10020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9c91c0, 0xd0020827, // shr r0, r0, 32-STAGES-3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffc30, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x20567006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d500f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x2056700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x22095c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x00000000, 0xf0f549e7, // bra -, ra_save_32 0x95682ff6, 0x10024682, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x956c7ff6, 0x100246c7, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi 0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32 0x8c15edf6, 0x10024160, // add 
ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffba0, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffb50, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x20567006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d500f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x2056700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x22095c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x00000000, 0xf0f549e7, // bra -, ra_save_32 0x95682ff6, 0x10024682, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x956c7ff6, 0x100246c7, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi 0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32 0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst 0x0d9c11c0, 0xd0020827, // sub r0, r0, 1 0x119c51c0, 0xd0020827, // shl r0, r0, 5 0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0 0x159cafc0, 0x100602a7, // mov.ifnz ra_save_32, rx_save_slave_32 0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif 0x15827d80, 0x100210e7, // mov rb_addr_y, unif 0x000008d0, 0xf00809e7, // brr.allz -, r:end 0x952cbdbf, 0x10024555, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov 
rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or 
r0, r0, r1 0x149c01c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149c01c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149da1c0, 0x10020867, // and r1, r0, mask 0x0e9db1c0, 0x10020827, // shr r0, r0, shift 0x149da1c0, 0x10020827, // and r0, r0, mask 0x119db3c0, 0x10020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9c91c0, 0xd0020827, // shr r0, r0, 32-STAGES-3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffa50, 0xf0f80227, // brr ra_link_1, r:pass_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 0xfffffa28, 0xf00809e7, // brr.allz -, r:pass_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x952cbdbf, 0x10024555, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; 
mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffbf0, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x00007fff, 0xe0020827, // mov r0, 0x7FFF 0x141e7c00, 0x100229e7, // and.setf -, ra_points, r0 0xfffffbc0, 0xf01809e7, // brr.allnz -, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100601e7, // add.ifnz ra_points, ra_points, rb_0x100 0x95555dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE 0x20367016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32 0x209cd017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32 0x209cd01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32 0x2136709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02567c80, 0x10020567, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2 0x029d5ec0, 
0x10021567, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3 0x95659dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x203a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209ce017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209ce01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x213a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02667c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d9ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 0xfffffab8, 0xf00809e7, // brr.allz -, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x952cbdbf, 0x10024555, // mov 
ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff9b0, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0xfffff990, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0xfffff970, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, 
// nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0xfffff950, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x95555dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE 0x203e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32 0x209cf017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32 0x209cf01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32 0x213e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02567c80, 0x10020567, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2 0x029d5ec0, 0x10021567, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3 0x95659dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x20427016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209d0017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209d001f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x2142709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02667c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d9ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> 
(8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 0xfffff848, 0xf00809e7, // brr.allz -, r:pass_3 0x00000060, 0xe0020827, // mov r0, 3*4*8 0x0d227c00, 0x10020227, // sub ra_link_1, ra_link_1, r0 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x954d3dbf, 0x10024555, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x95514dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // 
mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff740, 0xf0f80227, // brr ra_link_1, r:pass_4 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x95555dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE 0x20467016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32 0x209d1017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32 0x209d101f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32 0x2146709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02567c80, 0x10020567, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2 0x029d5ec0, 0x10021567, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3 0x95659dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x204a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209d2017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209d201f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x214a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02667c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d9ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz 
r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 0xfffff638, 0xf00809e7, // brr.allz -, r:pass_4 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0xfffff700, 0xf0f809e7, // brr -, r:loop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159c3fc0, 0x100209a7, // mov interrupt, flag 0x009e7000, 0x300009e7, // nop; nop; thrend 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop ================================================ FILE: src/hello_fft/hex/shader_128k.hex ================================================ 0x00000011, 0xe0021227, // mov rb_STAGES, STAGES 0x00000010, 0xe00216a7, // mov rb_0x10, 0x10 0x00000040, 0xe00216e7, // mov rb_0x40, 0x40 0x00000080, 0xe0021727, // mov rb_0x80, 0x80 0x000000f0, 0xe0021767, // mov rb_0xF0, 0xF0 0x00000100, 0xe00217a7, // mov rb_0x100, 0x100 0x00000fff, 0xe00217e7, // mov rb_0xFFF, 0xFFF 0x55555555, 0xe0020767, // mov rx_0x55555555, 0x55555555 0x33333333, 0xe00207a7, // mov rx_0x33333333, 0x33333333 0x0f0f0f0f, 0xe00207e7, // mov rx_0x0F0F0F0F, 0x0F0F0F0F 0x00ff00ff, 0xe0021627, // mov rx_0x00FF00FF, 0x00FF00FF 0x0000ffff, 0xe0021667, // mov rx_0x0000FFFF, 0x0000FFFF 0x88104000, 0xe00206e7, // mov ra_vdw_16, vdw_setup_0(16, 16, dma_h32( 0,0)) 0x88105000, 0xe0021027, // mov 
rb_vdw_16, vdw_setup_0(16, 16, dma_h32(32,0)) 0x90104000, 0xe0020727, // mov ra_vdw_32, vdw_setup_0(32, 16, dma_h32( 0,0)) 0x90105000, 0xe0021067, // mov rb_vdw_32, vdw_setup_0(32, 16, dma_h32(32,0)) 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020327, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020367, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203a7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021327, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021367, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100213a7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dc1c0, 0xa0020827, // add 
r0, r0, stride; ldtmu0 0x159e7900, 0x10020427, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021427, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10021167, // mov rb_inst, unif 0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0)) 0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0)) 0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0)) 0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst 0xcc9e7081, 0x10024660, // add out_0, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100246a0, // add out_1, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1 0x0c9e7080, 0x100211e7, // add out_3, r0, r2 0x000000b0, 0xf0f80127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, arg 0x159e7000, 0x10020c27, // mov vpm, r0 0x159e7240, 0x10020c27, // mov vpm, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x156e7d80, 0x10021c67, // mov vw_setup, arg_vdw 0xc000ffc0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS16_STRIDE-16*4 0x8c05bdf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr 0x00000038, 
0xf0f81127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, arg 0x159e7000, 0x10020c27, // mov vpm, r0 0x159e7240, 0x10020c27, // mov vpm, r1 0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10020c67, // mov vr_setup, arg_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x000000c8, 0xf0f802a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x15727d80, 0x10021c67, // mov vw_setup, ra_vdw_32 0xc0007fc0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS32_STRIDE-16*4 0x8c05bdf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr 0x00000050, 0xf0f812a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, 
ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10020c67, // mov vr_setup, ra_vpm_lo 0x15c27d80, 0x100009e7, // mov -, vpm 0x00000080, 0xf0f801a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000560, 0xf0f811a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000009, 0xe80009e7, // mov -, srel(i+9) 0x00000011, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000a, 0xe80009e7, // mov -, srel(i+9) 0x00000012, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000b, 0xe80009e7, // mov -, srel(i+9) 0x00000013, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000c, 0xe80009e7, // mov -, srel(i+9) 0x00000014, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, 
// nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000d, 0xe80009e7, // mov -, srel(i+9) 0x00000015, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000e, 0xe80009e7, // mov -, srel(i+9) 0x00000016, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000f, 0xe80009e7, // mov -, srel(i+9) 0x00000017, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffd78, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c15adf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149d81c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, 
r0, shift 0x149d81c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149d91c0, 0x10020867, // and r1, r0, mask 0x0e9da1c0, 0x10020827, // shr r0, r0, shift 0x149d91c0, 0x10020827, // and r0, r0, mask 0x119da3c0, 0x10020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9cc1c0, 0xd0020827, // shr r0, r0, 32-STAGES-3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffc30, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x20467006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d100f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x2046700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x22091c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x00000000, 0xf0f549e7, // bra -, ra_save_32 0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x95687ff6, 0x10024687, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi 0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32 0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffba0, 
0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x00000000, 0xf0f489e7, // bra -, ra_save_16 0x009e7000, 0x100009e7, // nop 0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x956c0ff6, 0x100246c0, // mov ra_vdw_16, rb_vdw_16; mov rb_vdw_16, ra_vdw_16 0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst 0x0d9c11c0, 0xd0020827, // sub r0, r0, 1 0x119c51c0, 0xd0020827, // shl r0, r0, 5 0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0 0x159c4fc0, 0x10060127, // mov.ifnz ra_save_16, rx_save_slave_16 0x159cafc0, 0x100602a7, // mov.ifnz ra_save_32, rx_save_slave_32 0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif 0x15827d80, 0x100210e7, // mov rb_addr_y, unif 0x000007b0, 0xf00809e7, // brr.allz -, r:end 0x952cbdbf, 0x10024451, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 
0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15adf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149d81c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149d81c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149d91c0, 0x10020867, // and r1, r0, mask 0x0e9da1c0, 0x10020827, // shr r0, r0, shift 0x149d91c0, 0x10020827, // and r0, r0, mask 0x119da3c0, 0x10020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9cc1c0, 0xd0020827, // shr r0, r0, 32-STAGES-3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffac0, 0xf0f80227, // brr ra_link_1, r:pass_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dedc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 
0xfffffa98, 0xf00809e7, // brr.allz -, r:pass_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dedc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c41c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffc68, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 
0x100009e7, // nop 0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x141dfdc0, 0x100229e7, // and.setf -, ra_points, rb_0xFFF 0xfffffc40, 0xf01809e7, // brr.allnz -, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dcdc0, 0x100601e7, // add.ifnz ra_points, ra_points, rb_0x80 0x95555dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x20367016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209cd017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209cd01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x2136709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02567c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d5ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 0xfffffb78, 0xf00809e7, // brr.allz -, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x00000000, 0xf0f4c227, 
// bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c41c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffa78, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0xfffffa58, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dcdc0, 
0x100201e7, // add ra_points, ra_points, rb_0x80 0x95555dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x203a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209ce017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209ce01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x213a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02567c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d5ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 0xfffff990, 0xf00809e7, // brr.allz -, r:pass_3 0x00000020, 0xe0020827, // mov r0, 4*8 0x0d227c00, 0x10020227, // sub ra_link_1, ra_link_1, r0 0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x95410dbf, 0x100248a3, // mov 
r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c41c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff890, 0xf0f80227, // brr ra_link_1, r:pass_4 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x95555dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x203e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209cf017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209cf01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x213e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 
0x100208e7, // fsub r3, r3, r1 0x02567c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d5ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 0xfffff7c8, 0xf00809e7, // brr.allz -, r:pass_4 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0xfffff820, 0xf0f809e7, // brr -, r:loop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159c3fc0, 0x100209a7, // mov interrupt, flag 0x009e7000, 0x300009e7, // nop; nop; thrend 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop ================================================ FILE: src/hello_fft/hex/shader_16k.hex ================================================ 0x00000010, 0xe00216e7, // mov rb_0x10, 0x10 0x00000040, 0xe0021727, // mov rb_0x40, 0x40 0x00000080, 0xe0021767, // 
mov rb_0x80, 0x80 0x000000f0, 0xe00217a7, // mov rb_0xF0, 0xF0 0x00000100, 0xe00217e7, // mov rb_0x100, 0x100 0x00005555, 0xe0020767, // mov rx_0x5555, 0x5555 0x00003333, 0xe00207a7, // mov rx_0x3333, 0x3333 0x00000f0f, 0xe00207e7, // mov rx_0x0F0F, 0x0F0F 0x000000ff, 0xe00216a7, // mov rx_0x00FF, 0x00FF 0x88104000, 0xe00206e7, // mov ra_vdw_16, vdw_setup_0(16, 16, dma_h32( 0,0)) 0x88105000, 0xe0021027, // mov rb_vdw_16, vdw_setup_0(16, 16, dma_h32(32,0)) 0x90104000, 0xe0020727, // mov ra_vdw_32, vdw_setup_0(32, 16, dma_h32( 0,0)) 0x90105000, 0xe0021067, // mov rb_vdw_32, vdw_setup_0(32, 16, dma_h32(32,0)) 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020327, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020367, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203a7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021327, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021367, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, 
stride; ldtmu0 0x159e7900, 0x100213a7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020427, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021427, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10021167, // mov rb_inst, unif 0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0)) 0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0)) 0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0)) 0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst 0xcc9e7081, 0x10024660, // add out_0, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100246a0, // add out_1, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1 0x0c9e7080, 0x100211e7, // add out_3, r0, r2 0x000000b0, 0xf0f80127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, arg 0x159e7000, 0x10020c27, // mov vpm, r0 0x159e7240, 0x10020c27, // mov vpm, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 
0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x156e7d80, 0x10021c67, // mov vw_setup, arg_vdw 0xc0001fc0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS16_STRIDE-16*4 0x8c05cdf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr 0x00000038, 0xf0f81127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, arg 0x159e7000, 0x10020c27, // mov vpm, r0 0x159e7240, 0x10020c27, // mov vpm, r1 0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10020c67, // mov vr_setup, arg_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x000000c8, 0xf0f802a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x15727d80, 0x10021c67, // mov vw_setup, 
ra_vdw_32 0xc0000fc0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS32_STRIDE-16*4 0x8c05cdf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr 0x00000050, 0xf0f812a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10020c67, // mov vr_setup, ra_vpm_lo 0x15c27d80, 0x100009e7, // mov -, vpm 0x00000080, 0xf0f801a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x000005f0, 0xf0f811a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000009, 0xe80009e7, // mov -, srel(i+9) 0x00000011, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000a, 0xe80009e7, // mov -, 
srel(i+9) 0x00000012, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000b, 0xe80009e7, // mov -, srel(i+9) 0x00000013, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000c, 0xe80009e7, // mov -, srel(i+9) 0x00000014, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000d, 0xe80009e7, // mov -, srel(i+9) 0x00000015, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000e, 0xe80009e7, // mov -, srel(i+9) 0x00000016, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000f, 0xe80009e7, // mov -, srel(i+9) 0x00000017, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffda0, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, 
// and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149da1c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149da1c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x119c11c0, 0xd0020827, // shl r0, r0, STAGES-13 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffc80, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x20467006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d100f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x2046700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x22091c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x00000000, 0xf0f549e7, // bra -, ra_save_32 0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x95687ff6, 0x10024687, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi 0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32 0x8c15edf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 
0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffbf0, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffba0, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x20467006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d100f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x2046700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x22091c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x00000000, 0xf0f549e7, // bra -, ra_save_32 0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x95687ff6, 0x10024687, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi 0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32 0x8c15ddf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffb10, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x00000000, 0xf0f489e7, // bra -, ra_save_16 0x009e7000, 0x100009e7, // nop 0x95642ff6, 0x10024642, // mov 
ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x956c0ff6, 0x100246c0, // mov ra_vdw_16, rb_vdw_16; mov rb_vdw_16, ra_vdw_16 0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst 0x0d9c11c0, 0xd0020827, // sub r0, r0, 1 0x119c51c0, 0xd0020827, // shl r0, r0, 5 0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0 0x159c4fc0, 0x10060127, // mov.ifnz ra_save_16, rx_save_slave_16 0x159cafc0, 0x100602a7, // mov.ifnz ra_save_32, rx_save_slave_32 0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif 0x15827d80, 0x100210e7, // mov rb_addr_y, unif 0x000005e0, 0xf00809e7, // brr.allz -, r:end 0x952cbdbf, 0x10024451, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov 
r0, ra_load_idx 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149da1c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149da1c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x119c11c0, 0xd0020827, // shl r0, r0, STAGES-13 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffa58, 0xf0f80227, // brr ra_link_1, r:pass_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x0e1cedc0, 0xd00229e7, // shr.setf -, ra_points, STAGES 0xfffffa30, 0xf00809e7, // brr.allz -, r:pass_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x952cbdbf, 0x10024451, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, 
rb_tw_im+tw32 0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffba8, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0xfffffb88, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x95451dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE 0x20367016, 0x100049e0, // nop; fmul r0, r2, 
ra_tw_re+tw32 0x209cd017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32 0x209cd01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32 0x2136709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02467c80, 0x10020467, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2 0x029d1ec0, 0x10021467, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3 0x95555dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x203a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209ce017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209ce01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x213a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02567c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d5ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1cedc0, 0xd00229e7, // shr.setf -, ra_points, STAGES 0xfffffa80, 0xf00809e7, // brr.allz -, r:pass_2 0x00000020, 0xe0020827, // mov r0, 4*8 0x0d227c00, 0x10020227, 
// sub ra_link_1, ra_link_1, r0 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x95410dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c41c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15ddf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffa60, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dddc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 
0x95555dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x203e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209cf017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209cf01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x213e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02567c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d5ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1cedc0, 0xd00229e7, // shr.setf -, ra_points, STAGES 0xfffff998, 0xf00809e7, // brr.allz -, r:pass_3 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dddc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0xfffff9f0, 0xf0f809e7, // brr -, r:loop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159c3fc0, 0x100209a7, // mov interrupt, flag 0x009e7000, 
0x300009e7, // nop; nop; thrend 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop ================================================ FILE: src/hello_fft/hex/shader_1k.hex ================================================ 0x00000010, 0xe00216e7, // mov rb_0x10, 0x10 0x00000040, 0xe0021727, // mov rb_0x40, 0x40 0x000000f0, 0xe0021767, // mov rb_0xF0, 0xF0 0x00005555, 0xe00207a7, // mov rx_0x5555, 0x5555 0x00003333, 0xe00217a7, // mov rx_0x3333, 0x3333 0x00000f0f, 0xe00207e7, // mov rx_0x0F0F, 0x0F0F 0x000000ff, 0xe00217e7, // mov rx_0x00FF, 0x00FF 0x90104000, 0xe0020767, // mov ra_vdw_32, vdw_setup_0(32, 16, dma_h32( 0,0)) 0x90105000, 0xe0021067, // mov rb_vdw_32, vdw_setup_0(32, 16, dma_h32(32,0)) 0x00000080, 0xe00208e7, // mov r3, 0x80 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100202a7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020327, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020367, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100212a7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021327, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, 
stride; ldtmu0 0x159e7900, 0x10021367, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203a7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100213a7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10021167, // mov rb_inst, unif 0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0)) 0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0)) 0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0)) 0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst 0xcc9e7081, 0x100246e0, // add out_0, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x10024720, // add out_1, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1 0x0c9e7080, 0x100211e7, // add out_3, r0, r2 0x000000c8, 0xf0f80127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x156e7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x15727d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // 
mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x15767d80, 0x10021c67, // mov vw_setup, ra_vdw_32 0xc00000c0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS32_STRIDE-16*4 0x8c05cdf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr 0x00000050, 0xf0f81127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x156e7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x15727d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x156e7d80, 0x10020c67, // mov vr_setup, ra_vpm_lo 0x15c27d80, 0x100009e7, // mov -, vpm 0x00000080, 0xf0f801a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000000, 
0xf0f509e7, // bra -, ra_link_1 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000588, 0xf0f811a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000009, 0xe80009e7, // mov -, srel(i+9) 0x00000011, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000a, 0xe80009e7, // mov -, srel(i+9) 0x00000012, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000b, 0xe80009e7, // mov -, srel(i+9) 0x00000013, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000c, 0xe80009e7, // mov -, srel(i+9) 0x00000014, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000d, 0xe80009e7, // mov -, srel(i+9) 0x00000015, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000e, 0xe80009e7, // mov -, srel(i+9) 0x00000016, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000f, 0xe80009e7, // mov -, srel(i+9) 0x00000017, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffda0, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149de1c0, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x149de1c0, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149df1c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149df1c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9c31c0, 0xd0020827, // shr r0, r0, 13-STAGES 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffc80, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x20427006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d000f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x2042700e, 0x100049e3, // nop; 
fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x22090c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x00000000, 0xf0f489e7, // bra -, ra_save_32 0x956c2ff6, 0x100246c2, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x95707ff6, 0x10024707, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi 0x95741ff6, 0x10024741, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32 0x8c15ddf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffbf0, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffba0, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x20427006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d000f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x2042700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x22090c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x00000000, 0xf0f489e7, // bra -, ra_save_32 0x956c2ff6, 0x100246c2, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x95707ff6, 0x10024707, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi 0x95741ff6, 0x10024741, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32 0x159c5fc0, 
0x10022827, // mov.setf r0, rb_inst 0x0d9c11c0, 0xd0020827, // sub r0, r0, 1 0x119c51c0, 0xd0020827, // shl r0, r0, 5 0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0 0x159c4fc0, 0x10060127, // mov.ifnz ra_save_32, rx_save_slave_32 0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif 0x15827d80, 0x100210e7, // mov rb_addr_y, unif 0x000003f8, 0xf00809e7, // brr.allz -, r:end 0x9528adbf, 0x10024410, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x952cbdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 
0x159e7040, 0x10020827, // or r0, r0, r1 0x149de1c0, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x149de1c0, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149df1c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149df1c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9c31c0, 0xd0020827, // shr r0, r0, 13-STAGES 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffac8, 0xf0f80227, // brr ra_link_1, r:pass_1 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x0e1cadc0, 0xd00229e7, // shr.setf -, ra_points, STAGES 0xfffffaa0, 0xf00809e7, // brr.allz -, r:pass_1 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x9538edbf, 0x10024410, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x953cfdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c522, // mov 
ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffc18, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x95410dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE 0x20327016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32 0x209cc017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32 0x209cc01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32 0x2132709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02427c80, 0x10020427, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2 0x029d0ec0, 0x10021427, 
// fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3 0x95514dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x20367016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209cd017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209cd01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x2136709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02527c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d4ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1cadc0, 0xd00229e7, // shr.setf -, ra_points, STAGES 0xfffffb10, 0xf00809e7, // brr.allz -, r:pass_2 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0xfffffbd8, 0xf0f809e7, // brr -, r:loop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 
0x159c3fc0, 0x100209a7, // mov interrupt, flag 0x009e7000, 0x300009e7, // nop; nop; thrend 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop ================================================ FILE: src/hello_fft/hex/shader_2048k.hex ================================================ 0x00000010, 0xe0021227, // mov rb_0x10, 0x10 0x000001d0, 0xe0021967, // mov r5rep, 0x1D0 0x00000080, 0xe00208e7, // mov r3, 0x80 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020427, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020467, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100204a7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100204e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020527, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020567, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100205a7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100205e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020627, // mov 
ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021427, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021467, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100214a7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100214e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021527, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021567, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100215a7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100215e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021627, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020667, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100206a7, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, 
stride; ldtmu0 0x159e7900, 0x10021667, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100216a7, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10021167, // mov rb_inst, unif 0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0)) 0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0)) 0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0)) 0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst 0xcc9e7081, 0x10025020, // add out_0, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x10025060, // add out_1, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1 0x0c9e7080, 0x100211e7, // add out_3, r0, r2 0x000002e8, 0xf0f80127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x152e7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x15327d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x153a7d80, 0x10020827, // mov r0, ra_vdw_32 0x8c04ddf6, 0x10024061, // add ra_save_ptr, ra_save_ptr, rb_0x40; 
mov r1, ra_save_ptr 0x00000080, 0xe00208a7, // mov r2, vdw_setup_0(1, 16, dma_h32(1,0)) - vdw_setup_0(1, 16, dma_h32(0,0)) 0x00080000, 0xe00208e7, // mov r3, PASS32_STRIDE 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; 
mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x00000000, 
0xf0f509e7, // bra -, ra_link_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000050, 0xf0f81127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x152e7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x15327d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x152e7d80, 0x10020c67, // mov vr_setup, ra_vpm_lo 0x15c27d80, 0x100009e7, // mov -, vpm 0x00000520, 0xf0f802a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x159c0fc0, 0x10021c67, // mov vw_setup, rb_vpm 0x012cbdc0, 0x10020c27, // fadd vpm, ra_64+0, rb_64+0 0x0130cdc0, 0x10020c27, // fadd vpm, ra_64+1, rb_64+1 0x159c1fc0, 0x10021c67, // mov vw_setup, rb_vpm_16 0x0134ddc0, 0x10020c27, // fadd vpm, ra_64+2, rb_64+2 0x0138edc0, 0x10020c27, // fadd vpm, ra_64+3, rb_64+3 0x159c2fc0, 0x10021c67, // mov vw_setup, rb_vpm_32 0x022cbdc0, 0x10020c27, // fsub vpm, ra_64+0, rb_64+0 0x0230cdc0, 0x10020c27, // fsub vpm, ra_64+1, rb_64+1 0x159c7fc0, 0x10021c67, // mov vw_setup, rb_vpm_48 0x0234ddc0, 0x10020c27, // fsub vpm, ra_64+2, rb_64+2 0x0238edc0, 0x10020c27, // fsub vpm, ra_64+3, rb_64+3 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x0000001c, 
0xe80009e7, // mov -, sacq(i+9) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x80904000, 0xe0020827, // mov r0, vdw_setup_0(1, 16, dma_h32(0,0)) 0x00000040, 0xe0020867, // mov r1, 0x40 0x8c067c76, 0x10024061, // add ra_save_ptr, ra_save_ptr, r1; mov r1, ra_save_ptr 0x00000080, 0xe00208a7, // mov r2, vdw_setup_0(1, 16, dma_h32(1,0)) - vdw_setup_0(1, 16, dma_h32(0,0)) 0x00040000, 0xe00208e7, // mov r3, PASS64_STRIDE 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, 
r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 
0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov 
vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x000002b8, 0xf0f812a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd00200a7, // shl ra_temp, r0, 5 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0fc49e7, // brr -, ra_temp 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x000000e0, 0xf0f809e7, // brr -, r:2f 0x00000010, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x000000c0, 0xf0f809e7, // brr -, r:2f 0x00000011, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x000000a0, 0xf0f809e7, // brr -, r:2f 0x00000012, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000080, 0xf0f809e7, // brr -, r:2f 0x00000013, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000060, 0xf0f809e7, // brr -, r:2f 0x00000014, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000040, 0xf0f809e7, // brr -, r:2f 0x00000015, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000020, 0xf0f809e7, // brr -, r:2f 0x00000016, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f809e7, // brr -, r:2f 0x00000017, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159c0fc0, 0x10021c67, // mov vw_setup, rb_vpm 0x012cbdc0, 0x10020c27, // fadd vpm, ra_64+0, rb_64+0 0x0130cdc0, 0x10020c27, // fadd vpm, ra_64+1, rb_64+1 0x159c1fc0, 0x10021c67, // mov 
vw_setup, rb_vpm_16 0x0134ddc0, 0x10020c27, // fadd vpm, ra_64+2, rb_64+2 0x0138edc0, 0x10020c27, // fadd vpm, ra_64+3, rb_64+3 0x159c2fc0, 0x10021c67, // mov vw_setup, rb_vpm_32 0x022cbdc0, 0x10020c27, // fsub vpm, ra_64+0, rb_64+0 0x0230cdc0, 0x10020c27, // fsub vpm, ra_64+1, rb_64+1 0x159c7fc0, 0x10021c67, // mov vw_setup, rb_vpm_48 0x0234ddc0, 0x10020c27, // fsub vpm, ra_64+2, rb_64+2 0x0238edc0, 0x10020c27, // fsub vpm, ra_64+3, rb_64+3 0x00000000, 0xf0fc49e7, // brr -, ra_temp 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x00000008, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x00000009, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x0000000a, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x0000000b, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x0000000c, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x0000000d, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x0000000e, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x0000000f, 0xe80009e7, // mov -, srel(i+8) 0x00000080, 0xf0f801a7, // 
brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000998, 0xf0f811a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000009, 0xe80009e7, // mov -, srel(i+9) 0x00000011, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000a, 0xe80009e7, // mov -, srel(i+9) 0x00000012, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000b, 0xe80009e7, // mov -, srel(i+9) 0x00000013, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000c, 0xe80009e7, // mov -, srel(i+9) 0x00000014, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000d, 0xe80009e7, // mov -, srel(i+9) 0x00000015, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000e, 0xe80009e7, // mov -, srel(i+9) 0x00000016, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000f, 0xe80009e7, // 
mov -, srel(i+9) 0x00000017, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffd50, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x55555555, 0xe00208a7, // mov r2, mask 0x149e7080, 0x10020867, // and r1, r0, r2 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x33333333, 0xe00208a7, // mov r2, mask 0x149e7080, 0x10020867, // and r1, r0, r2 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0f0f0f0f, 0xe00208a7, // mov r2, mask 0x149e7080, 0x10020867, // and r1, r0, r2 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x00ff00ff, 0xe00208a7, // mov r2, mask 0x149e7080, 0x10020867, // and r1, r0, r2 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0000ffff, 0xe00208a7, // mov r2, mask 0x149e7080, 0x10020867, // and r1, r0, r2 0x0e9c81c0, 0x10020827, // shr r0, 
r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c83c0, 0x10020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9c81c0, 0xd0020827, // shr r0, r0, 32-STAGES-3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffbe0, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x206e7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209db00f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x206e700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x2209bc87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x01267c00, 0x100202e7, // fadd ra_64+0, ra_32_re, r0 0x019c9e40, 0x10020327, // fadd ra_64+1, rb_32_im, r1 0x02267c00, 0x10020367, // fsub ra_64+2, ra_32_re, r0 0x029c9e40, 0x100203a7, // fsub ra_64+3, rb_32_im, r1 0x8c167d76, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x55555555, 0xe00208a7, // mov r2, mask 0x149e7080, 0x10020867, // and r1, r0, r2 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x33333333, 0xe00208a7, // mov r2, mask 0x149e7080, 0x10020867, // and r1, r0, r2 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 
0x10020827, // or r0, r0, r1 0x0f0f0f0f, 0xe00208a7, // mov r2, mask 0x149e7080, 0x10020867, // and r1, r0, r2 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x00ff00ff, 0xe00208a7, // mov r2, mask 0x149e7080, 0x10020867, // and r1, r0, r2 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0000ffff, 0xe00208a7, // mov r2, mask 0x149e7080, 0x10020867, // and r1, r0, r2 0x0e9c81c0, 0x10020827, // shr r0, r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c83c0, 0x10020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9c81c0, 0xd0020827, // shr r0, r0, 32-STAGES-3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffa30, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x55555555, 0xe00208a7, // mov r2, mask 0x149e7080, 0x10020867, // and r1, r0, r2 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x33333333, 0xe00208a7, // mov r2, mask 0x149e7080, 
0x10020867, // and r1, r0, r2 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0f0f0f0f, 0xe00208a7, // mov r2, mask 0x149e7080, 0x10020867, // and r1, r0, r2 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x00ff00ff, 0xe00208a7, // mov r2, mask 0x149e7080, 0x10020867, // and r1, r0, r2 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0000ffff, 0xe00208a7, // mov r2, mask 0x149e7080, 0x10020867, // and r1, r0, r2 0x0e9c81c0, 0x10020827, // shr r0, r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c83c0, 0x10020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9c81c0, 0xd0020827, // shr r0, r0, 32-STAGES-3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffff8c0, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x206e7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209db00f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x206e700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x2209bc87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 
0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x029c9e40, 0x100208e7, // fsub r3, rb_32_im, r1 0x02267c00, 0x100208a7, // fsub r2, ra_32_re, r0 0x019c9e40, 0x10020867, // fadd r1, rb_32_im, r1 0x01267c00, 0x10020827, // fadd r0, ra_32_re, r0 0x203e700e, 0x100049c9, // nop; fmul rb_32_im, r1, ra_tw_re+TW64_P1_BASE0 0x209cf00f, 0x100059c9, // nop; fmul ra_32_re, r1, rb_tw_im+TW64_P1_BASE0 0x209cf007, 0x100049e1, // nop; fmul r1, r0, rb_tw_im+TW64_P1_BASE0 0x213c93c6, 0x10025320, // fadd rb_64+1, r1, rb_32_im; fmul r0, r0, ra_tw_re+TW64_P1_BASE0 0x2225019f, 0x100252c9, // fsub rb_64+0, r0, ra_32_re; fmul ra_32_re, r3, rb_tw_im+TW64_P1_BASE1 0x2042701e, 0x100049c9, // nop; fmul rb_32_im, r3, ra_tw_re+TW64_P1_BASE1 0x00000000, 0xf0f549e7, // bra -, ra_save_64 0x209d0017, 0x100049e3, // nop; fmul r3, r2, rb_tw_im+TW64_P1_BASE1 0x214097d6, 0x100253a2, // fadd rb_64+3, r3, rb_32_im; fmul r2, r2, ra_tw_re+TW64_P1_BASE1 0x02267580, 0x10021367, // fsub rb_64+2, r2, ra_32_re 0x8c14cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff7e0, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff790, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x206e7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209db00f, 
0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x206e700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x2209bc87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x00000000, 0xf0f489e7, // bra -, ra_save_32 0x952c2ff6, 0x100242c2, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x95307ff6, 0x10024307, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi 0x9538eff6, 0x1002438e, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32 0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst 0x0d9c11c0, 0xd0020827, // sub r0, r0, 1 0x119c51c0, 0xd0020827, // shl r0, r0, 5 0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0 0x159c4fc0, 0x10060127, // mov.ifnz ra_save_32, rx_save_slave_32 0x159cafc0, 0x100602a7, // mov.ifnz ra_save_64, rx_save_slave_64 0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif 0x15827d80, 0x100210e7, // mov rb_addr_y, unif 0x00000940, 0xf00809e7, // brr.allz -, r:end 0x95451dbf, 0x100246db, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x95492dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c7e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d7e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c7a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d7a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> 
(8>>i) 0x959f16db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c61c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x55555555, 0xe00208a7, // mov r2, mask 0x149e7080, 0x10020867, // and r1, r0, r2 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x33333333, 0xe00208a7, // mov r2, mask 0x149e7080, 0x10020867, // and r1, r0, r2 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0f0f0f0f, 0xe00208a7, // mov r2, mask 0x149e7080, 0x10020867, // and r1, r0, r2 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x00ff00ff, 0xe00208a7, // mov r2, mask 0x149e7080, 0x10020867, // and r1, r0, r2 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0000ffff, 0xe00208a7, // mov r2, mask 0x149e7080, 0x10020867, // and r1, r0, r2 0x0e9c81c0, 0x10020827, // shr r0, r0, shift 0x149e7080, 0x10020827, // and r0, r0, r2 0x119c83c0, 0x10020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9c81c0, 0xd0020827, // shr r0, r0, 32-STAGES-3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, 
// add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff660, 0xf0f80227, // brr ra_link_1, r:pass_1 0x009e7000, 0x100009e7, // nop 0x00000200, 0xe0020827, // mov r0, 0x200 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x00000015, 0xe0020867, // mov r1, STAGES 0x0e1e7c40, 0x100229e7, // shr.setf -, ra_points, r1 0xfffff630, 0xf00809e7, // brr.allz -, r:pass_1 0x009e7000, 0x100009e7, // nop 0x00000200, 0xe0020827, // mov r0, 0x200 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x159c0fc0, 0x100202e7, // mov ra_vpm_lo, rb_vpm 0x159c1fc0, 0x10020327, // mov ra_vpm_hi, rb_vpm_16 0x80904000, 0xe00203a7, // mov ra_vdw_32, vdw_setup_0(1, 16, dma_h32( 0,0)) 0x80905000, 0xe00213a7, // mov rb_vdw_32, vdw_setup_0(1, 16, dma_h32(32,0)) 0x00000015, 0xe00212e7, // mov rb_STAGES, STAGES 0x000000f0, 0xe0021327, // mov rb_0xF0, 0xF0 0x00000040, 0xe0021367, // mov rb_0x40, 0x40 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x95451dbf, 0x100246db, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x95492dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c7e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d7e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c7a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d7a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz 
r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffb80, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x00007fff, 0xe0020827, // mov r0, 0x7FFF 0x141e7c00, 0x100229e7, // and.setf -, ra_points, r0 0xfffffb50, 0xf01809e7, // brr.allnz -, r:pass_2 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100601e7, // add.ifnz ra_points, ra_points, r0 0x956dbdbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE 0x204e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32 0x209d3017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32 0x209d301f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32 0x214e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x026e7c80, 0x100206e7, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2 0x029dbec0, 0x100216e7, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3 0x957dfdbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x20527016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209d4017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209d401f, 0x100049e2, // nop; fmul r2, r3, 
rb_tw_im+tw16 0x2152709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x027e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029dfec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c7e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d7e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c7a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d7a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1cbdc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 0xfffffa48, 0xf00809e7, // brr.allz -, r:pass_2 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x95451dbf, 0x100246db, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x95492dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c7e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d7e3, // mov 
rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c7a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d7a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff940, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0xfffff920, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0xfffff900, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0xfffff8e0, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x956dbdbf, 0x100248a3, // mov 
r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE 0x20567016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32 0x209d5017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32 0x209d501f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32 0x2156709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x026e7c80, 0x100206e7, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2 0x029dbec0, 0x100216e7, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3 0x957dfdbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x205a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209d6017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209d601f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x215a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x027e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029dfec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c7e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d7e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c7a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d7a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1cbdc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 
0x00000100, 0xe0020827, // mov r0, 0x100 0xfffff7d0, 0xf00809e7, // brr.allz -, r:pass_3 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x00000060, 0xe0020827, // mov r0, (4-1)*4*8 0x0d227c00, 0x10020227, // sub ra_link_1, ra_link_1, r0 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x95659dbf, 0x100246db, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x9569adbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c7e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d7e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c7a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d7a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 
0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff6c8, 0xf0f80227, // brr ra_link_1, r:pass_4 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x956dbdbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE 0x205e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32 0x209d7017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32 0x209d701f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32 0x215e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x026e7c80, 0x100206e7, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2 0x029dbec0, 0x100216e7, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3 0x957dfdbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x20627016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209d8017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209d801f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x2162709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x027e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029dfec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c7e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d7e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c7a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d7a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1cbdc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 0xfffff5c0, 0xf00809e7, // brr.allz -, r:pass_4 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0xfffff690, 0xf0f809e7, // brr -, r:loop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159c3fc0, 0x100209a7, // mov interrupt, flag 0x009e7000, 0x300009e7, // nop; nop; thrend 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop ================================================ FILE: src/hello_fft/hex/shader_256.hex ================================================ 0x00000040, 0xe00217a7, // mov rb_0x40, 0x40 0x00000080, 0xe00217e7, // mov rb_0x80, 0x80 0x00005555, 0xe0020767, // mov rx_0x5555, 0x5555 0x00003333, 0xe00207a7, // mov rx_0x3333, 0x3333 0x00000f0f, 0xe00207e7, // mov rx_0x0F0F, 0x0F0F 0x88104000, 0xe0020727, // mov ra_vdw, vdw_setup_0(16, 16, dma_h32( 0,0)) 0x88104800, 0xe0021727, // mov rb_vdw, vdw_setup_0(16, 16, dma_h32(16,0)) 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9df1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020227, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9df1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020267, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9df3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021227, // mov rb_tw_im+off+i, r4 
0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9df3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021267, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9df1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100202a7, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9df3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100212a7, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10021167, // mov rb_inst, unif 0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0)) 0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0)) 0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0)) 0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst 0xcc9e7081, 0x100246e0, // add out_0, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100256e0, // add out_1, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100049e0, // add out_2, r0, r2; v8adds r0, r0, r1 0x0c9e7080, 0x100009e7, // add out_3, r0, r2 0x000000b0, 0xf0f80127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x156e7d80, 0x10021c67, // mov vw_setup, arg 0x159e7000, 0x10020c27, // mov vpm, r0 0x159e7240, 0x10020c27, // mov vpm, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 
0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000000, 0xf0f409e7, // bra -, ra_link_1 0x15727d80, 0x10021c67, // mov vw_setup, arg_vdw 0xc0000040, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS16_STRIDE-16*4 0x8c05edf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr 0x00000038, 0xf0f81127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x156e7d80, 0x10021c67, // mov vw_setup, arg 0x159e7000, 0x10020c27, // mov vpm, r0 0x159e7240, 0x10020c27, // mov vpm, r1 0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x156e7d80, 0x10020c67, // mov vr_setup, arg_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x00000080, 0xf0f801a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000000, 0xf0f409e7, // bra -, ra_link_1 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000248, 0xf0f811a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f409e7, // bra -, ra_link_1 0x00000009, 0xe80009e7, // mov -, srel(i+9) 0x00000011, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f409e7, // bra -, ra_link_1 0x0000000a, 0xe80009e7, // mov -, srel(i+9) 0x00000012, 
0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f409e7, // bra -, ra_link_1 0x0000000b, 0xe80009e7, // mov -, srel(i+9) 0x00000013, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f409e7, // bra -, ra_link_1 0x0000000c, 0xe80009e7, // mov -, srel(i+9) 0x00000014, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f409e7, // bra -, ra_link_1 0x0000000d, 0xe80009e7, // mov -, srel(i+9) 0x00000015, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f409e7, // bra -, ra_link_1 0x0000000e, 0xe80009e7, // mov -, srel(i+9) 0x00000016, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f409e7, // bra -, ra_link_1 0x0000000f, 0xe80009e7, // mov -, srel(i+9) 0x00000017, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<>i) 0x959f8492, 0xd002c3a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d3a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c362, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d363, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c322, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d323, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c2e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d2e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c41c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 
0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15fdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x8c15fdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffdb0, 0xf0f80027, // brr ra_link_1, r:pass_1 0x009e7000, 
0x100009e7, // nop 0x956dbff6, 0x100246db, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm 0x9571cff6, 0x1002471c, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw 0xfffffd90, 0xf0f80027, // brr ra_link_1, r:pass_1 0x009e7000, 0x100009e7, // nop 0x956dbff6, 0x100246db, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm 0x9571cff6, 0x1002471c, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw 0x00000000, 0xf0f4c027, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x9528adbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c3a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d3a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c362, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d363, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c322, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d323, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c2e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d2e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c41c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15fdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 
0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x8c15fdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffc68, 0xf0f80027, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x956dbff6, 0x100246db, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm 0x9571cff6, 0x1002471c, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw 0x9538edbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x20267016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209c9017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209c901f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x2126709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x023a7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029ceec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c3a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d3a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c362, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d363, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c322, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d323, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c2e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d2e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 
0xfffffba8, 0xf0f80027, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x956dbff6, 0x100246db, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm 0x9571cff6, 0x1002471c, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw 0x00000000, 0xf0f4c027, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0xfffffbb0, 0xf0f809e7, // brr -, r:loop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159c3fc0, 0x100209a7, // mov interrupt, flag 0x009e7000, 0x300009e7, // nop; nop; thrend 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop ================================================ FILE: src/hello_fft/hex/shader_256k.hex ================================================ 0x00000012, 0xe0021227, // mov rb_STAGES, STAGES 0x00000010, 0xe00216a7, // mov rb_0x10, 0x10 0x00000040, 0xe00216e7, // mov rb_0x40, 0x40 0x00000080, 0xe0021727, // mov rb_0x80, 0x80 0x000000f0, 0xe0021767, // mov rb_0xF0, 0xF0 0x00000100, 0xe00217a7, // mov rb_0x100, 0x100 0x00001fff, 0xe00217e7, // mov rb_0x1FFF, 0x1FFF 0x55555555, 0xe0020767, // mov rx_0x55555555, 0x55555555 0x33333333, 0xe00207a7, // mov rx_0x33333333, 0x33333333 0x0f0f0f0f, 0xe00207e7, // mov rx_0x0F0F0F0F, 0x0F0F0F0F 0x00ff00ff, 0xe0021627, // mov rx_0x00FF00FF, 0x00FF00FF 0x0000ffff, 0xe0021667, // mov rx_0x0000FFFF, 0x0000FFFF 0x80904000, 0xe00206e7, // mov ra_vdw_16, vdw_setup_0( 1, 16, dma_h32( 0,0)) 0x80905000, 0xe0021027, // mov rb_vdw_16, vdw_setup_0( 1, 16, dma_h32(32,0)) 0x90104000, 0xe0020727, // mov ra_vdw_32, vdw_setup_0(32, 16, dma_h32( 0,0)) 0x90105000, 0xe0021067, // mov rb_vdw_32, vdw_setup_0(32, 16, dma_h32(32,0)) 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, 
// add t0s, r0, r2 0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020327, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020367, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203a7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020427, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021327, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021367, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100213a7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021427, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020467, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100204a7, // mov 
ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021467, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100214a7, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10021167, // mov rb_inst, unif 0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0)) 0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0)) 0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0)) 0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst 0xcc9e7081, 0x10024660, // add out_0, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100246a0, // add out_1, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1 0x0c9e7080, 0x100211e7, // add out_3, r0, r2 0x000001d0, 0xf0f80127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, arg 0x159e7000, 0x10020c27, // mov vpm, r0 0x159e7240, 0x10020c27, // mov vpm, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x156e7d80, 0x10020827, // mov r0, arg_vdw 0x8c05bdf6, 0x10024061, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov r1, ra_save_ptr 0x00000080, 0xe00208a7, // mov r2, vdw_setup_0(1, 16, 
dma_h32(1,0)) - vdw_setup_0(1, 16, dma_h32(0,0)) 0x00020000, 0xe00208e7, // mov r3, PASS16_STRIDE 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, 
r1 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000038, 0xf0f81127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, arg 0x159e7000, 0x10020c27, // mov vpm, r0 0x159e7240, 0x10020c27, // mov vpm, r1 0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10020c67, // mov vr_setup, arg_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x000000c8, 0xf0f802a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x15727d80, 0x10021c67, // mov vw_setup, ra_vdw_32 0xc000ffc0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS32_STRIDE-16*4 0x8c05bdf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr 0x00000050, 0xf0f812a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 
0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10020c67, // mov vr_setup, ra_vpm_lo 0x15c27d80, 0x100009e7, // mov -, vpm 0x00000080, 0xf0f801a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000640, 0xf0f811a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000009, 0xe80009e7, // mov -, srel(i+9) 0x00000011, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000a, 0xe80009e7, // mov -, srel(i+9) 0x00000012, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000b, 0xe80009e7, // mov -, srel(i+9) 0x00000013, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 
0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000c, 0xe80009e7, // mov -, srel(i+9) 0x00000014, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000d, 0xe80009e7, // mov -, srel(i+9) 0x00000015, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000e, 0xe80009e7, // mov -, srel(i+9) 0x00000016, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000f, 0xe80009e7, // mov -, srel(i+9) 0x00000017, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffd78, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c15adf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 
0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149d81c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149d81c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149d91c0, 0x10020867, // and r1, r0, mask 0x0e9da1c0, 0x10020827, // shr r0, r0, shift 0x149d91c0, 0x10020827, // and r0, r0, mask 0x119da3c0, 0x10020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9cb1c0, 0xd0020827, // shr r0, r0, 32-STAGES-3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffc30, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x204e7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d300f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x204e700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x22093c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x00000000, 0xf0f549e7, // bra -, ra_save_32 0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x95687ff6, 0x10024687, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi 0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32 0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 
3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffba0, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x00000000, 0xf0f489e7, // bra -, ra_save_16 0x009e7000, 0x100009e7, // nop 0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x956c0ff6, 0x100246c0, // mov ra_vdw_16, rb_vdw_16; mov rb_vdw_16, ra_vdw_16 0x8c15ddf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffb38, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c15adf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffae8, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x204e7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d300f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x204e700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x22093c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x00000000, 0xf0f549e7, // bra -, ra_save_32 0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x95687ff6, 0x10024687, // mov ra_vpm_hi, rb_vpm_hi; mov 
rb_vpm_hi, ra_vpm_hi 0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32 0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst 0x0d9c11c0, 0xd0020827, // sub r0, r0, 1 0x119c51c0, 0xd0020827, // shl r0, r0, 5 0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0 0x159c4fc0, 0x10060127, // mov.ifnz ra_save_16, rx_save_slave_16 0x159cafc0, 0x100602a7, // mov.ifnz ra_save_32, rx_save_slave_32 0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif 0x15827d80, 0x100210e7, // mov rb_addr_y, unif 0x00000838, 0xf00809e7, // brr.allz -, r:end 0x952cbdbf, 0x100244d3, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15adf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 
0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149d81c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149d81c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149d91c0, 0x10020867, // and r1, r0, mask 0x0e9da1c0, 0x10020827, // shr r0, r0, shift 0x149d91c0, 0x10020827, // and r0, r0, mask 0x119da3c0, 0x10020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9cb1c0, 0xd0020827, // shr r0, r0, 32-STAGES-3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff9e0, 0xf0f80227, // brr ra_link_1, r:pass_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dedc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 0xfffff9b8, 0xf00809e7, // brr.allz -, r:pass_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dedc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 
0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c41c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffb88, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x141dfdc0, 0x100229e7, // and.setf -, ra_points, rb_0x1FFF 0xfffffb60, 0xf01809e7, // brr.allnz -, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dcdc0, 0x100601e7, // add.ifnz 
ra_points, ra_points, rb_0x80 0x955d7dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x20367016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209cd017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209cd01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x2136709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x025e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d7ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 0xfffffa98, 0xf00809e7, // brr.allz -, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 
0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c41c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff998, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0xfffff978, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0xfffff958, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0xfffff938, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 
0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x955d7dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x203a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209ce017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209ce01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x213a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x025e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d7ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 0xfffff870, 0xf00809e7, // brr.allz -, r:pass_3 0x00000060, 0xe0020827, // mov r0, 3*4*8 0x0d227c00, 0x10020227, // sub ra_link_1, ra_link_1, r0 0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, 
ra_addr_x; mov ra_addr_x, rb_addr_y 0x95451dbf, 0x100244d3, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x95492dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15adf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff7d0, 0xf0f80227, // brr ra_link_1, r:pass_4 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dedc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x954d3dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE 0x203e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32 0x209cf017, 0x100049e1, // nop; fmul 
r1, r2, rb_tw_im+tw32 0x209cf01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32 0x213e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x024e7c80, 0x100204e7, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2 0x029d3ec0, 0x100214e7, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3 0x955d7dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x20427016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209d0017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209d001f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x2142709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x025e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d7ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 0xfffff6c8, 0xf00809e7, // brr.allz -, r:pass_4 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dedc0, 0x100201e7, // add ra_points, 
ra_points, rb_0x100 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0xfffff798, 0xf0f809e7, // brr -, r:loop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159c3fc0, 0x100209a7, // mov interrupt, flag 0x009e7000, 0x300009e7, // nop; nop; thrend 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop ================================================ FILE: src/hello_fft/hex/shader_2k.hex ================================================ 0x00000010, 0xe0021727, // mov rb_0x10, 0x10 0x00000040, 0xe0021767, // mov rb_0x40, 0x40 0x000000f0, 0xe00217a7, // mov rb_0xF0, 0xF0 0x000001d0, 0xe00217e7, // mov rb_0x1D0, 0x1D0 0x00005555, 0xe0020727, // mov rx_0x5555, 0x5555 0x00003333, 0xe0020767, // mov rx_0x3333, 0x3333 0x00000f0f, 0xe00207a7, // mov rx_0x0F0F, 0x0F0F 0x000000ff, 0xe00207e7, // mov rx_0x00FF, 0x00FF 0x00000080, 0xe00208e7, // mov r3, 0x80 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020427, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020467, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100204a7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100204e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020527, // 
mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021427, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021467, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100214a7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100214e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021527, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020567, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100205a7, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021567, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100215a7, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10021167, // mov rb_inst, unif 0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0)) 0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0)) 0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0)) 0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst 0xcc9e7081, 0x10025020, // add out_0, 
r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x10025060, // add out_1, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1 0x0c9e7080, 0x100211e7, // add out_3, r0, r2 0x000000c8, 0xf0f80127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x152e7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x15327d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x15367d80, 0x10021c67, // mov vw_setup, ra_vdw_32 0xc00001c0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS32_STRIDE-16*4 0x8c05ddf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr 0x00000050, 0xf0f81127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x152e7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x15327d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 
0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x152e7d80, 0x10020c67, // mov vr_setup, ra_vpm_lo 0x15c27d80, 0x100009e7, // mov -, vpm 0x000000f8, 0xf0f802a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x159c0fc0, 0x10021c67, // mov vw_setup, rb_vpm 0x012cbdc0, 0x10020c27, // fadd vpm, ra_64+0, rb_64+0 0x0130cdc0, 0x10020c27, // fadd vpm, ra_64+1, rb_64+1 0x159c1fc0, 0x10021c67, // mov vw_setup, rb_vpm_16 0x0134ddc0, 0x10020c27, // fadd vpm, ra_64+2, rb_64+2 0x0138edc0, 0x10020c27, // fadd vpm, ra_64+3, rb_64+3 0x159c2fc0, 0x10021c67, // mov vw_setup, rb_vpm_32 0x022cbdc0, 0x10020c27, // fsub vpm, ra_64+0, rb_64+0 0x0230cdc0, 0x10020c27, // fsub vpm, ra_64+1, rb_64+1 0x159c7fc0, 0x10021c67, // mov vw_setup, rb_vpm_48 0x0234ddc0, 0x10020c27, // fsub vpm, ra_64+2, rb_64+2 0x0238edc0, 0x10020c27, // fsub vpm, ra_64+3, rb_64+3 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0xa0104000, 0xe0021c67, // mov vw_setup, vdw_setup_0(64, 16, dma_h32(0,0)) 0xc00000c0, 0xe0021c67, // mov vw_setup, vdw_setup_1(PASS64_STRIDE-16*4) 0x8c05ddf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, step; mov vw_addr, ra_save_ptr 0x000002b8, 0xf0f812a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 
0x100009e7, // nop 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd00200a7, // shl ra_temp, r0, 5 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0fc49e7, // brr -, ra_temp 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x000000e0, 0xf0f809e7, // brr -, r:2f 0x00000010, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x000000c0, 0xf0f809e7, // brr -, r:2f 0x00000011, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x000000a0, 0xf0f809e7, // brr -, r:2f 0x00000012, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000080, 0xf0f809e7, // brr -, r:2f 0x00000013, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000060, 0xf0f809e7, // brr -, r:2f 0x00000014, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000040, 0xf0f809e7, // brr -, r:2f 0x00000015, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000020, 0xf0f809e7, // brr -, r:2f 0x00000016, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f809e7, // brr -, r:2f 0x00000017, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159c0fc0, 0x10021c67, // mov vw_setup, rb_vpm 0x012cbdc0, 0x10020c27, // fadd vpm, ra_64+0, rb_64+0 0x0130cdc0, 0x10020c27, // fadd vpm, ra_64+1, rb_64+1 0x159c1fc0, 0x10021c67, // mov vw_setup, rb_vpm_16 0x0134ddc0, 0x10020c27, // fadd vpm, ra_64+2, rb_64+2 0x0138edc0, 0x10020c27, // fadd vpm, ra_64+3, rb_64+3 0x159c2fc0, 0x10021c67, // mov vw_setup, rb_vpm_32 0x022cbdc0, 0x10020c27, // fsub vpm, ra_64+0, rb_64+0 0x0230cdc0, 0x10020c27, // fsub vpm, ra_64+1, rb_64+1 0x159c7fc0, 0x10021c67, // mov vw_setup, rb_vpm_48 0x0234ddc0, 0x10020c27, // fsub vpm, ra_64+2, rb_64+2 
0x0238edc0, 0x10020c27, // fsub vpm, ra_64+3, rb_64+3 0x00000000, 0xf0fc49e7, // brr -, ra_temp 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x00000008, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x00000009, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x0000000a, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x0000000b, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x0000000c, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x0000000d, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x0000000e, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x0000000f, 0xe80009e7, // mov -, srel(i+8) 0x00000080, 0xf0f801a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 
0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000858, 0xf0f811a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000009, 0xe80009e7, // mov -, srel(i+9) 0x00000011, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000a, 0xe80009e7, // mov -, srel(i+9) 0x00000012, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000b, 0xe80009e7, // mov -, srel(i+9) 0x00000013, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000c, 0xe80009e7, // mov -, srel(i+9) 0x00000014, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000d, 0xe80009e7, // mov -, srel(i+9) 0x00000015, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000e, 0xe80009e7, // mov -, srel(i+9) 0x00000016, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000f, 0xe80009e7, // mov -, srel(i+9) 0x00000017, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; 
ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffda0, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14727180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14727180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9c21c0, 0xd0020827, // shr r0, r0, 13-STAGES 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffc80, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // 
mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x205e7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d700f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x205e700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x22097c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x01267c00, 0x100202e7, // fadd ra_64+0, ra_32_re, r0 0x019c9e40, 0x10020327, // fadd ra_64+1, rb_32_im, r1 0x02267c00, 0x10020367, // fsub ra_64+2, ra_32_re, r0 0x029c9e40, 0x100203a7, // fsub ra_64+3, rb_32_im, r1 0x8c15fdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14727180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14727180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9c21c0, 0xd0020827, // shr r0, r0, 13-STAGES 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 
0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffb20, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14727180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14727180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9c21c0, 0xd0020827, // shr r0, r0, 13-STAGES 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffa00, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 
0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x205e7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d700f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x205e700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x22097c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x029c9e40, 0x100208e7, // fsub r3, rb_32_im, r1 0x02267c00, 0x100208a7, // fsub r2, ra_32_re, r0 0x019c9e40, 0x10020867, // fadd r1, rb_32_im, r1 0x01267c00, 0x10020827, // fadd r0, ra_32_re, r0 0x203e700e, 0x100049c9, // nop; fmul rb_32_im, r1, ra_tw_re+TW64_P1_BASE0 0x209cf00f, 0x100059c9, // nop; fmul ra_32_re, r1, rb_tw_im+TW64_P1_BASE0 0x209cf007, 0x100049e1, // nop; fmul r1, r0, rb_tw_im+TW64_P1_BASE0 0x213c93c6, 0x10025320, // fadd rb_64+1, r1, rb_32_im; fmul r0, r0, ra_tw_re+TW64_P1_BASE0 0x2225019f, 0x100252c9, // fsub rb_64+0, r0, ra_32_re; fmul ra_32_re, r3, rb_tw_im+TW64_P1_BASE1 0x2042701e, 0x100049c9, // nop; fmul rb_32_im, r3, ra_tw_re+TW64_P1_BASE1 0x00000000, 0xf0f549e7, // bra -, ra_save_64 0x209d0017, 0x100049e3, // nop; fmul r3, r2, rb_tw_im+TW64_P1_BASE1 0x214097d6, 0x100253a2, // fadd rb_64+3, r3, rb_32_im; fmul r2, r2, ra_tw_re+TW64_P1_BASE1 0x02267580, 0x10021367, // fsub rb_64+2, r2, ra_32_re 0x8c15edf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff920, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 
3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff8d0, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x205e7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d700f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x205e700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x22097c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x00000000, 0xf0f489e7, // bra -, ra_save_32 0x952c2ff6, 0x100242c2, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x95307ff6, 0x10024307, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi 0x9534dff6, 0x1002434d, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32 0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst 0x0d9c11c0, 0xd0020827, // sub r0, r0, 1 0x119c51c0, 0xd0020827, // shl r0, r0, 5 0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0 0x159c4fc0, 0x10060127, // mov.ifnz ra_save_32, rx_save_slave_32 0x159cafc0, 0x100602a7, // mov.ifnz ra_save_64, rx_save_slave_64 0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif 0x15827d80, 0x100210e7, // mov rb_addr_y, unif 0x00000418, 0xf00809e7, // brr.allz -, r:end 0x95451dbf, 0x100245d7, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x95492dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c6e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d6e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c6a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d6a3, // mov rb_tw_im+TW16_ACTIVE+3-i, 
r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c61c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14727180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14727180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9c21c0, 0xd0020827, // shr r0, r0, 13-STAGES 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // 
add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff7f0, 0xf0f80227, // brr ra_link_1, r:pass_1 0x009e7000, 0x100009e7, // nop 0x00000200, 0xe0020827, // mov r0, 0x200 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x0e1cbdc0, 0xd00229e7, // shr.setf -, ra_points, STAGES 0xfffff7c8, 0xf00809e7, // brr.allz -, r:pass_1 0x009e7000, 0x100009e7, // nop 0x00000200, 0xe0020827, // mov r0, 0x200 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x159c0fc0, 0x100202e7, // mov ra_vpm_lo, rb_vpm 0x159c1fc0, 0x10020327, // mov ra_vpm_hi, rb_vpm_16 0x90104000, 0xe0020367, // mov ra_vdw_32, vdw_setup_0(32, 16, dma_h32( 0,0)) 0x90105000, 0xe0021367, // mov rb_vdw_32, vdw_setup_0(32, 16, dma_h32(32,0)) 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x95555dbf, 0x100245d7, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x95596dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c6e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d6e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c6a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d6a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 
0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffbf0, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x955d7dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE 0x204e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32 0x209d3017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32 0x209d301f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32 0x214e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x025e7c80, 0x100205e7, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2 0x029d7ec0, 0x100215e7, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3 0x956dbdbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x20527016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209d4017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209d401f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x2152709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x026e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029dbec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c6e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d6e3, // mov 
rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c6a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d6a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1cbdc0, 0xd00229e7, // shr.setf -, ra_points, STAGES 0xfffffae8, 0xf00809e7, // brr.allz -, r:pass_2 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0xfffffbb8, 0xf0f809e7, // brr -, r:loop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159c3fc0, 0x100209a7, // mov interrupt, flag 0x009e7000, 0x300009e7, // nop; nop; thrend 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop ================================================ FILE: src/hello_fft/hex/shader_32k.hex ================================================ 0x00000010, 0xe00216e7, // mov rb_0x10, 0x10 0x00000040, 0xe0021727, // mov rb_0x40, 0x40 0x00000080, 0xe0021767, // mov rb_0x80, 0x80 0x000000f0, 0xe00217a7, // mov rb_0xF0, 0xF0 0x00000100, 0xe00217e7, // mov rb_0x100, 0x100 0x00005555, 0xe0020767, // mov rx_0x5555, 0x5555 0x00003333, 0xe00207a7, // mov rx_0x3333, 0x3333 0x00000f0f, 0xe00207e7, // mov rx_0x0F0F, 0x0F0F 0x000000ff, 0xe00216a7, // mov rx_0x00FF, 0x00FF 0x90104000, 0xe0020727, 
// mov ra_vdw_32, vdw_setup_0(32, 16, dma_h32( 0,0)) 0x90105000, 0xe0021067, // mov rb_vdw_32, vdw_setup_0(32, 16, dma_h32(32,0)) 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100202a7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020327, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020367, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203a7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100212a7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021327, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021367, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100213a7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100213e7, // 
mov rb_tw_im+off+i, r4 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020427, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020467, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021427, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021467, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10021167, // mov rb_inst, unif 0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0)) 0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0)) 0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0)) 0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst 0xcc9e7081, 0x100246a0, // add out_0, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100246e0, // add out_1, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1 0x0c9e7080, 0x100211e7, // add out_3, r0, r2 0x000000c8, 0xf0f80127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x156e7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, 
// mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x15727d80, 0x10021c67, // mov vw_setup, ra_vdw_32 0xc0001fc0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS32_STRIDE-16*4 0x8c05cdf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr 0x00000050, 0xf0f81127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x156e7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x156a7d80, 0x10020c67, // mov vr_setup, ra_vpm_lo 0x15c27d80, 0x100009e7, // mov -, vpm 0x00000080, 0xf0f801a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 
0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000588, 0xf0f811a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000009, 0xe80009e7, // mov -, srel(i+9) 0x00000011, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000a, 0xe80009e7, // mov -, srel(i+9) 0x00000012, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000b, 0xe80009e7, // mov -, srel(i+9) 0x00000013, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000c, 0xe80009e7, // mov -, srel(i+9) 0x00000014, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000d, 0xe80009e7, // mov -, srel(i+9) 0x00000015, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000e, 0xe80009e7, // mov -, srel(i+9) 0x00000016, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000f, 0xe80009e7, // mov -, srel(i+9) 0x00000017, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffda0, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // 
mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149da1c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149da1c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x119c21c0, 0xd0020827, // shl r0, r0, STAGES-13 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffc80, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x204a7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d200f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x204a700e, 0x100049e3, // nop; fmul r3, r1, 
ra_tw_re+TW32_ACTIVE 0x22092c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x00000000, 0xf0f489e7, // bra -, ra_save_32 0x95682ff6, 0x10024682, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x956c7ff6, 0x100246c7, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi 0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32 0x8c15edf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffbf0, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffba0, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x204a7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d200f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x204a700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x22092c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x00000000, 0xf0f489e7, // bra -, ra_save_32 0x95682ff6, 0x10024682, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x956c7ff6, 0x100246c7, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi 0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32 0x159c5fc0, 0x10022827, // 
mov.setf r0, rb_inst 0x0d9c11c0, 0xd0020827, // sub r0, r0, 1 0x119c51c0, 0xd0020827, // shl r0, r0, 5 0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0 0x159c4fc0, 0x10060127, // mov.ifnz ra_save_32, rx_save_slave_32 0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif 0x15827d80, 0x100210e7, // mov rb_addr_y, unif 0x00000668, 0xf00809e7, // brr.allz -, r:end 0x9528adbf, 0x10024492, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x952cbdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 
0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149da1c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149da1c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x119c21c0, 0xd0020827, // shl r0, r0, STAGES-13 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffac8, 0xf0f80227, // brr ra_link_1, r:pass_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x0e1cfdc0, 0xd00229e7, // shr.setf -, ra_points, STAGES 0xfffffaa0, 0xf00809e7, // brr.allz -, r:pass_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x9528adbf, 0x10024492, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x952cbdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; 
mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffc18, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0xfffffbf8, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0xfffffbd8, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0xfffffbb8, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, 
rb_0x100 0x95492dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE 0x20327016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32 0x209cc017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32 0x209cc01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32 0x2132709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x024a7c80, 0x100204a7, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2 0x029d2ec0, 0x100214a7, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3 0x95596dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x20367016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209cd017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209cd01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x2136709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x025a7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d6ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1cfdc0, 0xd00229e7, // 
shr.setf -, ra_points, STAGES 0xfffffab0, 0xf00809e7, // brr.allz -, r:pass_2 0x00000060, 0xe0020827, // mov r0, 3*4*8 0x0d227c00, 0x10020227, // sub ra_link_1, ra_link_1, r0 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x95410dbf, 0x10024492, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x95451dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // 
add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff9a8, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x95492dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE 0x203a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32 0x209ce017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32 0x209ce01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32 0x213a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x024a7c80, 0x100204a7, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2 0x029d2ec0, 0x100214a7, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3 0x95596dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x203e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209cf017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209cf01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x213e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x025a7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d6ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 
0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1cfdc0, 0xd00229e7, // shr.setf -, ra_points, STAGES 0xfffff8a0, 0xf00809e7, // brr.allz -, r:pass_3 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0xfffff968, 0xf0f809e7, // brr -, r:loop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159c3fc0, 0x100209a7, // mov interrupt, flag 0x009e7000, 0x300009e7, // nop; nop; thrend 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop ================================================ FILE: src/hello_fft/hex/shader_4k.hex ================================================ 0x00000020, 0xe0021767, // mov rb_0x20, 0x20 0x00000040, 0xe00217a7, // mov rb_0x40, 0x40 0x00000080, 0xe00217e7, // mov rb_0x80, 0x80 0x00005555, 0xe0020727, // mov rx_0x5555, 0x5555 0x00003333, 0xe0020767, // mov rx_0x3333, 0x3333 0x00000f0f, 0xe00207a7, // mov rx_0x0F0F, 0x0F0F 0x000000ff, 0xe00207e7, // mov rx_0x00FF, 0x00FF 0x88104000, 0xe00206e7, // mov ra_vdw, vdw_setup_0(16, 16, dma_h32( 0,0)) 0x88104800, 0xe00216e7, // mov rb_vdw, vdw_setup_0(16, 16, dma_h32(16,0)) 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9df1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020227, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9df1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020267, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9df1c0, 0xa0020827, // add r0, 
r0, stride; ldtmu0 0x159e7900, 0x100202a7, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9df3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021227, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9df3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021267, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9df3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100212a7, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9df1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9df3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10021167, // mov rb_inst, unif 0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0)) 0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0)) 0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0)) 0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst 0xcc9e7081, 0x100246a0, // add out_0, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100256a0, // add out_1, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100049e0, // add out_2, r0, r2; v8adds r0, r0, r1 0x0c9e7080, 0x100009e7, // add out_3, r0, r2 0x000000b0, 0xf0f80127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x156a7d80, 0x10021c67, // mov vw_setup, arg 0x159e7000, 0x10020c27, // mov vpm, r0 0x159e7240, 0x10020c27, // mov vpm, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 
0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000000, 0xf0f409e7, // bra -, ra_link_1 0x156e7d80, 0x10021c67, // mov vw_setup, arg_vdw 0xc00007c0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS16_STRIDE-16*4 0x8c05edf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr 0x00000038, 0xf0f81127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x156a7d80, 0x10021c67, // mov vw_setup, arg 0x159e7000, 0x10020c27, // mov vpm, r0 0x159e7240, 0x10020c27, // mov vpm, r1 0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x156a7d80, 0x10020c67, // mov vr_setup, arg_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x00000080, 0xf0f801a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000000, 0xf0f409e7, // bra -, ra_link_1 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x000003e8, 0xf0f811a7, // brr 
rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f409e7, // bra -, ra_link_1 0x00000009, 0xe80009e7, // mov -, srel(i+9) 0x00000011, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f409e7, // bra -, ra_link_1 0x0000000a, 0xe80009e7, // mov -, srel(i+9) 0x00000012, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f409e7, // bra -, ra_link_1 0x0000000b, 0xe80009e7, // mov -, srel(i+9) 0x00000013, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f409e7, // bra -, ra_link_1 0x0000000c, 0xe80009e7, // mov -, srel(i+9) 0x00000014, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f409e7, // bra -, ra_link_1 0x0000000d, 0xe80009e7, // mov -, srel(i+9) 0x00000015, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f409e7, // bra -, ra_link_1 0x0000000e, 0xe80009e7, // mov -, srel(i+9) 0x00000016, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f409e7, // bra -, ra_link_1 0x0000000f, 0xe80009e7, // mov -, srel(i+9) 0x00000017, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x000000cc, 0xe20229e7, // mov.setf -, [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0] 0x959fa000, 0xd002c8a0, // mov r2, r0; mov.ifnz r0, r0 << 6 0x959fa249, 0xd002c8e1, // mov r3, r1; mov.ifnz r1, r1 << 6 0x00003300, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0] 0x809f6012, 0xd000c9e0, // nop; mov.ifnz r0, r2 >> 6 0x809f601b, 0xd000c9e1, // nop; mov.ifnz r1, r3 >> 6 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 
0x159e7900, 0x10020867, // mov r1, r4 0x000000cc, 0xe20229e7, // mov.setf -, [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0] 0x959fa000, 0xd002c8a0, // mov r2, r0; mov.ifnz r0, r0 << 6 0x959fa249, 0xd002c8e1, // mov r3, r1; mov.ifnz r1, r1 << 6 0x00003300, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0] 0x809f6012, 0xd000c9e0, // nop; mov.ifnz r0, r2 >> 6 0x809f601b, 0xd000c9e1, // nop; mov.ifnz r1, r3 >> 6 0xfffffd40, 0xf0f809e7, // brr -, r:fft_16 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x8c15fdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffcf8, 0xf0f809e7, // brr -, r:fft_16 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst 0x0d9c11c0, 0xd0020827, // sub r0, r0, 1 0x119c51c0, 0xd0020827, // shl r0, r0, 5 0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0 0x159c4fc0, 0x10060127, // mov.ifnz ra_save_16, rx_save_slave_16 0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif 0x15827d80, 0x100210e7, // mov rb_addr_y, unif 0x000005c8, 0xf00809e7, // brr.allz -, r:end 0x95208dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c3e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d3e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c3a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d3a3, // mov 
rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c362, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d363, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c322, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d323, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c41c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15fdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14727180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14727180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9c11c0, 0xd0020827, // shr r0, r0, 13-STAGES 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 
0x000000cc, 0xe20229e7, // mov.setf -, [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0] 0x959fa000, 0xd002c8a0, // mov r2, r0; mov.ifnz r0, r0 << 6 0x959fa249, 0xd002c8e1, // mov r3, r1; mov.ifnz r1, r1 << 6 0x00003300, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0] 0x809f6012, 0xd000c9e0, // nop; mov.ifnz r0, r2 >> 6 0x809f601b, 0xd000c9e1, // nop; mov.ifnz r1, r3 >> 6 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffc40, 0xf0f80027, // brr ra_link_1, r:pass_1 0x9569aff6, 0x1002469a, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm 0x956dbff6, 0x100246db, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x0e1ccdc0, 0xd00229e7, // shr.setf -, ra_points, STAGES 0xfffffc18, 0xf00809e7, // brr.allz -, r:pass_1 0x9569aff6, 0x1002469a, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm 0x956dbff6, 0x100246db, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x00000000, 0xf0f4c027, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x95208dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c3e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d3e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c3a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d3a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c362, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d363, // mov 
rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c322, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d323, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c41c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15fdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffc90, 0xf0f80027, // brr ra_link_1, r:pass_2 0x9569aff6, 0x1002469a, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm 0x956dbff6, 0x100246db, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0xfffffc70, 0xf0f80027, // brr ra_link_1, r:pass_2 0x9569aff6, 0x1002469a, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm 0x956dbff6, 0x100246db, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x0d01ddc0, 0x10020027, // sub ra_link_1, ra_link_1, rb_0x20 0x953cfdbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x20267016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209c9017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209c901f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x2126709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x023e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029cfec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c3e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 
0x959f86db, 0xd002d3e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c3a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d3a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c362, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d363, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c322, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d323, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1ccdc0, 0xd00229e7, // shr.setf -, ra_points, STAGES 0xfffffba0, 0xf00809e7, // brr.allz -, r:pass_2 0x9569aff6, 0x1002469a, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm 0x956dbff6, 0x100246db, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x00000000, 0xf0f4c027, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x952cbdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c3e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d3e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c3a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d3a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c362, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 
0xd002d363, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c322, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d323, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c41c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15fdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffaa0, 0xf0f80027, // brr ra_link_1, r:pass_3 0x9569aff6, 0x1002469a, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm 0x956dbff6, 0x100246db, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x953cfdbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x202a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209ca017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209ca01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x212a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x023e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029cfec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c3e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d3e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c3a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d3a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; 
mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c362, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d363, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c322, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d323, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1ccdc0, 0xd00229e7, // shr.setf -, ra_points, STAGES 0xfffff9d8, 0xf00809e7, // brr.allz -, r:pass_3 0x9569aff6, 0x1002469a, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm 0x956dbff6, 0x100246db, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x00000000, 0xf0f4c027, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0xfffffa08, 0xf0f809e7, // brr -, r:loop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159c3fc0, 0x100209a7, // mov interrupt, flag 0x009e7000, 0x300009e7, // nop; nop; thrend 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop ================================================ FILE: src/hello_fft/hex/shader_512.hex ================================================ 0x00000010, 0xe0021727, // mov rb_0x10, 0x10 0x00000040, 0xe0021767, // mov rb_0x40, 0x40 0x00000080, 0xe00217a7, // mov rb_0x80, 0x80 0x000000f0, 0xe00217e7, // mov rb_0xF0, 0xF0 0x00005555, 0xe0020727, // mov rx_0x5555, 0x5555 0x00003333, 0xe0020767, // mov rx_0x3333, 0x3333 0x00000f0f, 0xe00207a7, // mov rx_0x0F0F, 0x0F0F 0x000000ff, 0xe00207e7, // mov rx_0x00FF, 0x00FF 0x88104000, 0xe00206a7, // mov ra_vdw_16, vdw_setup_0(16, 16, dma_h32( 0,0)) 0x88105000, 0xe0021027, // mov rb_vdw_16, vdw_setup_0(16, 16, dma_h32(32,0)) 0x90104000, 0xe00206e7, // mov ra_vdw_32, vdw_setup_0(32, 16, dma_h32( 0,0)) 0x90105000, 0xe0021067, // mov 
rb_vdw_32, vdw_setup_0(32, 16, dma_h32(32,0)) 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9de1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9de1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020327, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9de1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020367, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9de3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9de3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021327, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9de3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021367, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9de1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203a7, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9de3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100213a7, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10021167, // mov rb_inst, unif 0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0)) 0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0)) 0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0)) 0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst 0xcc9e7081, 0x10024620, // add out_0, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x10024660, // add out_1, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds 
r0, r0, r1 0x0c9e7080, 0x100211e7, // add out_3, r0, r2 0x000000b0, 0xf0f80127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15627d80, 0x10021c67, // mov vw_setup, arg 0x159e7000, 0x10020c27, // mov vpm, r0 0x159e7240, 0x10020c27, // mov vpm, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x156a7d80, 0x10021c67, // mov vw_setup, arg_vdw 0xc00000c0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS16_STRIDE-16*4 0x8c05ddf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr 0x00000038, 0xf0f81127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15627d80, 0x10021c67, // mov vw_setup, arg 0x159e7000, 0x10020c27, // mov vpm, r0 0x159e7240, 0x10020c27, // mov vpm, r1 0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x15627d80, 0x10020c67, // mov vr_setup, arg_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x000000c8, 0xf0f802a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15627d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x156e7d80, 0x10021c67, // mov vw_setup, ra_vdw_32 0xc0000040, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS32_STRIDE-16*4 0x8c05ddf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr 0x00000050, 0xf0f812a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15627d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x15627d80, 0x10020c67, // mov vr_setup, ra_vpm_lo 0x15c27d80, 0x100009e7, // mov -, vpm 0x00000080, 0xf0f801a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 
0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000510, 0xf0f811a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000009, 0xe80009e7, // mov -, srel(i+9) 0x00000011, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000a, 0xe80009e7, // mov -, srel(i+9) 0x00000012, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000b, 0xe80009e7, // mov -, srel(i+9) 0x00000013, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000c, 0xe80009e7, // mov -, srel(i+9) 0x00000014, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000d, 0xe80009e7, // mov -, srel(i+9) 0x00000015, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000e, 0xe80009e7, // mov -, srel(i+9) 0x00000016, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000f, 0xe80009e7, // mov -, srel(i+9) 0x00000017, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, 
ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffda0, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14727180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14727180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9c41c0, 0xd0020827, // shr r0, r0, 13-STAGES 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffc80, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 
1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x203e7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209cf00f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x203e700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x2208fc87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x00000000, 0xf0f549e7, // bra -, ra_save_32 0x95602ff6, 0x10024602, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x95647ff6, 0x10024647, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi 0x956c1ff6, 0x100246c1, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32 0x8c15edf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffbf0, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x00000000, 0xf0f489e7, // bra -, ra_save_16 0x009e7000, 0x100009e7, // nop 0x95602ff6, 0x10024602, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x95680ff6, 0x10024680, // mov ra_vdw_16, rb_vdw_16; mov rb_vdw_16, ra_vdw_16 0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst 0x0d9c11c0, 0xd0020827, // sub r0, r0, 1 0x119c51c0, 0xd0020827, // shl r0, r0, 5 0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0 0x159c4fc0, 0x10060127, // mov.ifnz ra_save_16, rx_save_slave_16 0x159cafc0, 0x100602a7, // mov.ifnz ra_save_32, rx_save_slave_32 0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif 0x15827d80, 0x100210e7, // mov rb_addr_y, unif 0x000003a8, 0xf00809e7, // brr.allz -, r:end 0x952cbdbf, 0x100243cf, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 
0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c422, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d423, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14727180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14727180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // 
and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9c41c0, 0xd0020827, // shr r0, r0, 13-STAGES 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffb38, 0xf0f80227, // brr ra_link_1, r:pass_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0xfffffb18, 0xf0f80227, // brr ra_link_1, r:pass_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x9538edbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c422, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 
0x959f16db, 0xd002d423, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c41c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15edf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffc98, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dedc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x954d3dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x20367016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209cd017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209cd01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x2136709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x024e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d3ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf 
-, elem_num, (8>>i) 0x959f1492, 0xd002c422, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d423, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1c9dc0, 0xd00229e7, // shr.setf -, ra_points, STAGES 0xfffffbd0, 0xf00809e7, // brr.allz -, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dedc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0xfffffc28, 0xf0f809e7, // brr -, r:loop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159c3fc0, 0x100209a7, // mov interrupt, flag 0x009e7000, 0x300009e7, // nop; nop; thrend 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop ================================================ FILE: src/hello_fft/hex/shader_512k.hex ================================================ 0x00000013, 0xe0021227, // mov rb_STAGES, STAGES 0x00000010, 0xe00216e7, // mov rb_0x10, 0x10 0x00000040, 0xe0021727, // mov rb_0x40, 0x40 0x00000080, 0xe0021767, // mov rb_0x80, 0x80 0x000000f0, 0xe00217a7, // mov rb_0xF0, 0xF0 0x00000100, 0xe00217e7, // mov rb_0x100, 0x100 0x55555555, 0xe0020767, // mov rx_0x55555555, 0x55555555 0x33333333, 0xe00207a7, // mov rx_0x33333333, 0x33333333 0x0f0f0f0f, 0xe00207e7, // mov rx_0x0F0F0F0F, 0x0F0F0F0F 0x00ff00ff, 0xe0021667, // mov rx_0x00FF00FF, 0x00FF00FF 0x0000ffff, 0xe00216a7, // mov rx_0x0000FFFF, 0x0000FFFF 0x80904000, 0xe00206e7, // mov ra_vdw_16, vdw_setup_0(1, 16, dma_h32( 0,0)) 0x80905000, 0xe0021027, // mov rb_vdw_16, vdw_setup_0(1, 16, dma_h32(32,0)) 0x80904000, 0xe0020727, // mov ra_vdw_32, vdw_setup_0(1, 16, dma_h32( 0,0)) 0x80905000, 0xe0021067, // mov rb_vdw_32, vdw_setup_0(1, 16, dma_h32(32,0)) 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 
0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020327, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020367, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203a7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020427, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020467, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021327, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021367, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100213a7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021427, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, 
stride; ldtmu0 0x159e7900, 0x10021467, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100204a7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100204e7, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100214a7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100214e7, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10021167, // mov rb_inst, unif 0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0)) 0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0)) 0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0)) 0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst 0xcc9e7081, 0x10024660, // add out_0, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100246a0, // add out_1, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1 0x0c9e7080, 0x100211e7, // add out_3, r0, r2 0x000001d0, 0xf0f80127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, arg 0x159e7000, 0x10020c27, // mov vpm, r0 0x159e7240, 0x10020c27, // mov vpm, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // 
mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x156e7d80, 0x10020827, // mov r0, arg_vdw 0x8c05cdf6, 0x10024061, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov r1, ra_save_ptr 0x00000080, 0xe00208a7, // mov r2, vdw_setup_0(1, 16, dma_h32(1,0)) - vdw_setup_0(1, 16, dma_h32(0,0)) 0x00040000, 0xe00208e7, // mov r3, PASS16_STRIDE 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000038, 0xf0f81127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, arg 0x159e7000, 0x10020c27, // mov vpm, r0 0x159e7240, 0x10020c27, // mov vpm, r1 0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10020c67, // mov vr_setup, arg_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x000002e8, 0xf0f802a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 
0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x15727d80, 0x10020827, // mov r0, ra_vdw_32 0x8c05cdf6, 0x10024061, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov r1, ra_save_ptr 0x00000080, 0xe00208a7, // mov r2, vdw_setup_0(1, 16, dma_h32(1,0)) - vdw_setup_0(1, 16, dma_h32(0,0)) 0x00020000, 0xe00208e7, // mov r3, PASS32_STRIDE 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, 
r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 
0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0 0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000050, 0xf0f812a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10020c67, // mov vr_setup, ra_vpm_lo 0x15c27d80, 0x100009e7, // mov -, vpm 0x00000080, 0xf0f801a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000640, 0xf0f811a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000009, 
0xe80009e7, // mov -, srel(i+9) 0x00000011, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000a, 0xe80009e7, // mov -, srel(i+9) 0x00000012, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000b, 0xe80009e7, // mov -, srel(i+9) 0x00000013, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000c, 0xe80009e7, // mov -, srel(i+9) 0x00000014, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000d, 0xe80009e7, // mov -, srel(i+9) 0x00000015, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000e, 0xe80009e7, // mov -, srel(i+9) 0x00000016, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000f, 0xe80009e7, // mov -, srel(i+9) 0x00000017, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffd78, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c13c0, 
0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149d91c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149d91c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149da1c0, 0x10020867, // and r1, r0, mask 0x0e9db1c0, 0x10020827, // shr r0, r0, shift 0x149da1c0, 0x10020827, // and r0, r0, mask 0x119db3c0, 0x10020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9ca1c0, 0xd0020827, // shr r0, r0, 32-STAGES-3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffc30, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x20527006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d400f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x2052700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x22094c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 
0x10020867, // fadd r1, r1, r3 0x00000000, 0xf0f549e7, // bra -, ra_save_32 0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x95687ff6, 0x10024687, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi 0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32 0x8c15ddf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffba0, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x00000000, 0xf0f489e7, // bra -, ra_save_16 0x009e7000, 0x100009e7, // nop 0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x956c0ff6, 0x100246c0, // mov ra_vdw_16, rb_vdw_16; mov rb_vdw_16, ra_vdw_16 0x8c15edf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffb38, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffae8, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x20527006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d400f, 
0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x2052700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x22094c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x00000000, 0xf0f549e7, // bra -, ra_save_32 0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x95687ff6, 0x10024687, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi 0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32 0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst 0x0d9c11c0, 0xd0020827, // sub r0, r0, 1 0x119c51c0, 0xd0020827, // shl r0, r0, 5 0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0 0x159c4fc0, 0x10060127, // mov.ifnz ra_save_16, rx_save_slave_16 0x159cafc0, 0x100602a7, // mov.ifnz ra_save_32, rx_save_slave_32 0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif 0x15827d80, 0x100210e7, // mov rb_addr_y, unif 0x00000888, 0xf00809e7, // brr.allz -, r:end 0x952cbdbf, 0x10024514, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> 
(8>>i) 0x959f16db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149d91c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149d91c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149da1c0, 0x10020867, // and r1, r0, mask 0x0e9db1c0, 0x10020827, // shr r0, r0, shift 0x149da1c0, 0x10020827, // and r0, r0, mask 0x119db3c0, 0x10020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0e9ca1c0, 0xd0020827, // shr r0, r0, 32-STAGES-3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff9e0, 0xf0f80227, // brr ra_link_1, r:pass_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 
0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 0xfffff9b8, 0xf00809e7, // brr.allz -, r:pass_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c41c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15ddf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 
0x10020e27, // add t0s, ra_addr_x, r1 0xfffffb88, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dddc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x00003fff, 0xe0020827, // mov r0, 0x3FFF 0x141e7c00, 0x100229e7, // and.setf -, ra_points, r0 0xfffffb58, 0xf01809e7, // brr.allnz -, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dddc0, 0x100601e7, // add.ifnz ra_points, ra_points, rb_0x80 0x95618dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x20367016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209cd017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209cd01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x2136709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02627c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d8ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 0xfffffa90, 0xf00809e7, // 
brr.allz -, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dddc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x952cbdbf, 0x10024514, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff9f0, 
0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0xfffff9d0, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0xfffff9b0, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0xfffff990, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x95514dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE 0x203a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32 0x209ce017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32 0x209ce01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32 0x213a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02527c80, 0x10020527, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2 0x029d4ec0, 0x10021527, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3 0x95618dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x203e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209cf017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209cf01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x213e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02627c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d8ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, 
elem_num, (8>>i) 0x959f4492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 0xfffff888, 0xf00809e7, // brr.allz -, r:pass_3 0x00000060, 0xe0020827, // mov r0, 3*4*8 0x0d227c00, 0x10020227, // sub ra_link_1, ra_link_1, r0 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x95492dbf, 0x10024514, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x954d3dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz 
r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff780, 0xf0f80227, // brr ra_link_1, r:pass_4 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x95514dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE 0x20427016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32 0x209d0017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32 0x209d001f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32 0x2142709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02527c80, 0x10020527, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2 0x029d4ec0, 0x10021527, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3 0x95618dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x20467016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209d1017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209d101f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x2146709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02627c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d8ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // 
and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 0xfffff678, 0xf00809e7, // brr.allz -, r:pass_4 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0xfffff748, 0xf0f809e7, // brr -, r:loop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159c3fc0, 0x100209a7, // mov interrupt, flag 0x009e7000, 0x300009e7, // nop; nop; thrend 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop ================================================ FILE: src/hello_fft/hex/shader_64k.hex ================================================ 0x00000010, 0xe0021227, // mov rb_0x10, 0x10 0x000001d0, 0xe0021967, // mov r5rep, 0x1D0 0x00005555, 0xe00207a7, // mov rx_0x5555, 0x5555 0x00003333, 0xe00217a7, // mov rx_0x3333, 0x3333 0x00000f0f, 0xe00207e7, // mov rx_0x0F0F, 0x0F0F 0x000000ff, 0xe00217e7, // mov rx_0x00FF, 0x00FF 0x00000080, 
0xe00208e7, // mov r3, 0x80 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020427, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020467, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100204a7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100204e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020527, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020567, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100205a7, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021427, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021467, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100214a7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, 
r1, stride; ldtmu0 0x159e7900, 0x100214e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021527, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021567, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100215a7, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100205e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020627, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100215e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021627, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10021167, // mov rb_inst, unif 0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0)) 0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0)) 0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0)) 0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst 0xcc9e7081, 0x10025020, // add out_0, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x10025060, // add out_1, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1 0x0c9e7080, 0x100211e7, // add out_3, r0, r2 0x000000c8, 0xf0f80127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x152e7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x15327d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x153a7d80, 0x10021c67, // mov vw_setup, ra_vdw_32 0xc0003fc0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS32_STRIDE-16*4 0x8c04ddf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr 0x00000050, 0xf0f81127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x152e7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x15327d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x152e7d80, 0x10020c67, // mov vr_setup, ra_vpm_lo 0x15c27d80, 0x100009e7, // mov -, vpm 0x00000100, 0xf0f802a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000040, 0xe0020827, // mov r0, 0x40 0x159f2fc0, 0x100009e7, // mov -, vw_wait 
0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x159c0fc0, 0x10021c67, // mov vw_setup, rb_vpm 0x012cbdc0, 0x10020c27, // fadd vpm, ra_64+0, rb_64+0 0x0130cdc0, 0x10020c27, // fadd vpm, ra_64+1, rb_64+1 0x159c1fc0, 0x10021c67, // mov vw_setup, rb_vpm_16 0x0134ddc0, 0x10020c27, // fadd vpm, ra_64+2, rb_64+2 0x0138edc0, 0x10020c27, // fadd vpm, ra_64+3, rb_64+3 0x159c2fc0, 0x10021c67, // mov vw_setup, rb_vpm_32 0x022cbdc0, 0x10020c27, // fsub vpm, ra_64+0, rb_64+0 0x0230cdc0, 0x10020c27, // fsub vpm, ra_64+1, rb_64+1 0x159c7fc0, 0x10021c67, // mov vw_setup, rb_vpm_48 0x0234ddc0, 0x10020c27, // fsub vpm, ra_64+2, rb_64+2 0x0238edc0, 0x10020c27, // fsub vpm, ra_64+3, rb_64+3 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0xa0104000, 0xe0021c67, // mov vw_setup, vdw_setup_0(64, 16, dma_h32(0,0)) 0xc0001fc0, 0xe0021c67, // mov vw_setup, vdw_setup_1(PASS64_STRIDE-16*4) 0x8c067c36, 0x10024072, // add ra_save_ptr, ra_save_ptr, step; mov vw_addr, ra_save_ptr 0x000002b8, 0xf0f812a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd00200a7, // shl ra_temp, r0, 5 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0fc49e7, // brr -, ra_temp 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x000000e0, 0xf0f809e7, // brr -, r:2f 0x00000010, 0xe80009e7, // mov -, 
sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x000000c0, 0xf0f809e7, // brr -, r:2f 0x00000011, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x000000a0, 0xf0f809e7, // brr -, r:2f 0x00000012, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000080, 0xf0f809e7, // brr -, r:2f 0x00000013, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000060, 0xf0f809e7, // brr -, r:2f 0x00000014, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000040, 0xf0f809e7, // brr -, r:2f 0x00000015, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000020, 0xf0f809e7, // brr -, r:2f 0x00000016, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f809e7, // brr -, r:2f 0x00000017, 0xe80009e7, // mov -, sacq(i) 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159c0fc0, 0x10021c67, // mov vw_setup, rb_vpm 0x012cbdc0, 0x10020c27, // fadd vpm, ra_64+0, rb_64+0 0x0130cdc0, 0x10020c27, // fadd vpm, ra_64+1, rb_64+1 0x159c1fc0, 0x10021c67, // mov vw_setup, rb_vpm_16 0x0134ddc0, 0x10020c27, // fadd vpm, ra_64+2, rb_64+2 0x0138edc0, 0x10020c27, // fadd vpm, ra_64+3, rb_64+3 0x159c2fc0, 0x10021c67, // mov vw_setup, rb_vpm_32 0x022cbdc0, 0x10020c27, // fsub vpm, ra_64+0, rb_64+0 0x0230cdc0, 0x10020c27, // fsub vpm, ra_64+1, rb_64+1 0x159c7fc0, 0x10021c67, // mov vw_setup, rb_vpm_48 0x0234ddc0, 0x10020c27, // fsub vpm, ra_64+2, rb_64+2 0x0238edc0, 0x10020c27, // fsub vpm, ra_64+3, rb_64+3 0x00000000, 0xf0fc49e7, // brr -, ra_temp 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x00000008, 0xe80009e7, // mov 
-, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x00000009, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x0000000a, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x0000000b, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x0000000c, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x0000000d, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x0000000e, 0xe80009e7, // mov -, srel(i+8) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x0000000f, 0xe80009e7, // mov -, srel(i+8) 0x00000080, 0xf0f801a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 
0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000858, 0xf0f811a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000009, 0xe80009e7, // mov -, srel(i+9) 0x00000011, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000a, 0xe80009e7, // mov -, srel(i+9) 0x00000012, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000b, 0xe80009e7, // mov -, srel(i+9) 0x00000013, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000c, 0xe80009e7, // mov -, srel(i+9) 0x00000014, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000d, 0xe80009e7, // mov -, srel(i+9) 0x00000015, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000e, 0xe80009e7, // mov -, srel(i+9) 0x00000016, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000f, 0xe80009e7, // mov -, srel(i+9) 0x00000017, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffda0, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x959e7009, 0x10024249, 
// mov ra_32_re, r0; mov rb_32_im, r1 0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149de1c0, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x149de1c0, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149df1c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149df1c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x119c31c0, 0xd0020827, // shl r0, r0, STAGES-13 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffc80, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x20667006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d900f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x2066700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x22099c87, 0x10024821, // fsub r0, ra_temp, r2; fmul 
r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x01267c00, 0x100202e7, // fadd ra_64+0, ra_32_re, r0 0x019c9e40, 0x10020327, // fadd ra_64+1, rb_32_im, r1 0x02267c00, 0x10020367, // fsub ra_64+2, ra_32_re, r0 0x029c9e40, 0x100203a7, // fsub ra_64+3, rb_32_im, r1 0x8c167d76, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149de1c0, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x149de1c0, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149df1c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149df1c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x119c31c0, 0xd0020827, // shl r0, r0, STAGES-13 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffb20, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149de1c0, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x149de1c0, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149df1c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149df1c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x119c31c0, 0xd0020827, // shl r0, r0, STAGES-13 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffa00, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x20667006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d900f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x2066700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x22099c87, 0x10024821, // fsub 
r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x029c9e40, 0x100208e7, // fsub r3, rb_32_im, r1 0x02267c00, 0x100208a7, // fsub r2, ra_32_re, r0 0x019c9e40, 0x10020867, // fadd r1, rb_32_im, r1 0x01267c00, 0x10020827, // fadd r0, ra_32_re, r0 0x203e700e, 0x100049c9, // nop; fmul rb_32_im, r1, ra_tw_re+TW64_P1_BASE0 0x209cf00f, 0x100059c9, // nop; fmul ra_32_re, r1, rb_tw_im+TW64_P1_BASE0 0x209cf007, 0x100049e1, // nop; fmul r1, r0, rb_tw_im+TW64_P1_BASE0 0x213c93c6, 0x10025320, // fadd rb_64+1, r1, rb_32_im; fmul r0, r0, ra_tw_re+TW64_P1_BASE0 0x2225019f, 0x100252c9, // fsub rb_64+0, r0, ra_32_re; fmul ra_32_re, r3, rb_tw_im+TW64_P1_BASE1 0x2042701e, 0x100049c9, // nop; fmul rb_32_im, r3, ra_tw_re+TW64_P1_BASE1 0x00000000, 0xf0f549e7, // bra -, ra_save_64 0x209d0017, 0x100049e3, // nop; fmul r3, r2, rb_tw_im+TW64_P1_BASE1 0x214097d6, 0x100253a2, // fadd rb_64+3, r3, rb_32_im; fmul r2, r2, ra_tw_re+TW64_P1_BASE1 0x02267580, 0x10021367, // fsub rb_64+2, r2, ra_32_re 0x8c14cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff920, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff8d0, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x20667006, 0x100059c2, // nop; fmul 
ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d900f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x2066700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x22099c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x00000000, 0xf0f489e7, // bra -, ra_save_32 0x952c2ff6, 0x100242c2, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x95307ff6, 0x10024307, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi 0x9538eff6, 0x1002438e, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32 0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst 0x0d9c11c0, 0xd0020827, // sub r0, r0, 1 0x119c51c0, 0xd0020827, // shl r0, r0, 5 0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0 0x159c4fc0, 0x10060127, // mov.ifnz ra_save_32, rx_save_slave_32 0x159cafc0, 0x100602a7, // mov.ifnz ra_save_64, rx_save_slave_64 0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif 0x15827d80, 0x100210e7, // mov rb_addr_y, unif 0x000006a8, 0xf00809e7, // brr.allz -, r:end 0x95451dbf, 0x10024659, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x95492dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c6e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d6e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c6a2, // mov 
ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d6a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c61c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149de1c0, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x149de1c0, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149df1c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149df1c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x119c31c0, 0xd0020827, // shl r0, r0, STAGES-13 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff7f0, 0xf0f80227, // brr ra_link_1, r:pass_1 0x009e7000, 0x100009e7, // nop 0x00000200, 0xe0020827, // mov r0, 0x200 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x00000010, 0xe0020867, // mov r1, STAGES 0x0e1e7c40, 0x100229e7, // shr.setf -, ra_points, r1 
0xfffff7c0, 0xf00809e7, // brr.allz -, r:pass_1 0x009e7000, 0x100009e7, // nop 0x00000200, 0xe0020827, // mov r0, 0x200 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x159c0fc0, 0x100202e7, // mov ra_vpm_lo, rb_vpm 0x159c1fc0, 0x10020327, // mov ra_vpm_hi, rb_vpm_16 0x90104000, 0xe00203a7, // mov ra_vdw_32, vdw_setup_0(32, 16, dma_h32( 0,0)) 0x90105000, 0xe00213a7, // mov rb_vdw_32, vdw_setup_0(32, 16, dma_h32(32,0)) 0x00000060, 0xe00212e7, // mov rb_3x4x8, 3*4*8 0x000000f0, 0xe0021327, // mov rb_0xF0, 0xF0 0x00000040, 0xe0021367, // mov rb_0x40, 0x40 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x95451dbf, 0x10024659, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x95492dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c6e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d6e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c6a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d6a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 
0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffbd0, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0xfffffbb0, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0xfffffb90, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0xfffffb70, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x95659dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE 0x204e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32 0x209d3017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32 0x209d301f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32 0x214e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02667c80, 0x10020667, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2 0x029d9ec0, 0x10021667, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3 0x9575ddbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x20527016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209d4017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209d401f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x2152709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 
0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02767c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029ddec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c6e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d6e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c6a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d6a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 0xfffffa68, 0xf00809e7, // brr.allz -, r:pass_2 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x0d20bdc0, 0x10020227, // sub ra_link_1, ra_link_1, rb_3x4x8 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x955d7dbf, 0x10024659, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x95618dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c6e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d6e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c6a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d6a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffff960, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x95659dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE 0x20567016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32 0x209d5017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32 0x209d501f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32 0x2156709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02667c80, 0x10020667, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2 0x029d9ec0, 0x10021667, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3 0x9575ddbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 
0x205a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209d6017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209d601f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x215a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02767c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029ddec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c6e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d6e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c6a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d6a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES 0xfffff858, 0xf00809e7, // brr.allz -, r:pass_3 0x009e7000, 0x100009e7, // nop 0x00000100, 0xe0020827, // mov r0, 0x100 0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0xfffff928, 0xf0f809e7, // brr -, r:loop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159c3fc0, 0x100209a7, // mov interrupt, flag 0x009e7000, 0x300009e7, // nop; nop; thrend 0x009e7000, 0x100009e7, // nop 0x009e7000, 
0x100009e7, // nop ================================================ FILE: src/hello_fft/hex/shader_8k.hex ================================================ 0x00000010, 0xe00216e7, // mov rb_0x10, 0x10 0x00000040, 0xe0021727, // mov rb_0x40, 0x40 0x00000080, 0xe0021767, // mov rb_0x80, 0x80 0x000000f0, 0xe00217a7, // mov rb_0xF0, 0xF0 0x00000100, 0xe00217e7, // mov rb_0x100, 0x100 0x00005555, 0xe0020767, // mov rx_0x5555, 0x5555 0x00003333, 0xe00207a7, // mov rx_0x3333, 0x3333 0x00000f0f, 0xe00207e7, // mov rx_0x0F0F, 0x0F0F 0x000000ff, 0xe00216a7, // mov rx_0x00FF, 0x00FF 0x88104000, 0xe00206e7, // mov ra_vdw_16, vdw_setup_0(16, 16, dma_h32( 0,0)) 0x88105000, 0xe0021027, // mov rb_vdw_16, vdw_setup_0(16, 16, dma_h32(32,0)) 0x90104000, 0xe0020727, // mov ra_vdw_32, vdw_setup_0(32, 16, dma_h32( 0,0)) 0x90105000, 0xe0021067, // mov rb_vdw_32, vdw_setup_0(32, 16, dma_h32(32,0)) 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020327, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x10020367, // mov ra_tw_re+off+i, r4 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203a7, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x10021327, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 
0x159e7900, 0x10021367, // mov rb_tw_im+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100213a7, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10020827, // mov r0, addr 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3 0x0c9e7080, 0x10020e27, // add t0s, r0, r2 0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0 0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4 0x0c9e7280, 0x10020e27, // add t0s, r1, r2 0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0 0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4 0x15827d80, 0x10021167, // mov rb_inst, unif 0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0)) 0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0)) 0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0)) 0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst 0xcc9e7081, 0x10024660, // add out_0, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100246a0, // add out_1, r0, r2; v8adds r0, r0, r1 0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1 0x0c9e7080, 0x100211e7, // add out_3, r0, r2 0x000000b0, 0xf0f80127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, arg 0x159e7000, 0x10020c27, // mov vpm, r0 0x159e7240, 0x10020c27, // mov vpm, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000006, 
0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x156e7d80, 0x10021c67, // mov vw_setup, arg_vdw 0xc0000fc0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS16_STRIDE-16*4 0x8c05cdf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr 0x00000038, 0xf0f81127, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, arg 0x159e7000, 0x10020c27, // mov vpm, r0 0x159e7240, 0x10020c27, // mov vpm, r1 0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10020c67, // mov vr_setup, arg_vpm 0x15c27d80, 0x100009e7, // mov -, vpm 0x000000c8, 0xf0f802a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x15727d80, 0x10021c67, // mov vw_setup, ra_vdw_32 
0xc00007c0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS32_STRIDE-16*4 0x8c05cdf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr 0x00000050, 0xf0f812a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo 0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0 0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1 0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi 0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0 0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1 0x00000000, 0xf0f4c9e7, // bra -, ra_sync 0x009e7000, 0x100009e7, // nop 0x15667d80, 0x10020c67, // mov vr_setup, ra_vpm_lo 0x15c27d80, 0x100009e7, // mov -, vpm 0x00000080, 0xf0f801a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x00000019, 0xe80009e7, // mov -, sacq(i+9) 0x00000001, 0xe80009e7, // mov -, srel(i+1) 0x0000001a, 0xe80009e7, // mov -, sacq(i+9) 0x00000002, 0xe80009e7, // mov -, srel(i+1) 0x0000001b, 0xe80009e7, // mov -, sacq(i+9) 0x00000003, 0xe80009e7, // mov -, srel(i+1) 0x0000001c, 0xe80009e7, // mov -, sacq(i+9) 0x00000004, 0xe80009e7, // mov -, srel(i+1) 0x0000001d, 0xe80009e7, // mov -, sacq(i+9) 0x00000005, 0xe80009e7, // mov -, srel(i+1) 0x0000001e, 0xe80009e7, // mov -, sacq(i+9) 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000006, 0xe80009e7, // mov -, srel(i+1) 0x0000001f, 0xe80009e7, // mov -, sacq(i+9) 0x00000007, 0xe80009e7, // mov -, srel(i+1) 0x00000500, 0xf0f811a7, // brr rx_ptr, label 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x00000009, 0xe80009e7, // mov -, srel(i+9) 0x00000011, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000a, 0xe80009e7, // mov -, srel(i+9) 
0x00000012, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000b, 0xe80009e7, // mov -, srel(i+9) 0x00000013, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000c, 0xe80009e7, // mov -, srel(i+9) 0x00000014, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000d, 0xe80009e7, // mov -, srel(i+9) 0x00000015, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000e, 0xe80009e7, // mov -, srel(i+9) 0x00000016, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x00000000, 0xf0f509e7, // bra -, ra_link_1 0x0000000f, 0xe80009e7, // mov -, srel(i+9) 0x00000017, 0xe80009e7, // mov -, sacq(i+1) 0x009e7000, 0x100009e7, // nop 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffda8, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and 
r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149da1c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149da1c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0xfffffc90, 0xf0f80027, // brr ra_link_0, call 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x20427006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE 0x209d000f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE 0x2042700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE 0x22090c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE 0x019e72c0, 0x10020867, // fadd r1, r1, r3 0x00000000, 0xf0f549e7, // bra -, ra_save_32 0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x95687ff6, 0x10024687, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi 0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32 0x8c15ddf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, 
r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffc00, 0xf0f80027, // brr ra_link_0, call 0x009e7000, 0xa00009e7, // nop; ldtmu0 0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0 0x159e7900, 0x10020867, // mov r1, r4 0x00000000, 0xf0f489e7, // bra -, ra_save_16 0x009e7000, 0x100009e7, // nop 0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo 0x956c0ff6, 0x100246c0, // mov ra_vdw_16, rb_vdw_16; mov rb_vdw_16, ra_vdw_16 0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst 0x0d9c11c0, 0xd0020827, // sub r0, r0, 1 0x119c51c0, 0xd0020827, // shl r0, r0, 5 0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0 0x159c4fc0, 0x10060127, // mov.ifnz ra_save_16, rx_save_slave_16 0x159cafc0, 0x100602a7, // mov.ifnz ra_save_32, rx_save_slave_32 0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif 0x15827d80, 0x100210e7, // mov rb_addr_y, unif 0x00000590, 0xf00809e7, // brr.allz -, r:end 0x952cbdbf, 0x10024410, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32 0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 
0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c51c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x14767180, 0x10020867, // and r1, r0, mask 0x0e9c11c0, 0xd0020827, // shr r0, r0, shift 0x14767180, 0x10020827, // and r0, r0, mask 0x119c13c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147a7180, 0x10020867, // and r1, r0, mask 0x0e9c21c0, 0xd0020827, // shr r0, r0, shift 0x147a7180, 0x10020827, // and r0, r0, mask 0x119c23c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x147e7180, 0x10020867, // and r1, r0, mask 0x0e9c41c0, 0xd0020827, // shr r0, r0, shift 0x147e7180, 0x10020827, // and r0, r0, mask 0x119c43c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x149da1c0, 0x10020867, // and r1, r0, mask 0x0e9c81c0, 0xd0020827, // shr r0, r0, shift 0x149da1c0, 0x10020827, // and r0, r0, mask 0x119c83c0, 0xd0020867, // shl r1, r1, shift 0x159e7040, 0x10020827, // or r0, r0, r1 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0 0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1 0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffb50, 0xf0f80227, // brr ra_link_1, r:pass_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100 0x0e1cddc0, 0xd00229e7, // shr.setf -, ra_points, STAGES 0xfffffb28, 0xf00809e7, // brr.allz -, r:pass_1 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dfdc0, 0x100201e7, // add ra_points, 
ra_points, rb_0x100 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y 0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c41c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15ddf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffc98, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dddc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0xfffffc78, 0xf0f80227, // brr ra_link_1, r:pass_2 0x009e7000, 0x100009e7, // 
nop 0x009e7000, 0x100009e7, // nop 0x0c1dddc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x95514dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x20367016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209cd017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209cd01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x2136709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02527c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d4ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1cddc0, 0xd00229e7, // shr.setf -, ra_points, STAGES 0xfffffbb0, 0xf00809e7, // brr.allz -, r:pass_2 0x00000020, 0xe0020827, // mov r0, 4*8 0x0d227c00, 0x10020227, // sub ra_link_1, ra_link_1, r0 0x0c1dddc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov 
ra_addr_x, rb_addr_y 0x953cfdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x159c5fc0, 0x10020827, // mov r0, rb_inst 0x119c41c0, 0xd0020827, // shl r0, r0, m 0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num 0x00000000, 0xe00201e7, // mov ra_points, 0 0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y 0x8c15ddf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx 0x119c31c0, 0xd0020827, // shl r0, r0, 3 0x0c9c41c0, 0xd0020867, // add r1, r0, 4 0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0 0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1 0xfffffab0, 0xf0f80227, // brr ra_link_1, r:pass_3 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dddc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x95514dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3 0x203a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16 0x209ce017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16 0x209ce01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16 0x213a709e, 0x100248a3, // fadd r2, r0, 
r2; fmul r3, r3, ra_tw_re+tw16 0x029e7640, 0x100208e7, // fsub r3, r3, r1 0x02527c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2 0x029d4ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3 0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f8492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f86db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f4492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f46db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f2492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f26db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i) 0x959f1492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i) 0x959f16db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i) 0x0e1cddc0, 0xd00229e7, // shr.setf -, ra_points, STAGES 0xfffff9e8, 0xf00809e7, // brr.allz -, r:pass_3 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c1dddc0, 0x100201e7, // add ra_points, ra_points, rb_0x80 0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync 0x009e7000, 0x100009e7, // nop 0x009e7000, 0xa00009e7, // ldtmu0 0x009e7000, 0xa00009e7, // ldtmu0 0xfffffa40, 0xf0f809e7, // brr -, r:loop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x159c3fc0, 0x100209a7, // mov interrupt, flag 0x009e7000, 0x300009e7, // nop; nop; thrend 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop ================================================ FILE: src/hello_fft/hex/shader_trans.hex ================================================ 0x15827d80, 0x10020e27, // mov t0s, unif 0x009e7000, 0xa00009e7, // ldtmu0 
0x0c9cc9c0, 0xd0020e27, // add t0s, r4, 3*4 0x009e7000, 0xa00009e7, // ldtmu0 0x0c827980, 0x100200a7, // add ra_src_base, r4, unif 0x15827d80, 0x10020e27, // mov t0s, unif 0x009e7000, 0xa00009e7, // ldtmu0 0x0c9cc9c0, 0xd0020e27, // add t0s, r4, 3*4 0x009e7000, 0xa00009e7, // ldtmu0 0x0c827980, 0x100200e7, // add ra_dst_base, r4, unif 0x15827d80, 0x100214a7, // mov rb_Y_STRIDE_SRC, unif 0x15827d80, 0x100214e7, // mov rb_Y_STRIDE_DST, unif 0x15827d80, 0x10021527, // mov rb_NX, unif 0x15827d80, 0x10021567, // mov rb_NY, unif 0x00000008, 0xe0021467, // mov rb_X_STRIDE, 2*4 0x00000010, 0xe0021427, // mov rb_0x10, 0x10 0xc0000000, 0xe0020827, // mov r0, vdw_setup_1(0) 0x0c9d31c0, 0x10020827, // add r0, r0, rb_Y_STRIDE_DST 0x00000040, 0xe0020867, // mov r1, 16*4 0x0d9e7040, 0x100201a7, // sub ra_vdw_stride, r0, r1 0x40991037, 0x100049e0, // nop; mul24 r0, elem_num, rb_X_STRIDE 0x159e7000, 0x10021027, // mov rb_offsets_re+i, r0 0x0c9c41c0, 0xd0021227, // add rb_offsets_im+i, r0, 4 0x0c9d21c0, 0x10020827, // add r0, r0, rb_Y_STRIDE_SRC 0x159e7000, 0x10021067, // mov rb_offsets_re+i, r0 0x0c9c41c0, 0xd0021267, // add rb_offsets_im+i, r0, 4 0x0c9d21c0, 0x10020827, // add r0, r0, rb_Y_STRIDE_SRC 0x159e7000, 0x100210a7, // mov rb_offsets_re+i, r0 0x0c9c41c0, 0xd00212a7, // add rb_offsets_im+i, r0, 4 0x0c9d21c0, 0x10020827, // add r0, r0, rb_Y_STRIDE_SRC 0x159e7000, 0x100210e7, // mov rb_offsets_re+i, r0 0x0c9c41c0, 0xd00212e7, // add rb_offsets_im+i, r0, 4 0x0c9d21c0, 0x10020827, // add r0, r0, rb_Y_STRIDE_SRC 0x159e7000, 0x10021127, // mov rb_offsets_re+i, r0 0x0c9c41c0, 0xd0021327, // add rb_offsets_im+i, r0, 4 0x0c9d21c0, 0x10020827, // add r0, r0, rb_Y_STRIDE_SRC 0x159e7000, 0x10021167, // mov rb_offsets_re+i, r0 0x0c9c41c0, 0xd0021367, // add rb_offsets_im+i, r0, 4 0x0c9d21c0, 0x10020827, // add r0, r0, rb_Y_STRIDE_SRC 0x159e7000, 0x100211a7, // mov rb_offsets_re+i, r0 0x0c9c41c0, 0xd00213a7, // add rb_offsets_im+i, r0, 4 0x0c9d21c0, 0x10020827, // add r0, r0, 
rb_Y_STRIDE_SRC 0x159e7000, 0x100211e7, // mov rb_offsets_re+i, r0 0x0c9c41c0, 0xd00213e7, // add rb_offsets_im+i, r0, 4 0x0c9d21c0, 0x10020827, // add r0, r0, rb_Y_STRIDE_SRC 0x00000000, 0xe0020067, // mov ra_y, 0 0x00000000, 0xe0020027, // mov ra_x, 0 0x40052037, 0x100049e1, // nop; mul24 r1, ra_y, rb_Y_STRIDE_SRC 0x40011037, 0x100049e0, // nop; mul24 r0, ra_x, rb_X_STRIDE 0x0c9e7040, 0x10020827, // add r0, r0, r1 0x0c0a7c00, 0x10020127, // add ra_src_cell, ra_src_base, r0 0x40013037, 0x100049e1, // nop; mul24 r1, ra_x, rb_Y_STRIDE_DST 0x40051037, 0x100049e0, // nop; mul24 r0, ra_y, rb_X_STRIDE 0x0c9e7040, 0x10020827, // add r0, r0, r1 0x0c0e7c00, 0x10020167, // add ra_dst_cell, ra_dst_base, r0 0x00001200, 0xe0021c67, // mov vw_setup, vpm_setup(16, 1, v32(0,0)) 0x0c100dc0, 0x10020e27, // add t0s, ra_src_cell, rb_offsets_re 0x0c108dc0, 0x10020f27, // add t1s, ra_src_cell, rb_offsets_im 0x0c101dc0, 0x10020e27, // add t0s, ra_src_cell, rb_offsets_re+1+i 0x0c109dc0, 0x10020f27, // add t1s, ra_src_cell, rb_offsets_im+1+i 0x009e7000, 0xa00009e7, // ldtmu0 0x159e7900, 0x10020c27, // mov vpm, r4 0x009e7000, 0xb00009e7, // ldtmu1 0x159e7900, 0x10020c27, // mov vpm, r4 0x0c102dc0, 0x10020e27, // add t0s, ra_src_cell, rb_offsets_re+1+i 0x0c10adc0, 0x10020f27, // add t1s, ra_src_cell, rb_offsets_im+1+i 0x009e7000, 0xa00009e7, // ldtmu0 0x159e7900, 0x10020c27, // mov vpm, r4 0x009e7000, 0xb00009e7, // ldtmu1 0x159e7900, 0x10020c27, // mov vpm, r4 0x0c103dc0, 0x10020e27, // add t0s, ra_src_cell, rb_offsets_re+1+i 0x0c10bdc0, 0x10020f27, // add t1s, ra_src_cell, rb_offsets_im+1+i 0x009e7000, 0xa00009e7, // ldtmu0 0x159e7900, 0x10020c27, // mov vpm, r4 0x009e7000, 0xb00009e7, // ldtmu1 0x159e7900, 0x10020c27, // mov vpm, r4 0x0c104dc0, 0x10020e27, // add t0s, ra_src_cell, rb_offsets_re+1+i 0x0c10cdc0, 0x10020f27, // add t1s, ra_src_cell, rb_offsets_im+1+i 0x009e7000, 0xa00009e7, // ldtmu0 0x159e7900, 0x10020c27, // mov vpm, r4 0x009e7000, 0xb00009e7, // ldtmu1 0x159e7900, 
0x10020c27, // mov vpm, r4 0x0c105dc0, 0x10020e27, // add t0s, ra_src_cell, rb_offsets_re+1+i 0x0c10ddc0, 0x10020f27, // add t1s, ra_src_cell, rb_offsets_im+1+i 0x009e7000, 0xa00009e7, // ldtmu0 0x159e7900, 0x10020c27, // mov vpm, r4 0x009e7000, 0xb00009e7, // ldtmu1 0x159e7900, 0x10020c27, // mov vpm, r4 0x0c106dc0, 0x10020e27, // add t0s, ra_src_cell, rb_offsets_re+1+i 0x0c10edc0, 0x10020f27, // add t1s, ra_src_cell, rb_offsets_im+1+i 0x009e7000, 0xa00009e7, // ldtmu0 0x159e7900, 0x10020c27, // mov vpm, r4 0x009e7000, 0xb00009e7, // ldtmu1 0x159e7900, 0x10020c27, // mov vpm, r4 0x0c107dc0, 0x10020e27, // add t0s, ra_src_cell, rb_offsets_re+1+i 0x0c10fdc0, 0x10020f27, // add t1s, ra_src_cell, rb_offsets_im+1+i 0x009e7000, 0xa00009e7, // ldtmu0 0x159e7900, 0x10020c27, // mov vpm, r4 0x009e7000, 0xb00009e7, // ldtmu1 0x159e7900, 0x10020c27, // mov vpm, r4 0x009e7000, 0xa00009e7, // ldtmu0 0x159e7900, 0x10020c27, // mov vpm, r4 0x009e7000, 0xb00009e7, // ldtmu1 0x159e7900, 0x10020c27, // mov vpm, r4 0x88104000, 0xe0021c67, // mov vw_setup, vdw_setup_0(16, 16, dma_h32(0,0)) 0x151a7d80, 0x10021c67, // mov vw_setup, ra_vdw_stride 0x15167d80, 0x10021ca7, // mov vw_addr, ra_dst_cell 0x159f2fc0, 0x100009e7, // mov -, vw_wait 0x0c010dc0, 0x10020027, // add ra_x, ra_x, rb_0x10 0x009e7000, 0x100009e7, // nop 0x0d014dc0, 0x100229e7, // sub.setf -, ra_x, rb_NX 0xfffffde0, 0xf01809e7, // brr.allnz -, r:inner 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x0c048dc0, 0xd0020067, // add ra_y, ra_y, 8 0x009e7000, 0x100009e7, // nop 0x0d055dc0, 0x100229e7, // sub.setf -, ra_y, rb_NY 0xfffffda0, 0xf01809e7, // brr.allnz -, r:outer 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop 0x00000001, 0xe00209a7, // mov interrupt, 1 0x009e7000, 0x300009e7, // nop; nop; thrend 0x009e7000, 0x100009e7, // nop 0x009e7000, 0x100009e7, // nop ================================================ FILE: 
src/hello_fft/mailbox.c ================================================ /* Copyright (c) 2012, Broadcom Europe Ltd. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include "mailbox.h" #define PAGE_SIZE (4 * 1024) void* mapmem(unsigned base, unsigned size) { int mem_fd; unsigned offset = base % PAGE_SIZE; base = base - offset; /* open /dev/mem */ if ((mem_fd = open("/dev/mem", O_RDWR | O_SYNC)) < 0) { log(LOG_CRIT, "mapmem(): can't open /dev/mem: %s\n", strerror(errno)); exit(-1); } void* mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED /*|MAP_FIXED*/, mem_fd, base); #ifdef GPU_FFT_DEBUG printf("base=0x%x, mem=%p\n", base, mem); #endif if (mem == MAP_FAILED) { log(LOG_CRIT, "mapmem(): mmap error: %s\n", strerror(errno)); exit(-1); } close(mem_fd); return (char*)mem + offset; } void unmapmem(void* addr, unsigned size) { int s = munmap(addr, size); if (s != 0) { log(LOG_CRIT, "unmapmem(): munmap error: %s\n", strerror(errno)); exit(-1); } } /* * use ioctl to send mbox property message */ static int mbox_property(int file_desc, void* buf) { int ret_val = ioctl(file_desc, IOCTL_MBOX_PROPERTY, buf); if (ret_val < 0) { log(LOG_ERR, "mbox_property(): ioctl_set_msg failed: %s\n", strerror(errno)); } #ifdef GPU_FFT_DEBUG unsigned* p = buf; int i; unsigned size = *(unsigned*)buf; for (i = 0; i < size / 4; i++) printf("%04x: 0x%08x\n", i * sizeof *p, p[i]); #endif return ret_val; } unsigned mem_alloc(int file_desc, unsigned size, unsigned align, unsigned flags) { int i = 0; unsigned p[32]; p[i++] = 0; // size p[i++] = 0x00000000; // process request p[i++] = 0x3000c; // (the tag id) p[i++] = 12; // (size of the buffer) p[i++] = 12; // (size of the data) p[i++] = size; // (num bytes? or pages?) 
p[i++] = align; // (alignment) p[i++] = flags; // (MEM_FLAG_L1_NONALLOCATING) p[i++] = 0x00000000; // end tag p[0] = i * sizeof *p; // actual size mbox_property(file_desc, p); return p[5]; } unsigned mem_free(int file_desc, unsigned handle) { int i = 0; unsigned p[32]; p[i++] = 0; // size p[i++] = 0x00000000; // process request p[i++] = 0x3000f; // (the tag id) p[i++] = 4; // (size of the buffer) p[i++] = 4; // (size of the data) p[i++] = handle; p[i++] = 0x00000000; // end tag p[0] = i * sizeof *p; // actual size mbox_property(file_desc, p); return p[5]; } unsigned mem_lock(int file_desc, unsigned handle) { int i = 0; unsigned p[32]; p[i++] = 0; // size p[i++] = 0x00000000; // process request p[i++] = 0x3000d; // (the tag id) p[i++] = 4; // (size of the buffer) p[i++] = 4; // (size of the data) p[i++] = handle; p[i++] = 0x00000000; // end tag p[0] = i * sizeof *p; // actual size mbox_property(file_desc, p); return p[5]; } unsigned mem_unlock(int file_desc, unsigned handle) { int i = 0; unsigned p[32]; p[i++] = 0; // size p[i++] = 0x00000000; // process request p[i++] = 0x3000e; // (the tag id) p[i++] = 4; // (size of the buffer) p[i++] = 4; // (size of the data) p[i++] = handle; p[i++] = 0x00000000; // end tag p[0] = i * sizeof *p; // actual size mbox_property(file_desc, p); return p[5]; } unsigned execute_code(int file_desc, unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5) { int i = 0; unsigned p[32]; p[i++] = 0; // size p[i++] = 0x00000000; // process request p[i++] = 0x30010; // (the tag id) p[i++] = 28; // (size of the buffer) p[i++] = 28; // (size of the data) p[i++] = code; p[i++] = r0; p[i++] = r1; p[i++] = r2; p[i++] = r3; p[i++] = r4; p[i++] = r5; p[i++] = 0x00000000; // end tag p[0] = i * sizeof *p; // actual size mbox_property(file_desc, p); return p[5]; } unsigned qpu_enable(int file_desc, unsigned enable) { int i = 0; unsigned p[32]; p[i++] = 0; // size p[i++] = 0x00000000; // process request p[i++] = 
0x30012; // (the tag id) p[i++] = 4; // (size of the buffer) p[i++] = 4; // (size of the data) p[i++] = enable; p[i++] = 0x00000000; // end tag p[0] = i * sizeof *p; // actual size mbox_property(file_desc, p); return p[5]; } unsigned execute_qpu(int file_desc, unsigned num_qpus, unsigned control, unsigned noflush, unsigned timeout) { int i = 0; unsigned p[32]; p[i++] = 0; // size p[i++] = 0x00000000; // process request p[i++] = 0x30011; // (the tag id) p[i++] = 16; // (size of the buffer) p[i++] = 16; // (size of the data) p[i++] = num_qpus; p[i++] = control; p[i++] = noflush; p[i++] = timeout; // ms p[i++] = 0x00000000; // end tag p[0] = i * sizeof *p; // actual size mbox_property(file_desc, p); return p[5]; } int mbox_open() { int file_desc; // open a char device file used for communicating with kernel mbox driver file_desc = open(DEVICE_FILE_NAME, 0); if (file_desc < 0) { log(LOG_CRIT, "Can't open device file %s: %s\n", DEVICE_FILE_NAME, strerror(errno)); exit(-1); } return file_desc; } void mbox_close(int file_desc) { close(file_desc); } ================================================ FILE: src/hello_fft/mailbox.h ================================================ /* Copyright (c) 2012, Broadcom Europe Ltd. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
/* Major number used to build the mailbox ioctl request code below. */
#define MAJOR_NUM 100
/* ioctl request that mailbox.c issues to send a property message buffer. */
#define IOCTL_MBOX_PROPERTY _IOWR(MAJOR_NUM, 0, char*)
/* Character device opened by mbox_open(). */
#define DEVICE_FILE_NAME "/dev/vcio"

/* Opens DEVICE_FILE_NAME and returns the descriptor; the mailbox.c
 * implementation logs and calls exit(-1) if the open fails. */
int mbox_open();
/* Closes a descriptor obtained from mbox_open(). */
void mbox_close(int file_desc);

/* NOTE(review): declared here, but no definition is visible in mailbox.c. */
unsigned get_version(int file_desc);
/* Sends property tag 0x3000c with (size, align, flags); returns the first reply word. */
unsigned mem_alloc(int file_desc, unsigned size, unsigned align, unsigned flags);
/* Sends property tag 0x3000f with `handle`; returns the first reply word. */
unsigned mem_free(int file_desc, unsigned handle);
/* Sends property tag 0x3000d with `handle`; returns the first reply word. */
unsigned mem_lock(int file_desc, unsigned handle);
/* Sends property tag 0x3000e with `handle`; returns the first reply word. */
unsigned mem_unlock(int file_desc, unsigned handle);
/* Maps `size` bytes of /dev/mem starting at `base`; returns a pointer to `base`. */
void* mapmem(unsigned base, unsigned size);
/* Unmaps a region previously returned by mapmem(). */
void unmapmem(void* addr, unsigned size);

/* Sends property tag 0x30010 with `code` and r0..r5; returns the first reply word. */
unsigned execute_code(int file_desc, unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5);
/* Sends property tag 0x30011 with (num_qpus, control, noflush, timeout in ms); returns the first reply word. */
unsigned execute_qpu(int file_desc, unsigned num_qpus, unsigned control, unsigned noflush, unsigned timeout);
/* Sends property tag 0x30012 with `enable`; returns the first reply word. */
unsigned qpu_enable(int file_desc, unsigned enable);
your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . */ #include // struct stat, S_ISDIR #include // size_t #include // strerror #include "helper_functions.h" #include "logging.h" using namespace std; bool dir_exists(const string& dir_path) { struct stat st; return (stat(dir_path.c_str(), &st) == 0 && S_ISDIR(st.st_mode)); } bool file_exists(const string& file_path) { struct stat st; return (stat(file_path.c_str(), &st) == 0 && S_ISREG(st.st_mode)); } bool make_dir(const string& dir_path) { if (dir_exists(dir_path)) { return true; } if (mkdir(dir_path.c_str(), 0755) != 0) { log(LOG_ERR, "Could not create directory %s: %s\n", dir_path.c_str(), strerror(errno)); return false; } return true; } bool make_subdirs(const string& basedir, const string& subdirs) { // if final directory exists then nothing to do const string delim = "/"; const string final_path = basedir + delim + subdirs; if (dir_exists(final_path)) { return true; } // otherwise scan through subdirs for each slash and make each directory. 
start with index of 0 // to create basedir incase that doesn't exist size_t index = 0; while (index != string::npos) { if (!make_dir(basedir + delim + subdirs.substr(0, index))) { return false; } index = subdirs.find_first_of(delim, index + 1); } make_dir(final_path); return dir_exists(final_path); } string make_dated_subdirs(const string& basedir, const struct tm* time) { // use the time to build the date subdirectories char date_path[11]; strftime(date_path, sizeof(date_path), "%Y/%m/%d", time); const string date_path_str = string(date_path); // make all the subdirectories, and return the full path if successful if (make_subdirs(basedir, date_path_str)) { return basedir + "/" + date_path_str; } // on any error return empty string return ""; } ================================================ FILE: src/helper_functions.h ================================================ /* * helper_functions.h * * Copyright (C) 2023 charlie-foxtrot * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . 
// True if dir_path can be stat()ed and is a directory.
bool dir_exists(const std::string& dir_path);
// True if file_path can be stat()ed and is a regular file.
bool file_exists(const std::string& file_path);
// Creates one directory level (mode 0755) unless it already exists; false on failure.
bool make_dir(const std::string& dir_path);
// Creates basedir and each '/'-separated component of subdirs beneath it;
// true if basedir/subdirs exists as a directory afterwards.
bool make_subdirs(const std::string& basedir, const std::string& subdirs);
// Creates basedir/YYYY/MM/DD derived from `time`; returns the full path, or "" on error.
std::string make_dated_subdirs(const std::string& basedir, const struct tm* time);
*/ #include "input-common.h" #include #include // dlopen, dlsym #include #include #include // asprintf #include // free #include #include using namespace std; typedef input_t* (*input_new_func_t)(void); input_t* input_new(char const* const type) { assert(type != NULL); void* dlhandle = dlopen(NULL, RTLD_NOW); assert(dlhandle != NULL); char* fname = NULL; int chars_written = asprintf(&fname, "%s_input_new", type); if (chars_written <= 0) { return NULL; } input_new_func_t fptr = (input_new_func_t)dlsym(dlhandle, fname); free(fname); if (fptr == NULL) { return NULL; } input_t* input = (*fptr)(); assert(input->init != NULL); assert(input->run_rx_thread != NULL); assert(input->set_centerfreq != NULL); return input; } int input_init(input_t* const input) { assert(input != NULL); input_state_t new_state = INPUT_FAILED; // fail-safe default errno = 0; int ret = input->init(input); if (ret < 0) { ret = -1; } else if ((ret = pthread_mutex_init(&input->buffer_lock, NULL)) != 0) { errno = ret; ret = -1; } else { new_state = INPUT_INITIALIZED; ret = 0; } input->state = new_state; return ret; } int input_start(input_t* const input) { assert(input != NULL); assert(input->dev_data != NULL); assert(input->state == INPUT_INITIALIZED); int err = pthread_create(&input->rx_thread, NULL, input->run_rx_thread, (void*)input); if (err != 0) { errno = err; return -1; } return 0; } int input_parse_config(input_t* const input, libconfig::Setting& cfg) { assert(input != NULL); if (input->parse_config != NULL) { return input->parse_config(input, cfg); } else { // Very simple inputs (like stdin) might not necessarily have any configuration // variables, so it's legal not to have parse_config defined. 
return 0; } } int input_stop(input_t* const input) { assert(input != NULL); assert(input->dev_data != NULL); int err = 0; errno = 0; if (input->state == INPUT_RUNNING && input->stop != NULL) { err = input->stop(input); if (err != 0) { input->state = INPUT_FAILED; return -1; } } input->state = INPUT_STOPPED; err = pthread_join(input->rx_thread, NULL); if (err != 0) { errno = err; return -1; } return 0; } int input_set_centerfreq(input_t* const input, int const centerfreq) { assert(input != NULL); assert(input->dev_data != NULL); if (input->state != INPUT_RUNNING) { return -1; } int ret = input->set_centerfreq(input, centerfreq); if (ret != 0) { input->state = INPUT_FAILED; return -1; } input->centerfreq = centerfreq; return 0; } ================================================ FILE: src/input-common.h ================================================ /* * input-common.h * * Copyright (c) 2015-2021 Tomasz Lemiech * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . 
// MODULE_EXPORT marks a driver factory function: C linkage, plus default
// symbol visibility on GCC >= 4. input_new() finds these factories with dlsym().
#if __GNUC__ >= 4
#define MODULE_EXPORT extern "C" __attribute__((visibility("default")))
#else
#define MODULE_EXPORT extern "C"
#endif /* __GNUC__ */

// Sample encodings a driver can report in input_t::sfmt.
typedef enum { SFMT_UNDEF = 0, SFMT_U8, SFMT_S8, SFMT_S16, SFMT_F32 } sample_format_t;
// Number of enumerators in sample_format_t (keep in sync with the enum).
#define SAMPLE_FORMAT_CNT 5

// Lifecycle states stored in input_t::state.
typedef enum { INPUT_UNKNOWN = 0, INPUT_INITIALIZED, INPUT_RUNNING, INPUT_FAILED, INPUT_STOPPED, INPUT_DISABLED } input_state_t;
// Number of enumerators in input_state_t (keep in sync with the enum).
#define INPUT_STATE_CNT 6

typedef struct input_t input_t;

// Generic description of one input: shared sample buffer, driver callbacks
// and the receive thread that feeds the buffer.
struct input_t {
    // circular sample buffer; circbuffer_append() also writes past buf_size,
    // so the allocation must be larger than buf_size
    unsigned char* buffer;
    // driver-private data, cast to the driver's own type by each driver
    void* dev_data;
    // buf_size: wrap length of the circular buffer
    // bufe: write position, advanced by circbuffer_append()
    // bufs: presumably the consumer's read position - the consumer is not part of this header
    size_t buf_size, bufs, bufe;
    // incremented by circbuffer_append() when the write position passes bufs
    size_t overflow_count;
    input_state_t state;
    sample_format_t sfmt;
    // NOTE(review): set by the driver factories; presumably the full-scale sample value - confirm
    float fullscale;
    int bytes_per_sample;
    int sample_rate;
    // last center frequency accepted by input_set_centerfreq()
    int centerfreq;
    // optional: input_parse_config() returns 0 when this is NULL
    int (*parse_config)(input_t* const input, libconfig::Setting& cfg);
    // mandatory: called by input_init(); a negative result marks the input as failed
    int (*init)(input_t* const input);
    void* (*run_rx_thread)(void* input_ptr);  // to be launched via pthread_create()
    // mandatory: called by input_set_centerfreq(); non-zero result means failure
    int (*set_centerfreq)(input_t* const input, int const centerfreq);
    // optional: called by input_stop() while the input is running
    int (*stop)(input_t* const input);
    // thread created by input_start() and joined by input_stop()
    pthread_t rx_thread;
    // protects the buffer positions; initialized by input_init()
    pthread_mutex_t buffer_lock;
};

// Creates a driver instance by calling the exported "<type>_input_new" factory; NULL if not found.
input_t* input_new(char const* const type);
// Runs the driver init callback and initializes buffer_lock; 0 on success, -1 on failure.
int input_init(input_t* const input);
// Passes cfg to the driver's parse_config callback, if it has one.
int input_parse_config(input_t* const input, libconfig::Setting& cfg);
// Starts the receive thread; 0 on success, -1 on failure.
int input_start(input_t* const input);
// Retunes a running input; 0 on success, -1 on failure.
int input_set_centerfreq(input_t* const input, int const centerfreq);
// Stops the driver and joins the receive thread; 0 on success, -1 on failure.
int input_stop(input_t* const input);
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . */ #include "input-file.h" // file_dev_data_t #include #include // SCHAR_MAX #include #include #include // FIXME: get rid of this #include // usleep #include // Setting #include "input-common.h" // input_t, sample_format_t, input_state_t, MODULE_EXPORT #include "input-helpers.h" // circbuffer_append #include "rtl_airband.h" // do_exit, fft_size, debug_print, XCALLOC, error() using namespace std; int file_parse_config(input_t* const input, libconfig::Setting& cfg) { assert(input != NULL); file_dev_data_t* dev_data = (file_dev_data_t*)input->dev_data; assert(dev_data != NULL); if (cfg.exists("filepath")) { dev_data->filepath = strdup(cfg["filepath"]); } else { cerr << "File configuration error: no 'filepath' given\n"; error(); } if (cfg.exists("speedup_factor")) { if (cfg["speedup_factor"].getType() == libconfig::Setting::TypeInt) { dev_data->speedup_factor = (int)cfg["speedup_factor"]; } else if (cfg["speedup_factor"].getType() == libconfig::Setting::TypeFloat) { dev_data->speedup_factor = (float)cfg["speedup_factor"]; } else { cerr << "File configuration error: 'speedup_factor' must be a float or int if set\n"; error(); } if (dev_data->speedup_factor <= 0.0) { cerr << "File configuration error: 'speedup_factor' must be >= 0.0\n"; error(); } } else { dev_data->speedup_factor = 4; } return 0; } int file_init(input_t* const input) { assert(input != NULL); file_dev_data_t* dev_data = (file_dev_data_t*)input->dev_data; assert(dev_data != NULL); dev_data->input_file = fopen(dev_data->filepath, "rb"); if (!dev_data->input_file) { cerr << "File input failed to open '" << dev_data->filepath << "' - " << strerror(errno) << 
endl; error(); } log(LOG_INFO, "File input %s initialized\n", dev_data->filepath); return 0; } void* file_rx_thread(void* ctx) { input_t* input = (input_t*)ctx; assert(input != NULL); assert(input->sample_rate != 0); file_dev_data_t* dev_data = (file_dev_data_t*)input->dev_data; assert(dev_data != NULL); assert(dev_data->input_file != NULL); assert(dev_data->speedup_factor != 0.0); size_t buf_len = (input->buf_size / 2) - 1; unsigned char* buf = (unsigned char*)XCALLOC(1, buf_len); float time_per_byte_ms = 1000 / (input->sample_rate * input->bytes_per_sample * 2 * dev_data->speedup_factor); log(LOG_DEBUG, "sample_rate: %d, bytes_per_sample: %d, speedup_factor: %f, time_per_byte_ms: %f\n", input->sample_rate, input->bytes_per_sample, dev_data->speedup_factor, time_per_byte_ms); input->state = INPUT_RUNNING; while (true) { if (do_exit) { break; } if (feof(dev_data->input_file)) { log(LOG_INFO, "File '%s': hit end of file at %d, disabling\n", dev_data->filepath, ftell(dev_data->input_file)); input->state = INPUT_FAILED; break; } if (ferror(dev_data->input_file)) { log(LOG_ERR, "File '%s': read error (%d), disabling\n", dev_data->filepath, ferror(dev_data->input_file)); input->state = INPUT_FAILED; break; } timeval start; gettimeofday(&start, NULL); size_t space_left; pthread_mutex_lock(&input->buffer_lock); if (input->bufe >= input->bufs) { space_left = input->bufs + (input->buf_size - input->bufe); } else { space_left = input->bufs - input->bufe; } pthread_mutex_unlock(&input->buffer_lock); if (space_left > buf_len) { size_t len = fread(buf, sizeof(unsigned char), buf_len, dev_data->input_file); circbuffer_append(input, buf, len); timeval end; gettimeofday(&end, NULL); int time_taken_ms = delta_sec(&start, &end) * 1000; int sleep_time_ms = len * time_per_byte_ms - time_taken_ms; if (sleep_time_ms > 0) { SLEEP(sleep_time_ms); } } else { SLEEP(10); } } free(buf); return 0; } int file_set_centerfreq(input_t* const /*input*/, int const /*centerfreq*/) { return 0; } int 
file_stop(input_t* const input) { assert(input != NULL); file_dev_data_t* dev_data = (file_dev_data_t*)input->dev_data; assert(dev_data != NULL); fclose(dev_data->input_file); dev_data->input_file = NULL; return 0; } MODULE_EXPORT input_t* file_input_new() { file_dev_data_t* dev_data = (file_dev_data_t*)XCALLOC(1, sizeof(file_dev_data_t)); dev_data->input_file = NULL; dev_data->speedup_factor = 0.0; input_t* input = (input_t*)XCALLOC(1, sizeof(input_t)); input->dev_data = dev_data; input->state = INPUT_UNKNOWN; input->sfmt = SFMT_U8; input->fullscale = (float)SCHAR_MAX - 0.5f; input->bytes_per_sample = sizeof(unsigned char); input->sample_rate = 0; input->parse_config = &file_parse_config; input->init = &file_init; input->run_rx_thread = &file_rx_thread; input->set_centerfreq = &file_set_centerfreq; input->stop = &file_stop; return input; } ================================================ FILE: src/input-file.h ================================================ /* * input-file.h * RTLSDR-specific declarations * * Copyright (c) 2015-2021 Tomasz Lemiech * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . 
// Private state of the file input driver (stored in input_t::dev_data).
typedef struct {
    // path of the sample file; strdup()ed from the 'filepath' setting
    char* filepath;
    // handle opened by file_init() and closed by file_stop(); NULL while closed
    FILE* input_file;
    // playback rate multiplier from the 'speedup_factor' setting (4 when not configured)
    float speedup_factor;
} file_dev_data_t;
*/ void circbuffer_append(input_t* const input, unsigned char* buf, size_t len) { if (len == 0) return; pthread_mutex_lock(&input->buffer_lock); size_t space_left = input->buf_size - input->bufe; if (space_left >= len) { memcpy(input->buffer + input->bufe, buf, len); if (input->bufe == 0) { memcpy(input->buffer + input->buf_size, input->buffer, std::min(len, 2 * input->bytes_per_sample * fft_size)); debug_print("tail_len=%zu bytes\n", std::min(len, 2 * input->bytes_per_sample * fft_size)); } } else { memcpy(input->buffer + input->bufe, buf, space_left); memcpy(input->buffer, buf + space_left, len - space_left); memcpy(input->buffer + input->buf_size, input->buffer, std::min(len - space_left, 2 * input->bytes_per_sample * fft_size)); debug_print("buf wrap: space_left=%zu len=%zu bufe=%zu wrap_len=%zu tail_len=%zu\n", space_left, len, input->bufe, len - space_left, std::min(len - space_left, 2 * input->bytes_per_sample * fft_size)); } size_t old_end = input->bufe; input->bufe = (input->bufe + len) % input->buf_size; if (old_end < input->bufs && input->bufe >= input->bufs) { std::cerr << "Warning: buffer overflow\n"; input->overflow_count++; } pthread_mutex_unlock(&input->buffer_lock); } ================================================ FILE: src/input-helpers.h ================================================ /* * input-helpers.h * Convenience functions to be called by input drivers * * Copyright (c) 2015-2021 Tomasz Lemiech * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
// input-helpers.cpp
// Appends len bytes from buf to input's circular buffer while holding
// input->buffer_lock; wraps at input->buf_size and counts overflows in
// input->overflow_count. Does nothing when len is 0.
void circbuffer_append(input_t* const input, unsigned char* buf, size_t len);
*/ #include "input-mirisdr.h" // mirisdr_dev_data_t #include #include // SCHAR_MAX #include #include // uint32_t #include #include #include #include // FIXME: get rid of this #include #include // Setting #include "input-common.h" // input_t, sample_format_t, input_state_t, MODULE_EXPORT #include "input-helpers.h" // circbuffer_append #include "rtl_airband.h" // do_exit, fft_size, debug_print, XCALLOC, error() using namespace std; static void mirisdr_callback(unsigned char* buf, uint32_t len, void* ctx) { if (do_exit) return; input_t* input = (input_t*)ctx; circbuffer_append(input, buf, (size_t)len); } /* based on librtlsdr-keenerd, (c) Kyle Keen */ static bool mirisdr_nearest_gain(mirisdr_dev_t* dev, int target_gain, int* nearest) { assert(nearest != NULL); int i, r, err1, err2, count; int* gains; r = mirisdr_set_tuner_gain_mode(dev, 1); if (r < 0) { return false; } count = mirisdr_get_tuner_gains(dev, NULL); if (count <= 0) { return false; } gains = (int*)XCALLOC(count, sizeof(int)); count = mirisdr_get_tuner_gains(dev, gains); *nearest = gains[0]; for (i = 0; i < count; i++) { err1 = abs(target_gain - *nearest); err2 = abs(target_gain - gains[i]); if (err2 < err1) { *nearest = gains[i]; } } free(gains); return true; } static int mirisdr_find_device_by_serial(char const* const s) { char vendor[256] = {0}, product[256] = {0}, serial[256] = {0}; int count = mirisdr_get_device_count(); if (count < 1) { return -1; } for (int i = 0; i < count; i++) { mirisdr_get_device_usb_strings(i, vendor, product, serial); if (strcmp(s, serial) != 0) { continue; } return i; } return -1; } int mirisdr_init(input_t* const input) { mirisdr_dev_data_t* dev_data = (mirisdr_dev_data_t*)input->dev_data; if (dev_data->serial != NULL) { dev_data->index = mirisdr_find_device_by_serial(dev_data->serial); if (dev_data->index < 0) { cerr << "MiriSDR device with serial number " << dev_data->serial << " not found\n"; error(); } } dev_data->dev = NULL; mirisdr_open(&dev_data->dev, dev_data->index); 
if (NULL == dev_data->dev) { log(LOG_ERR, "Failed to open mirisdr device #%d.\n", dev_data->index); error(); } char transfer_str[] = "BULK"; char sample_format_str[] = "504_S8"; mirisdr_dev_t* miri = dev_data->dev; int r = mirisdr_set_transfer(miri, transfer_str); if (r < 0) { log(LOG_ERR, "Failed to set bulk transfer mode for MiriSDR device #%d: error %d\n", dev_data->index, r); error(); } r = mirisdr_set_sample_rate(miri, input->sample_rate); if (r < 0) { log(LOG_ERR, "Failed to set sample rate for device #%d. Error %d.\n", dev_data->index, r); } r = mirisdr_set_center_freq(miri, input->centerfreq - dev_data->correction); if (r < 0) { log(LOG_ERR, "Failed to set center freq for device #%d. Error %d.\n", dev_data->index, r); } int ngain = 0; if (mirisdr_nearest_gain(miri, dev_data->gain, &ngain) != true) { log(LOG_ERR, "Failed to read supported gain list for device #%d\n", dev_data->index); error(); } r = mirisdr_set_tuner_gain_mode(miri, 1); r |= mirisdr_set_tuner_gain(miri, ngain); if (r < 0) { log(LOG_ERR, "Failed to set gain to %d for device #%d: error %d\n", ngain, dev_data->index, r); } else { log(LOG_INFO, "Device #%d: gain set to %d dB\n", dev_data->index, mirisdr_get_tuner_gain(miri)); } r = mirisdr_set_sample_format(miri, sample_format_str); if (r < 0) { log(LOG_ERR, "Failed to set sample format for device #%d: error %d\n", dev_data->index, r); error(); } mirisdr_reset_buffer(miri); log(LOG_INFO, "MiriSDR device %d initialized\n", dev_data->index); return 0; } void* mirisdr_rx_thread(void* ctx) { input_t* input = (input_t*)ctx; mirisdr_dev_data_t* dev_data = (mirisdr_dev_data_t*)input->dev_data; assert(dev_data->dev != NULL); input->state = INPUT_RUNNING; if (mirisdr_read_async(dev_data->dev, mirisdr_callback, ctx, dev_data->bufcnt, MIRISDR_BUFSIZE) < 0) { log(LOG_ERR, "MiriSDR device #%d: async read failed, disabling\n", dev_data->index); input->state = INPUT_FAILED; } return 0; } int mirisdr_stop(input_t* const input) { mirisdr_dev_data_t* dev_data = 
(mirisdr_dev_data_t*)input->dev_data; assert(dev_data->dev != NULL); if (mirisdr_cancel_async(dev_data->dev) < 0) { return -1; } return 0; } int mirisdr_set_centerfreq(input_t* const input, int const centerfreq) { mirisdr_dev_data_t* dev_data = (mirisdr_dev_data_t*)input->dev_data; assert(dev_data->dev != NULL); int r = mirisdr_set_center_freq(dev_data->dev, centerfreq - dev_data->correction); if (r < 0) { log(LOG_ERR, "Failed to set centerfreq for MiriSDR device #%d: error %d\n", dev_data->index, r); return -1; } return 0; } int mirisdr_parse_config(input_t* const input, libconfig::Setting& cfg) { mirisdr_dev_data_t* dev_data = (mirisdr_dev_data_t*)input->dev_data; if (cfg.exists("serial")) { dev_data->serial = strdup(cfg["serial"]); } else if (cfg.exists("index")) { dev_data->index = (int)cfg["index"]; } else { cerr << "MiriSDR configuration error: no index and no serial number given\n"; error(); } if (cfg.exists("gain")) { dev_data->gain = (int)cfg["gain"]; } else { cerr << "MiriSDR configuration error: gain is not configured\n"; error(); } if (cfg.exists("correction")) { dev_data->correction = (int)cfg["correction"]; } if (cfg.exists("num_buffers")) { dev_data->bufcnt = (int)(cfg["num_buffers"]); if (dev_data->bufcnt < 1) { cerr << "MiriSDR configuration error: num_buffers must be greater than 0\n"; error(); } } return 0; } MODULE_EXPORT input_t* mirisdr_input_new() { mirisdr_dev_data_t* dev_data = (mirisdr_dev_data_t*)XCALLOC(1, sizeof(mirisdr_dev_data_t)); dev_data->index = -1; // invalid default receiver index dev_data->gain = -1; // invalid default gain value dev_data->bufcnt = MIRISDR_DEFAULT_LIBUSB_BUFFER_COUNT; /* return &( input_t ){ .dev_data = dev_data, .state = INPUT_UNKNOWN, .sfmt = SFMT_U8, .sample_rate = MIRISDR_DEFAULT_SAMPLE_RATE, .parse_config = &mirisdr_parse_config, .init = &mirisdr_init, .run_rx_thread = &mirisdr_rx_thread, .set_centerfreq = &mirisdr_set_centerfreq, .stop = &mirisdr_stop }; */ input_t* input = (input_t*)XCALLOC(1, 
sizeof(input_t)); input->dev_data = dev_data; input->state = INPUT_UNKNOWN; input->sfmt = SFMT_S8; input->fullscale = (float)SCHAR_MAX - 0.5f; input->bytes_per_sample = sizeof(char); input->sample_rate = MIRISDR_DEFAULT_SAMPLE_RATE; input->parse_config = &mirisdr_parse_config; input->init = &mirisdr_init; input->run_rx_thread = &mirisdr_rx_thread; input->set_centerfreq = &mirisdr_set_centerfreq; input->stop = &mirisdr_stop; return input; } ================================================ FILE: src/input-mirisdr.h ================================================ /* * input-mirisdr.h * MiriSDR-specific declarations * * Copyright (c) 2015-2021 Tomasz Lemiech * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . 
*/
#include  // mirisdr_dev_t

// Buffer length argument passed to mirisdr_read_async() (presumably bytes - confirm against libmirisdr)
#define MIRISDR_BUFSIZE 320000
// Default for mirisdr_dev_data_t::bufcnt; the "num_buffers" config option overrides it
#define MIRISDR_DEFAULT_LIBUSB_BUFFER_COUNT 10
// Sample rate assigned to new MiriSDR inputs by mirisdr_input_new()
#define MIRISDR_DEFAULT_SAMPLE_RATE 2560000

// Per-device state of a MiriSDR input; stored in input_t::dev_data.
typedef struct {
    mirisdr_dev_t* dev;  // pointer to libmirisdr device struct
    char* serial;        // dongle serial number
    int index;           // dongle index
    int correction;      // correction in Hertz (PPM correction is not supported by libmirisdr)
    int gain;            // gain in dB
    int bufcnt;          // libusb buffer count
} mirisdr_dev_data_t;
================================================ FILE: src/input-rtlsdr.cpp ================================================ /* * input-rtlsdr.cpp * RTLSDR-specific routines * * Copyright (c) 2015-2021 Tomasz Lemiech * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see .
*/ #include "input-rtlsdr.h" // rtlsdr_dev_data_t #include #include // SCHAR_MAX #include #include // uint32_t #include #include #include #include // FIXME: get rid of this #include #include // Setting #include "input-common.h" // input_t, sample_format_t, input_state_t, MODULE_EXPORT #include "input-helpers.h" // circbuffer_append #include "rtl_airband.h" // do_exit, fft_size, debug_print, XCALLOC, error() using namespace std; static void rtlsdr_callback(unsigned char* buf, uint32_t len, void* ctx) { if (do_exit) return; input_t* input = (input_t*)ctx; circbuffer_append(input, buf, (size_t)len); } /* based on librtlsdr-keenerd, (c) Kyle Keen */ static bool rtlsdr_nearest_gain(rtlsdr_dev_t* dev, int target_gain, int* nearest) { assert(nearest != NULL); int i, r, err1, err2, count; int* gains; r = rtlsdr_set_tuner_gain_mode(dev, 1); if (r < 0) { return false; } count = rtlsdr_get_tuner_gains(dev, NULL); if (count <= 0) { return false; } gains = (int*)XCALLOC(count, sizeof(int)); count = rtlsdr_get_tuner_gains(dev, gains); *nearest = gains[0]; for (i = 0; i < count; i++) { err1 = abs(target_gain - *nearest); err2 = abs(target_gain - gains[i]); if (err2 < err1) { *nearest = gains[i]; } } free(gains); return true; } static int rtlsdr_find_device_by_serial(char const* const s) { char vendor[256] = {0}, product[256] = {0}, serial[256] = {0}; int count = rtlsdr_get_device_count(); if (count < 1) { return -1; } for (int i = 0; i < count; i++) { rtlsdr_get_device_usb_strings(i, vendor, product, serial); if (strcmp(s, serial) != 0) { continue; } return i; } return -1; } int rtlsdr_init(input_t* const input) { rtlsdr_dev_data_t* dev_data = (rtlsdr_dev_data_t*)input->dev_data; if (dev_data->serial != NULL) { dev_data->index = rtlsdr_find_device_by_serial(dev_data->serial); if (dev_data->index < 0) { cerr << "RTLSDR device with serial number " << dev_data->serial << " not found\n"; error(); } } dev_data->dev = NULL; rtlsdr_open(&dev_data->dev, dev_data->index); if (NULL == 
dev_data->dev) { log(LOG_ERR, "Failed to open rtlsdr device #%d.\n", dev_data->index); error(); } rtlsdr_dev_t* rtl = dev_data->dev; int r = rtlsdr_set_sample_rate(rtl, input->sample_rate); if (r < 0) { log(LOG_ERR, "Failed to set sample rate for device #%d. Error %d.\n", dev_data->index, r); } r = rtlsdr_set_center_freq(rtl, input->centerfreq); if (r < 0) { log(LOG_ERR, "Failed to set center freq for device #%d. Error %d.\n", dev_data->index, r); } r = rtlsdr_set_freq_correction(rtl, dev_data->correction); if (r < 0 && r != -2) { log(LOG_ERR, "Failed to set freq correction for device #%d. Error %d.\n", dev_data->index, r); } // Fitipower FC0012 gain needs to be initialized to its lowest value before setting it to the desired value if (rtlsdr_get_tuner_type(rtl) == RTLSDR_TUNER_FC0012) { int initialGain = 0; if (rtlsdr_nearest_gain(rtl, -99, &initialGain) != true) { log(LOG_ERR, "Failed to read supported gain list for device #%d\n", dev_data->index); error(); } r |= rtlsdr_set_tuner_gain(rtl, initialGain); if (r < 0) { log(LOG_ERR, "Failed to initialize gain for device #%d: error %d\n", (float)initialGain / 10.f, dev_data->index, r); } } int ngain = 0; if (rtlsdr_nearest_gain(rtl, dev_data->gain, &ngain) != true) { log(LOG_ERR, "Failed to read supported gain list for device #%d\n", dev_data->index); error(); } r = rtlsdr_set_tuner_gain_mode(rtl, 1); r |= rtlsdr_set_tuner_gain(rtl, ngain); if (r < 0) { log(LOG_ERR, "Failed to set gain to %0.2f for device #%d: error %d\n", (float)ngain / 10.f, dev_data->index, r); } else { log(LOG_INFO, "Device #%d: gain set to %0.2f dB\n", dev_data->index, (float)rtlsdr_get_tuner_gain(rtl) / 10.f); } r = rtlsdr_set_agc_mode(rtl, 0); if (r < 0) { log(LOG_ERR, "Failed to disable AGC for device #%d. 
Error %d.\n", dev_data->index, r); } rtlsdr_reset_buffer(rtl); log(LOG_INFO, "RTLSDR device %d initialized\n", dev_data->index); return 0; } void* rtlsdr_rx_thread(void* ctx) { input_t* input = (input_t*)ctx; rtlsdr_dev_data_t* dev_data = (rtlsdr_dev_data_t*)input->dev_data; assert(dev_data->dev != NULL); input->state = INPUT_RUNNING; if (rtlsdr_read_async(dev_data->dev, rtlsdr_callback, ctx, dev_data->bufcnt, RTLSDR_BUFSIZE) < 0) { log(LOG_ERR, "RTLSDR device #%d: async read failed, disabling\n", dev_data->index); input->state = INPUT_FAILED; } return 0; } int rtlsdr_stop(input_t* const input) { rtlsdr_dev_data_t* dev_data = (rtlsdr_dev_data_t*)input->dev_data; assert(dev_data->dev != NULL); if (rtlsdr_cancel_async(dev_data->dev) < 0) { return -1; } return rtlsdr_close(dev_data->dev); } int rtlsdr_set_centerfreq(input_t* const input, int const centerfreq) { rtlsdr_dev_data_t* dev_data = (rtlsdr_dev_data_t*)input->dev_data; assert(dev_data->dev != NULL); int r = rtlsdr_set_center_freq(dev_data->dev, centerfreq); if (r < 0) { log(LOG_ERR, "Failed to set centerfreq for RTLSDR device #%d: error %d\n", dev_data->index, r); return -1; } return 0; } int rtlsdr_parse_config(input_t* const input, libconfig::Setting& cfg) { rtlsdr_dev_data_t* dev_data = (rtlsdr_dev_data_t*)input->dev_data; if (cfg.exists("serial")) { dev_data->serial = strdup(cfg["serial"]); } else if (cfg.exists("index")) { dev_data->index = (int)cfg["index"]; } else { cerr << "RTLSDR configuration error: no index and no serial number given\n"; error(); } if (cfg.exists("gain")) { if (cfg["gain"].getType() == libconfig::Setting::TypeInt) { // backward compatibility dev_data->gain = (int)cfg["gain"] * 10; } else if (cfg["gain"].getType() == libconfig::Setting::TypeFloat) { dev_data->gain = (int)((float)cfg["gain"] * 10.0f); } } else { cerr << "RTLSDR configuration error: gain is not configured\n"; error(); } if (cfg.exists("correction")) { dev_data->correction = (int)cfg["correction"]; } if 
(cfg.exists("buffers")) { dev_data->bufcnt = (int)(cfg["buffers"]); if (dev_data->bufcnt < 1) { cerr << "RTLSDR configuration error: buffers must be greater than 0\n"; error(); } } return 0; } MODULE_EXPORT input_t* rtlsdr_input_new() { rtlsdr_dev_data_t* dev_data = (rtlsdr_dev_data_t*)XCALLOC(1, sizeof(rtlsdr_dev_data_t)); dev_data->index = -1; // invalid default receiver index dev_data->gain = -1; // invalid default gain value dev_data->bufcnt = RTLSDR_DEFAULT_LIBUSB_BUFFER_COUNT; /* return &( input_t ){ .dev_data = dev_data, .state = INPUT_UNKNOWN, .sfmt = SFMT_U8, .sample_rate = RTLSDR_DEFAULT_SAMPLE_RATE, .parse_config = &rtlsdr_parse_config, .init = &rtlsdr_init, .run_rx_thread = &rtlsdr_rx_thread, .set_centerfreq = &rtlsdr_set_centerfreq, .stop = &rtlsdr_stop }; */ input_t* input = (input_t*)XCALLOC(1, sizeof(input_t)); input->dev_data = dev_data; input->state = INPUT_UNKNOWN; input->sfmt = SFMT_U8; input->fullscale = (float)SCHAR_MAX - 0.5f; input->bytes_per_sample = sizeof(unsigned char); input->sample_rate = RTLSDR_DEFAULT_SAMPLE_RATE; input->parse_config = &rtlsdr_parse_config; input->init = &rtlsdr_init; input->run_rx_thread = &rtlsdr_rx_thread; input->set_centerfreq = &rtlsdr_set_centerfreq; input->stop = &rtlsdr_stop; return input; } ================================================ FILE: src/input-rtlsdr.h ================================================ /* * input-rtlsdr.h * RTLSDR-specific declarations * * Copyright (c) 2015-2021 Tomasz Lemiech * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, see . */
#include  // rtlsdr_dev_t

// Buffer length argument passed to rtlsdr_read_async() (presumably bytes - confirm against librtlsdr)
#define RTLSDR_BUFSIZE 320000
// Default for rtlsdr_dev_data_t::bufcnt; the "buffers" config option overrides it
#define RTLSDR_DEFAULT_LIBUSB_BUFFER_COUNT 10
// Sample rate assigned to new RTLSDR inputs by rtlsdr_input_new()
#define RTLSDR_DEFAULT_SAMPLE_RATE 2560000

// Per-device state of an RTLSDR input; stored in input_t::dev_data.
typedef struct {
    rtlsdr_dev_t* dev;  // pointer to librtlsdr device struct
    char* serial;       // dongle serial number
    int index;          // dongle index
    int correction;     // PPM correction
    int gain;           // gain in tenths of dB
    int bufcnt;         // libusb buffer count
} rtlsdr_dev_data_t;
================================================ FILE: src/input-soapysdr.cpp ================================================ /* * input-soapysdr.cpp * SoapySDR-specific routines * * Copyright (c) 2015-2021 Tomasz Lemiech * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see .
*/ #include "input-soapysdr.h" // soapysdr_dev_data_t #include // SoapySDRDevice, SoapySDRDevice_makeStrArgs #include // SOAPY_SDR_CS constants #include // SOAPY_SDR_API_VERSION #include #include // SCHAR_MAX, SHRT_MAX #include // round #include // calloc #include // memcpy, strcmp #include // LOG_* macros #include #include // Setting #include "input-common.h" // input_t, sample_format_t, input_state_t, MODULE_EXPORT #include "input-helpers.h" // circbuffer_append #include "rtl_airband.h" // do_exit, fft_size, debug_print, XCALLOC, error() using namespace std; // Map SoapySDR sample format string to our internal sample format // and set bytes_per_sample and fullscale values appropriately. // We prefer U8 and S8 over S16 to minimize CPU load. // If fullscale is > 0, it means it has been read by // SoapySDRDevice_getNativeStreamFormat, so we treat this value as valid. // Otherwise, guess a suitable default value. static bool soapysdr_match_sfmt(input_t* const input, char const* const fmt, double const fullscale) { if (strcmp(fmt, SOAPY_SDR_CU8) == 0) { input->sfmt = SFMT_U8; input->bytes_per_sample = sizeof(unsigned char); input->fullscale = (fullscale > 0 ? fullscale : (float)SCHAR_MAX - 0.5f); goto matched; } else if (strcmp(fmt, SOAPY_SDR_CS8) == 0) { input->sfmt = SFMT_S8; input->bytes_per_sample = sizeof(char); input->fullscale = (fullscale > 0 ? fullscale : (float)SCHAR_MAX - 0.5f); goto matched; } else if (strcmp(fmt, SOAPY_SDR_CS16) == 0) { input->sfmt = SFMT_S16; input->bytes_per_sample = sizeof(short); input->fullscale = (fullscale > 0 ? fullscale : (float)SHRT_MAX - 0.5f); goto matched; } else if (strcmp(fmt, SOAPY_SDR_CF32) == 0) { input->sfmt = SFMT_F32; input->bytes_per_sample = sizeof(float); input->fullscale = (fullscale > 0 ? fullscale : 1.0f); goto matched; } return false; matched: soapysdr_dev_data_t* dev_data = (soapysdr_dev_data_t*)input->dev_data; dev_data->sample_format = strdup(fmt); return true; } // Choose a suitable sample format. 
// Bail out if no supported sample format is found. static bool soapysdr_choose_sample_format(SoapySDRDevice* const sdr, input_t* const input) { bool ret = false; size_t len = 0; char** formats = NULL; soapysdr_dev_data_t* dev_data = (soapysdr_dev_data_t*)input->dev_data; input->sfmt = SFMT_UNDEF; // First try device's native format to avoid extra conversion double fullscale = 0.0; char* fmt = SoapySDRDevice_getNativeStreamFormat(sdr, SOAPY_SDR_RX, dev_data->channel, &fullscale); if (soapysdr_match_sfmt(input, fmt, fullscale) == true) { log(LOG_NOTICE, "SoapySDR: device '%s': using native sample format '%s' (fullScale=%.1f)\n", dev_data->device_string, fmt, input->fullscale); ret = true; goto end; } // Native format is not supported by rtl_airband; find out if there is anything else. formats = SoapySDRDevice_getStreamFormats(sdr, SOAPY_SDR_RX, dev_data->channel, &len); if (formats == NULL || len == 0) { log(LOG_ERR, "SoapySDR: device '%s': failed to read supported sample formats\n", dev_data->device_string); ret = false; goto end; } for (size_t i = 0; i < len; i++) { if (soapysdr_match_sfmt(input, formats[i], -1.0) == true) { log(LOG_NOTICE, "SoapySDR: device '%s': using non-native sample format '%s' (assuming fullScale=%.1f)\n", dev_data->device_string, formats[i], input->fullscale); ret = true; goto end; } } // Nothing found; we can't use this device. 
log(LOG_ERR, "SoapySDR: device '%s': no supported sample format found\n", dev_data->device_string); end: return ret; } static int sdrplay_get_nearest_sample_rate(SoapySDRDevice* sdr, int channel, int sample_rate) { size_t len = 0; double sr = (double)sample_rate; SoapySDRRange* range = SoapySDRDevice_getSampleRateRange(sdr, SOAPY_SDR_RX, channel, &len); if (range == NULL) { log(LOG_ERR, "SoapySDR: failed to read supported sampling rate ranges from the device\n"); return -1; } debug_print("Got %zu ranges\n", len); double nearest_rate = range[0].minimum; double offset1, offset2; for (size_t i = 0; i < len; i++) { debug_print("sr=%.1f min=%.1f max=%.1f step=%.1f\n", sr, range[i].minimum, range[i].maximum, range[i].step); if (sr >= range[i].minimum && sr <= range[i].maximum) { debug_print("Found suitable range: min=%.0f max=%0.f step=%0.f\n", range[i].minimum, range[i].maximum, range[i].step); if (range[i].step == 0.0 || range[i].step >= (range[i].maximum - range[i].minimum)) { return (int)(range[i].maximum - sr > sr - range[i].minimum ? 
range[i].minimum : range[i].maximum); } sr = (int)(range[i].minimum + range[i].step * round((sr - range[i].minimum) / range[i].step)); if (sr > range[i].maximum) { sr = (int)range[i].maximum; } return (int)sr; } else { offset1 = abs(sr - nearest_rate); offset2 = abs(sr - range[i].minimum); if (offset2 < offset1) nearest_rate = range[i].minimum; offset1 = abs(sr - nearest_rate); offset2 = abs(sr - range[i].maximum); if (offset2 < offset1) nearest_rate = range[i].maximum; } } return (int)nearest_rate; } int soapysdr_parse_config(input_t* const input, libconfig::Setting& cfg) { soapysdr_dev_data_t* dev_data = (soapysdr_dev_data_t*)input->dev_data; if (cfg.exists("device_string")) { dev_data->device_string = strdup(cfg["device_string"]); } else { cerr << "SoapySDR configuration error: mandatory parameter missing: device_string\n"; error(); } if (cfg.exists("gain")) { if (cfg["gain"].getType() == libconfig::Setting::TypeInt) { dev_data->gain = (double)((int)cfg["gain"]); } else if (cfg["gain"].getType() == libconfig::Setting::TypeFloat) { dev_data->gain = (double)cfg["gain"]; } else { // Either it's a string or an unsupported type which will cause an exception - this is fine dev_data->gains = SoapySDRKwargs_fromString((const char*)cfg["gain"]); if (dev_data->gains.size < 1) { cerr << "SoapySDR configuration error: device '" << dev_data->device_string << "': gain: syntax error (must be a sequence of 'name1=value1,name2=value2,...')\n"; error(); } } dev_data->agc = false; } else { dev_data->agc = true; } if (cfg.exists("correction")) { if (cfg["correction"].getType() == libconfig::Setting::TypeInt) { dev_data->correction = (double)((int)cfg["correction"]); } else if (cfg["correction"].getType() == libconfig::Setting::TypeFloat) { dev_data->correction = (float)cfg["correction"]; } else { cerr << "SoapySDR configuration error: device '" << dev_data->device_string << "': correction value must be numeric\n"; error(); } } if (cfg.exists("channel")) { dev_data->channel = 
(size_t)(int)cfg["channel"]; } if (cfg.exists("antenna")) { dev_data->antenna = strdup(cfg["antenna"]); } // Find a suitable sample format and sample rate (unless set in the config) // based on device capabilities. // We have to do this here and not in soapysdr_init, because parse_devices() // requires sample_rate and bytes_per_sample to be set correctly in order to // calculate the size of the sample buffer, which has to be done before // soapysdr_init() is run. SoapySDRDevice* sdr = SoapySDRDevice_makeStrArgs(dev_data->device_string); if (sdr == NULL) { log(LOG_ERR, "Failed to open SoapySDR device '%s': %s\n", dev_data->device_string, SoapySDRDevice_lastError()); error(); } if (soapysdr_choose_sample_format(sdr, input) == false) { cerr << "SoapySDR configuration error: device '" << dev_data->device_string << "': no suitable sample format found\n"; error(); } if (input->sample_rate < 0) { input->sample_rate = sdrplay_get_nearest_sample_rate(sdr, dev_data->channel, SOAPYSDR_DEFAULT_SAMPLE_RATE); if (input->sample_rate < 0) { log(LOG_ERR, "Failed to find a suitable sample rate for SoapySDR device '%s'\n", dev_data->device_string); log(LOG_ERR, "Specify a supported value using \"sample_rate\" option in the device configuration\n"); error(); } } SoapySDRDevice_unmake(sdr); return 0; } int soapysdr_init(input_t* const input) { soapysdr_dev_data_t* dev_data = (soapysdr_dev_data_t*)input->dev_data; dev_data->dev = SoapySDRDevice_makeStrArgs(dev_data->device_string); if (dev_data->dev == NULL) { log(LOG_ERR, "Failed to open SoapySDR device '%s': %s\n", dev_data->device_string, SoapySDRDevice_lastError()); error(); } SoapySDRDevice* sdr = dev_data->dev; if (SoapySDRDevice_setSampleRate(sdr, SOAPY_SDR_RX, dev_data->channel, input->sample_rate) != 0) { log(LOG_ERR, "Failed to set sample rate for SoapySDR device '%s': %s\n", dev_data->device_string, SoapySDRDevice_lastError()); error(); } log(LOG_INFO, "SoapySDR: device '%s': sample rate set to %.0f sps\n", 
dev_data->device_string, SoapySDRDevice_getSampleRate(sdr, SOAPY_SDR_RX, dev_data->channel)); if (SoapySDRDevice_setFrequency(sdr, SOAPY_SDR_RX, dev_data->channel, input->centerfreq, NULL) != 0) { log(LOG_ERR, "Failed to set frequency for SoapySDR device '%s': %s\n", dev_data->device_string, SoapySDRDevice_lastError()); error(); } if (SoapySDRDevice_setFrequencyCorrection(sdr, SOAPY_SDR_RX, dev_data->channel, dev_data->correction) != 0) { log(LOG_ERR, "Failed to set frequency correction for SoapySDR device '%s': %s\n", dev_data->device_string, SoapySDRDevice_lastError()); error(); } if (dev_data->antenna != NULL) { if (SoapySDRDevice_setAntenna(sdr, SOAPY_SDR_RX, dev_data->channel, dev_data->antenna) != 0) { log(LOG_ERR, "Failed to set antenna to '%s' for SoapySDR device '%s': %s\n", dev_data->device_string, dev_data->antenna, SoapySDRDevice_lastError()); error(); } log(LOG_INFO, "SoapySDR: device '%s': antenna set to '%s'\n", dev_data->device_string, SoapySDRDevice_getAntenna(sdr, SOAPY_SDR_RX, dev_data->channel)); } if (SoapySDRDevice_setGainMode(sdr, SOAPY_SDR_RX, dev_data->channel, dev_data->agc) != 0) { log(LOG_ERR, "Failed to %s AGC for SoapySDR device '%s': %s\n", dev_data->agc ? "enable" : "disable", dev_data->device_string, SoapySDRDevice_lastError()); error(); } log(LOG_INFO, "SoapySDR: device '%s': AGC %s (requested: %s)\n", dev_data->device_string, SoapySDRDevice_getGainMode(sdr, SOAPY_SDR_RX, dev_data->channel) ? "on" : "off", dev_data->agc ? 
"on" : "off"); if (!dev_data->agc) { if (dev_data->gains.size > 0) { for (size_t i = 0; i < dev_data->gains.size; i++) { char* const key = dev_data->gains.keys[i]; double val = atof(dev_data->gains.vals[i]); if (SoapySDRDevice_setGainElement(sdr, SOAPY_SDR_RX, dev_data->channel, key, val) != 0) { log(LOG_ERR, "Failed to set gain element '%s' for SoapySDR device '%s': %s\n", key, dev_data->device_string, SoapySDRDevice_lastError()); error(); } log(LOG_INFO, "SoapySDR: device '%s': gain '%s' set to %.1f dB\n", dev_data->device_string, key, SoapySDRDevice_getGainElement(sdr, SOAPY_SDR_RX, dev_data->channel, key)); } } else { if (SoapySDRDevice_setGain(sdr, SOAPY_SDR_RX, dev_data->channel, dev_data->gain) != 0) { log(LOG_ERR, "Failed to set gain for SoapySDR device '%s': %s\n", dev_data->device_string, SoapySDRDevice_lastError()); error(); } log(LOG_INFO, "SoapySDR: device '%s': gain set to %.1f dB\n", dev_data->device_string, SoapySDRDevice_getGain(sdr, SOAPY_SDR_RX, dev_data->channel)); } } log(LOG_INFO, "SoapySDR: device '%s' initialized\n", dev_data->device_string); return 0; } void* soapysdr_rx_thread(void* ctx) { input_t* input = (input_t*)ctx; soapysdr_dev_data_t* dev_data = (soapysdr_dev_data_t*)input->dev_data; SoapySDRDevice* sdr = dev_data->dev; assert(sdr != NULL); unsigned char buf[SOAPYSDR_BUFSIZE]; // size of the buffer in number of I/Q sample pairs size_t num_elems = SOAPYSDR_BUFSIZE / (2 * input->bytes_per_sample); SoapySDRStream* rxStream = NULL; #if SOAPY_SDR_API_VERSION < 0x00080000 if (SoapySDRDevice_setupStream(sdr, &rxStream, SOAPY_SDR_RX, dev_data->sample_format, &dev_data->channel, 1, NULL) != 0) { #else if ((rxStream = SoapySDRDevice_setupStream(sdr, SOAPY_SDR_RX, dev_data->sample_format, &dev_data->channel, 1, NULL)) == NULL) { #endif /* SOAPY_SDR_API_VERSION */ log(LOG_ERR, "Failed to set up stream for SoapySDR device '%s': %s\n", dev_data->device_string, SoapySDRDevice_lastError()); input->state = INPUT_FAILED; goto cleanup; } if 
(SoapySDRDevice_activateStream(sdr, rxStream, 0, 0, 0)) { // start streaming log(LOG_ERR, "Failed to activate stream for SoapySDR device '%s': %s\n", dev_data->device_string, SoapySDRDevice_lastError()); input->state = INPUT_FAILED; goto cleanup; } input->state = INPUT_RUNNING; log(LOG_NOTICE, "SoapySDR: device '%s' started\n", dev_data->device_string); while (!do_exit) { void* bufs[] = {buf}; // array of buffers int flags; // flags set by receive operation long long timeNs; // timestamp for receive buffer int samples_read = SoapySDRDevice_readStream(sdr, rxStream, bufs, num_elems, &flags, &timeNs, SOAPYSDR_READSTREAM_TIMEOUT_US); if (samples_read < 0) { // when it's negative, it's the error code log(LOG_ERR, "SoapySDR device '%s': readStream failed: %s\n", dev_data->device_string, SoapySDR_errToStr(samples_read)); continue; } circbuffer_append(input, buf, (size_t)(samples_read * 2 * input->bytes_per_sample)); } cleanup: SoapySDRDevice_deactivateStream(sdr, rxStream, 0, 0); SoapySDRDevice_closeStream(sdr, rxStream); SoapySDRDevice_unmake(sdr); return 0; } int soapysdr_set_centerfreq(input_t* const input, int const centerfreq) { soapysdr_dev_data_t* dev_data = (soapysdr_dev_data_t*)input->dev_data; assert(dev_data->dev != NULL); if (SoapySDRDevice_setFrequency(dev_data->dev, SOAPY_SDR_RX, dev_data->channel, centerfreq, NULL) != 0) { log(LOG_ERR, "Failed to set frequency for SoapySDR device '%s': %s\n", dev_data->device_string, SoapySDRDevice_lastError()); return -1; } return 0; } MODULE_EXPORT input_t* soapysdr_input_new() { soapysdr_dev_data_t* dev_data = (soapysdr_dev_data_t*)XCALLOC(1, sizeof(soapysdr_dev_data_t)); dev_data->gain = -1.0; // invalid default gain value dev_data->agc = false; memset(&dev_data->gains, 0, sizeof(dev_data->gains)); dev_data->channel = 0; dev_data->antenna = NULL; /* return &( input_t ){ .dev_data = dev_data, .state = INPUT_UNKNOWN, .sfmt = SFMT_U8, .sample_rate = -1, .parse_config = &soapysdr_parse_config, .init = &soapysdr_init, 
.run_rx_thread = &soapysdr_rx_thread, .set_centerfreq = &soapysdr_set_centerfreq, .stop = &soapysdr_stop }; */ input_t* input = (input_t*)XCALLOC(1, sizeof(input_t)); input->dev_data = dev_data; // invalid values as defaults input->state = INPUT_UNKNOWN; input->sfmt = SFMT_UNDEF; input->fullscale = 0.0f; input->bytes_per_sample = 0; input->sample_rate = -1; input->parse_config = &soapysdr_parse_config; input->init = &soapysdr_init; input->run_rx_thread = &soapysdr_rx_thread; input->set_centerfreq = &soapysdr_set_centerfreq; input->stop = NULL; return input; } ================================================ FILE: src/input-soapysdr.h ================================================ /* * input-soapysdr.h * SoapySDR-specific declarations * * Copyright (c) 2015-2021 Tomasz Lemiech * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . 
*/
#include  // SoapySDRDevice
#include  // SoapySDRKwargs

// Target rate used to pick a device sample rate when the config does not set one
#define SOAPYSDR_DEFAULT_SAMPLE_RATE 2560000
// Size in bytes of the receive buffer used by soapysdr_rx_thread()
#define SOAPYSDR_BUFSIZE 320000
// Timeout argument passed to SoapySDRDevice_readStream()
#define SOAPYSDR_READSTREAM_TIMEOUT_US 1000000L

// Per-device state of a SoapySDR input; stored in input_t::dev_data.
typedef struct {
    SoapySDRDevice* dev;        // pointer to device struct
    char const* device_string;  // SoapySDR device arg string
    char const* sample_format;  // sample format
    char const* antenna;        // antenna name
    SoapySDRKwargs gains;       // gain elements and their values
    double correction;          // PPM correction
    double gain;                // gain in dB
    size_t channel;             // HW channel number
    bool agc;                   // enable AGC
} soapysdr_dev_data_t;
================================================ FILE: src/logging.cpp ================================================ /* * logging.cpp * * Copyright (C) 2022-2023 charlie-foxtrot * Copyright (c) 2015-2022 Tomasz Lemiech * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see .
*/ #include // va_start() / va_end() #include // fopen() #include // strerror() #include // cerr() #include "logging.h" LogDestination log_destination = SYSLOG; FILE* debugf = NULL; void error() { close_debug(); _Exit(1); } void init_debug(const char* file) { #ifdef DEBUG if (!file) return; if ((debugf = fopen(file, "a")) == NULL) { std::cerr << "Could not open debug file " << file << ": " << strerror(errno) << "\n"; error(); } #else UNUSED(file); #endif /* DEBUG */ } void close_debug() { #ifdef DEBUG if (!debugf) return; fclose(debugf); #endif /* DEBUG */ } void log(int priority, const char* format, ...) { va_list args; va_start(args, format); switch (log_destination) { case SYSLOG: vsyslog(priority, format, args); break; case STDERR: vfprintf(stderr, format, args); break; case NONE: break; } va_end(args); } ================================================ FILE: src/logging.h ================================================ /* * logging.h * * Copyright (C) 2022-2023 charlie-foxtrot * Copyright (c) 2015-2022 Tomasz Lemiech * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . */ #ifndef _LOGGING_H #define _LOGGING_H 1 #include // LOG_ERR #include // FILE #define nop() \ do { \ } while (0) #define UNUSED(x) (void)(x) #ifdef DEBUG #define DEBUG_PATH "rtl_airband_debug.log" #define debug_print(fmt, ...) 
\ do { \ fprintf(debugf, "%s(): " fmt, __func__, __VA_ARGS__); \ fflush(debugf); \ } while (0) #define debug_bulk_print(fmt, ...) \ do { \ fprintf(debugf, "%s(): " fmt, __func__, __VA_ARGS__); \ } while (0) #else #define debug_print(fmt, ...) nop() #define debug_bulk_print(fmt, ...) nop() #endif /* DEBUG */ enum LogDestination { SYSLOG, STDERR, NONE }; extern LogDestination log_destination; extern FILE* debugf; void error(); void init_debug(const char* file); void close_debug(); void log(int priority, const char* format, ...); #endif /* _LOGGING_H */ ================================================ FILE: src/mixer.cpp ================================================ /* * mixer.cpp * Mixer related routines * * Copyright (c) 2015-2021 Tomasz Lemiech * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . 
*/ #include #include #include #include #include #include #include #include "config.h" #include "rtl_airband.h" static char* err; static inline void mixer_set_error(const char* msg) { err = strdup(msg); } const char* mixer_get_error() { return (const char*)err; } mixer_t* getmixerbyname(const char* name) { for (int i = 0; i < mixer_count; i++) { if (!strcmp(mixers[i].name, name)) { debug_print("%s found at %d\n", name, i); return &mixers[i]; } } debug_print("%s not found\n", name); return NULL; } void mixer_disable(mixer_t* mixer) { mixer->enabled = false; disable_channel_outputs(&mixer->channel); } int mixer_connect_input(mixer_t* mixer, float ampfactor, float balance) { if (!mixer) { mixer_set_error("mixer is undefined"); return (-1); } int i = mixer->input_count; // allocate new mixer - this could be more efficient by pre-allocating but this // is only run at startup so not a big deal if (mixer->inputs == NULL) { mixer->inputs = (mixinput_t*)XCALLOC(i + 1, sizeof(struct mixinput_t)); mixer->inputs_todo = (bool*)XCALLOC(i + 1, sizeof(bool)); mixer->input_mask = (bool*)XCALLOC(i + 1, sizeof(bool)); } else { mixer->inputs = (mixinput_t*)XREALLOC(mixer->inputs, (i + 1) * sizeof(struct mixinput_t)); mixer->inputs_todo = (bool*)XREALLOC(mixer->inputs_todo, (i + 1) * sizeof(bool)); mixer->input_mask = (bool*)XREALLOC(mixer->input_mask, (i + 1) * sizeof(bool)); } mixer->inputs[i].wavein = (float*)XCALLOC(WAVE_LEN, sizeof(float)); if ((pthread_mutex_init(&mixer->inputs[i].mutex, NULL)) != 0) { mixer_set_error("failed to initialize input mutex"); return (-1); } mixer->inputs[i].ampfactor = ampfactor; mixer->inputs[i].ampl = fminf(1.0f, 1.0f - balance); mixer->inputs[i].ampr = fminf(1.0f, 1.0f + balance); if (balance != 0.0f) mixer->channel.mode = MM_STEREO; mixer->inputs[i].ready = false; mixer->inputs[i].has_signal = false; mixer->inputs[i].input_overrun_count = 0; mixer->input_mask[i] = true; mixer->inputs_todo[i] = true; mixer->enabled = true; 
debug_print("ampfactor=%.1f ampl=%.1f ampr=%.1f\n", mixer->inputs[i].ampfactor, mixer->inputs[i].ampl, mixer->inputs[i].ampr); return (mixer->input_count++); } void mixer_disable_input(mixer_t* mixer, int input_idx) { assert(mixer); assert(input_idx < mixer->input_count); mixer->input_mask[input_idx] = false; // break out if any inputs remain true for (int i = 0; i < mixer->input_count; i++) { if (mixer->input_mask[i]) { return; } } // all inputs are false so disable the mixer log(LOG_NOTICE, "Disabling mixer '%s' - all inputs died\n", mixer->name); mixer_disable(mixer); } void mixer_put_samples(mixer_t* mixer, int input_idx, const float* samples, bool has_signal, unsigned int len) { assert(mixer); assert(samples); assert(input_idx < mixer->input_count); mixinput_t* input = &mixer->inputs[input_idx]; pthread_mutex_lock(&input->mutex); input->has_signal = has_signal; if (has_signal) { memcpy(input->wavein, samples, len * sizeof(float)); } if (input->ready == true) { debug_print("input %d overrun\n", input_idx); input->input_overrun_count++; } else { input->ready = true; } pthread_mutex_unlock(&input->mutex); } void mix_waveforms(float* sum, const float* in, float mult, int size) { if (mult == 0.0f) { return; } for (int s = 0; s < size; s++) { sum[s] += in[s] * mult; } } /* Samples are delivered to mixer inputs in batches of WAVE_BATCH size (default 1000, ie. 1/8 secs * of audio). mixer_thread emits mixed audio in batches of the same size, but the loop runs * twice more often (MIX_DIVISOR = 2) in order to accomodate for any possible input jitter * caused by irregular process scheduling, RTL clock instability, etc. For this purpose * we allow each input batch to become delayed by 1/16 secs (max). This is accomplished by * the mixer->interval counter, which counts from 2 to 0: * - 2 - initial state after mixed audio output. We don't expect inputs to be ready yet, * but we check their readiness anyway. 
* - 1 - here we expect most (if not all) inputs to be ready, so we mix them. If there are no
 *       inputs left to handle in this WAVE_BATCH interval, we emit the mixed audio and reset
 *       mixer->interval to the initial state (2).
 * - 0 - here we expect to get output from all delayed inputs, which were not ready in the
 *       interval. Any input which is still not ready, is skipped (filled with 0s), because
 *       here we must emit the mixed audio to keep the desired audio bitrate.
 *
 * Thread entry point. param must be a non-NULL Signal*, which is signalled (send()) every
 * time a mixer's output channel is marked CH_READY. The loop runs until do_exit is set;
 * the function always returns 0.
 */
void* mixer_thread(void* param) {
    assert(param != NULL);
    Signal* signal = (Signal*)param;
    // duration of one WAVE_BATCH of audio in microseconds, divided by MIX_DIVISOR
    int interval_usec = 1e+6 * WAVE_BATCH / WAVE_RATE / MIX_DIVISOR;

    debug_print("Starting mixer thread, signal %p\n", signal);

    // nothing to do when no mixers are configured
    if (mixer_count <= 0)
        return 0;
#ifdef DEBUG
    // ts/te are only used to log the time elapsed between consecutive mixer outputs
    struct timeval ts, te;
    gettimeofday(&ts, NULL);
#endif /* DEBUG */
    while (!do_exit) {
        usleep(interval_usec);
        if (do_exit)
            return 0;
        for (int i = 0; i < mixer_count; i++) {
            mixer_t* mixer = mixers + i;
            if (mixer->enabled == false)
                continue;
            channel_t* channel = &mixer->channel;

            if (channel->state == CH_READY) {  // previous output not yet handled by output thread
                // give the consumer more time while intervals remain; once the counter
                // runs out, count an output overrun and carry on mixing
                if (--mixer->interval > 0) {
                    continue;
                } else {
                    debug_print("mixer[%d]: output channel overrun\n", i);
                    mixer->output_overrun_count++;
                }
            }

            for (int j = 0; j < mixer->input_count; j++) {
                mixinput_t* input = mixer->inputs + j;
                // the input mutex is the same one taken by mixer_put_samples()
                pthread_mutex_lock(&input->mutex);
                if (mixer->inputs_todo[j] && mixer->input_mask[j] && input->ready) {
                    // first input mixed into a dirty channel: clear the output buffers first
                    if (channel->state == CH_DIRTY) {
                        memset(channel->waveout, 0, WAVE_BATCH * sizeof(float));
                        if (channel->mode == MM_STEREO)
                            memset(channel->waveout_r, 0, WAVE_BATCH * sizeof(float));
                        channel->axcindicate = NO_SIGNAL;
                        channel->state = CH_WORKING;
                    }
                    debug_bulk_print("mixer[%d]: ampleft=%.1f ampright=%.1f\n", i, input->ampfactor * input->ampl, input->ampfactor * input->ampr);
                    if (input->has_signal) {
                        /* left channel */
                        mix_waveforms(channel->waveout, input->wavein, input->ampfactor * input->ampl, WAVE_BATCH);
                        /* right channel */
                        if (channel->mode == MM_STEREO) {
                            mix_waveforms(channel->waveout_r, input->wavein, input->ampfactor * input->ampr, WAVE_BATCH);
                        }
                        channel->axcindicate = SIGNAL;
                    }
                    // this input's batch is consumed for the current output interval
                    input->ready = false;
                    mixer->inputs_todo[j] = false;
                }
                pthread_mutex_unlock(&input->mutex);
            }

            // check if all "good" inputs have been handled. this means there is no enabled mixer (mixer->input_mask is true) that has a
            // input to handle (mixer->inputs_todo is true)
            bool all_good_inputs_handled = true;
            for (int k = 0; k < mixer->input_count && all_good_inputs_handled; k++) {
                if (mixer->inputs_todo[k] && mixer->input_mask[k]) {
                    all_good_inputs_handled = false;
                }
            }

            if ((all_good_inputs_handled == true) || mixer->interval == 0) {  // all good inputs handled or last interval passed
#ifdef DEBUG
                // build '+'/'-' strings showing the per-input todo and mask flags for the debug log
                gettimeofday(&te, NULL);

                char* inputs_todo_char = (char*)XCALLOC(mixer->input_count + 1, sizeof(char));
                char* input_mask_char = (char*)XCALLOC(mixer->input_count + 1, sizeof(char));
                for (int k = 0; k < mixer->input_count; k++) {
                    inputs_todo_char[k] = mixer->inputs_todo[k] ? '+' : '-';
                    input_mask_char[k] = mixer->input_mask[k] ? '+' : '-';
                }
                inputs_todo_char[mixer->input_count] = '\0';
                input_mask_char[mixer->input_count] = '\0';

                debug_bulk_print("mixerinput: %lu.%lu %lu int=%d inp_unhandled=%s inp_mask=%s\n", te.tv_sec, (unsigned long)te.tv_usec, (te.tv_sec - ts.tv_sec) * 1000000UL + te.tv_usec - ts.tv_usec,
                                 mixer->interval, inputs_todo_char, input_mask_char);

                free(inputs_todo_char);
                free(input_mask_char);

                ts.tv_sec = te.tv_sec;
                ts.tv_usec = te.tv_usec;
#endif /* DEBUG */
                // publish the mixed batch, wake the consumer and start a new interval
                // with every input pending again
                channel->state = CH_READY;
                signal->send();
                mixer->interval = MIX_DIVISOR;
                for (int k = 0; k < mixer->input_count; k++) {
                    mixer->inputs_todo[k] = true;
                }
            } else {
                mixer->interval--;
            }
        }
    }
    return 0;
}

================================================
FILE: src/output.cpp
================================================
/*
 * output.cpp
 * Output related routines
 *
 * Copyright (c) 2015-2021 Tomasz Lemiech
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see .
 */

#include
#include
#include
#include
#include
#include
#include
#include
#include
// SHOUTERR_RETRY is available since libshout 2.4.0.
// Set it to an impossible value if it's not there.
#ifndef SHOUTERR_RETRY #define SHOUTERR_RETRY (-255) #endif /* SHOUTERR_RETRY */ #include #ifdef WITH_PULSEAUDIO #include #endif /* WITH_PULSEAUDIO */ #include #include #include #include #include #include #include #include #include "config.h" #include "helper_functions.h" #include "input-common.h" #include "rtl_airband.h" void shout_setup(icecast_data* icecast, mix_modes mixmode) { int ret; shout_t* shouttemp = shout_new(); if (shouttemp == NULL) { printf("cannot allocate\n"); } if (shout_set_host(shouttemp, icecast->hostname) != SHOUTERR_SUCCESS) { shout_free(shouttemp); return; } if (shout_set_protocol(shouttemp, SHOUT_PROTOCOL_HTTP) != SHOUTERR_SUCCESS) { shout_free(shouttemp); return; } if (shout_set_port(shouttemp, icecast->port) != SHOUTERR_SUCCESS) { shout_free(shouttemp); return; } #ifdef LIBSHOUT_HAS_TLS if (shout_set_tls(shouttemp, icecast->tls_mode) != SHOUTERR_SUCCESS) { shout_free(shouttemp); return; } #endif /* LIBSHOUT_HAS_TLS */ char mp[100]; sprintf(mp, "/%s", icecast->mountpoint); if (shout_set_mount(shouttemp, mp) != SHOUTERR_SUCCESS) { shout_free(shouttemp); return; } if (shout_set_user(shouttemp, icecast->username) != SHOUTERR_SUCCESS) { shout_free(shouttemp); return; } if (shout_set_password(shouttemp, icecast->password) != SHOUTERR_SUCCESS) { shout_free(shouttemp); return; } #ifdef LIBSHOUT_HAS_CONTENT_FORMAT if (shout_set_content_format(shouttemp, SHOUT_FORMAT_MP3, SHOUT_USAGE_AUDIO, NULL) != SHOUTERR_SUCCESS) { #else if (shout_set_format(shouttemp, SHOUT_FORMAT_MP3) != SHOUTERR_SUCCESS) { #endif /* LIBSHOUT_HAS_CONTENT_FORMAT */ shout_free(shouttemp); return; } if (icecast->name && shout_set_meta(shouttemp, SHOUT_META_NAME, icecast->name) != SHOUTERR_SUCCESS) { shout_free(shouttemp); return; } if (icecast->genre && shout_set_meta(shouttemp, SHOUT_META_GENRE, icecast->genre) != SHOUTERR_SUCCESS) { shout_free(shouttemp); return; } if (icecast->description && shout_set_meta(shouttemp, SHOUT_META_DESCRIPTION, icecast->description) != 
SHOUTERR_SUCCESS) { shout_free(shouttemp); return; } char samplerates[20]; sprintf(samplerates, "%d", MP3_RATE); shout_set_audio_info(shouttemp, SHOUT_AI_SAMPLERATE, samplerates); shout_set_audio_info(shouttemp, SHOUT_AI_CHANNELS, (mixmode == MM_STEREO ? "2" : "1")); if (shout_set_nonblocking(shouttemp, 1) != SHOUTERR_SUCCESS) { log(LOG_ERR, "Error setting non-blocking mode: %s\n", shout_get_error(shouttemp)); return; } ret = shout_open(shouttemp); if (ret == SHOUTERR_SUCCESS) ret = SHOUTERR_CONNECTED; if (ret == SHOUTERR_BUSY || ret == SHOUTERR_RETRY) log(LOG_NOTICE, "Connecting to %s:%d/%s...\n", icecast->hostname, icecast->port, icecast->mountpoint); int shout_timeout = 30 * 5; // 30 * 5 * 200ms = 30s while ((ret == SHOUTERR_BUSY || ret == SHOUTERR_RETRY) && shout_timeout-- > 0) { SLEEP(200); ret = shout_get_connected(shouttemp); } if (ret == SHOUTERR_CONNECTED) { log(LOG_NOTICE, "Connected to %s:%d/%s\n", icecast->hostname, icecast->port, icecast->mountpoint); SLEEP(100); icecast->shout = shouttemp; } else { log(LOG_WARNING, "Could not connect to %s:%d/%s: %s\n", icecast->hostname, icecast->port, icecast->mountpoint, shout_get_error(shouttemp)); shout_close(shouttemp); shout_free(shouttemp); return; } } lame_t airlame_init(mix_modes mixmode, int highpass, int lowpass) { lame_t lame = lame_init(); if (!lame) { log(LOG_WARNING, "lame_init failed\n"); return NULL; } lame_set_in_samplerate(lame, WAVE_RATE); lame_set_VBR(lame, vbr_mtrh); lame_set_brate(lame, 16); lame_set_quality(lame, 7); lame_set_lowpassfreq(lame, lowpass); lame_set_highpassfreq(lame, highpass); lame_set_out_samplerate(lame, MP3_RATE); if (mixmode == MM_STEREO) { lame_set_num_channels(lame, 2); lame_set_mode(lame, JOINT_STEREO); } else { lame_set_num_channels(lame, 1); lame_set_mode(lame, MONO); } debug_print("lame init with mixmode=%s\n", mixmode == MM_STEREO ? 
"MM_STEREO" : "MM_MONO"); lame_init_params(lame); return lame; } class LameTone { unsigned char* _data; int _bytes; public: LameTone(mix_modes mixmode, int msec, unsigned int hz = 0) : _data(NULL), _bytes(0) { _data = (unsigned char*)XCALLOC(1, LAMEBUF_SIZE); int samples = (msec * WAVE_RATE) / 1000; float* buf = (float*)XCALLOC(samples, sizeof(float)); debug_print("LameTone with mixmode=%s msec=%d hz=%u\n", mixmode == MM_STEREO ? "MM_STEREO" : "MM_MONO", msec, hz); if (hz > 0) { const float period = 1.0 / (float)hz; const float sample_time = 1.0 / (float)WAVE_RATE; float t = 0; for (int i = 0; i < samples; ++i, t += sample_time) { buf[i] = 0.9 * sinf(t * 2.0 * M_PI / period); } } else memset(buf, 0, samples * sizeof(float)); lame_t lame = airlame_init(mixmode, 0, 0); if (lame) { _bytes = lame_encode_buffer_ieee_float(lame, buf, (mixmode == MM_STEREO ? buf : NULL), samples, _data, LAMEBUF_SIZE); if (_bytes > 0) { int flush_ofs = _bytes; if (flush_ofs & 0x1f) flush_ofs += 0x20 - (flush_ofs & 0x1f); if (flush_ofs < LAMEBUF_SIZE) { int flush_bytes = lame_encode_flush(lame, _data + flush_ofs, LAMEBUF_SIZE - flush_ofs); if (flush_bytes > 0) { memmove(_data + _bytes, _data + flush_ofs, flush_bytes); _bytes += flush_bytes; } } } else log(LOG_WARNING, "lame_encode_buffer_ieee_float: %d\n", _bytes); lame_close(lame); } free(buf); } ~LameTone() { if (_data) free(_data); } int write(FILE* f) { if (!_data || _bytes <= 0) return 1; if (fwrite(_data, 1, _bytes, f) != (unsigned int)_bytes) { log(LOG_WARNING, "LameTone: failed to write %d bytes\n", _bytes); return -1; } return 0; } }; int rename_if_exists(char const* oldpath, char const* newpath) { int ret = rename(oldpath, newpath); if (ret < 0) { if (errno == ENOENT) { return 0; } else { log(LOG_ERR, "Could not rename %s to %s: %s\n", oldpath, newpath, strerror(errno)); } } return ret; } /* * Open output file (mp3 or raw IQ) for append or initial write. 
* If appending to an audio file, insert discontinuity indictor tones * as well as the appropriate amount of silence when in continuous mode. */ static int open_file(file_data* fdata, mix_modes mixmode, int is_audio) { int rename_result = rename_if_exists(fdata->file_path.c_str(), fdata->file_path_tmp.c_str()); fdata->f = fopen(fdata->file_path_tmp.c_str(), fdata->append ? "a+" : "w"); if (fdata->f == NULL) { return -1; } struct stat st = {}; if (!fdata->append || fstat(fileno(fdata->f), &st) != 0 || st.st_size == 0) { if (!fdata->split_on_transmission) { log(LOG_INFO, "Writing to %s\n", fdata->file_path.c_str()); } else { debug_print("Writing to %s\n", fdata->file_path_tmp.c_str()); } return 0; } if (rename_result < 0) { log(LOG_INFO, "Writing to %s\n", fdata->file_path.c_str()); debug_print("Writing to %s\n", fdata->file_path_tmp.c_str()); } else { log(LOG_INFO, "Appending from pos %llu to %s\n", (unsigned long long)st.st_size, fdata->file_path.c_str()); debug_print("Appending from pos %llu to %s\n", (unsigned long long)st.st_size, fdata->file_path_tmp.c_str()); } if (is_audio) { // fill missing space with marker tones LameTone lt_a(mixmode, 120, 2222); LameTone lt_b(mixmode, 120, 1111); LameTone lt_c(mixmode, 120, 555); int r = lt_a.write(fdata->f); if (r == 0) r = lt_b.write(fdata->f); if (r == 0) r = lt_c.write(fdata->f); // fill in time delta with silence if continuous output mode if (fdata->continuous) { time_t now = time(NULL); if (now > st.st_mtime) { time_t delta = now - st.st_mtime; if (delta > 3600) { log(LOG_WARNING, "Too big time difference: %llu sec, limiting to one hour\n", (unsigned long long)delta); delta = 3600; } LameTone lt_silence(mixmode, 1000); for (; (r == 0 && delta > 1); --delta) r = lt_silence.write(fdata->f); } } if (r == 0) r = lt_c.write(fdata->f); if (r == 0) r = lt_b.write(fdata->f); if (r == 0) r = lt_a.write(fdata->f); if (r < 0) fseek(fdata->f, st.st_size, SEEK_SET); } return 0; } static void close_file(output_t* output) { 
file_data* fdata = (file_data*)(output->data); if (!fdata) { return; } // close all mp3 files for every output that has a lame context if (fdata->type == O_FILE && fdata->f && output->lame) { int encoded = lame_encode_flush_nogap(output->lame, output->lamebuf, LAMEBUF_SIZE); debug_print("closing file %s flushed %d\n", fdata->file_path.c_str(), encoded); if (encoded > 0) { size_t written = fwrite((void*)output->lamebuf, 1, (size_t)encoded, fdata->f); if (written == 0 || written < (size_t)encoded) log(LOG_WARNING, "Problem writing %s (%s)\n", fdata->file_path.c_str(), strerror(errno)); } // write the lametag to the beginning of the file const int lametag_size = lame_get_lametag_frame(output->lame, output->lamebuf, LAMEBUF_SIZE); fseek(fdata->f, 0, SEEK_SET); fwrite(output->lamebuf, 1, lametag_size, fdata->f); } if (fdata->f) { fclose(fdata->f); fdata->f = NULL; rename_if_exists(fdata->file_path_tmp.c_str(), fdata->file_path.c_str()); } fdata->file_path.clear(); fdata->file_path_tmp.clear(); } /* * Close current output file based on certain conditions: * If "split_on_transmission" mode is true check: * If current duration too long, or we've been idle too long * else (append or continuous) check: * if hour is different. 
*/ static void close_if_necessary(output_t* output) { file_data* fdata = (file_data*)(output->data); static const double MIN_TRANSMISSION_TIME_SEC = 1.0; static const double MAX_TRANSMISSION_TIME_SEC = 60.0 * 60.0; static const double MAX_TRANSMISSION_IDLE_SEC = 0.5; if (!fdata || !fdata->f) { return; } timeval current_time; gettimeofday(¤t_time, NULL); if (fdata->split_on_transmission) { double duration_sec = delta_sec(&fdata->open_time, ¤t_time); double idle_sec = delta_sec(&fdata->last_write_time, ¤t_time); if (duration_sec > MAX_TRANSMISSION_TIME_SEC || (duration_sec > MIN_TRANSMISSION_TIME_SEC && idle_sec > MAX_TRANSMISSION_IDLE_SEC)) { debug_print("closing file %s, duration %f sec, idle %f sec\n", fdata->file_path.c_str(), duration_sec, idle_sec); close_file(output); } return; } // Check if the hour boundary was just crossed. NOTE: Actual hour number doesn't matter but still // need to use localtime if enabled (some timezones have partial hour offsets) int start_hour; int current_hour; if (use_localtime) { start_hour = localtime(&(fdata->open_time.tv_sec))->tm_hour; current_hour = localtime(¤t_time.tv_sec)->tm_hour; } else { start_hour = gmtime(&(fdata->open_time.tv_sec))->tm_hour; current_hour = gmtime(¤t_time.tv_sec)->tm_hour; } if (start_hour != current_hour) { debug_print("closing file %s after crossing hour boundary\n", fdata->file_path.c_str()); close_file(output); } } /* * For a particular channel file output, check if there is a file currently open. * If so, that file may need to be flushed and closed. * * If the existing open file is good for continued use, return true. * Otherwise, create a file name based on the current timestamp and * open that new file. If that file open succeeded, return true. 
*/
static bool output_file_ready(channel_t* channel, output_t* output) {
    file_data* fdata = (file_data*)(output->data);
    if (!fdata) {
        return false;
    }

    close_if_necessary(output);

    if (fdata->f) {  // still open
        return true;
    }

    timeval current_time;
    gettimeofday(&current_time, NULL);
    struct tm* time;
    if (use_localtime) {
        time = localtime(&current_time.tv_sec);
    } else {
        time = gmtime(&current_time.tv_sec);
    }
    // both conversions return NULL on failure; strftime() must not see that
    if (time == NULL) {
        log(LOG_NOTICE, "Could not convert current time\n");
        return false;
    }

    // per-transmission files carry a timestamp down to the second, hourly files down to the hour
    char timestamp[32];
    if (strftime(timestamp, sizeof(timestamp), fdata->split_on_transmission ? "_%Y%m%d_%H%M%S" : "_%Y%m%d_%H", time) == 0) {
        log(LOG_NOTICE, "strftime returned 0\n");
        return false;
    }

    std::string output_dir;
    if (fdata->dated_subdirectories) {
        output_dir = make_dated_subdirs(fdata->basedir, time);
        if (output_dir.empty()) {
            log(LOG_ERR, "Failed to create dated subdirectory\n");
            return false;
        }
    } else {
        output_dir = fdata->basedir;
        make_dir(output_dir);
    }

    // use a string stream to build the output filepath
    std::stringstream ss;
    ss << output_dir << '/' << fdata->basename << timestamp;
    if (fdata->include_freq) {
        ss << '_' << channel->freqlist[channel->freq_idx].frequency;
    }
    ss << fdata->suffix;
    fdata->file_path = ss.str();

    // data is written to a ".tmp" sibling while the file is open
    fdata->file_path_tmp = fdata->file_path + ".tmp";

    fdata->open_time = fdata->last_write_time = current_time;

    const int is_audio = output->type == O_RAWFILE ? 0 : 1;
    if (open_file(fdata, channel->mode, is_audio) < 0) {
        log(LOG_WARNING, "Cannot open output file %s (%s)\n", fdata->file_path_tmp.c_str(), strerror(errno));
        return false;
    }

    return true;
}

// Create all the output for a particular channel.
void process_outputs(channel_t* channel, int cur_scan_freq) { for (int k = 0; k < channel->output_count; k++) { if (channel->outputs[k].enabled == false) continue; if (channel->outputs[k].type == O_ICECAST) { icecast_data* icecast = (icecast_data*)(channel->outputs[k].data); if (icecast->shout == NULL) continue; // encode and send mp3 to shoutcast output const auto& lame = channel->outputs[k].lame; const auto& lamebuf = channel->outputs[k].lamebuf; int mp3_bytes = lame_encode_buffer_ieee_float(lame, channel->waveout, (channel->mode == MM_STEREO ? channel->waveout_r : NULL), WAVE_BATCH, lamebuf, LAMEBUF_SIZE); if (mp3_bytes < 0) { log(LOG_WARNING, "lame_encode_buffer_ieee_float: %d\n", mp3_bytes); } if (mp3_bytes == 0) { continue; } int ret = shout_send(icecast->shout, channel->outputs[k].lamebuf, mp3_bytes); if (ret != SHOUTERR_SUCCESS || shout_queuelen(icecast->shout) > MAX_SHOUT_QUEUELEN) { if (shout_queuelen(icecast->shout) > MAX_SHOUT_QUEUELEN) log(LOG_WARNING, "Exceeded max backlog for %s:%d/%s, disconnecting\n", icecast->hostname, icecast->port, icecast->mountpoint); // reset connection log(LOG_WARNING, "Lost connection to %s:%d/%s\n", icecast->hostname, icecast->port, icecast->mountpoint); shout_close(icecast->shout); shout_free(icecast->shout); icecast->shout = NULL; } else if (icecast->send_scan_freq_tags && cur_scan_freq >= 0) { shout_metadata_t* meta = shout_metadata_new(); char description[32]; if (channel->freqlist[channel->freq_idx].label != NULL) { if (shout_metadata_add(meta, "song", channel->freqlist[channel->freq_idx].label) != SHOUTERR_SUCCESS) { log(LOG_WARNING, "Failed to add shout metadata\n"); } } else { snprintf(description, sizeof(description), "%.3f MHz", channel->freqlist[channel->freq_idx].frequency / 1000000.0); if (shout_metadata_add(meta, "song", description) != SHOUTERR_SUCCESS) { log(LOG_WARNING, "Failed to add shout metadata\n"); } } if (SHOUT_SET_METADATA(icecast->shout, meta) != SHOUTERR_SUCCESS) { log(LOG_WARNING, "Failed to add 
shout metadata\n"); } shout_metadata_free(meta); } } else if (channel->outputs[k].type == O_FILE || channel->outputs[k].type == O_RAWFILE) { file_data* fdata = (file_data*)(channel->outputs[k].data); if (fdata->continuous == false && channel->axcindicate == NO_SIGNAL && channel->outputs[k].active == false) { close_if_necessary(&channel->outputs[k]); continue; } if (!output_file_ready(channel, &channel->outputs[k])) { log(LOG_WARNING, "Output disabled\n"); channel->outputs[k].enabled = false; continue; }; // encode mp3 bytes if O_FILE const auto& lame = channel->outputs[k].lame; const auto& lamebuf = channel->outputs[k].lamebuf; int mp3_bytes = 0; if (channel->outputs[k].type == O_FILE) { mp3_bytes = lame_encode_buffer_ieee_float(lame, channel->waveout, (channel->mode == MM_STEREO ? channel->waveout_r : NULL), WAVE_BATCH, lamebuf, LAMEBUF_SIZE); if (mp3_bytes < 0) { log(LOG_WARNING, "lame_encode_buffer_ieee_float: %d\n", mp3_bytes); } if (mp3_bytes <= 0) { continue; } } size_t buflen = 0, written = 0; if (channel->outputs[k].type == O_FILE) { buflen = (size_t)mp3_bytes; written = fwrite(lamebuf, 1, buflen, fdata->f); } else if (channel->outputs[k].type == O_RAWFILE) { buflen = 2 * sizeof(float) * WAVE_BATCH; written = fwrite(channel->iq_out, 1, buflen, fdata->f); } if (written < buflen) { if (ferror(fdata->f)) log(LOG_WARNING, "Cannot write to %s (%s), output disabled\n", fdata->file_path.c_str(), strerror(errno)); else log(LOG_WARNING, "Short write on %s, output disabled\n", fdata->file_path.c_str()); close_file(&channel->outputs[k]); channel->outputs[k].enabled = false; } channel->outputs[k].active = (channel->axcindicate != NO_SIGNAL); gettimeofday(&fdata->last_write_time, NULL); } else if (channel->outputs[k].type == O_MIXER) { mixer_data* mdata = (mixer_data*)(channel->outputs[k].data); mixer_put_samples(mdata->mixer, mdata->input, channel->waveout, channel->axcindicate != NO_SIGNAL, WAVE_BATCH); } else if (channel->outputs[k].type == O_UDP_STREAM) { 
udp_stream_data* sdata = (udp_stream_data*)channel->outputs[k].data; if (sdata->continuous == false && channel->axcindicate == NO_SIGNAL) { continue; } if (channel->mode == MM_MONO) { udp_stream_write(sdata, channel->waveout, (size_t)WAVE_BATCH * sizeof(float)); } else { udp_stream_write(sdata, channel->waveout, channel->waveout_r, (size_t)WAVE_BATCH * sizeof(float)); } #ifdef WITH_PULSEAUDIO } else if (channel->outputs[k].type == O_PULSE) { pulse_data* pdata = (pulse_data*)(channel->outputs[k].data); if (pdata->continuous == false && channel->axcindicate == NO_SIGNAL) continue; pulse_write_stream(pdata, channel->mode, channel->waveout, channel->waveout_r, (size_t)WAVE_BATCH * sizeof(float)); #endif /* WITH_PULSEAUDIO */ } } } void disable_channel_outputs(channel_t* channel) { for (int k = 0; k < channel->output_count; k++) { output_t* output = channel->outputs + k; output->enabled = false; if (output->type == O_ICECAST) { icecast_data* icecast = (icecast_data*)(channel->outputs[k].data); if (icecast->shout == NULL) continue; log(LOG_WARNING, "Closing connection to %s:%d/%s\n", icecast->hostname, icecast->port, icecast->mountpoint); shout_close(icecast->shout); shout_free(icecast->shout); icecast->shout = NULL; } else if (output->type == O_FILE || output->type == O_RAWFILE) { close_file(&channel->outputs[k]); } else if (output->type == O_MIXER) { mixer_data* mdata = (mixer_data*)(output->data); mixer_disable_input(mdata->mixer, mdata->input); } else if (output->type == O_UDP_STREAM) { udp_stream_data* sdata = (udp_stream_data*)output->data; udp_stream_shutdown(sdata); #ifdef WITH_PULSEAUDIO } else if (output->type == O_PULSE) { pulse_data* pdata = (pulse_data*)(output->data); pulse_shutdown(pdata); #endif /* WITH_PULSEAUDIO */ } } } void disable_device_outputs(device_t* dev) { log(LOG_INFO, "Disabling device outputs\n"); for (int j = 0; j < dev->channel_count; j++) { disable_channel_outputs(dev->channels + j); } } static void print_channel_metric(FILE* f, char 
const* name, float freq, char* label) { fprintf(f, "%s{freq=\"%.3f\"", name, freq / 1000000.0); if (label != NULL) { fprintf(f, ",label=\"%s\"", label); } fprintf(f, "}"); } static void output_channel_noise_levels(FILE* f) { fprintf(f, "# HELP channel_noise_level Raw squelch noise_level.\n" "# TYPE channel_noise_level gauge\n"); for (int i = 0; i < device_count; i++) { device_t* dev = devices + i; for (int j = 0; j < dev->channel_count; j++) { channel_t* channel = devices[i].channels + j; for (int k = 0; k < channel->freq_count; k++) { print_channel_metric(f, "channel_noise_level", channel->freqlist[k].frequency, channel->freqlist[k].label); fprintf(f, "\t%.3f\n", channel->freqlist[k].squelch.noise_level()); } } } fprintf(f, "\n"); } static void output_channel_dbfs_noise_levels(FILE* f) { fprintf(f, "# HELP channel_dbfs_noise_level Squelch noise_level as dBFS.\n" "# TYPE channel_dbfs_noise_level gauge\n"); for (int i = 0; i < device_count; i++) { device_t* dev = devices + i; for (int j = 0; j < dev->channel_count; j++) { channel_t* channel = devices[i].channels + j; for (int k = 0; k < channel->freq_count; k++) { print_channel_metric(f, "channel_dbfs_noise_level", channel->freqlist[k].frequency, channel->freqlist[k].label); fprintf(f, "\t%.3f\n", level_to_dBFS(channel->freqlist[k].squelch.noise_level())); } } } fprintf(f, "\n"); } static void output_channel_signal_levels(FILE* f) { fprintf(f, "# HELP channel_signal_level Raw squelch signal_level.\n" "# TYPE channel_signal_level gauge\n"); for (int i = 0; i < device_count; i++) { device_t* dev = devices + i; for (int j = 0; j < dev->channel_count; j++) { channel_t* channel = devices[i].channels + j; for (int k = 0; k < channel->freq_count; k++) { print_channel_metric(f, "channel_signal_level", channel->freqlist[k].frequency, channel->freqlist[k].label); fprintf(f, "\t%.3f\n", channel->freqlist[k].squelch.signal_level()); } } } fprintf(f, "\n"); } static void output_channel_dbfs_signal_levels(FILE* f) { fprintf(f, "# 
HELP channel_dbfs_signal_level Squelch signal_level as dBFS.\n" "# TYPE channel_dbfs_signal_level gauge\n"); for (int i = 0; i < device_count; i++) { device_t* dev = devices + i; for (int j = 0; j < dev->channel_count; j++) { channel_t* channel = devices[i].channels + j; for (int k = 0; k < channel->freq_count; k++) { print_channel_metric(f, "channel_dbfs_signal_level", channel->freqlist[k].frequency, channel->freqlist[k].label); fprintf(f, "\t%.3f\n", level_to_dBFS(channel->freqlist[k].squelch.signal_level())); } } } fprintf(f, "\n"); } static void output_channel_squelch_levels(FILE* f) { fprintf(f, "# HELP channel_squelch_level Squelch squelch_level.\n" "# TYPE channel_squelch_level gauge\n"); for (int i = 0; i < device_count; i++) { device_t* dev = devices + i; for (int j = 0; j < dev->channel_count; j++) { channel_t* channel = devices[i].channels + j; for (int k = 0; k < channel->freq_count; k++) { print_channel_metric(f, "channel_squelch_level", channel->freqlist[k].frequency, channel->freqlist[k].label); fprintf(f, "\t%.3f\n", channel->freqlist[k].squelch.squelch_level()); } } } fprintf(f, "\n"); } static void output_channel_squelch_counter(FILE* f) { fprintf(f, "# HELP channel_squelch_counter Squelch open_count.\n" "# TYPE channel_squelch_counter counter\n"); for (int i = 0; i < device_count; i++) { device_t* dev = devices + i; for (int j = 0; j < dev->channel_count; j++) { channel_t* channel = devices[i].channels + j; for (int k = 0; k < channel->freq_count; k++) { print_channel_metric(f, "channel_squelch_counter", channel->freqlist[k].frequency, channel->freqlist[k].label); fprintf(f, "\t%zu\n", channel->freqlist[k].squelch.open_count()); } } } fprintf(f, "\n"); } static void output_channel_flappy_counter(FILE* f) { fprintf(f, "# HELP channel_flappy_counter Squelch flappy_count.\n" "# TYPE channel_flappy_counter counter\n"); for (int i = 0; i < device_count; i++) { device_t* dev = devices + i; for (int j = 0; j < dev->channel_count; j++) { channel_t* 
channel = devices[i].channels + j; for (int k = 0; k < channel->freq_count; k++) { print_channel_metric(f, "channel_flappy_counter", channel->freqlist[k].frequency, channel->freqlist[k].label); fprintf(f, "\t%zu\n", channel->freqlist[k].squelch.flappy_count()); } } } fprintf(f, "\n"); } static void output_channel_ctcss_counter(FILE* f) { fprintf(f, "# HELP channel_ctcss_counter count of windows with CTCSS detected.\n" "# TYPE channel_ctcss_counter counter\n"); for (int i = 0; i < device_count; i++) { device_t* dev = devices + i; for (int j = 0; j < dev->channel_count; j++) { channel_t* channel = devices[i].channels + j; for (int k = 0; k < channel->freq_count; k++) { print_channel_metric(f, "channel_ctcss_counter", channel->freqlist[k].frequency, channel->freqlist[k].label); fprintf(f, "\t%zu\n", channel->freqlist[k].squelch.ctcss_count()); } } } fprintf(f, "\n"); } static void output_channel_no_ctcss_counter(FILE* f) { fprintf(f, "# HELP channel_no_ctcss_counter count of windows without CTCSS detected.\n" "# TYPE channel_no_ctcss_counter counter\n"); for (int i = 0; i < device_count; i++) { device_t* dev = devices + i; for (int j = 0; j < dev->channel_count; j++) { channel_t* channel = devices[i].channels + j; for (int k = 0; k < channel->freq_count; k++) { print_channel_metric(f, "channel_no_ctcss_counter", channel->freqlist[k].frequency, channel->freqlist[k].label); fprintf(f, "\t%zu\n", channel->freqlist[k].squelch.no_ctcss_count()); } } } fprintf(f, "\n"); } static void output_channel_activity_counters(FILE* f) { fprintf(f, "# HELP channel_activity_counter Loops of output_thread with frequency active.\n" "# TYPE channel_activity_counter counter\n"); for (int i = 0; i < device_count; i++) { device_t* dev = devices + i; for (int j = 0; j < dev->channel_count; j++) { channel_t* channel = devices[i].channels + j; for (int k = 0; k < channel->freq_count; k++) { print_channel_metric(f, "channel_activity_counter", channel->freqlist[k].frequency, 
channel->freqlist[k].label); fprintf(f, "\t%zu\n", channel->freqlist[k].active_counter); } } } fprintf(f, "\n"); } static void output_device_buffer_overflows(FILE* f) { fprintf(f, "# HELP buffer_overflow_count Number of times a device's buffer has overflowed.\n" "# TYPE buffer_overflow_count counter\n"); for (int i = 0; i < device_count; i++) { device_t* dev = devices + i; fprintf(f, "buffer_overflow_count{device=\"%d\"}\t%zu\n", i, dev->input->overflow_count); } fprintf(f, "\n"); } static void output_output_overruns(FILE* f) { fprintf(f, "# HELP output_overrun_count Number of times a device or mixer output has overrun.\n" "# TYPE output_overrun_count counter\n"); for (int i = 0; i < device_count; i++) { device_t* dev = devices + i; fprintf(f, "output_overrun_count{device=\"%d\"}\t%zu\n", i, dev->output_overrun_count); } for (int i = 0; i < mixer_count; i++) { mixer_t* mixer = mixers + i; fprintf(f, "output_overrun_count{mixer=\"%d\"}\t%zu\n", i, mixer->output_overrun_count); } fprintf(f, "\n"); } static void output_input_overruns(FILE* f) { if (mixer_count == 0) { return; } fprintf(f, "# HELP input_overrun_count Number of times mixer input has overrun.\n" "# TYPE input_overrun_count counter\n"); for (int i = 0; i < mixer_count; i++) { mixer_t* mixer = mixers + i; for (int j = 0; j < mixer->input_count; j++) { mixinput_t* input = mixer->inputs + j; fprintf(f, "input_overrun_count{mixer=\"%d\",input=\"%d\"}\t%zu\n", i, j, input->input_overrun_count); } } fprintf(f, "\n"); } void write_stats_file(timeval* last_stats_write) { if (!stats_filepath) { return; } timeval current_time; gettimeofday(¤t_time, NULL); static const double STATS_FILE_TIMING = 15.0; if (!do_exit && delta_sec(last_stats_write, ¤t_time) < STATS_FILE_TIMING) { return; } *last_stats_write = current_time; FILE* file = fopen(stats_filepath, "w"); if (!file) { log(LOG_WARNING, "Cannot open output file %s (%s)\n", stats_filepath, strerror(errno)); return; } output_channel_activity_counters(file); 
output_channel_noise_levels(file); output_channel_dbfs_noise_levels(file); output_channel_signal_levels(file); output_channel_dbfs_signal_levels(file); output_channel_squelch_counter(file); output_channel_squelch_levels(file); output_channel_flappy_counter(file); output_channel_ctcss_counter(file); output_channel_no_ctcss_counter(file); output_device_buffer_overflows(file); output_output_overruns(file); output_input_overruns(file); fclose(file); } void* output_thread(void* param) { assert(param != NULL); output_params_t* output_param = (output_params_t*)param; struct freq_tag tag; struct timeval tv; int new_freq = -1; timeval last_stats_write = {0, 0}; debug_print("Starting output thread, devices %d:%d, mixers %d:%d, signal %p\n", output_param->device_start, output_param->device_end, output_param->mixer_start, output_param->mixer_end, output_param->mp3_signal); #ifdef DEBUG timeval ts, te; gettimeofday(&ts, NULL); #endif /* DEBUG */ while (!do_exit) { output_param->mp3_signal->wait(); for (int i = output_param->mixer_start; i < output_param->mixer_end; i++) { if (mixers[i].enabled == false) continue; channel_t* channel = &mixers[i].channel; if (channel->state == CH_READY) { process_outputs(channel, -1); channel->state = CH_DIRTY; } } #ifdef DEBUG gettimeofday(&te, NULL); debug_bulk_print("mixeroutput: %lu.%lu %lu\n", te.tv_sec, (unsigned long)te.tv_usec, (te.tv_sec - ts.tv_sec) * 1000000UL + te.tv_usec - ts.tv_usec); ts.tv_sec = te.tv_sec; ts.tv_usec = te.tv_usec; #endif /* DEBUG */ for (int i = output_param->device_start; i < output_param->device_end; i++) { device_t* dev = devices + i; if (dev->input->state == INPUT_RUNNING && dev->waveavail) { if (dev->mode == R_SCAN) { tag_queue_get(dev, &tag); if (tag.freq >= 0) { tag.tv.tv_sec += shout_metadata_delay; gettimeofday(&tv, NULL); if (tag.tv.tv_sec < tv.tv_sec || (tag.tv.tv_sec == tv.tv_sec && tag.tv.tv_usec <= tv.tv_usec)) { new_freq = tag.freq; tag_queue_advance(dev); } } } for (int j = 0; j < 
dev->channel_count; j++) { channel_t* channel = devices[i].channels + j; process_outputs(channel, new_freq); memcpy(channel->waveout, channel->waveout + WAVE_BATCH, AGC_EXTRA * 4); } dev->waveavail = 0; } // make sure we don't carry new_freq value to the next receiver which might be working // in multichannel mode new_freq = -1; } if (output_param->device_start == 0) { write_stats_file(&last_stats_write); } } return 0; } // reconnect as required void* output_check_thread(void*) { while (!do_exit) { SLEEP(10000); for (int i = 0; i < device_count; i++) { device_t* dev = devices + i; for (int j = 0; j < dev->channel_count; j++) { for (int k = 0; k < dev->channels[j].output_count; k++) { if (dev->channels[j].outputs[k].type == O_ICECAST) { icecast_data* icecast = (icecast_data*)(dev->channels[j].outputs[k].data); if (dev->input->state == INPUT_FAILED) { if (icecast->shout) { log(LOG_WARNING, "Device #%d failed, disconnecting stream %s:%d/%s\n", i, icecast->hostname, icecast->port, icecast->mountpoint); shout_close(icecast->shout); shout_free(icecast->shout); icecast->shout = NULL; } } else if (dev->input->state == INPUT_RUNNING) { if (icecast->shout == NULL) { log(LOG_NOTICE, "Trying to reconnect to %s:%d/%s...\n", icecast->hostname, icecast->port, icecast->mountpoint); shout_setup(icecast, dev->channels[j].mode); } } } else if (dev->channels[j].outputs[k].type == O_UDP_STREAM) { udp_stream_data* sdata = (udp_stream_data*)dev->channels[j].outputs[k].data; if (dev->input->state == INPUT_FAILED) { udp_stream_shutdown(sdata); } #ifdef WITH_PULSEAUDIO } else if (dev->channels[j].outputs[k].type == O_PULSE) { pulse_data* pdata = (pulse_data*)(dev->channels[j].outputs[k].data); if (dev->input->state == INPUT_FAILED) { if (pdata->context) { pulse_shutdown(pdata); } } else if (dev->input->state == INPUT_RUNNING) { if (pdata->context == NULL) { pulse_setup(pdata, dev->channels[j].mode); } } #endif /* WITH_PULSEAUDIO */ } } } } for (int i = 0; i < mixer_count; i++) { if 
(mixers[i].enabled == false) continue; for (int k = 0; k < mixers[i].channel.output_count; k++) { if (mixers[i].channel.outputs[k].enabled == false) continue; if (mixers[i].channel.outputs[k].type == O_ICECAST) { icecast_data* icecast = (icecast_data*)(mixers[i].channel.outputs[k].data); if (icecast->shout == NULL) { log(LOG_NOTICE, "Trying to reconnect to %s:%d/%s...\n", icecast->hostname, icecast->port, icecast->mountpoint); shout_setup(icecast, mixers[i].channel.mode); } #ifdef WITH_PULSEAUDIO } else if (mixers[i].channel.outputs[k].type == O_PULSE) { pulse_data* pdata = (pulse_data*)(mixers[i].channel.outputs[k].data); if (pdata->context == NULL) { pulse_setup(pdata, mixers[i].channel.mode); } #endif /* WITH_PULSEAUDIO */ } } } } return 0; } ================================================ FILE: src/pulse.cpp ================================================ /* * pulse.cpp * PulseAudio output routines * * Copyright (c) 2015-2021 Tomasz Lemiech * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . */ #include #include #include #include "rtl_airband.h" #define SERVER_IFNOTNULL(x) ((x) ? 
(x) : "") #define PA_LOOP_LOCK(x) \ if (!pa_threaded_mainloop_in_thread(x)) { \ pa_threaded_mainloop_lock(x); \ } #define PA_LOOP_UNLOCK(x) \ if (!pa_threaded_mainloop_in_thread(x)) { \ pa_threaded_mainloop_unlock(x); \ } using namespace std; pa_threaded_mainloop* mainloop = NULL; void pulse_shutdown(pulse_data* pdata) { if (!pdata) return; PA_LOOP_LOCK(mainloop); if (pdata->left) { pa_stream_disconnect(pdata->left); pa_stream_unref(pdata->left); pdata->left = NULL; } if (pdata->right) { pa_stream_disconnect(pdata->right); pa_stream_unref(pdata->right); pdata->right = NULL; } if (pdata->context) { pa_context_disconnect(pdata->context); pa_context_unref(pdata->context); pdata->context = NULL; } PA_LOOP_UNLOCK(mainloop); } static void pulse_stream_underflow_cb(pa_stream*, void* userdata) { pulse_data* pdata = (pulse_data*)userdata; if (pdata->continuous) // do not flood the logs on every squelch closing log(LOG_INFO, "pulse: %s: stream \"%s\": underflow\n", SERVER_IFNOTNULL(pdata->server), pdata->stream_name); } static void pulse_stream_overflow_cb(pa_stream*, void* userdata) { pulse_data* pdata = (pulse_data*)userdata; log(LOG_INFO, "pulse: %s: stream \"%s\": overflow\n", SERVER_IFNOTNULL(pdata->server), pdata->stream_name); } static void stream_state_cb(pa_stream* stream, void* userdata) { pulse_data* pdata = (pulse_data*)userdata; switch (pa_stream_get_state(stream)) { case PA_STREAM_READY: if (pdata->mode == MM_MONO || (pa_stream_get_state(pdata->left) == PA_STREAM_READY && pa_stream_get_state(pdata->right) == PA_STREAM_READY)) pa_stream_cork(pdata->left, 0, NULL, NULL); break; case PA_STREAM_UNCONNECTED: case PA_STREAM_CREATING: break; case PA_STREAM_FAILED: log(LOG_WARNING, "pulse: %s: stream \"%s\" failed: %s\n", SERVER_IFNOTNULL(pdata->server), pdata->stream_name, pa_strerror(pa_context_errno(pdata->context))); break; case PA_STREAM_TERMINATED: log(LOG_WARNING, "pulse: %s: stream \"%s\" terminated\n", SERVER_IFNOTNULL(pdata->server), pdata->stream_name); 
break; break; } } static pa_stream* pulse_setup_stream(pulse_data* pdata, const pa_sample_spec* ss, pa_channel_map* cmap, pa_stream* sync_stream) { pa_stream* stream = NULL; PA_LOOP_LOCK(mainloop); if (!(stream = pa_stream_new(pdata->context, pdata->stream_name, ss, cmap))) { log(LOG_ERR, "pulse: %s: failed to create stream \"%s\": %s\n", SERVER_IFNOTNULL(pdata->server), pdata->stream_name, pa_strerror(pa_context_errno(pdata->context))); goto fail; } pa_stream_set_state_callback(stream, stream_state_cb, pdata); pa_stream_set_underflow_callback(stream, pulse_stream_underflow_cb, pdata); pa_stream_set_overflow_callback(stream, pulse_stream_overflow_cb, pdata); // Initially streams are corked (paused). For mono streams this is irrelevant, // but for stereo mixers it's required to keep left and right channels in sync. // Starting the left channel stream before the other stream from the sync pair is // set up causes the left channel stream to fail. if (pa_stream_connect_playback(stream, pdata->sink, NULL, (pa_stream_flags_t)(PA_STREAM_INTERPOLATE_TIMING | PA_STREAM_ADJUST_LATENCY | PA_STREAM_START_CORKED | PA_STREAM_AUTO_TIMING_UPDATE), NULL, sync_stream) < 0) { log(LOG_ERR, "pulse: %s: failed to connect stream \"%s\": %s\n", SERVER_IFNOTNULL(pdata->server), pdata->stream_name, pa_strerror(pa_context_errno(pdata->context))); goto fail; } log(LOG_INFO, "pulse: %s: stream \"%s\" connected\n", SERVER_IFNOTNULL(pdata->server), pdata->stream_name); PA_LOOP_UNLOCK(mainloop); return stream; fail: PA_LOOP_UNLOCK(mainloop); return NULL; } static void pulse_setup_streams(pulse_data* pdata) { const pa_sample_spec ss = { #if __cplusplus >= 199711L .format = PA_SAMPLE_FLOAT32LE, .rate = WAVE_RATE, .channels = 1 #else // for g++ 4.6 (eg. Raspbian Wheezy) PA_SAMPLE_FLOAT32LE, WAVE_RATE, 1 #endif /* __cplusplus */ }; pa_channel_map_init_mono(&pdata->lmap); pdata->lmap.map[0] = (pdata->mode == MM_STEREO ? 
PA_CHANNEL_POSITION_LEFT : PA_CHANNEL_POSITION_MONO); if (!(pdata->left = pulse_setup_stream(pdata, &ss, &pdata->lmap, NULL))) goto fail; if (pdata->mode == MM_STEREO) { pa_channel_map_init_mono(&pdata->rmap); pdata->rmap.map[0] = PA_CHANNEL_POSITION_RIGHT; if (!(pdata->right = pulse_setup_stream(pdata, &ss, &pdata->rmap, pdata->left))) goto fail; } return; fail: pulse_shutdown(pdata); } static void pulse_ctx_state_cb(pa_context* c, void* userdata) { pulse_data* pdata = (pulse_data*)userdata; switch (pa_context_get_state(c)) { case PA_CONTEXT_READY: pulse_setup_streams(pdata); break; case PA_CONTEXT_TERMINATED: break; case PA_CONTEXT_FAILED: log(LOG_ERR, "pulse: %s: connection failed: %s\n", SERVER_IFNOTNULL(pdata->server), pa_strerror(pa_context_errno(pdata->context))); pulse_shutdown(pdata); break; case PA_CONTEXT_CONNECTING: log(LOG_INFO, "pulse: %s: connecting...\n", SERVER_IFNOTNULL(pdata->server)); break; case PA_CONTEXT_UNCONNECTED: case PA_CONTEXT_AUTHORIZING: case PA_CONTEXT_SETTING_NAME: break; } } void pulse_init() { if (!mainloop && !(mainloop = pa_threaded_mainloop_new())) { cerr << "Failed to initialize PulseAudio main loop - aborting\n"; error(); } } int pulse_setup(pulse_data* pdata, mix_modes mixmode) { if (!(pdata->context = pa_context_new(pa_threaded_mainloop_get_api(mainloop), pdata->name))) { log(LOG_ERR, "%s", "pulse: failed to create context\n"); return -1; } pdata->mode = mixmode; PA_LOOP_LOCK(mainloop); int ret = 0; pa_context_set_state_callback(pdata->context, &pulse_ctx_state_cb, pdata); if (pa_context_connect(pdata->context, pdata->server, PA_CONTEXT_NOFLAGS, NULL) < 0) { log(LOG_WARNING, "pulse: %s: failed to connect: %s\n", SERVER_IFNOTNULL(pdata->server), pa_strerror(pa_context_errno(pdata->context))); // Don't clean up things here, context state is now set to PA_CONTEXT_FAILED, // so pulse_ctx_state_cb will take care of that. 
ret = -1; } PA_LOOP_UNLOCK(mainloop); return ret; } void pulse_start() { if (!mainloop) return; PA_LOOP_LOCK(mainloop); pa_threaded_mainloop_start(mainloop); PA_LOOP_UNLOCK(mainloop); } static int pulse_write_single_stream(pa_stream* stream, pulse_data* pdata, const float* data, size_t len, bool is_master) { pa_usec_t latency; int ret = -1; int lret; PA_LOOP_LOCK(mainloop); if (!stream || pa_stream_get_state(stream) != PA_STREAM_READY) goto end; if (is_master) { /* latency info is only meaningful for master stream) */ lret = pa_stream_get_latency(stream, &latency, NULL); if (lret < 0) { log(LOG_WARNING, "pulse: %s: failed to get latency info for stream \"%s\" (error is: %s), disconnecting\n", SERVER_IFNOTNULL(pdata->server), pdata->stream_name, pa_strerror(lret)); goto end; } if (latency > PULSE_STREAM_LATENCY_LIMIT) { log(LOG_INFO, "pulse: %s: exceeded max backlog for stream \"%s\", disconnecting\n", SERVER_IFNOTNULL(pdata->server), pdata->stream_name); goto end; } debug_bulk_print("pulse: %s: stream=\"%s\" lret=%d latency=%f ms\n", SERVER_IFNOTNULL(pdata->server), pdata->stream_name, lret, (float)latency / 1000.0f); } if (pa_stream_write(stream, data, len, NULL, 0LL, PA_SEEK_RELATIVE) < 0) { log(LOG_WARNING, "pulse: %s: could not write to stream \"%s\", disconnecting\n", SERVER_IFNOTNULL(pdata->server), pdata->stream_name); goto end; } ret = 0; end: PA_LOOP_UNLOCK(mainloop); return ret; } void pulse_write_stream(pulse_data* pdata, mix_modes mode, const float* data_left, const float* data_right, size_t len) { PA_LOOP_LOCK(mainloop); if (!pdata->context || pa_context_get_state(pdata->context) != PA_CONTEXT_READY) goto end; if (pulse_write_single_stream(pdata->left, pdata, data_left, len, true) < 0) goto fail; if (mode == MM_STEREO && pulse_write_single_stream(pdata->right, pdata, data_right, len, false) < 0) goto fail; goto end; fail: pulse_shutdown(pdata); end: PA_LOOP_UNLOCK(mainloop); return; } ================================================ FILE: 
src/rtl_airband.cpp ================================================ /* * RTLSDR AM/NFM demodulator, mixer, streamer and recorder * * Copyright (c) 2014 Wong Man Hang * Copyright (c) 2015-2021 Tomasz Lemiech * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . */ #include "config.h" #if defined WITH_BCM_VC && !defined __arm__ #error Broadcom VideoCore support can only be enabled on ARM builds #endif // From this point we may safely assume that WITH_BCM_VC implies __arm__ #ifdef WITH_BCM_VC #include "hello_fft/gpu_fft.h" #include "hello_fft/mailbox.h" #endif /* WITH_BCM_VC */ #include #include #include #include #include #include // uint8_t #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "input-common.h" #include "logging.h" #include "rtl_airband.h" #include "squelch.h" #ifdef WITH_PROFILING #include "gperftools/profiler.h" #endif /* WITH_PROFILING */ using namespace std; using namespace libconfig; device_t* devices; mixer_t* mixers; int device_count, mixer_count; static int devices_running = 0; int tui = 0; // do not display textual user interface int shout_metadata_delay = 3; volatile int do_exit = 0; bool use_localtime = false; bool multiple_demod_threads = false; bool multiple_output_threads = false; bool log_scan_activity = false; char* stats_filepath = NULL; size_t fft_size_log = 
// Approximation of atan2(y, x) built from a single division per call.
// Returns 0 for the origin; otherwise an angle in radians whose sign follows y.
float fast_atan2(float y, float x) {
    const float quarter_pi = M_PI_4;
    const float three_quarter_pi = 3 * M_PI_4;

    if (x == 0.0f && y == 0.0f) {
        return 0;
    }

    // work with |y| and restore the sign at the very end
    const float y_mag = (y < 0.0f) ? -y : y;

    // right half-plane is centred on pi/4, left half-plane on 3*pi/4
    const float unsigned_angle = (x >= 0.0f) ? quarter_pi - quarter_pi * (x - y_mag) / (x + y_mag)
                                             : three_quarter_pi - quarter_pi * (x + y_mag) / (y_mag - x);

    return (y < 0.0f) ? -unsigned_angle : unsigned_angle;
}
cr, cj; multiply(ar, aj, br, -bj, &cr, &cj); return (float)(fast_atan2(cj, cr) * M_1_PI); } float fm_quadri_demod(float ar, float aj, float br, float bj) { return (float)((br * aj - ar * bj) / (ar * ar + aj * aj + 1.0f) * M_1_PI); } #endif /* NFM */ class AFC { const status _prev_axcindicate; #ifdef WITH_BCM_VC float square(const GPU_FFT_COMPLEX* fft_results, size_t index) { return fft_results[index].re * fft_results[index].re + fft_results[index].im * fft_results[index].im; } #else float square(const fftwf_complex* fft_results, size_t index) { return fft_results[index][0] * fft_results[index][0] + fft_results[index][1] * fft_results[index][1]; } #endif /* WITH_BCM_VC */ template size_t check(const FFT_RESULTS* fft_results, const size_t base, const float base_value, unsigned char afc) { float threshold = 0; size_t bin; for (bin = base;; bin += STEP) { if (STEP < 0) { if (bin < -STEP) break; } else if ((size_t)(bin + STEP) >= fft_size) break; const float value = square(fft_results, (size_t)(bin + STEP)); if (value <= base_value) break; if (base == (size_t)bin) { threshold = (value - base_value) / (float)afc; } else { if ((value - base_value) < threshold) break; threshold += threshold / 10.0; } } return bin; } public: AFC(device_t* dev, int index) : _prev_axcindicate(dev->channels[index].axcindicate) {} template void finalize(device_t* dev, int index, const FFT_RESULTS* fft_results) { channel_t* channel = &dev->channels[index]; if (channel->afc == 0) return; const char axcindicate = channel->axcindicate; if (axcindicate != NO_SIGNAL && _prev_axcindicate == NO_SIGNAL) { const size_t base = dev->base_bins[index]; const float base_value = square(fft_results, base); size_t bin = check(fft_results, base, base_value, channel->afc); if (bin == base) bin = check(fft_results, base, base_value, channel->afc); if (dev->bins[index] != bin) { #ifdef AFC_LOGGING log(LOG_INFO, "AFC device=%d channel=%d: base=%zu prev=%zu now=%zu\n", dev->device, index, base, dev->bins[index], bin); 
#endif /* AFC_LOGGING */ dev->bins[index] = bin; if (bin > base) channel->axcindicate = AFC_UP; else if (bin < base) channel->axcindicate = AFC_DOWN; } } else if (axcindicate == NO_SIGNAL && _prev_axcindicate != NO_SIGNAL) dev->bins[index] = dev->base_bins[index]; } }; void init_demod(demod_params_t* params, Signal* signal, int device_start, int device_end) { assert(params != NULL); assert(signal != NULL); params->mp3_signal = signal; params->device_start = device_start; params->device_end = device_end; #ifndef WITH_BCM_VC params->fftin = fftwf_alloc_complex(fft_size); params->fftout = fftwf_alloc_complex(fft_size); params->fft = fftwf_plan_dft_1d(fft_size, params->fftin, params->fftout, FFTW_FORWARD, FFTW_MEASURE); #endif /* WITH_BCM_VC */ } bool init_output(channel_t* channel, output_t* output) { if (output->has_mp3_output) { output->lame = airlame_init(channel->mode, channel->highpass, channel->lowpass); output->lamebuf = (unsigned char*)malloc(sizeof(unsigned char) * LAMEBUF_SIZE); } if (output->type == O_ICECAST) { shout_setup((icecast_data*)(output->data), channel->mode); } else if (output->type == O_UDP_STREAM) { udp_stream_data* sdata = (udp_stream_data*)(output->data); if (!udp_stream_init(sdata, channel->mode, (size_t)WAVE_BATCH * sizeof(float))) { return false; } #ifdef WITH_PULSEAUDIO } else if (output->type == O_PULSE) { pulse_init(); pulse_setup((pulse_data*)(output->data), channel->mode); #endif /* WITH_PULSEAUDIO */ } return true; } void init_output_params(output_params_t* params, int device_start, int device_end, int mixer_start, int mixer_end) { assert(params != NULL); params->mp3_signal = new Signal; params->device_start = device_start; params->device_end = device_end; params->mixer_start = mixer_start; params->mixer_end = mixer_end; } int next_device(demod_params_t* params, int current) { current++; if (current < params->device_end) { return current; } return params->device_start; } void* demodulate(void* params) { assert(params != NULL); 
demod_params_t* demod_params = (demod_params_t*)params; debug_print("Starting demod thread, devices %d:%d, signal %p\n", demod_params->device_start, demod_params->device_end, demod_params->mp3_signal); // initialize fft engine #ifdef WITH_BCM_VC int mb = mbox_open(); struct GPU_FFT* fft; int ret = gpu_fft_prepare(mb, fft_size_log, GPU_FFT_FWD, FFT_BATCH, &fft); switch (ret) { case -1: log(LOG_CRIT, "Unable to enable V3D. Please check your firmware is up to date.\n"); error(); break; case -2: log(LOG_CRIT, "log2_N=%d not supported. Try between 8 and 17.\n", fft_size_log); error(); break; case -3: log(LOG_CRIT, "Out of memory. Try a smaller batch or increase GPU memory.\n"); error(); break; } #else fftwf_complex* fftin = demod_params->fftin; fftwf_complex* fftout = demod_params->fftout; #endif /* WITH_BCM_VC */ float ALIGNED32 levels_u8[256], levels_s8[256]; float* levels_ptr = NULL; for (int i = 0; i < 256; i++) { levels_u8[i] = (i - 127.5f) / 127.5f; } for (int16_t i = -127; i < 128; i++) { levels_s8[(uint8_t)i] = i / 128.0f; } // initialize fft window // blackman 7 // the whole matrix is computed #ifdef WITH_BCM_VC float ALIGNED32 window[fft_size * 2]; #else float ALIGNED32 window[fft_size]; #endif /* WITH_BCM_VC */ const double a0 = 0.27105140069342f; const double a1 = 0.43329793923448f; const double a2 = 0.21812299954311f; const double a3 = 0.06592544638803f; const double a4 = 0.01081174209837f; const double a5 = 0.00077658482522f; const double a6 = 0.00001388721735f; for (size_t i = 0; i < fft_size; i++) { double x = a0 - (a1 * cos((2.0 * M_PI * i) / (fft_size - 1))) + (a2 * cos((4.0 * M_PI * i) / (fft_size - 1))) - (a3 * cos((6.0 * M_PI * i) / (fft_size - 1))) + (a4 * cos((8.0 * M_PI * i) / (fft_size - 1))) - (a5 * cos((10.0 * M_PI * i) / (fft_size - 1))) + (a6 * cos((12.0 * M_PI * i) / (fft_size - 1))); #ifdef WITH_BCM_VC window[i * 2] = window[i * 2 + 1] = (float)x; #else window[i] = (float)x; #endif /* WITH_BCM_VC */ } #ifdef DEBUG struct timeval ts, te; 
gettimeofday(&ts, NULL); #endif /* DEBUG */ size_t available; int device_num = demod_params->device_start; while (true) { if (do_exit) { #ifdef WITH_BCM_VC log(LOG_INFO, "Freeing GPU memory\n"); gpu_fft_release(fft); #endif /* WITH_BCM_VC */ return NULL; } device_t* dev = devices + device_num; pthread_mutex_lock(&dev->input->buffer_lock); if (dev->input->bufe >= dev->input->bufs) available = dev->input->bufe - dev->input->bufs; else available = dev->input->buf_size - dev->input->bufs + dev->input->bufe; pthread_mutex_unlock(&dev->input->buffer_lock); if (devices_running == 0) { log(LOG_ERR, "All receivers failed, exiting\n"); do_exit = 1; continue; } if (dev->input->state != INPUT_RUNNING) { if (dev->input->state == INPUT_FAILED) { dev->input->state = INPUT_DISABLED; disable_device_outputs(dev); devices_running--; } device_num = next_device(demod_params, device_num); continue; } // number of input bytes per output wave sample (x 2 for I and Q) size_t bps = 2 * dev->input->bytes_per_sample * (size_t)round((double)dev->input->sample_rate / (double)WAVE_RATE); if (available < bps * FFT_BATCH + fft_size * dev->input->bytes_per_sample * 2) { // move to next device device_num = next_device(demod_params, device_num); SLEEP(10); continue; } if (dev->input->sfmt == SFMT_S16) { float const scale = 1.0f / dev->input->fullscale; #ifdef WITH_BCM_VC struct GPU_FFT_COMPLEX* ptr = fft->in; for (size_t b = 0; b < FFT_BATCH; b++, ptr += fft->step) { short* buf2 = (short*)(dev->input->buffer + dev->input->bufs + b * bps); for (size_t i = 0; i < fft_size; i++, buf2 += 2) { ptr[i].re = scale * (float)buf2[0] * window[i * 2]; ptr[i].im = scale * (float)buf2[1] * window[i * 2]; } } #else short* buf2 = (short*)(dev->input->buffer + dev->input->bufs); for (size_t i = 0; i < fft_size; i++, buf2 += 2) { fftin[i][0] = scale * (float)buf2[0] * window[i]; fftin[i][1] = scale * (float)buf2[1] * window[i]; } #endif /* WITH_BCM_VC */ } else if (dev->input->sfmt == SFMT_F32) { float const scale = 
1.0f / dev->input->fullscale; #ifdef WITH_BCM_VC struct GPU_FFT_COMPLEX* ptr = fft->in; for (size_t b = 0; b < FFT_BATCH; b++, ptr += fft->step) { float* buf2 = (float*)(dev->input->buffer + dev->input->bufs + b * bps); for (size_t i = 0; i < fft_size; i++, buf2 += 2) { ptr[i].re = scale * buf2[0] * window[i * 2]; ptr[i].im = scale * buf2[1] * window[i * 2]; } } #else // WITH_BCM_VC float* buf2 = (float*)(dev->input->buffer + dev->input->bufs); for (size_t i = 0; i < fft_size; i++, buf2 += 2) { fftin[i][0] = scale * buf2[0] * window[i]; fftin[i][1] = scale * buf2[1] * window[i]; } #endif /* WITH_BCM_VC */ } else { // S8 or U8 levels_ptr = (dev->input->sfmt == SFMT_U8 ? levels_u8 : levels_s8); #ifdef WITH_BCM_VC sample_fft_arg sfa = {fft_size / 4, fft->in}; for (size_t i = 0; i < FFT_BATCH; i++) { samplefft(&sfa, dev->input->buffer + dev->input->bufs + i * bps, window, levels_ptr); sfa.dest += fft->step; } #else unsigned char* buf2 = dev->input->buffer + dev->input->bufs; for (size_t i = 0; i < fft_size; i++, buf2 += 2) { fftin[i][0] = levels_ptr[buf2[0]] * window[i]; fftin[i][1] = levels_ptr[buf2[1]] * window[i]; } #endif /* WITH_BCM_VC */ } #ifdef WITH_BCM_VC gpu_fft_execute(fft); #else fftwf_execute(demod_params->fft); #endif /* WITH_BCM_VC */ #ifdef WITH_BCM_VC for (int i = 0; i < dev->channel_count; i++) { float* wavein = dev->channels[i].wavein + dev->waveend; __builtin_prefetch(wavein, 1); const int bin = dev->bins[i]; const GPU_FFT_COMPLEX* fftout = fft->out + bin; for (int j = 0; j < FFT_BATCH; j++, ++wavein, fftout += fft->step) *wavein = sqrtf(fftout->im * fftout->im + fftout->re * fftout->re); } for (int j = 0; j < dev->channel_count; j++) { if (dev->channels[j].needs_raw_iq) { struct GPU_FFT_COMPLEX* ptr = fft->out; for (int job = 0; job < FFT_BATCH; job++) { dev->channels[j].iq_in[2 * (dev->waveend + job)] = ptr[dev->bins[j]].re; dev->channels[j].iq_in[2 * (dev->waveend + job) + 1] = ptr[dev->bins[j]].im; ptr += fft->step; } } } #else for (int j = 0; j 
< dev->channel_count; j++) { dev->channels[j].wavein[dev->waveend] = sqrtf(fftout[dev->bins[j]][0] * fftout[dev->bins[j]][0] + fftout[dev->bins[j]][1] * fftout[dev->bins[j]][1]); if (dev->channels[j].needs_raw_iq) { dev->channels[j].iq_in[2 * dev->waveend] = fftout[dev->bins[j]][0]; dev->channels[j].iq_in[2 * dev->waveend + 1] = fftout[dev->bins[j]][1]; } } #endif /* WITH_BCM_VC */ dev->waveend += FFT_BATCH; if (dev->waveend >= WAVE_BATCH + AGC_EXTRA) { for (int i = 0; i < dev->channel_count; i++) { AFC afc(dev, i); channel_t* channel = dev->channels + i; freq_t* fparms = channel->freqlist + channel->freq_idx; // set to NO_SIGNAL, will be updated to SIGNAL based on squelch below channel->axcindicate = NO_SIGNAL; for (int j = AGC_EXTRA; j < WAVE_BATCH + AGC_EXTRA; j++) { float& real = channel->iq_in[2 * (j - AGC_EXTRA)]; float& imag = channel->iq_in[2 * (j - AGC_EXTRA) + 1]; fparms->squelch.process_raw_sample(channel->wavein[j]); // If squelch is open / opening and using I/Q, then cleanup the signal and possibly update squelch. 
if (fparms->squelch.should_filter_sample() && channel->needs_raw_iq) { // remove phase rotation introduced by FFT sliding window float swf, cwf, re_tmp, im_tmp; sincosf_lut(channel->dm_phi, &swf, &cwf); multiply(real, imag, cwf, -swf, &re_tmp, &im_tmp); channel->dm_phi += channel->dm_dphi; channel->dm_phi &= 0xffffff; // apply lowpass filter, will be a no-op if not configured fparms->lowpass_filter.apply(re_tmp, im_tmp); // update I/Q and wave real = re_tmp; imag = im_tmp; channel->wavein[j] = sqrt(real * real + imag * imag); // update squelch post-cleanup if (fparms->lowpass_filter.enabled()) { fparms->squelch.process_filtered_sample(channel->wavein[j]); } } if (fparms->modulation == MOD_AM) { // if squelch is just opening then bootstrip agcavgfast with prior values of wavein if (fparms->squelch.first_open_sample()) { for (int k = j - AGC_EXTRA; k < j; k++) { if (channel->wavein[k] >= fparms->squelch.squelch_level()) { fparms->agcavgfast = fparms->agcavgfast * 0.9f + channel->wavein[k] * 0.1f; } } } // if squelch is just closing then fade out the prior samples of waveout else if (fparms->squelch.last_open_sample()) { for (int k = j - AGC_EXTRA + 1; k < j; k++) { channel->waveout[k] = channel->waveout[k - 1] * 0.94f; } } } float& waveout = channel->waveout[j]; // If squelch sees power then do modulation-specific processing if (fparms->squelch.should_process_audio()) { if (fparms->modulation == MOD_AM) { if (channel->wavein[j] > fparms->squelch.squelch_level()) { fparms->agcavgfast = fparms->agcavgfast * 0.995f + channel->wavein[j] * 0.005f; } waveout = (channel->wavein[j - AGC_EXTRA] - fparms->agcavgfast) / (fparms->agcavgfast * 1.5f); if (abs(waveout) > 0.8f) { waveout *= 0.85f; fparms->agcavgfast *= 1.15f; } } #ifdef NFM else if (fparms->modulation == MOD_NFM) { // FM demod if (fm_demod == FM_FAST_ATAN2) { waveout = polar_disc_fast(real, imag, channel->pr, channel->pj); } else if (fm_demod == FM_QUADRI_DEMOD) { waveout = fm_quadri_demod(real, imag, channel->pr, 
channel->pj); } channel->pr = real; channel->pj = imag; // de-emphasis IIR + DC blocking fparms->agcavgfast = fparms->agcavgfast * 0.995f + waveout * 0.005f; waveout -= fparms->agcavgfast; waveout = waveout * (1.0f - channel->alpha) + channel->prev_waveout * channel->alpha; // save off waveout before notch and ampfactor channel->prev_waveout = waveout; } #endif /* NFM */ // process audio sample for CTCSS, will be no-op if not configured fparms->squelch.process_audio_sample(waveout); } // If squelch is still open then save samples to output if (fparms->squelch.is_open()) { // apply the notch filter, will be a no-op if not configured fparms->notch_filter.apply(waveout); // apply the ampfactor waveout *= fparms->ampfactor; // make sure the value is between +/- 1 (requirement for libmp3lame) if (isnan(waveout)) { waveout = 0.0; } else if (waveout > 1.0) { waveout = 1.0; } else if (waveout < -1.0) { waveout = -1.0; } channel->axcindicate = SIGNAL; if (channel->has_iq_outputs) { channel->iq_out[2 * (j - AGC_EXTRA)] = real; channel->iq_out[2 * (j - AGC_EXTRA) + 1] = imag; } // Squelch is closed } else { waveout = 0; if (channel->has_iq_outputs) { channel->iq_out[2 * (j - AGC_EXTRA)] = 0; channel->iq_out[2 * (j - AGC_EXTRA) + 1] = 0; } } } memmove(channel->wavein, channel->wavein + WAVE_BATCH, (dev->waveend - WAVE_BATCH) * sizeof(float)); if (channel->needs_raw_iq) { memmove(channel->iq_in, channel->iq_in + 2 * WAVE_BATCH, (dev->waveend - WAVE_BATCH) * sizeof(float) * 2); } #ifdef WITH_BCM_VC afc.finalize(dev, i, fft->out); #else afc.finalize(dev, i, demod_params->fftout); #endif /* WITH_BCM_VC */ if (tui) { char symbol = fparms->squelch.signal_outside_filter() ? 
'~' : (char)channel->axcindicate; if (dev->mode == R_SCAN) { GOTOXY(0, device_num * 17 + dev->row + 3); printf("%4.0f/%3.0f%c %7.3f ", level_to_dBFS(fparms->squelch.signal_level()), level_to_dBFS(fparms->squelch.noise_level()), symbol, (dev->channels[0].freqlist[channel->freq_idx].frequency / 1000000.0)); } else { GOTOXY(i * 10, device_num * 17 + dev->row + 3); printf("%4.0f/%3.0f%c ", level_to_dBFS(fparms->squelch.signal_level()), level_to_dBFS(fparms->squelch.noise_level()), symbol); } fflush(stdout); } if (channel->axcindicate != NO_SIGNAL) { channel->freqlist[channel->freq_idx].active_counter++; } } if (dev->waveavail == 1) { debug_print("devices[%d]: output channel overrun\n", device_num); dev->output_overrun_count++; } else { dev->waveavail = 1; } dev->waveend -= WAVE_BATCH; #ifdef DEBUG gettimeofday(&te, NULL); debug_bulk_print("waveavail %lu.%lu %lu\n", te.tv_sec, (unsigned long)te.tv_usec, (te.tv_sec - ts.tv_sec) * 1000000UL + te.tv_usec - ts.tv_usec); ts.tv_sec = te.tv_sec; ts.tv_usec = te.tv_usec; #endif /* DEBUG */ demod_params->mp3_signal->send(); dev->row++; if (dev->row == 12) { dev->row = 0; } } dev->input->bufs = (dev->input->bufs + bps * FFT_BATCH) % dev->input->buf_size; device_num = next_device(demod_params, device_num); } } void usage() { cout << "Usage: rtl_airband [options] [-c ]\n\ \t-h\t\t\tDisplay this help text\n\ \t-f\t\t\tRun in foreground, display textual waterfalls\n\ \t-F\t\t\tRun in foreground, do not display waterfalls (for running as a systemd service)\n"; #ifdef NFM cout << "\t-Q\t\t\tUse quadri correlator for FM demodulation (default is atan2)\n"; #endif /* NFM */ #ifdef DEBUG cout << "\t-d \t\tLog debugging information to (default is " << DEBUG_PATH << ")\n"; #endif /* DEBUG */ cout << "\t-e\t\t\tPrint messages to standard error (disables syslog logging)\n"; cout << "\t-c \tUse non-default configuration file\n\t\t\t\t(default: " << CFGFILE << ")\n\ \t-v\t\t\tDisplay version and exit\n"; exit(EXIT_SUCCESS); } static int 
count_devices_running() { int ret = 0; for (int i = 0; i < device_count; i++) { if (devices[i].input->state == INPUT_RUNNING) { ret++; } } return ret; } int main(int argc, char* argv[]) { #ifdef WITH_PROFILING ProfilerStart("rtl_airband.prof"); #endif /* WITH_PROFILING */ #pragma GCC diagnostic ignored "-Wwrite-strings" char* cfgfile = CFGFILE; char* pidfile = PIDFILE; #pragma GCC diagnostic warning "-Wwrite-strings" int opt; char optstring[16] = "efFhvc:"; #ifdef NFM strcat(optstring, "Q"); #endif /* NFM */ #ifdef DEBUG strcat(optstring, "d:"); #endif /* DEBUG */ int foreground = 0; // daemonize int do_syslog = 1; while ((opt = getopt(argc, argv, optstring)) != -1) { switch (opt) { #ifdef NFM case 'Q': fm_demod = FM_QUADRI_DEMOD; break; #endif /* NFM */ #ifdef DEBUG case 'd': debug_path = strdup(optarg); break; #endif /* DEBUG */ case 'e': do_syslog = 0; break; case 'f': foreground = 1; tui = 1; break; case 'F': foreground = 1; tui = 0; break; case 'c': cfgfile = optarg; break; case 'v': cout << "RTLSDR-Airband version " << RTL_AIRBAND_VERSION << "\n"; exit(EXIT_SUCCESS); case 'h': default: usage(); break; } } #ifdef DEBUG if (!debug_path) debug_path = strdup(DEBUG_PATH); init_debug(debug_path); #endif /* DEBUG */ // If executing other than as root, GPU memory gets alloc'd and the // 'permission denied' message on /dev/mem kills rtl_airband without // releasing GPU memory. #ifdef WITH_BCM_VC // XXX should probably do this check in other circumstances also. 
if (0 != getuid()) { cerr << "FFT library requires that rtl_airband be executed as root\n"; exit(1); } #endif /* WITH_BCM_VC */ // read config try { Config config; config.readFile(cfgfile); Setting& root = config.getRoot(); if (root.exists("pidfile")) pidfile = strdup(root["pidfile"]); if (root.exists("fft_size")) { int fsize = (int)(root["fft_size"]); fft_size_log = 0; for (size_t i = MIN_FFT_SIZE_LOG; i <= MAX_FFT_SIZE_LOG; i++) { if (fsize == 1 << i) { fft_size = (size_t)fsize; fft_size_log = i; break; } } if (fft_size_log == 0) { cerr << "Configuration error: invalid fft_size value (must be a power of two in range " << (1 << MIN_FFT_SIZE_LOG) << "-" << (1 << MAX_FFT_SIZE_LOG) << ")\n"; error(); } } if (root.exists("shout_metadata_delay")) shout_metadata_delay = (int)(root["shout_metadata_delay"]); if (shout_metadata_delay < 0 || shout_metadata_delay > 2 * TAG_QUEUE_LEN) { cerr << "Configuration error: shout_metadata_delay is out of allowed range (0-" << 2 * TAG_QUEUE_LEN << ")\n"; error(); } if (root.exists("localtime") && (bool)root["localtime"] == true) use_localtime = true; if (root.exists("multiple_demod_threads") && (bool)root["multiple_demod_threads"] == true) { #ifdef WITH_BCM_VC cerr << "Using multiple_demod_threads not supported with BCM VideoCore for FFT\n"; exit(1); #endif /* WITH_BCM_VC */ multiple_demod_threads = true; } if (root.exists("multiple_output_threads") && (bool)root["multiple_output_threads"] == true) { multiple_output_threads = true; } if (root.exists("log_scan_activity") && (bool)root["log_scan_activity"] == true) log_scan_activity = true; if (root.exists("stats_filepath")) stats_filepath = strdup(root["stats_filepath"]); #ifdef NFM if (root.exists("tau")) alpha = ((int)root["tau"] == 0 ? 
0.0f : exp(-1.0f / (WAVE_RATE * 1e-6 * (int)root["tau"]))); #endif /* NFM */ Setting& devs = config.lookup("devices"); device_count = devs.getLength(); if (device_count < 1) { cerr << "Configuration error: no devices defined\n"; error(); } struct sigaction sigact, pipeact; memset(&sigact, 0, sizeof(sigact)); memset(&pipeact, 0, sizeof(pipeact)); pipeact.sa_handler = SIG_IGN; sigact.sa_handler = &sighandler; sigaction(SIGPIPE, &pipeact, NULL); sigaction(SIGHUP, &sigact, NULL); sigaction(SIGINT, &sigact, NULL); sigaction(SIGQUIT, &sigact, NULL); sigaction(SIGTERM, &sigact, NULL); devices = (device_t*)XCALLOC(device_count, sizeof(device_t)); shout_init(); if (do_syslog) { openlog("rtl_airband", LOG_PID, LOG_DAEMON); log_destination = SYSLOG; } else if (foreground) { log_destination = STDERR; } else { log_destination = NONE; } if (root.exists("mixers")) { Setting& mx = config.lookup("mixers"); mixers = (mixer_t*)XCALLOC(mx.getLength(), sizeof(struct mixer_t)); if ((mixer_count = parse_mixers(mx)) > 0) { mixers = (mixer_t*)XREALLOC(mixers, mixer_count * sizeof(struct mixer_t)); } else { free(mixers); } } else { mixer_count = 0; } uint32_t devs_enabled = parse_devices(devs); if (devs_enabled < 1) { cerr << "Configuration error: no devices defined\n"; error(); } device_count = devs_enabled; debug_print("mixer_count=%d\n", mixer_count); #ifdef DEBUG for (int z = 0; z < mixer_count; z++) { mixer_t* m = &mixers[z]; debug_print("mixer[%d]: name=%s, input_count=%d, output_count=%d\n", z, m->name, m->input_count, m->channel.output_count); } #endif /* DEBUG */ } catch (const FileIOException& e) { cerr << "Cannot read configuration file " << cfgfile << "\n"; error(); } catch (const ParseException& e) { cerr << "Error while parsing configuration file " << cfgfile << " line " << e.getLine() << ": " << e.getError() << "\n"; error(); } catch (const SettingNotFoundException& e) { cerr << "Configuration error: mandatory parameter missing: " << e.getPath() << "\n"; error(); } catch 
(const SettingTypeException& e) { cerr << "Configuration error: invalid parameter type: " << e.getPath() << "\n"; error(); } catch (const ConfigException& e) { cerr << "Unhandled config exception\n"; error(); } log(LOG_INFO, "RTLSDR-Airband version %s starting\n", RTL_AIRBAND_VERSION); if (!foreground) { int pid1, pid2; if ((pid1 = fork()) == -1) { cerr << "Cannot fork child process: " << strerror(errno) << "\n"; error(); } if (pid1) { waitpid(-1, NULL, 0); return (0); } else { if ((pid2 = fork()) == -1) { cerr << "Cannot fork child process: " << strerror(errno) << "\n"; error(); } if (pid2) { return (0); } else { int nullfd, dupfd; if ((nullfd = open("/dev/null", O_RDWR)) == -1) { log(LOG_CRIT, "Cannot open /dev/null: %s\n", strerror(errno)); error(); } for (dupfd = 0; dupfd <= 2; dupfd++) { if (dup2(nullfd, dupfd) == -1) { log(LOG_CRIT, "dup2(): %s\n", strerror(errno)); error(); } } if (nullfd > 2) close(nullfd); FILE* f = fopen(pidfile, "w"); if (f == NULL) { log(LOG_WARNING, "Cannot write pidfile: %s\n", strerror(errno)); } else { fprintf(f, "%ld\n", (long)getpid()); fclose(f); } } } } for (int i = 0; i < mixer_count; i++) { if (mixers[i].enabled == false) { continue; // no inputs connected = no need to initialize output } channel_t* channel = &mixers[i].channel; for (int k = 0; k < channel->output_count; k++) { output_t* output = channel->outputs + k; if (!init_output(channel, output)) { cerr << "Failed to initialize mixer " << i << " output " << k << " - aborting\n"; error(); } } } for (int i = 0; i < device_count; i++) { device_t* dev = devices + i; for (int j = 0; j < dev->channel_count; j++) { channel_t* channel = dev->channels + j; for (int k = 0; k < channel->output_count; k++) { output_t* output = channel->outputs + k; if (!init_output(channel, output)) { cerr << "Failed to initialize device " << i << " channel " << j << " output " << k << " - aborting\n"; error(); } } } if (input_init(dev->input) != 0 || dev->input->state != INPUT_INITIALIZED) { if 
(errno != 0) { cerr << "Failed to initialize input device " << i << ": " << strerror(errno) << " - aborting\n"; } else { cerr << "Failed to initialize input device " << i << " - aborting\n"; } error(); } if (input_start(dev->input) != 0) { cerr << "Failed to start input on device " << i << ": " << strerror(errno) << " - aborting\n"; error(); } if (dev->mode == R_SCAN) { // FIXME: set errno if (pthread_mutex_init(&dev->tag_queue_lock, NULL) != 0) { cerr << "Failed to initialize mutex - aborting\n"; error(); } // FIXME: not needed when freq_count == 1? pthread_create(&dev->controller_thread, NULL, &controller_thread, dev); } } int timeout = 50; // 5 seconds while ((devices_running = count_devices_running()) != device_count && timeout > 0) { SLEEP(100); timeout--; } if ((devices_running = count_devices_running()) != device_count) { log(LOG_ERR, "%d device(s) failed to initialize - aborting\n", device_count - devices_running); error(); } if (tui) { printf("\e[1;1H\e[2J"); GOTOXY(0, 0); printf(" "); for (int i = 0; i < device_count; i++) { GOTOXY(0, i * 17 + 1); for (int j = 0; j < devices[i].channel_count; j++) { printf(" %7.3f ", devices[i].channels[j].freqlist[devices[i].channels[j].freq_idx].frequency / 1000000.0); } if (i != device_count - 1) { GOTOXY(0, i * 17 + 16); printf("-------------------------------------------------------------------------------"); } } } THREAD output_check; pthread_create(&output_check, NULL, &output_check_thread, NULL); int demod_thread_count = multiple_demod_threads ? 
device_count : 1; demod_params_t* demod_params = (demod_params_t*)XCALLOC(demod_thread_count, sizeof(demod_params_t)); THREAD* demod_threads = (THREAD*)XCALLOC(demod_thread_count, sizeof(THREAD)); int output_thread_count = 1; if (multiple_output_threads) { output_thread_count = demod_thread_count; if (mixer_count > 0) { output_thread_count++; } } output_params_t* output_params = (output_params_t*)XCALLOC(output_thread_count, sizeof(output_params_t)); THREAD* output_threads = (THREAD*)XCALLOC(output_thread_count, sizeof(THREAD)); // Setup the output and demod threads if (multiple_output_threads == false) { init_output_params(&output_params[0], 0, device_count, 0, mixer_count); if (multiple_demod_threads == false) { init_demod(&demod_params[0], output_params[0].mp3_signal, 0, device_count); } else { for (int i = 0; i < demod_thread_count; i++) { init_demod(&demod_params[i], output_params[0].mp3_signal, i, i + 1); } } } else { if (multiple_demod_threads == false) { init_output_params(&output_params[0], 0, device_count, 0, 0); init_demod(&demod_params[0], output_params[0].mp3_signal, 0, device_count); } else { for (int i = 0; i < device_count; i++) { init_output_params(&output_params[i], i, i + 1, 0, 0); init_demod(&demod_params[i], output_params[i].mp3_signal, i, i + 1); } } if (mixer_count > 0) { init_output_params(&output_params[output_thread_count - 1], 0, 0, 0, mixer_count); } } // Startup the output threads for (int i = 0; i < output_thread_count; i++) { pthread_create(&output_threads[i], NULL, &output_thread, &output_params[i]); } // Startup the mixer thread (if there is one) using the signal for the last output thread THREAD mixer; if (mixer_count > 0) { pthread_create(&mixer, NULL, &mixer_thread, output_params[output_thread_count - 1].mp3_signal); } #ifdef WITH_PULSEAUDIO pulse_start(); #endif /* WITH_PULSEAUDIO */ sincosf_lut_init(); // Startup the demod threads for (int i = 0; i < demod_thread_count; i++) { pthread_create(&demod_threads[i], NULL, 
&demodulate, &demod_params[i]); } // Wait for demod threads to exit for (int i = 0; i < demod_thread_count; i++) { pthread_join(demod_threads[i], NULL); } log(LOG_INFO, "Cleaning up\n"); for (int i = 0; i < device_count; i++) { if (devices[i].mode == R_SCAN) pthread_join(devices[i].controller_thread, NULL); if (input_stop(devices[i].input) != 0 || devices[i].input->state != INPUT_STOPPED) { if (errno != 0) { log(LOG_ERR, "Failed do stop device #%d: %s\n", i, strerror(errno)); } else { log(LOG_ERR, "Failed do stop device #%d\n", i); } } } log(LOG_INFO, "Input threads closed\n"); for (int i = 0; i < device_count; i++) { device_t* dev = devices + i; disable_device_outputs(dev); } if (mixer_count > 0) { log(LOG_INFO, "Closing mixer thread\n"); pthread_join(mixer, NULL); } log(LOG_INFO, "Closing output thread(s)\n"); for (int i = 0; i < output_thread_count; i++) { output_params[i].mp3_signal->send(); pthread_join(output_threads[i], NULL); } for (int i = 0; i < device_count; i++) { device_t* dev = devices + i; for (int j = 0; j < dev->channel_count; j++) { channel_t* channel = dev->channels + j; for (int k = 0; k < channel->output_count; k++) { output_t* output = channel->outputs + k; if (output->lame) { lame_close(output->lame); } } } } close_debug(); #ifdef WITH_PROFILING ProfilerStop(); #endif /* WITH_PROFILING */ return 0; } ================================================ FILE: src/rtl_airband.h ================================================ /* * rtl_airband.h * Global declarations * * Copyright (c) 2015-2021 Tomasz Lemiech * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
// Build-wide constants and helper macros.
//
// Every macro that expands to an expression is fully parenthesized, and macro
// arguments are parenthesized at the point of use, so the macros can be
// embedded in larger expressions (e.g. `2 * WAVE_LEN`, `x / WAVE_BATCH`,
// `SLEEP(a + b)`) without operator-precedence surprises.
#define ALIGNED32 __attribute__((aligned(32)))
#define SLEEP(x) usleep((x) * 1000)  // x is in milliseconds
#define THREAD pthread_t
#define GOTOXY(x, y) printf("%c[%d;%df", 0x1B, (y), (x))  // ANSI cursor positioning: row y, column x

#ifndef SYSCONFDIR
#define SYSCONFDIR "/usr/local/etc"
#endif /* SYSCONFDIR */

#define CFGFILE SYSCONFDIR "/rtl_airband.conf"
#define PIDFILE "/run/rtl_airband.pid"

#define MIN_BUF_SIZE 2560000
#define DEFAULT_SAMPLE_RATE 2560000

#ifdef NFM
#define WAVE_RATE 16000
#else
#define WAVE_RATE 8000
#endif /* NFM */

#define WAVE_BATCH (WAVE_RATE / 8)  // samples handed to the output stage at once
#define AGC_EXTRA 100               // extra samples of history kept in front of each batch
#define WAVE_LEN (2 * WAVE_BATCH + AGC_EXTRA)
#define MP3_RATE 8000
#define MAX_SHOUT_QUEUELEN 32768
#define TAG_QUEUE_LEN 16

#define MIN_FFT_SIZE_LOG 8
#define DEFAULT_FFT_SIZE_LOG 9
#define MAX_FFT_SIZE_LOG 13

#define LAMEBUF_SIZE 22000  // todo: calculate
#define MIX_DIVISOR 2
// State of a single UDP stream output (output type O_UDP_STREAM).
// Instances are handed to the udp_stream_init / udp_stream_write /
// udp_stream_shutdown functions declared further down in this header.
struct udp_stream_data {
    // NOTE(review): presumably a scratch buffer used by the two-channel
    // udp_stream_write() overload to combine left and right samples -
    // confirm in udp_stream.cpp
    float* stereo_buffer;
    // size of stereo_buffer; units (bytes vs. samples) are not visible here - TODO confirm
    size_t stereo_buffer_len;

    // NOTE(review): other outputs use a flag of the same name to keep
    // emitting data while no signal is present - confirm for UDP
    bool continuous;
    // destination host and port, kept as strings
    const char* dest_address;
    const char* dest_port;

    // socket descriptor used for sending
    int send_socket;
    // destination socket address and its length
    struct sockaddr dest_sockaddr;
    socklen_t dest_sockaddr_len;
};
// Simple one-to-one wake-up primitive built on a pthread condition variable.
//
// send() records that a wake-up is pending and signals the condition;
// wait() blocks until a wake-up is pending and then consumes it.
//
// The pending flag is what makes the primitive reliable:
//  - a send() issued while nobody is blocked in wait() is remembered instead
//    of being lost, so the next wait() returns immediately;
//  - wait() re-checks the flag in a loop, so spurious wake-ups of
//    pthread_cond_wait() do not make it return early.
// Several send() calls made before a wait() collapse into a single wake-up.
class Signal {
   public:
    Signal(void) : pending_(false) {
        pthread_cond_init(&cond_, NULL);
        pthread_mutex_init(&mutex_, NULL);
    }

    // Mark a wake-up as pending and wake a waiter, if any.
    void send(void) {
        pthread_mutex_lock(&mutex_);
        pending_ = true;
        pthread_cond_signal(&cond_);
        pthread_mutex_unlock(&mutex_);
    }

    // Block until a wake-up is pending, then consume it.
    void wait(void) {
        pthread_mutex_lock(&mutex_);
        while (!pending_) {
            pthread_cond_wait(&cond_, &mutex_);
        }
        pending_ = false;
        pthread_mutex_unlock(&mutex_);
    }

   private:
    pthread_cond_t cond_;
    pthread_mutex_t mutex_;
    bool pending_;  // guarded by mutex_
};
// Per-device (per-receiver) state shared between main(), the demodulator
// thread and the output threads.
struct device_t {
    // sample source of this device; initialized, started and stopped by main()
    input_t* input;
#ifdef NFM
    // NOTE(review): presumably the device-level default for channel_t::alpha - confirm in config.cpp
    float alpha;
#endif /* NFM */
    // number of entries in channels
    int channel_count;
    // bins[i] is the FFT output bin the demodulator reads for channel i;
    // NOTE(review): base_bins presumably holds the bins before AFC adjustment - confirm
    size_t *base_bins, *bins;
    channel_t* channels;
    // FIXME: size_t
    // number of samples currently stored in each channel's wavein buffer;
    // grows by FFT_BATCH per FFT pass and shrinks by WAVE_BATCH once a batch
    // has been processed
    int waveend;
    // set to 1 by the demodulator when a processed batch is ready; finding it
    // still set when the next batch is ready is counted as an output overrun
    int waveavail;
    // controller thread, created by main() only for devices in R_SCAN mode
    THREAD controller_thread;
    // queue of frequency tags, see tag_queue_put / tag_queue_get / tag_queue_advance
    struct freq_tag tag_queue[TAG_QUEUE_LEN];
    int tq_head, tq_tail;
    int last_frequency;
    // initialized by main() for devices in R_SCAN mode
    pthread_mutex_t tag_queue_lock;
    // text UI row used for this device's next level printout; cycles 0..11
    int row;
    int failed;
    // R_MULTICHANNEL or R_SCAN
    enum rec_modes mode;
    // number of batches that became ready while waveavail was still set
    size_t output_overrun_count;
};
devices; extern mixer_t* mixers; // util.cpp int atomic_inc(volatile int* pv); int atomic_dec(volatile int* pv); int atomic_get(volatile int* pv); double atofs(char* s); double delta_sec(const timeval* start, const timeval* stop); void log(int priority, const char* format, ...); void tag_queue_put(device_t* dev, int freq, struct timeval tv); void tag_queue_get(device_t* dev, struct freq_tag* tag); void tag_queue_advance(device_t* dev); void sincosf_lut_init(); void sincosf_lut(uint32_t phi, float* sine, float* cosine); void* xcalloc(size_t nmemb, size_t size, const char* file, const int line, const char* func); void* xrealloc(void* ptr, size_t size, const char* file, const int line, const char* func); #define XCALLOC(nmemb, size) xcalloc((nmemb), (size), __FILE__, __LINE__, __func__) #define XREALLOC(ptr, size) xrealloc((ptr), (size), __FILE__, __LINE__, __func__) float dBFS_to_level(const float& dBFS); float level_to_dBFS(const float& level); // mixer.cpp mixer_t* getmixerbyname(const char* name); int mixer_connect_input(mixer_t* mixer, float ampfactor, float balance); void mixer_disable_input(mixer_t* mixer, int input_idx); void mixer_put_samples(mixer_t* mixer, int input_idx, const float* samples, bool has_signal, unsigned int len); void* mixer_thread(void* params); const char* mixer_get_error(); // config.cpp int parse_devices(libconfig::Setting& devs); int parse_mixers(libconfig::Setting& mx); // udp_stream.cpp bool udp_stream_init(udp_stream_data* sdata, mix_modes mode, size_t len); void udp_stream_write(udp_stream_data* sdata, const float* data, size_t len); void udp_stream_write(udp_stream_data* sdata, const float* data_left, const float* data_right, size_t len); void udp_stream_shutdown(udp_stream_data* sdata); #ifdef WITH_PULSEAUDIO #define PULSE_STREAM_LATENCY_LIMIT 10000000UL // pulse.cpp void pulse_init(); int pulse_setup(pulse_data* pdata, mix_modes mixmode); void pulse_start(); void pulse_shutdown(pulse_data* pdata); void 
pulse_write_stream(pulse_data* pdata, mix_modes mode, const float* data_left, const float* data_right, size_t len); #endif /* WITH_PULSEAUDIO */ #endif /* _RTL_AIRBAND_H */ ================================================ FILE: src/rtl_airband_neon.s ================================================ # # RTLSDR AM demodulator and streaming # # Copyright (c) 2014 Wong Man Hang # # Updates for NEON coprocessor by Tomasz Lemiech # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, see . 
	.text
	.align	2
	.global	samplefft
	.type	samplefft, %function
	.fpu	neon

# void samplefft(sample_fft_arg* a, unsigned char* buffer, float* window, float* levels)
#
# Converts 8-bit input samples to windowed floats for the FFT.
# Each loop iteration consumes 8 input bytes, maps every byte to a float
# through the `levels` lookup table, multiplies the 8 floats element-wise by
# the next 8 floats of `window` and stores the 8 products to the destination.
# The loop runs a->fft_size_by4 times.
#
# Register usage:
#   r0      on entry: a; afterwards: destination pointer (post-incremented)
#   r1      input byte pointer (advanced by 8 per iteration)
#   r2      window pointer (post-incremented by 8 floats per iteration)
#   r3      base address of the levels table (4-byte entries, indexed by byte value)
#   r4      remaining iteration count
#   r5-r12  the 8 input bytes, then the 8 looked-up 32-bit level values
#   q2,q3   (d4-d7)   levels        q4,q5 (d8-d11) window
#   q6,q7   (d12-d15) products
samplefft:
	push {r4-r12, lr}
	vpush {d4-d15}

#r0 is sample_fft_arg
#[r0, #0] is fft_size_by4
#[r0, #4] is dest
	ldr r4, [r0]
	ldr r0, [r0, #4]

# load the first group of 8 input bytes before entering the loop
	ldrb r5, [r1]
	ldrb r6, [r1, #1]
	ldrb r7, [r1, #2]
	ldrb r8, [r1, #3]
	ldrb r9, [r1, #4]
	ldrb r10, [r1, #5]
	ldrb r11, [r1, #6]
	ldrb r12, [r1, #7]

.a:
# table lookup: replace each byte value with levels[value] (index scaled by 4)
	ldr r5, [r3, r5, LSL #2]
	ldr r6, [r3, r6, LSL #2]
	ldr r7, [r3, r7, LSL #2]
	ldr r8, [r3, r8, LSL #2]
	ldr r9, [r3, r9, LSL #2]
	ldr r10, [r3, r10, LSL #2]
	ldr r11, [r3, r11, LSL #2]
	ldr r12, [r3, r12, LSL #2]

# load window to NEON registers
	vldmia r2!,{d8-d11}
	add r1, r1, #8

# move level from ARM to NEON registers
	vmov d4, r5, r6
	vmov d5, r7, r8
	vmov d6, r9, r10
	vmov d7, r11, r12

# products = levels * window; the pld instructions preload upcoming input and
# window data while the multiplies are in flight
	pld [r1, #16]
	vmul.f32 q6, q2, q4
	vmul.f32 q7, q3, q5
	pld [r2, #8]

# load the next group of 8 input bytes for the following iteration
# NOTE(review): this also executes on the last iteration, so 8 bytes past the
# last converted group are read (and then discarded) - confirm the input
# buffer always has that much readable space behind it
	ldrb r5, [r1]
	ldrb r6, [r1, #1]
	ldrb r7, [r1, #2]
	ldrb r8, [r1, #3]
	ldrb r9, [r1, #4]
	ldrb r10, [r1, #5]
	ldrb r11, [r1, #6]
	ldrb r12, [r1, #7]

# store the 8 products and advance the destination pointer
	vstmia r0!,{q6-q7}

	subs r4, r4, #1
	bne .a

	vpop {d4-d15}
	pop {r4-r12, pc}
*/ #include "squelch.h" #ifdef DEBUG_SQUELCH #include // errno #include // strerror() #endif /* DEBUG_SQUELCH _*/ #include // calloc() #include // min() #include // assert() #include // pow() #include "logging.h" // debug_print() using namespace std; Squelch::Squelch(void) { noise_floor_ = 5.0f; set_squelch_snr_threshold(9.54f); // depends on noise_floor_, sets using_manual_level_, normal_signal_ratio_, flappy_signal_ratio_, and moving_avg_cap_ manual_signal_level_ = -1.0; pre_filter_ = {0.001f, 0.001f}; post_filter_ = {0.001f, 0.001f}; squelch_level_ = 0.0f; using_post_filter_ = false; pre_vs_post_factor_ = 0.9f; open_delay_ = 197; close_delay_ = 197; low_signal_abort_ = 88; next_state_ = CLOSED; current_state_ = CLOSED; delay_ = 0; open_count_ = 0; sample_count_ = -1; flappy_count_ = 0; low_signal_count_ = 0; recent_sample_size_ = 1000; flap_opens_threshold_ = 3; recent_open_count_ = 0; closed_sample_count_ = 0; buffer_size_ = 102; // NOTE: this is specific to the 2nd order lowpass Bessel filter buffer_head_ = 0; buffer_tail_ = 1; buffer_ = (float*)calloc(buffer_size_, sizeof(float)); #ifdef DEBUG_SQUELCH debug_file_ = NULL; raw_input_ = 0.0; filtered_input_ = 0.0; #endif /* DEBUG_SQUELCH */ assert(open_delay_ > buffer_size_); debug_print("Created Squelch, open_delay_: %d, close_delay_: %d, low_signal_abort: %d, using_manual_level_: %s\n", open_delay_, close_delay_, low_signal_abort_, using_manual_level_ ? "true" : "false"); } void Squelch::set_squelch_level_threshold(const float& level) { if (level > 0) { using_manual_level_ = true; manual_signal_level_ = level; } else { using_manual_level_ = false; } // Need to update moving_avg_cap_ - depends on using_manual_level_ and manual_signal_level_ calculate_moving_avg_cap(); debug_print("Set level threshold, using_manual_level_: %s, manual_signal_level_: %f, moving_avg_cap_: %f\n", using_manual_level_ ? 
"true" : "false", manual_signal_level_, moving_avg_cap_); } void Squelch::set_squelch_snr_threshold(const float& db) { using_manual_level_ = false; normal_signal_ratio_ = pow(10.0, db / 20.0); flappy_signal_ratio_ = normal_signal_ratio_ * 0.9f; // Need to update moving_avg_cap_ - depends on using_manual_level_ and normal_signal_ratio_ calculate_moving_avg_cap(); debug_print("SNR threshold updated, using_manual_level_: %s, normal_signal_ratio_: %f, flappy_signal_ratio_: %f, moving_avg_cap_: %f\n", using_manual_level_ ? "true" : "false", normal_signal_ratio_, flappy_signal_ratio_, moving_avg_cap_); } void Squelch::set_ctcss_freq(const float& ctcss_freq, const float& sample_rate) { // create two CTCSS detectors with different window sizes. 0.4 sec is required to tell between all the "standard" // tones but 0.05 is enough to tell between tones ~20 Hz appart. Will use ctcss_fast_ until there are enough samples // for ctcss_slow_ ctcss_fast_ = CTCSS(ctcss_freq, sample_rate, sample_rate * 0.05); ctcss_slow_ = CTCSS(ctcss_freq, sample_rate, sample_rate * 0.4); } bool Squelch::is_open(void) const { // if current state is OPEN or CLOSING then decide based on CTCSS (if enabled) if (current_state_ == OPEN || current_state_ == CLOSING) { // if CTCSS is enabled then use slow (more accurate) if it has enough samples, otherwise // use fast (will return false if also not enough samples) if (ctcss_slow_.is_enabled()) { if (ctcss_slow_.enough_samples()) { return ctcss_slow_.has_tone(); } return ctcss_fast_.has_tone(); } return true; } return false; } bool Squelch::should_filter_sample(void) { return ((has_pre_filter_signal() || current_state_ != CLOSED) && current_state_ != LOW_SIGNAL_ABORT); } bool Squelch::should_process_audio(void) { return (current_state_ == OPEN || current_state_ == CLOSING); } bool Squelch::first_open_sample(void) const { return (current_state_ != OPEN && next_state_ == OPEN); } bool Squelch::last_open_sample(void) const { return (current_state_ == CLOSING && 
next_state_ == CLOSED) || (current_state_ != LOW_SIGNAL_ABORT && next_state_ == LOW_SIGNAL_ABORT); } bool Squelch::signal_outside_filter(void) { return (using_post_filter_ && has_pre_filter_signal() && !has_post_filter_signal()); } const float& Squelch::noise_level(void) const { return noise_floor_; } const float& Squelch::signal_level(void) const { return pre_filter_.full_; } const float& Squelch::squelch_level(void) { if (using_manual_level_) { return manual_signal_level_; } if (squelch_level_ == 0.0f) { if (currently_flapping() && flappy_signal_ratio_ < normal_signal_ratio_) { squelch_level_ = flappy_signal_ratio_ * noise_floor_; } else { squelch_level_ = normal_signal_ratio_ * noise_floor_; } } return squelch_level_; } const size_t& Squelch::open_count(void) const { return open_count_; } const size_t& Squelch::flappy_count(void) const { return flappy_count_; } const size_t& Squelch::ctcss_count(void) const { return ctcss_slow_.found_count(); } const size_t& Squelch::no_ctcss_count(void) const { return ctcss_slow_.not_found_count(); } void Squelch::process_raw_sample(const float& sample) { // Update current state based on previous state from last iteration update_current_state(); #ifdef DEBUG_SQUELCH raw_input_ = sample; #endif /* DEBUG_SQUELCH */ sample_count_++; // Auto noise floor // - Doing this every 16 samples instead of every sample allows a gradual signal increase // to cross the squelch threshold (that is a function of the noise floor) sooner. // - Updating even when squelch is open and / or signal is outside filter means the noise // floor (and squelch threshold) will slowly increasing during a long signal. This can lead // to flapping, but this keeps a sudden and sustained increase of noise from locking squelch // OPEN. 
if (sample_count_ % 16 == 0) { calculate_noise_floor(); } update_moving_avg(pre_filter_, sample); // Apply the comparison factor before adding to the buffer, will later be used as the threshold // for the post_filter_ buffer_[buffer_head_] = pre_filter_.capped_ * pre_vs_post_factor_; // Check signal against thresholds if (current_state_ == OPEN && !has_signal()) { debug_print("Closing at %zu: no signal after timeout (%f, %f, %f)\n", sample_count_, pre_filter_.capped_, post_filter_.capped_, squelch_level()); set_state(CLOSING); } if (current_state_ == CLOSED && has_signal()) { debug_print("Opening at %zu: signal (%f, %f, %f)\n", sample_count_, pre_filter_.capped_, post_filter_.capped_, squelch_level()); set_state(OPENING); } // Override squelch and close if there are repeated samples under the squelch level // NOTE: this can cause squelch to close, but it may immediately be re-opened if the signal level still hasn't fallen after the delays if (current_state_ != CLOSED && current_state_ != LOW_SIGNAL_ABORT) { if (sample >= squelch_level()) { low_signal_count_ = 0; } else { low_signal_count_++; if (low_signal_count_ >= low_signal_abort_) { debug_print("Low signal abort at %zu: low signal count %d\n", sample_count_, low_signal_count_); set_state(LOW_SIGNAL_ABORT); } } } } void Squelch::process_filtered_sample(const float& sample) { #ifdef DEBUG_SQUELCH filtered_input_ = sample; #endif /* DEBUG_SQUELCH */ if (!should_filter_sample()) { return; } if (current_state_ == OPENING) { // While OPENING, need to wait until the pre-filter value gets through the buffer if (delay_ < buffer_size_) { return; } // Buffer has been filled, initialize post-filter with the pre-filter value if (delay_ == buffer_size_) { post_filter_ = {buffer_[buffer_tail_], buffer_[buffer_tail_]}; } } using_post_filter_ = true; update_moving_avg(post_filter_, sample); // Always comparing the post-filter average to the buffered pre-filtered value if (post_filter_.capped_ < buffer_[buffer_tail_]) { 
debug_print("Closing at %zu: signal level post filter (%f < %f)\n", sample_count_, post_filter_.capped_, squelch_level()); set_state(CLOSED); } } void Squelch::process_audio_sample(const float& sample) { #ifdef DEBUG_SQUELCH audio_input_ = sample; #endif /* DEBUG_SQUELCH */ if (!ctcss_slow_.is_enabled()) { return; } // ctcss_ is reset on transition to CLOSED and stays "unused" while CLOSED if (current_state_ != CLOSED) { // always send the sample to the slow (more accurate) detector, also send to the fast if there havent been enough yet ctcss_slow_.process_audio_sample(sample); if (!ctcss_slow_.enough_samples()) { ctcss_fast_.process_audio_sample(sample); } } } void Squelch::set_state(State update) { // Valid transitions (current_state_ -> next_state_) are: // - CLOSED -> CLOSED // - CLOSED -> OPENING // --------------------------- // - OPENING -> CLOSED // - OPENING -> OPENING // - OPENING -> CLOSING // - OPENING -> OPEN // --------------------------- // - CLOSING -> CLOSED // - CLOSING -> OPENING // - CLOSING -> CLOSING // - CLOSING -> LOW_SIGNAL_ABORT // - CLOSING -> OPEN // --------------------------- // - LOW_SIGNAL_ABORT -> CLOSED // - LOW_SIGNAL_ABORT -> LOW_SIGNAL_ABORT // --------------------------- // - OPEN -> CLOSING // - OPEN -> LOW_SIGNAL_ABORT // - OPEN -> OPEN // Invalid transistions (current_state_ -> next_state_) are: // CLOSED -> CLOSING (if already CLOSED cant go backwards) if (current_state_ == CLOSED && update == CLOSING) { update = CLOSED; } // CLOSED -> LOW_SIGNAL_ABORT (if already CLOSED cant go backwards) else if (current_state_ == CLOSED && update == LOW_SIGNAL_ABORT) { update = CLOSED; } // CLOSED -> OPEN (must go through OPENING to get to OPEN) else if (current_state_ == CLOSED && update == OPEN) { update = OPENING; } // OPENING -> LOW_SIGNAL_ABORT (just go to CLOSED instead) else if (current_state_ == OPENING && update == LOW_SIGNAL_ABORT) { update = CLOSED; } // LOW_SIGNAL_ABORT -> OPENING (LOW_SIGNAL_ABORT can only go to CLOSED) // 
LOW_SIGNAL_ABORT -> OPEN (LOW_SIGNAL_ABORT can only go to CLOSED) // LOW_SIGNAL_ABORT -> CLOSING (LOW_SIGNAL_ABORT can only go to CLOSED) else if (current_state_ == LOW_SIGNAL_ABORT && update != LOW_SIGNAL_ABORT && update != CLOSED) { update = CLOSED; } // OPEN -> CLOSED (must go through CLOSING to get to CLOSED) else if (current_state_ == OPEN && update == CLOSED) { update = CLOSING; } // OPEN -> OPENING (if already OPEN cant go backwards) else if (current_state_ == OPEN && update == OPENING) { update = OPEN; } next_state_ = update; } void Squelch::update_current_state(void) { if (next_state_ == OPENING) { if (current_state_ != OPENING) { debug_print("%zu: transitioning to OPENING\n", sample_count_); delay_ = 0; low_signal_count_ = 0; using_post_filter_ = false; current_state_ = next_state_; } else { // in OPENING delay delay_++; if (delay_ >= open_delay_) { // After getting through OPENING delay, count this as an "open" for flap // detection even if signal has gone. NOTE - if process_filtered_sample() would // have already sent state to CLOSED before the delay if post_filter_.capped_ was // too low, so that wont count towards flapping if (closed_sample_count_ < recent_sample_size_) { recent_open_count_++; if (currently_flapping()) { flappy_count_++; } // Force squelch_level_ recalculation at next call to squelch_level() squelch_level_ = 0.0f; } // Check signal level after delay to either go to OPEN or CLOSED if (has_signal()) { next_state_ = OPEN; } else { debug_print("%zu: no signal after OPENING delay, going to CLOSED\n", sample_count_); next_state_ = CLOSED; } } } } else if (next_state_ == CLOSING) { if (current_state_ != CLOSING) { debug_print("%zu: transitioning to CLOSING\n", sample_count_); delay_ = 0; current_state_ = next_state_; } else { // in CLOSING delay delay_++; if (delay_ >= close_delay_) { if (!has_signal()) { next_state_ = CLOSED; } else { debug_print("%zu: signal after CLOSING delay, reverting to OPEN\n", sample_count_); current_state_ = OPEN; 
// set current_state_ to avoid incrementing open_count_ next_state_ = OPEN; } } } } else if (next_state_ == LOW_SIGNAL_ABORT) { if (current_state_ != LOW_SIGNAL_ABORT) { debug_print("%zu: transitioning to LOW_SIGNAL_ABORT\n", sample_count_); // If coming from CLOSING then keep the delay counter that has already started if (current_state_ != CLOSING) { delay_ = 0; } current_state_ = next_state_; } else { // in LOW_SIGNAL_ABORT delay delay_++; if (delay_ >= close_delay_) { next_state_ = CLOSED; } } } else if (next_state_ == OPEN && current_state_ != OPEN) { debug_print("%zu: transitioning to OPEN\n", sample_count_); open_count_++; current_state_ = next_state_; } else if (next_state_ == CLOSED && current_state_ != CLOSED) { debug_print("%zu: transitioning to CLOSED\n", sample_count_); using_post_filter_ = false; closed_sample_count_ = 0; current_state_ = next_state_; ctcss_fast_.reset(); ctcss_slow_.reset(); } else if (next_state_ == CLOSED && current_state_ == CLOSED) { // Count this as a closed sample towards flap detection (can stop counting at recent_sample_size_) if (closed_sample_count_ < recent_sample_size_) { closed_sample_count_++; } else if (closed_sample_count_ == recent_sample_size_) { recent_open_count_ = 0; squelch_level_ = 0.0f; // Force squelch_level_ recalculation } } else { current_state_ = next_state_; } buffer_tail_ = (buffer_tail_ + 1) % buffer_size_; buffer_head_ = (buffer_head_ + 1) % buffer_size_; #ifdef DEBUG_SQUELCH debug_state(); #endif /* DEBUG_SQUELCH */ } bool Squelch::has_pre_filter_signal(void) { return pre_filter_.capped_ >= squelch_level(); } bool Squelch::has_post_filter_signal(void) { return using_post_filter_ && post_filter_.capped_ >= buffer_[buffer_tail_]; } bool Squelch::has_signal(void) { if (using_post_filter_) { return has_pre_filter_signal() && has_post_filter_signal(); } return has_pre_filter_signal(); } void Squelch::calculate_noise_floor(void) { static const float decay_factor = 0.97f; static const float new_factor = 1.0 
- decay_factor; noise_floor_ = noise_floor_ * decay_factor + std::min(pre_filter_.capped_, noise_floor_) * new_factor + 1e-6f; debug_print("%zu: noise floor is now %f\n", sample_count_, noise_floor_); // Need to update moving_avg_cap_ - depends on noise_floor_ calculate_moving_avg_cap(); // Force squelch_level_ recalculation at next call to squelch_level() - depends on noise_floor_ squelch_level_ = 0.0f; } void Squelch::calculate_moving_avg_cap(void) { // set max value for MovingAverage's capped_ to 1.5 x the normal / manual squelch level. if (using_manual_level_) { moving_avg_cap_ = 1.5f * manual_signal_level_; } else { moving_avg_cap_ = 1.5f * normal_signal_ratio_ * noise_floor_; } } void Squelch::update_moving_avg(MovingAverage& avg, const float& sample) { static const float decay_factor = 0.99f; static const float new_factor = 1.0 - decay_factor; avg.full_ = avg.full_ * decay_factor + sample * new_factor; // Cap average level, this lets the average drop after the signal goes away more quickly // (if current value and update are both at/above the max then can avoid the float multiplications) if (avg.capped_ >= moving_avg_cap_ && sample >= moving_avg_cap_) { avg.capped_ = moving_avg_cap_; } else { avg.capped_ = min(moving_avg_cap_, avg.capped_ * decay_factor + sample * new_factor); } } bool Squelch::currently_flapping(void) const { return recent_open_count_ >= flap_opens_threshold_; } #ifdef DEBUG_SQUELCH /* Debug file methods ================== Values written to file are: - (int16_t) process_raw_sample input - (int16_t) process_filtered_sample input - (int16_t) process_audio_sample input - (int16_t) noise_floor_ - (int16_t) pre_filter_.capped_ - (int16_t) post_filter_.capped_ - (int) current_state_ - (int) delay_ - (int) low_signalcount_ - (int) ctcss_fast_.has_tone() - (int) ctcss_slow_.has_tone() The output file can be read / plotted in python as follows: import matplotlib.pyplot as plt import numpy as np def plot_squelch_debug(filepath): dt = 
np.dtype([('raw_input', np.single), ('filtered_input', np.single), ('audio_input', np.single), ('noise_floor', np.single), ('pre_filter_capped', np.single), ('post_filter_capped', np.single), ('current_state', np.intc), ('delay', np.intc), ('low_signalcount', np.intc), ('ctcss_fast_has_tone', np.intc), ('ctcss_slow_has_tone', np.intc) ]) dat = np.fromfile(filepath, dtype=dt) plt.figure() plt.plot(dat['raw_input'], 'b') plt.plot(dat['pre_filter_capped'], 'g') plt.plot(dat['noise_floor'], 'r') plt.show(block=False) plt.figure() plt.plot(dat['post_filter_capped'], 'k') plt.show(block=False) plt.figure() axis = plt.subplot2grid((3, 1), (0, 0)) axis.plot(dat['current_state'], 'c') axis = plt.subplot2grid((3, 1), (1, 0)) axis.plot(dat['delay'], 'm') axis = plt.subplot2grid((3, 1), (2, 0)) axis.plot(dat['low_signalcount'], 'y') plt.show(block=False) return */ Squelch::~Squelch(void) { if (debug_file_) { fclose(debug_file_); } } void Squelch::set_debug_file(const char* filepath) { debug_file_ = fopen(filepath, "wb"); } void Squelch::debug_value(const float& value) { if (!debug_file_) { return; } if (fwrite(&value, sizeof(value), 1, debug_file_) != 1) { debug_print("Error writing to squelch debug file: %s\n", strerror(errno)); } } void Squelch::debug_value(const int& value) { if (!debug_file_) { return; } if (fwrite(&value, sizeof(value), 1, debug_file_) != 1) { debug_print("Error writing to squelch debug file: %s\n", strerror(errno)); } } void Squelch::debug_state(void) { if (!debug_file_) { return; } debug_value(raw_input_); debug_value(filtered_input_); debug_value(audio_input_); raw_input_ = 0.0; filtered_input_ = 0.0; audio_input_ = 0.0; debug_value(noise_floor_); debug_value(pre_filter_.capped_); debug_value(post_filter_.capped_); debug_value((int)current_state_); debug_value(delay_); debug_value(low_signal_count_); debug_value((int)ctcss_fast_.has_tone()); debug_value((int)ctcss_slow_.has_tone()); } #endif /* DEBUG_SQUELCH */ 
================================================ FILE: src/squelch.h ================================================ /* * squelch.h * * Copyright (C) 2022-2023 charlie-foxtrot * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . */ #ifndef _SQUELCH_H #define _SQUELCH_H #include // size_t #ifdef DEBUG_SQUELCH #include // needed for debug file output #endif /* DEBUG_SQUELCH */ #include "ctcss.h" /* Theory of operation: Squelch has 5 states, OPEN (has audio), CLOSED (no audio), OPENING (transitioning from CLOSED to OPEN), CLOSING (transitioning from OPEN to CLOSED), and LOW_SIGNAL_ABORT (same as CLOSING but because of a constant signal drop). Squelch is considered "open" when the state is OPEN or CLOSING and squelch is considered "closed" when the state is OPENING, LOW_SIGNAL_ABORT, or CLOSED. Noise floor is computed using a low pass filter and updated with the current sample or prior value, whatever is lower. Noise floor is updated every 16 stamples, except when squelch is open. Low pass filters are also used to track the current signal levels. One level is for the sample before filtering, the second for post signal filtering (if any). The pre-filter signal level is updated for every sample. The post-filter level is optional. When used, the post-filter signal level is compared to a delayed pre-filter value. 
The post-filter is set to a fraction of the pre-filtered value each time state transitions to OPENING, and is not updated while state is CLOSED. Squelch level can be set manually or is computed as a function of the noise floor. When the signal level exceeds the squelch level, the state transitions to OPENING and a delay counter starts, then once the counter is over the state moves to OPEN if there is signal, otherwise back to CLOSED. The same (but opposite) happens when the signal level drops below the squelch level. While the squelch is OPEN, a count of continuous samples that are below the squelch level is maintained. If this count exceeds a threshold then the state moves to LOW_SIGNAL_ABORT. This allows the squelch to close after a sharp drop off in signal before the signal level has caught up. A count of "recent opens" is maintained as a way to detect squelch flapping (ie rapidly opening and closing). When flapping is detected the squelch level is decreased in an attempt to keep squelch open longer. CTCSS tone detection can be enabled. If used, two tone detectors are created at different window lengths. The “fast” detector has less resolution but needs fewer samples while the “slow” detector is more accurate. When CTCSS is enabled, squelch remains CLOSED for an additional 0.05 sec until a tone is detected by the “fast” detector. 
*/ class Squelch { public: Squelch(); void set_squelch_level_threshold(const float& level); void set_squelch_snr_threshold(const float& db); void set_ctcss_freq(const float& ctcss_freq, const float& sample_rate); void process_raw_sample(const float& sample); void process_filtered_sample(const float& sample); void process_audio_sample(const float& sample); bool is_open(void) const; bool should_filter_sample(void); bool should_process_audio(void); bool first_open_sample(void) const; bool last_open_sample(void) const; bool signal_outside_filter(void); const float& noise_level(void) const; const float& signal_level(void) const; const float& squelch_level(void); const size_t& open_count(void) const; const size_t& flappy_count(void) const; const size_t& ctcss_count(void) const; const size_t& no_ctcss_count(void) const; #ifdef DEBUG_SQUELCH ~Squelch(void); void set_debug_file(const char* filepath); #endif /* DEBUG_SQUELCH */ private: enum State { CLOSED, // Audio is suppressed OPENING, // Transitioning closed -> open CLOSING, // Transitioning open -> closed LOW_SIGNAL_ABORT, // Like CLOSING but is_open() is false OPEN // Audio not suppressed }; struct MovingAverage { float full_; float capped_; }; float noise_floor_; // noise level bool using_manual_level_; // if using a manually set signal level threshold float manual_signal_level_; // manually configured squelch level, < 0 for disabled float normal_signal_ratio_; // signal-to-noise ratio for normal squelch - ratio, not in dB float flappy_signal_ratio_; // signal-to-noise ratio for flappy squelch - ratio, not in dB float moving_avg_cap_; // the max value for capped moving average MovingAverage pre_filter_; // average signal level for reference sample MovingAverage post_filter_; // average signal level for post-filter sample float squelch_level_; // cached calculation of the squelch_level() value bool using_post_filter_; // if the caller is providing filtered samples float pre_vs_post_factor_; // multiplier when doing pre 
vs post filter compaison int open_delay_; // how long to wait after signal level crosses squelch to open int close_delay_; // how long to wait after signal level crosses squelch to close int low_signal_abort_; // number of repeated samples below squelch to cause a close State next_state_; State current_state_; int delay_; // samples to wait before making next squelch decision size_t open_count_; // number of times squelch is opened size_t sample_count_; // number of samples processed (for logging) size_t flappy_count_; // number of times squelch was detected as flapping OPEN/CLOSED int low_signal_count_; // number of repeated samples below squelch // Flap detection parameters size_t recent_sample_size_; // number of samples defined as "recent" size_t flap_opens_threshold_; // number of opens to count as flapping size_t recent_open_count_; // number of times squelch recently opened size_t closed_sample_count_; // number of continuous samples where squelch has been CLOSED // Buffered pre-filtered values int buffer_size_; // size of buffer int buffer_head_; // index to add new values int buffer_tail_; // index to read buffered values float* buffer_; // buffer CTCSS ctcss_fast_; // ctcss tone detection CTCSS ctcss_slow_; // ctcss tone detection void set_state(State update); void update_current_state(void); bool has_pre_filter_signal(void); bool has_post_filter_signal(void); bool has_signal(void); void calculate_noise_floor(void); void calculate_moving_avg_cap(void); void update_moving_avg(MovingAverage& avg, const float& sample); bool currently_flapping(void) const; #ifdef DEBUG_SQUELCH FILE* debug_file_; float raw_input_; float filtered_input_; float audio_input_; void debug_value(const float& value); void debug_value(const int& value); void debug_state(void); #endif /* DEBUG_SQUELCH */ }; #endif /* _SQUELCH_H */ ================================================ FILE: src/test_base_class.cpp ================================================ /* * test_base_class.cpp * * 
Copyright (C) 2023 charlie-foxtrot * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . */ #include #include "logging.h" #include "test_base_class.h" using namespace std; void delete_directory(const string& root) { DIR* dp = NULL; dp = opendir(root.c_str()); if (dp == NULL) { cerr << "Error opening directory " << root << endl; return; } string current_dir = "."; string parent_dir = ".."; struct dirent* entry = NULL; while ((entry = readdir(dp))) { if (current_dir.compare(entry->d_name) == 0 || parent_dir.compare(entry->d_name) == 0) { continue; } struct stat info; string filepath = root + "/" + string(entry->d_name); if (stat(filepath.c_str(), &info) != 0) { cerr << "Error getting info on " << filepath.c_str() << ": " << strerror(errno) << endl; continue; } if (S_ISDIR(info.st_mode)) { delete_directory(filepath); } else { unlink(filepath.c_str()); } } closedir(dp); rmdir(root.c_str()); } string make_temp_dir(void) { char temp_path_template[] = "/tmp/temp_unittest_dir_XXXXXX"; if (mkdtemp(temp_path_template) == NULL) { cerr << "Error making temp dir for test files: " << strerror(errno) << endl; return ""; } return string(temp_path_template); } void TestBaseClass::SetUp(void) { ::testing::Test::SetUp(); // setup debug log file for each test temp_dir = make_temp_dir(); ASSERT_FALSE(temp_dir.empty()); string debug_filepath = temp_dir + "/debug_file.log"; init_debug(debug_filepath.c_str()); // point logging to stderr 
log_destination = STDERR; } void TestBaseClass::TearDown(void) { ::testing::Test::TearDown(); close_debug(); delete_directory(temp_dir); } TEST(TestHelpers, make_temp_dir) { // make a temp dir string temp_dir = make_temp_dir(); // path should not be empty string ASSERT_FALSE(temp_dir.empty()); // a directory should exist at the path struct stat info; ASSERT_EQ(stat(temp_dir.c_str(), &info), 0); EXPECT_TRUE(S_ISDIR(info.st_mode)); delete_directory(temp_dir); } TEST(TestHelpers, delete_directory) { // make a temp dir string temp_dir = make_temp_dir(); ASSERT_FALSE(temp_dir.empty()); // build a bunch of nested sub-dirs and files string path = temp_dir; for (int i = 0; i < 5; ++i) { path = path + "/sub_dir"; mkdir(path.c_str(), 0777); string filename = path + "/some_file"; fclose(fopen(filename.c_str(), "w")); } // last sub-dir should exist and be a directory struct stat info; ASSERT_EQ(stat(path.c_str(), &info), 0); EXPECT_TRUE(S_ISDIR(info.st_mode)); // last sub-dir should have a file in it string filename = path + "/some_file"; ASSERT_EQ(stat(filename.c_str(), &info), 0); EXPECT_TRUE(S_ISREG(info.st_mode)); // delete the root temp dir delete_directory(temp_dir); // root temp dir should no longer exist ASSERT_NE(stat(temp_dir.c_str(), &info), 0); } ================================================ FILE: src/test_base_class.h ================================================ /* * test_base_class.h * * Copyright (C) 2023 charlie-foxtrot * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, see . */ #ifndef _TEST_BASE_CLASS_H #define _TEST_BASE_CLASS_H #include #include class TestBaseClass : public ::testing::Test { protected: void SetUp(void); void TearDown(void); std::string temp_dir; }; #endif /* _TEST_BASE_CLASS_H */ ================================================ FILE: src/test_ctcss.cpp ================================================ /* * test_ctcss.cpp * * Copyright (C) 2023 charlie-foxtrot * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . 
*/ #include "generate_signal.h" #include "test_base_class.h" #include "ctcss.h" using namespace std; class CTCSSTest : public TestBaseClass { protected: int sample_rate; int fast_window_size; int slow_window_size; void SetUp(void) { TestBaseClass::SetUp(); sample_rate = 8000; fast_window_size = sample_rate * 0.05; slow_window_size = sample_rate * 0.4; } void write_file(const vector& samples, const string& filepath) { cerr << "writing file out to " << filepath << endl; FILE* fp = fopen(filepath.c_str(), "wb"); for (auto sample : samples) { fwrite(&sample, sizeof(float), 1, fp); } fclose(fp); } void load_from_file(CTCSS& ctcss, const string& filepath) { FILE* fp = fopen(filepath.c_str(), "rb"); while (!ctcss.enough_samples()) { float sample; if (fread(&sample, sizeof(float), 1, fp) != 1) { break; } ctcss.process_audio_sample(sample); } fclose(fp); ASSERT_TRUE(ctcss.enough_samples()); } void test_all_tones(GenerateSignal& signal, const float& tone = 0) { for (auto standard_tone : CTCSS::standard_tones) { // skipping tones within +/- 5Hz if (abs(standard_tone - tone) < 5) { continue; } CTCSS ctcss(standard_tone, sample_rate, slow_window_size); vector samples; run_signal(ctcss, signal, samples); EXPECT_FALSE(ctcss.has_tone()) << "Tone of " << standard_tone << " found, expected " << tone; // on failure write out a file for debugging if (ctcss.has_tone()) { // double the samples to write to the file for later testing size_t initial_count = samples.size(); while (samples.size() < initial_count * 2) { samples.push_back(signal.get_sample()); } string filepath = "/tmp/found_" + to_string(standard_tone) + "_expected_" + to_string(tone); write_file(samples, filepath); } } if (tone != 0) { CTCSS ctcss(tone, sample_rate, slow_window_size); vector samples; run_signal(ctcss, signal, samples); EXPECT_TRUE(ctcss.has_tone()) << "Expected tone of " << tone << " not found"; // on failure write out a file for debugging if (!ctcss.has_tone()) { // double the samples to write to the file 
for later testing size_t initial_count = samples.size(); while (samples.size() < initial_count * 2) { samples.push_back(signal.get_sample()); } string filepath = "/tmp/didnt_find_" + to_string(tone); write_file(samples, filepath); } } } void run_signal(CTCSS& ctcss, GenerateSignal& signal, vector& samples) { EXPECT_TRUE(ctcss.is_enabled()) << "CTCSS not enabled"; while (!ctcss.enough_samples()) { float sample = signal.get_sample(); samples.push_back(sample); ctcss.process_audio_sample(sample); } } }; TEST_F(CTCSSTest, creation) { CTCSS ctcss; EXPECT_FALSE(ctcss.is_enabled()); } TEST_F(CTCSSTest, no_signal) { GenerateSignal signal(sample_rate); test_all_tones(signal); } TEST_F(CTCSSTest, has_tone) { float tone = CTCSS::standard_tones[0]; GenerateSignal signal(sample_rate); signal.add_tone(tone, Tone::NORMAL); signal.add_noise(Noise::NORMAL); test_all_tones(signal, tone); } TEST_F(CTCSSTest, has_non_standard_tone) { float tone = (CTCSS::standard_tones[0] + CTCSS::standard_tones[0]) / 2; GenerateSignal signal(sample_rate); signal.add_tone(tone, Tone::NORMAL); signal.add_noise(Noise::NORMAL); test_all_tones(signal, tone); } TEST_F(CTCSSTest, has_each_standard_tone) { for (auto tone : CTCSS::standard_tones) { GenerateSignal signal(sample_rate); signal.add_tone(tone, Tone::NORMAL); signal.add_noise(Noise::NORMAL); test_all_tones(signal, tone); } } ================================================ FILE: src/test_filters.cpp ================================================ /* * test_filters.cpp * * Copyright (C) 2023 charlie-foxtrot * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . */ #include "test_base_class.h" #include "filters.h" using namespace std; class FiltersTest : public TestBaseClass { protected: void SetUp(void) { TestBaseClass::SetUp(); } void TearDown(void) { TestBaseClass::TearDown(); } }; TEST_F(FiltersTest, default_notch) { NotchFilter notch; EXPECT_FALSE(notch.enabled()); } TEST_F(FiltersTest, default_lowpass) { LowpassFilter lowpass; EXPECT_FALSE(lowpass.enabled()); } ================================================ FILE: src/test_generate_signal.cpp ================================================ /* * test_generate_signal.cpp * * Copyright (C) 2023 charlie-foxtrot * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . 
*/ #include #include "test_base_class.h" #include "generate_signal.h" using namespace std; class ToneTest : public TestBaseClass {}; TEST_F(ToneTest, simple_object) { // simple case the sample rate is a multiple of the frequency so specific points can be measured float tone_freq = 100; // tone at 100 Hz // set sample rate to 1000 times the tone so there will be 250 samples per quarter float sample_rate = 1000 * tone_freq; float amplitude = Tone::STRONG; Tone tone(sample_rate, tone_freq, amplitude); float last_sample = 0.0; float this_sample = 0.0; // loop through some number of cycles for (int j = 0; j < 10; ++j) { // first 249 samples will be positive and increasing for (int i = 0; i < 249; ++i) { this_sample = tone.get_sample(); ASSERT_GT(this_sample, 0.0); ASSERT_GT(this_sample, last_sample); last_sample = this_sample; } // sample 250 will be the amp this_sample = tone.get_sample(); ASSERT_EQ(this_sample, amplitude); ASSERT_GT(this_sample, last_sample); last_sample = this_sample; // next 249 samples will be positive and decreasing for (int i = 0; i < 249; ++i) { this_sample = tone.get_sample(); ASSERT_GT(this_sample, 0.0); ASSERT_LT(this_sample, last_sample); last_sample = this_sample; } // sample 500 will be zero-ish this_sample = tone.get_sample(); ASSERT_LT(this_sample, 0.000001); ASSERT_LT(this_sample, last_sample); last_sample = this_sample; // next 249 samples will be negative and decreasing for (int i = 0; i < 249; ++i) { this_sample = tone.get_sample(); ASSERT_LT(this_sample, 0.0); ASSERT_LT(this_sample, last_sample); last_sample = this_sample; } // sample 750 will be negative amp this_sample = tone.get_sample(); ASSERT_EQ(this_sample, -1.0 * amplitude); ASSERT_LT(this_sample, last_sample); last_sample = this_sample; // next 249 samples will be negative and increasing for (int i = 0; i < 249; ++i) { this_sample = tone.get_sample(); ASSERT_LT(this_sample, 0.0); ASSERT_GT(this_sample, last_sample); last_sample = this_sample; } // sample 1000 will be 
zero-ish this_sample = tone.get_sample(); ASSERT_LT(this_sample, 0.000001); ASSERT_GT(this_sample, last_sample); last_sample = this_sample; } } TEST_F(ToneTest, strengths) { float tone_freq = 100; float sample_rate = 8000; Tone tone_weak(sample_rate, tone_freq, Tone::WEAK); Tone tone_normal(sample_rate, tone_freq, Tone::NORMAL); Tone tone_strong(sample_rate, tone_freq, Tone::STRONG); for (int i = 0; i < 100 * sample_rate; ++i) { float weak_sample = tone_weak.get_sample(); float normal_sample = tone_normal.get_sample(); float strong_sample = tone_strong.get_sample(); if (weak_sample > 0.0) { ASSERT_LT(weak_sample, normal_sample); ASSERT_LT(normal_sample, strong_sample); } else if (weak_sample == 0.0) { ASSERT_EQ(weak_sample, 0.0); ASSERT_EQ(normal_sample, 0.0); ASSERT_EQ(strong_sample, 0.0); } else { ASSERT_GT(weak_sample, normal_sample); ASSERT_GT(normal_sample, strong_sample); } } } class NoiseTest : public TestBaseClass {}; TEST_F(NoiseTest, simple_object) { Noise noise(Noise::STRONG); int sample_count = 10000; float sample_max = 0.0; float sample_min = 0.0; float sample_sum = 0.0; for (int i = 0; i < sample_count; ++i) { float sample = noise.get_sample(); sample_max = max(sample, sample_max); sample_min = min(sample, sample_min); sample_sum += sample; } float sample_avg = sample_sum / sample_count; // average is near zero EXPECT_LE(abs(sample_avg), 0.01); // max and min are off of zero EXPECT_LE(sample_min, Noise::STRONG * -0.3); EXPECT_GT(sample_max, Noise::STRONG * 0.3); } TEST_F(NoiseTest, strengths) { Noise noise_weak(Noise::WEAK); Noise noise_normal(Noise::NORMAL); Noise noise_strong(Noise::STRONG); float weak_max = 0.0; float normal_max = 0.0; float strong_max = 0.0; for (int i = 0; i < 10000; ++i) { weak_max = max(weak_max, abs(noise_weak.get_sample())); normal_max = max(normal_max, abs(noise_normal.get_sample())); strong_max = max(strong_max, abs(noise_strong.get_sample())); } EXPECT_NE(weak_max, 0.0); EXPECT_GT(normal_max, weak_max); 
EXPECT_GT(strong_max, normal_max); } class GenerateSignalTest : public TestBaseClass { protected: int sample_rate; void SetUp(void) { TestBaseClass::SetUp(); sample_rate = 8000; } }; TEST_F(GenerateSignalTest, default_object) { GenerateSignal signal(8000); EXPECT_EQ(signal.get_sample(), 0.0); } TEST_F(GenerateSignalTest, generate_file) { float file_seconds = 10.5; GenerateSignal signal(sample_rate); string test_filepath = temp_dir + "/10_sec_file.dat"; signal.write_file(test_filepath, file_seconds); // make sure the file exists and is the right size struct stat info; ASSERT_EQ(stat(test_filepath.c_str(), &info), 0); EXPECT_TRUE(S_ISREG(info.st_mode)); EXPECT_EQ(info.st_size, sample_rate * file_seconds * sizeof(float)); } TEST_F(GenerateSignalTest, get_sample_no_signals) { GenerateSignal signal(sample_rate); for (int i = 0; i < 60 * sample_rate; ++i) { ASSERT_EQ(signal.get_sample(), 0.0); } } TEST_F(GenerateSignalTest, get_sample_single_tone_only) { float tone_freq = 123.34; float tone_ampl = 0.32; GenerateSignal signal(sample_rate); signal.add_tone(tone_freq, tone_ampl); Tone tone(sample_rate, tone_freq, tone_ampl); for (int i = 0; i < 60 * sample_rate; ++i) { ASSERT_FLOAT_EQ(signal.get_sample(), tone.get_sample()); } } TEST_F(GenerateSignalTest, get_sample_two_tones) { float tone1_freq = 123.34; float tone2_freq = 231.43; float tone1_ampl = Tone::NORMAL; float tone2_ampl = Tone::STRONG; GenerateSignal signal(sample_rate); signal.add_tone(tone1_freq, tone1_ampl); signal.add_tone(tone2_freq, tone2_ampl); Tone tone1(sample_rate, tone1_freq, tone1_ampl); Tone tone2(sample_rate, tone2_freq, tone2_ampl); for (int i = 0; i < 60 * sample_rate; ++i) { ASSERT_NEAR(signal.get_sample(), tone1.get_sample() + tone2.get_sample(), 0.000001); } } TEST_F(GenerateSignalTest, get_sample_only_noise) { GenerateSignal signal(sample_rate); signal.add_noise(Noise::NORMAL); float max_value = 0; float min_value = 0; for (int i = 0; i < 600 * sample_rate; ++i) { float sample = 
signal.get_sample(); min_value = min(sample, min_value); max_value = max(sample, max_value); } EXPECT_GT(max_value, 0); EXPECT_LT(max_value, Noise::NORMAL); EXPECT_LT(min_value, 0); EXPECT_GT(min_value, -1.0 * Noise::NORMAL); } TEST_F(GenerateSignalTest, get_sample_two_tones_and_noise) { float tone1_freq = 123.34; float tone2_freq = 231.43; float tone1_ampl = Tone::NORMAL; float tone2_ampl = Tone::WEAK; GenerateSignal signal(sample_rate); signal.add_tone(tone1_freq, tone1_ampl); signal.add_tone(tone2_freq, tone2_ampl); signal.add_noise(Noise::NORMAL); Tone tone1(sample_rate, tone1_freq, tone1_ampl); Tone tone2(sample_rate, tone2_freq, tone2_ampl); float max_value = 0; float min_value = 0; for (int i = 0; i < 60 * sample_rate; ++i) { float sample_noise = signal.get_sample() - tone1.get_sample() - tone2.get_sample(); min_value = min(sample_noise, min_value); max_value = max(sample_noise, max_value); } EXPECT_GT(max_value, 0); EXPECT_LT(max_value, Noise::NORMAL); EXPECT_LT(min_value, 0); EXPECT_GT(min_value, -1.0 * Noise::NORMAL); } ================================================ FILE: src/test_helper_functions.cpp ================================================ /* * test_output.cpp * * Copyright (C) 2023 charlie-foxtrot * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . 
*/ #include "test_base_class.h" #include "helper_functions.h" using namespace std; class HelperFunctionsTest : public TestBaseClass { protected: void SetUp(void) { TestBaseClass::SetUp(); } void create_file(const string& filepath) { fclose(fopen(filepath.c_str(), "wb")); EXPECT_TRUE(file_exists(filepath)); } }; TEST_F(HelperFunctionsTest, dir_exists_true) { EXPECT_TRUE(dir_exists(temp_dir)); } TEST_F(HelperFunctionsTest, dir_exists_false) { EXPECT_FALSE(dir_exists("/not/a/real/dir")); } TEST_F(HelperFunctionsTest, dir_exists_not_dir) { string file_in_dir = temp_dir + "/some_file"; create_file(file_in_dir); EXPECT_FALSE(dir_exists(file_in_dir)); } TEST_F(HelperFunctionsTest, file_exists_true) { string file_in_dir = temp_dir + "/some_file"; create_file(file_in_dir); EXPECT_TRUE(file_exists(file_in_dir)); } TEST_F(HelperFunctionsTest, file_exists_false) { EXPECT_FALSE(file_exists(temp_dir + "/nothing")); } TEST_F(HelperFunctionsTest, file_exists_not_file) { EXPECT_FALSE(file_exists(temp_dir)); EXPECT_TRUE(dir_exists(temp_dir)); } TEST_F(HelperFunctionsTest, make_dir_normal) { const string dir_path = temp_dir + "/a"; EXPECT_FALSE(dir_exists(dir_path)); EXPECT_TRUE(make_dir(dir_path)); EXPECT_TRUE(dir_exists(dir_path)); } TEST_F(HelperFunctionsTest, make_dir_exists) { EXPECT_TRUE(dir_exists(temp_dir)); EXPECT_TRUE(make_dir(temp_dir)); EXPECT_TRUE(dir_exists(temp_dir)); } TEST_F(HelperFunctionsTest, make_dir_empty) { EXPECT_FALSE(make_dir("")); } TEST_F(HelperFunctionsTest, make_dir_fail) { EXPECT_FALSE(make_dir("/this/path/does/not/exist")); } TEST_F(HelperFunctionsTest, make_dir_file_in_the_way) { const string file_path = temp_dir + "/some_file"; create_file(file_path); EXPECT_FALSE(make_dir(file_path)); } TEST_F(HelperFunctionsTest, make_subdirs_exists) { EXPECT_TRUE(dir_exists(temp_dir)); EXPECT_TRUE(make_subdirs(temp_dir, "")); EXPECT_TRUE(dir_exists(temp_dir)); } TEST_F(HelperFunctionsTest, make_subdirs_one_subdir) { const string path = "bob"; 
EXPECT_FALSE(dir_exists(temp_dir + "/" + path)); EXPECT_TRUE(make_subdirs(temp_dir, path)); EXPECT_TRUE(dir_exists(temp_dir + "/" + path)); } TEST_F(HelperFunctionsTest, make_subdirs_multiple_subdir) { const string path = "bob/joe/sam"; EXPECT_FALSE(dir_exists(temp_dir + "/" + path)); EXPECT_TRUE(make_subdirs(temp_dir, path)); EXPECT_TRUE(dir_exists(temp_dir + "/" + path)); } TEST_F(HelperFunctionsTest, make_subdirs_file_in_the_way) { const string file_in_dir = temp_dir + "/some_file"; create_file(file_in_dir); EXPECT_TRUE(file_exists(file_in_dir)); EXPECT_FALSE(make_subdirs(temp_dir, "some_file/some_dir")); EXPECT_FALSE(dir_exists(file_in_dir)); EXPECT_TRUE(file_exists(file_in_dir)); } TEST_F(HelperFunctionsTest, make_subdirs_create_base) { EXPECT_FALSE(dir_exists(temp_dir + "/base_dir/a")); EXPECT_TRUE(make_subdirs(temp_dir + "/base_dir", "a")); EXPECT_TRUE(dir_exists(temp_dir + "/base_dir/a")); } TEST_F(HelperFunctionsTest, make_subdirs_extra_slashes) { EXPECT_FALSE(dir_exists(temp_dir + "/a/b/c/d")); EXPECT_TRUE(make_subdirs(temp_dir, "///a/b////c///d")); EXPECT_TRUE(dir_exists(temp_dir + "/a/b/c/d")); } TEST_F(HelperFunctionsTest, make_dated_subdirs_normal) { struct tm time_struct; strptime("2010-3-7", "%Y-%m-%d", &time_struct); const string dir_path = temp_dir + "/2010/03/07"; EXPECT_FALSE(dir_exists(dir_path)); EXPECT_EQ(make_dated_subdirs(temp_dir, &time_struct), dir_path); EXPECT_TRUE(dir_exists(dir_path)); } TEST_F(HelperFunctionsTest, make_dated_subdirs_fail) { struct tm time_struct; strptime("2010-3-7", "%Y-%m-%d", &time_struct); EXPECT_EQ(make_dated_subdirs("/invalid/base/dir", &time_struct), ""); } TEST_F(HelperFunctionsTest, make_dated_subdirs_some_exist) { struct tm time_struct; const string dir_through_month = temp_dir + "/2010/03/"; strptime("2010-3-7", "%Y-%m-%d", &time_struct); EXPECT_EQ(make_dated_subdirs(temp_dir, &time_struct), dir_through_month + "07"); EXPECT_TRUE(dir_exists(dir_through_month)); EXPECT_FALSE(dir_exists(dir_through_month + 
"08")); strptime("2010-3-8", "%Y-%m-%d", &time_struct); EXPECT_EQ(make_dated_subdirs(temp_dir, &time_struct), dir_through_month + "08"); EXPECT_TRUE(dir_exists(dir_through_month + "08")); } ================================================ FILE: src/test_squelch.cpp ================================================ /* * test_squelch.cpp * * Copyright (C) 2023 charlie-foxtrot * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . */ #include "generate_signal.h" #include "test_base_class.h" #include "squelch.h" using namespace std; class SquelchTest : public TestBaseClass { protected: void SetUp(void) { TestBaseClass::SetUp(); raw_no_signal_sample = 0.05; raw_signal_sample = 0.75; } void TearDown(void) { TestBaseClass::TearDown(); } // send through "no signal" samples to get noise floor down void send_samples_for_noise_floor(Squelch& squelch) { while (squelch.noise_level() > 1.01 * raw_no_signal_sample) { squelch.process_raw_sample(raw_no_signal_sample); } ASSERT_LE(squelch.noise_level(), 1.01 * raw_no_signal_sample); ASSERT_GT(raw_signal_sample, squelch.squelch_level()); } float raw_no_signal_sample; float raw_signal_sample; }; TEST_F(SquelchTest, default_object) { Squelch squelch; EXPECT_EQ(squelch.open_count(), 0); } TEST_F(SquelchTest, noise_floor) { Squelch squelch; // noise floor starts high EXPECT_GT(squelch.noise_level(), 10.0 * raw_no_signal_sample); // noise floor drifts down towards (but never at) the incoming raw 
sample level float last_noise_level, this_noise_level; this_noise_level = squelch.noise_level(); do { last_noise_level = this_noise_level; // not all samples update noise floor for (int j = 0; j < 25; ++j) { squelch.process_raw_sample(raw_no_signal_sample); } this_noise_level = squelch.noise_level(); ASSERT_LE(this_noise_level, last_noise_level); } while (this_noise_level != last_noise_level); // noise floor ends up close to the incoming level EXPECT_LT(squelch.noise_level(), 1.01 * raw_no_signal_sample); } TEST_F(SquelchTest, normal_operation) { Squelch squelch; // send through "no signal" samples to get noise floor down send_samples_for_noise_floor(squelch); ASSERT_LE(squelch.noise_level(), 1.01 * raw_no_signal_sample); ASSERT_GT(raw_signal_sample, squelch.squelch_level()); // send through "signal" samples and squelch should open shortly for (int i = 0; i < 500 && !squelch.is_open(); ++i) { squelch.process_raw_sample(raw_signal_sample); } ASSERT_TRUE(squelch.is_open()); ASSERT_TRUE(squelch.should_process_audio()); // send through a bunch more "signal" values and squelch stays open for (int i = 0; i < 1000; ++i) { squelch.process_raw_sample(raw_signal_sample); } ASSERT_TRUE(squelch.is_open()); ASSERT_TRUE(squelch.should_process_audio()); // send through "no signal" samples and squelch should close quickly for (int i = 0; i < 100 && squelch.is_open(); ++i) { squelch.process_raw_sample(raw_no_signal_sample); } ASSERT_FALSE(squelch.is_open()); ASSERT_FALSE(squelch.should_process_audio()); } TEST_F(SquelchTest, dead_spot) { Squelch squelch; send_samples_for_noise_floor(squelch); // send through "signal" samples and squelch should open shortly for (int i = 0; i < 500 && !squelch.is_open(); ++i) { squelch.process_raw_sample(raw_signal_sample); } ASSERT_TRUE(squelch.is_open()); ASSERT_TRUE(squelch.should_process_audio()); // send through a bunch more "signal" values and squelch stays open for (int i = 0; i < 1000; ++i) { squelch.process_raw_sample(raw_signal_sample); } 
ASSERT_TRUE(squelch.is_open()); ASSERT_TRUE(squelch.should_process_audio()); // send through a dead spot of "no signal" and squelch should stay open for (int i = 0; i < 50; ++i) { squelch.process_raw_sample(raw_no_signal_sample); ASSERT_TRUE(squelch.is_open()); ASSERT_TRUE(squelch.should_process_audio()); } // send go back to "signal" samples and squelch stays open for (int i = 0; i < 1000; ++i) { squelch.process_raw_sample(raw_signal_sample); ASSERT_TRUE(squelch.is_open()); ASSERT_TRUE(squelch.should_process_audio()); } } TEST_F(SquelchTest, should_process_audio) { Squelch squelch; send_samples_for_noise_floor(squelch); // should_process_audio is true as soon as squelch opens for (int i = 0; i < 500 && !squelch.is_open(); ++i) { ASSERT_FALSE(squelch.should_process_audio()); squelch.process_raw_sample(raw_signal_sample); } ASSERT_TRUE(squelch.is_open()); ASSERT_TRUE(squelch.should_process_audio()); // and stays true until fully closed for (int i = 0; i < 100 && squelch.is_open(); ++i) { ASSERT_TRUE(squelch.should_process_audio()); squelch.process_raw_sample(raw_no_signal_sample); } ASSERT_FALSE(squelch.is_open()); ASSERT_FALSE(squelch.should_process_audio()); } TEST_F(SquelchTest, good_ctcss) { float tone = CTCSS::standard_tones[5]; float sample_rate = 8000; Squelch squelch; squelch.set_ctcss_freq(tone, sample_rate); send_samples_for_noise_floor(squelch); GenerateSignal signal_with_tone(sample_rate); signal_with_tone.add_tone(tone, Tone::NORMAL); // send through "signal" samples until its time to process audio for (int i = 0; i < 500 && !squelch.should_process_audio(); ++i) { squelch.process_raw_sample(raw_signal_sample); } ASSERT_FALSE(squelch.is_open()); ASSERT_TRUE(squelch.should_process_audio()); // process audio samples and "signal" samples until squelch is open for (int i = 0; i < 500 && !squelch.is_open(); ++i) { squelch.process_audio_sample(signal_with_tone.get_sample()); squelch.process_raw_sample(raw_signal_sample); } ASSERT_TRUE(squelch.is_open()); 
ASSERT_TRUE(squelch.should_process_audio()); // run through a lot more to ensure squelch stays open for (int i = 0; i < 100000; ++i) { squelch.process_audio_sample(signal_with_tone.get_sample()); squelch.process_raw_sample(raw_signal_sample); ASSERT_TRUE(squelch.is_open()); ASSERT_TRUE(squelch.should_process_audio()); } EXPECT_GT(squelch.ctcss_count(), 0); EXPECT_EQ(squelch.no_ctcss_count(), 0); } TEST_F(SquelchTest, wrong_ctcss) { float actual_tone = CTCSS::standard_tones[0]; float expected_tone = CTCSS::standard_tones[7]; float sample_rate = 8000; Squelch squelch; squelch.set_ctcss_freq(expected_tone, sample_rate); send_samples_for_noise_floor(squelch); GenerateSignal signal_with_tone(sample_rate); signal_with_tone.add_tone(actual_tone, Tone::NORMAL); // send through "signal" samples until its time to process audio for (int i = 0; i < 500 && !squelch.should_process_audio(); ++i) { squelch.process_raw_sample(raw_signal_sample); } ASSERT_TRUE(squelch.should_process_audio()); ASSERT_FALSE(squelch.is_open()); // process lots of audio samples and "signal" samples and squelch never opens for (int i = 0; i < 100000; ++i) { squelch.process_audio_sample(signal_with_tone.get_sample()); squelch.process_raw_sample(raw_signal_sample); ASSERT_TRUE(squelch.should_process_audio()); ASSERT_FALSE(squelch.is_open()); } EXPECT_EQ(squelch.ctcss_count(), 0); EXPECT_GT(squelch.no_ctcss_count(), 0); } TEST_F(SquelchTest, close_ctcss) { float actual_tone = CTCSS::standard_tones[5]; float expected_tone = CTCSS::standard_tones[7]; float sample_rate = 8000; Squelch squelch; squelch.set_ctcss_freq(expected_tone, sample_rate); send_samples_for_noise_floor(squelch); GenerateSignal signal_with_tone(sample_rate); signal_with_tone.add_tone(actual_tone, Tone::NORMAL); // send through "signal" samples until its time to process audio for (int i = 0; i < 500 && !squelch.should_process_audio(); ++i) { squelch.process_raw_sample(raw_signal_sample); } ASSERT_TRUE(squelch.should_process_audio()); 
ASSERT_FALSE(squelch.is_open()); // process of audio samples and "signal" samples until squelch opens for (int i = 0; i < 500 && !squelch.is_open(); ++i) { squelch.process_audio_sample(signal_with_tone.get_sample()); squelch.process_raw_sample(raw_signal_sample); ASSERT_TRUE(squelch.should_process_audio()); } ASSERT_TRUE(squelch.is_open()); // keep processing samples until squelch closes again for (int i = 0; i < 3000 && squelch.is_open(); ++i) { squelch.process_audio_sample(signal_with_tone.get_sample()); squelch.process_raw_sample(raw_signal_sample); ASSERT_TRUE(squelch.should_process_audio()); } ASSERT_FALSE(squelch.is_open()); // process lots of audio samples and "signal" samples and squelch stays closed for (int i = 0; i < 100000; ++i) { squelch.process_audio_sample(signal_with_tone.get_sample()); squelch.process_raw_sample(raw_signal_sample); ASSERT_TRUE(squelch.should_process_audio()); ASSERT_FALSE(squelch.is_open()); } EXPECT_EQ(squelch.ctcss_count(), 0); EXPECT_GT(squelch.no_ctcss_count(), 0); } ================================================ FILE: src/udp_stream.cpp ================================================ /* * udp_stream.cpp * * Copyright (C) 2024 charlie-foxtrot * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . 
*/ #include // strerror() #include // LOG_INFO / LOG_ERR #include // close() #include // assert() #include // inet_aton() #include // getaddrinfo() #include "rtl_airband.h" bool udp_stream_init(udp_stream_data* sdata, mix_modes mode, size_t len) { // pre-allocate the stereo buffer if (mode == MM_STEREO) { sdata->stereo_buffer_len = len * 2; sdata->stereo_buffer = (float*)XCALLOC(sdata->stereo_buffer_len, sizeof(float)); } else { sdata->stereo_buffer_len = 0; sdata->stereo_buffer = NULL; } sdata->send_socket = -1; sdata->dest_sockaddr_len = 0; // lookup address / port struct addrinfo hints, *result, *rptr; memset(&hints, 0, sizeof(struct addrinfo)); hints.ai_family = AF_UNSPEC; hints.ai_socktype = SOCK_DGRAM; hints.ai_flags = 0; hints.ai_protocol = 0; int error = getaddrinfo(sdata->dest_address, sdata->dest_port, &hints, &result); if (error) { log(LOG_ERR, "udp_stream: could not resolve %s:%s - %s\n", sdata->dest_address, sdata->dest_port, gai_strerror(error)); return false; } // check each result and try to create a connection for (rptr = result; rptr != NULL; rptr = rptr->ai_next) { sdata->send_socket = socket(rptr->ai_family, rptr->ai_socktype, rptr->ai_protocol); if (sdata->send_socket == -1) { log(LOG_ERR, "udp_stream: socket failed: %s\n", strerror(errno)); continue; } if (connect(sdata->send_socket, rptr->ai_addr, rptr->ai_addrlen) == -1) { log(LOG_INFO, "udp_stream: connect to %s:%s failed: %s\n", sdata->dest_address, sdata->dest_port, strerror(errno)); close(sdata->send_socket); sdata->send_socket = -1; continue; } sdata->dest_sockaddr = *rptr->ai_addr; sdata->dest_sockaddr_len = rptr->ai_addrlen; break; } freeaddrinfo(result); // error if no valid socket if (sdata->send_socket == -1) { log(LOG_ERR, "udp_stream: could not set up UDP socket to %s:%s - all addresses failed\n", sdata->dest_address, sdata->dest_port); return false; } log(LOG_INFO, "udp_stream: sending %s 32-bit float at %d Hz to %s:%s\n", mode == MM_MONO ? 
"Mono" : "Stereo", WAVE_RATE, sdata->dest_address, sdata->dest_port); return true; } void udp_stream_write(udp_stream_data* sdata, const float* data, size_t len) { if (sdata->send_socket != -1) { // Send without blocking or checking for success sendto(sdata->send_socket, data, len, MSG_DONTWAIT | MSG_NOSIGNAL, &sdata->dest_sockaddr, sdata->dest_sockaddr_len); } } void udp_stream_write(udp_stream_data* sdata, const float* data_left, const float* data_right, size_t len) { if (sdata->send_socket != -1) { assert(len * 2 <= sdata->stereo_buffer_len); for (size_t i = 0; i < len; ++i) { sdata->stereo_buffer[2 * i] = data_left[i]; sdata->stereo_buffer[2 * i + 1] = data_right[i]; } udp_stream_write(sdata, sdata->stereo_buffer, len * 2); } } void udp_stream_shutdown(udp_stream_data* sdata) { if (sdata->send_socket != -1) { close(sdata->send_socket); } } ================================================ FILE: src/util.cpp ================================================ /* * util.cpp * Miscellaneous routines * * Copyright (c) 2015-2021 Tomasz Lemiech * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . 
*/ #include #include #include // uint32_t #include #include #include #include #include #include #include #include "config.h" #include "logging.h" #include "rtl_airband.h" int atomic_inc(volatile int* pv) { return __sync_fetch_and_add(pv, 1); } int atomic_dec(volatile int* pv) { return __sync_fetch_and_sub(pv, 1); } int atomic_get(volatile int* pv) { return __sync_fetch_and_add(pv, 0); } void tag_queue_put(device_t* dev, int freq, struct timeval tv) { pthread_mutex_lock(&dev->tag_queue_lock); dev->tq_head++; dev->tq_head %= TAG_QUEUE_LEN; if (dev->tq_head == dev->tq_tail) { log(LOG_WARNING, "tag_queue_put: queue overrun\n"); dev->tq_tail++; } dev->tag_queue[dev->tq_head].freq = freq; memcpy(&dev->tag_queue[dev->tq_head].tv, &tv, sizeof(struct timeval)); pthread_mutex_unlock(&dev->tag_queue_lock); } void tag_queue_get(device_t* dev, struct freq_tag* tag) { int i; if (!tag) return; pthread_mutex_lock(&dev->tag_queue_lock); if (dev->tq_head == dev->tq_tail) { /* empty queue */ tag->freq = -1; } else { // read queue entry at pos tq_tail+1 without dequeueing it i = dev->tq_tail + 1; i %= TAG_QUEUE_LEN; tag->freq = dev->tag_queue[i].freq; memcpy(&tag->tv, &dev->tag_queue[i].tv, sizeof(struct timeval)); } pthread_mutex_unlock(&dev->tag_queue_lock); } void tag_queue_advance(device_t* dev) { pthread_mutex_lock(&dev->tag_queue_lock); dev->tq_tail++; dev->tq_tail %= TAG_QUEUE_LEN; pthread_mutex_unlock(&dev->tag_queue_lock); } void* xcalloc(size_t nmemb, size_t size, const char* file, const int line, const char* func) { void* ptr = calloc(nmemb, size); if (ptr == NULL) { log(LOG_ERR, "%s:%d: %s(): calloc(%zu, %zu) failed: %s\n", file, line, func, nmemb, size, strerror(errno)); error(); } return ptr; } void* xrealloc(void* ptr, size_t size, const char* file, const int line, const char* func) { ptr = realloc(ptr, size); if (ptr == NULL) { log(LOG_ERR, "%s:%d: %s(): realloc(%zu) failed: %s\n", file, line, func, size, strerror(errno)); error(); } return ptr; } static float 
sin_lut[257], cos_lut[257]; void sincosf_lut_init() { for (uint32_t i = 0; i < 256; i++) SINCOSF(2.0F * M_PI * (float)i / 256.0f, sin_lut + i, cos_lut + i); sin_lut[256] = sin_lut[0]; cos_lut[256] = cos_lut[0]; } // phi range must be (0..1), rescaled to 0x0-0xFFFFFF void sincosf_lut(uint32_t phi, float* sine, float* cosine) { float v1, v2, fract; uint32_t idx; // get LUT index idx = phi >> 16; // cast fixed point fraction to float fract = (float)(phi & 0xffff) / 65536.0f; // get two adjacent values from LUT and interpolate v1 = sin_lut[idx]; v2 = sin_lut[idx + 1]; *sine = v1 + (v2 - v1) * fract; v1 = cos_lut[idx]; v2 = cos_lut[idx + 1]; *cosine = v1 + (v2 - v1) * fract; } /* librtlsdr-keenerd, (c) Kyle Keen */ double atofs(char* s) { char last; int len; double suff = 1.0; len = strlen(s); last = s[len - 1]; s[len - 1] = '\0'; switch (last) { case 'g': case 'G': suff *= 1e3; [[fallthrough]]; case 'm': case 'M': suff *= 1e3; [[fallthrough]]; case 'k': case 'K': suff *= 1e3; suff *= atof(s); s[len - 1] = last; return suff; } s[len - 1] = last; return atof(s); } double delta_sec(const timeval* start, const timeval* stop) { timeval delta; timersub(stop, start, &delta); return delta.tv_sec + delta.tv_usec / 1000000.0; } // level to/from dBFS conversion assumes level is nomalized to 1 and is based on: // https://kluedo.ub.uni-kl.de/frontdoor/deliver/index/docId/4293/file/exact_fft_measurements.pdf // // expanded form: // 20.0f * log10f(level / fft_size) + 7.54f + 10.0f * log10f(fft_size/2) - 2.38f const float& dBFS_offset(void) { static const float offset = 7.54f + 10.0f * log10f(fft_size / 2) - 2.38f; return offset; } float dBFS_to_level(const float& dBFS) { return pow(10.0, (dBFS - dBFS_offset()) / 20.0f) * fft_size; } float level_to_dBFS(const float& level) { return std::min(0.0f, 20.0f * log10f(level / fft_size) + dBFS_offset()); }