Repository: szpajder/RTLSDR-Airband
Branch: main
Commit: f8a17d7f0e5a
Files: 109
Total size: 919.5 KB
Directory structure:
gitextract_bz4e8om4/
├── .clang-format
├── .devcontainer/
│ ├── Dockerfile
│ ├── devcontainer.json
│ └── shell
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.md
│ │ ├── config.yml
│ │ └── feature_request.md
│ ├── install_dependencies
│ ├── platform_build
│ └── workflows/
│ ├── build_docker_containers.yml
│ ├── ci_build.yml
│ ├── code_formatting.yml
│ ├── platform_build.yml
│ └── version_bump.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .vscode/
│ ├── c_cpp_properties.json
│ ├── launch.json
│ └── settings.json
├── CMakeLists.txt
├── Dockerfile
├── LICENSE
├── NEWS.md
├── README.md
├── config/
│ ├── basic_multichannel.conf
│ ├── basic_scanning.conf
│ ├── big_mixer.conf
│ ├── mixers.conf
│ ├── noaa.conf
│ └── two_dongles_multiple_outputs.conf
├── init.d/
│ ├── rtl_airband-debian.sh
│ ├── rtl_airband-freebsd.sh
│ ├── rtl_airband-gentoo.sh
│ └── rtl_airband.service
├── scripts/
│ ├── find_version
│ └── reformat_code
└── src/
├── .gitignore
├── CMakeLists.txt
├── CMakeModules/
│ ├── FindBCM_VC.cmake
│ ├── FindLame.cmake
│ ├── FindMiriSDR.cmake
│ ├── FindRTLSDR.cmake
│ └── version.cmake
├── config.cpp
├── config.h.in
├── ctcss.cpp
├── ctcss.h
├── filters.cpp
├── filters.h
├── generate_signal.cpp
├── generate_signal.h
├── hello_fft/
│ ├── CMakeLists.txt
│ ├── gpu_fft.c
│ ├── gpu_fft.h
│ ├── gpu_fft.txt
│ ├── gpu_fft_base.c
│ ├── gpu_fft_shaders.c
│ ├── gpu_fft_trans.h
│ ├── gpu_fft_twiddles.c
│ ├── hex/
│ │ ├── shader_1024k.hex
│ │ ├── shader_128k.hex
│ │ ├── shader_16k.hex
│ │ ├── shader_1k.hex
│ │ ├── shader_2048k.hex
│ │ ├── shader_256.hex
│ │ ├── shader_256k.hex
│ │ ├── shader_2k.hex
│ │ ├── shader_32k.hex
│ │ ├── shader_4k.hex
│ │ ├── shader_512.hex
│ │ ├── shader_512k.hex
│ │ ├── shader_64k.hex
│ │ ├── shader_8k.hex
│ │ └── shader_trans.hex
│ ├── mailbox.c
│ └── mailbox.h
├── helper_functions.cpp
├── helper_functions.h
├── input-common.cpp
├── input-common.h
├── input-file.cpp
├── input-file.h
├── input-helpers.cpp
├── input-helpers.h
├── input-mirisdr.cpp
├── input-mirisdr.h
├── input-rtlsdr.cpp
├── input-rtlsdr.h
├── input-soapysdr.cpp
├── input-soapysdr.h
├── logging.cpp
├── logging.h
├── mixer.cpp
├── output.cpp
├── pulse.cpp
├── rtl_airband.cpp
├── rtl_airband.h
├── rtl_airband_neon.s
├── squelch.cpp
├── squelch.h
├── test_base_class.cpp
├── test_base_class.h
├── test_ctcss.cpp
├── test_filters.cpp
├── test_generate_signal.cpp
├── test_helper_functions.cpp
├── test_squelch.cpp
├── udp_stream.cpp
└── util.cpp
================================================
FILE CONTENTS
================================================
================================================
FILE: .clang-format
================================================
---
BasedOnStyle: Chromium
IndentWidth: 4
ObjCBlockIndentWidth: 4
ColumnLimit: 200
================================================
FILE: .devcontainer/Dockerfile
================================================
FROM ubuntu:latest
RUN sed -i 's/^# \(.*export LS_OPTIONS.*$\)/\1/g' ~/.bashrc && \
sed -i 's/^# \(.*alias ll.*$\)/\1/g' ~/.bashrc
RUN ln -fs /usr/share/zoneinfo/America/Los_Angeles /etc/localtime
# DEBIAN_FRONTEND is set on the install command itself: a VAR=value prefix only
# applies to the single command that follows it, not to the rest of an && chain.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        tzdata \
        git \
        sudo \
        gdb \
        clang-format-14 \
        python3-pip \
        pre-commit \
        vim
WORKDIR /app
COPY .github/install_dependencies /app/
RUN /app/install_dependencies
RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
================================================
FILE: .devcontainer/devcontainer.json
================================================
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
// https://github.com/microsoft/vscode-dev-containers/tree/v0.177.0/containers/docker-existing-dockerfile
{
"name": "Existing Dockerfile",
// Sets the run context to one level up instead of the .devcontainer folder.
"context": "..",
"dockerFile": "Dockerfile",
"updateContentCommand" : "apt-get install git",
"postCreateCommand" : "cmake -B /app/build -DCMAKE_BUILD_TYPE=Debug -DNFM=TRUE -DBUILD_UNITTESTS=true ; pre-commit install",
// vs code extensions to install in the dev container
"customizations": {
"vscode": {
"extensions": [
"ms-vscode.cpptools",
"ms-vscode.cmake-tools",
"ms-vscode.cpptools-extension-pack",
"twxs.cmake",
"streetsidesoftware.code-spell-checker",
"ms-azuretools.vscode-docker",
"GitHub.vscode-github-actions",
"xaver.clang-format"
]
}
},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
"runArgs": [ "--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined" ]
}
================================================
FILE: .devcontainer/shell
================================================
#!/bin/bash -e

# Build the development container image and open an interactive bash shell in
# it, with the current checkout mounted at /app/.

# run from the parent of the directory holding this script;
# quoted so that checkout paths containing spaces still work
cd "$(dirname "$0")/.."

# build container
docker build -t rtl_airband-dev -f .devcontainer/Dockerfile .

# run bash in container
docker run --rm -v "$(pwd)":/app/ -it --entrypoint bash rtl_airband-dev
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Report a bug you found when using RTLSDR-Airband
title: "[BUG]"
labels: ''
assignees: ''
---
**Describe your environment**
- RTLSDR-Airband version you are using (stable release number or branch/commit):
- `cmake` options used to build the program (e.g. `-DNFM=TRUE`):
- Hardware platform (eg. x86_64, Raspberry Pi v4):
- Operating system name and version:
**What happened?**
**What you expected to happen?**
**Steps to Reproduce**
**Additional context**
Add any other relevant information about the problem here.
**Your rtl_airband.conf file**
Remove passwords, server addresses and other private information.
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: false
contact_links:
- name: Questions & Help
url: https://github.com/rtl-airband/RTLSDR-Airband/discussions/categories/q-a
about: Please ask and answer questions here.
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest an idea for this project
title: "[FEATURE]"
labels: ''
assignees: ''
---
**Is your feature request related to a problem? If so, please describe.**
A description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
**Describe alternative solutions or features you've considered**
**Additional context**
Add any other relevant information about the feature request here.
================================================
FILE: .github/install_dependencies
================================================
#!/bin/bash

# Install the build dependencies for the host OS reported by `uname -s`.
# Exits with status 1 when the OS is neither Linux nor Darwin.

unameOut="$(uname -s)"

echo "Running on ${unameOut} as ${USER}"

case "${unameOut}" in
    Linux*)
        echo "Installing Linux dependencies"
        sudo apt-get update -y
        sudo apt-get install -y \
            build-essential \
            cmake \
            libmp3lame-dev \
            libshout3-dev \
            libconfig++-dev \
            libfftw3-dev \
            librtlsdr-dev \
            libsoapysdr-dev \
            libpulse-dev

        # libmirisdr-4 is cloned and installed from source; the subshell keeps
        # the directory changes from leaking into the rest of the script
        (
            git clone https://github.com/f4exb/libmirisdr-4
            cd libmirisdr-4
            mkdir build
            cd build
            cmake ../
            sudo make install
            sudo ldconfig
        )
        ;;

    Darwin*)
        echo "Installing MacOS dependencies"

        # detect when running in github workflow and skip `brew update` (rely on fresh OS image)
        if [ -n "${GITHUB_ACTION}" ] ; then
            echo "running in GitHub Workflow, skipping brew update"
            export HOMEBREW_NO_AUTO_UPDATE=1
            export HOMEBREW_NO_INSTALL_UPGRADE=1
            export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1
            echo "running ${ImageOS} version ${ImageVersion}"
        else
            brew update
        fi

        brew install \
            lame \
            libshout \
            libconfig \
            fftw \
            librtlsdr \
            soapysdr \
            pkg-config \
            pulseaudio
        ;;

    *)
        # exit statuses must be in the range 0-255, so use 1 rather than -1
        echo "Error: Machine not supported" >&2
        exit 1
        ;;
esac
================================================
FILE: .github/platform_build
================================================
#!/bin/bash -e

# Configure, build and smoke-test rtl_airband for one named platform.
# Usage: platform_build <platform>
# Exits with status 1 when the platform is missing or not recognised.

platform="${1}"

if [ -z "${platform}" ]; then
    # exit statuses must be in the range 0-255, so use 1 rather than -1
    echo "Error: platform not set" >&2
    exit 1
fi

echo "running build for ${platform} on $(source /etc/os-release ; echo "${VERSION}")"

# pick the cmake arguments for the requested platform
case "${platform}" in
    rpi3b)
        CMAKE_ARGS="-DPLATFORM=rpiv2 -DCMAKE_BUILD_TYPE=Release -DNFM=TRUE -DBUILD_UNITTESTS=TRUE"
        ;;

    ubuntu-22.04-arm)
        CMAKE_ARGS="-DPLATFORM=native -DCMAKE_BUILD_TYPE=Release -DNFM=TRUE -DBUILD_UNITTESTS=TRUE"
        ;;

    *)
        echo "Error: Platform '${platform}' not supported" >&2
        exit 1
        ;;
esac

# make a build dir
rm -rf build || true ; mkdir build
cd build

# configure and build
# CMAKE_ARGS is intentionally unquoted so it splits into separate arguments
cmake ${CMAKE_ARGS} ../
VERBOSE=1 make -j

# run unit tests
src/unittests

# run rtl_airband to get version string and exit
src/rtl_airband -v
================================================
FILE: .github/workflows/build_docker_containers.yml
================================================
name: Build and Publish Containers
on:
push:
branches: [main, unstable]
tags: ['v*']
pull_request:
workflow_dispatch:
schedule:
- cron: '29 13 * * *' # run daily
jobs:
build:
strategy:
fail-fast: false
matrix:
include:
- os: ubuntu-latest
platform: linux/amd64
- os: ubuntu-latest
platform: linux/386
- os: ubuntu-24.04-arm
platform: linux/arm64
- os: ubuntu-24.04-arm
platform: linux/arm/v6
- os: ubuntu-24.04-arm
platform: linux/arm/v7
runs-on: ${{ matrix.os }}
permissions:
contents: read
packages: write
attestations: write
id-token: write
steps:
- name: Runner Info
run: printenv | sort
- name: Prepare
id: prep
run: |
echo "platform_pair=${platform//\//-}" >> $GITHUB_OUTPUT
echo "repo_lowercase=$(echo '${{ github.repository }}' | tr '[:upper:]' '[:lower:]')" >> $GITHUB_OUTPUT
env:
platform: ${{ matrix.platform }}
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: '0' # need full history to get version from git tag
- name: Container metadata
id: metadata
uses: docker/metadata-action@v5
with:
images: ghcr.io/${{ steps.prep.outputs.repo_lowercase }}
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push by digest
id: build
uses: docker/build-push-action@v6
with:
platforms: ${{ matrix.platform }}
cache-from: type=gha,scope=build-${{ steps.prep.outputs.platform_pair }}
cache-to: type=gha,mode=max,scope=build-${{ steps.prep.outputs.platform_pair }}
context: .
outputs: type=image,name=ghcr.io/${{ steps.prep.outputs.repo_lowercase }},push-by-digest=true,name-canonical=true,push=true
- name: Export digest
run: |
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/${digest#sha256:}"
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: digests-${{ steps.prep.outputs.platform_pair }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
merge:
runs-on: ubuntu-latest
needs: build
permissions:
contents: read
packages: write
steps:
- name: Runner Info
run: printenv | sort
- name: Prepare
id: prep
run: |
echo "repo_lowercase=$(echo '${{ github.repository }}' | tr '[:upper:]' '[:lower:]')" >> $GITHUB_OUTPUT
- name: Download digests
uses: actions/download-artifact@v4
with:
path: /tmp/digests
pattern: digests-*
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Container metadata
id: metadata
uses: docker/metadata-action@v5
with:
images: ghcr.io/${{ steps.prep.outputs.repo_lowercase }}
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Create manifest list and push
working-directory: /tmp/digests
run: |
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf 'ghcr.io/${{ steps.prep.outputs.repo_lowercase }}@sha256:%s ' *)
- name: Inspect image
run: |
docker buildx imagetools inspect ghcr.io/${{ steps.prep.outputs.repo_lowercase }}:${{ steps.metadata.outputs.version }}
================================================
FILE: .github/workflows/ci_build.yml
================================================
name: Run CI
on:
push:
branches: [main]
tags: ['v*']
pull_request:
workflow_dispatch:
schedule:
- cron: '39 13 * * *' # run daily
jobs:
ci_build:
strategy:
matrix:
os: [ ubuntu-22.04, macos-14, ubuntu-22.04-arm ]
runs-on: ${{ matrix.os }}
timeout-minutes: 35 # runtime across all OSs, runs can get queued
steps:
- name: Runner Info
run: printenv | sort
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: '0' # need full history to get version from git tag
- name: Install packaged dependencies
run: .github/install_dependencies
- name: Configure
run: |
cmake -B ${{github.workspace}}/build_Debug -DCMAKE_BUILD_TYPE=Debug -DBUILD_UNITTESTS=TRUE
cmake -B ${{github.workspace}}/build_Debug_NFM -DCMAKE_BUILD_TYPE=Debug -DNFM=TRUE -DBUILD_UNITTESTS=TRUE
cmake -B ${{github.workspace}}/build_Release -DCMAKE_BUILD_TYPE=Release -DBUILD_UNITTESTS=TRUE
cmake -B ${{github.workspace}}/build_Release_NFM -DCMAKE_BUILD_TYPE=Release -DNFM=TRUE -DBUILD_UNITTESTS=TRUE
- name: Build
run: |
VERBOSE=1 cmake --build ${{github.workspace}}/build_Debug -j4
VERBOSE=1 cmake --build ${{github.workspace}}/build_Debug_NFM -j4
VERBOSE=1 cmake --build ${{github.workspace}}/build_Release -j4
VERBOSE=1 cmake --build ${{github.workspace}}/build_Release_NFM -j4
- name: Unit Tests
run: |
${{github.workspace}}/build_Debug/src/unittests
${{github.workspace}}/build_Debug_NFM/src/unittests
${{github.workspace}}/build_Release/src/unittests
${{github.workspace}}/build_Release_NFM/src/unittests
- name: Install
run: sudo cmake --install ${{github.workspace}}/build_Release_NFM
- name: Test run
run: /usr/local/bin/rtl_airband -v
================================================
FILE: .github/workflows/code_formatting.yml
================================================
name: Code Formatting
on:
pull_request:
schedule:
- cron: '39 13 * * *' # run daily
jobs:
code_formatting:
runs-on: ubuntu-latest
steps:
- name: Runner Info
run: printenv | sort
- name: Checkout
uses: actions/checkout@v4
- name: Install Clang Format
run: sudo apt-get install clang-format-14
- name: Run Clang Format
run: |
./scripts/reformat_code
git diff --exit-code
================================================
FILE: .github/workflows/platform_build.yml
================================================
name: Platform Build
on:
push:
branches: [main]
tags: ['v*']
pull_request:
workflow_dispatch:
schedule:
- cron: '39 13 * * *' # run daily
jobs:
platform_build:
strategy:
matrix:
# os: [ rpi3b ]
os: [ ubuntu-22.04-arm ]
runs-on: ${{ matrix.os }}
timeout-minutes: 35 # runtime across all OSs, runs can get queued
steps:
- name: Runner Info
run: printenv | sort
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: '0' # need full history to get version from git tag
- name: Install packaged dependencies
run: .github/install_dependencies
- name: Configure Build and Test
run: .github/platform_build ${{ matrix.os }}
================================================
FILE: .github/workflows/version_bump.yml
================================================
name: Bump version
on:
pull_request:
types:
- closed
branches:
- main
jobs:
version_bump:
if: github.event.pull_request.merged == true
runs-on: ubuntu-22.04
permissions:
contents: write
actions: write
steps:
- uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.merge_commit_sha }}
fetch-depth: '0'
- name: Bump version and push tag
id: tag
uses: anothrNick/github-tag-action@1.64.0
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
WITH_V: true
DEFAULT_BUMP: patch
- name: Create release for ${{ steps.tag.outputs.new_tag }}
if: steps.tag.outputs.part != 'patch'
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
tag: ${{ steps.tag.outputs.new_tag }}
run: |
gh release create "$tag" \
--repo="$GITHUB_REPOSITORY" \
--title="Version ${tag#v}" \
--generate-notes
- name: Run CI on ${{ steps.tag.outputs.new_tag }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh workflow run ci_build.yml --ref ${{ steps.tag.outputs.new_tag }}
- name: Run Platform Build ${{ steps.tag.outputs.new_tag }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh workflow run platform_build.yml --ref ${{ steps.tag.outputs.new_tag }}
- name: Build and Publish Containers for ${{ steps.tag.outputs.new_tag }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh workflow run build_docker_containers.yml --ref ${{ steps.tag.outputs.new_tag }}
================================================
FILE: .gitignore
================================================
build*/
.DS_Store
.cache
compile_commands.json
rtl_airband*.log
================================================
FILE: .pre-commit-config.yaml
================================================
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- id: check-shebang-scripts-are-executable
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v14.0.6
hooks:
- id: clang-format
files: src/.*\.cpp|src/.*\.h
================================================
FILE: .vscode/c_cpp_properties.json
================================================
{
"configurations": [
{
"name": "Linux",
"includePath": [
"${workspaceFolder}/**",
"${workspaceFolder}/build/_deps/googletest-src/googletest/include/",
"${workspaceFolder}/build/src/"
],
"defines": [],
"compilerPath": "/usr/bin/gcc",
"cStandard": "c17",
"cppStandard": "gnu++17",
"intelliSenseMode": "linux-gcc-arm64",
"configurationProvider": "ms-vscode.cmake-tools"
}
],
"version": 4
}
================================================
FILE: .vscode/launch.json
================================================
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "(gdb) Launch Unit Test",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build/src/unittests",
"args": [],
"stopAtEntry": false,
"cwd": "${fileDirname}",
"environment": [],
"externalConsole": false,
"MIMode": "gdb",
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing",
"ignoreFailures": true
},
{
"description": "Set Disassembly Flavor to Intel",
"text": "-gdb-set disassembly-flavor intel",
"ignoreFailures": true
}
]
}
]
}
================================================
FILE: .vscode/settings.json
================================================
{
"editor.formatOnPaste": true,
"editor.formatOnSave": true,
"editor.formatOnType": true,
"editor.defaultFormatter": "xaver.clang-format",
"clang-format.executable": "clang-format-14"
}
================================================
FILE: CMakeLists.txt
================================================
cmake_minimum_required (VERSION 3.1...3.18 FATAL_ERROR)

project (RTLSDR-Airband CXX)

# Take the version string from the helper script's stdout; its stderr is kept
# so that it can be shown if no version was produced.
execute_process(COMMAND ${PROJECT_SOURCE_DIR}/scripts/find_version
                OUTPUT_VARIABLE RTL_AIRBAND_VERSION
                OUTPUT_STRIP_TRAILING_WHITESPACE
                ERROR_VARIABLE RTL_AIRBAND_VERSION_ERROR
                ERROR_STRIP_TRAILING_WHITESPACE)

# An empty version string aborts configuration.
string(COMPARE EQUAL "${RTL_AIRBAND_VERSION}" "" RTL_AIRBAND_VERSION_UNSET)

if(RTL_AIRBAND_VERSION_UNSET)
    message(FATAL_ERROR "Failed to detect RTL_AIRBAND_VERSION - \"${RTL_AIRBAND_VERSION_ERROR}\"")
endif()

set (CMAKE_CXX_STANDARD 11)
# The CMAKE_ prefix is required: CMAKE_CXX_STANDARD_REQUIRED is the variable that
# initialises the CXX_STANDARD_REQUIRED target property. A plain variable named
# CXX_STANDARD_REQUIRED is not read by CMake.
set (CMAKE_CXX_STANDARD_REQUIRED ON)
set (CMAKE_CXX_EXTENSIONS OFF)
set (CMAKE_COMPILE_WARNING_AS_ERROR ON)

if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
    message(STATUS "Build type not specified: defaulting to Release")
endif(NOT CMAKE_BUILD_TYPE)

# TODO: flags to add: -Wfloat-equal -Wconversion -Wstrict-overflow=5 -Waggregate-return -Wpedantic -Wcast-align
# TODO: these could be added except for gtest: -Wswitch-enum -Wundef -Wswitch-default
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wshadow -Wdate-time -Wpointer-arith -Wwrite-strings -Wcast-qual -Wunreachable-code -Werror")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og -DDEBUG")

# Optional extra define, only added to the Debug flags.
if(DEBUG_SQUELCH)
    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDEBUG_SQUELCH")
endif()

add_subdirectory (src)
================================================
FILE: Dockerfile
================================================
# build container
FROM debian:bookworm-slim AS build
# install build dependencies
RUN apt-get update && \
apt-get upgrade -y && \
apt-get install -y --no-install-recommends \
build-essential \
cmake \
libmp3lame-dev \
libshout3-dev \
libconfig++-dev \
libfftw3-dev \
libsoapysdr-dev \
libpulse-dev \
\
git \
ca-certificates \
libusb-1.0-0-dev \
debhelper \
pkg-config \
&& \
apt-get clean && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# set working dir for compiling dependencies
WORKDIR /build_dependencies
# compile / install rtl-sdr-blog version of rtl-sdr for v4 support
RUN git clone https://github.com/rtlsdrblog/rtl-sdr-blog && \
cd rtl-sdr-blog/ && \
dpkg-buildpackage -b --no-sign && \
cd .. && \
dpkg -i librtlsdr0_*.deb && \
dpkg -i librtlsdr-dev_*.deb && \
dpkg -i rtl-sdr_*.deb
# compile / install libmirisdr-4
RUN git clone https://github.com/f4exb/libmirisdr-4 && \
cd libmirisdr-4 && \
mkdir build && \
cd build && \
cmake ../ && \
VERBOSE=1 make install && \
ldconfig
# TODO: build anything from source?
# set working dir for project build
WORKDIR /rtl_airband_build
# copy in the rtl_airband source, copying in the full repo so find_version will be correct
COPY ./ .
# configure and build
# TODO: detect platforms
RUN cmake -B build_dir -DPLATFORM=generic -DCMAKE_BUILD_TYPE=Release -DNFM=TRUE -DBUILD_UNITTESTS=TRUE && \
VERBOSE=1 cmake --build build_dir -j4
# make sure unit tests pass
RUN ./build_dir/src/unittests
# application container
FROM debian:bookworm-slim
# install runtime dependencies
RUN apt-get update && \
apt-get upgrade -y && \
apt-get install -y --no-install-recommends \
tini \
libc6 \
libmp3lame0 \
libshout3 \
libconfig++9v5 \
libfftw3-single3 \
libsoapysdr0.8 \
libpulse0 \
libusb-1.0-0-dev \
ca-certificates \
&& \
apt-get clean && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# install (from build container) rtl-sdr-blog version of rtl-sdr for v4 support
COPY --from=build /build_dependencies/librtlsdr0_*.deb /build_dependencies/librtlsdr-dev_*.deb /build_dependencies/rtl-sdr_*.deb /tmp/
RUN dpkg -i /tmp/librtlsdr0_*.deb && \
dpkg -i /tmp/librtlsdr-dev_*.deb && \
dpkg -i /tmp/rtl-sdr_*.deb && \
rm -rf /tmp/*.deb && \
echo '' | tee --append /etc/modprobe.d/rtl_sdr.conf && \
echo 'blacklist dvb_usb_rtl28xxun' | tee --append /etc/modprobe.d/rtl_sdr.conf && \
echo 'blacklist rtl2832' | tee --append /etc/modprobe.d/rtl_sdr.conf && \
echo 'blacklist rtl2830' | tee --append /etc/modprobe.d/rtl_sdr.conf
# copy (from build container) libmirisdr-4 library
COPY --from=build /usr/local/lib/libmirisdr.so.4 /usr/local/lib/
# Copy rtl_airband from the build container
COPY LICENSE /app/
COPY --from=build /rtl_airband_build/build_dir/src/unittests /app/
COPY --from=build /rtl_airband_build/build_dir/src/rtl_airband /app/
RUN chmod a+x /app/unittests /app/rtl_airband
# make sure unit tests pass
RUN /app/unittests
# Use tini as init and run rtl_airband from /app/
ENTRYPOINT ["/usr/bin/tini", "--"]
WORKDIR /app/
CMD ["/app/rtl_airband", "-F", "-e", "-c", "/app/rtl_airband.conf"]
================================================
FILE: LICENSE
================================================
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too.
When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights.
We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations.
Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and modification follow.
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program.
You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License.
c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program.
In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License.
3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable.
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance.
5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License.
7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances.
It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice.
This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation.
10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
================================================
FILE: NEWS.md
================================================
# NEWS
This file will no longer be updated with each release. For changes between releases, see the PRs merged to the repo.
Version 5.0.0 (Jan 21, 2024):
* NOTE: Going forward a release tag will be automatically created on each merge to `main`, and changes will not be reflected in this file. For changes between versions see the repo's [release history](https://github.com/rtl-airband/RTLSDR-Airband/releases).
* NOTE: Going forward PRs will be opened directly against `main` and the `unstable` branch will no longer be used.
* NOTE: This repo has significantly diverged from the original project [microtony/RTLSDR-Airband](https://github.com/microtony/RTLSDR-Airband) so it has been detached (i.e. it is no longer a fork).
* Changes in this release, see [#444](https://github.com/rtl-airband/RTLSDR-Airband/pull/444):
* build and publish docker containers
* changes to supported `cmake` platforms:
* deprecate the `rpiv1`, `armv7-generic`, and `armv8-generic` build platforms
* change default build platform to `native`
* rename `default` to `generic`
* enable a series of compile warnings and cleanup code
* remove `SSE` specific code - let the compiler "do the right thing"
* remove some no longer supported windows `ifdef`'s
* fix CTCSS bug that could miss a tone when multiple tones have the same power (happens with less accurate floating point operations, ie i386)
Version 4.2.0 (Oct 13, 2023):
* Changes in this release:
* Add support for building with libshout v2.4.6, see [#382](https://github.com/rtl-airband/RTLSDR-Airband/pull/382) and [#422](https://github.com/rtl-airband/RTLSDR-Airband/pull/422)
* Add error checking for lowpass <= highpass, see [#399](https://github.com/rtl-airband/RTLSDR-Airband/pull/399) and [#412](https://github.com/rtl-airband/RTLSDR-Airband/pull/412)
* Remove limit on count of mixer inputs (thanks @cdknox), see [#408](https://github.com/rtl-airband/RTLSDR-Airband/pull/408)
* Add `dated_subdirectories` config option for output files (thanks, @marcin-osowski), see [#413](https://github.com/rtl-airband/RTLSDR-Airband/pull/413)
Version 4.1.1 (May 1, 2023):
* Changes in this release:
* Fix build issues when using VideoCore GPU, see [#378](https://github.com/rtl-airband/RTLSDR-Airband/pull/378)
Version 4.1.0 (April 23, 2023):
* Changes in this release:
* Add `channel_dbfs_noise_level` and `channel_dbfs_signal_level` to the stats file, see [#355](https://github.com/rtl-airband/RTLSDR-Airband/pull/355)
* Add squelch support for CTCSS, add `channel_ctcss_counter` and `channel_no_ctcss_counter` to the stats file, see [#368](https://github.com/rtl-airband/RTLSDR-Airband/pull/368)
* Support `ampfactor` on a per-channel basis (in addition to mixer inputs), see [#369](https://github.com/rtl-airband/RTLSDR-Airband/pull/369)
* Fix config error messages, see [#371](https://github.com/rtl-airband/RTLSDR-Airband/pull/371)
* Multiple CI / workflow improvements, including:
* Addition of Dockerfiles and shell scripts for multiple build environments
* Addition of vscode devcontainer configuration
* Addition of gtest, code refactoring, addition of unit tests, running unit tests on each pull request
* Running more combinations of OSs, build types, and build options on each pull request
Version 4.0.3 (Jan 10, 2023):
* Changes in this release:
* Add `channel_squelch_level` to stats file, see [#332](https://github.com/rtl-airband/RTLSDR-Airband/pull/332)
* Support "default" values in lists for `squelch_snr_threshold` and `notch_q`,
see [#334](https://github.com/rtl-airband/RTLSDR-Airband/pull/334)
* Set cmake `ENABLE_EXPORTS` property, see [#339](https://github.com/rtl-airband/RTLSDR-Airband/pull/339)
* Other items to note:
* Repo maintainer has changed, see [#342](https://github.com/rtl-airband/RTLSDR-Airband/discussions/342)
* Repo URL has moved to https://github.com/rtl-airband/RTLSDR-Airband
* Default branch / Top of Tree has been renamed to `main`
Version 4.0.2 (Dec 26, 2021):
* Added a new `PLATFORM` value `default` (which, as the name says, is the new
default). It results in a portable binary without any architecture-specific
optimizations. This also allows the program to be built with compilers that
do not support `-march=native` option (notably Clang on Apple M1) (#303).
Version 4.0.1 (Nov 14, 2021):
* Fixed compilation error on RaspberryPi OS 11 (Bullseye)
Version 4.0.0 (Oct 19, 2021):
* RTLSDR-Airband is now built with CMake. Refer to the wiki for updated
compilation instructions.
* When compiling the program, a new `PLATFORM` value `native` can now be
specified. It enables `-march=native -mtune=native` compilation options. This
causes the compiler to apply the most appropriate optimizations for the
hardware on which the app is being built (thx @charlie-foxtrot).
* BACKWARDS-INCOMPATIBLE CHANGE: Signal level and noise level estimates
displayed in the textual waterfalls are now expressed in dBFS (decibels
related to the full scale of the analog-to-digital converter). The main
benefit of the new approach is that these values do not depend on the
`fft_size` value (thx @charlie-foxtrot).
* BACKWARDS-INCOMPATIBLE CHANGE: Improved squelch algorithm with new
configuration parameters. `squelch` keyword has been replaced with
`squelch_threshold` which takes an absolute signal value in dBFS as an
argument. Alternatively, a minimum signal-to-noise ratio (in dB) that should
trigger the squelch might be configured using `squelch_snr_threshold` option
(thx @charlie-foxtrot).
* BACKWARDS-INCOMPATIBLE CHANGE: `include_freq` config option for file outputs
now causes the frequency to be appended after the timestamp rather than
before it. This feature now works correctly in scan mode, when
`split_on_transmission` feature is enabled. (thx @charlie-foxtrot).
* BACKWARDS-INCOMPATIBLE CHANGE: sample format in files produced by `rawfile`
outputs has been changed from CS16 to CF32. File name suffix is now `.cf32`.
* Improved squelch indicator in the textual waterfalls. In addition to the `*`
character indicating that the squelch is open, there is also a `~` character
indicating that the channel has a signal that is being suppressed because it
is outside the band of the channel filter (thx @charlie-foxtrot).
* New output type `udp_stream` for sending uncompressed audio to another host
via UDP/IP (thx @charlie-foxtrot).
* Added `multiple_output_threads` global option. When set to `true`, a separate
output thread is spawned for each device (thx @charlie-foxtrot).
* Modulation in scan mode is now configurable per channel (thx
@charlie-foxtrot).
* SoapySDR errors like TIMEOUT or OVERFLOW are no longer treated as fatal. They
often appear intermittently, especially when the CPU usage is high. There is
no point in failing the input in this case.
* Added `.tmp` suffix to the names of the output files currently being written
to. The suffix is removed when the file is closed. External applications that
consume recorded files can now figure out which files are not yet complete.
* Added logging and statistics for output thread overruns and mixer
input/output overruns (thx @charlie-foxtrot).
* The program can now be built on MacOS.
* Miscellaneous bug fixes and code cleanups.
Version 3.2.1 (Nov 13, 2020):
* Fixed a compile error when using libshout older than 2.4.0
Version 3.2.0 (Nov 08, 2020):
* Added `split_on_transmission` output file option which allows creating
a new file for every transmission on the channel (thx @charlie-foxtrot).
* Added `include_freq` output file option, which causes the channel frequency
to be appended to the file name (thx @charlie-foxtrot).
* Added support for notch filters for eliminating narrowband interference,
like CTCSS tones (thx @charlie-foxtrot).
* Added `bandwidth` channel option which causes the channelized I/Q signal
to be lowpass-filtered before demodulation. This might help in situations
where neighboring channels are closely spaced and interfere with the channel
of interest. It also reduces the bandwidth of the resulting audio signal,
and thus eliminates the high-frequency noise (thx @charlie-foxtrot).
* Added support for multithreaded demodulation. Each device can now have its
own demodulation thread. This allows spreading the demodulation work across
multiple CPU cores. Enable with `multiple_demod_threads` global option
(thx @charlie-foxtrot).
* Added support for highpass/lowpass MP3 filters for mixers (thx @charlie-foxtrot)
* Added support for frequency usage statistics (thx @charlie-foxtrot).
* Workaround for Fitipower tuner problem of not honoring the first gain
setting when the device is first used (thx @eshaz).
* Finalize the MP3 file properly before opening a new one (thx @jratke).
* Close the RTL device properly on program exit (thx @jratke).
* Updated the SoapySDR input driver to reflect changes in SoapySDR library API.
* Minor cleanups.
Version 3.1.0 (Jan 19, 2020):
* SoapySDR: added support for complex float 32-bit samples
* SoapySDR: allow using AGC if the device supports it. Gain setting for
soapy devices is now optional - if it's not specified, the program will
try to enable AGC.
* Use lowpass/highpass filters provided by LAME library to improve audio
quality of MP3 streams. Filter cutoff frequencies may be configured per
output, using `highpass` and `lowpass` config options. Credit: clydebarrow.
* Added `log_scan_activity` global config option. When set to `true`, a
log message is written whenever a squelch opens on a scanned channel,
effectively producing a channel activity log. Credit: clam-i-am.
* Improved squelch behaviour in some corner cases.
* Fix for incorrect naming of pulseaudio context. Name set in the config
was not used as it should have been. Credit: Darryl Pogue.
* Don't fail when the configured gain value is negative. Some SDRs support
this (eg. FC0012-based dongles).
* Fix a bug which in some cases could prevent the icecast output from
reconnecting with the Icecast server after the connection has failed.
Version 3.0.1 (Feb 16, 2018):
* Fix for squelch staying constantly open when configured manually
with NFM=off (#84)
Version 3.0.0 (Feb 10, 2018):
* Major overhaul of the SDR input code - now it's modular and
hardware-agnostic (no longer tightly coupled with librtlsdr).
* Support for SoapySDR vendor-neutral SDR library - any SDR which has
a plugin for SoapySDR shall now work in RTLSDR-Airband.
* Support for Mirics DVB-T dongles via libmirisdr-4 library.
* Support for RTLSDR is now optional and can be disabled at compilation
stage.
* Removed the 8-channels-per-device limit in multichannel mode.
* Configurable per-device sampling rate.
* Configurable FFT size.
* Support for multibyte input samples.
* Support for rawfile outputs (ie. writing raw I/Q data from a
narrowband channel to a file for processing with other programs,
like GNU Radio or csdr).
* INCOMPATIBLE CHANGE: removed `rtlsdr_buffers` global configuration
option; buffer count can now be adjusted with a per-device
"buffers" option.
* INCOMPATIBLE CHANGE: removed `syslog` global configuration option;
syslog logging is now enabled by default, both in foreground and
background mode. To force logging to standard error, use -e command
line option.
* Added -F command line option for better cooperation with systemd.
Runs the program in foreground, but without textual waterfalls.
Together with -e it allows running rtl_airband as a service of type
"simple" under systemd. Example rtl_airband.service file has been
adjusted to reflect this change.
* Added `type` device configuration option. It sets the device type
(ie. the input driver which shall be used to talk to the device).
"rtlsdr" is assumed as a default type for backward compatibility.
If RTLSDR support has been disabled at compilation stage, then
there is no default type - it must be set manually, or the program
will throw an error on startup.
* Frequencies in the config can now be expressed in Hz, kHz, MHz or GHz
for improved readability.
* Lots of bugfixes.
* Rewritten documentation on [Github Wiki](https://github.com/rtl-airband/RTLSDR-Airband/wiki).
Version 2.4.0 (Oct 15, 2017):
* Support for PulseAudio output via new output type `pulse`. With this
feature you can eg. play the sound via the soundcard of the Raspberry
Pi you run RTLSDR-Airband on (you need to install and run pulseaudio
daemon on it, though). Or you can stream the audio from a Pi located
near the antenna (eg. in the attic) to speakers connected to the desktop
PC you are sitting at, without launching a local Icecast server,
as before. Because the audio stream is sent uncompressed, it is
not recommended to run it across the Internet - jitter or packet loss
will easily cause the audio to become choppy. However in a local network
PulseAudio is a good choice. And it gives much lower latency as compared
to Icecast (typically under 0.5 seconds). Thanks to Marcus Ströbel
for the idea and initial implementation.
* Support for referring to RTL devices by their serial numbers in the
config file. Instead of the `index = <device_index>` parameter, use
`serial = <device_serial_number>` to get consistent behavior across reboots
and hardware reconfigurations.
* Set RTL gain to the nearest gain value supported by the device. This is
required for E4000 tuners, which do not round the given gain value to
the nearest supported setting, which causes the gain setting operation
to fail.
* Improved squelch operation in scan mode. All squelch-related variables
(noise floor, AGC coefficients, etc) are now calculated and stored
separately for each scanned channel. Earlier their values were common
to all channels, which caused squelch problems in case when noise floor
varied considerably between channels. Thanks to @strix-technica.
* Added build target for FreeBSD on x86. Use `PLATFORM=x86-freebsd` to
compile and `PLATFORM=x86-freebsd gmake install` to install. Thanks
to @nyammy.
* Display squelch setting in waterfall in place of noise floor value when
squelch is set manually.
* Bug fixes, performance improvements.
* Decluttered and more understandable documentation.
Version 2.3.0 (Jan 2, 2017):
* Added support for mixers. It is now possible to produce audio streams
combined from several input channels. Both mono and stereo mixing is
supported. Usage example is provided in config/mixers.conf. All
mixer-related parameters are documented in config/reference.conf.
* Added build options for 64-bit ARM architectures, like Odroid C2.
Please use PLATFORM=armv8-generic when compiling.
* Fixed a long-standing bug in RTL sample processing, which caused some
samples to be processed twice. If you were annoyed by these regular
clicks in NFM audio every 125 ms, they are now gone.
* Reduced CPU usage on x86
* Some code restructuring and cleanups
* Added several configuration file examples for typical real-life
scenarios. They are placed in config/ subdirectory. rtl_airband.conf.example
file has been moved to config/reference.conf. It is meant to be a reference
for all supported config knobs together with their description. This is
still an interim solution before some more readable and understandable
documentation gets written.
Version 2.2.0 (Oct 8, 2016):
* Support for Icecast stream metadata updates in scanning mode. When enabled,
every time the scanner stops on a channel, current frequency is written into
Icecast song title, which in turn is displayed in the player. Alternatively,
textual labels can be configured for each frequency. It is possible
to configure the amount of delay between the stream and metadata updates to
synchronize them with the audio. There are some caveats however - read
comments in rtl_airband.conf.example for details.
* Added global option 'localtime'. When enabled, rtl_airband uses local time
instead of UTC time for output file names. (Credit: ScanOC).
* Auto gain feature removed. RTL auto gain does not work well for narrowband
channels. Most often it sets the gain too high which causes problems for
auto squelch and audio bleeding between adjacent channels. Gain must be
configured manually from now on.
* Dropped unmaintained Windows build.
* Reverted to power level calculation algorithm from version 2.0.2. The new
algo didn't really do much to sensitivity, but introduced annoying clicks
on squelch open/close.
* Improved DC offset estimator for AM mode. This one hardly ever clicks
on squelch opening.
* Boosted AM audio volume.
* Reduced squelch flapping in NFM mode.
Version 2.1.0 (Aug 11, 2016):
* Narrowband FM demodulation support
* Automatic Frequency Control
* Append mode for recording (enabled by default)
* Dongles, channels and outputs can be individually enabled and disabled
by a simple config flag (no need to comment out or delete large
configuration sections)
* Use VBR for MP3 encoding
* Modified power level calculation algorithm (better sensitivity)
* Support for manual squelch setting
* Bug fixes
Version 2.0.2 (Mar 26, 2016):
* Fixed a problem with running three dongles or more, simultaneously
Version 2.0.1 (Jan 24, 2016):
* Fixed crash on output initialization
Version 2.0.0 (Dec 27, 2015):
* util/convert_cfg: can be used to convert old-style config.txt to the new format
* Syslog logging (enabled by default)
* Daemon mode
* Reworked makefiles, added install rule
* /dev/vcio is now used to access GPU on RPi; creating char_dev no longer necessary
* Startup scripts for Debian and Gentoo
* Support for auto gain setting
* Support for multiple outputs per channel
* Support for recording streams to local MP3 files
* Support for ARMv7-based platforms other than RPi (eg. Cubieboard)
* Updated documentation
* Numerous bugfixes and stability improvements
Version 1.0.0 (May 12, 2015):
* Linux x86/x86_64 support (Windows build is currently unmaintained and might not work)
* Raspberry Pi V2 support
* Bundled hello_fft code (v2.0)
* More robust interaction with Icecast servers
* Important stability fixes
================================================
FILE: README.md
================================================
# RTLSDR-Airband




Changes as of v5.1.0:
- License is now GPLv2 [#503](https://github.com/rtl-airband/RTLSDR-Airband/discussions/503)
NOTE: Repo URL has moved to https://github.com/rtl-airband/RTLSDR-Airband see [#502](https://github.com/rtl-airband/RTLSDR-Airband/discussions/502) for info
Changes as of v5.0.0:
- PRs will be opened directly against `main` and the `unstable` branch will no longer be used
- Version tags will be automatically created on each merge to `main`
- A release will be created on each `major` or `minor` version tag, but not on `patch` tags
- Checking out `main` is recommended over using a release artifact to stay on the latest version
- This repo has significantly diverged from the original project [microtony/RTLSDR-Airband](https://github.com/microtony/RTLSDR-Airband) so it has been detached (i.e. it is no longer a fork).
- Specific build support for `rpiv1`, `armv7-generic`, and `armv8-generic` has been deprecated in favor of the new default `native`, see [#447](https://github.com/rtl-airband/RTLSDR-Airband/discussions/447)
## Overview
RTLSDR-Airband receives analog radio voice channels and produces
audio streams which can be routed to various outputs, such as online
streaming services like LiveATC.net. Originally the only SDR type
supported by the program was Realtek DVB-T dongle (hence the project's
name). However, thanks to SoapySDR vendor-neutral SDR library, other
radios are now supported as well.
## Documentation
User's manual is now on the [wiki](https://github.com/rtl-airband/RTLSDR-Airband/wiki).
## Credits and thanks
I hereby express my gratitude to everybody who helped with the development and testing
of RTLSDR-Airband. Special thanks go to:
* Dave Pascoe
* SDR Guru
* Marcus Ströbel
* strix-technica
* charlie-foxtrot
## License
Copyright (C) 2022-2025 charlie-foxtrot
Copyright (C) 2015-2022 Tomasz Lemiech
Based on original work by Wong Man Hang
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <https://www.gnu.org/licenses/>.
## Open Source Licenses of bundled code
### gpu_fft
BCM2835 "GPU_FFT" release 2.0
Copyright (c) 2014, Andrew Holme.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
### rtl-sdr
* Copyright (C) 2012 by Steve Markgraf
* Copyright (C) 2015 by Kyle Keen
* GNU General Public License Version 2
================================================
FILE: config/basic_multichannel.conf
================================================
# This is a minimalistic configuration file for RTLSDR-Airband.
# Just a single RTL dongle with two AM channels in multichannel mode.
# Each channel is sent to a single Icecast output.
# Refer to https://github.com/rtl-airband/RTLSDR-Airband/wiki
# for description of keywords and config syntax.
devices:
({
type = "rtlsdr";
index = 0;
gain = 25;
centerfreq = 120.0;
correction = 80;
channels:
(
{
freq = 119.5;
outputs: (
{
type = "icecast";
server = "icecast.server.example.org";
port = 8080;
mountpoint = "TWR.mp3";
name = "Tower";
genre = "ATC";
username = "source";
password = "mypassword";
}
);
},
{
freq = 120.225;
outputs: (
{
type = "icecast";
server = "icecast.server.example.org";
port = 8080;
mountpoint = "GND.mp3";
name = "Ground";
genre = "ATC";
description = "My local airport - ground feed";
username = "source";
password = "mypassword";
}
);
}
);
}
);
================================================
FILE: config/basic_scanning.conf
================================================
# Scanning mode example
# Single dongle, three frequencies, output to Icecast server and to a file.
# Refer to https://github.com/rtl-airband/RTLSDR-Airband/wiki
# for description of keywords and config syntax.
devices:
({
type = "rtlsdr";
index = 0;
gain = 25;
correction = 80;
mode = "scan";
channels:
(
{
freqs = ( 118.15, 124.7, 132.1 );
labels = ( "Tower", "Ground", "Approach" );
outputs: (
{
type = "icecast";
server = "icecast.server.example.org";
port = 8080;
mountpoint = "stream.mp3";
name = "Tower + Ground + Approach";
genre = "ATC";
description = "My local airport - aggregated feed";
username = "source";
password = "mypassword";
send_scan_freq_tags = false;
},
{
type = "file";
directory = "/home/pi/recordings";
filename_template = "TWR+GND+APP";
}
);
}
);
}
);
================================================
FILE: config/big_mixer.conf
================================================
mixers: {
big_mixer: {
outputs: (
{
type = "file";
directory = "./";
filename_template = "big_mixer";
}
);
}
};
devices:
({
type = "rtlsdr";
index = 0;
gain = 25;
centerfreq = 156.7375;
channels:
(
{
freq = 156.050;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.175;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.250;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.275;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.300;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.325;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.350;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.375;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.400;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.425;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.450;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.475;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.500;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.525;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.550;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.575;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.600;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.625;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.650;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.675;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.700;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.725;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.750;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.800;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.850;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.875;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.900;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.925;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.950;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 156.975;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 157.000;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 157.025;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 157.050;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 157.075;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 157.100;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 157.125;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 157.150;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 157.175;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 157.200;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 157.225;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 157.250;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 157.275;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 157.300;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 157.325;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 157.350;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 157.375;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 157.400;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
},
{
freq = 157.425;
outputs: (
{
type = "mixer";
name = "big_mixer";
}
);
}
)
});
================================================
FILE: config/mixers.conf
================================================
# This config file demonstrates the usage of mixers.
# First, two mixers are defined:
#
# - mixer1: sends the mixed stream to Icecast and saves it to a file
# - mixer2: sends the mixed stream to Icecast
#
# Two dongles are used, both in AM, multichannel mode:
#
# - dongle 1: 3 channels:
# - channel 1 goes to mixer1 (center, volume decreased to 30%)
# - channel 2 goes to mixer1 (full left)
# - channel 3 goes to mixer2 (85% right)
#
# - dongle 2: 2 channels:
# - channel 1 goes to mixer1 (full right)
# - channel 2 goes to mixer2 (85% left, volume set to 200%)
#
# Refer to https://github.com/rtl-airband/RTLSDR-Airband/wiki
# for description of keywords and config syntax.
mixers: {
  # mixer1 receives VOLMET and Approach (dongle 1) and Ground (dongle 2).
  # It is streamed to Icecast and recorded to a file.
  mixer1: {
    outputs: (
      {
        type = "icecast";
        server = "icecast.server.example.org";
        port = 8080;
        mountpoint = "mixer1.mp3";
        # Stream name lists the channels which are routed to this mixer
        name = "VOLMET + Approach + Ground";
        genre = "ATC";
        username = "source";
        password = "mypassword";
      },
      {
        type = "file";
        directory = "/home/pi/recordings";
        filename_template = "mixer1";
      }
    );
  },
  # mixer2 receives Director (dongle 1) and Delivery (dongle 2).
  # It is streamed to Icecast only.
  mixer2: {
    outputs: (
      {
        type = "icecast";
        server = "icecast.server.example.org";
        port = 8080;
        mountpoint = "mixer2.mp3";
        # Stream name lists the channels which are routed to this mixer
        name = "Director + Delivery";
        genre = "ATC";
        username = "source";
        password = "mypassword";
      }
    );
  }
};
devices:
({
type = "rtlsdr";
index = 0;
gain = 25;
centerfreq = 121.2;
correction = 81;
channels:
(
# VOLMET
{
freq = 120.875;
# VOLMET/ATIS/AWOS channels often transmit continuously.
# Auto squelch does not perform well in such cases, so it's best to set the
# squelch threshold manually. squelch_threshold defines an absolute signal
# level (in dBFS).
squelch_threshold = -40;
lowpass = 5;
highpass = 5;
outputs: (
{
type = "mixer";
name = "mixer1";
ampfactor = 0.3;
}
);
},
# Approach
{
freq = 121.8;
outputs: (
{
type = "mixer";
name = "mixer1";
balance = -1.0;
}
);
},
# Director
{
freq = 121.925;
outputs: (
{
type = "mixer";
name = "mixer2";
balance = 0.85;
}
);
}
);
},
{
type = "rtlsdr";
index = 1;
gain = 33;
centerfreq = 131.2;
correction = 48;
channels:
(
# Ground
{
freq = 130.925;
# Another way of tweaking the squelch is to specify custom SNR threshold (in dB)
squelch_snr_threshold = 5.0;
outputs: (
{
type = "mixer";
name = "mixer1";
balance = 1.0;
}
);
},
# Delivery
{
freq = 131.4;
outputs: (
{
type = "mixer";
name = "mixer2";
balance = -0.85;
ampfactor = 2.0;
}
);
}
);
}
);
================================================
FILE: config/noaa.conf
================================================
fft_size = 1024;
localtime = true;
multiple_demod_threads = true;
multiple_output_threads = true;
devices:
(
{
type = "rtlsdr";
index = 0;
gain = 19.7;
centerfreq = 162.48200;
correction = 0;
sample_rate = 2.40;
channels:
(
{
freq = 162.40000;
label = "NOAA 162.400";
modulation = "nfm";
lowpass = -1;
highpass = -1;
bandwidth = 5000;
ampfactor = 2.00;
squelch_snr_threshold = 0.00;
outputs:
(
{
type = "file";
directory = "/recordings";
filename_template = "NOAA_162.400";
}
);
},
{
freq = 162.42500;
label = "NOAA 162.425";
modulation = "nfm";
lowpass = -1;
highpass = -1;
bandwidth = 5000;
ampfactor = 2.00;
squelch_snr_threshold = 0.00;
outputs:
(
{
type = "file";
directory = "/recordings";
filename_template = "NOAA_162.425";
}
);
},
{
freq = 162.45000;
label = "NOAA 162.450";
modulation = "nfm";
lowpass = -1;
highpass = -1;
bandwidth = 5000;
ampfactor = 2.00;
squelch_snr_threshold = 0.00;
outputs:
(
{
type = "file";
directory = "/recordings";
filename_template = "NOAA_162.450";
}
);
},
{
freq = 162.47500;
label = "NOAA 162.475";
modulation = "nfm";
lowpass = -1;
highpass = -1;
bandwidth = 5000;
ampfactor = 2.00;
squelch_snr_threshold = 0.00;
outputs:
(
{
type = "file";
directory = "/recordings";
filename_template = "NOAA_162.475";
}
);
},
{
freq = 162.50000;
label = "NOAA 162.500";
modulation = "nfm";
lowpass = -1;
highpass = -1;
bandwidth = 5000;
ampfactor = 2.00;
squelch_snr_threshold = 0.00;
outputs:
(
{
type = "file";
directory = "/recordings";
filename_template = "NOAA_162.500";
}
);
},
{
freq = 162.52500;
label = "NOAA 162.525";
modulation = "nfm";
lowpass = -1;
highpass = -1;
bandwidth = 5000;
ampfactor = 2.00;
squelch_snr_threshold = 0.00;
outputs:
(
{
type = "file";
directory = "/recordings";
filename_template = "NOAA_162.525";
}
);
},
{
freq = 162.55000;
label = "NOAA 162.550";
modulation = "nfm";
lowpass = -1;
highpass = -1;
bandwidth = 5000;
ampfactor = 2.00;
squelch_snr_threshold = 0.00;
outputs:
(
{
type = "file";
directory = "/recordings";
filename_template = "NOAA_162.550";
}
);
}
);
}
);
================================================
FILE: config/two_dongles_multiple_outputs.conf
================================================
# Example configuration file for 2 dongles.
# First dongle - scanning mode, NFM modulation, three frequencies,
# output to Icecast stream, to a file and to PulseAudio server
# on a local network.
# Second dongle - multichannel mode, three channels:
#
# - channel 1: AM, goes to Icecast stream
# - channel 2: AM, goes to two Icecast streams
# - channel 3: NFM, goes to two files
#
# Dongles are specified with their serial numbers instead of
# indexes, because the latter can change when devices are
# reconnected into different USB ports.
#
# Refer to https://github.com/rtl-airband/RTLSDR-Airband/wiki
# for description of keywords and config syntax.
devices:
({
type = "rtlsdr";
serial = "777755221";
gain = 25;
correction = 80;
mode = "scan";
channels:
(
{
modulation = "nfm";
freqs = ( 152.1, 168.25, 168.375 );
outputs: (
{
type = "icecast";
server = "icecast.server.example.org";
port = 8080;
mountpoint = "utility.mp3";
name = "Utility channels";
username = "source";
password = "mypassword";
},
{
type = "file";
directory = "/home/pi/recordings";
filename_template = "utility";
},
{
type = "pulse";
server = "192.168.11.10";
stream_name = "Utility channels";
continuous = false;
}
);
}
);
},
{
type = "rtlsdr";
serial = "33433123";
gain = 20;
centerfreq = 118.5;
correction = 43;
mode = "multichannel";
channels:
(
{
freq = 118.15;
outputs: (
{
type = "icecast";
server = "icecast.server.example.org";
port = 8080;
mountpoint = "TWR.mp3";
name = "Tower";
genre = "ATC";
username = "source";
password = "mypassword";
}
);
},
{
freq = 119.425;
outputs: (
{
type = "icecast";
server = "icecast.server.example.org";
port = 8080;
mountpoint = "ACC.mp3";
name = "Radar";
genre = "ATC";
username = "source";
password = "mypassword";
},
{
type = "icecast";
server = "other.server.example.org";
port = 9999;
mountpoint = "feed.mp3";
username = "user";
password = "secretpass";
}
);
},
{
freq = 119.6;
modulation = "nfm";
outputs: (
{
type = "file";
directory = "/home/pi/recordings";
filename_template = "somechannel";
},
{
type = "file";
directory = "/home/pi/recordings";
filename_template = "somechannel_full";
continuous = true;
}
);
}
);
}
);
================================================
FILE: init.d/rtl_airband-debian.sh
================================================
#! /bin/sh
### BEGIN INIT INFO
# Provides: rtl_airband
# Required-Start: $remote_fs $syslog
# Required-Stop: $remote_fs $syslog
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: rtl_airband initscript
### END INIT INFO
# Author: Tomasz Lemiech
PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin
DESC="RTLSDR airband receiver"
NAME=rtl_airband
DAEMON=/usr/local/bin/$NAME
DAEMON_ARGS=""
PIDFILE=/run/$NAME.pid
SCRIPTNAME=/etc/init.d/$NAME
# Exit if the package is not installed
[ -x "$DAEMON" ] || exit 0
# Read configuration variable file if it is present
[ -r /etc/default/$NAME ] && . /etc/default/$NAME
# Load the VERBOSE setting and other rcS variables
. /lib/init/vars.sh
# Define LSB log_* functions.
# Depend on lsb-base (>= 3.2-14) to ensure that this file is present
# and status_of_proc is working.
. /lib/lsb/init-functions
#
# Function that starts the daemon/service
#
do_start()
{
# Return
# 0 if daemon has been started
# 1 if daemon was already running
# 2 if daemon could not be started
# First pass uses --test: nothing is started, we only find out
# whether a start would be attempted.
start-stop-daemon --start --quiet --pidfile $PIDFILE --exec $DAEMON --test > /dev/null \
|| return 1
# Second pass actually launches the daemon with the configured arguments.
start-stop-daemon --start --quiet --pidfile $PIDFILE --exec $DAEMON -- \
$DAEMON_ARGS \
|| return 2
# Add code here, if necessary, that waits for the process to be ready
# to handle requests from services started subsequently which depend
# on this one. As a last resort, sleep for some time.
}
do_stop()
{
# Return
# 0 if daemon has been stopped
# 1 if daemon was already stopped
# 2 if daemon could not be stopped
# other if a failure occurred
start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --pidfile $PIDFILE --name $NAME
RETVAL="$?"
[ "$RETVAL" = 2 ] && return 2
# Wait for children to finish too if this is a daemon that forks
# and if the daemon is only ever run from this initscript.
# If the above conditions are not satisfied then add some other code
# that waits for the process to drop all resources that could be
# needed by services started subsequently. A last resort is to
# sleep for some time.
start-stop-daemon --stop --quiet --oknodo --retry=0/30/KILL/5 --exec $DAEMON
[ "$?" = 2 ] && return 2
rm -f $PIDFILE
return "$RETVAL"
}
case "$1" in
start)
[ "$VERBOSE" != no ] && log_daemon_msg "Starting $DESC" "$NAME"
do_start
case "$?" in
0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;;
2) [ "$VERBOSE" != no ] && log_end_msg 1 ;;
esac
;;
stop)
[ "$VERBOSE" != no ] && log_daemon_msg "Stopping $DESC" "$NAME"
do_stop
case "$?" in
0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;;
2) [ "$VERBOSE" != no ] && log_end_msg 1 ;;
esac
;;
status)
status_of_proc "$DAEMON" "$NAME" && exit 0 || exit $?
;;
restart|force-reload)
log_daemon_msg "Restarting $DESC" "$NAME"
do_stop
case "$?" in
0|1)
do_start
case "$?" in
0) log_end_msg 0 ;;
1) log_end_msg 1 ;; # Old process is still running
*) log_end_msg 1 ;; # Failed to start
esac
;;
*)
# Failed to stop
log_end_msg 1
;;
esac
;;
*)
echo "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" >&2
exit 3
;;
esac
:
================================================
FILE: init.d/rtl_airband-freebsd.sh
================================================
#!/bin/sh
# PROVIDE: rtl_airband
# REQUIRE: DAEMON
# BEFORE: LOGIN
# KEYWORD: nojail shutdown
. /etc/rc.subr
name=rtl_airband
rcvar=rtl_airband_enable
command="/usr/local/bin/rtl_airband"
load_rc_config ${name}
run_rc_command "$1"
================================================
FILE: init.d/rtl_airband-gentoo.sh
================================================
#!/sbin/runscript
# rtl_airband Gentoo startup script
# (c) 2015 Tomasz Lemiech
RTLAIRBAND_CONFDIR=${RTLAIRBAND_CONFDIR:-/usr/local/etc}
RTLAIRBAND_CONFIG=${RTLAIRBAND_CONFIG:-${RTLAIRBAND_CONFDIR}/rtl_airband.conf}
RTLAIRBAND_PIDFILE=${RTLAIRBAND_PIDFILE:-/run/${SVCNAME}.pid}
RTLAIRBAND_BINARY=${RTLAIRBAND_BINARY:-/usr/local/bin/rtl_airband}
depend() {
use logger dns
}
checkconfig() {
if [ ! -e "${RTLAIRBAND_CONFIG}" ] ; then
eerror "You need an ${RTLAIRBAND_CONFIG} file to run rtl_airband"
return 1
fi
}
# Start the service: refuse to start when the configuration file is missing,
# otherwise launch the binary through start-stop-daemon, passing
# ${RTLAIRBAND_OPTS} as its command line arguments.
start() {
checkconfig || return 1
ebegin "Starting ${SVCNAME}"
start-stop-daemon --start --exec "${RTLAIRBAND_BINARY}" \
--pidfile "${RTLAIRBAND_PIDFILE}" \
-- ${RTLAIRBAND_OPTS}
# Report the exit status of start-stop-daemon
eend $?
}
# Stop the service identified by the binary path and the pidfile.
stop() {
# When stopping as a part of "restart", bail out early if the config file
# is missing (presumably to avoid taking down a running daemon which
# could not be started again afterwards).
if [ "${RC_CMD}" = "restart" ] ; then
checkconfig || return 1
fi
ebegin "Stopping ${SVCNAME}"
start-stop-daemon --stop --exec "${RTLAIRBAND_BINARY}" \
--pidfile "${RTLAIRBAND_PIDFILE}" --quiet
# Report the exit status of start-stop-daemon
eend $?
}
================================================
FILE: init.d/rtl_airband.service
================================================
[Unit]
Description=SDR AM/NFM demodulator
Documentation=https://github.com/rtl-airband/RTLSDR-Airband/wiki
# NOTE: `network-online.target` may be better for some use cases.
# (Comments must be placed on their own lines - systemd does not support
# trailing comments, everything after "=" is taken as the value.)
Wants=network.target
After=network.target
[Service]
Type=simple
ExecStart=/usr/local/bin/rtl_airband -Fe
# The program may exit only due to startup failure (eg. misconfiguration)
# or due to failure of all SDR devices (eg. disconnection). In either case,
# there is no point to restart it, because it would fail once again.
Restart=no
[Install]
WantedBy=multi-user.target
================================================
FILE: scripts/find_version
================================================
#!/bin/bash
# Print the project version string to stdout.
#
# The version is taken from git when the project root contains a .git entry,
# otherwise from the name of the project root directory when it looks like
# an extracted release archive (RTLSDR-Airband-X.Y.Z).
# On failure nothing is printed to stdout and a diagnostic goes to stderr.

PROJECT_ROOT_PATH="$(cd "$(dirname "$0")/../" && pwd)"
PROJECT_GIT_DIR_PATH="${PROJECT_ROOT_PATH}/.git"
PROJECT_DIR_NAME="$(basename "${PROJECT_ROOT_PATH}")"

# if there is a .git directory at the project root then rely on git for the version string
if [ -r "${PROJECT_GIT_DIR_PATH}" ] ; then
    # -C makes git describe the project's repository regardless of the caller's working directory
    git -C "${PROJECT_ROOT_PATH}" describe --tags --abbrev --dirty --always
    exit 0
fi

# if the project root directory matches the naming convention of an extracted archive then
# get the version number out of that
if [[ "${PROJECT_DIR_NAME}" =~ ^RTLSDR-Airband-[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
    echo "${PROJECT_DIR_NAME}" | cut -d '-' -f 3
    exit 0
fi

# print an error string to stderr (any output to stdout is considered success)
>&2 echo "did not find a git root directory at ${PROJECT_GIT_DIR_PATH} and failed to extract a version from ${PROJECT_DIR_NAME}"
================================================
FILE: scripts/reformat_code
================================================
#!/bin/bash
find src/*.h src/*.cpp src/hello_fft/*.h src/hello_fft/*.c | xargs clang-format-14 -i
================================================
FILE: src/.gitignore
================================================
config.h
================================================
FILE: src/CMakeLists.txt
================================================
include(CheckCXXCompilerFlag)
include(CheckCXXSymbolExists)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
if(UNIX OR MINGW)
add_definitions(-D_FILE_OFFSET_BITS=64)
# isnormal()
add_definitions(-D_POSIX_C_SOURCE=200112L)
endif()
CHECK_CXX_COMPILER_FLAG(-pthread CXX_HAS_PTHREAD)
if(CXX_HAS_PTHREAD)
add_compile_options(-pthread)
endif()
CHECK_CXX_COMPILER_FLAG(-ffast-math CXX_HAS_FFAST_MATH)
if(CXX_HAS_FFAST_MATH)
add_compile_options(-ffast-math)
endif()
# asprintf on MacOS
if(APPLE)
add_definitions(-D_DARWIN_C_SOURCE)
endif()
# sincosf on linux vs __sincosf on MacOS
set(CMAKE_REQUIRED_DEFINITIONS_ORIG ${CMAKE_REQUIRED_DEFINITIONS})
list(APPEND CMAKE_REQUIRED_DEFINITIONS "-D_GNU_SOURCE")
set(CMAKE_REQUIRED_LIBRARIES_ORIG ${CMAKE_REQUIRED_LIBRARIES})
list(APPEND CMAKE_REQUIRED_LIBRARIES m)
CHECK_SYMBOL_EXISTS(sincosf math.h HAVE_SINCOSF)
if(HAVE_SINCOSF)
set(SINCOSF "sincosf")
else()
CHECK_SYMBOL_EXISTS(__sincosf math.h HAVE___SINCOSF)
if(HAVE___SINCOSF)
set(SINCOSF "__sincosf")
endif()
endif()
if(NOT HAVE_SINCOSF AND NOT HAVE___SINCOSF)
message(FATAL_ERROR "Required function sincosf() is unavailable")
endif()
set(CMAKE_REQUIRED_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS_ORIG})
set(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES_ORIG})
find_library(LIBM m REQUIRED)
find_library(LIBDL dl REQUIRED)
find_library(LIBPTHREAD pthread REQUIRED)
find_package(PkgConfig REQUIRED)
pkg_check_modules(CONFIG REQUIRED libconfig++)
list(APPEND rtl_airband_extra_libs ${CONFIG_LIBRARIES})
list(APPEND rtl_airband_include_dirs ${CONFIG_INCLUDE_DIRS})
list(APPEND link_dirs ${CONFIG_LIBRARY_DIRS})
# Can't use pkg_check_modules here, as some distros do not install lame.pc file
find_package(Lame REQUIRED)
list(APPEND rtl_airband_extra_libs ${LAME_LIBRARIES})
list(APPEND rtl_airband_include_dirs ${LAME_INCLUDE_DIR})
pkg_check_modules(SHOUT REQUIRED shout)
list(APPEND rtl_airband_extra_libs ${SHOUT_LIBRARIES})
list(APPEND rtl_airband_include_dirs ${SHOUT_INCLUDE_DIRS})
list(APPEND link_dirs ${SHOUT_LIBRARY_DIRS})
# Temporarily extend the CMAKE_REQUIRED_* variables so that the symbol checks
# below are compiled and linked against libshout. The original values are
# saved here and restored once the checks are done.
set(CMAKE_REQUIRED_INCLUDES_SAVE ${CMAKE_REQUIRED_INCLUDES})
set(CMAKE_REQUIRED_LIBRARIES_SAVE ${CMAKE_REQUIRED_LIBRARIES})
set(CMAKE_REQUIRED_LINK_OPTIONS_SAVE ${CMAKE_REQUIRED_LINK_OPTIONS})
# These variables are ;-separated lists. Joining values with a space would
# produce list elements with leading whitespace (eg. " shout").
list(APPEND CMAKE_REQUIRED_INCLUDES ${SHOUT_INCLUDE_DIRS})
list(APPEND CMAKE_REQUIRED_LIBRARIES ${SHOUT_LIBRARIES})
# One -L option per library directory reported by pkg-config
foreach(shout_library_dir IN LISTS SHOUT_LIBRARY_DIRS)
	list(APPEND CMAKE_REQUIRED_LINK_OPTIONS "-L${shout_library_dir}")
endforeach()
set(LIBSHOUT_HEADER "shout/shout.h")
CHECK_CXX_SYMBOL_EXISTS("SHOUT_TLS_AUTO" ${LIBSHOUT_HEADER}
HAVE_SHOUT_TLS_AUTO)
CHECK_CXX_SYMBOL_EXISTS("SHOUT_TLS_AUTO_NO_PLAIN" ${LIBSHOUT_HEADER}
HAVE_SHOUT_TLS_AUTO_NO_PLAIN)
CHECK_CXX_SYMBOL_EXISTS("SHOUT_TLS_RFC2818" ${LIBSHOUT_HEADER}
HAVE_SHOUT_TLS_RFC2818)
CHECK_CXX_SYMBOL_EXISTS("SHOUT_TLS_RFC2817" ${LIBSHOUT_HEADER}
HAVE_SHOUT_TLS_RFC2817)
CHECK_CXX_SYMBOL_EXISTS("SHOUT_TLS_DISABLED" ${LIBSHOUT_HEADER}
HAVE_SHOUT_TLS_DISABLED)
CHECK_CXX_SYMBOL_EXISTS("shout_set_tls" ${LIBSHOUT_HEADER}
HAVE_SHOUT_SET_TLS)
CHECK_CXX_SYMBOL_EXISTS("shout_set_content_format" ${LIBSHOUT_HEADER}
LIBSHOUT_HAS_CONTENT_FORMAT)
if(HAVE_SHOUT_TLS_AUTO AND HAVE_SHOUT_TLS_AUTO_NO_PLAIN AND
HAVE_SHOUT_TLS_RFC2818 AND HAVE_SHOUT_TLS_RFC2817 AND
HAVE_SHOUT_TLS_DISABLED AND HAVE_SHOUT_SET_TLS)
set(LIBSHOUT_HAS_TLS TRUE)
else()
set(LIBSHOUT_HAS_TLS FALSE)
endif()
# check for shout_set_metadata_utf8() - introduced in libshout v2.4.6
CHECK_CXX_SYMBOL_EXISTS("shout_set_metadata_utf8" ${LIBSHOUT_HEADER}
HAVE_SHOUT_SET_METADATA_UTF8)
if(HAVE_SHOUT_SET_METADATA_UTF8)
set(SHOUT_SET_METADATA "shout_set_metadata_utf8")
else()
set(SHOUT_SET_METADATA "shout_set_metadata")
endif()
set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES_SAVE})
set(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES_SAVE})
set(CMAKE_REQUIRED_LINK_OPTIONS ${CMAKE_REQUIRED_LINK_OPTIONS_SAVE})
option(NFM "Enable support for narrow FM channels" OFF)
set(PLATFORM "native" CACHE STRING "Optimize the build for the given hardware platform")
option(RTLSDR "Enable RTL-SDR support" ON)
set(WITH_RTLSDR FALSE)
option(MIRISDR "Enable Mirics support" ON)
set(WITH_MIRISDR FALSE)
option(SOAPYSDR "Enable SoapySDR support" ON)
set(WITH_SOAPYSDR FALSE)
option(PULSEAUDIO "Enable PulseAudio support" ON)
set(WITH_PULSEAUDIO FALSE)
option(PROFILING "Enable profiling with gperftools")
set(WITH_PROFILING FALSE)
if(RTLSDR)
find_package(RTLSDR)
if(RTLSDR_FOUND)
list(APPEND rtl_airband_extra_sources input-rtlsdr.cpp)
list(APPEND rtl_airband_extra_libs ${RTLSDR_LIBRARIES})
list(APPEND rtl_airband_include_dirs ${RTLSDR_INCLUDE_DIRS})
list(APPEND link_dirs ${RTLSDR_LIBRARY_DIRS})
set(WITH_RTLSDR TRUE)
endif()
endif()
if(MIRISDR)
find_package(MiriSDR)
if(MIRISDR_FOUND)
set(WITH_MIRISDR TRUE)
list(APPEND rtl_airband_extra_sources input-mirisdr.cpp)
list(APPEND rtl_airband_extra_libs ${MIRISDR_LIBRARIES})
list(APPEND rtl_airband_include_dirs ${MIRISDR_INCLUDE_DIRS})
list(APPEND link_dirs ${MIRISDR_LIBRARY_DIRS})
endif()
endif()
if(SOAPYSDR)
message(STATUS "Checking for SoapySDR")
find_package(SoapySDR NO_MODULE)
if(SoapySDR_FOUND)
list(APPEND rtl_airband_extra_sources input-soapysdr.cpp)
message(STATUS " SoapySDR found, ${SoapySDR_INCLUDE_DIRS}, ${SoapySDR_LIBRARIES}")
list(APPEND rtl_airband_extra_libs ${SoapySDR_LIBRARIES})
list(APPEND rtl_airband_include_dirs ${SoapySDR_INCLUDE_DIRS})
set(WITH_SOAPYSDR TRUE)
else()
message(STATUS " SoapySDR not found")
endif()
endif()
if(PULSEAUDIO)
pkg_check_modules(PULSEAUDIO libpulse)
if(PULSEAUDIO_FOUND)
list(APPEND rtl_airband_extra_sources pulse.cpp)
list(APPEND rtl_airband_extra_libs ${PULSEAUDIO_LIBRARIES})
list(APPEND rtl_airband_include_dirs ${PULSEAUDIO_INCLUDE_DIRS})
list(APPEND link_dirs ${PULSEAUDIO_LIBRARY_DIRS})
set(WITH_PULSEAUDIO TRUE)
endif()
endif()
if(PROFILING)
pkg_check_modules(PROFILING libprofiler)
if(PROFILING_FOUND)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")
list(APPEND rtl_airband_extra_libs ${PROFILING_LIBRARIES})
list(APPEND rtl_airband_include_dirs ${PROFILING_INCLUDE_DIRS})
list(APPEND link_dirs ${PROFILING_LIBRARY_DIRS})
set(WITH_PROFILING TRUE)
endif()
endif()
option(BCM_VC "Enable Broadcom Videocore 3 support" OFF)
set(WITH_BCM_VC FALSE)
# error out on deprecated PLATFORM values
if(PLATFORM STREQUAL "rpiv1" OR PLATFORM STREQUAL "armv7-generic" OR PLATFORM STREQUAL "armv8-generic")
message(FATAL_ERROR "platform '${PLATFORM}' has been deprecated, see https://github.com/rtl-airband/RTLSDR-Airband/discussions/447")
# rpiv2 - Raspberry Pi 2 or Raspberry Pi 3 using Broadcom VideoCore IV GPU for FFT
# NOTE: use 'native' to not use the GPU for FFT
elseif(PLATFORM STREQUAL "rpiv2")
set(BCM_VC ON)
add_compile_options(-march=armv7-a -mfpu=neon-vfpv4 -mfloat-abi=hard)
enable_language(ASM)
list(APPEND rtl_airband_extra_sources rtl_airband_neon.s)
# native - let the compiler optimize to run on local hardware (default)
elseif(PLATFORM STREQUAL "native")
CHECK_CXX_COMPILER_FLAG(-march=native CXX_HAS_MARCH_NATIVE)
if(CXX_HAS_MARCH_NATIVE)
add_compile_options(-march=native)
else()
message(FATAL_ERROR "Cannot build with PLATFORM=native: the compiler does not support -march=native option")
endif()
# generic - don't add any hardware related flags, used to build a "portable" binary
elseif(PLATFORM STREQUAL "generic")
# NO-OP
# error out on unrecognized PLATFORM value
else()
message(FATAL_ERROR "Unknown platform '${PLATFORM}'. Valid options are: rpiv2, native, and generic")
endif()
# Try using VC GPU if enabled. Fallback to fftw3f if disabled or if VC lib not found
if(BCM_VC)
	find_package(BCM_VC)
	if(BCM_VC_FOUND)
		add_subdirectory(hello_fft)
		# Pull the object files built in the hello_fft subdirectory into the executable.
		# NOTE(review): assumes the object library target defined in
		# hello_fft/CMakeLists.txt is named "hello_fft" - confirm.
		list(APPEND rtl_airband_obj_files $<TARGET_OBJECTS:hello_fft>)
		list(APPEND rtl_airband_extra_libs ${BCM_VC_LIBRARIES})
		set(WITH_BCM_VC TRUE)
	endif()
endif()
if(NOT BCM_VC_FOUND)
pkg_check_modules(FFTW3F REQUIRED fftw3f)
if(FFTW3F_FOUND)
list(APPEND rtl_airband_extra_libs ${FFTW3F_LIBRARIES})
list(APPEND rtl_airband_include_dirs ${FFTW3F_INCLUDE_DIRS})
list(APPEND link_dirs ${FFTW3F_LIBRARY_DIRS})
endif()
endif()
if(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD")
list(APPEND rtl_airband_extra_libs c++)
endif()
if(BUILD_UNITTESTS)
set(BUILD_UNITTESTS TRUE)
else()
set(BUILD_UNITTESTS FALSE)
endif()
message(STATUS "RTLSDR-Airband configuration summary:\n")
message(STATUS "- Version string:\t\t${RTL_AIRBAND_VERSION}")
message(STATUS "- Build type:\t\t${CMAKE_BUILD_TYPE}")
message(STATUS "- Operating system:\t\t${CMAKE_SYSTEM_NAME}")
message(STATUS "- SDR drivers:")
message(STATUS " - librtlsdr:\t\trequested: ${RTLSDR}, enabled: ${WITH_RTLSDR}")
message(STATUS " - mirisdr:\t\t\trequested: ${MIRISDR}, enabled: ${WITH_MIRISDR}")
message(STATUS " - soapysdr:\t\trequested: ${SOAPYSDR}, enabled: ${WITH_SOAPYSDR}")
message(STATUS "- Other options:")
message(STATUS " - Platform:\t\t${PLATFORM}")
message(STATUS " - Build Unit Tests:\t${BUILD_UNITTESTS}")
message(STATUS " - Broadcom VideoCore GPU:\t${WITH_BCM_VC}")
message(STATUS " - NFM support:\t\t${NFM}")
message(STATUS " - PulseAudio:\t\trequested: ${PULSEAUDIO}, enabled: ${WITH_PULSEAUDIO}")
message(STATUS " - Profiling:\t\trequested: ${PROFILING}, enabled: ${WITH_PROFILING}")
message(STATUS " - Icecast TLS support:\t${LIBSHOUT_HAS_TLS}")
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/config.h)
message(FATAL_ERROR "${CMAKE_CURRENT_SOURCE_DIR}/config.h nolonger used, delete before continuing")
endif()
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/config.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/config.h"
@ONLY
)
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/version.cpp
${CMAKE_CURRENT_BINARY_DIR}/_version.cpp
COMMAND ${CMAKE_COMMAND} -DRTL_AIRBAND_VERSION=${RTL_AIRBAND_VERSION} -P
${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/version.cmake
)
add_library (rtl_airband_base OBJECT
config.cpp
input-common.cpp
input-file.cpp
input-helpers.cpp
mixer.cpp
output.cpp
rtl_airband.cpp
squelch.cpp
ctcss.cpp
util.cpp
udp_stream.cpp
logging.cpp
filters.cpp
helper_functions.cpp
${CMAKE_CURRENT_BINARY_DIR}/version.cpp
${rtl_airband_extra_sources}
)
target_include_directories (rtl_airband_base PUBLIC
${CMAKE_CURRENT_BINARY_DIR} # needed for config.h
${rtl_airband_include_dirs}
)
# can't do this per target with cmake <3.13
link_directories(${link_dirs})

# The executable is assembled from the object files of the rtl_airband_base
# object library (plus any object files appended earlier).
list(APPEND rtl_airband_obj_files $<TARGET_OBJECTS:rtl_airband_base>)

add_executable (rtl_airband ${rtl_airband_obj_files})
set_property(TARGET rtl_airband PROPERTY ENABLE_EXPORTS 1)
# add include for config.h
target_include_directories (rtl_airband PUBLIC
${CMAKE_CURRENT_BINARY_DIR}
)
target_link_libraries (rtl_airband
dl
m
pthread
${rtl_airband_extra_libs}
)
install(TARGETS rtl_airband
RUNTIME DESTINATION bin
)
# TODO: install config if not present
if(BUILD_UNITTESTS)
cmake_minimum_required(VERSION 3.1...3.18 FATAL_ERROR)
# GoogleTest requires at least C++14
set(CMAKE_CXX_STANDARD 14)
# set timestamps of URL extracted files to the extraction time
if(POLICY CMP0135)
cmake_policy(SET CMP0135 NEW)
endif()
# pull in GoogleTest as a dependency
include(FetchContent)
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
)
FetchContent_MakeAvailable(googletest)
enable_testing()
file(GLOB_RECURSE TEST_FILES "test_*.cpp")
list(APPEND TEST_FILES
squelch.cpp
logging.cpp
filters.cpp
ctcss.cpp
generate_signal.cpp
helper_functions.cpp
)
add_executable(
unittests
${TEST_FILES}
)
target_link_libraries(
unittests
GTest::gtest_main
dl
${rtl_airband_extra_libs}
)
# add include for config.h
target_include_directories (unittests PUBLIC
${CMAKE_CURRENT_BINARY_DIR}
)
include(GoogleTest)
gtest_discover_tests(unittests)
endif()
================================================
FILE: src/CMakeModules/FindBCM_VC.cmake
================================================
if(NOT BCM_VC_FOUND)
set(BCM_VC_PATH "/opt/vc" CACHE STRING "List of paths to search for Broadcom VideoCore library")
find_path(BCM_VC_INCLUDE_DIR bcm_host.h PATHS ${BCM_VC_PATH}/include)
find_library(BCM_VC_LIBRARY NAMES bcm_host PATHS ${BCM_VC_PATH}/lib)
set(BCM_VC_LIBRARIES ${BCM_VC_LIBRARY} )
set(BCM_VC_INCLUDE_DIRS ${BCM_VC_INCLUDE_DIR} )
include(FindPackageHandleStandardArgs)
# handle the QUIETLY and REQUIRED arguments and set BCM_VC_FOUND to TRUE
# if all listed variables are TRUE
find_package_handle_standard_args(BCM_VC DEFAULT_MSG
BCM_VC_LIBRARY BCM_VC_INCLUDE_DIR)
mark_as_advanced(BCM_VC_INCLUDE_DIR BCM_VC_LIBRARY)
endif()
================================================
FILE: src/CMakeModules/FindLame.cmake
================================================
FIND_PATH(LAME_INCLUDE_DIR lame/lame.h)
FIND_LIBRARY(LAME_LIBRARIES NAMES mp3lame)
IF(LAME_INCLUDE_DIR AND LAME_LIBRARIES)
SET(LAME_FOUND TRUE)
ENDIF(LAME_INCLUDE_DIR AND LAME_LIBRARIES)
IF(LAME_FOUND)
IF (NOT Lame_FIND_QUIETLY)
MESSAGE(STATUS "Found lame includes: ${LAME_INCLUDE_DIR}/lame/lame.h")
MESSAGE(STATUS "Found lame library: ${LAME_LIBRARIES}")
ENDIF (NOT Lame_FIND_QUIETLY)
ELSE(LAME_FOUND)
IF (Lame_FIND_REQUIRED)
MESSAGE(FATAL_ERROR "lame library required but not found")
ENDIF (Lame_FIND_REQUIRED)
ENDIF(LAME_FOUND)
================================================
FILE: src/CMakeModules/FindMiriSDR.cmake
================================================
# - Try to find mirisdr - the hardware driver for Mirics chip in the dvb receivers
# Once done this will define
#  MIRISDR_FOUND - System has mirisdr
#  MIRISDR_LIBRARIES - The mirisdr libraries
#  MIRISDR_INCLUDE_DIRS - The mirisdr include directories
#  MIRISDR_DEFINITIONS - Additional compiler flags reported by pkg-config
#
# The MIRISDR_DIR environment variable may be set to an installation prefix
# to search in addition to the standard locations.

if(NOT MIRISDR_FOUND)

  find_package(PkgConfig)
  pkg_check_modules (MIRISDR_PKG libmirisdr)
  # pkg_check_modules() stores its results in variables prefixed with MIRISDR_PKG_
  set(MIRISDR_DEFINITIONS ${MIRISDR_PKG_CFLAGS_OTHER})

  find_path(MIRISDR_INCLUDE_DIR
            NAMES mirisdr.h
            HINTS ${MIRISDR_PKG_INCLUDE_DIRS} $ENV{MIRISDR_DIR}/include
            PATHS /usr/local/include /usr/include /opt/include /opt/local/include)

  # Libraries live in the lib subdirectory of the prefix, not in include
  find_library(MIRISDR_LIBRARY
               NAMES mirisdr
               HINTS ${MIRISDR_PKG_LIBRARY_DIRS} $ENV{MIRISDR_DIR}/lib
               PATHS /usr/local/lib /usr/lib /opt/lib /opt/local/lib)

  set(MIRISDR_LIBRARIES ${MIRISDR_LIBRARY} )
  set(MIRISDR_INCLUDE_DIRS ${MIRISDR_INCLUDE_DIR} )

  include(FindPackageHandleStandardArgs)
  # handle the QUIETLY and REQUIRED arguments and set MIRISDR_FOUND to TRUE
  # if all listed variables are TRUE
  find_package_handle_standard_args(MiriSDR DEFAULT_MSG
                                    MIRISDR_LIBRARY MIRISDR_INCLUDE_DIR)

  mark_as_advanced(MIRISDR_INCLUDE_DIR MIRISDR_LIBRARY)

endif(NOT MIRISDR_FOUND)
================================================
FILE: src/CMakeModules/FindRTLSDR.cmake
================================================
#
# Copyright 2012-2013 The Iris Project Developers. See the
# COPYRIGHT file at the top-level directory of this distribution
# and at http://www.softwareradiosystems.com/iris/copyright.html.
#
# This file is part of the Iris Project.
#
# Iris is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of
# the License, or (at your option) any later version.
#
# Iris is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# A copy of the GNU Lesser General Public License can be found in
# the LICENSE file in the top-level directory of this distribution
# and at http://www.gnu.org/licenses/.
#
# - Try to find rtlsdr - the hardware driver for the realtek chip in the dvb receivers
# Once done this will define
#  RTLSDR_FOUND - System has rtlsdr
#  RTLSDR_LIBRARIES - The rtlsdr libraries
#  RTLSDR_INCLUDE_DIRS - The rtlsdr include directories
#  RTLSDR_DEFINITIONS - Additional compiler flags reported by pkg-config
#
# The RTLSDR_DIR environment variable may be set to an installation prefix
# to search in addition to the standard locations.

if(NOT RTLSDR_FOUND)

  find_package(PkgConfig)
  pkg_check_modules (RTLSDR_PKG librtlsdr)
  # pkg_check_modules() stores its results in variables prefixed with RTLSDR_PKG_
  set(RTLSDR_DEFINITIONS ${RTLSDR_PKG_CFLAGS_OTHER})

  find_path(RTLSDR_INCLUDE_DIR
            NAMES rtl-sdr.h
            HINTS ${RTLSDR_PKG_INCLUDE_DIRS} $ENV{RTLSDR_DIR}/include
            PATHS /usr/local/include /usr/include /opt/include /opt/local/include)

  # Libraries live in the lib subdirectory of the prefix, not in include
  find_library(RTLSDR_LIBRARY
               NAMES rtlsdr
               HINTS ${RTLSDR_PKG_LIBRARY_DIRS} $ENV{RTLSDR_DIR}/lib
               PATHS /usr/local/lib /usr/lib /opt/lib /opt/local/lib)

  set(RTLSDR_LIBRARIES ${RTLSDR_LIBRARY} )
  set(RTLSDR_INCLUDE_DIRS ${RTLSDR_INCLUDE_DIR} )

  include(FindPackageHandleStandardArgs)
  # handle the QUIETLY and REQUIRED arguments and set RTLSDR_FOUND to TRUE
  # if all listed variables are TRUE
  find_package_handle_standard_args(RTLSDR DEFAULT_MSG
                                    RTLSDR_LIBRARY RTLSDR_INCLUDE_DIR)

  mark_as_advanced(RTLSDR_INCLUDE_DIR RTLSDR_LIBRARY)

endif(NOT RTLSDR_FOUND)
================================================
FILE: src/CMakeModules/version.cmake
================================================
# Generates version.cpp in the current binary directory. The file contains
# a single definition of the RTL_AIRBAND_VERSION string constant.
# The RTL_AIRBAND_VERSION variable must be supplied by the caller.
set (VERSION "char const *RTL_AIRBAND_VERSION=\"${RTL_AIRBAND_VERSION}\";\n")
# Load the current content of the file (if any) for comparison
if(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/version.cpp)
file(READ ${CMAKE_CURRENT_BINARY_DIR}/version.cpp VERSION_)
else()
set(VERSION_ "")
endif()
# Write the file only when its content would change (presumably to keep its
# timestamp intact and avoid needless recompilation when the version is unchanged)
if (NOT "${VERSION}" STREQUAL "${VERSION_}")
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/version.cpp "${VERSION}")
endif()
================================================
FILE: src/config.cpp
================================================
/*
* config.cpp
* Configuration parsing routines
*
* Copyright (c) 2015-2021 Tomasz Lemiech
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#include
#include // uint32_t
#include
#include
#include
#include
#include
#include
#include "input-common.h" // input_t
#include "rtl_airband.h"
using namespace std;
static int parse_outputs(libconfig::Setting& outs, channel_t* channel, int i, int j, bool parsing_mixers) {
int oo = 0;
for (int o = 0; o < channel->output_count; o++) {
channel->outputs[oo].has_mp3_output = false;
channel->outputs[oo].lame = NULL;
channel->outputs[oo].lamebuf = NULL;
if (outs[o].exists("disable") && (bool)outs[o]["disable"] == true) {
continue;
}
if (!strncmp(outs[o]["type"], "icecast", 7)) {
channel->outputs[oo].data = XCALLOC(1, sizeof(struct icecast_data));
channel->outputs[oo].type = O_ICECAST;
icecast_data* idata = (icecast_data*)(channel->outputs[oo].data);
idata->hostname = strdup(outs[o]["server"]);
idata->port = outs[o]["port"];
idata->mountpoint = strdup(outs[o]["mountpoint"]);
idata->username = strdup(outs[o]["username"]);
idata->password = strdup(outs[o]["password"]);
if (outs[o].exists("name"))
idata->name = strdup(outs[o]["name"]);
if (outs[o].exists("genre"))
idata->genre = strdup(outs[o]["genre"]);
if (outs[o].exists("description"))
idata->description = strdup(outs[o]["description"]);
if (outs[o].exists("send_scan_freq_tags"))
idata->send_scan_freq_tags = (bool)outs[o]["send_scan_freq_tags"];
else
idata->send_scan_freq_tags = 0;
#ifdef LIBSHOUT_HAS_TLS
if (outs[o].exists("tls")) {
if (outs[o]["tls"].getType() == libconfig::Setting::TypeString) {
if (!strcmp(outs[o]["tls"], "auto")) {
idata->tls_mode = SHOUT_TLS_AUTO;
} else if (!strcmp(outs[o]["tls"], "auto_no_plain")) {
idata->tls_mode = SHOUT_TLS_AUTO_NO_PLAIN;
} else if (!strcmp(outs[o]["tls"], "transport")) {
idata->tls_mode = SHOUT_TLS_RFC2818;
} else if (!strcmp(outs[o]["tls"], "upgrade")) {
idata->tls_mode = SHOUT_TLS_RFC2817;
} else if (!strcmp(outs[o]["tls"], "disabled")) {
idata->tls_mode = SHOUT_TLS_DISABLED;
} else {
if (parsing_mixers) {
cerr << "Configuration error: mixers.[" << i << "] outputs.[" << o << "]: ";
} else {
cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: ";
}
cerr << "invalid value for tls; must be one of: auto, auto_no_plain, transport, upgrade, disabled\n";
error();
}
} else {
if (parsing_mixers) {
cerr << "Configuration error: mixers.[" << i << "] outputs.[" << o << "]: ";
} else {
cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: ";
}
cerr << "tls value must be a string\n";
error();
}
} else {
idata->tls_mode = SHOUT_TLS_DISABLED;
}
#endif /* LIBSHOUT_HAS_TLS */
channel->outputs[oo].has_mp3_output = true;
} else if (!strncmp(outs[o]["type"], "file", 4)) {
channel->outputs[oo].data = XCALLOC(1, sizeof(struct file_data));
channel->outputs[oo].type = O_FILE;
file_data* fdata = (file_data*)(channel->outputs[oo].data);
fdata->type = O_FILE;
if (!outs[o].exists("directory") || !outs[o].exists("filename_template")) {
if (parsing_mixers) {
cerr << "Configuration error: mixers.[" << i << "] outputs.[" << o << "]: ";
} else {
cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: ";
}
cerr << "both directory and filename_template required for file\n";
error();
}
fdata->basedir = outs[o]["directory"].c_str();
fdata->basename = outs[o]["filename_template"].c_str();
fdata->dated_subdirectories = outs[o].exists("dated_subdirectories") ? (bool)(outs[o]["dated_subdirectories"]) : false;
fdata->suffix = ".mp3";
fdata->continuous = outs[o].exists("continuous") ? (bool)(outs[o]["continuous"]) : false;
fdata->append = (!outs[o].exists("append")) || (bool)(outs[o]["append"]);
fdata->split_on_transmission = outs[o].exists("split_on_transmission") ? (bool)(outs[o]["split_on_transmission"]) : false;
fdata->include_freq = outs[o].exists("include_freq") ? (bool)(outs[o]["include_freq"]) : false;
channel->outputs[oo].has_mp3_output = true;
if (fdata->split_on_transmission) {
if (parsing_mixers) {
cerr << "Configuration error: mixers.[" << i << "] outputs.[" << o << "]: split_on_transmission is not allowed for mixers\n";
error();
}
if (fdata->continuous) {
cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: can't have both continuous and split_on_transmission\n";
error();
}
}
} else if (!strncmp(outs[o]["type"], "rawfile", 7)) {
if (parsing_mixers) { // rawfile outputs not allowed for mixers
cerr << "Configuration error: mixers.[" << i << "] outputs[" << o << "]: rawfile output is not allowed for mixers\n";
error();
}
channel->outputs[oo].data = XCALLOC(1, sizeof(struct file_data));
channel->outputs[oo].type = O_RAWFILE;
file_data* fdata = (file_data*)(channel->outputs[oo].data);
fdata->type = O_RAWFILE;
if (!outs[o].exists("directory") || !outs[o].exists("filename_template")) {
cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: both directory and filename_template required for file\n";
error();
}
fdata->basedir = outs[o]["directory"].c_str();
fdata->basename = outs[o]["filename_template"].c_str();
fdata->dated_subdirectories = outs[o].exists("dated_subdirectories") ? (bool)(outs[o]["dated_subdirectories"]) : false;
fdata->suffix = ".cf32";
fdata->continuous = outs[o].exists("continuous") ? (bool)(outs[o]["continuous"]) : false;
fdata->append = (!outs[o].exists("append")) || (bool)(outs[o]["append"]);
fdata->split_on_transmission = outs[o].exists("split_on_transmission") ? (bool)(outs[o]["split_on_transmission"]) : false;
fdata->include_freq = outs[o].exists("include_freq") ? (bool)(outs[o]["include_freq"]) : false;
channel->needs_raw_iq = channel->has_iq_outputs = 1;
if (fdata->continuous && fdata->split_on_transmission) {
cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: can't have both continuous and split_on_transmission\n";
error();
}
} else if (!strncmp(outs[o]["type"], "mixer", 5)) {
if (parsing_mixers) { // mixer outputs not allowed for mixers
cerr << "Configuration error: mixers.[" << i << "] outputs.[" << o << "]: mixer output is not allowed for mixers\n";
error();
}
channel->outputs[oo].data = XCALLOC(1, sizeof(struct mixer_data));
channel->outputs[oo].type = O_MIXER;
mixer_data* mdata = (mixer_data*)(channel->outputs[oo].data);
const char* name = (const char*)outs[o]["name"];
if ((mdata->mixer = getmixerbyname(name)) == NULL) {
cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: unknown mixer \"" << name << "\"\n";
error();
}
float ampfactor = outs[o].exists("ampfactor") ? (float)outs[o]["ampfactor"] : 1.0f;
float balance = outs[o].exists("balance") ? (float)outs[o]["balance"] : 0.0f;
if (balance < -1.0f || balance > 1.0f) {
cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: balance out of allowed range <-1.0;1.0>\n";
error();
}
if ((mdata->input = mixer_connect_input(mdata->mixer, ampfactor, balance)) < 0) {
cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o
<< "]: "
"could not connect to mixer "
<< name << ": " << mixer_get_error() << "\n";
error();
}
debug_print("dev[%d].chan[%d].out[%d] connected to mixer %s as input %d (ampfactor=%.1f balance=%.1f)\n", i, j, o, name, mdata->input, ampfactor, balance);
} else if (!strncmp(outs[o]["type"], "udp_stream", 6)) {
channel->outputs[oo].data = XCALLOC(1, sizeof(struct udp_stream_data));
channel->outputs[oo].type = O_UDP_STREAM;
udp_stream_data* sdata = (udp_stream_data*)channel->outputs[oo].data;
sdata->continuous = outs[o].exists("continuous") ? (bool)(outs[o]["continuous"]) : false;
if (outs[o].exists("dest_address")) {
sdata->dest_address = strdup(outs[o]["dest_address"]);
} else {
if (parsing_mixers) {
cerr << "Configuration error: mixers.[" << i << "] outputs.[" << o << "]: ";
} else {
cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: ";
}
cerr << "missing dest_address\n";
error();
}
if (outs[o].exists("dest_port")) {
if (outs[o]["dest_port"].getType() == libconfig::Setting::TypeInt) {
char buffer[12];
sprintf(buffer, "%d", (int)outs[o]["dest_port"]);
sdata->dest_port = strdup(buffer);
} else {
sdata->dest_port = strdup(outs[o]["dest_port"]);
}
} else {
if (parsing_mixers) {
cerr << "Configuration error: mixers.[" << i << "] outputs.[" << o << "]: ";
} else {
cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: ";
}
cerr << "missing dest_port\n";
error();
}
#ifdef WITH_PULSEAUDIO
} else if (!strncmp(outs[o]["type"], "pulse", 5)) {
channel->outputs[oo].data = XCALLOC(1, sizeof(struct pulse_data));
channel->outputs[oo].type = O_PULSE;
pulse_data* pdata = (pulse_data*)(channel->outputs[oo].data);
pdata->continuous = outs[o].exists("continuous") ? (bool)(outs[o]["continuous"]) : false;
pdata->server = outs[o].exists("server") ? strdup(outs[o]["server"]) : NULL;
pdata->name = outs[o].exists("name") ? strdup(outs[o]["name"]) : "rtl_airband";
pdata->sink = outs[o].exists("sink") ? strdup(outs[o]["sink"]) : NULL;
if (outs[o].exists("stream_name")) {
pdata->stream_name = strdup(outs[o]["stream_name"]);
} else {
if (parsing_mixers) {
cerr << "Configuration error: mixers.[" << i << "] outputs.[" << o << "]: PulseAudio outputs of mixers must have stream_name defined\n";
error();
}
char buf[1024];
snprintf(buf, sizeof(buf), "%.3f MHz", (float)channel->freqlist[0].frequency / 1000000.0f);
pdata->stream_name = strdup(buf);
}
#endif /* WITH_PULSEAUDIO */
} else {
if (parsing_mixers) {
cerr << "Configuration error: mixers.[" << i << "] outputs.[" << o << "]: ";
} else {
cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] outputs.[" << o << "]: ";
}
cerr << "unknown output type\n";
error();
}
channel->outputs[oo].enabled = true;
channel->outputs[oo].active = false;
oo++;
}
return oo;
}
// Allocates an array of n freq_t entries and sets each entry's frequency, label,
// AGC average, amplification factor, squelch, activity counter and modulation
// to their initial values. A length below 1 is reported on stderr and passed
// to error().
static struct freq_t* mk_freqlist(int n) {
    if (n < 1) {
        cerr << "mk_freqlist: invalid list length " << n << "\n";
        error();
    }

    struct freq_t* list = (struct freq_t*)XCALLOC(n, sizeof(struct freq_t));

    int idx = 0;
    while (idx < n) {
        struct freq_t& entry = list[idx];
        entry.frequency = 0;
        entry.label = NULL;
        entry.agcavgfast = 0.5f;
        entry.ampfactor = 1.0f;
        entry.squelch = Squelch();
        entry.active_counter = 0;
        entry.modulation = MOD_AM;  // AM unless the configuration overrides it later
        idx++;
    }
    return list;
}
// Logs a warning when the channel frequency lies too far from the device's
// center frequency, i.e. when the offset reaches 90% of half the sample rate.
// devidx and chanidx are only used to identify the channel in the message.
static void warn_if_freq_not_in_range(int devidx, int chanidx, int freq, int centerfreq, int sample_rate) {
    static const float soft_bw_threshold = 0.9f;

    const float bw_limit = (float)sample_rate / 2.f * soft_bw_threshold;
    const float offset = (float)abs(freq - centerfreq);
    if (offset < bw_limit) {
        return;  // comfortably inside the usable bandwidth
    }

    const double freq_mhz = (double)freq / 1e6;
    const double low_edge_mhz = (double)(centerfreq - bw_limit) / 1e6;
    const double high_edge_mhz = (double)(centerfreq + bw_limit) / 1e6;
    log(LOG_WARNING, "Warning: dev[%d].channel[%d]: frequency %.3f MHz is outside of SDR operating bandwidth (%.3f-%.3f MHz)\n", devidx, chanidx, freq_mhz, low_edge_mhz, high_edge_mhz);
}
// Converts a numeric configuration setting to int, depending on its type:
//   - integer: returned as is
//   - float:   multiplied by 1e6 and truncated to int
//   - string:  passed through atofs() and truncated to int
// Any other setting type yields 0.
static int parse_anynum2int(libconfig::Setting& f) {
    switch (f.getType()) {
        case libconfig::Setting::TypeInt:
            return (int)f;

        case libconfig::Setting::TypeFloat:
            return (int)((double)f * 1e6);

        case libconfig::Setting::TypeString: {
            // atofs() is handed a private, writable copy of the string
            char* text = strdup((char const*)f);
            int value = (int)atofs(text);
            free(text);
            return value;
        }

        default:
            return 0;
    }
}
// Parses the "channels" list of a device and fills dev->channels, dev->bins and
// dev->base_bins with one entry per enabled channel.
//
//   chans - the "channels" configuration list of the device
//   dev   - device being configured; its channels / bins / base_bins arrays must
//           already be allocated for chans.getLength() entries
//   i     - device index, used in messages only
//
// Channels with "disable" set to true are skipped, so enabled channels are packed at
// the beginning of the arrays. In scan mode dev->input->centerfreq is set from the
// first frequency of the channel. Returns the number of enabled channels.
// Configuration errors are reported on stderr followed by a call to error().
static int parse_channels(libconfig::Setting& chans, device_t* dev, int i) {
    int jj = 0;  // index of the next free slot in dev->channels
    for (int j = 0; j < chans.getLength(); j++) {
        if (chans[j].exists("disable") && (bool)chans[j]["disable"] == true) {
            continue;
        }
        channel_t* channel = dev->channels + jj;
        for (int k = 0; k < AGC_EXTRA; k++) {
            channel->wavein[k] = 20;
            channel->waveout[k] = 0.5;
        }
        channel->axcindicate = NO_SIGNAL;
        channel->mode = MM_MONO;
        channel->freq_count = 1;
        channel->freq_idx = 0;
        channel->highpass = chans[j].exists("highpass") ? (int)chans[j]["highpass"] : 100;
        channel->lowpass = chans[j].exists("lowpass") ? (int)chans[j]["lowpass"] : 2500;
#ifdef NFM
        channel->pr = 0;
        channel->pj = 0;
        channel->prev_waveout = 0.5;
        channel->alpha = dev->alpha;
#endif /* NFM */

        // Make sure lowpass / highpass aren't flipped.
        // If lowpass is enabled (greater than zero) it must be larger than highpass
        if (channel->lowpass > 0 && channel->lowpass < channel->highpass) {
            cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: lowpass (" << channel->lowpass << ") must be greater than or equal to highpass (" << channel->highpass << ")\n";
            error();
        }

        modulations channel_modulation = MOD_AM;
        if (chans[j].exists("modulation")) {
#ifdef NFM
            if (strncmp(chans[j]["modulation"], "nfm", 3) == 0) {
                channel_modulation = MOD_NFM;
            } else
#endif /* NFM */
                if (strncmp(chans[j]["modulation"], "am", 2) != 0) {
                cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: unknown modulation\n";
                error();
            }
        }
        channel->afc = chans[j].exists("afc") ? (unsigned char)(unsigned int)chans[j]["afc"] : 0;
        if (dev->mode == R_MULTICHANNEL) {
            channel->freqlist = mk_freqlist(1);
            channel->freqlist[0].frequency = parse_anynum2int(chans[j]["freq"]);
            warn_if_freq_not_in_range(i, j, channel->freqlist[0].frequency, dev->input->centerfreq, dev->input->sample_rate);
            if (chans[j].exists("label")) {
                channel->freqlist[0].label = strdup(chans[j]["label"]);
            }
            channel->freqlist[0].modulation = channel_modulation;
        } else { /* R_SCAN */
            channel->freq_count = chans[j]["freqs"].getLength();
            if (channel->freq_count < 1) {
                cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: freqs should be a list with at least one element\n";
                error();
            }
            channel->freqlist = mk_freqlist(channel->freq_count);
            // Every per-frequency list must provide at least freq_count elements
            if (chans[j].exists("labels") && chans[j]["labels"].getLength() < channel->freq_count) {
                cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: labels should be a list with at least " << channel->freq_count << " elements\n";
                error();
            }
            if (chans[j].exists("squelch_threshold") && libconfig::Setting::TypeList == chans[j]["squelch_threshold"].getType() && chans[j]["squelch_threshold"].getLength() < channel->freq_count) {
                cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: squelch_threshold should be an int or a list of ints with at least " << channel->freq_count
                     << " elements\n";
                error();
            }
            if (chans[j].exists("squelch_snr_threshold") && libconfig::Setting::TypeList == chans[j]["squelch_snr_threshold"].getType() &&
                chans[j]["squelch_snr_threshold"].getLength() < channel->freq_count) {
                cerr << "Configuration error: devices.[" << i << "] channels.[" << j
                     << "]: squelch_snr_threshold should be an int, a float or a list of "
                        "ints or floats with at least "
                     << channel->freq_count << " elements\n";
                error();
            }
            if (chans[j].exists("notch") && libconfig::Setting::TypeList == chans[j]["notch"].getType() && chans[j]["notch"].getLength() < channel->freq_count) {
                cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: notch should be an float or a list of floats with at least " << channel->freq_count << " elements\n";
                error();
            }
            if (chans[j].exists("notch_q") && libconfig::Setting::TypeList == chans[j]["notch_q"].getType() && chans[j]["notch_q"].getLength() < channel->freq_count) {
                cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: notch_q should be a float or a list of floats with at least " << channel->freq_count << " elements\n";
                error();
            }
            if (chans[j].exists("ctcss") && libconfig::Setting::TypeList == chans[j]["ctcss"].getType() && chans[j]["ctcss"].getLength() < channel->freq_count) {
                cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: ctcss should be an float or a list of floats with at least " << channel->freq_count << " elements\n";
                error();
            }
            if (chans[j].exists("modulation") && chans[j].exists("modulations")) {
                cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: can't set both modulation and modulations\n";
                error();
            }
            if (chans[j].exists("modulations") && chans[j]["modulations"].getLength() < channel->freq_count) {
                cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: modulations should be a list with at least " << channel->freq_count << " elements\n";
                error();
            }

            for (int f = 0; f < channel->freq_count; f++) {
                channel->freqlist[f].frequency = parse_anynum2int((chans[j]["freqs"][f]));
                if (chans[j].exists("labels")) {
                    channel->freqlist[f].label = strdup(chans[j]["labels"][f]);
                }
                if (chans[j].exists("modulations")) {
#ifdef NFM
                    if (strncmp(chans[j]["modulations"][f], "nfm", 3) == 0) {
                        channel->freqlist[f].modulation = MOD_NFM;
                    } else
#endif /* NFM */
                        if (strncmp(chans[j]["modulations"][f], "am", 2) == 0) {
                        channel->freqlist[f].modulation = MOD_AM;
                    } else {
                        cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] modulations.[" << f << "]: unknown modulation\n";
                        error();
                    }
                } else {
                    channel->freqlist[f].modulation = channel_modulation;
                }
            }
            // Set initial frequency for scanning
            // We tune 20 FFT bins higher to avoid DC spike
            dev->input->centerfreq = channel->freqlist[0].frequency + 20 * (double)(dev->input->sample_rate / fft_size);
        }
        if (chans[j].exists("squelch")) {
            cerr << "Warning: 'squelch' no longer supported and will be ignored, use 'squelch_threshold' or 'squelch_snr_threshold' instead\n";
        }
        if (chans[j].exists("squelch_threshold") && chans[j].exists("squelch_snr_threshold")) {
            cerr << "Warning: Both 'squelch_threshold' and 'squelch_snr_threshold' are set and may conflict\n";
        }
        if (chans[j].exists("squelch_threshold")) {
            // Value is dBFS, zero disables manual threshold (ie use auto squelch), negative is valid, positive is invalid
            if (libconfig::Setting::TypeList == chans[j]["squelch_threshold"].getType()) {
                // New-style array of per-frequency squelch settings
                for (int f = 0; f < channel->freq_count; f++) {
                    int threshold_dBFS = (int)chans[j]["squelch_threshold"][f];
                    if (threshold_dBFS > 0) {
                        cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: squelch_threshold must be less than or equal to 0\n";
                        error();
                    } else if (threshold_dBFS == 0) {
                        channel->freqlist[f].squelch.set_squelch_level_threshold(0);
                    } else {
                        channel->freqlist[f].squelch.set_squelch_level_threshold(dBFS_to_level(threshold_dBFS));
                    }
                }
            } else if (libconfig::Setting::TypeInt == chans[j]["squelch_threshold"].getType()) {
                // Legacy (single squelch for all frequencies)
                int threshold_dBFS = (int)chans[j]["squelch_threshold"];
                float level = 0;
                if (threshold_dBFS > 0) {
                    cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: squelch_threshold must be less than or equal to 0\n";
                    error();
                } else if (threshold_dBFS == 0) {
                    level = 0;
                } else {
                    level = dBFS_to_level(threshold_dBFS);
                }
                for (int f = 0; f < channel->freq_count; f++) {
                    channel->freqlist[f].squelch.set_squelch_level_threshold(level);
                }
            } else {
                cerr << "Invalid value for squelch_threshold (should be int or list - use parentheses)\n";
                error();
            }
        }
        if (chans[j].exists("squelch_snr_threshold")) {
            // Value is SNR in dB, zero disables squelch (ie always open), -1 uses default value, positive is valid, other negative values are invalid
            if (libconfig::Setting::TypeList == chans[j]["squelch_snr_threshold"].getType()) {
                // New-style array of per-frequency squelch settings
                for (int f = 0; f < channel->freq_count; f++) {
                    float snr = 0.f;
                    if (libconfig::Setting::TypeFloat == chans[j]["squelch_snr_threshold"][f].getType()) {
                        snr = (float)chans[j]["squelch_snr_threshold"][f];
                    } else if (libconfig::Setting::TypeInt == chans[j]["squelch_snr_threshold"][f].getType()) {
                        snr = (int)chans[j]["squelch_snr_threshold"][f];
                    } else {
                        cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: squelch_snr_threshold list must be of int or float\n";
                        error();
                    }

                    if (snr == -1.0) {
                        continue;  // "disable" for this channel in list
                    } else if (snr < 0) {
                        cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: squelch_snr_threshold must be greater than or equal to 0\n";
                        error();
                    } else {
                        channel->freqlist[f].squelch.set_squelch_snr_threshold(snr);
                    }
                }
            } else if (libconfig::Setting::TypeFloat == chans[j]["squelch_snr_threshold"].getType() || libconfig::Setting::TypeInt == chans[j]["squelch_snr_threshold"].getType()) {
                // Legacy (single squelch for all frequencies)
                float snr = (libconfig::Setting::TypeFloat == chans[j]["squelch_snr_threshold"].getType()) ? (float)chans[j]["squelch_snr_threshold"] : (int)chans[j]["squelch_snr_threshold"];

                // -1 means "use the default" and therefore leaves the squelch untouched.
                // This must not `continue`: we are in the per-channel loop here, so that
                // would silently drop the whole channel instead of just this setting.
                if (snr != -1.0) {
                    if (snr < 0) {
                        cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: squelch_snr_threshold must be greater than or equal to 0\n";
                        error();
                    }

                    for (int f = 0; f < channel->freq_count; f++) {
                        channel->freqlist[f].squelch.set_squelch_snr_threshold(snr);
                    }
                }
            } else {
                cerr << "Invalid value for squelch_snr_threshold (should be float, int, or list of int/float - use parentheses)\n";
                error();
            }
        }
        if (chans[j].exists("notch")) {
            static const float default_q = 10.0;

            if (chans[j].exists("notch_q") && chans[j]["notch"].getType() != chans[j]["notch_q"].getType()) {
                cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: notch_q (if set) must be the same type as notch - "
                     << "float or a list of floats with at least " << channel->freq_count << " elements\n";
                error();
            }
            if (libconfig::Setting::TypeList == chans[j]["notch"].getType()) {
                for (int f = 0; f < channel->freq_count; f++) {
                    float freq = (float)chans[j]["notch"][f];
                    float q = chans[j].exists("notch_q") ? (float)chans[j]["notch_q"][f] : default_q;

                    if (q == 0.0) {
                        q = default_q;
                    } else if (q <= 0.0) {
                        cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "] freq.[" << f << "]: invalid value for notch_q: " << q << " (must be greater than 0.0)\n";
                        error();
                    }

                    if (freq == 0) {
                        continue;  // "disable" for this channel in list
                    } else if (freq < 0) {
                        cerr << "devices.[" << i << "] channels.[" << j << "] freq.[" << f << "]: invalid value for notch: " << freq << ", ignoring\n";
                    } else {
                        channel->freqlist[f].notch_filter = NotchFilter(freq, WAVE_RATE, q);
                    }
                }
            } else if (libconfig::Setting::TypeFloat == chans[j]["notch"].getType()) {
                float freq = (float)chans[j]["notch"];
                float q = chans[j].exists("notch_q") ? (float)chans[j]["notch_q"] : default_q;
                if (q <= 0.0) {
                    cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: invalid value for notch_q: " << q << " (must be greater than 0.0)\n";
                    error();
                }
                for (int f = 0; f < channel->freq_count; f++) {
                    if (freq == 0) {
                        continue;  // "disable" is default so ignore without error message
                    } else if (freq < 0) {
                        cerr << "devices.[" << i << "] channels.[" << j << "]: notch value '" << freq << "' invalid, ignoring\n";
                    } else {
                        channel->freqlist[f].notch_filter = NotchFilter(freq, WAVE_RATE, q);
                    }
                }
            } else {
                cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: notch should be an float or a list of floats with at least " << channel->freq_count << " elements\n";
                error();
            }
        }
        if (chans[j].exists("ctcss")) {
            if (libconfig::Setting::TypeList == chans[j]["ctcss"].getType()) {
                for (int f = 0; f < channel->freq_count; f++) {
                    float freq = (float)chans[j]["ctcss"][f];

                    if (freq == 0) {
                        continue;  // "disable" for this channel in list
                    } else if (freq < 0) {
                        cerr << "devices.[" << i << "] channels.[" << j << "] freq.[" << f << "]: invalid value for ctcss: " << freq << ", ignoring\n";
                    } else {
                        channel->freqlist[f].squelch.set_ctcss_freq(freq, WAVE_RATE);
                    }
                }
            } else if (libconfig::Setting::TypeFloat == chans[j]["ctcss"].getType()) {
                float freq = (float)chans[j]["ctcss"];
                for (int f = 0; f < channel->freq_count; f++) {
                    if (freq <= 0) {
                        cerr << "devices.[" << i << "] channels.[" << j << "]: ctcss value '" << freq << "' invalid, ignoring\n";
                    } else {
                        channel->freqlist[f].squelch.set_ctcss_freq(freq, WAVE_RATE);
                    }
                }
            } else {
                cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: ctcss should be an float or a list of floats with at least " << channel->freq_count << " elements\n";
                error();
            }
        }
        if (chans[j].exists("bandwidth")) {
            channel->needs_raw_iq = 1;

            if (libconfig::Setting::TypeList == chans[j]["bandwidth"].getType()) {
                for (int f = 0; f < channel->freq_count; f++) {
                    int bandwidth = parse_anynum2int(chans[j]["bandwidth"][f]);

                    if (bandwidth == 0) {
                        continue;  // "disable" for this channel in list
                    } else if (bandwidth < 0) {
                        cerr << "devices.[" << i << "] channels.[" << j << "] freq.[" << f << "]: bandwidth value '" << bandwidth << "' invalid, ignoring\n";
                    } else {
                        channel->freqlist[f].lowpass_filter = LowpassFilter((float)bandwidth / 2, WAVE_RATE);
                    }
                }
            } else {
                int bandwidth = parse_anynum2int(chans[j]["bandwidth"]);
                // Zero means "disabled", which is the default, so nothing is configured
                // and no message is printed. This must not `continue`: we are in the
                // per-channel loop here, so that would silently drop the whole channel.
                if (bandwidth < 0) {
                    cerr << "devices.[" << i << "] channels.[" << j << "]: bandwidth value '" << bandwidth << "' invalid, ignoring\n";
                } else if (bandwidth > 0) {
                    for (int f = 0; f < channel->freq_count; f++) {
                        channel->freqlist[f].lowpass_filter = LowpassFilter((float)bandwidth / 2, WAVE_RATE);
                    }
                }
            }
        }
        if (chans[j].exists("ampfactor")) {
            if (libconfig::Setting::TypeList == chans[j]["ampfactor"].getType()) {
                for (int f = 0; f < channel->freq_count; f++) {
                    float ampfactor = (float)chans[j]["ampfactor"][f];

                    if (ampfactor < 0) {
                        cerr << "devices.[" << i << "] channels.[" << j << "] freq.[" << f << "]: ampfactor '" << ampfactor << "' must not be negative\n";
                        error();
                    }

                    channel->freqlist[f].ampfactor = ampfactor;
                }
            } else {
                float ampfactor = (float)chans[j]["ampfactor"];

                if (ampfactor < 0) {
                    cerr << "devices.[" << i << "] channels.[" << j << "]: ampfactor '" << ampfactor << "' must not be negative\n";
                    error();
                }

                for (int f = 0; f < channel->freq_count; f++) {
                    channel->freqlist[f].ampfactor = ampfactor;
                }
            }
        }

#ifdef NFM
        if (chans[j].exists("tau")) {
            channel->alpha = ((int)chans[j]["tau"] == 0 ? 0.0f : exp(-1.0f / (WAVE_RATE * 1e-6 * (int)chans[j]["tau"])));
        }
#endif /* NFM */

        libconfig::Setting& outputs = chans[j]["outputs"];
        channel->output_count = outputs.getLength();
        if (channel->output_count < 1) {
            cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: no outputs defined\n";
            error();
        }
        channel->outputs = (output_t*)XCALLOC(channel->output_count, sizeof(struct output_t));
        int outputs_enabled = parse_outputs(outputs, channel, i, j, false);
        if (outputs_enabled < 1) {
            cerr << "Configuration error: devices.[" << i << "] channels.[" << j << "]: no outputs defined\n";
            error();
        }
        // Shrink the array to the number of outputs which are actually enabled
        channel->outputs = (output_t*)XREALLOC(channel->outputs, outputs_enabled * sizeof(struct output_t));
        channel->output_count = outputs_enabled;

        dev->base_bins[jj] = dev->bins[jj] =
            (size_t)ceil((channel->freqlist[0].frequency + dev->input->sample_rate - dev->input->centerfreq) / (double)(dev->input->sample_rate / fft_size) - 1.0) % fft_size;
        debug_print("bins[%d]: %zu\n", jj, dev->bins[jj]);

#ifdef NFM
        for (int f = 0; f < channel->freq_count; f++) {
            if (channel->freqlist[f].modulation == MOD_NFM) {
                channel->needs_raw_iq = 1;
                break;
            }
        }
#endif /* NFM */

        if (channel->needs_raw_iq) {
            // Downmixing is done only for NFM and raw IQ outputs. It's not critical to have some residual
            // freq offset in AM, as it doesn't affect sound quality significantly.
            double dm_dphi = (double)(channel->freqlist[0].frequency - dev->input->centerfreq);  // downmix freq in Hz

            // In general, sample_rate is not required to be an integer multiple of WAVE_RATE.
            // However the FFT window may only slide by an integer number of input samples. A non-zero rounding error
            // introduces additional phase rotation which we have to compensate in order to shift the channel of interest
            // to the center of the spectrum of the output I/Q stream. This is important for correct NFM demodulation.
            // The error value (in Hz):
            // - has an absolute value 0..WAVE_RATE/2
            // - is linear with the error introduced by rounding the value of sample_rate/WAVE_RATE to the nearest integer
            //   (range of -0.5..0.5)
            // - is linear with the distance between center frequency and the channel frequency, normalized to 0..1
            double decimation_factor = ((double)dev->input->sample_rate / (double)WAVE_RATE);
            double dm_dphi_correction = (double)WAVE_RATE / 2.0;
            dm_dphi_correction *= (decimation_factor - round(decimation_factor));
            dm_dphi_correction *= (double)(channel->freqlist[0].frequency - dev->input->centerfreq) / ((double)dev->input->sample_rate / 2.0);

            debug_print("dev[%d].chan[%d]: dm_dphi: %f Hz dm_dphi_correction: %f Hz\n", i, jj, dm_dphi, dm_dphi_correction);
            dm_dphi -= dm_dphi_correction;
            debug_print("dev[%d].chan[%d]: dm_dphi_corrected: %f Hz\n", i, jj, dm_dphi);
            // Normalize
            dm_dphi /= (double)WAVE_RATE;
            // Unalias it, to prevent overflow of int during cast
            dm_dphi -= trunc(dm_dphi);
            debug_print("dev[%d].chan[%d]: dm_dphi_normalized=%f\n", i, jj, dm_dphi);
            // Translate this to uint32_t range 0x00000000-0x00ffffff
            dm_dphi *= 256.0 * 65536.0;
            // Cast it to signed int first, because casting negative float to uint is not portable
            channel->dm_dphi = (uint32_t)((int)dm_dphi);
            debug_print("dev[%d].chan[%d]: dm_dphi_scaled=%f cast=0x%x\n", i, jj, dm_dphi, channel->dm_dphi);
            channel->dm_phi = 0.f;
        }

#ifdef DEBUG_SQUELCH
        // Setup squelch debug file, if enabled
        char tmp_filepath[1024];
        for (int f = 0; f < channel->freq_count; f++) {
            snprintf(tmp_filepath, sizeof(tmp_filepath), "./squelch_debug-%d-%d.dat", j, f);
            channel->freqlist[f].squelch.set_debug_file(tmp_filepath);
        }
#endif /* DEBUG_SQUELCH */

        jj++;
    }
    return jj;
}
// Parses the "devices" configuration list and initialises one entry of the global
// `devices` array per enabled device: input driver, sample rate, operating mode,
// center frequency (multichannel mode only), input buffer and channels.
//
//   devs - the "devices" configuration list
//
// Devices with "disable" set to true are skipped, so enabled devices are packed at
// the beginning of the array. Returns the number of enabled devices.
// Configuration errors are reported on stderr followed by a call to error().
int parse_devices(libconfig::Setting& devs) {
    int devcnt = 0;  // index of the next free slot in `devices`
    for (int i = 0; i < devs.getLength(); i++) {
        if (devs[i].exists("disable") && (bool)devs[i]["disable"] == true)
            continue;
        device_t* dev = devices + devcnt;
        if (devs[i].exists("type")) {
            dev->input = input_new(devs[i]["type"]);
            if (dev->input == NULL) {
                cerr << "Configuration error: devices.[" << i << "]: unsupported device type\n";
                error();
            }
        } else {
#ifdef WITH_RTLSDR
            cerr << "Warning: devices.[" << i << "]: assuming device type \"rtlsdr\", please set \"type\" in the device section.\n";
            dev->input = input_new("rtlsdr");
#else
            cerr << "Configuration error: devices.[" << i << "]: mandatory parameter missing: type\n";
            error();
#endif /* WITH_RTLSDR */
        }
        assert(dev->input != NULL);
        if (devs[i].exists("sample_rate")) {
            int sample_rate = parse_anynum2int(devs[i]["sample_rate"]);
            // The rate has to be strictly greater than WAVE_RATE - this is what the
            // message says and what is asserted further below - so reject equality too.
            if (sample_rate <= WAVE_RATE) {
                cerr << "Configuration error: devices.[" << i << "]: sample_rate must be greater than " << WAVE_RATE << "\n";
                error();
            }
            dev->input->sample_rate = sample_rate;
        }
        if (devs[i].exists("mode")) {
            if (!strncmp(devs[i]["mode"], "multichannel", 12)) {
                dev->mode = R_MULTICHANNEL;
            } else if (!strncmp(devs[i]["mode"], "scan", 4)) {
                dev->mode = R_SCAN;
            } else {
                cerr << "Configuration error: devices.[" << i << "]: invalid mode (must be one of: \"scan\", \"multichannel\")\n";
                error();
            }
        } else {
            dev->mode = R_MULTICHANNEL;
        }
        if (dev->mode == R_MULTICHANNEL) {
            dev->input->centerfreq = parse_anynum2int(devs[i]["centerfreq"]);
        }  // centerfreq for R_SCAN will be set by parse_channels() after frequency list has been read
#ifdef NFM
        if (devs[i].exists("tau")) {
            dev->alpha = ((int)devs[i]["tau"] == 0 ? 0.0f : exp(-1.0f / (WAVE_RATE * 1e-6 * (int)devs[i]["tau"])));
        } else {
            dev->alpha = alpha;
        }
#endif /* NFM */

        // Parse hardware-dependent configuration parameters
        if (input_parse_config(dev->input, devs[i]) < 0) {
            // FIXME: get and display error string from input_parse_config
            // Right now it exits the program on failure.
        }
        // Some basic sanity checks for crucial parameters which have to be set
        // (or can be modified) by the input driver
        assert(dev->input->sfmt != SFMT_UNDEF);
        assert(dev->input->fullscale > 0);
        assert(dev->input->bytes_per_sample > 0);
        assert(dev->input->sample_rate > WAVE_RATE);

        // For the input buffer size use a base value and round it up to the nearest multiple
        // of FFT_BATCH blocks of input samples.
        // ceil is required here because sample rate is not guaranteed to be an integer multiple of WAVE_RATE.
        size_t fft_batch_len = FFT_BATCH * (2 * dev->input->bytes_per_sample * (size_t)ceil((double)dev->input->sample_rate / (double)WAVE_RATE));
        dev->input->buf_size = MIN_BUF_SIZE;
        if (dev->input->buf_size % fft_batch_len != 0)
            dev->input->buf_size += fft_batch_len - dev->input->buf_size % fft_batch_len;
        debug_print("dev->input->buf_size: %zu\n", dev->input->buf_size);
        dev->input->buffer = (unsigned char*)XCALLOC(sizeof(unsigned char), dev->input->buf_size + 2 * dev->input->bytes_per_sample * fft_size);
        dev->input->bufs = dev->input->bufe = 0;
        dev->input->overflow_count = 0;
        dev->output_overrun_count = 0;
        dev->waveend = dev->waveavail = dev->row = dev->tq_head = dev->tq_tail = 0;
        dev->last_frequency = -1;

        libconfig::Setting& chans = devs[i]["channels"];
        if (chans.getLength() < 1) {
            cerr << "Configuration error: devices.[" << i << "]: no channels configured\n";
            error();
        }
        // Allocate for all configured channels first; shrunk below to the enabled ones
        dev->channels = (channel_t*)XCALLOC(chans.getLength(), sizeof(channel_t));
        dev->bins = (size_t*)XCALLOC(chans.getLength(), sizeof(size_t));
        dev->base_bins = (size_t*)XCALLOC(chans.getLength(), sizeof(size_t));
        dev->channel_count = 0;
        int channel_count = parse_channels(chans, dev, i);
        if (channel_count < 1) {
            cerr << "Configuration error: devices.[" << i << "]: no channels enabled\n";
            error();
        }
        if (dev->mode == R_SCAN && channel_count > 1) {
            cerr << "Configuration error: devices.[" << i << "]: only one channel is allowed in scan mode\n";
            error();
        }
        dev->channels = (channel_t*)XREALLOC(dev->channels, channel_count * sizeof(channel_t));
        dev->bins = (size_t*)XREALLOC(dev->bins, channel_count * sizeof(size_t));
        dev->base_bins = (size_t*)XREALLOC(dev->base_bins, channel_count * sizeof(size_t));
        dev->channel_count = channel_count;
        devcnt++;
    }
    return devcnt;
}
/*
 * Parse the "mixers" configuration section into the global mixers[] array.
 *
 * Entries with "disable = true" are skipped. Every other entry must have a
 * name and at least one enabled output; otherwise a configuration error is
 * printed and error() is called.
 *
 * NOTE(review): assumes the caller sized mixers[] to hold at least one slot
 * per enabled entry - this function does no bounds checking of its own.
 *
 * Returns the number of mixers that were filled in.
 */
int parse_mixers(libconfig::Setting& mx) {
    int mm = 0;  // next free slot in mixers[]; also the count of mixers parsed so far
    for (int i = 0; i < mx.getLength(); i++) {
        if (mx[i].exists("disable") && (bool)mx[i]["disable"] == true)
            continue;
        const char* name = mx[i].getName();
        if (name == NULL) {
            cerr << "Configuration error: mixers.[" << i << "]: undefined mixer name\n";
            error();
        }
        debug_print("mm=%d name=%s\n", mm, name);

        // Start from a clean, disabled mixer with no inputs attached yet.
        mixer_t* mixer = &mixers[mm];
        mixer->name = strdup(name);
        mixer->enabled = false;
        mixer->interval = MIX_DIVISOR;
        mixer->output_overrun_count = 0;
        mixer->input_count = 0;
        mixer->inputs = NULL;
        mixer->inputs_todo = NULL;
        mixer->input_mask = NULL;

        channel_t* channel = &mixer->channel;
        channel->highpass = mx[i].exists("highpass") ? (int)mx[i]["highpass"] : 100;
        channel->lowpass = mx[i].exists("lowpass") ? (int)mx[i]["lowpass"] : 2500;
        channel->mode = MM_MONO;

        // Make sure lowpass / highpass aren't flipped.
        // If lowpass is enabled (greater than zero) it must not be smaller than highpass.
        if (channel->lowpass > 0 && channel->lowpass < channel->highpass) {
            cerr << "Configuration error: mixers.[" << i << "]: lowpass (" << channel->lowpass << ") must be greater than or equal to highpass (" << channel->highpass << ")\n";
            error();
        }

        libconfig::Setting& outputs = mx[i]["outputs"];
        channel->output_count = outputs.getLength();
        if (channel->output_count < 1) {
            cerr << "Configuration error: mixers.[" << i << "]: no outputs defined\n";
            error();
        }
        // Allocate for every configured output, then shrink to the enabled ones.
        channel->outputs = (output_t*)XCALLOC(channel->output_count, sizeof(struct output_t));
        int outputs_enabled = parse_outputs(outputs, channel, i, 0, true);
        if (outputs_enabled < 1) {
            // Outputs exist in the config but none of them survived parsing as enabled.
            cerr << "Configuration error: mixers.[" << i << "]: no outputs enabled\n";
            error();
        }
        channel->outputs = (output_t*)XREALLOC(channel->outputs, outputs_enabled * sizeof(struct output_t));
        channel->output_count = outputs_enabled;
        mm++;
    }
    return mm;
}
// vim: ts=4
================================================
FILE: src/config.h.in
================================================
/*
* config.h.in
* Template for cmake-generated config.h
*
* Copyright (c) 2015-2021 Tomasz Lemiech
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#ifndef _CONFIG_H
#define _CONFIG_H
/* Feature switches: CMake rewrites each #cmakedefine line when it generates config.h from this template. */
#cmakedefine WITH_RTLSDR
#cmakedefine WITH_MIRISDR
#cmakedefine WITH_SOAPYSDR
#cmakedefine WITH_PROFILING
#cmakedefine WITH_PULSEAUDIO
#cmakedefine NFM
#cmakedefine WITH_BCM_VC
#cmakedefine LIBSHOUT_HAS_TLS
#cmakedefine LIBSHOUT_HAS_CONTENT_FORMAT
/* The @...@ placeholders below are replaced with the CMake variables of the same name. */
#define SINCOSF @SINCOSF@
#define SHOUT_SET_METADATA @SHOUT_SET_METADATA@
#endif /* _CONFIG_H */
================================================
FILE: src/ctcss.cpp
================================================
/*
* ctcss.cpp
*
* Copyright (C) 2022-2023 charlie-foxtrot
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#include // M_PI
#include // sort
#include "logging.h" // debug_print()
#include "ctcss.h"
using namespace std;
// Goertzel-based single tone detector, following
// https://www.embedded.com/detecting-ctcss-tones-with-goertzels-algorithm/
// and https://www.embedded.com/the-goertzel-algorithm/
//
// tone_freq   - frequency to detect
// sample_rate - rate of the samples later passed to process_sample()
// window_size - number of samples accumulated before a magnitude is produced
ToneDetector::ToneDetector(float tone_freq, float sample_rate, int window_size)
    : tone_freq_(tone_freq), magnitude_(0.0), window_size_(window_size) {
    // Adding 0.5 before the conversion to int rounds a non-negative value to the nearest integer.
    const int bin = (0.5 + window_size * tone_freq / sample_rate);
    const float omega = (2.0 * M_PI * bin) / window_size;
    coeff_ = 2.0 * cos(omega);

    // Clear the sample counter and the recurrence state.
    reset();
}
// Feed one sample into the detector. Once window_size_ samples have been
// consumed the magnitude is refreshed and the sample counter starts over.
void ToneDetector::process_sample(const float& sample) {
    // Advance the second-order recurrence by one step.
    q0_ = coeff_ * q1_ - q2_ + sample;
    q2_ = q1_;
    q1_ = q0_;

    if (++count_ != window_size_) {
        return;
    }

    // Window complete: compute the magnitude from the last two state values.
    magnitude_ = q1_ * q1_ + q2_ * q2_ - q1_ * q2_ * coeff_;
    count_ = 0;
}
// Discard any partially accumulated window. The last computed magnitude is left untouched.
void ToneDetector::reset(void) {
    q0_ = 0.0;
    q1_ = 0.0;
    q2_ = 0.0;
    count_ = 0;
}
bool ToneDetectorSet::add(const float& tone_freq, const float& sample_rate, int window_size) {
ToneDetector new_tone = ToneDetector(tone_freq, sample_rate, window_size);
for (const auto tone : tones_) {
if (new_tone.coefficient() == tone.coefficient()) {
debug_print("Skipping tone %f, too close to other tones\n", tone_freq);
return false;
}
}
tones_.push_back(new_tone);
return true;
}
void ToneDetectorSet::process_sample(const float& sample) {
for (vector::iterator it = tones_.begin(); it != tones_.end(); ++it) {
it->process_sample(sample);
}
}
void ToneDetectorSet::reset(void) {
for (vector::iterator it = tones_.begin(); it != tones_.end(); ++it) {
it->reset();
}
}
// Replace the contents of `powers` with one {power, freq} entry per detector,
// ordered from highest to lowest power, and return the mean power of all detectors.
// NOTE: the mean is obtained by dividing by tones_.size(); there is no guard for an empty set.
float ToneDetectorSet::sorted_powers(vector& powers) {
powers.clear();
float total_power = 0.0;
for (size_t i = 0; i < tones_.size(); ++i) {
powers.push_back({tones_[i].relative_power(), tones_[i].freq()});
total_power += tones_[i].relative_power();
}
// Strongest tone first.
sort(powers.begin(), powers.end(), [](PowerIndex a, PowerIndex b) { return a.power > b.power; });
return total_power / tones_.size();
}
// Tone frequencies that the CTCSS constructor adds as competing detectors next to the
// target tone (entries within 5 of the target are skipped there).
vector CTCSS::standard_tones = {67.0, 69.3, 71.9, 74.4, 77.0, 79.7, 82.5, 85.4, 88.5, 91.5, 94.8, 97.4, 100.0, 103.5, 107.2, 110.9, 114.8,
118.8, 123.0, 127.3, 131.8, 136.5, 141.3, 146.2, 150.0, 151.4, 156.7, 159.8, 162.2, 165.5, 167.9, 171.3, 173.8, 177.3,
179.9, 183.5, 186.2, 189.9, 192.8, 196.6, 199.5, 203.5, 206.5, 210.7, 218.1, 225.7, 229.1, 233.6, 241.8, 250.3, 254.1};
// Build an enabled CTCSS detector for ctcss_freq.
//
// ctcss_freq  - the tone that has_tone() should report on
// sample_rate - rate of the samples later passed to process_audio_sample()
// window_size - number of samples per detection window
CTCSS::CTCSS(const float& ctcss_freq, const float& sample_rate, int window_size) : enabled_(true), ctcss_freq_(ctcss_freq), window_size_(window_size), found_count_(0), not_found_count_(0) {
    debug_print("Adding CTCSS detector for %f Hz with a sample rate of %f and window %d\n", ctcss_freq, sample_rate, window_size_);

    // Add the target CTCSS frequency first followed by the other "standard tones", except those
    // within +/- 5 Hz
    powers_.add(ctcss_freq, sample_rate, window_size_);

    for (const auto tone : standard_tones) {
        // fabs() rather than abs(): an unqualified abs() may resolve to the integer overload
        // and silently truncate the difference.
        if (fabs(ctcss_freq - tone) < 5) {
            debug_print("Skipping tone %f, too close to other tones\n", tone);
            continue;
        }
        powers_.add(tone, sample_rate, window_size_);
    }

    // Clear all per-window state to start. reset() leaves has_tone_ false, so no tone is
    // reported until a full window has been processed and the target tone was the strongest.
    reset();
}
// Feed one audio sample to all tone detectors. Every window_size_ samples the
// detection result (has_tone_) and the found / not-found counters are updated.
// Does nothing when the detector is disabled.
void CTCSS::process_audio_sample(const float& sample) {
if (!enabled_) {
return;
}
powers_.process_sample(sample);
sample_count_++;
// Nothing to decide until the current window is full.
if (sample_count_ < window_size_) {
return;
}
enough_samples_ = true;
// This sample completes the window, so check whether one of the "strongest"
// tones is the CTCSS tone we are looking for. NOTE: there can be multiple "strongest"
// tones based on floating point math
vector tone_powers;
float avg_power = powers_.sorted_powers(tone_powers);
// Look up the power measured for the target tone.
float ctcss_tone_power = 0.0;
for (const auto i : tone_powers) {
if (i.freq == ctcss_freq_) {
ctcss_tone_power = i.power;
break;
}
}
// Detected only if the target ties for the highest power and is above the average.
if (ctcss_tone_power == tone_powers[0].power && ctcss_tone_power > avg_power) {
debug_print("CTCSS tone of %f Hz detected\n", ctcss_freq_);
has_tone_ = true;
found_count_++;
} else {
debug_print("CTCSS tone of %f Hz not detected - highest power was %f Hz at %f vs %f\n", ctcss_freq_, tone_powers[0].freq, tone_powers[0].power, ctcss_tone_power);
has_tone_ = false;
not_found_count_++;
}
// reset everything for the next window's worth of samples
powers_.reset();
sample_count_ = 0;
}
// Drop all per-window state (detector state, sample count, detection result).
// The found / not-found counters are kept. A disabled detector is left untouched.
void CTCSS::reset(void) {
    if (!enabled_) {
        return;
    }

    powers_.reset();
    enough_samples_ = false;
    sample_count_ = 0;
    has_tone_ = false;
}
================================================
FILE: src/ctcss.h
================================================
/*
* ctcss.h
*
* Copyright (C) 2022-2023 charlie-foxtrot
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#ifndef _CTCSS_H
#define _CTCSS_H 1
#include // size_t
#include
// Detector for a single tone frequency, fed one sample at a time.
class ToneDetector {
public:
ToneDetector(float tone_freq, float sample_freq, int window_size);
// Consume one sample; the magnitude is refreshed each time window_size samples have been seen.
void process_sample(const float& sample);
// Clear the sample counter and the recurrence state.
void reset(void);
// Magnitude computed at the end of the most recently completed window.
const float& relative_power(void) const { return magnitude_; }
// The tone frequency this detector was constructed with.
const float& freq(void) const { return tone_freq_; }
// Recurrence coefficient derived from tone frequency, sample rate and window size.
const float& coefficient(void) const { return coeff_; }
private:
float tone_freq_;
float magnitude_;
int window_size_;
float coeff_;
int count_;  // samples consumed in the current window
float q0_;
float q1_;
float q2_;
};
// A group of ToneDetector instances that all receive the same samples.
class ToneDetectorSet {
public:
// One detector's reading: its relative power and the tone frequency it watches.
struct PowerIndex {
float power;
float freq;
};
ToneDetectorSet() {}
// Add a detector; returns false if its coefficient matches a detector already in the set.
bool add(const float& tone_freq, const float& sample_freq, int window_size);
// Pass the sample on to every detector.
void process_sample(const float& sample);
// Reset every detector.
void reset(void);
// Fill `powers` sorted by descending power and return the mean power.
float sorted_powers(std::vector& powers);
private:
std::vector tones_;
};
// CTCSS tone squelch detector.
//
// A default-constructed instance is disabled: process_audio_sample() and
// reset() do nothing and has_tone() always reports true. An instance built
// with the three-argument constructor watches for ctcss_freq and updates its
// result once per window_size samples.
class CTCSS {
   public:
    // Disabled detector. Every member is given a defined value so that all
    // accessors (notably enough_samples()) are safe to call on it.
    CTCSS(void)
        : enabled_(false),
          ctcss_freq_(0.0f),
          window_size_(0),
          found_count_(0),
          not_found_count_(0),
          enough_samples_(false),
          sample_count_(0),
          has_tone_(false) {}
    CTCSS(const float& ctcss_freq, const float& sample_rate, int window_size);

    // Feed one audio sample; no-op when disabled.
    void process_audio_sample(const float& sample);
    // Drop per-window state; no-op when disabled.
    void reset(void);

    // Number of completed windows in which the tone was / was not detected.
    const size_t& found_count(void) const { return found_count_; }
    const size_t& not_found_count(void) const { return not_found_count_; }

    bool is_enabled(void) const { return enabled_; }
    // True once at least one full window has been processed since the last reset().
    bool enough_samples(void) const { return enough_samples_; }
    // A disabled detector never blocks audio, so it always reports the tone as present.
    bool has_tone(void) const { return !enabled_ || has_tone_; }

    static std::vector<float> standard_tones;

   private:
    bool enabled_;
    float ctcss_freq_;
    int window_size_;
    size_t found_count_;
    size_t not_found_count_;
    ToneDetectorSet powers_;
    bool enough_samples_;
    int sample_count_;
    bool has_tone_;
};
#endif /* _CTCSS_H */
================================================
FILE: src/filters.cpp
================================================
/*
* filters.cpp
*
* Copyright (C) 2022-2023 charlie-foxtrot
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#include "logging.h" // debug_print()
#include "filters.h"
using namespace std;
// A default-constructed notch filter is disabled, so apply() passes values through unchanged.
NotchFilter::NotchFilter(void) {
    enabled_ = false;
}
// Notch filter, based on https://www.dsprelated.com/showcode/173.php
//
// notch_freq  - frequency to suppress; a value <= 0 leaves the filter disabled
// sample_freq - rate of the samples later passed to apply()
// q           - quality factor used when deriving the coefficients
NotchFilter::NotchFilter(float notch_freq, float sample_freq, float q) : enabled_(true), x{0.0}, y{0.0} {
    if (notch_freq <= 0.0) {
        debug_print("Invalid frequency %f Hz, disabling notch filter\n", notch_freq);
        enabled_ = false;
        return;
    }

    debug_print("Adding notch filter for %f Hz with parameters {%f, %f}\n", notch_freq, sample_freq, q);

    // Notch frequency as a normalized angular frequency.
    float omega = 2 * M_PI * (notch_freq / sample_freq);

    e = 1 / (1 + tan(omega / (q * 2)));
    p = cos(omega);

    // Coefficients consumed by apply().
    d[0] = e;
    d[1] = 2 * e * p;
    d[2] = (2 * e - 1);

    debug_print("wo:%f e:%f p:%f d:{%f,%f,%f}\n", omega, e, p, d[0], d[1], d[2]);
}
// Filter one sample in place. A disabled filter leaves `value` untouched.
void NotchFilter::apply(float& value) {
    if (enabled_) {
        // Slide the input history and append the new sample.
        x[0] = x[1];
        x[1] = x[2];
        x[2] = value;

        // Slide the output history and compute the newest output.
        y[0] = y[1];
        y[1] = y[2];
        y[2] = d[0] * x[2] - d[1] * x[1] + d[0] * x[0] + d[1] * y[1] - d[2] * y[0];

        value = y[2];
    }
}
// A default-constructed lowpass filter is disabled, so apply() passes values through unchanged.
LowpassFilter::LowpassFilter(void) {
    enabled_ = false;
}
// 2nd order lowpass Bessel filter, based entirely on a simplification of https://www-users.cs.york.ac.uk/~fisher/mkfilter/
// A freq <= 0 leaves the filter disabled. Otherwise the constructor derives `gain` and
// `ycoeffs`, which apply() uses for every sample.
LowpassFilter::LowpassFilter(float freq, float sample_freq) : enabled_(true) {
if (freq <= 0.0) {
debug_print("Invalid frequency %f Hz, disabling lowpass filter\n", freq);
enabled_ = false;
return;
}
debug_print("Adding lowpass filter at %f Hz with a sample rate of %f\n", freq, sample_freq);
// Cutoff as a fraction of the sample rate, then pre-warped with tan().
double raw_alpha = (double)freq / sample_freq;
double warped_alpha = tan(M_PI * raw_alpha) / M_PI;
// Two zeros at -1 and a conjugate pole pair, each pole mapped through blt().
complex zeros[2] = {-1.0, -1.0};
complex poles[2];
poles[0] = blt(M_PI * 2 * warped_alpha * complex(-1.10160133059e+00, 6.36009824757e-01));
poles[1] = blt(M_PI * 2 * warped_alpha * conj(complex(-1.10160133059e+00, 6.36009824757e-01)));
// Expand zeros and poles into numerator / denominator polynomial coefficients.
complex topcoeffs[3];
complex botcoeffs[3];
expand(zeros, 2, topcoeffs);
expand(poles, 2, botcoeffs);
// Gain is the magnitude of the response evaluated at z = 1.0.
complex gain_complex = evaluate(topcoeffs, 2, botcoeffs, 2, 1.0);
gain = hypot(gain_complex.imag(), gain_complex.real());
// Normalize by the highest-order denominator coefficient and negate.
for (int i = 0; i <= 2; i++) {
ycoeffs[i] = -(botcoeffs[i].real() / botcoeffs[2].real());
}
debug_print("gain: %f, ycoeffs: {%f, %f}\n", gain, ycoeffs[0], ycoeffs[1]);
}
/* map pz through (2 + pz) / (2 - pz) */
complex LowpassFilter::blt(complex pz) {
return (2.0 + pz) / (2.0 - pz);
}
/* evaluate response, substituting for z: the quotient of the numerator polynomial
 * (topco, order nz) and the denominator polynomial (botco, order np) at z */
complex LowpassFilter::evaluate(complex topco[], int nz, complex botco[], int np, complex z) {
return eval(topco, nz, z) / eval(botco, np, z);
}
/* evaluate polynomial in z, substituting for z
 * coeffs[i] is the coefficient of z^i; coeffs[0..npz] are read. */
complex LowpassFilter::eval(complex coeffs[], int npz, complex z) {
complex sum(0.0);
/* Horner's scheme, starting from the highest-order coefficient */
for (int i = npz; i >= 0; i--) {
sum = (sum * z) + coeffs[i];
}
return sum;
}
/* compute product of poles or zeros as a polynomial of z
 * pz holds npz roots; coeffs receives npz + 1 coefficients. Logs an error and
 * calls error() if any resulting coefficient has a non-negligible imaginary part. */
void LowpassFilter::expand(complex pz[], int npz, complex coeffs[]) {
/* start from the constant polynomial 1 */
coeffs[0] = 1.0;
for (int i = 0; i < npz; i++) {
coeffs[i + 1] = 0.0;
}
/* multiply in one (z - root) factor per root */
for (int i = 0; i < npz; i++) {
multin(pz[i], npz, coeffs);
}
/* check computed coeffs of z^k are all real */
for (int i = 0; i < npz + 1; i++) {
if (fabs(coeffs[i].imag()) > 1e-10) {
log(LOG_ERR, "coeff of z^%d is not real; poles/zeros are not complex conjugates\n", i);
error();
}
}
}
/* In-place update of coeffs[0..npz] with the product of the polynomial and (z - w). */
void LowpassFilter::multin(complex w, int npz, complex coeffs[]) {
/* multiply factor (z-w) into coeffs */
complex nw = -w;
/* walk from the top down so coeffs[i - 1] is still the old value when it is read */
for (int i = npz; i >= 1; i--) {
coeffs[i] = (nw * coeffs[i]) + coeffs[i - 1];
}
coeffs[0] = nw * coeffs[0];
}
// Filter one complex sample in place (r = real part, j = imaginary part).
// A disabled filter leaves both values untouched.
void LowpassFilter::apply(float& r, float& j) {
if (!enabled_) {
return;
}
complex input(r, j);
// Slide the input history; the new input is scaled down by the filter gain.
xv[0] = xv[1];
xv[1] = xv[2];
xv[2] = input / gain;
// Slide the output history and compute the newest output.
yv[0] = yv[1];
yv[1] = yv[2];
yv[2] = (xv[0] + xv[2]) + (2.0f * xv[1]) + (ycoeffs[0] * yv[0]) + (ycoeffs[1] * yv[1]);
r = yv[2].real();
j = yv[2].imag();
}
================================================
FILE: src/filters.h
================================================
/*
* filters.h
*
* Copyright (C) 2022-2023 charlie-foxtrot
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#ifndef _FILTERS_H
#define _FILTERS_H 1
#include
// Notch (band-reject) filter applied to one float sample at a time.
// A default-constructed instance is disabled and apply() is then a no-op.
class NotchFilter {
   public:
    NotchFilter(void);
    // notch_freq <= 0 results in a disabled filter.
    NotchFilter(float notch_freq, float sample_freq, float q);

    // Filter `value` in place.
    void apply(float& value);
    // const so that it can be queried through const references, like LowpassFilter::enabled().
    bool enabled(void) const { return enabled_; }

   private:
    bool enabled_;
    float e;
    float p;
    float d[3];  // filter coefficients
    float x[3];  // input history, x[2] is the newest
    float y[3];  // output history, y[2] is the newest
};
// Lowpass filter applied to one complex (real, imaginary) sample at a time.
// A default-constructed instance is disabled and apply() is then a no-op.
class LowpassFilter {
public:
LowpassFilter(void);
// freq <= 0 results in a disabled filter.
LowpassFilter(float freq, float sample_freq);
// Filter the sample (r, j) in place.
void apply(float& r, float& j);
bool enabled(void) const { return enabled_; }
private:
// Helpers used by the constructor to derive gain and ycoeffs.
static std::complex blt(std::complex pz);
static void expand(std::complex pz[], int npz, std::complex coeffs[]);
static void multin(std::complex w, int npz, std::complex coeffs[]);
static std::complex evaluate(std::complex topco[], int nz, std::complex botco[], int np, std::complex z);
static std::complex eval(std::complex coeffs[], int npz, std::complex z);
bool enabled_;
float ycoeffs[3];
float gain;
std::complex xv[3];  // input history, xv[2] is the newest
std::complex yv[3];  // output history, yv[2] is the newest
};
#endif /* _FILTERS_H */
================================================
FILE: src/generate_signal.cpp
================================================
/*
* generate_signal.cpp
*
* Copyright (C) 2023 charlie-foxtrot
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#include
#include "generate_signal.h"
using namespace std;
// Preset amplitude values for Tone.
float Tone::WEAK = 0.05;
float Tone::NORMAL = 0.2;
float Tone::STRONG = 0.4;
// Sine tone of frequency `freq` and amplitude `ampl`, generated at `sample_rate` samples per second.
Tone::Tone(int sample_rate, const float& freq, const float& ampl)
    : sample_rate_(sample_rate),
      freq_(freq),
      ampl_(ampl),
      sample_count_(0) {
}
// Return the next sample of the tone. The sample index is advanced before it
// is used, so the first value returned corresponds to index 1.
float Tone::get_sample(void) {
    ++sample_count_;
    const auto phase = 2 * M_PI * sample_count_ * freq_ / sample_rate_;
    return ampl_ * sin(phase);
}
// Preset amplitude values for Noise.
float Noise::WEAK = 0.05;
float Noise::NORMAL = 0.2;
float Noise::STRONG = 0.5;
// Noise source whose samples are normally distributed values scaled by `ampl`.
// The generator is seeded from std::random_device, so each instance yields a different sequence.
Noise::Noise(const float& ampl) : ampl_(ampl) {
// create a seeded generator
std::random_device r;
std::seed_seq s{r(), r(), r(), r(), r(), r(), r(), r()};
generator = std::mt19937(s);
// centered at 0.0, standard deviation of 0.1
distribution = normal_distribution(0.0, 0.1);
}
// Return the next noise sample: one draw from the distribution scaled by the amplitude.
float Noise::get_sample(void) {
    const auto draw = distribution(generator);
    return ampl_ * draw;
}
// Empty signal (no tones, no noise) generated at `sample_rate` samples per second.
GenerateSignal::GenerateSignal(int sample_rate)
    : sample_rate_(sample_rate) {
}
// Add a sine tone component with the given frequency and amplitude.
void GenerateSignal::add_tone(const float& freq, const float& ampl) {
    tones_.emplace_back(sample_rate_, freq, ampl);
}
// Add a noise component with the given amplitude.
void GenerateSignal::add_noise(const float& ampl) {
    noises_.emplace_back(ampl);
}
float GenerateSignal::get_sample(void) {
float value = 0.0;
for (vector::iterator tone = tones_.begin(); tone != tones_.end(); ++tone) {
value += tone->get_sample();
}
for (vector::iterator noise = noises_.begin(); noise != noises_.end(); ++noise) {
value += noise->get_sample();
}
return value;
}
// Write `seconds` worth of samples to `filepath` as raw floats in host byte order.
//
// If the file cannot be opened, or a write fails part-way, the error is
// reported on stderr via perror() and the function returns without writing
// any further samples.
void GenerateSignal::write_file(const string& filepath, const float& seconds) {
    FILE* fp = fopen(filepath.c_str(), "wb");
    if (fp == NULL) {
        // Without this check fwrite()/fclose() below would be handed a NULL stream.
        perror(filepath.c_str());
        return;
    }

    for (int i = 0; i < sample_rate_ * seconds; ++i) {
        float sample = get_sample();
        if (fwrite(&sample, sizeof(float), 1, fp) != 1) {
            perror(filepath.c_str());
            break;
        }
    }

    fclose(fp);
}
================================================
FILE: src/generate_signal.h
================================================
/*
* generate_signal.h
*
* Copyright (C) 2023 charlie-foxtrot
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#ifndef _GENERATE_SIGNAL_H
#define _GENERATE_SIGNAL_H
#include
#include
#include
// Sine tone sample generator.
class Tone {
public:
// Preset amplitudes.
static float WEAK;
static float NORMAL;
static float STRONG;
Tone(int sample_rate, const float& freq, const float& ampl);
// Returns the next sample and advances the internal sample index.
float get_sample(void);
private:
int sample_rate_;
float freq_;
float ampl_;
size_t sample_count_;  // number of samples handed out so far
};
// Random noise sample generator.
class Noise {
public:
// Preset amplitudes.
static float WEAK;
static float NORMAL;
static float STRONG;
Noise(const float& ampl);
// Returns the next random sample scaled by the amplitude.
float get_sample(void);
private:
float ampl_;
std::mt19937 generator;
std::normal_distribution distribution;
};
// Test signal built by summing any number of tones and noise sources.
class GenerateSignal {
public:
GenerateSignal(int sample_rate);
void add_tone(const float& freq, const float& ampl);
void add_noise(const float& ampl);
// Returns the sum of the next sample of every tone and noise source.
float get_sample(void);
// Writes `seconds` worth of samples to `filepath` as raw floats.
void write_file(const std::string& filepath, const float& seconds);
private:
int sample_rate_;
std::vector tones_;
std::vector noises_;
};
#endif /* _GENERATE_SIGNAL_H */
================================================
FILE: src/hello_fft/CMakeLists.txt
================================================
# Sources that make up the hello_fft object library.
set(hello_fft_source_files
mailbox.c
gpu_fft.c
gpu_fft_twiddles.c
gpu_fft_shaders.c
gpu_fft_base.c
)
# Temp hack due to the fact that mailbox.c includes ../rtl_airband.h which
# is a C++ header.
SET_SOURCE_FILES_PROPERTIES(${hello_fft_source_files} PROPERTIES LANGUAGE CXX )
# Built as an OBJECT library: the sources are compiled but not archived or linked here.
add_library(hello_fft OBJECT
${hello_fft_source_files}
)
target_include_directories(hello_fft PUBLIC
".." # needed for rtl_airband.h
"${CMAKE_CURRENT_BINARY_DIR}/.." # needed for config.h
${BCM_VC_INCLUDE_DIRS}
)
# disable -Wcast-qual for this folder
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-cast-qual")
================================================
FILE: src/hello_fft/gpu_fft.c
================================================
/*
BCM2835 "GPU_FFT" release 2.0
Copyright (c) 2014, Andrew Holme.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include
#include "gpu_fft.h"
// Largest total batch size (jobs << log2_N) for which gpu_fft_prepare() selects
// direct register poking with busy wait instead of a mailbox message.
#define GPU_FFT_BUSY_WAIT_LIMIT (5 << 12) // ~1ms
// Short local alias for the complex sample type.
typedef struct GPU_FFT_COMPLEX COMPLEX;
// Allocate one block of memory through gpu_fft_alloc() and lay out everything a batch of
// FFTs needs inside it, in this order: header, ping-pong data buffers, shader code,
// twiddles, per-QPU uniforms and the mailbox message.
//
// Returns 0 on success and stores the header pointer in *fft. Returns -2 when
// gpu_fft_twiddle_size() rejects log2_N, or the non-zero result of gpu_fft_alloc()
// when the allocation fails; *fft is not written in either failure case.
int gpu_fft_prepare(int mb, // mailbox file_desc
int log2_N, // log2(FFT_length) = 8...20
int direction, // GPU_FFT_FWD: fft(); GPU_FFT_REV: ifft()
int jobs, // number of transforms in batch
struct GPU_FFT** fft) {
unsigned info_bytes, twid_bytes, data_bytes, code_bytes, unif_bytes, mail_bytes;
unsigned size, *uptr, vc_tw, vc_data;
int i, q, shared, unique, passes, ret;
struct GPU_FFT_BASE* base;
struct GPU_FFT_PTR ptr;
struct GPU_FFT* info;
if (gpu_fft_twiddle_size(log2_N, &shared, &unique, &passes))
return -2;
// Size of each region of the allocation.
info_bytes = 4096;
// One transform's data, padded up to the next multiple of 4096 bytes
// (always strictly larger than the raw size).
data_bytes = (1 + ((sizeof(COMPLEX) << log2_N) | 4095));
code_bytes = gpu_fft_shader_size(log2_N);
twid_bytes = sizeof(COMPLEX) * 16 * (shared + GPU_FFT_QPUS * unique);
// 5 fixed words plus 2 words per job for each QPU; matches the uniforms loop below.
unif_bytes = sizeof(int) * GPU_FFT_QPUS * (5 + jobs * 2);
mail_bytes = sizeof(int) * GPU_FFT_QPUS * 2;
size = info_bytes + // header
data_bytes * jobs * 2 + // ping-pong data, aligned
code_bytes + // shader, aligned
twid_bytes + // twiddles
unif_bytes + // uniforms
mail_bytes; // mailbox message
ret = gpu_fft_alloc(mb, size, &ptr);
if (ret)
return ret;
// Header
// The header sits at the start of the allocation; GPU_FFT_BASE is the first member
// of struct GPU_FFT, so the same address serves as both.
info = (struct GPU_FFT*)ptr.arm.vptr;
base = (struct GPU_FFT_BASE*)info;
gpu_fft_ptr_inc(&ptr, info_bytes);
// For transpose
info->x = 1 << log2_N;
info->y = jobs;
// Ping-pong buffers leave results in or out of place
info->in = info->out = ptr.arm.cptr;
info->step = data_bytes / sizeof(COMPLEX);
if (passes & 1)
info->out += info->step * jobs; // odd => out of place
vc_data = gpu_fft_ptr_inc(&ptr, data_bytes * jobs * 2);
// Shader code
memcpy(ptr.arm.vptr, gpu_fft_shader_code(log2_N), code_bytes);
base->vc_code = gpu_fft_ptr_inc(&ptr, code_bytes);
// Twiddles
gpu_fft_twiddle_data(log2_N, direction, ptr.arm.fptr);
vc_tw = gpu_fft_ptr_inc(&ptr, twid_bytes);
// From here on uptr walks the uniforms region and then the mailbox message region.
uptr = ptr.arm.uptr;
// Uniforms
for (q = 0; q < GPU_FFT_QPUS; q++) {
*uptr++ = vc_tw;
*uptr++ = vc_tw + sizeof(COMPLEX) * 16 * (shared + q * unique);
*uptr++ = q;
// Two addresses per job: one in each half of the ping-pong data area.
for (i = 0; i < jobs; i++) {
*uptr++ = vc_data + data_bytes * i;
*uptr++ = vc_data + data_bytes * i + data_bytes * jobs;
}
*uptr++ = 0;
*uptr++ = (q == 0); // For mailbox: IRQ enable, master only
base->vc_unifs[q] = gpu_fft_ptr_inc(&ptr, sizeof(int) * (5 + jobs * 2));
}
if ((jobs << log2_N) <= GPU_FFT_BUSY_WAIT_LIMIT) {
// Direct register poking with busy wait
base->vc_msg = 0;
} else {
// Mailbox message
// One (uniforms, code) pair per QPU, written right after the uniforms.
for (q = 0; q < GPU_FFT_QPUS; q++) {
*uptr++ = base->vc_unifs[q];
*uptr++ = base->vc_code;
}
base->vc_msg = ptr.vc;
}
*fft = info;
return 0;
}
unsigned gpu_fft_execute(struct GPU_FFT* info) {
return gpu_fft_base_exec(&info->base, GPU_FFT_QPUS);
}
// Hand the embedded base structure of a prepared FFT to gpu_fft_base_release().
void gpu_fft_release(struct GPU_FFT* info) {
    struct GPU_FFT_BASE* base = &info->base;
    gpu_fft_base_release(base);
}
================================================
FILE: src/hello_fft/gpu_fft.h
================================================
/*
BCM2835 "GPU_FFT" release 2.0
Copyright (c) 2014, Andrew Holme.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __GPU_FFT__
#define __GPU_FFT__
// Number of QPUs a batch is spread over; also sizes the per-QPU uniforms array.
#define GPU_FFT_QPUS 8
#define GPU_FFT_PI 3.14159265358979323846
// Values for the `direction` argument of gpu_fft_prepare().
#define GPU_FFT_FWD 0 // forward FFT
#define GPU_FFT_REV 1 // inverse FFT
// One complex sample: real and imaginary part as single-precision floats.
struct GPU_FFT_COMPLEX {
float re, im;
};
// Cursor into the shared allocation, advanced with gpu_fft_ptr_inc().
// NOTE(review): `vc` is presumably the address as seen from the VideoCore side and `arm`
// the same location as seen from the ARM side - confirm against gpu_fft_base.c.
struct GPU_FFT_PTR {
unsigned vc;
// Typed views of the same ARM-side pointer.
union {
struct GPU_FFT_COMPLEX* cptr;
void* vptr;
char* bptr;
float* fptr;
unsigned* uptr;
} arm;
};
// State shared by gpu_fft_base_exec() and gpu_fft_base_release().
// gpu_fft_prepare() fills in vc_code, vc_unifs[] and vc_msg (vc_msg == 0 selects
// direct register poking with busy wait instead of a mailbox message).
struct GPU_FFT_BASE {
int mb;  // presumably the mailbox file descriptor - TODO confirm
unsigned handle, size, vc_msg, vc_code, vc_unifs[GPU_FFT_QPUS];
volatile unsigned* peri;
};
// Handle returned by gpu_fft_prepare().
struct GPU_FFT {
struct GPU_FFT_BASE base;  // must stay the first member: gpu_fft_prepare() casts the handle to GPU_FFT_BASE*
struct GPU_FFT_COMPLEX *in, *out;  // input and output buffers; equal when results are left in place
int x, y, step;  // x = FFT length, y = number of jobs, step = buffer stride per job in complex elements
};
// Public API.
// Returns 0 on success and stores the new handle in *fft; non-zero on failure.
int gpu_fft_prepare(int mb, // mailbox file_desc
int log2_N, // log2(FFT_length) = 8...20
int direction, // GPU_FFT_FWD: fft(); GPU_FFT_REV: ifft()
int jobs, // number of transforms in batch
struct GPU_FFT** fft);
unsigned gpu_fft_execute(struct GPU_FFT* info);
void gpu_fft_release(struct GPU_FFT* info);
// private
int gpu_fft_twiddle_size(int, int*, int*, int*);
void gpu_fft_twiddle_data(int, int, float*);
unsigned int gpu_fft_shader_size(int);
unsigned int* gpu_fft_shader_code(int);
// gpu_fft_base:
unsigned gpu_fft_base_exec(struct GPU_FFT_BASE* base, unsigned num_qpus);
int gpu_fft_alloc(int mb, unsigned size, struct GPU_FFT_PTR* ptr);
void gpu_fft_base_release(struct GPU_FFT_BASE* base);
unsigned gpu_fft_ptr_inc(struct GPU_FFT_PTR* ptr, int bytes);
#endif // __GPU_FFT__
================================================
FILE: src/hello_fft/gpu_fft.txt
================================================
BCM2835 "GPU_FFT" release 2.0 by Andrew Holme, 2014.
GPU_FFT is an FFT library for the Raspberry Pi which exploits the BCM2835 SoC
3D hardware to deliver ten times more data throughput than is possible on the
700 MHz ARM. Kernels are provided for all power-of-2 FFT lengths between 256
and 2,097,152 points inclusive. A transpose function, which also uses the 3D
hardware, is provided to support 2-dimensional transforms.
*** Accuracy ***
GPU_FFT uses single-precision floats for data and twiddle factors. The output
is not scaled. The relative root-mean-square (rms) error in parts-per-million
(ppm) for different transform lengths (N) is typically:
log2(N) | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17
ppm rms | 0.27 | 0.42 | 0.50 | 0.70 | 2.3 | 4.4 | 7.6 | 9.2 | 18 | 70
log2(N) | 18 | 19 | 20 | 21 | 8...17 batch of 10
ppm rms | 100 | 180 | 360 | 720 | 18...21 batch of 1
*** Throughput ***
GPU_FFT 1.0 had to be invoked through a "mailbox" which added a 100us overhead
on every call. To mitigate this, batches of transforms could be submitted via
a single call. GPU_FFT 2.0 avoids this 100us overhead by poking GPU registers
directly from the ARM if total batch runtime will be short; but still uses the
mailbox for longer jobs to avoid busy waiting at 100% CPU for too long.
Typical per-transform runtimes for batch sizes of 1 and 10; and comparative
figures for FFTW (FFTW_MEASURE mode) are:
log2(N) | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 |
1 | 0.036 | 0.051 | 0.070 | 0.11 | 0.24 | 0.58 | 1.2 | 3.3 |
10 | 0.016 | 0.027 | 0.045 | 0.095 | 0.25 | 0.61 | 1.2 | 3.2 |
FFTW | 0.092 | 0.22 | 0.48 | 0.95 | 3.0 | 5.1 | 12 | 31 |
log2(N) | 16 | 17 | 18 | 19 | 20 | 21 | All times in
1 | 6.8 | 16 | 42 | 95 | 190 | 380 | milliseconds
FFTW | 83 | 180 | 560 | 670 | 1600 | 3400 | 2 sig. figs.
*** API functions ***
gpu_fft_prepare() Call once to allocate memory and initialise data
structures. Returns 0 for success.
gpu_fft_execute() Call one or more times to execute a previously
prepared FFT batch. Returns 0 for success.
gpu_fft_release() Call once to release resources after use.
GPU memory is permanently lost if not freed.
*** Parameters ***
int mb Mailbox file descriptor obtained by calling mbox_open()
int log2_N log2(FFT length) = 8 to 21
int direction FFT direction: GPU_FFT_FWD for forward FFT
GPU_FFT_REV for inverse FFT
int jobs Number of transforms in batch = 1 or more
GPU_FFT ** Output parameter from prepare: control structure.
GPU_FFT * Input parameter to execute and release
*** Data format ***
Complex data arrays are stored as alternate real and imaginary parts:
struct GPU_FFT_COMPLEX {
float re, im;
};
The GPU_FFT struct created by gpu_fft_prepare() contains pointers to the input
and output arrays:
struct GPU_FFT {
struct GPU_FFT_COMPLEX *in, *out;
When executing a batch of transforms, buffer pointers are obtained as follows:
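struct GPU_FFT *fft;
int ret = gpu_fft_prepare(mb, log2_N, direction, jobs, &fft); // returns 0 for success
for (int j=0; j<jobs; j++) {
struct GPU_FFT_COMPLEX *in = fft->in + j*fft->step;
struct GPU_FFT_COMPLEX *out = fft->out + j*fft->step;
....
}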
GPU_FFT.step is greater than FFT length because a guard space is left between
buffers for caching and alignment reasons.
GPU_FFT performs multiple passes between ping-pong buffers. The final output
lands in the same buffer as input after an even number of passes. Transforms
where log2_N=12...16 use an odd number of passes and the final result is left
out-of-place. The input data is never preserved.
*** Example program ***
The code that produced the above accuracy and performance figures is included
as a demo with the latest Raspbian distro. Build and run it as follows:
cd /opt/vc/src/hello_pi/hello_fft
make
sudo mknod char_dev c 100 0
sudo ./hello_fft.bin 12
It accepts three optional command-line arguments: <log2_N> <batch> <loops>
The special character device is required for the ioctl mailbox through which
the ARM communicates with the Videocore GPU.
*** With Open GL ***
GPU_FFT and Open GL will run concurrently if the GPU_FFT_MEM_* defines in
file gpu_fft.c are changed as follows:
#define GPU_FFT_MEM_FLG 0x4 // cached=0xC; direct=0x4
#define GPU_FFT_MEM_MAP 0x20000000 // cached=0x0; direct=0x20000000
Overall performance will probably be higher if GPU_FFT and Open GL take turns
at using the 3D hardware. Since eglSwapBuffers() returns immediately without
waiting for rendering, call glFlush() and glFinish() afterwards as follows:
for (;;) {
....
eglSwapBuffers(....); // non-blocking call returns immediately
glFlush();
glFinish(); // wait until V3D hardware is idle
....
gpu_fft_execute(....); // blocking call
....
}
*** 2-dimensional FFT ***
Please study the hello_fft_2d demo source, which is built and executed thus:
make hello_fft_2d.bin
sudo ./hello_fft_2d.bin
This generates a Windows BMP file: "hello_fft_2d.bmp"
The demo uses a square 512x512 array; however, rectangular arrays are allowed.
The following lines in gpu_fft_trans.c will do what is safe:
ptr.arm.uptr[6] = src->x < dst->y? src->x : dst->y;
ptr.arm.uptr[7] = src->y < dst->x? src->y : dst->x;
One may transpose the output from the second FFT pass back into the first pass
input buffer, by preparing and executing a second transposition; however, this
is probably unnecessary. It depends on how the final output will be accessed.
================================================
FILE: src/hello_fft/gpu_fft_base.c
================================================
/*
BCM2835 "GPU_FFT" release 2.0
Copyright (c) 2014, Andrew Holme.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "bcm_host.h"
#include "gpu_fft.h"
#include "mailbox.h"
// Clears the top two bits (mask 0xC0000000) of an address before it is
// handed to mapmem().
#define BUS_TO_PHYS(x) ((x) & ~0xC0000000)
// V3D spec: http://www.broadcom.com/docs/support/videocore/VideoCoreIV-AG100-R.pdf
// Each constant below is a byte offset shifted right by 2, i.e. an index into
// the `volatile unsigned*` peripheral mapping held in GPU_FFT_BASE.peri.
#define V3D_L2CACTL (0xC00020 >> 2)
#define V3D_SLCACTL (0xC00024 >> 2)
#define V3D_SRQPC (0xC00430 >> 2)
#define V3D_SRQUA (0xC00434 >> 2)
#define V3D_SRQCS (0xC0043c >> 2)
#define V3D_DBCFG (0xC00e00 >> 2)
#define V3D_DBQITE (0xC00e2c >> 2)
#define V3D_DBQITC (0xC00e30 >> 2)
// Added to the locked address before BUS_TO_PHYS() in gpu_fft_alloc().
#define GPU_FFT_MEM_MAP 0x0 // cached=0x0; direct=0x20000000
// Fourth argument of execute_qpu() in gpu_fft_base_exec().
#define GPU_FFT_NO_FLUSH 1
// Fifth argument of execute_qpu() in gpu_fft_base_exec().
#define GPU_FFT_TIMEOUT 2000 // ms
unsigned gpu_fft_base_exec_direct(struct GPU_FFT_BASE* base, unsigned num_qpus) {
unsigned q;
base->peri[V3D_DBCFG] = 0; // Disallow IRQ
base->peri[V3D_DBQITE] = 0; // Disable IRQ
base->peri[V3D_DBQITC] = -1; // Resets IRQ flags
base->peri[V3D_L2CACTL] = 1 << 2; // Clear L2 cache
base->peri[V3D_SLCACTL] = -1; // Clear other caches
base->peri[V3D_SRQCS] = (1 << 7) | (1 << 8) | (1 << 16); // Reset error bit and counts
for (q = 0; q < num_qpus; q++) { // Launch shader(s)
base->peri[V3D_SRQUA] = base->vc_unifs[q];
base->peri[V3D_SRQPC] = base->vc_code;
}
// Busy wait polling
for (;;) {
if (((base->peri[V3D_SRQCS] >> 16) & 0xff) == num_qpus)
break; // All done?
}
return 0;
}
// Runs `num_qpus` shader instances and returns the status of whichever path
// was used.
unsigned gpu_fft_base_exec(struct GPU_FFT_BASE* base, unsigned num_qpus) {
    if (!base->vc_msg) {
        // No mailbox message prepared: direct register poking
        return gpu_fft_base_exec_direct(base, num_qpus);
    }
    // Use mailbox
    // Returns: 0x0 for success; 0x80000000 for timeout
    return execute_qpu(base->mb, num_qpus, base->vc_msg, GPU_FFT_NO_FLUSH, GPU_FFT_TIMEOUT);
}
// Enables the QPUs, allocates `size` bytes of GPU memory, and maps both the
// peripheral registers and the allocation into this process.
//
// On success returns 0; ptr->vc holds the address returned by mem_lock(),
// ptr->arm points at the mapped allocation, and a struct GPU_FFT_BASE at the
// start of that mapping records peri, mb, handle and size for
// gpu_fft_base_release().
//
// Returns -1 if qpu_enable() fails, -3 if mem_alloc() fails, and -4 if either
// mapmem() call returns NULL. Everything acquired up to the failure point is
// released before returning.
int gpu_fft_alloc(int mb, unsigned size, struct GPU_FFT_PTR* ptr) {
    struct GPU_FFT_BASE* base;
    volatile unsigned* peri;
    unsigned handle;

    if (qpu_enable(mb, 1))
        return -1;

    // Shared memory : cached=0xC; direct=0x4
    unsigned mem_flg = bcm_host_get_sdram_address() == 0x40000000 ? 0xC : 0x4;
    handle = mem_alloc(mb, size, 4096, mem_flg);
    if (!handle) {
        qpu_enable(mb, 0);
        return -3;
    }

    peri = (volatile unsigned*)mapmem(bcm_host_get_peripheral_address(), bcm_host_get_peripheral_size());
    if (!peri) {
        mem_free(mb, handle);
        qpu_enable(mb, 0);
        return -4;
    }

    ptr->vc = mem_lock(mb, handle);
    ptr->arm.vptr = mapmem(BUS_TO_PHYS(ptr->vc + GPU_FFT_MEM_MAP), size);
    if (!ptr->arm.vptr) {
        // Same failure convention as the peripheral mapping above. Without
        // this check the writes through `base` below would dereference NULL.
        // Tear down in the same order gpu_fft_base_release() uses.
        unmapmem((void*)peri, bcm_host_get_peripheral_size());
        mem_unlock(mb, handle);
        mem_free(mb, handle);
        qpu_enable(mb, 0);
        return -4;
    }

    base = (struct GPU_FFT_BASE*)ptr->arm.vptr;
    base->peri = peri;
    base->mb = mb;
    base->handle = handle;
    base->size = size;
    return 0;
}

// Releases everything gpu_fft_alloc() acquired: both mappings, the memory
// lock, the allocation itself, and finally the QPU enable.
void gpu_fft_base_release(struct GPU_FFT_BASE* base) {
    // `base` lives inside the mapping that is about to be unmapped, so copy
    // out the fields still needed afterwards.
    int mb = base->mb;
    unsigned handle = base->handle, size = base->size;

    unmapmem((void*)base->peri, bcm_host_get_peripheral_size());
    unmapmem((void*)base, size);
    mem_unlock(mb, handle);
    mem_free(mb, handle);
    qpu_enable(mb, 0);
}
// Moves both addresses held in `ptr` forward by `bytes` and returns the value
// ptr->vc had before the move.
unsigned gpu_fft_ptr_inc(struct GPU_FFT_PTR* ptr, int bytes) {
    const unsigned before = ptr->vc;
    ptr->arm.bptr = ptr->arm.bptr + bytes;
    ptr->vc = before + bytes;
    return before;
}
================================================
FILE: src/hello_fft/gpu_fft_shaders.c
================================================
/*
BCM2835 "GPU_FFT" release 2.0
Copyright (c) 2014, Andrew Holme.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Shader images, one array per supported FFT length (256 ... 2048k points).
// Each initializer list is pulled in at compile time from the matching file
// in hex/, which holds comma-separated 32-bit words.
static unsigned int shader_256[] = {
#include "hex/shader_256.hex"
};
static unsigned int shader_512[] = {
#include "hex/shader_512.hex"
};
static unsigned int shader_1k[] = {
#include "hex/shader_1k.hex"
};
static unsigned int shader_2k[] = {
#include "hex/shader_2k.hex"
};
static unsigned int shader_4k[] = {
#include "hex/shader_4k.hex"
};
static unsigned int shader_8k[] = {
#include "hex/shader_8k.hex"
};
static unsigned int shader_16k[] = {
#include "hex/shader_16k.hex"
};
static unsigned int shader_32k[] = {
#include "hex/shader_32k.hex"
};
static unsigned int shader_64k[] = {
#include "hex/shader_64k.hex"
};
static unsigned int shader_128k[] = {
#include "hex/shader_128k.hex"
};
static unsigned int shader_256k[] = {
#include "hex/shader_256k.hex"
};
static unsigned int shader_512k[] = {
#include "hex/shader_512k.hex"
};
static unsigned int shader_1024k[] = {
#include "hex/shader_1024k.hex"
};
static unsigned int shader_2048k[] = {
#include "hex/shader_2048k.hex"
};
// Lookup table indexed by (log2_N - 8): entry 0 is the 256-point shader and
// entry 13 the 2048k-point shader. `size` is the array size in bytes
// (sizeof), `code` points at its first word.
static struct {
unsigned int size, *code;
} shaders[] = {{sizeof(shader_256), shader_256}, {sizeof(shader_512), shader_512}, {sizeof(shader_1k), shader_1k}, {sizeof(shader_2k), shader_2k}, {sizeof(shader_4k), shader_4k},
{sizeof(shader_8k), shader_8k}, {sizeof(shader_16k), shader_16k}, {sizeof(shader_32k), shader_32k}, {sizeof(shader_64k), shader_64k}, {sizeof(shader_128k), shader_128k},
{sizeof(shader_256k), shader_256k}, {sizeof(shader_512k), shader_512k}, {sizeof(shader_1024k), shader_1024k}, {sizeof(shader_2048k), shader_2048k}};
// Returns the size in bytes of the shader for a transform of length
// 2^log2_N, or 0 when log2_N falls outside the table (the first entry is
// log2_N == 8). The range check keeps an unvalidated argument from indexing
// past either end of `shaders`.
unsigned int gpu_fft_shader_size(int log2_N) {
    const int entries = (int)(sizeof(shaders) / sizeof(shaders[0]));
    if (log2_N < 8 || log2_N - 8 >= entries)
        return 0;
    return shaders[log2_N - 8].size;
}
// Returns a pointer to the first word of the shader for a transform of
// length 2^log2_N, or a null pointer when log2_N falls outside the table
// (the first entry is log2_N == 8). The range check keeps an unvalidated
// argument from indexing past either end of `shaders`.
unsigned int* gpu_fft_shader_code(int log2_N) {
    const int entries = (int)(sizeof(shaders) / sizeof(shaders[0]));
    if (log2_N < 8 || log2_N - 8 >= entries)
        return 0;
    return shaders[log2_N - 8].code;
}
================================================
FILE: src/hello_fft/gpu_fft_trans.h
================================================
/*
BCM2835 "GPU_FFT" release 2.0
Copyright (c) 2014, Andrew Holme.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Include guard: without it a second inclusion of this header redefines
// struct GPU_FFT_TRANS and fails to compile.
#ifndef GPU_FFT_TRANS_H
#define GPU_FFT_TRANS_H

#include "gpu_fft.h"

// Control structure for one prepared transposition; it carries only the
// common GPU_FFT_BASE bookkeeping block.
struct GPU_FFT_TRANS {
    struct GPU_FFT_BASE base;
};

// Prepares a transposition from `src` to `dst`; *out receives the control
// structure. `mb` is the mailbox file descriptor.
int gpu_fft_trans_prepare(int mb, struct GPU_FFT* src, struct GPU_FFT* dst, struct GPU_FFT_TRANS** out);

unsigned gpu_fft_trans_execute(  // src->out ==> T ==> dst->in
    struct GPU_FFT_TRANS* info);

// Releases a control structure obtained from gpu_fft_trans_prepare().
void gpu_fft_trans_release(struct GPU_FFT_TRANS* info);

#endif  // GPU_FFT_TRANS_H
================================================
FILE: src/hello_fft/gpu_fft_twiddles.c
================================================
/*
BCM2835 "GPU_FFT" release 2.0
Copyright (c) 2014, Andrew Holme.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <math.h>
#include "gpu_fft.h"
#define ALPHA(dx) (2 * pow(sin((dx) / 2), 2))
#define BETA(dx) (sin(dx))
static double k[16] = {0, 8, 4, 4, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1};
static double m[16] = {0, 0, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7};
/****************************************************************************/
static float* twiddles_base_16(double two_pi, float* out, double theta) {
int i;
for (i = 0; i < 16; i++) {
*out++ = cos(two_pi / 16 * k[i] * m[i] + theta * k[i]);
*out++ = sin(two_pi / 16 * k[i] * m[i] + theta * k[i]);
}
return out;
}
// Writes 16 (cos, sin) pairs at angles two_pi/32 * n + theta for n = 0..15,
// then appends a twiddles_base_16 block generated with 2*theta. Returns the
// position just past the last float written.
static float* twiddles_base_32(double two_pi, float* out, double theta) {
    float* dst = out;
    for (int n = 0; n < 16; n++) {
        const double angle = two_pi / 32 * n + theta;
        *dst++ = cos(angle);
        *dst++ = sin(angle);
    }
    return twiddles_base_16(two_pi, dst, 2 * theta);
}
// Writes 32 (cos, sin) pairs at angles two_pi/64 * n for n = 0..31, then
// appends a twiddles_base_32 block generated with theta = 0. Returns the
// position just past the last float written.
static float* twiddles_base_64(double two_pi, float* out) {
    float* dst = out;
    for (int n = 0; n < 32; n++) {
        const double angle = two_pi / 64 * n;
        *dst++ = cos(angle);
        *dst++ = sin(angle);
    }
    return twiddles_base_32(two_pi, dst, 0);
}
/****************************************************************************/
// Writes 16 (ALPHA, BETA) pairs - 32 floats - evaluated at theta * k[i], and
// returns the position just past the last float written.
//
// The first parameter is now named: omitting a parameter name in a function
// definition is only valid C from C23 on and is rejected by older compilers.
static float* twiddles_step_16(double two_pi, float* out, double theta) {
    (void)two_pi;  // not used here; twiddles_step_32 passes it through
    for (int i = 0; i < 16; i++) {
        *out++ = ALPHA(theta * k[i]);
        *out++ = BETA(theta * k[i]);
    }
    return out;
}
// Writes the same (ALPHA(theta), BETA(theta)) pair 16 times, then appends a
// twiddles_step_16 block generated with 2*theta. Returns the position just
// past the last float written.
static float* twiddles_step_32(double two_pi, float* out, double theta) {
    // The pair does not depend on the loop index, so evaluate it once.
    const float alpha = ALPHA(theta);
    const float beta = BETA(theta);
    float* dst = out;
    for (int n = 0; n < 16; n++) {
        *dst++ = alpha;
        *dst++ = beta;
    }
    return twiddles_step_16(two_pi, dst, 2 * theta);
}
/****************************************************************************/
// Twiddle table for N = 256: a base-16 block at angle 0, a step-16 block at
// GPU_FFT_QPUS * two_pi/N, then one base-16 block per QPU at q * two_pi/N.
static void twiddles_256(double two_pi, float* out) {
    const double points = 256;
    const double unit = two_pi / points;
    float* dst = out;

    dst = twiddles_base_16(two_pi, dst, 0);
    dst = twiddles_step_16(two_pi, dst, unit * GPU_FFT_QPUS);
    for (int qpu = 0; qpu < GPU_FFT_QPUS; qpu++) {
        dst = twiddles_base_16(two_pi, dst, unit * qpu);
    }
}
// Twiddle table for N = 512: a base-32 block at angle 0, a step-16 block at
// GPU_FFT_QPUS * two_pi/N, then one base-16 block per QPU at q * two_pi/N.
static void twiddles_512(double two_pi, float* out) {
    const double points = 512;
    const double unit = two_pi / points;
    float* dst = out;

    dst = twiddles_base_32(two_pi, dst, 0);
    dst = twiddles_step_16(two_pi, dst, unit * GPU_FFT_QPUS);
    for (int qpu = 0; qpu < GPU_FFT_QPUS; qpu++) {
        dst = twiddles_base_16(two_pi, dst, unit * qpu);
    }
}
// Twiddle table for N = 1024: a base-32 block at angle 0, a step-32 block at
// GPU_FFT_QPUS * two_pi/N, then one base-32 block per QPU at q * two_pi/N.
static void twiddles_1k(double two_pi, float* out) {
    const double points = 1024;
    const double unit = two_pi / points;
    float* dst = out;

    dst = twiddles_base_32(two_pi, dst, 0);
    dst = twiddles_step_32(two_pi, dst, unit * GPU_FFT_QPUS);
    for (int qpu = 0; qpu < GPU_FFT_QPUS; qpu++) {
        dst = twiddles_base_32(two_pi, dst, unit * qpu);
    }
}
// Twiddle table for N = 2048: a base-64 block, a step-32 block at
// GPU_FFT_QPUS * two_pi/N, then one base-32 block per QPU at q * two_pi/N.
static void twiddles_2k(double two_pi, float* out) {
    const double points = 2048;
    const double unit = two_pi / points;
    float* dst = out;

    dst = twiddles_base_64(two_pi, dst);
    dst = twiddles_step_32(two_pi, dst, unit * GPU_FFT_QPUS);
    for (int qpu = 0; qpu < GPU_FFT_QPUS; qpu++) {
        dst = twiddles_base_32(two_pi, dst, unit * qpu);
    }
}
// Twiddle table for N = 4096: a base-16 block at angle 0, step-16 blocks at
// 16 * two_pi/N and GPU_FFT_QPUS * two_pi/N, then one base-16 block per QPU
// at q * two_pi/N.
static void twiddles_4k(double two_pi, float* out) {
    const double points = 4096;
    const double unit = two_pi / points;
    float* dst = out;

    dst = twiddles_base_16(two_pi, dst, 0);
    dst = twiddles_step_16(two_pi, dst, unit * 16);
    dst = twiddles_step_16(two_pi, dst, unit * GPU_FFT_QPUS);
    for (int qpu = 0; qpu < GPU_FFT_QPUS; qpu++) {
        dst = twiddles_base_16(two_pi, dst, unit * qpu);
    }
}
// Twiddle table for N = 8192: a base-32 block at angle 0, step-16 blocks at
// 16 * two_pi/N and GPU_FFT_QPUS * two_pi/N, then one base-16 block per QPU
// at q * two_pi/N.
static void twiddles_8k(double two_pi, float* out) {
    const double points = 8192;
    const double unit = two_pi / points;
    float* dst = out;

    dst = twiddles_base_32(two_pi, dst, 0);
    dst = twiddles_step_16(two_pi, dst, unit * 16);
    dst = twiddles_step_16(two_pi, dst, unit * GPU_FFT_QPUS);
    for (int qpu = 0; qpu < GPU_FFT_QPUS; qpu++) {
        dst = twiddles_base_16(two_pi, dst, unit * qpu);
    }
}
// Twiddle table for N = 16384: a base-32 block at angle 0, a step-32 block at
// 16 * two_pi/N, a step-16 block at GPU_FFT_QPUS * two_pi/N, then one base-16
// block per QPU at q * two_pi/N.
static void twiddles_16k(double two_pi, float* out) {
    const double points = 16384;
    const double unit = two_pi / points;
    float* dst = out;

    dst = twiddles_base_32(two_pi, dst, 0);
    dst = twiddles_step_32(two_pi, dst, unit * 16);
    dst = twiddles_step_16(two_pi, dst, unit * GPU_FFT_QPUS);
    for (int qpu = 0; qpu < GPU_FFT_QPUS; qpu++) {
        dst = twiddles_base_16(two_pi, dst, unit * qpu);
    }
}
// Twiddle table for N = 32768: a base-32 block at angle 0, step-32 blocks at
// 32 * two_pi/N and GPU_FFT_QPUS * two_pi/N, then one base-32 block per QPU
// at q * two_pi/N.
static void twiddles_32k(double two_pi, float* out) {
    const double points = 32768;
    const double unit = two_pi / points;
    float* dst = out;

    dst = twiddles_base_32(two_pi, dst, 0);
    dst = twiddles_step_32(two_pi, dst, unit * 32);
    dst = twiddles_step_32(two_pi, dst, unit * GPU_FFT_QPUS);
    for (int qpu = 0; qpu < GPU_FFT_QPUS; qpu++) {
        dst = twiddles_base_32(two_pi, dst, unit * qpu);
    }
}
// Twiddle table for N = 65536: a base-64 block, step-32 blocks at
// 32 * two_pi/N and GPU_FFT_QPUS * two_pi/N, then one base-32 block per QPU
// at q * two_pi/N.
static void twiddles_64k(double two_pi, float* out) {
    const double points = 65536;
    const double unit = two_pi / points;
    float* dst = out;

    dst = twiddles_base_64(two_pi, dst);
    dst = twiddles_step_32(two_pi, dst, unit * 32);
    dst = twiddles_step_32(two_pi, dst, unit * GPU_FFT_QPUS);
    for (int qpu = 0; qpu < GPU_FFT_QPUS; qpu++) {
        dst = twiddles_base_32(two_pi, dst, unit * qpu);
    }
}
// Twiddle table for N = 128 * 1024: a base-32 block at angle 0, step-16
// blocks at 16*16, 16 and GPU_FFT_QPUS times two_pi/N, then one base-16 block
// per QPU at q * two_pi/N.
static void twiddles_128k(double two_pi, float* out) {
    const double points = 128 * 1024;
    const double unit = two_pi / points;
    float* dst = out;

    dst = twiddles_base_32(two_pi, dst, 0);
    dst = twiddles_step_16(two_pi, dst, unit * 16 * 16);
    dst = twiddles_step_16(two_pi, dst, unit * 16);
    dst = twiddles_step_16(two_pi, dst, unit * GPU_FFT_QPUS);
    for (int qpu = 0; qpu < GPU_FFT_QPUS; qpu++) {
        dst = twiddles_base_16(two_pi, dst, unit * qpu);
    }
}
// Twiddle table for N = 256 * 1024: a base-32 block at angle 0, step-16
// blocks at 32*16 and 32 times two_pi/N, a step-32 block at
// GPU_FFT_QPUS * two_pi/N, then one base-32 block per QPU at q * two_pi/N.
static void twiddles_256k(double two_pi, float* out) {
    const double points = 256 * 1024;
    const double unit = two_pi / points;
    float* dst = out;

    dst = twiddles_base_32(two_pi, dst, 0);
    dst = twiddles_step_16(two_pi, dst, unit * 32 * 16);
    dst = twiddles_step_16(two_pi, dst, unit * 32);
    dst = twiddles_step_32(two_pi, dst, unit * GPU_FFT_QPUS);
    for (int qpu = 0; qpu < GPU_FFT_QPUS; qpu++) {
        dst = twiddles_base_32(two_pi, dst, unit * qpu);
    }
}
// Twiddle table for N = 512 * 1024: a base-32 block at angle 0, a step-16
// block at 32*32 * two_pi/N, step-32 blocks at 32 and GPU_FFT_QPUS times
// two_pi/N, then one base-32 block per QPU at q * two_pi/N.
static void twiddles_512k(double two_pi, float* out) {
    const double points = 512 * 1024;
    const double unit = two_pi / points;
    float* dst = out;

    dst = twiddles_base_32(two_pi, dst, 0);
    dst = twiddles_step_16(two_pi, dst, unit * 32 * 32);
    dst = twiddles_step_32(two_pi, dst, unit * 32);
    dst = twiddles_step_32(two_pi, dst, unit * GPU_FFT_QPUS);
    for (int qpu = 0; qpu < GPU_FFT_QPUS; qpu++) {
        dst = twiddles_base_32(two_pi, dst, unit * qpu);
    }
}
// Twiddle table for N = 1024 * 1024: a base-32 block at angle 0, step-32
// blocks at 32*32, 32 and GPU_FFT_QPUS times two_pi/N, then one base-32 block
// per QPU at q * two_pi/N.
static void twiddles_1024k(double two_pi, float* out) {
    const double points = 1024 * 1024;
    const double unit = two_pi / points;
    float* dst = out;

    dst = twiddles_base_32(two_pi, dst, 0);
    dst = twiddles_step_32(two_pi, dst, unit * 32 * 32);
    dst = twiddles_step_32(two_pi, dst, unit * 32);
    dst = twiddles_step_32(two_pi, dst, unit * GPU_FFT_QPUS);
    for (int qpu = 0; qpu < GPU_FFT_QPUS; qpu++) {
        dst = twiddles_base_32(two_pi, dst, unit * qpu);
    }
}
// Twiddle table for N = 2048 * 1024: a base-64 block, step-32 blocks at
// 32*32, 32 and GPU_FFT_QPUS times two_pi/N, then one base-32 block per QPU
// at q * two_pi/N.
static void twiddles_2048k(double two_pi, float* out) {
    const double points = 2048 * 1024;
    const double unit = two_pi / points;
    float* dst = out;

    dst = twiddles_base_64(two_pi, dst);
    dst = twiddles_step_32(two_pi, dst, unit * 32 * 32);
    dst = twiddles_step_32(two_pi, dst, unit * 32);
    dst = twiddles_step_32(two_pi, dst, unit * GPU_FFT_QPUS);
    for (int qpu = 0; qpu < GPU_FFT_QPUS; qpu++) {
        dst = twiddles_base_32(two_pi, dst, unit * qpu);
    }
}
/****************************************************************************/
// Per-length parameters indexed by (log2_N - 8): entry 0 is N = 256 and entry
// 13 is N = 2048k. `twiddles` is the generator that fills the table for that
// length; `passes`, `shared` and `unique` are reported by
// gpu_fft_twiddle_size().
// NOTE(review): `shared` and `unique` match the number of 16-pair (32-float)
// groups each generator writes before its per-QPU loop and per loop iteration
// respectively - confirm against the consumer in gpu_fft.c.
static struct {
int passes, shared, unique;
void (*twiddles)(double, float*);
} shaders[] = {{2, 2, 1, twiddles_256}, {2, 3, 1, twiddles_512}, {2, 4, 2, twiddles_1k}, {2, 6, 2, twiddles_2k}, {3, 3, 1, twiddles_4k}, {3, 4, 1, twiddles_8k}, {3, 5, 1, twiddles_16k},
{3, 6, 2, twiddles_32k}, {3, 8, 2, twiddles_64k}, {4, 5, 1, twiddles_128k}, {4, 6, 2, twiddles_256k}, {4, 7, 2, twiddles_512k}, {4, 8, 2, twiddles_1024k}, {4, 10, 2, twiddles_2048k}};
// Looks up the table entry for a transform of length 2^log2_N.
// For log2_N in 8..21 stores the entry's shared, unique and passes values
// through the output pointers and returns 0; otherwise leaves the outputs
// untouched and returns -1.
int gpu_fft_twiddle_size(int log2_N, int* shared, int* unique, int* passes) {
    if (log2_N >= 8 && log2_N <= 21) {
        const int idx = log2_N - 8;
        *shared = shaders[idx].shared;
        *unique = shaders[idx].unique;
        *passes = shaders[idx].passes;
        return 0;
    }
    return -1;
}
// Fills `out` with the twiddle table for a transform of length 2^log2_N.
// The generator receives -2*pi for GPU_FFT_FWD and +2*pi for any other
// direction value.
//
// A log2_N outside the table (first entry is log2_N == 8) is ignored and
// `out` is left untouched; this mirrors the range check in
// gpu_fft_twiddle_size() and prevents indexing past either end of `shaders`.
void gpu_fft_twiddle_data(int log2_N, int direction, float* out) {
    const int entries = (int)(sizeof(shaders) / sizeof(shaders[0]));
    if (log2_N < 8 || log2_N - 8 >= entries)
        return;

    const double two_pi = (direction == GPU_FFT_FWD ? -2 : 2) * GPU_FFT_PI;
    shaders[log2_N - 8].twiddles(two_pi, out);
}
================================================
FILE: src/hello_fft/hex/shader_1024k.hex
================================================
0x00000014, 0xe0021227, // mov rb_STAGES, STAGES
0x00000010, 0xe00216e7, // mov rb_0x10, 0x10
0x00000040, 0xe0021727, // mov rb_0x40, 0x40
0x00000080, 0xe0021767, // mov rb_0x80, 0x80
0x000000f0, 0xe00217a7, // mov rb_0xF0, 0xF0
0x00000100, 0xe00217e7, // mov rb_0x100, 0x100
0x55555555, 0xe0020767, // mov rx_0x55555555, 0x55555555
0x33333333, 0xe00207a7, // mov rx_0x33333333, 0x33333333
0x0f0f0f0f, 0xe00207e7, // mov rx_0x0F0F0F0F, 0x0F0F0F0F
0x00ff00ff, 0xe0021027, // mov rx_0x00FF00FF, 0x00FF00FF
0x0000ffff, 0xe00216a7, // mov rx_0x0000FFFF, 0x0000FFFF
0x80904000, 0xe0020727, // mov ra_vdw_32, vdw_setup_0(1, 16, dma_h32( 0,0))
0x80905000, 0xe0021067, // mov rb_vdw_32, vdw_setup_0(1, 16, dma_h32(32,0))
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020327, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020367, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203a7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020427, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020467, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100204a7, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021327, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021367, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213a7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021427, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021467, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100214a7, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100204e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020527, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100214e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021527, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10021167, // mov rb_inst, unif
0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0))
0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0))
0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0))
0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst
0xcc9e7081, 0x100246a0, // add out_0, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100246e0, // add out_1, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1
0x0c9e7080, 0x100211e7, // add out_3, r0, r2
0x000002e8, 0xf0f802a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x156e7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x15727d80, 0x10020827, // mov r0, ra_vdw_32
0x8c05cdf6, 0x10024061, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov r1, ra_save_ptr
0x00000080, 0xe00208a7, // mov r2, vdw_setup_0(1, 16, dma_h32(1,0)) - vdw_setup_0(1, 16, dma_h32(0,0))
0x00040000, 0xe00208e7, // mov r3, PASS32_STRIDE
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000050, 0xf0f812a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x156e7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x156a7d80, 0x10020c67, // mov vr_setup, ra_vpm_lo
0x15c27d80, 0x100009e7, // mov -, vpm
0x00000080, 0xf0f801a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x000005d8, 0xf0f811a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000009, 0xe80009e7, // mov -, srel(i+9)
0x00000011, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000a, 0xe80009e7, // mov -, srel(i+9)
0x00000012, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000b, 0xe80009e7, // mov -, srel(i+9)
0x00000013, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000c, 0xe80009e7, // mov -, srel(i+9)
0x00000014, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000d, 0xe80009e7, // mov -, srel(i+9)
0x00000015, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000e, 0xe80009e7, // mov -, srel(i+9)
0x00000016, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000f, 0xe80009e7, // mov -, srel(i+9)
0x00000017, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffd78, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149c01c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149c01c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149da1c0, 0x10020867, // and r1, r0, mask
0x0e9db1c0, 0x10020827, // shr r0, r0, shift
0x149da1c0, 0x10020827, // and r0, r0, mask
0x119db3c0, 0x10020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9c91c0, 0xd0020827, // shr r0, r0, 32-STAGES-3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffc30, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x20567006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d500f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x2056700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22095c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x00000000, 0xf0f549e7, // bra -, ra_save_32
0x95682ff6, 0x10024682, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x956c7ff6, 0x100246c7, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi
0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32
0x8c15edf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffba0, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffb50, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x20567006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d500f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x2056700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22095c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x00000000, 0xf0f549e7, // bra -, ra_save_32
0x95682ff6, 0x10024682, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x956c7ff6, 0x100246c7, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi
0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32
0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst
0x0d9c11c0, 0xd0020827, // sub r0, r0, 1
0x119c51c0, 0xd0020827, // shl r0, r0, 5
0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0
0x159cafc0, 0x100602a7, // mov.ifnz ra_save_32, rx_save_slave_32
0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif
0x15827d80, 0x100210e7, // mov rb_addr_y, unif
0x000008d0, 0xf00809e7, // brr.allz -, r:end
0x952cbdbf, 0x10024555, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149c01c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149c01c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149da1c0, 0x10020867, // and r1, r0, mask
0x0e9db1c0, 0x10020827, // shr r0, r0, shift
0x149da1c0, 0x10020827, // and r0, r0, mask
0x119db3c0, 0x10020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9c91c0, 0xd0020827, // shr r0, r0, 32-STAGES-3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffa50, 0xf0f80227, // brr ra_link_1, r:pass_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffffa28, 0xf00809e7, // brr.allz -, r:pass_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x952cbdbf, 0x10024555, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffbf0, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x00007fff, 0xe0020827, // mov r0, 0x7FFF
0x141e7c00, 0x100229e7, // and.setf -, ra_points, r0
0xfffffbc0, 0xf01809e7, // brr.allnz -, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100601e7, // add.ifnz ra_points, ra_points, rb_0x100
0x95555dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE
0x20367016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32
0x209cd017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32
0x209cd01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32
0x2136709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02567c80, 0x10020567, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2
0x029d5ec0, 0x10021567, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3
0x95659dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x203a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209ce017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209ce01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x213a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02667c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d9ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffffab8, 0xf00809e7, // brr.allz -, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x952cbdbf, 0x10024555, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff9b0, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0xfffff990, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0xfffff970, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0xfffff950, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x95555dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE
0x203e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32
0x209cf017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32
0x209cf01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32
0x213e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02567c80, 0x10020567, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2
0x029d5ec0, 0x10021567, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3
0x95659dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x20427016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209d0017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209d001f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x2142709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02667c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d9ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffff848, 0xf00809e7, // brr.allz -, r:pass_3
0x00000060, 0xe0020827, // mov r0, 3*4*8
0x0d227c00, 0x10020227, // sub ra_link_1, ra_link_1, r0
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x954d3dbf, 0x10024555, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x95514dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff740, 0xf0f80227, // brr ra_link_1, r:pass_4
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x95555dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE
0x20467016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32
0x209d1017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32
0x209d101f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32
0x2146709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02567c80, 0x10020567, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2
0x029d5ec0, 0x10021567, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3
0x95659dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x204a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209d2017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209d201f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x214a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02667c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d9ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffff638, 0xf00809e7, // brr.allz -, r:pass_4
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0xfffff700, 0xf0f809e7, // brr -, r:loop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c3fc0, 0x100209a7, // mov interrupt, flag
0x009e7000, 0x300009e7, // nop; nop; thrend
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
================================================
FILE: src/hello_fft/hex/shader_128k.hex
================================================
0x00000011, 0xe0021227, // mov rb_STAGES, STAGES
0x00000010, 0xe00216a7, // mov rb_0x10, 0x10
0x00000040, 0xe00216e7, // mov rb_0x40, 0x40
0x00000080, 0xe0021727, // mov rb_0x80, 0x80
0x000000f0, 0xe0021767, // mov rb_0xF0, 0xF0
0x00000100, 0xe00217a7, // mov rb_0x100, 0x100
0x00000fff, 0xe00217e7, // mov rb_0xFFF, 0xFFF
0x55555555, 0xe0020767, // mov rx_0x55555555, 0x55555555
0x33333333, 0xe00207a7, // mov rx_0x33333333, 0x33333333
0x0f0f0f0f, 0xe00207e7, // mov rx_0x0F0F0F0F, 0x0F0F0F0F
0x00ff00ff, 0xe0021627, // mov rx_0x00FF00FF, 0x00FF00FF
0x0000ffff, 0xe0021667, // mov rx_0x0000FFFF, 0x0000FFFF
0x88104000, 0xe00206e7, // mov ra_vdw_16, vdw_setup_0(16, 16, dma_h32( 0,0))
0x88105000, 0xe0021027, // mov rb_vdw_16, vdw_setup_0(16, 16, dma_h32(32,0))
0x90104000, 0xe0020727, // mov ra_vdw_32, vdw_setup_0(32, 16, dma_h32( 0,0))
0x90105000, 0xe0021067, // mov rb_vdw_32, vdw_setup_0(32, 16, dma_h32(32,0))
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020327, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020367, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203a7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021327, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021367, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213a7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020427, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021427, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10021167, // mov rb_inst, unif
0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0))
0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0))
0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0))
0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst
0xcc9e7081, 0x10024660, // add out_0, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100246a0, // add out_1, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1
0x0c9e7080, 0x100211e7, // add out_3, r0, r2
0x000000b0, 0xf0f80127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, arg
0x159e7000, 0x10020c27, // mov vpm, r0
0x159e7240, 0x10020c27, // mov vpm, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x156e7d80, 0x10021c67, // mov vw_setup, arg_vdw
0xc000ffc0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS16_STRIDE-16*4
0x8c05bdf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr
0x00000038, 0xf0f81127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, arg
0x159e7000, 0x10020c27, // mov vpm, r0
0x159e7240, 0x10020c27, // mov vpm, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10020c67, // mov vr_setup, arg_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x000000c8, 0xf0f802a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x15727d80, 0x10021c67, // mov vw_setup, ra_vdw_32
0xc0007fc0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS32_STRIDE-16*4
0x8c05bdf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr
0x00000050, 0xf0f812a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10020c67, // mov vr_setup, ra_vpm_lo
0x15c27d80, 0x100009e7, // mov -, vpm
0x00000080, 0xf0f801a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000560, 0xf0f811a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000009, 0xe80009e7, // mov -, srel(i+9)
0x00000011, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000a, 0xe80009e7, // mov -, srel(i+9)
0x00000012, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000b, 0xe80009e7, // mov -, srel(i+9)
0x00000013, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000c, 0xe80009e7, // mov -, srel(i+9)
0x00000014, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000d, 0xe80009e7, // mov -, srel(i+9)
0x00000015, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000e, 0xe80009e7, // mov -, srel(i+9)
0x00000016, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000f, 0xe80009e7, // mov -, srel(i+9)
0x00000017, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffd78, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c15adf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149d81c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149d81c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149d91c0, 0x10020867, // and r1, r0, mask
0x0e9da1c0, 0x10020827, // shr r0, r0, shift
0x149d91c0, 0x10020827, // and r0, r0, mask
0x119da3c0, 0x10020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9cc1c0, 0xd0020827, // shr r0, r0, 32-STAGES-3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffc30, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x20467006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d100f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x2046700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22091c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x00000000, 0xf0f549e7, // bra -, ra_save_32
0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x95687ff6, 0x10024687, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi
0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32
0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffba0, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x00000000, 0xf0f489e7, // bra -, ra_save_16
0x009e7000, 0x100009e7, // nop
0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x956c0ff6, 0x100246c0, // mov ra_vdw_16, rb_vdw_16; mov rb_vdw_16, ra_vdw_16
0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst
0x0d9c11c0, 0xd0020827, // sub r0, r0, 1
0x119c51c0, 0xd0020827, // shl r0, r0, 5
0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0
0x159c4fc0, 0x10060127, // mov.ifnz ra_save_16, rx_save_slave_16
0x159cafc0, 0x100602a7, // mov.ifnz ra_save_32, rx_save_slave_32
0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif
0x15827d80, 0x100210e7, // mov rb_addr_y, unif
0x000007b0, 0xf00809e7, // brr.allz -, r:end
0x952cbdbf, 0x10024451, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15adf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149d81c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149d81c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149d91c0, 0x10020867, // and r1, r0, mask
0x0e9da1c0, 0x10020827, // shr r0, r0, shift
0x149d91c0, 0x10020827, // and r0, r0, mask
0x119da3c0, 0x10020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9cc1c0, 0xd0020827, // shr r0, r0, 32-STAGES-3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffac0, 0xf0f80227, // brr ra_link_1, r:pass_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dedc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffffa98, 0xf00809e7, // brr.allz -, r:pass_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dedc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c41c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffc68, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x141dfdc0, 0x100229e7, // and.setf -, ra_points, rb_0xFFF
0xfffffc40, 0xf01809e7, // brr.allnz -, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dcdc0, 0x100601e7, // add.ifnz ra_points, ra_points, rb_0x80
0x95555dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x20367016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209cd017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209cd01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x2136709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02567c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d5ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffffb78, 0xf00809e7, // brr.allz -, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c41c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffa78, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0xfffffa58, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x95555dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x203a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209ce017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209ce01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x213a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02567c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d5ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffff990, 0xf00809e7, // brr.allz -, r:pass_3
0x00000020, 0xe0020827, // mov r0, 4*8
0x0d227c00, 0x10020227, // sub ra_link_1, ra_link_1, r0
0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x95410dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c41c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff890, 0xf0f80227, // brr ra_link_1, r:pass_4
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x95555dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x203e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209cf017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209cf01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x213e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02567c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d5ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffff7c8, 0xf00809e7, // brr.allz -, r:pass_4
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0xfffff820, 0xf0f809e7, // brr -, r:loop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c3fc0, 0x100209a7, // mov interrupt, flag
0x009e7000, 0x300009e7, // nop; nop; thrend
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
================================================
FILE: src/hello_fft/hex/shader_16k.hex
================================================
0x00000010, 0xe00216e7, // mov rb_0x10, 0x10
0x00000040, 0xe0021727, // mov rb_0x40, 0x40
0x00000080, 0xe0021767, // mov rb_0x80, 0x80
0x000000f0, 0xe00217a7, // mov rb_0xF0, 0xF0
0x00000100, 0xe00217e7, // mov rb_0x100, 0x100
0x00005555, 0xe0020767, // mov rx_0x5555, 0x5555
0x00003333, 0xe00207a7, // mov rx_0x3333, 0x3333
0x00000f0f, 0xe00207e7, // mov rx_0x0F0F, 0x0F0F
0x000000ff, 0xe00216a7, // mov rx_0x00FF, 0x00FF
0x88104000, 0xe00206e7, // mov ra_vdw_16, vdw_setup_0(16, 16, dma_h32( 0,0))
0x88105000, 0xe0021027, // mov rb_vdw_16, vdw_setup_0(16, 16, dma_h32(32,0))
0x90104000, 0xe0020727, // mov ra_vdw_32, vdw_setup_0(32, 16, dma_h32( 0,0))
0x90105000, 0xe0021067, // mov rb_vdw_32, vdw_setup_0(32, 16, dma_h32(32,0))
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020327, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020367, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203a7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021327, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021367, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213a7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020427, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021427, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10021167, // mov rb_inst, unif
0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0))
0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0))
0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0))
0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst
0xcc9e7081, 0x10024660, // add out_0, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100246a0, // add out_1, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1
0x0c9e7080, 0x100211e7, // add out_3, r0, r2
0x000000b0, 0xf0f80127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, arg
0x159e7000, 0x10020c27, // mov vpm, r0
0x159e7240, 0x10020c27, // mov vpm, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x156e7d80, 0x10021c67, // mov vw_setup, arg_vdw
0xc0001fc0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS16_STRIDE-16*4
0x8c05cdf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr
0x00000038, 0xf0f81127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, arg
0x159e7000, 0x10020c27, // mov vpm, r0
0x159e7240, 0x10020c27, // mov vpm, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10020c67, // mov vr_setup, arg_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x000000c8, 0xf0f802a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x15727d80, 0x10021c67, // mov vw_setup, ra_vdw_32
0xc0000fc0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS32_STRIDE-16*4
0x8c05cdf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr
0x00000050, 0xf0f812a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10020c67, // mov vr_setup, ra_vpm_lo
0x15c27d80, 0x100009e7, // mov -, vpm
0x00000080, 0xf0f801a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x000005f0, 0xf0f811a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000009, 0xe80009e7, // mov -, srel(i+9)
0x00000011, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000a, 0xe80009e7, // mov -, srel(i+9)
0x00000012, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000b, 0xe80009e7, // mov -, srel(i+9)
0x00000013, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000c, 0xe80009e7, // mov -, srel(i+9)
0x00000014, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000d, 0xe80009e7, // mov -, srel(i+9)
0x00000015, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000e, 0xe80009e7, // mov -, srel(i+9)
0x00000016, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000f, 0xe80009e7, // mov -, srel(i+9)
0x00000017, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffda0, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149da1c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149da1c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x119c11c0, 0xd0020827, // shl r0, r0, STAGES-13
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffc80, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x20467006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d100f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x2046700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22091c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x00000000, 0xf0f549e7, // bra -, ra_save_32
0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x95687ff6, 0x10024687, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi
0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32
0x8c15edf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffbf0, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffba0, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x20467006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d100f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x2046700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22091c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x00000000, 0xf0f549e7, // bra -, ra_save_32
0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x95687ff6, 0x10024687, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi
0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32
0x8c15ddf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffb10, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x00000000, 0xf0f489e7, // bra -, ra_save_16
0x009e7000, 0x100009e7, // nop
0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x956c0ff6, 0x100246c0, // mov ra_vdw_16, rb_vdw_16; mov rb_vdw_16, ra_vdw_16
0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst
0x0d9c11c0, 0xd0020827, // sub r0, r0, 1
0x119c51c0, 0xd0020827, // shl r0, r0, 5
0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0
0x159c4fc0, 0x10060127, // mov.ifnz ra_save_16, rx_save_slave_16
0x159cafc0, 0x100602a7, // mov.ifnz ra_save_32, rx_save_slave_32
0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif
0x15827d80, 0x100210e7, // mov rb_addr_y, unif
0x000005e0, 0xf00809e7, // brr.allz -, r:end
0x952cbdbf, 0x10024451, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149da1c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149da1c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x119c11c0, 0xd0020827, // shl r0, r0, STAGES-13
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffa58, 0xf0f80227, // brr ra_link_1, r:pass_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x0e1cedc0, 0xd00229e7, // shr.setf -, ra_points, STAGES
0xfffffa30, 0xf00809e7, // brr.allz -, r:pass_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x952cbdbf, 0x10024451, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffba8, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0xfffffb88, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x95451dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE
0x20367016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32
0x209cd017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32
0x209cd01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32
0x2136709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02467c80, 0x10020467, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2
0x029d1ec0, 0x10021467, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3
0x95555dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x203a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209ce017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209ce01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x213a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02567c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d5ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1cedc0, 0xd00229e7, // shr.setf -, ra_points, STAGES
0xfffffa80, 0xf00809e7, // brr.allz -, r:pass_2
0x00000020, 0xe0020827, // mov r0, 4*8
0x0d227c00, 0x10020227, // sub ra_link_1, ra_link_1, r0
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x95410dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c41c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15ddf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffa60, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dddc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x95555dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x203e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209cf017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209cf01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x213e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02567c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d5ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1cedc0, 0xd00229e7, // shr.setf -, ra_points, STAGES
0xfffff998, 0xf00809e7, // brr.allz -, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dddc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0xfffff9f0, 0xf0f809e7, // brr -, r:loop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c3fc0, 0x100209a7, // mov interrupt, flag
0x009e7000, 0x300009e7, // nop; nop; thrend
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
================================================
FILE: src/hello_fft/hex/shader_1k.hex
================================================
0x00000010, 0xe00216e7, // mov rb_0x10, 0x10
0x00000040, 0xe0021727, // mov rb_0x40, 0x40
0x000000f0, 0xe0021767, // mov rb_0xF0, 0xF0
0x00005555, 0xe00207a7, // mov rx_0x5555, 0x5555
0x00003333, 0xe00217a7, // mov rx_0x3333, 0x3333
0x00000f0f, 0xe00207e7, // mov rx_0x0F0F, 0x0F0F
0x000000ff, 0xe00217e7, // mov rx_0x00FF, 0x00FF
0x90104000, 0xe0020767, // mov ra_vdw_32, vdw_setup_0(32, 16, dma_h32( 0,0))
0x90105000, 0xe0021067, // mov rb_vdw_32, vdw_setup_0(32, 16, dma_h32(32,0))
0x00000080, 0xe00208e7, // mov r3, 0x80
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100202a7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020327, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020367, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100212a7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021327, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021367, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203a7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213a7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10021167, // mov rb_inst, unif
0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0))
0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0))
0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0))
0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst
0xcc9e7081, 0x100246e0, // add out_0, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x10024720, // add out_1, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1
0x0c9e7080, 0x100211e7, // add out_3, r0, r2
0x000000c8, 0xf0f80127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x156e7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x15727d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x15767d80, 0x10021c67, // mov vw_setup, ra_vdw_32
0xc00000c0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS32_STRIDE-16*4
0x8c05cdf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr
0x00000050, 0xf0f81127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x156e7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x15727d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x156e7d80, 0x10020c67, // mov vr_setup, ra_vpm_lo
0x15c27d80, 0x100009e7, // mov -, vpm
0x00000080, 0xf0f801a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000588, 0xf0f811a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000009, 0xe80009e7, // mov -, srel(i+9)
0x00000011, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000a, 0xe80009e7, // mov -, srel(i+9)
0x00000012, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000b, 0xe80009e7, // mov -, srel(i+9)
0x00000013, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000c, 0xe80009e7, // mov -, srel(i+9)
0x00000014, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000d, 0xe80009e7, // mov -, srel(i+9)
0x00000015, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000e, 0xe80009e7, // mov -, srel(i+9)
0x00000016, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000f, 0xe80009e7, // mov -, srel(i+9)
0x00000017, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffda0, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149de1c0, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x149de1c0, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149df1c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149df1c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9c31c0, 0xd0020827, // shr r0, r0, 13-STAGES
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffc80, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x20427006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d000f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x2042700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22090c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x00000000, 0xf0f489e7, // bra -, ra_save_32
0x956c2ff6, 0x100246c2, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x95707ff6, 0x10024707, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi
0x95741ff6, 0x10024741, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32
0x8c15ddf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffbf0, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffba0, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x20427006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d000f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x2042700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22090c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x00000000, 0xf0f489e7, // bra -, ra_save_32
0x956c2ff6, 0x100246c2, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x95707ff6, 0x10024707, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi
0x95741ff6, 0x10024741, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32
0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst
0x0d9c11c0, 0xd0020827, // sub r0, r0, 1
0x119c51c0, 0xd0020827, // shl r0, r0, 5
0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0
0x159c4fc0, 0x10060127, // mov.ifnz ra_save_32, rx_save_slave_32
0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif
0x15827d80, 0x100210e7, // mov rb_addr_y, unif
0x000003f8, 0xf00809e7, // brr.allz -, r:end
0x9528adbf, 0x10024410, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x952cbdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149de1c0, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x149de1c0, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149df1c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149df1c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9c31c0, 0xd0020827, // shr r0, r0, 13-STAGES
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffac8, 0xf0f80227, // brr ra_link_1, r:pass_1
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x0e1cadc0, 0xd00229e7, // shr.setf -, ra_points, STAGES
0xfffffaa0, 0xf00809e7, // brr.allz -, r:pass_1
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x9538edbf, 0x10024410, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x953cfdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffc18, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x95410dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE
0x20327016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32
0x209cc017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32
0x209cc01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32
0x2132709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02427c80, 0x10020427, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2
0x029d0ec0, 0x10021427, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3
0x95514dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x20367016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209cd017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209cd01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x2136709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02527c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d4ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1cadc0, 0xd00229e7, // shr.setf -, ra_points, STAGES
0xfffffb10, 0xf00809e7, // brr.allz -, r:pass_2
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0xfffffbd8, 0xf0f809e7, // brr -, r:loop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c3fc0, 0x100209a7, // mov interrupt, flag
0x009e7000, 0x300009e7, // nop; nop; thrend
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
================================================
FILE: src/hello_fft/hex/shader_2048k.hex
================================================
0x00000010, 0xe0021227, // mov rb_0x10, 0x10
0x000001d0, 0xe0021967, // mov r5rep, 0x1D0
0x00000080, 0xe00208e7, // mov r3, 0x80
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020427, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020467, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100204a7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100204e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020527, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020567, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100205a7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100205e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020627, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021427, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021467, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100214a7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100214e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021527, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021567, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100215a7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100215e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021627, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020667, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100206a7, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021667, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100216a7, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10021167, // mov rb_inst, unif
0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0))
0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0))
0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0))
0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst
0xcc9e7081, 0x10025020, // add out_0, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x10025060, // add out_1, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1
0x0c9e7080, 0x100211e7, // add out_3, r0, r2
0x000002e8, 0xf0f80127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x152e7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x15327d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x153a7d80, 0x10020827, // mov r0, ra_vdw_32
0x8c04ddf6, 0x10024061, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov r1, ra_save_ptr
0x00000080, 0xe00208a7, // mov r2, vdw_setup_0(1, 16, dma_h32(1,0)) - vdw_setup_0(1, 16, dma_h32(0,0))
0x00080000, 0xe00208e7, // mov r3, PASS32_STRIDE
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000050, 0xf0f81127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x152e7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x15327d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x152e7d80, 0x10020c67, // mov vr_setup, ra_vpm_lo
0x15c27d80, 0x100009e7, // mov -, vpm
0x00000520, 0xf0f802a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x159c0fc0, 0x10021c67, // mov vw_setup, rb_vpm
0x012cbdc0, 0x10020c27, // fadd vpm, ra_64+0, rb_64+0
0x0130cdc0, 0x10020c27, // fadd vpm, ra_64+1, rb_64+1
0x159c1fc0, 0x10021c67, // mov vw_setup, rb_vpm_16
0x0134ddc0, 0x10020c27, // fadd vpm, ra_64+2, rb_64+2
0x0138edc0, 0x10020c27, // fadd vpm, ra_64+3, rb_64+3
0x159c2fc0, 0x10021c67, // mov vw_setup, rb_vpm_32
0x022cbdc0, 0x10020c27, // fsub vpm, ra_64+0, rb_64+0
0x0230cdc0, 0x10020c27, // fsub vpm, ra_64+1, rb_64+1
0x159c7fc0, 0x10021c67, // mov vw_setup, rb_vpm_48
0x0234ddc0, 0x10020c27, // fsub vpm, ra_64+2, rb_64+2
0x0238edc0, 0x10020c27, // fsub vpm, ra_64+3, rb_64+3
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x80904000, 0xe0020827, // mov r0, vdw_setup_0(1, 16, dma_h32(0,0))
0x00000040, 0xe0020867, // mov r1, 0x40
0x8c067c76, 0x10024061, // add ra_save_ptr, ra_save_ptr, r1; mov r1, ra_save_ptr
0x00000080, 0xe00208a7, // mov r2, vdw_setup_0(1, 16, dma_h32(1,0)) - vdw_setup_0(1, 16, dma_h32(0,0))
0x00040000, 0xe00208e7, // mov r3, PASS64_STRIDE
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x000002b8, 0xf0f812a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd00200a7, // shl ra_temp, r0, 5
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0fc49e7, // brr -, ra_temp
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x000000e0, 0xf0f809e7, // brr -, r:2f
0x00000010, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x000000c0, 0xf0f809e7, // brr -, r:2f
0x00000011, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x000000a0, 0xf0f809e7, // brr -, r:2f
0x00000012, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000080, 0xf0f809e7, // brr -, r:2f
0x00000013, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000060, 0xf0f809e7, // brr -, r:2f
0x00000014, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000040, 0xf0f809e7, // brr -, r:2f
0x00000015, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000020, 0xf0f809e7, // brr -, r:2f
0x00000016, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f809e7, // brr -, r:2f
0x00000017, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c0fc0, 0x10021c67, // mov vw_setup, rb_vpm
0x012cbdc0, 0x10020c27, // fadd vpm, ra_64+0, rb_64+0
0x0130cdc0, 0x10020c27, // fadd vpm, ra_64+1, rb_64+1
0x159c1fc0, 0x10021c67, // mov vw_setup, rb_vpm_16
0x0134ddc0, 0x10020c27, // fadd vpm, ra_64+2, rb_64+2
0x0138edc0, 0x10020c27, // fadd vpm, ra_64+3, rb_64+3
0x159c2fc0, 0x10021c67, // mov vw_setup, rb_vpm_32
0x022cbdc0, 0x10020c27, // fsub vpm, ra_64+0, rb_64+0
0x0230cdc0, 0x10020c27, // fsub vpm, ra_64+1, rb_64+1
0x159c7fc0, 0x10021c67, // mov vw_setup, rb_vpm_48
0x0234ddc0, 0x10020c27, // fsub vpm, ra_64+2, rb_64+2
0x0238edc0, 0x10020c27, // fsub vpm, ra_64+3, rb_64+3
0x00000000, 0xf0fc49e7, // brr -, ra_temp
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x00000008, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x00000009, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x0000000a, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x0000000b, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x0000000c, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x0000000d, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x0000000e, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x0000000f, 0xe80009e7, // mov -, srel(i+8)
0x00000080, 0xf0f801a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000998, 0xf0f811a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000009, 0xe80009e7, // mov -, srel(i+9)
0x00000011, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000a, 0xe80009e7, // mov -, srel(i+9)
0x00000012, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000b, 0xe80009e7, // mov -, srel(i+9)
0x00000013, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000c, 0xe80009e7, // mov -, srel(i+9)
0x00000014, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000d, 0xe80009e7, // mov -, srel(i+9)
0x00000015, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000e, 0xe80009e7, // mov -, srel(i+9)
0x00000016, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000f, 0xe80009e7, // mov -, srel(i+9)
0x00000017, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffd50, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x55555555, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x33333333, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0f0f0f0f, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x00ff00ff, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0000ffff, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c81c0, 0x10020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c83c0, 0x10020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9c81c0, 0xd0020827, // shr r0, r0, 32-STAGES-3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffbe0, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x206e7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209db00f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x206e700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x2209bc87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x01267c00, 0x100202e7, // fadd ra_64+0, ra_32_re, r0
0x019c9e40, 0x10020327, // fadd ra_64+1, rb_32_im, r1
0x02267c00, 0x10020367, // fsub ra_64+2, ra_32_re, r0
0x029c9e40, 0x100203a7, // fsub ra_64+3, rb_32_im, r1
0x8c167d76, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x55555555, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x33333333, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0f0f0f0f, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x00ff00ff, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0000ffff, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c81c0, 0x10020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c83c0, 0x10020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9c81c0, 0xd0020827, // shr r0, r0, 32-STAGES-3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffa30, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x55555555, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x33333333, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0f0f0f0f, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x00ff00ff, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0000ffff, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c81c0, 0x10020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c83c0, 0x10020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9c81c0, 0xd0020827, // shr r0, r0, 32-STAGES-3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffff8c0, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x206e7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209db00f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x206e700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x2209bc87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x029c9e40, 0x100208e7, // fsub r3, rb_32_im, r1
0x02267c00, 0x100208a7, // fsub r2, ra_32_re, r0
0x019c9e40, 0x10020867, // fadd r1, rb_32_im, r1
0x01267c00, 0x10020827, // fadd r0, ra_32_re, r0
0x203e700e, 0x100049c9, // nop; fmul rb_32_im, r1, ra_tw_re+TW64_P1_BASE0
0x209cf00f, 0x100059c9, // nop; fmul ra_32_re, r1, rb_tw_im+TW64_P1_BASE0
0x209cf007, 0x100049e1, // nop; fmul r1, r0, rb_tw_im+TW64_P1_BASE0
0x213c93c6, 0x10025320, // fadd rb_64+1, r1, rb_32_im; fmul r0, r0, ra_tw_re+TW64_P1_BASE0
0x2225019f, 0x100252c9, // fsub rb_64+0, r0, ra_32_re; fmul ra_32_re, r3, rb_tw_im+TW64_P1_BASE1
0x2042701e, 0x100049c9, // nop; fmul rb_32_im, r3, ra_tw_re+TW64_P1_BASE1
0x00000000, 0xf0f549e7, // bra -, ra_save_64
0x209d0017, 0x100049e3, // nop; fmul r3, r2, rb_tw_im+TW64_P1_BASE1
0x214097d6, 0x100253a2, // fadd rb_64+3, r3, rb_32_im; fmul r2, r2, ra_tw_re+TW64_P1_BASE1
0x02267580, 0x10021367, // fsub rb_64+2, r2, ra_32_re
0x8c14cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff7e0, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff790, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x206e7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209db00f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x206e700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x2209bc87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x00000000, 0xf0f489e7, // bra -, ra_save_32
0x952c2ff6, 0x100242c2, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x95307ff6, 0x10024307, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi
0x9538eff6, 0x1002438e, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32
0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst
0x0d9c11c0, 0xd0020827, // sub r0, r0, 1
0x119c51c0, 0xd0020827, // shl r0, r0, 5
0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0
0x159c4fc0, 0x10060127, // mov.ifnz ra_save_32, rx_save_slave_32
0x159cafc0, 0x100602a7, // mov.ifnz ra_save_64, rx_save_slave_64
0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif
0x15827d80, 0x100210e7, // mov rb_addr_y, unif
0x00000940, 0xf00809e7, // brr.allz -, r:end
0x95451dbf, 0x100246db, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x95492dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c7e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d7e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c7a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d7a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c61c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x55555555, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x33333333, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0f0f0f0f, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x00ff00ff, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0000ffff, 0xe00208a7, // mov r2, mask
0x149e7080, 0x10020867, // and r1, r0, r2
0x0e9c81c0, 0x10020827, // shr r0, r0, shift
0x149e7080, 0x10020827, // and r0, r0, r2
0x119c83c0, 0x10020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9c81c0, 0xd0020827, // shr r0, r0, 32-STAGES-3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff660, 0xf0f80227, // brr ra_link_1, r:pass_1
0x009e7000, 0x100009e7, // nop
0x00000200, 0xe0020827, // mov r0, 0x200
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x00000015, 0xe0020867, // mov r1, STAGES
0x0e1e7c40, 0x100229e7, // shr.setf -, ra_points, r1
0xfffff630, 0xf00809e7, // brr.allz -, r:pass_1
0x009e7000, 0x100009e7, // nop
0x00000200, 0xe0020827, // mov r0, 0x200
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x159c0fc0, 0x100202e7, // mov ra_vpm_lo, rb_vpm
0x159c1fc0, 0x10020327, // mov ra_vpm_hi, rb_vpm_16
0x80904000, 0xe00203a7, // mov ra_vdw_32, vdw_setup_0(1, 16, dma_h32( 0,0))
0x80905000, 0xe00213a7, // mov rb_vdw_32, vdw_setup_0(1, 16, dma_h32(32,0))
0x00000015, 0xe00212e7, // mov rb_STAGES, STAGES
0x000000f0, 0xe0021327, // mov rb_0xF0, 0xF0
0x00000040, 0xe0021367, // mov rb_0x40, 0x40
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x95451dbf, 0x100246db, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x95492dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c7e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d7e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c7a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d7a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffb80, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x00007fff, 0xe0020827, // mov r0, 0x7FFF
0x141e7c00, 0x100229e7, // and.setf -, ra_points, r0
0xfffffb50, 0xf01809e7, // brr.allnz -, r:pass_2
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100601e7, // add.ifnz ra_points, ra_points, r0
0x956dbdbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE
0x204e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32
0x209d3017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32
0x209d301f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32
0x214e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x026e7c80, 0x100206e7, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2
0x029dbec0, 0x100216e7, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3
0x957dfdbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x20527016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209d4017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209d401f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x2152709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x027e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029dfec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c7e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d7e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c7a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d7a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1cbdc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffffa48, 0xf00809e7, // brr.allz -, r:pass_2
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x95451dbf, 0x100246db, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x95492dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c7e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d7e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c7a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d7a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff940, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0xfffff920, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0xfffff900, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0xfffff8e0, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x956dbdbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE
0x20567016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32
0x209d5017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32
0x209d501f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32
0x2156709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x026e7c80, 0x100206e7, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2
0x029dbec0, 0x100216e7, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3
0x957dfdbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x205a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209d6017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209d601f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x215a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x027e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029dfec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c7e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d7e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c7a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d7a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1cbdc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0x00000100, 0xe0020827, // mov r0, 0x100
0xfffff7d0, 0xf00809e7, // brr.allz -, r:pass_3
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x00000060, 0xe0020827, // mov r0, (4-1)*4*8
0x0d227c00, 0x10020227, // sub ra_link_1, ra_link_1, r0
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x95659dbf, 0x100246db, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x9569adbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c7e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d7e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c7a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d7a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff6c8, 0xf0f80227, // brr ra_link_1, r:pass_4
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x956dbdbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE
0x205e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32
0x209d7017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32
0x209d701f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32
0x215e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x026e7c80, 0x100206e7, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2
0x029dbec0, 0x100216e7, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3
0x957dfdbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x20627016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209d8017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209d801f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x2162709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x027e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029dfec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c7e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d7e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c7a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d7a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1cbdc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffff5c0, 0xf00809e7, // brr.allz -, r:pass_4
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0xfffff690, 0xf0f809e7, // brr -, r:loop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c3fc0, 0x100209a7, // mov interrupt, flag
0x009e7000, 0x300009e7, // nop; nop; thrend
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
================================================
FILE: src/hello_fft/hex/shader_256.hex
================================================
0x00000040, 0xe00217a7, // mov rb_0x40, 0x40
0x00000080, 0xe00217e7, // mov rb_0x80, 0x80
0x00005555, 0xe0020767, // mov rx_0x5555, 0x5555
0x00003333, 0xe00207a7, // mov rx_0x3333, 0x3333
0x00000f0f, 0xe00207e7, // mov rx_0x0F0F, 0x0F0F
0x88104000, 0xe0020727, // mov ra_vdw, vdw_setup_0(16, 16, dma_h32( 0,0))
0x88104800, 0xe0021727, // mov rb_vdw, vdw_setup_0(16, 16, dma_h32(16,0))
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9df1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020227, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9df1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020267, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9df3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021227, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9df3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021267, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9df1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100202a7, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9df3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100212a7, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10021167, // mov rb_inst, unif
0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0))
0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0))
0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0))
0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst
0xcc9e7081, 0x100246e0, // add out_0, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100256e0, // add out_1, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100049e0, // add out_2, r0, r2; v8adds r0, r0, r1
0x0c9e7080, 0x100009e7, // add out_3, r0, r2
0x000000b0, 0xf0f80127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x156e7d80, 0x10021c67, // mov vw_setup, arg
0x159e7000, 0x10020c27, // mov vpm, r0
0x159e7240, 0x10020c27, // mov vpm, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000000, 0xf0f409e7, // bra -, ra_link_1
0x15727d80, 0x10021c67, // mov vw_setup, arg_vdw
0xc0000040, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS16_STRIDE-16*4
0x8c05edf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr
0x00000038, 0xf0f81127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x156e7d80, 0x10021c67, // mov vw_setup, arg
0x159e7000, 0x10020c27, // mov vpm, r0
0x159e7240, 0x10020c27, // mov vpm, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x156e7d80, 0x10020c67, // mov vr_setup, arg_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x00000080, 0xf0f801a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000000, 0xf0f409e7, // bra -, ra_link_1
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000248, 0xf0f811a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f409e7, // bra -, ra_link_1
0x00000009, 0xe80009e7, // mov -, srel(i+9)
0x00000011, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f409e7, // bra -, ra_link_1
0x0000000a, 0xe80009e7, // mov -, srel(i+9)
0x00000012, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f409e7, // bra -, ra_link_1
0x0000000b, 0xe80009e7, // mov -, srel(i+9)
0x00000013, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f409e7, // bra -, ra_link_1
0x0000000c, 0xe80009e7, // mov -, srel(i+9)
0x00000014, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f409e7, // bra -, ra_link_1
0x0000000d, 0xe80009e7, // mov -, srel(i+9)
0x00000015, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f409e7, // bra -, ra_link_1
0x0000000e, 0xe80009e7, // mov -, srel(i+9)
0x00000016, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f409e7, // bra -, ra_link_1
0x0000000f, 0xe80009e7, // mov -, srel(i+9)
0x00000017, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<>i)
0x959f8492, 0xd002c3a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d3a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c362, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d363, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c322, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d323, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c2e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d2e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c41c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15fdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x8c15fdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffdb0, 0xf0f80027, // brr ra_link_1, r:pass_1
0x009e7000, 0x100009e7, // nop
0x956dbff6, 0x100246db, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm
0x9571cff6, 0x1002471c, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw
0xfffffd90, 0xf0f80027, // brr ra_link_1, r:pass_1
0x009e7000, 0x100009e7, // nop
0x956dbff6, 0x100246db, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm
0x9571cff6, 0x1002471c, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw
0x00000000, 0xf0f4c027, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x9528adbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c3a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d3a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c362, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d363, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c322, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d323, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c2e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d2e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c41c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15fdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x8c15fdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffc68, 0xf0f80027, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x956dbff6, 0x100246db, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm
0x9571cff6, 0x1002471c, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw
0x9538edbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x20267016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209c9017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209c901f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x2126709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x023a7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029ceec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c3a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d3a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c362, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d363, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c322, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d323, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c2e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d2e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0xfffffba8, 0xf0f80027, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x956dbff6, 0x100246db, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm
0x9571cff6, 0x1002471c, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw
0x00000000, 0xf0f4c027, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0xfffffbb0, 0xf0f809e7, // brr -, r:loop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c3fc0, 0x100209a7, // mov interrupt, flag
0x009e7000, 0x300009e7, // nop; nop; thrend
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
================================================
FILE: src/hello_fft/hex/shader_256k.hex
================================================
0x00000012, 0xe0021227, // mov rb_STAGES, STAGES
0x00000010, 0xe00216a7, // mov rb_0x10, 0x10
0x00000040, 0xe00216e7, // mov rb_0x40, 0x40
0x00000080, 0xe0021727, // mov rb_0x80, 0x80
0x000000f0, 0xe0021767, // mov rb_0xF0, 0xF0
0x00000100, 0xe00217a7, // mov rb_0x100, 0x100
0x00001fff, 0xe00217e7, // mov rb_0x1FFF, 0x1FFF
0x55555555, 0xe0020767, // mov rx_0x55555555, 0x55555555
0x33333333, 0xe00207a7, // mov rx_0x33333333, 0x33333333
0x0f0f0f0f, 0xe00207e7, // mov rx_0x0F0F0F0F, 0x0F0F0F0F
0x00ff00ff, 0xe0021627, // mov rx_0x00FF00FF, 0x00FF00FF
0x0000ffff, 0xe0021667, // mov rx_0x0000FFFF, 0x0000FFFF
0x80904000, 0xe00206e7, // mov ra_vdw_16, vdw_setup_0( 1, 16, dma_h32( 0,0))
0x80905000, 0xe0021027, // mov rb_vdw_16, vdw_setup_0( 1, 16, dma_h32(32,0))
0x90104000, 0xe0020727, // mov ra_vdw_32, vdw_setup_0(32, 16, dma_h32( 0,0))
0x90105000, 0xe0021067, // mov rb_vdw_32, vdw_setup_0(32, 16, dma_h32(32,0))
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020327, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020367, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203a7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020427, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021327, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021367, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213a7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021427, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020467, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dc1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100204a7, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021467, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dc3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100214a7, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10021167, // mov rb_inst, unif
0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0))
0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0))
0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0))
0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst
0xcc9e7081, 0x10024660, // add out_0, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100246a0, // add out_1, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1
0x0c9e7080, 0x100211e7, // add out_3, r0, r2
0x000001d0, 0xf0f80127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, arg
0x159e7000, 0x10020c27, // mov vpm, r0
0x159e7240, 0x10020c27, // mov vpm, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x156e7d80, 0x10020827, // mov r0, arg_vdw
0x8c05bdf6, 0x10024061, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov r1, ra_save_ptr
0x00000080, 0xe00208a7, // mov r2, vdw_setup_0(1, 16, dma_h32(1,0)) - vdw_setup_0(1, 16, dma_h32(0,0))
0x00020000, 0xe00208e7, // mov r3, PASS16_STRIDE
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000038, 0xf0f81127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, arg
0x159e7000, 0x10020c27, // mov vpm, r0
0x159e7240, 0x10020c27, // mov vpm, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10020c67, // mov vr_setup, arg_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x000000c8, 0xf0f802a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x15727d80, 0x10021c67, // mov vw_setup, ra_vdw_32
0xc000ffc0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS32_STRIDE-16*4
0x8c05bdf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr
0x00000050, 0xf0f812a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10020c67, // mov vr_setup, ra_vpm_lo
0x15c27d80, 0x100009e7, // mov -, vpm
0x00000080, 0xf0f801a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000640, 0xf0f811a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000009, 0xe80009e7, // mov -, srel(i+9)
0x00000011, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000a, 0xe80009e7, // mov -, srel(i+9)
0x00000012, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000b, 0xe80009e7, // mov -, srel(i+9)
0x00000013, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000c, 0xe80009e7, // mov -, srel(i+9)
0x00000014, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000d, 0xe80009e7, // mov -, srel(i+9)
0x00000015, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000e, 0xe80009e7, // mov -, srel(i+9)
0x00000016, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000f, 0xe80009e7, // mov -, srel(i+9)
0x00000017, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffd78, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c15adf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149d81c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149d81c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149d91c0, 0x10020867, // and r1, r0, mask
0x0e9da1c0, 0x10020827, // shr r0, r0, shift
0x149d91c0, 0x10020827, // and r0, r0, mask
0x119da3c0, 0x10020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9cb1c0, 0xd0020827, // shr r0, r0, 32-STAGES-3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffc30, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x204e7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d300f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x204e700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22093c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x00000000, 0xf0f549e7, // bra -, ra_save_32
0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x95687ff6, 0x10024687, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi
0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32
0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffba0, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x00000000, 0xf0f489e7, // bra -, ra_save_16
0x009e7000, 0x100009e7, // nop
0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x956c0ff6, 0x100246c0, // mov ra_vdw_16, rb_vdw_16; mov rb_vdw_16, ra_vdw_16
0x8c15ddf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffb38, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c15adf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffae8, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x204e7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d300f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x204e700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22093c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x00000000, 0xf0f549e7, // bra -, ra_save_32
0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x95687ff6, 0x10024687, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi
0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32
0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst
0x0d9c11c0, 0xd0020827, // sub r0, r0, 1
0x119c51c0, 0xd0020827, // shl r0, r0, 5
0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0
0x159c4fc0, 0x10060127, // mov.ifnz ra_save_16, rx_save_slave_16
0x159cafc0, 0x100602a7, // mov.ifnz ra_save_32, rx_save_slave_32
0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif
0x15827d80, 0x100210e7, // mov rb_addr_y, unif
0x00000838, 0xf00809e7, // brr.allz -, r:end
0x952cbdbf, 0x100244d3, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15adf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149d81c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149d81c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149d91c0, 0x10020867, // and r1, r0, mask
0x0e9da1c0, 0x10020827, // shr r0, r0, shift
0x149d91c0, 0x10020827, // and r0, r0, mask
0x119da3c0, 0x10020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9cb1c0, 0xd0020827, // shr r0, r0, 32-STAGES-3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff9e0, 0xf0f80227, // brr ra_link_1, r:pass_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dedc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffff9b8, 0xf00809e7, // brr.allz -, r:pass_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dedc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c41c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffb88, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x141dfdc0, 0x100229e7, // and.setf -, ra_points, rb_0x1FFF
0xfffffb60, 0xf01809e7, // brr.allnz -, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dcdc0, 0x100601e7, // add.ifnz ra_points, ra_points, rb_0x80
0x955d7dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x20367016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209cd017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209cd01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x2136709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x025e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d7ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffffa98, 0xf00809e7, // brr.allz -, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c41c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff998, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0xfffff978, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0xfffff958, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0xfffff938, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x955d7dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x203a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209ce017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209ce01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x213a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x025e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d7ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffff870, 0xf00809e7, // brr.allz -, r:pass_3
0x00000060, 0xe0020827, // mov r0, 3*4*8
0x0d227c00, 0x10020227, // sub ra_link_1, ra_link_1, r0
0x0c1dcdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x95451dbf, 0x100244d3, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x95492dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15adf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff7d0, 0xf0f80227, // brr ra_link_1, r:pass_4
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dedc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x954d3dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE
0x203e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32
0x209cf017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32
0x209cf01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32
0x213e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x024e7c80, 0x100204e7, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2
0x029d3ec0, 0x100214e7, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3
0x955d7dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x20427016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209d0017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209d001f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x2142709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x025e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d7ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffff6c8, 0xf00809e7, // brr.allz -, r:pass_4
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dedc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0xfffff798, 0xf0f809e7, // brr -, r:loop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c3fc0, 0x100209a7, // mov interrupt, flag
0x009e7000, 0x300009e7, // nop; nop; thrend
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
================================================
FILE: src/hello_fft/hex/shader_2k.hex
================================================
0x00000010, 0xe0021727, // mov rb_0x10, 0x10
0x00000040, 0xe0021767, // mov rb_0x40, 0x40
0x000000f0, 0xe00217a7, // mov rb_0xF0, 0xF0
0x000001d0, 0xe00217e7, // mov rb_0x1D0, 0x1D0
0x00005555, 0xe0020727, // mov rx_0x5555, 0x5555
0x00003333, 0xe0020767, // mov rx_0x3333, 0x3333
0x00000f0f, 0xe00207a7, // mov rx_0x0F0F, 0x0F0F
0x000000ff, 0xe00207e7, // mov rx_0x00FF, 0x00FF
0x00000080, 0xe00208e7, // mov r3, 0x80
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020427, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020467, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100204a7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100204e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020527, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021427, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021467, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100214a7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100214e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021527, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020567, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100205a7, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021567, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100215a7, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10021167, // mov rb_inst, unif
0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0))
0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0))
0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0))
0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst
0xcc9e7081, 0x10025020, // add out_0, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x10025060, // add out_1, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1
0x0c9e7080, 0x100211e7, // add out_3, r0, r2
0x000000c8, 0xf0f80127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x152e7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x15327d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x15367d80, 0x10021c67, // mov vw_setup, ra_vdw_32
0xc00001c0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS32_STRIDE-16*4
0x8c05ddf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr
0x00000050, 0xf0f81127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x152e7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x15327d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x152e7d80, 0x10020c67, // mov vr_setup, ra_vpm_lo
0x15c27d80, 0x100009e7, // mov -, vpm
0x000000f8, 0xf0f802a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x159c0fc0, 0x10021c67, // mov vw_setup, rb_vpm
0x012cbdc0, 0x10020c27, // fadd vpm, ra_64+0, rb_64+0
0x0130cdc0, 0x10020c27, // fadd vpm, ra_64+1, rb_64+1
0x159c1fc0, 0x10021c67, // mov vw_setup, rb_vpm_16
0x0134ddc0, 0x10020c27, // fadd vpm, ra_64+2, rb_64+2
0x0138edc0, 0x10020c27, // fadd vpm, ra_64+3, rb_64+3
0x159c2fc0, 0x10021c67, // mov vw_setup, rb_vpm_32
0x022cbdc0, 0x10020c27, // fsub vpm, ra_64+0, rb_64+0
0x0230cdc0, 0x10020c27, // fsub vpm, ra_64+1, rb_64+1
0x159c7fc0, 0x10021c67, // mov vw_setup, rb_vpm_48
0x0234ddc0, 0x10020c27, // fsub vpm, ra_64+2, rb_64+2
0x0238edc0, 0x10020c27, // fsub vpm, ra_64+3, rb_64+3
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0xa0104000, 0xe0021c67, // mov vw_setup, vdw_setup_0(64, 16, dma_h32(0,0))
0xc00000c0, 0xe0021c67, // mov vw_setup, vdw_setup_1(PASS64_STRIDE-16*4)
0x8c05ddf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, step; mov vw_addr, ra_save_ptr
0x000002b8, 0xf0f812a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd00200a7, // shl ra_temp, r0, 5
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0fc49e7, // brr -, ra_temp
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x000000e0, 0xf0f809e7, // brr -, r:2f
0x00000010, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x000000c0, 0xf0f809e7, // brr -, r:2f
0x00000011, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x000000a0, 0xf0f809e7, // brr -, r:2f
0x00000012, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000080, 0xf0f809e7, // brr -, r:2f
0x00000013, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000060, 0xf0f809e7, // brr -, r:2f
0x00000014, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000040, 0xf0f809e7, // brr -, r:2f
0x00000015, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000020, 0xf0f809e7, // brr -, r:2f
0x00000016, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f809e7, // brr -, r:2f
0x00000017, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c0fc0, 0x10021c67, // mov vw_setup, rb_vpm
0x012cbdc0, 0x10020c27, // fadd vpm, ra_64+0, rb_64+0
0x0130cdc0, 0x10020c27, // fadd vpm, ra_64+1, rb_64+1
0x159c1fc0, 0x10021c67, // mov vw_setup, rb_vpm_16
0x0134ddc0, 0x10020c27, // fadd vpm, ra_64+2, rb_64+2
0x0138edc0, 0x10020c27, // fadd vpm, ra_64+3, rb_64+3
0x159c2fc0, 0x10021c67, // mov vw_setup, rb_vpm_32
0x022cbdc0, 0x10020c27, // fsub vpm, ra_64+0, rb_64+0
0x0230cdc0, 0x10020c27, // fsub vpm, ra_64+1, rb_64+1
0x159c7fc0, 0x10021c67, // mov vw_setup, rb_vpm_48
0x0234ddc0, 0x10020c27, // fsub vpm, ra_64+2, rb_64+2
0x0238edc0, 0x10020c27, // fsub vpm, ra_64+3, rb_64+3
0x00000000, 0xf0fc49e7, // brr -, ra_temp
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x00000008, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x00000009, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x0000000a, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x0000000b, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x0000000c, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x0000000d, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x0000000e, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x0000000f, 0xe80009e7, // mov -, srel(i+8)
0x00000080, 0xf0f801a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000858, 0xf0f811a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000009, 0xe80009e7, // mov -, srel(i+9)
0x00000011, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000a, 0xe80009e7, // mov -, srel(i+9)
0x00000012, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000b, 0xe80009e7, // mov -, srel(i+9)
0x00000013, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000c, 0xe80009e7, // mov -, srel(i+9)
0x00000014, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000d, 0xe80009e7, // mov -, srel(i+9)
0x00000015, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000e, 0xe80009e7, // mov -, srel(i+9)
0x00000016, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000f, 0xe80009e7, // mov -, srel(i+9)
0x00000017, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffda0, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14727180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14727180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9c21c0, 0xd0020827, // shr r0, r0, 13-STAGES
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffc80, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x205e7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d700f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x205e700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22097c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x01267c00, 0x100202e7, // fadd ra_64+0, ra_32_re, r0
0x019c9e40, 0x10020327, // fadd ra_64+1, rb_32_im, r1
0x02267c00, 0x10020367, // fsub ra_64+2, ra_32_re, r0
0x029c9e40, 0x100203a7, // fsub ra_64+3, rb_32_im, r1
0x8c15fdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14727180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14727180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9c21c0, 0xd0020827, // shr r0, r0, 13-STAGES
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffb20, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14727180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14727180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9c21c0, 0xd0020827, // shr r0, r0, 13-STAGES
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffa00, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x205e7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d700f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x205e700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22097c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x029c9e40, 0x100208e7, // fsub r3, rb_32_im, r1
0x02267c00, 0x100208a7, // fsub r2, ra_32_re, r0
0x019c9e40, 0x10020867, // fadd r1, rb_32_im, r1
0x01267c00, 0x10020827, // fadd r0, ra_32_re, r0
0x203e700e, 0x100049c9, // nop; fmul rb_32_im, r1, ra_tw_re+TW64_P1_BASE0
0x209cf00f, 0x100059c9, // nop; fmul ra_32_re, r1, rb_tw_im+TW64_P1_BASE0
0x209cf007, 0x100049e1, // nop; fmul r1, r0, rb_tw_im+TW64_P1_BASE0
0x213c93c6, 0x10025320, // fadd rb_64+1, r1, rb_32_im; fmul r0, r0, ra_tw_re+TW64_P1_BASE0
0x2225019f, 0x100252c9, // fsub rb_64+0, r0, ra_32_re; fmul ra_32_re, r3, rb_tw_im+TW64_P1_BASE1
0x2042701e, 0x100049c9, // nop; fmul rb_32_im, r3, ra_tw_re+TW64_P1_BASE1
0x00000000, 0xf0f549e7, // bra -, ra_save_64
0x209d0017, 0x100049e3, // nop; fmul r3, r2, rb_tw_im+TW64_P1_BASE1
0x214097d6, 0x100253a2, // fadd rb_64+3, r3, rb_32_im; fmul r2, r2, ra_tw_re+TW64_P1_BASE1
0x02267580, 0x10021367, // fsub rb_64+2, r2, ra_32_re
0x8c15edf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff920, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff8d0, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x205e7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d700f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x205e700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22097c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x00000000, 0xf0f489e7, // bra -, ra_save_32
0x952c2ff6, 0x100242c2, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x95307ff6, 0x10024307, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi
0x9534dff6, 0x1002434d, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32
0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst
0x0d9c11c0, 0xd0020827, // sub r0, r0, 1
0x119c51c0, 0xd0020827, // shl r0, r0, 5
0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0
0x159c4fc0, 0x10060127, // mov.ifnz ra_save_32, rx_save_slave_32
0x159cafc0, 0x100602a7, // mov.ifnz ra_save_64, rx_save_slave_64
0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif
0x15827d80, 0x100210e7, // mov rb_addr_y, unif
0x00000418, 0xf00809e7, // brr.allz -, r:end
0x95451dbf, 0x100245d7, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x95492dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c6e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d6e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c6a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d6a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c61c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14727180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14727180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9c21c0, 0xd0020827, // shr r0, r0, 13-STAGES
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff7f0, 0xf0f80227, // brr ra_link_1, r:pass_1
0x009e7000, 0x100009e7, // nop
0x00000200, 0xe0020827, // mov r0, 0x200
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x0e1cbdc0, 0xd00229e7, // shr.setf -, ra_points, STAGES
0xfffff7c8, 0xf00809e7, // brr.allz -, r:pass_1
0x009e7000, 0x100009e7, // nop
0x00000200, 0xe0020827, // mov r0, 0x200
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x159c0fc0, 0x100202e7, // mov ra_vpm_lo, rb_vpm
0x159c1fc0, 0x10020327, // mov ra_vpm_hi, rb_vpm_16
0x90104000, 0xe0020367, // mov ra_vdw_32, vdw_setup_0(32, 16, dma_h32( 0,0))
0x90105000, 0xe0021367, // mov rb_vdw_32, vdw_setup_0(32, 16, dma_h32(32,0))
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x95555dbf, 0x100245d7, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x95596dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c6e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d6e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c6a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d6a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffbf0, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x955d7dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE
0x204e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32
0x209d3017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32
0x209d301f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32
0x214e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x025e7c80, 0x100205e7, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2
0x029d7ec0, 0x100215e7, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3
0x956dbdbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x20527016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209d4017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209d401f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x2152709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x026e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029dbec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c6e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d6e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c6a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d6a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c662, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d663, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1cbdc0, 0xd00229e7, // shr.setf -, ra_points, STAGES
0xfffffae8, 0xf00809e7, // brr.allz -, r:pass_2
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0xfffffbb8, 0xf0f809e7, // brr -, r:loop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c3fc0, 0x100209a7, // mov interrupt, flag
0x009e7000, 0x300009e7, // nop; nop; thrend
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
================================================
FILE: src/hello_fft/hex/shader_32k.hex
================================================
0x00000010, 0xe00216e7, // mov rb_0x10, 0x10
0x00000040, 0xe0021727, // mov rb_0x40, 0x40
0x00000080, 0xe0021767, // mov rb_0x80, 0x80
0x000000f0, 0xe00217a7, // mov rb_0xF0, 0xF0
0x00000100, 0xe00217e7, // mov rb_0x100, 0x100
0x00005555, 0xe0020767, // mov rx_0x5555, 0x5555
0x00003333, 0xe00207a7, // mov rx_0x3333, 0x3333
0x00000f0f, 0xe00207e7, // mov rx_0x0F0F, 0x0F0F
0x000000ff, 0xe00216a7, // mov rx_0x00FF, 0x00FF
0x90104000, 0xe0020727, // mov ra_vdw_32, vdw_setup_0(32, 16, dma_h32( 0,0))
0x90105000, 0xe0021067, // mov rb_vdw_32, vdw_setup_0(32, 16, dma_h32(32,0))
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100202a7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020327, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020367, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203a7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100212a7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021327, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021367, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213a7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020427, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020467, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021427, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021467, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10021167, // mov rb_inst, unif
0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0))
0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0))
0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0))
0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst
0xcc9e7081, 0x100246a0, // add out_0, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100246e0, // add out_1, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1
0x0c9e7080, 0x100211e7, // add out_3, r0, r2
0x000000c8, 0xf0f80127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x156e7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x15727d80, 0x10021c67, // mov vw_setup, ra_vdw_32
0xc0001fc0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS32_STRIDE-16*4
0x8c05cdf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr
0x00000050, 0xf0f81127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x156e7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x156a7d80, 0x10020c67, // mov vr_setup, ra_vpm_lo
0x15c27d80, 0x100009e7, // mov -, vpm
0x00000080, 0xf0f801a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000588, 0xf0f811a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000009, 0xe80009e7, // mov -, srel(i+9)
0x00000011, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000a, 0xe80009e7, // mov -, srel(i+9)
0x00000012, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000b, 0xe80009e7, // mov -, srel(i+9)
0x00000013, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000c, 0xe80009e7, // mov -, srel(i+9)
0x00000014, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000d, 0xe80009e7, // mov -, srel(i+9)
0x00000015, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000e, 0xe80009e7, // mov -, srel(i+9)
0x00000016, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000f, 0xe80009e7, // mov -, srel(i+9)
0x00000017, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffda0, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149da1c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149da1c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x119c21c0, 0xd0020827, // shl r0, r0, STAGES-13
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffc80, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x204a7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d200f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x204a700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22092c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x00000000, 0xf0f489e7, // bra -, ra_save_32
0x95682ff6, 0x10024682, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x956c7ff6, 0x100246c7, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi
0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32
0x8c15edf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffbf0, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffba0, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x204a7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d200f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x204a700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22092c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x00000000, 0xf0f489e7, // bra -, ra_save_32
0x95682ff6, 0x10024682, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x956c7ff6, 0x100246c7, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi
0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32
0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst
0x0d9c11c0, 0xd0020827, // sub r0, r0, 1
0x119c51c0, 0xd0020827, // shl r0, r0, 5
0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0
0x159c4fc0, 0x10060127, // mov.ifnz ra_save_32, rx_save_slave_32
0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif
0x15827d80, 0x100210e7, // mov rb_addr_y, unif
0x00000668, 0xf00809e7, // brr.allz -, r:end
0x9528adbf, 0x10024492, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x952cbdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149da1c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149da1c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x119c21c0, 0xd0020827, // shl r0, r0, STAGES-13
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffac8, 0xf0f80227, // brr ra_link_1, r:pass_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x0e1cfdc0, 0xd00229e7, // shr.setf -, ra_points, STAGES
0xfffffaa0, 0xf00809e7, // brr.allz -, r:pass_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x9528adbf, 0x10024492, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x952cbdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffc18, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0xfffffbf8, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0xfffffbd8, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0xfffffbb8, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x95492dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE
0x20327016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32
0x209cc017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32
0x209cc01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32
0x2132709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x024a7c80, 0x100204a7, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2
0x029d2ec0, 0x100214a7, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3
0x95596dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x20367016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209cd017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209cd01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x2136709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x025a7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d6ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1cfdc0, 0xd00229e7, // shr.setf -, ra_points, STAGES
0xfffffab0, 0xf00809e7, // brr.allz -, r:pass_2
0x00000060, 0xe0020827, // mov r0, 3*4*8
0x0d227c00, 0x10020227, // sub ra_link_1, ra_link_1, r0
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x95410dbf, 0x10024492, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x95451dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff9a8, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x95492dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE
0x203a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32
0x209ce017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32
0x209ce01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32
0x213a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x024a7c80, 0x100204a7, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2
0x029d2ec0, 0x100214a7, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3
0x95596dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x203e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209cf017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209cf01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x213e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x025a7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d6ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1cfdc0, 0xd00229e7, // shr.setf -, ra_points, STAGES
0xfffff8a0, 0xf00809e7, // brr.allz -, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0xfffff968, 0xf0f809e7, // brr -, r:loop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c3fc0, 0x100209a7, // mov interrupt, flag
0x009e7000, 0x300009e7, // nop; nop; thrend
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
================================================
FILE: src/hello_fft/hex/shader_4k.hex
================================================
0x00000020, 0xe0021767, // mov rb_0x20, 0x20
0x00000040, 0xe00217a7, // mov rb_0x40, 0x40
0x00000080, 0xe00217e7, // mov rb_0x80, 0x80
0x00005555, 0xe0020727, // mov rx_0x5555, 0x5555
0x00003333, 0xe0020767, // mov rx_0x3333, 0x3333
0x00000f0f, 0xe00207a7, // mov rx_0x0F0F, 0x0F0F
0x000000ff, 0xe00207e7, // mov rx_0x00FF, 0x00FF
0x88104000, 0xe00206e7, // mov ra_vdw, vdw_setup_0(16, 16, dma_h32( 0,0))
0x88104800, 0xe00216e7, // mov rb_vdw, vdw_setup_0(16, 16, dma_h32(16,0))
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9df1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020227, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9df1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020267, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9df1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100202a7, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9df3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021227, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9df3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021267, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9df3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100212a7, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9df1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9df3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10021167, // mov rb_inst, unif
0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0))
0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0))
0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0))
0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst
0xcc9e7081, 0x100246a0, // add out_0, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100256a0, // add out_1, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100049e0, // add out_2, r0, r2; v8adds r0, r0, r1
0x0c9e7080, 0x100009e7, // add out_3, r0, r2
0x000000b0, 0xf0f80127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x156a7d80, 0x10021c67, // mov vw_setup, arg
0x159e7000, 0x10020c27, // mov vpm, r0
0x159e7240, 0x10020c27, // mov vpm, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000000, 0xf0f409e7, // bra -, ra_link_1
0x156e7d80, 0x10021c67, // mov vw_setup, arg_vdw
0xc00007c0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS16_STRIDE-16*4
0x8c05edf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr
0x00000038, 0xf0f81127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x156a7d80, 0x10021c67, // mov vw_setup, arg
0x159e7000, 0x10020c27, // mov vpm, r0
0x159e7240, 0x10020c27, // mov vpm, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x156a7d80, 0x10020c67, // mov vr_setup, arg_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x00000080, 0xf0f801a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000000, 0xf0f409e7, // bra -, ra_link_1
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x000003e8, 0xf0f811a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f409e7, // bra -, ra_link_1
0x00000009, 0xe80009e7, // mov -, srel(i+9)
0x00000011, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f409e7, // bra -, ra_link_1
0x0000000a, 0xe80009e7, // mov -, srel(i+9)
0x00000012, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f409e7, // bra -, ra_link_1
0x0000000b, 0xe80009e7, // mov -, srel(i+9)
0x00000013, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f409e7, // bra -, ra_link_1
0x0000000c, 0xe80009e7, // mov -, srel(i+9)
0x00000014, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f409e7, // bra -, ra_link_1
0x0000000d, 0xe80009e7, // mov -, srel(i+9)
0x00000015, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f409e7, // bra -, ra_link_1
0x0000000e, 0xe80009e7, // mov -, srel(i+9)
0x00000016, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f409e7, // bra -, ra_link_1
0x0000000f, 0xe80009e7, // mov -, srel(i+9)
0x00000017, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x000000cc, 0xe20229e7, // mov.setf -, [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]
0x959fa000, 0xd002c8a0, // mov r2, r0; mov.ifnz r0, r0 << 6
0x959fa249, 0xd002c8e1, // mov r3, r1; mov.ifnz r1, r1 << 6
0x00003300, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0]
0x809f6012, 0xd000c9e0, // nop; mov.ifnz r0, r2 >> 6
0x809f601b, 0xd000c9e1, // nop; mov.ifnz r1, r3 >> 6
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x000000cc, 0xe20229e7, // mov.setf -, [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]
0x959fa000, 0xd002c8a0, // mov r2, r0; mov.ifnz r0, r0 << 6
0x959fa249, 0xd002c8e1, // mov r3, r1; mov.ifnz r1, r1 << 6
0x00003300, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0]
0x809f6012, 0xd000c9e0, // nop; mov.ifnz r0, r2 >> 6
0x809f601b, 0xd000c9e1, // nop; mov.ifnz r1, r3 >> 6
0xfffffd40, 0xf0f809e7, // brr -, r:fft_16
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x8c15fdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffcf8, 0xf0f809e7, // brr -, r:fft_16
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst
0x0d9c11c0, 0xd0020827, // sub r0, r0, 1
0x119c51c0, 0xd0020827, // shl r0, r0, 5
0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0
0x159c4fc0, 0x10060127, // mov.ifnz ra_save_16, rx_save_slave_16
0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif
0x15827d80, 0x100210e7, // mov rb_addr_y, unif
0x000005c8, 0xf00809e7, // brr.allz -, r:end
0x95208dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c3e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d3e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c3a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d3a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c362, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d363, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c322, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d323, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c41c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15fdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14727180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14727180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9c11c0, 0xd0020827, // shr r0, r0, 13-STAGES
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x000000cc, 0xe20229e7, // mov.setf -, [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]
0x959fa000, 0xd002c8a0, // mov r2, r0; mov.ifnz r0, r0 << 6
0x959fa249, 0xd002c8e1, // mov r3, r1; mov.ifnz r1, r1 << 6
0x00003300, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0]
0x809f6012, 0xd000c9e0, // nop; mov.ifnz r0, r2 >> 6
0x809f601b, 0xd000c9e1, // nop; mov.ifnz r1, r3 >> 6
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffc40, 0xf0f80027, // brr ra_link_1, r:pass_1
0x9569aff6, 0x1002469a, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm
0x956dbff6, 0x100246db, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x0e1ccdc0, 0xd00229e7, // shr.setf -, ra_points, STAGES
0xfffffc18, 0xf00809e7, // brr.allz -, r:pass_1
0x9569aff6, 0x1002469a, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm
0x956dbff6, 0x100246db, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x00000000, 0xf0f4c027, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x95208dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c3e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d3e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c3a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d3a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c362, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d363, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c322, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d323, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c41c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15fdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffc90, 0xf0f80027, // brr ra_link_1, r:pass_2
0x9569aff6, 0x1002469a, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm
0x956dbff6, 0x100246db, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0xfffffc70, 0xf0f80027, // brr ra_link_1, r:pass_2
0x9569aff6, 0x1002469a, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm
0x956dbff6, 0x100246db, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x0d01ddc0, 0x10020027, // sub ra_link_1, ra_link_1, rb_0x20
0x953cfdbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x20267016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209c9017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209c901f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x2126709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x023e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029cfec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c3e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d3e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c3a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d3a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c362, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d363, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c322, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d323, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1ccdc0, 0xd00229e7, // shr.setf -, ra_points, STAGES
0xfffffba0, 0xf00809e7, // brr.allz -, r:pass_2
0x9569aff6, 0x1002469a, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm
0x956dbff6, 0x100246db, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x00000000, 0xf0f4c027, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x952cbdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c3e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d3e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c3a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d3a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c362, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d363, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c322, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d323, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c41c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15fdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffaa0, 0xf0f80027, // brr ra_link_1, r:pass_3
0x9569aff6, 0x1002469a, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm
0x956dbff6, 0x100246db, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x953cfdbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x202a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209ca017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209ca01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x212a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x023e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029cfec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c3e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d3e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c3a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d3a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c362, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d363, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c322, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d323, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1ccdc0, 0xd00229e7, // shr.setf -, ra_points, STAGES
0xfffff9d8, 0xf00809e7, // brr.allz -, r:pass_3
0x9569aff6, 0x1002469a, // mov ra_vpm, rb_vpm; mov rb_vpm, ra_vpm
0x956dbff6, 0x100246db, // mov ra_vdw, rb_vdw; mov rb_vdw, ra_vdw
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x00000000, 0xf0f4c027, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0xfffffa08, 0xf0f809e7, // brr -, r:loop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c3fc0, 0x100209a7, // mov interrupt, flag
0x009e7000, 0x300009e7, // nop; nop; thrend
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
================================================
FILE: src/hello_fft/hex/shader_512.hex
================================================
0x00000010, 0xe0021727, // mov rb_0x10, 0x10
0x00000040, 0xe0021767, // mov rb_0x40, 0x40
0x00000080, 0xe00217a7, // mov rb_0x80, 0x80
0x000000f0, 0xe00217e7, // mov rb_0xF0, 0xF0
0x00005555, 0xe0020727, // mov rx_0x5555, 0x5555
0x00003333, 0xe0020767, // mov rx_0x3333, 0x3333
0x00000f0f, 0xe00207a7, // mov rx_0x0F0F, 0x0F0F
0x000000ff, 0xe00207e7, // mov rx_0x00FF, 0x00FF
0x88104000, 0xe00206a7, // mov ra_vdw_16, vdw_setup_0(16, 16, dma_h32( 0,0))
0x88105000, 0xe0021027, // mov rb_vdw_16, vdw_setup_0(16, 16, dma_h32(32,0))
0x90104000, 0xe00206e7, // mov ra_vdw_32, vdw_setup_0(32, 16, dma_h32( 0,0))
0x90105000, 0xe0021067, // mov rb_vdw_32, vdw_setup_0(32, 16, dma_h32(32,0))
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9de1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9de1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020327, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9de1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020367, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9de3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9de3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021327, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9de3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021367, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9de1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203a7, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9de3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213a7, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10021167, // mov rb_inst, unif
0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0))
0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0))
0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0))
0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst
0xcc9e7081, 0x10024620, // add out_0, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x10024660, // add out_1, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1
0x0c9e7080, 0x100211e7, // add out_3, r0, r2
0x000000b0, 0xf0f80127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15627d80, 0x10021c67, // mov vw_setup, arg
0x159e7000, 0x10020c27, // mov vpm, r0
0x159e7240, 0x10020c27, // mov vpm, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x156a7d80, 0x10021c67, // mov vw_setup, arg_vdw
0xc00000c0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS16_STRIDE-16*4
0x8c05ddf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr
0x00000038, 0xf0f81127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15627d80, 0x10021c67, // mov vw_setup, arg
0x159e7000, 0x10020c27, // mov vpm, r0
0x159e7240, 0x10020c27, // mov vpm, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x15627d80, 0x10020c67, // mov vr_setup, arg_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x000000c8, 0xf0f802a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15627d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x156e7d80, 0x10021c67, // mov vw_setup, ra_vdw_32
0xc0000040, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS32_STRIDE-16*4
0x8c05ddf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr
0x00000050, 0xf0f812a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15627d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x15627d80, 0x10020c67, // mov vr_setup, ra_vpm_lo
0x15c27d80, 0x100009e7, // mov -, vpm
0x00000080, 0xf0f801a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000510, 0xf0f811a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000009, 0xe80009e7, // mov -, srel(i+9)
0x00000011, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000a, 0xe80009e7, // mov -, srel(i+9)
0x00000012, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000b, 0xe80009e7, // mov -, srel(i+9)
0x00000013, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000c, 0xe80009e7, // mov -, srel(i+9)
0x00000014, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000d, 0xe80009e7, // mov -, srel(i+9)
0x00000015, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000e, 0xe80009e7, // mov -, srel(i+9)
0x00000016, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000f, 0xe80009e7, // mov -, srel(i+9)
0x00000017, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffda0, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14727180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14727180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9c41c0, 0xd0020827, // shr r0, r0, 13-STAGES
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffc80, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x203e7006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209cf00f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x203e700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x2208fc87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x00000000, 0xf0f549e7, // bra -, ra_save_32
0x95602ff6, 0x10024602, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x95647ff6, 0x10024647, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi
0x956c1ff6, 0x100246c1, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32
0x8c15edf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffbf0, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x00000000, 0xf0f489e7, // bra -, ra_save_16
0x009e7000, 0x100009e7, // nop
0x95602ff6, 0x10024602, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x95680ff6, 0x10024680, // mov ra_vdw_16, rb_vdw_16; mov rb_vdw_16, ra_vdw_16
0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst
0x0d9c11c0, 0xd0020827, // sub r0, r0, 1
0x119c51c0, 0xd0020827, // shl r0, r0, 5
0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0
0x159c4fc0, 0x10060127, // mov.ifnz ra_save_16, rx_save_slave_16
0x159cafc0, 0x100602a7, // mov.ifnz ra_save_32, rx_save_slave_32
0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif
0x15827d80, 0x100210e7, // mov rb_addr_y, unif
0x000003a8, 0xf00809e7, // brr.allz -, r:end
0x952cbdbf, 0x100243cf, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c422, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d423, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14727180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14727180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9c41c0, 0xd0020827, // shr r0, r0, 13-STAGES
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffb38, 0xf0f80227, // brr ra_link_1, r:pass_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0xfffffb18, 0xf0f80227, // brr ra_link_1, r:pass_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x9538edbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c422, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d423, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c41c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15edf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffc98, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dedc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x954d3dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x20367016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209cd017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209cd01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x2136709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x024e7c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d3ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c422, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d423, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1c9dc0, 0xd00229e7, // shr.setf -, ra_points, STAGES
0xfffffbd0, 0xf00809e7, // brr.allz -, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dedc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0xfffffc28, 0xf0f809e7, // brr -, r:loop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c3fc0, 0x100209a7, // mov interrupt, flag
0x009e7000, 0x300009e7, // nop; nop; thrend
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
================================================
FILE: src/hello_fft/hex/shader_512k.hex
================================================
0x00000013, 0xe0021227, // mov rb_STAGES, STAGES
0x00000010, 0xe00216e7, // mov rb_0x10, 0x10
0x00000040, 0xe0021727, // mov rb_0x40, 0x40
0x00000080, 0xe0021767, // mov rb_0x80, 0x80
0x000000f0, 0xe00217a7, // mov rb_0xF0, 0xF0
0x00000100, 0xe00217e7, // mov rb_0x100, 0x100
0x55555555, 0xe0020767, // mov rx_0x55555555, 0x55555555
0x33333333, 0xe00207a7, // mov rx_0x33333333, 0x33333333
0x0f0f0f0f, 0xe00207e7, // mov rx_0x0F0F0F0F, 0x0F0F0F0F
0x00ff00ff, 0xe0021667, // mov rx_0x00FF00FF, 0x00FF00FF
0x0000ffff, 0xe00216a7, // mov rx_0x0000FFFF, 0x0000FFFF
0x80904000, 0xe00206e7, // mov ra_vdw_16, vdw_setup_0(1, 16, dma_h32( 0,0))
0x80905000, 0xe0021027, // mov rb_vdw_16, vdw_setup_0(1, 16, dma_h32(32,0))
0x80904000, 0xe0020727, // mov ra_vdw_32, vdw_setup_0(1, 16, dma_h32( 0,0))
0x80905000, 0xe0021067, // mov rb_vdw_32, vdw_setup_0(1, 16, dma_h32(32,0))
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020327, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020367, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203a7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020427, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020467, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021327, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021367, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213a7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021427, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021467, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100204a7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100204e7, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100214a7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100214e7, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10021167, // mov rb_inst, unif
0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0))
0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0))
0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0))
0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst
0xcc9e7081, 0x10024660, // add out_0, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100246a0, // add out_1, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1
0x0c9e7080, 0x100211e7, // add out_3, r0, r2
0x000001d0, 0xf0f80127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, arg
0x159e7000, 0x10020c27, // mov vpm, r0
0x159e7240, 0x10020c27, // mov vpm, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x156e7d80, 0x10020827, // mov r0, arg_vdw
0x8c05cdf6, 0x10024061, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov r1, ra_save_ptr
0x00000080, 0xe00208a7, // mov r2, vdw_setup_0(1, 16, dma_h32(1,0)) - vdw_setup_0(1, 16, dma_h32(0,0))
0x00040000, 0xe00208e7, // mov r3, PASS16_STRIDE
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000038, 0xf0f81127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, arg
0x159e7000, 0x10020c27, // mov vpm, r0
0x159e7240, 0x10020c27, // mov vpm, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10020c67, // mov vr_setup, arg_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x000002e8, 0xf0f802a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x15727d80, 0x10020827, // mov r0, ra_vdw_32
0x8c05cdf6, 0x10024061, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov r1, ra_save_ptr
0x00000080, 0xe00208a7, // mov r2, vdw_setup_0(1, 16, dma_h32(1,0)) - vdw_setup_0(1, 16, dma_h32(0,0))
0x00020000, 0xe00208e7, // mov r3, PASS32_STRIDE
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x8c9e7080, 0x10024831, // add r0, r0, r2; mov vw_setup, r0
0x8c9e72c9, 0x10024872, // add r1, r1, r3; mov vw_addr, r1
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000050, 0xf0f812a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10020c67, // mov vr_setup, ra_vpm_lo
0x15c27d80, 0x100009e7, // mov -, vpm
0x00000080, 0xf0f801a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000640, 0xf0f811a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000009, 0xe80009e7, // mov -, srel(i+9)
0x00000011, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000a, 0xe80009e7, // mov -, srel(i+9)
0x00000012, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000b, 0xe80009e7, // mov -, srel(i+9)
0x00000013, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000c, 0xe80009e7, // mov -, srel(i+9)
0x00000014, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000d, 0xe80009e7, // mov -, srel(i+9)
0x00000015, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000e, 0xe80009e7, // mov -, srel(i+9)
0x00000016, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000f, 0xe80009e7, // mov -, srel(i+9)
0x00000017, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffd78, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149d91c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149d91c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149da1c0, 0x10020867, // and r1, r0, mask
0x0e9db1c0, 0x10020827, // shr r0, r0, shift
0x149da1c0, 0x10020827, // and r0, r0, mask
0x119db3c0, 0x10020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9ca1c0, 0xd0020827, // shr r0, r0, 32-STAGES-3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffc30, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x20527006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d400f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x2052700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22094c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x00000000, 0xf0f549e7, // bra -, ra_save_32
0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x95687ff6, 0x10024687, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi
0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32
0x8c15ddf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffba0, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x00000000, 0xf0f489e7, // bra -, ra_save_16
0x009e7000, 0x100009e7, // nop
0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x956c0ff6, 0x100246c0, // mov ra_vdw_16, rb_vdw_16; mov rb_vdw_16, ra_vdw_16
0x8c15edf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffb38, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffae8, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x20527006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d400f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x2052700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22094c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x00000000, 0xf0f549e7, // bra -, ra_save_32
0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x95687ff6, 0x10024687, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi
0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32
0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst
0x0d9c11c0, 0xd0020827, // sub r0, r0, 1
0x119c51c0, 0xd0020827, // shl r0, r0, 5
0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0
0x159c4fc0, 0x10060127, // mov.ifnz ra_save_16, rx_save_slave_16
0x159cafc0, 0x100602a7, // mov.ifnz ra_save_32, rx_save_slave_32
0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif
0x15827d80, 0x100210e7, // mov rb_addr_y, unif
0x00000888, 0xf00809e7, // brr.allz -, r:end
0x952cbdbf, 0x10024514, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149d91c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149d91c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149da1c0, 0x10020867, // and r1, r0, mask
0x0e9db1c0, 0x10020827, // shr r0, r0, shift
0x149da1c0, 0x10020827, // and r0, r0, mask
0x119db3c0, 0x10020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0e9ca1c0, 0xd0020827, // shr r0, r0, 32-STAGES-3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff9e0, 0xf0f80227, // brr ra_link_1, r:pass_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffff9b8, 0xf00809e7, // brr.allz -, r:pass_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c41c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15ddf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffb88, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dddc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x00003fff, 0xe0020827, // mov r0, 0x3FFF
0x141e7c00, 0x100229e7, // and.setf -, ra_points, r0
0xfffffb58, 0xf01809e7, // brr.allnz -, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dddc0, 0x100601e7, // add.ifnz ra_points, ra_points, rb_0x80
0x95618dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x20367016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209cd017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209cd01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x2136709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02627c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d8ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffffa90, 0xf00809e7, // brr.allz -, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dddc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x952cbdbf, 0x10024514, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff9f0, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0xfffff9d0, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0xfffff9b0, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0xfffff990, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x95514dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE
0x203a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32
0x209ce017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32
0x209ce01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32
0x213a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02527c80, 0x10020527, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2
0x029d4ec0, 0x10021527, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3
0x95618dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x203e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209cf017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209cf01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x213e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02627c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d8ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffff888, 0xf00809e7, // brr.allz -, r:pass_3
0x00000060, 0xe0020827, // mov r0, 3*4*8
0x0d227c00, 0x10020227, // sub ra_link_1, ra_link_1, r0
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x95492dbf, 0x10024514, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x954d3dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff780, 0xf0f80227, // brr ra_link_1, r:pass_4
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x95514dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE
0x20427016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32
0x209d0017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32
0x209d001f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32
0x2142709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02527c80, 0x10020527, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2
0x029d4ec0, 0x10021527, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3
0x95618dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x20467016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209d1017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209d101f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x2146709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02627c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d8ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c622, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d623, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c5e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d5e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c5a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d5a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c562, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d563, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffff678, 0xf00809e7, // brr.allz -, r:pass_4
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0xfffff748, 0xf0f809e7, // brr -, r:loop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c3fc0, 0x100209a7, // mov interrupt, flag
0x009e7000, 0x300009e7, // nop; nop; thrend
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
================================================
FILE: src/hello_fft/hex/shader_64k.hex
================================================
0x00000010, 0xe0021227, // mov rb_0x10, 0x10
0x000001d0, 0xe0021967, // mov r5rep, 0x1D0
0x00005555, 0xe00207a7, // mov rx_0x5555, 0x5555
0x00003333, 0xe00217a7, // mov rx_0x3333, 0x3333
0x00000f0f, 0xe00207e7, // mov rx_0x0F0F, 0x0F0F
0x000000ff, 0xe00217e7, // mov rx_0x00FF, 0x00FF
0x00000080, 0xe00208e7, // mov r3, 0x80
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020427, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020467, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100204a7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100204e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020527, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020567, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100205a7, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021427, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021467, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100214a7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100214e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021527, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021567, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100215a7, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100205e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9e70c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020627, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100215e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9e72c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021627, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10021167, // mov rb_inst, unif
0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0))
0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0))
0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0))
0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst
0xcc9e7081, 0x10025020, // add out_0, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x10025060, // add out_1, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1
0x0c9e7080, 0x100211e7, // add out_3, r0, r2
0x000000c8, 0xf0f80127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x152e7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x15327d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x153a7d80, 0x10021c67, // mov vw_setup, ra_vdw_32
0xc0003fc0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS32_STRIDE-16*4
0x8c04ddf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr
0x00000050, 0xf0f81127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x152e7d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x15327d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x152e7d80, 0x10020c67, // mov vr_setup, ra_vpm_lo
0x15c27d80, 0x100009e7, // mov -, vpm
0x00000100, 0xf0f802a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000040, 0xe0020827, // mov r0, 0x40
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x159c0fc0, 0x10021c67, // mov vw_setup, rb_vpm
0x012cbdc0, 0x10020c27, // fadd vpm, ra_64+0, rb_64+0
0x0130cdc0, 0x10020c27, // fadd vpm, ra_64+1, rb_64+1
0x159c1fc0, 0x10021c67, // mov vw_setup, rb_vpm_16
0x0134ddc0, 0x10020c27, // fadd vpm, ra_64+2, rb_64+2
0x0138edc0, 0x10020c27, // fadd vpm, ra_64+3, rb_64+3
0x159c2fc0, 0x10021c67, // mov vw_setup, rb_vpm_32
0x022cbdc0, 0x10020c27, // fsub vpm, ra_64+0, rb_64+0
0x0230cdc0, 0x10020c27, // fsub vpm, ra_64+1, rb_64+1
0x159c7fc0, 0x10021c67, // mov vw_setup, rb_vpm_48
0x0234ddc0, 0x10020c27, // fsub vpm, ra_64+2, rb_64+2
0x0238edc0, 0x10020c27, // fsub vpm, ra_64+3, rb_64+3
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0xa0104000, 0xe0021c67, // mov vw_setup, vdw_setup_0(64, 16, dma_h32(0,0))
0xc0001fc0, 0xe0021c67, // mov vw_setup, vdw_setup_1(PASS64_STRIDE-16*4)
0x8c067c36, 0x10024072, // add ra_save_ptr, ra_save_ptr, step; mov vw_addr, ra_save_ptr
0x000002b8, 0xf0f812a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd00200a7, // shl ra_temp, r0, 5
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0fc49e7, // brr -, ra_temp
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x000000e0, 0xf0f809e7, // brr -, r:2f
0x00000010, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x000000c0, 0xf0f809e7, // brr -, r:2f
0x00000011, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x000000a0, 0xf0f809e7, // brr -, r:2f
0x00000012, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000080, 0xf0f809e7, // brr -, r:2f
0x00000013, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000060, 0xf0f809e7, // brr -, r:2f
0x00000014, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000040, 0xf0f809e7, // brr -, r:2f
0x00000015, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000020, 0xf0f809e7, // brr -, r:2f
0x00000016, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f809e7, // brr -, r:2f
0x00000017, 0xe80009e7, // mov -, sacq(i)
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c0fc0, 0x10021c67, // mov vw_setup, rb_vpm
0x012cbdc0, 0x10020c27, // fadd vpm, ra_64+0, rb_64+0
0x0130cdc0, 0x10020c27, // fadd vpm, ra_64+1, rb_64+1
0x159c1fc0, 0x10021c67, // mov vw_setup, rb_vpm_16
0x0134ddc0, 0x10020c27, // fadd vpm, ra_64+2, rb_64+2
0x0138edc0, 0x10020c27, // fadd vpm, ra_64+3, rb_64+3
0x159c2fc0, 0x10021c67, // mov vw_setup, rb_vpm_32
0x022cbdc0, 0x10020c27, // fsub vpm, ra_64+0, rb_64+0
0x0230cdc0, 0x10020c27, // fsub vpm, ra_64+1, rb_64+1
0x159c7fc0, 0x10021c67, // mov vw_setup, rb_vpm_48
0x0234ddc0, 0x10020c27, // fsub vpm, ra_64+2, rb_64+2
0x0238edc0, 0x10020c27, // fsub vpm, ra_64+3, rb_64+3
0x00000000, 0xf0fc49e7, // brr -, ra_temp
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x00000008, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x00000009, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x0000000a, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x0000000b, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x0000000c, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x0000000d, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x0000000e, 0xe80009e7, // mov -, srel(i+8)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x159c0fc0, 0x10020c67, // mov vr_setup, rb_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x0000000f, 0xe80009e7, // mov -, srel(i+8)
0x00000080, 0xf0f801a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000858, 0xf0f811a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000009, 0xe80009e7, // mov -, srel(i+9)
0x00000011, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000a, 0xe80009e7, // mov -, srel(i+9)
0x00000012, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000b, 0xe80009e7, // mov -, srel(i+9)
0x00000013, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000c, 0xe80009e7, // mov -, srel(i+9)
0x00000014, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000d, 0xe80009e7, // mov -, srel(i+9)
0x00000015, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000e, 0xe80009e7, // mov -, srel(i+9)
0x00000016, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000f, 0xe80009e7, // mov -, srel(i+9)
0x00000017, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffda0, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149de1c0, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x149de1c0, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149df1c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149df1c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x119c31c0, 0xd0020827, // shl r0, r0, STAGES-13
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffc80, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x20667006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d900f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x2066700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22099c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x01267c00, 0x100202e7, // fadd ra_64+0, ra_32_re, r0
0x019c9e40, 0x10020327, // fadd ra_64+1, rb_32_im, r1
0x02267c00, 0x10020367, // fsub ra_64+2, ra_32_re, r0
0x029c9e40, 0x100203a7, // fsub ra_64+3, rb_32_im, r1
0x8c167d76, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149de1c0, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x149de1c0, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149df1c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149df1c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x119c31c0, 0xd0020827, // shl r0, r0, STAGES-13
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffb20, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149de1c0, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x149de1c0, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149df1c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149df1c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x119c31c0, 0xd0020827, // shl r0, r0, STAGES-13
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffa00, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x20667006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d900f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x2066700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22099c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x029c9e40, 0x100208e7, // fsub r3, rb_32_im, r1
0x02267c00, 0x100208a7, // fsub r2, ra_32_re, r0
0x019c9e40, 0x10020867, // fadd r1, rb_32_im, r1
0x01267c00, 0x10020827, // fadd r0, ra_32_re, r0
0x203e700e, 0x100049c9, // nop; fmul rb_32_im, r1, ra_tw_re+TW64_P1_BASE0
0x209cf00f, 0x100059c9, // nop; fmul ra_32_re, r1, rb_tw_im+TW64_P1_BASE0
0x209cf007, 0x100049e1, // nop; fmul r1, r0, rb_tw_im+TW64_P1_BASE0
0x213c93c6, 0x10025320, // fadd rb_64+1, r1, rb_32_im; fmul r0, r0, ra_tw_re+TW64_P1_BASE0
0x2225019f, 0x100252c9, // fsub rb_64+0, r0, ra_32_re; fmul ra_32_re, r3, rb_tw_im+TW64_P1_BASE1
0x2042701e, 0x100049c9, // nop; fmul rb_32_im, r3, ra_tw_re+TW64_P1_BASE1
0x00000000, 0xf0f549e7, // bra -, ra_save_64
0x209d0017, 0x100049e3, // nop; fmul r3, r2, rb_tw_im+TW64_P1_BASE1
0x214097d6, 0x100253a2, // fadd rb_64+3, r3, rb_32_im; fmul r2, r2, ra_tw_re+TW64_P1_BASE1
0x02267580, 0x10021367, // fsub rb_64+2, r2, ra_32_re
0x8c14cdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff920, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff8d0, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x20667006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d900f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x2066700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22099c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x00000000, 0xf0f489e7, // bra -, ra_save_32
0x952c2ff6, 0x100242c2, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x95307ff6, 0x10024307, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi
0x9538eff6, 0x1002438e, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32
0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst
0x0d9c11c0, 0xd0020827, // sub r0, r0, 1
0x119c51c0, 0xd0020827, // shl r0, r0, 5
0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0
0x159c4fc0, 0x10060127, // mov.ifnz ra_save_32, rx_save_slave_32
0x159cafc0, 0x100602a7, // mov.ifnz ra_save_64, rx_save_slave_64
0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif
0x15827d80, 0x100210e7, // mov rb_addr_y, unif
0x000006a8, 0xf00809e7, // brr.allz -, r:end
0x95451dbf, 0x10024659, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x95492dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c6e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d6e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c6a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d6a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c61c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149de1c0, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x149de1c0, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149df1c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149df1c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x119c31c0, 0xd0020827, // shl r0, r0, STAGES-13
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff7f0, 0xf0f80227, // brr ra_link_1, r:pass_1
0x009e7000, 0x100009e7, // nop
0x00000200, 0xe0020827, // mov r0, 0x200
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x00000010, 0xe0020867, // mov r1, STAGES
0x0e1e7c40, 0x100229e7, // shr.setf -, ra_points, r1
0xfffff7c0, 0xf00809e7, // brr.allz -, r:pass_1
0x009e7000, 0x100009e7, // nop
0x00000200, 0xe0020827, // mov r0, 0x200
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x159c0fc0, 0x100202e7, // mov ra_vpm_lo, rb_vpm
0x159c1fc0, 0x10020327, // mov ra_vpm_hi, rb_vpm_16
0x90104000, 0xe00203a7, // mov ra_vdw_32, vdw_setup_0(32, 16, dma_h32( 0,0))
0x90105000, 0xe00213a7, // mov rb_vdw_32, vdw_setup_0(32, 16, dma_h32(32,0))
0x00000060, 0xe00212e7, // mov rb_3x4x8, 3*4*8
0x000000f0, 0xe0021327, // mov rb_0xF0, 0xF0
0x00000040, 0xe0021367, // mov rb_0x40, 0x40
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x95451dbf, 0x10024659, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x95492dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c6e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d6e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c6a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d6a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffbd0, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0xfffffbb0, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0xfffffb90, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0xfffffb70, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x95659dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE
0x204e7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32
0x209d3017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32
0x209d301f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32
0x214e709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02667c80, 0x10020667, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2
0x029d9ec0, 0x10021667, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3
0x9575ddbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x20527016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209d4017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209d401f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x2152709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02767c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029ddec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c6e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d6e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c6a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d6a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffffa68, 0xf00809e7, // brr.allz -, r:pass_2
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x0d20bdc0, 0x10020227, // sub ra_link_1, ra_link_1, rb_3x4x8
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x955d7dbf, 0x10024659, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x95618dbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c6e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d6e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c6a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d6a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c148df6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffff960, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x95659dbf, 0x100248a3, // mov r2, ra_tw_re+TW32_ACTIVE; mov r3, rb_tw_im+TW32_ACTIVE
0x20567016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw32
0x209d5017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw32
0x209d501f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw32
0x2156709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw32
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02667c80, 0x10020667, // fsub ra_tw_re+TW32_ACTIVE, ra_tw_re+TW32_ACTIVE, r2
0x029d9ec0, 0x10021667, // fsub rb_tw_im+TW32_ACTIVE, rb_tw_im+TW32_ACTIVE, r3
0x9575ddbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x205a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209d6017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209d601f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x215a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02767c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029ddec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c762, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d763, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c722, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d723, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c6e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d6e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c6a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d6a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1c8dc0, 0x100229e7, // shr.setf -, ra_points, rb_STAGES
0xfffff858, 0xf00809e7, // brr.allz -, r:pass_3
0x009e7000, 0x100009e7, // nop
0x00000100, 0xe0020827, // mov r0, 0x100
0x0c1e7c00, 0x100201e7, // add ra_points, ra_points, r0
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0xfffff928, 0xf0f809e7, // brr -, r:loop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c3fc0, 0x100209a7, // mov interrupt, flag
0x009e7000, 0x300009e7, // nop; nop; thrend
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
================================================
FILE: src/hello_fft/hex/shader_8k.hex
================================================
0x00000010, 0xe00216e7, // mov rb_0x10, 0x10
0x00000040, 0xe0021727, // mov rb_0x40, 0x40
0x00000080, 0xe0021767, // mov rb_0x80, 0x80
0x000000f0, 0xe00217a7, // mov rb_0xF0, 0xF0
0x00000100, 0xe00217e7, // mov rb_0x100, 0x100
0x00005555, 0xe0020767, // mov rx_0x5555, 0x5555
0x00003333, 0xe00207a7, // mov rx_0x3333, 0x3333
0x00000f0f, 0xe00207e7, // mov rx_0x0F0F, 0x0F0F
0x000000ff, 0xe00216a7, // mov rx_0x00FF, 0x00FF
0x88104000, 0xe00206e7, // mov ra_vdw_16, vdw_setup_0(16, 16, dma_h32( 0,0))
0x88105000, 0xe0021027, // mov rb_vdw_16, vdw_setup_0(16, 16, dma_h32(32,0))
0x90104000, 0xe0020727, // mov ra_vdw_32, vdw_setup_0(32, 16, dma_h32( 0,0))
0x90105000, 0xe0021067, // mov rb_vdw_32, vdw_setup_0(32, 16, dma_h32(32,0))
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100202e7, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020327, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x10020367, // mov ra_tw_re+off+i, r4
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203a7, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100212e7, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021327, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x10021367, // mov rb_tw_im+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213a7, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10020827, // mov r0, addr
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x11983dc0, 0xd00208a7, // shl r2, elem_num, 3
0x0c9e7080, 0x10020e27, // add t0s, r0, r2
0x0c9dd1c0, 0xa0020827, // add r0, r0, stride; ldtmu0
0x159e7900, 0x100203e7, // mov ra_tw_re+off+i, r4
0x0c9e7280, 0x10020e27, // add t0s, r1, r2
0x0c9dd3c0, 0xa0020867, // add r1, r1, stride; ldtmu0
0x159e7900, 0x100213e7, // mov rb_tw_im+off+i, r4
0x15827d80, 0x10021167, // mov rb_inst, unif
0x00101200, 0xe0020827, // mov r0, vpm_setup(1, 1, v32( 0,0))
0x00000010, 0xe0020867, // mov r1, vpm_setup(1, 1, v32(16,0)) - vpm_setup(1, 1, v32(0,0))
0x00000002, 0xe00208a7, // mov r2, vpm_setup(1, 1, v32( 0,2)) - vpm_setup(1, 1, v32(0,0))
0x409c5017, 0x100049e2, // nop; mul24 r2, r2, in_inst
0xcc9e7081, 0x10024660, // add out_0, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100246a0, // add out_1, r0, r2; v8adds r0, r0, r1
0xcc9e7081, 0x100250a0, // add out_2, r0, r2; v8adds r0, r0, r1
0x0c9e7080, 0x100211e7, // add out_3, r0, r2
0x000000b0, 0xf0f80127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, arg
0x159e7000, 0x10020c27, // mov vpm, r0
0x159e7240, 0x10020c27, // mov vpm, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x156e7d80, 0x10021c67, // mov vw_setup, arg_vdw
0xc0000fc0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS16_STRIDE-16*4
0x8c05cdf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr
0x00000038, 0xf0f81127, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, arg
0x159e7000, 0x10020c27, // mov vpm, r0
0x159e7240, 0x10020c27, // mov vpm, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10020c67, // mov vr_setup, arg_vpm
0x15c27d80, 0x100009e7, // mov -, vpm
0x000000c8, 0xf0f802a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x15727d80, 0x10021c67, // mov vw_setup, ra_vdw_32
0xc00007c0, 0xe0021c67, // mov vw_setup, vdw_setup_1(0) + PASS32_STRIDE-16*4
0x8c05cdf6, 0x10024072, // add ra_save_ptr, ra_save_ptr, rb_0x40; mov vw_addr, ra_save_ptr
0x00000050, 0xf0f812a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10021c67, // mov vw_setup, ra_vpm_lo
0x01267c00, 0x10020c27, // fadd vpm, ra_32_re, r0
0x019c9e40, 0x10020c27, // fadd vpm, rb_32_im, r1
0x156a7d80, 0x10021c67, // mov vw_setup, ra_vpm_hi
0x02267c00, 0x10020c27, // fsub vpm, ra_32_re, r0
0x029c9e40, 0x10020c27, // fsub vpm, rb_32_im, r1
0x00000000, 0xf0f4c9e7, // bra -, ra_sync
0x009e7000, 0x100009e7, // nop
0x15667d80, 0x10020c67, // mov vr_setup, ra_vpm_lo
0x15c27d80, 0x100009e7, // mov -, vpm
0x00000080, 0xf0f801a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x00000019, 0xe80009e7, // mov -, sacq(i+9)
0x00000001, 0xe80009e7, // mov -, srel(i+1)
0x0000001a, 0xe80009e7, // mov -, sacq(i+9)
0x00000002, 0xe80009e7, // mov -, srel(i+1)
0x0000001b, 0xe80009e7, // mov -, sacq(i+9)
0x00000003, 0xe80009e7, // mov -, srel(i+1)
0x0000001c, 0xe80009e7, // mov -, sacq(i+9)
0x00000004, 0xe80009e7, // mov -, srel(i+1)
0x0000001d, 0xe80009e7, // mov -, sacq(i+9)
0x00000005, 0xe80009e7, // mov -, srel(i+1)
0x0000001e, 0xe80009e7, // mov -, sacq(i+9)
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000006, 0xe80009e7, // mov -, srel(i+1)
0x0000001f, 0xe80009e7, // mov -, sacq(i+9)
0x00000007, 0xe80009e7, // mov -, srel(i+1)
0x00000500, 0xf0f811a7, // brr rx_ptr, label
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x00000009, 0xe80009e7, // mov -, srel(i+9)
0x00000011, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000a, 0xe80009e7, // mov -, srel(i+9)
0x00000012, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000b, 0xe80009e7, // mov -, srel(i+9)
0x00000013, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000c, 0xe80009e7, // mov -, srel(i+9)
0x00000014, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000d, 0xe80009e7, // mov -, srel(i+9)
0x00000015, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000e, 0xe80009e7, // mov -, srel(i+9)
0x00000016, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x00000000, 0xf0f509e7, // bra -, ra_link_1
0x0000000f, 0xe80009e7, // mov -, srel(i+9)
0x00000017, 0xe80009e7, // mov -, sacq(i+1)
0x009e7000, 0x100009e7, // nop
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> (1<> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffda8, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x959e7009, 0x10024249, // mov ra_32_re, r0; mov rb_32_im, r1
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149da1c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149da1c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0xfffffc90, 0xf0f80027, // brr ra_link_0, call
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x20427006, 0x100059c2, // nop; fmul ra_temp, r0, ra_tw_re+TW32_ACTIVE
0x209d000f, 0x100049e2, // nop; fmul r2, r1, rb_tw_im+TW32_ACTIVE
0x2042700e, 0x100049e3, // nop; fmul r3, r1, ra_tw_re+TW32_ACTIVE
0x22090c87, 0x10024821, // fsub r0, ra_temp, r2; fmul r1, r0, rb_tw_im+TW32_ACTIVE
0x019e72c0, 0x10020867, // fadd r1, r1, r3
0x00000000, 0xf0f549e7, // bra -, ra_save_32
0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x95687ff6, 0x10024687, // mov ra_vpm_hi, rb_vpm_hi; mov rb_vpm_hi, ra_vpm_hi
0x95701ff6, 0x10024701, // mov ra_vdw_32, rb_vdw_32; mov rb_vdw_32, ra_vdw_32
0x8c15ddf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffc00, 0xf0f80027, // brr ra_link_0, call
0x009e7000, 0xa00009e7, // nop; ldtmu0
0x159e7900, 0xa0020827, // mov r0, r4; ldtmu0
0x159e7900, 0x10020867, // mov r1, r4
0x00000000, 0xf0f489e7, // bra -, ra_save_16
0x009e7000, 0x100009e7, // nop
0x95642ff6, 0x10024642, // mov ra_vpm_lo, rb_vpm_lo; mov rb_vpm_lo, ra_vpm_lo
0x956c0ff6, 0x100246c0, // mov ra_vdw_16, rb_vdw_16; mov rb_vdw_16, ra_vdw_16
0x159c5fc0, 0x10022827, // mov.setf r0, rb_inst
0x0d9c11c0, 0xd0020827, // sub r0, r0, 1
0x119c51c0, 0xd0020827, // shl r0, r0, 5
0x0c9c6e00, 0x100601a7, // add.ifnz ra_sync, rx_sync_slave, r0
0x159c4fc0, 0x10060127, // mov.ifnz ra_save_16, rx_save_slave_16
0x159cafc0, 0x100602a7, // mov.ifnz ra_save_32, rx_save_slave_32
0x15827d80, 0x100220e7, // mov.setf ra_addr_x, unif
0x15827d80, 0x100210e7, // mov rb_addr_y, unif
0x00000590, 0xf00809e7, // brr.allz -, r:end
0x952cbdbf, 0x10024410, // mov ra_tw_re+TW32_ACTIVE, ra_tw_re+tw32; mov rb_tw_im+TW32_ACTIVE, rb_tw_im+tw32
0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c51c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15bdf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x14767180, 0x10020867, // and r1, r0, mask
0x0e9c11c0, 0xd0020827, // shr r0, r0, shift
0x14767180, 0x10020827, // and r0, r0, mask
0x119c13c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147a7180, 0x10020867, // and r1, r0, mask
0x0e9c21c0, 0xd0020827, // shr r0, r0, shift
0x147a7180, 0x10020827, // and r0, r0, mask
0x119c23c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x147e7180, 0x10020867, // and r1, r0, mask
0x0e9c41c0, 0xd0020827, // shr r0, r0, shift
0x147e7180, 0x10020827, // and r0, r0, mask
0x119c43c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x149da1c0, 0x10020867, // and r1, r0, mask
0x0e9c81c0, 0xd0020827, // shr r0, r0, shift
0x149da1c0, 0x10020827, // and r0, r0, mask
0x119c83c0, 0xd0020867, // shl r1, r1, shift
0x159e7040, 0x10020827, // or r0, r0, r1
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x94981dc0, 0xd00269e2, // and.setf -, elem_num, 1; mov r2, r0
0x959f1489, 0xd004c820, // mov.ifz r0, r2; mov.ifnz r0, r1 >> 1
0x959ff252, 0xd0068861, // mov.ifnz r1, r1; mov.ifz r1, r2 << 1
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffb50, 0xf0f80227, // brr ra_link_1, r:pass_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x0e1cddc0, 0xd00229e7, // shr.setf -, ra_points, STAGES
0xfffffb28, 0xf00809e7, // brr.allz -, r:pass_1
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dfdc0, 0x100201e7, // add ra_points, ra_points, rb_0x100
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x9530cdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c41c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15ddf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffc98, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dddc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0xfffffc78, 0xf0f80227, // brr ra_link_1, r:pass_2
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dddc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x95514dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x20367016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209cd017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209cd01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x2136709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02527c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d4ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1cddc0, 0xd00229e7, // shr.setf -, ra_points, STAGES
0xfffffbb0, 0xf00809e7, // brr.allz -, r:pass_2
0x00000020, 0xe0020827, // mov r0, 4*8
0x0d227c00, 0x10020227, // sub ra_link_1, ra_link_1, r0
0x0c1dddc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0x950c3dbf, 0x100250c3, // mov rb_addr_y, ra_addr_x; mov ra_addr_x, rb_addr_y
0x953cfdbf, 0x100248a3, // mov r2, ra_tw_re+tw16; mov r3, rb_tw_im+tw16
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x159c5fc0, 0x10020827, // mov r0, rb_inst
0x119c41c0, 0xd0020827, // shl r0, r0, m
0x0c9a7180, 0x10020167, // add ra_load_idx, r0, elem_num
0x00000000, 0xe00201e7, // mov ra_points, 0
0x159c3fc0, 0x10020067, // mov ra_save_ptr, rb_addr_y
0x8c15ddf6, 0x10024160, // add ra_load_idx, ra_load_idx, stride; mov r0, ra_load_idx
0x119c31c0, 0xd0020827, // shl r0, r0, 3
0x0c9c41c0, 0xd0020867, // add r1, r0, 4
0x0c0e7c00, 0x10020e27, // add t0s, ra_addr_x, r0
0x0c0e7c40, 0x10020e27, // add t0s, ra_addr_x, r1
0xfffffab0, 0xf0f80227, // brr ra_link_1, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dddc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x95514dbf, 0x100248a3, // mov r2, ra_tw_re+TW16_ACTIVE+3; mov r3, rb_tw_im+TW16_ACTIVE+3
0x203a7016, 0x100049e0, // nop; fmul r0, r2, ra_tw_re+tw16
0x209ce017, 0x100049e1, // nop; fmul r1, r2, rb_tw_im+tw16
0x209ce01f, 0x100049e2, // nop; fmul r2, r3, rb_tw_im+tw16
0x213a709e, 0x100248a3, // fadd r2, r0, r2; fmul r3, r3, ra_tw_re+tw16
0x029e7640, 0x100208e7, // fsub r3, r3, r1
0x02527c80, 0x100208a7, // fsub r2, ra_tw_re+TW16_ACTIVE+3, r2
0x029d4ec0, 0x100208e7, // fsub r3, rb_tw_im+TW16_ACTIVE+3, r3
0x14988dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f8492, 0xd002c522, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f86db, 0xd002d523, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14984dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f4492, 0xd002c4e2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f46db, 0xd002d4e3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14982dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f2492, 0xd002c4a2, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f26db, 0xd002d4a3, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x14981dc0, 0xd00229e7, // and.setf -, elem_num, (8>>i)
0x959f1492, 0xd002c462, // mov ra_tw_re+TW16_ACTIVE+3-i, r2; mov.ifnz r2, r2 >> (8>>i)
0x959f16db, 0xd002d463, // mov rb_tw_im+TW16_ACTIVE+3-i, r3; mov.ifnz r3, r3 >> (8>>i)
0x0e1cddc0, 0xd00229e7, // shr.setf -, ra_points, STAGES
0xfffff9e8, 0xf00809e7, // brr.allz -, r:pass_3
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c1dddc0, 0x100201e7, // add ra_points, ra_points, rb_0x80
0x00000000, 0xf0f4c227, // bra ra_link_1, ra_sync
0x009e7000, 0x100009e7, // nop
0x009e7000, 0xa00009e7, // ldtmu0
0x009e7000, 0xa00009e7, // ldtmu0
0xfffffa40, 0xf0f809e7, // brr -, r:loop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x159c3fc0, 0x100209a7, // mov interrupt, flag
0x009e7000, 0x300009e7, // nop; nop; thrend
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
================================================
FILE: src/hello_fft/hex/shader_trans.hex
================================================
0x15827d80, 0x10020e27, // mov t0s, unif
0x009e7000, 0xa00009e7, // ldtmu0
0x0c9cc9c0, 0xd0020e27, // add t0s, r4, 3*4
0x009e7000, 0xa00009e7, // ldtmu0
0x0c827980, 0x100200a7, // add ra_src_base, r4, unif
0x15827d80, 0x10020e27, // mov t0s, unif
0x009e7000, 0xa00009e7, // ldtmu0
0x0c9cc9c0, 0xd0020e27, // add t0s, r4, 3*4
0x009e7000, 0xa00009e7, // ldtmu0
0x0c827980, 0x100200e7, // add ra_dst_base, r4, unif
0x15827d80, 0x100214a7, // mov rb_Y_STRIDE_SRC, unif
0x15827d80, 0x100214e7, // mov rb_Y_STRIDE_DST, unif
0x15827d80, 0x10021527, // mov rb_NX, unif
0x15827d80, 0x10021567, // mov rb_NY, unif
0x00000008, 0xe0021467, // mov rb_X_STRIDE, 2*4
0x00000010, 0xe0021427, // mov rb_0x10, 0x10
0xc0000000, 0xe0020827, // mov r0, vdw_setup_1(0)
0x0c9d31c0, 0x10020827, // add r0, r0, rb_Y_STRIDE_DST
0x00000040, 0xe0020867, // mov r1, 16*4
0x0d9e7040, 0x100201a7, // sub ra_vdw_stride, r0, r1
0x40991037, 0x100049e0, // nop; mul24 r0, elem_num, rb_X_STRIDE
0x159e7000, 0x10021027, // mov rb_offsets_re+i, r0
0x0c9c41c0, 0xd0021227, // add rb_offsets_im+i, r0, 4
0x0c9d21c0, 0x10020827, // add r0, r0, rb_Y_STRIDE_SRC
0x159e7000, 0x10021067, // mov rb_offsets_re+i, r0
0x0c9c41c0, 0xd0021267, // add rb_offsets_im+i, r0, 4
0x0c9d21c0, 0x10020827, // add r0, r0, rb_Y_STRIDE_SRC
0x159e7000, 0x100210a7, // mov rb_offsets_re+i, r0
0x0c9c41c0, 0xd00212a7, // add rb_offsets_im+i, r0, 4
0x0c9d21c0, 0x10020827, // add r0, r0, rb_Y_STRIDE_SRC
0x159e7000, 0x100210e7, // mov rb_offsets_re+i, r0
0x0c9c41c0, 0xd00212e7, // add rb_offsets_im+i, r0, 4
0x0c9d21c0, 0x10020827, // add r0, r0, rb_Y_STRIDE_SRC
0x159e7000, 0x10021127, // mov rb_offsets_re+i, r0
0x0c9c41c0, 0xd0021327, // add rb_offsets_im+i, r0, 4
0x0c9d21c0, 0x10020827, // add r0, r0, rb_Y_STRIDE_SRC
0x159e7000, 0x10021167, // mov rb_offsets_re+i, r0
0x0c9c41c0, 0xd0021367, // add rb_offsets_im+i, r0, 4
0x0c9d21c0, 0x10020827, // add r0, r0, rb_Y_STRIDE_SRC
0x159e7000, 0x100211a7, // mov rb_offsets_re+i, r0
0x0c9c41c0, 0xd00213a7, // add rb_offsets_im+i, r0, 4
0x0c9d21c0, 0x10020827, // add r0, r0, rb_Y_STRIDE_SRC
0x159e7000, 0x100211e7, // mov rb_offsets_re+i, r0
0x0c9c41c0, 0xd00213e7, // add rb_offsets_im+i, r0, 4
0x0c9d21c0, 0x10020827, // add r0, r0, rb_Y_STRIDE_SRC
0x00000000, 0xe0020067, // mov ra_y, 0
0x00000000, 0xe0020027, // mov ra_x, 0
0x40052037, 0x100049e1, // nop; mul24 r1, ra_y, rb_Y_STRIDE_SRC
0x40011037, 0x100049e0, // nop; mul24 r0, ra_x, rb_X_STRIDE
0x0c9e7040, 0x10020827, // add r0, r0, r1
0x0c0a7c00, 0x10020127, // add ra_src_cell, ra_src_base, r0
0x40013037, 0x100049e1, // nop; mul24 r1, ra_x, rb_Y_STRIDE_DST
0x40051037, 0x100049e0, // nop; mul24 r0, ra_y, rb_X_STRIDE
0x0c9e7040, 0x10020827, // add r0, r0, r1
0x0c0e7c00, 0x10020167, // add ra_dst_cell, ra_dst_base, r0
0x00001200, 0xe0021c67, // mov vw_setup, vpm_setup(16, 1, v32(0,0))
0x0c100dc0, 0x10020e27, // add t0s, ra_src_cell, rb_offsets_re
0x0c108dc0, 0x10020f27, // add t1s, ra_src_cell, rb_offsets_im
0x0c101dc0, 0x10020e27, // add t0s, ra_src_cell, rb_offsets_re+1+i
0x0c109dc0, 0x10020f27, // add t1s, ra_src_cell, rb_offsets_im+1+i
0x009e7000, 0xa00009e7, // ldtmu0
0x159e7900, 0x10020c27, // mov vpm, r4
0x009e7000, 0xb00009e7, // ldtmu1
0x159e7900, 0x10020c27, // mov vpm, r4
0x0c102dc0, 0x10020e27, // add t0s, ra_src_cell, rb_offsets_re+1+i
0x0c10adc0, 0x10020f27, // add t1s, ra_src_cell, rb_offsets_im+1+i
0x009e7000, 0xa00009e7, // ldtmu0
0x159e7900, 0x10020c27, // mov vpm, r4
0x009e7000, 0xb00009e7, // ldtmu1
0x159e7900, 0x10020c27, // mov vpm, r4
0x0c103dc0, 0x10020e27, // add t0s, ra_src_cell, rb_offsets_re+1+i
0x0c10bdc0, 0x10020f27, // add t1s, ra_src_cell, rb_offsets_im+1+i
0x009e7000, 0xa00009e7, // ldtmu0
0x159e7900, 0x10020c27, // mov vpm, r4
0x009e7000, 0xb00009e7, // ldtmu1
0x159e7900, 0x10020c27, // mov vpm, r4
0x0c104dc0, 0x10020e27, // add t0s, ra_src_cell, rb_offsets_re+1+i
0x0c10cdc0, 0x10020f27, // add t1s, ra_src_cell, rb_offsets_im+1+i
0x009e7000, 0xa00009e7, // ldtmu0
0x159e7900, 0x10020c27, // mov vpm, r4
0x009e7000, 0xb00009e7, // ldtmu1
0x159e7900, 0x10020c27, // mov vpm, r4
0x0c105dc0, 0x10020e27, // add t0s, ra_src_cell, rb_offsets_re+1+i
0x0c10ddc0, 0x10020f27, // add t1s, ra_src_cell, rb_offsets_im+1+i
0x009e7000, 0xa00009e7, // ldtmu0
0x159e7900, 0x10020c27, // mov vpm, r4
0x009e7000, 0xb00009e7, // ldtmu1
0x159e7900, 0x10020c27, // mov vpm, r4
0x0c106dc0, 0x10020e27, // add t0s, ra_src_cell, rb_offsets_re+1+i
0x0c10edc0, 0x10020f27, // add t1s, ra_src_cell, rb_offsets_im+1+i
0x009e7000, 0xa00009e7, // ldtmu0
0x159e7900, 0x10020c27, // mov vpm, r4
0x009e7000, 0xb00009e7, // ldtmu1
0x159e7900, 0x10020c27, // mov vpm, r4
0x0c107dc0, 0x10020e27, // add t0s, ra_src_cell, rb_offsets_re+1+i
0x0c10fdc0, 0x10020f27, // add t1s, ra_src_cell, rb_offsets_im+1+i
0x009e7000, 0xa00009e7, // ldtmu0
0x159e7900, 0x10020c27, // mov vpm, r4
0x009e7000, 0xb00009e7, // ldtmu1
0x159e7900, 0x10020c27, // mov vpm, r4
0x009e7000, 0xa00009e7, // ldtmu0
0x159e7900, 0x10020c27, // mov vpm, r4
0x009e7000, 0xb00009e7, // ldtmu1
0x159e7900, 0x10020c27, // mov vpm, r4
0x88104000, 0xe0021c67, // mov vw_setup, vdw_setup_0(16, 16, dma_h32(0,0))
0x151a7d80, 0x10021c67, // mov vw_setup, ra_vdw_stride
0x15167d80, 0x10021ca7, // mov vw_addr, ra_dst_cell
0x159f2fc0, 0x100009e7, // mov -, vw_wait
0x0c010dc0, 0x10020027, // add ra_x, ra_x, rb_0x10
0x009e7000, 0x100009e7, // nop
0x0d014dc0, 0x100229e7, // sub.setf -, ra_x, rb_NX
0xfffffde0, 0xf01809e7, // brr.allnz -, r:inner
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x0c048dc0, 0xd0020067, // add ra_y, ra_y, 8
0x009e7000, 0x100009e7, // nop
0x0d055dc0, 0x100229e7, // sub.setf -, ra_y, rb_NY
0xfffffda0, 0xf01809e7, // brr.allnz -, r:outer
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
0x00000001, 0xe00209a7, // mov interrupt, 1
0x009e7000, 0x300009e7, // nop; nop; thrend
0x009e7000, 0x100009e7, // nop
0x009e7000, 0x100009e7, // nop
================================================
FILE: src/hello_fft/mailbox.c
================================================
/*
Copyright (c) 2012, Broadcom Europe Ltd.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "mailbox.h"
#define PAGE_SIZE (4 * 1024)
/*
 * Map `size` bytes of /dev/mem starting at offset `base` into this process.
 *
 * mmap() needs a page-aligned file offset, so `base` is rounded down to the
 * enclosing page boundary and the returned pointer is advanced by the
 * remainder, i.e. it points at the byte that corresponds to the original
 * `base`.
 *
 * Returns the pointer to the requested byte. Any failure (opening /dev/mem or
 * mmap itself) is logged and terminates the process with exit(-1).
 */
void* mapmem(unsigned base, unsigned size) {
    int mem_fd;
    unsigned offset = base % PAGE_SIZE;
    base = base - offset;
    /* open /dev/mem */
    if ((mem_fd = open("/dev/mem", O_RDWR | O_SYNC)) < 0) {
        log(LOG_CRIT, "mapmem(): can't open /dev/mem: %s\n", strerror(errno));
        exit(-1);
    }
    /*
     * The mapping begins `offset` bytes before the address the caller asked
     * for, so it has to be `offset` bytes longer as well; otherwise the tail
     * of the requested range could fall outside the mapping.
     */
    void* mem = mmap(0, (size_t)size + offset, PROT_READ | PROT_WRITE, MAP_SHARED /*|MAP_FIXED*/, mem_fd, base);
#ifdef GPU_FFT_DEBUG
    printf("base=0x%x, mem=%p\n", base, mem);
#endif
    if (mem == MAP_FAILED) {
        log(LOG_CRIT, "mapmem(): mmap error: %s\n", strerror(errno));
        exit(-1);
    }
    /* the mapping stays valid after the descriptor is closed */
    close(mem_fd);
    return (char*)mem + offset;
}
/*
 * Remove the mapping of `size` bytes at `addr` with munmap().
 *
 * A munmap() failure is logged and terminates the process with exit(-1).
 *
 * NOTE(review): `addr` is handed to munmap() unchanged; a pointer that
 * mapmem() returned for a non-page-aligned base would itself be unaligned --
 * confirm callers only map page-aligned addresses.
 */
void unmapmem(void* addr, unsigned size) {
    if (munmap(addr, size) == 0) {
        return;
    }
    log(LOG_CRIT, "unmapmem(): munmap error: %s\n", strerror(errno));
    exit(-1);
}
/*
 * use ioctl to send mbox property message
 *
 * `buf` is the property message; its first word holds the total message size
 * in bytes (see the callers below). The buffer is passed to the driver with
 * the IOCTL_MBOX_PROPERTY request.
 *
 * Returns the ioctl() result. A negative result is logged here but not
 * otherwise handled, so callers decide what to do with it.
 */
static int mbox_property(int file_desc, void* buf) {
    int ret_val = ioctl(file_desc, IOCTL_MBOX_PROPERTY, buf);
    if (ret_val < 0) {
        log(LOG_ERR, "mbox_property(): ioctl_set_msg failed: %s\n", strerror(errno));
    }
#ifdef GPU_FFT_DEBUG
    /* dump every word of the message together with its byte offset */
    const unsigned* words = (const unsigned*)buf;
    unsigned word_count = words[0] / 4;
    for (unsigned i = 0; i < word_count; i++)
        /* the offset is cast so the argument type matches the %x conversion */
        printf("%04x: 0x%08x\n", (unsigned)(i * sizeof *words), words[i]);
#endif
    return ret_val;
}
/*
 * Send a mailbox property request with tag 0x3000c carrying `size`, `align`
 * and `flags`.
 *
 * Returns word 5 of the message buffer after the request has been processed
 * (the slot that held `size` in the request), or 0 when the ioctl itself
 * failed. Without that check a failed ioctl would leave the request untouched
 * and the requested size would be returned as if it were a valid result.
 */
unsigned mem_alloc(int file_desc, unsigned size, unsigned align, unsigned flags) {
    int i = 0;
    unsigned p[32];
    p[i++] = 0;            // size
    p[i++] = 0x00000000;   // process request
    p[i++] = 0x3000c;      // (the tag id)
    p[i++] = 12;           // (size of the buffer)
    p[i++] = 12;           // (size of the data)
    p[i++] = size;         // (num bytes? or pages?)
    p[i++] = align;        // (alignment)
    p[i++] = flags;        // (MEM_FLAG_L1_NONALLOCATING)
    p[i++] = 0x00000000;   // end tag
    p[0] = i * sizeof *p;  // actual size
    if (mbox_property(file_desc, p) < 0) {
        // mbox_property() has already logged the error
        return 0;
    }
    return p[5];
}
/*
 * Send a mailbox property request with tag 0x3000f carrying `handle`.
 *
 * Returns word 5 of the message buffer as left by the ioctl. The ioctl result
 * is not inspected, so after a failed call the value returned is whatever the
 * buffer holds at that slot (the `handle` written into the request).
 */
unsigned mem_free(int file_desc, unsigned handle) {
    unsigned msg[32] = {
        0,           // size, filled in below
        0x00000000,  // process request
        0x3000f,     // (the tag id)
        4,           // (size of the buffer)
        4,           // (size of the data)
        handle,
        0x00000000,  // end tag
    };
    msg[0] = 7 * sizeof msg[0];  // actual size: seven words are in use
    mbox_property(file_desc, msg);
    return msg[5];
}
/*
 * Send a mailbox property request with tag 0x3000d carrying `handle`.
 *
 * Returns word 5 of the message buffer as left by the ioctl. The ioctl result
 * is not inspected, so after a failed call the value returned is whatever the
 * buffer holds at that slot (the `handle` written into the request).
 */
unsigned mem_lock(int file_desc, unsigned handle) {
    unsigned msg[32] = {
        0,           // size, filled in below
        0x00000000,  // process request
        0x3000d,     // (the tag id)
        4,           // (size of the buffer)
        4,           // (size of the data)
        handle,
        0x00000000,  // end tag
    };
    msg[0] = 7 * sizeof msg[0];  // actual size: seven words are in use
    mbox_property(file_desc, msg);
    return msg[5];
}
/*
 * Send a mailbox property request with tag 0x3000e carrying `handle`.
 *
 * Returns word 5 of the message buffer as left by the ioctl. The ioctl result
 * is not inspected, so after a failed call the value returned is whatever the
 * buffer holds at that slot (the `handle` written into the request).
 */
unsigned mem_unlock(int file_desc, unsigned handle) {
    unsigned msg[32] = {
        0,           // size, filled in below
        0x00000000,  // process request
        0x3000e,     // (the tag id)
        4,           // (size of the buffer)
        4,           // (size of the data)
        handle,
        0x00000000,  // end tag
    };
    msg[0] = 7 * sizeof msg[0];  // actual size: seven words are in use
    mbox_property(file_desc, msg);
    return msg[5];
}
/*
 * Send a mailbox property request with tag 0x30010 carrying `code` followed
 * by the six values `r0`..`r5` (28 bytes of payload).
 *
 * Returns word 5 of the message buffer as left by the ioctl. The ioctl result
 * is not inspected, so after a failed call the value returned is whatever the
 * buffer holds at that slot (the `code` written into the request).
 */
unsigned execute_code(int file_desc, unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5) {
    unsigned msg[32] = {
        0,           // size, filled in below
        0x00000000,  // process request
        0x30010,     // (the tag id)
        28,          // (size of the buffer)
        28,          // (size of the data)
        code,
        r0,
        r1,
        r2,
        r3,
        r4,
        r5,
        0x00000000,  // end tag
    };
    msg[0] = 13 * sizeof msg[0];  // actual size: thirteen words are in use
    mbox_property(file_desc, msg);
    return msg[5];
}
/*
 * Send a mailbox property request with tag 0x30012 carrying `enable`.
 *
 * Returns word 5 of the message buffer as left by the ioctl. The ioctl result
 * is not inspected, so after a failed call the value returned is whatever the
 * buffer holds at that slot (the `enable` value written into the request).
 */
unsigned qpu_enable(int file_desc, unsigned enable) {
    unsigned msg[32] = {
        0,           // size, filled in below
        0x00000000,  // process request
        0x30012,     // (the tag id)
        4,           // (size of the buffer)
        4,           // (size of the data)
        enable,
        0x00000000,  // end tag
    };
    msg[0] = 7 * sizeof msg[0];  // actual size: seven words are in use
    mbox_property(file_desc, msg);
    return msg[5];
}
/*
 * Send a mailbox property request with tag 0x30011 carrying `num_qpus`,
 * `control`, `noflush` and `timeout` (16 bytes of payload).
 *
 * Returns word 5 of the message buffer as left by the ioctl. The ioctl result
 * is not inspected, so after a failed call the value returned is whatever the
 * buffer holds at that slot (the `num_qpus` written into the request).
 */
unsigned execute_qpu(int file_desc, unsigned num_qpus, unsigned control, unsigned noflush, unsigned timeout) {
    unsigned msg[32] = {
        0,           // size, filled in below
        0x00000000,  // process request
        0x30011,     // (the tag id)
        16,          // (size of the buffer)
        16,          // (size of the data)
        num_qpus,
        control,
        noflush,
        timeout,     // ms
        0x00000000,  // end tag
    };
    msg[0] = 10 * sizeof msg[0];  // actual size: ten words are in use
    mbox_property(file_desc, msg);
    return msg[5];
}
/*
 * Open DEVICE_FILE_NAME, the character device used to talk to the kernel
 * mailbox driver, and return its file descriptor.
 *
 * If the device cannot be opened the error is logged and the process is
 * terminated with exit(-1), so a returned descriptor is always valid.
 */
int mbox_open() {
    int fd = open(DEVICE_FILE_NAME, 0);
    if (fd >= 0) {
        return fd;
    }
    log(LOG_CRIT, "Can't open device file %s: %s\n", DEVICE_FILE_NAME, strerror(errno));
    exit(-1);
}
/* Close a descriptor obtained from mbox_open(); the close() result is ignored. */
void mbox_close(int file_desc) {
    (void)close(file_desc);
}
================================================
FILE: src/hello_fft/mailbox.h
================================================
/*
Copyright (c) 2012, Broadcom Europe Ltd.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include
/* ioctl type number used to build IOCTL_MBOX_PROPERTY. */
#define MAJOR_NUM 100
/* ioctl request that passes a property message buffer to the mailbox driver. */
#define IOCTL_MBOX_PROPERTY _IOWR(MAJOR_NUM, 0, char*)
/* Character device opened by mbox_open(). */
#define DEVICE_FILE_NAME "/dev/vcio"
/* Open DEVICE_FILE_NAME and return its descriptor; exits the process on failure. */
int mbox_open();
/* Close a descriptor returned by mbox_open(). */
void mbox_close(int file_desc);
/* NOTE(review): no definition of get_version() is visible in mailbox.c -- confirm it is still provided. */
unsigned get_version(int file_desc);
/*
 * Mailbox property requests. Each builds a message for the given tag, sends
 * it through `file_desc` and returns a word read back from the message buffer.
 */
/* Property tag 0x3000c with size, alignment and flags. */
unsigned mem_alloc(int file_desc, unsigned size, unsigned align, unsigned flags);
/* Property tag 0x3000f for `handle`. */
unsigned mem_free(int file_desc, unsigned handle);
/* Property tag 0x3000d for `handle`. */
unsigned mem_lock(int file_desc, unsigned handle);
/* Property tag 0x3000e for `handle`. */
unsigned mem_unlock(int file_desc, unsigned handle);
/* Map `size` bytes of /dev/mem at `base`; exits the process on failure. */
void* mapmem(unsigned base, unsigned size);
/* munmap() `size` bytes at `addr`; exits the process on failure. */
void unmapmem(void* addr, unsigned size);
/* Property tag 0x30010 with `code` and the values r0..r5. */
unsigned execute_code(int file_desc, unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5);
/* Property tag 0x30011 with num_qpus, control, noflush and timeout (ms). */
unsigned execute_qpu(int file_desc, unsigned num_qpus, unsigned control, unsigned noflush, unsigned timeout);
/* Property tag 0x30012 with `enable`. */
unsigned qpu_enable(int file_desc, unsigned enable);
================================================
FILE: src/helper_functions.cpp
================================================
/*
* helper_functions.cpp
*
* Copyright (C) 2023 charlie-foxtrot
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#include // struct stat, S_ISDIR
#include // size_t
#include // strerror
#include "helper_functions.h"
#include "logging.h"
using namespace std;
// Returns true when dir_path can be stat()'ed and names a directory;
// false when stat() fails or the path is something else.
bool dir_exists(const string& dir_path) {
    struct stat info;
    if (stat(dir_path.c_str(), &info) != 0) {
        return false;
    }
    return S_ISDIR(info.st_mode);
}
// Returns true when file_path can be stat()'ed and names a regular file;
// false when stat() fails or the path is something else (e.g. a directory).
bool file_exists(const string& file_path) {
    struct stat info;
    if (stat(file_path.c_str(), &info) != 0) {
        return false;
    }
    return S_ISREG(info.st_mode);
}
// Makes sure dir_path exists as a directory, creating it (mode 0755) when it
// is missing. Only the last path component is created; parents must exist.
//
// Returns true when the directory exists on return, false (after logging)
// when it could not be created.
bool make_dir(const string& dir_path) {
    if (dir_exists(dir_path)) {
        return true;
    }
    if (mkdir(dir_path.c_str(), 0755) == 0) {
        return true;
    }
    // keep the mkdir() error: the stat() inside dir_exists() may overwrite errno
    const int mkdir_errno = errno;
    // someone else may have created the directory between the check above and
    // the mkdir() call; that is not a failure
    if (mkdir_errno == EEXIST && dir_exists(dir_path)) {
        return true;
    }
    log(LOG_ERR, "Could not create directory %s: %s\n", dir_path.c_str(), strerror(mkdir_errno));
    return false;
}
// Creates basedir/subdirs one level at a time.
//
// Nothing is done when the full path already exists. Otherwise basedir itself
// is created first (the zero-length prefix of subdirs), then every prefix of
// subdirs that ends just before a '/', and finally the complete path.
//
// Returns true when the complete path is a directory on return, false as soon
// as one of the intermediate directories cannot be created.
bool make_subdirs(const string& basedir, const string& subdirs) {
    const string separator = "/";
    const string target = basedir + separator + subdirs;
    if (dir_exists(target)) {
        return true;
    }
    // cut == 0 yields "basedir/", which creates basedir in case it is missing
    for (size_t cut = 0; cut != string::npos; cut = subdirs.find_first_of(separator, cut + 1)) {
        const string partial = basedir + separator + subdirs.substr(0, cut);
        if (!make_dir(partial)) {
            return false;
        }
    }
    // the last component has no trailing separator, so it is created here
    make_dir(target);
    return dir_exists(target);
}
// Creates the directory basedir/YYYY/MM/DD for the date in `time`.
//
// Returns the full path of the dated directory, or an empty string when the
// date could not be formatted or a directory could not be created.
string make_dated_subdirs(const string& basedir, const struct tm* time) {
    // use the time to build the date subdirectories
    char date_path[11];
    // strftime() returns 0 when the result does not fit (e.g. a year with
    // more than four digits); the buffer contents are then unusable
    if (strftime(date_path, sizeof(date_path), "%Y/%m/%d", time) == 0) {
        return "";
    }
    const string date_path_str = string(date_path);
    // make all the subdirectories, and return the full path if successful
    if (make_subdirs(basedir, date_path_str)) {
        return basedir + "/" + date_path_str;
    }
    // on any error return empty string
    return "";
}
================================================
FILE: src/helper_functions.h
================================================
/*
* helper_functions.h
*
* Copyright (C) 2023 charlie-foxtrot
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#ifndef _HELPER_FUNCTIONS_H
#define _HELPER_FUNCTIONS_H
#include // struct tm
#include
// Filesystem helpers. The boolean helpers report success / existence via their return value.

// Presumably reports whether dir_path names an existing directory (implementation not shown here - confirm).
bool dir_exists(const std::string& dir_path);
// Presumably reports whether file_path names an existing file (implementation not shown here - confirm).
bool file_exists(const std::string& file_path);
// Creates dir_path with mode 0755; logs an error and returns false when mkdir() fails.
// NOTE(review): make_subdirs() calls this on paths that may already exist, so it
// presumably returns true for an existing directory - confirm.
bool make_dir(const std::string& dir_path);
// Creates basedir and every '/'-separated component of subdirs below it.
// Returns true when basedir/subdirs exists on return.
bool make_subdirs(const std::string& basedir, const std::string& subdirs);
// Creates a YYYY/MM/DD hierarchy below basedir for the given time and returns
// the resulting full path, or an empty string on error.
std::string make_dated_subdirs(const std::string& basedir, const struct tm* time);
#endif /* _HELPER_FUNCTIONS_H */
================================================
FILE: src/input-common.cpp
================================================
/*
* input-common.cpp
* common input handling routines
*
* Copyright (c) 2015-2021 Tomasz Lemiech
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#include "input-common.h"
#include
#include // dlopen, dlsym
#include
#include
#include // asprintf
#include // free
#include
#include
using namespace std;
// Signature of the per-driver factory function looked up by input_new().
typedef input_t* (*input_new_func_t)(void);

// Create a new input of the given type.
//
// The factory is located at run time by looking up the symbol
// "<type>_input_new" in the running program image.
//
// type - input type name; must not be NULL
//
// Returns the input_t produced by the factory, or NULL when the symbol name
// could not be built or no such factory symbol exists.
input_t* input_new(char const* const type) {
    assert(type != NULL);

    // a NULL filename yields a handle for the main program
    void* dlhandle = dlopen(NULL, RTLD_NOW);
    assert(dlhandle != NULL);

    char* fname = NULL;
    int chars_written = asprintf(&fname, "%s_input_new", type);
    if (chars_written <= 0) {
        // on asprintf() failure the contents of fname are undefined, so it is not freed here
        dlclose(dlhandle);
        return NULL;
    }

    input_new_func_t fptr = (input_new_func_t)dlsym(dlhandle, fname);
    free(fname);
    // Release the reference taken by dlopen(); the handle refers to the main
    // program, which stays loaded, so fptr remains valid afterwards.
    dlclose(dlhandle);
    if (fptr == NULL) {
        return NULL;
    }

    input_t* input = (*fptr)();
    // every driver must hand back an object with the mandatory callbacks filled in
    assert(input != NULL);
    assert(input->init != NULL);
    assert(input->run_rx_thread != NULL);
    assert(input->set_centerfreq != NULL);
    return input;
}
// Initialize an input: run the driver's init callback, then set up the mutex
// stored in input->buffer_lock.
//
// Returns 0 on success (state becomes INPUT_INITIALIZED) or -1 on failure
// (state becomes INPUT_FAILED). When the mutex cannot be created, errno holds
// the error code reported by pthread_mutex_init().
int input_init(input_t* const input) {
    assert(input != NULL);

    errno = 0;
    if (input->init(input) < 0) {
        input->state = INPUT_FAILED;
        return -1;
    }

    const int mutex_err = pthread_mutex_init(&input->buffer_lock, NULL);
    if (mutex_err != 0) {
        // pthread functions return the error code instead of setting errno
        errno = mutex_err;
        input->state = INPUT_FAILED;
        return -1;
    }

    input->state = INPUT_INITIALIZED;
    return 0;
}
// Launch the driver's receive thread for an already initialized input.
//
// Returns 0 when the thread was created, or -1 with errno set to the error
// code reported by pthread_create().
int input_start(input_t* const input) {
    assert(input != NULL);
    assert(input->dev_data != NULL);
    assert(input->state == INPUT_INITIALIZED);

    const int create_err = pthread_create(&input->rx_thread, NULL, input->run_rx_thread, (void*)input);
    if (create_err == 0) {
        return 0;
    }

    // pthread_create() returns the error code rather than setting errno
    errno = create_err;
    return -1;
}
// Pass the configuration section to the driver's parse_config callback.
//
// Returns whatever the callback returns, or 0 when the driver provides none.
int input_parse_config(input_t* const input, libconfig::Setting& cfg) {
    assert(input != NULL);

    // Very simple inputs (like stdin) might not have any configuration
    // variables at all, so a missing parse_config callback is legal.
    if (input->parse_config == NULL) {
        return 0;
    }
    return input->parse_config(input, cfg);
}
// Stop an input and wait for its receive thread to finish.
//
// The driver's stop callback is invoked only when the input is currently
// running and a callback is provided.
//
// Returns 0 on success. Returns -1 when the stop callback fails (state becomes
// INPUT_FAILED and the thread is not joined) or when pthread_join() fails
// (errno holds the returned error code).
int input_stop(input_t* const input) {
    assert(input != NULL);
    assert(input->dev_data != NULL);

    errno = 0;
    const bool must_call_stop = (input->state == INPUT_RUNNING) && (input->stop != NULL);
    if (must_call_stop && input->stop(input) != 0) {
        input->state = INPUT_FAILED;
        return -1;
    }

    input->state = INPUT_STOPPED;

    const int join_err = pthread_join(input->rx_thread, NULL);
    if (join_err != 0) {
        errno = join_err;
        return -1;
    }
    return 0;
}
// Retune a running input to a new center frequency.
//
// Returns 0 on success and records the new frequency in input->centerfreq.
// Returns -1 when the input is not running, or when the driver callback fails
// (in which case the state becomes INPUT_FAILED).
int input_set_centerfreq(input_t* const input, int const centerfreq) {
    assert(input != NULL);
    assert(input->dev_data != NULL);

    // retuning is only allowed while the input is running
    if (input->state != INPUT_RUNNING) {
        return -1;
    }

    if (input->set_centerfreq(input, centerfreq) != 0) {
        input->state = INPUT_FAILED;
        return -1;
    }

    input->centerfreq = centerfreq;
    return 0;
}
================================================
FILE: src/input-common.h
================================================
/*
* input-common.h
*
* Copyright (c) 2015-2021 Tomasz Lemiech
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#ifndef _INPUT_COMMON_H
#define _INPUT_COMMON_H 1
#include
#include
// MODULE_EXPORT gives a function C linkage (unmangled name) and, on GCC >= 4,
// default symbol visibility. Driver factory functions use it so that
// input_new() can find them by name with dlsym().
#if __GNUC__ >= 4
#define MODULE_EXPORT extern "C" __attribute__((visibility("default")))
#else
#define MODULE_EXPORT extern "C"
#endif /* __GNUC__ */
// Sample formats an input can deliver. SFMT_UNDEF (0) is the unset value.
// NOTE(review): names suggest unsigned 8-bit, signed 8-bit, signed 16-bit and 32-bit float - confirm.
typedef enum { SFMT_UNDEF = 0, SFMT_U8, SFMT_S8, SFMT_S16, SFMT_F32 } sample_format_t;
// Number of enumerators in sample_format_t; keep in sync when adding formats.
#define SAMPLE_FORMAT_CNT 5
// Lifecycle states of an input:
//   INPUT_UNKNOWN     - initial value assigned by a driver factory
//   INPUT_INITIALIZED - set by input_init() on success
//   INPUT_RUNNING     - set by the receive thread once it is running
//   INPUT_FAILED      - set when init, stop, retuning or reception fails
//   INPUT_STOPPED     - set by input_stop()
//   INPUT_DISABLED    - NOTE(review): not set anywhere in the code visible here - confirm usage
typedef enum { INPUT_UNKNOWN = 0, INPUT_INITIALIZED, INPUT_RUNNING, INPUT_FAILED, INPUT_STOPPED, INPUT_DISABLED } input_state_t;
// Number of enumerators in input_state_t; keep in sync when adding states.
#define INPUT_STATE_CNT 6
// Forward typedef so the callback signatures inside the struct can name input_t.
typedef struct input_t input_t;
// Generic input descriptor: sample buffer bookkeeping, stream parameters and
// the driver callbacks that implement a concrete input type.
struct input_t {
// Sample buffer; presumably a circular buffer indexed by bufs/bufe - confirm.
unsigned char* buffer;
// Driver-private data; each driver casts it to its own type.
void* dev_data;
// buf_size: buffer size in bytes; bufs/bufe: positions within the buffer.
// NOTE(review): bufs/bufe look like read-start and write-end offsets - confirm.
size_t buf_size, bufs, bufe;
// NOTE(review): presumably counts buffer overflow events - confirm.
size_t overflow_count;
// Current lifecycle state (see input_state_t).
input_state_t state;
// Format of the samples delivered by this input.
sample_format_t sfmt;
// NOTE(review): presumably the full-scale sample magnitude used for normalization - confirm.
float fullscale;
// Size of one sample component in bytes.
int bytes_per_sample;
// Sample rate; NOTE(review): presumably in samples per second - confirm.
int sample_rate;
// Last center frequency successfully set via input_set_centerfreq().
int centerfreq;
// Optional: parse driver-specific configuration (may be NULL).
int (*parse_config)(input_t* const input, libconfig::Setting& cfg);
// Mandatory: driver initialization; a negative return value means failure.
int (*init)(input_t* const input);
void* (*run_rx_thread)(void* input_ptr); // to be launched via pthread_create()
// Mandatory: retune the input; a non-zero return value means failure.
int (*set_centerfreq)(input_t* const input, int const centerfreq);
// Optional: stop the input (may be NULL); a non-zero return value means failure.
int (*stop)(input_t* const input);
// Receive thread handle, created by input_start() and joined by input_stop().
pthread_t rx_thread;
// Mutex initialized by input_init(); held while reading bufs/bufe.
pthread_mutex_t buffer_lock;
};
// Creates an input of the given type by looking up "<type>_input_new";
// returns NULL when no such factory exists.
input_t* input_new(char const* const type);
// Runs the driver's init callback and sets up buffer_lock; returns 0 or -1.
int input_init(input_t* const input);
// Forwards cfg to the driver's parse_config callback, if any; returns 0 when there is none.
int input_parse_config(input_t* const input, libconfig::Setting& cfg);
// Starts the receive thread of an initialized input; returns 0 or -1 (errno set).
int input_start(input_t* const input);
// Retunes a running input; returns 0 or -1.
int input_set_centerfreq(input_t* const input, int const centerfreq);
// Stops the input and joins its receive thread; returns 0 or -1.
int input_stop(input_t* const input);
#endif /* _INPUT_COMMON_H */
================================================
FILE: src/input-file.cpp
================================================
/*
* input-file.cpp
* binary file specific routines
*
* Copyright (c) 2015-2021 Tomasz Lemiech
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#include "input-file.h" // file_dev_data_t
#include
#include // SCHAR_MAX
#include
#include
#include // FIXME: get rid of this
#include // usleep
#include // Setting
#include "input-common.h" // input_t, sample_format_t, input_state_t, MODULE_EXPORT
#include "input-helpers.h" // circbuffer_append
#include "rtl_airband.h" // do_exit, fft_size, debug_print, XCALLOC, error()
using namespace std;
// Parse the configuration of a file input.
//
// Recognized settings:
//   filepath       - mandatory; path of the file to read samples from
//   speedup_factor - optional int or float, must be greater than zero;
//                    defaults to 4 when not given
//
// Configuration problems are reported on stderr followed by a call to error().
// Returns 0.
int file_parse_config(input_t* const input, libconfig::Setting& cfg) {
    assert(input != NULL);
    file_dev_data_t* dev_data = (file_dev_data_t*)input->dev_data;
    assert(dev_data != NULL);

    if (cfg.exists("filepath")) {
        // keep a private copy of the path string
        dev_data->filepath = strdup(cfg["filepath"]);
    } else {
        cerr << "File configuration error: no 'filepath' given\n";
        error();
    }

    if (cfg.exists("speedup_factor")) {
        // accept both integer and floating point notation
        if (cfg["speedup_factor"].getType() == libconfig::Setting::TypeInt) {
            dev_data->speedup_factor = (int)cfg["speedup_factor"];
        } else if (cfg["speedup_factor"].getType() == libconfig::Setting::TypeFloat) {
            dev_data->speedup_factor = (float)cfg["speedup_factor"];
        } else {
            cerr << "File configuration error: 'speedup_factor' must be a float or int if set\n";
            error();
        }
        // zero is rejected as well, so the message has to say "greater than"
        if (dev_data->speedup_factor <= 0.0) {
            cerr << "File configuration error: 'speedup_factor' must be > 0.0\n";
            error();
        }
    } else {
        dev_data->speedup_factor = 4;
    }
    return 0;
}
// Open the configured file for binary reading.
//
// A failure to open the file is reported on stderr followed by a call to
// error(). Returns 0.
int file_init(input_t* const input) {
    assert(input != NULL);
    file_dev_data_t* file_data = (file_dev_data_t*)input->dev_data;
    assert(file_data != NULL);

    file_data->input_file = fopen(file_data->filepath, "rb");
    if (file_data->input_file == NULL) {
        cerr << "File input failed to open '" << file_data->filepath << "' - " << strerror(errno) << endl;
        error();
    }

    log(LOG_INFO, "File input %s initialized\n", file_data->filepath);
    return 0;
}
// Receive thread of the file input (started via pthread_create()).
//
// Reads the input file in chunks and appends them to the input's buffer with
// circbuffer_append(), sleeping between chunks so that data is delivered at
// sample_rate multiplied by speedup_factor. The loop ends when do_exit is set,
// at end of file or on a read error; in the latter two cases the input state
// becomes INPUT_FAILED.
//
// ctx - pointer to the input_t this thread serves
//
// Always returns 0 (NULL).
void* file_rx_thread(void* ctx) {
    input_t* input = (input_t*)ctx;
    assert(input != NULL);
    assert(input->sample_rate != 0);
    file_dev_data_t* dev_data = (file_dev_data_t*)input->dev_data;
    assert(dev_data != NULL);
    assert(dev_data->input_file != NULL);
    assert(dev_data->speedup_factor != 0.0);

    // each read is slightly less than half of the input buffer
    size_t buf_len = (input->buf_size / 2) - 1;
    unsigned char* buf = (unsigned char*)XCALLOC(1, buf_len);

    // wall-clock time one byte of the file represents at the accelerated rate
    // (the factor 2 presumably accounts for two components per sample - confirm)
    float time_per_byte_ms = 1000 / (input->sample_rate * input->bytes_per_sample * 2 * dev_data->speedup_factor);

    log(LOG_DEBUG, "sample_rate: %d, bytes_per_sample: %d, speedup_factor: %f, time_per_byte_ms: %f\n", input->sample_rate, input->bytes_per_sample, dev_data->speedup_factor, time_per_byte_ms);

    input->state = INPUT_RUNNING;

    while (true) {
        if (do_exit) {
            break;
        }
        if (feof(dev_data->input_file)) {
            // ftell() returns long, so it has to be printed with %ld
            log(LOG_INFO, "File '%s': hit end of file at %ld, disabling\n", dev_data->filepath, ftell(dev_data->input_file));
            input->state = INPUT_FAILED;
            break;
        }
        if (ferror(dev_data->input_file)) {
            log(LOG_ERR, "File '%s': read error (%d), disabling\n", dev_data->filepath, ferror(dev_data->input_file));
            input->state = INPUT_FAILED;
            break;
        }

        timeval start;
        gettimeofday(&start, NULL);

        // determine how much room is left in the buffer; bufs/bufe are read under the lock
        size_t space_left;
        pthread_mutex_lock(&input->buffer_lock);
        if (input->bufe >= input->bufs) {
            space_left = input->bufs + (input->buf_size - input->bufe);
        } else {
            space_left = input->bufs - input->bufe;
        }
        pthread_mutex_unlock(&input->buffer_lock);

        if (space_left > buf_len) {
            size_t len = fread(buf, sizeof(unsigned char), buf_len, dev_data->input_file);
            circbuffer_append(input, buf, len);

            timeval end;
            gettimeofday(&end, NULL);

            // sleep for whatever remains of the time this chunk represents
            int time_taken_ms = delta_sec(&start, &end) * 1000;
            int sleep_time_ms = len * time_per_byte_ms - time_taken_ms;
            if (sleep_time_ms > 0) {
                SLEEP(sleep_time_ms);
            }
        } else {
            // not enough room for a full chunk yet; wait and check again
            SLEEP(10);
        }
    }

    free(buf);
    return 0;
}
// set_centerfreq callback of the file input: both arguments are ignored and
// the call always reports success (0).
int file_set_centerfreq(input_t* const /*input*/, int const /*centerfreq*/) {
return 0;
}
// stop callback of the file input: close the input file and clear the stored
// handle. Returns 0.
int file_stop(input_t* const input) {
    assert(input != NULL);
    file_dev_data_t* file_data = (file_dev_data_t*)input->dev_data;
    assert(file_data != NULL);

    fclose(file_data->input_file);
    // clear the handle so the closed stream cannot be reused by mistake
    file_data->input_file = NULL;

    return 0;
}
// Factory for the "file" input type; input_new() finds it by symbol name.
//
// Allocates the driver-private data and the input descriptor, fills in the
// defaults (unsigned 8-bit sample format, no file opened, sample rate and
// speedup factor not yet set) and wires up the file driver callbacks.
//
// Returns the newly allocated input_t.
MODULE_EXPORT input_t* file_input_new() {
    // driver-private state: no file opened, speedup factor still unset
    file_dev_data_t* file_data = (file_dev_data_t*)XCALLOC(1, sizeof(file_dev_data_t));
    file_data->input_file = NULL;
    file_data->speedup_factor = 0.0;

    input_t* new_input = (input_t*)XCALLOC(1, sizeof(input_t));

    // driver callbacks
    new_input->parse_config = &file_parse_config;
    new_input->init = &file_init;
    new_input->run_rx_thread = &file_rx_thread;
    new_input->set_centerfreq = &file_set_centerfreq;
    new_input->stop = &file_stop;

    // stream parameters
    new_input->sfmt = SFMT_U8;
    new_input->bytes_per_sample = sizeof(unsigned char);
    new_input->fullscale = (float)SCHAR_MAX - 0.5f;
    new_input->sample_rate = 0;

    new_input->state = INPUT_UNKNOWN;
    new_input->dev_data = file_data;
    return new_input;
}
================================================
FILE: src/input-file.h
================================================
/*
* input-file.h
* binary file input specific declarations
*
* Copyright (c) 2015-2021 Tomasz Lemiech
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#include
#include
#include