Full Code of VIA-Research/uPIMulator for AI

main 870d916334e9 cached
2582 files
229.4 MB
2.7M tokens
17659 symbols
1 requests
Copy disabled (too large) Download .txt
Showing preview only (10,783K chars total). Download the full file to get everything.
Repository: VIA-Research/uPIMulator
Branch: main
Commit: 870d916334e9
Files: 2582
Total size: 229.4 MB

Directory structure:
gitextract_ry7k9cpr/

├── .gitignore
├── LICENSE
├── README.md
├── assets/
│   ├── figure5_mem_util_calculator.xlsx
│   ├── figure7_active_tasklet_breakdown.xlsx
│   └── figure9_instruction_mix.xlsx
├── golang/
│   ├── README.md
│   └── uPIMulator/
│       ├── benchmark/
│       │   ├── BS/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── bs_omp.c
│       │   │   │   │   └── timer.h
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── binary_search.cu
│       │   │   │       ├── binary_search.h
│       │   │   │       ├── cpu_lib.py
│       │   │   │       ├── cu_lib_import.py
│       │   │   │       └── run.py
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── CMakeLists.txt
│       │   ├── GEMV/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── _BL_10.conf
│       │   │   ├── _NR_TASKLETS_10_BL_10.conf
│       │   │   ├── _NR_TASKLETS_11_BL_10.conf
│       │   │   ├── _NR_TASKLETS_12_BL_10.conf
│       │   │   ├── _NR_TASKLETS_13_BL_10.conf
│       │   │   ├── _NR_TASKLETS_14_BL_10.conf
│       │   │   ├── _NR_TASKLETS_15_BL_10.conf
│       │   │   ├── _NR_TASKLETS_16
│       │   │   ├── _NR_TASKLETS_16_BL_10.conf
│       │   │   ├── _NR_TASKLETS_17_BL_10.conf
│       │   │   ├── _NR_TASKLETS_18_BL_10.conf
│       │   │   ├── _NR_TASKLETS_19_BL_10.conf
│       │   │   ├── _NR_TASKLETS_1_BL_10.conf
│       │   │   ├── _NR_TASKLETS_20_BL_10.conf
│       │   │   ├── _NR_TASKLETS_21_BL_10.conf
│       │   │   ├── _NR_TASKLETS_22_BL_10.conf
│       │   │   ├── _NR_TASKLETS_23_BL_10.conf
│       │   │   ├── _NR_TASKLETS_24_BL_10.conf
│       │   │   ├── _NR_TASKLETS_2_BL_10.conf
│       │   │   ├── _NR_TASKLETS_3_BL_10.conf
│       │   │   ├── _NR_TASKLETS_4_BL_10.conf
│       │   │   ├── _NR_TASKLETS_5_BL_10.conf
│       │   │   ├── _NR_TASKLETS_6_BL_10.conf
│       │   │   ├── _NR_TASKLETS_7_BL_10.conf
│       │   │   ├── _NR_TASKLETS_8_BL_10.conf
│       │   │   ├── _NR_TASKLETS_9_BL_10.conf
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── gemv_openmp.c
│       │   │   │   │   └── gemv_utils.h
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── gemv.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── HST-L/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── input/
│       │   │   │   └── image_VanHateren.iml
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── HST-S/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── input/
│       │   │   │       │   └── image_VanHateren.iml
│       │   │   │       ├── kernel.cpp
│       │   │   │       ├── kernel.cu
│       │   │   │       ├── kernel.h
│       │   │   │       ├── main.cpp
│       │   │   │       └── support/
│       │   │   │           ├── common.h
│       │   │   │           ├── cuda-setup.h
│       │   │   │           ├── partitioner.h
│       │   │   │           ├── timer.h
│       │   │   │           └── verify.h
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── input/
│       │   │   │   └── image_VanHateren.iml
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── MLP/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── _BL_10.conf
│       │   │   ├── _NR_TASKLETS_10_BL_10.conf
│       │   │   ├── _NR_TASKLETS_11_BL_10.conf
│       │   │   ├── _NR_TASKLETS_12_BL_10.conf
│       │   │   ├── _NR_TASKLETS_13_BL_10.conf
│       │   │   ├── _NR_TASKLETS_14_BL_10.conf
│       │   │   ├── _NR_TASKLETS_15_BL_10.conf
│       │   │   ├── _NR_TASKLETS_16
│       │   │   ├── _NR_TASKLETS_16_BL_10.conf
│       │   │   ├── _NR_TASKLETS_17_BL_10.conf
│       │   │   ├── _NR_TASKLETS_18_BL_10.conf
│       │   │   ├── _NR_TASKLETS_19_BL_10.conf
│       │   │   ├── _NR_TASKLETS_1_BL_10.conf
│       │   │   ├── _NR_TASKLETS_20_BL_10.conf
│       │   │   ├── _NR_TASKLETS_21_BL_10.conf
│       │   │   ├── _NR_TASKLETS_22_BL_10.conf
│       │   │   ├── _NR_TASKLETS_23_BL_10.conf
│       │   │   ├── _NR_TASKLETS_24_BL_10.conf
│       │   │   ├── _NR_TASKLETS_2_BL_10.conf
│       │   │   ├── _NR_TASKLETS_3_BL_10.conf
│       │   │   ├── _NR_TASKLETS_4_BL_10.conf
│       │   │   ├── _NR_TASKLETS_5_BL_10.conf
│       │   │   ├── _NR_TASKLETS_6_BL_10.conf
│       │   │   ├── _NR_TASKLETS_7_BL_10.conf
│       │   │   ├── _NR_TASKLETS_8_BL_10.conf
│       │   │   ├── _NR_TASKLETS_9_BL_10.conf
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── mlp_openmp.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── mlp.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── RED/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.cpp
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── app_baseline.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── cyclecount.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SCAN-RSS/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.cpp
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── app_baseline.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SCAN-SSA/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SEL/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── ds.h
│       │   │   │       ├── kernel.cu
│       │   │   │       └── select.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── TRNS/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── kernel.cpp
│       │   │   │   │   ├── kernel.h
│       │   │   │   │   ├── main.cpp
│       │   │   │   │   └── support/
│       │   │   │   │       ├── common.h
│       │   │   │   │       ├── setup.h
│       │   │   │   │       ├── timer.h
│       │   │   │   │       └── verify.h
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── kernel.cu
│       │   │   │       ├── kernel.h
│       │   │   │       ├── main.cpp
│       │   │   │       └── support/
│       │   │   │           ├── common.h
│       │   │   │           ├── cuda-setup.h
│       │   │   │           ├── timer.h
│       │   │   │           └── verify.h
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── TS/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── UNI/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── ds.h
│       │   │   │       ├── kernel.cu
│       │   │   │       └── unique.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── VA/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── vec_add.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   └── build.py
│       ├── docker/
│       │   └── Dockerfile
│       ├── go.mod
│       ├── go.sum
│       ├── script/
│       │   ├── build.py
│       │   ├── format.py
│       │   ├── run_validation.sh
│       │   └── visualize.py
│       ├── sdk/
│       │   ├── CMakeLists.txt
│       │   ├── build.py
│       │   ├── misc/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── accessMramFromDpu.c
│       │   │   ├── coreDump.c
│       │   │   ├── crt0.c
│       │   │   ├── dpu.lds
│       │   │   ├── internalStateReset.c
│       │   │   ├── linkerScript.lds
│       │   │   ├── restoreRegisters.c
│       │   │   └── restore_carry_and_zero_flag.h
│       │   ├── stdlib/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── abort.c
│       │   │   ├── assert.h
│       │   │   ├── atoi.c
│       │   │   ├── atol.c
│       │   │   ├── ctype.h
│       │   │   ├── errno.c
│       │   │   ├── errno.h
│       │   │   ├── exit.c
│       │   │   ├── inttypes.h
│       │   │   ├── iso646.h
│       │   │   ├── limits.h
│       │   │   ├── memchr.c
│       │   │   ├── memcmp.c
│       │   │   ├── memcpy.c
│       │   │   ├── memmove.c
│       │   │   ├── memmram_utils.h
│       │   │   ├── memset.c
│       │   │   ├── stdalign.h
│       │   │   ├── stdarg.h
│       │   │   ├── stdbool.h
│       │   │   ├── stddef.h
│       │   │   ├── stdint.h
│       │   │   ├── stdio.c
│       │   │   ├── stdio.h
│       │   │   ├── stdlib.h
│       │   │   ├── stdnoreturn.h
│       │   │   ├── stpcpy.c
│       │   │   ├── stpncpy.c
│       │   │   ├── strcat.c
│       │   │   ├── strchr.c
│       │   │   ├── strcmp.c
│       │   │   ├── strcpy.c
│       │   │   ├── strcspn.c
│       │   │   ├── strdup.c
│       │   │   ├── strerror.c
│       │   │   ├── string.h
│       │   │   ├── strlen.c
│       │   │   ├── strlwr.c
│       │   │   ├── strncat.c
│       │   │   ├── strncmp.c
│       │   │   ├── strncpy.c
│       │   │   ├── strndup.c
│       │   │   ├── strnlen.c
│       │   │   ├── strpbrk.c
│       │   │   ├── strrchr.c
│       │   │   ├── strrev.c
│       │   │   ├── strsep.c
│       │   │   ├── strspn.c
│       │   │   ├── strstr.c
│       │   │   ├── strtok_r.c
│       │   │   ├── strtol.c
│       │   │   └── strupr.c
│       │   └── syslib/
│       │       ├── CMakeLists.txt
│       │       ├── absvdi2.c
│       │       ├── absvsi2.c
│       │       ├── adddf3.c
│       │       ├── addsf3.c
│       │       ├── addvdi3.c
│       │       ├── addvsi3.c
│       │       ├── alloc.c
│       │       ├── alloc.h
│       │       ├── ashldi3.c
│       │       ├── ashrdi3.c
│       │       ├── atomic_bit.h
│       │       ├── atomics.c
│       │       ├── attributes.h
│       │       ├── barrier.c
│       │       ├── barrier.h
│       │       ├── bswapdi2.c
│       │       ├── bswapsi2.c
│       │       ├── buddy_alloc.c
│       │       ├── buddy_alloc.h
│       │       ├── buddy_realloc.c
│       │       ├── built_ins.h
│       │       ├── clzdi2.c
│       │       ├── clzsi2.c
│       │       ├── cmpdi2.c
│       │       ├── comparedf2.c
│       │       ├── comparesf2.c
│       │       ├── ctzdi2.c
│       │       ├── ctzsi2.c
│       │       ├── defs.c
│       │       ├── defs.h
│       │       ├── devprivate.h
│       │       ├── div32.c
│       │       ├── divdf3.c
│       │       ├── divdi3.c
│       │       ├── divmodsi4.c
│       │       ├── divsf3.c
│       │       ├── divsi3.c
│       │       ├── dpuconst.h
│       │       ├── dpufault.h
│       │       ├── dpuruntime.h
│       │       ├── extendhfsf2.c
│       │       ├── extendsfdf2.c
│       │       ├── ffsdi2.c
│       │       ├── ffssi2.c
│       │       ├── ffsti2.c
│       │       ├── fixdfdi.c
│       │       ├── fixdfsi.c
│       │       ├── fixsfdi.c
│       │       ├── fixsfsi.c
│       │       ├── fixunsdfdi.c
│       │       ├── fixunsdfsi.c
│       │       ├── fixunssfdi.c
│       │       ├── fixunssfsi.c
│       │       ├── float.h
│       │       ├── floatdidf.c
│       │       ├── floatdisf.c
│       │       ├── floatsidf.c
│       │       ├── floatsisf.c
│       │       ├── floatundidf.c
│       │       ├── floatundisf.c
│       │       ├── floatunsidf.c
│       │       ├── floatunsisf.c
│       │       ├── fp_add_impl.inc
│       │       ├── fp_extend.h
│       │       ├── fp_extend_impl.inc
│       │       ├── fp_fixint_impl.inc
│       │       ├── fp_fixuint_impl.inc
│       │       ├── fp_lib.h
│       │       ├── fp_mul_impl.inc
│       │       ├── fp_trunc.h
│       │       ├── fp_trunc_impl.inc
│       │       ├── fsb_allocator.c
│       │       ├── fsb_allocator.h
│       │       ├── handshake.c
│       │       ├── handshake.h
│       │       ├── int_endianness.h
│       │       ├── int_lib.h
│       │       ├── int_math.h
│       │       ├── int_types.h
│       │       ├── int_util.c
│       │       ├── int_util.h
│       │       ├── listener.c
│       │       ├── lshrdi3.c
│       │       ├── macro_utils.h
│       │       ├── mcount.c
│       │       ├── moddi3.c
│       │       ├── modsi3.c
│       │       ├── mram.h
│       │       ├── mul32.c
│       │       ├── mul64.c
│       │       ├── muldc3.c
│       │       ├── muldf3.c
│       │       ├── mulodi4.c
│       │       ├── mulosi4.c
│       │       ├── mulsf3.c
│       │       ├── mulvdi3.c
│       │       ├── mulvsi3.c
│       │       ├── mutex.h
│       │       ├── negdf2.c
│       │       ├── negdi2.c
│       │       ├── negsf2.c
│       │       ├── negvdi2.c
│       │       ├── negvsi2.c
│       │       ├── paritydi2.c
│       │       ├── paritysi2.c
│       │       ├── perfcounter.c
│       │       ├── perfcounter.h
│       │       ├── popcountdi2.c
│       │       ├── popcountsi2.c
│       │       ├── powidf2.c
│       │       ├── powisf2.c
│       │       ├── profiling.c
│       │       ├── profiling.h
│       │       ├── profiling_internals.h
│       │       ├── sem.c
│       │       ├── sem.h
│       │       ├── seqread.h
│       │       ├── seqread.inc
│       │       ├── seqread1024.c
│       │       ├── seqread128.c
│       │       ├── seqread256.c
│       │       ├── seqread32.c
│       │       ├── seqread512.c
│       │       ├── seqread64.c
│       │       ├── soft_cache.c
│       │       ├── soft_cache.h
│       │       ├── subdf3.c
│       │       ├── subsf3.c
│       │       ├── subvdi3.c
│       │       ├── subvsi3.c
│       │       ├── sysdef.h
│       │       ├── truncdfhf2.c
│       │       ├── truncdfsf2.c
│       │       ├── truncsfhf2.c
│       │       ├── ucmpdi2.c
│       │       ├── udiv64.c
│       │       ├── udivdi3.c
│       │       ├── udivmodsi4.c
│       │       ├── udivsi3.c
│       │       ├── umoddi3.c
│       │       ├── umodsi3.c
│       │       └── waitqueue.c
│       └── src/
│           ├── abi/
│           │   ├── encoding/
│           │   │   ├── ascii_encoder.go
│           │   │   └── byte_stream.go
│           │   └── word/
│           │       ├── intermediate.go
│           │       └── word.go
│           ├── assembler/
│           │   ├── assemblable.go
│           │   ├── assembler.go
│           │   └── prim/
│           │       ├── bs.go
│           │       ├── gemv.go
│           │       ├── hst_l.go
│           │       ├── hst_s.go
│           │       ├── mlp.go
│           │       ├── red.go
│           │       ├── scan_rss.go
│           │       ├── scan_ssa.go
│           │       ├── sel.go
│           │       ├── trns.go
│           │       ├── ts.go
│           │       ├── uni.go
│           │       └── va.go
│           ├── compiler/
│           │   └── compiler.go
│           ├── core/
│           │   ├── job.go
│           │   └── thread_pool.go
│           ├── linker/
│           │   ├── analyze_liveness_job.go
│           │   ├── kernel/
│           │   │   ├── directive/
│           │   │   │   ├── ascii_directive.go
│           │   │   │   ├── asciz_directive.go
│           │   │   │   ├── byte_directive.go
│           │   │   │   ├── long_directive.go
│           │   │   │   ├── quad_directive.go
│           │   │   │   ├── short_directive.go
│           │   │   │   └── zero_directive.go
│           │   │   ├── encodable.go
│           │   │   ├── executable.go
│           │   │   ├── instruction/
│           │   │   │   ├── cc/
│           │   │   │   │   ├── acquire_cc.go
│           │   │   │   │   ├── add_nz_cc.go
│           │   │   │   │   ├── boot_cc.go
│           │   │   │   │   ├── cc.go
│           │   │   │   │   ├── const_cc_ge0.go
│           │   │   │   │   ├── const_cc_geu.go
│           │   │   │   │   ├── const_cc_zero.go
│           │   │   │   │   ├── count_nz_cc.go
│           │   │   │   │   ├── div_cc.go
│           │   │   │   │   ├── div_nz_cc.go
│           │   │   │   │   ├── ext_sub_set_cc.go
│           │   │   │   │   ├── false_cc.go
│           │   │   │   │   ├── imm_shift_nz_cc.go
│           │   │   │   │   ├── log_nz_cc.go
│           │   │   │   │   ├── log_set_cc.go
│           │   │   │   │   ├── mul_nz_cc.go
│           │   │   │   │   ├── no_cc.go
│           │   │   │   │   ├── release_cc.go
│           │   │   │   │   ├── shift_nz_cc.go
│           │   │   │   │   ├── sub_nz_cc.go
│           │   │   │   │   ├── sub_set_cc.go
│           │   │   │   │   ├── true_cc.go
│           │   │   │   │   └── true_false_cc.go
│           │   │   │   ├── endian.go
│           │   │   │   ├── exception.go
│           │   │   │   ├── flag.go
│           │   │   │   ├── instruction.go
│           │   │   │   ├── op_code.go
│           │   │   │   ├── reg_descriptor/
│           │   │   │   │   ├── gp_reg_descriptor.go
│           │   │   │   │   ├── pair_reg_descriptor.go
│           │   │   │   │   ├── sp_reg_descriptor.go
│           │   │   │   │   └── src_reg_descriptor.go
│           │   │   │   └── suffix.go
│           │   │   ├── kernel.go
│           │   │   ├── label.go
│           │   │   ├── liveness.go
│           │   │   ├── relocatable.go
│           │   │   └── section.go
│           │   ├── lex_job.go
│           │   ├── lexer/
│           │   │   ├── keyword_factory.go
│           │   │   ├── lexer.go
│           │   │   ├── regex.go
│           │   │   ├── regex_factory.go
│           │   │   ├── token.go
│           │   │   ├── token_stream.go
│           │   │   └── tokenizer.go
│           │   ├── linker.go
│           │   ├── logic/
│           │   │   ├── instruction_assigner.go
│           │   │   ├── label_assigner.go
│           │   │   ├── linker_constant.go
│           │   │   ├── linker_script.go
│           │   │   ├── liveness_analyzer.go
│           │   │   └── set_assigner.go
│           │   ├── parse_job.go
│           │   └── parser/
│           │       ├── ast.go
│           │       ├── expr/
│           │       │   ├── binary_add_expr.go
│           │       │   ├── binary_sub_expr.go
│           │       │   ├── ci_op_code_expr.go
│           │       │   ├── condition_expr.go
│           │       │   ├── ddci_op_code_expr.go
│           │       │   ├── dma_rri_op_code_expr.go
│           │       │   ├── drdici_op_code_expr.go
│           │       │   ├── endian_expr.go
│           │       │   ├── expr.go
│           │       │   ├── i_op_code_expr.go
│           │       │   ├── jump_op_code_expr.go
│           │       │   ├── load_op_code_expr.go
│           │       │   ├── negative_number_expr.go
│           │       │   ├── primary_expr.go
│           │       │   ├── program_counter_expr.go
│           │       │   ├── r_op_code_expr.go
│           │       │   ├── rici_op_code_expr.go
│           │       │   ├── rr_op_code_expr.go
│           │       │   ├── rri_op_code_expr.go
│           │       │   ├── rrri_op_code_expr.go
│           │       │   ├── section_name_expr.go
│           │       │   ├── section_type_expr.go
│           │       │   ├── src_reg_expr.go
│           │       │   ├── store_op_code_expr.go
│           │       │   ├── suffix_expr.go
│           │       │   └── symbol_type.go
│           │       ├── parser.go
│           │       ├── rule.go
│           │       ├── stack.go
│           │       ├── stack_item.go
│           │       ├── stmt/
│           │       │   ├── directive/
│           │       │   │   ├── addrsig_stmt.go
│           │       │   │   ├── addrsig_sym_stmt.go
│           │       │   │   ├── ascii_stmt.go
│           │       │   │   ├── asciz_stmt.go
│           │       │   │   ├── byte_stmt.go
│           │       │   │   ├── cfi_def_cfa_offset_stmt.go
│           │       │   │   ├── cfi_endproc.go
│           │       │   │   ├── cfi_offset_stmt.go
│           │       │   │   ├── cfi_sections_stmt.go
│           │       │   │   ├── cfi_startproc_stmt.go
│           │       │   │   ├── file_number_stmt.go
│           │       │   │   ├── file_string_stmt.go
│           │       │   │   ├── global_stmt.go
│           │       │   │   ├── loc_is_stmt_stmt.go
│           │       │   │   ├── loc_number_stmt.go
│           │       │   │   ├── loc_prologue_end_stmt.go
│           │       │   │   ├── long_program_counter.go
│           │       │   │   ├── long_section_name_stmt.go
│           │       │   │   ├── p2_align_stmt.go
│           │       │   │   ├── quad_stmt.go
│           │       │   │   ├── section_identifier_number_stmt.go
│           │       │   │   ├── section_identifier_stmt.go
│           │       │   │   ├── section_stack_sizes_stmt.go
│           │       │   │   ├── section_string_number_stmt.go
│           │       │   │   ├── section_string_stmt.go
│           │       │   │   ├── set_stmt.go
│           │       │   │   ├── short_stmt.go
│           │       │   │   ├── size_stmt.go
│           │       │   │   ├── text_stmt.go
│           │       │   │   ├── type_stmt.go
│           │       │   │   ├── weak_stmt.go
│           │       │   │   ├── zero_double_number_stmt.go
│           │       │   │   └── zero_single_number_stmt.go
│           │       │   ├── instruction/
│           │       │   │   ├── ci_stmt.go
│           │       │   │   ├── ddci_stmt.go
│           │       │   │   ├── dma_rri_stmt.go
│           │       │   │   ├── drdici_stmt.go
│           │       │   │   ├── edri_stmt.go
│           │       │   │   ├── erid_stmt.go
│           │       │   │   ├── erii_stmt.go
│           │       │   │   ├── erir_stmt.go
│           │       │   │   ├── erri_stmt.go
│           │       │   │   ├── i_stmt.go
│           │       │   │   ├── nop_stmt.go
│           │       │   │   ├── r_stmt.go
│           │       │   │   ├── rci_stmt.go
│           │       │   │   ├── rici_stmt.go
│           │       │   │   ├── rir_stmt.go
│           │       │   │   ├── rirc_stmt.go
│           │       │   │   ├── rirci_stmt.go
│           │       │   │   ├── rr_stmt.go
│           │       │   │   ├── rrc_stmt.go
│           │       │   │   ├── rrci_stmt.go
│           │       │   │   ├── rri_stmt.go
│           │       │   │   ├── rric_stmt.go
│           │       │   │   ├── rrici_stmt.go
│           │       │   │   ├── rrr_stmt.go
│           │       │   │   ├── rrrc_stmt.go
│           │       │   │   ├── rrrci_stmt.go
│           │       │   │   ├── rrri_stmt.go
│           │       │   │   ├── rrrici_stmt.go
│           │       │   │   ├── s_erri_stmt.go
│           │       │   │   ├── s_r_stmt.go
│           │       │   │   ├── s_rci_stmt.go
│           │       │   │   ├── s_rirc_stmt.go
│           │       │   │   ├── s_rirci_stmt.go
│           │       │   │   ├── s_rr_stmt.go
│           │       │   │   ├── s_rrc_stmt.go
│           │       │   │   ├── s_rrci_stmt.go
│           │       │   │   ├── s_rri_stmt.go
│           │       │   │   ├── s_rric_stmt.go
│           │       │   │   ├── s_rrici_stmt.go
│           │       │   │   ├── s_rrr_stmt.go
│           │       │   │   ├── s_rrrc_stmt.go
│           │       │   │   ├── s_rrrci_stmt.go
│           │       │   │   ├── s_rrri_stmt.go
│           │       │   │   └── s_rrrici_stmt.go
│           │       │   ├── label_stmt.go
│           │       │   ├── stmt.go
│           │       │   └── sugar/
│           │       │       ├── bkp_stmt.go
│           │       │       ├── boot_ri_stmt.go
│           │       │       ├── call_ri_stmt.go
│           │       │       ├── call_rr_stmt.go
│           │       │       ├── div_step_drdi_stmt.go
│           │       │       ├── jeq_rii_stmt.go
│           │       │       ├── jeq_rri_stmt.go
│           │       │       ├── jnz_ri_stmt.go
│           │       │       ├── jump_i_stmt.go
│           │       │       ├── jump_r_stmt.go
│           │       │       ├── lbs_rri_stmt.go
│           │       │       ├── lbs_s_rri_stmt.go
│           │       │       ├── ld_dri_stmt.go
│           │       │       ├── movd_dd_stmt.go
│           │       │       ├── move_ri_stmt.go
│           │       │       ├── move_rici_stmt.go
│           │       │       ├── move_s_ri_stmt.go
│           │       │       ├── move_s_rici_stmt.go
│           │       │       ├── sb_id_ri_stmt.go
│           │       │       ├── sb_id_rii_stmt.go
│           │       │       ├── sb_rir_stmt.go
│           │       │       ├── sd_rid_stmt.go
│           │       │       ├── stop_stmt.go
│           │       │       └── time_cfg_r_stmt.go
│           │       ├── table.go
│           │       └── walker.go
│           ├── main.go
│           ├── misc/
│           │   ├── command_line_option.go
│           │   ├── command_line_parser.go
│           │   ├── command_line_validator.go
│           │   ├── config_loader.go
│           │   ├── config_validator.go
│           │   ├── file_dumper.go
│           │   ├── file_scanner.go
│           │   └── stat_factory.go
│           └── simulator/
│               ├── channel/
│               │   ├── channel.go
│               │   ├── channel_message.go
│               │   └── channel_message_q.go
│               ├── cycle_job.go
│               ├── dpu/
│               │   ├── dpu.go
│               │   ├── dram/
│               │   │   ├── dma_command.go
│               │   │   ├── dma_command_q.go
│               │   │   ├── memory_command.go
│               │   │   ├── memory_command_q.go
│               │   │   ├── memory_controller.go
│               │   │   ├── memory_scheduler.go
│               │   │   ├── mram.go
│               │   │   ├── row_buffer.go
│               │   │   └── wordline.go
│               │   ├── logic/
│               │   │   ├── alu.go
│               │   │   ├── cycle_rule.go
│               │   │   ├── dma.go
│               │   │   ├── instruction_q.go
│               │   │   ├── logic.go
│               │   │   ├── operand_collector.go
│               │   │   ├── pipeline.go
│               │   │   ├── reg_set.go
│               │   │   ├── thread.go
│               │   │   ├── thread_q.go
│               │   │   └── thread_scheduler.go
│               │   ├── reg/
│               │   │   ├── condition_reg.go
│               │   │   ├── exception_reg.go
│               │   │   ├── flag_reg.go
│               │   │   ├── gp_reg.go
│               │   │   ├── pc_reg.go
│               │   │   ├── reg_file.go
│               │   │   └── sp_reg.go
│               │   └── sram/
│               │       ├── atomic.go
│               │       ├── iram.go
│               │       ├── lock.go
│               │       └── wram.go
│               ├── host/
│               │   ├── channel_transfer_read_job.go
│               │   ├── channel_transfer_write_job.go
│               │   ├── chunk.go
│               │   ├── cycle_job.go
│               │   ├── dma_transfer_to_atomic_job.go
│               │   ├── dma_transfer_to_iram_job.go
│               │   ├── dma_transfer_to_mram_job.go
│               │   ├── dma_transfer_to_wram_job.go
│               │   └── host.go
│               ├── rank/
│               │   └── rank.go
│               └── simulator.go
├── golang_vm/
│   ├── README.md
│   └── uPIMulator/
│       ├── benchmark/
│       │   ├── BS/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── bs_omp.c
│       │   │   │   │   └── timer.h
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── binary_search.cu
│       │   │   │       ├── binary_search.h
│       │   │   │       ├── cpu_lib.py
│       │   │   │       ├── cu_lib_import.py
│       │   │   │       └── run.py
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── CMakeLists.txt
│       │   ├── GEMV/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── _BL_10.conf
│       │   │   ├── _NR_TASKLETS_10_BL_10.conf
│       │   │   ├── _NR_TASKLETS_11_BL_10.conf
│       │   │   ├── _NR_TASKLETS_12_BL_10.conf
│       │   │   ├── _NR_TASKLETS_13_BL_10.conf
│       │   │   ├── _NR_TASKLETS_14_BL_10.conf
│       │   │   ├── _NR_TASKLETS_15_BL_10.conf
│       │   │   ├── _NR_TASKLETS_16
│       │   │   ├── _NR_TASKLETS_16_BL_10.conf
│       │   │   ├── _NR_TASKLETS_17_BL_10.conf
│       │   │   ├── _NR_TASKLETS_18_BL_10.conf
│       │   │   ├── _NR_TASKLETS_19_BL_10.conf
│       │   │   ├── _NR_TASKLETS_1_BL_10.conf
│       │   │   ├── _NR_TASKLETS_20_BL_10.conf
│       │   │   ├── _NR_TASKLETS_21_BL_10.conf
│       │   │   ├── _NR_TASKLETS_22_BL_10.conf
│       │   │   ├── _NR_TASKLETS_23_BL_10.conf
│       │   │   ├── _NR_TASKLETS_24_BL_10.conf
│       │   │   ├── _NR_TASKLETS_2_BL_10.conf
│       │   │   ├── _NR_TASKLETS_3_BL_10.conf
│       │   │   ├── _NR_TASKLETS_4_BL_10.conf
│       │   │   ├── _NR_TASKLETS_5_BL_10.conf
│       │   │   ├── _NR_TASKLETS_6_BL_10.conf
│       │   │   ├── _NR_TASKLETS_7_BL_10.conf
│       │   │   ├── _NR_TASKLETS_8_BL_10.conf
│       │   │   ├── _NR_TASKLETS_9_BL_10.conf
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── gemv_openmp.c
│       │   │   │   │   └── gemv_utils.h
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── gemv.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── HST-L/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── input/
│       │   │   │   └── image_VanHateren.iml
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── HST-S/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── input/
│       │   │   │       │   └── image_VanHateren.iml
│       │   │   │       ├── kernel.cpp
│       │   │   │       ├── kernel.cu
│       │   │   │       ├── kernel.h
│       │   │   │       ├── main.cpp
│       │   │   │       └── support/
│       │   │   │           ├── common.h
│       │   │   │           ├── cuda-setup.h
│       │   │   │           ├── partitioner.h
│       │   │   │           ├── timer.h
│       │   │   │           └── verify.h
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── input/
│       │   │   │   └── image_VanHateren.iml
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── MLP/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── _BL_10.conf
│       │   │   ├── _NR_TASKLETS_10_BL_10.conf
│       │   │   ├── _NR_TASKLETS_11_BL_10.conf
│       │   │   ├── _NR_TASKLETS_12_BL_10.conf
│       │   │   ├── _NR_TASKLETS_13_BL_10.conf
│       │   │   ├── _NR_TASKLETS_14_BL_10.conf
│       │   │   ├── _NR_TASKLETS_15_BL_10.conf
│       │   │   ├── _NR_TASKLETS_16
│       │   │   ├── _NR_TASKLETS_16_BL_10.conf
│       │   │   ├── _NR_TASKLETS_17_BL_10.conf
│       │   │   ├── _NR_TASKLETS_18_BL_10.conf
│       │   │   ├── _NR_TASKLETS_19_BL_10.conf
│       │   │   ├── _NR_TASKLETS_1_BL_10.conf
│       │   │   ├── _NR_TASKLETS_20_BL_10.conf
│       │   │   ├── _NR_TASKLETS_21_BL_10.conf
│       │   │   ├── _NR_TASKLETS_22_BL_10.conf
│       │   │   ├── _NR_TASKLETS_23_BL_10.conf
│       │   │   ├── _NR_TASKLETS_24_BL_10.conf
│       │   │   ├── _NR_TASKLETS_2_BL_10.conf
│       │   │   ├── _NR_TASKLETS_3_BL_10.conf
│       │   │   ├── _NR_TASKLETS_4_BL_10.conf
│       │   │   ├── _NR_TASKLETS_5_BL_10.conf
│       │   │   ├── _NR_TASKLETS_6_BL_10.conf
│       │   │   ├── _NR_TASKLETS_7_BL_10.conf
│       │   │   ├── _NR_TASKLETS_8_BL_10.conf
│       │   │   ├── _NR_TASKLETS_9_BL_10.conf
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── mlp_openmp.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── mlp.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── RED/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.cpp
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── app_baseline.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── cyclecount.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SCAN-RSS/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.cpp
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── app_baseline.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SCAN-SSA/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SEL/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── ds.h
│       │   │   │       ├── kernel.cu
│       │   │   │       └── select.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── TRNS/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── kernel.cpp
│       │   │   │   │   ├── kernel.h
│       │   │   │   │   ├── main.cpp
│       │   │   │   │   └── support/
│       │   │   │   │       ├── common.h
│       │   │   │   │       ├── setup.h
│       │   │   │   │       ├── timer.h
│       │   │   │   │       └── verify.h
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── kernel.cu
│       │   │   │       ├── kernel.h
│       │   │   │       ├── main.cpp
│       │   │   │       └── support/
│       │   │   │           ├── common.h
│       │   │   │           ├── cuda-setup.h
│       │   │   │           ├── timer.h
│       │   │   │           └── verify.h
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── TS/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── UNI/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── ds.h
│       │   │   │       ├── kernel.cu
│       │   │   │       └── unique.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── VA/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── vec_add.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── VA_SIMPLE/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   └── host/
│       │   │       ├── CMakeLists.txt
│       │   │       └── app.c
│       │   └── build.py
│       ├── docker/
│       │   └── Dockerfile
│       ├── go.mod
│       ├── script/
│       │   ├── build.py
│       │   └── format.py
│       ├── sdk/
│       │   ├── CMakeLists.txt
│       │   ├── build.py
│       │   ├── misc/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── accessMramFromDpu.c
│       │   │   ├── coreDump.c
│       │   │   ├── crt0.c
│       │   │   ├── dpu.lds
│       │   │   ├── internalStateReset.c
│       │   │   ├── linkerScript.lds
│       │   │   ├── restoreRegisters.c
│       │   │   └── restore_carry_and_zero_flag.h
│       │   ├── stdlib/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── abort.c
│       │   │   ├── assert.h
│       │   │   ├── atoi.c
│       │   │   ├── atol.c
│       │   │   ├── ctype.h
│       │   │   ├── errno.c
│       │   │   ├── errno.h
│       │   │   ├── exit.c
│       │   │   ├── inttypes.h
│       │   │   ├── iso646.h
│       │   │   ├── limits.h
│       │   │   ├── memchr.c
│       │   │   ├── memcmp.c
│       │   │   ├── memcpy.c
│       │   │   ├── memmove.c
│       │   │   ├── memmram_utils.h
│       │   │   ├── memset.c
│       │   │   ├── stdalign.h
│       │   │   ├── stdarg.h
│       │   │   ├── stdbool.h
│       │   │   ├── stddef.h
│       │   │   ├── stdint.h
│       │   │   ├── stdio.c
│       │   │   ├── stdio.h
│       │   │   ├── stdlib.h
│       │   │   ├── stdnoreturn.h
│       │   │   ├── stpcpy.c
│       │   │   ├── stpncpy.c
│       │   │   ├── strcat.c
│       │   │   ├── strchr.c
│       │   │   ├── strcmp.c
│       │   │   ├── strcpy.c
│       │   │   ├── strcspn.c
│       │   │   ├── strdup.c
│       │   │   ├── strerror.c
│       │   │   ├── string.h
│       │   │   ├── strlen.c
│       │   │   ├── strlwr.c
│       │   │   ├── strncat.c
│       │   │   ├── strncmp.c
│       │   │   ├── strncpy.c
│       │   │   ├── strndup.c
│       │   │   ├── strnlen.c
│       │   │   ├── strpbrk.c
│       │   │   ├── strrchr.c
│       │   │   ├── strrev.c
│       │   │   ├── strsep.c
│       │   │   ├── strspn.c
│       │   │   ├── strstr.c
│       │   │   ├── strtok_r.c
│       │   │   ├── strtol.c
│       │   │   └── strupr.c
│       │   └── syslib/
│       │       ├── CMakeLists.txt
│       │       ├── absvdi2.c
│       │       ├── absvsi2.c
│       │       ├── adddf3.c
│       │       ├── addsf3.c
│       │       ├── addvdi3.c
│       │       ├── addvsi3.c
│       │       ├── alloc.c
│       │       ├── alloc.h
│       │       ├── ashldi3.c
│       │       ├── ashrdi3.c
│       │       ├── atomic_bit.h
│       │       ├── atomics.c
│       │       ├── attributes.h
│       │       ├── barrier.c
│       │       ├── barrier.h
│       │       ├── bswapdi2.c
│       │       ├── bswapsi2.c
│       │       ├── buddy_alloc.c
│       │       ├── buddy_alloc.h
│       │       ├── buddy_realloc.c
│       │       ├── built_ins.h
│       │       ├── clzdi2.c
│       │       ├── clzsi2.c
│       │       ├── cmpdi2.c
│       │       ├── comparedf2.c
│       │       ├── comparesf2.c
│       │       ├── ctzdi2.c
│       │       ├── ctzsi2.c
│       │       ├── defs.c
│       │       ├── defs.h
│       │       ├── devprivate.h
│       │       ├── div32.c
│       │       ├── divdf3.c
│       │       ├── divdi3.c
│       │       ├── divmodsi4.c
│       │       ├── divsf3.c
│       │       ├── divsi3.c
│       │       ├── dpuconst.h
│       │       ├── dpufault.h
│       │       ├── dpuruntime.h
│       │       ├── extendhfsf2.c
│       │       ├── extendsfdf2.c
│       │       ├── ffsdi2.c
│       │       ├── ffssi2.c
│       │       ├── ffsti2.c
│       │       ├── fixdfdi.c
│       │       ├── fixdfsi.c
│       │       ├── fixsfdi.c
│       │       ├── fixsfsi.c
│       │       ├── fixunsdfdi.c
│       │       ├── fixunsdfsi.c
│       │       ├── fixunssfdi.c
│       │       ├── fixunssfsi.c
│       │       ├── float.h
│       │       ├── floatdidf.c
│       │       ├── floatdisf.c
│       │       ├── floatsidf.c
│       │       ├── floatsisf.c
│       │       ├── floatundidf.c
│       │       ├── floatundisf.c
│       │       ├── floatunsidf.c
│       │       ├── floatunsisf.c
│       │       ├── fp_add_impl.inc
│       │       ├── fp_extend.h
│       │       ├── fp_extend_impl.inc
│       │       ├── fp_fixint_impl.inc
│       │       ├── fp_fixuint_impl.inc
│       │       ├── fp_lib.h
│       │       ├── fp_mul_impl.inc
│       │       ├── fp_trunc.h
│       │       ├── fp_trunc_impl.inc
│       │       ├── fsb_allocator.c
│       │       ├── fsb_allocator.h
│       │       ├── handshake.c
│       │       ├── handshake.h
│       │       ├── int_endianness.h
│       │       ├── int_lib.h
│       │       ├── int_math.h
│       │       ├── int_types.h
│       │       ├── int_util.c
│       │       ├── int_util.h
│       │       ├── listener.c
│       │       ├── lshrdi3.c
│       │       ├── macro_utils.h
│       │       ├── mcount.c
│       │       ├── moddi3.c
│       │       ├── modsi3.c
│       │       ├── mram.h
│       │       ├── mul32.c
│       │       ├── mul64.c
│       │       ├── muldc3.c
│       │       ├── muldf3.c
│       │       ├── mulodi4.c
│       │       ├── mulosi4.c
│       │       ├── mulsf3.c
│       │       ├── mulvdi3.c
│       │       ├── mulvsi3.c
│       │       ├── mutex.h
│       │       ├── negdf2.c
│       │       ├── negdi2.c
│       │       ├── negsf2.c
│       │       ├── negvdi2.c
│       │       ├── negvsi2.c
│       │       ├── paritydi2.c
│       │       ├── paritysi2.c
│       │       ├── perfcounter.c
│       │       ├── perfcounter.h
│       │       ├── popcountdi2.c
│       │       ├── popcountsi2.c
│       │       ├── powidf2.c
│       │       ├── powisf2.c
│       │       ├── profiling.c
│       │       ├── profiling.h
│       │       ├── profiling_internals.h
│       │       ├── sem.c
│       │       ├── sem.h
│       │       ├── seqread.h
│       │       ├── seqread.inc
│       │       ├── seqread1024.c
│       │       ├── seqread128.c
│       │       ├── seqread256.c
│       │       ├── seqread32.c
│       │       ├── seqread512.c
│       │       ├── seqread64.c
│       │       ├── soft_cache.c
│       │       ├── soft_cache.h
│       │       ├── subdf3.c
│       │       ├── subsf3.c
│       │       ├── subvdi3.c
│       │       ├── subvsi3.c
│       │       ├── sysdef.h
│       │       ├── truncdfhf2.c
│       │       ├── truncdfsf2.c
│       │       ├── truncsfhf2.c
│       │       ├── ucmpdi2.c
│       │       ├── udiv64.c
│       │       ├── udivdi3.c
│       │       ├── udivmodsi4.c
│       │       ├── udivsi3.c
│       │       ├── umoddi3.c
│       │       ├── umodsi3.c
│       │       └── waitqueue.c
│       └── src/
│           ├── device/
│           │   ├── abi/
│           │   │   ├── intermediate.go
│           │   │   └── word.go
│           │   ├── compiler/
│           │   │   └── compiler.go
│           │   ├── core/
│           │   │   ├── job.go
│           │   │   └── thread_pool.go
│           │   ├── linker/
│           │   │   ├── analyze_liveness_job.go
│           │   │   ├── kernel/
│           │   │   │   ├── directive/
│           │   │   │   │   ├── ascii_directive.go
│           │   │   │   │   ├── asciz_directive.go
│           │   │   │   │   ├── byte_directive.go
│           │   │   │   │   ├── long_directive.go
│           │   │   │   │   ├── quad_directive.go
│           │   │   │   │   ├── short_directive.go
│           │   │   │   │   └── zero_directive.go
│           │   │   │   ├── encodable.go
│           │   │   │   ├── executable.go
│           │   │   │   ├── instruction/
│           │   │   │   │   ├── cc/
│           │   │   │   │   │   ├── acquire_cc.go
│           │   │   │   │   │   ├── add_nz_cc.go
│           │   │   │   │   │   ├── boot_cc.go
│           │   │   │   │   │   ├── cc.go
│           │   │   │   │   │   ├── const_cc_ge0.go
│           │   │   │   │   │   ├── const_cc_geu.go
│           │   │   │   │   │   ├── const_cc_zero.go
│           │   │   │   │   │   ├── count_nz_cc.go
│           │   │   │   │   │   ├── div_cc.go
│           │   │   │   │   │   ├── div_nz_cc.go
│           │   │   │   │   │   ├── ext_sub_set_cc.go
│           │   │   │   │   │   ├── false_cc.go
│           │   │   │   │   │   ├── imm_shift_nz_cc.go
│           │   │   │   │   │   ├── log_nz_cc.go
│           │   │   │   │   │   ├── log_set_cc.go
│           │   │   │   │   │   ├── mul_nz_cc.go
│           │   │   │   │   │   ├── no_cc.go
│           │   │   │   │   │   ├── release_cc.go
│           │   │   │   │   │   ├── shift_nz_cc.go
│           │   │   │   │   │   ├── sub_nz_cc.go
│           │   │   │   │   │   ├── sub_set_cc.go
│           │   │   │   │   │   ├── true_cc.go
│           │   │   │   │   │   └── true_false_cc.go
│           │   │   │   │   ├── endian.go
│           │   │   │   │   ├── exception.go
│           │   │   │   │   ├── flag.go
│           │   │   │   │   ├── instruction.go
│           │   │   │   │   ├── op_code.go
│           │   │   │   │   ├── reg_descriptor/
│           │   │   │   │   │   ├── gp_reg_descriptor.go
│           │   │   │   │   │   ├── pair_reg_descriptor.go
│           │   │   │   │   │   ├── sp_reg_descriptor.go
│           │   │   │   │   │   └── src_reg_descriptor.go
│           │   │   │   │   └── suffix.go
│           │   │   │   ├── kernel.go
│           │   │   │   ├── label.go
│           │   │   │   ├── liveness.go
│           │   │   │   ├── relocatable.go
│           │   │   │   └── section.go
│           │   │   ├── lex_job.go
│           │   │   ├── lexer/
│           │   │   │   ├── keyword_factory.go
│           │   │   │   ├── lexer.go
│           │   │   │   ├── regex.go
│           │   │   │   ├── regex_factory.go
│           │   │   │   ├── token.go
│           │   │   │   ├── token_stream.go
│           │   │   │   └── tokenizer.go
│           │   │   ├── linker.go
│           │   │   ├── logic/
│           │   │   │   ├── instruction_assigner.go
│           │   │   │   ├── label_assigner.go
│           │   │   │   ├── linker_constant.go
│           │   │   │   ├── linker_script.go
│           │   │   │   ├── liveness_analyzer.go
│           │   │   │   └── set_assigner.go
│           │   │   ├── parse_job.go
│           │   │   └── parser/
│           │   │       ├── ast.go
│           │   │       ├── expr/
│           │   │       │   ├── binary_add_expr.go
│           │   │       │   ├── binary_sub_expr.go
│           │   │       │   ├── ci_op_code_expr.go
│           │   │       │   ├── condition_expr.go
│           │   │       │   ├── ddci_op_code_expr.go
│           │   │       │   ├── dma_rri_op_code_expr.go
│           │   │       │   ├── drdici_op_code_expr.go
│           │   │       │   ├── endian_expr.go
│           │   │       │   ├── expr.go
│           │   │       │   ├── i_op_code_expr.go
│           │   │       │   ├── jump_op_code_expr.go
│           │   │       │   ├── load_op_code_expr.go
│           │   │       │   ├── negative_number_expr.go
│           │   │       │   ├── primary_expr.go
│           │   │       │   ├── program_counter_expr.go
│           │   │       │   ├── r_op_code_expr.go
│           │   │       │   ├── rici_op_code_expr.go
│           │   │       │   ├── rr_op_code_expr.go
│           │   │       │   ├── rri_op_code_expr.go
│           │   │       │   ├── rrri_op_code_expr.go
│           │   │       │   ├── section_name_expr.go
│           │   │       │   ├── section_type_expr.go
│           │   │       │   ├── src_reg_expr.go
│           │   │       │   ├── store_op_code_expr.go
│           │   │       │   ├── suffix_expr.go
│           │   │       │   └── symbol_type.go
│           │   │       ├── parser.go
│           │   │       ├── rule.go
│           │   │       ├── stack.go
│           │   │       ├── stack_item.go
│           │   │       ├── stmt/
│           │   │       │   ├── directive/
│           │   │       │   │   ├── addrsig_stmt.go
│           │   │       │   │   ├── addrsig_sym_stmt.go
│           │   │       │   │   ├── ascii_stmt.go
│           │   │       │   │   ├── asciz_stmt.go
│           │   │       │   │   ├── byte_stmt.go
│           │   │       │   │   ├── cfi_def_cfa_offset_stmt.go
│           │   │       │   │   ├── cfi_endproc.go
│           │   │       │   │   ├── cfi_offset_stmt.go
│           │   │       │   │   ├── cfi_sections_stmt.go
│           │   │       │   │   ├── cfi_startproc_stmt.go
│           │   │       │   │   ├── file_number_stmt.go
│           │   │       │   │   ├── file_string_stmt.go
│           │   │       │   │   ├── global_stmt.go
│           │   │       │   │   ├── loc_is_stmt_stmt.go
│           │   │       │   │   ├── loc_number_stmt.go
│           │   │       │   │   ├── loc_prologue_end_stmt.go
│           │   │       │   │   ├── long_program_counter.go
│           │   │       │   │   ├── long_section_name_stmt.go
│           │   │       │   │   ├── p2_align_stmt.go
│           │   │       │   │   ├── quad_stmt.go
│           │   │       │   │   ├── section_identifier_number_stmt.go
│           │   │       │   │   ├── section_identifier_stmt.go
│           │   │       │   │   ├── section_stack_sizes_stmt.go
│           │   │       │   │   ├── section_string_number_stmt.go
│           │   │       │   │   ├── section_string_stmt.go
│           │   │       │   │   ├── set_stmt.go
│           │   │       │   │   ├── short_stmt.go
│           │   │       │   │   ├── size_stmt.go
│           │   │       │   │   ├── text_stmt.go
│           │   │       │   │   ├── type_stmt.go
│           │   │       │   │   ├── weak_stmt.go
│           │   │       │   │   ├── zero_double_number_stmt.go
│           │   │       │   │   └── zero_single_number_stmt.go
│           │   │       │   ├── instruction/
│           │   │       │   │   ├── ci_stmt.go
│           │   │       │   │   ├── ddci_stmt.go
│           │   │       │   │   ├── dma_rri_stmt.go
│           │   │       │   │   ├── drdici_stmt.go
│           │   │       │   │   ├── edri_stmt.go
│           │   │       │   │   ├── erid_stmt.go
│           │   │       │   │   ├── erii_stmt.go
│           │   │       │   │   ├── erir_stmt.go
│           │   │       │   │   ├── erri_stmt.go
│           │   │       │   │   ├── i_stmt.go
│           │   │       │   │   ├── nop_stmt.go
│           │   │       │   │   ├── r_stmt.go
│           │   │       │   │   ├── rci_stmt.go
│           │   │       │   │   ├── rici_stmt.go
│           │   │       │   │   ├── rir_stmt.go
│           │   │       │   │   ├── rirc_stmt.go
│           │   │       │   │   ├── rirci_stmt.go
│           │   │       │   │   ├── rr_stmt.go
│           │   │       │   │   ├── rrc_stmt.go
│           │   │       │   │   ├── rrci_stmt.go
│           │   │       │   │   ├── rri_stmt.go
│           │   │       │   │   ├── rric_stmt.go
│           │   │       │   │   ├── rrici_stmt.go
│           │   │       │   │   ├── rrr_stmt.go
│           │   │       │   │   ├── rrrc_stmt.go
│           │   │       │   │   ├── rrrci_stmt.go
│           │   │       │   │   ├── rrri_stmt.go
│           │   │       │   │   ├── rrrici_stmt.go
│           │   │       │   │   ├── s_erri_stmt.go
│           │   │       │   │   ├── s_r_stmt.go
│           │   │       │   │   ├── s_rci_stmt.go
│           │   │       │   │   ├── s_rirc_stmt.go
│           │   │       │   │   ├── s_rirci_stmt.go
│           │   │       │   │   ├── s_rr_stmt.go
│           │   │       │   │   ├── s_rrc_stmt.go
│           │   │       │   │   ├── s_rrci_stmt.go
│           │   │       │   │   ├── s_rri_stmt.go
│           │   │       │   │   ├── s_rric_stmt.go
│           │   │       │   │   ├── s_rrici_stmt.go
│           │   │       │   │   ├── s_rrr_stmt.go
│           │   │       │   │   ├── s_rrrc_stmt.go
│           │   │       │   │   ├── s_rrrci_stmt.go
│           │   │       │   │   ├── s_rrri_stmt.go
│           │   │       │   │   └── s_rrrici_stmt.go
│           │   │       │   ├── label_stmt.go
│           │   │       │   ├── stmt.go
│           │   │       │   └── sugar/
│           │   │       │       ├── bkp_stmt.go
│           │   │       │       ├── boot_ri_stmt.go
│           │   │       │       ├── call_ri_stmt.go
│           │   │       │       ├── call_rr_stmt.go
│           │   │       │       ├── div_step_drdi_stmt.go
│           │   │       │       ├── jeq_rii_stmt.go
│           │   │       │       ├── jeq_rri_stmt.go
│           │   │       │       ├── jnz_ri_stmt.go
│           │   │       │       ├── jump_i_stmt.go
│           │   │       │       ├── jump_r_stmt.go
│           │   │       │       ├── lbs_rri_stmt.go
│           │   │       │       ├── lbs_s_rri_stmt.go
│           │   │       │       ├── ld_dri_stmt.go
│           │   │       │       ├── movd_dd_stmt.go
│           │   │       │       ├── move_ri_stmt.go
│           │   │       │       ├── move_rici_stmt.go
│           │   │       │       ├── move_s_ri_stmt.go
│           │   │       │       ├── move_s_rici_stmt.go
│           │   │       │       ├── sb_id_ri_stmt.go
│           │   │       │       ├── sb_id_rii_stmt.go
│           │   │       │       ├── sb_rir_stmt.go
│           │   │       │       ├── sd_rid_stmt.go
│           │   │       │       ├── stop_stmt.go
│           │   │       │       └── time_cfg_r_stmt.go
│           │   │       ├── table.go
│           │   │       └── walker.go
│           │   └── simulator/
│           │       ├── channel/
│           │       │   ├── channel.go
│           │       │   ├── channel_command.go
│           │       │   └── channel_command_q.go
│           │       ├── dpu/
│           │       │   ├── control_interface.go
│           │       │   ├── dpu.go
│           │       │   ├── dram/
│           │       │   │   ├── dma_command.go
│           │       │   │   ├── dma_command_q.go
│           │       │   │   ├── memory_command.go
│           │       │   │   ├── memory_command_q.go
│           │       │   │   ├── memory_controller.go
│           │       │   │   ├── memory_scheduler.go
│           │       │   │   ├── mram.go
│           │       │   │   ├── row_buffer.go
│           │       │   │   └── wordline.go
│           │       │   ├── logic/
│           │       │   │   ├── alu.go
│           │       │   │   ├── cycle_rule.go
│           │       │   │   ├── dma.go
│           │       │   │   ├── instruction_q.go
│           │       │   │   ├── logic.go
│           │       │   │   ├── operand_collector.go
│           │       │   │   ├── pipeline.go
│           │       │   │   ├── reg_set.go
│           │       │   │   ├── thread.go
│           │       │   │   ├── thread_q.go
│           │       │   │   └── thread_scheduler.go
│           │       │   ├── reg/
│           │       │   │   ├── condition_reg.go
│           │       │   │   ├── exception_reg.go
│           │       │   │   ├── flag_reg.go
│           │       │   │   ├── gp_reg.go
│           │       │   │   ├── pc_reg.go
│           │       │   │   ├── reg_file.go
│           │       │   │   └── sp_reg.go
│           │       │   └── sram/
│           │       │       ├── atomic.go
│           │       │       ├── iram.go
│           │       │       ├── lock.go
│           │       │       └── wram.go
│           │       └── rank/
│           │           ├── rank.go
│           │           ├── rank_command.go
│           │           └── rank_command_q.go
│           ├── encoding/
│           │   ├── ascii_encoder.go
│           │   └── byte_stream.go
│           ├── host/
│           │   ├── abi/
│           │   │   ├── binary.go
│           │   │   ├── bytecode.go
│           │   │   ├── label.go
│           │   │   ├── op_code.go
│           │   │   └── relocatable.go
│           │   ├── interpreter/
│           │   │   ├── codegen/
│           │   │   │   ├── codegen.go
│           │   │   │   └── type_system/
│           │   │   │       ├── method.go
│           │   │   │       ├── symbol.go
│           │   │   │       └── type_system.go
│           │   │   ├── interpreter.go
│           │   │   ├── lexer/
│           │   │   │   ├── keyword_factory.go
│           │   │   │   ├── lexer.go
│           │   │   │   ├── regex.go
│           │   │   │   ├── regex_factory.go
│           │   │   │   ├── token.go
│           │   │   │   ├── token_stream.go
│           │   │   │   └── tokenizer.go
│           │   │   └── parser/
│           │   │       ├── ast.go
│           │   │       ├── decl/
│           │   │       │   ├── decl.go
│           │   │       │   ├── func_decl.go
│           │   │       │   ├── func_def.go
│           │   │       │   └── struct_def.go
│           │   │       ├── directive/
│           │   │       │   ├── define_directive.go
│           │   │       │   ├── directive.go
│           │   │       │   └── include_directive.go
│           │   │       ├── expr/
│           │   │       │   ├── additive_expr.go
│           │   │       │   ├── arg_list.go
│           │   │       │   ├── assignment_expr.go
│           │   │       │   ├── bitwise_and_expr.go
│           │   │       │   ├── bitwise_or_expr.go
│           │   │       │   ├── bitwise_xor_expr.go
│           │   │       │   ├── conditional_expr.go
│           │   │       │   ├── equality_expr.go
│           │   │       │   ├── expr.go
│           │   │       │   ├── logical_and_expr.go
│           │   │       │   ├── logical_or_expr.go
│           │   │       │   ├── multiplicative_expr.go
│           │   │       │   ├── postfix_expr.go
│           │   │       │   ├── primary_expr.go
│           │   │       │   ├── relational_expr.go
│           │   │       │   ├── shift_expr.go
│           │   │       │   └── unary_expr.go
│           │   │       ├── param_list/
│           │   │       │   ├── param.go
│           │   │       │   └── param_list.go
│           │   │       ├── parser.go
│           │   │       ├── rule.go
│           │   │       ├── stack.go
│           │   │       ├── stack_item.go
│           │   │       ├── stmt/
│           │   │       │   ├── block_stmt.go
│           │   │       │   ├── break_stmt.go
│           │   │       │   ├── continue_stmt.go
│           │   │       │   ├── dpu_foreach_stmt.go
│           │   │       │   ├── empty_stmt.go
│           │   │       │   ├── expr_stmt.go
│           │   │       │   ├── for_stmt.go
│           │   │       │   ├── if_stmt.go
│           │   │       │   ├── return_stmt.go
│           │   │       │   ├── stmt.go
│           │   │       │   ├── var_decl_init_stmt.go
│           │   │       │   ├── var_decl_stmt.go
│           │   │       │   └── while_stmt.go
│           │   │       ├── table.go
│           │   │       └── type_specifier/
│           │   │           └── type_specifier.go
│           │   └── vm/
│           │       ├── arena/
│           │       │   ├── arena.go
│           │       │   ├── garbage_collector.go
│           │       │   ├── memory.go
│           │       │   └── pool.go
│           │       ├── bank_cycle_job.go
│           │       ├── base/
│           │       │   └── object.go
│           │       ├── dpu_compute_cycle_job.go
│           │       ├── dpu_cycle_job.go
│           │       ├── dpu_load_job.go
│           │       ├── dram/
│           │       │   ├── bank/
│           │       │   │   ├── array.go
│           │       │   │   ├── bank.go
│           │       │   │   ├── dma_command.go
│           │       │   │   ├── dma_command_q.go
│           │       │   │   ├── memory_command.go
│           │       │   │   ├── memory_command_q.go
│           │       │   │   ├── row_buffer.go
│           │       │   │   ├── segment.go
│           │       │   │   ├── transfer_command.go
│           │       │   │   ├── transfer_command_q.go
│           │       │   │   └── wordline.go
│           │       │   ├── channel/
│           │       │   │   ├── channel.go
│           │       │   │   ├── channel_command.go
│           │       │   │   └── channel_command_q.go
│           │       │   ├── memory_controller.go
│           │       │   ├── memory_mapping.go
│           │       │   ├── memory_scheduler.go
│           │       │   └── rank/
│           │       │       ├── rank.go
│           │       │       ├── rank_command.go
│           │       │       └── rank_command_q.go
│           │       ├── frame/
│           │       │   ├── frame.go
│           │       │   └── frame_chain.go
│           │       ├── pc/
│           │       │   └── pc.go
│           │       ├── stack/
│           │       │   ├── return_stack.go
│           │       │   ├── stack.go
│           │       │   └── stack_item.go
│           │       ├── symbol/
│           │       │   ├── scope.go
│           │       │   ├── scope_chain.go
│           │       │   └── symbol.go
│           │       ├── type_system/
│           │       │   ├── field.go
│           │       │   ├── registry.go
│           │       │   ├── skeleton.go
│           │       │   └── type_variable.go
│           │       └── virtual_machine.go
│           ├── main.go
│           ├── misc/
│           │   ├── command_line_option.go
│           │   ├── command_line_parser.go
│           │   ├── command_line_validator.go
│           │   ├── config_loader.go
│           │   ├── config_validator.go
│           │   ├── file_dumper.go
│           │   ├── file_scanner.go
│           │   └── stat_factory.go
│           ├── program/
│           │   ├── app.go
│           │   └── task.go
│           └── system/
│               └── system.go
├── python_cpp/
│   ├── README.md
│   ├── uPIMulator_backend/
│   │   ├── CMakeLists.txt
│   │   ├── script/
│   │   │   ├── build.sh
│   │   │   ├── format.sh
│   │   │   ├── run.sh
│   │   │   └── run_serial.sh
│   │   └── src/
│   │       ├── CMakeLists.txt
│   │       ├── abi/
│   │       │   ├── cc/
│   │       │   │   ├── _base_cc.cc
│   │       │   │   ├── _base_cc.h
│   │       │   │   ├── acquire_cc.h
│   │       │   │   ├── add_nz_cc.h
│   │       │   │   ├── boot_cc.h
│   │       │   │   ├── const_cc_ge0.h
│   │       │   │   ├── const_cc_geu.h
│   │       │   │   ├── const_cc_zero.h
│   │       │   │   ├── count_nz_cc.h
│   │       │   │   ├── div_cc.h
│   │       │   │   ├── div_nz_cc.h
│   │       │   │   ├── ext_sub_set_cc.h
│   │       │   │   ├── false_cc.h
│   │       │   │   ├── imm_shift_nz_cc.h
│   │       │   │   ├── log_nz_cc.h
│   │       │   │   ├── log_set_cc.h
│   │       │   │   ├── mul_nz_cc.h
│   │       │   │   ├── no_cc.h
│   │       │   │   ├── release_cc.h
│   │       │   │   ├── shift_nz_cc.h
│   │       │   │   ├── sub_nz_cc.h
│   │       │   │   ├── sub_set_cc.h
│   │       │   │   ├── true_cc.h
│   │       │   │   └── true_false_cc.h
│   │       │   ├── instruction/
│   │       │   │   ├── instruction.cc
│   │       │   │   ├── instruction.h
│   │       │   │   ├── op_code.h
│   │       │   │   └── suffix.h
│   │       │   ├── isa/
│   │       │   │   ├── condition.h
│   │       │   │   ├── endian.h
│   │       │   │   ├── exception.h
│   │       │   │   └── flag.h
│   │       │   ├── reg/
│   │       │   │   ├── gp_reg.h
│   │       │   │   ├── pair_reg.cc
│   │       │   │   ├── pair_reg.h
│   │       │   │   ├── sp_reg.h
│   │       │   │   ├── src_reg.cc
│   │       │   │   └── src_reg.h
│   │       │   └── word/
│   │       │       ├── _base_word.cc
│   │       │       ├── _base_word.h
│   │       │       ├── data_address_word.h
│   │       │       ├── data_word.h
│   │       │       ├── immediate.h
│   │       │       ├── instruction_address_word.h
│   │       │       ├── instruction_word.h
│   │       │       └── representation.h
│   │       ├── converter/
│   │       │   ├── condition_converter.cc
│   │       │   ├── condition_converter.h
│   │       │   ├── endian_converter.cc
│   │       │   ├── endian_converter.h
│   │       │   ├── flag_converter.cc
│   │       │   ├── flag_converter.h
│   │       │   ├── instruction_converter.cc
│   │       │   ├── instruction_converter.h
│   │       │   ├── op_code_converter.cc
│   │       │   ├── op_code_converter.h
│   │       │   ├── reg_converter.cc
│   │       │   ├── reg_converter.h
│   │       │   ├── reg_file_converter.cc
│   │       │   ├── reg_file_converter.h
│   │       │   ├── suffix_converter.cc
│   │       │   └── suffix_converter.h
│   │       ├── encoder/
│   │       │   ├── byte.h
│   │       │   ├── byte_stream.cc
│   │       │   ├── byte_stream.h
│   │       │   ├── instruction_encoder.cc
│   │       │   └── instruction_encoder.h
│   │       ├── initializer/
│   │       │   ├── int_initializer.cc
│   │       │   ├── int_initializer.h
│   │       │   ├── str_initializer.h
│   │       │   └── str_initialzier.cc
│   │       ├── main.cc
│   │       ├── main.h
│   │       ├── simulator/
│   │       │   ├── basic/
│   │       │   │   ├── queue.h
│   │       │   │   └── timer_queue.h
│   │       │   ├── cpu/
│   │       │   │   ├── cpu.cc
│   │       │   │   ├── cpu.h
│   │       │   │   ├── fini_thread.cc
│   │       │   │   ├── fini_thread.h
│   │       │   │   ├── init_thread.cc
│   │       │   │   ├── init_thread.h
│   │       │   │   ├── sched_thread.cc
│   │       │   │   ├── sched_thread.h
│   │       │   │   ├── thread.cc
│   │       │   │   └── thread.h
│   │       │   ├── dpu/
│   │       │   │   ├── alu.cc
│   │       │   │   ├── alu.h
│   │       │   │   ├── cycle_rule.cc
│   │       │   │   ├── cycle_rule.h
│   │       │   │   ├── dma.cc
│   │       │   │   ├── dma.h
│   │       │   │   ├── dma_command.cc
│   │       │   │   ├── dma_command.h
│   │       │   │   ├── dpu.cc
│   │       │   │   ├── dpu.h
│   │       │   │   ├── logic.cc
│   │       │   │   ├── logic.h
│   │       │   │   ├── operand_collector.cc
│   │       │   │   ├── operand_collector.h
│   │       │   │   ├── pipeline.cc
│   │       │   │   ├── pipeline.h
│   │       │   │   ├── revolver_scheduler.cc
│   │       │   │   ├── revolver_scheduler.h
│   │       │   │   ├── thread.cc
│   │       │   │   └── thread.h
│   │       │   ├── dram/
│   │       │   │   ├── fifo_scheduler.cc
│   │       │   │   ├── fifo_scheduler.h
│   │       │   │   ├── frfcfs_scheduler.cc
│   │       │   │   ├── frfcfs_scheduler.h
│   │       │   │   ├── memory_command.cc
│   │       │   │   ├── memory_command.h
│   │       │   │   ├── memory_controller.cc
│   │       │   │   ├── memory_controller.h
│   │       │   │   ├── mram.cc
│   │       │   │   ├── mram.h
│   │       │   │   ├── row_buffer.cc
│   │       │   │   ├── row_buffer.h
│   │       │   │   ├── scheduler.cc
│   │       │   │   ├── scheduler.h
│   │       │   │   ├── wordline.cc
│   │       │   │   └── wordline.h
│   │       │   ├── rank/
│   │       │   │   ├── rank.cc
│   │       │   │   ├── rank.h
│   │       │   │   └── rank_message.h
│   │       │   ├── reg/
│   │       │   │   ├── condition_reg.cc
│   │       │   │   ├── condition_reg.h
│   │       │   │   ├── exception_reg.h
│   │       │   │   ├── flag_reg.h
│   │       │   │   ├── gp_reg.cc
│   │       │   │   ├── gp_reg.h
│   │       │   │   ├── pc_reg.h
│   │       │   │   ├── reg_file.cc
│   │       │   │   ├── reg_file.h
│   │       │   │   ├── sp_reg.cc
│   │       │   │   └── sp_reg.h
│   │       │   ├── sram/
│   │       │   │   ├── atomic.cc
│   │       │   │   ├── atomic.h
│   │       │   │   ├── iram.cc
│   │       │   │   ├── iram.h
│   │       │   │   ├── lock.cc
│   │       │   │   ├── lock.h
│   │       │   │   ├── wram.cc
│   │       │   │   └── wram.h
│   │       │   ├── system.cc
│   │       │   └── system.h
│   │       └── util/
│   │           ├── argument_parser.cc
│   │           ├── argument_parser.h
│   │           ├── config_loader.h
│   │           ├── stat_factory.cc
│   │           └── stat_factory.h
│   └── uPIMulator_frontend/
│       ├── .flake8
│       ├── .hadolint.yaml
│       ├── .isort.cfg
│       ├── .markdownlint.yaml
│       ├── .shellcheckrc
│       ├── benchmark/
│       │   ├── Arithmetic-Throughput/
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── cyclecount.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── BFS/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── app.cu
│       │   │   ├── data/
│       │   │   │   └── loc-gowalla_edges.txt
│       │   │   ├── dpu/
│       │   │   │   ├── dpu-utils.h
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   ├── app.c
│       │   │   │   └── mram-management.h
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── graph.h
│       │   │       ├── params.h
│       │   │       ├── timer.h
│       │   │       └── utils.h
│       │   ├── BS/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── bs_omp.c
│       │   │   │   │   └── timer.h
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── binary_search.cu
│       │   │   │       ├── binary_search.h
│       │   │   │       ├── cpu_lib.py
│       │   │   │       ├── cu_lib_import.py
│       │   │   │       └── run.py
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── CPU-DPU/
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── GEMV/
│       │   │   ├── Makefile
│       │   │   ├── _BL_10.conf
│       │   │   ├── _NR_TASKLETS_10_BL_10.conf
│       │   │   ├── _NR_TASKLETS_11_BL_10.conf
│       │   │   ├── _NR_TASKLETS_12_BL_10.conf
│       │   │   ├── _NR_TASKLETS_13_BL_10.conf
│       │   │   ├── _NR_TASKLETS_14_BL_10.conf
│       │   │   ├── _NR_TASKLETS_15_BL_10.conf
│       │   │   ├── _NR_TASKLETS_16
│       │   │   ├── _NR_TASKLETS_16_BL_10.conf
│       │   │   ├── _NR_TASKLETS_17_BL_10.conf
│       │   │   ├── _NR_TASKLETS_18_BL_10.conf
│       │   │   ├── _NR_TASKLETS_19_BL_10.conf
│       │   │   ├── _NR_TASKLETS_1_BL_10.conf
│       │   │   ├── _NR_TASKLETS_20_BL_10.conf
│       │   │   ├── _NR_TASKLETS_21_BL_10.conf
│       │   │   ├── _NR_TASKLETS_22_BL_10.conf
│       │   │   ├── _NR_TASKLETS_23_BL_10.conf
│       │   │   ├── _NR_TASKLETS_24_BL_10.conf
│       │   │   ├── _NR_TASKLETS_2_BL_10.conf
│       │   │   ├── _NR_TASKLETS_3_BL_10.conf
│       │   │   ├── _NR_TASKLETS_4_BL_10.conf
│       │   │   ├── _NR_TASKLETS_5_BL_10.conf
│       │   │   ├── _NR_TASKLETS_6_BL_10.conf
│       │   │   ├── _NR_TASKLETS_7_BL_10.conf
│       │   │   ├── _NR_TASKLETS_8_BL_10.conf
│       │   │   ├── _NR_TASKLETS_9_BL_10.conf
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── gemv_openmp.c
│       │   │   │   │   └── gemv_utils.h
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── gemv.cu
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── HST-L/
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── input/
│       │   │   │   └── image_VanHateren.iml
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── HST-S/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── input/
│       │   │   │       │   └── image_VanHateren.iml
│       │   │   │       ├── kernel.cpp
│       │   │   │       ├── kernel.cu
│       │   │   │       ├── kernel.h
│       │   │   │       ├── main.cpp
│       │   │   │       └── support/
│       │   │   │           ├── common.h
│       │   │   │           ├── cuda-setup.h
│       │   │   │           ├── partitioner.h
│       │   │   │           ├── timer.h
│       │   │   │           └── verify.h
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── input/
│       │   │   │   └── image_VanHateren.iml
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── MLP/
│       │   │   ├── Makefile
│       │   │   ├── _BL_10.conf
│       │   │   ├── _NR_TASKLETS_10_BL_10.conf
│       │   │   ├── _NR_TASKLETS_11_BL_10.conf
│       │   │   ├── _NR_TASKLETS_12_BL_10.conf
│       │   │   ├── _NR_TASKLETS_13_BL_10.conf
│       │   │   ├── _NR_TASKLETS_14_BL_10.conf
│       │   │   ├── _NR_TASKLETS_15_BL_10.conf
│       │   │   ├── _NR_TASKLETS_16
│       │   │   ├── _NR_TASKLETS_16_BL_10.conf
│       │   │   ├── _NR_TASKLETS_17_BL_10.conf
│       │   │   ├── _NR_TASKLETS_18_BL_10.conf
│       │   │   ├── _NR_TASKLETS_19_BL_10.conf
│       │   │   ├── _NR_TASKLETS_1_BL_10.conf
│       │   │   ├── _NR_TASKLETS_20_BL_10.conf
│       │   │   ├── _NR_TASKLETS_21_BL_10.conf
│       │   │   ├── _NR_TASKLETS_22_BL_10.conf
│       │   │   ├── _NR_TASKLETS_23_BL_10.conf
│       │   │   ├── _NR_TASKLETS_24_BL_10.conf
│       │   │   ├── _NR_TASKLETS_2_BL_10.conf
│       │   │   ├── _NR_TASKLETS_3_BL_10.conf
│       │   │   ├── _NR_TASKLETS_4_BL_10.conf
│       │   │   ├── _NR_TASKLETS_5_BL_10.conf
│       │   │   ├── _NR_TASKLETS_6_BL_10.conf
│       │   │   ├── _NR_TASKLETS_7_BL_10.conf
│       │   │   ├── _NR_TASKLETS_8_BL_10.conf
│       │   │   ├── _NR_TASKLETS_9_BL_10.conf
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── mlp_openmp.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── mlp.cu
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── MRAM-Latency/
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   └── copy.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── cyclecount.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── NW/
│       │   │   ├── .conf
│       │   │   ├── Makefile
│       │   │   ├── _NR_TASKLETS_10_BL_1024
│       │   │   ├── _NR_TASKLETS_11_BL_1024
│       │   │   ├── _NR_TASKLETS_12_BL_1024
│       │   │   ├── _NR_TASKLETS_13_BL_1024
│       │   │   ├── _NR_TASKLETS_14_BL_1024
│       │   │   ├── _NR_TASKLETS_15_BL_1024
│       │   │   ├── _NR_TASKLETS_16_BL_1024
│       │   │   ├── _NR_TASKLETS_17_BL_1024
│       │   │   ├── _NR_TASKLETS_18_BL_1024
│       │   │   ├── _NR_TASKLETS_19_BL_1024
│       │   │   ├── _NR_TASKLETS_1_BL_1024
│       │   │   ├── _NR_TASKLETS_20_BL_1024
│       │   │   ├── _NR_TASKLETS_21_BL_1024
│       │   │   ├── _NR_TASKLETS_22_BL_1024
│       │   │   ├── _NR_TASKLETS_23_BL_1024
│       │   │   ├── _NR_TASKLETS_24_BL_1024
│       │   │   ├── _NR_TASKLETS_2_BL_1024
│       │   │   ├── _NR_TASKLETS_3_BL_1024
│       │   │   ├── _NR_TASKLETS_4_BL_1024
│       │   │   ├── _NR_TASKLETS_5_BL_1024
│       │   │   ├── _NR_TASKLETS_6_BL_1024
│       │   │   ├── _NR_TASKLETS_7_BL_1024
│       │   │   ├── _NR_TASKLETS_8_BL_1024
│       │   │   ├── _NR_TASKLETS_9_BL_1024
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── needle.cpp
│       │   │   │   │   ├── run
│       │   │   │   │   └── run_offload
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── Makefile_nvidia
│       │   │   │       ├── README
│       │   │   │       ├── common/
│       │   │   │       │   ├── common.mk
│       │   │   │       │   └── make.config
│       │   │   │       ├── needle.cu
│       │   │   │       ├── needle.h
│       │   │   │       ├── needle_kernel.cu
│       │   │   │       ├── run
│       │   │   │       └── timing.h
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── Operational-Intensity/
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── cyclecount.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── RED/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.cpp
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── app_baseline.cu
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── cyclecount.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── Random-GUPS/
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   └── gups.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── cyclecount.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SCAN-RSS/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.cpp
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── app_baseline.cu
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SCAN-SSA/
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SEL/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── ds.h
│       │   │   │       ├── kernel.cu
│       │   │   │       └── select.cu
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── STREAM/
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   ├── add.c
│       │   │   │   ├── copy.c
│       │   │   │   ├── copyw.c
│       │   │   │   ├── scale.c
│       │   │   │   └── triad.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── cyclecount.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── STRIDED/
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   └── strided.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── cyclecount.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SpMV/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── app.cu
│       │   │   ├── data/
│       │   │   │   ├── bcsstk30.mtx
│       │   │   │   └── generate/
│       │   │   │       ├── Makefile
│       │   │   │       ├── generate.sh
│       │   │   │       └── replicate.c
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   ├── app.c
│       │   │   │   └── mram-management.h
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── matrix.h
│       │   │       ├── params.h
│       │   │       ├── timer.h
│       │   │       └── utils.h
│       │   ├── TRNS/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── kernel.cpp
│       │   │   │   │   ├── kernel.h
│       │   │   │   │   ├── main.cpp
│       │   │   │   │   └── support/
│       │   │   │   │       ├── common.h
│       │   │   │   │       ├── setup.h
│       │   │   │   │       ├── timer.h
│       │   │   │   │       └── verify.h
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── kernel.cu
│       │   │   │       ├── kernel.h
│       │   │   │       ├── main.cpp
│       │   │   │       └── support/
│       │   │   │           ├── common.h
│       │   │   │           ├── cuda-setup.h
│       │   │   │           ├── timer.h
│       │   │   │           └── verify.h
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── TS/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── inputs/
│       │   │   │   │   │   └── randomlist33M.txt
│       │   │   │   │   ├── launch.sh
│       │   │   │   │   ├── mprofile.h
│       │   │   │   │   ├── streamp_openmp.cpp
│       │   │   │   │   └── tools.cpp
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── STREAMP.cu
│       │   │   │       ├── inputs/
│       │   │   │       │   └── randomlist33M.txt
│       │   │   │       ├── launch.sh
│       │   │   │       └── randlist.py
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── UNI/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── ds.h
│       │   │   │       ├── kernel.cu
│       │   │   │       └── unique.cu
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── VA/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── vec_add.cu
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   └── WRAM/
│       │       ├── Makefile
│       │       ├── dpu/
│       │       │   └── task.c
│       │       ├── host/
│       │       │   └── app.c
│       │       ├── run.sh
│       │       └── support/
│       │           ├── common.h
│       │           ├── cyclecount.h
│       │           ├── params.h
│       │           └── timer.h
│       ├── docker/
│       │   ├── compiler.dockerfile
│       │   └── parser.dockerfile
│       ├── pyproject.toml
│       ├── requirements.txt
│       ├── sdk/
│       │   ├── misc/
│       │   │   ├── accessMramFromDpu.c
│       │   │   ├── coreDump.c
│       │   │   ├── crt0.c
│       │   │   ├── dpu.lds
│       │   │   ├── internalStateReset.c
│       │   │   ├── linkerScript.lds
│       │   │   ├── restoreRegisters.c
│       │   │   └── restore_carry_and_zero_flag.h
│       │   ├── stdlib/
│       │   │   ├── abort.c
│       │   │   ├── assert.h
│       │   │   ├── atoi.c
│       │   │   ├── atol.c
│       │   │   ├── ctype.h
│       │   │   ├── errno.c
│       │   │   ├── errno.h
│       │   │   ├── exit.c
│       │   │   ├── inttypes.h
│       │   │   ├── iso646.h
│       │   │   ├── limits.h
│       │   │   ├── memchr.c
│       │   │   ├── memcmp.c
│       │   │   ├── memcpy.c
│       │   │   ├── memmove.c
│       │   │   ├── memmram_utils.h
│       │   │   ├── memset.c
│       │   │   ├── stdalign.h
│       │   │   ├── stdarg.h
│       │   │   ├── stdbool.h
│       │   │   ├── stddef.h
│       │   │   ├── stdint.h
│       │   │   ├── stdio.c
│       │   │   ├── stdio.h
│       │   │   ├── stdlib.h
│       │   │   ├── stdnoreturn.h
│       │   │   ├── stpcpy.c
│       │   │   ├── stpncpy.c
│       │   │   ├── strcat.c
│       │   │   ├── strchr.c
│       │   │   ├── strcmp.c
│       │   │   ├── strcpy.c
│       │   │   ├── strcspn.c
│       │   │   ├── strdup.c
│       │   │   ├── strerror.c
│       │   │   ├── string.h
│       │   │   ├── strlen.c
│       │   │   ├── strlwr.c
│       │   │   ├── strncat.c
│       │   │   ├── strncmp.c
│       │   │   ├── strncpy.c
│       │   │   ├── strndup.c
│       │   │   ├── strnlen.c
│       │   │   ├── strpbrk.c
│       │   │   ├── strrchr.c
│       │   │   ├── strrev.c
│       │   │   ├── strsep.c
│       │   │   ├── strspn.c
│       │   │   ├── strstr.c
│       │   │   ├── strtok_r.c
│       │   │   ├── strtol.c
│       │   │   └── strupr.c
│       │   └── syslib/
│       │       ├── absvdi2.c
│       │       ├── absvsi2.c
│       │       ├── adddf3.c
│       │       ├── addsf3.c
│       │       ├── addvdi3.c
│       │       ├── addvsi3.c
│       │       ├── alloc.c
│       │       ├── alloc.h
│       │       ├── ashldi3.c
│       │       ├── ashrdi3.c
│       │       ├── atomic_bit.h
│       │       ├── atomics.c
│       │       ├── attributes.h
│       │       ├── barrier.c
│       │       ├── barrier.h
│       │       ├── bswapdi2.c
│       │       ├── bswapsi2.c
│       │       ├── buddy_alloc.c
│       │       ├── buddy_alloc.h
│       │       ├── buddy_realloc.c
│       │       ├── built_ins.h
│       │       ├── clzdi2.c
│       │       ├── clzsi2.c
│       │       ├── cmpdi2.c
│       │       ├── comparedf2.c
│       │       ├── comparesf2.c
│       │       ├── ctzdi2.c
│       │       ├── ctzsi2.c
│       │       ├── defs.c
│       │       ├── defs.h
│       │       ├── devprivate.h
│       │       ├── div32.c
│       │       ├── divdf3.c
│       │       ├── divdi3.c
│       │       ├── divmodsi4.c
│       │       ├── divsf3.c
│       │       ├── divsi3.c
│       │       ├── dpuconst.h
│       │       ├── dpufault.h
│       │       ├── dpuruntime.h
│       │       ├── extendhfsf2.c
│       │       ├── extendsfdf2.c
│       │       ├── ffsdi2.c
│       │       ├── ffssi2.c
│       │       ├── ffsti2.c
│       │       ├── fixdfdi.c
│       │       ├── fixdfsi.c
│       │       ├── fixsfdi.c
│       │       ├── fixsfsi.c
│       │       ├── fixunsdfdi.c
│       │       ├── fixunsdfsi.c
│       │       ├── fixunssfdi.c
│       │       ├── fixunssfsi.c
│       │       ├── float.h
│       │       ├── floatdidf.c
│       │       ├── floatdisf.c
│       │       ├── floatsidf.c
│       │       ├── floatsisf.c
│       │       ├── floatundidf.c
│       │       ├── floatundisf.c
│       │       ├── floatunsidf.c
│       │       ├── floatunsisf.c
│       │       ├── fp_add_impl.inc
│       │       ├── fp_extend.h
│       │       ├── fp_extend_impl.inc
│       │       ├── fp_fixint_impl.inc
│       │       ├── fp_fixuint_impl.inc
│       │       ├── fp_lib.h
│       │       ├── fp_mul_impl.inc
│       │       ├── fp_trunc.h
│       │       ├── fp_trunc_impl.inc
│       │       ├── fsb_allocator.c
│       │       ├── fsb_allocator.h
│       │       ├── handshake.c
│       │       ├── handshake.h
│       │       ├── int_endianness.h
│       │       ├── int_lib.h
│       │       ├── int_math.h
│       │       ├── int_types.h
│       │       ├── int_util.c
│       │       ├── int_util.h
│       │       ├── listener.c
│       │       ├── lshrdi3.c
│       │       ├── macro_utils.h
│       │       ├── mcount.c
│       │       ├── moddi3.c
│       │       ├── modsi3.c
│       │       ├── mram.h
│       │       ├── mul32.c
│       │       ├── mul64.c
│       │       ├── muldc3.c
│       │       ├── muldf3.c
│       │       ├── mulodi4.c
│       │       ├── mulosi4.c
│       │       ├── mulsf3.c
│       │       ├── mulvdi3.c
│       │       ├── mulvsi3.c
│       │       ├── mutex.h
│       │       ├── negdf2.c
│       │       ├── negdi2.c
│       │       ├── negsf2.c
│       │       ├── negvdi2.c
│       │       ├── negvsi2.c
│       │       ├── paritydi2.c
│       │       ├── paritysi2.c
│       │       ├── perfcounter.c
│       │       ├── perfcounter.h
│       │       ├── popcountdi2.c
│       │       ├── popcountsi2.c
│       │       ├── powidf2.c
│       │       ├── powisf2.c
│       │       ├── profiling.c
│       │       ├── profiling.h
│       │       ├── profiling_internals.h
│       │       ├── sem.c
│       │       ├── sem.h
│       │       ├── seqread.h
│       │       ├── seqread.inc
│       │       ├── seqread1024.c
│       │       ├── seqread128.c
│       │       ├── seqread256.c
│       │       ├── seqread32.c
│       │       ├── seqread512.c
│       │       ├── seqread64.c
│       │       ├── soft_cache.c
│       │       ├── soft_cache.h
│       │       ├── subdf3.c
│       │       ├── subsf3.c
│       │       ├── subvdi3.c
│       │       ├── subvsi3.c
│       │       ├── sysdef.h
│       │       ├── truncdfhf2.c
│       │       ├── truncdfsf2.c
│       │       ├── truncsfhf2.c
│       │       ├── ucmpdi2.c
│       │       ├── udiv64.c
│       │       ├── udivdi3.c
│       │       ├── udivmodsi4.c
│       │       ├── udivsi3.c
│       │       ├── umoddi3.c
│       │       ├── umodsi3.c
│       │       └── waitqueue.c
│       ├── src/
│       │   ├── abi/
│       │   │   ├── binary/
│       │   │   │   ├── executable.py
│       │   │   │   ├── liveness.py
│       │   │   │   └── relocatable.py
│       │   │   ├── directive/
│       │   │   │   ├── ascii_directive.py
│       │   │   │   ├── asciz_directive.py
│       │   │   │   ├── byte_directive.py
│       │   │   │   ├── long_directive.py
│       │   │   │   ├── quad_directive.py
│       │   │   │   ├── short_directive.py
│       │   │   │   └── zero_directive.py
│       │   │   ├── isa/
│       │   │   │   ├── cc/
│       │   │   │   │   ├── _base_cc.py
│       │   │   │   │   ├── acquire_cc.py
│       │   │   │   │   ├── add_nz_cc.py
│       │   │   │   │   ├── boot_cc.py
│       │   │   │   │   ├── const_cc_ge0.py
│       │   │   │   │   ├── const_cc_geu.py
│       │   │   │   │   ├── const_cc_zero.py
│       │   │   │   │   ├── count_nz_cc.py
│       │   │   │   │   ├── div_cc.py
│       │   │   │   │   ├── div_nz_cc.py
│       │   │   │   │   ├── ext_sub_set_cc.py
│       │   │   │   │   ├── false_cc.py
│       │   │   │   │   ├── imm_shift_nz_cc.py
│       │   │   │   │   ├── log_nz_cc.py
│       │   │   │   │   ├── log_set_cc.py
│       │   │   │   │   ├── mul_nz_cc.py
│       │   │   │   │   ├── no_cc.py
│       │   │   │   │   ├── release_cc.py
│       │   │   │   │   ├── shift_nz_cc.py
│       │   │   │   │   ├── sub_nz_cc.py
│       │   │   │   │   ├── sub_set_cc.py
│       │   │   │   │   ├── true_cc.py
│       │   │   │   │   └── true_false_cc.py
│       │   │   │   ├── exception.py
│       │   │   │   ├── flag.py
│       │   │   │   ├── instruction/
│       │   │   │   │   ├── condition.py
│       │   │   │   │   ├── endian.py
│       │   │   │   │   ├── instruction.py
│       │   │   │   │   ├── op_code.py
│       │   │   │   │   └── suffix.py
│       │   │   │   └── register/
│       │   │   │       ├── gp_register.py
│       │   │   │       ├── pair_register.py
│       │   │   │       └── sp_register.py
│       │   │   ├── label/
│       │   │   │   ├── label.py
│       │   │   │   └── symbol.py
│       │   │   ├── section/
│       │   │   │   ├── section.py
│       │   │   │   ├── section_flag.py
│       │   │   │   ├── section_name.py
│       │   │   │   └── section_type.py
│       │   │   └── word/
│       │   │       ├── _base_word.py
│       │   │       ├── data_address_word.py
│       │   │       ├── data_word.py
│       │   │       ├── double_data_word.py
│       │   │       ├── immediate.py
│       │   │       ├── instruction_address_word.py
│       │   │       ├── instruction_word.py
│       │   │       └── representation.py
│       │   ├── assembler/
│       │   │   ├── assembler.py
│       │   │   └── data_prep/
│       │   │       ├── bin.py
│       │   │       ├── bs_data_prep.py
│       │   │       ├── gemv_data_prep.py
│       │   │       ├── hst_data_prep.py
│       │   │       ├── mlp_data_prep.py
│       │   │       ├── red_data_prep.py
│       │   │       ├── scan_rss_data_prep.py
│       │   │       ├── scan_ssa_data_prep.py
│       │   │       ├── sel_data_prep.py
│       │   │       ├── trns_data_prep.py
│       │   │       ├── ts_data_prep.py
│       │   │       ├── uni_data_prep.py
│       │   │       └── va_data_prep.py
│       │   ├── compiler/
│       │   │   └── compiler.py
│       │   ├── converter/
│       │   │   ├── condition_converter.py
│       │   │   ├── endian_converter.py
│       │   │   ├── instruction_converter.py
│       │   │   ├── op_code_converter.py
│       │   │   ├── register_converter.py
│       │   │   ├── section_flag_converter.py
│       │   │   ├── section_name_converter.py
│       │   │   ├── section_type_converter.py
│       │   │   ├── suffix_converter.py
│       │   │   └── symbol_converter.py
│       │   ├── encoder/
│       │   │   ├── ascii_encoder.py
│       │   │   ├── byte.py
│       │   │   ├── directive_encoder.py
│       │   │   └── instruction_encoder.py
│       │   ├── initializer/
│       │   │   ├── directive_initializer.py
│       │   │   ├── instruction_initializer.py
│       │   │   ├── int_initializer.py
│       │   │   └── str_initializer.py
│       │   ├── iss/
│       │   │   ├── cpu/
│       │   │   │   ├── cpu.py
│       │   │   │   ├── fini_thread.py
│       │   │   │   ├── init_thread.py
│       │   │   │   └── sched_thread.py
│       │   │   ├── dpu/
│       │   │   │   ├── alu.py
│       │   │   │   ├── decoder.py
│       │   │   │   ├── dispatcher.py
│       │   │   │   ├── dma.py
│       │   │   │   ├── dpu.py
│       │   │   │   ├── logic.py
│       │   │   │   ├── scheduler.py
│       │   │   │   └── thread.py
│       │   │   ├── dram/
│       │   │   │   ├── mram.py
│       │   │   │   ├── mram_command.py
│       │   │   │   └── word.py
│       │   │   ├── register/
│       │   │   │   ├── condition_register.py
│       │   │   │   ├── exception_register.py
│       │   │   │   ├── flag_register.py
│       │   │   │   ├── gp_register.py
│       │   │   │   ├── pc_register.py
│       │   │   │   ├── register_file.py
│       │   │   │   └── sp_register.py
│       │   │   ├── sram/
│       │   │   │   ├── atomic.py
│       │   │   │   ├── iram.py
│       │   │   │   ├── lock.py
│       │   │   │   └── wram.py
│       │   │   └── system.py
│       │   ├── linker_/
│       │   │   ├── linker.py
│       │   │   ├── linker_script.py
│       │   │   └── logic/
│       │   │       ├── instruction_assigner.py
│       │   │       ├── label_assigner.py
│       │   │       ├── liveness_analyzer.py
│       │   │       └── set_assigner.py
│       │   ├── main.py
│       │   ├── parser_/
│       │   │   ├── grammar/
│       │   │   │   ├── .antlr/
│       │   │   │   │   ├── assembly.interp
│       │   │   │   │   ├── assembly.tokens
│       │   │   │   │   ├── assemblyLexer.interp
│       │   │   │   │   ├── assemblyLexer.java
│       │   │   │   │   ├── assemblyLexer.tokens
│       │   │   │   │   └── assemblyParser.java
│       │   │   │   ├── assembly.g4
│       │   │   │   ├── assembly.interp
│       │   │   │   ├── assembly.tokens
│       │   │   │   ├── assemblyLexer.interp
│       │   │   │   ├── assemblyLexer.py
│       │   │   │   ├── assemblyLexer.tokens
│       │   │   │   ├── assemblyListener.py
│       │   │   │   └── assemblyParser.py
│       │   │   ├── grammar_generator.py
│       │   │   └── parser.py
│       │   └── util/
│       │       ├── config_loader.py
│       │       ├── docker_client.py
│       │       ├── param_loader.py
│       │       └── path_collector.py
│       └── test/
│           ├── abi/
│           │   ├── binary/
│           │   │   ├── executable_test.py
│           │   │   └── liveness_test.py
│           │   ├── directive/
│           │   │   ├── ascii_directive_test.py
│           │   │   ├── asciz_directive_test.py
│           │   │   ├── byte_directive_test.py
│           │   │   ├── long_directive_test.py
│           │   │   ├── quad_directive_test.py
│           │   │   ├── short_directive_test.py
│           │   │   └── zero_directive_test.py
│           │   ├── isa/
│           │   │   └── register/
│           │   │       ├── gp_register_test.py
│           │   │       └── pair_register_test.py
│           │   ├── label/
│           │   │   └── label_test.py
│           │   ├── section/
│           │   │   └── section_test.py
│           │   └── word/
│           │       ├── immediate_test.py
│           │       └── words_test.py
│           ├── compiler/
│           │   └── compiler_test.py
│           ├── encoder/
│           │   ├── ascii_encoder_test.py
│           │   ├── directive_encoder_test.py
│           │   └── instruction_encoder_test.py
│           ├── iss/
│           │   ├── dpu/
│           │   │   ├── dma_test.py
│           │   │   └── scheduler_test.py
│           │   ├── dram/
│           │   │   └── mram_test.py
│           │   ├── register/
│           │   │   └── register_file_test.py
│           │   └── sram/
│           │       ├── atomic_test.py
│           │       ├── iram_test.py
│           │       └── wram_test.py
│           ├── linker_/
│           │   └── linker_test.py
│           ├── parser_/
│           │   ├── grammar_generator_test.py
│           │   └── parser_test.py
│           └── util/
│               └── config_loader_test.py
└── tools/
    ├── README.md
    ├── upmem_profiler/
    │   ├── CMakeLists.txt
    │   ├── script/
    │   │   ├── active_tasklet_profile.sh
    │   │   ├── build.sh
    │   │   ├── example.sh
    │   │   ├── function_profile.sh
    │   │   ├── instruction_mix_profile.sh
    │   │   ├── mram_access_pattern_profile.sh
    │   │   ├── timeline_profile.sh
    │   │   └── tlb_behavior_profile.sh
    │   └── src/
    │       ├── CMakeLists.txt
    │       ├── abi/
    │       │   └── instruction/
    │       │       ├── op_code.h
    │       │       └── suffix.h
    │       ├── basic/
    │       │   ├── instruction_parser.cc
    │       │   ├── instruction_parser.h
    │       │   ├── interval.cc
    │       │   ├── interval.h
    │       │   ├── reg_file_parser.cc
    │       │   ├── reg_file_parser.h
    │       │   ├── stats_parser.cc
    │       │   └── stats_parser.h
    │       ├── converter/
    │       │   ├── op_code_converter.cc
    │       │   ├── op_code_converter.h
    │       │   ├── suffix_converter.cc
    │       │   └── suffix_converter.h
    │       ├── instruction_mix/
    │       │   ├── instruction_mix_profiler.cc
    │       │   └── instruction_mix_profiler.h
    │       ├── main.cc
    │       ├── main.h
    │       └── util/
    │           ├── argument_parser.cc
    │           ├── argument_parser.h
    │           └── config_loader.h
    └── upmem_reg_model/
        ├── data/
        │   ├── input.xlsx
        │   └── output.xlsx
        ├── script/
        │   └── format.sh
        └── src/
            ├── benchmark/
            │   ├── _base_benchmark.py
            │   ├── bs.py
            │   ├── gemv.py
            │   ├── hst_l.py
            │   ├── hst_s.py
            │   ├── mlp.py
            │   ├── red.py
            │   ├── scan_rss.py
            │   ├── scan_ssa.py
            │   ├── sel.py
            │   ├── trns.py
            │   ├── ts.py
            │   ├── uni.py
            │   └── va.py
            ├── io_/
            │   ├── excel_reader.py
            │   └── excel_writer.py
            ├── main.py
            └── regression/
                ├── datum.py
                └── model.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
*.S
*.bin

bin/
build/
cmake-build-debug/
validation_log/
__pycache__/
.idea/
.vscode/

# Prerequisites
*.d

# Compiled Object files
*.slo
*.lo
*.o
*.obj

# Precompiled Headers
*.gch
*.pch

# Compiled Dynamic libraries
*.so
*.dylib
*.dll

# Compiled Static libraries
*.lai
*.la
*.a
*.lib

# Executables
*.exe
*.out
*.app


================================================
FILE: LICENSE
================================================
Copyright (c) 2024, VIA Research Group at KAIST

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

================================================
FILE: README.md
================================================
# 📖 Introduction
<img src="./assets/uPIMulator_logo.png" width="1000">

**Welcome to the uPIMulator Framework Documentation!**

This documentation serves as your comprehensive guide to the uPIMulator framework, catering to both novice and experienced researchers.
Here, you'll find the resources necessary to leverage uPIMulator effectively for your research projects.

We provide in-depth coverage of uPIMulator's features, from foundational concepts to advanced functionalities.
Explore this documentation to unlock the full potential of uPIMulator and elevate your research endeavors.

# 🤙 Contact Information
## 📍 Address
[KAIST](https://www.kaist.ac.kr/en/), School of Electrical Engineering

[Vertically Integrated Architecture Research Group](https://sites.google.com/view/kaist-via/home)

Office: N1 818 @ [KAIST](https://www.kaist.ac.kr/en/)

## 📧 Email
- Bongjoon Hyun: [bongjoon.hyun@gmail.com](mailto:bongjoon.hyun@gmail.com)
- Taehun Kim: [taehun.kim@kaist.ac.kr](mailto:taehun.kim@kaist.ac.kr)
- Dongjae Lee: [dongjae.lee@kaist.ac.kr](mailto:dongjae.lee@kaist.ac.kr)
- Minsoo Rhu: [minsoo.rhu@gmail.com](mailto:minsoo.rhu@gmail.com)

Please feel free to reach out to us if you have any questions or require further assistance.

# 🧑‍💻 Released Versions
> **uPIMulator: A Flexible and Scalable Simulation Framework for General-Purpose Processing-In-Memory (PIM) Architectures**

uPIMulator is a cycle-level performance simulator tailored for general-purpose Processing-In-Memory (PIM) systems adhering to the UPMEM Instruction Set Architecture (ISA). This tool provides a detailed simulation environment, empowering computer architecture researchers and PIM program developers to investigate and harness the capabilities of PIM technology.

For comprehensive insights into uPIMulator and its applications, refer to our HPCA-2024 publication:

"[Pathfinding Future PIM Architectures by Demystifying a Commercial PIM Technology](https://www.computer.org/csdl/proceedings-article/hpca/2024/931300a263/1VOAAZSdy0w)", HPCA, 2024

The currently available versions are:
1. Python & C++ version
2. Go version
3. Go & virtual machine version

All source code and version history can be accessed in our GitHub repository.

## The Python & C++ Version
This is our initial implementation of uPIMulator, used in our HPCA-2024 publication. 
You'll find it in the `python_cpp` directory.  Refer to the included [README](./python_cpp/README.md) for usage instructions.

## The Go Version
Our second implementation, optimized for speed and memory efficiency.
Located in the `golang` directory, it offers an 8.5x speed increase and 7.5x memory reduction due to multi-threading.
We generally recommend this version for most use cases. See the [README](./golang/README.md) for details.

## The Go & Virtual Machine Version
This latest version extends the Go version with virtual machine capabilities, eliminating the need for manual data preparation code in many scenarios.
It's particularly suitable for complex or dynamic benchmarks where manual data preparation is challenging, though it may not offer the fastest simulation speed.
Refer to the [README](./golang_vm/README.md) for further guidance.

# 🔍 Summary of Correlation Ratio (R²) and Mean Absolute Error (MAE)
## Single DPU
<img src="./assets/validation_single_dpu.png" width="400"/>

## Multiple DPUs
<img src="./assets/validation_multi_dpus.png" width="400"/>

- Each data point represents a single kernel from the PrIM benchmark suite.
- Summary of Correlation Ratio (R²) and Mean Absolute Error (MAE) for single- and multi-DPU simulations:

| Scenario | Total Data Points | Correlation (R²) | MAE |
|---|---|---|---|
| Single DPU (no inter-DPU communication) | 710 | 98.4% | 12.0% |
| Multi-DPU (with inter-DPU communication) | 387 | 83.6% | 26.9% |

- These validation results were obtained using the Python & C++ version of uPIMulator.

# List of Supported Instructions
- uPIMulator currently supports 599 out of the 970 instructions defined in the [UPMEM-PIM ISA](https://sdk.upmem.com/2023.2.0/201_IS.html#instruction-set-architecture).
Due to limitations in the publicly available ISA documentation, support for the remaining 371 instructions is pending.
However, the currently supported instructions have been sufficient to enable functionally correct simulations of the [PrIM benchmark suite](https://github.com/CMU-SAFARI/prim-benchmarks), producing results consistent with those obtained on real UPMEM-PIM hardware.

- For a detailed list of the currently supported instructions, please refer to [this Google spreadsheet](https://docs.google.com/spreadsheets/d/1xq8t6aRvafmTlGmy4Am8i3QmoOzli1heNxLmCEIlCv4/edit?usp=sharing).

# 🪨 Materials
- Bongjoon Hyun, Taehun Kim, Dongjae Lee, and Minsoo Rhu, "[Pathfinding Future PIM Architectures by Demystifying a Commercial PIM Technology](https://www.computer.org/csdl/proceedings-article/hpca/2024/931300a263/1VOAAZSdy0w)", *The 30th IEEE International Symposium on High-Performance Computer Architecture ([HPCA-30](https://hpca-conf.org/2024/))*, Edinburgh, Scotland, Mar. 2024
    - ${\textsf{\color{red}Best Paper Award}}$
    - Acceptance rate: 18% (75 among 410)
    - [Slide](https://drive.google.com/file/d/1TRgFu6YdBu2gtdtDKIuJI85u8Be8w60M/view?usp=sharing)
    - [Presentation](https://youtu.be/e-RXYl568fw?si=LbTYbM7p7qod-L8w)

# 🎁 Contributions
We welcome and encourage contributions to uPIMulator!
If you are interested in contributing or have questions, please feel free to open an issue or submit a pull request.

## List of Maintainers
- Bongjoon Hyun (bongjoon.hyun@gmail.com)
- Taehun Kim (taehun.kim@kaist.ac.kr)
- Dongjae Lee (dongjae.lee@kaist.ac.kr)
- Minsoo Rhu (minsoo.rhu@gmail.com)

## List of Contributors

# 🙏 Acknowledgement
We would like to thank the developers of the [PrIM benchmark suite](https://github.com/CMU-SAFARI/prim-benchmarks), which was instrumental in developing this project.

This research is made possible by the generous support of the following organizations:
- Institute of Information & Communications Technology Planning & Evaluation (IITP) grant funded by the Korea government (MSIT) (No. 2022-0-01037, Development of High Performance Processing-in-Memory Technology based on DRAM) and the Korea government (MSIT) (No. RS-2024-00438851, (SW Starlab) High-performance Privacy-preserving Machine Learning System and System Software)
- National Research Foundation of Korea (NRF) grant funded by the Korea government (MSIT) (NRF-2021R1A2C2091753)
- Samsung Electronics

We appreciate their commitment to advancing research in this field.

## 📔 Citation
Bongjoon Hyun, Taehun Kim, Dongjae Lee, and Minsoo Rhu, "[Pathfinding Future PIM Architectures by Demystifying a Commercial PIM Technology](https://www.computer.org/csdl/proceedings-article/hpca/2024/931300a263/1VOAAZSdy0w)", IEEE International Symposium on High-Performance Computer Architecture (HPCA), March 2024.


================================================
FILE: golang/README.md
================================================
# ⚙️ Usage
## Currently Supported Mode
uPIMulator operates in an execution-driven simulation mode, enabling cycle-level performance analysis of PIM-based applications.

## Workflow
The typical usage workflow comprises two primary stages:

1. **Binary Generation:** Compile, assemble, and link your application code to generate the required binary files for simulation.
2. **Cycle-Level Simulation:** Utilize the generated binary files as input to the cycle-level simulator to obtain detailed performance metrics and insights.

We are actively working on expanding uPIMulator's capabilities and may introduce additional usage modes in future releases. 

## Installation & Build
### Prerequisites

- **Go Compiler and SDK:**  uPIMulator requires Go 1.21.5 or later. 
You can download and install Go from the [official website](https://go.dev/doc/install).

- **Docker:** Please ensure that Docker is installed on your system.

- **Docker Group Membership:** Your Ubuntu user account needs to be a member of the `docker` group.

- **Tested Environment:** uPIMulator has been thoroughly tested on Ubuntu 18.04 with an Intel CPU.
While we strive for compatibility across different environments, optimal performance and functionality are guaranteed only within this tested setup.

### Installation Steps

1. **Install and Build**
   Navigate to the `script` directory inside `golang/uPIMulator` and execute the build script:

   ```bash
   cd /path/to/uPIMulator/golang/uPIMulator/script
   python build.py
   ```

## Binary Files Generation & Cycle-Level Simulation
We will use the VA (vector addition) benchmark as an example to demonstrate the binary file generation and simulation process.
Please note that the initial simulation might take approximately 30 minutes.

### Execution
To initiate a simulation, provide the following:

- **Benchmark name:** Specify the desired benchmark (e.g., 'VA').
- **Number of tasklets:** Define the number of tasklets to be utilized.
- **Output directory path:** Indicate the absolute path to the directory where you want to store binary files, log files, and other simulation artifacts. 

You can further customize the simulation by utilizing command-line options to adjust various parameters.

### Simulation Output
Detailed simulation results will be written to the standard output (`stdout`).

> **Important Notes:**
> - **Create Output Directory:** Prior to running the simulation, create an empty directory at the specified `bin_dirpath`. 
> - **Absolute Paths:** Always provide absolute paths for both `root_dirpath` (the repository's root directory) and `bin_dirpath`.

### Example Command

```bash
cd /path/to/uPIMulator/golang/uPIMulator
rm -rf bin
mkdir bin
./build/uPIMulator --root_dirpath /path/to/uPIMulator/golang/uPIMulator --bin_dirpath /path/to/uPIMulator/golang/uPIMulator/bin --benchmark VA --num_channels 1 --num_ranks_per_channel 1 --num_dpus_per_rank 1 --num_tasklets 16 --data_prep_params 1024
```

# 📄 Reproducing Figures from the Paper
To replicate the figures presented in our paper, please adhere to the instructions provided below.
For brevity, we provide replication instructions only for Figures 5, 6, 7, 9, and 10.

## Configuration of PrIM Benchmarks

- **Single DPU Focus:** For Figures 5, 6, 7, and 9, the parameters `num_channels`, `num_ranks_per_channel`, and `num_dpus_per_rank` must always be set to `1`, as these experiments specifically characterize the behavior of a single DPU.
- **Data Preparation Parameter:** When generating the binary files for the PrIM benchmarks, please configure the `data_prep_params` parameter according to the following table:

| Benchmark | `data_prep_params` (Figures 5, 6) | `data_prep_params` (Figure 10) |
|---|---|---|
| BS       | 32768 | 131072 |
| GEMV     | 2048  | 4096   |
| HST-L    | 131072 | 524288 |
| HST-S    | 131072 | 524288 |
| MLP      | 256   | 1024   |
| RED      | 524288 | 2097152|
| SCAN-RSS | 262144 | 1048576|
| SCAN-SSA | 262144 | 1048576|
| SEL      | 524288 | 2097152|
| TRNS     | 1024  | 128    |
| TS       | 2048  | 65536  |
| UNI      | 524288 | 2097152|
| VA       | 524288 | 2097152|

### Example Command

```bash
./build/uPIMulator --root_dirpath /path/to/uPIMulator/golang/uPIMulator --bin_dirpath /path/to/uPIMulator/golang/uPIMulator/bin --benchmark VA --num_channels 1 --num_ranks_per_channel 1 --num_dpus_per_rank 1 --num_tasklets 16 --data_prep_params 524288
``` 

Please ensure you adhere to these configurations to accurately replicate the figures presented in the paper. 

## Figure 5: PrIM Compute and Memory Utilization
<img src="../assets/uPIMulator_figure5.png" width="400"/>

This figure illustrates the compute utilization (represented by red points) and memory read bandwidth utilization (represented by blue points) of the PrIM benchmarks when executed with varying numbers of threads (tasklets): 1, 4, and 16.

### Metrics Calculation

- **Compute Utilization (IPC):** `num_instructions` / `logic_cycle`
- **Memory Read Bandwidth Utilization (GB/s):** Refer to the provided Excel sheet for the calculation: [link](../assets/figure5_mem_util_calculator.xlsx)

> **Note:** The values for `num_instructions` and `logic_cycle` required in these calculations can be obtained from the simulation results generated by uPIMulator. 

## Figure 6: DPU Runtime Breakdown
<img src="../assets/uPIMulator_fiture6.png" width="400"/>

This figure presents a breakdown of DPU runtime, categorizing cycles into:

- **Active Cycles (Black):** Represent cycles when the DPU is actively executing instructions.
- **Idle Cycles (Red, Yellow, Blue):** Represent cycles when the DPU is stalled due to various reasons.

### Calculation of Cycle Ratios
To generate the breakdown depicted in the figure, you can utilize the following formulas:

- **Issuable Ratio:**  `breakdown_run` / `logic_cycle`
- **Idle (Memory) Ratio:** `breakdown_dma` / `logic_cycle`
- **Idle (Revolver) Ratio:** `breakdown_etc` / `logic_cycle`
- **Idle (RF) Ratio:** `backpressure` / `logic_cycle`

> **Note:** The values for the variables used in these formulas (`breakdown_run`, `logic_cycle`, etc.) are available in the simulation results produced by uPIMulator. 

## Figure 7: Issuable Tasklets
<img src="../assets/uPIMulator_figure7.png" width="400"/>

This figure visualizes the number of tasklets (threads) that are ready for execution (issuable) by the DPU scheduler at each cycle.

### Replication
To reproduce this figure, utilize the provided [Excel sheet](../assets/figure7_active_tasklet_breakdown.xlsx).
The spreadsheet includes instructions on how to populate it with the relevant simulation output data, and it will automatically generate the corresponding figure.

> **Important Configuration Note**: Please ensure that the number of threads is configured to **16 tasklets** when running the simulations for this figure.
> You can achieve this by using the following command-line argument: `--num_tasklets 16`.

## Figure 9: Instruction Mix (Single DPU)
<img src="../assets/uPIMulator_figure9.png" width="400"/>

Figure 9 provides a breakdown of the instruction mix observed during single-DPU execution.
To generate this figure, follow the steps outlined below using the `upmem_profiler` tool and the accompanying Excel sheet.

### Procedure

1. **Build the Profiler**

   ```bash
   cd /path/to/uPIMulator/tools/upmem_profiler/script
   bash build.sh
   ```

2. **Extract Instructions**
   Run the simulation with the `--verbose 1` flag to capture detailed instruction traces.

   ```bash
   cd /path/to/uPIMulator/golang/uPIMulator/
   ./build/uPIMulator --root_dirpath /path/to/uPIMulator/golang/uPIMulator --bin_dirpath /path/to/uPIMulator/golang/uPIMulator/bin --benchmark VA --num_channels 1 --num_ranks_per_channel 1 --num_dpus_per_rank 1 --num_tasklets 16 --data_prep_params 1024 --verbose 1 > trace.txt
   ```

3. **Run the Profiler**
   Process the generated trace file using the `upmem_profiler` in `instruction_mix` mode.

   ```bash
   cd /path/to/uPIMulator/tools/upmem_profiler/
   ./build/src/upmem_profiler --logpath /path/to/uPIMulator/golang/uPIMulator/trace.txt --mode instruction_mix
   ```

4. **Generate the Figure**
   Utilize the profiler's output to populate the provided [Excel sheet](../assets/figure9_instruction_mix.xlsx), which will automatically generate the instruction mix figure.

> **Important Configuration Note:** Similar to Figure 7, the instruction mix analysis in Figure 9 is based on simulations with **16 tasklets**.
> Ensure that you maintain this configuration (`--num_tasklets 16`) for accurate replication. 

## Figure 10: Multi-DPU Latency Breakdown and Speedup
<img src="../assets/uPIMulator_figure10.png" width="400"/>

Figure 10 presents the latency breakdown and speedup achieved in multi-DPU scenarios.

### Configuring the Number of DPUs
You can adjust the number of DPUs by modifying the following parameters in `uPIMulator`:

- `num_channels` 
- `num_ranks_per_channel`
- `num_dpus_per_rank`

### Generating the Latency Breakdown
To obtain the latency breakdown data for plotting, utilize the `upmem_reg_model` tool located in the `tools/upmem_reg_model/` directory.
This tool implements a communication model between the host and DPUs based on linear regression.

### Procedure

1. **Prepare Input Excel:**
   - We provide a sample input Excel file as a template.
   - Append a new row to this file, specifying the benchmark name, number of DPUs, and the `data_prep_param` used in your simulation.
   - Fill in the relevant time values (in milliseconds) obtained from your simulation results, such as kernel execution time.
   You can convert cycle counts to time in milliseconds by dividing the cycle count by the corresponding clock frequency (in MHz), which yields microseconds, and then dividing the result by 1000.

2. **Run the Regression Model:**

   ```bash
   cd /path/to/uPIMulator/tools/upmem_reg_model/src
   python main.py --input_excel_filepath /path/to/your/input_excel_file --output_excel_filepath /path/to/your/output_excel_file
   ```

3. **Access the Output:** 
   - The linear regression results will be available in the specified output Excel file.
   - Use this data to create the latency breakdown plots as shown in Figure 10.

Please ensure that you follow these steps carefully to accurately reproduce the multi-DPU latency breakdown and speedup analysis presented in the paper.

# 🌋 Adding Custom Benchmarks
uPIMulator empowers you to go beyond the provided PrIM benchmark suite by incorporating your own custom benchmarks.
This is particularly beneficial if you have access to UPMEM-PIM hardware and want to evaluate your code's performance in a simulated environment.

## Requirements
To successfully integrate a new benchmark, ensure it adheres to the following:

1. **UPMEM-C Language:**  The benchmark must be implemented in UPMEM-C, a C-like language tailored for UPMEM-PIM programming.
Consult the [UPMEM SDK documentation](https://sdk.upmem.com/2021.4.0/) for detailed programming guidelines.

2. **File Structure and Naming:**  
   - Maintain the same file hierarchy as the PrIM benchmarks, including a `dpu` subdirectory.
   - Include a `CMakeLists.txt` file within your benchmark's directory hierarchy, mirroring the structure used in the PrIM examples.
   This is essential as uPIMulator's interpreter and linker automatically detect and compile benchmarks using these `CMakeLists.txt` files.

## Data Preparation
Since UPMEM PIM-enabled memory directly utilizes physical addresses and uPIMulator currently doesn't support concurrent execution of host and PIM-enabled memory, exercise caution when feeding input/output data. 

You'll need to provide Go source code to handle data preparation for your benchmark.
This script should reside in the `uPIMulator/src/assembler` directory and be recognized by `uPIMulator/src/assembler/assembler.go`.

> **Key Considerations for Data Preparation Scripts**
> - Data transferred from the host to DPUs using `dpu_push_xfer` must be organized within the `input_dpu_mram_heap_pointer_name` variable in your data preparation script.
> - Similarly, data transferred from DPUs to the host using `dpu_push_xfer` should be placed within the `output_dpu_mram_heap_pointer_name` variable.

## Reference Examples
We have included data preparation scripts for the 13 supported PrIM benchmarks.
These serve as excellent references for structuring your custom data preparation scripts.

By following these guidelines, you can seamlessly integrate your benchmarks into uPIMulator for comprehensive performance evaluation and analysis. 

If you have any questions or encounter any difficulties during the integration process, don't hesitate to reach out to us for support. 


================================================
FILE: golang/uPIMulator/benchmark/BS/CMakeLists.txt
================================================
# Only the DPU-side sources are built through CMake; the host subdirectory is
# intentionally left disabled (commented out).
#add_subdirectory(host)
add_subdirectory(dpu)

================================================
FILE: golang/uPIMulator/benchmark/BS/Makefile
================================================
# Build script for the Binary Search (BS) benchmark: compiles the host program
# and the DPU program, and additionally emits an assembly listing (.S) for each.

# Source directories and overridable build parameters.
DPU_DIR := dpu
HOST_DIR := host
BUILDDIR ?= bin
NR_TASKLETS ?= 16
NR_DPUS ?= 1
PROBLEM_SIZE ?= 2

# Name of the marker file that encodes the NR_DPUS / NR_TASKLETS pair used for
# the current build.
define conf_filename
	${BUILDDIR}/.NR_DPUS_$(1)_NR_TASKLETS_$(2).conf
endef
CONF := $(call conf_filename,${NR_DPUS},${NR_TASKLETS})

COMMON_INCLUDES := support
HOST_TARGET := ${BUILDDIR}/host_code
DPU_TARGET := ${BUILDDIR}/dpu_code

HOST_SOURCES := $(wildcard ${HOST_DIR}/*.c)
DPU_SOURCES := $(wildcard ${DPU_DIR}/*.c)

.PHONY: all clean test

# Evaluated while the Makefile is parsed so that BUILDDIR exists before any rule runs.
__dirs := $(shell mkdir -p ${BUILDDIR})

# -w silences compiler warnings; the host flags also forward the build
# parameters to the C code as preprocessor definitions.
COMMON_FLAGS := -w -I${COMMON_INCLUDES}
HOST_FLAGS := ${COMMON_FLAGS} -std=c11 -O3 `dpu-pkg-config --cflags --libs dpu` -DNR_TASKLETS=${NR_TASKLETS} -DNR_DPUS=${NR_DPUS} -DPROBLEM_SIZE=${PROBLEM_SIZE}
DPU_FLAGS := ${COMMON_FLAGS} -O2 -DNR_TASKLETS=${NR_TASKLETS}

all: ${HOST_TARGET} ${DPU_TARGET}

# Both targets depend on the marker file, so a change of NR_DPUS or NR_TASKLETS
# (which changes the marker's name) forces a rebuild. Stale markers are removed first.
${CONF}:
	$(RM) $(call conf_filename,*,*)
	touch ${CONF}

# Host binary plus its assembly listing.
${HOST_TARGET}: ${HOST_SOURCES} ${COMMON_INCLUDES} ${CONF}
	$(CC) -o $@ ${HOST_SOURCES} ${HOST_FLAGS}
	$(CC) -S -o ${HOST_TARGET}.S ${HOST_SOURCES} ${HOST_FLAGS}

# DPU binary plus its assembly listing.
${DPU_TARGET}: ${DPU_SOURCES} ${COMMON_INCLUDES} ${CONF}
	dpu-upmem-dpurte-clang ${DPU_FLAGS} -o $@ ${DPU_SOURCES}
	dpu-upmem-dpurte-clang -S ${DPU_FLAGS} -o ${DPU_TARGET}.S ${DPU_SOURCES}

clean:
	$(RM) -r $(BUILDDIR)

# Runs the host program; -i sets the number of queries (see support/params.h).
test: all
	./${HOST_TARGET} -i 262144


================================================
FILE: golang/uPIMulator/benchmark/BS/baselines/cpu/Makefile
================================================
# Build the OpenMP CPU baseline.
all:
	gcc bs_omp.c -o bs_omp -fopenmp
# Run the baseline; bs_omp.c reads the first argument as the input vector size
# and the second as the number of queries.
run:
	./bs_omp 262144 16777216


================================================
FILE: golang/uPIMulator/benchmark/BS/baselines/cpu/README
================================================
Binary Search (BS)

Compilation instructions:

    make

Execution instructions

    ./bs_omp 2048576 16777216


================================================
FILE: golang/uPIMulator/benchmark/BS/baselines/cpu/bs_omp.c
================================================

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <getopt.h>
#include <assert.h>
#include <time.h>
#include <stdint.h>
#include "timer.h"

#define DTYPE uint64_t
/**
* @brief Creates a "test file" by filling two buffers with values.
*
* `input` receives a strictly increasing sequence that starts at 1 and grows by
* a random step in [1, 10]. Every entry of `querys` is copied from a randomly
* chosen element of `input`, so each query is guaranteed to be present.
*
* @param input        buffer with room for `nr_elements` values
* @param nr_elements  number of values to generate; nothing is written when 0
* @param querys       buffer with room for `n_querys` values
* @param n_querys     number of queries to generate
*/
void create_test_file(DTYPE * input, uint64_t  nr_elements, DTYPE * querys, uint64_t n_querys) {

  // Without any input element there is nothing to generate or to draw queries from.
  if (nr_elements == 0) {
    return;
  }

  srand(time(NULL));

  input[0] = 1;
  for (uint64_t i = 1; i < nr_elements; i++) {
    input[i] = input[i - 1] + (rand() % 10) + 1;
  }

  // Queries are drawn from the first nr_elements - 2 entries. For inputs of one
  // or two elements that range would be empty (modulo by zero) or wrap around,
  // so fall back to always picking the first element.
  uint64_t pick_range = (nr_elements > 2) ? (nr_elements - 2) : 1;

  for (uint64_t i = 0; i < n_querys; i++) {
    querys[i] = input[rand() % pick_range];
  }
}

/**
* @brief Compute the reference output on the host.
*
* Runs one binary search per query over the sorted `input` array and
* accumulates the index of every hit into a single checksum.
*
* @param input       sorted array to search
* @param input_size  index of the LAST valid element of `input` (inclusive upper
*                    bound of the search range), not the element count
* @param querys      values to look up
* @param n_querys    number of entries in `querys`
* @return (uint64_t)-1 plus the sum of the indices of all queries that were
*         found (unsigned wrap-around applies)
*/
uint64_t binarySearch(DTYPE * input, uint64_t input_size, DTYPE* querys, unsigned n_querys)
{

	uint64_t found = -1;
	uint64_t q, r, l, m;

	// `found` is updated by every iteration, so it has to be a reduction
	// variable; a plain shared accumulator would be a data race.
	#pragma omp parallel for private(q,r,l,m) reduction(+:found)
	for(q = 0; q < n_querys; q++)
	{
		l = 0;
		r = input_size;
		while (l <= r)
		{
			m = l + (r - l) / 2;

			// Check if x is present at mid
			if (input[m] == querys[q])
			{
				found += m;
				break;
			}

			// If x greater, ignore left half
			if (input[m] < querys[q])
			{
				l = m + 1;
			}
			// If x is smaller, ignore right half
			else
			{
				// The indices are unsigned: m - 1 would wrap to UINT64_MAX for
				// m == 0 and keep the loop running on out-of-bounds indices.
				if (m == 0)
				{
					break;
				}
				r = m - 1;
			}
		}
	}

	return found;
}

  /**
  * @brief Main of the Host Application.
  *
  * Usage: bs_omp <vector size> <num searches>
  *
  * Generates a sorted input vector and a set of queries, times the OpenMP
  * binary search over all queries and prints the elapsed time.
  *
  * @return 0 when the search produced a non-zero checksum, 1 otherwise or when
  *         the arguments are invalid / memory cannot be allocated.
  */
  int main(int argc, char **argv) {

    // Both positional arguments are mandatory; they are dereferenced below.
    if (argc < 3) {
        fprintf(stderr, "Usage: %s <vector size> <num searches>\n", argc > 0 ? argv[0] : "bs_omp");
        return 1;
    }

    Timer timer;
    uint64_t input_size = atol(argv[1]);
    uint64_t n_querys = atol(argv[2]);

    // binarySearch() is called with input_size - 1, which would wrap for 0.
    if (input_size == 0) {
        fprintf(stderr, "Vector size must be greater than zero\n");
        return 1;
    }

    printf("Vector size: %lu, num searches: %lu\n", input_size, n_querys);

    DTYPE * input = malloc((input_size) * sizeof(DTYPE));
    DTYPE * querys = malloc((n_querys) * sizeof(DTYPE));
    if (input == NULL || querys == NULL) {
        fprintf(stderr, "Memory allocation failed\n");
        free(input);
        free(querys);
        return 1;
    }

    DTYPE result_host = -1;

    // Create an input file with arbitrary data.
    create_test_file(input, input_size, querys, n_querys);

    start(&timer, 0, 0);
    // The search takes the index of the last element as its upper bound.
    result_host = binarySearch(input, input_size - 1, querys, n_querys);
    stop(&timer, 0);

    // Compare the full 64-bit value; narrowing it to int first could turn a
    // non-zero checksum into 0.
    int status = (result_host != 0);
    if (status) {
        printf("[OK] Execution time: ");
        print(&timer, 0, 1);
        printf("ms.\n");
    } else {
        printf("[ERROR]\n");
    }
    free(input);
    free(querys);

    return status ? 0 : 1;
}



================================================
FILE: golang/uPIMulator/benchmark/BS/baselines/cpu/timer.h
================================================
/*
 * Copyright (c) 2016 University of Cordoba and University of Illinois
 * All rights reserved.
 *
 * Developed by:    IMPACT Research Group
 *                  University of Cordoba and University of Illinois
 *                  http://impact.crhc.illinois.edu/
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * with the Software without restriction, including without limitation the 
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 *      > Redistributions of source code must retain the above copyright notice,
 *        this list of conditions and the following disclaimers.
 *      > Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimers in the
 *        documentation and/or other materials provided with the distribution.
 *      > Neither the names of IMPACT Research Group, University of Cordoba, 
 *        University of Illinois nor the names of its contributors may be used 
 *        to endorse or promote products derived from this Software without 
 *        specific prior written permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
 * THE SOFTWARE.
 *
 */

#include <sys/time.h>

/* Four independent stopwatch slots; time[] accumulates microseconds. */
typedef struct Timer {
    struct timeval startTime[4];
    struct timeval stopTime[4];
    double         time[4];
} Timer;

/* Starts slot i. The accumulated time is cleared only when rep is 0, so later
 * repetitions keep adding to the same total. */
void start(Timer *timer, int i, int rep) {
    const int first_repetition = (rep == 0);
    if (first_repetition) {
        timer->time[i] = 0.0;
    }
    gettimeofday(&timer->startTime[i], NULL);
}

/* Stops slot i and adds the microseconds elapsed since the matching start(). */
void stop(Timer *timer, int i) {
    struct timeval *begin  = &timer->startTime[i];
    struct timeval *finish = &timer->stopTime[i];

    gettimeofday(finish, NULL);
    timer->time[i] += (finish->tv_sec - begin->tv_sec) * 1000000.0 +
                      (finish->tv_usec - begin->tv_usec);
}

/* Prints the accumulated time of slot i divided by (1000 * REP), followed by a tab. */
void print(Timer *timer, int i, int REP) {
    const int divisor = 1000 * REP;
    printf("%f\t", timer->time[i] / divisor);
}


================================================
FILE: golang/uPIMulator/benchmark/BS/baselines/gpu/Makefile
================================================
# Compile binary_search.cu into the position-independent shared library
# cu_binary_search.so, the file name cu_lib_import.py loads on Linux.
all:
	nvcc -arch=sm_30 -m64 -Xcompiler -fPIC -shared -o cu_binary_search.so binary_search.cu -std=c++11


================================================
FILE: golang/uPIMulator/benchmark/BS/baselines/gpu/README
================================================
Binary Search (BS)

Compilation instructions:

    make

Execution instructions

    python3 run.py


================================================
FILE: golang/uPIMulator/benchmark/BS/baselines/gpu/binary_search.cu
================================================
#include <cuda.h>
#include <limits.h>
#include "binary_search.h"

#include <chrono>
#include <iostream>

#define BLOCKDIM 512
#define SEARCH_CHUNK 16
#define BLOCK_CHUNK (BLOCKDIM*SEARCH_CHUNK)


/* Kernel: each block stages BLOCK_CHUNK consecutive elements of `arr` in shared
   memory; each thread then binary-searches its own SEARCH_CHUNK-element slice
   for every value in `querys`. On a match the thread sets *flag and stores the
   global index of the match in *res.

   arr        - input array (global memory)
   len        - number of elements in arr
   querys     - values to look up
   num_querys - number of entries in querys
   res        - output: global index of a matching element
   flag       - input/output: the kernel does nothing if *flag is already true

   NOTE(review): the staging buffer and `search` are `int` while `arr` and
   `querys` hold `long int`, so values are narrowed when copied.
   NOTE(review): *flag and *res are read and written by many threads without
   atomics or fences -- confirm this is acceptable for the benchmark. */
__global__ void search_kernel(const long int *arr,
    const long int len, const long int *querys, const long int num_querys, long int *res, bool *flag)
{
    int search;
    if(*flag == false) {
        int tid = threadIdx.x;
        __shared__ int s_arr[BLOCK_CHUNK];

        /* Since each value is being copied to shared memory, the following
        commented-out code is unnecessary: a direct comparison could be done
        at the time of the copy below. */
        // for(int i = 0; i < BLOCKDIM; ++i) {
        //     int shared_loc = i*SEARCH_CHUNK + tid;
        //     int global_loc = shared_loc + BLOCK_CHUNK * blockIdx.x;
        //     if(arr[global_loc] == search) {
        //         *flag = true;
        //         *res = global_loc;
        //     }
        //     __syncthreads();
        // }

        /* Copy chunk of array that this entire block of threads will read
        from the slower global memory to the faster shared memory. Each
        thread copies its own SEARCH_CHUNK consecutive elements. */
        for(long int i = 0; i < SEARCH_CHUNK; ++i) {
            int shared_loc = tid*SEARCH_CHUNK + i;
            int global_loc = shared_loc + BLOCK_CHUNK * blockIdx.x;

            /* Make sure to stay within the bounds of the global array,
            else assign a dummy value. */
            if(global_loc < len) {
              s_arr[shared_loc] = arr[global_loc];
            }
            else {
              s_arr[shared_loc] = INT_MAX;
            }
        }
        __syncthreads();

        /* Every thread walks over all queries. */
        for(long int i = 0; i < num_querys; i++)
        {
            search = querys[i];
            /* For each thread, set the initial search range. */
            int L = 0;
            int R = SEARCH_CHUNK - 1;
            int m = (L + R) / 2;

            /* Pointer to the part of the shared array owned by this thread. */
            int *s_ptr = &s_arr[tid*SEARCH_CHUNK];

            /* Each thread will search a chunk of the block array.
            Many blocks will not find a solution so the search must
            be allowed to fail on a per block basis. The loop ends
            (fails) when L > R, or as soon as *flag has been set. */
            while(L <= R && *flag == false)
            {
                if(s_ptr[m] < search) {
                    L = m + 1;
                }
                else if(s_ptr[m] > search) {
                    R = m - 1;
                }
                else {
                    /* Match: L and R are left unchanged, so the loop exits
                    only through the *flag test above. The local index m is
                    converted to a global index before it is stored. */
                    *flag = true;
                    *res = m += tid*SEARCH_CHUNK + BLOCK_CHUNK * blockIdx.x;
                }

                m = (L + R) / 2;
            }
        }
    }
}



int binary_search(const long int *arr, const long int len, const long int *querys, const long int num_querys)
{
    long int *d_arr, *d_querys, *d_res;
    bool *d_flag;

    size_t arr_size = len * sizeof(long int);
    size_t querys_size = num_querys * sizeof(long int);
    size_t res_size = sizeof(long int);
    size_t flag_size = sizeof(bool);

    cudaMalloc(&d_arr, arr_size);
    cudaMalloc(&d_querys, querys_size);
    cudaMalloc(&d_res, res_size);
    cudaMalloc(&d_flag, flag_size);

    cudaMemcpy(d_arr, arr, arr_size, cudaMemcpyHostToDevice);
    cudaMemcpy(d_querys, querys, querys_size, cudaMemcpyHostToDevice);
    cudaMemset(d_flag, 0, flag_size);

    /* Set res value to -1, so that if the function returns -1, that
    indicates an algorithm failure. */
    cudaMemset(d_res, -0x1, res_size);

    int blockSize = BLOCKDIM;
    int gridSize = (len-1)/BLOCK_CHUNK + 1;

    auto start = std::chrono::high_resolution_clock::now();
    search_kernel<<<gridSize,blockSize>>>(d_arr, len, d_querys, num_querys ,d_res, d_flag);
    cudaDeviceSynchronize();
    auto end = std::chrono::high_resolution_clock::now();
    std::cout << "Kernel Time: " <<
        std::chrono::duration_cast<std::chrono::milliseconds>(end-start).count() <<
        " ms" << std::endl;

    long int res;
    cudaMemcpy(&res, d_res, res_size, cudaMemcpyDeviceToHost);

    return res;
}


================================================
FILE: golang/uPIMulator/benchmark/BS/baselines/gpu/binary_search.h
================================================
#ifndef BINARY_SEARCH_H
#define BINARY_SEARCH_H

#ifdef _WIN32
  #include <windows.h>
  #define DLL_EXPORT __declspec(dllexport)
#else
  #define DLL_EXPORT
#endif


/* C linkage keeps the exported symbol name as plain "binary_search", which is
   the name cu_lib_import.py looks up in the shared library. */
extern "C" {

    /* Searches `arr` (`len` elements) for the `num_querys` values stored in
       `querys`; implemented in binary_search.cu. */
    int DLL_EXPORT binary_search(const long int *arr, const long int len, const long int *querys, const long int num_querys);

}

#endif /* BINARY_SEARCH_H */


================================================
FILE: golang/uPIMulator/benchmark/BS/baselines/gpu/cpu_lib.py
================================================
# -*- coding: utf-8 -*-


def binary_search(arr, search):
    """Return the index of ``search`` in the ascending-sorted sequence ``arr``.

    Args:
        arr: Sequence sorted in ascending order; may be empty.
        search: Value to look for.

    Returns:
        The index of an element equal to ``search``, or -2 (error code 2) when
        the value is not present.
    """

    L = 0
    # R is the last valid index; len(arr) itself would be out of range.
    R = len(arr) - 1

    while L <= R:

        # Floor division keeps the midpoint an int so it can be used as an index.
        m = (L + R) // 2
        if arr[m] < search:
            L = m + 1
        elif arr[m] > search:
            R = m - 1
        else:
            return m

    return -2  # Error code 2


================================================
FILE: golang/uPIMulator/benchmark/BS/baselines/gpu/cu_lib_import.py
================================================
# -*- coding: utf-8 -*-

__all__ = [
    "binary_search",
]


import os.path as path
import platform
from ctypes import *

from numpy.ctypeslib import load_library, ndpointer

## Load the DLL
# The shared library is expected next to this file.
_lib_dir = path.dirname(path.realpath(__file__))
if platform.system() == "Linux":
    cuda_lib = load_library("cu_binary_search.so", _lib_dir)
elif platform.system() == "Windows":
    cuda_lib = load_library("cu_binary_search.dll", _lib_dir)
else:
    # Fail with an explicit message instead of a NameError on `cuda_lib` below.
    raise OSError("cu_binary_search is only available on Linux and Windows, not %s" % platform.system())


## Define argtypes for all functions to import
# The C prototype is
#   int binary_search(const long int *arr, const long int len,
#                     const long int *querys, const long int num_querys);
# so both length arguments are C `long`, not `int`.
argtype_defs = {
    "binary_search": [ndpointer("i8"), c_long, ndpointer("i8"), c_long],
}


## Import functions from DLL
# Each function is published as a module-level name so it can be imported from here.
for func, argtypes in argtype_defs.items():
    locals().update({func: cuda_lib[func]})
    locals()[func].argtypes = argtypes


================================================
FILE: golang/uPIMulator/benchmark/BS/baselines/gpu/run.py
================================================
# -*- coding: utf-8 -*-

import time

import numpy as np

# Local Imports
from cu_lib_import import binary_search as gpu_search

# Length of the array to create and number of queries to run
arr_len = 2048576
num_querys = 16777216

# Dummy array created: the sorted values 0 .. arr_len - 1 as 64-bit integers
arr = np.arange(0, arr_len, 1).astype("i8")

# Random search querys created. The dtype is pinned to int64 because the
# imported function only accepts "i8" arrays (see cu_lib_import.py); NumPy's
# default integer type is not 64-bit on every platform.
querys = np.random.randint(1, arr_len, num_querys, dtype=np.int64)

# GPU search function call
t0 = time.time()
res_gpu = gpu_search(arr, len(arr), querys, len(querys))
print("Total GPU Time: %i ms" % ((time.time() - t0) * 1e003))


================================================
FILE: golang/uPIMulator/benchmark/BS/dpu/CMakeLists.txt
================================================
# Build configuration for the DPU-side BS kernel.
# NOTE(review): the compiler and include paths are absolute paths under /root,
# so they presumably match a specific (container) environment -- confirm before
# building elsewhere.
set(CMAKE_C_COMPILER "/root/upmem-2023.2.0-Linux-x86_64/bin/dpu-upmem-dpurte-clang")
# -S makes the compiler stop after emitting assembly; -w silences warnings.
# NR_TASKLETS is expected to be defined by whoever invokes CMake.
set(CMAKE_C_FLAGS "-w -I/root/uPIMulator/benchmark/BS/support -O2 -S -DNR_TASKLETS=${NR_TASKLETS}")

# Every .c file below this directory is part of the kernel.
file(GLOB_RECURSE SRCS *.c)

add_executable(BS_device ${SRCS})



================================================
FILE: golang/uPIMulator/benchmark/BS/dpu/task.c
================================================
/*
* Binary Search with multiple tasklets
*
*/
#include <stdint.h>
#include <stdio.h>
#include <defs.h>
#include <mram.h>
#include <alloc.h>
#include <mram.h>
#include <barrier.h>
#include <perfcounter.h>
#include "common.h"

__host dpu_arguments_t DPU_INPUT_ARGUMENTS;
__host dpu_results_t DPU_RESULTS[NR_TASKLETS];

// Scan one cached block for the query value.
// Returns the position of the first matching element inside the block,
// -1 if the block starts at or below the query but does not contain it,
// and -2 if the block's first element is already larger than the query.
DTYPE __attribute__ ((noinline)) search(DTYPE *bufferA, DTYPE searching_for) {
  // The whole block lies above the query: nothing to scan.
  if(bufferA[0] > searching_for)
  {
    return -2;
  }

  // BLOCK_SIZE is a byte count; convert it to a number of elements.
  const uint32_t nr_slots = BLOCK_SIZE / sizeof(DTYPE);
  uint32_t slot = 0;
  while(slot < nr_slots)
  {
    if(bufferA[slot] == searching_for)
    {
      return slot;
    }
    slot++;
  }
  return -1;
}

BARRIER_INIT(my_barrier, NR_TASKLETS);

extern int main_kernel1(void);

int(*kernels[nr_kernels])(void) = {main_kernel1};

int main(void){
  // Look up the kernel selected by the host in the dispatch table and run it;
  // its return value becomes the program's return value.
  int (*selected_kernel)(void) = kernels[DPU_INPUT_ARGUMENTS.kernel];
  return selected_kernel();
}

// main_kernel1
// Binary search kernel executed by every tasklet. The MRAM heap holds the
// input vector (input_size elements) followed by the queries; each tasklet
// processes slice_per_dpu / NR_TASKLETS consecutive queries and writes the
// position it found to its own DPU_RESULTS entry. Always returns 0.
int main_kernel1() {
  unsigned int tasklet_id = me();
  #if PRINT
  printf("tasklet_id = %u\n", tasklet_id);
  #endif
  if(tasklet_id == 0){
    mem_reset(); // Reset the heap
  }
  // Barrier: no tasklet allocates before tasklet 0 has reset the heap
  barrier_wait(&my_barrier);

  DTYPE searching_for, found;
  uint64_t input_size = DPU_INPUT_ARGUMENTS.input_size;

  // Address of the current processing block in MRAM.
  // The queries start right after the input vector; each tasklet starts at its own slice.
  uint32_t start_mram_block_addr_A       = (uint32_t) DPU_MRAM_HEAP_POINTER;
  uint32_t start_mram_block_addr_aux     = start_mram_block_addr_A;
  uint32_t end_mram_block_addr_A         = start_mram_block_addr_A + sizeof(DTYPE) * input_size;
  uint32_t current_mram_block_addr_query = end_mram_block_addr_A + tasklet_id * (DPU_INPUT_ARGUMENTS.slice_per_dpu / NR_TASKLETS) * sizeof(DTYPE);

  // Initialize a local cache to store the MRAM block
  // NOTE(review): cache_aux_A and cache_aux_B are filled below but never read
  // afterwards in this function.
  DTYPE *cache_A     = (DTYPE *) mem_alloc(BLOCK_SIZE);
  DTYPE *cache_aux_A = (DTYPE *) mem_alloc(BLOCK_SIZE);
  DTYPE *cache_aux_B = (DTYPE *) mem_alloc(BLOCK_SIZE);

  // Per-tasklet result slot. It is only written when a query is found, so it
  // ends up holding the position of the last query that was found.
  dpu_results_t *result = &DPU_RESULTS[tasklet_id];

  // TODO(bongjoon.hyun@gmail.com): original PrIM benchmark uses uint64_t for targets' type
  for(uint32_t targets = 0; targets < (DPU_INPUT_ARGUMENTS.slice_per_dpu / NR_TASKLETS); targets++)
  {
    found = -1;

    // Fetch the next query (8 bytes) and advance to the following one
    mram_read((__mram_ptr void const *) current_mram_block_addr_query, &searching_for, 8);
    current_mram_block_addr_query += 8;

    bool end = false;

    // Initialize input vector boundaries
    start_mram_block_addr_A    = (uint32_t) DPU_MRAM_HEAP_POINTER;
    start_mram_block_addr_aux  = start_mram_block_addr_A;
    end_mram_block_addr_A      = start_mram_block_addr_A + sizeof(DTYPE) * input_size;

    uint32_t current_mram_block_addr_A = start_mram_block_addr_A;

    // Bring first and last values to WRAM
    // NOTE(review): the second read starts BLOCK_SIZE * sizeof(DTYPE) bytes
    // before the end although only BLOCK_SIZE bytes are read -- confirm the offset.
    mram_read((__mram_ptr void const *) current_mram_block_addr_A, cache_aux_A, BLOCK_SIZE);
    mram_read((__mram_ptr void const *) (end_mram_block_addr_A - BLOCK_SIZE * sizeof(DTYPE)),   cache_aux_B, BLOCK_SIZE);

    // Start in the middle of the input vector
    current_mram_block_addr_A = (start_mram_block_addr_A + end_mram_block_addr_A) / 2;
    while(!end)
    {
      // Load cache with current MRAM block
      mram_read((__mram_ptr void const *) current_mram_block_addr_A, cache_A, BLOCK_SIZE);

      // Search inside block
      found = search(cache_A, searching_for);

      // If found > -1, we found the searching_for query.
      // The stored position is the element index relative to the start of the input vector.
      if(found > -1)
      {
        result->found = found + (current_mram_block_addr_A - start_mram_block_addr_aux) / sizeof(DTYPE);
        break;
      }

      // If found == -2, we need to discard right part of the input vector
      if(found == -2)
      {
        end_mram_block_addr_A     = current_mram_block_addr_A;
        current_mram_block_addr_A = (current_mram_block_addr_A + start_mram_block_addr_A) / 2;
      }

      // If found == -1, we need to discard left part of the input vector
      else if (found == -1)
      {
        start_mram_block_addr_A   = current_mram_block_addr_A;
        current_mram_block_addr_A = (current_mram_block_addr_A + end_mram_block_addr_A) / 2;
      }

      // Start boundary check: the new block lies within the first BLOCK_SIZE
      // bytes of the input vector, so this is the last block to inspect
      if(current_mram_block_addr_A < (start_mram_block_addr_aux + BLOCK_SIZE))
      {
        end = true;
        mram_read((__mram_ptr void const *) current_mram_block_addr_A, cache_A, BLOCK_SIZE);
        found = search(cache_A, searching_for);

        if(found > -1)
        {
          end = true;
          result->found = found + (current_mram_block_addr_A - start_mram_block_addr_aux) / sizeof(DTYPE);
        }
      }

      // End boundary check: the new block would run past the current end, so
      // the last BLOCK_SIZE bytes before the end are inspected instead.
      // NOTE(review): the cast binds before the subtraction, and the stored
      // position is derived from current_mram_block_addr_A rather than from the
      // address that was actually read -- confirm both are intended.
      if(current_mram_block_addr_A > (end_mram_block_addr_A - BLOCK_SIZE))
      {
        end = true;
        mram_read((__mram_ptr void const *) end_mram_block_addr_A - BLOCK_SIZE, cache_A, BLOCK_SIZE);
        found = search(cache_A, searching_for);

        if(found > -1)
        {
          result->found = found + (current_mram_block_addr_A - start_mram_block_addr_aux) / sizeof(DTYPE);
        }
      }
    }
  }
  return 0;
}


================================================
FILE: golang/uPIMulator/benchmark/BS/host/app.c
================================================
/**
* app.c
* BS Host Application Source File
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <dpu.h>
#include <dpu_log.h>
#include <unistd.h>
#include <getopt.h>
#include <assert.h>
#include <time.h>

#if ENERGY
#include <dpu_probe.h>
#endif

#include "params.h"
#include "timer.h"

// Define the DPU Binary path as DPU_BINARY here
#define DPU_BINARY "./bin/bs_dpu"

// Create input arrays: the input vector becomes 1, 2, 3, ... and the query
// vector becomes 0, 1, 2, ...
void create_test_file(DTYPE * input, DTYPE * querys, uint64_t  nr_elements, uint64_t nr_querys) {

	uint64_t idx;

	// Each input element is its predecessor plus one, starting from 1
	input[0] = 1;
	idx = 1;
	while (idx < nr_elements) {
		input[idx] = input[idx - 1] + 1;
		idx++;
	}

	// Query number idx simply asks for the value idx
	idx = 0;
	while (idx < nr_querys) {
		querys[idx] = idx;
		idx++;
	}
}

// Compute output in the host.
// Runs a binary search for every query over input[0 .. input_size] (inclusive
// upper bound) and returns the position recorded by the last match, or -1 if
// no query matched at all.
int64_t binarySearch(DTYPE * input, DTYPE * querys, DTYPE input_size, uint64_t num_querys)
{
	uint64_t last_hit = -1;
	uint64_t q = 0;

	while (q < num_querys)
	{
		const DTYPE key = querys[q];
		DTYPE lo = 0;
		DTYPE hi = input_size;

		while (lo <= hi) {
			const DTYPE mid = lo + (hi - lo) / 2;
			const DTYPE probe = input[mid];

			// Record a match; the search is not cut short and keeps narrowing
			if (probe == key) {
				last_hit = mid;
			}

			if (probe < key) {
				// Key is larger: drop the left half
				lo = mid + 1;
			} else {
				// Key is smaller or equal: drop the right half
				hi = mid - 1;
			}
		}
		q++;
	}
	return last_hit;
}


// Main of the Host Application.
// Builds the input and query vectors, computes the reference result on the
// host, then for every warm-up and timed repetition transfers the data to the
// DPUs, launches the kernel and collects the per-tasklet results. Timer slots:
// 0 = host search, 1 = CPU-DPU transfer, 2 = DPU kernel, 3 = DPU-CPU transfer.
// Returns 0 when the DPU result equals the host result, 1 otherwise.
int main(int argc, char **argv) {

	struct Params p = input_params(argc, argv);
	struct dpu_set_t dpu_set, dpu;
	uint32_t nr_of_dpus;
	uint64_t input_size = INPUT_SIZE;
	uint64_t num_querys = p.num_querys;
	DTYPE result_host = -1;
	DTYPE result_dpu  = -1;

	// Create the timer
	Timer timer;

	// Allocate DPUs and load binary
	DPU_ASSERT(dpu_alloc(NR_DPUS, NULL, &dpu_set));
	DPU_ASSERT(dpu_load(dpu_set, DPU_BINARY, NULL));
	DPU_ASSERT(dpu_get_nr_dpus(dpu_set, &nr_of_dpus));

	#if ENERGY
	struct dpu_probe_t probe;
	DPU_ASSERT(dpu_probe_init("energy_probe", &probe));
	#endif

	// Query number adjustement for proper partitioning: round up to a multiple
	// of the total number of tasklets so every tasklet gets the same share
	if(num_querys % (nr_of_dpus * NR_TASKLETS))
	num_querys = num_querys + (nr_of_dpus * NR_TASKLETS - num_querys % (nr_of_dpus * NR_TASKLETS));

	assert(num_querys % (nr_of_dpus * NR_TASKLETS) == 0 && "Input dimension");    // Allocate input and querys vectors

	DTYPE * input  = malloc((input_size) * sizeof(DTYPE));
	DTYPE * querys = malloc((num_querys) * sizeof(DTYPE));

	// Create an input file with arbitrary data
	create_test_file(input, querys, input_size, num_querys);

	// Compute host solution (the search takes the last valid index as its bound)
	start(&timer, 0, 0);
	result_host = binarySearch(input, querys, input_size - 1, num_querys);
	stop(&timer, 0);

	// Create kernel arguments
	uint64_t slice_per_dpu          = num_querys / nr_of_dpus;
	dpu_arguments_t input_arguments = {input_size, slice_per_dpu, 0};

	for (unsigned int rep = 0; rep < p.n_warmup + p.n_reps; rep++) {
		// Perform input transfers
		uint64_t i = 0;

		if (rep >= p.n_warmup)
		start(&timer, 1, rep - p.n_warmup);

		// Kernel arguments: identical for every DPU
		DPU_FOREACH(dpu_set, dpu, i)
		{
			DPU_ASSERT(dpu_prepare_xfer(dpu, &input_arguments));
		}

		DPU_ASSERT(dpu_push_xfer(dpu_set, DPU_XFER_TO_DPU, "DPU_INPUT_ARGUMENTS", 0, sizeof(input_arguments), DPU_XFER_DEFAULT));

		i = 0;

		// Input vector: every DPU receives the complete vector
		DPU_FOREACH(dpu_set, dpu, i)
		{
			DPU_ASSERT(dpu_prepare_xfer(dpu, input));
		}

		DPU_ASSERT(dpu_push_xfer(dpu_set, DPU_XFER_TO_DPU, DPU_MRAM_HEAP_POINTER_NAME, 0, input_size * sizeof(DTYPE), DPU_XFER_DEFAULT));

		i = 0;

		// Queries: each DPU receives its own slice, stored right after the input vector
		DPU_FOREACH(dpu_set, dpu, i)
		{
			DPU_ASSERT(dpu_prepare_xfer(dpu, querys + slice_per_dpu * i));
		}

		DPU_ASSERT(dpu_push_xfer(dpu_set, DPU_XFER_TO_DPU, DPU_MRAM_HEAP_POINTER_NAME, input_size * sizeof(DTYPE), slice_per_dpu * sizeof(DTYPE), DPU_XFER_DEFAULT));

		if (rep >= p.n_warmup)
		stop(&timer, 1);

		// Run kernel on DPUs
		if (rep >= p.n_warmup)
		{
			start(&timer, 2, rep - p.n_warmup);
			#if ENERGY
			DPU_ASSERT(dpu_probe_start(&probe));
			#endif
		}

		DPU_ASSERT(dpu_launch(dpu_set, DPU_SYNCHRONOUS));

		if (rep >= p.n_warmup)
		{
			stop(&timer, 2);
			#if ENERGY
			DPU_ASSERT(dpu_probe_stop(&probe));
			#endif
		}
		// Print logs if required
		#if PRINT
		unsigned int each_dpu = 0;
		printf("Display DPU Logs\n");
		DPU_FOREACH(dpu_set, dpu)
		{
			printf("DPU#%d:\n", each_dpu);
			DPU_ASSERT(dpulog_read_for_dpu(dpu.dpu, stdout));
			each_dpu++;
		}
		#endif

		// Retrieve results
		if (rep >= p.n_warmup)
		start(&timer, 3, rep - p.n_warmup);
		dpu_results_t* results_retrieve[nr_of_dpus];
		i = 0;
		DPU_FOREACH(dpu_set, dpu, i)
		{
			results_retrieve[i] = (dpu_results_t*)malloc(NR_TASKLETS * sizeof(dpu_results_t));
			DPU_ASSERT(dpu_prepare_xfer(dpu, results_retrieve[i]));
		}

		DPU_ASSERT(dpu_push_xfer(dpu_set, DPU_XFER_FROM_DPU, "DPU_RESULTS", 0, NR_TASKLETS * sizeof(dpu_results_t), DPU_XFER_DEFAULT));

		// Reduce: keep the largest position reported by any tasklet of any DPU
		DPU_FOREACH(dpu_set, dpu, i)
		{
			for(unsigned int each_tasklet = 0; each_tasklet < NR_TASKLETS; each_tasklet++)
			{
				if(results_retrieve[i][each_tasklet].found > result_dpu)
				{
					result_dpu = results_retrieve[i][each_tasklet].found;
				}
			}
			free(results_retrieve[i]);
		}
		if(rep >= p.n_warmup)
		stop(&timer, 3);
	}
	// Print timing results
	printf("CPU Version Time (ms): ");
	print(&timer, 0, p.n_reps);
	printf("CPU-DPU Time (ms): ");
	print(&timer, 1, p.n_reps);
	printf("DPU Kernel Time (ms): ");
	print(&timer, 2, p.n_reps);
	printf("DPU-CPU Time (ms): ");
	print(&timer, 3, p.n_reps);

	#if ENERGY
	double energy;
	DPU_ASSERT(dpu_probe_get(&probe, DPU_ENERGY, DPU_AVERAGE, &energy));
	// The original multiplied by `num_iterations`, which is not declared anywhere
	// in this file; the number of timed repetitions is used instead.
	// NOTE(review): confirm that the average energy should be scaled by n_reps.
	printf("DPU Energy (J): %f\t", energy * p.n_reps);
	#endif

	int status = (result_dpu == result_host);
	if (status) {
		printf("[" ANSI_COLOR_GREEN "OK" ANSI_COLOR_RESET "] results are equal\n");
	} else {
		printf("[" ANSI_COLOR_RED "ERROR" ANSI_COLOR_RESET "] results differ!\n");
	}

	free(input);
	// The query vector is heap-allocated as well and was previously leaked
	free(querys);
	DPU_ASSERT(dpu_free(dpu_set));

	return status ? 0 : 1;
}


================================================
FILE: golang/uPIMulator/benchmark/BS/support/common.h
================================================
#ifndef _COMMON_H_
#define _COMMON_H_

// Fixed-width integer types (uint64_t, int64_t) used by the declarations below.
// Included here so the header compiles regardless of its users' include order.
#include <stdint.h>

// Tasklet configuration: TL, when defined at build time, replaces the default of 16.
#ifdef TL
#define TASKLETS_INITIALIZER TASKLETS(TL, main, 2048, 2)
#define NB_OF_TASKLETS_PER_DPU TL
#else
#define TASKLETS_INITIALIZER TASKLETS(16, main, 2048, 2)
#define NB_OF_TASKLETS_PER_DPU 16
#endif

// Transfer size between MRAM and WRAM
// BL, when defined at build time, is the log2 of the block size in bytes (default 8 -> 256 bytes).
#ifdef BL
#define BLOCK_SIZE_LOG2 BL
#define BLOCK_SIZE (1 << BLOCK_SIZE_LOG2)
#else
#define BLOCK_SIZE_LOG2 8
#define BLOCK_SIZE (1 << BLOCK_SIZE_LOG2)
#endif

// Data type
#define DTYPE int64_t

// Vector size
#define INPUT_SIZE 2048576

// Arguments sent from the host to each DPU
typedef struct {
	uint64_t input_size;
	uint64_t slice_per_dpu;
	enum kernels {
		kernel1 = 0,
		nr_kernels = 1,
	} kernel;
} dpu_arguments_t;

// Structures used by both the host and the dpu to communicate information
typedef struct {
    DTYPE found;
} dpu_results_t;

// ENERGY can be set at build time; PRINT is fixed to 0 here.
#ifndef ENERGY
#define ENERGY 0
#endif
#define PRINT 0

// ANSI escape sequences used to colour the final OK / ERROR message
#define ANSI_COLOR_RED     "\x1b[31m"
#define ANSI_COLOR_GREEN   "\x1b[32m"
#define ANSI_COLOR_RESET   "\x1b[0m"
#endif


================================================
FILE: golang/uPIMulator/benchmark/BS/support/params.h
================================================
#ifndef _PARAMS_H_
#define _PARAMS_H_

#include "common.h"

// Command-line parameters of the BS host application, filled in by input_params().
typedef struct Params {
  long  num_querys;       // set by -i; initialised to PROBLEM_SIZE
  unsigned   n_warmup;    // set by -w; initialised to 1
  unsigned   n_reps;      // set by -e; initialised to 3
}Params;

// Writes the command-line help text to stderr.
void usage() {
  static const char help_text[] =
    "\nUsage:  ./program [options]"
    "\n"
    "\nGeneral options:"
    "\n    -h        help"
    "\n    -w <W>    # of untimed warmup iterations (default=1)"
    "\n    -e <E>    # of timed repetition iterations (default=3)"
    "\n"
    "\nBenchmark-specific options:"
    "\n    -i <I>    problem size (default=2 queries)"
    "\n";

  fputs(help_text, stderr);
}

  /*
   * Parses the command line into a Params structure.
   *
   * Defaults: num_querys = PROBLEM_SIZE, n_warmup = 1, n_reps = 3.
   * Options:  -h prints the help text and exits; -i, -w and -e override the
   *           respective field with the numeric value of their argument.
   * Any other option prints a diagnostic plus the help text and exits.
   *
   * Returns the populated Params by value.
   */
  struct Params input_params(int argc, char **argv) {
    struct Params p;
    p.num_querys    = PROBLEM_SIZE;
    p.n_warmup      = 1;
    p.n_reps        = 3;

    int opt;
    // "h" carries no colon: -h takes no argument. With the former "h:" a bare
    // -h was rejected by getopt as "option requires an argument".
    while((opt = getopt(argc, argv, "hi:w:e:")) >= 0) {
      switch(opt) {
        case 'h':
          usage();
          exit(0);
          break;
        case 'i': p.num_querys    = atol(optarg); break;
        case 'w': p.n_warmup      = atoi(optarg); break;
        case 'e': p.n_reps        = atoi(optarg); break;
        default:
          fprintf(stderr, "\nUnrecognized option!\n");
          usage();
          exit(0);
      }
    }
    // NR_DPUS is a build-time constant; reject builds without any DPU.
    assert(NR_DPUS > 0 && "Invalid # of dpus!");

    return p;
  }
  #endif


================================================
FILE: golang/uPIMulator/benchmark/BS/support/timer.h
================================================
/*
 * Copyright (c) 2016 University of Cordoba and University of Illinois
 * All rights reserved.
 *
 * Developed by:    IMPACT Research Group
 *                  University of Cordoba and University of Illinois
 *                  http://impact.crhc.illinois.edu/
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * with the Software without restriction, including without limitation the 
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 *      > Redistributions of source code must retain the above copyright notice,
 *        this list of conditions and the following disclaimers.
 *      > Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimers in the
 *        documentation and/or other materials provided with the distribution.
 *      > Neither the names of IMPACT Research Group, University of Cordoba, 
 *        University of Illinois nor the names of its contributors may be used 
 *        to endorse or promote products derived from this Software without 
 *        specific prior written permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
 * THE SOFTWARE.
 *
 */

#include <sys/time.h>

/* Four independent stopwatches; time[] accumulates elapsed microseconds. */
typedef struct Timer{

    struct timeval startTime[4];
    struct timeval stopTime[4];
    double         time[4];

}Timer;

/*
 * Records the start timestamp of stopwatch i.
 * The accumulated time of that stopwatch is cleared first when rep is 0.
 */
void start(Timer *timer, int i, int rep) {
    const int first_repetition = (rep == 0);

    if (first_repetition)
        timer->time[i] = 0.0;

    gettimeofday(&timer->startTime[i], NULL);
}

/*
 * Records the stop timestamp of stopwatch i and adds the microseconds elapsed
 * since the matching start() to its accumulated time.
 */
void stop(Timer *timer, int i) {
    struct timeval *begin = &timer->startTime[i];
    struct timeval *end   = &timer->stopTime[i];

    gettimeofday(end, NULL);
    timer->time[i] += (end->tv_sec - begin->tv_sec) * 1000000.0 +
                      (end->tv_usec - begin->tv_usec);
}

/* Prints the accumulated time of stopwatch i divided by (1000 * REP), followed by a tab. */
void print(Timer *timer, int i, int REP) {
    const double scaled = timer->time[i] / (1000 * REP);

    printf("%f\t", scaled);
}


================================================
FILE: golang/uPIMulator/benchmark/CMakeLists.txt
================================================
# Top-level CMake script of the benchmark suite.
cmake_minimum_required(VERSION 3.16)

project(benchmark)

# One add_subdirectory() call per benchmark; each listed directory provides its own CMakeLists.txt.
add_subdirectory(BS)
add_subdirectory(GEMV)
add_subdirectory(HST-L)
add_subdirectory(HST-S)
add_subdirectory(MLP)
add_subdirectory(RED)
add_subdirectory(SCAN-RSS)
add_subdirectory(SCAN-SSA)
add_subdirectory(SEL)
add_subdirectory(TRNS)
add_subdirectory(TS)
add_subdirectory(UNI)
add_subdirectory(VA)


================================================
FILE: golang/uPIMulator/benchmark/GEMV/CMakeLists.txt
================================================
# Only the DPU-side sources are built through CMake; the host directory is left disabled.
#add_subdirectory(host)
add_subdirectory(dpu)

================================================
FILE: golang/uPIMulator/benchmark/GEMV/Makefile
================================================
# Build script for the GEMV benchmark: host application plus DPU kernel.
#
# Tunables (override on the command line, e.g. `make NR_TASKLETS=8 BL=9`):
#   BUILDDIR     output directory
#   NR_TASKLETS  forwarded to both compilers as -DNR_TASKLETS
#   BL           forwarded to both compilers as -DBL
#   NR_DPUS      forwarded to the host compiler as -DNR_DPUS
DPU_DIR := dpu
HOST_DIR := host
BUILDDIR ?= bin
# Keep the defaults free of trailing whitespace: make stores it as part of the
# value, which would split the CONF file name below into several words.
NR_TASKLETS ?= 16
BL ?= 10
NR_DPUS ?= 1

# Name of the marker file that records the configuration of the last build.
define conf_filename
	${BUILDDIR}/.NR_DPUS_$(1)_NR_TASKLETS_$(2)_BL_$(3).conf
endef
CONF := $(call conf_filename,${NR_DPUS},${NR_TASKLETS},${BL})

HOST_TARGET := ${BUILDDIR}/host_code
DPU_TARGET := ${BUILDDIR}/dpu_code

COMMON_INCLUDES := support
HOST_SOURCES := $(wildcard ${HOST_DIR}/*.c)
DPU_SOURCES := $(wildcard ${DPU_DIR}/*.c)

.PHONY: all clean test

# Evaluated at parse time so the output directory exists before any rule runs.
__dirs := $(shell mkdir -p ${BUILDDIR})

COMMON_FLAGS := -w -I${COMMON_INCLUDES}
# DPU_BINARY tells the host program which DPU executable to load; without it
# the host falls back to a built-in path that does not match DPU_TARGET.
HOST_FLAGS := ${COMMON_FLAGS} -std=c11 -O3 `dpu-pkg-config --cflags --libs dpu` -DNR_TASKLETS=${NR_TASKLETS} -DNR_DPUS=${NR_DPUS} -DBL=${BL} -DDPU_BINARY=\"./${DPU_TARGET}\"
DPU_FLAGS := ${COMMON_FLAGS} -O2 -DNR_TASKLETS=${NR_TASKLETS} -DBL=${BL}

all: ${HOST_TARGET} ${DPU_TARGET}

# A changed configuration yields a new marker name, which forces both targets
# to rebuild. Markers of earlier configurations are removed first; all three
# fields are wildcarded so that every old marker matches.
${CONF}:
	$(RM) $(call conf_filename,*,*,*)
	touch ${CONF}

# Each target is built twice: once as a binary and once as an assembly listing (.S).
${HOST_TARGET}: ${HOST_SOURCES} ${COMMON_INCLUDES} ${CONF}
	$(CC) -o $@ ${HOST_SOURCES} ${HOST_FLAGS}
	$(CC) -S -o ${HOST_TARGET}.S ${HOST_SOURCES} ${HOST_FLAGS}

${DPU_TARGET}: ${DPU_SOURCES} ${COMMON_INCLUDES} ${CONF}
	dpu-upmem-dpurte-clang ${DPU_FLAGS} -o $@ ${DPU_SOURCES}
	dpu-upmem-dpurte-clang -S ${DPU_FLAGS} -o ${DPU_TARGET}.S ${DPU_SOURCES}

clean:
	$(RM) -r $(BUILDDIR)

test: all
	./${HOST_TARGET} -m 1024 -n 1024


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_10_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_11_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_12_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_13_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_14_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_15_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_16
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_16_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_17_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_18_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_19_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_1_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_20_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_21_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_22_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_23_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_24_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_2_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_3_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_4_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_5_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_6_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_7_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_8_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_9_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/GEMV/baselines/cpu/Makefile
================================================
# Builds the OpenMP CPU baseline of GEMV.
# Both targets are commands rather than files, so they are declared phony.
.PHONY: all clean

all:
	gcc -o gemv -fopenmp gemv_openmp.c

# $(RM) expands to `rm -f`, so cleaning an already clean tree does not fail.
clean:
	$(RM) gemv




================================================
FILE: golang/uPIMulator/benchmark/GEMV/baselines/cpu/README
================================================
Matrix-Vector Multiplication (GEMV)

Compilation instructions:

    make

Execution instructions:

    ./gemv


================================================
FILE: golang/uPIMulator/benchmark/GEMV/baselines/cpu/gemv_openmp.c
================================================
#include <stdlib.h>
#include <stdio.h>
#include "../../support/timer.h"
#include "gemv_utils.h"

/*
 * CPU baseline driver.
 *
 * Builds a rows x cols Hilbert matrix A and a vector x (x[i] = i + 1), times a
 * single call to gemv(), then prints the kernel time followed by the sums of x
 * and of the result vector b.
 *
 * Returns 0 on success and 1 when a buffer could not be allocated.
 */
int main(int argc, char *argv[])
{
  const size_t rows = 20480;
  const size_t cols = 8192;

  double **A = NULL, *b, *x;

  b = (double*) malloc(sizeof(double)*rows);
  x = (double*) malloc(sizeof(double)*cols);
  if (b == NULL || x == NULL) {
    fprintf(stderr, "gemv: out of memory\n");
    free(b);
    free(x);
    return 1;
  }

  allocate_dense(rows, cols, &A);
  // A is the array of row pointers, A[0] the contiguous element storage.
  if (A == NULL || A[0] == NULL) {
    fprintf(stderr, "gemv: out of memory\n");
    free(A);
    free(b);
    free(x);
    return 1;
  }

  make_hilbert_mat(rows,cols, &A);

#pragma omp parallel
    {
#pragma omp for
    for (size_t i = 0; i < cols; i++) {
      x[i] = (double) i+1 ;
    }

#pragma omp for
    for (size_t i = 0; i < rows; i++) {
      b[i] = (double) 0.0;
    }
    }

  // Only the gemv() call itself is timed.
  Timer timer;
  start(&timer, 0, 0);

  gemv(A, x, rows, cols, &b);

  stop(&timer, 0);

  printf("Kernel ");
  print(&timer, 0, 1);
  printf("\n");

#if 0
  print_vec(x, cols);
  print_mat(A, rows, cols);
  print_vec(b, rows);
#endif

  printf("sum(x) = %f, sum(Ax) = %f\n", sum_vec(x,cols), sum_vec(b,rows));

  // Release the element storage first, then the row-pointer array.
  free(A[0]);
  free(A);
  free(b);
  free(x);
  return 0;
}

/*
 * Adds the matrix-vector product A * x to the vector *b.
 * A is given as `rows` row pointers of `cols` elements each; *b is not cleared
 * here, so the caller decides the starting value of every output element.
 */
void gemv(double** A, double* x, size_t rows, size_t cols, double** b) {
  double *out = *b;
#pragma omp parallel for
  for (size_t r = 0; r < rows; r++) {
    const double *row = A[r];
    for (size_t c = 0; c < cols; c++) {
      out[r] += row[c] * x[c];
    }
  }
}

/* Fills the rows x cols matrix *A with Hilbert entries: element (i, j) = 1 / (i + j + 1). */
void make_hilbert_mat(size_t rows, size_t cols, double*** A) {
  double **mat = *A;
#pragma omp parallel for
  for (size_t r = 0; r < rows; r++) {
    double *row = mat[r];
    for (size_t c = 0; c < cols; c++) {
      row[c] = 1.0 / ((double) r + (double) c + 1.0);
    }
  }
}

/*
 * Returns the sum of the first `rows` elements of vec (0.0 for an empty vector).
 * The index is a size_t, matching the type of the bound, so lengths beyond
 * INT_MAX are iterated without signed overflow.
 */
double sum_vec(double* vec, size_t rows) {
  double sum = 0.0;
#pragma omp parallel for reduction(+:sum)
  for (size_t i = 0; i < rows; i++) sum = sum + vec[i];
  return sum;
}


================================================
FILE: golang/uPIMulator/benchmark/GEMV/baselines/cpu/gemv_utils.h
================================================
/*
 * Allocates a rows x cols matrix of doubles as one contiguous element buffer
 * plus an array of `rows` row pointers into it, and stores the row-pointer
 * array in *dense.
 *
 * On return (*dense)[0] is the start of the element buffer and (*dense)[i]
 * points to row i. The caller releases the matrix with free((*dense)[0])
 * followed by free(*dense).
 *
 * *dense is set to NULL when rows or cols is 0 or when an allocation fails.
 */
void allocate_dense(size_t rows,size_t  cols, double*** dense) {

  *dense = NULL;
  if (rows == 0 || cols == 0) {
    return;
  }

  // The row table holds pointers, so it is sized from the pointer type.
  double **row_ptrs = malloc(sizeof(*row_ptrs) * rows);
  if (row_ptrs == NULL) {
    return;
  }

  double *storage = malloc(sizeof(*storage) * rows * cols);
  if (storage == NULL) {
    free(row_ptrs);
    return;
  }

  for (size_t i = 0; i < rows; i++) {
    row_ptrs[i] = storage + i * cols;
  }

  *dense = row_ptrs;
}

/* Prints the rows x cols matrix A to stdout, one text line per matrix row. */
void print_mat(double** A, size_t rows, size_t cols) {
  size_t r = 0;
  while (r < rows) {
    const double *row = A[r];
    for (size_t c = 0; c < cols; c++) {
      printf("%f ", row[c]);
    }
    printf("\n");
    r++;
  }
}

/* Prints the first `rows` elements of b to stdout, one per line. */
void print_vec(double* b, size_t rows) {
  const double *end = b + rows;
  for (const double *cur = b; cur != end; cur++) {
    printf("%f\n", *cur);
  }
}

// Prototypes of the routines defined in gemv_openmp.c.

// Adds the product A * x to the vector *b (rows x cols matrix given as row pointers).
void gemv(double** A, double* x, size_t rows, size_t cols, double** b);
// Fills *A so that element (i, j) is 1 / (i + j + 1).
void make_hilbert_mat(size_t rows, size_t cols, double*** A);
// Returns the sum of the first `rows` elements of vec.
double sum_vec(double* vec, size_t rows);


================================================
FILE: golang/uPIMulator/benchmark/GEMV/baselines/gpu/Makefile
================================================
# Builds the CUDA baseline of GEMV.
# Both targets are commands rather than files, so they are declared phony.
.PHONY: all clean

all:
	/usr/local/cuda/bin/nvcc gemv.cu -I/usr/local/cuda/include -lm -o gemv

# $(RM) expands to `rm -f`, so cleaning an already clean tree does not fail.
clean:
	$(RM) gemv


================================================
FILE: golang/uPIMulator/benchmark/GEMV/baselines/gpu/README
================================================
Matrix-Vector Multiplication (GEMV)

Compilation instructions:

    make

Execution instructions:

    ./gemv


================================================
FILE: golang/uPIMulator/benchmark/GEMV/baselines/gpu/gemv.cu
================================================
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <cuda.h>

#define THREAD 128

#define T int

__global__ void gemv(int m, int n, T *adim, T *b, T *d_ans);

void cgemv(int m, int n, T *adim, T *b, T *d_ans);

/* Returns the current wall-clock time from gettimeofday() in seconds, including the microsecond fraction. */
double gettime()
{
    struct timeval now;
    gettimeofday(&now, NULL);

    const double fraction = (double)now.tv_usec*1.0e-6;
    return now.tv_sec + fraction;
}

/*
 * GPU baseline driver.
 *
 * Fills an m x n matrix and an n-element vector with ones, computes the
 * reference product on the host with cgemv(), runs the gemv kernel with one
 * block per matrix row, and prints both results side by side followed by the
 * kernel time measured with CUDA events.
 *
 * Returns 0 on success and 1 when a host buffer could not be allocated.
 */
int main(int argc, char **argv)
{
    /* for CPU */
    int i, j;
    int *bdim, *c, *ans, *h_ans;
    int n = 8192;
    int m = 20480;

    bdim = (T*)malloc(sizeof(T) *m*n);
    c = (T*)malloc(sizeof(T) *n);
    // cgemv() accumulates with +=, so the reference vector has to start at zero.
    ans = (T*)calloc(m, sizeof(T));
    h_ans = (T*)malloc(sizeof(T) *m);
    if (bdim == NULL || c == NULL || ans == NULL || h_ans == NULL) {
        fprintf(stderr, "gemv: host allocation failed\n");
        free(bdim);
        free(c);
        free(ans);
        free(h_ans);
        return 1;
    }

    /* for GPU */
    T *d_bdim, *d_c, *d_ans;
    cudaMalloc((void **)&d_bdim, sizeof(T)*m*n);
    cudaMalloc((void **)&d_c, sizeof(T)*n);
    cudaMalloc((void **)&d_ans, sizeof(T)*m);

    // The two loops together touch every one of the m*n matrix elements.
    for(i = 0; i < n; i++)
    {
        c[i] = 1;
        for(j = 0; j < m; j++)
            bdim[i*m+j] = 1;
    }

    // Host reference result
    cgemv(m, n, bdim, c, ans);

    // Event creation
    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);
    float time1 = 0;

    cudaMemcpy(d_bdim, bdim, sizeof(T)*m*n, cudaMemcpyHostToDevice);
    cudaMemcpy(d_c, c, sizeof(T)*n, cudaMemcpyHostToDevice);

    // Only the kernel is timed: one block per row, THREAD threads per block.
    cudaEventRecord( start, 0 );
    gemv<<<m, THREAD>>>(m, n, d_bdim, d_c, d_ans);
    cudaEventRecord( stop, 0 );
    cudaEventSynchronize( stop );
    cudaEventElapsedTime( &time1, start, stop );

    cudaMemcpy(h_ans, d_ans, sizeof(T)*m, cudaMemcpyDeviceToHost);

    for(i = 0; i < m; i++)
        printf("%d -- %d\n", ans[i], h_ans[i]);

    printf("Execution time = %f ms\n", time1);

    cudaEventDestroy(start);
    cudaEventDestroy(stop);

    free(bdim);
    free(c);
    free(ans);
    free(h_ans);
    cudaFree(d_bdim);
    cudaFree(d_c);
    cudaFree(d_ans);

    return 0;
}

/*
 * Matrix-vector product kernel: block blockIdx.x computes element blockIdx.x
 * of d_ans as the dot product of row blockIdx.x of adim (row-major, n columns)
 * with b.
 *
 * Must be launched with exactly THREAD threads per block, because the shared
 * buffer and the reduction below are sized by THREAD. Blocks whose index is
 * not a valid row (>= m) do nothing.
 */
__global__ void gemv(int m, int n, T* adim, T* b, T* d_ans)
{
    const int tid = threadIdx.x;
    const int row = blockIdx.x;
    const int full_chunks = n / THREAD;
    __shared__ T tmp[THREAD];

    // The whole block leaves together, so no thread is left waiting at a barrier.
    if (row >= m)
        return;

    // Each thread accumulates the columns tid, tid + THREAD, tid + 2*THREAD, ...
    T partial = 0;
    for (int i = 0; i < full_chunks; i++)
    {
        partial += adim[row * n + i * THREAD + tid] * b[i * THREAD + tid];
    }
    // Remaining columns when n is not a multiple of THREAD. The bound is the
    // row length n, not the row count m.
    if (tid < n % THREAD)
        partial += adim[row * n + THREAD * full_chunks + tid] * b[THREAD * full_chunks + tid];

    tmp[tid] = partial;
    __syncthreads();

    // Tree reduction in shared memory. The barrier is outside the conditional
    // so that every thread of the block reaches it at each step.
    for (int stride = THREAD / 2; stride > 0; stride /= 2)
    {
        if (tid < stride)
            tmp[tid] += tmp[tid + stride];
        __syncthreads();
    }

    if (tid == 0)
        d_ans[row] = tmp[0];
}

/*
 * Host reference: adds the product of the m x n row-major matrix adim with the
 * vector b to d_ans. d_ans is not cleared here.
 */
void cgemv(int m, int n, T *adim, T *b, T *d_ans)
{
    for (int row = 0; row < m; row++)
    {
        for (int col = 0; col < n; col++)
        {
            d_ans[row] += adim[row*n+col] * b[col];
        }
    }
}


================================================
FILE: golang/uPIMulator/benchmark/GEMV/dpu/CMakeLists.txt
================================================
# Block-size exponent forwarded to the DPU sources as -DBL.
SET(BL 10)

# NOTE(review): the compiler and include paths below are absolute; they appear to
# assume the UPMEM SDK 2023.2.0 and this repository are installed under /root.
set(CMAKE_C_COMPILER "/root/upmem-2023.2.0-Linux-x86_64/bin/dpu-upmem-dpurte-clang")
# NR_TASKLETS is not set in this file, so it has to be provided from outside.
set(CMAKE_C_FLAGS "-w -I/root/uPIMulator/benchmark/GEMV/support -O2 -S -DNR_TASKLETS=${NR_TASKLETS} -DBL=${BL}")

# Every .c file below this directory becomes part of the target.
file(GLOB_RECURSE SRCS *.c)

add_executable(GEMV_device ${SRCS})



================================================
FILE: golang/uPIMulator/benchmark/GEMV/dpu/task.c
================================================
/*
 * Matrix vector multiplication with multiple tasklet
 *
 */
#include <stdint.h>
#include <stdio.h>
#include <defs.h>
#include <mram.h>
#include <alloc.h>
#include <barrier.h>
#include <seqread.h>

#include "../support/common.h"

__host dpu_arguments_t DPU_INPUT_ARGUMENTS;

// GEMV: accumulates the dot product of one block of bufferA and bufferB
// (BLOCK_SIZE bytes each) into bufferC[pos].
void __attribute__ ((noinline)) gemv(T *bufferC, T *bufferA, T *bufferB, int pos) {
	const unsigned int nr_elems = BLOCK_SIZE / sizeof(T);
	T *acc = &bufferC[pos];
	unsigned int k = 0;

	while (k < nr_elems) {
		*acc += bufferA[k] * bufferB[k];
		k++;
	}
}

// Barrier shared by all NR_TASKLETS tasklets
BARRIER_INIT(my_barrier, NR_TASKLETS);

/*
 * DPU entry point, executed by every tasklet.
 *
 * Each tasklet takes a contiguous range of the DPU's matrix rows, handed out
 * two rows at a time, and for every row accumulates the dot product with the
 * vector B. Results are written back to MRAM in pairs (8 bytes per write).
 *
 * MRAM layout, as used by the address computations below (offsets from
 * DPU_MRAM_HEAP_POINTER):
 *   A: offset 0, rows stored n_size elements apart
 *   B: offset max_rows * n_size_pad * sizeof(T)
 *   C: directly after B, i.e. a further n_size_pad * sizeof(T) bytes
 *
 * Always returns 0.
 */
int main() {
	unsigned int tasklet_id = me();
#if PRINT
	printf("tasklet_id = %u\n", tasklet_id);
#endif
	if (tasklet_id == 0){ // Only tasklet 0 performs the reset
		mem_reset(); // Reset the heap
	}
	// All tasklets wait here until the heap has been reset
	barrier_wait(&my_barrier);

	// Problem dimensions written by the host into DPU_INPUT_ARGUMENTS
	int32_t n_size = DPU_INPUT_ARGUMENTS.n_size;
	int32_t n_size_pad = DPU_INPUT_ARGUMENTS.n_size_pad;
	uint32_t nr_rows = DPU_INPUT_ARGUMENTS.nr_rows;
	uint32_t max_rows = DPU_INPUT_ARGUMENTS.max_rows;


	// Row distribution: every tasklet gets 2 * chunks rows; the rest_rows
	// leftover rows are handed out two at a time to the lowest tasklet ids.
	unsigned int nrows = nr_rows;
	unsigned int rows_per_tasklet; 
	unsigned int start_row;
	unsigned int chunks = nrows / (NR_TASKLETS + NR_TASKLETS);
	unsigned int dbl_chunks = chunks + chunks; // rows per tasklet before the leftovers are added
	rows_per_tasklet = dbl_chunks;
	unsigned int rest_rows = nrows % (NR_TASKLETS + NR_TASKLETS);

	if ((tasklet_id + tasklet_id) < rest_rows)
		rows_per_tasklet += 2;
	// start_row = number of rows assigned to all lower-numbered tasklets
	if (rest_rows > 0) {
		if ((tasklet_id + tasklet_id) >= rest_rows) {
			unsigned int hlf_rest_rows = rest_rows >> 1;
			// With an odd rest_rows one more tasklet received the two extra rows
			if ((rest_rows & 1) == 1)
				start_row = (hlf_rest_rows + 1) * (dbl_chunks + 2) + (tasklet_id - 1 - hlf_rest_rows) * dbl_chunks;
			else
				start_row = (hlf_rest_rows) * (dbl_chunks + 2) + (tasklet_id - hlf_rest_rows) * dbl_chunks;
		} else 
			start_row = tasklet_id * (dbl_chunks + 2);
	} else {
		start_row = tasklet_id * (dbl_chunks);
	}

	// Address of the current row in MRAM
	uint32_t mram_base_addr_A = (uint32_t) (DPU_MRAM_HEAP_POINTER + start_row * n_size * sizeof(T));
	uint32_t mram_base_addr_B = (uint32_t) (DPU_MRAM_HEAP_POINTER + max_rows * n_size_pad * sizeof(T));
	uint32_t mram_base_addr_C = (uint32_t) (DPU_MRAM_HEAP_POINTER + max_rows * n_size_pad * sizeof(T) + n_size_pad * sizeof(T) + start_row * sizeof(T));
	uint32_t mram_temp_addr_A = mram_base_addr_A;
	uint32_t mram_temp_addr_B = mram_base_addr_B;

	// Initialize local caches for the MRAM blocks: one block (+8 bytes) of A,
	// 8 auxiliary bytes of A, one block of B, and 8 bytes of results
	T *cache_A = (T *) mem_alloc(BLOCK_SIZE + 8);
	T *cache_A_aux = (T *) mem_alloc(8);
	T *cache_B = (T *) mem_alloc(BLOCK_SIZE);
	T *cache_C = (T *) mem_alloc(8);

	// Non-zero when the current row's MRAM address is not a multiple of 8;
	// the blocks read for that row are then shifted down by one element.
	int offset = 0;

	// Iterate over this tasklet's rows, two rows per iteration
	for (unsigned int i = start_row; i < start_row + rows_per_tasklet; i += 2) {

		mram_temp_addr_A = (uint32_t) (DPU_MRAM_HEAP_POINTER + i * n_size * sizeof(T));
		mram_temp_addr_B = mram_base_addr_B;

		// The two results of this row pair
		cache_C[0] = 0;
		cache_C[1] = 0;
		// pos selects the row within the pair; rows at or beyond nr_rows are skipped
		for(unsigned int pos = 0; pos < 2 && i + pos < nr_rows; pos++){
			int n = 0, j;
			// Full blocks; the loop bound leaves the final part of the row to the tail code below
			for (n = 0; n < (int32_t) (n_size - (BLOCK_SIZE/sizeof(T))); n += (BLOCK_SIZE / sizeof(T)))
			{

				mram_read((__mram_ptr void const*) (mram_temp_addr_A), cache_A, BLOCK_SIZE);
				mram_read((__mram_ptr void const*) (mram_temp_addr_B), cache_B, BLOCK_SIZE);

				if(offset)
				{

					// Drop the first element of the block ...
					for(unsigned int off = 0; off < (BLOCK_SIZE / sizeof(T)) - 1; off++)
					{
						cache_A[off] = cache_A[off + 1];
					}

					// ... and take the last element from the 8 bytes that follow the block
					mram_read((__mram_ptr void const*) (mram_temp_addr_A + BLOCK_SIZE), cache_A_aux, 8);

					cache_A[BLOCK_SIZE / sizeof(T) - 1] = cache_A_aux[0];
				}

				// Compute GEMV
				gemv(cache_C, cache_A, cache_B, pos);

				// Update memory addresses
				mram_temp_addr_A += BLOCK_SIZE;
				mram_temp_addr_B += BLOCK_SIZE;
			}

			// Tail of the row: a whole block is read, but only the remaining
			// n_size - n elements are used in the loop further down
			mram_read((__mram_ptr void const*) (mram_temp_addr_A), cache_A, BLOCK_SIZE);


			if(offset)
			{
				// Same one-element shift as for the full blocks above
				for(unsigned int off = 0; off < (BLOCK_SIZE / sizeof(T)) -1; off++)
				{

					cache_A[off] = cache_A[off + 1];
				}

				mram_read((__mram_ptr void const*) (mram_temp_addr_A + BLOCK_SIZE ), cache_A_aux, 8);

  			       cache_A[BLOCK_SIZE / sizeof(T) - 1] = cache_A_aux[0];
			}


			mram_read((__mram_ptr void const*) (mram_temp_addr_B), cache_B, BLOCK_SIZE);

			for (j = 0; j < (int) (n_size - n); j++) {
				// Compute GEMV
				// Guard: the tail must fit into the single block that was read
				if(j >= (int)(BLOCK_SIZE / sizeof(T))){ 
					printf("error\n");
					break;
				}
				cache_C[pos] += cache_A[j] * cache_B[j];
			}


			// Advance A by the (n_size - n) tail elements, i.e. to the start of the next row
			mram_temp_addr_A += (BLOCK_SIZE - ((BLOCK_SIZE / sizeof(T)) - (n_size - n)) * sizeof(T));
			mram_temp_addr_B = mram_base_addr_B;

			// NOTE(review): presumably needed because MRAM reads must start at
			// 8-byte aligned addresses -- confirm against the SDK documentation
			if(mram_temp_addr_A % 8 != 0)
			{
				offset = 1;
			}
			else
			{
				offset = 0;
			}
		}
		// Write cache to current MRAM block
		mram_write(cache_C, (__mram_ptr void *) (mram_base_addr_C), 8);

		// Update memory address
		mram_base_addr_C += 2 * sizeof(T);

	}

	return 0;
}


================================================
FILE: golang/uPIMulator/benchmark/GEMV/host/app.c
================================================
/**
 * app.c
 * GEMV Host Application Source File
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <dpu.h>
#include <dpu_log.h>
#include <unistd.h>
#include <getopt.h>
#include <assert.h>

#if ENERGY
#include <dpu_probe.h>
#endif

#include "../support/common.h"
#include "../support/timer.h"
#include "../support/params.h"

// Define the DPU Binary path as DPU_BINARY here
#ifndef DPU_BINARY
#define DPU_BINARY "./bin/gemv_dpu"
#endif

static T* A;
static T* B;
static T* C;
static T* C_dpu;

// Fills the m_size x n_size matrix A and then the n_size vector B with
// pseudo-random values in [0, 50), using a fixed seed for reproducible inputs
static void init_data(T* A, T* B, unsigned int m_size, unsigned int n_size) {
	const unsigned int nr_matrix_elems = m_size * n_size;
	unsigned int k;

	srand(0);

	// A is drawn before B, which fixes the random sequence each array receives
	for (k = 0; k < nr_matrix_elems; k++)
		A[k] = (unsigned int) (rand()%50);

	for (k = 0; k < n_size; k++)
		B[k] = (unsigned int) (rand()%50);
}

// Host reference: C = A * B for the row-major m_size x n_size matrix A
static void gemv_host(T* C, T* A, T* B, unsigned int m_size, unsigned int n_size) {
	unsigned int row, col;

	// Clear the whole output vector before accumulating into it
	for (row = 0; row < m_size; row++)
		C[row] = 0;

	row = 0;
	while (row < m_size) {
		for (col = 0; col < n_size; col++)
			C[row] += A[row * n_size + col] * B[col];
		row++;
	}
}

/*
 * Main of the Host Application.
 *
 * Allocates NR_DPUS DPUs, splits the m_size matrix rows among them, computes
 * the reference result on the host, then runs n_warmup untimed plus n_reps
 * timed repetitions of: copy inputs to the DPUs, launch the kernel, copy the
 * results back. Finally prints the timings and compares the DPU results of the
 * last repetition with the host result.
 *
 * Returns 0 when the results match and -1 otherwise; exits with EXIT_FAILURE
 * when a host buffer cannot be allocated.
 */
int main(int argc, char **argv) {

	struct Params p = input_params(argc, argv);

	struct dpu_set_t dpu_set, dpu;
	uint32_t nr_of_dpus;

	// Allocate DPUs and load binary
	DPU_ASSERT(dpu_alloc(NR_DPUS, NULL, &dpu_set));
	DPU_ASSERT(dpu_load(dpu_set, DPU_BINARY, NULL));
	DPU_ASSERT(dpu_get_nr_dpus(dpu_set, &nr_of_dpus));

#if ENERGY
	struct dpu_probe_t probe;
	DPU_ASSERT(dpu_probe_init("energy_probe", &probe));
#endif

	unsigned int i;
	unsigned int m_size = p.m_size;
	unsigned int n_size = p.n_size;

	// Initialize help data
	dpu_info = (struct dpu_info_t *) malloc(nr_of_dpus * sizeof(struct dpu_info_t));
	dpu_arguments_t *input_args = (dpu_arguments_t *) malloc(nr_of_dpus * sizeof(dpu_arguments_t));
	if (dpu_info == NULL || input_args == NULL) {
		fprintf(stderr, "Failed to allocate the per-DPU bookkeeping data\n");
		exit(EXIT_FAILURE);
	}
	uint32_t max_rows_per_dpu = 0;
	// Row length rounded up to an even number of elements
	uint32_t n_size_pad = n_size;
	if(n_size % 2 == 1)
	{
		n_size_pad++;
	}

	i = 0;
	DPU_FOREACH(dpu_set, dpu, i) {
		// Split m_size rows as evenly as possible: the first rest_rows DPUs get one extra row
		uint32_t rows_per_dpu;
		uint32_t prev_rows_dpu = 0;
		uint32_t chunks = m_size / nr_of_dpus;
		rows_per_dpu = chunks;
		uint32_t rest_rows = m_size % nr_of_dpus;
		if (i < rest_rows)
			rows_per_dpu++;
		// prev_rows_dpu = number of rows assigned to all lower-numbered DPUs
		if (rest_rows > 0) {
			if (i >= rest_rows)
				prev_rows_dpu = rest_rows * (chunks + 1) + (i - rest_rows) * chunks;
			else
				prev_rows_dpu = i * (chunks + 1);
		} else {
			prev_rows_dpu = i * chunks;
		}

		// Keep max rows for parallel transfers
		uint32_t rows_per_dpu_pad = rows_per_dpu;
		if (rows_per_dpu_pad % 2 == 1) // 4-byte elements
			rows_per_dpu_pad++;
		if (rows_per_dpu_pad > max_rows_per_dpu)
			max_rows_per_dpu = rows_per_dpu_pad;

		dpu_info[i].rows_per_dpu = rows_per_dpu;
		dpu_info[i].rows_per_dpu_pad = rows_per_dpu_pad;
		dpu_info[i].prev_rows_dpu = prev_rows_dpu;

		// Copy input arguments to DPU
		input_args[i].n_size = n_size;
		input_args[i].n_size_pad = n_size_pad;
		input_args[i].nr_rows = rows_per_dpu;
	}

	A = malloc(max_rows_per_dpu * nr_of_dpus * n_size_pad * sizeof(T));
	B = malloc(n_size_pad * sizeof(T));
	C = malloc(max_rows_per_dpu * nr_of_dpus * sizeof(T));
	// The result buffer is allocated once and reused by every repetition
	C_dpu = malloc(max_rows_per_dpu * nr_of_dpus * sizeof(T));
	if (A == NULL || B == NULL || C == NULL || C_dpu == NULL) {
		fprintf(stderr, "Failed to allocate the host buffers\n");
		exit(EXIT_FAILURE);
	}

	// Initialize data with arbitrary data
	init_data(A, B, m_size, n_size);

	// Timer
	Timer timer;

	// Compute output on CPU (performance comparison and verification purposes)
	start(&timer, 0, 0);
	gemv_host(C, A, B, m_size, n_size);
	stop(&timer, 0);
	for (unsigned int rep = 0; rep < p.n_warmup + p.n_reps; rep++) {

		// Timer 1: CPU-DPU transfers (warmup repetitions are not timed)
		if (rep >= p.n_warmup)
			start(&timer, 1, rep - p.n_warmup);
		// Input arguments
		i = 0;
		DPU_FOREACH(dpu_set, dpu, i) {
			// Copy input arguments to DPU
			input_args[i].max_rows = max_rows_per_dpu;

			DPU_ASSERT(dpu_prepare_xfer(dpu, input_args + i));
		}

		DPU_ASSERT(dpu_push_xfer(dpu_set, DPU_XFER_TO_DPU, "DPU_INPUT_ARGUMENTS", 0, sizeof(dpu_arguments_t), DPU_XFER_DEFAULT));

		// Copy input array and vector
		i = 0;
		DPU_FOREACH(dpu_set, dpu, i) {
			DPU_ASSERT(dpu_prepare_xfer(dpu, A + dpu_info[i].prev_rows_dpu * n_size));
		}
		DPU_ASSERT(dpu_push_xfer(dpu_set, DPU_XFER_TO_DPU, DPU_MRAM_HEAP_POINTER_NAME, 0, max_rows_per_dpu * n_size_pad * sizeof(T), DPU_XFER_DEFAULT));
		// Every DPU receives the same vector B, placed right after its part of A
		i = 0;
		DPU_FOREACH(dpu_set, dpu, i) {
			DPU_ASSERT(dpu_prepare_xfer(dpu, B));
		}
		DPU_ASSERT(dpu_push_xfer(dpu_set, DPU_XFER_TO_DPU, DPU_MRAM_HEAP_POINTER_NAME, max_rows_per_dpu * n_size_pad * sizeof(T) , n_size_pad * sizeof(T), DPU_XFER_DEFAULT));

		if (rep >= p.n_warmup)
			stop(&timer, 1);

		// Run kernel on DPUs (timer 2)
		if (rep >= p.n_warmup)
		{
			start(&timer, 2, rep - p.n_warmup);
#if ENERGY
			DPU_ASSERT(dpu_probe_start(&probe));
#endif
		}

		DPU_ASSERT(dpu_launch(dpu_set, DPU_SYNCHRONOUS));

		if (rep >= p.n_warmup)
		{
			stop(&timer, 2);
#if ENERGY
			DPU_ASSERT(dpu_probe_stop(&probe));
#endif
		}
#if PRINT
		// Display DPU Logs
		DPU_FOREACH(dpu_set, dpu) {
			DPU_ASSERT(dpulog_read_for_dpu(dpu.dpu, stdout));
		}
#endif

		// Retrieve results (timer 3); the output vector sits after A and B in MRAM
		if (rep >= p.n_warmup)
			start(&timer, 3, rep - p.n_warmup);
		i = 0;
		DPU_FOREACH(dpu_set, dpu, i) {
			DPU_ASSERT(dpu_prepare_xfer(dpu, C_dpu + i * max_rows_per_dpu));
		}
		DPU_ASSERT(dpu_push_xfer(dpu_set, DPU_XFER_FROM_DPU, DPU_MRAM_HEAP_POINTER_NAME, max_rows_per_dpu * n_size_pad * sizeof(T) + n_size_pad * sizeof(T), max_rows_per_dpu * sizeof(T), DPU_XFER_DEFAULT));
		if(rep >= p.n_warmup)
			stop(&timer, 3);
	}
#if ENERGY
	double acc_energy, avg_energy, acc_time, avg_time;
	DPU_ASSERT(dpu_probe_get(&probe, DPU_ENERGY, DPU_ACCUMULATE, &acc_energy));
	DPU_ASSERT(dpu_probe_get(&probe, DPU_ENERGY, DPU_AVERAGE, &avg_energy));
	DPU_ASSERT(dpu_probe_get(&probe, DPU_TIME, DPU_ACCUMULATE, &acc_time));
	DPU_ASSERT(dpu_probe_get(&probe, DPU_TIME, DPU_AVERAGE, &avg_time));
#endif

	// Print timing results
	printf("CPU Version Time (ms): ");
	print(&timer, 0, 1);
	printf("CPU-DPU Time (ms): ");
	print(&timer, 1, p.n_reps);
	printf("DPU Kernel Time (ms): ");
	print(&timer, 2, p.n_reps);
	printf("DPU-CPU Time (ms): ");
	print(&timer, 3, p.n_reps);

#if ENERGY
	printf("Energy (J): %f J\t", avg_energy);
#endif

	// Check output: DPU n holds its rows_per_dpu results at offset n * max_rows_per_dpu
	bool status = true;
	unsigned int n,j;
	i = 0;
	for (n = 0; n < nr_of_dpus; n++) {
		for (j = 0; j < dpu_info[n].rows_per_dpu; j++) {
			if(C[i] != C_dpu[n * max_rows_per_dpu + j]) {
				status = false;
#if PRINT
	//			printf("%d: %d -- %d\n", i, C[i], C_dpu[n * max_rows_per_dpu + j]);
#endif
			}
			i++;
		}
	}
	if (status) {
		printf("[" ANSI_COLOR_GREEN "OK" ANSI_COLOR_RESET "] Outputs are equal\n");
	} else {
		printf("[" ANSI_COLOR_RED "ERROR" ANSI_COLOR_RESET "] Outputs differ!\n");
	}

	// Deallocation
	free(A);
	free(B);
	free(C);
	free(C_dpu);
	free(input_args);
	free(dpu_info);
	DPU_ASSERT(dpu_free(dpu_set));

#if ENERGY
	DPU_ASSERT(dpu_probe_deinit(&probe));
#endif

	return status ? 0 : -1;
}


================================================
FILE: golang/uPIMulator/benchmark/GEMV/support/common.h
================================================
#ifndef _COMMON_H_
#define _COMMON_H_

// uint32_t is used by the declarations below. Included here so the header
// compiles regardless of its users' include order.
#include <stdint.h>

// Structures used by both the host and the dpu to communicate information 
typedef struct {
    uint32_t n_size;      // number of columns of the matrix
    uint32_t n_size_pad;  // n_size rounded up to an even number by the host
    uint32_t nr_rows;     // rows assigned to this DPU
    uint32_t max_rows;    // largest (padded) row count over all DPUs
} dpu_arguments_t;

// Specific information for each DPU
struct dpu_info_t {
    uint32_t rows_per_dpu;      // rows assigned to the DPU
    uint32_t rows_per_dpu_pad;  // rows_per_dpu rounded up to an even number
    uint32_t prev_rows_dpu;     // rows assigned to all lower-numbered DPUs
};
// One entry per DPU; allocated by the host application.
struct dpu_info_t *dpu_info;

// Transfer size between MRAM and WRAM
// BL, when defined at build time, is the log2 of the block size in bytes (default 8 -> 256 bytes).
#ifdef BL
#define BLOCK_SIZE_LOG2 BL
#define BLOCK_SIZE (1 << BLOCK_SIZE_LOG2)
#else
#define BLOCK_SIZE_LOG2 8
#define BLOCK_SIZE (1 << BLOCK_SIZE_LOG2)
#define BL BLOCK_SIZE_LOG2
#endif

// Data type
#define T uint32_t

// ENERGY can be set at build time; PRINT is fixed to 0 here.
#ifndef ENERGY
#define ENERGY 0
#endif
#define PRINT 0

// ANSI escape sequences used to colour the final OK / ERROR message
#define ANSI_COLOR_RED     "\x1b[31m"
#define ANSI_COLOR_GREEN   "\x1b[32m"
#define ANSI_COLOR_RESET   "\x1b[0m"
#endif


================================================
FILE: golang/uPIMulator/benchmark/GEMV/support/params.h
================================================
#ifndef _PARAMS_H_
#define _PARAMS_H_

#include "common.h"

// Command-line parameters of the GEMV host application, filled in by input_params().
typedef struct Params {
    unsigned int  m_size;    // set by -m; initialised to 8192
    unsigned int  n_size;    // set by -n; initialised to 8192
    unsigned int  n_warmup;  // set by -w; initialised to 1
    unsigned int  n_reps;    // set by -e; initialised to 3
}Params;

// Writes the command-line help text to stderr.
static void usage() {
    static const char help_text[] =
        "\nUsage:  ./program [options]"
        "\n"
        "\nGeneral options:"
        "\n    -h        help"
        "\n    -w <W>    # of untimed warmup iterations (default=1)"
        "\n    -e <E>    # of timed repetition iterations (default=3)"
        "\n"
        "\nBenchmark-specific options:"
        "\n    -m <I>    m_size (default=8192 elements)"
        "\n    -n <I>    n_size (default=8192 elements)"
        "\n";

    fputs(help_text, stderr);
}

// Parse the command line into a Params value.
//
// Defaults: m_size = 8192, n_size = 8192, n_warmup = 1, n_reps = 3.
// -h prints the help text and exits with status 0.
// An unrecognized option prints the help text and exits with a non-zero
// status so that calling scripts can detect the failure.
struct Params input_params(int argc, char **argv) {
    struct Params p;
    p.m_size        = 8192;
    p.n_size        = 8192;
    p.n_warmup      = 1;
    p.n_reps        = 3;

    int opt;
    while((opt = getopt(argc, argv, "hm:n:w:e:")) >= 0) {
        switch(opt) {
            case 'h':
                usage();
                exit(0);
                break;
            case 'm': p.m_size        = atoi(optarg); break;
            case 'n': p.n_size        = atoi(optarg); break;
            case 'w': p.n_warmup      = atoi(optarg); break;
            case 'e': p.n_reps        = atoi(optarg); break;
            default:
                      fprintf(stderr, "\nUnrecognized option!\n");
                      usage();
                      // A bad command line is an error, not a success.
                      exit(1);
        }
    }
    // NR_DPUS is a compile-time constant supplied by the build.
    assert(NR_DPUS > 0 && "Invalid # of dpus!");

    return p;
}
#endif


================================================
FILE: golang/uPIMulator/benchmark/GEMV/support/timer.h
================================================
/*
 * Copyright (c) 2016 University of Cordoba and University of Illinois
 * All rights reserved.
 *
 * Developed by:    IMPACT Research Group
 *                  University of Cordoba and University of Illinois
 *                  http://impact.crhc.illinois.edu/
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * with the Software without restriction, including without limitation the 
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 *      > Redistributions of source code must retain the above copyright notice,
 *        this list of conditions and the following disclaimers.
 *      > Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimers in the
 *        documentation and/or other materials provided with the distribution.
 *      > Neither the names of IMPACT Research Group, University of Cordoba, 
 *        University of Illinois nor the names of its contributors may be used 
 *        to endorse or promote products derived from this Software without 
 *        specific prior written permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
 * THE SOFTWARE.
 *
 */

#include <sys/time.h>

// Four independent stopwatch slots; time[] accumulates elapsed microseconds.
typedef struct Timer{

    struct timeval startTime[4];
    struct timeval stopTime[4];
    double         time[4];

}Timer;

// Begin a measurement on slot i. The accumulator is cleared only when
// rep == 0, so later repetitions keep adding to the same total.
void start(Timer *timer, int i, int rep) {
    if(rep == 0)
        timer->time[i] = 0.0;

    struct timeval *begin = &timer->startTime[i];
    gettimeofday(begin, NULL);
}

// End the measurement on slot i and add the elapsed microseconds to time[i].
void stop(Timer *timer, int i) {
    struct timeval *begin = &timer->startTime[i];
    struct timeval *end   = &timer->stopTime[i];

    gettimeofday(end, NULL);

    const double elapsed_us = (end->tv_sec - begin->tv_sec) * 1000000.0 +
                              (end->tv_usec - begin->tv_usec);
    timer->time[i] += elapsed_us;
}

// Print the accumulated time of slot i divided by (1000 * REP).
void print(Timer *timer, int i, int REP) {
    const int divisor = 1000 * REP;
    printf("%f\t", timer->time[i] / divisor);
}


================================================
FILE: golang/uPIMulator/benchmark/HST-L/CMakeLists.txt
================================================
# Only the DPU-side sources are built through CMake; the host subdirectory is
# intentionally left commented out.
#add_subdirectory(host)
add_subdirectory(dpu)

================================================
FILE: golang/uPIMulator/benchmark/HST-L/Makefile
================================================
DPU_DIR := dpu
HOST_DIR := host
BUILDDIR ?= bin
NR_TASKLETS ?= 16
BL ?= 8
NR_DPUS ?= 1
NR_HISTO ?= 1
ENERGY ?= 0

# Marker file recording the configuration the binaries were built with.
# Arguments: $(1)=NR_DPUS, $(2)=NR_TASKLETS, $(3)=BL, $(4)=NR_HISTO.
# Both targets depend on it, so a changed configuration forces a rebuild.
define conf_filename
	${BUILDDIR}/.NR_DPUS_$(1)_NR_TASKLETS_$(2)_BL_$(3)_NR_HISTO_$(4).conf
endef
CONF := $(call conf_filename,${NR_DPUS},${NR_TASKLETS},${BL},${NR_HISTO})

HOST_TARGET := ${BUILDDIR}/host_code
DPU_TARGET := ${BUILDDIR}/dpu_code

COMMON_INCLUDES := support
HOST_SOURCES := $(wildcard ${HOST_DIR}/*.c)
DPU_SOURCES := $(wildcard ${DPU_DIR}/*.c)

.PHONY: all clean test

# Create the build directory at parse time so the marker file can be touched.
__dirs := $(shell mkdir -p ${BUILDDIR})

COMMON_FLAGS := -w -I${COMMON_INCLUDES}
HOST_FLAGS := ${COMMON_FLAGS} -std=c11 -O3 `dpu-pkg-config --cflags --libs dpu` -DNR_TASKLETS=${NR_TASKLETS} -DNR_DPUS=${NR_DPUS} -DBL=${BL} -DENERGY=${ENERGY}
DPU_FLAGS := ${COMMON_FLAGS} -O2 -DNR_TASKLETS=${NR_TASKLETS} -DBL=${BL} -DNR_HISTO=${NR_HISTO} 

all: ${HOST_TARGET} ${DPU_TARGET}

# Remove every marker left by a previous configuration (one wildcard per
# field of conf_filename), then record the current one.
${CONF}:
	$(RM) $(call conf_filename,*,*,*,*)
	touch ${CONF}

# Each target is built twice: once as a binary and once as assembly (.S).
${HOST_TARGET}: ${HOST_SOURCES} ${COMMON_INCLUDES} ${CONF}
	$(CC) -o $@ ${HOST_SOURCES} ${HOST_FLAGS}
	$(CC) -S -o ${HOST_TARGET}.S ${HOST_SOURCES} ${HOST_FLAGS}

${DPU_TARGET}: ${DPU_SOURCES} ${COMMON_INCLUDES} ${CONF}
	dpu-upmem-dpurte-clang ${DPU_FLAGS} -o $@ ${DPU_SOURCES}
	dpu-upmem-dpurte-clang -S ${DPU_FLAGS} -o ${DPU_TARGET}.S ${DPU_SOURCES}

clean:
	$(RM) -r $(BUILDDIR)

test: all
	./${HOST_TARGET}


================================================
FILE: golang/uPIMulator/benchmark/HST-L/dpu/CMakeLists.txt
================================================
# Build configuration for the HST-L DPU program.
# BL and NR_HISTO are fixed here; NR_TASKLETS is referenced below but not set
# in this file, so it must be defined before this file is processed.
SET(BL 10)
SET(NR_HISTO 1)

# NOTE(review): the compiler and include paths are absolute and assume the
# SDK and the repository both live under /root.
# The -S flag makes the compiler emit assembly instead of an object file.
set(CMAKE_C_COMPILER "/root/upmem-2023.2.0-Linux-x86_64/bin/dpu-upmem-dpurte-clang")
set(CMAKE_C_FLAGS "-w -I/root/uPIMulator/benchmark/HST-L/support -O2 -S -DNR_TASKLETS=${NR_TASKLETS} -DBL=${BL} -DNR_HISTO=${NR_HISTO}")

# Every .c file below this directory is part of the target.
file(GLOB_RECURSE SRCS *.c)

add_executable(HST-L_device ${SRCS})



================================================
FILE: golang/uPIMulator/benchmark/HST-L/dpu/task.c
================================================
/*
* Histogram (HST-L) with multiple tasklets
*
*/
#include <stdint.h>
#include <stdio.h>
#include <defs.h>
#include <mram.h>
#include <alloc.h>
#include <perfcounter.h>
#include <barrier.h>
#include <atomic_bit.h>
#include <mutex.h>

#include "../support/common.h"

// Arguments pushed by the host before launch (size, transfer_size, bins, kernel).
__host dpu_arguments_t DPU_INPUT_ARGUMENTS;

// Array for communication between adjacent tasklets
// In main_kernel1 only entries [0, NR_HISTO) are assigned; each holds the
// pointer to one shared histogram.
uint32_t* message[NR_TASKLETS];
// DPU histogram
// NOTE(review): main_kernel1 declares a local with the same name that shadows
// this global; nothing in this file assigns the global itself.
uint32_t* histo_dpu;

// Barrier
// my_barrier synchronizes all NR_TASKLETS tasklets. barriers[] holds one
// barrier per histogram and is initialized by tasklet 0 in main_kernel1.
BARRIER_INIT(my_barrier, NR_TASKLETS);
ATOMIC_BIT_INIT(barriers_mutexes)[NR_HISTO];
barrier_t barriers[NR_HISTO];

// Mutex
// One mutex id per histogram, used by histogram() to guard bin updates.
// NOTE(review): no explicit initialization of my_mutex is visible in this file.
mutex_id_t my_mutex[NR_HISTO];

// Histogram in each tasklet
// Adds the l_size elements of `input` to `histo`. Each element is mapped to
// bin (value * bins) >> DEPTH, and every increment is done while holding the
// mutex that belongs to histogram `histo_id`.
void __attribute__ ((noinline)) histogram(uint32_t* histo, uint32_t bins, T *input, uint32_t histo_id, unsigned int l_size){
    T *cursor = input;
    T *const end = input + l_size;

    while(cursor != end) {
        const T bin = (*cursor * bins) >> DEPTH;

        mutex_lock(my_mutex[histo_id]);
        histo[bin] += 1;
        mutex_unlock(my_mutex[histo_id]);

        ++cursor;
    }
}

extern int main_kernel1(void);

// Dispatch table indexed by the `kernel` field of DPU_INPUT_ARGUMENTS.
int (*kernels[nr_kernels])(void) = {main_kernel1};

// DPU entry point: run the kernel selected by the host.
// Returns the kernel's return value, or -1 when the host-supplied kernel
// index lies outside the dispatch table.
int main(void) { 
    // The index comes from the host; validate it before using it to pick a
    // function pointer.
    const unsigned int kernel_id = (unsigned int)DPU_INPUT_ARGUMENTS.kernel;
    if (kernel_id >= (unsigned int)nr_kernels) {
        return -1;
    }

    // Kernel
    return kernels[kernel_id](); 
}

// main_kernel1
// Histogram kernel run by every tasklet. Tasklets are split into NR_HISTO
// groups; each group shares one histogram, fills it from its share of the
// input blocks, and the group histograms are then summed into message[0]
// and written back to MRAM right after the input region.
// Always returns 0.
int main_kernel1() {
    unsigned int tasklet_id = me();
#if PRINT
    printf("tasklet_id = %u\n", tasklet_id);
#endif
    // Position of this tasklet inside its group, group size, and group id.
    // The mask only equals tasklet_id % NR_HISTO when NR_HISTO is a power of two.
    unsigned int l_tasklet_id = tasklet_id / NR_HISTO;
    unsigned int nr_l_tasklet = NR_TASKLETS / NR_HISTO;
    unsigned int my_histo_id = tasklet_id & (NR_HISTO - 1);

    if (tasklet_id == 0){ // Initialize once the cycle counter
        mem_reset(); // Reset the heap
        // Initialize barriers
        // Each per-histogram barrier is set up by hand for nr_l_tasklet participants.
        for (unsigned int each_barrier = 0; each_barrier < NR_HISTO; each_barrier++) {
            barriers[each_barrier].wait_queue = 0xff;
            barriers[each_barrier].count = nr_l_tasklet;
            barriers[each_barrier].initial_count = nr_l_tasklet;
            barriers[each_barrier].lock = (uint8_t) &ATOMIC_BIT_GET(barriers_mutexes)[each_barrier];
        }
    }
    // Barrier
    // Nobody proceeds until tasklet 0 has reset the heap and set up the barriers.
    barrier_wait(&my_barrier);

    uint32_t input_size_dpu_bytes = DPU_INPUT_ARGUMENTS.size;
    uint32_t input_size_dpu_bytes_transfer = DPU_INPUT_ARGUMENTS.transfer_size; // Transfer input size per DPU in bytes
    uint32_t bins = DPU_INPUT_ARGUMENTS.bins;

    // Address of the current processing block in MRAM
    // The output histogram is placed transfer_size bytes after the heap base.
    uint32_t base_tasklet = tasklet_id << BLOCK_SIZE_LOG2;
    uint32_t mram_base_addr_A = (uint32_t)DPU_MRAM_HEAP_POINTER;
    uint32_t mram_base_addr_histo = (uint32_t)(DPU_MRAM_HEAP_POINTER + input_size_dpu_bytes_transfer);

    // Initialize a local cache to store the MRAM block
    T *cache_A = (T *) mem_alloc(BLOCK_SIZE);
	
    // Local histogram
    // The first NR_HISTO tasklets each allocate one histogram and publish it
    // through message[] for the other tasklets to pick up.
    if (tasklet_id < NR_HISTO){ // Allocate DPU histogram
        uint32_t *histo = (uint32_t *) mem_alloc(bins * sizeof(uint32_t));
        message[tasklet_id] = histo;
    }
    // Barrier
    barrier_wait(&barriers[my_histo_id]);

    uint32_t *my_histo = message[my_histo_id];

    // Initialize local histogram
    // The tasklets of a group clear disjoint, interleaved bins.
    for(unsigned int i = l_tasklet_id; i < bins; i += nr_l_tasklet){
        my_histo[i] = 0;
    }
    // Barrier
    barrier_wait(&barriers[my_histo_id]);

    // Compute histogram
    // Each tasklet starts at its own block and strides by NR_TASKLETS blocks.
    for(unsigned int byte_index = base_tasklet; byte_index < input_size_dpu_bytes; byte_index += BLOCK_SIZE * NR_TASKLETS){

        // Bound checking
        // The last block may be shorter than BLOCK_SIZE.
        uint32_t l_size_bytes = (byte_index + BLOCK_SIZE >= input_size_dpu_bytes) ? (input_size_dpu_bytes - byte_index) : BLOCK_SIZE;

        // Load cache with current MRAM block
        mram_read((const __mram_ptr void*)(mram_base_addr_A + byte_index), cache_A, l_size_bytes);

        // Histogram in each tasklet
        // l_size_bytes >> DIV converts the byte count into a count of T elements.
        histogram(my_histo, bins, cache_A, my_histo_id, l_size_bytes >> DIV);
    }

    // Barrier
    barrier_wait(&my_barrier);

    // Reduce the NR_HISTO histograms into the first one, bin by bin.
    // This local shadows the file-scope histo_dpu.
    uint32_t *histo_dpu = message[0];
    for (unsigned int i = tasklet_id; i < bins; i += NR_TASKLETS){
        uint32_t b = 0;
        for (unsigned int j = 0; j < NR_HISTO; j++){			
            b += *(message[j] + i);
        }
        histo_dpu[i] = b;
    }

    // Barrier
    barrier_wait(&my_barrier);

    // Write dpu histogram to current MRAM block
    // Histograms larger than 2048 bytes are written in 2048-byte chunks
    // (512 uint32_t each).
    // NOTE(review): the chunk count is (bins * 4) >> 11, so a histogram larger
    // than 2048 bytes whose size is not a multiple of 2048 loses its tail.
    if(tasklet_id == 0){
        if(bins * sizeof(uint32_t) <= 2048)
            mram_write(histo_dpu, (__mram_ptr void*)(mram_base_addr_histo), bins * sizeof(uint32_t));
        else 
            for(unsigned int offset = 0; offset < ((bins * sizeof(uint32_t)) >> 11); offset++){
    	        mram_write(histo_dpu + (offset << 9), (__mram_ptr void*)(mram_base_addr_histo + (offset << 11)), 2048);
            }
    }

    return 0;
}


================================================
FILE: golang/uPIMulator/benchmark/HST-L/host/app.c
================================================
/**
* app.c
* HST-L Host Application Source File
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <math.h>
#include <dpu.h>
#include <dpu_log.h>
#include <unistd.h>
#include <getopt.h>
#include <assert.h>

#include "../support/common.h"
#include "../support/timer.h"
#include "../support/params.h"

// Define the DPU Binary path as DPU_BINARY here
#ifndef DPU_BINARY
#define DPU_BINARY "./bin/dpu_code"
#endif

#if ENERGY
#include <dpu_probe.h>
#endif

// Pointer declaration
// All three buffers are allocated and freed in main.
static T* A;                      // input elements for all DPUs
static unsigned int* histo_host;  // reference histogram computed on the CPU (p.bins entries)
static unsigned int* histo;       // per-DPU histograms read back (nr_of_dpus * p.bins entries)

// Create input arrays
// Reads p.input_size big-endian 16-bit samples from the file p.file_name
// into A, byte-swapping each one and clamping it to at most 4095.
// Exits with status 1 if the file cannot be opened.
static void read_input(T* A, const Params p) {

    // Open the path directly. The path is user input, so it must never be
    // used as a printf-style format string or copied into a fixed-size buffer.
    FILE *File = fopen(p.file_name, "rb");
    if(File == NULL) {
        printf("%s does not exist\n", p.file_name);
        exit(1);
    }

    // Initialized so that a failed read never exposes an indeterminate value.
    unsigned short temp = 0;
    int short_read = 0;

    for(unsigned int y = 0; y < p.input_size; y++) {
        if(fread(&temp, sizeof(unsigned short), 1, File) != 1)
            short_read = 1; // keep going with the last value, as before
        A[y] = (unsigned int)ByteSwap16(temp);
        if(A[y] >= 4096)
            A[y] = 4095;
    }
    fclose(File);

    if(short_read)
        fprintf(stderr, "Warning: %s holds fewer than %u elements\n", p.file_name, p.input_size);
}

// Compute output in the host
// exp != 0: one histogram of `bins` entries is accumulated over A.
// exp == 0: the same histogram is accumulated nr_of_dpus times, into
//           consecutive `bins`-sized slices of histo.
static void histogram_host(unsigned int* histo, T* A, unsigned int bins, unsigned int nr_elements, int exp, unsigned int nr_of_dpus) {
    if(exp){
        for (unsigned int idx = 0; idx < nr_elements; idx++) {
            const T value = A[idx];
            histo[(value * bins) >> DEPTH] += 1;
        }
        return;
    }

    for (unsigned int dpu_idx = 0; dpu_idx < nr_of_dpus; dpu_idx++) {
        const unsigned int slice_base = dpu_idx * bins;
        for (unsigned int idx = 0; idx < nr_elements; idx++) {
            const T value = A[idx];
            histo[slice_base + ((value * bins) >> DEPTH)] += 1;
        }
    }
}

// Main of the Host Application
// Allocates NR_DPUS DPUs, loads the DPU binary, then for every warmup and
// timed repetition: computes a reference histogram on the CPU, pushes the
// arguments and input to the DPUs, launches the kernel, reads back the
// per-DPU histograms and merges them. Finally prints the timings and
// compares the merged result with the reference.
// Returns 0 when the outputs match, -1 otherwise.
int main(int argc, char **argv) {

    struct Params p = input_params(argc, argv);

    struct dpu_set_t dpu_set, dpu;
    uint32_t nr_of_dpus;
    
#if ENERGY
    struct dpu_probe_t probe;
    DPU_ASSERT(dpu_probe_init("energy_probe", &probe));
#endif

    // Allocate DPUs and load binary
    DPU_ASSERT(dpu_alloc(NR_DPUS, NULL, &dpu_set));
    DPU_ASSERT(dpu_load(dpu_set, DPU_BINARY, NULL));
    DPU_ASSERT(dpu_get_nr_dpus(dpu_set, &nr_of_dpus));
    printf("Allocated %d DPU(s)\n", nr_of_dpus);

    // Total number of input elements depends on the -x mode:
    //   0: p.input_size per DPU, 1: p.input_size in total,
    //   otherwise: p.input_size replicated dpu_s times.
    unsigned int i = 0;
    unsigned int input_size; // Size of input image
    unsigned int dpu_s = p.dpu_s;
    if(p.exp == 0)
        input_size = p.input_size * nr_of_dpus; // Size of input image
    else if(p.exp == 1)
        input_size = p.input_size; // Size of input image
	else
        input_size = p.input_size * dpu_s; // Size of input image

    // roundup() always moves to the next multiple, so it is only applied
    // when the byte size is not already a multiple of 8.
    const unsigned int input_size_8bytes = 
        ((input_size * sizeof(T)) % 8) != 0 ? roundup(input_size, 8) : input_size; // Input size per DPU (max.), 8-byte aligned
    const unsigned int input_size_dpu = divceil(input_size, nr_of_dpus); // Input size per DPU (max.)
    const unsigned int input_size_dpu_8bytes = 
        ((input_size_dpu * sizeof(T)) % 8) != 0 ? roundup(input_size_dpu, 8) : input_size_dpu; // Input size per DPU (max.), 8-byte aligned

    // Input/output allocation
    // NOTE(review): the malloc results are not checked for NULL.
    A = malloc(input_size_dpu_8bytes * nr_of_dpus * sizeof(T));
    T *bufferA = A;
    histo_host = malloc(p.bins * sizeof(unsigned int));
    histo = malloc(nr_of_dpus * p.bins * sizeof(unsigned int));

    // Create an input file with arbitrary data
    // read_input fills the first p.input_size elements; the replicated modes
    // then copy that first chunk into the remaining slots.
    read_input(A, p);
    if(p.exp == 0){
        for(unsigned int j = 1; j < nr_of_dpus; j++){
            memcpy(&A[j * input_size_dpu_8bytes], &A[0], input_size_dpu_8bytes * sizeof(T));
        }
    }
    else if(p.exp == 2){
        for(unsigned int j = 1; j < dpu_s; j++)
            memcpy(&A[j * p.input_size], &A[0], p.input_size * sizeof(T));
    }

    // Timer declaration
    // Slots: 0 = CPU reference, 1 = CPU-DPU transfer, 2 = DPU kernel, 3 = DPU-CPU transfer.
    Timer timer;

    printf("NR_TASKLETS\t%d\tBL\t%d\tinput_size\t%u\n", NR_TASKLETS, BL, input_size);

    // Loop over main kernel
    // The first p.n_warmup iterations run untimed.
    for(int rep = 0; rep < p.n_warmup + p.n_reps; rep++) {
        memset(histo_host, 0, p.bins * sizeof(unsigned int));
        memset(histo, 0, nr_of_dpus * p.bins * sizeof(unsigned int));

        // Compute output on CPU (performance comparison and verification purposes)
        // The reference is always a single histogram over p.input_size elements.
        if(rep >= p.n_warmup)
            start(&timer, 0, rep - p.n_warmup);
        histogram_host(histo_host, A, p.bins, p.input_size, 1, nr_of_dpus);
        if(rep >= p.n_warmup)
            stop(&timer, 0);

        printf("Load input data\n");
        if(rep >= p.n_warmup)
            start(&timer, 1, rep - p.n_warmup);
        // Input arguments
        // All DPUs but the last get a full-size share; the last one gets
        // whatever remains, with the same transfer size as the others.
        unsigned int kernel = 0;
        i = 0;
	    dpu_arguments_t input_arguments[NR_DPUS];
	    for(i=0; i<nr_of_dpus-1; i++) {
	        input_arguments[i].size=input_size_dpu_8bytes * sizeof(T); 
	        input_arguments[i].transfer_size=input_size_dpu_8bytes * sizeof(T); 
	        input_arguments[i].bins=p.bins;
	        input_arguments[i].kernel=kernel;
	    }
	    input_arguments[nr_of_dpus-1].size=(input_size_8bytes - input_size_dpu_8bytes * (NR_DPUS-1)) * sizeof(T); 
	    input_arguments[nr_of_dpus-1].transfer_size=input_size_dpu_8bytes * sizeof(T); 
	    input_arguments[nr_of_dpus-1].bins=p.bins;
	    input_arguments[nr_of_dpus-1].kernel=kernel;

        // Copy input arrays
        // First the per-DPU argument struct, then each DPU's slice of the input.
        i = 0;
        DPU_FOREACH(dpu_set, dpu, i) {
            DPU_ASSERT(dpu_prepare_xfer(dpu, &input_arguments[i]));
        }
        DPU_ASSERT(dpu_push_xfer(dpu_set, DPU_XFER_TO_DPU, "DPU_INPUT_ARGUMENTS", 0, sizeof(input_arguments[0]), DPU_XFER_DEFAULT));
        DPU_FOREACH(dpu_set, dpu, i) {
            DPU_ASSERT(dpu_prepare_xfer(dpu, bufferA + input_size_dpu_8bytes * i));
        }
        DPU_ASSERT(dpu_push_xfer(dpu_set, DPU_XFER_TO_DPU, DPU_MRAM_HEAP_POINTER_NAME, 0, input_size_dpu_8bytes * sizeof(T), DPU_XFER_DEFAULT));
        if(rep >= p.n_warmup)
            stop(&timer, 1);

        printf("Run program on DPU(s) \n");
        // Run DPU kernel
        if(rep >= p.n_warmup) {
            start(&timer, 2, rep - p.n_warmup);
            #if ENERGY
            DPU_ASSERT(dpu_probe_start(&probe));
            #endif
        }
        DPU_ASSERT(dpu_launch(dpu_set, DPU_SYNCHRONOUS));
        if(rep >= p.n_warmup) {
            stop(&timer, 2);
            #if ENERGY
            DPU_ASSERT(dpu_probe_stop(&probe));
            #endif
        }

#if PRINT
        {
            unsigned int each_dpu = 0;
            printf("Display DPU Logs\n");
            DPU_FOREACH (dpu_set, dpu) {
                printf("DPU#%d:\n", each_dpu);
                DPU_ASSERT(dpulog_read_for_dpu(dpu.dpu, stdout));
                each_dpu++;
            }
        }
#endif

        printf("Retrieve results\n");
        i = 0;
        if(rep >= p.n_warmup)
            start(&timer, 3, rep - p.n_warmup);
        // PARALLEL RETRIEVE TRANSFER
        // Each DPU's histogram sits right after its input region in the heap.
        DPU_FOREACH(dpu_set, dpu, i) {
            DPU_ASSERT(dpu_prepare_xfer(dpu, histo + p.bins * i));
        }
        DPU_ASSERT(dpu_push_xfer(dpu_set, DPU_XFER_FROM_DPU, DPU_MRAM_HEAP_POINTER_NAME, input_size_dpu_8bytes * sizeof(T), p.bins * sizeof(unsigned int), DPU_XFER_DEFAULT));
		
        // Final histogram merging
        // Everything is summed into the first DPU's slice, histo[0 .. bins).
        for(i = 1; i < nr_of_dpus; i++){
            for(unsigned int j = 0; j < p.bins; j++){
                histo[j] += histo[j + i * p.bins];
            }			
        }		
        if(rep >= p.n_warmup)
            stop(&timer, 3);

    }

    // Print timing results
    printf("CPU ");
    print(&timer, 0, p.n_reps);
    printf("CPU-DPU ");
    print(&timer, 1, p.n_reps);
    printf("DPU Kernel ");
    print(&timer, 2, p.n_reps);
    printf("DPU-CPU ");
    print(&timer, 3, p.n_reps);

    // NOTE(review): the probe initialized at the top of main is never
    // deinitialized in this function.
    #if ENERGY
    double energy;
    DPU_ASSERT(dpu_probe_get(&probe, DPU_ENERGY, DPU_AVERAGE, &energy));
    printf("DPU Energy (J): %f\t", energy);
    #endif	


    // Check output
    // The reference covers p.input_size elements once, so the merged DPU
    // result is expected to be 1x, dpu_s x or nr_of_dpus x the reference,
    // depending on the -x mode.
    bool status = true;
    if(p.exp == 1) 
        for (unsigned int j = 0; j < p.bins; j++) {
            if(histo_host[j] != histo[j]){ 
                status = false;
#if PRINT
                printf("%u - %u: %u -- %u\n", j, j, histo_host[j], histo[j]);
#endif
            }
        }
    else if(p.exp == 2) 
        for (unsigned int j = 0; j < p.bins; j++) {
            if(dpu_s * histo_host[j] != histo[j]){ 
                status = false;
#if PRINT
                printf("%u - %u: %u -- %u\n", j, j, dpu_s * histo_host[j], histo[j]);
#endif
            }
        }
    else
        for (unsigned int j = 0; j < p.bins; j++) {
            if(nr_of_dpus * histo_host[j] != histo[j]){ 
                status = false;
#if PRINT
                printf("%u - %u: %u -- %u\n", j, j, nr_of_dpus * histo_host[j], histo[j]);
#endif
            }
        }
    if (status) {
        printf("[" ANSI_COLOR_GREEN "OK" ANSI_COLOR_RESET "] Outputs are equal\n");
    } else {
        printf("[" ANSI_COLOR_RED "ERROR" ANSI_COLOR_RESET "] Outputs differ!\n");
    }

    // Deallocation
    free(A);
    free(histo_host);
    free(histo);
    DPU_ASSERT(dpu_free(dpu_set));
	
    return status ? 0 : -1;
}


================================================
FILE: golang/uPIMulator/benchmark/HST-L/run.sh
================================================
#!/bin/bash

# Sweep HST-L over histogram sizes (b) and tasklet counts (k) for each DPU
# count (i). Every configuration is rebuilt from scratch, run with 2 warmup
# and 5 timed iterations, and its output is stored under profile/.

# The results are redirected into profile/, so it has to exist before the
# first run.
mkdir -p profile

for i in 1
do
	for b in 64 128 256 512 1024 2048 4096
	do
		for k in 1 2 4 8 16
		do
			# Every command runs in the foreground, so no explicit wait is needed.
			NR_DPUS=$i NR_TASKLETS=$k BL=10 make all
			./bin/host_code -w 2 -e 5 -b "${b}" > "profile/HSTL_${b}_tl${k}_dpu${i}.txt"
			make clean
		done
	done
done


================================================
FILE: golang/uPIMulator/benchmark/HST-L/support/common.h
================================================
#ifndef _COMMON_H_
#define _COMMON_H_

// Transfer size between MRAM and WRAM
// BL is the log2 of BLOCK_SIZE. When the build does not define BL, it falls
// back to 8 (BLOCK_SIZE = 256) and BL is defined to match.
#ifdef BL
#define BLOCK_SIZE_LOG2 BL
#define BLOCK_SIZE (1 << BLOCK_SIZE_LOG2)
#else
#define BLOCK_SIZE_LOG2 8
#define BLOCK_SIZE (1 << BLOCK_SIZE_LOG2)
#define BL BLOCK_SIZE_LOG2
#endif

// Data type
#define T uint32_t
#define DIV 2 // Shift right to divide by sizeof(T)
#define REGS (BLOCK_SIZE >> 2) // 32 bits

// Pixel depth
#define DEPTH 12
// Swap the two low-order bytes of n. The argument is parenthesized so that
// any expression can be passed safely.
#define ByteSwap16(n) (((((unsigned int)(n)) << 8) & 0xFF00) | ((((unsigned int)(n)) >> 8) & 0x00FF))

// Structures used by both the host and the dpu to communicate information 
typedef struct {
    uint32_t size;
    uint32_t transfer_size;
    uint32_t bins;
	enum kernels {
	    kernel1 = 0,
	    nr_kernels = 1,
	} kernel;
} dpu_arguments_t;

#ifndef ENERGY
#define ENERGY 0
#endif
#define PRINT 0 

#define ANSI_COLOR_RED     "\x1b[31m"
#define ANSI_COLOR_GREEN   "\x1b[32m"
#define ANSI_COLOR_RESET   "\x1b[0m"

// divceil: ceiling of n / m for n >= 1.
// roundup: the next multiple of m strictly greater than the largest multiple
// not exceeding n. A value that is already a multiple still moves up by m, so
// callers test for alignment first.
// Arguments are parenthesized so expressions such as `a + b` expand correctly.
#define divceil(n, m) (((n)-1) / (m) + 1)
#define roundup(n, m) (((n) / (m)) * (m) + (m))
#endif


================================================
FILE: golang/uPIMulator/benchmark/HST-L/support/params.h
================================================
#ifndef _PARAMS_H_
#define _PARAMS_H_

#include "common.h"

// Command-line parameters of the HST-L host program (filled in by input_params).
typedef struct Params {
    unsigned int   input_size;  // -i option, default 1536 * 1024 elements
    unsigned int   bins;        // -b option, default 256
    int   n_warmup;             // -w option, untimed warmup iterations, default 1
    int   n_reps;               // -e option, timed repetitions, default 3
    const char *file_name;      // -f option; points at optarg or a string literal, never owned
    int  exp;                   // -x option, scaling mode, default 0
    int  dpu_s;                 // -z option, default 64
}Params;

// Print the command-line help text to stderr.
// The text lists every option accepted by input_params, with the defaults
// that input_params actually applies.
static void usage() {
    fprintf(stderr,
        "\nUsage:  ./program [options]"
        "\n"
        "\nGeneral options:"
        "\n    -h        help"
        "\n    -w <W>    # of untimed warmup iterations (default=1)"
        "\n    -e <E>    # of timed repetition iterations (default=3)"
        "\n    -x <X>    Weak (0) or strong (1, 2) scaling (default=0)"
        "\n    -z <Z>    # of input replicas used with -x 2 (default=64)"
        "\n"
        "\nBenchmark-specific options:"
        "\n    -i <I>    input size (default=1536*1024 elements)"
        "\n    -b <B>    histogram size (default=256 bins)"
        "\n    -f <F>    input image file (default=./input/image_VanHateren.iml)"
        "\n");
}

// Parse the command line into a Params value.
//
// Defaults: input_size = 1536 * 1024, bins = 256, n_warmup = 1, n_reps = 3,
// exp = 0, file_name = "./input/image_VanHateren.iml", dpu_s = 64.
// -h prints the help text and exits with status 0.
// An unrecognized option prints the help text and exits with a non-zero
// status so that calling scripts can detect the failure.
struct Params input_params(int argc, char **argv) {
    struct Params p;
    p.input_size    = 1536 * 1024;
    p.bins          = 256;
    p.n_warmup      = 1;
    p.n_reps        = 3;
    p.exp           = 0;
    p.file_name     = "./input/image_VanHateren.iml";
    p.dpu_s         = 64;

    int opt;
    while((opt = getopt(argc, argv, "hi:b:w:e:f:x:z:")) >= 0) {
        switch(opt) {
        case 'h':
        usage();
        exit(0);
        break;
        case 'i': p.input_size    = atoi(optarg); break;
        case 'b': p.bins          = atoi(optarg); break;
        case 'w': p.n_warmup      = atoi(optarg); break;
        case 'e': p.n_reps        = atoi(optarg); break;
        case 'f': p.file_name     = optarg; break; // points into argv, not copied
        case 'x': p.exp           = atoi(optarg); break;
        case 'z': p.dpu_s         = atoi(optarg); break;
        default:
            fprintf(stderr, "\nUnrecognized option!\n");
            usage();
            // A bad command line is an error, not a success.
            exit(1);
        }
    }
    // NR_DPUS is a compile-time constant supplied by the build.
    assert(NR_DPUS > 0 && "Invalid # of dpus!");

    return p;
}
#endif


================================================
FILE: golang/uPIMulator/benchmark/HST-L/support/timer.h
================================================
/*
 * Copyright (c) 2016 University of Cordoba and University of Illinois
 * All rights reserved.
 *
 * Developed by:    IMPACT Research Group
 *                  University of Cordoba and University of Illinois
 *                  http://impact.crhc.illinois.edu/
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * with the Software without restriction, including without limitation the 
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 *      > Redistributions of source code must retain the above copyright notice,
 *        this list of conditions and the following disclaimers.
 *      > Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimers in the
 *        documentation and/or other materials provided with the distribution.
 *      > Neither the names of IMPACT Research Group, University of Cordoba, 
 *        University of Illinois nor the names of its contributors may be used 
 *        to endorse or promote products derived from this Software without 
 *        specific prior written permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
 * THE SOFTWARE.
 *
 */

#include <sys/time.h>

// Four independent stopwatch slots; time[] accumulates elapsed microseconds.
typedef struct Timer{

    struct timeval startTime[4];
    struct timeval stopTime[4];
    double         time[4];

}Timer;

// Begin a measurement on slot i. The accumulator is cleared only when
// rep == 0, so later repetitions keep adding to the same total.
void start(Timer *timer, int i, int rep) {
    if(rep == 0)
        timer->time[i] = 0.0;

    struct timeval *begin = &timer->startTime[i];
    gettimeofday(begin, NULL);
}

// End the measurement on slot i and add the elapsed microseconds to time[i].
void stop(Timer *timer, int i) {
    struct timeval *begin = &timer->startTime[i];
    struct timeval *end   = &timer->stopTime[i];

    gettimeofday(end, NULL);

    const double elapsed_us = (end->tv_sec - begin->tv_sec) * 1000000.0 +
                              (end->tv_usec - begin->tv_usec);
    timer->time[i] += elapsed_us;
}

// Print the accumulated time of slot i divided by (1000 * REP).
void print(Timer *timer, int i, int REP) {
    const int divisor = 1000 * REP;
    printf("Time (ms): %f\t", timer->time[i] / divisor);
}


================================================
FILE: golang/uPIMulator/benchmark/HST-S/CMakeLists.txt
================================================
# Only the DPU-side sources are built through CMake; the host subdirectory is
# intentionally left commented out.
#add_subdirectory(host)
add_subdirectory(dpu)

================================================
FILE: golang/uPIMulator/benchmark/HST-S/Makefile
================================================
DPU_DIR := dpu
HOST_DIR := host
BUILDDIR ?= bin
NR_TASKLETS ?= 16
BL ?= 10
NR_DPUS ?= 1
ENERGY ?= 0

# Marker file recording the configuration the binaries were built with.
# Arguments: $(1)=NR_DPUS, $(2)=NR_TASKLETS, $(3)=BL.
# Both targets depend on it, so a changed configuration forces a rebuild.
define conf_filename
	${BUILDDIR}/.NR_DPUS_$(1)_NR_TASKLETS_$(2)_BL_$(3).conf
endef
CONF := $(call conf_filename,${NR_DPUS},${NR_TASKLETS},${BL})

HOST_TARGET := ${BUILDDIR}/host_code
DPU_TARGET := ${BUILDDIR}/dpu_code

COMMON_INCLUDES := support
HOST_SOURCES := $(wildcard ${HOST_DIR}/*.c)
DPU_SOURCES := $(wildcard ${DPU_DIR}/*.c)

.PHONY: all clean test

# Create the build directory at parse time so the marker file can be touched.
__dirs := $(shell mkdir -p ${BUILDDIR})

COMMON_FLAGS := -w -I${COMMON_INCLUDES}
HOST_FLAGS := ${COMMON_FLAGS} -std=c11 -O3 `dpu-pkg-config --cflags --libs dpu` -DNR_TASKLETS=${NR_TASKLETS} -DNR_DPUS=${NR_DPUS} -DBL=${BL} -DENERGY=${ENERGY}
DPU_FLAGS := ${COMMON_FLAGS} -O2 -DNR_TASKLETS=${NR_TASKLETS} -DBL=${BL}

all: ${HOST_TARGET} ${DPU_TARGET}

# Remove every marker left by a previous configuration (one wildcard per
# field of conf_filename), then record the current one.
${CONF}:
	$(RM) $(call conf_filename,*,*,*)
	touch ${CONF}

# Each target is built twice: once as a binary and once as assembly (.S).
${HOST_TARGET}: ${HOST_SOURCES} ${COMMON_INCLUDES} ${CONF}
	$(CC) -o $@ ${HOST_SOURCES} ${HOST_FLAGS}
	$(CC) -S -o ${HOST_TARGET}.S ${HOST_SOURCES} ${HOST_FLAGS}

${DPU_TARGET}: ${DPU_SOURCES} ${COMMON_INCLUDES} ${CONF}
	dpu-upmem-dpurte-clang ${DPU_FLAGS} -o $@ ${DPU_SOURCES}
	dpu-upmem-dpurte-clang -S ${DPU_FLAGS} -o ${DPU_TARGET}.S ${DPU_SOURCES}

clean:
	$(RM) -r $(BUILDDIR)

test: all
	./${HOST_TARGET}


================================================
FILE: golang/uPIMulator/benchmark/HST-S/baselines/cpu/Makefile
================================================
# Neither target names a file it produces, so mark both as phony.
.PHONY: all clean

# Build the OpenMP CPU baseline as ./hist.
all:
	gcc -o hist -fopenmp app_baseline.c 

# -f keeps clean from failing when hist has not been built yet.
clean:
	rm -f hist



================================================
FILE: golang/uPIMulator/benchmark/HST-S/baselines/cpu/README
================================================
Histogram - input partition (HST)

Compilation instructions:

    make

Execution instructions

    ./hist -i 1006632960 -t 4

For more options:

    ./hist -h


================================================
FILE: golang/uPIMulator/benchmark/HST-S/baselines/cpu/app_baseline.c
================================================
/*
* JGL@SAFARI
*/

/**
* @file app.c
* @brief Template for a Host Application Source File.
*
* The macros DPU_BINARY and NR_TASKLETS are directly
* used in the static functions, and are not passed as arguments of these functions.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <getopt.h>
#include <assert.h>
#include <stdint.h>

#include <omp.h>

#include "../../support/common.h"
#include "../../support/timer.h"

// Pointer declaration
// Both buffers are allocated in main.
static T* A;                      // input elements
static unsigned int* histo_host;  // histogram computed on the CPU

// Command-line parameters of the CPU baseline (filled in by input_params).
typedef struct Params {
    unsigned int   input_size;  // -i option, default 1536 * 1024 elements
    unsigned int   bins;        // -b option, default 256
    int   n_warmup;             // -w option, default 1
    int   n_reps;               // -e option, default 3
    const char *file_name;      // -f option; points at optarg or a string literal, never owned
    int  exp;                   // -x option, default 1
    int  n_threads;             // -t option, default 8; must be > 0
}Params;

/**
* @brief reads the input array from the image file
* @param A destination buffer with room for at least p.input_size elements
* @param p parameters; p.file_name is the file to read and p.input_size the
*          number of 16-bit samples to load
*
* Each sample is byte-swapped and clamped to at most 4095.
* Exits with status 1 if the file cannot be opened.
*/
static void read_input(T* A, const Params p) {

    // Open the path directly. The path is user input, so it must never be
    // used as a printf-style format string or copied into a fixed-size buffer.
    FILE *File = fopen(p.file_name, "rb");
    if(File == NULL) {
        printf("%s does not exist\n", p.file_name);
        exit(1);
    }

    // Initialized so that a failed read never exposes an indeterminate value.
    unsigned short temp = 0;
    int short_read = 0;

    for(unsigned int y = 0; y < p.input_size; y++) {
        if(fread(&temp, sizeof(unsigned short), 1, File) != 1)
            short_read = 1; // keep going with the last value, as before
        A[y] = (unsigned int)ByteSwap16(temp);
        if(A[y] >= 4096)
            A[y] = 4095;
    }
    fclose(File);

    if(short_read)
        fprintf(stderr, "Warning: %s holds fewer than %u elements\n", p.file_name, p.input_size);
}

/**
* @brief compute the histogram on the host using t OpenMP threads
*
* exp != 0: one histogram of `bins` entries is accumulated over A, with the
*           elements split across threads and each increment done atomically.
* exp == 0: the same histogram is accumulated nr_of_dpus times into
*           consecutive `bins`-sized slices of histo, one slice per outer
*           iteration.
*/
static void histogram_host(unsigned int* histo, T* A, unsigned int bins, unsigned int nr_elements, int exp, unsigned int nr_of_dpus, int t) {

    omp_set_num_threads(t);

    if(exp){
        #pragma omp parallel for
        for (unsigned int idx = 0; idx < nr_elements; idx++) {
            const T value = A[idx];
            #pragma omp atomic update
            histo[(value * bins) >> DEPTH] += 1;
        }
    }
    else{
        #pragma omp parallel for
        for (unsigned int slice = 0; slice < nr_of_dpus; slice++) {
            for (unsigned int idx = 0; idx < nr_elements; idx++) {
                const T value = A[idx];
                histo[slice * bins + ((value * bins) >> DEPTH)] += 1;
            }
        }
    }
}

// Params ---------------------------------------------------------------------
// Print the command-line help text to stderr.
// The defaults shown match the values input_params actually applies.
void usage() {
    fprintf(stderr,
        "\nUsage:  ./program [options]"
        "\n"
        "\nGeneral options:"
        "\n    -h        help"
        "\n    -w <W>    # of untimed warmup iterations (default=1)"
        "\n    -e <E>    # of timed repetition iterations (default=3)"
        "\n    -t <T>    # of threads (default=8)"
        "\n    -x <X>    Weak (0) or strong (1) scaling (default=1)"
        "\n"
        "\nBenchmark-specific options:"
        "\n    -i <I>    input size (default=1536*1024 elements)"
        "\n    -b <B>    histogram size (default=256 bins)"
        "\n    -f <F>    input image file (default=../../input/image_VanHateren.iml)"
        "\n");
}

/**
* @brief Build the run configuration from the command line.
*
* Starts from the built-in defaults and overrides them with any option
* recognised by getopt(). `-h` prints the help and exits with status 0;
* an unrecognised option prints the help and exits with status 1.
*
* @param argc  argument count as received by main()
* @param argv  argument vector as received by main()
* @return      the populated parameter set
*/
struct Params input_params(int argc, char **argv) {
    struct Params p;
    p.input_size    = 1536 * 1024;
    p.bins          = 256;
    p.n_warmup      = 1;
    p.n_reps        = 3;
    p.n_threads     = 8;
    p.exp           = 1;
    p.file_name     = "../../input/image_VanHateren.iml";

    int opt;
    while((opt = getopt(argc, argv, "hi:b:w:e:f:x:t:")) >= 0) {
        switch(opt) {
        case 'h':
            usage();
            exit(0);
            break;
        case 'i': p.input_size    = atoi(optarg); break;
        case 'b': p.bins          = atoi(optarg); break;
        case 'w': p.n_warmup      = atoi(optarg); break;
        case 'e': p.n_reps        = atoi(optarg); break;
        case 'f': p.file_name     = optarg; break;
        case 'x': p.exp           = atoi(optarg); break;
        case 't': p.n_threads     = atoi(optarg); break;
        default:
            fprintf(stderr, "\nUnrecognized option!\n");
            usage();
            // A bad command line is an error: report it through the exit status.
            exit(1);
        }
    }
    assert(p.n_threads > 0 && "Invalid # of threads!");

    return p;
}

/**
* @brief Main of the Host Application.
*/
int main(int argc, char **argv) {

    struct Params p = input_params(argc, argv);

    uint32_t nr_of_dpus;
    
    const unsigned int input_size = p.input_size; // Size of input image
    if(!p.exp)
        assert(input_size % p.n_threads == 0 && "Input size!");
    else
        assert(input_size % p.n_threads == 0 && "Input size!");

    // Input/output allocation
    A = malloc(input_size * sizeof(T));
    T *bufferA = A;
    if(!p.exp)
        histo_host = malloc(nr_of_dpus * p.bins * sizeof(unsigned int));
    else
        histo_host = malloc(p.bins * sizeof(unsigned int));

    // Create an input file with arbitrary data.
    read_input(A, p);

    Timer timer;
    start(&timer, 0, 0);

	if(!p.exp)
            memset(histo_host, 0, nr_of_dpus * p.bins * sizeof(unsigned int));
    else
            memset(histo_host, 0, p.bins * sizeof(unsigned int));

    histogram_host(histo_host, A, p.bins, input_size, p.exp, nr_of_dpus, p.n_threads);

    stop(&timer, 0);
    printf("Kernel ");
    print(&timer, 0, 1);
    printf("\n");
	
    return 0;
}


================================================
FILE: golang/uPIMulator/benchmark/HST-S/baselines/gpu/Makefile
================================================
# 
#  Copyright (c) 2016 University of Cordoba and University of Illinois
#  All rights reserved.
# 
#  Developed by:    IMPACT Research Group
#                   University of Cordoba and University of Illinois
#                   http://impact.crhc.illinois.edu/
# 
#  Permission is hereby granted, free of charge, to any person obtaining a copy
#  of this software and associated documentation files (the "Software"), to deal
#  with the Software without restriction, including without limitation the 
#  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
#  sell copies of the Software, and to permit persons to whom the Software is
#  furnished to do so, subject to the following conditions:
# 
#       > Redistributions of source code must retain the above copyright notice,
#         this list of conditions and the following disclaimers.
#       > Redistributions in binary form must reproduce the above copyright
#         notice, this list of conditions and the following disclaimers in the
#         documentation and/or other materials provided with the distribution.
#       > Neither the names of IMPACT Research Group, University of Cordoba, 
#         University of Illinois nor the names of its contributors may be used 
#         to endorse or promote products derived from this Software without 
#         specific prior written permission.
# 
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
#  CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
#  THE SOFTWARE.
# 

# Toolchain: everything (host and device code) is compiled by nvcc.
CXX=/usr/local/cuda/bin/nvcc
CXX_FLAGS=-std=c++11

LIB=-L/usr/lib/ -L/usr/local/cuda/lib64 -lm

INC=-I/usr/local/cuda/include

# DEP: every file whose modification must trigger a rebuild of $(EXE).
# SRC: the translation units actually passed to the compiler.
DEP=kernel.cpp kernel.h main.cpp kernel.cu support/common.h support/cuda-setup.h support/partitioner.h support/timer.h support/verify.h
SRC=main.cpp kernel.cpp kernel.cu
EXE=hsti

# `all` and `clean` name actions, not files.
.PHONY: all clean

all: $(EXE)

# Rebuild the executable only when one of its sources or headers changed.
$(EXE): $(DEP)
	$(CXX) $(CXX_FLAGS) $(SRC) $(LIB) $(INC) -o $(EXE)

clean:
	rm -f $(EXE)



================================================
FILE: golang/uPIMulator/benchmark/HST-S/baselines/gpu/README
================================================
Histogram - input partition (HST)

Compilation instructions:

    make

Execution instructions:

    ./hsti -n 1006632960 -g 512

For more options:

    ./hsti -h


Note:
The input folder contains one image from Van Hateren's natural image database 
(http://www.kyb.tuebingen.mpg.de/?id=227). Image pixels have a 12-bit depth, so 
when computing the B-bin histogram of an image, the histogram bin of a pixel is 
((pixel * B) >> 12).
Monochrome images from other databases, or synthetic images, can also be used; 
the read_input function (in main.cpp) might need to be changed accordingly. If 
image pixels have a b-bit depth and the histogram contains B bins, the histogram 
bin is computed as ((pixel * B) >> b).


================================================
FILE: golang/uPIMulator/benchmark/HST-S/baselines/gpu/kernel.cpp
================================================
/*
 * Copyright (c) 2016 University of Cordoba and University of Illinois
 * All rights reserved.
 *
 * Developed by:    IMPACT Research Group
 *                  University of Cordoba and University of Illinois
 *                  http://impact.crhc.illinois.edu/
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * with the Software without restriction, including without limitation the 
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 *      > Redistributions of source code must retain the above copyright notice,
 *        this list of conditions and the following disclaimers.
 *      > Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimers in the
 *        documentation and/or other materials provided with the distribution.
 *      > Neither the names of IMPACT Research Group, University of Cordoba, 
 *        University of Illinois nor the names of its contributors may be used 
 *        to endorse or promote products derived from this Software without 
 *        specific prior written permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
 * THE SOFTWARE.
 *
 */

#include "kernel.h"
#include "support/partitioner.h"
#include <math.h>
#include <thread>
#include <vector>
#include <algorithm>

// CPU threads--------------------------------------------------------------------------------------
// Computes the host share of the histogram with `n_threads` std::threads.
//
//   histo     global histogram (`bins` atomic counters) the threads merge into
//   data      input pixels
//   size      number of valid elements in `data`
//   bins      number of histogram bins
//   n_threads number of worker threads to spawn
//   chunk     number of consecutive pixels that make up one task
//   n_tasks   total number of tasks handed to the partitioner
//   alpha     partitioning parameter forwarded to partitioner_create()
//   worklist  (CUDA_8_0 only) counter forwarded to partitioner_create()
//
// Each worker accumulates into a private histogram and merges it into `histo`
// with one fetch_add per bin. The function returns once all workers joined.
void run_cpu_threads(std::atomic_uint *histo, unsigned int *data, int size, int bins, int n_threads, int chunk, int n_tasks, float alpha
#ifdef CUDA_8_0
    , std::atomic_int *worklist
#endif
    ) {
    std::vector<std::thread> cpu_threads;
    cpu_threads.reserve(n_threads > 0 ? n_threads : 0);
    for(int k = 0; k < n_threads; k++) {
        cpu_threads.push_back(std::thread([=]() {

#ifdef CUDA_8_0
            Partitioner p = partitioner_create(n_tasks, alpha, k, n_threads, worklist);
#else
            Partitioner p = partitioner_create(n_tasks, alpha, k, n_threads);
#endif

            // Private, zero-initialized histogram. A std::vector replaces the
            // variable-length array, which is not standard C++.
            std::vector<unsigned int> Hs(static_cast<size_t>(bins > 0 ? bins : 0), 0u);

            for(int i = cpu_first(&p); cpu_more(&p); i = cpu_next(&p)) {
                const long long first = static_cast<long long>(i) * chunk;
                for(int j = 0; j < chunk; j++) {
                    const long long idx = first + j;
                    // The last task may be partial when `size` is not a
                    // multiple of `chunk`: never read past the input.
                    if(idx >= size)
                        break;

                    // Read pixel
                    unsigned int d = ((data[idx] * bins) >> 12);

                    // Vote in histogram
                    Hs[d]++;
                }
            }

            // Merge to global histogram
            for(int i = 0; i < bins; i++) {
                (&histo[i])->fetch_add(Hs[i]);
            }

        }));
    }
    std::for_each(cpu_threads.begin(), cpu_threads.end(), [](std::thread &t) { t.join(); });
}


================================================
FILE: golang/uPIMulator/benchmark/HST-S/baselines/gpu/kernel.cu
================================================
/*
 * Copyright (c) 2016 University of Cordoba and University of Illinois
 * All rights reserved.
 *
 * Developed by:    IMPACT Research Group
 *                  University of Cordoba and University of Illinois
 *                  http://impact.crhc.illinois.edu/
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * with the Software without restriction, including without limitation the 
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 *      > Redistributions of source code must retain the above copyright notice,
 *        this list of conditions and the following disclaimers.
 *      > Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimers in the
 *        documentation and/or other materials provided with the distribution.
 *      > Neither the names of IMPACT Research Group, University of Cordoba, 
 *        University of Illinois nor the names of its contributors may be used 
 *        to endorse or promote products derived from this Software without 
 *        specific prior written permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
 * THE SOFTWARE.
 *
 */

#define _CUDA_COMPILER_

#include "support/common.h"
#include "support/partitioner.h"

// CUDA kernel ------------------------------------------------------------------------------------------
// Device share of the histogram.
//
//   size      number of valid elements in `data`
//   bins      number of histogram bins
//   n_tasks   total number of tasks handed to the partitioner
//   alpha     partitioning parameter forwarded to partitioner_create()
//   data      input pixels
//   histo     global histogram (`bins` counters) the blocks merge into
//   worklist  (CUDA_8_0 only) counter forwarded to partitioner_create()
//
// Dynamic shared memory must hold `bins` unsigned ints (plus one int when
// CUDA_8_0 is defined, used as scratch space by the partitioner). Each block
// votes into its shared-memory histogram and then adds it to `histo`.
__global__ void Histogram_kernel(int size, int bins, int n_tasks, float alpha, unsigned int *data,
    unsigned int *histo
#ifdef CUDA_8_0
    , int *worklist
#endif
    ) {

    extern __shared__ unsigned int l_mem[];
    unsigned int* l_histo = l_mem;
#ifdef CUDA_8_0
    int* l_tmp = (int*)&l_histo[bins];
#endif
    
#ifdef CUDA_8_0
    Partitioner p = partitioner_create(n_tasks, alpha, worklist, l_tmp);
#else
    Partitioner p = partitioner_create(n_tasks, alpha);
#endif
    
    // Block and runtime index
    const int bx = blockIdx.x;
    const int tx = threadIdx.x;
    const int bD = blockDim.x;
    const int gD = gridDim.x;

    // Sub-histograms initialization
    for(int pos = tx; pos < bins; pos += bD) {
        l_histo[pos] = 0;
    }

    __syncthreads(); // Intra-block synchronization

    // Main loop
    for(int i = gpu_first(&p); gpu_more(&p); i = gpu_next(&p)) {

        // One task covers bD consecutive pixels; the last task may be partial
        // when `size` is not a multiple of bD, so guard the access. The guard
        // is an `if` (not `continue`/`break`) so that every thread of the
        // block still executes gpu_next() for each task.
        const long long idx = (long long)i * bD + tx;
        if(idx < size) {
            // Global memory read
            unsigned int d = data[idx];

            // Atomic vote in shared memory
            atomicAdd(&l_histo[((d * bins) >> 12)], 1);
        }
    }

    __syncthreads(); // Intra-block synchronization

    // Merge per-block histograms and write to global memory
    for(int pos = tx; pos < bins; pos += bD) {
// Atomic addition in global memory
#ifdef CUDA_8_0
        atomicAdd_system(histo + pos, l_histo[pos]);
#else
        atomicAdd(histo + pos, l_histo[pos]);
#endif
    }
}

// Host-side wrapper: launches Histogram_kernel on a 1-D grid of `blocks`
// blocks with `threads` threads each and `l_mem_size` bytes of dynamic shared
// memory, then returns the status reported by cudaGetLastError().
cudaError_t call_Histogram_kernel(int blocks, int threads, int size, int bins, int n_tasks, float alpha, 
    unsigned int *data, unsigned int *histo, int l_mem_size
#ifdef CUDA_8_0
    , int* worklist
#endif
    ){
    const dim3 grid(blocks);
    const dim3 block(threads);

#ifdef CUDA_8_0
    Histogram_kernel<<<grid, block, l_mem_size>>>(size, bins, n_tasks, alpha, data, histo, worklist);
#else
    Histogram_kernel<<<grid, block, l_mem_size>>>(size, bins, n_tasks, alpha, data, histo);
#endif

    return cudaGetLastError();
}


================================================
FILE: golang/uPIMulator/benchmark/HST-S/baselines/gpu/kernel.h
================================================
/*
 * Copyright (c) 2016 University of Cordoba and University of Illinois
 * All rights reserved.
 *
 * Developed by:    IMPACT Research Group
 *                  University of Cordoba and University of Illinois
 *                  http://impact.crhc.illinois.edu/
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * with the Software without restriction, including without limitation the 
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 *      > Redistributions of source code must retain the above copyright notice,
 *        this list of conditions and the following disclaimers.
 *      > Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimers in the
 *        documentation and/or other materials provided with the distribution.
 *      > Neither the names of IMPACT Research Group, University of Cordoba, 
 *        University of Illinois nor the names of its contributors may be used 
 *        to endorse or promote products derived from this Software without 
 *        specific prior written permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
 * THE SOFTWARE.
 *
 */

#include <cuda_runtime.h>
#include <atomic>
#include "support/common.h"

// Host share of the histogram (defined in kernel.cpp): spawns `n_threads`
// worker threads that process tasks of `chunk` consecutive pixels from `data`
// and merge their private histograms into the `bins` atomic counters of
// `histo`. Returns after all workers have joined.
// Parameter names match the definition in kernel.cpp.
void run_cpu_threads(std::atomic_uint *histo, unsigned int *data, int size, int bins, int n_threads, int chunk, int n_tasks, float alpha
#ifdef CUDA_8_0
    , std::atomic_int *worklist
#endif
    );

// Host-side launcher for the CUDA histogram kernel (defined in kernel.cu).
// Launches a 1-D grid of `blocks` blocks with `threads` threads each and
// `l_mem_size` bytes of dynamic shared memory; returns the value of
// cudaGetLastError() taken right after the launch.
cudaError_t call_Histogram_kernel(int blocks, int threads, int size, int bins, int n_tasks, float alpha, 
    unsigned int *data, unsigned int *histo, int l_mem_size
#ifdef CUDA_8_0
    , int* worklist
#endif
		);


================================================
FILE: golang/uPIMulator/benchmark/HST-S/baselines/gpu/main.cpp
================================================
/*
 * Copyright (c) 2016 University of Cordoba and University of Illinois
 * All rights reserved.
 *
 * Developed by:    IMPACT Research Group
 *                  University of Cordoba and University of Illinois
 *                  http://impact.crhc.illinois.edu/
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * with the Software without restriction, including without limitation the 
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 *      > Redistributions of source code must retain the above copyright notice,
 *        this list of conditions and the following disclaimers.
 *      > Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimers in the
 *        documentation and/or other materials provided with the distribution.
 *      > Neither the names of IMPACT Research Group, University of Cordoba, 
 *        University of Illinois nor the names of its contributors may be used 
 *        to endorse or promote products derived from this Software without 
 *        specific prior written permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
 * THE SOFTWARE.
 *
 */

#include "support/cuda-setup.h"
#include "kernel.h"
#include "support/common.h"
#include "support/timer.h"
#include "support/verify.h"

#include <unistd.h>
#include <thread>
#include <assert.h>

// Params ---------------------------------------------------------------------
// Run configuration of the benchmark, filled from the command line.
struct Params {

    int   device;        // CUDA device ID (-d)
    int   n_gpu_threads; // device threads per block (-i)
    int   n_gpu_blocks;  // device blocks (-g)
    int   n_threads;     // host threads (-t)
    int   n_warmup;      // untimed warmup iterations (-w)
    int   n_reps;        // timed iterations (-r)
    float alpha;         // fraction of the input processed on the host (-a)
    int   in_size;       // number of input elements (-n)
    int   n_bins;        // number of histogram bins (-b)

    // Sets the defaults, applies the options found in argv and asserts that
    // the worker counts required by the selected `alpha` are positive.
    // `-h` prints the help and exits with status 0; an unrecognized option
    // prints the help and exits with status 1.
    Params(int argc, char **argv) {
        device        = 0;
        n_gpu_threads  = 256;
        n_gpu_blocks = 16;
        n_threads     = 4;
        n_warmup      = 5;
        n_reps        = 50;
        alpha         = 0.2;
        in_size       = 1536 * 1024 * 640;
        n_bins        = 256;
        int opt;
        while((opt = getopt(argc, argv, "hd:i:g:t:w:r:a:n:b:")) >= 0) {
            switch(opt) {
            case 'h':
                usage();
                exit(0);
                break;
            case 'd': device        = atoi(optarg); break;
            case 'i': n_gpu_threads  = atoi(optarg); break;
            case 'g': n_gpu_blocks = atoi(optarg); break;
            case 't': n_threads     = atoi(optarg); break;
            case 'w': n_warmup      = atoi(optarg); break;
            case 'r': n_reps        = atoi(optarg); break;
            case 'a': alpha         = atof(optarg); break;
            case 'n': in_size       = atoi(optarg); break;
            case 'b': n_bins        = atoi(optarg); break;
            default:
                fprintf(stderr, "\nUnrecognized option!\n");
                usage();
                // A bad command line is an error: report it through the exit status.
                exit(1);
            }
        }
        if(alpha == 0.0) {
            // Device only
            assert(n_gpu_threads > 0 && "Invalid # of device threads!");
            assert(n_gpu_blocks > 0 && "Invalid # of device blocks!");
        } else if(alpha == 1.0) {
            // Host only
            assert(n_threads > 0 && "Invalid # of host threads!");
        } else if(alpha > 0.0 && alpha < 1.0) {
            // Static split between host and device
            assert(n_gpu_threads > 0 && "Invalid # of device threads!");
            assert(n_gpu_blocks > 0 && "Invalid # of device blocks!");
            assert(n_threads > 0 && "Invalid # of host threads!");
        } else {
#ifdef CUDA_8_0
            // Dynamic partitioning: at least one kind of worker must exist
            assert(((n_gpu_threads > 0 && n_gpu_blocks > 0) || n_threads > 0) && "Invalid # of host + device workers!");
#else
            assert(0 && "Illegal value for -a");
#endif
        }
    }

    // Prints the command-line help to stderr. The defaults listed here mirror
    // the values assigned in the constructor.
    void usage() {
        fprintf(stderr,
                "\nUsage:  ./hsti [options]"
                "\n"
                "\nGeneral options:"
                "\n    -h        help"
                "\n    -d <D>    CUDA device ID (default=0)"
                "\n    -i <I>    # of device threads per block (default=256)"
                "\n    -g <G>    # of device blocks (default=16)"
                "\n    -t <T>    # of host threads (default=4)"
                "\n    -w <W>    # of untimed warmup iterations (default=5)"
                "\n    -r <R>    # of timed repetition iterations (default=50)"
                "\n"
                "\nData-partitioning-specific options:"
                "\n    -a <A>    fraction of input elements to process on host (default=0.2)"
#ifdef CUDA_8_0
                "\n              NOTE: Dynamic partitioning used when <A> is not between 0.0 and 1.0"
#else
                "\n              NOTE: <A> must be between 0.0 and 1.0"
#endif
                "\n"
                "\nBenchmark-specific options:"
                "\n    -n <N>    input size (default=1006632960, i.e., 1536x1024x640)"
                "\n    -b <B>    # of bins in histogram (default=256)"
                "\n");
    }
};

// Input Data -----------------------------------------------------------------
// Fills input[0 .. p.in_size) with pixels read from ./input/image_VanHateren.iml.
//
// Every pixel is a 16-bit value that is byte-swapped with ByteSwap16 and
// clamped to at most 4095. When the file holds fewer than p.in_size pixels,
// the remaining entries repeat the last pixel that was read (0 if none was).
// Prints a message and exits with status 1 when the file cannot be opened.
void read_input(unsigned int *input, const Params &p) {

    char  dctFileName[100];
    FILE *File = NULL;

    // Open input file
    snprintf(dctFileName, sizeof(dctFileName), "./input/image_VanHateren.iml");
    if((File = fopen(dctFileName, "rb")) != NULL) {
        unsigned short temp     = 0;
        unsigned int   pixel    = 0;    // last pixel successfully read
        bool           has_more = true; // false once fread() stops delivering data
        for(int y = 0; y < p.in_size; y++) {
            if(has_more) {
                if(fread(&temp, sizeof(unsigned short), 1, File) == 1) {
                    pixel = (unsigned int)ByteSwap16(temp);
                    if(pixel >= 4096)
                        pixel = 4095;
                } else {
                    // End of file or read error: stop calling fread() and keep
                    // replicating the last valid pixel.
                    has_more = false;
                }
            }
            input[y] = pixel;
        }
        fclose(File);
    } else {
        printf("%s does not exist\n", dctFileName);
        exit(1);
    }
}

// Main ------------------------------------------------------------------------------------------
// Benchmark driver: allocates the buffers, loads the input image, runs the
// collaborative (GPU kernel + CPU threads) histogram for n_warmup + n_reps
// iterations, merges and verifies the result, and prints the timers.
// Every CUDA runtime call is checked right after it is issued so that a
// failure cannot be masked by a later successful call.
int main(int argc, char **argv) {

    // The histogram is allocated as std::atomic_uint but is also copied,
    // cleared and verified through unsigned int pointers.
    static_assert(sizeof(std::atomic_uint) == sizeof(unsigned int),
        "std::atomic_uint must have the same size as unsigned int");

    Params p(argc, argv);
    CUDASetup    setcuda(p.device);
    Timer        timer;
    cudaError_t  cudaStatus;

    // Allocate buffers
    timer.start("Allocation");
    int n_tasks = divceil(p.in_size, p.n_gpu_threads);
#ifdef CUDA_8_0
    unsigned int *h_in;
    cudaStatus = cudaMallocManaged(&h_in, p.in_size * sizeof(unsigned int));
    CUDA_ERR();
    std::atomic_uint *h_histo;
    cudaStatus = cudaMallocManaged(&h_histo, p.n_bins * sizeof(std::atomic_uint));
    CUDA_ERR();
    unsigned int *    d_in     = h_in;
    std::atomic_uint *d_histo  = h_histo;
    std::atomic_int * worklist;
    cudaStatus = cudaMallocManaged(&worklist, sizeof(std::atomic_int));
    CUDA_ERR();
#else
    unsigned int *    h_in          = (unsigned int *)malloc(p.in_size * sizeof(unsigned int));
    std::atomic_uint *h_histo       = (std::atomic_uint *)malloc(p.n_bins * sizeof(std::atomic_uint));
    unsigned int *    h_histo_merge = (unsigned int *)malloc(p.n_bins * sizeof(unsigned int));
    unsigned int *    d_in;
    cudaStatus = cudaMalloc((void**)&d_in, p.in_size * sizeof(unsigned int));
    CUDA_ERR();
    unsigned int *    d_histo;
    cudaStatus = cudaMalloc((void**)&d_histo, p.n_bins * sizeof(unsigned int));
    CUDA_ERR();
    ALLOC_ERR(h_in, h_histo, h_histo_merge);
#endif
    cudaDeviceSynchronize();
    timer.stop("Allocation");
    timer.print("Allocation", 1);

    // Initialize
    timer.start("Initialization");
    const int max_gpu_threads = setcuda.max_gpu_threads();
    read_input(h_in, p);
#ifdef CUDA_8_0
    for(int i = 0; i < p.n_bins; i++) {
        h_histo[i].store(0);
    }
#else
    memset(h_histo, 0, p.n_bins * sizeof(std::atomic_uint));
#endif
    cudaDeviceSynchronize();
    timer.stop("Initialization");
    timer.print("Initialization", 1);

#ifndef CUDA_8_0
    // Copy to device
    timer.start("Copy To Device");
    cudaStatus = cudaMemcpy(d_in, h_in, p.in_size * sizeof(unsigned int), cudaMemcpyHostToDevice);
    CUDA_ERR();
    cudaStatus = cudaMemcpy(d_histo, h_histo, p.n_bins * sizeof(unsigned int), cudaMemcpyHostToDevice);
    CUDA_ERR();
    cudaDeviceSynchronize();
    timer.stop("Copy To Device");
    timer.print("Copy To Device", 1);
#endif

    // Loop over main kernel
    for(int rep = 0; rep < p.n_warmup + p.n_reps; rep++) {

        // Reset
#ifdef CUDA_8_0
        if(p.alpha < 0.0 || p.alpha > 1.0) { // Dynamic partitioning
            worklist[0].store(0);
        }
        for(int i = 0; i < p.n_bins; i++) {
            h_histo[i].store(0);
        }
#else
        memset(h_histo, 0, p.n_bins * sizeof(std::atomic_uint));
        cudaStatus = cudaMemcpy(d_histo, h_histo, p.n_bins * sizeof(unsigned int), cudaMemcpyHostToDevice);
        cudaDeviceSynchronize();
        CUDA_ERR();
#endif

        if(rep >= p.n_warmup)
            timer.start("Kernel");

        // The grid size is derived from the input size here, which overrides
        // any value given with -g.
        p.n_gpu_blocks = p.in_size / p.n_gpu_threads;

        // Launch GPU threads
        // Kernel launch
        if(p.n_gpu_blocks > 0) {
            assert(p.n_gpu_threads <= max_gpu_threads && 
                "The runtime block size is greater than the maximum runtime block size that can be used on this device");
            cudaStatus = call_Histogram_kernel(p.n_gpu_blocks, p.n_gpu_threads, p.in_size, p.n_bins, n_tasks, 
                p.alpha, d_in, (unsigned int*)d_histo, p.n_bins * sizeof(unsigned int)
#ifdef CUDA_8_0
                + sizeof(int), (int*)worklist
#endif
                );
            CUDA_ERR();
        }

        // Launch CPU threads
        std::thread main_thread(run_cpu_threads, h_histo, h_in, p.in_size, p.n_bins, p.n_threads, p.n_gpu_threads,
            n_tasks, p.alpha
#ifdef CUDA_8_0
            , worklist
#endif
            );

        // Wait for both the device and the host workers
        cudaDeviceSynchronize();
        main_thread.join();

        if(rep >= p.n_warmup)
            timer.stop("Kernel");
    }
    timer.print("Kernel", p.n_reps);

#ifndef CUDA_8_0
    // Copy back
    timer.start("Copy Back and Merge");
    cudaStatus = cudaMemcpy(h_histo_merge, d_histo, p.n_bins * sizeof(unsigned int), cudaMemcpyDeviceToHost);
    CUDA_ERR();
    cudaDeviceSynchronize();
    // Add the host partial histogram to the device partial histogram
    for(int i = 0; i < p.n_bins; ++i) {
        h_histo_merge[i] += (unsigned int)h_histo[i];
    }
    timer.stop("Copy Back and Merge");
    timer.print("Copy Back and Merge", 1);
#endif

    // Verify answer
#ifdef CUDA_8_0
    verify((unsigned int *)h_histo, h_in, p.in_size, p.n_bins);
#else
    verify((unsigned int *)h_histo_merge, h_in, p.in_size, p.n_bins);
#endif

    // Free memory
    timer.start("Deallocation");
#ifdef CUDA_8_0
    cudaStatus = cudaFree(h_in);
    CUDA_ERR();
    cudaStatus = cudaFree(h_histo);
    CUDA_ERR();
    cudaStatus = cudaFree(worklist);
    CUDA_ERR();
#else
    free(h_in);
    free(h_histo);
    free(h_histo_merge);
    cudaStatus = cudaFree(d_in);
    CUDA_ERR();
    cudaStatus = cudaFree(d_histo);
    CUDA_ERR();
#endif
    cudaDeviceSynchronize();
    timer.stop("Deallocation");
    timer.print("Deallocation", 1);

    // Release timers
    timer.release("Allocation");
    timer.release("Initialization");
    timer.release("Copy To Device");
    timer.release("Kernel");
    timer.release("Copy Back and Merge");
    timer.release("Deallocation");

    printf("Test Passed\n");
    return 0;
}


================================================
FILE: golang/uPIMulator/benchmark/HST-S/baselines/gpu/support/common.h
================================================
/*
 * Copyright (c) 2016 University of Cordoba and University of Illinois
 * All rights reserved.
 *
 * Developed by:    IMPACT Research Group
 *                  University of Cordoba and University of Illinois
 *                  http://impact.crhc.illinois.edu/
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * with the Software without restriction, including without limitation the 
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 *      > Redistributions of source code must retain the above copyright notice,
 *        this list of conditions and the following disclaimers.
 *      > Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimers in the
 *        documentation and/or other materials provided with the distribution.
 *      > Neither the names of IMPACT Research Group, University of Cordoba, 
 *        University of Illinois nor the names of its contributors may be used 
 *        to endorse or promote products derived from this Software without 
 *        specific prior written permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
 * THE SOFTWARE.
 *
 */

#ifndef _COMMON_H_
#define _COMMON_H_

// Swaps the two low-order bytes of n; the result is an unsigned int in
// [0, 0xFFFF]. The argument is parenthesized so that the cast applies to the
// whole expression passed in, not only to its first operand.
#define ByteSwap16(n) (((((unsigned int)(n)) << 8) & 0xFF00) | ((((unsigned int)(n)) >> 8) & 0x00FF))

// Compile-time flag; its users are not part of this header.
#define PRINT 0

// Ceiling of n / m for n >= 1 and m >= 1.
// NOTE: for a signed n == 0 and m > 1 this evaluates to 1, not 0, because
// (0 - 1) / m truncates to 0.
#define divceil(n, m) (((n)-1) / (m) + 1)

#endif


================================================
FILE: golang/uPIMulator/benchmark/HST-S/baselines/gpu/support/cuda-setup.h
================================================
/*
 * Copyright (c) 2016 University of Cordoba and University of Illinois
 * All rights reserved.
 *
 * Developed by:    IMPACT Research Group
 *                  University of Cordoba and University of Illinois
 *                  http://impact.crhc.illinois.edu/
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * with the Software without restriction, including without limitation the 
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 *      > Redistributions of source code must retain the above copyright notice,
 *        this list of conditions and the following disclaimers.
 *      > Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimers in the
 *        documentation and/or other materials provided with the distribution.
 *      > Neither the names of IMPACT Research Group, University of Cordoba, 
 *        University of Illinois nor the names of its contributors may be used 
 *        to endorse or promote products derived from this Software without 
 *        specific prior written permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
 * THE SOFTWARE.
 *
 */

#include <cuda.h>
#include <cuda_runtime.h>
#include <fstream>

// Allocation error checking
//
// ALLOC_ERR(p1, ..., pN) accepts between one and six pointers and aborts the
// program (exit(-1)) with a file/line message on stderr as soon as one of them
// compares equal to NULL.
//
// How it works: GET_ERR_MACRO selects ERR_1..ERR_6 according to the number of
// arguments passed, and each ERR_n expands to n consecutive ERR_1 checks.
//
// Caveats:
//   - ERR_1 expands to a bare `if(...) { ... }` statement (no do/while(0)
//     wrapper), so do not use these macros as the unbraced body of an
//     if/else.
//   - The argument is pasted into `v1 == NULL` without parentheses; pass a
//     plain pointer expression.
//   - fprintf/exit are used here, but this header only includes <cuda.h>,
//     <cuda_runtime.h> and <fstream>; the including file is expected to make
//     them available.
#define ERR_1(v1)                                                                                                      \
    if(v1 == NULL) {                                                                                                   \
        fprintf(stderr, "Allocation error at %s, %d\n", __FILE__, __LINE__);                                           \
        exit(-1);                                                                                                      \
    }
#define ERR_2(v1,v2) ERR_1(v1) ERR_1(v2)
#define ERR_3(v1,v2,v3) ERR_2(v1,v2) ERR_1(v3)
#define ERR_4(v1,v2,v3,v4) ERR_3(v1,v2,v3) ERR_1(v4)
#define ERR_5(v1,v2,v3,v4,v5) ERR_4(v1,v2,v3,v4) ERR_1(v5)
#define ERR_6(v1,v2,v3,v4,v5,v6) ERR_5(v1,v2,v3,v4,v5) ERR_1(v6)
// Argument-counting helper: NAME ends up bound to the ERR_n matching the call.
#define GET_ERR_MACRO(_1,_2,_3,_4,_5,_6,NAME,...) NAME
#define ALLOC_ERR(...) GET_ERR_MACRO(__VA_ARGS__,ERR_6,ERR_5,ERR_4,ERR_3,ERR_2,ERR_1)(__VA_ARGS__)

// CUDA error checking
//
// CUDA_ERR() takes no arguments: it inspects a variable named `cudaStatus`
// that must be in scope at the point of use. If that variable is not
// cudaSuccess, the CUDA error string plus file/line are printed to stderr and
// the program exits with -1.
//
// Like ERR_1, this expands to a bare `if` statement, so avoid using it as the
// unbraced body of an if/else.
#define CUDA_ERR()                                                                                                     \
    if(cudaStatus != cudaSuccess) {                                                                                    \
        fprintf(stderr, "CUDA error: %s\n at %s, %d\n", cudaGetErrorString(cudaStatus), __FILE__, __LINE__);           \
        exit(-1);                                                                                                      \
    }

// Selects a CUDA device and caches its properties.
struct CUDASetup {

    // Properties of the selected device, filled in by the constructor.
    cudaDeviceProp device_prop;

    // Makes `device` the current CUDA device, queries its properties and
    // prints the device name (followed by a tab) to stderr. Any CUDA failure
    // terminates the program through CUDA_ERR().
    CUDASetup(int device) {
        // CUDA_ERR() reads a local called cudaStatus, so this name is fixed.
        cudaError_t cudaStatus = cudaSetDevice(device);
        CUDA_ERR();

        cudaStatus = cudaGetDeviceProperties(&device_prop, device);
        CUDA_ERR();

        fprintf(stderr, "%s\t", device_prop.name);
    }

    // Returns the maxThreadsPerBlock value reported for the selected device.
    int max_gpu_threads() { return device_prop.maxThreadsPerBlock; }
};


================================================
FILE: golang/uPIMulator/benchmark/HST-S/baselines/gpu/support/partitioner.h
================================================
/*
 * Copyright (c) 2016 University of Cordoba and University of Illinois
 * All rights reserved.
 *
 * Developed by:    IMPACT Research Group
 *                  University of Cordoba and University of Illinois
 *                  http://impact.crhc.illinois.edu/
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * with the Software without restriction, including without limitation the 
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 *      > Redistributions of source code must retain the above copyright notice,
 *        this list of conditions and the following disclaimers.
 *      > Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimers in the
 *        documentation and/or other materials provided with the distribution.
 *      > Neither the names of IMPACT Research Group, University of Cordoba, 
 *        University of Illinois nor the names of its contributors may be used 
 *        to endorse or promote products derived from this Software without 
 *        specific prior written permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
 * THE SOFTWARE.
 *
 */

#ifndef _PARTITIONER_H_
#define _PARTITIONER_H_

#ifndef _CUDA_COMPILER_
#include <iostream>
#endif

#if !defined(_CUDA_COMPILER_) && defined(CUDA_8_0)
#include <atomic>
#endif

// Partitioner definition -----------------------------------------------------

typedef struct Partitioner {

    int n_tasks;
    int cut;
    int current;
#ifndef _CUDA_COMPILER_
    int thread_id;
    int n_threads;
#endif


#ifdef CUDA_8_0
    // CUDA 8.0 support for dynamic partitioning
    int strategy;
#ifdef _CUDA_COMPILER_
    int *worklist;
    int *tmp;
#else
    std::atomic_int *worklist;
#endif
#endif

} Partitioner;

// Partitioning strategies
#define STATIC_PARTITIONING 0
#define DYNAMIC_PARTITIONING 1

// Create a partitioner -------------------------------------------------------

// Builds a Partitioner for n_tasks tasks.
//
//   alpha in [0, 1]  -> static partitioning, cut = n_tasks * alpha (truncated)
//   any other alpha  -> dynamic partitioning when CUDA_8_0 is defined
//
// The remaining parameters exist only in some build configurations:
//   thread_id, n_threads : host builds (no _CUDA_COMPILER_)
//   worklist             : CUDA_8_0 builds
//   tmp                  : CUDA_8_0 device builds
//
// `cut` and `current` are zero-initialised up front. Previously they were left
// indeterminate whenever alpha was outside [0, 1] (and `current` always), yet
// the struct is returned by value, which copies those indeterminate ints.
// Without CUDA_8_0 an out-of-range alpha now deterministically yields cut == 0.
#ifdef _CUDA_COMPILER_
__device__
#endif
inline Partitioner partitioner_create(int n_tasks, float alpha
#ifndef _CUDA_COMPILER_
    , int thread_id, int n_threads
#endif
#ifdef CUDA_8_0
#ifdef _CUDA_COMPILER_
    , int *worklist
    , int *tmp
#else
    , std::atomic_int *worklist
#endif
#endif
    ) {
    Partitioner p;
    p.n_tasks = n_tasks;
    p.cut     = 0;
    p.current = 0;
#ifndef _CUDA_COMPILER_
    p.thread_id = thread_id;
    p.n_threads = n_threads;
#endif
#ifdef CUDA_8_0
    // Keep the pointer members defined in both strategies.
    p.worklist = worklist;
#ifdef _CUDA_COMPILER_
    p.tmp = tmp;
#endif
#endif
    if(alpha >= 0.0 && alpha <= 1.0) {
        p.cut = p.n_tasks * alpha;
#ifdef CUDA_8_0
        p.strategy = STATIC_PARTITIONING;
#endif
    } else {
#ifdef CUDA_8_0
        p.strategy = DYNAMIC_PARTITIONING;
#endif
    }
    return p;
}

// Partitioner iterators: first() ---------------------------------------------

#ifndef _CUDA_COMPILER_

// Host side: positions the iterator on this thread's first task and returns
// its index. Dynamic strategy claims an index from the shared work-list;
// static strategy starts at the thread id.
inline int cpu_first(Partitioner *p) {
#ifdef CUDA_8_0
    if(p->strategy == DYNAMIC_PARTITIONING) {
        p->current = p->worklist->fetch_add(1);
        return p->current;
    }
#endif
    p->current = p->thread_id;
    return p->current;
}

#else

// Device side: positions the iterator on this thread block's first task and
// returns its index.
// Dynamic strategy: thread (0,0) claims an index with atomicAdd_system() and
// publishes it through p->tmp[0]; every thread reads it after __syncthreads().
// Static strategy: the block starts at cut + blockIdx.x.
__device__ inline int gpu_first(Partitioner *p) {
#ifdef CUDA_8_0
    if(p->strategy == DYNAMIC_PARTITIONING) {
        const bool claims_task = (threadIdx.x == 0 && threadIdx.y == 0);
        if(claims_task) {
            p->tmp[0] = atomicAdd_system(p->worklist, 1);
        }
        __syncthreads();
        p->current = p->tmp[0];
        return p->current;
    }
#endif
    p->current = p->cut + blockIdx.x;
    return p->current;
}

#endif

// Partitioner iterators: more() ----------------------------------------------

#ifndef _CUDA_COMPILER_

// Host side: true while the current index is still a valid task for the CPU.
// Dynamic strategy is bounded by n_tasks, static strategy by cut.
inline bool cpu_more(const Partitioner *p) {
#ifdef CUDA_8_0
    if(p->strategy == DYNAMIC_PARTITIONING)
        return p->current < p->n_tasks;
#endif
    return p->current < p->cut;
}

#else

// Device side: true while the current index is below n_tasks (same bound for
// both strategies).
__device__ inline bool gpu_more(const Partitioner *p) {
    const bool tasks_left = p->current < p->n_tasks;
    return tasks_left;
}

#endif

// Partitioner iterators: next() ----------------------------------------------

#ifndef _CUDA_COMPILER_

// Host side: advances to this thread's next task and returns its index.
// Dynamic strategy claims the next index from the shared work-list; static
// strategy strides by n_threads.
inline int cpu_next(Partitioner *p) {
#ifdef CUDA_8_0
    if(p->strategy == DYNAMIC_PARTITIONING) {
        p->current = p->worklist->fetch_add(1);
        return p->current;
    }
#endif
    p->current += p->n_threads;
    return p->current;
}

#else

// Device side: advances to this thread block's next task and returns its index.
// Dynamic strategy: thread (0,0) claims an index with atomicAdd_system() and
// publishes it through p->tmp[0]; every thread reads it after __syncthreads().
// Static strategy: the block strides by gridDim.x.
__device__ inline int gpu_next(Partitioner *p) {
#ifdef CUDA_8_0
    if(p->strategy == DYNAMIC_PARTITIONING) {
        const bool claims_task = (threadIdx.x == 0 && threadIdx.y == 0);
        if(claims_task) {
            p->tmp[0] = atomicAdd_system(p->worklist, 1);
        }
        __syncthreads();
        p->current = p->tmp[0];
        return p->current;
    }
#endif
    p->current += gridDim.x;
    return p->current;
}

#endif

#endif


================================================
FILE: golang/uPIMulator/benchmark/HST-S/baselines/gpu/support/timer.h
================================================
/*
 * Copyright (c) 2016 University of Cordoba and University of Illinois
 * All rights reserved.
 *
 * Developed by:    IMPACT Research Group
 *                  University of Cordoba and University of Illinois
 *                  http://impact.crhc.illinois.edu/
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * with the Software without restriction, including without limitation the 
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 *      > Redistributions of source code must retain the above copyright notice,
 *        this list of conditions and the following disclaimers.
 *      > Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimers in the
 *        documentation and/or other materials provided with the distribution.
 *      > Neither the names of IMPACT Research Group, University of Cordoba, 
 *        University of Illinois nor the names of its contributors may be used 
 *        to endorse or promote products derived from this Software without 
 *        specific prior written permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
 * THE SOFTWARE.
 *
 */

#include <cuda_runtime.h>
#include <sys/time.h>
#include <iostream>
#include <map>
#include <string>

using namespace std;

struct Timer {

    map<string, cudaEvent_t> startTime;
    map<string, cudaEvent_t> stopTime;
    map<string, float>         time;

    void start(string name) {
        if(!time.count(name)) {
            cudaEventCreate(&startTime[name]); 
            cudaEventCreate(&stopTime[name]);
            time[name] = 0.0;
        }
        cudaEventRecord(startTime[name], 0);
    }

    void stop(string name) {
        cudaEventRecord(stopTime[name],0);
        cudaEventSynchronize(stopTime[name]);
        float part_time = 0.0;
        cudaEventElapsedTime(&part_time, startTime[name], stopTime[name]);
        time[name] += part_time;
    }

    void print(string name, unsigned int REP) { printf("%s Time (ms): %f\n", name.c_str(), time[name] / REP); }

    void release(string name){
        cudaEventDestroy(startTime[name]); 
        cudaEventDestroy(stopTime[name]);
    }
};


================================================
FILE: golang/uPIMulator/benchmark/HST-S/baselines/gpu/support/verify.h
================================================
/*
 * Copyright (c) 2016 University of Cordoba and University of Illinois
 * All rights reserved.
 *
 * Developed by:    IMPACT Research Group
 *                  University of Cordoba and University of Illinois
 *                  http://impact.crhc.illinois.edu/
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * with the Software without restriction, including without limitation the 
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 *      > Redistributions of source code must retain the above copyright notice,
 *        this list of conditions and the following disclaimers.
 *      > Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimers in the
 *        documentation and/or other materials provided with the distribution.
 *      > Neither the names of IMPACT Research Group, University of Cordoba, 
 *        University of Illinois nor the names of its contributors may be used 
 *        to endorse or promote products derived from this Software without 
 *        specific prior written permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
 * THE SOFTWARE.
 *
 */

#include "common.h"
#include <math.h>
#include <string.h>

// Compares the first `bins` entries of two histograms.
// Returns 0 when they all match; on the first mismatch prints "Test failed"
// and terminates the process with EXIT_FAILURE.
inline int compare_output(unsigned int *outp, unsigned int *outpCPU, int bins) {
    int idx = 0;
    // Skip over the matching prefix.
    while(idx < bins && outp[idx] == outpCPU[idx]) {
        ++idx;
    }
    if(idx < bins) {
        printf("Test failed\n");
        exit(EXIT_FAILURE);
    }
    return 0;
}

// Sequential reference histogram used for verification.
// Every input value votes for bin (value * bins) >> 12; the caller must
// provide a zeroed `histo` and values small enough for that index to stay
// below `bins`.
inline void HistogramCPU(unsigned int *histo, unsigned int *data, int size, int bins) {
    int pos = 0;
    while(pos < size) {
        const unsigned int bin = (data[pos] * bins) >> 12;
        histo[bin] += 1;
        ++pos;
    }
}

// Recomputes the histogram of `input` (size elements, `bins` bins) on the CPU
// and compares it with `histo`. compare_output() terminates the process on a
// mismatch, so returning from this function means the results agree.
inline void verify(unsigned int *histo, unsigned int *input, int size, int bins) {
    // calloc gives a zeroed reference histogram in one step.
    unsigned int *gold = (unsigned int *)calloc(bins, sizeof(unsigned int));
    // The original passed an unchecked malloc() result straight to memset().
    // A NULL result for bins == 0 is legal and is not treated as an error.
    if(gold == NULL && bins > 0) {
        fprintf(stderr, "Allocation error at %s, %d\n", __FILE__, __LINE__);
        exit(EXIT_FAILURE);
    }
    HistogramCPU(gold, input, size, bins);
    compare_output(histo, gold, bins);
    free(gold);
}


================================================
FILE: golang/uPIMulator/benchmark/HST-S/dpu/CMakeLists.txt
================================================
# Builds the HST-S DPU program from every *.c file below this directory.

# log2 of the MRAM<->WRAM block size, forwarded to the sources as -DBL.
SET(BL 10)

# NOTE: the compiler and include paths are hard-coded absolute paths under /root.
# NR_TASKLETS is not set in this file and has to be defined by the caller.
# NOTE(review): -S normally makes the compiler stop after generating assembly - confirm this is the intended artefact.
set(CMAKE_C_COMPILER "/root/upmem-2023.2.0-Linux-x86_64/bin/dpu-upmem-dpurte-clang")
set(CMAKE_C_FLAGS "-w -I/root/uPIMulator/benchmark/HST-S/support -O2 -S -DNR_TASKLETS=${NR_TASKLETS} -DBL=${BL}")

file(GLOB_RECURSE SRCS *.c)

add_executable(HST-S_device ${SRCS})



================================================
FILE: golang/uPIMulator/benchmark/HST-S/dpu/task.c
================================================
/*
* Histogram (HST-S) with multiple tasklets
*
*/
#include <stdint.h>
#include <stdio.h>
#include <defs.h>
#include <mram.h>
#include <alloc.h>
#include <perfcounter.h>
#include <barrier.h>

#include "../support/common.h"

// Kernel arguments (size, transfer_size, bins, kernel); the host pushes them
// into the symbol "DPU_INPUT_ARGUMENTS" before launching.
__host dpu_arguments_t DPU_INPUT_ARGUMENTS;

// Per-tasklet slot holding a pointer to that tasklet's local histogram.
// Every tasklet reads all NR_TASKLETS slots during the final reduction.
uint32_t* message[NR_TASKLETS];
// DPU histogram
// NOTE: main_kernel1 declares a local variable of the same name that shadows
// this global, so this global is not the one written there.
uint32_t* histo_dpu;

// Barrier shared by all NR_TASKLETS tasklets
BARRIER_INIT(my_barrier, NR_TASKLETS);

// Adds l_size input elements to the tasklet-local histogram `histo`.
// Each element d votes for bin (d * bins) >> DEPTH.
void __attribute__ ((noinline)) histogram(uint32_t* histo, uint32_t bins, T *input, unsigned int l_size){
    unsigned int pos = 0;
    while(pos < l_size) {
        const T sample = input[pos];
        const uint32_t bin = (sample * bins) >> DEPTH;
        histo[bin] += 1;
        pos++;
    }
}

extern int main_kernel1(void);

// Kernel dispatch table, indexed by the `kernel` field of the input arguments.
int (*kernels[nr_kernels])(void) = {main_kernel1};

// DPU entry point: runs the kernel selected by the host and returns its result.
int main(void) {
    int (*const selected_kernel)(void) = kernels[DPU_INPUT_ARGUMENTS.kernel];
    return selected_kernel();
}

// main_kernel1
//
// Histogram kernel, executed by every tasklet of the DPU:
//   1. tasklet 0 resets the WRAM heap, then all tasklets synchronise;
//   2. each tasklet builds a private histogram over the BLOCK_SIZE-byte input
//      blocks it owns (blocks are interleaved across tasklets);
//   3. the private histograms are summed bin by bin into tasklet 0's
//      histogram, each tasklet reducing a disjoint set of bins;
//   4. tasklet 0 writes the result to MRAM right after the input region
//      (heap base + transfer_size).
// Always returns 0.
int main_kernel1() {
    unsigned int tasklet_id = me();
#if PRINT
    printf("tasklet_id = %u\n", tasklet_id);
#endif
    if (tasklet_id == 0){
        mem_reset(); // Reset the heap
    }
    // Nobody allocates before the heap has been reset
    barrier_wait(&my_barrier);

    uint32_t input_size_dpu_bytes = DPU_INPUT_ARGUMENTS.size; // Bytes of input processed by this DPU
    uint32_t input_size_dpu_bytes_transfer = DPU_INPUT_ARGUMENTS.transfer_size; // Transfer input size per DPU in bytes
    uint32_t bins = DPU_INPUT_ARGUMENTS.bins;

    // Address of the current processing block in MRAM
    uint32_t base_tasklet = tasklet_id << BLOCK_SIZE_LOG2;
    uint32_t mram_base_addr_A = (uint32_t)DPU_MRAM_HEAP_POINTER;
    uint32_t mram_base_addr_histo = (uint32_t)(DPU_MRAM_HEAP_POINTER + input_size_dpu_bytes_transfer);

    // Initialize a local cache to store the MRAM block
    T *cache_A = (T *) mem_alloc(BLOCK_SIZE);

    // Local histogram
    uint32_t *histo = (uint32_t *) mem_alloc(bins * sizeof(uint32_t));

    // Initialize local histogram
    for(unsigned int i = 0; i < bins; i++){
        histo[i] = 0;
    }

    // Compute histogram
    for(unsigned int byte_index = base_tasklet; byte_index < input_size_dpu_bytes; byte_index += BLOCK_SIZE * NR_TASKLETS){

        // Bound checking: the last block may be shorter than BLOCK_SIZE
        uint32_t l_size_bytes = (byte_index + BLOCK_SIZE >= input_size_dpu_bytes) ? (input_size_dpu_bytes - byte_index) : BLOCK_SIZE;

        // Load cache with current MRAM block
        mram_read((const __mram_ptr void*)(mram_base_addr_A + byte_index), cache_A, l_size_bytes);

        // Histogram in each tasklet (DIV converts bytes to elements)
        histogram(histo, bins, cache_A, l_size_bytes >> DIV);

    }
    // Publish this tasklet's histogram for the reduction
    message[tasklet_id] = histo;

    // Every private histogram must be complete and published before reducing
    barrier_wait(&my_barrier);

    // The reduction is done in place in tasklet 0's histogram. This local
    // intentionally shadows the global of the same name, as in the original.
    uint32_t *histo_dpu = message[0];

    // Each bin is read and written by exactly one tasklet
    for (unsigned int i = tasklet_id; i < bins; i += NR_TASKLETS){
        uint32_t b = 0;
        for (unsigned int j = 0; j < NR_TASKLETS; j++){
            b += *(message[j] + i);
        }
        histo_dpu[i] = b;
    }

    // Wait for all bins to be reduced before writing back
    barrier_wait(&my_barrier);

    // Write dpu histogram to current MRAM block
    if(tasklet_id == 0){
        // Written in chunks of at most 2048 bytes, as the original did.
        // Unlike the original loop, a final partial chunk is written too, so
        // histograms larger than 2048 bytes whose size is not a multiple of
        // 2048 are no longer truncated.
        // NOTE(review): mram_write presumably needs sizes that are multiples
        // of 8 bytes, i.e. an even number of bins - confirm against the SDK.
        const uint32_t histo_bytes = bins * sizeof(uint32_t);
        for(uint32_t offset = 0; offset < histo_bytes; offset += 2048){
            const uint32_t remaining = histo_bytes - offset;
            const uint32_t chunk = (remaining < 2048) ? remaining : 2048;
            mram_write(histo_dpu + (offset >> 2), (__mram_ptr void*)(mram_base_addr_histo + offset), chunk);
        }
    }

    return 0;
}


================================================
FILE: golang/uPIMulator/benchmark/HST-S/host/app.c
================================================
/**
* app.c
* HST-S Host Application Source File
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <math.h>
#include <dpu.h>
#include <dpu_log.h>
#include <unistd.h>
#include <getopt.h>
#include <assert.h>

#include "../support/common.h"
#include "../support/timer.h"
#include "../support/params.h"

// Define the DPU Binary path as DPU_BINARY here
#ifndef DPU_BINARY
#define DPU_BINARY "./bin/dpu_code"
#endif

#if ENERGY
#include <dpu_probe.h>
#endif

// Pointer declaration (all three buffers are allocated and freed in main)
static T* A;                     // input samples, laid out as one slice per DPU
static unsigned int* histo_host; // reference histogram computed on the host (bins entries)
static unsigned int* histo;      // histograms read back from the DPUs (nr_of_dpus * bins entries)

// Create input arrays
//
// Reads p.input_size 16-bit samples from the file p.file_name into A.
// Each sample is byte-swapped with ByteSwap16 and clamped to at most 4095.
// Exits with status 1 when the file cannot be opened or holds fewer samples
// than requested.
//
// Changes from the previous version:
//   - the file name is passed to fopen() directly; it used to be used as the
//     *format string* of sprintf() into a 100-byte buffer, so a '%' in the
//     name or a long path corrupted memory;
//   - the fread() result is checked, so a short file no longer silently fills
//     the rest of A with stale data.
static void read_input(T* A, const Params p) {

    // Open input file
    FILE *File = fopen(p.file_name, "rb");
    if(File == NULL) {
        printf("%s does not exist\n", p.file_name);
        exit(1);
    }

    for(unsigned int y = 0; y < p.input_size; y++) {
        unsigned short temp;
        if(fread(&temp, sizeof(unsigned short), 1, File) != 1) {
            fprintf(stderr, "%s is too short: expected %u samples, got %u\n", p.file_name, p.input_size, y);
            fclose(File);
            exit(1);
        }
        A[y] = (unsigned int)ByteSwap16(temp);
        if(A[y] >= 4096)
            A[y] = 4095;
    }
    fclose(File);
}

// Compute output in the host
//
// exp != 0: one histogram of A[0 .. nr_elements) is accumulated into histo.
// exp == 0: the same histogram is accumulated nr_of_dpus times, copy i going
//           to histo[i * bins .. (i + 1) * bins).
// The caller provides zeroed storage.
static void histogram_host(unsigned int* histo, T* A, unsigned int bins, unsigned int nr_elements, int exp, unsigned int nr_of_dpus) {
    const unsigned int copies = exp ? 1 : nr_of_dpus;
    for (unsigned int copy = 0; copy < copies; copy++) {
        const unsigned int base = copy * bins;
        for (unsigned int k = 0; k < nr_elements; k++) {
            const T sample = A[k];
            histo[base + ((sample * bins) >> DEPTH)] += 1;
        }
    }
}

// Main of the Host Application
//
// Flow: parse options, allocate NR_DPUS DPUs and load DPU_BINARY, read the
// input image, then for every warm-up/timed repetition: compute the reference
// histogram on the host, push arguments and input to the DPUs, launch, read
// back the per-DPU histograms and merge them into histo[0 .. bins).
// Finally print the averaged timings and compare against the host result.
// Returns 0 when the outputs match and -1 otherwise.
//
// Changes from the previous version:
//   - host allocations are checked;
//   - the number of DPUs actually obtained is validated against NR_DPUS,
//     which sizes input_arguments[];
//   - the last DPU's size is derived from nr_of_dpus (it used NR_DPUS) and is
//     clamped at 0 instead of wrapping around as an unsigned value.
int main(int argc, char **argv) {

    struct Params p = input_params(argc, argv);

    struct dpu_set_t dpu_set, dpu;
    uint32_t nr_of_dpus;

#if ENERGY
    struct dpu_probe_t probe;
    DPU_ASSERT(dpu_probe_init("energy_probe", &probe));
#endif

    // Allocate DPUs and load binary
    DPU_ASSERT(dpu_alloc(NR_DPUS, NULL, &dpu_set));
    DPU_ASSERT(dpu_load(dpu_set, DPU_BINARY, NULL));
    DPU_ASSERT(dpu_get_nr_dpus(dpu_set, &nr_of_dpus));
    printf("Allocated %d DPU(s)\n", nr_of_dpus);

    // input_arguments[] has NR_DPUS entries and is indexed up to nr_of_dpus - 1
    if(nr_of_dpus == 0 || nr_of_dpus > NR_DPUS) {
        fprintf(stderr, "Unexpected number of DPUs: %u\n", nr_of_dpus);
        DPU_ASSERT(dpu_free(dpu_set));
        return -1;
    }

    unsigned int i = 0;
    unsigned int input_size; // Size of input image
    unsigned int dpu_s = p.dpu_s;
    if(p.exp == 0)
        input_size = p.input_size * nr_of_dpus; // Weak scaling: one full image per DPU
    else if(p.exp == 1)
        input_size = p.input_size; // Strong scaling: one image split across the DPUs
    else
        input_size = p.input_size * dpu_s; // Strong scaling over dpu_s copies of the image

    const unsigned int input_size_8bytes =
        ((input_size * sizeof(T)) % 8) != 0 ? roundup(input_size, 8) : input_size; // Total input size, 8-byte aligned
    const unsigned int input_size_dpu = divceil(input_size, nr_of_dpus); // Input size per DPU (max.)
    const unsigned int input_size_dpu_8bytes =
        ((input_size_dpu * sizeof(T)) % 8) != 0 ? roundup(input_size_dpu, 8) : input_size_dpu; // Input size per DPU (max.), 8-byte aligned

    // Input/output allocation
    A = malloc(input_size_dpu_8bytes * nr_of_dpus * sizeof(T));
    histo_host = malloc(p.bins * sizeof(unsigned int));
    histo = malloc(nr_of_dpus * p.bins * sizeof(unsigned int));
    // malloc(0) may legally return NULL, so the histogram buffers are only
    // required when there is at least one bin.
    if(A == NULL || (p.bins > 0 && (histo_host == NULL || histo == NULL))) {
        fprintf(stderr, "Host allocation failed\n");
        free(A);
        free(histo_host);
        free(histo);
        DPU_ASSERT(dpu_free(dpu_set));
        return -1;
    }
    T *bufferA = A;

    // Read the input image and replicate it as required by the experiment
    read_input(A, p);
    if(p.exp == 0){
        for(unsigned int j = 1; j < nr_of_dpus; j++){
            memcpy(&A[j * input_size_dpu_8bytes], &A[0], input_size_dpu_8bytes * sizeof(T));
        }
    }
    else if(p.exp == 2){
        for(unsigned int j = 1; j < dpu_s; j++)
            memcpy(&A[j * p.input_size], &A[0], p.input_size * sizeof(T));
    }

    // Timer declaration
    Timer timer;

    printf("NR_TASKLETS\t%d\tBL\t%d\tinput_size\t%u\n", NR_TASKLETS, BL, input_size);

    // Loop over main kernel
    for(int rep = 0; rep < p.n_warmup + p.n_reps; rep++) {
        memset(histo_host, 0, p.bins * sizeof(unsigned int));
        memset(histo, 0, nr_of_dpus * p.bins * sizeof(unsigned int));

        // Compute output on CPU (performance comparison and verification purposes)
        if(rep >= p.n_warmup)
            start(&timer, 0, rep - p.n_warmup);
        histogram_host(histo_host, A, p.bins, p.input_size, 1, nr_of_dpus);
        if(rep >= p.n_warmup)
            stop(&timer, 0);

        printf("Load input data\n");
        if(rep >= p.n_warmup)
            start(&timer, 1, rep - p.n_warmup);
        // Input arguments
        unsigned int kernel = 0;
        dpu_arguments_t input_arguments[NR_DPUS];
        // All DPUs but the last process a full slice
        for(i = 0; i < nr_of_dpus - 1; i++) {
            input_arguments[i].size = input_size_dpu_8bytes * sizeof(T);
            input_arguments[i].transfer_size = input_size_dpu_8bytes * sizeof(T);
            input_arguments[i].bins = p.bins;
            input_arguments[i].kernel = kernel;
        }
        // The last DPU processes whatever is left; clamp instead of wrapping
        // around when the preceding slices already cover the whole input.
        const unsigned int full_slices_elems = input_size_dpu_8bytes * (nr_of_dpus - 1);
        const unsigned int last_slice_elems =
            (input_size_8bytes > full_slices_elems) ? (input_size_8bytes - full_slices_elems) : 0;
        input_arguments[nr_of_dpus - 1].size = last_slice_elems * sizeof(T);
        input_arguments[nr_of_dpus - 1].transfer_size = input_size_dpu_8bytes * sizeof(T);
        input_arguments[nr_of_dpus - 1].bins = p.bins;
        input_arguments[nr_of_dpus - 1].kernel = kernel;

        // Copy input arrays
        i = 0;
        DPU_FOREACH(dpu_set, dpu, i) {
            DPU_ASSERT(dpu_prepare_xfer(dpu, &input_arguments[i]));
        }
        DPU_ASSERT(dpu_push_xfer(dpu_set, DPU_XFER_TO_DPU, "DPU_INPUT_ARGUMENTS", 0, sizeof(input_arguments[0]), DPU_XFER_DEFAULT));
        DPU_FOREACH(dpu_set, dpu, i) {
            DPU_ASSERT(dpu_prepare_xfer(dpu, bufferA + input_size_dpu_8bytes * i));
        }
        DPU_ASSERT(dpu_push_xfer(dpu_set, DPU_XFER_TO_DPU, DPU_MRAM_HEAP_POINTER_NAME, 0, input_size_dpu_8bytes * sizeof(T), DPU_XFER_DEFAULT));
        if(rep >= p.n_warmup)
            stop(&timer, 1);

        printf("Run program on DPU(s) \n");
        // Run DPU kernel
        if(rep >= p.n_warmup) {
            start(&timer, 2, rep - p.n_warmup);
            #if ENERGY
            DPU_ASSERT(dpu_probe_start(&probe));
            #endif
        }

        DPU_ASSERT(dpu_launch(dpu_set, DPU_SYNCHRONOUS));
        if(rep >= p.n_warmup) {
            stop(&timer, 2);
            #if ENERGY
            DPU_ASSERT(dpu_probe_stop(&probe));
            #endif
        }

#if PRINT
        {
            unsigned int each_dpu = 0;
            printf("Display DPU Logs\n");
            DPU_FOREACH (dpu_set, dpu) {
                printf("DPU#%d:\n", each_dpu);
                DPU_ASSERT(dpulog_read_for_dpu(dpu.dpu, stdout));
                each_dpu++;
            }
        }
#endif

        printf("Retrieve results\n");
        i = 0;
        if(rep >= p.n_warmup)
            start(&timer, 3, rep - p.n_warmup);
        // PARALLEL RETRIEVE TRANSFER
        // The DPU stores its histogram right after the input region
        DPU_FOREACH(dpu_set, dpu, i) {
            DPU_ASSERT(dpu_prepare_xfer(dpu, histo + p.bins * i));
        }
        DPU_ASSERT(dpu_push_xfer(dpu_set, DPU_XFER_FROM_DPU, DPU_MRAM_HEAP_POINTER_NAME, input_size_dpu_8bytes * sizeof(T), p.bins * sizeof(unsigned int), DPU_XFER_DEFAULT));

        // Final histogram merging (into the first DPU's slot)
        for(i = 1; i < nr_of_dpus; i++){
            for(unsigned int j = 0; j < p.bins; j++){
                histo[j] += histo[j + i * p.bins];
            }
        }
        if(rep >= p.n_warmup)
            stop(&timer, 3);

    }

    // Print timing results
    printf("CPU ");
    print(&timer, 0, p.n_reps);
    printf("CPU-DPU ");
    print(&timer, 1, p.n_reps);
    printf("DPU Kernel ");
    print(&timer, 2, p.n_reps);
    printf("DPU-CPU ");
    print(&timer, 3, p.n_reps);

    #if ENERGY
    double energy;
    DPU_ASSERT(dpu_probe_get(&probe, DPU_ENERGY, DPU_AVERAGE, &energy));
    printf("DPU Energy (J): %f\t", energy);
    #endif

    // Check output: the host histogram covers one image, so it is scaled by
    // the number of image copies the DPUs processed in each experiment.
    bool status = true;
    if(p.exp == 1)
        for (unsigned int j = 0; j < p.bins; j++) {
            if(histo_host[j] != histo[j]){
                status = false;
#if PRINT
                printf("%u - %u: %u -- %u\n", j, j, histo_host[j], histo[j]);
#endif
            }
        }
    else if(p.exp == 2)
        for (unsigned int j = 0; j < p.bins; j++) {
            if(dpu_s * histo_host[j] != histo[j]){
                status = false;
#if PRINT
                printf("%u - %u: %u -- %u\n", j, j, dpu_s * histo_host[j], histo[j]);
#endif
            }
        }
    else
        for (unsigned int j = 0; j < p.bins; j++) {
            if(nr_of_dpus * histo_host[j] != histo[j]){
                status = false;
#if PRINT
                printf("%u - %u: %u -- %u\n", j, j, nr_of_dpus * histo_host[j], histo[j]);
#endif
            }
        }
    if (status) {
        printf("[" ANSI_COLOR_GREEN "OK" ANSI_COLOR_RESET "] Outputs are equal\n");
    } else {
        printf("[" ANSI_COLOR_RED "ERROR" ANSI_COLOR_RESET "] Outputs differ!\n");
    }

    // Deallocation
    free(A);
    free(histo_host);
    free(histo);
    DPU_ASSERT(dpu_free(dpu_set));

    return status ? 0 : -1;
}


================================================
FILE: golang/uPIMulator/benchmark/HST-S/run.sh
================================================
#!/bin/bash

# Sweeps HST-S over histogram sizes (b) and tasklet counts (k) on i DPUs.
# For every combination the benchmark is rebuilt, run with 2 warm-up and
# 5 timed iterations in strong-scaling mode (-x 1), and its output is stored
# under profile/.

# The results are redirected into profile/, so make sure it exists.
mkdir -p profile

# The bare `wait` calls of the previous version were dropped: every command
# here runs in the foreground, so there was nothing to wait for.
for i in 1
do
    for b in 64 128 256 512 1024 2048 4096
    do
        for k in 1 2 4 8 16
        do
            NR_DPUS=$i NR_TASKLETS=$k BL=10 make all
            ./bin/host_code -w 2 -e 5 -b "${b}" -x 1 > "profile/HSTS_${b}_tl${k}_dpu${i}.txt"
            make clean
        done
    done
done


================================================
FILE: golang/uPIMulator/benchmark/HST-S/support/common.h
================================================
#ifndef _COMMON_H_
#define _COMMON_H_

// uint32_t is used by T and dpu_arguments_t below; include it here so the
// header does not depend on the include order of its users.
#include <stdint.h>

// Transfer size between MRAM and WRAM
// BL is the log2 of the block size in bytes; it defaults to 8 (256 bytes)
// when the build does not define it.
#ifdef BL
#define BLOCK_SIZE_LOG2 BL
#define BLOCK_SIZE (1 << BLOCK_SIZE_LOG2)
#else
#define BLOCK_SIZE_LOG2 8
#define BLOCK_SIZE (1 << BLOCK_SIZE_LOG2)
#define BL BLOCK_SIZE_LOG2
#endif

// Data type
#define T uint32_t
#define DIV 2 // Shift right to divide by sizeof(T)
#define REGS (BLOCK_SIZE >> 2) // 32 bits

// Pixel depth
#define DEPTH 12
// Swaps the two low-order bytes of n; the result fits in 16 bits.
// The argument is parenthesised so that compound expressions are converted
// as a whole.
#define ByteSwap16(n) ((((unsigned int)(n) << 8) & 0xFF00) | (((unsigned int)(n) >> 8) & 0x00FF))

// Structures used by both the host and the dpu to communicate information
typedef struct {
    uint32_t size;          // bytes of input the DPU processes
    uint32_t transfer_size; // bytes of input transferred to the DPU; the histogram is stored after them
    uint32_t bins;          // number of histogram bins
	enum kernels {
	    kernel1 = 0,
	    nr_kernels = 1,
	} kernel;               // index of the kernel to run
} dpu_arguments_t;

#ifndef ENERGY
#define ENERGY 0
#endif
// Overridable from the build like ENERGY; defaults to 0 (no debug output).
#ifndef PRINT
#define PRINT 0
#endif

#define ANSI_COLOR_RED     "\x1b[31m"
#define ANSI_COLOR_GREEN   "\x1b[32m"
#define ANSI_COLOR_RESET   "\x1b[0m"

// Ceiling division for n >= 1.
#define divceil(n, m) (((n)-1) / (m) + 1)
// Next multiple of m strictly greater than n's rounded-down multiple, i.e.
// floor(n / m) * m + m. NOTE: a value that already is a multiple of m is
// still bumped by m (roundup(8, 8) == 16); callers guard against that.
// Arguments are parenthesised so expressions such as roundup(a + b, 8) work.
#define roundup(n, m) ((((n) / (m)) * (m)) + (m))
#endif


================================================
FILE: golang/uPIMulator/benchmark/HST-S/support/params.h
================================================
#ifndef _PARAMS_H_
#define _PARAMS_H_

#include "common.h"

// Command-line parameters of the host application (filled by input_params).
typedef struct Params {
    unsigned int   input_size; // -i: number of input elements read from the file
    unsigned int   bins;       // -b: number of histogram bins
    int   n_warmup;            // -w: untimed warm-up iterations
    int   n_reps;              // -e: timed iterations
    const char *file_name;     // -f: input image file
    int  exp;                  // -x: scaling experiment (0 = weak, 1 or 2 = strong)
    int  dpu_s;                // -z: number of image copies used as input when exp == 2
}Params;

// Prints the command-line help to stderr.
// The text is kept in sync with input_params(): the default of -f is
// "./input/image_VanHateren.iml" (the help used to say "../input/...") and
// the -z option, which is parsed but was undocumented, is listed.
static void usage() {
    fprintf(stderr,
        "\nUsage:  ./program [options]"
        "\n"
        "\nGeneral options:"
        "\n    -h        help"
        "\n    -w <W>    # of untimed warmup iterations (default=1)"
        "\n    -e <E>    # of timed repetition iterations (default=3)"
        "\n    -x <X>    Weak (0) or strong (1, 2) scaling (default=0)"
        "\n"
        "\nBenchmark-specific options:"
        "\n    -i <I>    input size (default=1536*1024 elements)"
        "\n    -b <B>    histogram size (default=256 bins)"
        "\n    -f <F>    input image file (default=./input/image_VanHateren.iml)"
        "\n    -z <Z>    # of input image copies used with -x 2 (default=64)"
        "\n");
}

// Parses the command line into a Params structure, starting from the defaults
// below. -h prints the help and exits; an unknown option prints a message and
// the help, then exits (status 0 in both cases).
struct Params input_params(int argc, char **argv) {
    struct Params p;
    int opt;

    // Defaults
    p.file_name     = "./input/image_VanHateren.iml";
    p.input_size    = 1536 * 1024;
    p.bins          = 256;
    p.n_warmup      = 1;
    p.n_reps        = 3;
    p.exp           = 0;
    p.dpu_s         = 64;

    for(;;) {
        opt = getopt(argc, argv, "hi:b:w:e:f:x:z:");
        if(opt < 0)
            break;

        if(opt == 'h') {
            usage();
            exit(0);
        } else if(opt == 'i') {
            p.input_size = atoi(optarg);
        } else if(opt == 'b') {
            p.bins = atoi(optarg);
        } else if(opt == 'w') {
            p.n_warmup = atoi(optarg);
        } else if(opt == 'e') {
            p.n_reps = atoi(optarg);
        } else if(opt == 'f') {
            p.file_name = optarg;
        } else if(opt == 'x') {
            p.exp = atoi(optarg);
        } else if(opt == 'z') {
            p.dpu_s = atoi(optarg);
        } else {
            fprintf(stderr, "\nUnrecognized option!\n");
            usage();
            exit(0);
        }
    }
    assert(NR_DPUS > 0 && "Invalid # of dpus!");

    return p;
}
#endif


================================================
FILE: golang/uPIMulator/benchmark/HST-S/support/timer.h
================================================
/*
 * Copyright (c) 2016 University of Cordoba and University of Illinois
 * All rights reserved.
 *
 * Developed by:    IMPACT Research Group
 *                  University of Cordoba and University of Illinois
 *                  http://impact.crhc.illinois.edu/
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * with the Software without restriction, including without limitation the 
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 *      > Redistributions of source code must retain the above copyright notice,
 *        this list of conditions and the following disclaimers.
 *      > Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimers in the
 *        documentation and/or other materials provided with the distribution.
 *      > Neither the names of IMPACT Research Group, University of Cordoba, 
 *        University of Illinois nor the names of its contributors may be used 
 *        to endorse or promote products derived from this Software without 
 *        specific prior written permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
 * THE SOFTWARE.
 *
 */

#include <sys/time.h>

/*
 * Wall-clock stopwatch with four independent slots.
 * Each slot keeps the timestamps of its latest start()/stop() pair and a
 * running total of elapsed time in microseconds.
 */
typedef struct Timer {
    struct timeval startTime[4]; /* timestamp taken by the latest start() */
    struct timeval stopTime[4];  /* timestamp taken by the latest stop()  */
    double         time[4];      /* accumulated elapsed time (microseconds) */
} Timer;

/*
 * Begin a measurement on slot i.
 * When rep is 0 the slot's accumulated time is cleared first; for any other
 * rep value the previous total is kept so repetitions add up.
 */
void start(Timer *timer, int i, int rep) {
    if (rep == 0)
        timer->time[i] = 0.0;
    gettimeofday(timer->startTime + i, NULL);
}

/*
 * End a measurement on slot i and add the time elapsed since the matching
 * start() call, in microseconds, to the slot's running total.
 */
void stop(Timer *timer, int i) {
    gettimeofday(timer->stopTime + i, NULL);

    const struct timeval *begin = &timer->startTime[i];
    const struct timeval *end   = &timer->stopTime[i];
    double elapsed_us = (end->tv_sec - begin->tv_sec) * 1000000.0 +
                        (end->tv_usec - begin->tv_usec);

    timer->time[i] += elapsed_us;
}

/*
 * Print the total of slot i averaged over REP repetitions, converted from
 * microseconds to milliseconds.
 */
void print(Timer *timer, int i, int REP) {
    int divisor = 1000 * REP;
    printf("Time (ms): %f\t", timer->time[i] / divisor);
}


================================================
FILE: golang/uPIMulator/benchmark/MLP/CMakeLists.txt
================================================
#add_subdirectory(host)
add_subdirectory(dpu)

================================================
FILE: golang/uPIMulator/benchmark/MLP/Makefile
================================================
# Build script for the MLP benchmark. Compiles the host program and the DPU
# kernel, and additionally emits an assembly listing (.S) next to each binary.
DPU_DIR := dpu
HOST_DIR := host
BUILDDIR ?= bin
# Keep these defaults free of trailing whitespace: make preserves it in the
# value, which would split the generated .conf file name into several words.
NR_TASKLETS ?= 16
BL ?= 10
NR_DPUS ?= 1

# Name of the marker file that records the configuration of the last build:
# $(1) = NR_DPUS, $(2) = NR_TASKLETS, $(3) = BL.
define conf_filename
	${BUILDDIR}/.NR_DPUS_$(1)_NR_TASKLETS_$(2)_BL_$(3).conf
endef
CONF := $(call conf_filename,${NR_DPUS},${NR_TASKLETS},${BL})

HOST_TARGET := ${BUILDDIR}/host_code
DPU_TARGET := ${BUILDDIR}/dpu_code

COMMON_INCLUDES := support
HOST_SOURCES := $(wildcard ${HOST_DIR}/*.c)
DPU_SOURCES := $(wildcard ${DPU_DIR}/*.c)

.PHONY: all clean test

# Evaluated at parse time so the build directory exists before any rule runs.
__dirs := $(shell mkdir -p ${BUILDDIR})

COMMON_FLAGS := -w -I${COMMON_INCLUDES}
HOST_FLAGS := ${COMMON_FLAGS} -std=c11 -O3 `dpu-pkg-config --cflags --libs dpu` -DNR_TASKLETS=${NR_TASKLETS} -DNR_DPUS=${NR_DPUS} -DBL=${BL}
DPU_FLAGS := ${COMMON_FLAGS} -O2 -DNR_TASKLETS=${NR_TASKLETS} -DBL=${BL}

all: ${HOST_TARGET} ${DPU_TARGET}

# A change of NR_DPUS/NR_TASKLETS/BL changes the marker name, so this rule
# fires, drops the markers of every earlier configuration and forces both
# targets (which depend on the marker) to be rebuilt.
${CONF}:
	$(RM) $(call conf_filename,*,*,*)
	touch ${CONF}

${HOST_TARGET}: ${HOST_SOURCES} ${COMMON_INCLUDES} ${CONF}
	$(CC) -o $@ ${HOST_SOURCES} ${HOST_FLAGS}
	$(CC) -S -o ${HOST_TARGET}.S ${HOST_SOURCES} ${HOST_FLAGS}

${DPU_TARGET}: ${DPU_SOURCES} ${COMMON_INCLUDES} ${CONF}
	dpu-upmem-dpurte-clang ${DPU_FLAGS} -o $@ ${DPU_SOURCES}
	dpu-upmem-dpurte-clang -S ${DPU_FLAGS} -o ${DPU_TARGET}.S ${DPU_SOURCES}

clean:
	$(RM) -r $(BUILDDIR)

test: all
	./${HOST_TARGET} -m 1024 -n 1024


================================================
FILE: golang/uPIMulator/benchmark/MLP/_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_10_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_11_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_12_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_13_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_14_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_15_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_16
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_16_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_17_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_18_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_19_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_1_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_20_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_21_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_22_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_23_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_24_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_2_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_3_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_4_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_5_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_6_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_7_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_8_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_9_BL_10.conf
================================================


================================================
FILE: golang/uPIMulator/benchmark/MLP/baselines/cpu/Makefile
================================================
# Neither target produces a file with its own name; declare them phony so a
# stray file called "all" or "run" cannot silence them.
.PHONY: all run

# Build the OpenMP CPU baseline.
all:
	gcc mlp_openmp.c -o mlp_openmp -fopenmp -std=c99
# Run the baseline with its built-in problem size.
run:
	./mlp_openmp


================================================
FILE: golang/uPIMulator/benchmark/MLP/baselines/cpu/README
================================================
Multilayer Perceptron (MLP)

Compilation instructions

    make

Execution instructions

    ./mlp_openmp


================================================
FILE: golang/uPIMulator/benchmark/MLP/baselines/cpu/mlp_openmp.c
================================================
/**
* @file mlp_openmp.c
* @brief CPU (OpenMP) baseline of the MLP benchmark.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <getopt.h>
#include <assert.h>
#include <stdint.h>
#include "../../support/timer.h"
#include "../../support/common.h"

T** A;
T* B;
T* C;

// Create input arrays
/*
 * Fill the per-layer weight arrays A and the input vector B with a sparse,
 * deterministic 0/1 pattern.
 *   A[l][i] : 0 for the first 98 of every 100 indices, otherwise (l + i) % 2
 *   B[i]    : 0 for the first 48 of every 50 indices, otherwise i % 2
 * Each of the NUM_LAYERS arrays in A receives m_size * n_size values and B
 * receives n_size values.
 */
static void init_data(T** A, T* B, unsigned int m_size, unsigned int n_size){
	for (unsigned int layer = 0; layer < NUM_LAYERS; layer++) {
		for (unsigned int k = 0; k < m_size * n_size; k++) {
			A[layer][k] = (k % 100 < 98) ? 0 : (layer + k) % 2;
		}
	}

	for (unsigned int k = 0; k < n_size; k++) {
		B[k] = (k % 50 < 48) ? 0 : k % 2;
	}
}

// Compute output in the host
/*
 * Reference MLP forward pass. For each of the NUM_LAYERS layers:
 *   1. C is cleared,
 *   2. C[row] = max(0, sum over col of A[layer][row * n_size + col] * B[col]),
 *      with the rows distributed over OpenMP threads,
 *   3. the first n_size entries of C are copied into B as the next input.
 */
static void mlp_host(T* C, T** A, T* B, unsigned int m_size, unsigned int n_size) {
	for (unsigned int layer = 0; layer < NUM_LAYERS; layer++){
		// Reset the output vector before accumulating this layer
		for (unsigned int row = 0; row < m_size; row++)
			C[row] = 0;

		// Loop variables are declared in the loop headers so that each
		// OpenMP thread gets its own copy.
		#pragma omp parallel for
		for (unsigned int row = 0; row < m_size; row++){
			for (unsigned int col = 0; col < n_size; col++)
				C[row] += A[layer][row * n_size + col] * B[col];
			C[row] = max(0, C[row]);
		}

		// The layer output becomes the input of the next layer
		for (unsigned int k = 0; k < n_size; k++)
			B[k] = C[k];
	}
}

/*
 * Add up the first n_size entries of the global vector B and return the
 * total. m_size is accepted but not used.
 */
static uint64_t mlp_host_sum(uint64_t n_size, uint64_t m_size) {
  (void) m_size;
  uint64_t total = 0;
  uint64_t idx = 0;
  while (idx < n_size) {
    total += B[idx];
    idx++;
  }
  return total;
}

// Params ---------------------------------------------------------------------
/* Command-line configuration of the benchmark. */
typedef struct Params {
  char* dpu_type;      /* -d */
  int   nr_of_ranks;   /* -r, must be > 0 */
  int   input_size_n;  /* -n */
  int   input_size_m;  /* -m */
  int   n_warmup;      /* fixed default, no command-line option */
  int   n_reps;        /* fixed default, no command-line option */
}Params;

/* Print the command-line help to stderr. */
void usage() {
  fprintf(stderr,
    "\nUsage:  ./program [options]"
    "\n"
    "\nGeneral options:"
    "\n    -h        help"
    "\n    -d <D>    DPU type (default=fsim)"
    "\n    -r <R>    # of ranks (default=1)"
    "\n"
    "\nBenchmark-specific options:"
    "\n    -n <N>    input size n (default=512)"
    "\n    -m <M>    input size m (default=512)"
    "\n");
}

/*
 * Parse the command line into a Params value.
 *
 * Starts from the built-in defaults and overrides them with -d, -r, -n and
 * -m. -h prints the help and exits; any other option prints an error plus
 * the help and exits. Aborts via assert() if the rank count is not positive.
 */
struct Params input_params(int argc, char **argv) {
  struct Params p;
  p.dpu_type      = "fsim";
  p.nr_of_ranks   = 1;
  p.input_size_n  = 1 << 9;
  p.input_size_m  = 1 << 9;
  p.n_warmup      = 2;
  p.n_reps        = 3;

  int opt;
  // The option string must list every option handled in the switch below;
  // otherwise getopt() never returns it and the case is unreachable.
  while((opt = getopt(argc, argv, "hd:r:n:m:")) >= 0) {
    switch(opt) {
      case 'h':
        usage();
        exit(0);
        break;
      case 'd': p.dpu_type        = optarg; break;
      case 'r': p.nr_of_ranks     = atoi(optarg); break;
      case 'n': p.input_size_n    = atoi(optarg); break;
      case 'm': p.input_size_m    = atoi(optarg); break;
      default:
        fprintf(stderr, "\nUnrecognized option!\n");
        usage();
        exit(0);
    }
  }
  assert(p.nr_of_ranks > 0 && "Invalid # of ranks!");

  return p;
}

  /**
  * @brief Main of the Host Application.
  */
  int main(int argc, char **argv) {

    // The command line is parsed for -h and option validation only; the
    // problem size used below is fixed.
    struct Params p = input_params(argc, argv);
    (void) p;
    uint64_t n_size = 8192;
    uint64_t m_size = 20480;

    Timer timer;
    A = malloc(NUM_LAYERS * sizeof(T*));
    if (A == NULL) {
        fprintf(stderr, "Memory allocation failed!\n");
        exit(1);
    }
    for(int l = 0; l < NUM_LAYERS; l++) {
        A[l] = malloc(n_size*m_size*sizeof(T));
        if (A[l] == NULL) {
            fprintf(stderr, "Memory allocation failed!\n");
            exit(1);
        }
    }
    // Zero-initialised: init_data() only fills the first n_size entries of B,
    // while mlp_host() below is called with the two sizes in the opposite
    // order and therefore reads B and C beyond that prefix.
    // NOTE(review): the argument order of the mlp_host() call differs from
    // the one used for init_data() -- confirm which one is intended.
    B = calloc(m_size, sizeof(T));
    C = calloc(m_size, sizeof(T));
    if (B == NULL || C == NULL) {
        fprintf(stderr, "Memory allocation failed!\n");
        exit(1);
    }

    // Create an input file with arbitrary data.
    init_data(A, B, m_size, n_size);

    // rep = 0 so that the accumulator of the (uninitialised) timer is reset
    // before the measurement.
    // NOTE(review): assumes start() zeroes the slot when rep == 0, as the
    // timer.h of the sibling benchmarks does -- confirm.
    start(&timer, 0, 0);
    mlp_host(C, A, B, n_size, m_size);
    stop(&timer, 0);

    // Keep the full 64-bit sum instead of truncating it to 32 bits.
    uint64_t sum = mlp_host_sum(n_size, m_size);

    printf("Kernel ");
    print(&timer, 0, 1);
    printf("\n");

    printf("SUM = %llu \n", (unsigned long long) sum);

    for(int l = 0; l < NUM_LAYERS; l++)
        free(A[l]);
    free(A);
    free(B);
    free(C);

    return 0;
  }


================================================
FILE: golang/uPIMulator/benchmark/MLP/baselines/gpu/Makefile
================================================
# Neither target produces a file with its own name.
.PHONY: all clean

# Build the CUDA baseline.
all:
	/usr/local/cuda/bin/nvcc mlp.cu -I/usr/local/cuda/include -lm -o mlp

# -f keeps "make clean" from failing when the binary has not been built yet.
clean:
	rm -f mlp


================================================
FILE: golang/uPIMulator/benchmark/MLP/baselines/gpu/README
================================================
Multilayer Perceptron (MLP)

Compilation instructions

    make

Execution instructions

    ./mlp


================================================
FILE: golang/uPIMulator/benchmark/MLP/baselines/gpu/mlp.cu
================================================
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <cuda.h>
#include "../../support/common.h"

#define THREAD 128

__global__ void gemv(int m, int n, T *adim, T *b, T *d_ans);

void cgemv(int m, int n, T *adim, T *b, T *d_ans);

// Current wall-clock time from gettimeofday(), in seconds with the
// microsecond part as the fraction.
double gettime()
{
	struct timeval now;
	gettimeofday(&now, NULL);

	double micros_as_seconds = 1.0e-6 * (double)now.tv_usec;
	return now.tv_sec + micros_as_seconds;
}

/*
 * Host driver of the GPU baseline.
 *
 * Allocates NUM_LAYERS weight matrices and an input vector, fills them with a
 * sparse 0/1 pattern, runs the layers once on the CPU with cgemv() and once on
 * the GPU with the gemv kernel, compares the host and device buffers and
 * prints the accumulated kernel time measured with CUDA events.
 */
int main(int argc, char **argv)
{
	/* for CPU */
	int i, j;
	T **bdim; 
	T *c, *ans, *h_ans, *h_c;
	int n = 8192;
	int m = 20480;

	// One m*n weight matrix per layer, plus input (c, h_c) and output (ans, h_ans) vectors
	bdim = (T**) malloc(NUM_LAYERS * sizeof(T*));
	for(int l = 0; l < NUM_LAYERS; l++)
		bdim[l] = (T*)malloc(sizeof(T)*m*n);
	c = (T*)malloc(sizeof(T) *n);
	h_c = (T*)malloc(sizeof(T) *n);
	ans = (T*)malloc(sizeof(T) *m);
	h_ans = (T*)malloc(sizeof(T) *m);

	/* for GPU */
	// A single device matrix buffer is reused for every layer
	T *d_bdim; 
	T *d_c, *d_ans;
	cudaMalloc((void **)&d_bdim, sizeof(T)*m*n);
	cudaMalloc((void **)&d_c, sizeof(T)*n);
	cudaMalloc((void **)&d_ans, sizeof(T)*m);

	// Input vector: 0 for the first 48 of every 50 indices, otherwise i % 2
	for(i = 0; i < n; i++)
	{
		if(i % 50 < 48)
		{
			c[i] = 0;
			h_c[i] = 0;
		}
		else
		{
			c[i] = i % 2;
			h_c[i] = i % 2;
		}
	}
	// Weights: 0 for the first 98 of every 100 column indices, otherwise (l + i) % 2
	for(int l = 0; l < NUM_LAYERS; l++)
		for(i = 0; i < n; i++)
		{
			for(j = 0; j < m; j++){
				if(j % 100 < 98)
				{

					bdim[l][i*m+j] = 0;
				}
				else
				{

					bdim[l][i*m+j] = (l + i) % 2;
				}
			}
		}

	// cgemv() accumulates into its output, so the output vectors start at zero
	for(j = 0; j < m; j++){
		ans[j] = 0;
		h_ans[j] = 0;
	}
	// Computation on the host for verification
	T* vector = c;
	T* output = ans;
	T* matrix;
	int mm = m;
	int nn = n;
	for(int l = 0; l < NUM_LAYERS; l++){
		matrix = bdim[l];
		cgemv(mm, nn, matrix, vector, output);
		// NOTE(review): from the second layer on, vector and output are the
		// same buffer, so cgemv() reads and writes it in place -- confirm intended.
		vector = output;
		// NOTE(review): h_ans now points at the ans buffer; the buffer allocated
		// for h_ans above is no longer referenced and is never freed.
                h_ans = output;
		// The two dimensions are swapped for the following layers
		mm = n; nn = m;
	}

	// Event creation
	cudaEvent_t start, stop;
	cudaEventCreate(&start);
	cudaEventCreate(&stop);
	float time1 = 0;
	float time2 = 0;
	// h_ans aliases ans here, so the device output buffer is seeded with the CPU result
	cudaMemcpy(d_ans, h_ans, sizeof(T)*m, cudaMemcpyHostToDevice);
	cudaMemcpy(d_c, h_c, sizeof(T)*n, cudaMemcpyHostToDevice);

	vector = d_c;
	output = d_ans;
	mm = m;
	nn = n;
	for(int l = 0; l < NUM_LAYERS; l++){
		// The weight upload happens before the start event and is not timed
		cudaMemcpy(d_bdim, bdim[l], sizeof(T)*m*n, cudaMemcpyHostToDevice);
		matrix = d_bdim;
		// Start timer
		cudaEventRecord( start, 0 );
		// One block per output row, THREAD threads per block
		gemv<<<mm, THREAD>>>(mm, nn, matrix, vector, output);
		// End timer
		cudaEventRecord( stop, 0 );
		cudaEventSynchronize( stop );
		cudaEventElapsedTime( &time2, start, stop );
		time1 += time2;
		// NOTE(review): from the second layer on, the kernel's input and output
		// are the same device buffer while blocks run concurrently -- confirm intended.
		vector = output;
		d_ans = output;
		mm = n; nn = m;
	}

	cudaMemcpy(h_ans, d_ans, sizeof(T)*m, cudaMemcpyDeviceToHost);
	cudaMemcpy(h_c, d_c, sizeof(T)*n, cudaMemcpyDeviceToHost);

	// NOTE(review): h_ans and ans are the same pointer at this point (see the
	// host loop above), so this comparison cannot report a mismatch.
	// NOTE(review): the %d conversions assume T is an int-sized integer -- confirm.
	for(i = 0; i < m; i++)
	{
		if(ans[i] != h_ans[i])
		printf("ERROR in Ans %d -> %d -- %d\n", i, ans[i], h_ans[i]);
        }

	for(i = 0; i < n; i++)
	{
		if(c[i] != h_c[i])
		printf("ERROR in C %d -> %d -- %d\n", i, c[i], h_c[i]);
	}
	// Sum of the per-layer kernel times reported by cudaEventElapsedTime()
	printf("Execution time = %f ms\n", time1);


	for(int l = 0; l < NUM_LAYERS; l++)
		free(bdim[l]);


	free(bdim);
	free(c);
	free(ans);
	free(h_c);
	cudaFree(d_bdim);
	cudaFree(d_c);
	cudaFree(d_ans);
	cudaEventDestroy(start);
	cudaEventDestroy(stop);

	return 0;
} 

/*
 * One layer of the MLP on the GPU: d_ans[row] = max(0, dot(adim[row, :], b)).
 *
 * Each thread block handles the matrix row given by blockIdx.x; its threads
 * accumulate strided partial products into shared memory, which are then
 * combined with a tree reduction.
 *
 *   m     : number of rows (not needed inside the kernel; the row is taken
 *           from blockIdx.x)
 *   n     : number of columns, i.e. the length of a row and of b
 *   adim  : matrix, row r starts at adim[r * n]
 *   b     : input vector
 *   d_ans : output vector, one element written per block
 *
 * The shared buffer has THREAD entries, so the kernel has to be launched with
 * THREAD threads per block.
 */
__global__ void gemv(int m, int n, T* adim, T* b, T* d_ans)
{
	int i;
	int div = n/THREAD;
	__shared__ T tmp[THREAD];

	tmp[threadIdx.x] = 0;

	// Full chunks of THREAD columns
	for(i = 0; i < div; i++){
		tmp[threadIdx.x] += adim[blockIdx.x*n+i*THREAD+threadIdx.x] * b[i * THREAD + threadIdx.x];
	}
	// Remaining columns when n is not a multiple of THREAD. The bound is the
	// column remainder (n % THREAD), not the row count.
	if(threadIdx.x < n%THREAD)
		tmp[threadIdx.x] += adim[blockIdx.x*n+THREAD*div+threadIdx.x] * b[THREAD * div + threadIdx.x];

	__syncthreads();

	// Tree reduction down to tmp[0]. The barrier sits outside the conditional
	// so that every thread of the block reaches it in every step.
	for(i = THREAD / 2; i > 0; i = i / 2)
	{
		if(threadIdx.x < i)
			tmp[threadIdx.x] += tmp[threadIdx.x + i];
		__syncthreads();
	}

	// ReLU on the row's dot product
	if(threadIdx.x == 0)
		d_ans[blockIdx.x] = max(0, tmp[0]);

}

/*
 * CPU reference for one layer: for every row, adds the dot product of that
 * row of adim (row r starts at adim[r * n]) with b on top of the value already
 * stored in d_ans[row], then clamps the result at zero.
 * d_ans is not cleared here and is updated in place after every product.
 */
void cgemv(int m, int n, T *adim, T *b, T *d_ans)
{
	for(int row = 0; row < m; row++){
		int col = 0;
		while(col < n){
			d_ans[row] += adim[row*n+col] * b[col];
			col++;
		}
		d_ans[row] = max(0, d_ans[row]);
	}

}


================================================
FILE: golang/uPIMulator/benchmark/MLP/dpu/CMakeLists.txt
================================================
# Build configuration of the MLP DPU kernel.

# Value forwarded to the sources as the BL preprocessor macro.
SET(BL 10)

# The compiler and the include directory are hard-coded absolute paths under /root.
# NR_TASKLETS is not set in this file; presumably it is provided by the
# including project or on the cmake command line -- confirm.
# -S makes the compiler emit assembly instead of an object file.
set(CMAKE_C_COMPILER "/root/upmem-2023.2.0-Linux-x86_64/bin/dpu-upmem-dpurte-clang")
set(CMAKE_C_FLAGS "-w -I/root/uPIMulator/benchmark/MLP/support -O2 -S -DNR_TASKLETS=${NR_TASKLETS} -DBL=${BL}")

# Every .c file below this directory is part of the kernel.
file(GLOB_RECURSE SRCS *.c)

add_executable(MLP_device ${SRCS})



================================================
FILE: golang/uPIMulator/benchmark/MLP/dpu/task.c
================================================
/*
 * Matrix vector multiplication with multiple tasklet
 *
 */
#include <stdint.h>
#include <stdio.h>
#include <defs.h>
#include <mram.h>
#include <alloc.h>
#include <barrier.h>
#include <seqread.h>

#include "../support/common.h"

__host dpu_arguments_t DPU_INPUT_ARGUMENTS;

// GEMV
/*
 * Multiply-accumulate one cached block: adds bufferA[k] * bufferB[k] for the
 * BLOCK_SIZE / sizeof(T) elements of the block onto bufferC[pos].
 */
void __attribute__ ((noinline)) gemv(T *bufferC, T *bufferA, T *bufferB, int pos) {
	unsigned int k = 0;
	while (k < BLOCK_SIZE / sizeof(T)) {
		bufferC[pos] += bufferA[k] * bufferB[k];
		k++;
	}
}

// Barrier
BARRIER_INIT(my_barrier, NR_TASKLETS);

// main
/*
 * DPU kernel entry point, executed by every tasklet.
 *
 * Each tasklet takes a contiguous range of matrix rows, streams each row of A
 * and the vector B from MRAM in BLOCK_SIZE pieces, accumulates the dot
 * products for two rows at a time and writes the two results back to MRAM.
 */
int main() {
	unsigned int tasklet_id = me();
#if PRINT
	printf("tasklet_id = %u\n", tasklet_id);
#endif
	if (tasklet_id == 0){ // Only tasklet 0 performs the reset
		mem_reset(); // Reset the heap
	}
	// All tasklets wait here until the heap has been reset
	barrier_wait(&my_barrier);

	// Problem description provided by the host
	int32_t n_size = DPU_INPUT_ARGUMENTS.n_size;
	int32_t n_size_pad = DPU_INPUT_ARGUMENTS.n_size_pad;
	uint32_t nr_rows = DPU_INPUT_ARGUMENTS.nr_rows;
	uint32_t max_rows = DPU_INPUT_ARGUMENTS.max_rows;


	// Split the rows over the tasklets in units of two rows: every tasklet gets
	// 2 * chunks rows, and the rows left over by the division are handed out
	// two at a time to the lowest tasklet ids.
	unsigned int nrows = nr_rows;
	unsigned int rows_per_tasklet; 
	unsigned int start_row;
	unsigned int chunks = nrows / (NR_TASKLETS + NR_TASKLETS);
	unsigned int dbl_chunks = chunks + chunks;                                                                       
	rows_per_tasklet = dbl_chunks;
	unsigned int rest_rows = nrows % (NR_TASKLETS + NR_TASKLETS);

	if ((tasklet_id + tasklet_id) < rest_rows)
		rows_per_tasklet += 2;
	// First row of this tasklet: tasklets that received extra rows occupy
	// dbl_chunks + 2 rows each, the others dbl_chunks rows.
	if (rest_rows > 0) {
		if ((tasklet_id + tasklet_id) >= rest_rows) {
			unsigned int hlf_rest_rows = rest_rows >> 1;
			if ((rest_rows & 1) == 1)
				start_row = (hlf_rest_rows + 1) * (dbl_chunks + 2) + (tasklet_id - 1 - hlf_rest_rows) * dbl_chunks;
			else
				start_row = (hlf_rest_rows) * (dbl_chunks + 2) + (tasklet_id - hlf_rest_rows) * dbl_chunks;
		} else 
			start_row = tasklet_id * (dbl_chunks + 2);
	} else {
		start_row = tasklet_id * (dbl_chunks);
	}

	// Address of the current row in MRAM
	// MRAM heap layout used below: A starts at the heap base, B follows at
	// max_rows * n_size_pad elements, and C follows B after n_size_pad elements.
	uint32_t mram_base_addr_A = (uint32_t) (DPU_MRAM_HEAP_POINTER + start_row * n_size * sizeof(T));
	uint32_t mram_base_addr_B = (uint32_t) (DPU_MRAM_HEAP_POINTER + max_rows * n_size_pad * sizeof(T));
	uint32_t mram_base_addr_C = (uint32_t) (DPU_MRAM_HEAP_POINTER + max_rows * n_size_pad * sizeof(T) + n_size_pad * sizeof(T) + start_row * sizeof(T));
	uint32_t mram_temp_addr_A = mram_base_addr_A;
	uint32_t mram_temp_addr_B = mram_base_addr_B;

	// Initialize local caches to store the MRAM blocks
	// cache_A_aux receives the 8 bytes following a block; cache_C holds the
	// results of the two rows processed per iteration.
	T *cache_A = (T *) mem_alloc(BLOCK_SIZE + 8);
	T *cache_A_aux = (T *) mem_alloc(8);
	T *cache_B = (T *) mem_alloc(BLOCK_SIZE);
	T *cache_C = (T *) mem_alloc(8);

	// Set to 1 when the MRAM address of the next row is not a multiple of 8
	// (presumably the alignment required for MRAM transfers -- confirm)
	int offset = 0;

	// Iterate over nr_rows
	for (unsigned int i = start_row; i < start_row + rows_per_tasklet; i += 2) {

		mram_temp_addr_A = (uint32_t) (DPU_MRAM_HEAP_POINTER + i * n_size * sizeof(T));
		mram_temp_addr_B = mram_base_addr_B;

		cache_C[0] = 0;
		cache_C[1] = 0;
		// Up to two rows per iteration; pos selects the accumulator in cache_C
		for(unsigned int pos = 0; pos < 2 && i + pos < nr_rows; pos++){
			int n = 0, j;
			// All blocks of the row except the last one
			for (n = 0; n < (int32_t) (n_size - (BLOCK_SIZE/sizeof(T))); n += (BLOCK_SIZE / sizeof(T)))
			{

				mram_read((__mram_ptr void const*) (mram_temp_addr_A), cache_A, BLOCK_SIZE);
				mram_read((__mram_ptr void const*) (mram_temp_addr_B), cache_B, BLOCK_SIZE);

				// Misaligned row: shift the block left by one element and take
				// the last element from the 8 bytes that follow the block.
				if(offset)
				{

					for(unsigned int off = 0; off < (BLOCK_SIZE / sizeof(T)) - 1; off++)
					{
						cache_A[off] = cache_A[off + 1];
					}

					mram_read((__mram_ptr void const*) (mram_temp_addr_A + BLOCK_SIZE), cache_A_aux, 8);

					cache_A[BLOCK_SIZE / sizeof(T) - 1] = cache_A_aux[0];
				}

				// Compute GEMV
				gemv(cache_C, cache_A, cache_B, pos);

				// Update memory addresses
				mram_temp_addr_A += BLOCK_SIZE;
				mram_temp_addr_B += BLOCK_SIZE;
			}

			// Last block of the row, read with the same misalignment handling
			mram_read((__mram_ptr void const*) (mram_temp_addr_A), cache_A, BLOCK_SIZE);


			if(offset)
			{
				for(unsigned int off = 0; off < (BLOCK_SIZE / sizeof(T)) -1; off++)
				{

					cache_A[off] = cache_A[off + 1];
				}

				mram_read((__mram_ptr void const*) (mram_temp_addr_A + BLOCK_SIZE ), cache_A_aux, 8);

  			       cache_A[BLOCK_SIZE / sizeof(T) - 1] = cache_A_aux[0];
			}


			mram_read((__mram_ptr void const*) (mram_temp_addr_B), cache_B, BLOCK_SIZE);

			// Only the n_size - n elements that belong to the row are accumulated
			for (j = 0; j < (int) (n_size - n); j++) {
				// Compute GEMV
				// Guard: the remainder must fit in one block
				if(j >= (int)(BLOCK_SIZE / sizeof(T))){ 
					printf("error\n");
					break;
				}
				cache_C[pos] += cache_A[j] * cache_B[j];
			}


			// Advance A by the bytes consumed from the last block and rewind B
			mram_temp_addr_A += (BLOCK_SIZE - ((BLOCK_SIZE / sizeof(T)) - (n_size - n)) * sizeof(T));
			mram_temp_addr_B = mram_base_addr_B;

			// Record whether the next row starts on an 8-byte boundary
			if(mram_temp_addr_A % 8 != 0)
			{
				offset = 1;
			}
			else
			{
				offset = 0;
			}
		}
		// Write cache to current MRAM block
		mram_write(cache_C, (__mram_ptr void *) (mram_base_addr_C), 8);

		// Update memory address
		mram_base_addr_C += 2 * sizeof(T);

	}

	return 0;
}


================================================
FILE: golang/uPIMulator/benchmark/MLP/host/app.c
================================================
/**
 * app.c
 * MLP Host Application Source File
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <dpu.h>
#include <dpu_log.h>
#include <unistd.h>
#include <getopt.h>
#include <assert.h>

#if ENERGY
#include <dpu_probe.h>
#endif

#include "../support/common.h"
#include "../support/timer.h"
#include "../support/params.h"

// Define the DPU Binary path as DPU_BINARY here
#ifndef DPU_BINARY
#define DPU_BINARY "./bin/mlp_dpu"
#endif

static T** A;
static T* B;
static T* B_host;
static T* B_tmp;
static T* C;
static T* C_dpu;

// Create input arrays
/*
 * Fill the per-layer weight arrays A and the input vectors with a sparse,
 * deterministic 0/1 pattern.
 *   A[l][i] : 0 for the first 98 of every 100 indices, otherwise (l + i) % 2
 *   B[i]    : 0 for the first 48 of every 50 indices, otherwise i % 2
 * B_host receives a copy of the n_size values written to B.
 */
static void init_data(T** A, T* B, T* B_host, unsigned int m_size, unsigned int n_size) {
	for (unsigned int layer = 0; layer < NUM_LAYERS; layer++) {
		for (unsigned int k = 0; k < m_size * n_size; k++) {
			A[layer][k] = (k % 100 < 98) ? 0 : (layer + k) % 2;
		}
	}

	for (unsigned int k = 0; k < n_size; k++) {
		B[k] = (k % 50 < 48) ? 0 : k % 2;
		B_host[k] = B[k];
	}
}

// Compute output in the host
/*
 * Host reference of the MLP forward pass. For each of the NUM_LAYERS layers:
 *   1. C is cleared,
 *   2. C[row] = max(0, sum over col of A[layer][row * n_size + col] * B[col]),
 *   3. the first n_size entries of C are copied into B as the next input.
 */
static void mlp_host(T* C, T** A, T* B, unsigned int m_size, unsigned int n_size) {

	for (unsigned int layer = 0; layer < NUM_LAYERS; layer++){
		// Reset the output vector before accumulating this layer
		for (unsigned int row = 0; row < m_size; row++)
			C[row] = 0;

		for (unsigned int row = 0; row < m_size; row++){
			for (unsigned int col = 0; col < n_size; col++)
				C[row] += A[layer][row * n_size + col] * B[col];
			C[row] = max(0, C[row]);
		}

		// The layer output becomes the input of the next layer
		for (unsigned int k = 0; k < n_size; k++)
			B[k] = C[k];
	}
}

// Main of the Host Application
int main(int argc, char **argv) {

	struct Params p = input_params(argc, argv);

	struct dpu_set_t dpu_set, dpu;
	uint32_t nr_of_dpus;

	// Allocate DPUs and load binary
	DPU_ASSERT(dpu_alloc(NR_DPUS, NULL, &dpu_set));
	DPU_ASSERT(dpu_load(dpu_set, DPU_BINARY, NULL));
	DPU_ASSERT(dpu_get_nr_dpus(dpu_set, &nr_of_dpus));

#if ENERGY
	struct dpu_probe_t probe;
	DPU_ASSERT(dpu_probe_init("energy_probe", &probe));
#endif

	unsigned int i, l;
	unsigned int m_size = p.m_size;
	unsigned int n_size = p.n_size;

	// Initialize help data
	dpu_info = (struct dpu_info_t *) malloc(nr_of_dpus * sizeof(struct dpu_info_t));
	dpu_arguments_t *input_args = (dpu_arguments_t *) malloc(nr_of_dpus * sizeof(dpu_arguments_t));
	uint32_t max_rows_per_dpu = 0;
	uint32_t n_size_pad = n_size;
	if(n_size % 2 == 1){
		n_size_pad++;
	}

	// Timer
	Timer timer;
	i = 0;
	DPU_FOREACH(dpu_set, dpu, i) {
		uint32_t rows_per_dpu;
		uint32_t prev_rows_dpu = 0;
		uint32_t chunks =
Download .txt
gitextract_ry7k9cpr/

├── .gitignore
├── LICENSE
├── README.md
├── assets/
│   ├── figure5_mem_util_calculator.xlsx
│   ├── figure7_active_tasklet_breakdown.xlsx
│   └── figure9_instruction_mix.xlsx
├── golang/
│   ├── README.md
│   └── uPIMulator/
│       ├── benchmark/
│       │   ├── BS/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── bs_omp.c
│       │   │   │   │   └── timer.h
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── binary_search.cu
│       │   │   │       ├── binary_search.h
│       │   │   │       ├── cpu_lib.py
│       │   │   │       ├── cu_lib_import.py
│       │   │   │       └── run.py
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── CMakeLists.txt
│       │   ├── GEMV/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── _BL_10.conf
│       │   │   ├── _NR_TASKLETS_10_BL_10.conf
│       │   │   ├── _NR_TASKLETS_11_BL_10.conf
│       │   │   ├── _NR_TASKLETS_12_BL_10.conf
│       │   │   ├── _NR_TASKLETS_13_BL_10.conf
│       │   │   ├── _NR_TASKLETS_14_BL_10.conf
│       │   │   ├── _NR_TASKLETS_15_BL_10.conf
│       │   │   ├── _NR_TASKLETS_16
│       │   │   ├── _NR_TASKLETS_16_BL_10.conf
│       │   │   ├── _NR_TASKLETS_17_BL_10.conf
│       │   │   ├── _NR_TASKLETS_18_BL_10.conf
│       │   │   ├── _NR_TASKLETS_19_BL_10.conf
│       │   │   ├── _NR_TASKLETS_1_BL_10.conf
│       │   │   ├── _NR_TASKLETS_20_BL_10.conf
│       │   │   ├── _NR_TASKLETS_21_BL_10.conf
│       │   │   ├── _NR_TASKLETS_22_BL_10.conf
│       │   │   ├── _NR_TASKLETS_23_BL_10.conf
│       │   │   ├── _NR_TASKLETS_24_BL_10.conf
│       │   │   ├── _NR_TASKLETS_2_BL_10.conf
│       │   │   ├── _NR_TASKLETS_3_BL_10.conf
│       │   │   ├── _NR_TASKLETS_4_BL_10.conf
│       │   │   ├── _NR_TASKLETS_5_BL_10.conf
│       │   │   ├── _NR_TASKLETS_6_BL_10.conf
│       │   │   ├── _NR_TASKLETS_7_BL_10.conf
│       │   │   ├── _NR_TASKLETS_8_BL_10.conf
│       │   │   ├── _NR_TASKLETS_9_BL_10.conf
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── gemv_openmp.c
│       │   │   │   │   └── gemv_utils.h
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── gemv.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── HST-L/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── input/
│       │   │   │   └── image_VanHateren.iml
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── HST-S/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── input/
│       │   │   │       │   └── image_VanHateren.iml
│       │   │   │       ├── kernel.cpp
│       │   │   │       ├── kernel.cu
│       │   │   │       ├── kernel.h
│       │   │   │       ├── main.cpp
│       │   │   │       └── support/
│       │   │   │           ├── common.h
│       │   │   │           ├── cuda-setup.h
│       │   │   │           ├── partitioner.h
│       │   │   │           ├── timer.h
│       │   │   │           └── verify.h
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── input/
│       │   │   │   └── image_VanHateren.iml
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── MLP/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── _BL_10.conf
│       │   │   ├── _NR_TASKLETS_10_BL_10.conf
│       │   │   ├── _NR_TASKLETS_11_BL_10.conf
│       │   │   ├── _NR_TASKLETS_12_BL_10.conf
│       │   │   ├── _NR_TASKLETS_13_BL_10.conf
│       │   │   ├── _NR_TASKLETS_14_BL_10.conf
│       │   │   ├── _NR_TASKLETS_15_BL_10.conf
│       │   │   ├── _NR_TASKLETS_16
│       │   │   ├── _NR_TASKLETS_16_BL_10.conf
│       │   │   ├── _NR_TASKLETS_17_BL_10.conf
│       │   │   ├── _NR_TASKLETS_18_BL_10.conf
│       │   │   ├── _NR_TASKLETS_19_BL_10.conf
│       │   │   ├── _NR_TASKLETS_1_BL_10.conf
│       │   │   ├── _NR_TASKLETS_20_BL_10.conf
│       │   │   ├── _NR_TASKLETS_21_BL_10.conf
│       │   │   ├── _NR_TASKLETS_22_BL_10.conf
│       │   │   ├── _NR_TASKLETS_23_BL_10.conf
│       │   │   ├── _NR_TASKLETS_24_BL_10.conf
│       │   │   ├── _NR_TASKLETS_2_BL_10.conf
│       │   │   ├── _NR_TASKLETS_3_BL_10.conf
│       │   │   ├── _NR_TASKLETS_4_BL_10.conf
│       │   │   ├── _NR_TASKLETS_5_BL_10.conf
│       │   │   ├── _NR_TASKLETS_6_BL_10.conf
│       │   │   ├── _NR_TASKLETS_7_BL_10.conf
│       │   │   ├── _NR_TASKLETS_8_BL_10.conf
│       │   │   ├── _NR_TASKLETS_9_BL_10.conf
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── mlp_openmp.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── mlp.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── RED/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.cpp
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── app_baseline.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── cyclecount.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SCAN-RSS/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.cpp
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── app_baseline.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SCAN-SSA/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SEL/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── ds.h
│       │   │   │       ├── kernel.cu
│       │   │   │       └── select.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── TRNS/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── kernel.cpp
│       │   │   │   │   ├── kernel.h
│       │   │   │   │   ├── main.cpp
│       │   │   │   │   └── support/
│       │   │   │   │       ├── common.h
│       │   │   │   │       ├── setup.h
│       │   │   │   │       ├── timer.h
│       │   │   │   │       └── verify.h
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── kernel.cu
│       │   │   │       ├── kernel.h
│       │   │   │       ├── main.cpp
│       │   │   │       └── support/
│       │   │   │           ├── common.h
│       │   │   │           ├── cuda-setup.h
│       │   │   │           ├── timer.h
│       │   │   │           └── verify.h
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── TS/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── UNI/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── ds.h
│       │   │   │       ├── kernel.cu
│       │   │   │       └── unique.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── VA/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── vec_add.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   └── build.py
│       ├── docker/
│       │   └── Dockerfile
│       ├── go.mod
│       ├── go.sum
│       ├── script/
│       │   ├── build.py
│       │   ├── format.py
│       │   ├── run_validation.sh
│       │   └── visualize.py
│       ├── sdk/
│       │   ├── CMakeLists.txt
│       │   ├── build.py
│       │   ├── misc/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── accessMramFromDpu.c
│       │   │   ├── coreDump.c
│       │   │   ├── crt0.c
│       │   │   ├── dpu.lds
│       │   │   ├── internalStateReset.c
│       │   │   ├── linkerScript.lds
│       │   │   ├── restoreRegisters.c
│       │   │   └── restore_carry_and_zero_flag.h
│       │   ├── stdlib/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── abort.c
│       │   │   ├── assert.h
│       │   │   ├── atoi.c
│       │   │   ├── atol.c
│       │   │   ├── ctype.h
│       │   │   ├── errno.c
│       │   │   ├── errno.h
│       │   │   ├── exit.c
│       │   │   ├── inttypes.h
│       │   │   ├── iso646.h
│       │   │   ├── limits.h
│       │   │   ├── memchr.c
│       │   │   ├── memcmp.c
│       │   │   ├── memcpy.c
│       │   │   ├── memmove.c
│       │   │   ├── memmram_utils.h
│       │   │   ├── memset.c
│       │   │   ├── stdalign.h
│       │   │   ├── stdarg.h
│       │   │   ├── stdbool.h
│       │   │   ├── stddef.h
│       │   │   ├── stdint.h
│       │   │   ├── stdio.c
│       │   │   ├── stdio.h
│       │   │   ├── stdlib.h
│       │   │   ├── stdnoreturn.h
│       │   │   ├── stpcpy.c
│       │   │   ├── stpncpy.c
│       │   │   ├── strcat.c
│       │   │   ├── strchr.c
│       │   │   ├── strcmp.c
│       │   │   ├── strcpy.c
│       │   │   ├── strcspn.c
│       │   │   ├── strdup.c
│       │   │   ├── strerror.c
│       │   │   ├── string.h
│       │   │   ├── strlen.c
│       │   │   ├── strlwr.c
│       │   │   ├── strncat.c
│       │   │   ├── strncmp.c
│       │   │   ├── strncpy.c
│       │   │   ├── strndup.c
│       │   │   ├── strnlen.c
│       │   │   ├── strpbrk.c
│       │   │   ├── strrchr.c
│       │   │   ├── strrev.c
│       │   │   ├── strsep.c
│       │   │   ├── strspn.c
│       │   │   ├── strstr.c
│       │   │   ├── strtok_r.c
│       │   │   ├── strtol.c
│       │   │   └── strupr.c
│       │   └── syslib/
│       │       ├── CMakeLists.txt
│       │       ├── absvdi2.c
│       │       ├── absvsi2.c
│       │       ├── adddf3.c
│       │       ├── addsf3.c
│       │       ├── addvdi3.c
│       │       ├── addvsi3.c
│       │       ├── alloc.c
│       │       ├── alloc.h
│       │       ├── ashldi3.c
│       │       ├── ashrdi3.c
│       │       ├── atomic_bit.h
│       │       ├── atomics.c
│       │       ├── attributes.h
│       │       ├── barrier.c
│       │       ├── barrier.h
│       │       ├── bswapdi2.c
│       │       ├── bswapsi2.c
│       │       ├── buddy_alloc.c
│       │       ├── buddy_alloc.h
│       │       ├── buddy_realloc.c
│       │       ├── built_ins.h
│       │       ├── clzdi2.c
│       │       ├── clzsi2.c
│       │       ├── cmpdi2.c
│       │       ├── comparedf2.c
│       │       ├── comparesf2.c
│       │       ├── ctzdi2.c
│       │       ├── ctzsi2.c
│       │       ├── defs.c
│       │       ├── defs.h
│       │       ├── devprivate.h
│       │       ├── div32.c
│       │       ├── divdf3.c
│       │       ├── divdi3.c
│       │       ├── divmodsi4.c
│       │       ├── divsf3.c
│       │       ├── divsi3.c
│       │       ├── dpuconst.h
│       │       ├── dpufault.h
│       │       ├── dpuruntime.h
│       │       ├── extendhfsf2.c
│       │       ├── extendsfdf2.c
│       │       ├── ffsdi2.c
│       │       ├── ffssi2.c
│       │       ├── ffsti2.c
│       │       ├── fixdfdi.c
│       │       ├── fixdfsi.c
│       │       ├── fixsfdi.c
│       │       ├── fixsfsi.c
│       │       ├── fixunsdfdi.c
│       │       ├── fixunsdfsi.c
│       │       ├── fixunssfdi.c
│       │       ├── fixunssfsi.c
│       │       ├── float.h
│       │       ├── floatdidf.c
│       │       ├── floatdisf.c
│       │       ├── floatsidf.c
│       │       ├── floatsisf.c
│       │       ├── floatundidf.c
│       │       ├── floatundisf.c
│       │       ├── floatunsidf.c
│       │       ├── floatunsisf.c
│       │       ├── fp_add_impl.inc
│       │       ├── fp_extend.h
│       │       ├── fp_extend_impl.inc
│       │       ├── fp_fixint_impl.inc
│       │       ├── fp_fixuint_impl.inc
│       │       ├── fp_lib.h
│       │       ├── fp_mul_impl.inc
│       │       ├── fp_trunc.h
│       │       ├── fp_trunc_impl.inc
│       │       ├── fsb_allocator.c
│       │       ├── fsb_allocator.h
│       │       ├── handshake.c
│       │       ├── handshake.h
│       │       ├── int_endianness.h
│       │       ├── int_lib.h
│       │       ├── int_math.h
│       │       ├── int_types.h
│       │       ├── int_util.c
│       │       ├── int_util.h
│       │       ├── listener.c
│       │       ├── lshrdi3.c
│       │       ├── macro_utils.h
│       │       ├── mcount.c
│       │       ├── moddi3.c
│       │       ├── modsi3.c
│       │       ├── mram.h
│       │       ├── mul32.c
│       │       ├── mul64.c
│       │       ├── muldc3.c
│       │       ├── muldf3.c
│       │       ├── mulodi4.c
│       │       ├── mulosi4.c
│       │       ├── mulsf3.c
│       │       ├── mulvdi3.c
│       │       ├── mulvsi3.c
│       │       ├── mutex.h
│       │       ├── negdf2.c
│       │       ├── negdi2.c
│       │       ├── negsf2.c
│       │       ├── negvdi2.c
│       │       ├── negvsi2.c
│       │       ├── paritydi2.c
│       │       ├── paritysi2.c
│       │       ├── perfcounter.c
│       │       ├── perfcounter.h
│       │       ├── popcountdi2.c
│       │       ├── popcountsi2.c
│       │       ├── powidf2.c
│       │       ├── powisf2.c
│       │       ├── profiling.c
│       │       ├── profiling.h
│       │       ├── profiling_internals.h
│       │       ├── sem.c
│       │       ├── sem.h
│       │       ├── seqread.h
│       │       ├── seqread.inc
│       │       ├── seqread1024.c
│       │       ├── seqread128.c
│       │       ├── seqread256.c
│       │       ├── seqread32.c
│       │       ├── seqread512.c
│       │       ├── seqread64.c
│       │       ├── soft_cache.c
│       │       ├── soft_cache.h
│       │       ├── subdf3.c
│       │       ├── subsf3.c
│       │       ├── subvdi3.c
│       │       ├── subvsi3.c
│       │       ├── sysdef.h
│       │       ├── truncdfhf2.c
│       │       ├── truncdfsf2.c
│       │       ├── truncsfhf2.c
│       │       ├── ucmpdi2.c
│       │       ├── udiv64.c
│       │       ├── udivdi3.c
│       │       ├── udivmodsi4.c
│       │       ├── udivsi3.c
│       │       ├── umoddi3.c
│       │       ├── umodsi3.c
│       │       └── waitqueue.c
│       └── src/
│           ├── abi/
│           │   ├── encoding/
│           │   │   ├── ascii_encoder.go
│           │   │   └── byte_stream.go
│           │   └── word/
│           │       ├── intermediate.go
│           │       └── word.go
│           ├── assembler/
│           │   ├── assemblable.go
│           │   ├── assembler.go
│           │   └── prim/
│           │       ├── bs.go
│           │       ├── gemv.go
│           │       ├── hst_l.go
│           │       ├── hst_s.go
│           │       ├── mlp.go
│           │       ├── red.go
│           │       ├── scan_rss.go
│           │       ├── scan_ssa.go
│           │       ├── sel.go
│           │       ├── trns.go
│           │       ├── ts.go
│           │       ├── uni.go
│           │       └── va.go
│           ├── compiler/
│           │   └── compiler.go
│           ├── core/
│           │   ├── job.go
│           │   └── thread_pool.go
│           ├── linker/
│           │   ├── analyze_liveness_job.go
│           │   ├── kernel/
│           │   │   ├── directive/
│           │   │   │   ├── ascii_directive.go
│           │   │   │   ├── asciz_directive.go
│           │   │   │   ├── byte_directive.go
│           │   │   │   ├── long_directive.go
│           │   │   │   ├── quad_directive.go
│           │   │   │   ├── short_directive.go
│           │   │   │   └── zero_directive.go
│           │   │   ├── encodable.go
│           │   │   ├── executable.go
│           │   │   ├── instruction/
│           │   │   │   ├── cc/
│           │   │   │   │   ├── acquire_cc.go
│           │   │   │   │   ├── add_nz_cc.go
│           │   │   │   │   ├── boot_cc.go
│           │   │   │   │   ├── cc.go
│           │   │   │   │   ├── const_cc_ge0.go
│           │   │   │   │   ├── const_cc_geu.go
│           │   │   │   │   ├── const_cc_zero.go
│           │   │   │   │   ├── count_nz_cc.go
│           │   │   │   │   ├── div_cc.go
│           │   │   │   │   ├── div_nz_cc.go
│           │   │   │   │   ├── ext_sub_set_cc.go
│           │   │   │   │   ├── false_cc.go
│           │   │   │   │   ├── imm_shift_nz_cc.go
│           │   │   │   │   ├── log_nz_cc.go
│           │   │   │   │   ├── log_set_cc.go
│           │   │   │   │   ├── mul_nz_cc.go
│           │   │   │   │   ├── no_cc.go
│           │   │   │   │   ├── release_cc.go
│           │   │   │   │   ├── shift_nz_cc.go
│           │   │   │   │   ├── sub_nz_cc.go
│           │   │   │   │   ├── sub_set_cc.go
│           │   │   │   │   ├── true_cc.go
│           │   │   │   │   └── true_false_cc.go
│           │   │   │   ├── endian.go
│           │   │   │   ├── exception.go
│           │   │   │   ├── flag.go
│           │   │   │   ├── instruction.go
│           │   │   │   ├── op_code.go
│           │   │   │   ├── reg_descriptor/
│           │   │   │   │   ├── gp_reg_descriptor.go
│           │   │   │   │   ├── pair_reg_descriptor.go
│           │   │   │   │   ├── sp_reg_descriptor.go
│           │   │   │   │   └── src_reg_descriptor.go
│           │   │   │   └── suffix.go
│           │   │   ├── kernel.go
│           │   │   ├── label.go
│           │   │   ├── liveness.go
│           │   │   ├── relocatable.go
│           │   │   └── section.go
│           │   ├── lex_job.go
│           │   ├── lexer/
│           │   │   ├── keyword_factory.go
│           │   │   ├── lexer.go
│           │   │   ├── regex.go
│           │   │   ├── regex_factory.go
│           │   │   ├── token.go
│           │   │   ├── token_stream.go
│           │   │   └── tokenizer.go
│           │   ├── linker.go
│           │   ├── logic/
│           │   │   ├── instruction_assigner.go
│           │   │   ├── label_assigner.go
│           │   │   ├── linker_constant.go
│           │   │   ├── linker_script.go
│           │   │   ├── liveness_analyzer.go
│           │   │   └── set_assigner.go
│           │   ├── parse_job.go
│           │   └── parser/
│           │       ├── ast.go
│           │       ├── expr/
│           │       │   ├── binary_add_expr.go
│           │       │   ├── binary_sub_expr.go
│           │       │   ├── ci_op_code_expr.go
│           │       │   ├── condition_expr.go
│           │       │   ├── ddci_op_code_expr.go
│           │       │   ├── dma_rri_op_code_expr.go
│           │       │   ├── drdici_op_code_expr.go
│           │       │   ├── endian_expr.go
│           │       │   ├── expr.go
│           │       │   ├── i_op_code_expr.go
│           │       │   ├── jump_op_code_expr.go
│           │       │   ├── load_op_code_expr.go
│           │       │   ├── negative_number_expr.go
│           │       │   ├── primary_expr.go
│           │       │   ├── program_counter_expr.go
│           │       │   ├── r_op_code_expr.go
│           │       │   ├── rici_op_code_expr.go
│           │       │   ├── rr_op_code_expr.go
│           │       │   ├── rri_op_code_expr.go
│           │       │   ├── rrri_op_code_expr.go
│           │       │   ├── section_name_expr.go
│           │       │   ├── section_type_expr.go
│           │       │   ├── src_reg_expr.go
│           │       │   ├── store_op_code_expr.go
│           │       │   ├── suffix_expr.go
│           │       │   └── symbol_type.go
│           │       ├── parser.go
│           │       ├── rule.go
│           │       ├── stack.go
│           │       ├── stack_item.go
│           │       ├── stmt/
│           │       │   ├── directive/
│           │       │   │   ├── addrsig_stmt.go
│           │       │   │   ├── addrsig_sym_stmt.go
│           │       │   │   ├── ascii_stmt.go
│           │       │   │   ├── asciz_stmt.go
│           │       │   │   ├── byte_stmt.go
│           │       │   │   ├── cfi_def_cfa_offset_stmt.go
│           │       │   │   ├── cfi_endproc.go
│           │       │   │   ├── cfi_offset_stmt.go
│           │       │   │   ├── cfi_sections_stmt.go
│           │       │   │   ├── cfi_startproc_stmt.go
│           │       │   │   ├── file_number_stmt.go
│           │       │   │   ├── file_string_stmt.go
│           │       │   │   ├── global_stmt.go
│           │       │   │   ├── loc_is_stmt_stmt.go
│           │       │   │   ├── loc_number_stmt.go
│           │       │   │   ├── loc_prologue_end_stmt.go
│           │       │   │   ├── long_program_counter.go
│           │       │   │   ├── long_section_name_stmt.go
│           │       │   │   ├── p2_align_stmt.go
│           │       │   │   ├── quad_stmt.go
│           │       │   │   ├── section_identifier_number_stmt.go
│           │       │   │   ├── section_identifier_stmt.go
│           │       │   │   ├── section_stack_sizes_stmt.go
│           │       │   │   ├── section_string_number_stmt.go
│           │       │   │   ├── section_string_stmt.go
│           │       │   │   ├── set_stmt.go
│           │       │   │   ├── short_stmt.go
│           │       │   │   ├── size_stmt.go
│           │       │   │   ├── text_stmt.go
│           │       │   │   ├── type_stmt.go
│           │       │   │   ├── weak_stmt.go
│           │       │   │   ├── zero_double_number_stmt.go
│           │       │   │   └── zero_single_number_stmt.go
│           │       │   ├── instruction/
│           │       │   │   ├── ci_stmt.go
│           │       │   │   ├── ddci_stmt.go
│           │       │   │   ├── dma_rri_stmt.go
│           │       │   │   ├── drdici_stmt.go
│           │       │   │   ├── edri_stmt.go
│           │       │   │   ├── erid_stmt.go
│           │       │   │   ├── erii_stmt.go
│           │       │   │   ├── erir_stmt.go
│           │       │   │   ├── erri_stmt.go
│           │       │   │   ├── i_stmt.go
│           │       │   │   ├── nop_stmt.go
│           │       │   │   ├── r_stmt.go
│           │       │   │   ├── rci_stmt.go
│           │       │   │   ├── rici_stmt.go
│           │       │   │   ├── rir_stmt.go
│           │       │   │   ├── rirc_stmt.go
│           │       │   │   ├── rirci_stmt.go
│           │       │   │   ├── rr_stmt.go
│           │       │   │   ├── rrc_stmt.go
│           │       │   │   ├── rrci_stmt.go
│           │       │   │   ├── rri_stmt.go
│           │       │   │   ├── rric_stmt.go
│           │       │   │   ├── rrici_stmt.go
│           │       │   │   ├── rrr_stmt.go
│           │       │   │   ├── rrrc_stmt.go
│           │       │   │   ├── rrrci_stmt.go
│           │       │   │   ├── rrri_stmt.go
│           │       │   │   ├── rrrici_stmt.go
│           │       │   │   ├── s_erri_stmt.go
│           │       │   │   ├── s_r_stmt.go
│           │       │   │   ├── s_rci_stmt.go
│           │       │   │   ├── s_rirc_stmt.go
│           │       │   │   ├── s_rirci_stmt.go
│           │       │   │   ├── s_rr_stmt.go
│           │       │   │   ├── s_rrc_stmt.go
│           │       │   │   ├── s_rrci_stmt.go
│           │       │   │   ├── s_rri_stmt.go
│           │       │   │   ├── s_rric_stmt.go
│           │       │   │   ├── s_rrici_stmt.go
│           │       │   │   ├── s_rrr_stmt.go
│           │       │   │   ├── s_rrrc_stmt.go
│           │       │   │   ├── s_rrrci_stmt.go
│           │       │   │   ├── s_rrri_stmt.go
│           │       │   │   └── s_rrrici_stmt.go
│           │       │   ├── label_stmt.go
│           │       │   ├── stmt.go
│           │       │   └── sugar/
│           │       │       ├── bkp_stmt.go
│           │       │       ├── boot_ri_stmt.go
│           │       │       ├── call_ri_stmt.go
│           │       │       ├── call_rr_stmt.go
│           │       │       ├── div_step_drdi_stmt.go
│           │       │       ├── jeq_rii_stmt.go
│           │       │       ├── jeq_rri_stmt.go
│           │       │       ├── jnz_ri_stmt.go
│           │       │       ├── jump_i_stmt.go
│           │       │       ├── jump_r_stmt.go
│           │       │       ├── lbs_rri_stmt.go
│           │       │       ├── lbs_s_rri_stmt.go
│           │       │       ├── ld_dri_stmt.go
│           │       │       ├── movd_dd_stmt.go
│           │       │       ├── move_ri_stmt.go
│           │       │       ├── move_rici_stmt.go
│           │       │       ├── move_s_ri_stmt.go
│           │       │       ├── move_s_rici_stmt.go
│           │       │       ├── sb_id_ri_stmt.go
│           │       │       ├── sb_id_rii_stmt.go
│           │       │       ├── sb_rir_stmt.go
│           │       │       ├── sd_rid_stmt.go
│           │       │       ├── stop_stmt.go
│           │       │       └── time_cfg_r_stmt.go
│           │       ├── table.go
│           │       └── walker.go
│           ├── main.go
│           ├── misc/
│           │   ├── command_line_option.go
│           │   ├── command_line_parser.go
│           │   ├── command_line_validator.go
│           │   ├── config_loader.go
│           │   ├── config_validator.go
│           │   ├── file_dumper.go
│           │   ├── file_scanner.go
│           │   └── stat_factory.go
│           └── simulator/
│               ├── channel/
│               │   ├── channel.go
│               │   ├── channel_message.go
│               │   └── channel_message_q.go
│               ├── cycle_job.go
│               ├── dpu/
│               │   ├── dpu.go
│               │   ├── dram/
│               │   │   ├── dma_command.go
│               │   │   ├── dma_command_q.go
│               │   │   ├── memory_command.go
│               │   │   ├── memory_command_q.go
│               │   │   ├── memory_controller.go
│               │   │   ├── memory_scheduler.go
│               │   │   ├── mram.go
│               │   │   ├── row_buffer.go
│               │   │   └── wordline.go
│               │   ├── logic/
│               │   │   ├── alu.go
│               │   │   ├── cycle_rule.go
│               │   │   ├── dma.go
│               │   │   ├── instruction_q.go
│               │   │   ├── logic.go
│               │   │   ├── operand_collector.go
│               │   │   ├── pipeline.go
│               │   │   ├── reg_set.go
│               │   │   ├── thread.go
│               │   │   ├── thread_q.go
│               │   │   └── thread_scheduler.go
│               │   ├── reg/
│               │   │   ├── condition_reg.go
│               │   │   ├── exception_reg.go
│               │   │   ├── flag_reg.go
│               │   │   ├── gp_reg.go
│               │   │   ├── pc_reg.go
│               │   │   ├── reg_file.go
│               │   │   └── sp_reg.go
│               │   └── sram/
│               │       ├── atomic.go
│               │       ├── iram.go
│               │       ├── lock.go
│               │       └── wram.go
│               ├── host/
│               │   ├── channel_transfer_read_job.go
│               │   ├── channel_transfer_write_job.go
│               │   ├── chunk.go
│               │   ├── cycle_job.go
│               │   ├── dma_transfer_to_atomic_job.go
│               │   ├── dma_transfer_to_iram_job.go
│               │   ├── dma_transfer_to_mram_job.go
│               │   ├── dma_transfer_to_wram_job.go
│               │   └── host.go
│               ├── rank/
│               │   └── rank.go
│               └── simulator.go
├── golang_vm/
│   ├── README.md
│   └── uPIMulator/
│       ├── benchmark/
│       │   ├── BS/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── bs_omp.c
│       │   │   │   │   └── timer.h
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── binary_search.cu
│       │   │   │       ├── binary_search.h
│       │   │   │       ├── cpu_lib.py
│       │   │   │       ├── cu_lib_import.py
│       │   │   │       └── run.py
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── CMakeLists.txt
│       │   ├── GEMV/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── _BL_10.conf
│       │   │   ├── _NR_TASKLETS_10_BL_10.conf
│       │   │   ├── _NR_TASKLETS_11_BL_10.conf
│       │   │   ├── _NR_TASKLETS_12_BL_10.conf
│       │   │   ├── _NR_TASKLETS_13_BL_10.conf
│       │   │   ├── _NR_TASKLETS_14_BL_10.conf
│       │   │   ├── _NR_TASKLETS_15_BL_10.conf
│       │   │   ├── _NR_TASKLETS_16
│       │   │   ├── _NR_TASKLETS_16_BL_10.conf
│       │   │   ├── _NR_TASKLETS_17_BL_10.conf
│       │   │   ├── _NR_TASKLETS_18_BL_10.conf
│       │   │   ├── _NR_TASKLETS_19_BL_10.conf
│       │   │   ├── _NR_TASKLETS_1_BL_10.conf
│       │   │   ├── _NR_TASKLETS_20_BL_10.conf
│       │   │   ├── _NR_TASKLETS_21_BL_10.conf
│       │   │   ├── _NR_TASKLETS_22_BL_10.conf
│       │   │   ├── _NR_TASKLETS_23_BL_10.conf
│       │   │   ├── _NR_TASKLETS_24_BL_10.conf
│       │   │   ├── _NR_TASKLETS_2_BL_10.conf
│       │   │   ├── _NR_TASKLETS_3_BL_10.conf
│       │   │   ├── _NR_TASKLETS_4_BL_10.conf
│       │   │   ├── _NR_TASKLETS_5_BL_10.conf
│       │   │   ├── _NR_TASKLETS_6_BL_10.conf
│       │   │   ├── _NR_TASKLETS_7_BL_10.conf
│       │   │   ├── _NR_TASKLETS_8_BL_10.conf
│       │   │   ├── _NR_TASKLETS_9_BL_10.conf
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── gemv_openmp.c
│       │   │   │   │   └── gemv_utils.h
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── gemv.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── HST-L/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── input/
│       │   │   │   └── image_VanHateren.iml
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── HST-S/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── input/
│       │   │   │       │   └── image_VanHateren.iml
│       │   │   │       ├── kernel.cpp
│       │   │   │       ├── kernel.cu
│       │   │   │       ├── kernel.h
│       │   │   │       ├── main.cpp
│       │   │   │       └── support/
│       │   │   │           ├── common.h
│       │   │   │           ├── cuda-setup.h
│       │   │   │           ├── partitioner.h
│       │   │   │           ├── timer.h
│       │   │   │           └── verify.h
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── input/
│       │   │   │   └── image_VanHateren.iml
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── MLP/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── _BL_10.conf
│       │   │   ├── _NR_TASKLETS_10_BL_10.conf
│       │   │   ├── _NR_TASKLETS_11_BL_10.conf
│       │   │   ├── _NR_TASKLETS_12_BL_10.conf
│       │   │   ├── _NR_TASKLETS_13_BL_10.conf
│       │   │   ├── _NR_TASKLETS_14_BL_10.conf
│       │   │   ├── _NR_TASKLETS_15_BL_10.conf
│       │   │   ├── _NR_TASKLETS_16
│       │   │   ├── _NR_TASKLETS_16_BL_10.conf
│       │   │   ├── _NR_TASKLETS_17_BL_10.conf
│       │   │   ├── _NR_TASKLETS_18_BL_10.conf
│       │   │   ├── _NR_TASKLETS_19_BL_10.conf
│       │   │   ├── _NR_TASKLETS_1_BL_10.conf
│       │   │   ├── _NR_TASKLETS_20_BL_10.conf
│       │   │   ├── _NR_TASKLETS_21_BL_10.conf
│       │   │   ├── _NR_TASKLETS_22_BL_10.conf
│       │   │   ├── _NR_TASKLETS_23_BL_10.conf
│       │   │   ├── _NR_TASKLETS_24_BL_10.conf
│       │   │   ├── _NR_TASKLETS_2_BL_10.conf
│       │   │   ├── _NR_TASKLETS_3_BL_10.conf
│       │   │   ├── _NR_TASKLETS_4_BL_10.conf
│       │   │   ├── _NR_TASKLETS_5_BL_10.conf
│       │   │   ├── _NR_TASKLETS_6_BL_10.conf
│       │   │   ├── _NR_TASKLETS_7_BL_10.conf
│       │   │   ├── _NR_TASKLETS_8_BL_10.conf
│       │   │   ├── _NR_TASKLETS_9_BL_10.conf
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── mlp_openmp.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── mlp.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── RED/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.cpp
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── app_baseline.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── cyclecount.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SCAN-RSS/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.cpp
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── app_baseline.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SCAN-SSA/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SEL/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── ds.h
│       │   │   │       ├── kernel.cu
│       │   │   │       └── select.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── TRNS/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── kernel.cpp
│       │   │   │   │   ├── kernel.h
│       │   │   │   │   ├── main.cpp
│       │   │   │   │   └── support/
│       │   │   │   │       ├── common.h
│       │   │   │   │       ├── setup.h
│       │   │   │   │       ├── timer.h
│       │   │   │   │       └── verify.h
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── kernel.cu
│       │   │   │       ├── kernel.h
│       │   │   │       ├── main.cpp
│       │   │   │       └── support/
│       │   │   │           ├── common.h
│       │   │   │           ├── cuda-setup.h
│       │   │   │           ├── timer.h
│       │   │   │           └── verify.h
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── TS/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── UNI/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── ds.h
│       │   │   │       ├── kernel.cu
│       │   │   │       └── unique.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── VA/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── vec_add.cu
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── VA_SIMPLE/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── dpu/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   └── task.c
│       │   │   └── host/
│       │   │       ├── CMakeLists.txt
│       │   │       └── app.c
│       │   └── build.py
│       ├── docker/
│       │   └── Dockerfile
│       ├── go.mod
│       ├── script/
│       │   ├── build.py
│       │   └── format.py
│       ├── sdk/
│       │   ├── CMakeLists.txt
│       │   ├── build.py
│       │   ├── misc/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── accessMramFromDpu.c
│       │   │   ├── coreDump.c
│       │   │   ├── crt0.c
│       │   │   ├── dpu.lds
│       │   │   ├── internalStateReset.c
│       │   │   ├── linkerScript.lds
│       │   │   ├── restoreRegisters.c
│       │   │   └── restore_carry_and_zero_flag.h
│       │   ├── stdlib/
│       │   │   ├── CMakeLists.txt
│       │   │   ├── abort.c
│       │   │   ├── assert.h
│       │   │   ├── atoi.c
│       │   │   ├── atol.c
│       │   │   ├── ctype.h
│       │   │   ├── errno.c
│       │   │   ├── errno.h
│       │   │   ├── exit.c
│       │   │   ├── inttypes.h
│       │   │   ├── iso646.h
│       │   │   ├── limits.h
│       │   │   ├── memchr.c
│       │   │   ├── memcmp.c
│       │   │   ├── memcpy.c
│       │   │   ├── memmove.c
│       │   │   ├── memmram_utils.h
│       │   │   ├── memset.c
│       │   │   ├── stdalign.h
│       │   │   ├── stdarg.h
│       │   │   ├── stdbool.h
│       │   │   ├── stddef.h
│       │   │   ├── stdint.h
│       │   │   ├── stdio.c
│       │   │   ├── stdio.h
│       │   │   ├── stdlib.h
│       │   │   ├── stdnoreturn.h
│       │   │   ├── stpcpy.c
│       │   │   ├── stpncpy.c
│       │   │   ├── strcat.c
│       │   │   ├── strchr.c
│       │   │   ├── strcmp.c
│       │   │   ├── strcpy.c
│       │   │   ├── strcspn.c
│       │   │   ├── strdup.c
│       │   │   ├── strerror.c
│       │   │   ├── string.h
│       │   │   ├── strlen.c
│       │   │   ├── strlwr.c
│       │   │   ├── strncat.c
│       │   │   ├── strncmp.c
│       │   │   ├── strncpy.c
│       │   │   ├── strndup.c
│       │   │   ├── strnlen.c
│       │   │   ├── strpbrk.c
│       │   │   ├── strrchr.c
│       │   │   ├── strrev.c
│       │   │   ├── strsep.c
│       │   │   ├── strspn.c
│       │   │   ├── strstr.c
│       │   │   ├── strtok_r.c
│       │   │   ├── strtol.c
│       │   │   └── strupr.c
│       │   └── syslib/
│       │       ├── CMakeLists.txt
│       │       ├── absvdi2.c
│       │       ├── absvsi2.c
│       │       ├── adddf3.c
│       │       ├── addsf3.c
│       │       ├── addvdi3.c
│       │       ├── addvsi3.c
│       │       ├── alloc.c
│       │       ├── alloc.h
│       │       ├── ashldi3.c
│       │       ├── ashrdi3.c
│       │       ├── atomic_bit.h
│       │       ├── atomics.c
│       │       ├── attributes.h
│       │       ├── barrier.c
│       │       ├── barrier.h
│       │       ├── bswapdi2.c
│       │       ├── bswapsi2.c
│       │       ├── buddy_alloc.c
│       │       ├── buddy_alloc.h
│       │       ├── buddy_realloc.c
│       │       ├── built_ins.h
│       │       ├── clzdi2.c
│       │       ├── clzsi2.c
│       │       ├── cmpdi2.c
│       │       ├── comparedf2.c
│       │       ├── comparesf2.c
│       │       ├── ctzdi2.c
│       │       ├── ctzsi2.c
│       │       ├── defs.c
│       │       ├── defs.h
│       │       ├── devprivate.h
│       │       ├── div32.c
│       │       ├── divdf3.c
│       │       ├── divdi3.c
│       │       ├── divmodsi4.c
│       │       ├── divsf3.c
│       │       ├── divsi3.c
│       │       ├── dpuconst.h
│       │       ├── dpufault.h
│       │       ├── dpuruntime.h
│       │       ├── extendhfsf2.c
│       │       ├── extendsfdf2.c
│       │       ├── ffsdi2.c
│       │       ├── ffssi2.c
│       │       ├── ffsti2.c
│       │       ├── fixdfdi.c
│       │       ├── fixdfsi.c
│       │       ├── fixsfdi.c
│       │       ├── fixsfsi.c
│       │       ├── fixunsdfdi.c
│       │       ├── fixunsdfsi.c
│       │       ├── fixunssfdi.c
│       │       ├── fixunssfsi.c
│       │       ├── float.h
│       │       ├── floatdidf.c
│       │       ├── floatdisf.c
│       │       ├── floatsidf.c
│       │       ├── floatsisf.c
│       │       ├── floatundidf.c
│       │       ├── floatundisf.c
│       │       ├── floatunsidf.c
│       │       ├── floatunsisf.c
│       │       ├── fp_add_impl.inc
│       │       ├── fp_extend.h
│       │       ├── fp_extend_impl.inc
│       │       ├── fp_fixint_impl.inc
│       │       ├── fp_fixuint_impl.inc
│       │       ├── fp_lib.h
│       │       ├── fp_mul_impl.inc
│       │       ├── fp_trunc.h
│       │       ├── fp_trunc_impl.inc
│       │       ├── fsb_allocator.c
│       │       ├── fsb_allocator.h
│       │       ├── handshake.c
│       │       ├── handshake.h
│       │       ├── int_endianness.h
│       │       ├── int_lib.h
│       │       ├── int_math.h
│       │       ├── int_types.h
│       │       ├── int_util.c
│       │       ├── int_util.h
│       │       ├── listener.c
│       │       ├── lshrdi3.c
│       │       ├── macro_utils.h
│       │       ├── mcount.c
│       │       ├── moddi3.c
│       │       ├── modsi3.c
│       │       ├── mram.h
│       │       ├── mul32.c
│       │       ├── mul64.c
│       │       ├── muldc3.c
│       │       ├── muldf3.c
│       │       ├── mulodi4.c
│       │       ├── mulosi4.c
│       │       ├── mulsf3.c
│       │       ├── mulvdi3.c
│       │       ├── mulvsi3.c
│       │       ├── mutex.h
│       │       ├── negdf2.c
│       │       ├── negdi2.c
│       │       ├── negsf2.c
│       │       ├── negvdi2.c
│       │       ├── negvsi2.c
│       │       ├── paritydi2.c
│       │       ├── paritysi2.c
│       │       ├── perfcounter.c
│       │       ├── perfcounter.h
│       │       ├── popcountdi2.c
│       │       ├── popcountsi2.c
│       │       ├── powidf2.c
│       │       ├── powisf2.c
│       │       ├── profiling.c
│       │       ├── profiling.h
│       │       ├── profiling_internals.h
│       │       ├── sem.c
│       │       ├── sem.h
│       │       ├── seqread.h
│       │       ├── seqread.inc
│       │       ├── seqread1024.c
│       │       ├── seqread128.c
│       │       ├── seqread256.c
│       │       ├── seqread32.c
│       │       ├── seqread512.c
│       │       ├── seqread64.c
│       │       ├── soft_cache.c
│       │       ├── soft_cache.h
│       │       ├── subdf3.c
│       │       ├── subsf3.c
│       │       ├── subvdi3.c
│       │       ├── subvsi3.c
│       │       ├── sysdef.h
│       │       ├── truncdfhf2.c
│       │       ├── truncdfsf2.c
│       │       ├── truncsfhf2.c
│       │       ├── ucmpdi2.c
│       │       ├── udiv64.c
│       │       ├── udivdi3.c
│       │       ├── udivmodsi4.c
│       │       ├── udivsi3.c
│       │       ├── umoddi3.c
│       │       ├── umodsi3.c
│       │       └── waitqueue.c
│       └── src/
│           ├── device/
│           │   ├── abi/
│           │   │   ├── intermediate.go
│           │   │   └── word.go
│           │   ├── compiler/
│           │   │   └── compiler.go
│           │   ├── core/
│           │   │   ├── job.go
│           │   │   └── thread_pool.go
│           │   ├── linker/
│           │   │   ├── analyze_liveness_job.go
│           │   │   ├── kernel/
│           │   │   │   ├── directive/
│           │   │   │   │   ├── ascii_directive.go
│           │   │   │   │   ├── asciz_directive.go
│           │   │   │   │   ├── byte_directive.go
│           │   │   │   │   ├── long_directive.go
│           │   │   │   │   ├── quad_directive.go
│           │   │   │   │   ├── short_directive.go
│           │   │   │   │   └── zero_directive.go
│           │   │   │   ├── encodable.go
│           │   │   │   ├── executable.go
│           │   │   │   ├── instruction/
│           │   │   │   │   ├── cc/
│           │   │   │   │   │   ├── acquire_cc.go
│           │   │   │   │   │   ├── add_nz_cc.go
│           │   │   │   │   │   ├── boot_cc.go
│           │   │   │   │   │   ├── cc.go
│           │   │   │   │   │   ├── const_cc_ge0.go
│           │   │   │   │   │   ├── const_cc_geu.go
│           │   │   │   │   │   ├── const_cc_zero.go
│           │   │   │   │   │   ├── count_nz_cc.go
│           │   │   │   │   │   ├── div_cc.go
│           │   │   │   │   │   ├── div_nz_cc.go
│           │   │   │   │   │   ├── ext_sub_set_cc.go
│           │   │   │   │   │   ├── false_cc.go
│           │   │   │   │   │   ├── imm_shift_nz_cc.go
│           │   │   │   │   │   ├── log_nz_cc.go
│           │   │   │   │   │   ├── log_set_cc.go
│           │   │   │   │   │   ├── mul_nz_cc.go
│           │   │   │   │   │   ├── no_cc.go
│           │   │   │   │   │   ├── release_cc.go
│           │   │   │   │   │   ├── shift_nz_cc.go
│           │   │   │   │   │   ├── sub_nz_cc.go
│           │   │   │   │   │   ├── sub_set_cc.go
│           │   │   │   │   │   ├── true_cc.go
│           │   │   │   │   │   └── true_false_cc.go
│           │   │   │   │   ├── endian.go
│           │   │   │   │   ├── exception.go
│           │   │   │   │   ├── flag.go
│           │   │   │   │   ├── instruction.go
│           │   │   │   │   ├── op_code.go
│           │   │   │   │   ├── reg_descriptor/
│           │   │   │   │   │   ├── gp_reg_descriptor.go
│           │   │   │   │   │   ├── pair_reg_descriptor.go
│           │   │   │   │   │   ├── sp_reg_descriptor.go
│           │   │   │   │   │   └── src_reg_descriptor.go
│           │   │   │   │   └── suffix.go
│           │   │   │   ├── kernel.go
│           │   │   │   ├── label.go
│           │   │   │   ├── liveness.go
│           │   │   │   ├── relocatable.go
│           │   │   │   └── section.go
│           │   │   ├── lex_job.go
│           │   │   ├── lexer/
│           │   │   │   ├── keyword_factory.go
│           │   │   │   ├── lexer.go
│           │   │   │   ├── regex.go
│           │   │   │   ├── regex_factory.go
│           │   │   │   ├── token.go
│           │   │   │   ├── token_stream.go
│           │   │   │   └── tokenizer.go
│           │   │   ├── linker.go
│           │   │   ├── logic/
│           │   │   │   ├── instruction_assigner.go
│           │   │   │   ├── label_assigner.go
│           │   │   │   ├── linker_constant.go
│           │   │   │   ├── linker_script.go
│           │   │   │   ├── liveness_analyzer.go
│           │   │   │   └── set_assigner.go
│           │   │   ├── parse_job.go
│           │   │   └── parser/
│           │   │       ├── ast.go
│           │   │       ├── expr/
│           │   │       │   ├── binary_add_expr.go
│           │   │       │   ├── binary_sub_expr.go
│           │   │       │   ├── ci_op_code_expr.go
│           │   │       │   ├── condition_expr.go
│           │   │       │   ├── ddci_op_code_expr.go
│           │   │       │   ├── dma_rri_op_code_expr.go
│           │   │       │   ├── drdici_op_code_expr.go
│           │   │       │   ├── endian_expr.go
│           │   │       │   ├── expr.go
│           │   │       │   ├── i_op_code_expr.go
│           │   │       │   ├── jump_op_code_expr.go
│           │   │       │   ├── load_op_code_expr.go
│           │   │       │   ├── negative_number_expr.go
│           │   │       │   ├── primary_expr.go
│           │   │       │   ├── program_counter_expr.go
│           │   │       │   ├── r_op_code_expr.go
│           │   │       │   ├── rici_op_code_expr.go
│           │   │       │   ├── rr_op_code_expr.go
│           │   │       │   ├── rri_op_code_expr.go
│           │   │       │   ├── rrri_op_code_expr.go
│           │   │       │   ├── section_name_expr.go
│           │   │       │   ├── section_type_expr.go
│           │   │       │   ├── src_reg_expr.go
│           │   │       │   ├── store_op_code_expr.go
│           │   │       │   ├── suffix_expr.go
│           │   │       │   └── symbol_type.go
│           │   │       ├── parser.go
│           │   │       ├── rule.go
│           │   │       ├── stack.go
│           │   │       ├── stack_item.go
│           │   │       ├── stmt/
│           │   │       │   ├── directive/
│           │   │       │   │   ├── addrsig_stmt.go
│           │   │       │   │   ├── addrsig_sym_stmt.go
│           │   │       │   │   ├── ascii_stmt.go
│           │   │       │   │   ├── asciz_stmt.go
│           │   │       │   │   ├── byte_stmt.go
│           │   │       │   │   ├── cfi_def_cfa_offset_stmt.go
│           │   │       │   │   ├── cfi_endproc.go
│           │   │       │   │   ├── cfi_offset_stmt.go
│           │   │       │   │   ├── cfi_sections_stmt.go
│           │   │       │   │   ├── cfi_startproc_stmt.go
│           │   │       │   │   ├── file_number_stmt.go
│           │   │       │   │   ├── file_string_stmt.go
│           │   │       │   │   ├── global_stmt.go
│           │   │       │   │   ├── loc_is_stmt_stmt.go
│           │   │       │   │   ├── loc_number_stmt.go
│           │   │       │   │   ├── loc_prologue_end_stmt.go
│           │   │       │   │   ├── long_program_counter.go
│           │   │       │   │   ├── long_section_name_stmt.go
│           │   │       │   │   ├── p2_align_stmt.go
│           │   │       │   │   ├── quad_stmt.go
│           │   │       │   │   ├── section_identifier_number_stmt.go
│           │   │       │   │   ├── section_identifier_stmt.go
│           │   │       │   │   ├── section_stack_sizes_stmt.go
│           │   │       │   │   ├── section_string_number_stmt.go
│           │   │       │   │   ├── section_string_stmt.go
│           │   │       │   │   ├── set_stmt.go
│           │   │       │   │   ├── short_stmt.go
│           │   │       │   │   ├── size_stmt.go
│           │   │       │   │   ├── text_stmt.go
│           │   │       │   │   ├── type_stmt.go
│           │   │       │   │   ├── weak_stmt.go
│           │   │       │   │   ├── zero_double_number_stmt.go
│           │   │       │   │   └── zero_single_number_stmt.go
│           │   │       │   ├── instruction/
│           │   │       │   │   ├── ci_stmt.go
│           │   │       │   │   ├── ddci_stmt.go
│           │   │       │   │   ├── dma_rri_stmt.go
│           │   │       │   │   ├── drdici_stmt.go
│           │   │       │   │   ├── edri_stmt.go
│           │   │       │   │   ├── erid_stmt.go
│           │   │       │   │   ├── erii_stmt.go
│           │   │       │   │   ├── erir_stmt.go
│           │   │       │   │   ├── erri_stmt.go
│           │   │       │   │   ├── i_stmt.go
│           │   │       │   │   ├── nop_stmt.go
│           │   │       │   │   ├── r_stmt.go
│           │   │       │   │   ├── rci_stmt.go
│           │   │       │   │   ├── rici_stmt.go
│           │   │       │   │   ├── rir_stmt.go
│           │   │       │   │   ├── rirc_stmt.go
│           │   │       │   │   ├── rirci_stmt.go
│           │   │       │   │   ├── rr_stmt.go
│           │   │       │   │   ├── rrc_stmt.go
│           │   │       │   │   ├── rrci_stmt.go
│           │   │       │   │   ├── rri_stmt.go
│           │   │       │   │   ├── rric_stmt.go
│           │   │       │   │   ├── rrici_stmt.go
│           │   │       │   │   ├── rrr_stmt.go
│           │   │       │   │   ├── rrrc_stmt.go
│           │   │       │   │   ├── rrrci_stmt.go
│           │   │       │   │   ├── rrri_stmt.go
│           │   │       │   │   ├── rrrici_stmt.go
│           │   │       │   │   ├── s_erri_stmt.go
│           │   │       │   │   ├── s_r_stmt.go
│           │   │       │   │   ├── s_rci_stmt.go
│           │   │       │   │   ├── s_rirc_stmt.go
│           │   │       │   │   ├── s_rirci_stmt.go
│           │   │       │   │   ├── s_rr_stmt.go
│           │   │       │   │   ├── s_rrc_stmt.go
│           │   │       │   │   ├── s_rrci_stmt.go
│           │   │       │   │   ├── s_rri_stmt.go
│           │   │       │   │   ├── s_rric_stmt.go
│           │   │       │   │   ├── s_rrici_stmt.go
│           │   │       │   │   ├── s_rrr_stmt.go
│           │   │       │   │   ├── s_rrrc_stmt.go
│           │   │       │   │   ├── s_rrrci_stmt.go
│           │   │       │   │   ├── s_rrri_stmt.go
│           │   │       │   │   └── s_rrrici_stmt.go
│           │   │       │   ├── label_stmt.go
│           │   │       │   ├── stmt.go
│           │   │       │   └── sugar/
│           │   │       │       ├── bkp_stmt.go
│           │   │       │       ├── boot_ri_stmt.go
│           │   │       │       ├── call_ri_stmt.go
│           │   │       │       ├── call_rr_stmt.go
│           │   │       │       ├── div_step_drdi_stmt.go
│           │   │       │       ├── jeq_rii_stmt.go
│           │   │       │       ├── jeq_rri_stmt.go
│           │   │       │       ├── jnz_ri_stmt.go
│           │   │       │       ├── jump_i_stmt.go
│           │   │       │       ├── jump_r_stmt.go
│           │   │       │       ├── lbs_rri_stmt.go
│           │   │       │       ├── lbs_s_rri_stmt.go
│           │   │       │       ├── ld_dri_stmt.go
│           │   │       │       ├── movd_dd_stmt.go
│           │   │       │       ├── move_ri_stmt.go
│           │   │       │       ├── move_rici_stmt.go
│           │   │       │       ├── move_s_ri_stmt.go
│           │   │       │       ├── move_s_rici_stmt.go
│           │   │       │       ├── sb_id_ri_stmt.go
│           │   │       │       ├── sb_id_rii_stmt.go
│           │   │       │       ├── sb_rir_stmt.go
│           │   │       │       ├── sd_rid_stmt.go
│           │   │       │       ├── stop_stmt.go
│           │   │       │       └── time_cfg_r_stmt.go
│           │   │       ├── table.go
│           │   │       └── walker.go
│           │   └── simulator/
│           │       ├── channel/
│           │       │   ├── channel.go
│           │       │   ├── channel_command.go
│           │       │   └── channel_command_q.go
│           │       ├── dpu/
│           │       │   ├── control_interface.go
│           │       │   ├── dpu.go
│           │       │   ├── dram/
│           │       │   │   ├── dma_command.go
│           │       │   │   ├── dma_command_q.go
│           │       │   │   ├── memory_command.go
│           │       │   │   ├── memory_command_q.go
│           │       │   │   ├── memory_controller.go
│           │       │   │   ├── memory_scheduler.go
│           │       │   │   ├── mram.go
│           │       │   │   ├── row_buffer.go
│           │       │   │   └── wordline.go
│           │       │   ├── logic/
│           │       │   │   ├── alu.go
│           │       │   │   ├── cycle_rule.go
│           │       │   │   ├── dma.go
│           │       │   │   ├── instruction_q.go
│           │       │   │   ├── logic.go
│           │       │   │   ├── operand_collector.go
│           │       │   │   ├── pipeline.go
│           │       │   │   ├── reg_set.go
│           │       │   │   ├── thread.go
│           │       │   │   ├── thread_q.go
│           │       │   │   └── thread_scheduler.go
│           │       │   ├── reg/
│           │       │   │   ├── condition_reg.go
│           │       │   │   ├── exception_reg.go
│           │       │   │   ├── flag_reg.go
│           │       │   │   ├── gp_reg.go
│           │       │   │   ├── pc_reg.go
│           │       │   │   ├── reg_file.go
│           │       │   │   └── sp_reg.go
│           │       │   └── sram/
│           │       │       ├── atomic.go
│           │       │       ├── iram.go
│           │       │       ├── lock.go
│           │       │       └── wram.go
│           │       └── rank/
│           │           ├── rank.go
│           │           ├── rank_command.go
│           │           └── rank_command_q.go
│           ├── encoding/
│           │   ├── ascii_encoder.go
│           │   └── byte_stream.go
│           ├── host/
│           │   ├── abi/
│           │   │   ├── binary.go
│           │   │   ├── bytecode.go
│           │   │   ├── label.go
│           │   │   ├── op_code.go
│           │   │   └── relocatable.go
│           │   ├── interpreter/
│           │   │   ├── codegen/
│           │   │   │   ├── codegen.go
│           │   │   │   └── type_system/
│           │   │   │       ├── method.go
│           │   │   │       ├── symbol.go
│           │   │   │       └── type_system.go
│           │   │   ├── interpreter.go
│           │   │   ├── lexer/
│           │   │   │   ├── keyword_factory.go
│           │   │   │   ├── lexer.go
│           │   │   │   ├── regex.go
│           │   │   │   ├── regex_factory.go
│           │   │   │   ├── token.go
│           │   │   │   ├── token_stream.go
│           │   │   │   └── tokenizer.go
│           │   │   └── parser/
│           │   │       ├── ast.go
│           │   │       ├── decl/
│           │   │       │   ├── decl.go
│           │   │       │   ├── func_decl.go
│           │   │       │   ├── func_def.go
│           │   │       │   └── struct_def.go
│           │   │       ├── directive/
│           │   │       │   ├── define_directive.go
│           │   │       │   ├── directive.go
│           │   │       │   └── include_directive.go
│           │   │       ├── expr/
│           │   │       │   ├── additive_expr.go
│           │   │       │   ├── arg_list.go
│           │   │       │   ├── assignment_expr.go
│           │   │       │   ├── bitwise_and_expr.go
│           │   │       │   ├── bitwise_or_expr.go
│           │   │       │   ├── bitwise_xor_expr.go
│           │   │       │   ├── conditional_expr.go
│           │   │       │   ├── equality_expr.go
│           │   │       │   ├── expr.go
│           │   │       │   ├── logical_and_expr.go
│           │   │       │   ├── logical_or_expr.go
│           │   │       │   ├── multiplicative_expr.go
│           │   │       │   ├── postfix_expr.go
│           │   │       │   ├── primary_expr.go
│           │   │       │   ├── relational_expr.go
│           │   │       │   ├── shift_expr.go
│           │   │       │   └── unary_expr.go
│           │   │       ├── param_list/
│           │   │       │   ├── param.go
│           │   │       │   └── param_list.go
│           │   │       ├── parser.go
│           │   │       ├── rule.go
│           │   │       ├── stack.go
│           │   │       ├── stack_item.go
│           │   │       ├── stmt/
│           │   │       │   ├── block_stmt.go
│           │   │       │   ├── break_stmt.go
│           │   │       │   ├── continue_stmt.go
│           │   │       │   ├── dpu_foreach_stmt.go
│           │   │       │   ├── empty_stmt.go
│           │   │       │   ├── expr_stmt.go
│           │   │       │   ├── for_stmt.go
│           │   │       │   ├── if_stmt.go
│           │   │       │   ├── return_stmt.go
│           │   │       │   ├── stmt.go
│           │   │       │   ├── var_decl_init_stmt.go
│           │   │       │   ├── var_decl_stmt.go
│           │   │       │   └── while_stmt.go
│           │   │       ├── table.go
│           │   │       └── type_specifier/
│           │   │           └── type_specifier.go
│           │   └── vm/
│           │       ├── arena/
│           │       │   ├── arena.go
│           │       │   ├── garbage_collector.go
│           │       │   ├── memory.go
│           │       │   └── pool.go
│           │       ├── bank_cycle_job.go
│           │       ├── base/
│           │       │   └── object.go
│           │       ├── dpu_compute_cycle_job.go
│           │       ├── dpu_cycle_job.go
│           │       ├── dpu_load_job.go
│           │       ├── dram/
│           │       │   ├── bank/
│           │       │   │   ├── array.go
│           │       │   │   ├── bank.go
│           │       │   │   ├── dma_command.go
│           │       │   │   ├── dma_command_q.go
│           │       │   │   ├── memory_command.go
│           │       │   │   ├── memory_command_q.go
│           │       │   │   ├── row_buffer.go
│           │       │   │   ├── segment.go
│           │       │   │   ├── transfer_command.go
│           │       │   │   ├── transfer_command_q.go
│           │       │   │   └── wordline.go
│           │       │   ├── channel/
│           │       │   │   ├── channel.go
│           │       │   │   ├── channel_command.go
│           │       │   │   └── channel_command_q.go
│           │       │   ├── memory_controller.go
│           │       │   ├── memory_mapping.go
│           │       │   ├── memory_scheduler.go
│           │       │   └── rank/
│           │       │       ├── rank.go
│           │       │       ├── rank_command.go
│           │       │       └── rank_command_q.go
│           │       ├── frame/
│           │       │   ├── frame.go
│           │       │   └── frame_chain.go
│           │       ├── pc/
│           │       │   └── pc.go
│           │       ├── stack/
│           │       │   ├── return_stack.go
│           │       │   ├── stack.go
│           │       │   └── stack_item.go
│           │       ├── symbol/
│           │       │   ├── scope.go
│           │       │   ├── scope_chain.go
│           │       │   └── symbol.go
│           │       ├── type_system/
│           │       │   ├── field.go
│           │       │   ├── registry.go
│           │       │   ├── skeleton.go
│           │       │   └── type_variable.go
│           │       └── virtual_machine.go
│           ├── main.go
│           ├── misc/
│           │   ├── command_line_option.go
│           │   ├── command_line_parser.go
│           │   ├── command_line_validator.go
│           │   ├── config_loader.go
│           │   ├── config_validator.go
│           │   ├── file_dumper.go
│           │   ├── file_scanner.go
│           │   └── stat_factory.go
│           ├── program/
│           │   ├── app.go
│           │   └── task.go
│           └── system/
│               └── system.go
├── python_cpp/
│   ├── README.md
│   ├── uPIMulator_backend/
│   │   ├── CMakeLists.txt
│   │   ├── script/
│   │   │   ├── build.sh
│   │   │   ├── format.sh
│   │   │   ├── run.sh
│   │   │   └── run_serial.sh
│   │   └── src/
│   │       ├── CMakeLists.txt
│   │       ├── abi/
│   │       │   ├── cc/
│   │       │   │   ├── _base_cc.cc
│   │       │   │   ├── _base_cc.h
│   │       │   │   ├── acquire_cc.h
│   │       │   │   ├── add_nz_cc.h
│   │       │   │   ├── boot_cc.h
│   │       │   │   ├── const_cc_ge0.h
│   │       │   │   ├── const_cc_geu.h
│   │       │   │   ├── const_cc_zero.h
│   │       │   │   ├── count_nz_cc.h
│   │       │   │   ├── div_cc.h
│   │       │   │   ├── div_nz_cc.h
│   │       │   │   ├── ext_sub_set_cc.h
│   │       │   │   ├── false_cc.h
│   │       │   │   ├── imm_shift_nz_cc.h
│   │       │   │   ├── log_nz_cc.h
│   │       │   │   ├── log_set_cc.h
│   │       │   │   ├── mul_nz_cc.h
│   │       │   │   ├── no_cc.h
│   │       │   │   ├── release_cc.h
│   │       │   │   ├── shift_nz_cc.h
│   │       │   │   ├── sub_nz_cc.h
│   │       │   │   ├── sub_set_cc.h
│   │       │   │   ├── true_cc.h
│   │       │   │   └── true_false_cc.h
│   │       │   ├── instruction/
│   │       │   │   ├── instruction.cc
│   │       │   │   ├── instruction.h
│   │       │   │   ├── op_code.h
│   │       │   │   └── suffix.h
│   │       │   ├── isa/
│   │       │   │   ├── condition.h
│   │       │   │   ├── endian.h
│   │       │   │   ├── exception.h
│   │       │   │   └── flag.h
│   │       │   ├── reg/
│   │       │   │   ├── gp_reg.h
│   │       │   │   ├── pair_reg.cc
│   │       │   │   ├── pair_reg.h
│   │       │   │   ├── sp_reg.h
│   │       │   │   ├── src_reg.cc
│   │       │   │   └── src_reg.h
│   │       │   └── word/
│   │       │       ├── _base_word.cc
│   │       │       ├── _base_word.h
│   │       │       ├── data_address_word.h
│   │       │       ├── data_word.h
│   │       │       ├── immediate.h
│   │       │       ├── instruction_address_word.h
│   │       │       ├── instruction_word.h
│   │       │       └── representation.h
│   │       ├── converter/
│   │       │   ├── condition_converter.cc
│   │       │   ├── condition_converter.h
│   │       │   ├── endian_converter.cc
│   │       │   ├── endian_converter.h
│   │       │   ├── flag_converter.cc
│   │       │   ├── flag_converter.h
│   │       │   ├── instruction_converter.cc
│   │       │   ├── instruction_converter.h
│   │       │   ├── op_code_converter.cc
│   │       │   ├── op_code_converter.h
│   │       │   ├── reg_converter.cc
│   │       │   ├── reg_converter.h
│   │       │   ├── reg_file_converter.cc
│   │       │   ├── reg_file_converter.h
│   │       │   ├── suffix_converter.cc
│   │       │   └── suffix_converter.h
│   │       ├── encoder/
│   │       │   ├── byte.h
│   │       │   ├── byte_stream.cc
│   │       │   ├── byte_stream.h
│   │       │   ├── instruction_encoder.cc
│   │       │   └── instruction_encoder.h
│   │       ├── initializer/
│   │       │   ├── int_initializer.cc
│   │       │   ├── int_initializer.h
│   │       │   ├── str_initializer.h
│   │       │   └── str_initialzier.cc
│   │       ├── main.cc
│   │       ├── main.h
│   │       ├── simulator/
│   │       │   ├── basic/
│   │       │   │   ├── queue.h
│   │       │   │   └── timer_queue.h
│   │       │   ├── cpu/
│   │       │   │   ├── cpu.cc
│   │       │   │   ├── cpu.h
│   │       │   │   ├── fini_thread.cc
│   │       │   │   ├── fini_thread.h
│   │       │   │   ├── init_thread.cc
│   │       │   │   ├── init_thread.h
│   │       │   │   ├── sched_thread.cc
│   │       │   │   ├── sched_thread.h
│   │       │   │   ├── thread.cc
│   │       │   │   └── thread.h
│   │       │   ├── dpu/
│   │       │   │   ├── alu.cc
│   │       │   │   ├── alu.h
│   │       │   │   ├── cycle_rule.cc
│   │       │   │   ├── cycle_rule.h
│   │       │   │   ├── dma.cc
│   │       │   │   ├── dma.h
│   │       │   │   ├── dma_command.cc
│   │       │   │   ├── dma_command.h
│   │       │   │   ├── dpu.cc
│   │       │   │   ├── dpu.h
│   │       │   │   ├── logic.cc
│   │       │   │   ├── logic.h
│   │       │   │   ├── operand_collector.cc
│   │       │   │   ├── operand_collector.h
│   │       │   │   ├── pipeline.cc
│   │       │   │   ├── pipeline.h
│   │       │   │   ├── revolver_scheduler.cc
│   │       │   │   ├── revolver_scheduler.h
│   │       │   │   ├── thread.cc
│   │       │   │   └── thread.h
│   │       │   ├── dram/
│   │       │   │   ├── fifo_scheduler.cc
│   │       │   │   ├── fifo_scheduler.h
│   │       │   │   ├── frfcfs_scheduler.cc
│   │       │   │   ├── frfcfs_scheduler.h
│   │       │   │   ├── memory_command.cc
│   │       │   │   ├── memory_command.h
│   │       │   │   ├── memory_controller.cc
│   │       │   │   ├── memory_controller.h
│   │       │   │   ├── mram.cc
│   │       │   │   ├── mram.h
│   │       │   │   ├── row_buffer.cc
│   │       │   │   ├── row_buffer.h
│   │       │   │   ├── scheduler.cc
│   │       │   │   ├── scheduler.h
│   │       │   │   ├── wordline.cc
│   │       │   │   └── wordline.h
│   │       │   ├── rank/
│   │       │   │   ├── rank.cc
│   │       │   │   ├── rank.h
│   │       │   │   └── rank_message.h
│   │       │   ├── reg/
│   │       │   │   ├── condition_reg.cc
│   │       │   │   ├── condition_reg.h
│   │       │   │   ├── exception_reg.h
│   │       │   │   ├── flag_reg.h
│   │       │   │   ├── gp_reg.cc
│   │       │   │   ├── gp_reg.h
│   │       │   │   ├── pc_reg.h
│   │       │   │   ├── reg_file.cc
│   │       │   │   ├── reg_file.h
│   │       │   │   ├── sp_reg.cc
│   │       │   │   └── sp_reg.h
│   │       │   ├── sram/
│   │       │   │   ├── atomic.cc
│   │       │   │   ├── atomic.h
│   │       │   │   ├── iram.cc
│   │       │   │   ├── iram.h
│   │       │   │   ├── lock.cc
│   │       │   │   ├── lock.h
│   │       │   │   ├── wram.cc
│   │       │   │   └── wram.h
│   │       │   ├── system.cc
│   │       │   └── system.h
│   │       └── util/
│   │           ├── argument_parser.cc
│   │           ├── argument_parser.h
│   │           ├── config_loader.h
│   │           ├── stat_factory.cc
│   │           └── stat_factory.h
│   └── uPIMulator_frontend/
│       ├── .flake8
│       ├── .hadolint.yaml
│       ├── .isort.cfg
│       ├── .markdownlint.yaml
│       ├── .shellcheckrc
│       ├── benchmark/
│       │   ├── Arithmetic-Throughput/
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── cyclecount.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── BFS/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── app.cu
│       │   │   ├── data/
│       │   │   │   └── loc-gowalla_edges.txt
│       │   │   ├── dpu/
│       │   │   │   ├── dpu-utils.h
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   ├── app.c
│       │   │   │   └── mram-management.h
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── graph.h
│       │   │       ├── params.h
│       │   │       ├── timer.h
│       │   │       └── utils.h
│       │   ├── BS/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── bs_omp.c
│       │   │   │   │   └── timer.h
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── binary_search.cu
│       │   │   │       ├── binary_search.h
│       │   │   │       ├── cpu_lib.py
│       │   │   │       ├── cu_lib_import.py
│       │   │   │       └── run.py
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── CPU-DPU/
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── GEMV/
│       │   │   ├── Makefile
│       │   │   ├── _BL_10.conf
│       │   │   ├── _NR_TASKLETS_10_BL_10.conf
│       │   │   ├── _NR_TASKLETS_11_BL_10.conf
│       │   │   ├── _NR_TASKLETS_12_BL_10.conf
│       │   │   ├── _NR_TASKLETS_13_BL_10.conf
│       │   │   ├── _NR_TASKLETS_14_BL_10.conf
│       │   │   ├── _NR_TASKLETS_15_BL_10.conf
│       │   │   ├── _NR_TASKLETS_16
│       │   │   ├── _NR_TASKLETS_16_BL_10.conf
│       │   │   ├── _NR_TASKLETS_17_BL_10.conf
│       │   │   ├── _NR_TASKLETS_18_BL_10.conf
│       │   │   ├── _NR_TASKLETS_19_BL_10.conf
│       │   │   ├── _NR_TASKLETS_1_BL_10.conf
│       │   │   ├── _NR_TASKLETS_20_BL_10.conf
│       │   │   ├── _NR_TASKLETS_21_BL_10.conf
│       │   │   ├── _NR_TASKLETS_22_BL_10.conf
│       │   │   ├── _NR_TASKLETS_23_BL_10.conf
│       │   │   ├── _NR_TASKLETS_24_BL_10.conf
│       │   │   ├── _NR_TASKLETS_2_BL_10.conf
│       │   │   ├── _NR_TASKLETS_3_BL_10.conf
│       │   │   ├── _NR_TASKLETS_4_BL_10.conf
│       │   │   ├── _NR_TASKLETS_5_BL_10.conf
│       │   │   ├── _NR_TASKLETS_6_BL_10.conf
│       │   │   ├── _NR_TASKLETS_7_BL_10.conf
│       │   │   ├── _NR_TASKLETS_8_BL_10.conf
│       │   │   ├── _NR_TASKLETS_9_BL_10.conf
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── gemv_openmp.c
│       │   │   │   │   └── gemv_utils.h
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── gemv.cu
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── HST-L/
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── input/
│       │   │   │   └── image_VanHateren.iml
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── HST-S/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── input/
│       │   │   │       │   └── image_VanHateren.iml
│       │   │   │       ├── kernel.cpp
│       │   │   │       ├── kernel.cu
│       │   │   │       ├── kernel.h
│       │   │   │       ├── main.cpp
│       │   │   │       └── support/
│       │   │   │           ├── common.h
│       │   │   │           ├── cuda-setup.h
│       │   │   │           ├── partitioner.h
│       │   │   │           ├── timer.h
│       │   │   │           └── verify.h
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── input/
│       │   │   │   └── image_VanHateren.iml
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── MLP/
│       │   │   ├── Makefile
│       │   │   ├── _BL_10.conf
│       │   │   ├── _NR_TASKLETS_10_BL_10.conf
│       │   │   ├── _NR_TASKLETS_11_BL_10.conf
│       │   │   ├── _NR_TASKLETS_12_BL_10.conf
│       │   │   ├── _NR_TASKLETS_13_BL_10.conf
│       │   │   ├── _NR_TASKLETS_14_BL_10.conf
│       │   │   ├── _NR_TASKLETS_15_BL_10.conf
│       │   │   ├── _NR_TASKLETS_16
│       │   │   ├── _NR_TASKLETS_16_BL_10.conf
│       │   │   ├── _NR_TASKLETS_17_BL_10.conf
│       │   │   ├── _NR_TASKLETS_18_BL_10.conf
│       │   │   ├── _NR_TASKLETS_19_BL_10.conf
│       │   │   ├── _NR_TASKLETS_1_BL_10.conf
│       │   │   ├── _NR_TASKLETS_20_BL_10.conf
│       │   │   ├── _NR_TASKLETS_21_BL_10.conf
│       │   │   ├── _NR_TASKLETS_22_BL_10.conf
│       │   │   ├── _NR_TASKLETS_23_BL_10.conf
│       │   │   ├── _NR_TASKLETS_24_BL_10.conf
│       │   │   ├── _NR_TASKLETS_2_BL_10.conf
│       │   │   ├── _NR_TASKLETS_3_BL_10.conf
│       │   │   ├── _NR_TASKLETS_4_BL_10.conf
│       │   │   ├── _NR_TASKLETS_5_BL_10.conf
│       │   │   ├── _NR_TASKLETS_6_BL_10.conf
│       │   │   ├── _NR_TASKLETS_7_BL_10.conf
│       │   │   ├── _NR_TASKLETS_8_BL_10.conf
│       │   │   ├── _NR_TASKLETS_9_BL_10.conf
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── mlp_openmp.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── mlp.cu
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── MRAM-Latency/
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   └── copy.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── cyclecount.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── NW/
│       │   │   ├── .conf
│       │   │   ├── Makefile
│       │   │   ├── _NR_TASKLETS_10_BL_1024
│       │   │   ├── _NR_TASKLETS_11_BL_1024
│       │   │   ├── _NR_TASKLETS_12_BL_1024
│       │   │   ├── _NR_TASKLETS_13_BL_1024
│       │   │   ├── _NR_TASKLETS_14_BL_1024
│       │   │   ├── _NR_TASKLETS_15_BL_1024
│       │   │   ├── _NR_TASKLETS_16_BL_1024
│       │   │   ├── _NR_TASKLETS_17_BL_1024
│       │   │   ├── _NR_TASKLETS_18_BL_1024
│       │   │   ├── _NR_TASKLETS_19_BL_1024
│       │   │   ├── _NR_TASKLETS_1_BL_1024
│       │   │   ├── _NR_TASKLETS_20_BL_1024
│       │   │   ├── _NR_TASKLETS_21_BL_1024
│       │   │   ├── _NR_TASKLETS_22_BL_1024
│       │   │   ├── _NR_TASKLETS_23_BL_1024
│       │   │   ├── _NR_TASKLETS_24_BL_1024
│       │   │   ├── _NR_TASKLETS_2_BL_1024
│       │   │   ├── _NR_TASKLETS_3_BL_1024
│       │   │   ├── _NR_TASKLETS_4_BL_1024
│       │   │   ├── _NR_TASKLETS_5_BL_1024
│       │   │   ├── _NR_TASKLETS_6_BL_1024
│       │   │   ├── _NR_TASKLETS_7_BL_1024
│       │   │   ├── _NR_TASKLETS_8_BL_1024
│       │   │   ├── _NR_TASKLETS_9_BL_1024
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── needle.cpp
│       │   │   │   │   ├── run
│       │   │   │   │   └── run_offload
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── Makefile_nvidia
│       │   │   │       ├── README
│       │   │   │       ├── common/
│       │   │   │       │   ├── common.mk
│       │   │   │       │   └── make.config
│       │   │   │       ├── needle.cu
│       │   │   │       ├── needle.h
│       │   │   │       ├── needle_kernel.cu
│       │   │   │       ├── run
│       │   │   │       └── timing.h
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── Operational-Intensity/
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── cyclecount.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── RED/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.cpp
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── app_baseline.cu
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── cyclecount.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── Random-GUPS/
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   └── gups.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── cyclecount.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SCAN-RSS/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.cpp
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── app_baseline.cu
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SCAN-SSA/
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SEL/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── ds.h
│       │   │   │       ├── kernel.cu
│       │   │   │       └── select.cu
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── STREAM/
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   ├── add.c
│       │   │   │   ├── copy.c
│       │   │   │   ├── copyw.c
│       │   │   │   ├── scale.c
│       │   │   │   └── triad.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── cyclecount.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── STRIDED/
│       │   │   ├── Makefile
│       │   │   ├── dpu/
│       │   │   │   └── strided.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   ├── run.sh
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── cyclecount.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── SpMV/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── app.cu
│       │   │   ├── data/
│       │   │   │   ├── bcsstk30.mtx
│       │   │   │   └── generate/
│       │   │   │       ├── Makefile
│       │   │   │       ├── generate.sh
│       │   │   │       └── replicate.c
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   ├── app.c
│       │   │   │   └── mram-management.h
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── matrix.h
│       │   │       ├── params.h
│       │   │       ├── timer.h
│       │   │       └── utils.h
│       │   ├── TRNS/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── kernel.cpp
│       │   │   │   │   ├── kernel.h
│       │   │   │   │   ├── main.cpp
│       │   │   │   │   └── support/
│       │   │   │   │       ├── common.h
│       │   │   │   │       ├── setup.h
│       │   │   │   │       ├── timer.h
│       │   │   │   │       └── verify.h
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── kernel.cu
│       │   │   │       ├── kernel.h
│       │   │   │       ├── main.cpp
│       │   │   │       └── support/
│       │   │   │           ├── common.h
│       │   │   │           ├── cuda-setup.h
│       │   │   │           ├── timer.h
│       │   │   │           └── verify.h
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── TS/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   ├── inputs/
│       │   │   │   │   │   └── randomlist33M.txt
│       │   │   │   │   ├── launch.sh
│       │   │   │   │   ├── mprofile.h
│       │   │   │   │   ├── streamp_openmp.cpp
│       │   │   │   │   └── tools.cpp
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── STREAMP.cu
│       │   │   │       ├── inputs/
│       │   │   │       │   └── randomlist33M.txt
│       │   │   │       ├── launch.sh
│       │   │   │       └── randlist.py
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── UNI/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       ├── ds.h
│       │   │   │       ├── kernel.cu
│       │   │   │       └── unique.cu
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   ├── VA/
│       │   │   ├── Makefile
│       │   │   ├── baselines/
│       │   │   │   ├── cpu/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── README
│       │   │   │   │   └── app_baseline.c
│       │   │   │   └── gpu/
│       │   │   │       ├── Makefile
│       │   │   │       ├── README
│       │   │   │       └── vec_add.cu
│       │   │   ├── dpu/
│       │   │   │   └── task.c
│       │   │   ├── host/
│       │   │   │   └── app.c
│       │   │   └── support/
│       │   │       ├── common.h
│       │   │       ├── params.h
│       │   │       └── timer.h
│       │   └── WRAM/
│       │       ├── Makefile
│       │       ├── dpu/
│       │       │   └── task.c
│       │       ├── host/
│       │       │   └── app.c
│       │       ├── run.sh
│       │       └── support/
│       │           ├── common.h
│       │           ├── cyclecount.h
│       │           ├── params.h
│       │           └── timer.h
│       ├── docker/
│       │   ├── compiler.dockerfile
│       │   └── parser.dockerfile
│       ├── pyproject.toml
│       ├── requirements.txt
│       ├── sdk/
│       │   ├── misc/
│       │   │   ├── accessMramFromDpu.c
│       │   │   ├── coreDump.c
│       │   │   ├── crt0.c
│       │   │   ├── dpu.lds
│       │   │   ├── internalStateReset.c
│       │   │   ├── linkerScript.lds
│       │   │   ├── restoreRegisters.c
│       │   │   └── restore_carry_and_zero_flag.h
│       │   ├── stdlib/
│       │   │   ├── abort.c
│       │   │   ├── assert.h
│       │   │   ├── atoi.c
│       │   │   ├── atol.c
│       │   │   ├── ctype.h
│       │   │   ├── errno.c
│       │   │   ├── errno.h
│       │   │   ├── exit.c
│       │   │   ├── inttypes.h
│       │   │   ├── iso646.h
│       │   │   ├── limits.h
│       │   │   ├── memchr.c
│       │   │   ├── memcmp.c
│       │   │   ├── memcpy.c
│       │   │   ├── memmove.c
│       │   │   ├── memmram_utils.h
│       │   │   ├── memset.c
│       │   │   ├── stdalign.h
│       │   │   ├── stdarg.h
│       │   │   ├── stdbool.h
│       │   │   ├── stddef.h
│       │   │   ├── stdint.h
│       │   │   ├── stdio.c
│       │   │   ├── stdio.h
│       │   │   ├── stdlib.h
│       │   │   ├── stdnoreturn.h
│       │   │   ├── stpcpy.c
│       │   │   ├── stpncpy.c
│       │   │   ├── strcat.c
│       │   │   ├── strchr.c
│       │   │   ├── strcmp.c
│       │   │   ├── strcpy.c
│       │   │   ├── strcspn.c
│       │   │   ├── strdup.c
│       │   │   ├── strerror.c
│       │   │   ├── string.h
│       │   │   ├── strlen.c
│       │   │   ├── strlwr.c
│       │   │   ├── strncat.c
│       │   │   ├── strncmp.c
│       │   │   ├── strncpy.c
│       │   │   ├── strndup.c
│       │   │   ├── strnlen.c
│       │   │   ├── strpbrk.c
│       │   │   ├── strrchr.c
│       │   │   ├── strrev.c
│       │   │   ├── strsep.c
│       │   │   ├── strspn.c
│       │   │   ├── strstr.c
│       │   │   ├── strtok_r.c
│       │   │   ├── strtol.c
│       │   │   └── strupr.c
│       │   └── syslib/
│       │       ├── absvdi2.c
│       │       ├── absvsi2.c
│       │       ├── adddf3.c
│       │       ├── addsf3.c
│       │       ├── addvdi3.c
│       │       ├── addvsi3.c
│       │       ├── alloc.c
│       │       ├── alloc.h
│       │       ├── ashldi3.c
│       │       ├── ashrdi3.c
│       │       ├── atomic_bit.h
│       │       ├── atomics.c
│       │       ├── attributes.h
│       │       ├── barrier.c
│       │       ├── barrier.h
│       │       ├── bswapdi2.c
│       │       ├── bswapsi2.c
│       │       ├── buddy_alloc.c
│       │       ├── buddy_alloc.h
│       │       ├── buddy_realloc.c
│       │       ├── built_ins.h
│       │       ├── clzdi2.c
│       │       ├── clzsi2.c
│       │       ├── cmpdi2.c
│       │       ├── comparedf2.c
│       │       ├── comparesf2.c
│       │       ├── ctzdi2.c
│       │       ├── ctzsi2.c
│       │       ├── defs.c
│       │       ├── defs.h
│       │       ├── devprivate.h
│       │       ├── div32.c
│       │       ├── divdf3.c
│       │       ├── divdi3.c
│       │       ├── divmodsi4.c
│       │       ├── divsf3.c
│       │       ├── divsi3.c
│       │       ├── dpuconst.h
│       │       ├── dpufault.h
│       │       ├── dpuruntime.h
│       │       ├── extendhfsf2.c
│       │       ├── extendsfdf2.c
│       │       ├── ffsdi2.c
│       │       ├── ffssi2.c
│       │       ├── ffsti2.c
│       │       ├── fixdfdi.c
│       │       ├── fixdfsi.c
│       │       ├── fixsfdi.c
│       │       ├── fixsfsi.c
│       │       ├── fixunsdfdi.c
│       │       ├── fixunsdfsi.c
│       │       ├── fixunssfdi.c
│       │       ├── fixunssfsi.c
│       │       ├── float.h
│       │       ├── floatdidf.c
│       │       ├── floatdisf.c
│       │       ├── floatsidf.c
│       │       ├── floatsisf.c
│       │       ├── floatundidf.c
│       │       ├── floatundisf.c
│       │       ├── floatunsidf.c
│       │       ├── floatunsisf.c
│       │       ├── fp_add_impl.inc
│       │       ├── fp_extend.h
│       │       ├── fp_extend_impl.inc
│       │       ├── fp_fixint_impl.inc
│       │       ├── fp_fixuint_impl.inc
│       │       ├── fp_lib.h
│       │       ├── fp_mul_impl.inc
│       │       ├── fp_trunc.h
│       │       ├── fp_trunc_impl.inc
│       │       ├── fsb_allocator.c
│       │       ├── fsb_allocator.h
│       │       ├── handshake.c
│       │       ├── handshake.h
│       │       ├── int_endianness.h
│       │       ├── int_lib.h
│       │       ├── int_math.h
│       │       ├── int_types.h
│       │       ├── int_util.c
│       │       ├── int_util.h
│       │       ├── listener.c
│       │       ├── lshrdi3.c
│       │       ├── macro_utils.h
│       │       ├── mcount.c
│       │       ├── moddi3.c
│       │       ├── modsi3.c
│       │       ├── mram.h
│       │       ├── mul32.c
│       │       ├── mul64.c
│       │       ├── muldc3.c
│       │       ├── muldf3.c
│       │       ├── mulodi4.c
│       │       ├── mulosi4.c
│       │       ├── mulsf3.c
│       │       ├── mulvdi3.c
│       │       ├── mulvsi3.c
│       │       ├── mutex.h
│       │       ├── negdf2.c
│       │       ├── negdi2.c
│       │       ├── negsf2.c
│       │       ├── negvdi2.c
│       │       ├── negvsi2.c
│       │       ├── paritydi2.c
│       │       ├── paritysi2.c
│       │       ├── perfcounter.c
│       │       ├── perfcounter.h
│       │       ├── popcountdi2.c
│       │       ├── popcountsi2.c
│       │       ├── powidf2.c
│       │       ├── powisf2.c
│       │       ├── profiling.c
│       │       ├── profiling.h
│       │       ├── profiling_internals.h
│       │       ├── sem.c
│       │       ├── sem.h
│       │       ├── seqread.h
│       │       ├── seqread.inc
│       │       ├── seqread1024.c
│       │       ├── seqread128.c
│       │       ├── seqread256.c
│       │       ├── seqread32.c
│       │       ├── seqread512.c
│       │       ├── seqread64.c
│       │       ├── soft_cache.c
│       │       ├── soft_cache.h
│       │       ├── subdf3.c
│       │       ├── subsf3.c
│       │       ├── subvdi3.c
│       │       ├── subvsi3.c
│       │       ├── sysdef.h
│       │       ├── truncdfhf2.c
│       │       ├── truncdfsf2.c
│       │       ├── truncsfhf2.c
│       │       ├── ucmpdi2.c
│       │       ├── udiv64.c
│       │       ├── udivdi3.c
│       │       ├── udivmodsi4.c
│       │       ├── udivsi3.c
│       │       ├── umoddi3.c
│       │       ├── umodsi3.c
│       │       └── waitqueue.c
│       ├── src/
│       │   ├── abi/
│       │   │   ├── binary/
│       │   │   │   ├── executable.py
│       │   │   │   ├── liveness.py
│       │   │   │   └── relocatable.py
│       │   │   ├── directive/
│       │   │   │   ├── ascii_directive.py
│       │   │   │   ├── asciz_directive.py
│       │   │   │   ├── byte_directive.py
│       │   │   │   ├── long_directive.py
│       │   │   │   ├── quad_directive.py
│       │   │   │   ├── short_directive.py
│       │   │   │   └── zero_directive.py
│       │   │   ├── isa/
│       │   │   │   ├── cc/
│       │   │   │   │   ├── _base_cc.py
│       │   │   │   │   ├── acquire_cc.py
│       │   │   │   │   ├── add_nz_cc.py
│       │   │   │   │   ├── boot_cc.py
│       │   │   │   │   ├── const_cc_ge0.py
│       │   │   │   │   ├── const_cc_geu.py
│       │   │   │   │   ├── const_cc_zero.py
│       │   │   │   │   ├── count_nz_cc.py
│       │   │   │   │   ├── div_cc.py
│       │   │   │   │   ├── div_nz_cc.py
│       │   │   │   │   ├── ext_sub_set_cc.py
│       │   │   │   │   ├── false_cc.py
│       │   │   │   │   ├── imm_shift_nz_cc.py
│       │   │   │   │   ├── log_nz_cc.py
│       │   │   │   │   ├── log_set_cc.py
│       │   │   │   │   ├── mul_nz_cc.py
│       │   │   │   │   ├── no_cc.py
│       │   │   │   │   ├── release_cc.py
│       │   │   │   │   ├── shift_nz_cc.py
│       │   │   │   │   ├── sub_nz_cc.py
│       │   │   │   │   ├── sub_set_cc.py
│       │   │   │   │   ├── true_cc.py
│       │   │   │   │   └── true_false_cc.py
│       │   │   │   ├── exception.py
│       │   │   │   ├── flag.py
│       │   │   │   ├── instruction/
│       │   │   │   │   ├── condition.py
│       │   │   │   │   ├── endian.py
│       │   │   │   │   ├── instruction.py
│       │   │   │   │   ├── op_code.py
│       │   │   │   │   └── suffix.py
│       │   │   │   └── register/
│       │   │   │       ├── gp_register.py
│       │   │   │       ├── pair_register.py
│       │   │   │       └── sp_register.py
│       │   │   ├── label/
│       │   │   │   ├── label.py
│       │   │   │   └── symbol.py
│       │   │   ├── section/
│       │   │   │   ├── section.py
│       │   │   │   ├── section_flag.py
│       │   │   │   ├── section_name.py
│       │   │   │   └── section_type.py
│       │   │   └── word/
│       │   │       ├── _base_word.py
│       │   │       ├── data_address_word.py
│       │   │       ├── data_word.py
│       │   │       ├── double_data_word.py
│       │   │       ├── immediate.py
│       │   │       ├── instruction_address_word.py
│       │   │       ├── instruction_word.py
│       │   │       └── representation.py
│       │   ├── assembler/
│       │   │   ├── assembler.py
│       │   │   └── data_prep/
│       │   │       ├── bin.py
│       │   │       ├── bs_data_prep.py
│       │   │       ├── gemv_data_prep.py
│       │   │       ├── hst_data_prep.py
│       │   │       ├── mlp_data_prep.py
│       │   │       ├── red_data_prep.py
│       │   │       ├── scan_rss_data_prep.py
│       │   │       ├── scan_ssa_data_prep.py
│       │   │       ├── sel_data_prep.py
│       │   │       ├── trns_data_prep.py
│       │   │       ├── ts_data_prep.py
│       │   │       ├── uni_data_prep.py
│       │   │       └── va_data_prep.py
│       │   ├── compiler/
│       │   │   └── compiler.py
│       │   ├── converter/
│       │   │   ├── condition_converter.py
│       │   │   ├── endian_converter.py
│       │   │   ├── instruction_converter.py
│       │   │   ├── op_code_converter.py
│       │   │   ├── register_converter.py
│       │   │   ├── section_flag_converter.py
│       │   │   ├── section_name_converter.py
│       │   │   ├── section_type_converter.py
│       │   │   ├── suffix_converter.py
│       │   │   └── symbol_converter.py
│       │   ├── encoder/
│       │   │   ├── ascii_encoder.py
│       │   │   ├── byte.py
│       │   │   ├── directive_encoder.py
│       │   │   └── instruction_encoder.py
│       │   ├── initializer/
│       │   │   ├── directive_initializer.py
│       │   │   ├── instruction_initializer.py
│       │   │   ├── int_initializer.py
│       │   │   └── str_initializer.py
│       │   ├── iss/
│       │   │   ├── cpu/
│       │   │   │   ├── cpu.py
│       │   │   │   ├── fini_thread.py
│       │   │   │   ├── init_thread.py
│       │   │   │   └── sched_thread.py
│       │   │   ├── dpu/
│       │   │   │   ├── alu.py
│       │   │   │   ├── decoder.py
│       │   │   │   ├── dispatcher.py
│       │   │   │   ├── dma.py
│       │   │   │   ├── dpu.py
│       │   │   │   ├── logic.py
│       │   │   │   ├── scheduler.py
│       │   │   │   └── thread.py
│       │   │   ├── dram/
│       │   │   │   ├── mram.py
│       │   │   │   ├── mram_command.py
│       │   │   │   └── word.py
│       │   │   ├── register/
│       │   │   │   ├── condition_register.py
│       │   │   │   ├── exception_register.py
│       │   │   │   ├── flag_register.py
│       │   │   │   ├── gp_register.py
│       │   │   │   ├── pc_register.py
│       │   │   │   ├── register_file.py
│       │   │   │   └── sp_register.py
│       │   │   ├── sram/
│       │   │   │   ├── atomic.py
│       │   │   │   ├── iram.py
│       │   │   │   ├── lock.py
│       │   │   │   └── wram.py
│       │   │   └── system.py
│       │   ├── linker_/
│       │   │   ├── linker.py
│       │   │   ├── linker_script.py
│       │   │   └── logic/
│       │   │       ├── instruction_assigner.py
│       │   │       ├── label_assigner.py
│       │   │       ├── liveness_analyzer.py
│       │   │       └── set_assigner.py
│       │   ├── main.py
│       │   ├── parser_/
│       │   │   ├── grammar/
│       │   │   │   ├── .antlr/
│       │   │   │   │   ├── assembly.interp
│       │   │   │   │   ├── assembly.tokens
│       │   │   │   │   ├── assemblyLexer.interp
│       │   │   │   │   ├── assemblyLexer.java
│       │   │   │   │   ├── assemblyLexer.tokens
│       │   │   │   │   └── assemblyParser.java
│       │   │   │   ├── assembly.g4
│       │   │   │   ├── assembly.interp
│       │   │   │   ├── assembly.tokens
│       │   │   │   ├── assemblyLexer.interp
│       │   │   │   ├── assemblyLexer.py
│       │   │   │   ├── assemblyLexer.tokens
│       │   │   │   ├── assemblyListener.py
│       │   │   │   └── assemblyParser.py
│       │   │   ├── grammar_generator.py
│       │   │   └── parser.py
│       │   └── util/
│       │       ├── config_loader.py
│       │       ├── docker_client.py
│       │       ├── param_loader.py
│       │       └── path_collector.py
│       └── test/
│           ├── abi/
│           │   ├── binary/
│           │   │   ├── executable_test.py
│           │   │   └── liveness_test.py
│           │   ├── directive/
│           │   │   ├── ascii_directive_test.py
│           │   │   ├── asciz_directive_test.py
│           │   │   ├── byte_directive_test.py
│           │   │   ├── long_directive_test.py
│           │   │   ├── quad_directive_test.py
│           │   │   ├── short_directive_test.py
│           │   │   └── zero_directive_test.py
│           │   ├── isa/
│           │   │   └── register/
│           │   │       ├── gp_register_test.py
│           │   │       └── pair_register_test.py
│           │   ├── label/
│           │   │   └── label_test.py
│           │   ├── section/
│           │   │   └── section_test.py
│           │   └── word/
│           │       ├── immediate_test.py
│           │       └── words_test.py
│           ├── compiler/
│           │   └── compiler_test.py
│           ├── encoder/
│           │   ├── ascii_encoder_test.py
│           │   ├── directive_encoder_test.py
│           │   └── instruction_encoder_test.py
│           ├── iss/
│           │   ├── dpu/
│           │   │   ├── dma_test.py
│           │   │   └── scheduler_test.py
│           │   ├── dram/
│           │   │   └── mram_test.py
│           │   ├── register/
│           │   │   └── register_file_test.py
│           │   └── sram/
│           │       ├── atomic_test.py
│           │       ├── iram_test.py
│           │       └── wram_test.py
│           ├── linker_/
│           │   └── linker_test.py
│           ├── parser_/
│           │   ├── grammar_generator_test.py
│           │   └── parser_test.py
│           └── util/
│               └── config_loader_test.py
└── tools/
    ├── README.md
    ├── upmem_profiler/
    │   ├── CMakeLists.txt
    │   ├── script/
    │   │   ├── active_tasklet_profile.sh
    │   │   ├── build.sh
    │   │   ├── example.sh
    │   │   ├── function_profile.sh
    │   │   ├── instruction_mix_profile.sh
    │   │   ├── mram_access_pattern_profile.sh
    │   │   ├── timeline_profile.sh
    │   │   └── tlb_behavior_profile.sh
    │   └── src/
    │       ├── CMakeLists.txt
    │       ├── abi/
    │       │   └── instruction/
    │       │       ├── op_code.h
    │       │       └── suffix.h
    │       ├── basic/
    │       │   ├── instruction_parser.cc
    │       │   ├── instruction_parser.h
    │       │   ├── interval.cc
    │       │   ├── interval.h
    │       │   ├── reg_file_parser.cc
    │       │   ├── reg_file_parser.h
    │       │   ├── stats_parser.cc
    │       │   └── stats_parser.h
    │       ├── converter/
    │       │   ├── op_code_converter.cc
    │       │   ├── op_code_converter.h
    │       │   ├── suffix_converter.cc
    │       │   └── suffix_converter.h
    │       ├── instruction_mix/
    │       │   ├── instruction_mix_profiler.cc
    │       │   └── instruction_mix_profiler.h
    │       ├── main.cc
    │       ├── main.h
    │       └── util/
    │           ├── argument_parser.cc
    │           ├── argument_parser.h
    │           └── config_loader.h
    └── upmem_reg_model/
        ├── data/
        │   ├── input.xlsx
        │   └── output.xlsx
        ├── script/
        │   └── format.sh
        └── src/
            ├── benchmark/
            │   ├── _base_benchmark.py
            │   ├── bs.py
            │   ├── gemv.py
            │   ├── hst_l.py
            │   ├── hst_s.py
            │   ├── mlp.py
            │   ├── red.py
            │   ├── scan_rss.py
            │   ├── scan_ssa.py
            │   ├── sel.py
            │   ├── trns.py
            │   ├── ts.py
            │   ├── uni.py
            │   └── va.py
            ├── io_/
            │   ├── excel_reader.py
            │   └── excel_writer.py
            ├── main.py
            └── regression/
                ├── datum.py
                └── model.py
Download .txt
Showing preview only (1,520K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (17659 symbols across 1760 files)

FILE: golang/uPIMulator/benchmark/BS/baselines/cpu/bs_omp.c
  function create_test_file (line 17) | void create_test_file(DTYPE * input, uint64_t  nr_elements, DTYPE * quer...
  function binarySearch (line 38) | uint64_t binarySearch(DTYPE * input, uint64_t input_size, DTYPE* querys,...
  function main (line 76) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/BS/baselines/cpu/timer.h
  type Timer (line 38) | typedef struct Timer{
  function start (line 46) | void start(Timer *timer, int i, int rep) {
  function stop (line 53) | void stop(Timer *timer, int i) {
  function print (line 59) | void print(Timer *timer, int i, int REP) { printf("%f\t", timer->time[i]...

FILE: golang/uPIMulator/benchmark/BS/baselines/gpu/cpu_lib.py
  function binary_search (line 4) | def binary_search(arr, search):

FILE: golang/uPIMulator/benchmark/BS/dpu/task.c
  function DTYPE (line 19) | DTYPE __attribute__ ((noinline)) search(DTYPE *bufferA, DTYPE searching_...
  function main (line 41) | int main(void){
  function main_kernel1 (line 47) | int main_kernel1() {

FILE: golang/uPIMulator/benchmark/BS/host/app.c
  function create_test_file (line 28) | void create_test_file(DTYPE * input, DTYPE * querys, uint64_t  nr_elemen...
  function binarySearch (line 40) | int64_t binarySearch(DTYPE * input, DTYPE * querys, DTYPE input_size, ui...
  function main (line 69) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/BS/support/common.h
  type dpu_arguments_t (line 27) | typedef struct {
  type dpu_results_t (line 37) | typedef struct {

FILE: golang/uPIMulator/benchmark/BS/support/params.h
  type Params (line 6) | typedef struct Params {
  function usage (line 12) | void usage() {
  function input_params (line 26) | struct Params input_params(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/BS/support/timer.h
  type Timer (line 38) | typedef struct Timer{
  function start (line 46) | void start(Timer *timer, int i, int rep) {
  function stop (line 53) | void stop(Timer *timer, int i) {
  function print (line 59) | void print(Timer *timer, int i, int REP) { printf("%f\t", timer->time[i]...

FILE: golang/uPIMulator/benchmark/GEMV/baselines/cpu/gemv_openmp.c
  function main (line 6) | int main(int argc, char *argv[])
  function gemv (line 56) | void gemv(double** A, double* x, size_t rows, size_t cols, double** b) {
  function make_hilbert_mat (line 64) | void make_hilbert_mat(size_t rows, size_t cols, double*** A) {
  function sum_vec (line 73) | double sum_vec(double* vec, size_t rows) {

FILE: golang/uPIMulator/benchmark/GEMV/baselines/cpu/gemv_utils.h
  function allocate_dense (line 1) | void allocate_dense(size_t rows,size_t  cols, double*** dense) {
  function print_mat (line 12) | void print_mat(double** A, size_t rows, size_t cols) {
  function print_vec (line 21) | void print_vec(double* b, size_t rows) {

FILE: golang/uPIMulator/benchmark/GEMV/dpu/task.c
  function gemv (line 18) | void __attribute__ ((noinline)) gemv(T *bufferC, T *bufferA, T *bufferB,...
  function main (line 29) | int main() {

FILE: golang/uPIMulator/benchmark/GEMV/host/app.c
  function init_data (line 36) | static void init_data(T* A, T* B, unsigned int m_size, unsigned int n_si...
  function gemv_host (line 51) | static void gemv_host(T* C, T* A, T* B, unsigned int m_size, unsigned in...
  function main (line 66) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/GEMV/support/common.h
  type dpu_arguments_t (line 5) | typedef struct {
  type dpu_info_t (line 13) | struct dpu_info_t {
  type dpu_info_t (line 18) | struct dpu_info_t

FILE: golang/uPIMulator/benchmark/GEMV/support/params.h
  type Params (line 6) | typedef struct Params {
  function usage (line 13) | static void usage() {
  function input_params (line 28) | struct Params input_params(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/GEMV/support/timer.h
  type Timer (line 38) | typedef struct Timer{
  function start (line 46) | void start(Timer *timer, int i, int rep) {
  function stop (line 53) | void stop(Timer *timer, int i) {
  function print (line 62) | void print(Timer *timer, int i, int REP) { printf("%f\t", timer->time[i]...

FILE: golang/uPIMulator/benchmark/HST-L/dpu/task.c
  function histogram (line 33) | void __attribute__ ((noinline)) histogram(uint32_t* histo, uint32_t bins...
  function main (line 46) | int main(void) {
  function main_kernel1 (line 52) | int main_kernel1() {

FILE: golang/uPIMulator/benchmark/HST-L/host/app.c
  function read_input (line 36) | static void read_input(T* A, const Params p) {
  function histogram_host (line 59) | static void histogram_host(unsigned int* histo, T* A, unsigned int bins,...
  function main (line 77) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/HST-L/support/common.h
  type dpu_arguments_t (line 24) | typedef struct {

FILE: golang/uPIMulator/benchmark/HST-L/support/params.h
  type Params (line 6) | typedef struct Params {
  function usage (line 16) | static void usage() {
  function input_params (line 33) | struct Params input_params(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/HST-L/support/timer.h
  type Timer (line 38) | typedef struct Timer{
  function start (line 46) | void start(Timer *timer, int i, int rep) {
  function stop (line 53) | void stop(Timer *timer, int i) {
  function print (line 59) | void print(Timer *timer, int i, int REP) { printf("Time (ms): %f\t", tim...

FILE: golang/uPIMulator/benchmark/HST-S/baselines/cpu/app_baseline.c
  type Params (line 30) | typedef struct Params {
  function read_input (line 44) | static void read_input(T* A, const Params p) {
  function histogram_host (line 69) | static void histogram_host(unsigned int* histo, T* A, unsigned int bins,...
  function usage (line 93) | void usage() {
  function input_params (line 111) | struct Params input_params(int argc, char **argv) {
  function main (line 149) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/HST-S/baselines/gpu/kernel.cpp
  function run_cpu_threads (line 44) | void run_cpu_threads(std::atomic_uint *histo, unsigned int *data, int si...

FILE: golang/uPIMulator/benchmark/HST-S/baselines/gpu/main.cpp
  type Params (line 47) | struct Params {
    method Params (line 59) | Params(int argc, char **argv) {
    method usage (line 109) | void usage() {
  function read_input (line 138) | void read_input(unsigned int *input, const Params &p) {
  function main (line 161) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/HST-S/baselines/gpu/support/cuda-setup.h
  function max_gpu_threads (line 60) | struct CUDASetup {

FILE: golang/uPIMulator/benchmark/HST-S/baselines/gpu/support/partitioner.h
  type Partitioner (line 49) | typedef struct Partitioner {
  function Partitioner (line 82) | inline Partitioner partitioner_create(int n_tasks, float alpha
  function cpu_first (line 122) | inline int cpu_first(Partitioner *p) {
  function __device__ (line 136) | __device__ inline int gpu_first(Partitioner *p) {
  function cpu_more (line 158) | inline bool cpu_more(const Partitioner *p) {
  function __device__ (line 171) | __device__ inline bool gpu_more(const Partitioner *p) {
  function cpu_next (line 181) | inline int cpu_next(Partitioner *p) {
  function __device__ (line 195) | __device__ inline int gpu_next(Partitioner *p) {

FILE: golang/uPIMulator/benchmark/HST-S/baselines/gpu/support/timer.h
  type Timer (line 44) | struct Timer {
  function stop (line 59) | void stop(string name) {
  function print (line 67) | void print(string name, unsigned int REP) { printf("%s Time (ms): %f\n",...
  function release (line 69) | void release(string name){

FILE: golang/uPIMulator/benchmark/HST-S/baselines/gpu/support/verify.h
  function compare_output (line 40) | inline int compare_output(unsigned int *outp, unsigned int *outpCPU, int...
  function HistogramCPU (line 51) | inline void HistogramCPU(unsigned int *histo, unsigned int *data, int si...
  function verify (line 60) | inline void verify(unsigned int *histo, unsigned int *input, int size, i...

FILE: golang/uPIMulator/benchmark/HST-S/dpu/task.c
  function histogram (line 26) | void __attribute__ ((noinline)) histogram(uint32_t* histo, uint32_t bins...
  function main (line 37) | int main(void) {
  function main_kernel1 (line 43) | int main_kernel1() {

FILE: golang/uPIMulator/benchmark/HST-S/host/app.c
  function read_input (line 36) | static void read_input(T* A, const Params p) {
  function histogram_host (line 59) | static void histogram_host(unsigned int* histo, T* A, unsigned int bins,...
  function main (line 77) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/HST-S/support/common.h
  type dpu_arguments_t (line 24) | typedef struct {

FILE: golang/uPIMulator/benchmark/HST-S/support/params.h
  type Params (line 6) | typedef struct Params {
  function usage (line 16) | static void usage() {
  function input_params (line 33) | struct Params input_params(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/HST-S/support/timer.h
  type Timer (line 38) | typedef struct Timer{
  function start (line 46) | void start(Timer *timer, int i, int rep) {
  function stop (line 53) | void stop(Timer *timer, int i) {
  function print (line 59) | void print(Timer *timer, int i, int REP) { printf("Time (ms): %f\t", tim...

FILE: golang/uPIMulator/benchmark/MLP/baselines/cpu/mlp_openmp.c
  function init_data (line 22) | static void init_data(T** A, T* B, unsigned int m_size, unsigned int n_s...
  function mlp_host (line 42) | static void mlp_host(T* C, T** A, T* B, unsigned int m_size, unsigned in...
  function mlp_host_sum (line 60) | static uint64_t mlp_host_sum(uint64_t n_size, uint64_t m_size) {
  type Params (line 69) | typedef struct Params {
  function usage (line 78) | void usage() {
  function input_params (line 92) | struct Params input_params(int argc, char **argv) {
  function main (line 126) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/MLP/dpu/task.c
  function gemv (line 18) | void __attribute__ ((noinline)) gemv(T *bufferC, T *bufferA, T *bufferB,...
  function main (line 29) | int main() {

FILE: golang/uPIMulator/benchmark/MLP/host/app.c
  function init_data (line 38) | static void init_data(T** A, T* B, T* B_host, unsigned int m_size, unsig...
  function mlp_host (line 59) | static void mlp_host(T* C, T** A, T* B, unsigned int m_size, unsigned in...
  function main (line 78) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/MLP/support/common.h
  type dpu_arguments_t (line 5) | typedef struct {
  type dpu_info_t (line 13) | struct dpu_info_t {
  type dpu_info_t (line 18) | struct dpu_info_t

FILE: golang/uPIMulator/benchmark/MLP/support/params.h
  type Params (line 6) | typedef struct Params {
  function usage (line 13) | static void usage() {
  function input_params (line 28) | struct Params input_params(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/MLP/support/timer.h
  type Timer (line 38) | typedef struct Timer{
  function start (line 46) | void start(Timer *timer, int i, int rep) {
  function stop (line 53) | void stop(Timer *timer, int i) {
  function print (line 62) | void print(Timer *timer, int i, int REP) { printf("%f\t", timer->time[i]...

FILE: golang/uPIMulator/benchmark/RED/baselines/cpu/app_baseline.cpp
  function read_input (line 50) | static void read_input(T* A, unsigned int nr_elements) {
  function T (line 62) | static T reduction_host(T* A, unsigned int nr_elements) {
  type Params (line 71) | struct Params {
  function usage (line 79) | void usage() {
  function input_params (line 95) | struct Params input_params(int argc, char **argv) {
  function main (line 129) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/RED/dpu/task.c
  function T (line 24) | T __attribute__ ((noinline)) reduction(T *input, unsigned int l_size){
  function main (line 39) | int main(void) {
  function main_kernel1 (line 45) | int main_kernel1() {

FILE: golang/uPIMulator/benchmark/RED/host/app.c
  function read_input (line 33) | static void read_input(T* A, unsigned int nr_elements) {
  function T (line 42) | static T reduction_host(T* A, unsigned int nr_elements) {
  function main (line 51) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/RED/support/common.h
  type dpu_arguments_t (line 42) | typedef struct {
  type dpu_results_t (line 51) | typedef struct {

FILE: golang/uPIMulator/benchmark/RED/support/cyclecount.h
  type perfcounter_cycles (line 4) | typedef struct perfcounter_cycles{
  function timer_start (line 11) | void timer_start(perfcounter_cycles *cycles){
  function timer_stop (line 15) | uint64_t timer_stop(perfcounter_cycles *cycles){

FILE: golang/uPIMulator/benchmark/RED/support/params.h
  type Params (line 6) | typedef struct Params {
  function usage (line 13) | static void usage() {
  function input_params (line 28) | struct Params input_params(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/RED/support/timer.h
  type Timer (line 38) | typedef struct Timer{
  function start (line 46) | void start(Timer *timer, int i, int rep) {
  function stop (line 53) | void stop(Timer *timer, int i) {
  function print (line 59) | void print(Timer *timer, int i, int REP) { printf("Time (ms): %f\t", tim...

FILE: golang/uPIMulator/benchmark/SCAN-RSS/baselines/cpu/app_baseline.cpp
  function read_input (line 51) | static void read_input(T* A, unsigned int nr_elements) {
  function scan_host (line 63) | static void scan_host(T* C, T* A, unsigned int nr_elements) {
  type Params (line 71) | struct Params {
  function usage (line 79) | void usage() {
  function input_params (line 95) | struct Params input_params(int argc, char **argv) {
  function main (line 129) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/SCAN-RSS/dpu/task.c
  function T (line 24) | T __attribute__ ((noinline)) reduction(T *input){
  function T (line 33) | T __attribute__ ((noinline)) scan(T *output, T *input){
  function T (line 42) | T __attribute__ ((noinline)) handshake_sync(T l_count, unsigned int task...
  function add_ (line 63) | void __attribute__ ((noinline)) add_(T *output, T p_count){
  function main (line 75) | int main(void) {
  function main_kernel1 (line 81) | int main_kernel1() {
  function main_kernel2 (line 134) | int main_kernel2() {

FILE: golang/uPIMulator/benchmark/SCAN-RSS/host/app.c
  function read_input (line 35) | static void read_input(T* A, unsigned int nr_elements, unsigned int nr_e...
  function scan_host (line 47) | static void scan_host(T* C, T* A, unsigned int nr_elements) {
  function main (line 55) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/SCAN-RSS/support/common.h
  type dpu_arguments_t (line 44) | typedef struct {
  type dpu_results_t (line 54) | typedef struct {

FILE: golang/uPIMulator/benchmark/SCAN-RSS/support/params.h
  type Params (line 6) | typedef struct Params {
  function usage (line 13) | static void usage() {
  function input_params (line 28) | struct Params input_params(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/SCAN-RSS/support/timer.h
  type Timer (line 38) | typedef struct Timer{
  function start (line 46) | void start(Timer *timer, int i, int rep) {
  function stop (line 53) | void stop(Timer *timer, int i) {
  function print (line 59) | void print(Timer *timer, int i, int REP) { printf("Time (ms): %f\t", tim...

FILE: golang/uPIMulator/benchmark/SCAN-SSA/dpu/task.c
  function T (line 24) | T __attribute__ ((noinline)) scan(T *output, T *input){
  function T (line 34) | T __attribute__ ((noinline)) handshake_sync(T l_count, unsigned int task...
  function add_ (line 55) | void __attribute__ ((noinline)) add_(T *output, T p_count){
  function main (line 67) | int main(void) {
  function main_kernel1 (line 73) | int main_kernel1() {
  function main_kernel2 (line 136) | int main_kernel2() {

FILE: golang/uPIMulator/benchmark/SCAN-SSA/host/app.c
  function read_input (line 35) | static void read_input(T* A, unsigned int nr_elements, unsigned int nr_e...
  function scan_host (line 47) | static void scan_host(T* C, T* A, unsigned int nr_elements) {
  function main (line 55) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/SCAN-SSA/support/common.h
  type dpu_arguments_t (line 44) | typedef struct {
  type dpu_results_t (line 54) | typedef struct {

FILE: golang/uPIMulator/benchmark/SCAN-SSA/support/params.h
  type Params (line 6) | typedef struct Params {
  function usage (line 13) | static void usage() {
  function input_params (line 28) | struct Params input_params(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/SCAN-SSA/support/timer.h
  type Timer (line 38) | typedef struct Timer{
  function start (line 46) | void start(Timer *timer, int i, int rep) {
  function stop (line 53) | void stop(Timer *timer, int i) {
  function print (line 59) | void print(Timer *timer, int i, int REP) { printf("Time (ms): %f\t", tim...

FILE: golang/uPIMulator/benchmark/SEL/baselines/cpu/app_baseline.c
  function pred (line 23) | bool pred(const uint64_t x){
  function select_host (line 46) | static int select_host(int size, int t) {
  type Params (line 65) | typedef struct Params {
  function usage (line 73) | void usage() {
  function input_params (line 89) | struct Params input_params(int argc, char **argv) {
  function main (line 121) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/SEL/baselines/gpu/ds.h
  function __device__ (line 73) | __device__ int dynamic_wg_id(volatile unsigned int *flags, const int num...
  function __device__ (line 82) | __device__ void ds_sync(volatile unsigned int *flags, const int my_s){
  function __device__ (line 98) | __device__ void ds_sync_irregular(volatile unsigned int *flags, const in...
  function __device__ (line 118) | __device__ void ds_sync_irregular_partition(volatile unsigned int *flags...
  function __device__ (line 225) | __device__ inline int lane_id(void) { return threadIdx.x % WARP_SIZE; }
  function __device__ (line 226) | __device__ inline int warp_id(void) { return threadIdx.x / WARP_SIZE; }
  function warp_prefix_sums (line 228) | __device__ unsigned int warp_prefix_sums(bool p){
  function __device__ (line 233) | __device__ int warp_scan(int val, volatile int *s_data){
  function __device__ (line 268) | __device__ int block_binary_prefix_sums(int* count, int x){

FILE: golang/uPIMulator/benchmark/SEL/dpu/task.c
  function select (line 24) | unsigned int __attribute__ ((noinline)) select(T *output, T *input){
  function handshake_sync (line 37) | unsigned int __attribute__ ((noinline)) handshake_sync(unsigned int l_co...
  function main (line 61) | int main(void) {
  function main_kernel1 (line 67) | int main_kernel1() {

FILE: golang/uPIMulator/benchmark/SEL/host/app.c
  function read_input (line 35) | static void read_input(T* A, unsigned int nr_elements, unsigned int nr_e...
  function select_host (line 48) | static unsigned int select_host(T* C, T* A, unsigned int nr_elements) {
  function main (line 60) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/SEL/support/common.h
  type dpu_arguments_t (line 5) | typedef struct {
  type dpu_results_t (line 13) | typedef struct {
  function pred (line 32) | bool pred(const T x){

FILE: golang/uPIMulator/benchmark/SEL/support/params.h
  type Params (line 6) | typedef struct Params {
  function usage (line 13) | static void usage() {
  function input_params (line 28) | struct Params input_params(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/SEL/support/timer.h
  type Timer (line 38) | typedef struct Timer{
  function start (line 46) | void start(Timer *timer, int i, int rep) {
  function stop (line 53) | void stop(Timer *timer, int i) {
  function print (line 59) | void print(Timer *timer, int i, int REP) { printf("Time (ms): %f\t", tim...

FILE: golang/uPIMulator/benchmark/TRNS/baselines/cpu/kernel.cpp
  function run_cpu_threads_100 (line 43) | void run_cpu_threads_100(T *input, std::atomic_int *finished, std::atomi...
  function run_cpu_threads_010 (line 98) | void run_cpu_threads_010(T *input, std::atomic_int* head, int a, int b, ...

FILE: golang/uPIMulator/benchmark/TRNS/baselines/cpu/main.cpp
  type Params (line 48) | struct Params {
    method Params (line 58) | Params(int argc, char **argv) {
    method usage (line 88) | void usage() {
  function read_input (line 111) | void read_input(T *x_vector, const Params &p) {
  function main (line 120) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/TRNS/baselines/cpu/support/timer.h
  type Timer (line 43) | struct Timer {
  function stop (line 56) | void stop(string name) {
  function print (line 62) | void print(string name, int REP) { printf("%s Time (ms): %f\n", name.c_s...

FILE: golang/uPIMulator/benchmark/TRNS/baselines/cpu/support/verify.h
  function compare_output (line 39) | inline int compare_output(T *output, T *ref, int dim) {
  function trns_host (line 53) | static void trns_host(T* input, unsigned int A, unsigned int B, unsigned...
  function verify (line 68) | inline void verify(T *input2, T *input, int height, int width, int tile_...

FILE: golang/uPIMulator/benchmark/TRNS/baselines/gpu/main.cpp
  type Params (line 48) | struct Params {
    method Params (line 61) | Params(int argc, char **argv) {
    method usage (line 98) | void usage() {
  function read_input (line 123) | void read_input(T *x_vector, const Params &p) {
  function main (line 132) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/TRNS/baselines/gpu/support/cuda-setup.h
  function max_gpu_threads (line 60) | struct CUDASetup {

FILE: golang/uPIMulator/benchmark/TRNS/baselines/gpu/support/timer.h
  type Timer (line 44) | struct Timer {
  function stop (line 59) | void stop(string name) {
  function print (line 67) | void print(string name, unsigned int REP) { printf("%s Time (ms): %f\n",...
  function release (line 69) | void release(string name){

FILE: golang/uPIMulator/benchmark/TRNS/baselines/gpu/support/verify.h
  function compare_output (line 39) | inline int compare_output(T *output, T *ref, int dim) {
  function trns_host (line 53) | static void trns_host(T* input, unsigned int A, unsigned int B, unsigned...
  function verify (line 68) | inline void verify(T *input2, T *input, int height, int width, int tile_...

FILE: golang/uPIMulator/benchmark/TRNS/dpu/task.c
  function main (line 40) | int main(void) {
  function main_kernel1 (line 46) | int main_kernel1() {
  function main_kernel2 (line 77) | int main_kernel2() {
  function get_tile (line 133) | uint32_t __attribute__ ((noinline)) get_tile(){
  function read_tile_step2 (line 141) | void __attribute__ ((noinline)) read_tile_step2(uint32_t A, uint32_t off...
  function write_tile_step2 (line 155) | void __attribute__ ((noinline)) write_tile_step2(uint32_t A, uint32_t of...
  function read_tile_step3 (line 169) | void __attribute__ ((noinline)) read_tile_step3(uint32_t A, uint32_t off...
  function write_tile_step3 (line 173) | void __attribute__ ((noinline)) write_tile_step3(uint32_t A, uint32_t of...
  function _Bool (line 177) | _Bool __attribute__ ((noinline)) get_done(uint32_t done_array_step3, uin...
  function _Bool (line 188) | _Bool __attribute__ ((noinline))get_and_set_done(uint32_t done_array_ste...

FILE: golang/uPIMulator/benchmark/TRNS/host/app.c
  function read_input (line 36) | static void read_input(T* A, unsigned int nr_elements) {
  function trns_host (line 45) | static void trns_host(T* input, unsigned int A, unsigned int B, unsigned...
  function main (line 61) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/TRNS/support/common.h
  type dpu_arguments_t (line 18) | typedef struct {

FILE: golang/uPIMulator/benchmark/TRNS/support/params.h
  type Params (line 6) | typedef struct Params {
  function usage (line 16) | static void usage() {
  function input_params (line 34) | struct Params input_params(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/TRNS/support/timer.h
  type Timer (line 38) | typedef struct Timer{
  function start (line 46) | void start(Timer *timer, int i, int rep) {
  function stop (line 53) | void stop(Timer *timer, int i) {
  function print (line 59) | void print(Timer *timer, int i, int REP) { printf("Time (ms): %f\t", tim...

FILE: golang/uPIMulator/benchmark/TS/dpu/task.c
  function dot_product (line 21) | void __attribute__ ((noinline)) dot_product(DTYPE *vectorA, DTYPE *vecto...
  function main (line 45) | int main(void){
  function main_kernel1 (line 51) | int main_kernel1() {

FILE: golang/uPIMulator/benchmark/TS/host/app.c
  function DTYPE (line 38) | static DTYPE *create_test_file(unsigned int ts_elements, unsigned int qu...
  function streamp (line 55) | static void streamp(DTYPE* tSeries, DTYPE* AMean, DTYPE* ASigma, int Pro...
  function compute_ts_statistics (line 82) | static void compute_ts_statistics(unsigned int timeSeriesLength, unsigne...
  function main (line 121) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/TS/support/common.h
  type dpu_arguments_t (line 17) | typedef struct  {
  type dpu_result_t (line 30) | typedef struct  {

FILE: golang/uPIMulator/benchmark/TS/support/params.h
  type Params (line 7) | typedef struct Params {
  function usage (line 14) | void usage() {
  function input_params (line 29) | struct Params input_params(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/TS/support/timer.h
  type Timer (line 38) | typedef struct Timer{
  function start (line 46) | void start(Timer *timer, int i, int rep) {
  function stop (line 53) | void stop(Timer *timer, int i) {
  function print (line 59) | void print(Timer *timer, int i, int REP) { printf("%f\t", timer->time[i]...

FILE: golang/uPIMulator/benchmark/UNI/baselines/cpu/app_baseline.c
  function T (line 23) | static T *create_test_file(unsigned int nr_elements) {
  function unique_host (line 43) | static int unique_host(int size, int t) {
  type Params (line 63) | typedef struct Params {
  function usage (line 70) | void usage() {
  function input_params (line 85) | struct Params input_params(int argc, char **argv) {
  function main (line 115) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/UNI/baselines/gpu/ds.h
  function __device__ (line 73) | __device__ int dynamic_wg_id(volatile unsigned int *flags, const int num...
  function __device__ (line 82) | __device__ void ds_sync(volatile unsigned int *flags, const int my_s){
  function __device__ (line 98) | __device__ void ds_sync_irregular(volatile unsigned int *flags, const in...
  function __device__ (line 118) | __device__ void ds_sync_irregular_partition(volatile unsigned int *flags...
  function __device__ (line 225) | __device__ inline int lane_id(void) { return threadIdx.x % WARP_SIZE; }
  function __device__ (line 226) | __device__ inline int warp_id(void) { return threadIdx.x / WARP_SIZE; }
  function warp_prefix_sums (line 228) | __device__ unsigned int warp_prefix_sums(bool p){
  function __device__ (line 233) | __device__ int warp_scan(int val, volatile int *s_data){
  function __device__ (line 268) | __device__ int block_binary_prefix_sums(int* count, int x){

FILE: golang/uPIMulator/benchmark/UNI/dpu/task.c
  function unique (line 27) | unsigned int __attribute__ ((noinline)) unique(T *output, T *input){
  function uint3 (line 42) | uint3 __attribute__ ((noinline)) handshake_sync(T *output, unsigned int ...
  function main (line 74) | int main(void) {
  function main_kernel1 (line 80) | int main_kernel1() {

FILE: golang/uPIMulator/benchmark/UNI/host/app.c
  function read_input (line 35) | static void read_input(T* A, unsigned int nr_elements, unsigned int nr_e...
  function unique_host (line 48) | static unsigned int unique_host(T* C, T* A, unsigned int nr_elements) {
  function main (line 62) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/UNI/support/common.h
  type dpu_arguments_t (line 9) | typedef struct {
  type dpu_results_t (line 17) | typedef struct {
  type uint3 (line 33) | typedef struct{unsigned int x; unsigned int y; unsigned int z;} uint3;

FILE: golang/uPIMulator/benchmark/UNI/support/params.h
  type Params (line 6) | typedef struct Params {
  function usage (line 13) | static void usage() {
  function input_params (line 28) | struct Params input_params(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/UNI/support/timer.h
  type Timer (line 38) | typedef struct Timer{
  function start (line 46) | void start(Timer *timer, int i, int rep) {
  function stop (line 53) | void stop(Timer *timer, int i) {
  function print (line 59) | void print(Timer *timer, int i, int REP) { printf("Time (ms): %f\t", tim...

FILE: golang/uPIMulator/benchmark/VA/baselines/cpu/app_baseline.c
  function vector_addition_host (line 45) | static void vector_addition_host(unsigned int nr_elements, int t) {
  type Params (line 54) | typedef struct Params {
  function usage (line 61) | void usage() {
  function input_params (line 76) | struct Params input_params(int argc, char **argv) {
  function main (line 108) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/VA/dpu/task.c
  function vector_addition (line 18) | void __attribute__ ((noinline)) vector_addition(T *bufferB, T *bufferA, ...
  function main (line 31) | int main(void) {
  function main_kernel1 (line 37) | int main_kernel1() {

FILE: golang/uPIMulator/benchmark/VA/host/app.c
  function read_input (line 36) | static void read_input(T* A, T* B, unsigned int nr_elements) {
  function vector_addition_host (line 46) | static void vector_addition_host(T* C, T* A, T* B, unsigned int nr_eleme...
  function main (line 53) | int main(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/VA/support/common.h
  type dpu_arguments_t (line 5) | typedef struct {

FILE: golang/uPIMulator/benchmark/VA/support/params.h
  type Params (line 6) | typedef struct Params {
  function usage (line 13) | static void usage() {
  function input_params (line 28) | struct Params input_params(int argc, char **argv) {

FILE: golang/uPIMulator/benchmark/VA/support/timer.h
  type Timer (line 38) | typedef struct Timer{
  function start (line 46) | void start(Timer *timer, int i, int rep) {
  function stop (line 53) | void stop(Timer *timer, int i) {
  function print (line 59) | void print(Timer *timer, int i, int REP) { printf("Time (ms): %f\t", tim...

FILE: golang/uPIMulator/sdk/misc/accessMramFromDpu.c
  function __bootstrap (line 6) | void __attribute__((naked, used, section(".text.__bootstrap"))) __bootst...

FILE: golang/uPIMulator/sdk/misc/coreDump.c
  function __bootstrap (line 21) | void __attribute__((naked, used, section(".text.__bootstrap"))) __bootst...

FILE: golang/uPIMulator/sdk/misc/crt0.c
  function __bootstrap (line 32) | void __attribute__((naked, used, section(".text.__bootstrap"), no_instru...

FILE: golang/uPIMulator/sdk/misc/internalStateReset.c
  function __bootstrap (line 13) | void __attribute__((naked, used, section(".text.__bootstrap"))) __bootst...

FILE: golang/uPIMulator/sdk/misc/restoreRegisters.c
  function __bootstrap (line 13) | void __attribute__((naked, used, section(".text.__bootstrap"))) __bootst...

FILE: golang/uPIMulator/sdk/stdlib/abort.c
  function abort (line 8) | void

FILE: golang/uPIMulator/sdk/stdlib/atoi.c
  function atoi (line 10) | int

FILE: golang/uPIMulator/sdk/stdlib/atol.c
  function atol (line 10) | long

FILE: golang/uPIMulator/sdk/stdlib/ctype.h
  function isdigit (line 20) | static inline int
  function islower (line 32) | static inline int islower(c) { return (c >= 'a') && (c <= 'z'); }
  function isupper (line 40) | static inline int isupper(c) { return (c >= 'A') && (c <= 'Z'); }
  function isalpha (line 48) | static inline int
  function isalnum (line 60) | static inline int
  function iscntrl (line 72) | static inline int
  function isprint (line 84) | static inline int
  function isgraph (line 96) | static inline int
  function ispunct (line 108) | static inline int
  function isspace (line 120) | static inline int
  function isxdigit (line 132) | static inline int
  function isblank (line 144) | static inline int
  function tolower (line 156) | static inline int
  function toupper (line 168) | static inline int

FILE: golang/uPIMulator/sdk/stdlib/exit.c
  function exit (line 10) | void

FILE: golang/uPIMulator/sdk/stdlib/inttypes.h
  type imaxdiv_t (line 16) | typedef struct {
  function intmax_t (line 21) | static inline intmax_t
  function imaxdiv_t (line 27) | static inline imaxdiv_t

FILE: golang/uPIMulator/sdk/stdlib/memcmp.c
  function memcmp (line 8) | int

FILE: golang/uPIMulator/sdk/stdlib/memcpy.c
  function __mram_ptr (line 76) | __attribute__((used)) __mram_ptr void *
  function __mram_ptr (line 162) | __attribute__((used)) __mram_ptr void *

FILE: golang/uPIMulator/sdk/stdlib/memmove.c
  function __mram_ptr (line 67) | __mram_ptr void *

FILE: golang/uPIMulator/sdk/stdlib/memset.c
  type memset_wram_t (line 21) | typedef uint32_t memset_wram_t;
  function __mram_ptr (line 85) | __attribute__((used)) __mram_ptr void *
  function __mram_ptr (line 100) | __attribute__((used)) __mram_ptr void *

FILE: golang/uPIMulator/sdk/stdlib/stdarg.h
  type __builtin_va_list (line 30) | typedef __builtin_va_list va_list;
  type __builtin_va_list (line 48) | typedef __builtin_va_list __gnuc_va_list;

FILE: golang/uPIMulator/sdk/stdlib/stddef.h
  type wchar_t (line 36) | typedef unsigned int wchar_t;

FILE: golang/uPIMulator/sdk/stdlib/stdint.h
  type int_least8_t (line 62) | typedef signed char int_least8_t;
  type int_least16_t (line 66) | typedef short int int_least16_t;
  type int_least32_t (line 70) | typedef int int_least32_t;
  type int_least64_t (line 74) | typedef long int int_least64_t;
  type uint_least8_t (line 81) | typedef unsigned char uint_least8_t;
  type uint_least16_t (line 85) | typedef unsigned short int uint_least16_t;
  type uint_least32_t (line 89) | typedef unsigned int uint_least32_t;
  type uint_least64_t (line 93) | typedef unsigned long int uint_least64_t;
  type int_fast8_t (line 102) | typedef signed char int_fast8_t;
  type int_fast16_t (line 106) | typedef int int_fast16_t;
  type int_fast32_t (line 110) | typedef int int_fast32_t;
  type int_fast64_t (line 114) | typedef long int int_fast64_t;
  type uint_fast8_t (line 121) | typedef unsigned char uint_fast8_t;
  type uint_fast16_t (line 125) | typedef unsigned int uint_fast16_t;
  type uint_fast32_t (line 129) | typedef unsigned int uint_fast32_t;
  type uint_fast64_t (line 133) | typedef unsigned long int uint_fast64_t;
  type intmax_t (line 151) | typedef long long int intmax_t;
  type uintmax_t (line 155) | typedef unsigned long long int uintmax_t;

FILE: golang/uPIMulator/sdk/stdlib/stdio.c
  function __dma_aligned (line 28) | __lower_data(__STR(__STDOUT_BUFFER_STATE)) __dma_aligned struct {
  function __transfer_cache_to_mram (line 45) | __attribute__((noinline)) static void
  function __write_byte_and_flush_if_needed (line 62) | __attribute__((noinline)) static void
  function __finalized_print_sequence (line 72) | __attribute__((noinline)) static void
  function __open_print_sequence (line 83) | __attribute__((noinline)) static void
  function __close_print_sequence (line 94) | __attribute__((noinline)) static void
  function printf (line 100) | void
  function puts (line 258) | void
  function putchar (line 278) | void

FILE: golang/uPIMulator/sdk/stdlib/stdlib.h
  function abs (line 55) | static inline int
  function labs (line 64) | static inline long int
  function llabs (line 73) | static inline long long int
  type div_t (line 79) | typedef struct {
  type ldiv_t (line 84) | typedef struct {
  type lldiv_t (line 89) | typedef struct {
  function div_t (line 94) | static inline div_t
  function ldiv_t (line 101) | static inline ldiv_t
  function lldiv_t (line 108) | static inline lldiv_t

FILE: golang/uPIMulator/sdk/stdlib/strcmp.c
  function strcmp (line 8) | int

FILE: golang/uPIMulator/sdk/stdlib/strcspn.c
  function strcspn (line 16) | size_t

FILE: golang/uPIMulator/sdk/stdlib/strlen.c
  function strlen (line 8) | size_t

FILE: golang/uPIMulator/sdk/stdlib/strncmp.c
  function strncmp (line 8) | int

FILE: golang/uPIMulator/sdk/stdlib/strnlen.c
  function strnlen (line 8) | size_t

FILE: golang/uPIMulator/sdk/stdlib/strspn.c
  function strspn (line 17) | size_t

FILE: golang/uPIMulator/sdk/syslib/absvdi2.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI di_int

FILE: golang/uPIMulator/sdk/syslib/absvsi2.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/adddf3.c
  function COMPILER_RT_ABI (line 18) | COMPILER_RT_ABI double
  function AEABI_RTABI (line 25) | AEABI_RTABI double

FILE: golang/uPIMulator/sdk/syslib/addsf3.c
  function COMPILER_RT_ABI (line 18) | COMPILER_RT_ABI float
  function AEABI_RTABI (line 25) | AEABI_RTABI float

FILE: golang/uPIMulator/sdk/syslib/addvdi3.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI di_int

FILE: golang/uPIMulator/sdk/syslib/addvsi3.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/ashldi3.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI di_int
  function AEABI_RTABI (line 43) | AEABI_RTABI di_int

FILE: golang/uPIMulator/sdk/syslib/ashrdi3.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI di_int
  function AEABI_RTABI (line 44) | AEABI_RTABI di_int

FILE: golang/uPIMulator/sdk/syslib/barrier.c
  function barrier_wait (line 11) | void

FILE: golang/uPIMulator/sdk/syslib/barrier.h
  type barrier_t (line 33) | typedef struct barrier_t {

FILE: golang/uPIMulator/sdk/syslib/bswapdi2.c
  function COMPILER_RT_ABI (line 17) | COMPILER_RT_ABI uint64_t

FILE: golang/uPIMulator/sdk/syslib/bswapsi2.c
  function COMPILER_RT_ABI (line 17) | COMPILER_RT_ABI uint32_t

FILE: golang/uPIMulator/sdk/syslib/buddy_alloc.c
  function next_power_of_2 (line 30) | static inline unsigned int
  type _buddy_search_context_t (line 240) | struct _buddy_search_context_t {
  function buddy_search_for_pointer (line 247) | int
  function buddy_free_fusion_of_blocks (line 327) | static void
  function safe_buddy_free (line 390) | void
  function buddy_free (line 414) | void __noinline
  function buddy_init (line 437) | void __noinline
  function buddy_reset (line 462) | void __noinline

FILE: golang/uPIMulator/sdk/syslib/buddy_realloc.c
  type _buddy_search_context_t (line 18) | struct _buddy_search_context_t {
  function buddy_sizeofblock (line 32) | static int

FILE: golang/uPIMulator/sdk/syslib/clzdi2.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/clzsi2.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/cmpdi2.c
  function COMPILER_RT_ABI (line 22) | COMPILER_RT_ABI si_int
  function COMPILER_RT_ABI (line 45) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/comparedf2.c
  type LE_RESULT (line 43) | enum LE_RESULT { LE_LESS = -1, LE_EQUAL = 0, LE_GREATER = 1, LE_UNORDERE...
  function LE_RESULT (line 45) | LE_RESULT
  type GE_RESULT (line 92) | enum GE_RESULT {
  function GE_RESULT (line 99) | GE_RESULT
  function COMPILER_RT_ABI (line 129) | COMPILER_RT_ABI int
  function LE_RESULT (line 139) | LE_RESULT
  function LE_RESULT (line 145) | LE_RESULT
  function LE_RESULT (line 151) | LE_RESULT
  function GE_RESULT (line 157) | GE_RESULT
  function AEABI_RTABI (line 164) | AEABI_RTABI int

FILE: golang/uPIMulator/sdk/syslib/comparesf2.c
  type LE_RESULT (line 43) | enum LE_RESULT { LE_LESS = -1, LE_EQUAL = 0, LE_GREATER = 1, LE_UNORDERE...
  function LE_RESULT (line 45) | LE_RESULT
  type GE_RESULT (line 92) | enum GE_RESULT {
  function GE_RESULT (line 99) | GE_RESULT
  function COMPILER_RT_ABI (line 129) | COMPILER_RT_ABI int
  function LE_RESULT (line 139) | LE_RESULT
  function LE_RESULT (line 145) | LE_RESULT
  function LE_RESULT (line 151) | LE_RESULT
  function GE_RESULT (line 157) | GE_RESULT
  function AEABI_RTABI (line 164) | AEABI_RTABI int

FILE: golang/uPIMulator/sdk/syslib/ctzdi2.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/ctzsi2.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/defs.c
  function check_stack (line 10) | int

FILE: golang/uPIMulator/sdk/syslib/defs.h
  function sysname_t (line 32) | static inline sysname_t
  function __ATTRIBUTE_NO_RETURN__ (line 43) | __ATTRIBUTE_NO_RETURN__ static inline void

FILE: golang/uPIMulator/sdk/syslib/div32.c
  function __udiv32 (line 8) | void __attribute__((naked, noinline, no_instrument_function)) __udiv32(v...
  function __div32 (line 58) | void __attribute__((naked, noinline, no_instrument_function)) __div32(void)

FILE: golang/uPIMulator/sdk/syslib/divdf3.c
  function COMPILER_RT_ABI (line 22) | COMPILER_RT_ABI fp_t
  function AEABI_RTABI (line 197) | AEABI_RTABI fp_t

FILE: golang/uPIMulator/sdk/syslib/divdi3.c
  function __divdi3 (line 15) | int64_t

FILE: golang/uPIMulator/sdk/syslib/divmodsi4.c
  function COMPILER_RT_ABI (line 22) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/divsf3.c
  function COMPILER_RT_ABI (line 22) | COMPILER_RT_ABI fp_t
  function AEABI_RTABI (line 181) | AEABI_RTABI fp_t

FILE: golang/uPIMulator/sdk/syslib/divsi3.c
  function COMPILER_RT_ABI (line 19) | COMPILER_RT_ABI si_int
  function AEABI_RTABI (line 38) | AEABI_RTABI si_int

FILE: golang/uPIMulator/sdk/syslib/dpuruntime.h
  type thread_stack_t (line 24) | typedef struct {

FILE: golang/uPIMulator/sdk/syslib/extendhfsf2.c
  function __extendhfsf2 (line 17) | float
  function COMPILER_RT_ABI (line 23) | COMPILER_RT_ABI float
  function AEABI_RTABI (line 30) | AEABI_RTABI float

FILE: golang/uPIMulator/sdk/syslib/extendsfdf2.c
  function COMPILER_RT_ABI (line 15) | COMPILER_RT_ABI double
  function AEABI_RTABI (line 22) | AEABI_RTABI double

FILE: golang/uPIMulator/sdk/syslib/ffsdi2.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/ffssi2.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/ffsti2.c
  function COMPILER_RT_ABI (line 23) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/fixdfdi.c
  function COMPILER_RT_ABI (line 22) | COMPILER_RT_ABI di_int
  type di_int (line 37) | typedef di_int fixint_t;
  type du_int (line 38) | typedef du_int fixuint_t;
  function COMPILER_RT_ABI (line 41) | COMPILER_RT_ABI di_int

FILE: golang/uPIMulator/sdk/syslib/fixdfsi.c
  type si_int (line 13) | typedef si_int fixint_t;
  type su_int (line 14) | typedef su_int fixuint_t;
  function COMPILER_RT_ABI (line 17) | COMPILER_RT_ABI si_int
  function AEABI_RTABI (line 24) | AEABI_RTABI si_int

FILE: golang/uPIMulator/sdk/syslib/fixsfdi.c
  function COMPILER_RT_ABI (line 22) | COMPILER_RT_ABI di_int
  type di_int (line 37) | typedef di_int fixint_t;
  type du_int (line 38) | typedef du_int fixuint_t;
  function COMPILER_RT_ABI (line 41) | COMPILER_RT_ABI di_int

FILE: golang/uPIMulator/sdk/syslib/fixsfsi.c
  type si_int (line 13) | typedef si_int fixint_t;
  type su_int (line 14) | typedef su_int fixuint_t;
  function COMPILER_RT_ABI (line 17) | COMPILER_RT_ABI si_int
  function AEABI_RTABI (line 24) | AEABI_RTABI si_int

FILE: golang/uPIMulator/sdk/syslib/fixunsdfdi.c
  function COMPILER_RT_ABI (line 19) | COMPILER_RT_ABI du_int
  type du_int (line 35) | typedef du_int fixuint_t;
  function COMPILER_RT_ABI (line 38) | COMPILER_RT_ABI du_int

FILE: golang/uPIMulator/sdk/syslib/fixunsdfsi.c
  type su_int (line 13) | typedef su_int fixuint_t;
  function COMPILER_RT_ABI (line 16) | COMPILER_RT_ABI su_int
  function AEABI_RTABI (line 23) | AEABI_RTABI su_int

FILE: golang/uPIMulator/sdk/syslib/fixunssfdi.c
  function COMPILER_RT_ABI (line 19) | COMPILER_RT_ABI du_int
  type du_int (line 36) | typedef du_int fixuint_t;
  function COMPILER_RT_ABI (line 39) | COMPILER_RT_ABI du_int

FILE: golang/uPIMulator/sdk/syslib/fixunssfsi.c
  type su_int (line 17) | typedef su_int fixuint_t;
  function COMPILER_RT_ABI (line 20) | COMPILER_RT_ABI su_int
  function AEABI_RTABI (line 27) | AEABI_RTABI su_int

FILE: golang/uPIMulator/sdk/syslib/floatdidf.c
  function COMPILER_RT_ABI (line 30) | COMPILER_RT_ABI double
  function COMPILER_RT_ABI (line 53) | COMPILER_RT_ABI double
  function AEABI_RTABI (line 105) | AEABI_RTABI double

FILE: golang/uPIMulator/sdk/syslib/floatdisf.c
  function COMPILER_RT_ABI (line 25) | COMPILER_RT_ABI float
  function AEABI_RTABI (line 75) | AEABI_RTABI float

FILE: golang/uPIMulator/sdk/syslib/floatsidf.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI fp_t
  function AEABI_RTABI (line 55) | AEABI_RTABI fp_t

FILE: golang/uPIMulator/sdk/syslib/floatsisf.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI fp_t
  function AEABI_RTABI (line 63) | AEABI_RTABI fp_t

FILE: golang/uPIMulator/sdk/syslib/floatundidf.c
  function COMPILER_RT_ABI (line 30) | COMPILER_RT_ABI double
  function COMPILER_RT_ABI (line 58) | COMPILER_RT_ABI double
  function AEABI_RTABI (line 107) | AEABI_RTABI double

FILE: golang/uPIMulator/sdk/syslib/floatundisf.c
  function COMPILER_RT_ABI (line 25) | COMPILER_RT_ABI float
  function AEABI_RTABI (line 72) | AEABI_RTABI float

FILE: golang/uPIMulator/sdk/syslib/floatunsidf.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI fp_t
  function AEABI_RTABI (line 45) | AEABI_RTABI fp_t

FILE: golang/uPIMulator/sdk/syslib/floatunsisf.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI fp_t
  function AEABI_RTABI (line 55) | AEABI_RTABI fp_t

FILE: golang/uPIMulator/sdk/syslib/fp_extend.h
  type src_t (line 20) | typedef float src_t;
  type src_rep_t (line 21) | typedef uint32_t src_rep_t;
  type src_t (line 27) | typedef double src_t;
  type src_rep_t (line 28) | typedef uint64_t src_rep_t;
  function src_rep_t_clz (line 31) | static __inline int
  type src_t (line 45) | typedef uint16_t src_t;
  type src_rep_t (line 46) | typedef uint16_t src_rep_t;
  type dst_t (line 56) | typedef float dst_t;
  type dst_rep_t (line 57) | typedef uint32_t dst_rep_t;
  type dst_t (line 62) | typedef double dst_t;
  type dst_rep_t (line 63) | typedef uint64_t dst_rep_t;
  type dst_t (line 68) | typedef long double dst_t;
  type __uint128_t (line 69) | typedef __uint128_t dst_rep_t;
  function src_rep_t (line 80) | static __inline src_rep_t
  function dst_t (line 90) | static __inline dst_t

FILE: golang/uPIMulator/sdk/syslib/fp_lib.h
  type rep_t (line 43) | typedef uint32_t rep_t;
  type srep_t (line 44) | typedef int32_t srep_t;
  type fp_t (line 45) | typedef float fp_t;
  function rep_clz (line 49) | static __inline int
  function wideMultiply (line 56) | static __inline void
  type rep_t (line 68) | typedef uint64_t rep_t;
  type srep_t (line 69) | typedef int64_t srep_t;
  type fp_t (line 70) | typedef double fp_t;
  function rep_clz (line 74) | static __inline int
  function wideMultiply (line 93) | static __inline void
  type __uint128_t (line 117) | typedef __uint128_t rep_t;
  type __int128_t (line 118) | typedef __int128_t srep_t;
  type fp_t (line 119) | typedef long double fp_t;
  function rep_clz (line 125) | static __inline int
  function wideMultiply (line 165) | static __inline void
  function rep_t (line 228) | static __inline rep_t
  function fp_t (line 238) | static __inline fp_t
  function normalize (line 248) | static __inline int
  function wideLeftShift (line 256) | static __inline void
  function wideRightShiftWithSticky (line 263) | static __inline void

FILE: golang/uPIMulator/sdk/syslib/fp_trunc.h
  type src_t (line 20) | typedef float src_t;
  type src_rep_t (line 21) | typedef uint32_t src_rep_t;
  type src_t (line 26) | typedef double src_t;
  type src_rep_t (line 27) | typedef uint64_t src_rep_t;
  type src_t (line 32) | typedef long double src_t;
  type __uint128_t (line 33) | typedef __uint128_t src_rep_t;
  type dst_t (line 42) | typedef double dst_t;
  type dst_rep_t (line 43) | typedef uint64_t dst_rep_t;
  type dst_t (line 48) | typedef float dst_t;
  type dst_rep_t (line 49) | typedef uint32_t dst_rep_t;
  type dst_t (line 54) | typedef uint16_t dst_t;
  type dst_rep_t (line 55) | typedef uint16_t dst_rep_t;
  function src_rep_t (line 66) | static __inline src_rep_t
  function dst_t (line 76) | static __inline dst_t

FILE: golang/uPIMulator/sdk/syslib/fsb_allocator.c
  function fsb_allocator_t (line 16) | fsb_allocator_t __noinline
  function fsb_free (line 68) | void __noinline

FILE: golang/uPIMulator/sdk/syslib/handshake.c
  function handshake_notify (line 19) | void
  function handshake_wait_for (line 38) | int

FILE: golang/uPIMulator/sdk/syslib/int_lib.h
  function __builtin_ctz (line 104) | uint32_t __inline __builtin_ctz(uint32_t value)
  function __builtin_clz (line 112) | uint32_t __inline __builtin_clz(uint32_t value)
  function __builtin_clzll (line 121) | uint32_t __inline __builtin_clzll(uint64_t value)
  function __builtin_clzll (line 129) | uint32_t __inline __builtin_clzll(uint64_t value)

FILE: golang/uPIMulator/sdk/syslib/int_types.h
  type si_int (line 27) | typedef int si_int;
  type su_int (line 28) | typedef unsigned su_int;
  type di_int (line 30) | typedef long long di_int;
  type du_int (line 31) | typedef unsigned long long du_int;
  type dwords (line 33) | typedef union {
  type udwords (line 46) | typedef union {
  type ti_int (line 64) | typedef int ti_int __attribute__((mode(TI)));
  type tu_int (line 65) | typedef unsigned tu_int __attribute__((mode(TI)));
  type twords (line 67) | typedef union {
  type utwords (line 80) | typedef union {
  function ti_int (line 93) | static __inline ti_int
  function tu_int (line 102) | static __inline tu_int
  type float_bits (line 113) | typedef union {
  type double_bits (line 118) | typedef union {
  type uqwords (line 123) | typedef struct {
  type long_double_bits (line 133) | typedef union {
  type _Complex (line 139) | typedef float _Complex Fcomplex;
  type _Complex (line 140) | typedef double _Complex Dcomplex;
  type _Complex (line 141) | typedef long double _Complex Lcomplex;
  type Fcomplex (line 146) | typedef struct {
  type Dcomplex (line 150) | typedef struct {
  type Lcomplex (line 154) | typedef struct {

FILE: golang/uPIMulator/sdk/syslib/int_util.c
  function compilerrt_abort_impl (line 29) | __attribute__((visibility("hidden")))
  function compilerrt_abort_impl (line 44) | __attribute__((weak)) __attribute__((visibility("hidden")))
  function compilerrt_abort_impl (line 63) | void

FILE: golang/uPIMulator/sdk/syslib/listener.c
  function fifo_sys_fetch_info (line 19) | static inline uint32_t
  function fifo_fetch_info (line 26) | static inline uint32_t
  function sysname_t (line 33) | static inline sysname_t
  function sysname_t (line 39) | static inline sysname_t
  function __sys_internal_listener_loop (line 45) | void

FILE: golang/uPIMulator/sdk/syslib/lshrdi3.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI di_int
  function AEABI_RTABI (line 43) | AEABI_RTABI di_int

FILE: golang/uPIMulator/sdk/syslib/mcount.c
  function mcount (line 6) | void __attribute__((naked, noinline, no_instrument_function)) mcount(void)
  function ret_mcount (line 12) | void __attribute__((naked, noinline, no_instrument_function)) ret_mcount...

FILE: golang/uPIMulator/sdk/syslib/moddi3.c
  function __moddi3 (line 15) | int64_t

FILE: golang/uPIMulator/sdk/syslib/modsi3.c
  function COMPILER_RT_ABI (line 22) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/mram.h
  function mram_read (line 32) | static inline void
  function mram_write (line 50) | static inline void

FILE: golang/uPIMulator/sdk/syslib/mul32.c
  function __mulsi3 (line 8) | int __attribute__((noinline)) __mulsi3(int a, int b)

FILE: golang/uPIMulator/sdk/syslib/mul64.c
  function _mul00 (line 33) | static uint16_t
  function _mul01 (line 45) | static uint16_t
  function _mul11 (line 60) | static uint16_t
  function _mul12 (line 72) | static uint16_t
  function __muldi3 (line 131) | uint64_t

FILE: golang/uPIMulator/sdk/syslib/muldc3.c
  function COMPILER_RT_ABI (line 20) | COMPILER_RT_ABI Dcomplex

FILE: golang/uPIMulator/sdk/syslib/muldf3.c
  function COMPILER_RT_ABI (line 18) | COMPILER_RT_ABI fp_t
  function AEABI_RTABI (line 25) | AEABI_RTABI fp_t

FILE: golang/uPIMulator/sdk/syslib/mulodi4.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI di_int

FILE: golang/uPIMulator/sdk/syslib/mulosi4.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/mulsf3.c
  function COMPILER_RT_ABI (line 18) | COMPILER_RT_ABI fp_t
  function AEABI_RTABI (line 25) | AEABI_RTABI fp_t

FILE: golang/uPIMulator/sdk/syslib/mulvdi3.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI di_int

FILE: golang/uPIMulator/sdk/syslib/mulvsi3.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/mutex.h
  function mutex_lock (line 58) | static inline void
  function mutex_trylock (line 70) | static inline bool
  function mutex_unlock (line 83) | static inline void

FILE: golang/uPIMulator/sdk/syslib/negdf2.c
  function COMPILER_RT_ABI (line 17) | COMPILER_RT_ABI fp_t
  function AEABI_RTABI (line 24) | AEABI_RTABI fp_t

FILE: golang/uPIMulator/sdk/syslib/negdi2.c
  function COMPILER_RT_ABI (line 19) | COMPILER_RT_ABI di_int

FILE: golang/uPIMulator/sdk/syslib/negsf2.c
  function COMPILER_RT_ABI (line 17) | COMPILER_RT_ABI fp_t
  function AEABI_RTABI (line 24) | AEABI_RTABI fp_t

FILE: golang/uPIMulator/sdk/syslib/negvdi2.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI di_int

FILE: golang/uPIMulator/sdk/syslib/negvsi2.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/paritydi2.c
  function COMPILER_RT_ABI (line 19) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/paritysi2.c
  function COMPILER_RT_ABI (line 19) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/perfcounter.c
  function perfcounter_t (line 11) | perfcounter_t
  function perfcounter_t (line 20) | perfcounter_t

FILE: golang/uPIMulator/sdk/syslib/perfcounter.h
  type perfcounter_t (line 22) | typedef uint64_t perfcounter_t;
  type perfcounter_config_t (line 33) | typedef enum _perfcounter_config_t {

FILE: golang/uPIMulator/sdk/syslib/popcountdi2.c
  function COMPILER_RT_ABI (line 19) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/popcountsi2.c
  function COMPILER_RT_ABI (line 19) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/powidf2.c
  function COMPILER_RT_ABI (line 19) | COMPILER_RT_ABI double

FILE: golang/uPIMulator/sdk/syslib/powisf2.c
  function COMPILER_RT_ABI (line 19) | COMPILER_RT_ABI float

FILE: golang/uPIMulator/sdk/syslib/profiling.c
  function profiling_start (line 14) | void __attribute__((no_instrument_function)) profiling_start(dpu_profili...
  function profiling_stop (line 28) | void __attribute__((no_instrument_function)) profiling_stop(dpu_profilin...

FILE: golang/uPIMulator/sdk/syslib/profiling_internals.h
  type dpu_profiling_t (line 30) | typedef struct {

FILE: golang/uPIMulator/sdk/syslib/sem.c
  function sem_take (line 11) | void
  function sem_give (line 40) | void

FILE: golang/uPIMulator/sdk/syslib/sem.h
  type sem_t (line 30) | typedef struct sem_t {

FILE: golang/uPIMulator/sdk/syslib/seqread.h
  type seqreader_buffer_t (line 57) | typedef uintptr_t seqreader_buffer_t;
  type seqreader_t (line 63) | typedef struct {

FILE: golang/uPIMulator/sdk/syslib/subdf3.c
  function COMPILER_RT_ABI (line 19) | COMPILER_RT_ABI fp_t
  function AEABI_RTABI (line 26) | AEABI_RTABI fp_t

FILE: golang/uPIMulator/sdk/syslib/subsf3.c
  function COMPILER_RT_ABI (line 19) | COMPILER_RT_ABI fp_t
  function AEABI_RTABI (line 26) | AEABI_RTABI fp_t

FILE: golang/uPIMulator/sdk/syslib/subvdi3.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI di_int

FILE: golang/uPIMulator/sdk/syslib/subvsi3.c
  function COMPILER_RT_ABI (line 21) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/sysdef.h
  type thread_id_t (line 18) | typedef unsigned int thread_id_t;
  type sysname_t (line 27) | typedef unsigned int sysname_t;

FILE: golang/uPIMulator/sdk/syslib/truncdfhf2.c
  function COMPILER_RT_ABI (line 14) | COMPILER_RT_ABI uint16_t
  function AEABI_RTABI (line 21) | AEABI_RTABI uint16_t

FILE: golang/uPIMulator/sdk/syslib/truncdfsf2.c
  function COMPILER_RT_ABI (line 14) | COMPILER_RT_ABI float
  function AEABI_RTABI (line 21) | AEABI_RTABI float

FILE: golang/uPIMulator/sdk/syslib/truncsfhf2.c
  function __truncsfhf2 (line 16) | uint16_t
  function COMPILER_RT_ABI (line 22) | COMPILER_RT_ABI uint16_t
  function AEABI_RTABI (line 29) | AEABI_RTABI uint16_t

FILE: golang/uPIMulator/sdk/syslib/ucmpdi2.c
  function COMPILER_RT_ABI (line 22) | COMPILER_RT_ABI si_int
  function COMPILER_RT_ABI (line 45) | COMPILER_RT_ABI si_int

FILE: golang/uPIMulator/sdk/syslib/udiv64.c
  function __clz__ (line 12) | static unsigned int
  function __udiv64 (line 18) | uint64_t

FILE: golang/uPIMulator/sdk/syslib/udivdi3.c
  function __udivdi3 (line 15) | uint64_t

FILE: golang/uPIMulator/sdk/syslib/udivmodsi4.c
  function COMPILER_RT_ABI (line 22) | COMPILER_RT_ABI su_int

FILE: golang/uPIMulator/sdk/syslib/udivsi3.c
  function COMPILER_RT_ABI (line 22) | COMPILER_RT_ABI su_int
  function AEABI_RTABI (line 66) | AEABI_RTABI su_int

FILE: golang/uPIMulator/sdk/syslib/umoddi3.c
  function __umoddi3 (line 15) | uint64_t

FILE: golang/uPIMulator/sdk/syslib/umodsi3.c
  function COMPILER_RT_ABI (line 22) | COMPILER_RT_ABI su_int

FILE: golang/uPIMulator/src/abi/encoding/ascii_encoder.go
  type AsciiEncoder (line 3) | type AsciiEncoder struct
    method Init (line 9) | func (this *AsciiEncoder) Init() {
    method Encode (line 244) | func (this *AsciiEncoder) Encode(characters string) *ByteStream {
    method Decode (line 256) | func (this *AsciiEncoder) Decode(byte_stream *ByteStream) string {
    method Unknown (line 267) | func (this *AsciiEncoder) Unknown() string {

FILE: golang/uPIMulator/src/abi/encoding/byte_stream.go
  type ByteStream (line 3) | type ByteStream struct
    method Init (line 7) | func (this *ByteStream) Init() {
    method Size (line 11) | func (this *ByteStream) Size() int64 {
    method Get (line 15) | func (this *ByteStream) Get(pos int) uint8 {
    method Set (line 19) | func (this *ByteStream) Set(pos int, value uint8) {
    method Append (line 23) | func (this *ByteStream) Append(value uint8) {
    method Merge (line 27) | func (this *ByteStream) Merge(byte_stream *ByteStream) {

FILE: golang/uPIMulator/src/abi/word/intermediate.go
  type Immediate (line 7) | type Immediate struct
    method Init (line 13) | func (this *Immediate) Init(representation Representation, width int, ...
    method Representation (line 22) | func (this *Immediate) Representation() Representation {
    method Width (line 26) | func (this *Immediate) Width() int {
    method Bit (line 30) | func (this *Immediate) Bit(pos int) bool {
    method BitSlice (line 34) | func (this *Immediate) BitSlice(begin int, end int) int64 {
    method Value (line 38) | func (this *Immediate) Value() int64 {
    method ToByteStream (line 42) | func (this *Immediate) ToByteStream() *encoding.ByteStream {

FILE: golang/uPIMulator/src/abi/word/word.go
  type Representation (line 8) | type Representation
  constant UNSIGNED (line 11) | UNSIGNED Representation = iota
  constant SIGNED (line 12) | SIGNED
  type Word (line 15) | type Word struct
    method Init (line 19) | func (this *Word) Init(width int) {
    method Width (line 28) | func (this *Word) Width() int {
    method Size (line 32) | func (this *Word) Size() int {
    method SignBit (line 41) | func (this *Word) SignBit() bool {
    method Bit (line 45) | func (this *Word) Bit(pos int) bool {
    method SetBit (line 49) | func (this *Word) SetBit(pos int) {
    method ClearBit (line 53) | func (this *Word) ClearBit(pos int) {
    method BitSlice (line 57) | func (this *Word) BitSlice(representation Representation, begin int, e...
    method SetBitSlice (line 74) | func (this *Word) SetBitSlice(begin int, end int, value int64) {
    method Value (line 84) | func (this *Word) Value(representation Representation) int64 {
    method SetValue (line 88) | func (this *Word) SetValue(value int64) {
    method ToByteStream (line 92) | func (this *Word) ToByteStream() *encoding.ByteStream {
    method FromByteStream (line 107) | func (this *Word) FromByteStream(byte_stream *encoding.ByteStream) {
    method VerifySlice (line 117) | func (this *Word) VerifySlice(begin int, end int) {
    method Pow2 (line 139) | func (this *Word) Pow2(exponent int) int64 {
    method SetPositiveBitSlice (line 147) | func (this *Word) SetPositiveBitSlice(begin int, end int, value int64) {
    method SetNegativeBitSlice (line 172) | func (this *Word) SetNegativeBitSlice(begin int, end int, value int64) {

FILE: golang/uPIMulator/src/assembler/assemblable.go
  type Assemblable (line 8) | type Assemblable interface

FILE: golang/uPIMulator/src/assembler/assembler.go
  type Assembler (line 11) | type Assembler struct
    method Init (line 26) | func (this *Assembler) Init(command_line_parser *misc.CommandLineParse...
    method Assemble (line 62) | func (this *Assembler) Assemble() {
    method AssembleInputDpuHost (line 70) | func (this *Assembler) AssembleInputDpuHost() {
    method AssembleOutputDpuHost (line 97) | func (this *Assembler) AssembleOutputDpuHost() {
    method AssembleInputDpuMramHeapPointerName (line 124) | func (this *Assembler) AssembleInputDpuMramHeapPointerName() {
    method AssembleOutputDpuMramHeapPointerName (line 154) | func (this *Assembler) AssembleOutputDpuMramHeapPointerName() {
    method AssembleNumExecutions (line 184) | func (this *Assembler) AssembleNumExecutions() {

FILE: golang/uPIMulator/src/assembler/prim/bs.go
  type Bs (line 10) | type Bs struct
    method Init (line 24) | func (this *Bs) Init(command_line_parser *misc.CommandLineParser) {
    method InputDpuHost (line 101) | func (this *Bs) InputDpuHost(execution int, dpu_id int) map[string]*en...
    method OutputDpuHost (line 134) | func (this *Bs) OutputDpuHost(execution int, dpu_id int) map[string]*e...
    method InputDpuMramHeapPointerName (line 159) | func (this *Bs) InputDpuMramHeapPointerName(
    method OutputDpuMramHeapPointerName (line 193) | func (this *Bs) OutputDpuMramHeapPointerName(
    method NumExecutions (line 211) | func (this *Bs) NumExecutions() int {
    method Sum (line 215) | func (this *Bs) Sum(s []int64) int64 {
    method Pow2 (line 223) | func (this *Bs) Pow2(exponent int) int {

FILE: golang/uPIMulator/src/assembler/prim/gemv.go
  type Gemv (line 11) | type Gemv struct
    method Init (line 26) | func (this *Gemv) Init(command_line_parser *misc.CommandLineParser) {
    method InputDpuHost (line 94) | func (this *Gemv) InputDpuHost(execution int, dpu_id int) map[string]*...
    method OutputDpuHost (line 132) | func (this *Gemv) OutputDpuHost(execution int, dpu_id int) map[string]...
    method InputDpuMramHeapPointerName (line 144) | func (this *Gemv) InputDpuMramHeapPointerName(
    method OutputDpuMramHeapPointerName (line 181) | func (this *Gemv) OutputDpuMramHeapPointerName(
    method NumExecutions (line 220) | func (this *Gemv) NumExecutions() int {
    method MatMul (line 224) | func (this *Gemv) MatMul(x [][]int64, y []int64) []int64 {

FILE: golang/uPIMulator/src/assembler/prim/hst_l.go
  type HstL (line 12) | type HstL struct
    method Init (line 27) | func (this *HstL) Init(command_line_parser *misc.CommandLineParser) {
    method InputDpuHost (line 104) | func (this *HstL) InputDpuHost(execution int, dpu_id int) map[string]*...
    method OutputDpuHost (line 142) | func (this *HstL) OutputDpuHost(execution int, dpu_id int) map[string]...
    method InputDpuMramHeapPointerName (line 154) | func (this *HstL) InputDpuMramHeapPointerName(
    method OutputDpuMramHeapPointerName (line 182) | func (this *HstL) OutputDpuMramHeapPointerName(
    method NumExecutions (line 207) | func (this *HstL) NumExecutions() int {
    method Pow2 (line 211) | func (this *HstL) Pow2(exponent int) int {

FILE: golang/uPIMulator/src/assembler/prim/hst_s.go
  type HstS (line 12) | type HstS struct
    method Init (line 27) | func (this *HstS) Init(command_line_parser *misc.CommandLineParser) {
    method InputDpuHost (line 104) | func (this *HstS) InputDpuHost(execution int, dpu_id int) map[string]*...
    method OutputDpuHost (line 142) | func (this *HstS) OutputDpuHost(execution int, dpu_id int) map[string]...
    method InputDpuMramHeapPointerName (line 154) | func (this *HstS) InputDpuMramHeapPointerName(
    method OutputDpuMramHeapPointerName (line 182) | func (this *HstS) OutputDpuMramHeapPointerName(
    method NumExecutions (line 207) | func (this *HstS) NumExecutions() int {
    method Pow2 (line 211) | func (this *HstS) Pow2(exponent int) int {

FILE: golang/uPIMulator/src/assembler/prim/mlp.go
  type Mlp (line 10) | type Mlp struct
    method Init (line 26) | func (this *Mlp) Init(command_line_parser *misc.CommandLineParser) {
    method InputDpuHost (line 135) | func (this *Mlp) InputDpuHost(execution int, dpu_id int) map[string]*e...
    method OutputDpuHost (line 173) | func (this *Mlp) OutputDpuHost(execution int, dpu_id int) map[string]*...
    method InputDpuMramHeapPointerName (line 185) | func (this *Mlp) InputDpuMramHeapPointerName(
    method OutputDpuMramHeapPointerName (line 222) | func (this *Mlp) OutputDpuMramHeapPointerName(
    method NumExecutions (line 261) | func (this *Mlp) NumExecutions() int {
    method MatMul (line 265) | func (this *Mlp) MatMul(x [][]int64, y []int64) []int64 {

FILE: golang/uPIMulator/src/assembler/prim/red.go
  type Red (line 12) | type Red struct
    method Init (line 27) | func (this *Red) Init(command_line_parser *misc.CommandLineParser) {
    method InputDpuHost (line 129) | func (this *Red) InputDpuHost(execution int, dpu_id int) map[string]*e...
    method OutputDpuHost (line 162) | func (this *Red) OutputDpuHost(execution int, dpu_id int) map[string]*...
    method InputDpuMramHeapPointerName (line 197) | func (this *Red) InputDpuMramHeapPointerName(
    method OutputDpuMramHeapPointerName (line 224) | func (this *Red) OutputDpuMramHeapPointerName(
    method NumExecutions (line 242) | func (this *Red) NumExecutions() int {
    method Sum (line 246) | func (this *Red) Sum(s []int64) int64 {
    method Pow2 (line 254) | func (this *Red) Pow2(exponent int) int {

FILE: golang/uPIMulator/src/assembler/prim/scan_rss.go
  type ScanRss (line 12) | type ScanRss struct
    method Init (line 27) | func (this *ScanRss) Init(command_line_parser *misc.CommandLineParser) {
    method InputDpuHost (line 129) | func (this *ScanRss) InputDpuHost(execution int, dpu_id int) map[strin...
    method OutputDpuHost (line 162) | func (this *ScanRss) OutputDpuHost(execution int, dpu_id int) map[stri...
    method InputDpuMramHeapPointerName (line 189) | func (this *ScanRss) InputDpuMramHeapPointerName(
    method OutputDpuMramHeapPointerName (line 219) | func (this *ScanRss) OutputDpuMramHeapPointerName(
    method NumExecutions (line 249) | func (this *ScanRss) NumExecutions() int {
    method Sum1D (line 253) | func (this *ScanRss) Sum1D(s []int64) int64 {
    method Sum2D (line 261) | func (this *ScanRss) Sum2D(s [][]int64) int64 {
    method Pow2 (line 271) | func (this *ScanRss) Pow2(exponent int) int {

FILE: golang/uPIMulator/src/assembler/prim/scan_ssa.go
  type ScanSsa (line 12) | type ScanSsa struct
    method Init (line 27) | func (this *ScanSsa) Init(command_line_parser *misc.CommandLineParser) {
    method InputDpuHost (line 129) | func (this *ScanSsa) InputDpuHost(execution int, dpu_id int) map[strin...
    method OutputDpuHost (line 162) | func (this *ScanSsa) OutputDpuHost(execution int, dpu_id int) map[stri...
    method InputDpuMramHeapPointerName (line 189) | func (this *ScanSsa) InputDpuMramHeapPointerName(
    method OutputDpuMramHeapPointerName (line 219) | func (this *ScanSsa) OutputDpuMramHeapPointerName(
    method NumExecutions (line 249) | func (this *ScanSsa) NumExecutions() int {
    method Sum1D (line 253) | func (this *ScanSsa) Sum1D(s []int64) int64 {
    method Sum2D (line 261) | func (this *ScanSsa) Sum2D(s [][]int64) int64 {
    method Pow2 (line 271) | func (this *ScanSsa) Pow2(exponent int) int {

FILE: golang/uPIMulator/src/assembler/prim/sel.go
  type Sel (line 11) | type Sel struct
    method Init (line 25) | func (this *Sel) Init(command_line_parser *misc.CommandLineParser) {
    method InputDpuHost (line 115) | func (this *Sel) InputDpuHost(execution int, dpu_id int) map[string]*e...
    method OutputDpuHost (line 143) | func (this *Sel) OutputDpuHost(execution int, dpu_id int) map[string]*...
    method InputDpuMramHeapPointerName (line 168) | func (this *Sel) InputDpuMramHeapPointerName(
    method OutputDpuMramHeapPointerName (line 195) | func (this *Sel) OutputDpuMramHeapPointerName(
    method NumExecutions (line 224) | func (this *Sel) NumExecutions() int {

FILE: golang/uPIMulator/src/assembler/prim/trns.go
  type Trns (line 11) | type Trns struct
    method Init (line 29) | func (this *Trns) Init(command_line_parser *misc.CommandLineParser) {
    method InputDpuHost (line 99) | func (this *Trns) InputDpuHost(execution int, dpu_id int) map[string]*...
    method OutputDpuHost (line 137) | func (this *Trns) OutputDpuHost(execution int, dpu_id int) map[string]...
    method InputDpuMramHeapPointerName (line 149) | func (this *Trns) InputDpuMramHeapPointerName(
    method OutputDpuMramHeapPointerName (line 185) | func (this *Trns) OutputDpuMramHeapPointerName(
    method NumExecutions (line 214) | func (this *Trns) NumExecutions() int {
    method Transpose (line 218) | func (this *Trns) Transpose(s [][]int64) [][]int64 {

FILE: golang/uPIMulator/src/assembler/prim/ts.go
  type Ts (line 11) | type Ts struct
    method Init (line 39) | func (this *Ts) Init(command_line_parser *misc.CommandLineParser) {
    method InputDpuHost (line 280) | func (this *Ts) InputDpuHost(execution int, dpu_id int) map[string]*en...
    method OutputDpuHost (line 333) | func (this *Ts) OutputDpuHost(execution int, dpu_id int) map[string]*e...
    method InputDpuMramHeapPointerName (line 373) | func (this *Ts) InputDpuMramHeapPointerName(
    method OutputDpuMramHeapPointerName (line 422) | func (this *Ts) OutputDpuMramHeapPointerName(
    method NumExecutions (line 440) | func (this *Ts) NumExecutions() int {
    method DotProduct (line 444) | func (this *Ts) DotProduct(a []int64, a_aux []int64, query []int64, re...

FILE: golang/uPIMulator/src/assembler/prim/uni.go
  type Uni (line 11) | type Uni struct
    method Init (line 27) | func (this *Uni) Init(command_line_parser *misc.CommandLineParser) {
    method InputDpuHost (line 145) | func (this *Uni) InputDpuHost(execution int, dpu_id int) map[string]*e...
    method OutputDpuHost (line 173) | func (this *Uni) OutputDpuHost(execution int, dpu_id int) map[string]*...
    method InputDpuMramHeapPointerName (line 220) | func (this *Uni) InputDpuMramHeapPointerName(
    method OutputDpuMramHeapPointerName (line 247) | func (this *Uni) OutputDpuMramHeapPointerName(
    method NumExecutions (line 272) | func (this *Uni) NumExecutions() int {

FILE: golang/uPIMulator/src/assembler/prim/va.go
  type Va (line 12) | type Va struct
    method Init (line 26) | func (this *Va) Init(command_line_parser *misc.CommandLineParser) {
    method InputDpuHost (line 96) | func (this *Va) InputDpuHost(execution int, dpu_id int) map[string]*en...
    method OutputDpuHost (line 129) | func (this *Va) OutputDpuHost(execution int, dpu_id int) map[string]*e...
    method InputDpuMramHeapPointerName (line 141) | func (this *Va) InputDpuMramHeapPointerName(
    method OutputDpuMramHeapPointerName (line 175) | func (this *Va) OutputDpuMramHeapPointerName(
    method NumExecutions (line 202) | func (this *Va) NumExecutions() int {
    method Pow2 (line 206) | func (this *Va) Pow2(exponent int) int {

FILE: golang/uPIMulator/src/compiler/compiler.go
  type Compiler (line 10) | type Compiler struct
    method Init (line 20) | func (this *Compiler) Init(command_line_parser *misc.CommandLineParser) {
    method Build (line 36) | func (this *Compiler) Build() {
    method Compile (line 48) | func (this *Compiler) Compile() {
    method CompileBenchmark (line 53) | func (this *Compiler) CompileBenchmark() {
    method CompileSdk (line 77) | func (this *Compiler) CompileSdk() {

FILE: golang/uPIMulator/src/core/job.go
  type Job (line 3) | type Job interface

FILE: golang/uPIMulator/src/core/thread_pool.go
  type ThreadPool (line 8) | type ThreadPool struct
    method Init (line 17) | func (this *ThreadPool) Init(num_threads int) {
    method Enque (line 28) | func (this *ThreadPool) Enque(job Job) {
    method Start (line 33) | func (this *ThreadPool) Start() {
    method Dispatch (line 40) | func (this *ThreadPool) Dispatch(job Job) {

FILE: golang/uPIMulator/src/linker/analyze_liveness_job.go
  type AnalyzeLivenessJob (line 9) | type AnalyzeLivenessJob struct
    method Init (line 13) | func (this *AnalyzeLivenessJob) Init(relocatable *kernel.Relocatable) {
    method Execute (line 17) | func (this *AnalyzeLivenessJob) Execute() {

FILE: golang/uPIMulator/src/linker/kernel/directive/ascii_directive.go
  type AsciiDirective (line 7) | type AsciiDirective struct
    method Init (line 11) | func (this *AsciiDirective) Init(characters string) {
    method Characters (line 15) | func (this *AsciiDirective) Characters() string {
    method Size (line 19) | func (this *AsciiDirective) Size() int64 {
    method Encode (line 23) | func (this *AsciiDirective) Encode() *encoding.ByteStream {

FILE: golang/uPIMulator/src/linker/kernel/directive/asciz_directive.go
  type AscizDirective (line 7) | type AscizDirective struct
    method Init (line 11) | func (this *AscizDirective) Init(characters string) {
    method Characters (line 15) | func (this *AscizDirective) Characters() string {
    method Size (line 19) | func (this *AscizDirective) Size() int64 {
    method Encode (line 23) | func (this *AscizDirective) Encode() *encoding.ByteStream {

FILE: golang/uPIMulator/src/linker/kernel/directive/byte_directive.go
  type ByteDirective (line 8) | type ByteDirective struct
    method Init (line 12) | func (this *ByteDirective) Init(value int64) {
    method Size (line 17) | func (this *ByteDirective) Size() int64 {
    method Immediate (line 21) | func (this *ByteDirective) Immediate() *word.Immediate {
    method Encode (line 25) | func (this *ByteDirective) Encode() *encoding.ByteStream {

FILE: golang/uPIMulator/src/linker/kernel/directive/long_directive.go
  type LongDirective (line 8) | type LongDirective struct
    method Init (line 12) | func (this *LongDirective) Init(value int64) {
    method Size (line 17) | func (this *LongDirective) Size() int64 {
    method Immediate (line 21) | func (this *LongDirective) Immediate() *word.Immediate {
    method Encode (line 25) | func (this *LongDirective) Encode() *encoding.ByteStream {

FILE: golang/uPIMulator/src/linker/kernel/directive/quad_directive.go
  type QuadDirective (line 8) | type QuadDirective struct
    method Init (line 12) | func (this *QuadDirective) Init(value int64) {
    method Size (line 17) | func (this *QuadDirective) Size() int64 {
    method Immediate (line 21) | func (this *QuadDirective) Immediate() *word.Immediate {
    method Encode (line 25) | func (this *QuadDirective) Encode() *encoding.ByteStream {

FILE: golang/uPIMulator/src/linker/kernel/directive/short_directive.go
  type ShortDirective (line 8) | type ShortDirective struct
    method Init (line 12) | func (this *ShortDirective) Init(value int64) {
    method Size (line 17) | func (this *ShortDirective) Size() int64 {
    method Immediate (line 21) | func (this *ShortDirective) Immediate() *word.Immediate {
    method Encode (line 25) | func (this *ShortDirective) Encode() *encoding.ByteStream {

FILE: golang/uPIMulator/src/linker/kernel/directive/zero_directive.go
  type ZeroDirective (line 9) | type ZeroDirective struct
    method Init (line 14) | func (this *ZeroDirective) Init(size int64, value int64) {
    method Size (line 26) | func (this *ZeroDirective) Size() int64 {
    method Immediate (line 30) | func (this *ZeroDirective) Immediate() *word.Immediate {
    method Encode (line 34) | func (this *ZeroDirective) Encode() *encoding.ByteStream {

FILE: golang/uPIMulator/src/linker/kernel/encodable.go
  type Encodable (line 7) | type Encodable interface

FILE: golang/uPIMulator/src/linker/kernel/executable.go
  type Executable (line 13) | type Executable struct
    method Init (line 28) | func (this *Executable) Init(name string) {
    method Name (line 39) | func (this *Executable) Name() string {
    method Path (line 43) | func (this *Executable) Path() string {
    method SetPath (line 47) | func (this *Executable) SetPath(path string) {
    method SetBenchmarkRelocatable (line 51) | func (this *Executable) SetBenchmarkRelocatable(relocatable *Relocatab...
    method AddSdkRelocatable (line 57) | func (this *Executable) AddSdkRelocatable(relocatable *Relocatable) {
    method TokenStream (line 64) | func (this *Executable) TokenStream() *lexer.TokenStream {
    method SetTokenStream (line 68) | func (this *Executable) SetTokenStream(token_stream *lexer.TokenStream) {
    method Ast (line 72) | func (this *Executable) Ast() *parser.Ast {
    method SetAst (line 76) | func (this *Executable) SetAst(ast *parser.Ast) {
    method Liveness (line 80) | func (this *Executable) Liveness() *Liveness {
    method DumpAssembly (line 84) | func (this *Executable) DumpAssembly() {
    method DumpAddresses (line 96) | func (this *Executable) DumpAddresses(path string) {
    method DumpAtomic (line 109) | func (this *Executable) DumpAtomic(path string) {
    method DumpIram (line 124) | func (this *Executable) DumpIram(path string) {
    method DumpWram (line 139) | func (this *Executable) DumpWram(path string) {
    method DumpMram (line 154) | func (this *Executable) DumpMram(path string) {
    method Section (line 169) | func (this *Executable) Section(section_name SectionName, name string)...
    method Sections (line 178) | func (this *Executable) Sections(section_name SectionName) map[*Sectio...
    method AddSection (line 188) | func (this *Executable) AddSection(
    method CurSection (line 201) | func (this *Executable) CurSection() *Section {
    method CheckoutSection (line 205) | func (this *Executable) CheckoutSection(section_name SectionName, name...
    method Label (line 214) | func (this *Executable) Label(label_name string) *Label {
    method Addresses (line 231) | func (this *Executable) Addresses() map[string]int64 {
    method AtomicByteStream (line 241) | func (this *Executable) AtomicByteStream() *encoding.ByteStream {
    method IramByteStream (line 260) | func (this *Executable) IramByteStream() *encoding.ByteStream {
    method WramByteStream (line 279) | func (this *Executable) WramByteStream() *encoding.ByteStream {
    method MramByteStream (line 298) | func (this *Executable) MramByteStream() *encoding.ByteStream {
    method UpdateLocalSymbols (line 317) | func (this *Executable) UpdateLocalSymbols(relocatable *Relocatable) {
    method UpdateUnresolvedSymbols (line 325) | func (this *Executable) UpdateUnresolvedSymbols(relocatable *Relocatab...
    method Sort (line 339) | func (this *Executable) Sort(begin_address int64, end_address int64) [...

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/acquire_cc.go
  type AcquireCc (line 7) | type AcquireCc struct
    method Init (line 11) | func (this *AcquireCc) Init(condition Condition) {
    method Condition (line 26) | func (this *AcquireCc) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/add_nz_cc.go
  type AddNzCc (line 7) | type AddNzCc struct
    method Init (line 11) | func (this *AddNzCc) Init(condition Condition) {
    method Condition (line 48) | func (this *AddNzCc) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/boot_cc.go
  type BootCc (line 7) | type BootCc struct
    method Init (line 11) | func (this *BootCc) Init(condition Condition) {
    method Condition (line 33) | func (this *BootCc) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/cc.go
  type Condition (line 3) | type Condition
  constant TRUE (line 6) | TRUE Condition = iota
  constant FALSE (line 7) | FALSE
  constant Z (line 9) | Z
  constant NZ (line 10) | NZ
  constant E (line 12) | E
  constant O (line 13) | O
  constant PL (line 15) | PL
  constant MI (line 16) | MI
  constant OV (line 18) | OV
  constant NOV (line 19) | NOV
  constant C (line 21) | C
  constant NC (line 22) | NC
  constant SZ (line 24) | SZ
  constant SNZ (line 25) | SNZ
  constant SPL (line 27) | SPL
  constant SMI (line 28) | SMI
  constant SO (line 30) | SO
  constant SE (line 31) | SE
  constant NC5 (line 33) | NC5
  constant NC6 (line 34) | NC6
  constant NC7 (line 35) | NC7
  constant NC8 (line 36) | NC8
  constant NC9 (line 37) | NC9
  constant NC10 (line 38) | NC10
  constant NC11 (line 39) | NC11
  constant NC12 (line 40) | NC12
  constant NC13 (line 41) | NC13
  constant NC14 (line 42) | NC14
  constant MAX (line 44) | MAX
  constant NMAX (line 45) | NMAX
  constant SH32 (line 47) | SH32
  constant NSH32 (line 48) | NSH32
  constant EQ (line 50) | EQ
  constant NEQ (line 51) | NEQ
  constant LTU (line 53) | LTU
  constant LEU (line 54) | LEU
  constant GTU (line 55) | GTU
  constant GEU (line 56) | GEU
  constant LTS (line 58) | LTS
  constant LES (line 59) | LES
  constant GTS (line 60) | GTS
  constant GES (line 61) | GES
  constant XZ (line 63) | XZ
  constant XNZ (line 64) | XNZ
  constant XLEU (line 66) | XLEU
  constant XGTU (line 67) | XGTU
  constant XLES (line 69) | XLES
  constant XGTS (line 70) | XGTS
  constant SMALL (line 72) | SMALL
  constant LARGE (line 73) | LARGE
  type Cc (line 76) | type Cc interface

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/const_cc_ge0.go
  type ConstCcGe0 (line 7) | type ConstCcGe0 struct
    method Init (line 11) | func (this *ConstCcGe0) Init(condition Condition) {
    method Condition (line 24) | func (this *ConstCcGe0) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/const_cc_geu.go
  type ConstCcGeu (line 7) | type ConstCcGeu struct
    method Init (line 11) | func (this *ConstCcGeu) Init(condition Condition) {
    method Condition (line 24) | func (this *ConstCcGeu) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/const_cc_zero.go
  type ConstCcZero (line 7) | type ConstCcZero struct
    method Init (line 11) | func (this *ConstCcZero) Init(condition Condition) {
    method Condition (line 24) | func (this *ConstCcZero) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/count_nz_cc.go
  type CountNzCc (line 7) | type CountNzCc struct
    method Init (line 11) | func (this *CountNzCc) Init(condition Condition) {
    method Condition (line 34) | func (this *CountNzCc) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/div_cc.go
  type DivCc (line 7) | type DivCc struct
    method Init (line 11) | func (this *DivCc) Init(condition Condition) {
    method Condition (line 29) | func (this *DivCc) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/div_nz_cc.go
  type DivNzCc (line 7) | type DivNzCc struct
    method Init (line 11) | func (this *DivNzCc) Init(condition Condition) {
    method Condition (line 28) | func (this *DivNzCc) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/ext_sub_set_cc.go
  type ExtSubSetCc (line 7) | type ExtSubSetCc struct
    method Init (line 11) | func (this *ExtSubSetCc) Init(condition Condition) {
    method Condition (line 52) | func (this *ExtSubSetCc) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/false_cc.go
  type FalseCc (line 7) | type FalseCc struct
    method Init (line 11) | func (this *FalseCc) Init(condition Condition) {
    method Condition (line 24) | func (this *FalseCc) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/imm_shift_nz_cc.go
  type ImmShiftNzCc (line 7) | type ImmShiftNzCc struct
    method Init (line 11) | func (this *ImmShiftNzCc) Init(condition Condition) {
    method Condition (line 38) | func (this *ImmShiftNzCc) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/log_nz_cc.go
  type LogNzCc (line 7) | type LogNzCc struct
    method Init (line 11) | func (this *LogNzCc) Init(condition Condition) {
    method Condition (line 34) | func (this *LogNzCc) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/log_set_cc.go
  type LogSetCc (line 7) | type LogSetCc struct
    method Init (line 11) | func (this *LogSetCc) Init(condition Condition) {
    method Condition (line 27) | func (this *LogSetCc) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/mul_nz_cc.go
  type MulNzCc (line 7) | type MulNzCc struct
    method Init (line 11) | func (this *MulNzCc) Init(condition Condition) {
    method Condition (line 34) | func (this *MulNzCc) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/no_cc.go
  type NoCc (line 7) | type NoCc struct
    method Init (line 11) | func (this *NoCc) Init(condition Condition) {
    method Condition (line 22) | func (this *NoCc) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/release_cc.go
  type ReleaseCc (line 7) | type ReleaseCc struct
    method Init (line 11) | func (this *ReleaseCc) Init(condition Condition) {
    method Condition (line 24) | func (this *ReleaseCc) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/shift_nz_cc.go
  type ShiftNzCc (line 7) | type ShiftNzCc struct
    method Init (line 11) | func (this *ShiftNzCc) Init(condition Condition) {
    method Condition (line 40) | func (this *ShiftNzCc) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/sub_nz_cc.go
  type SubNzCc (line 7) | type SubNzCc struct
    method Init (line 11) | func (this *SubNzCc) Init(condition Condition) {
    method Condition (line 50) | func (this *SubNzCc) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/sub_set_cc.go
  type SubSetCc (line 7) | type SubSetCc struct
    method Init (line 11) | func (this *SubSetCc) Init(condition Condition) {
    method Condition (line 29) | func (this *SubSetCc) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/true_cc.go
  type TrueCc (line 7) | type TrueCc struct
    method Init (line 11) | func (this *TrueCc) Init(condition Condition) {
    method Condition (line 24) | func (this *TrueCc) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/cc/true_false_cc.go
  type TrueFalseCc (line 7) | type TrueFalseCc struct
    method Init (line 11) | func (this *TrueFalseCc) Init(condition Condition) {
    method Condition (line 25) | func (this *TrueFalseCc) Condition() Condition {

FILE: golang/uPIMulator/src/linker/kernel/instruction/endian.go
  type Endian (line 3) | type Endian
  constant LITTLE (line 6) | LITTLE Endian = iota
  constant BIG (line 7) | BIG

FILE: golang/uPIMulator/src/linker/kernel/instruction/exception.go
  type Exception (line 3) | type Exception
  constant MEMORY_FAULT (line 6) | MEMORY_FAULT Exception = iota
  constant DMA_FAULT (line 7) | DMA_FAULT
  constant HEAP_FULL (line 8) | HEAP_FULL
  constant DIVISION_BY_ZERO (line 9) | DIVISION_BY_ZERO
  constant ASSERT (line 10) | ASSERT
  constant HALT (line 11) | HALT
  constant PRINT_OVERFLOW (line 12) | PRINT_OVERFLOW
  constant ALREADY_PROFILING (line 13) | ALREADY_PROFILING
  constant NOT_PROFILING (line 14) | NOT_PROFILING

FILE: golang/uPIMulator/src/linker/kernel/instruction/flag.go
  type Flag (line 3) | type Flag
  constant ZERO (line 6) | ZERO Flag = iota
  constant CARRY (line 7) | CARRY

FILE: golang/uPIMulator/src/linker/kernel/instruction/instruction.go
  type Instruction (line 14) | type Instruction struct
    method InitRici (line 34) | func (this *Instruction) InitRici(
    method InitRri (line 63) | func (this *Instruction) InitRri(
    method InitRric (line 94) | func (this *Instruction) InitRric(
    method InitRrici (line 144) | func (this *Instruction) InitRrici(
    method InitRrif (line 210) | func (this *Instruction) InitRrif(
    method InitRrr (line 237) | func (this *Instruction) InitRrr(
    method InitRrrc (line 255) | func (this *Instruction) InitRrrc(
    method InitRrrci (line 297) | func (this *Instruction) InitRrrci(
    method InitZri (line 358) | func (this *Instruction) InitZri(op_code OpCode, ra *reg_descriptor.Sr...
    method InitZric (line 383) | func (this *Instruction) InitZric(
    method InitZrici (line 431) | func (this *Instruction) InitZrici(
    method InitZrif (line 495) | func (this *Instruction) InitZrif(
    method InitZrr (line 520) | func (this *Instruction) InitZrr(
    method InitZrrc (line 536) | func (this *Instruction) InitZrrc(
    method InitZrrci (line 576) | func (this *Instruction) InitZrrci(
    method InitSRri (line 635) | func (this *Instruction) InitSRri(
    method InitSRric (line 672) | func (this *Instruction) InitSRric(
    method InitSRrici (line 728) | func (this *Instruction) InitSRrici(
    method InitSRrif (line 800) | func (this *Instruction) InitSRrif(
    method InitSRrr (line 833) | func (this *Instruction) InitSRrr(
    method InitSRrrc (line 857) | func (this *Instruction) InitSRrrc(
    method InitSRrrci (line 905) | func (this *Instruction) InitSRrrci(
    method InitRr (line 972) | func (this *Instruction) InitRr(
    method InitRrc (line 988) | func (this *Instruction) InitRrc(
    method InitRrci (line 1011) | func (this *Instruction) InitRrci(
    method InitZr (line 1058) | func (this *Instruction) InitZr(op_code OpCode, ra *reg_descriptor.Src...
    method InitZrc (line 1069) | func (this *Instruction) InitZrc(
    method InitZrci (line 1090) | func (this *Instruction) InitZrci(
    method InitSRr (line 1135) | func (this *Instruction) InitSRr(
    method InitSRrc (line 1157) | func (this *Instruction) InitSRrc(
    method InitSRrci (line 1186) | func (this *Instruction) InitSRrci(
    method InitDrdici (line 1239) | func (this *Instruction) InitDrdici(
    method InitRrri (line 1286) | func (this *Instruction) InitRrri(
    method InitRrrici (line 1308) | func (this *Instruction) InitRrrici(
    method InitZrri (line 1344) | func (this *Instruction) InitZrri(
    method InitZrrici (line 1364) | func (this *Instruction) InitZrrici(
    method InitSRrri (line 1398) | func (this *Instruction) InitSRrri(
    method InitSRrrici (line 1426) | func (this *Instruction) InitSRrrici(
    method InitRir (line 1468) | func (this *Instruction) InitRir(
    method InitRirc (line 1489) | func (this *Instruction) InitRirc(
    method InitRirci (line 1517) | func (this *Instruction) InitRirci(
    method InitZir (line 1552) | func (this *Instruction) InitZir(op_code OpCode, imm int64, ra *reg_de...
    method InitZirc (line 1567) | func (this *Instruction) InitZirc(
    method InitZirci (line 1593) | func (this *Instruction) InitZirci(
    method InitSRirc (line 1626) | func (this *Instruction) InitSRirc(
    method InitSRirci (line 1660) | func (this *Instruction) InitSRirci(
    method InitR (line 1701) | func (this *Instruction) InitR(op_code OpCode, rc *reg_descriptor.GpRe...
    method InitRci (line 1712) | func (this *Instruction) InitRci(
    method InitZ (line 1740) | func (this *Instruction) InitZ(op_code OpCode) {
    method InitZci (line 1750) | func (this *Instruction) InitZci(
    method InitSR (line 1776) | func (this *Instruction) InitSR(
    method InitSRci (line 1796) | func (this *Instruction) InitSRci(
    method InitCi (line 1830) | func (this *Instruction) InitCi(op_code OpCode, condition cc.Condition...
    method InitI (line 1852) | func (this *Instruction) InitI(op_code OpCode, imm int64) {
    method InitDdci (line 1865) | func (this *Instruction) InitDdci(
    method InitErri (line 1896) | func (this *Instruction) InitErri(
    method InitSErri (line 1921) | func (this *Instruction) InitSErri(
    method InitEdri (line 1952) | func (this *Instruction) InitEdri(
    method InitErii (line 1977) | func (this *Instruction) InitErii(
    method InitErir (line 2004) | func (this *Instruction) InitErir(
    method InitErid (line 2030) | func (this *Instruction) InitErid(
    method InitDmaRri (line 2056) | func (this *Instruction) InitDmaRri(
    method OpCode (line 2077) | func (this *Instruction) OpCode() OpCode {
    method Suffix (line 2081) | func (this *Instruction) Suffix() Suffix {
    method Rc (line 2085) | func (this *Instruction) Rc() *reg_descriptor.GpRegDescriptor {
    method Ra (line 2089) | func (this *Instruction) Ra() *reg_descriptor.SrcRegDescriptor {
    method Rb (line 2093) | func (this *Instruction) Rb() *reg_descriptor.SrcRegDescriptor {
    method Dc (line 2097) | func (this *Instruction) Dc() *reg_descriptor.PairRegDescriptor {
    method Db (line 2101) | func (this *Instruction) Db() *reg_descriptor.PairRegDescriptor {
    method Condition (line 2105) | func (this *Instruction) Condition() cc.Condition {
    method Imm (line 2114) | func (this *Instruction) Imm() *word.Immediate {
    method Off (line 2118) | func (this *Instruction) Off() *word.Immediate {
    method Pc (line 2122) | func (this *Instruction) Pc() *word.Immediate {
    method Endian (line 2126) | func (this *Instruction) Endian() Endian {
    method Encode (line 2135) | func (this *Instruction) Encode() *encoding.ByteStream {
    method EncodeRici (line 2277) | func (this *Instruction) EncodeRici(word_ *word.Word) {
    method EncodeRri (line 2305) | func (this *Instruction) EncodeRri(word_ *word.Word) {
    method EncodeRric (line 2329) | func (this *Instruction) EncodeRric(word_ *word.Word) {
    method EncodeRrici (line 2357) | func (this *Instruction) EncodeRrici(word_ *word.Word) {
    method EncodeRrif (line 2389) | func (this *Instruction) EncodeRrif(word_ *word.Word) {
    method EncodeRrr (line 2417) | func (this *Instruction) EncodeRrr(word_ *word.Word) {
    method EncodeRrrc (line 2441) | func (this *Instruction) EncodeRrrc(word_ *word.Word) {
    method EncodeRrrci (line 2469) | func (this *Instruction) EncodeRrrci(word_ *word.Word) {
    method EncodeZri (line 2501) | func (this *Instruction) EncodeZri(word_ *word.Word) {
    method EncodeZric (line 2521) | func (this *Instruction) EncodeZric(word_ *word.Word) {
    method EncodeZrici (line 2545) | func (this *Instruction) EncodeZrici(word_ *word.Word) {
    method EncodeZrif (line 2573) | func (this *Instruction) EncodeZrif(word_ *word.Word) {
    method EncodeZrr (line 2597) | func (this *Instruction) EncodeZrr(word_ *word.Word) {
    method EncodeZrrc (line 2617) | func (this *Instruction) EncodeZrrc(word_ *word.Word) {
    method EncodeZrrci (line 2641) | func (this *Instruction) EncodeZrrci(word_ *word.Word) {
    method EncodeSRri (line 2669) | func (this *Instruction) EncodeSRri(word_ *word.Word) {
    method EncodeSRric (line 2693) | func (this *Instruction) EncodeSRric(word_ *word.Word) {
    method EncodeSRrici (line 2721) | func (this *Instruction) EncodeSRrici(word_ *word.Word) {
    method EncodeSRrif (line 2753) | func (this *Instruction) EncodeSRrif(word_ *word.Word) {
    method EncodeSRrr (line 2781) | func (this *Instruction) EncodeSRrr(word_ *word.Word) {
    method EncodeSRrrc (line 2805) | func (this *Instruction) EncodeSRrrc(word_ *word.Word) {
    method EncodeSRrrci (line 2833) | func (this *Instruction) EncodeSRrrci(word_ *word.Word) {
    method EncodeRr (line 2865) | func (this *Instruction) EncodeRr(word_ *word.Word) {
    method EncodeRrc (line 2885) | func (this *Instruction) EncodeRrc(word_ *word.Word) {
    method EncodeRrci (line 2909) | func (this *Instruction) EncodeRrci(word_ *word.Word) {
    method EncodeZr (line 2937) | func (this *Instruction) EncodeZr(word_ *word.Word) {
    method EncodeZrc (line 2953) | func (this *Instruction) EncodeZrc(word_ *word.Word) {
    method EncodeZrci (line 2973) | func (this *Instruction) EncodeZrci(word_ *word.Word) {
    method EncodeSRr (line 2997) | func (this *Instruction) EncodeSRr(word_ *word.Word) {
    method EncodeSRrc (line 3017) | func (this *Instruction) EncodeSRrc(word_ *word.Word) {
    method EncodeSRrci (line 3041) | func (this *Instruction) EncodeSRrci(word_ *word.Word) {
    method EncodeDrdici (line 3069) | func (this *Instruction) EncodeDrdici(word_ *word.Word) {
    method EncodeRrri (line 3105) | func (this *Instruction) EncodeRrri(word_ *word.Word) {
    method EncodeRrrici (line 3133) | func (this *Instruction) EncodeRrrici(word_ *word.Word) {
    method EncodeZrri (line 3169) | func (this *Instruction) EncodeZrri(word_ *word.Word) {
    method EncodeZrrici (line 3193) | func (this *Instruction) EncodeZrrici(word_ *word.Word) {
    method EncodeSRrri (line 3225) | func (this *Instruction) EncodeSRrri(word_ *word.Word) {
    method EncodeSRrrici (line 3253) | func (this *Instruction) EncodeSRrrici(word_ *word.Word) {
    method EncodeRir (line 3289) | func (this *Instruction) EncodeRir(word_ *word.Word) {
    method EncodeRirc (line 3313) | func (this *Instruction) EncodeRirc(word_ *word.Word) {
    method EncodeRirci (line 3341) | func (this *Instruction) EncodeRirci(word_ *word.Word) {
    method EncodeZir (line 3373) | func (this *Instruction) EncodeZir(word_ *word.Word) {
    method EncodeZirc (line 3393) | func (this *Instruction) EncodeZirc(word_ *word.Word) {
    method EncodeZirci (line 3417) | func (this *Instruction) EncodeZirci(word_ *word.Word) {
    method EncodeSRirc (line 3445) | func (this *Instruction) EncodeSRirc(word_ *word.Word) {
    method EncodeSRirci (line 3473) | func (this *Instruction) EncodeSRirci(word_ *word.Word) {
    method EncodeR (line 3505) | func (this *Instruction) EncodeR(word_ *word.Word) {
    method EncodeRci (line 3521) | func (this *Instruction) EncodeRci(word_ *word.Word) {
    method EncodeZ (line 3545) | func (this *Instruction) EncodeZ(word_ *word.Word) {
    method EncodeZci (line 3557) | func (this *Instruction) EncodeZci(word_ *word.Word) {
    method EncodeSR (line 3577) | func (this *Instruction) EncodeSR(word_ *word.Word) {
    method EncodeSRci (line 3593) | func (this *Instruction) EncodeSRci(word_ *word.Word) {
    method EncodeCi (line 3617) | func (this *Instruction) EncodeCi(word_ *word.Word) {
    method EncodeI (line 3637) | func (this *Instruction) EncodeI(word_ *word.Word) {
    method EncodeDdci (line 3653) | func (this *Instruction) EncodeDdci(word_ *word.Word) {
    method EncodeErri (line 3681) | func (this *Instruction) EncodeErri(word_ *word.Word) {
    method EncodeSErri (line 3709) | func (this *Instruction) EncodeSErri(word_ *word.Word) {
    method EncodeEdri (line 3737) | func (this *Instruction) EncodeEdri(word_ *word.Word) {
    method EncodeErii (line 3765) | func (this *Instruction) EncodeErii(word_ *word.Word) {
    method EncodeErir (line 3793) | func (this *Instruction) EncodeErir(word_ *word.Word) {
    method EncodeErid (line 3821) | func (this *Instruction) EncodeErid(word_ *word.Word) {
    method EncodeDmaRri (line 3849) | func (this *Instruction) EncodeDmaRri(word_ *word.Word) {
    method EncodeOpCode (line 3873) | func (this *Instruction) EncodeOpCode(word_ *word.Word) {
    method EncodeSuffix (line 3877) | func (this *Instruction) EncodeSuffix(word_ *word.Word) {
    method EncodeGpRegDescriptor (line 3881) | func (this *Instruction) EncodeGpRegDescriptor(
    method EncodeSrcRegDescriptor (line 3890) | func (this *Instruction) EncodeSrcRegDescriptor(
    method EncodePairRegDescriptor (line 3910) | func (this *Instruction) EncodePairRegDescriptor(
    method EncodeImm (line 3919) | func (this *Instruction) EncodeImm(word_ *word.Word, begin int, end in...
    method EncodeCondition (line 3923) | func (this *Instruction) EncodeCondition(
    method EncodePc (line 3932) | func (this *Instruction) EncodePc(word_ *word.Word, begin int, end int...
    method EncodeEndian (line 3936) | func (this *Instruction) EncodeEndian(word_ *word.Word, begin int, end...
    method EncodeOff (line 3940) | func (this *Instruction) EncodeOff(word_ *word.Word, begin int, end in...
    method Decode (line 3944) | func (this *Instruction) Decode(byte_stream *encoding.ByteStream) {
    method DecodeRici (line 4085) | func (this *Instruction) DecodeRici(word_ *word.Word) {
    method DecodeRri (line 4122) | func (this *Instruction) DecodeRri(word_ *word.Word) {
    method DecodeRric (line 4166) | func (this *Instruction) DecodeRric(word_ *word.Word) {
    method DecodeRrici (line 4217) | func (this *Instruction) DecodeRrici(word_ *word.Word) {
    method DecodeRrif (line 4281) | func (this *Instruction) DecodeRrif(word_ *word.Word) {
    method DecodeRrr (line 4314) | func (this *Instruction) DecodeRrr(word_ *word.Word) {
    method DecodeRrrc (line 4338) | func (this *Instruction) DecodeRrrc(word_ *word.Word) {
    method DecodeRrrci (line 4368) | func (this *Instruction) DecodeRrrci(word_ *word.Word) {
    method DecodeZri (line 4405) | func (this *Instruction) DecodeZri(word_ *word.Word) {
    method DecodeZric (line 4445) | func (this *Instruction) DecodeZric(word_ *word.Word) {
    method DecodeZrici (line 4492) | func (this *Instruction) DecodeZrici(word_ *word.Word) {
    method DecodeZrif (line 4552) | func (this *Instruction) DecodeZrif(word_ *word.Word) {
    method DecodeZrr (line 4581) | func (this *Instruction) DecodeZrr(word_ *word.Word) {
    method DecodeZrrc (line 4601) | func (this *Instruction) DecodeZrrc(word_ *word.Word) {
    method DecodeZrrci (line 4627) | func (this *Instruction) DecodeZrrci(word_ *word.Word) {
    method DecodeSRri (line 4660) | func (this *Instruction) DecodeSRri(word_ *word.Word) {
    method DecodeSRric (line 4704) | func (this *Instruction) DecodeSRric(word_ *word.Word) {
    method DecodeSRrici (line 4755) | func (this *Instruction) DecodeSRrici(word_ *word.Word) {
    method DecodeSRrif (line 4819) | func (this *Instruction) DecodeSRrif(word_ *word.Word) {
    method DecodeSRrr (line 4852) | func (this *Instruction) DecodeSRrr(word_ *word.Word) {
    method DecodeSRrrc (line 4876) | func (this *Instruction) DecodeSRrrc(word_ *word.Word) {
    method DecodeSRrrci (line 4906) | func (this *Instruction) DecodeSRrrci(word_ *word.Word) {
    method DecodeRr (line 4943) | func (this *Instruction) DecodeRr(word_ *word.Word) {
    method DecodeRrc (line 4963) | func (this *Instruction) DecodeRrc(word_ *word.Word) {
    method DecodeRrci (line 4989) | func (this *Instruction) DecodeRrci(word_ *word.Word) {
    method DecodeZr (line 5022) | func (this *Instruction) DecodeZr(word_ *word.Word) {
    method DecodeZrc (line 5038) | func (this *Instruction) DecodeZrc(word_ *word.Word) {
    method DecodeZrci (line 5060) | func (this *Instruction) DecodeZrci(word_ *word.Word) {
    method DecodeSRr (line 5089) | func (this *Instruction) DecodeSRr(word_ *word.Word) {
    method DecodeSRrc (line 5109) | func (this *Instruction) DecodeSRrc(word_ *word.Word) {
    method DecodeSRrci (line 5135) | func (this *Instruction) DecodeSRrci(word_ *word.Word) {
    method DecodeDrdici (line 5168) | func (this *Instruction) DecodeDrdici(word_ *word.Word) {
    method DecodeRrri (line 5212) | func (this *Instruction) DecodeRrri(word_ *word.Word) {
    method DecodeRrrici (line 5243) | func (this *Instruction) DecodeRrrici(word_ *word.Word) {
    method DecodeZrri (line 5287) | func (this *Instruction) DecodeZrri(word_ *word.Word) {
    method DecodeZrrici (line 5314) | func (this *Instruction) DecodeZrrici(word_ *word.Word) {
    method DecodeSRrri (line 5354) | func (this *Instruction) DecodeSRrri(word_ *word.Word) {
    method DecodeSRrrici (line 5385) | func (this *Instruction) DecodeSRrrici(word_ *word.Word) {
    method DecodeRir (line 5429) | func (this *Instruction) DecodeRir(word_ *word.Word) {
    method DecodeRirc (line 5456) | func (this *Instruction) DecodeRirc(word_ *word.Word) {
    method DecodeRirci (line 5489) | func (this *Instruction) DecodeRirci(word_ *word.Word) {
    method DecodeZir (line 5529) | func (this *Instruction) DecodeZir(word_ *word.Word) {
    method DecodeZirc (line 5552) | func (this *Instruction) DecodeZirc(word_ *word.Word) {
    method DecodeZirci (line 5581) | func (this *Instruction) DecodeZirci(word_ *word.Word) {
    method DecodeSRirc (line 5617) | func (this *Instruction) DecodeSRirc(word_ *word.Word) {
    method DecodeSRirci (line 5650) | func (this *Instruction) DecodeSRirci(word_ *word.Word) {
    method DecodeR (line 5690) | func (this *Instruction) DecodeR(word_ *word.Word) {
    method DecodeRci (line 5706) | func (this *Instruction) DecodeRci(word_ *word.Word) {
    method DecodeZ (line 5735) | func (this *Instruction) DecodeZ(word_ *word.Word) {
    method DecodeZci (line 5747) | func (this *Instruction) DecodeZci(word_ *word.Word) {
    method DecodeSR (line 5772) | func (this *Instruction) DecodeSR(word_ *word.Word) {
    method DecodeSRci (line 5788) | func (this *Instruction) DecodeSRci(word_ *word.Word) {
    method DecodeCi (line 5817) | func (this *Instruction) DecodeCi(word_ *word.Word) {
    method DecodeI (line 5842) | func (this *Instruction) DecodeI(word_ *word.Word) {
    method DecodeDdci (line 5861) | func (this *Instruction) DecodeDdci(word_ *word.Word) {
    method DecodeErri (line 5894) | func (this *Instruction) DecodeErri(word_ *word.Word) {
    method DecodeSErri (line 5927) | func (this *Instruction) DecodeSErri(word_ *word.Word) {
    method DecodeEdri (line 5960) | func (this *Instruction) DecodeEdri(word_ *word.Word) {
    method DecodeErii (line 5993) | func (this *Instruction) DecodeErii(word_ *word.Word) {
    method DecodeErir (line 6029) | func (this *Instruction) DecodeErir(word_ *word.Word) {
    method DecodeErid (line 6062) | func (this *Instruction) DecodeErid(word_ *word.Word) {
    method DecodeDmaRri (line 6095) | func (this *Instruction) DecodeDmaRri(word_ *word.Word) {
    method DecodeOpCode (line 6122) | func (this *Instruction) DecodeOpCode(word_ *word.Word) OpCode {
    method DecodeSuffix (line 6126) | func (this *Instruction) DecodeSuffix(word_ *word.Word) Suffix {
    method DecodeGpRegDescriptor (line 6130) | func (this *Instruction) DecodeGpRegDescriptor(
    method DecodeSrcRegDescriptor (line 6143) | func (this *Instruction) DecodeSrcRegDescriptor(
    method DecodePairRegDescriptor (line 6172) | func (this *Instruction) DecodePairRegDescriptor(
    method DecodeImm (line 6185) | func (this *Instruction) DecodeImm(
    method DecodeCondition (line 6194) | func (this *Instruction) DecodeCondition(word_ *word.Word, begin int, ...
    method DecodePc (line 6198) | func (this *Instruction) DecodePc(word_ *word.Word, begin int, end int...
    method DecodeEndian (line 6202) | func (this *Instruction) DecodeEndian(word_ *word.Word, begin int, end...
    method DecodeOff (line 6206) | func (this *Instruction) DecodeOff(
    method OpCodeBegin (line 6215) | func (this *Instruction) OpCodeBegin() int {
    method OpCodeEnd (line 6219) | func (this *Instruction) OpCodeEnd() int {
    method OpCodeWidth (line 6223) | func (this *Instruction) OpCodeWidth() int {
    method SuffixBegin (line 6227) | func (this *Instruction) SuffixBegin() int {
    method SuffixEnd (line 6231) | func (this *Instruction) SuffixEnd() int {
    method SuffixWidth (line 6235) | func (this *Instruction) SuffixWidth() int {
    method RegisterWidth (line 6239) | func (this *Instruction) RegisterWidth() int {
    method ConditionWidth (line 6250) | func (this *Instruction) ConditionWidth() int {
    method PcWidth (line 6254) | func (this *Instruction) PcWidth() int {
    method EndianWidth (line 6261) | func (this *Instruction) EndianWidth() int {
    method AcquireRiciOpCodes (line 6265) | func (this *Instruction) AcquireRiciOpCodes() map[OpCode]bool {
    method ReleaseRiciOpCodes (line 6271) | func (this *Instruction) ReleaseRiciOpCodes() map[OpCode]bool {
    method BootRiciOpCodes (line 6277) | func (this *Instruction) BootRiciOpCodes() map[OpCode]bool {
    method RiciOpCodes (line 6284) | func (this *Instruction) RiciOpCodes() map[OpCode]bool {
    method AddRriOpCodes (line 6306) | func (this *Instruction) AddRriOpCodes() map[OpCode]bool {
    method AsrRriOpCodes (line 6316) | func (this *Instruction) AsrRriOpCodes() map[OpCode]bool {
    method CallRriOpCodes (line 6332) | func (this *Instruction) CallRriOpCodes() map[OpCode]bool {
    method RriOpCodes (line 6338) | func (this *Instruction) RriOpCodes() map[OpCode]bool {
    method AddRricOpCodes (line 6360) | func (this *Instruction) AddRricOpCodes() map[OpCode]bool {
    method AsrRricOpCodes (line 6376) | func (this *Instruction) AsrRricOpCodes() map[OpCode]bool {
    method SubRricOpCodes (line 6392) | func (this *Instruction) SubRricOpCodes() map[OpCode]bool {
    method RricOpCodes (line 6399) | func (this *Instruction) RricOpCodes() map[OpCode]bool {
    method AddRriciOpCodes (line 6421) | func (this *Instruction) AddRriciOpCodes() map[OpCode]bool {
    method AndRriciOpCodes (line 6428) | func (this *Instruction) AndRriciOpCodes() map[OpCode]bool {
    method AsrRriciOpCodes (line 6442) | func (this *Instruction) AsrRriciOpCodes() map[OpCode]bool {
    method SubRriciOpCodes (line 6458) | func (this *Instruction) SubRriciOpCodes() map[OpCode]bool {
    method RriciOpCodes (line 6465) | func (this *Instruction) RriciOpCodes() map[OpCode]bool {
    method RrifOpCodes (line 6492) | func (this *Instruction) RrifOpCodes() map[OpCode]bool {
    method RrrOpCodes (line 6510) | func (this *Instruction) RrrOpCodes() map[OpCode]bool {
    method AddRrrcOpCodes (line 6555) | func (this *Instruction) AddRrrcOpCodes() map[OpCode]bool {
    method RsubRrrcOpCodes (line 6596) | func (this *Instruction) RsubRrrcOpCodes() map[OpCode]bool {
    method SubRrrcOpCodes (line 6603) | func (this *Instruction) SubRrrcOpCodes() map[OpCode]bool {
    method RrrcOpCodes (line 6610) | func (this *Instruction) RrrcOpCodes() map[OpCode]bool {
    method AddRrrciOpCodes (line 6632) | func (this *Instruction) AddRrrciOpCodes() map[OpCode]bool {
    method AndRrrciOpCodes (line 6639) | func (this *Instruction) AndRrrciOpCodes() map[OpCode]bool {
    method AsrRrrciOpCodes (line 6653) | func (this *Instruction) AsrRrrciOpCodes() map[OpCode]bool {
    method MulRrrciOpCodes (line 6670) | func (this *Instruction) MulRrrciOpCodes() map[OpCode]bool {
    method RsubRrrciOpCodes (line 6687) | func (this *Instruction) RsubRrrciOpCodes() map[OpCode]bool {
    method RrrciOpCodes (line 6696) | func (this *Instruction) RrrciOpCodes() map[OpCode]bool {
    method RrOpCodes (line 6728) | func (this *Instruction) RrOpCodes() map[OpCode]bool {
    method RrcOpCodes (line 6743) | func (this *Instruction) RrcOpCodes() map[OpCode]bool {
    method CaoRrciOpCodes (line 6757) | func (this *Instruction) CaoRrciOpCodes() map[OpCode]bool {
    method ExtsbRrciOpCodes (line 6766) | func (this *Instruction) ExtsbRrciOpCodes() map[OpCode]bool {
    method TimeCfgRrciOpCodes (line 6776) | func (this *Instruction) TimeCfgRrciOpCodes() map[OpCode]bool {
    method RrciOpCodes (line 6782) | func (this *Instruction) RrciOpCodes() map[OpCode]bool {
    method DivStepDrdiciOpCodes (line 6804) | func (this *Instruction) DivStepDrdiciOpCodes() map[OpCode]bool {
    method MulStepDrdiciOpCodes (line 6810) | func (this *Instruction) MulStepDrdiciOpCodes() map[OpCode]bool {
    method DrdiciOpCodes (line 6816) | func (this *Instruction) DrdiciOpCodes() map[OpCode]bool {
    method RrriOpCodes (line 6833) | func (this *Instruction) RrriOpCodes() map[OpCode]bool {
    method RrriciOpCodes (line 6842) | func (this *Instruction) RrriciOpCodes() map[OpCode]bool {
    method RirOpCodes (line 6851) | func (this *Instruction) RirOpCodes() map[OpCode]bool {
    method RircOpCodes (line 6858) | func (this *Instruction) RircOpCodes() map[OpCode]bool {
    method RirciOpCodes (line 6865) | func (this *Instruction) RirciOpCodes() map[OpCode]bool {
    method ROpCodes (line 6872) | func (this *Instruction) ROpCodes() map[OpCode]bool {
    method RciOpCodes (line 6878) | func (this *Instruction) RciOpCodes() map[OpCode]bool {
    method CiOpCodes (line 6884) | func (this *Instruction) CiOpCodes() map[OpCode]bool {
    method IOpCodes (line 6890) | func (this *Instruction) IOpCodes() map[OpCode]bool {
    method MovdDdciOpCodes (line 6896) | func (this *Instruction) MovdDdciOpCodes() map[OpCode]bool {
    method SwapdDdciOpCodes (line 6902) | func (this *Instruction) SwapdDdciOpCodes() map[OpCode]bool {
    method DdciOpCodes (line 6908) | func (this *Instruction) DdciOpCodes() map[OpCode]bool {
    method ErriOpCodes (line 6925) | func (this *Instruction) ErriOpCodes() map[OpCode]bool {
    method EdriOpCodes (line 6935) | func (this *Instruction) EdriOpCodes() map[OpCode]bool {
    method EriiOpCodes (line 6941) | func (this *Instruction) EriiOpCodes() map[OpCode]bool {
    method ErirOpCodes (line 6954) | func (this *Instruction) ErirOpCodes() map[OpCode]bool {
    method EridOpCodes (line 6962) | func (this *Instruction) EridOpCodes() map[OpCode]bool {
    method LdmaDmaRriOpCodes (line 6968) | func (this *Instruction) LdmaDmaRriOpCodes() map[OpCode]bool {
    method LdmaiDmaRriOpCodes (line 6974) | func (this *Instruction) LdmaiDmaRriOpCodes() map[OpCode]bool {
    method SdmaDmaRriOpCodes (line 6980) | func (this *Instruction) SdmaDmaRriOpCodes() map[OpCode]bool {
    method DmaRriOpCodes (line 6986) | func (this *Instruction) DmaRriOpCodes() map[OpCode]bool {
    method Stringify (line 7008) | func (this *Instruction) Stringify() string {
    method StringifyOpCode (line 7377) | func (this *Instruction) StringifyOpCode() string {
    method StringifySuffix (line 7552) | func (this *Instruction) StringifySuffix() string {
    method StringifyGpRegDescriptor (line 7717) | func (this *Instruction) StringifyGpRegDescriptor(
    method StringifySrcRegDescriptor (line 7723) | func (this *Instruction) StringifySrcRegDescriptor(
    method StringifyPairRegDescriptor (line 7754) | func (this *Instruction) StringifyPairRegDescriptor(
    method StringifyImm (line 7760) | func (this *Instruction) StringifyImm(imm *word.Immediate) string {
    method StringifyCondition (line 7764) | func (this *Instruction) StringifyCondition(condition cc.Condition) st...
    method StringifyEndian (line 7871) | func (this *Instruction) StringifyEndian(endian Endian) string {
    method StringifyOff (line 7882) | func (this *Instruction) StringifyOff(off *word.Immediate) string {
    method StringifyPc (line 7886) | func (this *Instruction) StringifyPc(pc *word.Immediate) string {

FILE: golang/uPIMulator/src/linker/kernel/instruction/op_code.go
  type OpCode (line 3) | type OpCode
  constant ACQUIRE (line 6) | ACQUIRE OpCode = iota
  constant RELEASE (line 7) | RELEASE
  constant ADD (line 9) | ADD
  constant ADDC (line 10) | ADDC
  constant AND (line 11) | AND
  constant ANDN (line 12) | ANDN
  constant ASR (line 13) | ASR
  constant CAO (line 14) | CAO
  constant CLO (line 15) | CLO
  constant CLS (line 16) | CLS
  constant CLZ (line 17) | CLZ
  constant CMPB4 (line 18) | CMPB4
  constant DIV_STEP (line 19) | DIV_STEP
  constant EXTSB (line 20) | EXTSB
  constant EXTSH (line 21) | EXTSH
  constant EXTUB (line 22) | EXTUB
  constant EXTUH (line 23) | EXTUH
  constant LSL (line 24) | LSL
  constant LSL_ADD (line 25) | LSL_ADD
  constant LSL_SUB (line 26) | LSL_SUB
  constant LSL1 (line 27) | LSL1
  constant LSL1X (line 28) | LSL1X
  constant LSLX (line 29) | LSLX
  constant LSR (line 30) | LSR
  constant LSR_ADD (line 31) | LSR_ADD
  constant LSR1 (line 32) | LSR1
  constant LSR1X (line 33) | LSR1X
  constant LSRX (line 34) | LSRX
  constant MUL_SH_SH (line 35) | MUL_SH_SH
  constant MUL_SH_SL (line 36) | MUL_SH_SL
  constant MUL_SH_UH (line 37) | MUL_SH_UH
  constant MUL_SH_UL (line 38) | MUL_SH_UL
  constant MUL_SL_SH (line 39) | MUL_SL_SH
  constant MUL_SL_SL (line 40) | MUL_SL_SL
  constant MUL_SL_UH (line 41) | MUL_SL_UH
  constant MUL_SL_UL (line 42) | MUL_SL_UL
  constant MUL_STEP (line 43) | MUL_STEP
  constant MUL_UH_UH (line 44) | MUL_UH_UH
  constant MUL_UH_UL (line 45) | MUL_UH_UL
  constant MUL_UL_UH (line 46) | MUL_UL_UH
  constant MUL_UL_UL (line 47) | MUL_UL_UL
  constant NAND (line 48) | NAND
  constant NOR (line 49) | NOR
  constant NXOR (line 50) | NXOR
  constant OR (line 51) | OR
  constant ORN (line 52) | ORN
  constant ROL (line 53) | ROL
  constant ROL_ADD (line 54) | ROL_ADD
  constant ROR (line 55) | ROR
  constant RSUB (line 56) | RSUB
  constant RSUBC (line 57) | RSUBC
  constant SUB (line 58) | SUB
  constant SUBC (line 59) | SUBC
  constant XOR (line 60) | XOR
  constant BOOT (line 62) | BOOT
  constant RESUME (line 63) | RESUME
  constant STOP (line 64) | STOP
  constant CALL (line 66) | CALL
  constant FAULT (line 68) | FAULT
  constant NOP (line 69) | NOP
  constant SATS (line 70) | SATS
  constant MOVD (line 71) | MOVD
  constant SWAPD (line 72) | SWAPD
  constant HASH (line 74) | HASH
  constant TIME (line 75) | TIME
  constant TIME_CFG (line 76) | TIME_CFG
  constant LBS (line 78) | LBS
  constant LBU (line 79) | LBU
  constant LD (line 80) | LD
  constant LHS (line 81) | LHS
  constant LHU (line 82) | LHU
  constant LW (line 83) | LW
  constant SB (line 85) | SB
  constant SB_ID (line 86) | SB_ID
  constant SD (line 87) | SD
  constant SD_ID (line 88) | SD_ID
  constant SH (line 89) | SH
  constant SH_ID (line 90) | SH_ID
  constant SW (line 91) | SW
  constant SW_ID (line 92) | SW_ID
  constant LDMA (line 94) | LDMA
  constant LDMAI (line 95) | LDMAI
  constant SDMA (line 96) | SDMA

FILE: golang/uPIMulator/src/linker/kernel/instruction/reg_descriptor/gp_reg_descriptor.go
  type GpRegDescriptor (line 8) | type GpRegDescriptor struct
    method Init (line 12) | func (this *GpRegDescriptor) Init(index int) {
    method Index (line 29) | func (this *GpRegDescriptor) Index() int {

FILE: golang/uPIMulator/src/linker/kernel/instruction/reg_descriptor/pair_reg_descriptor.go
  type PairRegDescriptor (line 7) | type PairRegDescriptor struct
    method Init (line 12) | func (this *PairRegDescriptor) Init(index int) {
    method Index (line 25) | func (this *PairRegDescriptor) Index() int {
    method EvenRegDescriptor (line 29) | func (this *PairRegDescriptor) EvenRegDescriptor() *GpRegDescriptor {
    method OddRegDescriptor (line 33) | func (this *PairRegDescriptor) OddRegDescriptor() *GpRegDescriptor {

FILE: golang/uPIMulator/src/linker/kernel/instruction/reg_descriptor/sp_reg_descriptor.go
  type SpRegDescriptor (line 3) | type SpRegDescriptor
  constant ZERO (line 6) | ZERO SpRegDescriptor = iota
  constant ONE (line 7) | ONE
  constant LNEG (line 8) | LNEG
  constant MNEG (line 9) | MNEG
  constant ID (line 10) | ID
  constant ID2 (line 11) | ID2
  constant ID4 (line 12) | ID4
  constant ID8 (line 13) | ID8

FILE: golang/uPIMulator/src/linker/kernel/instruction/reg_descriptor/src_reg_descriptor.go
  type SrcRegDescriptor (line 3) | type SrcRegDescriptor struct
    method InitGpRegDescriptor (line 8) | func (this *SrcRegDescriptor) InitGpRegDescriptor(gp_reg_descriptor *G...
    method InitSpRegDescriptor (line 13) | func (this *SrcRegDescriptor) InitSpRegDescriptor(sp_reg_descriptor *S...
    method IsGpRegDescriptor (line 18) | func (this *SrcRegDescriptor) IsGpRegDescriptor() bool {
    method IsSpRegDescriptor (line 22) | func (this *SrcRegDescriptor) IsSpRegDescriptor() bool {
    method GpRegDescriptor (line 26) | func (this *SrcRegDescriptor) GpRegDescriptor() *GpRegDescriptor {
    method SpRegDescriptor (line 30) | func (this *SrcRegDescriptor) SpRegDescriptor() *SpRegDescriptor {

FILE: golang/uPIMulator/src/linker/kernel/instruction/suffix.go
  type Suffix (line 3) | type Suffix
  constant RICI (line 6) | RICI Suffix = iota
  constant RRI (line 8) | RRI
  constant RRIC (line 9) | RRIC
  constant RRICI (line 10) | RRICI
  constant RRIF (line 11) | RRIF
  constant RRR (line 12) | RRR
  constant RRRC (line 13) | RRRC
  constant RRRCI (line 14) | RRRCI
  constant ZRI (line 16) | ZRI
  constant ZRIC (line 17) | ZRIC
  constant ZRICI (line 18) | ZRICI
  constant ZRIF (line 19) | ZRIF
  constant ZRR (line 20) | ZRR
  constant ZRRC (line 21) | ZRRC
  constant ZRRCI (line 22) | ZRRCI
  constant S_RRI (line 24) | S_RRI
  constant S_RRIC (line 25) | S_RRIC
  constant S_RRICI (line 26) | S_RRICI
  constant S_RRIF (line 27) | S_RRIF
  constant S_RRR (line 28) | S_RRR
  constant S_RRRC (line 29) | S_RRRC
  constant S_RRRCI (line 30) | S_RRRCI
  constant U_RRI (line 32) | U_RRI
  constant U_RRIC (line 33) | U_RRIC
  constant U_RRICI (line 34) | U_RRICI
  constant U_RRIF (line 35) | U_RRIF
  constant U_RRR (line 36) | U_RRR
  constant U_RRRC (line 37) | U_RRRC
  constant U_RRRCI (line 38) | U_RRRCI
  constant RR (line 40) | RR
  constant RRC (line 41) | RRC
  constant RRCI (line 42) | RRCI
  constant ZR (line 44) | ZR
  constant ZRC (line 45) | ZRC
  constant ZRCI (line 46) | ZRCI
  constant S_RR (line 48) | S_RR
  constant S_RRC (line 49) | S_RRC
  constant S_RRCI (line 50) | S_RRCI
  constant U_RR (line 52) | U_RR
  constant U_RRC (line 53) | U_RRC
  constant U_RRCI (line 54) | U_RRCI
  constant DRDICI (line 56) | DRDICI
  constant RRRI (line 58) | RRRI
  constant RRRICI (line 59) | RRRICI
  constant ZRRI (line 61) | ZRRI
  constant ZRRICI (line 62) | ZRRICI
  constant S_RRRI (line 64) | S_RRRI
  constant S_RRRICI (line 65) | S_RRRICI
  constant U_RRRI (line 67) | U_RRRI
  constant U_RRRICI (line 68) | U_RRRICI
  constant RIR (line 70) | RIR
  constant RIRC (line 71) | RIRC
  constant RIRCI (line 72) | RIRCI
  constant ZIR (line 74) | ZIR
  constant ZIRC (line 75) | ZIRC
  constant ZIRCI (line 76) | ZIRCI
  constant S_RIRC (line 78) | S_RIRC
  constant S_RIRCI (line 79) | S_RIRCI
  constant U_RIRC (line 81) | U_RIRC
  constant U_RIRCI (line 82) | U_RIRCI
  constant R (line 84) | R
  constant RCI (line 85) | RCI
  constant Z (line 87) | Z
  constant ZCI (line 88) | ZCI
  constant S_R (line 90) | S_R
  constant S_RCI (line 91) | S_RCI
  constant U_R (line 93) | U_R
  constant U_RCI (line 94) | U_RCI
  constant CI (line 96) | CI
  constant I (line 97) | I
  constant DDCI (line 99) | DDCI
  constant ERRI (line 101) | ERRI
  constant S_ERRI (line 103) | S_ERRI
  constant U_ERRI (line 104) | U_ERRI
  constant EDRI (line 106) | EDRI
  constant ERII (line 108) | ERII
  constant ERIR (line 109) | ERIR
  constant ERID (line 110) | ERID
  constant DMA_RRI (line 112) | DMA_RRI

FILE: golang/uPIMulator/src/linker/kernel/kernel.go
  type Kernel (line 8) | type Kernel struct
    method Init (line 17) | func (this *Kernel) Init() {
    method Address (line 21) | func (this *Kernel) Address(label_name string) int64 {
    method Atomic (line 30) | func (this *Kernel) Atomic() *encoding.ByteStream {
    method SetAtomic (line 34) | func (this *Kernel) SetAtomic(atomic *encoding.ByteStream) {
    method Iram (line 38) | func (this *Kernel) Iram() *encoding.ByteStream {
    method SetIram (line 42) | func (this *Kernel) SetIram(iram *encoding.ByteStream) {
    method Wram (line 46) | func (this *Kernel) Wram() *encoding.ByteStream {
    method SetWram (line 50) | func (this *Kernel) SetWram(wram *encoding.ByteStream) {
    method Mram (line 54) | func (this *Kernel) Mram() *encoding.ByteStream {
    method SetMram (line 58) | func (this *Kernel) SetMram(mram *encoding.ByteStream) {

FILE: golang/uPIMulator/src/linker/kernel/label.go
  type Label (line 8) | type Label struct
    method Init (line 15) | func (this *Label) Init(name string) {
    method Name (line 24) | func (this *Label) Name() string {
    method Address (line 28) | func (this *Label) Address() int64 {
    method BeginAddress (line 37) | func (this *Label) BeginAddress() int64 {
    method EndAddress (line 41) | func (this *Label) EndAddress() int64 {
    method SetAddress (line 45) | func (this *Label) SetAddress(address int64) {
    method Size (line 55) | func (this *Label) Size() int64 {
    method SetSize (line 59) | func (this *Label) SetSize(size int64) {
    method ToByteStream (line 63) | func (this *Label) ToByteStream() *encoding.ByteStream {
    method Append (line 72) | func (this *Label) Append(encodable Encodable) {

FILE: golang/uPIMulator/src/linker/kernel/liveness.go
  type Liveness (line 3) | type Liveness struct
    method Init (line 9) | func (this *Liveness) Init() {
    method Defs (line 15) | func (this *Liveness) Defs() map[string]bool {
    method AddDef (line 19) | func (this *Liveness) AddDef(def string) {
    method Uses (line 23) | func (this *Liveness) Uses() map[string]bool {
    method AddUse (line 27) | func (this *Liveness) AddUse(use string) {
    method GlobalSymbols (line 31) | func (this *Liveness) GlobalSymbols() map[string]bool {
    method AddGlobalSymbol (line 35) | func (this *Liveness) AddGlobalSymbol(global_symbol string) {
    method LocalSymbols (line 39) | func (this *Liveness) LocalSymbols() map[string]bool {
    method UnresolvedSymbols (line 49) | func (this *Liveness) UnresolvedSymbols() map[string]bool {

FILE: golang/uPIMulator/src/linker/kernel/relocatable.go
  type Relocatable (line 11) | type Relocatable struct
    method Init (line 22) | func (this *Relocatable) Init(name string) {
    method Name (line 28) | func (this *Relocatable) Name() string {
    method Path (line 32) | func (this *Relocatable) Path() string {
    method SetPath (line 36) | func (this *Relocatable) SetPath(path string) {
    method TokenStream (line 40) | func (this *Relocatable) TokenStream() *lexer.TokenStream {
    method SetTokenStream (line 44) | func (this *Relocatable) SetTokenStream(token_stream *lexer.TokenStrea...
    method Ast (line 48) | func (this *Relocatable) Ast() *parser.Ast {
    method SetAst (line 52) | func (this *Relocatable) SetAst(ast *parser.Ast) {
    method Liveness (line 56) | func (this *Relocatable) Liveness() *Liveness {
    method SetLiveness (line 60) | func (this *Relocatable) SetLiveness(liveness *Liveness) {
    method Lines (line 64) | func (this *Relocatable) Lines() []string {
    method RenameLocalSymbol (line 75) | func (this *Relocatable) RenameLocalSymbol(old_name string, new_name s...
    method RenameLine (line 91) | func (this *Relocatable) RenameLine(line string) string {

FILE: golang/uPIMulator/src/linker/kernel/section.go
  type SectionName (line 8) | type SectionName
  constant ATOMIC (line 11) | ATOMIC SectionName = iota
  constant BSS (line 12) | BSS
  constant DATA (line 13) | DATA
  constant DEBUG_ABBREV (line 14) | DEBUG_ABBREV
  constant DEBUG_FRAME (line 15) | DEBUG_FRAME
  constant DEBUG_INFO (line 16) | DEBUG_INFO
  constant DEBUG_LINE (line 17) | DEBUG_LINE
  constant DEBUG_LOC (line 18) | DEBUG_LOC
  constant DEBUG_RANGES (line 19) | DEBUG_RANGES
  constant DEBUG_STR (line 20) | DEBUG_STR
  constant DPU_HOST (line 21) | DPU_HOST
  constant MRAM (line 22) | MRAM
  constant RODATA (line 23) | RODATA
  constant STACK_SIZES (line 24) | STACK_SIZES
  constant TEXT (line 25) | TEXT
  type SectionFlag (line 28) | type SectionFlag
  constant ALLOC (line 31) | ALLOC SectionFlag = iota
  constant WRITE (line 32) | WRITE
  constant EXECINSTR (line 33) | EXECINSTR
  constant LINK_ORDER (line 34) | LINK_ORDER
  constant MERGE (line 35) | MERGE
  constant STRINGS (line 36) | STRINGS
  type SectionType (line 39) | type SectionType
  constant PROGBITS (line 42) | PROGBITS SectionType = iota
  constant NOBITS (line 43) | NOBITS
  type Section (line 46) | type Section struct
    method Init (line 56) | func (this *Section) Init(
    method SectionName (line 76) | func (this *Section) SectionName() SectionName {
    method Name (line 80) | func (this *Section) Name() string {
    method SectionFlags (line 84) | func (this *Section) SectionFlags() map[SectionFlag]bool {
    method SectionType (line 88) | func (this *Section) SectionType() SectionType {
    method Address (line 92) | func (this *Section) Address() int64 {
    method SetAddress (line 96) | func (this *Section) SetAddress(address int64) {
    method Size (line 104) | func (this *Section) Size() int64 {
    method Label (line 112) | func (this *Section) Label(label_name string) *Label {
    method Labels (line 122) | func (this *Section) Labels() []*Label {
    method AppendLabel (line 126) | func (this *Section) AppendLabel(label_name string) {
    method CheckoutLabel (line 133) | func (this *Section) CheckoutLabel(label_name string) {
    method CurLabel (line 142) | func (this *Section) CurLabel() *Label {
    method ToByteStream (line 146) | func (this *Section) ToByteStream() *encoding.ByteStream {
    method HiddenLabelName (line 157) | func (this *Section) HiddenLabelName() string {

FILE: golang/uPIMulator/src/linker/lex_job.go
  type LexJob (line 9) | type LexJob struct
    method Init (line 13) | func (this *LexJob) Init(relocatable *kernel.Relocatable) {
    method Execute (line 17) | func (this *LexJob) Execute() {

FILE: golang/uPIMulator/src/linker/lexer/keyword_factory.go
  type KeywordFactory (line 7) | type KeywordFactory struct
    method Init (line 11) | func (this *KeywordFactory) Init() {
    method AddKeyword (line 15) | func (this *KeywordFactory) AddKeyword(keyword string, token_type Toke...
    method IsTokenizable (line 19) | func (this *KeywordFactory) IsTokenizable(word string) bool {
    method Tokenize (line 24) | func (this *KeywordFactory) Tokenize(word string) *Token {

FILE: golang/uPIMulator/src/linker/lexer/lexer.go
  type Lexer (line 8) | type Lexer struct
    method Init (line 12) | func (this *Lexer) Init() {
    method Lex (line 17) | func (this *Lexer) Lex(path string) *TokenStream {
    method Tokenize (line 41) | func (this *Lexer) Tokenize(line string) *TokenStream {
    method FindTokenWithMaxLength (line 59) | func (this *Lexer) FindTokenWithMaxLength(line string, prev_pos int) (...
    method IsWhiteSpace (line 105) | func (this *Lexer) IsWhiteSpace(word string) bool {
    method IsComment (line 114) | func (this *Lexer) IsComment(word string) bool {
    method IsQuote (line 123) | func (this *Lexer) IsQuote(word string) bool {
    method FindNextQuote (line 132) | func (this *Lexer) FindNextQuote(line string, pos int) int {

FILE: golang/uPIMulator/src/linker/lexer/regex.go
  type Regex (line 8) | type Regex struct
    method Init (line 15) | func (this *Regex) Init(expr string, token_type TokenType) {
    method Expr (line 29) | func (this *Regex) Expr() string {
    method TokenType (line 33) | func (this *Regex) TokenType() TokenType {
    method IsTokenizable (line 37) | func (this *Regex) IsTokenizable(word string) bool {
    method Tokenize (line 41) | func (this *Regex) Tokenize(word string) *Token {

FILE: golang/uPIMulator/src/linker/lexer/regex_factory.go
  type RegexFactory (line 7) | type RegexFactory struct
    method Init (line 11) | func (this *RegexFactory) Init() {
    method HasRegex (line 15) | func (this *RegexFactory) HasRegex(expr string) bool {
    method AddRegex (line 24) | func (this *RegexFactory) AddRegex(expr string, token_type TokenType) {
    method IsTokenizable (line 36) | func (this *RegexFactory) IsTokenizable(word string) bool {
    method Tokenize (line 45) | func (this *RegexFactory) Tokenize(word string) *Token {

FILE: golang/uPIMulator/src/linker/lexer/token.go
  type TokenType (line 3) | type TokenType
  constant END_OF_FILE (line 6) | END_OF_FILE TokenType = iota
  constant NEW_LINE (line 8) | NEW_LINE
  constant IDENTIFIER (line 10) | IDENTIFIER
  constant POSITIVIE_NUMBER (line 11) | POSITIVIE_NUMBER
  constant HEX_NUMBER (line 12) | HEX_NUMBER
  constant STRING (line 13) | STRING
  constant GP_REG (line 14) | GP_REG
  constant PAIR_REG (line 15) | PAIR_REG
  constant ACQUIRE (line 17) | ACQUIRE
  constant RELEASE (line 18) | RELEASE
  constant BOOT (line 19) | BOOT
  constant RESUME (line 20) | RESUME
  constant ADD (line 22) | ADD
  constant ADDC (line 23) | ADDC
  constant AND (line 24) | AND
  constant ANDN (line 25) | ANDN
  constant ASR (line 26) | ASR
  constant CMPB4 (line 27) | CMPB4
  constant LSL (line 28) | LSL
  constant LSL1 (line 29) | LSL1
  constant LSL1X (line 30) | LSL1X
  constant LSLX (line 31) | LSLX
  constant LSR (line 32) | LSR
  constant LSR1 (line 33) | LSR1
  constant LSR1X (line 34) | LSR1X
  constant LSRX (line 35) | LSRX
  constant MUL_SH_SH (line 36) | MUL_SH_SH
  constant MUL_SH_SL (line 37) | MUL_SH_SL
  constant MUL_SH_UH (line 38) | MUL_SH_UH
  constant MUL_SH_UL (line 39) | MUL_SH_UL
  constant MUL_SL_SH (line 40) | MUL_SL_SH
  constant MUL_SL_SL (line 41) | MUL_SL_SL
  constant MUL_SL_UH (line 42) | MUL_SL_UH
  constant MUL_SL_UL (line 43) | MUL_SL_UL
  constant MUL_UH_UH (line 44) | MUL_UH_UH
  constant MUL_UH_UL (line 45) | MUL_UH_UL
  constant MUL_UL_UH (line 46) | MUL_UL_UH
  constant MUL_UL_UL (line 47) | MUL_UL_UL
  constant NAND (line 48) | NAND
  constant NOR (line 49) | NOR
  constant NXOR (line 50) | NXOR
  constant OR (line 51) | OR
  constant ORN (line 52) | ORN
  constant ROL (line 53) | ROL
  constant ROR (line 54) | ROR
  constant RSUB (line 55) | RSUB
  constant RSUBC (line 56) | RSUBC
  constant SUB (line 57) | SUB
  constant SUBC (line 58) | SUBC
  constant XOR (line 59) | XOR
  constant CALL (line 60) | CALL
  constant HASH (line 61) | HASH
  constant CAO (line 63) | CAO
  constant CLO (line 64) | CLO
  constant CLS (line 65) | CLS
  constant CLZ (line 66) | CLZ
  constant EXTSB (line 67) | EXTSB
  constant EXTSH (line 68) | EXTSH
  constant EXTUB (line 69) | EXTUB
  constant EXTUH (line 70) | EXTUH
  constant SATS (line 71) | SATS
  constant TIME_CFG (line 72) | TIME_CFG
  constant DIV_STEP (line 74) | DIV_STEP
  constant MUL_STEP (line 75) | MUL_STEP
  constant LSL_ADD (line 77) | LSL_ADD
  constant LSL_SUB (line 78) | LSL_SUB
  constant LSR_ADD (line 79) | LSR_ADD
  constant ROL_ADD (line 80) | ROL_ADD
  constant TIME (line 82) | TIME
  constant NOP (line 83) | NOP
  constant STOP (line 85) | STOP
  constant FAULT (line 87) | FAULT
  constant MOVD (line 89) | MOVD
  constant SWAPD (line 90) | SWAPD
  constant LBS (line 92) | LBS
  constant LBU (line 93) | LBU
  constant LD (line 94) | LD
  constant LHS (line 95) | LHS
  constant LHU (line 96) | LHU
  constant LW (line 97) | LW
  constant SB (line 99) | SB
  constant SB_ID (line 100) | SB_ID
  constant SD (line 101) | SD
  constant SD_ID (line 102) | SD_ID
  constant SH (line 103) | SH
  constant SH_ID (line 104) | SH_ID
  constant SW (line 105) | SW
  constant SW_ID (line 106) | SW_ID
  constant LDMA (line 108) | LDMA
  constant LDMAI (line 109) | LDMAI
  constant SDMA (line 110) | SDMA
  constant MOVE (line 112) | MOVE
  constant NEG (line 113) | NEG
  constant NOT (line 114) | NOT
  constant BKP (line 115) | BKP
  constant JEQ (line 117) | JEQ
  constant JNEQ (line 118) | JNEQ
  constant JZ (line 119) | JZ
  constant JNZ (line 120) | JNZ
  constant JLTU (line 121) | JLTU
  constant JGTU (line 122) | JGTU
  constant JLEU (line 123) | JLEU
  constant JGEU (line 124) | JGEU
  constant JLTS (line 125) | JLTS
  constant JGTS (line 126) | JGTS
  constant JLES (line 127) | JLES
  constant JGES (line 128) | JGES
  constant JUMP (line 129) | JUMP
  constant S (line 131) | S
  constant U (line 132) | U
  constant ATOMIC (line 134) | ATOMIC
  constant BSS (line 135) | BSS
  constant DATA (line 136) | DATA
  constant DEBUG_ABBREV (line 137) | DEBUG_ABBREV
  constant DEBUG_FRAME (line 138) | DEBUG_FRAME
  constant DEBUG_INFO (line 139) | DEBUG_INFO
  constant DEBUG_LINE (line 140) | DEBUG_LINE
  constant DEBUG_LOC (line 141) | DEBUG_LOC
  constant DEBUG_RANGES (line 142) | DEBUG_RANGES
  constant DEBUG_STR (line 143) | DEBUG_STR
  constant DPU_HOST (line 144) | DPU_HOST
  constant MRAM (line 145) | MRAM
  constant RODATA (line 146) | RODATA
  constant STACK_SIZES (line 147) | STACK_SIZES
  constant TEXT (line 148) | TEXT
  constant PROGBITS (line 150) | PROGBITS
  constant NOBITS (line 151) | NOBITS
  constant FUNCTION (line 153) | FUNCTION
  constant OBJECT (line 154) | OBJECT
  constant TRUE (line 156) | TRUE
  constant FALSE (line 157) | FALSE
  constant Z (line 158) | Z
  constant NZ (line 159) | NZ
  constant E (line 160) | E
  constant O (line 161) | O
  constant PL (line 162) | PL
  constant MI (line 163) | MI
  constant OV (line 164) | OV
  constant NOV (line 165) | NOV
  constant C (line 166) | C
  constant NC (line 167) | NC
  constant SZ (line 168) | SZ
  constant SNZ (line 169) | SNZ
  constant SPL (line 170) | SPL
  constant SMI (line 171) | SMI
  constant SO (line 172) | SO
  constant SE (line 173) | SE
  constant NC5 (line 174) | NC5
  constant NC6 (line 175) | NC6
  constant NC7 (line 176) | NC7
  constant NC8 (line 177) | NC8
  constant NC9 (line 178) | NC9
  constant NC10 (line 179) | NC10
  constant NC11 (line 180) | NC11
  constant NC12 (line 181) | NC12
  constant NC13 (line 182) | NC13
  constant NC14 (line 183) | NC14
  constant MAX (line 184) | MAX
  constant NMAX (line 185) | NMAX
  constant SH32 (line 186) | SH32
  constant NSH32 (line 187) | NSH32
  constant EQ (line 188) | EQ
  constant NEQ (line 189) | NEQ
  constant LTU (line 190) | LTU
  constant LEU (line 191) | LEU
  constant GTU (line 192) | GTU
  constant GEU (line 193) | GEU
  constant LTS (line 194) | LTS
  constant LES (line 195) | LES
  constant GTS (line 196) | GTS
  constant GES (line 197) | GES
  constant XZ (line 198) | XZ
  constant XNZ (line 199) | XNZ
  constant XLEU (line 200) | XLEU
  constant XGTU (line 201) | XGTU
  constant XLES (line 202) | XLES
  constant XGTS (line 203) | XGTS
  constant SMALL (line 204) | SMALL
  constant LARGE (line 205) | LARGE
  constant LITTLE (line 207) | LITTLE
  constant BIG (line 208) | BIG
  constant ZERO_REG (line 210) | ZERO_REG
  constant ONE (line 211) | ONE
  constant ID (line 212) | ID
  constant ID2 (line 213) | ID2
  constant ID4 (line 214) | ID4
  constant ID8 (line 215) | ID8
  constant LNEG (line 216) | LNEG
  constant MNEG (line 217) | MNEG
  constant ADDRSIG (line 219) | ADDRSIG
  constant ADDRSIG_SYM (line 220) | ADDRSIG_SYM
  constant ASCII (line 221) | ASCII
  constant ASCIZ (line 222) | ASCIZ
  constant BYTE (line 223) | BYTE
  constant CFI_DEF_CFA_OFFSET (line 224) | CFI_DEF_CFA_OFFSET
  constant CFI_ENDPROC (line 225) | CFI_ENDPROC
  constant CFI_OFFSET (line 226) | CFI_OFFSET
  constant CFI_SECTIONS (line 227) | CFI_SECTIONS
  constant CFI_STARTPROC (line 228) | CFI_STARTPROC
  constant FILE (line 229) | FILE
  constant GLOBL (line 230) | GLOBL
  constant LOC (line 231) | LOC
  constant LONG (line 232) | LONG
  constant P2ALIGN (line 233) | P2ALIGN
  constant QUAD (line 234) | QUAD
  constant SECTION (line 235) | SECTION
  constant SET (line 236) | SET
  constant SHORT (line 237) | SHORT
  constant SIZE (line 238) | SIZE
  constant TYPE (line 239) | TYPE
  constant WEAK (line 240) | WEAK
  constant ZERO_DIRECTIVE (line 241) | ZERO_DIRECTIVE
  constant IS_STMT (line 243) | IS_STMT
  constant PROLOGUE_END (line 244) | PROLOGUE_END
  constant COLON (line 246) | COLON
  constant COMMA (line 247) | COMMA
  constant PLUS (line 248) | PLUS
  constant MINUS (line 249) | MINUS
  type Token (line 252) | type Token struct
    method Init (line 257) | func (this *Token) Init(token_type TokenType, attribute string) {
    method TokenType (line 262) | func (this *Token) TokenType() TokenType {
    method Attribute (line 266) | func (this *Token) Attribute() string {

FILE: golang/uPIMulator/src/linker/lexer/token_stream.go
  type TokenStream (line 3) | type TokenStream struct
    method Init (line 7) | func (this *TokenStream) Init() {
    method Size (line 11) | func (this *TokenStream) Size() int {
    method Get (line 15) | func (this *TokenStream) Get(pos int) *Token {
    method Append (line 19) | func (this *TokenStream) Append(token *Token) {
    method Merge (line 23) | func (this *TokenStream) Merge(token_stream *TokenStream) {

FILE: golang/uPIMulator/src/linker/lexer/tokenizer.go
  type Tokenizer (line 7) | type Tokenizer struct
    method Init (line 12) | func (this *Tokenizer) Init() {
    method InitKeywordFactory (line 17) | func (this *Tokenizer) InitKeywordFactory() {
    method InitRegexFactory (line 253) | func (this *Tokenizer) InitRegexFactory() {
    method IsTokenizable (line 268) | func (this *Tokenizer) IsTokenizable(word string) bool {
    method Tokenize (line 272) | func (this *Tokenizer) Tokenize(word string) *Token {

FILE: golang/uPIMulator/src/linker/linker.go
  type Linker (line 15) | type Linker struct
    method Init (line 31) | func (this *Linker) Init(command_line_parser *misc.CommandLineParser) {
    method InitBenchmarkRelocatable (line 49) | func (this *Linker) InitBenchmarkRelocatable() {
    method InitSdkRelocatables (line 66) | func (this *Linker) InitSdkRelocatables() {
    method Link (line 108) | func (this *Linker) Link() {
    method Lex (line 117) | func (this *Linker) Lex() {
    method Parse (line 136) | func (this *Linker) Parse() {
    method AnalyzeLiveness (line 155) | func (this *Linker) AnalyzeLiveness() {
    method MakeExecutable (line 174) | func (this *Linker) MakeExecutable() {
    method HasResolved (line 188) | func (this *Linker) HasResolved() bool {
    method ResolveSymbols (line 197) | func (this *Linker) ResolveSymbols() {
    method LoadExecutable (line 213) | func (this *Linker) LoadExecutable() {
    method DumpExecutable (line 245) | func (this *Linker) DumpExecutable() {

FILE: golang/uPIMulator/src/linker/logic/instruction_assigner.go
  type InstructionAssigner (line 18) | type InstructionAssigner struct
    method Init (line 24) | func (this *InstructionAssigner) Init(linker_script *LinkerScript) {
    method Assign (line 122) | func (this *InstructionAssigner) Assign(executable *kernel.Executable) {
    method WalkAsciiStmt (line 127) | func (this *InstructionAssigner) WalkAsciiStmt(stmt_ *stmt.Stmt) {
    method WalkAscizStmt (line 142) | func (this *InstructionAssigner) WalkAscizStmt(stmt_ *stmt.Stmt) {
    method WalkByteStmt (line 155) | func (this *InstructionAssigner) WalkByteStmt(stmt_ *stmt.Stmt) {
    method WalkLongProgramCounterStmt (line 167) | func (this *InstructionAssigner) WalkLongProgramCounterStmt(stmt_ *stm...
    method WalkLongSectionNameStmt (line 179) | func (this *InstructionAssigner) WalkLongSectionNameStmt(stmt_ *stmt.S...
    method WalkQuadStmt (line 191) | func (this *InstructionAssigner) WalkQuadStmt(stmt_ *stmt.Stmt) {
    method WalkSectionIdentifierNumberStmt (line 203) | func (this *InstructionAssigner) WalkSectionIdentifierNumberStmt(stmt_...
    method WalkSectionIdentifierStmt (line 212) | func (this *InstructionAssigner) WalkSectionIdentifierStmt(stmt_ *stmt...
    method WalkSectionStackSizes (line 221) | func (this *InstructionAssigner) WalkSectionStackSizes(stmt_ *stmt.Stm...
    method WalkSectionStringNumberStmt (line 271) | func (this *InstructionAssigner) WalkSectionStringNumberStmt(stmt_ *st...
    method WalkSectionStringStmt (line 280) | func (this *InstructionAssigner) WalkSectionStringStmt(stmt_ *stmt.Stm...
    method WalkShortStmt (line 289) | func (this *InstructionAssigner) WalkShortStmt(stmt_ *stmt.Stmt) {
    method WalkTextStmt (line 301) | func (this *InstructionAssigner) WalkTextStmt(stmt_ *stmt.Stmt) {
    method WalkZeroDoubleNumberStmt (line 308) | func (this *InstructionAssigner) WalkZeroDoubleNumberStmt(stmt_ *stmt....
    method WalkZeroSingleNumberStmt (line 321) | func (this *InstructionAssigner) WalkZeroSingleNumberStmt(stmt_ *stmt....
    method WalkCiStmt (line 333) | func (this *InstructionAssigner) WalkCiStmt(stmt_ *stmt.Stmt) {
    method WalkDdciStmt (line 347) | func (this *InstructionAssigner) WalkDdciStmt(stmt_ *stmt.Stmt) {
    method WalkDmaRriStmt (line 363) | func (this *InstructionAssigner) WalkDmaRriStmt(stmt_ *stmt.Stmt) {
    method WalkDrdiciStmt (line 378) | func (this *InstructionAssigner) WalkDrdiciStmt(stmt_ *stmt.Stmt) {
    method WalkEdriStmt (line 396) | func (this *InstructionAssigner) WalkEdriStmt(stmt_ *stmt.Stmt) {
    method WalkEridStmt (line 412) | func (this *InstructionAssigner) WalkEridStmt(stmt_ *stmt.Stmt) {
    method WalkEriiStmt (line 428) | func (this *InstructionAssigner) WalkEriiStmt(stmt_ *stmt.Stmt) {
    method WalkErirStmt (line 444) | func (this *InstructionAssigner) WalkErirStmt(stmt_ *stmt.Stmt) {
    method WalkErriStmt (line 460) | func (this *InstructionAssigner) WalkErriStmt(stmt_ *stmt.Stmt) {
    method WalkIStmt (line 476) | func (this *InstructionAssigner) WalkIStmt(stmt_ *stmt.Stmt) {
    method WalkNopStmt (line 489) | func (this *InstructionAssigner) WalkNopStmt(stmt_ *stmt.Stmt) {
    method WalkRciStmt (line 501) | func (this *InstructionAssigner) WalkRciStmt(stmt_ *stmt.Stmt) {
    method WalkRiciStmt (line 523) | func (this *InstructionAssigner) WalkRiciStmt(stmt_ *stmt.Stmt) {
    method WalkRirciStmt (line 539) | func (this *InstructionAssigner) WalkRirciStmt(stmt_ *stmt.Stmt) {
    method WalkRircStmt (line 562) | func (this *InstructionAssigner) WalkRircStmt(stmt_ *stmt.Stmt) {
    method WalkRirStmt (line 584) | func (this *InstructionAssigner) WalkRirStmt(stmt_ *stmt.Stmt) {
    method WalkRrciStmt (line 605) | func (this *InstructionAssigner) WalkRrciStmt(stmt_ *stmt.Stmt) {
    method WalkRrcStmt (line 652) | func (this *InstructionAssigner) WalkRrcStmt(stmt_ *stmt.Stmt) {
    method WalkRriciStmt (line 674) | func (this *InstructionAssigner) WalkRriciStmt(stmt_ *stmt.Stmt) {
    method WalkRricStmt (line 698) | func (this *InstructionAssigner) WalkRricStmt(stmt_ *stmt.Stmt) {
    method WalkRriStmt (line 728) | func (this *InstructionAssigner) WalkRriStmt(stmt_ *stmt.Stmt) {
    method WalkRrrciStmt (line 756) | func (this *InstructionAssigner) WalkRrrciStmt(stmt_ *stmt.Stmt) {
    method WalkRrrcStmt (line 779) | func (this *InstructionAssigner) WalkRrrcStmt(stmt_ *stmt.Stmt) {
    method WalkRrriciStmt (line 801) | func (this *InstructionAssigner) WalkRrriciStmt(stmt_ *stmt.Stmt) {
    method WalkRrriStmt (line 825) | func (this *InstructionAssigner) WalkRrriStmt(stmt_ *stmt.Stmt) {
    method WalkRrrStmt (line 847) | func (this *InstructionAssigner) WalkRrrStmt(stmt_ *stmt.Stmt) {
    method WalkRrStmt (line 868) | func (this *InstructionAssigner) WalkRrStmt(stmt_ *stmt.Stmt) {
    method WalkRStmt (line 914) | func (this *InstructionAssigner) WalkRStmt(stmt_ *stmt.Stmt) {
    method WalkSErriStmt (line 934) | func (this *InstructionAssigner) WalkSErriStmt(stmt_ *stmt.Stmt) {
    method WalkSRciStmt (line 951) | func (this *InstructionAssigner) WalkSRciStmt(stmt_ *stmt.Stmt) {
    method WalkSRirciStmt (line 967) | func (this *InstructionAssigner) WalkSRirciStmt(stmt_ *stmt.Stmt) {
    method WalkSRircStmt (line 985) | func (this *InstructionAssigner) WalkSRircStmt(stmt_ *stmt.Stmt) {
    method WalkSRrciStmt (line 1002) | func (this *InstructionAssigner) WalkSRrciStmt(stmt_ *stmt.Stmt) {
    method WalkSRrcStmt (line 1032) | func (this *InstructionAssigner) WalkSRrcStmt(stmt_ *stmt.Stmt) {
    method WalkSRriciStmt (line 1048) | func (this *InstructionAssigner) WalkSRriciStmt(stmt_ *stmt.Stmt) {
    method WalkSRricStmt (line 1066) | func (this *InstructionAssigner) WalkSRricStmt(stmt_ *stmt.Stmt) {
    method WalkSRriStmt (line 1087) | func (this *InstructionAssigner) WalkSRriStmt(stmt_ *stmt.Stmt) {
    method WalkSRrrciStmt (line 1103) | func (this *InstructionAssigner) WalkSRrrciStmt(stmt_ *stmt.Stmt) {
    method WalkSRrrcStmt (line 1121) | func (this *InstructionAssigner) WalkSRrrcStmt(stmt_ *stmt.Stmt) {
    method WalkSRrriciStmt (line 1138) | func (this *InstructionAssigner) WalkSRrriciStmt(stmt_ *stmt.Stmt) {
    method WalkSRrriStmt (line 1157) | func (this *InstructionAssigner) WalkSRrriStmt(stmt_ *stmt.Stmt) {
    method WalkSRrrStmt (line 1174) | func (this *InstructionAssigner) WalkSRrrStmt(stmt_ *stmt.Stmt) {
    method WalkSRrStmt (line 1190) | func (this *InstructionAssigner) WalkSRrStmt(stmt_ *stmt.Stmt) {
    method WalkSRStmt (line 1221) | func (this *InstructionAssigner) WalkSRStmt(stmt_ *stmt.Stmt) {
    method WalkBkpStmt (line 1235) | func (this *InstructionAssigner) WalkBkpStmt(stmt_ *stmt.Stmt) {
    method WalkBootRiStmt (line 1246) | func (this *InstructionAssigner) WalkBootRiStmt(stmt_ *stmt.Stmt) {
    method WalkCallRiStmt (line 1262) | func (this *InstructionAssigner) WalkCallRiStmt(stmt_ *stmt.Stmt) {
    method WalkCallRrStmt (line 1282) | func (this *InstructionAssigner) WalkCallRrStmt(stmt_ *stmt.Stmt) {
    method WalkDivStepDrdiStmt (line 1297) | func (this *InstructionAssigner) WalkDivStepDrdiStmt(stmt_ *stmt.Stmt) {
    method WalkJeqRiiStmt (line 1315) | func (this *InstructionAssigner) WalkJeqRiiStmt(stmt_ *stmt.Stmt) {
    method WalkJeqRriStmt (line 1331) | func (this *InstructionAssigner) WalkJeqRriStmt(stmt_ *stmt.Stmt) {
    method WalkJnzRiStmt (line 1347) | func (this *InstructionAssigner) WalkJnzRiStmt(stmt_ *stmt.Stmt) {
    method WalkJumpIStmt (line 1372) | func (this *InstructionAssigner) WalkJumpIStmt(stmt_ *stmt.Stmt) {
    method WalkJumpRStmt (line 1391) | func (this *InstructionAssigner) WalkJumpRStmt(stmt_ *stmt.Stmt) {
    method WalkLbsRriStmt (line 1405) | func (this *InstructionAssigner) WalkLbsRriStmt(stmt_ *stmt.Stmt) {
    method WalkLbsSRriStmt (line 1421) | func (this *InstructionAssigner) WalkLbsSRriStmt(stmt_ *stmt.Stmt) {
    method WalkLdDriStmt (line 1438) | func (this *InstructionAssigner) WalkLdDriStmt(stmt_ *stmt.Stmt) {
    method WalkMovdDdStmt (line 1454) | func (this *InstructionAssigner) WalkMovdDdStmt(stmt_ *stmt.Stmt) {
    method WalkMoveRiciStmt (line 1470) | func (this *InstructionAssigner) WalkMoveRiciStmt(stmt_ *stmt.Stmt) {
    method WalkMoveRiStmt (line 1492) | func (this *InstructionAssigner) WalkMoveRiStmt(stmt_ *stmt.Stmt) {
    method WalkMoveSRiciStmt (line 1512) | func (this *InstructionAssigner) WalkMoveSRiciStmt(stmt_ *stmt.Stmt) {
    method WalkMoveSRiStmt (line 1535) | func (this *InstructionAssigner) WalkMoveSRiStmt(stmt_ *stmt.Stmt) {
    method WalkSbIdRiiStmt (line 1557) | func (this *InstructionAssigner) WalkSbIdRiiStmt(stmt_ *stmt.Stmt) {
    method WalkSbIdRiStmt (line 1573) | func (this *InstructionAssigner) WalkSbIdRiStmt(stmt_ *stmt.Stmt) {
    method WalkSbRirStmt (line 1589) | func (this *InstructionAssigner) WalkSbRirStmt(stmt_ *stmt.Stmt) {
    method WalkSdRidStmt (line 1605) | func (this *InstructionAssigner) WalkSdRidStmt(stmt_ *stmt.Stmt) {
    method WalkStopStmt (line 1621) | func (this *InstructionAssigner) WalkStopStmt(stmt_ *stmt.Stmt) {
    method WalkTimeCfgRStmt (line 1633) | func (this *InstructionAssigner) WalkTimeCfgRStmt(stmt_ *stmt.Stmt) {
    method WalkLabelStmt (line 1646) | func (this *InstructionAssigner) WalkLabelStmt(stmt_ *stmt.Stmt) {
    method ConvertSectionName (line 1659) | func (this *InstructionAssigner) ConvertSectionName(expr_ *expr.Expr) ...
    method ConvertName (line 1699) | func (this *InstructionAssigner) ConvertName(expr_ *expr.Expr) string {
    method ConvertCiOpCode (line 1719) | func (this *InstructionAssigner) ConvertCiOpCode(op_code *expr.Expr) i...
    method ConvertDdciOpCode (line 1731) | func (this *InstructionAssigner) ConvertDdciOpCode(op_code *expr.Expr)...
    method ConvertDmaRriOpCode (line 1745) | func (this *InstructionAssigner) ConvertDmaRriOpCode(op_code *expr.Exp...
    method ConvertDrdiciOpCode (line 1761) | func (this *InstructionAssigner) ConvertDrdiciOpCode(op_code *expr.Exp...
    method ConvertIOpCode (line 1775) | func (this *InstructionAssigner) ConvertIOpCode(op_code *expr.Expr) in...
    method ConvertRiciOpCode (line 1787) | func (this *InstructionAssigner) ConvertRiciOpCode(op_code *expr.Expr)...
    method ConvertROpCode (line 1805) | func (this *InstructionAssigner) ConvertROpCode(op_code *expr.Expr) in...
    method ConvertRrOpCode (line 1819) | func (this *InstructionAssigner) ConvertRrOpCode(op_code *expr.Expr) i...
    method ConvertRriOpCode (line 1855) | func (this *InstructionAssigner) ConvertRriOpCode(op_code *expr.Expr) ...
    method ConvertRrriOpCode (line 1945) | func (this *InstructionAssigner) ConvertRrriOpCode(op_code *expr.Expr)...
    method ConvertLoadOpCode (line 1963) | func (this *InstructionAssigner) ConvertLoadOpCode(op_code *expr.Expr)...
    method ConvertStoreOpCode (line 1985) | func (this *InstructionAssigner) ConvertStoreOpCode(op_code *expr.Expr...
    method ConvertJumpOpCode (line 2011) | func (this *InstructionAssigner) ConvertJumpOpCode(op_code *expr.Expr)...
    method ConvertSuffix (line 2047) | func (this *InstructionAssigner) ConvertSuffix(
    method ConvertGpReg (line 2134) | func (this *InstructionAssigner) ConvertGpReg(expr_ *expr.Expr) *reg_d...
    method ConvertSrcReg (line 2148) | func (this *InstructionAssigner) ConvertSrcReg(expr_ *expr.Expr) *reg_...
    method ConvertPairReg (line 2221) | func (this *InstructionAssigner) ConvertPairReg(
    method ConvertCondition (line 2235) | func (this *InstructionAssigner) ConvertCondition(expr_ *expr.Expr) cc...
    method ConvertEndian (line 2346) | func (this *InstructionAssigner) ConvertEndian(expr_ *expr.Expr) instr...
    method ConvertJumpCondition (line 2361) | func (this *InstructionAssigner) ConvertJumpCondition(op_code *expr.Ex...
    method EvaluateProgramCounter (line 2395) | func (this *InstructionAssigner) EvaluateProgramCounter(expr_ *expr.Ex...
    method EvaluatePrimary (line 2414) | func (this *InstructionAssigner) EvaluatePrimary(expr_ *expr.Expr) int...
    method EvaluatePositiveNumber (line 2431) | func (this *InstructionAssigner) EvaluatePositiveNumber(token *lexer.T...
    method EvaluateHexNumber (line 2441) | func (this *InstructionAssigner) EvaluateHexNumber(token *lexer.Token)...
    method EvaluateIdentifier (line 2458) | func (this *InstructionAssigner) EvaluateIdentifier(token *lexer.Token...
    method EvaluateNegativeNumber (line 2479) | func (this *InstructionAssigner) EvaluateNegativeNumber(expr_ *expr.Ex...
    method EvaluateBinaryAdd (line 2485) | func (this *InstructionAssigner) EvaluateBinaryAdd(expr_ *expr.Expr) i...
    method EvaluateBinarySub (line 2518) | func (this *InstructionAssigner) EvaluateBinarySub(expr_ *expr.Expr) i...
    method EvaluateSectionName (line 2551) | func (this *InstructionAssigner) EvaluateSectionName(expr_ *expr.Expr)...
    method IsZeroReg (line 2558) | func (this *InstructionAssigner) IsZeroReg(expr_ *expr.Expr) bool {

FILE: golang/uPIMulator/src/linker/logic/label_assigner.go
  type LabelAssigner (line 14) | type LabelAssigner struct
    method Init (line 19) | func (this *LabelAssigner) Init() {
    method Assign (line 116) | func (this *LabelAssigner) Assign(executable *kernel.Executable) {
    method WalkAsciiStmt (line 121) | func (this *LabelAssigner) WalkAsciiStmt(stmt_ *stmt.Stmt) {
    method WalkAscizStmt (line 132) | func (this *LabelAssigner) WalkAscizStmt(stmt_ *stmt.Stmt) {
    method WalkByteStmt (line 141) | func (this *LabelAssigner) WalkByteStmt(stmt_ *stmt.Stmt) {
    method WalkLongProgramCounterStmt (line 146) | func (this *LabelAssigner) WalkLongProgramCounterStmt(stmt_ *stmt.Stmt) {
    method WalkLongSectionNameStmt (line 151) | func (this *LabelAssigner) WalkLongSectionNameStmt(stmt_ *stmt.Stmt) {
    method WalkQuadStmt (line 156) | func (this *LabelAssigner) WalkQuadStmt(stmt_ *stmt.Stmt) {
    method WalkSectionIdentifierNumberStmt (line 161) | func (this *LabelAssigner) WalkSectionIdentifierNumberStmt(stmt_ *stmt...
    method WalkSectionIdentifierStmt (line 176) | func (this *LabelAssigner) WalkSectionIdentifierStmt(stmt_ *stmt.Stmt) {
    method WalkSectionStackSizes (line 191) | func (this *LabelAssigner) WalkSectionStackSizes(stmt_ *stmt.Stmt) {
    method WalkSectionStringNumberStmt (line 247) | func (this *LabelAssigner) WalkSectionStringNumberStmt(stmt_ *stmt.Stm...
    method WalkSectionStringStmt (line 262) | func (this *LabelAssigner) WalkSectionStringStmt(stmt_ *stmt.Stmt) {
    method WalkShortStmt (line 277) | func (this *LabelAssigner) WalkShortStmt(stmt_ *stmt.Stmt) {
    method WalkTextStmt (line 282) | func (this *LabelAssigner) WalkTextStmt(stmt_ *stmt.Stmt) {
    method WalkZeroDoubleNumberStmt (line 295) | func (this *LabelAssigner) WalkZeroDoubleNumberStmt(stmt_ *stmt.Stmt) {
    method WalkZeroSingleNumberStmt (line 312) | func (this *LabelAssigner) WalkZeroSingleNumberStmt(stmt_ *stmt.Stmt) {
    method WalkInstructionStmt (line 329) | func (this *LabelAssigner) WalkInstructionStmt(stmt_ *stmt.Stmt) {
    method WalkLabelStmt (line 340) | func (this *LabelAssigner) WalkLabelStmt(stmt_ *stmt.Stmt) {
    method ConvertSectionName (line 357) | func (this *LabelAssigner) ConvertSectionName(expr_ *expr.Expr) kernel...
    method ConvertName (line 397) | func (this *LabelAssigner) ConvertName(expr_ *expr.Expr) string {
    method ConvertSectionFlags (line 417) | func (this *LabelAssigner) ConvertSectionFlags(token *lexer.Token) map...
    method ConvertSectionType (line 442) | func (this *LabelAssigner) ConvertSectionType(expr_ *expr.Expr) kernel...

FILE: golang/uPIMulator/src/linker/logic/linker_constant.go
  type LinkerConstant (line 3) | type LinkerConstant struct
    method Init (line 8) | func (this *LinkerConstant) Init(name string) {
    method Name (line 13) | func (this *LinkerConstant) Name() string {
    method Value (line 17) | func (this *LinkerConstant) Value() int64 {
    method SetValue (line 21) | func (this *LinkerConstant) SetValue(value int64) {

FILE: golang/uPIMulator/src/linker/logic/linker_script.go
  type LinkerScript (line 13) | type LinkerScript struct
    method Init (line 22) | func (this *LinkerScript) Init(command_line_parser *misc.CommandLinePa...
    method Assign (line 33) | func (this *LinkerScript) Assign(executable *kernel.Executable) {
    method HasLinkerConstant (line 40) | func (this *LinkerScript) HasLinkerConstant(name string) bool {
    method LinkerConstant (line 45) | func (this *LinkerScript) LinkerConstant(name string) *LinkerConstant {
    method InitLinkerConstants (line 49) | func (this *LinkerScript) InitLinkerConstants() {
    method AssignAtomic (line 100) | func (this *LinkerScript) AssignAtomic(executable *kernel.Executable) {
    method AssignIram (line 123) | func (this *LinkerScript) AssignIram(executable *kernel.Executable) {
    method AssignWram (line 157) | func (this *LinkerScript) AssignWram(executable *kernel.Executable) {
    method AssignMram (line 274) | func (this *LinkerScript) AssignMram(executable *kernel.Executable) {
    method DumpValues (line 397) | func (this *LinkerScript) DumpValues(path string) {

FILE: golang/uPIMulator/src/linker/logic/liveness_analyzer.go
  type LivenessAnalyzer (line 12) | type LivenessAnalyzer struct
    method Init (line 17) | func (this *LivenessAnalyzer) Init() {
    method Analyze (line 30) | func (this *LivenessAnalyzer) Analyze(relocatable *kernel.Relocatable)...
    method WalkPrimaryExpr (line 35) | func (this *LivenessAnalyzer) WalkPrimaryExpr(expr_ *expr.Expr) {
    method WalkGlobalStmt (line 49) | func (this *LivenessAnalyzer) WalkGlobalStmt(stmt_ *stmt.Stmt) {
    method WalkSetStmt (line 73) | func (this *LivenessAnalyzer) WalkSetStmt(stmt_ *stmt.Stmt) {
    method WalkLabelStmt (line 104) | func (this *LivenessAnalyzer) WalkLabelStmt(stmt_ *stmt.Stmt) {

FILE: golang/uPIMulator/src/linker/logic/set_assigner.go
  type SetAssigner (line 12) | type SetAssigner struct
    method Init (line 17) | func (this *SetAssigner) Init() {
    method Assign (line 32) | func (this *SetAssigner) Assign(executable *kernel.Executable) {
    method WalkSectionIdentifierNumberStmt (line 37) | func (this *SetAssigner) WalkSectionIdentifierNumberStmt(stmt_ *stmt.S...
    method WalkSectionIdentifierStmt (line 46) | func (this *SetAssigner) WalkSectionIdentifierStmt(stmt_ *stmt.Stmt) {
    method WalkSectionStackSizes (line 55) | func (this *SetAssigner) WalkSectionStackSizes(stmt_ *stmt.Stmt) {
    method WalkSectionStringNumberStmt (line 105) | func (this *SetAssigner) WalkSectionStringNumberStmt(stmt_ *stmt.Stmt) {
    method WalkSectionStringStmt (line 114) | func (this *SetAssigner) WalkSectionStringStmt(stmt_ *stmt.Stmt) {
    method WalkSetStmt (line 123) | func (this *SetAssigner) WalkSetStmt(stmt_ *stmt.Stmt) {
    method WalkTextStmt (line 149) | func (this *SetAssigner) WalkTextStmt(stmt_ *stmt.Stmt) {
    method ConvertSectionName (line 156) | func (this *SetAssigner) ConvertSectionName(expr_ *expr.Expr) kernel.S...
    method ConvertName (line 196) | func (this *SetAssigner) ConvertName(expr_ *expr.Expr) string {

FILE: golang/uPIMulator/src/linker/parse_job.go
  type ParseJob (line 9) | type ParseJob struct
    method Init (line 13) | func (this *ParseJob) Init(relocatable *kernel.Relocatable) {
    method Execute (line 17) | func (this *ParseJob) Execute() {

FILE: golang/uPIMulator/src/linker/parser/ast.go
  type Ast (line 7) | type Ast struct
    method Init (line 11) | func (this *Ast) Init(stmts []*stmt.Stmt) {
    method Size (line 15) | func (this *Ast) Size() int {
    method Get (line 19) | func (this *Ast) Get(pos int) *stmt.Stmt {

FILE: golang/uPIMulator/src/linker/parser/expr/binary_add_expr.go
  type BinaryAddExpr (line 7) | type BinaryAddExpr struct
    method Init (line 12) | func (this *BinaryAddExpr) Init(operand1 *Expr, operand2 *Expr) {
    method Operand1 (line 27) | func (this *BinaryAddExpr) Operand1() *Expr {
    method Operand2 (line 31) | func (this *BinaryAddExpr) Operand2() *Expr {

FILE: golang/uPIMulator/src/linker/parser/expr/binary_sub_expr.go
  type BinarySubExpr (line 7) | type BinarySubExpr struct
    method Init (line 12) | func (this *BinarySubExpr) Init(operand1 *Expr, operand2 *Expr) {
    method Operand1 (line 27) | func (this *BinarySubExpr) Operand1() *Expr {
    method Operand2 (line 31) | func (this *BinarySubExpr) Operand2() *Expr {

FILE: golang/uPIMulator/src/linker/parser/expr/ci_op_code_expr.go
  type CiOpCodeExpr (line 8) | type CiOpCodeExpr struct
    method Init (line 12) | func (this *CiOpCodeExpr) Init(token *lexer.Token) {
    method Token (line 23) | func (this *CiOpCodeExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/condition_expr.go
  type ConditionExpr (line 8) | type ConditionExpr struct
    method Init (line 12) | func (this *ConditionExpr) Init(token *lexer.Token) {
    method Token (line 72) | func (this *ConditionExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/ddci_op_code_expr.go
  type DdciOpCodeExpr (line 8) | type DdciOpCodeExpr struct
    method Init (line 12) | func (this *DdciOpCodeExpr) Init(token *lexer.Token) {
    method Token (line 23) | func (this *DdciOpCodeExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/dma_rri_op_code_expr.go
  type DmaRriOpCodeExpr (line 8) | type DmaRriOpCodeExpr struct
    method Init (line 12) | func (this *DmaRriOpCodeExpr) Init(token *lexer.Token) {
    method Token (line 23) | func (this *DmaRriOpCodeExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/drdici_op_code_expr.go
  type DrdiciOpCodeExpr (line 8) | type DrdiciOpCodeExpr struct
    method Init (line 12) | func (this *DrdiciOpCodeExpr) Init(token *lexer.Token) {
    method Token (line 23) | func (this *DrdiciOpCodeExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/endian_expr.go
  type EndianExpr (line 8) | type EndianExpr struct
    method Init (line 12) | func (this *EndianExpr) Init(token *lexer.Token) {
    method Token (line 24) | func (this *EndianExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/expr.go
  type ExprType (line 7) | type ExprType
  constant CI_OP_CODE (line 10) | CI_OP_CODE ExprType = iota
  constant DDCI_OP_CODE (line 11) | DDCI_OP_CODE
  constant DMA_RRI_OP_CODE (line 12) | DMA_RRI_OP_CODE
  constant DRDICI_OP_CODE (line 13) | DRDICI_OP_CODE
  constant I_OP_CODE (line 14) | I_OP_CODE
  constant JUMP_OP_CODE (line 15) | JUMP_OP_CODE
  constant LOAD_OP_CODE (line 16) | LOAD_OP_CODE
  constant R_OP_CODE (line 17) | R_OP_CODE
  constant RICI_OP_CODE (line 18) | RICI_OP_CODE
  constant RR_OP_CODE (line 19) | RR_OP_CODE
  constant RRI_OP_CODE (line 20) | RRI_OP_CODE
  constant RRRI_OP_CODE (line 21) | RRRI_OP_CODE
  constant STORE_OP_CODE (line 22) | STORE_OP_CODE
  constant SUFFIX (line 24) | SUFFIX
  constant CONDITION (line 25) | CONDITION
  constant ENDIAN (line 26) | ENDIAN
  constant SECTION_NAME (line 28) | SECTION_NAME
  constant SECTION_TYPE (line 29) | SECTION_TYPE
  constant SYMBOL_TYPE (line 31) | SYMBOL_TYPE
  constant NEGATIVE_NUMBER (line 33) | NEGATIVE_NUMBER
  constant PRIMARY (line 34) | PRIMARY
  constant BINARY_ADD (line 35) | BINARY_ADD
  constant BINARY_SUB (line 36) | BINARY_SUB
  constant PROGRAM_COUNTER (line 37) | PROGRAM_COUNTER
  constant SRC_REG (line 39) | SRC_REG
  type Expr (line 42) | type Expr struct
    method InitCiOpCodeExpr (line 77) | func (this *Expr) InitCiOpCodeExpr(token *lexer.Token) {
    method InitDdciOpCodeExpr (line 84) | func (this *Expr) InitDdciOpCodeExpr(token *lexer.Token) {
    method InitDmaRriOpCodeExpr (line 91) | func (this *Expr) InitDmaRriOpCodeExpr(token *lexer.Token) {
    method InitDrdiciOpCodeExpr (line 98) | func (this *Expr) InitDrdiciOpCodeExpr(token *lexer.Token) {
    method InitIOpCodeExpr (line 105) | func (this *Expr) InitIOpCodeExpr(token *lexer.Token) {
    method InitJumpOpCodeExpr (line 112) | func (this *Expr) InitJumpOpCodeExpr(token *lexer.Token) {
    method InitLoadOpCodeExpr (line 119) | func (this *Expr) InitLoadOpCodeExpr(token *lexer.Token) {
    method InitROpCodeExpr (line 126) | func (this *Expr) InitROpCodeExpr(token *lexer.Token) {
    method InitRiciOpCodeExpr (line 133) | func (this *Expr) InitRiciOpCodeExpr(token *lexer.Token) {
    method InitRrOpCodeExpr (line 140) | func (this *Expr) InitRrOpCodeExpr(token *lexer.Token) {
    method InitRriOpCodeExpr (line 147) | func (this *Expr) InitRriOpCodeExpr(token *lexer.Token) {
    method InitRrriOpCodeExpr (line 154) | func (this *Expr) InitRrriOpCodeExpr(token *lexer.Token) {
    method InitStoreOpCodeExpr (line 161) | func (this *Expr) InitStoreOpCodeExpr(token *lexer.Token) {
    method InitSuffixExpr (line 168) | func (this *Expr) InitSuffixExpr(token *lexer.Token) {
    method InitConditionExpr (line 175) | func (this *Expr) InitConditionExpr(token *lexer.Token) {
    method InitEndianExpr (line 182) | func (this *Expr) InitEndianExpr(token *lexer.Token) {
    method InitSectionNameExpr (line 189) | func (this *Expr) InitSectionNameExpr(token *lexer.Token) {
    method InitSectionTypeExpr (line 196) | func (this *Expr) InitSectionTypeExpr(token *lexer.Token) {
    method InitSymbolTypeExpr (line 203) | func (this *Expr) InitSymbolTypeExpr(token *lexer.Token) {
    method InitNegativeNumberExpr (line 210) | func (this *Expr) InitNegativeNumberExpr(token *lexer.Token) {
    method InitPrimaryExpr (line 217) | func (this *Expr) InitPrimaryExpr(token *lexer.Token) {
    method InitBinaryAddExpr (line 224) | func (this *Expr) InitBinaryAddExpr(operand1 *Expr, operand2 *Expr) {
    method InitBinarySubExpr (line 231) | func (this *Expr) InitBinarySubExpr(operand1 *Expr, operand2 *Expr) {
    method InitProgramCounterExpr (line 238) | func (this *Expr) InitProgramCounterExpr(expr *Expr) {
    method InitSrcRegExpr (line 245) | func (this *Expr) InitSrcRegExpr(token *lexer.Token) {
    method ExprType (line 252) | func (this *Expr) ExprType() ExprType {
    method CiOpCodeExpr (line 256) | func (this *Expr) CiOpCodeExpr() *CiOpCodeExpr {
    method DdciOpCodeExpr (line 260) | func (this *Expr) DdciOpCodeExpr() *DdciOpCodeExpr {
    method DmaRriOpCodeExpr (line 264) | func (this *Expr) DmaRriOpCodeExpr() *DmaRriOpCodeExpr {
    method DrdiciOpCodeExpr (line 268) | func (this *Expr) DrdiciOpCodeExpr() *DrdiciOpCodeExpr {
    method IOpCodeExpr (line 272) | func (this *Expr) IOpCodeExpr() *IOpCodeExpr {
    method JumpOpCodeExpr (line 276) | func (this *Expr) JumpOpCodeExpr() *JumpOpCodeExpr {
    method LoadOpCodeExpr (line 280) | func (this *Expr) LoadOpCodeExpr() *LoadOpCodeExpr {
    method ROpCodeExpr (line 284) | func (this *Expr) ROpCodeExpr() *ROpCodeExpr {
    method RiciOpCodeExpr (line 288) | func (this *Expr) RiciOpCodeExpr() *RiciOpCodeExpr {
    method RrOpCodeExpr (line 292) | func (this *Expr) RrOpCodeExpr() *RrOpCodeExpr {
    method RriOpCodeExpr (line 296) | func (this *Expr) RriOpCodeExpr() *RriOpCodeExpr {
    method RrriOpCodeExpr (line 300) | func (this *Expr) RrriOpCodeExpr() *RrriOpCodeExpr {
    method StoreOpCodeExpr (line 304) | func (this *Expr) StoreOpCodeExpr() *StoreOpCodeExpr {
    method SuffixExpr (line 308) | func (this *Expr) SuffixExpr() *SuffixExpr {
    method ConditionExpr (line 312) | func (this *Expr) ConditionExpr() *ConditionExpr {
    method EndianExpr (line 316) | func (this *Expr) EndianExpr() *EndianExpr {
    method SectionNameExpr (line 320) | func (this *Expr) SectionNameExpr() *SectionNameExpr {
    method SectionTypeExpr (line 324) | func (this *Expr) SectionTypeExpr() *SectionTypeExpr {
    method SymbolTypeExpr (line 328) | func (this *Expr) SymbolTypeExpr() *SymbolTypeExpr {
    method NegativeNumberExpr (line 332) | func (this *Expr) NegativeNumberExpr() *NegativeNumberExpr {
    method PrimaryExpr (line 336) | func (this *Expr) PrimaryExpr() *PrimaryExpr {
    method BinaryAddExpr (line 340) | func (this *Expr) BinaryAddExpr() *BinaryAddExpr {
    method BinarySubExpr (line 344) | func (this *Expr) BinarySubExpr() *BinarySubExpr {
    method ProgramCounterExpr (line 348) | func (this *Expr) ProgramCounterExpr() *ProgramCounterExpr {
    method SrcRegExpr (line 352) | func (this *Expr) SrcRegExpr() *SrcRegExpr {

FILE: golang/uPIMulator/src/linker/parser/expr/i_op_code_expr.go
  type IOpCodeExpr (line 8) | type IOpCodeExpr struct
    method Init (line 12) | func (this *IOpCodeExpr) Init(token *lexer.Token) {
    method Token (line 23) | func (this *IOpCodeExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/jump_op_code_expr.go
  type JumpOpCodeExpr (line 8) | type JumpOpCodeExpr struct
    method Init (line 12) | func (this *JumpOpCodeExpr) Init(token *lexer.Token) {
    method Token (line 35) | func (this *JumpOpCodeExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/load_op_code_expr.go
  type LoadOpCodeExpr (line 8) | type LoadOpCodeExpr struct
    method Init (line 12) | func (this *LoadOpCodeExpr) Init(token *lexer.Token) {
    method Token (line 28) | func (this *LoadOpCodeExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/negative_number_expr.go
  type NegativeNumberExpr (line 8) | type NegativeNumberExpr struct
    method Init (line 12) | func (this *NegativeNumberExpr) Init(token *lexer.Token) {
    method Token (line 21) | func (this *NegativeNumberExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/primary_expr.go
  type PrimaryExpr (line 8) | type PrimaryExpr struct
    method Init (line 12) | func (this *PrimaryExpr) Init(token *lexer.Token) {
    method Token (line 24) | func (this *PrimaryExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/program_counter_expr.go
  type ProgramCounterExpr (line 7) | type ProgramCounterExpr struct
    method Init (line 11) | func (this *ProgramCounterExpr) Init(expr *Expr) {
    method Expr (line 23) | func (this *ProgramCounterExpr) Expr() *Expr {

FILE: golang/uPIMulator/src/linker/parser/expr/r_op_code_expr.go
  type ROpCodeExpr (line 8) | type ROpCodeExpr struct
    method Init (line 12) | func (this *ROpCodeExpr) Init(token *lexer.Token) {
    method Token (line 23) | func (this *ROpCodeExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/rici_op_code_expr.go
  type RiciOpCodeExpr (line 8) | type RiciOpCodeExpr struct
    method Init (line 12) | func (this *RiciOpCodeExpr) Init(token *lexer.Token) {
    method Token (line 26) | func (this *RiciOpCodeExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/rr_op_code_expr.go
  type RrOpCodeExpr (line 8) | type RrOpCodeExpr struct
    method Init (line 12) | func (this *RrOpCodeExpr) Init(token *lexer.Token) {
    method Token (line 35) | func (this *RrOpCodeExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/rri_op_code_expr.go
  type RriOpCodeExpr (line 8) | type RriOpCodeExpr struct
    method Init (line 12) | func (this *RriOpCodeExpr) Init(token *lexer.Token) {
    method Token (line 61) | func (this *RriOpCodeExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/rrri_op_code_expr.go
  type RrriOpCodeExpr (line 8) | type RrriOpCodeExpr struct
    method Init (line 12) | func (this *RrriOpCodeExpr) Init(token *lexer.Token) {
    method Token (line 26) | func (this *RrriOpCodeExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/section_name_expr.go
  type SectionNameExpr (line 8) | type SectionNameExpr struct
    method Init (line 12) | func (this *SectionNameExpr) Init(token *lexer.Token) {
    method Token (line 37) | func (this *SectionNameExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/section_type_expr.go
  type SectionTypeExpr (line 8) | type SectionTypeExpr struct
    method Init (line 12) | func (this *SectionTypeExpr) Init(token *lexer.Token) {
    method Token (line 23) | func (this *SectionTypeExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/src_reg_expr.go
  type SrcRegExpr (line 8) | type SrcRegExpr struct
    method Init (line 12) | func (this *SrcRegExpr) Init(token *lexer.Token) {
    method Token (line 31) | func (this *SrcRegExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/store_op_code_expr.go
  type StoreOpCodeExpr (line 8) | type StoreOpCodeExpr struct
    method Init (line 12) | func (this *StoreOpCodeExpr) Init(token *lexer.Token) {
    method Token (line 30) | func (this *StoreOpCodeExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/suffix_expr.go
  type SuffixExpr (line 8) | type SuffixExpr struct
    method Init (line 12) | func (this *SuffixExpr) Init(token *lexer.Token) {
    method Token (line 23) | func (this *SuffixExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/expr/symbol_type.go
  type SymbolTypeExpr (line 8) | type SymbolTypeExpr struct
    method Init (line 12) | func (this *SymbolTypeExpr) Init(token *lexer.Token) {
    method Token (line 23) | func (this *SymbolTypeExpr) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/parser.go
  type Parser (line 10) | type Parser struct
    method Init (line 15) | func (this *Parser) Init() {
    method Parse (line 156) | func (this *Parser) Parse(token_stream *lexer.TokenStream) *Ast {
    method ReduceExpr (line 185) | func (this *Parser) ReduceExpr(token *lexer.Token) {
    method ReduceStmt (line 200) | func (this *Parser) ReduceStmt(token *lexer.Token) {
    method RegisterAccessExpr (line 215) | func (this *Parser) RegisterAccessExpr() {
    method RegisterConcatExpr (line 254) | func (this *Parser) RegisterConcatExpr() {
    method RegisterBinaryAddExpr (line 297) | func (this *Parser) RegisterBinaryAddExpr() {
    method RegisterBinarySubExpr (line 336) | func (this *Parser) RegisterBinarySubExpr() {
    method RegisterCiOpCodeExpr (line 375) | func (this *Parser) RegisterCiOpCodeExpr() {
    method RegisterDdciOpCodeExpr (line 414) | func (this *Parser) RegisterDdciOpCodeExpr() {
    method RegisterDmaRriOpCodeExpr (line 453) | func (this *Parser) RegisterDmaRriOpCodeExpr() {
    method RegisterDrdiciOpCodeExpr (line 492) | func (this *Parser) RegisterDrdiciOpCodeExpr() {
    method RegisterConditionExpr (line 531) | func (this *Parser) RegisterConditionExpr() {
    method RegisterEndianExpr (line 620) | func (this *Parser) RegisterEndianExpr() {
    method RegisterIOpCodeExpr (line 659) | func (this *Parser) RegisterIOpCodeExpr() {
    method RegisterJumpOpCodeExpr (line 698) | func (this *Parser) RegisterJumpOpCodeExpr() {
    method RegisterLoadOpCodeExpr (line 749) | func (this *Parser) RegisterLoadOpCodeExpr() {
    method RegisterNegativeNumberExpr (line 793) | func (this *Parser) RegisterNegativeNumberExpr() {
    method RegisterPrimaryExpr (line 829) | func (this *Parser) RegisterPrimaryExpr() {
    method RegisterProgramCounterExpr (line 868) | func (this *Parser) RegisterProgramCounterExpr() {
    method RegisterROpCodeExpr (line 910) | func (this *Parser) RegisterROpCodeExpr() {
    method RegisterRiciOpCodeExpr (line 949) | func (this *Parser) RegisterRiciOpCodeExpr() {
    method RegisterRrOpCodeExpr (line 991) | func (this *Parser) RegisterRrOpCodeExpr() {
    method RegisterRriOpCodeExpr (line 1042) | func (this *Parser) RegisterRriOpCodeExpr() {
    method RegisterRrriOpCodeExpr (line 1119) | func (this *Parser) RegisterRrriOpCodeExpr() {
    method RegisterStoreOpCodeExpr (line 1161) | func (this *Parser) RegisterStoreOpCodeExpr() {
    method RegisterSuffixExpr (line 1207) | func (this *Parser) RegisterSuffixExpr() {
    method RegisterSectionNameExpr (line 1246) | func (this *Parser) RegisterSectionNameExpr() {
    method RegisterSectionTypeExpr (line 1299) | func (this *Parser) RegisterSectionTypeExpr() {
    method RegisterSrcRegExpr (line 1338) | func (this *Parser) RegisterSrcRegExpr() {
    method RegisterSymbolTypeExpr (line 1385) | func (this *Parser) RegisterSymbolTypeExpr() {
    method RegisterAddrsigStmt (line 1424) | func (this *Parser) RegisterAddrsigStmt() {
    method RegisterAddrsigSymStmt (line 1456) | func (this *Parser) RegisterAddrsigSymStmt() {
    method RegisterAsciiStmt (line 1492) | func (this *Parser) RegisterAsciiStmt() {
    method RegisterAscizStmt (line 1528) | func (this *Parser) RegisterAscizStmt() {
    method RegisterByteStmt (line 1564) | func (this *Parser) RegisterByteStmt() {
    method RegisterCfiDefCfaOffsetStmt (line 1600) | func (this *Parser) RegisterCfiDefCfaOffsetStmt() {
    method RegisterCfiEndprocStmt (line 1636) | func (this *Parser) RegisterCfiEndprocStmt() {
    method RegisterCfiOffsetStmt (line 1668) | func (this *Parser) RegisterCfiOffsetStmt() {
    method RegisterCfiSectionsStmt (line 1709) | func (this *Parser) RegisterCfiSectionsStmt() {
    method RegisterCfiStartprocStmt (line 1745) | func (this *Parser) RegisterCfiStartprocStmt() {
    method RegisterFileNumberStmt (line 1777) | func (this *Parser) RegisterFileNumberStmt() {
    method RegisterFileStringStmt (line 1819) | func (this *Parser) RegisterFileStringStmt() {
    method RegisterGlobalStmt (line 1855) | func (this *Parser) RegisterGlobalStmt() {
    method RegisterLocIsStmtStmt (line 1891) | func (this *Parser) RegisterLocIsStmtStmt() {
    method RegisterLocNumberStmt (line 1938) | func (this *Parser) RegisterLocNumberStmt() {
    method RegisterLocPrologueEndStmt (line 1980) | func (this *Parser) RegisterLocPrologueEndStmt() {
    method RegisterLongProgramCounterStmt (line 2024) | func (this *Parser) RegisterLongProgramCounterStmt() {
    method RegisterLongSectionNameStmt (line 2060) | func (this *Parser) RegisterLongSectionNameStmt() {
    method RegisterP2AlignStmt (line 2096) | func (this *Parser) RegisterP2AlignStmt() {
    method RegisterQuadStmt (line 2132) | func (this *Parser) RegisterQuadStmt() {
    method RegisterSectionIdentifierNumberStmt (line 2168) | func (this *Parser) RegisterSectionIdentifierNumberStmt() {
    method RegisterSectionIdentifierStmt (line 2228) | func (this *Parser) RegisterSectionIdentifierStmt() {
    method RegisterSectionStackSizesStmt (line 2277) | func (this *Parser) RegisterSectionStackSizesStmt() {
    method RegisterSectionStringNumberStmt (line 2330) | func (this *Parser) RegisterSectionStringNumberStmt() {
    method RegisterSectionStringStmt (line 2381) | func (this *Parser) RegisterSectionStringStmt() {
    method RegisterSetStmt (line 2427) | func (this *Parser) RegisterSetStmt() {
    method RegisterShortStmt (line 2468) | func (this *Parser) RegisterShortStmt() {
    method RegisterSizeStmt (line 2504) | func (this *Parser) RegisterSizeStmt() {
    method RegisterTextStmt (line 2545) | func (this *Parser) RegisterTextStmt() {
    method RegisterTypeStmt (line 2582) | func (this *Parser) RegisterTypeStmt() {
    method RegisterWeakStmt (line 2623) | func (this *Parser) RegisterWeakStmt() {
    method RegisterZeroDoubleNumberStmt (line 2659) | func (this *Parser) RegisterZeroDoubleNumberStmt() {
    method RegisterZeroSingleNumberStmt (line 2700) | func (this *Parser) RegisterZeroSingleNumberStmt() {
    method RegisterCiStmt (line 2736) | func (this *Parser) RegisterCiStmt() {
    method RegisterDdciStmt (line 2778) | func (this *Parser) RegisterDdciStmt() {
    method RegisterDmaRriStmt (line 2830) | func (this *Parser) RegisterDmaRriStmt() {
    method RegisterDrdiciStmt (line 2877) | func (this *Parser) RegisterDrdiciStmt() {
    method RegisterEdriStmt (line 2939) | func (this *Parser) RegisterEdriStmt() {
    method RegisterEridStmt (line 2991) | func (this *Parser) RegisterEridStmt() {
    method RegisterEriiStmt (line 3043) | func (this *Parser) RegisterEriiStmt() {
    method RegisterErirStmt (line 3095) | func (this *Parser) RegisterErirStmt() {
    method RegisterErriStmt (line 3147) | func (this *Parser) RegisterErriStmt() {
    method RegisterIStmt (line 3199) | func (this *Parser) RegisterIStmt() {
    method RegisterRciStmt (line 3236) | func (this *Parser) RegisterRciStmt() {
    method RegisterRiciStmt (line 3283) | func (this *Parser) RegisterRiciStmt() {
    method RegisterRirciStmt (line 3335) | func (this *Parser) RegisterRirciStmt() {
    method RegisterRircStmt (line 3392) | func (this *Parser) RegisterRircStmt() {
    method RegisterRirStmt (line 3444) | func (this *Parser) RegisterRirStmt() {
    method RegisterRrciStmt (line 3491) | func (this *Parser) RegisterRrciStmt() {
    method RegisterRrcStmt (line 3543) | func (this *Parser) RegisterRrcStmt() {
    method RegisterRriciStmt (line 3590) | func (this *Parser) RegisterRriciStmt() {
    method RegisterRricStmt (line 3647) | func (this *Parser) RegisterRricStmt() {
    method RegisterRriStmt (line 3699) | func (this *Parser) RegisterRriStmt() {
    method RegisterRrrciStmt (line 3746) | func (this *Parser) RegisterRrrciStmt() {
    method RegisterRrrcStmt (line 3803) | func (this *Parser) RegisterRrrcStmt() {
    method RegisterRrriciStmt (line 3855) | func (this *Parser) RegisterRrriciStmt() {
    method RegisterRrriStmt (line 3917) | func (this *Parser) RegisterRrriStmt() {
    method RegisterRrrStmt (line 3969) | func (this *Parser) RegisterRrrStmt() {
    method RegisterRrStmt (line 4016) | func (this *Parser) RegisterRrStmt() {
    method RegisterRStmt (line 4058) | func (this *Parser) RegisterRStmt() {
    method RegisterSErriStmt (line 4095) | func (this *Parser) RegisterSErriStmt() {
    method RegisterSRciStmt (line 4150) | func (this *Parser) RegisterSRciStmt() {
    method RegisterSRirciStmt (line 4200) | func (this *Parser) RegisterSRirciStmt() {
    method RegisterSRircStmt (line 4260) | func (this *Parser) RegisterSRircStmt() {
    method RegisterSRrciStmt (line 4315) | func (this *Parser) RegisterSRrciStmt() {
    method RegisterSRrcStmt (line 4370) | func (this *Parser) RegisterSRrcStmt() {
    method RegisterSRriciStmt (line 4420) | func (this *Parser) RegisterSRriciStmt() {
    method RegisterSRricStmt (line 4480) | func (this *Parser) RegisterSRricStmt() {
    method RegisterSRriStmt (line 4535) | func (this *Parser) RegisterSRriStmt() {
    method RegisterSRrrciStmt (line 4585) | func (this *Parser) RegisterSRrrciStmt() {
    method RegisterSRrrcStmt (line 4645) | func (this *Parser) RegisterSRrrcStmt() {
    method RegisterSRrriciStmt (line 4700) | func (this *Parser) RegisterSRrriciStmt() {
    method RegisterSRrriStmt (line 4765) | func (this *Parser) RegisterSRrriStmt() {
    method RegisterSRrrStmt (line 4820) | func (this *Parser) RegisterSRrrStmt() {
    method RegisterSRrStmt (line 4870) | func (this *Parser) RegisterSRrStmt() {
    method RegisterSRStmt (line 4915) | func (this *Parser) RegisterSRStmt() {
    method RegisterNopStmt (line 4955) | func (this *Parser) RegisterNopStmt() {
    method RegisterBkpStmt (line 4994) | func (this *Parser) RegisterBkpStmt() {
    method RegisterBootRiStmt (line 5033) | func (this *Parser) RegisterBootRiStmt() {
    method RegisterCallRiStmt (line 5081) | func (this *Parser) RegisterCallRiStmt() {
    method RegisterCallRrStmt (line 5129) | func (this *Parser) RegisterCallRrStmt() {
    method RegisterDivStepDrdiStmt (line 5177) | func (this *Parser) RegisterDivStepDrdiStmt() {
    method RegisterJeqRiiStmt (line 5229) | func (this *Parser) RegisterJeqRiiStmt() {
    method RegisterJeqRriStmt (line 5276) | func (this *Parser) RegisterJeqRriStmt() {
    method RegisterJnzRiStmt (line 5323) | func (this *Parser) RegisterJnzRiStmt() {
    method RegisterJumpIStmt (line 5365) | func (this *Parser) RegisterJumpIStmt() {
    method RegisterJumpRStmt (line 5408) | func (this *Parser) RegisterJumpRStmt() {
    method RegisterLbsRriStmt (line 5451) | func (this *Parser) RegisterLbsRriStmt() {
    method RegisterLbsSRriStmt (line 5498) | func (this *Parser) RegisterLbsSRriStmt() {
    method RegisterLdDriStmt (line 5548) | func (this *Parser) RegisterLdDriStmt() {
    method RegisterMovdDdStmt (line 5595) | func (this *Parser) RegisterMovdDdStmt() {
    method RegisterMoveRiciStmt (line 5637) | func (this *Parser) RegisterMoveRiciStmt() {
    method RegisterMoveRiStmt (line 5695) | func (this *Parser) RegisterMoveRiStmt() {
    method RegisterMoveSRiciStmt (line 5743) | func (this *Parser) RegisterMoveSRiciStmt() {
    method RegisterMoveSRiStmt (line 5804) | func (this *Parser) RegisterMoveSRiStmt() {
    method RegisterSbIdRiiStmt (line 5855) | func (this *Parser) RegisterSbIdRiiStmt() {
    method RegisterSbIdRiStmt (line 5902) | func (this *Parser) RegisterSbIdRiStmt() {
    method RegisterSbRirStmt (line 5944) | func (this *Parser) RegisterSbRirStmt() {
    method RegisterSdRidStmt (line 5991) | func (this *Parser) RegisterSdRidStmt() {
    method RegisterStopStmt (line 6038) | func (this *Parser) RegisterStopStmt() {
    method RegisterTimeCfgRStmt (line 6078) | func (this *Parser) RegisterTimeCfgRStmt() {
    method RegisterLabelStmt (line 6121) | func (this *Parser) RegisterLabelStmt() {

FILE: golang/uPIMulator/src/linker/parser/rule.go
  type Reducible (line 8) | type Reducible
  type Reduce (line 9) | type Reduce
  type Rule (line 11) | type Rule struct
    method Init (line 18) | func (this *Rule) Init(precedence map[lexer.TokenType]bool, reducible ...
    method IsReducible (line 24) | func (this *Rule) IsReducible(stack_items []*StackItem, token *lexer.T...
    method Reduce (line 32) | func (this *Rule) Reduce(stack_items []*StackItem, token *lexer.Token)...

FILE: golang/uPIMulator/src/linker/parser/stack.go
  type Stack (line 9) | type Stack struct
    method Init (line 13) | func (this *Stack) Init() {
    method Push (line 17) | func (this *Stack) Push(stack_item *StackItem) {
    method Pop (line 21) | func (this *Stack) Pop(num int) {
    method Front (line 25) | func (this *Stack) Front(num int) []*StackItem {
    method NonStmtSize (line 34) | func (this *Stack) NonStmtSize() int {
    method AreStmts (line 48) | func (this *Stack) AreStmts() bool {
    method CanAccept (line 59) | func (this *Stack) CanAccept() bool {
    method Accept (line 78) | func (this *Stack) Accept() *Ast {

FILE: golang/uPIMulator/src/linker/parser/stack_item.go
  type StackItemType (line 9) | type StackItemType
  constant TOKEN (line 12) | TOKEN StackItemType = iota
  constant EXPR (line 13) | EXPR
  constant STMT (line 14) | STMT
  type StackItem (line 17) | type StackItem struct
    method InitToken (line 25) | func (this *StackItem) InitToken(token *lexer.Token) {
    method InitExpr (line 31) | func (this *StackItem) InitExpr(expr *expr.Expr) {
    method InitStmt (line 37) | func (this *StackItem) InitStmt(stmt *stmt.Stmt) {
    method StackItemType (line 43) | func (this *StackItem) StackItemType() StackItemType {
    method Token (line 47) | func (this *StackItem) Token() *lexer.Token {
    method Expr (line 51) | func (this *StackItem) Expr() *expr.Expr {
    method Stmt (line 55) | func (this *StackItem) Stmt() *stmt.Stmt {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/addrsig_stmt.go
  type AddrsigStmt (line 3) | type AddrsigStmt struct
    method Init (line 6) | func (this *AddrsigStmt) Init() {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/addrsig_sym_stmt.go
  type AddrsigSymStmt (line 8) | type AddrsigSymStmt struct
    method Init (line 12) | func (this *AddrsigSymStmt) Init(expr_ *expr.Expr) {
    method Expr (line 21) | func (this *AddrsigSymStmt) Expr() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/ascii_stmt.go
  type AsciiStmt (line 8) | type AsciiStmt struct
    method Init (line 12) | func (this *AsciiStmt) Init(token *lexer.Token) {
    method Token (line 21) | func (this *AsciiStmt) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/asciz_stmt.go
  type AscizStmt (line 8) | type AscizStmt struct
    method Init (line 12) | func (this *AscizStmt) Init(token *lexer.Token) {
    method Token (line 21) | func (this *AscizStmt) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/byte_stmt.go
  type ByteStmt (line 8) | type ByteStmt struct
    method Init (line 12) | func (this *ByteStmt) Init(expr_ *expr.Expr) {
    method Expr (line 21) | func (this *ByteStmt) Expr() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/cfi_def_cfa_offset_stmt.go
  type CfiDefCfaOffsetStmt (line 8) | type CfiDefCfaOffsetStmt struct
    method Init (line 12) | func (this *CfiDefCfaOffsetStmt) Init(expr_ *expr.Expr) {
    method Expr (line 21) | func (this *CfiDefCfaOffsetStmt) Expr() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/cfi_endproc.go
  type CfiEndprocStmt (line 3) | type CfiEndprocStmt struct
    method Init (line 6) | func (this *CfiEndprocStmt) Init() {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/cfi_offset_stmt.go
  type CfiOffsetStmt (line 8) | type CfiOffsetStmt struct
    method Init (line 13) | func (this *CfiOffsetStmt) Init(expr1 *expr.Expr, expr2 *expr.Expr) {
    method Expr1 (line 28) | func (this *CfiOffsetStmt) Expr1() *expr.Expr {
    method Expr2 (line 32) | func (this *CfiOffsetStmt) Expr2() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/cfi_sections_stmt.go
  type CfiSectionsStmt (line 8) | type CfiSectionsStmt struct
    method Init (line 12) | func (this *CfiSectionsStmt) Init(expr_ *expr.Expr) {
    method Expr (line 21) | func (this *CfiSectionsStmt) Expr() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/cfi_startproc_stmt.go
  type CfiStartprocStmt (line 3) | type CfiStartprocStmt struct
    method Init (line 6) | func (this *CfiStartprocStmt) Init() {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/file_number_stmt.go
  type FileNumberStmt (line 9) | type FileNumberStmt struct
    method Init (line 15) | func (this *FileNumberStmt) Init(expr_ *expr.Expr, token1 *lexer.Token...
    method Expr (line 36) | func (this *FileNumberStmt) Expr() *expr.Expr {
    method Token1 (line 40) | func (this *FileNumberStmt) Token1() *lexer.Token {
    method Token2 (line 44) | func (this *FileNumberStmt) Token2() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/file_string_stmt.go
  type FileStringStmt (line 8) | type FileStringStmt struct
    method Init (line 12) | func (this *FileStringStmt) Init(token *lexer.Token) {
    method Token (line 21) | func (this *FileStringStmt) Token() *lexer.Token {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/global_stmt.go
  type GlobalStmt (line 8) | type GlobalStmt struct
    method Init (line 12) | func (this *GlobalStmt) Init(expr_ *expr.Expr) {
    method Expr (line 21) | func (this *GlobalStmt) Expr() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/loc_is_stmt_stmt.go
  type LocIsStmtStmt (line 8) | type LocIsStmtStmt struct
    method Init (line 15) | func (this *LocIsStmtStmt) Init(
    method Expr1 (line 47) | func (this *LocIsStmtStmt) Expr1() *expr.Expr {
    method Expr2 (line 51) | func (this *LocIsStmtStmt) Expr2() *expr.Expr {
    method Expr3 (line 55) | func (this *LocIsStmtStmt) Expr3() *expr.Expr {
    method Expr4 (line 59) | func (this *LocIsStmtStmt) Expr4() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/loc_number_stmt.go
  type LocNumberStmt (line 8) | type LocNumberStmt struct
    method Init (line 14) | func (this *LocNumberStmt) Init(expr1 *expr.Expr, expr2 *expr.Expr, ex...
    method Expr1 (line 35) | func (this *LocNumberStmt) Expr1() *expr.Expr {
    method Expr2 (line 39) | func (this *LocNumberStmt) Expr2() *expr.Expr {
    method Expr3 (line 43) | func (this *LocNumberStmt) Expr3() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/loc_prologue_end_stmt.go
  type LocPrologueEndStmt (line 8) | type LocPrologueEndStmt struct
    method Init (line 14) | func (this *LocPrologueEndStmt) Init(expr1 *expr.Expr, expr2 *expr.Exp...
    method Expr1 (line 35) | func (this *LocPrologueEndStmt) Expr1() *expr.Expr {
    method Expr2 (line 39) | func (this *LocPrologueEndStmt) Expr2() *expr.Expr {
    method Expr3 (line 43) | func (this *LocPrologueEndStmt) Expr3() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/long_program_counter.go
  type LongProgramCounterStmt (line 8) | type LongProgramCounterStmt struct
    method Init (line 12) | func (this *LongProgramCounterStmt) Init(expr_ *expr.Expr) {
    method Expr (line 21) | func (this *LongProgramCounterStmt) Expr() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/long_section_name_stmt.go
  type LongSectionNameStmt (line 8) | type LongSectionNameStmt struct
    method Init (line 12) | func (this *LongSectionNameStmt) Init(expr_ *expr.Expr) {
    method Expr (line 21) | func (this *LongSectionNameStmt) Expr() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/p2_align_stmt.go
  type P2AlignStmt (line 8) | type P2AlignStmt struct
    method Init (line 12) | func (this *P2AlignStmt) Init(expr_ *expr.Expr) {
    method Expr (line 21) | func (this *P2AlignStmt) Expr() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/quad_stmt.go
  type QuadStmt (line 8) | type QuadStmt struct
    method Init (line 12) | func (this *QuadStmt) Init(expr_ *expr.Expr) {
    method Expr (line 21) | func (this *QuadStmt) Expr() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/section_identifier_number_stmt.go
  type SectionIdentifierNumberStmt (line 9) | type SectionIdentifierNumberStmt struct
    method Init (line 17) | func (this *SectionIdentifierNumberStmt) Init(
    method Expr1 (line 56) | func (this *SectionIdentifierNumberStmt) Expr1() *expr.Expr {
    method Expr2 (line 60) | func (this *SectionIdentifierNumberStmt) Expr2() *expr.Expr {
    method Token (line 64) | func (this *SectionIdentifierNumberStmt) Token() *lexer.Token {
    method Expr3 (line 68) | func (this *SectionIdentifierNumberStmt) Expr3() *expr.Expr {
    method Expr4 (line 72) | func (this *SectionIdentifierNumberStmt) Expr4() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/section_identifier_stmt.go
  type SectionIdentifierStmt (line 9) | type SectionIdentifierStmt struct
    method Init (line 16) | func (this *SectionIdentifierStmt) Init(
    method Expr1 (line 48) | func (this *SectionIdentifierStmt) Expr1() *expr.Expr {
    method Expr2 (line 52) | func (this *SectionIdentifierStmt) Expr2() *expr.Expr {
    method Token (line 56) | func (this *SectionIdentifierStmt) Token() *lexer.Token {
    method Expr3 (line 60) | func (this *SectionIdentifierStmt) Expr3() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/section_stack_sizes_stmt.go
  type SectionStackSizesStmt (line 9) | type SectionStackSizesStmt struct
    method Init (line 16) | func (this *SectionStackSizesStmt) Init(
    method Token (line 48) | func (this *SectionStackSizesStmt) Token() *lexer.Token {
    method Expr1 (line 52) | func (this *SectionStackSizesStmt) Expr1() *expr.Expr {
    method Expr2 (line 56) | func (this *SectionStackSizesStmt) Expr2() *expr.Expr {
    method Expr3 (line 60) | func (this *SectionStackSizesStmt) Expr3() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/section_string_number_stmt.go
  type SectionStringNumberStmt (line 9) | type SectionStringNumberStmt struct
    method Init (line 16) | func (this *SectionStringNumberStmt) Init(
    method Expr1 (line 48) | func (this *SectionStringNumberStmt) Expr1() *expr.Expr {
    method Token (line 52) | func (this *SectionStringNumberStmt) Token() *lexer.Token {
    method Expr2 (line 56) | func (this *SectionStringNumberStmt) Expr2() *expr.Expr {
    method Expr3 (line 60) | func (this *SectionStringNumberStmt) Expr3() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/section_string_stmt.go
  type SectionStringStmt (line 9) | type SectionStringStmt struct
    method Init (line 15) | func (this *SectionStringStmt) Init(expr1 *expr.Expr, token *lexer.Tok...
    method Expr1 (line 36) | func (this *SectionStringStmt) Expr1() *expr.Expr {
    method Token (line 40) | func (this *SectionStringStmt) Token() *lexer.Token {
    method Expr2 (line 44) | func (this *SectionStringStmt) Expr2() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/set_stmt.go
  type SetStmt (line 8) | type SetStmt struct
    method Init (line 13) | func (this *SetStmt) Init(expr1 *expr.Expr, expr2 *expr.Expr) {
    method Expr1 (line 28) | func (this *SetStmt) Expr1() *expr.Expr {
    method Expr2 (line 32) | func (this *SetStmt) Expr2() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/short_stmt.go
  type ShortStmt (line 8) | type ShortStmt struct
    method Init (line 12) | func (this *ShortStmt) Init(expr_ *expr.Expr) {
    method Expr (line 21) | func (this *ShortStmt) Expr() *expr.Expr {

FILE: golang/uPIMulator/src/linker/parser/stmt/directive/size_stmt.go
  type SizeStmt (line 8) | type SizeStmt struct
    method Init (line 13) | func (this *SizeStmt) Init(expr1 *expr.Expr, expr2 *expr.Expr) {
    method Expr1 (line 28) | func (this *SizeStmt
Copy disabled (too large) Download .json
Condensed preview — 2582 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (11,065K chars).
[
  {
    "path": ".gitignore",
    "chars": 321,
    "preview": "*.S\n*.bin\n\nbin/\nbuild/\ncmake-build-debug/\nvalidation_log/\n__pycache__/\n.idea/\n.vscode/\n\n# Prerequisites\n*.d\n\n# Compiled "
  },
  {
    "path": "LICENSE",
    "chars": 1071,
    "preview": "Copyright (c) 2024, VIA Research Group at KAIST\n\nPermission is hereby granted, free of charge, to any person obtaining a"
  },
  {
    "path": "README.md",
    "chars": 6921,
    "preview": "# 📖 Introduction\n<img src=\"./assets/uPIMulator_logo.png\" width=\"1000\">\n\n**Welcome to the uPIMulator Framework Documentat"
  },
  {
    "path": "golang/README.md",
    "chars": 12599,
    "preview": "# ⚙️ Usage\n## Currently Supported Mode\nuPIMulator operates in an execution-driven simulation mode, enabling cycle-level "
  },
  {
    "path": "golang/uPIMulator/benchmark/BS/CMakeLists.txt",
    "chars": 46,
    "preview": "#add_subdirectory(host)\r\nadd_subdirectory(dpu)"
  },
  {
    "path": "golang/uPIMulator/benchmark/BS/Makefile",
    "chars": 1261,
    "preview": "DPU_DIR := dpu\nHOST_DIR := host\nBUILDDIR ?= bin\nNR_TASKLETS ?= 16\nNR_DPUS ?= 1\nPROBLEM_SIZE ?= 2\n\ndefine conf_filename\n\t"
  },
  {
    "path": "golang/uPIMulator/benchmark/BS/baselines/cpu/Makefile",
    "chars": 69,
    "preview": "all:\n\tgcc bs_omp.c -o bs_omp -fopenmp\nrun:\n\t./bs_omp 262144 16777216\n"
  },
  {
    "path": "golang/uPIMulator/benchmark/BS/baselines/cpu/README",
    "chars": 111,
    "preview": "Binary Search (BS)\n\nCompilation instructions:\n\n    make\n\nExecution instructions\n\n    ./bs_omp 2048576 16777216\n"
  },
  {
    "path": "golang/uPIMulator/benchmark/BS/baselines/cpu/bs_omp.c",
    "chars": 2232,
    "preview": "\n#include <stdio.h>\n#include <stdlib.h>\n#include <stdbool.h>\n#include <string.h>\n#include <unistd.h>\n#include <getopt.h>"
  },
  {
    "path": "golang/uPIMulator/benchmark/BS/baselines/cpu/timer.h",
    "chars": 2589,
    "preview": "/*\r\n * Copyright (c) 2016 University of Cordoba and University of Illinois\r\n * All rights reserved.\r\n *\r\n * Developed by"
  },
  {
    "path": "golang/uPIMulator/benchmark/BS/baselines/gpu/Makefile",
    "chars": 104,
    "preview": "all:\n\tnvcc -arch=sm_30 -m64 -Xcompiler -fPIC -shared -o cu_binary_search.so binary_search.cu -std=c++11\n"
  },
  {
    "path": "golang/uPIMulator/benchmark/BS/baselines/gpu/README",
    "chars": 100,
    "preview": "Binary Search (BS)\n\nCompilation instructions:\n\n    make\n\nExecution instructions\n\n    python3 run.py\n"
  },
  {
    "path": "golang/uPIMulator/benchmark/BS/baselines/gpu/binary_search.cu",
    "chars": 4258,
    "preview": "#include <cuda.h>\r\n#include <limits.h>\r\n#include \"binary_search.h\"\r\n\r\n#include <chrono>\r\n#include <iostream>\r\n\r\n#define "
  },
  {
    "path": "golang/uPIMulator/benchmark/BS/baselines/gpu/binary_search.h",
    "chars": 356,
    "preview": "#ifndef BINARY_SEARCH_H\r\n#define BINARY_SEARCH_H\r\n\r\n#ifdef _WIN32\r\n  #include <windows.h>\r\n  #define DLL_EXPORT __declsp"
  },
  {
    "path": "golang/uPIMulator/benchmark/BS/baselines/gpu/cpu_lib.py",
    "chars": 355,
    "preview": "# -*- coding: utf-8 -*-\n\n\ndef binary_search(arr, search):\n\n    L = 0\n    R = len(arr)\n\n    while L <= R:\n\n        if L >"
  },
  {
    "path": "golang/uPIMulator/benchmark/BS/baselines/gpu/cu_lib_import.py",
    "chars": 770,
    "preview": "# -*- coding: utf-8 -*-\r\n\r\n__all__ = [\r\n    \"binary_search\",\r\n]\r\n\r\n\r\nimport os.path as path\r\nimport platform\r\nfrom ctype"
  },
  {
    "path": "golang/uPIMulator/benchmark/BS/baselines/gpu/run.py",
    "chars": 514,
    "preview": "# -*- coding: utf-8 -*-\n\nimport time\n\nimport numpy as np\n\n# Local Imports\nfrom cu_lib_import import binary_search as gpu"
  },
  {
    "path": "golang/uPIMulator/benchmark/BS/dpu/CMakeLists.txt",
    "chars": 257,
    "preview": "set(CMAKE_C_COMPILER \"/root/upmem-2023.2.0-Linux-x86_64/bin/dpu-upmem-dpurte-clang\")\r\nset(CMAKE_C_FLAGS \"-w -I/root/uPIM"
  },
  {
    "path": "golang/uPIMulator/benchmark/BS/dpu/task.c",
    "chars": 4865,
    "preview": "/*\n* Binary Search with multiple tasklets\n*\n*/\n#include <stdint.h>\n#include <stdio.h>\n#include <defs.h>\n#include <mram.h"
  },
  {
    "path": "golang/uPIMulator/benchmark/BS/host/app.c",
    "chars": 5737,
    "preview": "/**\n* app.c\n* BS Host Application Source File\n*\n*/\n#include <stdio.h>\n#include <stdlib.h>\n#include <stdbool.h>\n#include "
  },
  {
    "path": "golang/uPIMulator/benchmark/BS/support/common.h",
    "chars": 971,
    "preview": "#ifndef _COMMON_H_\n#define _COMMON_H_\n\n#ifdef TL\n#define TASKLETS_INITIALIZER TASKLETS(TL, main, 2048, 2)\n#define NB_OF_"
  },
  {
    "path": "golang/uPIMulator/benchmark/BS/support/params.h",
    "chars": 1218,
    "preview": "#ifndef _PARAMS_H_\n#define _PARAMS_H_\n\n#include \"common.h\"\n\ntypedef struct Params {\n  long  num_querys;\n  unsigned   n_w"
  },
  {
    "path": "golang/uPIMulator/benchmark/BS/support/timer.h",
    "chars": 2589,
    "preview": "/*\r\n * Copyright (c) 2016 University of Cordoba and University of Illinois\r\n * All rights reserved.\r\n *\r\n * Developed by"
  },
  {
    "path": "golang/uPIMulator/benchmark/CMakeLists.txt",
    "chars": 357,
    "preview": "cmake_minimum_required(VERSION 3.16)\n\nproject(benchmark)\n\nadd_subdirectory(BS)\nadd_subdirectory(GEMV)\nadd_subdirectory(H"
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/CMakeLists.txt",
    "chars": 46,
    "preview": "#add_subdirectory(host)\r\nadd_subdirectory(dpu)"
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/Makefile",
    "chars": 1265,
    "preview": "DPU_DIR := dpu\nHOST_DIR := host\nBUILDDIR ?= bin\nNR_TASKLETS ?= 16 \nBL ?= 10\nNR_DPUS ?= 1 \n\ndefine conf_filename\n\t${BUILD"
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_10_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_11_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_12_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_13_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_14_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_15_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_16",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_16_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_17_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_18_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_19_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_1_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_20_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_21_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_22_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_23_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_24_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_2_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_3_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_4_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_5_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_6_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_7_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_8_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/_NR_TASKLETS_9_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/baselines/cpu/Makefile",
    "chars": 63,
    "preview": "all:\n\t\tgcc -o gemv -fopenmp gemv_openmp.c \n\nclean:\n\t\trm gemv\n\n\n"
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/baselines/cpu/README",
    "chars": 109,
    "preview": "Matrix-Vector Multiplication (GEMV)\n\nCompilation instructions:\n\n    make\n\nExecution instructions\n\n    ./gemv\n"
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/baselines/cpu/gemv_openmp.c",
    "chars": 1560,
    "preview": "#include <stdlib.h>\n#include <stdio.h>\n#include \"../../support/timer.h\"\n#include \"gemv_utils.h\"\n\nint main(int argc, char"
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/baselines/cpu/gemv_utils.h",
    "chars": 720,
    "preview": "void allocate_dense(size_t rows,size_t  cols, double*** dense) {\n\n  *dense = malloc(sizeof(double)*rows);\n  **dense = ma"
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/baselines/gpu/Makefile",
    "chars": 94,
    "preview": "all:\n\t/usr/local/cuda/bin/nvcc gemv.cu -I/usr/local/cuda/include -lm -o gemv\n\nclean:\n\trm gemv\n"
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/baselines/gpu/README",
    "chars": 109,
    "preview": "Matrix-Vector Multiplication (GEMV)\n\nCompilation instructions:\n\n    make\n\nExecution instructions\n\n    ./gemv\n"
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/baselines/gpu/gemv.cu",
    "chars": 2950,
    "preview": "#include <stdio.h>\n#include <stdlib.h>\n#include <sys/time.h>\n#include <cuda.h>\n\n#define THREAD 128\n\n#define T int\n\n__glo"
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/dpu/CMakeLists.txt",
    "chars": 286,
    "preview": "SET(BL 10)\r\n\r\nset(CMAKE_C_COMPILER \"/root/upmem-2023.2.0-Linux-x86_64/bin/dpu-upmem-dpurte-clang\")\r\nset(CMAKE_C_FLAGS \"-"
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/dpu/task.c",
    "chars": 4787,
    "preview": "/*\n * Matrix vector multiplication with multiple tasklet\n *\n */\n#include <stdint.h>\n#include <stdio.h>\n#include <defs.h>"
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/host/app.c",
    "chars": 6818,
    "preview": "/**\n * app.c\n * GEMV Host Application Source File\n *\n */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <stdbool.h>\n#i"
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/support/common.h",
    "chars": 855,
    "preview": "#ifndef _COMMON_H_\n#define _COMMON_H_\n\n// Structures used by both the host and the dpu to communicate information \ntyped"
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/support/params.h",
    "chars": 1575,
    "preview": "#ifndef _PARAMS_H_\n#define _PARAMS_H_\n\n#include \"common.h\"\n\ntypedef struct Params {\n    unsigned int  m_size;\n    unsign"
  },
  {
    "path": "golang/uPIMulator/benchmark/GEMV/support/timer.h",
    "chars": 2790,
    "preview": "/*\r\n * Copyright (c) 2016 University of Cordoba and University of Illinois\r\n * All rights reserved.\r\n *\r\n * Developed by"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-L/CMakeLists.txt",
    "chars": 46,
    "preview": "#add_subdirectory(host)\r\nadd_subdirectory(dpu)"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-L/Makefile",
    "chars": 1339,
    "preview": "DPU_DIR := dpu\nHOST_DIR := host\nBUILDDIR ?= bin\nNR_TASKLETS ?= 16\nBL ?= 8\nNR_DPUS ?= 1\nNR_HISTO ?= 1\nENERGY ?= 0\n\ndefine"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-L/dpu/CMakeLists.txt",
    "chars": 328,
    "preview": "SET(BL 10)\r\nSET(NR_HISTO 1)\r\n\r\nset(CMAKE_C_COMPILER \"/root/upmem-2023.2.0-Linux-x86_64/bin/dpu-upmem-dpurte-clang\")\r\nset"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-L/dpu/task.c",
    "chars": 4531,
    "preview": "/*\n* Histogram (HST-L) with multiple tasklets\n*\n*/\n#include <stdint.h>\n#include <stdio.h>\n#include <defs.h>\n#include <mr"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-L/host/app.c",
    "chars": 9029,
    "preview": "/**\n* app.c\n* HST-L Host Application Source File\n*\n*/\n#include <stdio.h>\n#include <stdlib.h>\n#include <stdbool.h>\n#inclu"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-L/run.sh",
    "chars": 308,
    "preview": "#!/bin/bash\n\nfor i in 1 \ndo\n\tfor b in 64 128 256 512 1024 2048 4096\n\tdo\n    \tfor k in 1 2 4 8 16\n\t    do\n\t        NR_DPU"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-L/support/common.h",
    "chars": 1029,
    "preview": "#ifndef _COMMON_H_\n#define _COMMON_H_\n\n// Transfer size between MRAM and WRAM\n#ifdef BL\n#define BLOCK_SIZE_LOG2 BL\n#defi"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-L/support/params.h",
    "chars": 1947,
    "preview": "#ifndef _PARAMS_H_\n#define _PARAMS_H_\n\n#include \"common.h\"\n\ntypedef struct Params {\n    unsigned int   input_size;\n    u"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-L/support/timer.h",
    "chars": 2600,
    "preview": "/*\r\n * Copyright (c) 2016 University of Cordoba and University of Illinois\r\n * All rights reserved.\r\n *\r\n * Developed by"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/CMakeLists.txt",
    "chars": 46,
    "preview": "#add_subdirectory(host)\r\nadd_subdirectory(dpu)"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/Makefile",
    "chars": 1278,
    "preview": "DPU_DIR := dpu\nHOST_DIR := host\nBUILDDIR ?= bin\nNR_TASKLETS ?= 16\nBL ?= 10\nNR_DPUS ?= 1\nENERGY ?= 0\n\ndefine conf_filenam"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/baselines/cpu/Makefile",
    "chars": 61,
    "preview": "all:\n\tgcc -o hist -fopenmp app_baseline.c \n\nclean:\n\trm hist\n\n"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/baselines/cpu/README",
    "chars": 160,
    "preview": "Histogram - input partition (HST)\n\nCompilation instructions:\n\n    make\n\nExecution instructions\n\n    ./hist -y 1006632960"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/baselines/cpu/app_baseline.c",
    "chars": 5079,
    "preview": "/*\n* JGL@SAFARI\n*/\n\n/**\n* @file app.c\n* @brief Template for a Host Application Source File.\n*\n* The macros DPU_BINARY an"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/baselines/gpu/Makefile",
    "chars": 2269,
    "preview": "# \n#  Copyright (c) 2016 University of Cordoba and University of Illinois\n#  All rights reserved.\n# \n#  Developed by:   "
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/baselines/gpu/README",
    "chars": 732,
    "preview": "Histogram - input partition (HST)\n\nCompilation instructions:\n\n    make\n\nExecution instructions\n\n    ./hsti -n 1006632960"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/baselines/gpu/kernel.cpp",
    "chars": 3409,
    "preview": "/*\n * Copyright (c) 2016 University of Cordoba and University of Illinois\n * All rights reserved.\n *\n * Developed by:   "
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/baselines/gpu/kernel.cu",
    "chars": 3972,
    "preview": "/*\n * Copyright (c) 2016 University of Cordoba and University of Illinois\n * All rights reserved.\n *\n * Developed by:   "
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/baselines/gpu/kernel.h",
    "chars": 2369,
    "preview": "/*\n * Copyright (c) 2016 University of Cordoba and University of Illinois\n * All rights reserved.\n *\n * Developed by:   "
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/baselines/gpu/main.cpp",
    "chars": 11520,
    "preview": "/*\n * Copyright (c) 2016 University of Cordoba and University of Illinois\n * All rights reserved.\n *\n * Developed by:   "
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/baselines/gpu/support/common.h",
    "chars": 2091,
    "preview": "/*\n * Copyright (c) 2016 University of Cordoba and University of Illinois\n * All rights reserved.\n *\n * Developed by:   "
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/baselines/gpu/support/cuda-setup.h",
    "chars": 3789,
    "preview": "/*\n * Copyright (c) 2016 University of Cordoba and University of Illinois\n * All rights reserved.\n *\n * Developed by:   "
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/baselines/gpu/support/partitioner.h",
    "chars": 5596,
    "preview": "/*\r\n * Copyright (c) 2016 University of Cordoba and University of Illinois\r\n * All rights reserved.\r\n *\r\n * Developed by"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/baselines/gpu/support/timer.h",
    "chars": 2967,
    "preview": "/*\r\n * Copyright (c) 2016 University of Cordoba and University of Illinois\r\n * All rights reserved.\r\n *\r\n * Developed by"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/baselines/gpu/support/verify.h",
    "chars": 2819,
    "preview": "/*\n * Copyright (c) 2016 University of Cordoba and University of Illinois\n * All rights reserved.\n *\n * Developed by:   "
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/dpu/CMakeLists.txt",
    "chars": 288,
    "preview": "SET(BL 10)\r\n\r\nset(CMAKE_C_COMPILER \"/root/upmem-2023.2.0-Linux-x86_64/bin/dpu-upmem-dpurte-clang\")\r\nset(CMAKE_C_FLAGS \"-"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/dpu/task.c",
    "chars": 3426,
    "preview": "/*\n* Histogram (HST-S) with multiple tasklets\n*\n*/\n#include <stdint.h>\n#include <stdio.h>\n#include <defs.h>\n#include <mr"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/host/app.c",
    "chars": 9029,
    "preview": "/**\n* app.c\n* HST-S Host Application Source File\n*\n*/\n#include <stdio.h>\n#include <stdlib.h>\n#include <stdbool.h>\n#inclu"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/run.sh",
    "chars": 340,
    "preview": "#!/bin/bash\n\nfor i in 1 \ndo\n\tfor b in 64 128 256 512 1024 2048 4096\n\tdo\n    \tfor k in 1 2 4 8 16\n\t    do\n            NR_"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/support/common.h",
    "chars": 1029,
    "preview": "#ifndef _COMMON_H_\n#define _COMMON_H_\n\n// Transfer size between MRAM and WRAM\n#ifdef BL\n#define BLOCK_SIZE_LOG2 BL\n#defi"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/support/params.h",
    "chars": 1947,
    "preview": "#ifndef _PARAMS_H_\n#define _PARAMS_H_\n\n#include \"common.h\"\n\ntypedef struct Params {\n    unsigned int   input_size;\n    u"
  },
  {
    "path": "golang/uPIMulator/benchmark/HST-S/support/timer.h",
    "chars": 2600,
    "preview": "/*\r\n * Copyright (c) 2016 University of Cordoba and University of Illinois\r\n * All rights reserved.\r\n *\r\n * Developed by"
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/CMakeLists.txt",
    "chars": 46,
    "preview": "#add_subdirectory(host)\r\nadd_subdirectory(dpu)"
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/Makefile",
    "chars": 1265,
    "preview": "DPU_DIR := dpu\nHOST_DIR := host\nBUILDDIR ?= bin\nNR_TASKLETS ?= 16 \nBL ?= 10\nNR_DPUS ?= 1 \n\ndefine conf_filename\n\t${BUILD"
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_10_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_11_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_12_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_13_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_14_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_15_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_16",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_16_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_17_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_18_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_19_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_1_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_20_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_21_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_22_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_23_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_24_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_2_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_3_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_4_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_5_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_6_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_7_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_8_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/_NR_TASKLETS_9_BL_10.conf",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/baselines/cpu/Makefile",
    "chars": 74,
    "preview": "all:\n\tgcc mlp_openmp.c -o mlp_openmp -fopenmp -std=c99\nrun:\n\t./mlp_openmp\n"
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/baselines/cpu/README",
    "chars": 106,
    "preview": "Multilayer Perceptron (MLP)\n\nCompilation instructions\n\n    make\n\nExecution instructions\n\n    ./mlp_openmp\n"
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/baselines/cpu/mlp_openmp.c",
    "chars": 3656,
    "preview": "/**\n* @file app.c\n* @brief Template for a Host Application Source File.\n*\n*/\n#include <stdio.h>\n#include <stdlib.h>\n#inc"
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/baselines/gpu/Makefile",
    "chars": 91,
    "preview": "all:\n\t/usr/local/cuda/bin/nvcc mlp.cu -I/usr/local/cuda/include -lm -o mlp\n\nclean:\n\trm mlp\n"
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/baselines/gpu/README",
    "chars": 99,
    "preview": "Multilayer Perceptron (MLP)\n\nCompilation instructions\n\n    make\n\nExecution instructions\n\n    ./mlp\n"
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/baselines/gpu/mlp.cu",
    "chars": 4138,
    "preview": "#include <stdio.h>\n#include <stdlib.h>\n#include <sys/time.h>\n#include <cuda.h>\n#include \"../../support/common.h\"\n\n#defin"
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/dpu/CMakeLists.txt",
    "chars": 284,
    "preview": "SET(BL 10)\r\n\r\nset(CMAKE_C_COMPILER \"/root/upmem-2023.2.0-Linux-x86_64/bin/dpu-upmem-dpurte-clang\")\r\nset(CMAKE_C_FLAGS \"-"
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/dpu/task.c",
    "chars": 4787,
    "preview": "/*\n * Matrix vector multiplication with multiple tasklet\n *\n */\n#include <stdint.h>\n#include <stdio.h>\n#include <defs.h>"
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/host/app.c",
    "chars": 9004,
    "preview": "/**\n * app.c\n * MLP Host Application Source File\n *\n */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <stdbool.h>\n#in"
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/support/common.h",
    "chars": 939,
    "preview": "#ifndef _COMMON_H_\n#define _COMMON_H_\n\n// Structures used by both the host and the dpu to communicate information \ntyped"
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/support/params.h",
    "chars": 1577,
    "preview": "#ifndef _PARAMS_H_\n#define _PARAMS_H_\n\n#include \"common.h\"\n\ntypedef struct Params {\n    unsigned int  m_size;\n    unsign"
  },
  {
    "path": "golang/uPIMulator/benchmark/MLP/support/timer.h",
    "chars": 2790,
    "preview": "/*\r\n * Copyright (c) 2016 University of Cordoba and University of Illinois\r\n * All rights reserved.\r\n *\r\n * Developed by"
  },
  {
    "path": "golang/uPIMulator/benchmark/RED/CMakeLists.txt",
    "chars": 46,
    "preview": "#add_subdirectory(host)\r\nadd_subdirectory(dpu)"
  },
  {
    "path": "golang/uPIMulator/benchmark/RED/Makefile",
    "chars": 1515,
    "preview": "DPU_DIR := dpu\nHOST_DIR := host\nBUILDDIR ?= bin\nNR_TASKLETS ?= 16\nBL ?= 10\nNR_DPUS ?= 1\nVERSION ?= SINGLE\nSYNC ?= HAND\nT"
  },
  {
    "path": "golang/uPIMulator/benchmark/RED/baselines/cpu/Makefile",
    "chars": 206,
    "preview": "all:\n\tg++ -O2 app_baseline.cpp -fopenmp -DTHRUST_HOST_SYSTEM=THRUST_HOST_SYSTEM_CPP -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE"
  },
  {
    "path": "golang/uPIMulator/benchmark/RED/baselines/cpu/README",
    "chars": 118,
    "preview": "Reduction (RED)\n\nCompilation instructions\n\n    TYPE=UINT64 make\n\nExecution instructions\n\n    ./red -i 1048576000 -t 4\n"
  },
  {
    "path": "golang/uPIMulator/benchmark/RED/baselines/cpu/app_baseline.cpp",
    "chars": 5130,
    "preview": "/*\n* JGL@SAFARI\n*/\n\n/**\n* CPU code with Thrust\n*/\n#include <stdio.h>\n#include <stdlib.h>\n#include <stdbool.h>\n#include <"
  },
  {
    "path": "golang/uPIMulator/benchmark/RED/baselines/gpu/Makefile",
    "chars": 110,
    "preview": "all:\n\t/usr/local/cuda/bin/nvcc app_baseline.cu -I/usr/local/cuda/include -lm -o red -D${TYPE}\n\nclean:\n\trm red\n"
  },
  {
    "path": "golang/uPIMulator/benchmark/RED/baselines/gpu/README",
    "chars": 113,
    "preview": "Reduction (RED)\n\nCompilation instructions\n\n    TYPE=UINT64 make\n\nExecution instructions\n\n    ./red -i 1048576000\n"
  },
  {
    "path": "golang/uPIMulator/benchmark/RED/baselines/gpu/app_baseline.cu",
    "chars": 5372,
    "preview": "/*\n* JGL@SAFARI\n*/\n\n/**\n* GPU code with Thrust\n*/\n#include <stdio.h>\n#include <stdlib.h>\n#include <stdbool.h>\n#include <"
  },
  {
    "path": "golang/uPIMulator/benchmark/RED/dpu/CMakeLists.txt",
    "chars": 399,
    "preview": "SET(BL 10)\r\nSET(TYPE INT64)\r\nSET(VERSION SINGLE)\r\nSET(SYNC HAND)\r\nSET(PERF 0)\r\n\r\nset(CMAKE_C_COMPILER \"/root/upmem-2023."
  },
  {
    "path": "golang/uPIMulator/benchmark/RED/dpu/task.c",
    "chars": 3958,
    "preview": "/*\n* Reduction with multiple tasklets\n*\n*/\n#include <stdint.h>\n#include <stdio.h>\n#include <defs.h>\n#include <mram.h>\n#i"
  },
  {
    "path": "golang/uPIMulator/benchmark/RED/host/app.c",
    "chars": 8068,
    "preview": "/**\n* app.c\n* RED Host Application Source File\n*\n*/\n#include <stdio.h>\n#include <stdlib.h>\n#include <stdbool.h>\n#include"
  },
  {
    "path": "golang/uPIMulator/benchmark/RED/run.sh",
    "chars": 724,
    "preview": "#!/bin/bash\n\nfor i in 1 \ndo\n\tfor j in BARRIER HAND\n\tdo \t\n\t\tfor k in 1 2 4 8 16\n\t\tdo\n\t\t    PERF=1 NR_DPUS=$i NR_TASKLETS="
  },
  {
    "path": "golang/uPIMulator/benchmark/RED/support/common.h",
    "chars": 1629,
    "preview": "#ifndef _COMMON_H_\n#define _COMMON_H_\n\n// Transfer size between MRAM and WRAM\n#ifdef BL\n#define BLOCK_SIZE_LOG2 BL\n#defi"
  },
  {
    "path": "golang/uPIMulator/benchmark/RED/support/cyclecount.h",
    "chars": 555,
    "preview": "#include <perfcounter.h>\n\n// Timer\ntypedef struct perfcounter_cycles{\n    perfcounter_t start;\n    perfcounter_t end;\n  "
  },
  {
    "path": "golang/uPIMulator/benchmark/RED/support/params.h",
    "chars": 1449,
    "preview": "#ifndef _PARAMS_H_\n#define _PARAMS_H_\n\n#include \"common.h\"\n\ntypedef struct Params {\n    unsigned int   input_size;\n    i"
  },
  {
    "path": "golang/uPIMulator/benchmark/RED/support/timer.h",
    "chars": 2600,
    "preview": "/*\r\n * Copyright (c) 2016 University of Cordoba and University of Illinois\r\n * All rights reserved.\r\n *\r\n * Developed by"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-RSS/CMakeLists.txt",
    "chars": 46,
    "preview": "#add_subdirectory(host)\r\nadd_subdirectory(dpu)"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-RSS/Makefile",
    "chars": 1331,
    "preview": "DPU_DIR := dpu\nHOST_DIR := host\nBUILDDIR ?= bin\nNR_DPUS ?= 1\nNR_TASKLETS ?= 16\nBL ?= 10\nTYPE ?= INT64\nENERGY ?= 0\n\ndefin"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-RSS/baselines/cpu/Makefile",
    "chars": 208,
    "preview": "all:\n\tg++ -O2 app_baseline.cpp -fopenmp -DTHRUST_HOST_SYSTEM=THRUST_HOST_SYSTEM_CPP -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-RSS/baselines/cpu/README",
    "chars": 121,
    "preview": "Prefix sum (SCAN)\n\nCompilation instructions\n\n    TYPE=UINT64 make\n\nExecution instructions\n\n    ./scan -i 1258291200 -t 4"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-RSS/baselines/cpu/app_baseline.cpp",
    "chars": 4965,
    "preview": "/*\n* JGL@SAFARI\n*/\n\n/**\n* CPU code with Thrust\n*/\n#include <stdio.h>\n#include <stdlib.h>\n#include <stdbool.h>\n#include <"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-RSS/baselines/gpu/Makefile",
    "chars": 112,
    "preview": "all:\n\t/usr/local/cuda/bin/nvcc app_baseline.cu -I/usr/local/cuda/include -lm -o scan -D${TYPE}\n\nclean:\n\trm scan\n"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-RSS/baselines/gpu/README",
    "chars": 116,
    "preview": "Prefix sum (SCAN)\n\nCompilation instructions\n\n    TYPE=UINT64 make\n\nExecution instructions\n\n    ./scan -i 1258291200\n"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-RSS/baselines/gpu/app_baseline.cu",
    "chars": 5418,
    "preview": "/*\n* JGL@SAFARI\n*/\n\n/**\n* GPU code with Thrust\n*/\n#include <stdio.h>\n#include <stdlib.h>\n#include <stdbool.h>\n#include <"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-RSS/dpu/CMakeLists.txt",
    "chars": 321,
    "preview": "SET(BL 10)\r\nSET(TYPE INT64)\r\n\r\nset(CMAKE_C_COMPILER \"/root/upmem-2023.2.0-Linux-x86_64/bin/dpu-upmem-dpurte-clang\")\r\nset"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-RSS/dpu/task.c",
    "chars": 5369,
    "preview": "/*\n* Scan with multiple tasklets (Reduce-scan-scan)\n*\n*/\n#include <stdint.h>\n#include <stdio.h>\n#include <defs.h>\n#inclu"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-RSS/host/app.c",
    "chars": 9121,
    "preview": "/**\n* app.c\n* SCAN-RSS Host Application Source File\n*\n*/\n#include <stdio.h>\n#include <stdlib.h>\n#include <stdbool.h>\n#in"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-RSS/run.sh",
    "chars": 263,
    "preview": "#!/bin/bash\n\nfor i in  2048 4096 8192 16384 65536 262144 1048576 3932160\ndo\n\t\t    NR_DPUS=1 NR_TASKLETS=16 BL=10 VERSION"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-RSS/support/common.h",
    "chars": 1515,
    "preview": "#ifndef _COMMON_H_\n#define _COMMON_H_\n\n// Transfer size between MRAM and WRAM\n#ifdef BL\n#define BLOCK_SIZE_LOG2 BL\n#defi"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-RSS/support/params.h",
    "chars": 1449,
    "preview": "#ifndef _PARAMS_H_\n#define _PARAMS_H_\n\n#include \"common.h\"\n\ntypedef struct Params {\n    unsigned int   input_size;\n    i"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-RSS/support/timer.h",
    "chars": 2600,
    "preview": "/*\r\n * Copyright (c) 2016 University of Cordoba and University of Illinois\r\n * All rights reserved.\r\n *\r\n * Developed by"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-SSA/CMakeLists.txt",
    "chars": 46,
    "preview": "#add_subdirectory(host)\r\nadd_subdirectory(dpu)"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-SSA/Makefile",
    "chars": 1331,
    "preview": "DPU_DIR := dpu\nHOST_DIR := host\nBUILDDIR ?= bin\nNR_DPUS ?= 1\nNR_TASKLETS ?= 16\nBL ?= 10\nTYPE ?= INT64\nENERGY ?= 0\n\ndefin"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-SSA/dpu/CMakeLists.txt",
    "chars": 321,
    "preview": "SET(BL 10)\r\nSET(TYPE INT64)\r\n\r\nset(CMAKE_C_COMPILER \"/root/upmem-2023.2.0-Linux-x86_64/bin/dpu-upmem-dpurte-clang\")\r\nset"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-SSA/dpu/task.c",
    "chars": 4919,
    "preview": "/*\n* Scan with multiple tasklets (Scan-scan-add)\n*\n*/\n#include <stdint.h>\n#include <stdio.h>\n#include <defs.h>\n#include "
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-SSA/host/app.c",
    "chars": 9206,
    "preview": "/**\n* app.c\n* SCAN-SSA Host Application Source File\n*\n*/\n#include <stdio.h>\n#include <stdlib.h>\n#include <stdbool.h>\n#in"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-SSA/run.sh",
    "chars": 248,
    "preview": "#!/bin/bash\n\nfor i in  2048 4096 8192 16384 65536 262144 1048576 3932160\ndo\n\t\t    NR_DPUS=1 NR_TASKLETS=16 BL=10 make al"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-SSA/support/common.h",
    "chars": 1514,
    "preview": "#ifndef _COMMON_H_\n#define _COMMON_H_\n\n// Transfer size between MRAM and WRAM\n#ifdef BL\n#define BLOCK_SIZE_LOG2 BL\n#defi"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-SSA/support/params.h",
    "chars": 1449,
    "preview": "#ifndef _PARAMS_H_\n#define _PARAMS_H_\n\n#include \"common.h\"\n\ntypedef struct Params {\n    unsigned int   input_size;\n    i"
  },
  {
    "path": "golang/uPIMulator/benchmark/SCAN-SSA/support/timer.h",
    "chars": 2600,
    "preview": "/*\r\n * Copyright (c) 2016 University of Cordoba and University of Illinois\r\n * All rights reserved.\r\n *\r\n * Developed by"
  },
  {
    "path": "golang/uPIMulator/benchmark/SEL/CMakeLists.txt",
    "chars": 46,
    "preview": "#add_subdirectory(host)\r\nadd_subdirectory(dpu)"
  },
  {
    "path": "golang/uPIMulator/benchmark/SEL/Makefile",
    "chars": 1280,
    "preview": "DPU_DIR := dpu\nHOST_DIR := host\nBUILDDIR ?= bin\nNR_TASKLETS ?= 16\nBL ?= 10\nNR_DPUS ?= 1\nENERGY ?= 0\n\ndefine conf_filenam"
  },
  {
    "path": "golang/uPIMulator/benchmark/SEL/baselines/cpu/Makefile",
    "chars": 59,
    "preview": "all:\n\tgcc -o sel -fopenmp app_baseline.c \n\nclean:\n\trm sel\n\n"
  },
  {
    "path": "golang/uPIMulator/benchmark/SEL/baselines/cpu/README",
    "chars": 211,
    "preview": "Select (SEL)\n\nCompilation instructions\n\n    make\n\nExecution instructions\n\n    ./sel -i 1258291200 -t 4\n\nRead more\nJ. Gom"
  },
  {
    "path": "golang/uPIMulator/benchmark/SEL/baselines/cpu/app_baseline.c",
    "chars": 3382,
    "preview": "/**\n* @file app.c\n* @brief Template for a Host Application Source File.\n*\n*/\n#include <stdio.h>\n#include <stdlib.h>\n#inc"
  },
  {
    "path": "golang/uPIMulator/benchmark/SEL/baselines/gpu/Makefile",
    "chars": 142,
    "preview": "all:\n\t/usr/local/cuda/bin/nvcc select.cu -I/usr/local/cuda/include -lm -o select -D COARSENING=32 -D THREADS=512 -D INT6"
  },
  {
    "path": "golang/uPIMulator/benchmark/SEL/baselines/gpu/README",
    "chars": 558,
    "preview": "Select (SEL)\n\nCompilation instructions\n\n    make\n\nExecution instructions\n\n    ./select 0 50 1258291200\n\nCompilation flag"
  },
  {
    "path": "golang/uPIMulator/benchmark/SEL/baselines/gpu/ds.h",
    "chars": 7749,
    "preview": "/***************************************************************************\n *cr\n *cr            (C) Copyright 2015 The"
  },
  {
    "path": "golang/uPIMulator/benchmark/SEL/baselines/gpu/kernel.cu",
    "chars": 3324,
    "preview": "/***************************************************************************\n *cr\n *cr            (C) Copyright 2015 The"
  },
  {
    "path": "golang/uPIMulator/benchmark/SEL/baselines/gpu/select.cu",
    "chars": 7057,
    "preview": "/***************************************************************************\n *cr\n *cr            (C) Copyright 2015 The"
  },
  {
    "path": "golang/uPIMulator/benchmark/SEL/dpu/CMakeLists.txt",
    "chars": 284,
    "preview": "SET(BL 10)\r\n\r\nset(CMAKE_C_COMPILER \"/root/upmem-2023.2.0-Linux-x86_64/bin/dpu-upmem-dpurte-clang\")\r\nset(CMAKE_C_FLAGS \"-"
  },
  {
    "path": "golang/uPIMulator/benchmark/SEL/dpu/task.c",
    "chars": 3432,
    "preview": "/*\n* Select with multiple tasklets\n*\n*/\n#include <stdint.h>\n#include <stdio.h>\n#include <defs.h>\n#include <mram.h>\n#incl"
  },
  {
    "path": "golang/uPIMulator/benchmark/SEL/host/app.c",
    "chars": 7856,
    "preview": "/**\n* app.c\n* SEL Host Application Source File\n*\n*/\n#include <stdio.h>\n#include <stdlib.h>\n#include <stdbool.h>\n#include"
  },
  {
    "path": "golang/uPIMulator/benchmark/SEL/support/common.h",
    "chars": 923,
    "preview": "#ifndef _COMMON_H_\n#define _COMMON_H_\n\n// Structures used by both the host and the dpu to communicate information \ntyped"
  },
  {
    "path": "golang/uPIMulator/benchmark/SEL/support/params.h",
    "chars": 1449,
    "preview": "#ifndef _PARAMS_H_\n#define _PARAMS_H_\n\n#include \"common.h\"\n\ntypedef struct Params {\n    unsigned int   input_size;\n    i"
  },
  {
    "path": "golang/uPIMulator/benchmark/SEL/support/timer.h",
    "chars": 2600,
    "preview": "/*\r\n * Copyright (c) 2016 University of Cordoba and University of Illinois\r\n * All rights reserved.\r\n *\r\n * Developed by"
  },
  {
    "path": "golang/uPIMulator/benchmark/TRNS/CMakeLists.txt",
    "chars": 46,
    "preview": "#add_subdirectory(host)\r\nadd_subdirectory(dpu)"
  },
  {
    "path": "golang/uPIMulator/benchmark/TRNS/Makefile",
    "chars": 1235,
    "preview": "DPU_DIR := dpu\nHOST_DIR := host\nBUILDDIR ?= bin\nNR_DPUS ?= 1\nNR_TASKLETS ?= 16\nENERGY ?= 0\n\ndefine conf_filename\n\t${BUIL"
  },
  {
    "path": "golang/uPIMulator/benchmark/TRNS/baselines/cpu/Makefile",
    "chars": 2132,
    "preview": "# \n#  Copyright (c) 2016 University of Cordoba and University of Illinois\n#  All rights reserved.\n# \n#  Developed by:   "
  },
  {
    "path": "golang/uPIMulator/benchmark/TRNS/baselines/cpu/README",
    "chars": 271,
    "preview": "In-place matrix transposition (TRNS)\n\nCompilation instructions\n\n    make\n\nExecution instructions\n\n    ./trns -w 0 -r 1 -"
  },
  {
    "path": "golang/uPIMulator/benchmark/TRNS/baselines/cpu/kernel.cpp",
    "chars": 5120,
    "preview": "/*\n * Copyright (c) 2016 University of Cordoba and University of Illinois\n * All rights reserved.\n *\n * Developed by:   "
  },
  {
    "path": "golang/uPIMulator/benchmark/TRNS/baselines/cpu/kernel.h",
    "chars": 2170,
    "preview": "/*\n * Copyright (c) 2016 University of Cordoba and University of Illinois\n * All rights reserved.\n *\n * Developed by:   "
  },
  {
    "path": "golang/uPIMulator/benchmark/TRNS/baselines/cpu/main.cpp",
    "chars": 7826,
    "preview": "/*\n * Copyright (c) 2016 University of Cordoba and University of Illinois\n * All rights reserved.\n *\n * Developed by:   "
  }
]

// ... and 2382 more files (download for full content)

About this extraction

This page contains the full source code of the VIA-Research/uPIMulator GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 2582 files (229.4 MB), approximately 2.7M tokens, and a symbol index with 17659 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!