Repository: sjtu-zhao-lab/pom Branch: main Commit: 332cf8b63e5a Files: 53 Total size: 532.8 KB Directory structure: gitextract_f9e36swx/ ├── .gitattributes ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── ae_script.sh ├── build-pom.sh ├── clean.sh ├── include/ │ ├── CMakeLists.txt │ └── polyhedral/ │ ├── codegen.h │ ├── compute.h │ ├── core.h │ ├── debug.h │ ├── expr.h │ ├── function.h │ ├── generator.h │ ├── generator_isl.h │ ├── placeholder.h │ └── type.h ├── lib/ │ ├── CMakeLists.txt │ └── polyhedral/ │ ├── CMakeLists.txt │ ├── codegen.cpp │ ├── compute.cpp │ ├── core.cpp │ ├── debug.cpp │ ├── expr.cpp │ ├── function.cpp │ ├── generator.cpp │ ├── generator_isl.cpp │ ├── placeholer.cpp │ └── test.cpp ├── results-gen.sh ├── run-code.sh ├── samples/ │ └── config.json ├── tcl-gen.sh ├── testbench/ │ ├── 2mm.cpp │ ├── 3mm.cpp │ ├── bicg.cpp │ ├── blur.cpp │ ├── edgeDetect.cpp │ ├── gaussian.cpp │ ├── gemm.cpp │ ├── gesummv.cpp │ ├── heat.cpp │ ├── jacobi.cpp │ ├── jacobi2d.cpp │ ├── resnet18.cpp │ ├── seidel.cpp │ └── vgg16.cpp └── vitis-reports.sh ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitattributes ================================================ # Auto detect text files and perform LF normalization * text=auto ================================================ FILE: .gitignore ================================================ build # balance/ # sample_l/ # samles_4_paper/ # samles_update/ build.txt .vscode/ .env scalehls/ **/tmp **/cpp_src **/mlir_src **/hls_proj **/dump_csv **/.ipynb_checkpoints **/__pycache__ *.onnx* *.tmp *.gv *.png *.log *.csv *.zip *.swp *.swo *.swn ================================================ FILE: .gitmodules ================================================ [submodule "scalehls"] path = scalehls url = git@github.com:Jason048/scalehls.git branch = 
scalehls-pom


================================================
FILE: CMakeLists.txt
================================================
# Top-level build description for POM.
#
# The project can be configured in two ways:
#   * standalone  - MLIR/LLVM are located through find_package(MLIR) (the
#                   build script passes LLVM_DIR / MLIR_DIR), or
#   * in-tree     - POM is added as a sub-project of an LLVM build and reuses
#                   the LLVM_* variables of the enclosing build.
#
# Every *.cpp file found under testbench/ becomes one executable that links
# the POM library target `Functions`, isl, the ScaleHLS libraries and MLIR.
cmake_minimum_required(VERSION 3.13.4)

if(POLICY CMP0068)
  cmake_policy(SET CMP0068 NEW)
  set(CMAKE_BUILD_WITH_INSTALL_NAME_DIR ON)
endif()

if(POLICY CMP0075)
  cmake_policy(SET CMP0075 NEW)
endif()

if(POLICY CMP0077)
  cmake_policy(SET CMP0077 NEW)
endif()

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED YES)
# NOTE(review): -w silences every compiler warning for all targets.
add_definitions(-w)

# These are evaluated *before* project(); the output paths below therefore
# point at <source>/build/{bin,lib} regardless of the actual build directory.
set(PROJECT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(PROJECT_BINARY_DIR ${PROJECT_SOURCE_DIR}/build)
set(TEST_PATH ${PROJECT_SOURCE_DIR}/test)
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)

if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
  # Standalone build.
  project(POM LANGUAGES CXX C)

  # llvm
  set(LLVM_SOURCE_DIR ${PROJECT_SOURCE_DIR}/scalehls/polygeist/llvm-project/llvm)
  find_package(MLIR REQUIRED CONFIG)

  message(STATUS "Using MLIRConfig.cmake in: ${MLIR_DIR}")
  message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
  message(STATUS "Using LLVM_EXTERNAL_LIT.cmake in: ${LLVM_EXTERNAL_LIT}")

  list(APPEND CMAKE_MODULE_PATH "${MLIR_CMAKE_DIR}")
  list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
  include(TableGen)
  include(AddLLVM)
  include(AddMLIR)
  include(HandleLLVMOptions)
else()
  # In-tree build inside an LLVM source tree.
  set(LLVM_SOURCE_DIR ${LLVM_MAIN_SRC_DIR})
  set(MLIR_MAIN_SRC_DIR ${LLVM_MAIN_SRC_DIR}/../mlir)
  set(MLIR_INCLUDE_DIRS ${MLIR_MAIN_SRC_DIR}/include)
  set(MLIR_CMAKE_DIR ${MLIR_MAIN_SRC_DIR}/cmake/modules)
  # Path of the mlir-tblgen executable, resolved at generate time.
  set(MLIR_TABLEGEN_EXE $<TARGET_FILE:mlir-tblgen>)
  set(MLIR_TABLEGEN_OUTPUT_DIR ${LLVM_BINARY_DIR}/tools/mlir/include)

  # Use the variable that is actually set above (MLIR_INCLUDE_DIRS); the
  # singular MLIR_INCLUDE_DIR is never defined in this file.
  include_directories(SYSTEM ${MLIR_INCLUDE_DIRS})
  include_directories(SYSTEM ${MLIR_TABLEGEN_OUTPUT_DIR})

  message(STATUS "Using LLVM_SOURCE_DIR in: ${LLVM_SOURCE_DIR}")
  message(STATUS "Using MLIR_MAIN_SRC_DIR in: ${MLIR_MAIN_SRC_DIR}")
endif()

# include_directories("/home/POM/third_party/llvm-project/mlir/include/mlir/IR")
set(POM_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(POM_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
set(POM_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/include)
set(POM_TOOLS_DIR ${CMAKE_BINARY_DIR}/bin)

list(APPEND CMAKE_MODULE_PATH "${MLIR_MAIN_SRC_DIR}/cmake/modules")
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules")

include_directories(${LLVM_INCLUDE_DIRS})
include_directories(${MLIR_INCLUDE_DIRS})
include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(${PROJECT_BINARY_DIR}/include)
link_directories(${LLVM_BUILD_LIBRARY_DIR})
add_definitions(${LLVM_DEFINITIONS})

# ISL
#set(ISL_INCLUDE_DIRECTORY ${PROJECT_SOURCE_DIR}/third_party/isl/build/include/ CACHE PATH
#"Path to ISL include directory")
#set(ISL_LIB_DIRECTORY ${PROJECT_SOURCE_DIR}/third_party/isl/build/.lib/ CACHE PATH "Path to
#ISL library directory")

include_directories(${PROJECT_SOURCE_DIR}/include/polyhedral)
include_directories(${PROJECT_SOURCE_DIR}/lib/polyhedral)
include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(${PROJECT_SOURCE_DIR}/testbench)
include_directories(${PROJECT_SOURCE_DIR}/scalehls/polygeist/llvm-project/mlir/include)
include_directories(${PROJECT_SOURCE_DIR}/scalehls/polygeist/llvm-project/llvm/include)
include_directories(${PROJECT_SOURCE_DIR}/scalehls/build/tools/scalehls/include)
# ISL_INCLUDE_DIRECTORY is optional (its definition above is commented out);
# only add it when the user supplied one, e.g. -DISL_INCLUDE_DIRECTORY=<path>.
if(ISL_INCLUDE_DIRECTORY)
  include_directories(${ISL_INCLUDE_DIRECTORY})
endif()
include_directories(${PROJECT_SOURCE_DIR}/scalehls/include)
#include_directories(${PROJECT_SOURCE_DIR}/pybind11/include)
include_directories(/usr/include/python3.8)

# isl is only searched in /usr/local/lib (the default `make install` prefix).
find_library(ISLLib isl PATHS /usr/local/lib NO_DEFAULT_PATH)
message(STATUS "Using ISLlib in: ${ISLLib}")

# Library lists published by the MLIR build.
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
get_property(translation_libs GLOBAL PROPERTY MLIR_TRANSLATION_LIBS)

# find_library(MLIRSCF NAMES libMLIRSCF.a PATHS ${PROJECT_SOURCE_DIR}/scalehls/build/lib)
# message(STATUS "Using MLIRSCF in: ${MLIRSCF}")

# ScaleHLS libraries produced by scalehls/build-scalehls.sh.
find_library(MLIRHLS MLIRHLS PATHS ${PROJECT_SOURCE_DIR}/scalehls/build/lib)
message(STATUS "Using MLIRHLS in: ${MLIRHLS}")

find_library(MLIRScaleHLSSupport MLIRScaleHLSSupport PATHS ${PROJECT_SOURCE_DIR}/scalehls/build/lib)
message(STATUS "Using MLIRScaleHLSSupport in: ${MLIRScaleHLSSupport}")

find_library(MLIRScaleHLSTransforms MLIRScaleHLSTransforms PATHS ${PROJECT_SOURCE_DIR}/scalehls/build/lib)
message(STATUS "Using MLIRScaleHLSTransforms in: ${MLIRScaleHLSTransforms}")

# One executable per testbench source file. CONFIGURE_DEPENDS makes the build
# re-run the glob so that newly added testbenches are picked up.
set(TEST_SOURCE_DIR ${PROJECT_SOURCE_DIR}/testbench)
file(GLOB_RECURSE mains
  RELATIVE "${TEST_SOURCE_DIR}"
  CONFIGURE_DEPENDS
  "${TEST_SOURCE_DIR}/*.cpp"
)
message(STATUS "Using TEST_SOURCE_DIR in:${TEST_SOURCE_DIR}")

foreach(mainfile IN LISTS mains)
  # Get file name without directory
  get_filename_component(mainname "${mainfile}" NAME_WE)
  # message(STATUS "get mainname in:${mainname}")
  add_executable(${mainname} ${TEST_SOURCE_DIR}/${mainfile})
  target_link_libraries(${mainname} Functions)
  target_link_libraries(${mainname} ${ISLLib})
  target_link_libraries(${mainname}
    ${dialect_libs}
    ${conversion_libs}
    ${translation_libs}
    ${MLIRScaleHLSTransforms}
    ${MLIRScaleHLSSupport}
    ${MLIRHLS}
    MLIRAffineTransforms
    MLIROptLib
    MLIRAnalysis
    MLIRCallInterfaces
    MLIRCastInterfaces
    MLIRIR
    MLIRParser
    MLIRPass
    MLIRSideEffectInterfaces
    MLIRSupport
    MLIRTransforms)
endforeach()

# add_executable(test test.cpp)
# add_executable(bicg bicg.cpp)
# target_link_libraries(test Functions)
# target_link_libraries(test ${ISLLib})
# target_link_libraries(test
#   ${dialect_libs}
#   ${conversion_libs}
#   ${translation_libs}
#   ${MLIRScaleHLSTransforms}
#   ${MLIRScaleHLSSupport}
#   ${MLIRHLSCpp}
#   MLIRAffineTransforms
#   MLIROptLib
#   MLIRAnalysis
#   MLIRCallInterfaces
#   MLIRCastInterfaces
#   MLIRIR
#   MLIRParser
#   MLIRPass
#   MLIRSideEffectInterfaces
#   MLIRSupport
#   MLIRTransforms)

add_subdirectory(include)
add_subdirectory(lib)
#add_subdirectory(test)
#add_subdirectory(standalone-opt)
#add_subdirectory(standalone-translate)
#add_subdirectory(POM-isl)
#
# add_subdirectory(pybind11)
#find_package(pybind11 REQUIRED)
#pybind11_add_module(wrapper wrapper.cpp)
#target_link_libraries(wrapper PRIVATE ${ISLLib})
# pybind11_add_module(core ${PROJECT_SOURCE_DIR}/lib/Polyhedral/core.cpp)
# pybind11_add_module(place ${PROJECT_SOURCE_DIR}/lib/Polyhedral/placeholer.cpp)
# pybind11_add_module(example2 ${PROJECT_SOURCE_DIR}/lib/Polyhedral/example2.cpp)
# add_library(core MODULE
#     ${PROJECT_SOURCE_DIR}/lib/Polyhedral/core.cpp
# )
# target_link_libraries(core
#     pybind11::module
# )


================================================
FILE: Dockerfile
================================================
# This Dockerfile configures a Docker environment that
# contains all the required packages for the tool
FROM ubuntu:20.04

# Build arguments supplied by the Makefile (`--build-arg`).
ARG UID
ARG GID
ARG VHLS_PATH
RUN echo "Group ID: $GID"
RUN echo "User ID: $UID"

USER root
RUN apt-get update -y && apt-get install apt-utils -y
RUN DEBIAN_FRONTEND="noninteractive" apt-get -y install tzdata

# Install basic packages
# (every continued line ends in " \" so package names can never be glued
# together when the following line is not indented)
RUN apt-get upgrade -y
RUN apt-get update -y \
    && apt-get install -y clang lld cmake libssl-dev \
       pkg-config g++ \
       llvm gcc ninja-build \
       build-essential autoconf libtool \
       git vim wget sudo

CMD ["bash"]

# Add dev-user
# RUN groupadd -o -g $GID dev-user
# RUN useradd -r -g $GID -u $UID -m -d /home/dev-user -s /sbin/nologin -c "User" dev-user
# RUN echo "dev-user ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
# USER dev-user

# Install PyTorch and Torch-MLIR
ENV PATH="${PATH}:~/.local/bin"
# RUN pip3 install --user --upgrade pip \
#     && pip3 install pandas dataclasses colorlog pyyaml

# Add environment variables
# These are shell commands (export / source), so they must be appended to the
# shell start-up file ~/.bashrc; writing them to ~/.vimrc never sets up the
# Vitis environment and makes vim fail on start-up.
# NOTE(review): the PATH entries use /workspace/... while the container's
# WORKDIR / bind mount is /home/workspace - confirm which prefix is intended.
ENV vhls $VHLS_PATH
RUN printf "\
\nexport LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:\$LIBRARY_PATH \
\n# Vitis HLS setup \
\nsource ${vhls}/Vitis/2022.2/settings64.sh \
\nsource ${vhls}/Vitis_HLS/2022.2/settings64.sh \
\nexport PATH=$PATH:/workspace/build/bin:/workspace/scalehls/polygeist/llvm/build/bin:/workspace/scalehls/polygeist/build/bin:~/.local/bin \
\n" >> ~/.bashrc

#Add vim environment
RUN
printf "\
\nset autoread \
\nautocmd BufWritePost *.cpp silent! !clang-format -i <afile> \
\nautocmd BufWritePost *.c silent! !clang-format -i <afile> \
\nautocmd BufWritePost *.h silent! !clang-format -i <afile> \
\nautocmd BufWritePost *.hpp silent! !clang-format -i <afile> \
\nautocmd BufWritePost *.cc silent! !clang-format -i <afile> \
\nautocmd BufWritePost *.py silent! !python3 -m black <afile> \
\nautocmd BufWritePost *.sv silent! !verible-verilog-format --inplace <afile> \
\nautocmd BufWritePost *.v silent! !verible-verilog-format --inplace <afile> \
\nautocmd BufWritePost * redraw! \
\n" >> ~/.vimrc
# The printf above appends format-on-save hooks to ~/.vimrc. Each formatter is
# given <afile>, which vim expands to the file that triggered the autocmd;
# without a file argument the in-place formatters have nothing to rewrite.

# Entrypoint set up
WORKDIR /home/workspace
# COPY . /usr/src/workspace


================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


================================================
FILE: Makefile
================================================
# Helper targets for building and entering the POM development container.

# Host user / group ids forwarded to the image build as UID / GID
# (the fallbacks 9001 / 1000 are used when `id` prints nothing).
user=$(if $(shell id -u),$(shell id -u),9001)
group=$(if $(shell id -g),$(shell id -g),1000)
# phism=/workspace
# Host directory holding the Vitis / Vitis HLS installation; it is passed to
# the image build and bind-mounted into the container at the same path.
vhls=/data/Vivado

# Neither target produces a file of the same name, so always run them.
.PHONY: build-docker shell

# docker buildx prune
# Build docker container
build-docker:
	docker build --build-arg UID=$(user) --build-arg GID=$(group) --build-arg VHLS_PATH=$(vhls) . --tag pom_dev

# Enter docker container
# NOTE(review): the container is created with a fixed --name and without --rm,
# so a second `make shell` fails until the old container is removed.
shell: build-docker
	docker run -it -v $(shell pwd):/home/workspace -v $(vhls):$(vhls) --name pom_dev pom_dev:latest /bin/bash
# docker exec -it -v /home/wczhang/Xilinx:/home/wczhang/Xilinx flowgnn /bin/bash


================================================
FILE: README.md
================================================
# POM: An Optimizing Framework on MLIR for Efficient FPGA-based Accelerator Generation

![GitHub License](https://img.shields.io/github/license/sjtu-zhao-lab/pom)
![GitHub Repo stars](https://img.shields.io/github/stars/sjtu-zhao-lab/pom)

## 1. Introduction

POM is an end-to-end optimizing framework on MLIR for efficient FPGA-based accelerator generation. POM has the following technical contributions:
- **Programmability**: POM provides a decoupled DSL that enables concise descriptions of functions, loops, and arrays. A rich collection of scheduling primitives is provided for flexible customization, leading to much fewer lines of code while maintaining high performance.
- **Extensibility**: POM explicitly introduces three layers of IR to perform operations at suitable abstraction levels in a unified framework, streamlining the implementation and debugging process and reducing the effort of supporting various optimization methods. - **Quality**: POM provides a rich set of optimization methods and performs FPGA-oriented schedule operations at proper levels, relieving tight loop-carried dependence, exploiting parallelism, and improving overall performance. - **Automation**: POM contains a design space exploration (DSE) engine to search for high-performance schedule schemes automatically and efficiently, while also allowing designers to set user-specified schedules. Please refer to our [HPCA' 24 ](https://arxiv.org/abs/2401.05154)paper for more details: ``` @inproceedings{zhanghpca2024pom, title={An Optimizing Framework on MLIR for Efficient FPGA-based Accelerator Generation}, author={Weichuang Zhang and Jieru Zhao and Guan Shen and Quan Chen and Chen Chen and Minyi Guo}, booktitle={2024 IEEE International Symposium on High-Performance Computer Architecture (HPCA)}, year={2024} } ``` *** ## 2. Installation ### 2.1 Install Prerequisite: isl ``` git clone git://repo.or.cz/isl.git cd isl git pull git submodule init git submodule update ./autogen.sh ./configure --with-int=imath make make check make install ``` More details of isl installation: https://compsys-tools.ens-lyon.fr/iscc/isl.pdf ### 2.2 Install POM ``` git clone --recursive git@github.com:sjtu-zhao-lab/pom.git cd pom ``` ### 2.3 Code structure ``` pom/ ├── scalehls/ │ ├── polygeist / │ │ ├── llvm-project/ ``` ## 3. Build ### 3.1 Build scalehls ``` # Go to scalehls/ ./build-scalehls.sh ``` ### 3.2 Build POM ``` # Go to pom/ ./build-pom.sh ``` *** ## 4. Getting Started with a GEMM kernel ``` # Go to pom/build/ cmake --build . 
--target gemm
```

You can run the following instruction to generate an optimized MLIR affine dialect:
```
./bin/gemm
```
The optimized IR is stored at pom/samples/gemm/test_gemm_4096.mlir . You can further translate the optimized IR into HLS C code with the following instruction:
```
../scalehls/build/bin/scalehls-opt ../samples/gemm/test_gemm_4096.mlir \
    --scalehls-func-preprocess="top-func=gemm" \
    --scalehls-qor-estimation="target-spec=../samples/config.json" \
    | ../scalehls/build/bin/scalehls-translate -emit-hlscpp > ../samples/gemm/test_gemm_4096.cpp
```

## Repository Layout
- `include` and `lib` : Compiler implementation
- `scalehls` : the HLS C code generation
- `testbench`: Kernels and applications described with POM DSL
- `samples`: The generated designs

## Related Projects
- [ScaleHLS](https://github.com/hanchenye/scalehls)
- [Tiramisu](https://github.com/Tiramisu-Compiler/tiramisu)
- [MLIR](https://mlir.llvm.org/)


================================================
FILE: ae_script.sh
================================================
#!/usr/bin/env bash
# Runs the complete experiment workflow: build POM, run the testbenches,
# generate the TCL scripts, collect the Vitis reports and produce the results.
# The total wall-clock time is printed at the end.

# The step scripts are addressed relative to this file, so switch to its
# directory first; the workflow then works from any working directory.
cd "$( dirname "${BASH_SOURCE[0]}" )" || exit 1

start_time=$(date +"%s")
echo ""
echo ">>> Start the experiment workflow"
echo ""

./build-pom.sh
./run-code.sh
./tcl-gen.sh
./vitis-reports.sh
./results-gen.sh

end_time=$(date +"%s")
execution_time=$((end_time - start_time))
echo ""
echo ">>> All Steps have been finished!"
echo ">>> Total Execution Time: $execution_time seconds"
echo ""


================================================
FILE: build-pom.sh
================================================
#!/usr/bin/env bash
set -o errexit
set -o pipefail
set -o nounset

# The absolute path to the directory of this script.
POM_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
# POM_DIR="$(dirname "$CURRENT_DIR")"
start_time=$(date +"%s")
echo ""
echo ">>> Step 1. Building POM ..."
echo ""

# Go to the build directory.
cd "${POM_DIR}"
mkdir -p build
cd build

# Configure only once: skip cmake when a cache from a previous run exists.
if [ !
-f "CMakeCache.txt" ]; then
    # LLVM_DIR / MLIR_DIR are handed to cmake through the environment so that
    # find_package(MLIR) locates the ScaleHLS build of LLVM/MLIR.
    LLVM_DIR="${POM_DIR}/scalehls/build/lib/cmake/llvm" \
    MLIR_DIR="${POM_DIR}/scalehls/build/lib/cmake/mlir" \
    cmake -G Ninja ..
        # -DMLIR_DIR="${POM_DIR}/scalehls/build/lib/cmake/mlir" \
        # -DLLVM_EXTERNAL_LIT="${POM_DIR}/scalehls/build/bin/llvm-lit"
fi
cd ../

# Run building.
# targets=("edgeDetect" "gaussian" "blur" "vgg16" "resnet" "jacobi" "jacobi2d" "heat" "seidel")
echo ""
echo ">>> Step 2. Initializing samples/{testbench}"
echo ""

# One output directory per testbench under samples/.
folders=("gemm" "bicg" "gesummv" "2mm" "3mm" "edgeDetect" "gaussian" "blur" "vgg16" "resnet18" "jacobi" "jacobi2d" "heat" "seidel")
for folder in "${folders[@]}"
do
    mkdir -p samples/"${folder}"
done

# for target in "${targets[@]}"
# do
#     cmake --build . --target "$target"
# done
# end_time=$(date +"%s")
# execution_time=$(($end_time - $start_time))
echo ""
echo ">>> Building finished!"
echo ""


================================================
FILE: clean.sh
================================================
#!/bin/bash
# Empties the per-testbench output directories under samples/ (the
# directories themselves are kept).

# Resolve samples/ relative to this script, not the caller's working
# directory, so the rm below can only touch this repository.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"

# Must match the folder list created by build-pom.sh (note: "resnet18").
folders=("gemm" "bicg" "gesummv" "2mm" "3mm" "edgeDetect" "gaussian" "blur" "vgg16" "resnet18" "jacobi" "jacobi2d" "heat" "seidel")
for folder in "${folders[@]}"
do
    # ${folder:?} aborts instead of expanding to "samples//*" on an empty name.
    rm -rf "${SCRIPT_DIR}/samples/${folder:?}"/*
done


================================================
FILE: include/CMakeLists.txt
================================================
# add_subdirectory(Standalone)
# add_subdirectory(Dialect)
# add_subdirectory(Polyhedral)


================================================
FILE: include/polyhedral/codegen.h
================================================
#ifndef _H_polyfp_CODEGEN_
#define _H_polyfp_CODEGEN_
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include
// #include "debug.h"
// #include "expr.h"
// #include "type.h"
#include "function.h"
#include "compute.h"
namespace polyfp{
class var;
std::string generate_new_variable_name();
polyfp::expr traverse_expr_and_replace_non_affine_accesses(polyfp::compute *comp, const
polyfp::expr &exp); } #endif ================================================ FILE: include/polyhedral/compute.h ================================================ #ifndef _H_polyfp_COMPUTE_ #define _H_polyfp_COMPUTE_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "expr.h" #include "placeholder.h" #include "debug.h" namespace polyfp{ std::string generate_new_computation_name(); class var; class scheduler; class function; class placeholder; class compute { friend function; friend placeholder; private: // The data access map isl_map *access; // The isl context of the function. isl_ctx *ctx; // The placeholder that stores the results polyfp::placeholder *plhd; polyfp::expr plhd_expr; polyfp::primitive_t data_type; // the expression (or statement) of the function polyfp::expr expression; polyfp::function *fct; /** * TODO: */ std::map access_map; // The iteration domain of the compute(nested loop) isl_set *iteration_domain; // The name of the compute(nested loop) std::string name; // The number of dimensions in the original definition of the compute int number_of_dims; /** * TODO: Add predicates to the nested loops * Derived from Tiramisu: * A predicate around the compute. The compute is executed * only if this predicate is true. This is useful to insert a non-affine * condition around the compute. */ polyfp::expr predicate; // The schedule of the compute. isl_map * schedule; /** * Derived from Tiramisu: * Time-processor domain of the compute. * In this representation, the logical time of execution and the * processor where the compute will be executed are both * specified. 
*/ isl_set *time_processor_domain; // The iteration variables(iterators) of the compute std::vector iteration_variables; /** * TODO: */ std::vector placeholder_dims; std::vector placeholder_accessmap; /** * TODO: add predicate * \p predicate is an expression that represents constraints on the iteration domain * (for example (i != j). The predicate has to be an affine * expression. */ std::string construct_iteration_domain(std::string name, std::vector iterator_variables, polyfp::expr predicate); // Return the names of iteration domain dimensions. std::vector get_iteration_domain_dimension_names(); void check_dimensions_validity(std::vector dimensions); // Get the number of dimensions of the compute int get_iteration_domain_dimensions_number(); // Check that the names used in \p dimensions are not already in use. void assert_names_not_assigned(std::vector dimensions); /** * Generate an identity schedule for the compute * Derived from Tiramisu: * This identity schedule is an identity relation created from the iteration domain. */ isl_map *gen_identity_schedule_for_iteration_domain(); /** * Generate an identity schedule for the compute. * Derived from Tiramisu: * This identity schedule is an identity relation created from the time-processor domain. */ isl_map *gen_identity_schedule_for_time_space_domain(); // Assign a name to iteration domain dimensions that do not have a name. void name_unnamed_iteration_domain_dimensions(); // Assign a name to iteration domain dimensions that do not have a name. void name_unnamed_time_space_dimensions(); /** * Set an identity schedule for the compute. * Derived from Tiramisu: * This identity schedule is an identity relation created from the iteration domain. */ void set_identity_schedule_based_on_iteration_domain(); // Set the iteration domain of the compute void set_iteration_domain(isl_set *domain); // Set the names of loop levels dimensions. 
void set_loop_level_names(std::vector loop_levels, std::vector names); void set_loop_level_names(std::vector names); // Set the names of the dimensions of the schedule domain. void set_schedule_domain_dim_names(std::vector loop_levels, std::vector names); // Return the function where the compute is declared. polyfp::function *get_function() const; /** * Derived from Tiramisu: * Search the time-space domain (the range of the schedule) and * return the loop level numbers that correspond to the dimensions * named \p dim. */ std::vector get_loop_level_numbers_from_dimension_names(std::vector dim_names); // Intersect set with the context of the compute. isl_set *intersect_set_with_context(isl_set *set); /** * Derived from Tiramisu: * Return the time-processor domain of the compute. * In this representation, the logical time of execution and the * processor where the compute will be executed are both specified. */ isl_set *get_time_processor_domain() const; /** * Derived from Tiramisu: * Return the trimmed time-processor domain. * TODO: The first dimension of the time-processor domain is used * to indicate redundancy of the compute. In POM there is no redundancy * of the compute. This feature will be removed soon. * The trimmed time-processor domain is the time-processor domain * without the dimension that represents the redundancy. We simply * take the time-processor domain and remove the first dimension. */ isl_set *get_trimmed_time_processor_domain(); /** * Derived from Tiramisu: * Update loop level names. This function should be called after each scheduling operation * because scheduling usually changes the original loop level names. * This function erases \p nb_loop_levels_to_erase loop level names starting from the * loop level \p start_erasing. It then inserts the loop level names \p new_names in * \p start_erasing. 
In other words, it replaces the names of loop levels from * \p start_erasing to \p start_erasing + \p nb_loop_levels_to_erase with the loop levels * indicated by \p new_names. This function sets the non erased loop levels to be equal to the * original loop level names. * * \p original_loop_level_names : a vector containing the original loop level names (loop level * names before scheduling). * * \p new_names : the new loop level names. * * \p start_erasing : start erasing loop levels from this loop level. * * \p nb_loop_levels_to_erase : number of loop levels to erase. * * Example. Assuming the original loop levels are {i0, i1, i2, i3} * * Calling this->update_names({i0, i1, i2, i3}, {x0, x1}, 1, 2) updates the loop levels to become * {i0, x0, x1, i3}. */ void update_names(std::vector original_loop_level_names, std::vector new_names, int start_erasing, int nb_loop_levels_to_erase); protected: isl_ctx *get_ctx() const; polyfp::expr get_predicate(); /** * Return a unique name of compute; made of the following pattern: * [compute name]@[compute address in memory] */ const std::string get_unique_name() const; // Set the name of the compute. 
void set_name(const std::string &n); void init_computation(std::string iteration_space_str, polyfp::function *fct, const polyfp::expr &e, polyfp::primitive_t t, expr p); void set_schedule(isl_map *map); void set_schedule(std::string map_str); compute(std::string name,std::vector iterator_variables, polyfp::expr e, primitive_t t, expr p); public: compute(); compute(std::string iteration_domain, polyfp::expr e, polyfp::primitive_t t, polyfp::function *fct, expr p); int II; bool is_unrolled; long latency; long best_latency = LLONG_MAX; int dsp; int minII; std::vector get_iteration_variables(); isl_map * original_schedule; std::map tile_map; std::map tile_size_map; std::map directive_map; std::map directive_tool_map; std::vector original_loop_level_name; std::vector final_loop_level_names; std::vector final_loop_level_names_reserved; std::vector unroll_factor; std::vector unroll_dimension; bool refused = false; std::map temp_access_map; isl_map * best_schedule; std::map best_tile_map; std::map best_tile_size_map; std::map best_directive_map; std::map best_directive_tool_map; std::vector best_loop_level_names; std::vector best_unroll_factor; std::vector best_unroll_dimension; std::mapiterators_location_map; int after_level; int ori_after_level; compute(std::string name, std::vector iterator_variables, polyfp::expr e, expr p); compute(std::string name, std::vector iterator_variables, int a, expr p); isl_map *get_access_relation() const; bool is_tiled = false ; bool is_skewed = false; bool is_optimized = false; bool is_pipelined = false; // bool is_first_opt = false; // TODO: Config file int current_factor = 1; int largest_factor = 2; std::string iterator_to_skew; std::string iterator_to_modify; int skew_factor; std::vector get_loop_level_names(); int get_loop_level_number_from_dimension_name(std::string dim_name) { return this->get_loop_level_numbers_from_dimension_names({dim_name})[0]; } // Debug void dump_iteration_domain() const; // Debug void dump_schedule() const; 
// Debug void dump() const; void gen_time_space_domain(); primitive_t get_data_type() const; const polyfp::expr &get_expr() const; std::vector get_placeholder_dims(); void set_placeholder_dims(std::vector temp); int get_loop_levels_number(); isl_set *get_iteration_domain() const; std::vector compute_buffer_size(); std::map get_access_map(); std::map get_tile_map(); std::map get_tile_size_map(); std::map get_directive_map(); std::map get_directive_tool_map(); void update_leader_components(polyfp::compute *comp); void delete_leader_components(polyfp::compute *comp); // DSE components std::map components; std::map component_level_map; polyfp::compute *leader; std::unordered_map childern; std::vector parents; bool is_leader; bool has_a_leader; bool is_top_parent; bool is_leaf; void dump_components(); void dump_loads_stores(); const std::string &get_name() const; isl_map *get_schedule() const; void set_expression(const polyfp::expr &e); void set_access(std::string access_str); void set_access(isl_map *access); placeholder *get_placeholder(); expr get_placeholder_expr(); // OPT virtual void interchange(var L0, var L1); virtual void interchange(int L0, int L1); virtual void split(var L0, int sizeX); virtual void split(var L0, int sizeX, var L0_outer, var L0_inner); virtual void split(int L0, int sizeX); virtual void tile(var L0, var L1, int sizeX, int sizeY); virtual void tile(var L0, var L1, int sizeX, int sizeY, var L0_outer, var L1_outer, var L0_inner, var L1_inner); virtual void tile(var L0, var L1, var L2, int sizeX, int sizeY, int sizeZ); virtual void tile(var L0, var L1, var L2, int sizeX, int sizeY, int sizeZ, var L0_outer, var L1_outer, var L2_outer, var L0_inner, var L1_inner, var L2_inner); virtual void tile(int L0, int L1, int sizeX, int sizeY); virtual void tile(int L0, int L1, int L2, int sizeX, int sizeY, int sizeZ); virtual void skew(var i, var j, int a , int b, var ni, var nj); virtual void skew(int i, int j, int a, int b); void after(compute &comp, 
polyfp::var iterator); void after(compute &comp, int level); void after(compute *comp, polyfp::var iterator); void after(compute *comp, int level); void after_low_level(compute &comp, int level); void after_low_level(compute &comp, std::vector levels); void pipeline(polyfp::expr dim, int II); void unroll(polyfp::expr dim, int factor); std::map > > map_loadstores; std::vector get_loads(); void get_loads_stores(); void get_all_loadstores(); void auto_loop_transformation(); void compute_dependence_vectors(); std::unordered_map load_map; std::unordered_map store_map; std::vector load_vector; std::vector store_vector; std::map > > map_dependence_vectors; void dump_all_loadstores(); void check_loop_interchange(); void check_loop_skewing(); void apply_opt_strategy(std::vector); bool opt_finished = false; bool is_skewed_inDSE = false; std::vector final_strategy; std::vector current_strategy; std::vector temp_strategy; const static int root_dimension = -1; template polyfp::expr operator()(Args... 
args) { std::vector access_expressions{std::forward(args)...}; if (access_expressions.size() != this->number_of_dims) { polyfp::str_dump("Error - Incorrect access: " + this->get_name() + "("); for (int i = 0; i < access_expressions.size(); i++) { polyfp::expr e = access_expressions[i]; e.dump(false); if (i != access_expressions.size() - 1) polyfp::str_dump(", "); } polyfp::str_dump(").\n"); polyfp::str_dump("The number of access dimensions does not match that used in the declaration of " + this->get_name() + ".\n\n"); exit(1); } return polyfp::expr(polyfp::o_access, this->get_name(), access_expressions, this->get_data_type()); } operator expr(); }; } #endif ================================================ FILE: include/polyhedral/core.h ================================================ #ifndef _H_polyfp_CORE_ #define _H_polyfp_CORE_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // #include "debug.h" #include "expr.h" #include "type.h" #include "codegen.h" namespace polyfp{ class compute; class constant; class generator; void init(std::string name); void init(); void codegen(); compute *get_computation_annotated_in_a_node(isl_ast_node *node); int loop_level_into_dynamic_dimension(int level); int loop_level_into_static_dimension(int level); int dynamic_dimension_into_loop_level(int dim); isl_map *add_eq_to_schedule_map(int dim0, int in_dim_coefficient, int out_dim_coefficient, int const_conefficient, isl_map *sched); } #endif ================================================ FILE: include/polyhedral/debug.h ================================================ #ifndef _H_DEBUG_ #define _H_DEBUG_ #include namespace polyfp { void str_dump(const std::string &str); void str_dump(const std::string &str, const char *str2); void str_dump(const char *str, const char *str2); void print_indentation(); extern int polyfp_indentation; } // namespace polyfp #define ERROR(message, exit_program) { \ 
std::cerr << "Error in " << __FILE__ << ":" \ << __LINE__ << " - " << message << std::endl; \ if (exit_program) \ { \ exit(1); \ } \ } #endif ================================================ FILE: include/polyhedral/expr.h ================================================ #ifndef _H_polyfp_EXPR_ #define _H_polyfp_EXPR_ #include #include #include #include #include #include "debug.h" #include "type.h" namespace polyfp { class function; class compute; std::string generate_new_variable_name(); std::string str_from_polyfp_type_expr(polyfp::expr_t type); std::string str_polyfp_type_op(polyfp::op_t type); std::string str_from_polyfp_type_primitive(polyfp::primitive_t type); // class placeholder; class expr; class var; class global; template using only_integral = typename std::enable_if::value, expr>::type; class global { private: static primitive_t loop_iterator_type; static function *implicit_fct; public: static std::string generate_new_placeholder_name() { static int counter = 0; return "b" + std::to_string(counter++); } static std::string generate_new_constant_name() { static int counter = 0; return "C" + std::to_string(counter++); } static function *get_implicit_function() { return global::implicit_fct; } static void set_implicit_function(function *fct) { global::implicit_fct = fct; } // TODO: The default data type static void set_default_polyfp_options() { global::loop_iterator_type = p_float32; } static void set_loop_iterator_type(primitive_t t) { global::loop_iterator_type = t; } static primitive_t get_loop_iterator_data_type() { return global::loop_iterator_type; } global() { set_default_polyfp_options(); } }; class expr { friend class var; friend class computation; friend class generator; friend class p_max; // The type of the operator. 
polyfp::op_t _operator; std::vector op; union { uint8_t uint8_value; int8_t int8_value; uint16_t uint16_value; int16_t int16_value; uint32_t uint32_value; int32_t int32_value; uint64_t uint64_value; int64_t int64_value; float float32_value; double float64_value; }; // e.g. {i, j} std::vector access_vector; bool defined; protected: std::string name; polyfp::primitive_t dtype; polyfp::expr_t etype; public: polyfp::compute *owner; // Create an undefined expression. expr() { this->defined = false; this->_operator = polyfp::o_none; this->etype = polyfp::e_none; this->dtype = polyfp::p_none; } // Create an undefined expression with type. expr(polyfp::primitive_t dtype) { this->defined = false; this->_operator = polyfp::o_none; this->etype = polyfp::e_none; this->dtype = dtype; } /** * Create an expression for a unary operator that applies * on a variable. For example: allocate(A) or free(B). */ expr(polyfp::op_t o, std::string name) { this->_operator = o; this->etype = polyfp::e_op; this->dtype = polyfp::p_none; this->defined = true; this->name = name; } /** * Construct an expression for a binary operator. */ expr(polyfp::op_t o, polyfp::expr expr0, polyfp::expr expr1) { if (expr0.get_data_type() != expr1.get_data_type()) { polyfp::str_dump("Binary operation between two expressions of different types:\n"); expr0.dump(false); polyfp::str_dump(" (" + str_from_polyfp_type_primitive(expr0.get_data_type()) + ")"); polyfp::str_dump(" and "); expr1.dump(false); polyfp::str_dump(" (" + str_from_polyfp_type_primitive(expr1.get_data_type()) + ")"); polyfp::str_dump("\n"); ERROR("\nThe two expressions should be of the same type. 
Use casting to elevate the type of one expression to the other.\n", true); } this->_operator = o; this->etype = polyfp::e_op; this->dtype = expr0.get_data_type(); this->defined = true; this->op.push_back(expr0); this->op.push_back(expr1); } // Construct an access expr(polyfp::op_t o, std::string name, std::vector vec, polyfp::primitive_t type) { assert(((o == polyfp::o_access) || (o == polyfp::o_placeholder)) && "The operator is not an access or a placeholder operator."); assert(vec.size() > 0); assert(name.size() > 0); this->_operator = o; this->etype = polyfp::e_op; this->dtype = type; this->defined = true; if (o == polyfp::o_access || o == polyfp::o_placeholder) { this->set_access(vec); } else { ERROR("Type of operator is not o_access or o_placeholder, or o_lin_index.", true); } this->name = name; } // Construct an unsigned 8-bit integer expression. expr(uint8_t val) { this->etype = polyfp::e_val; this->_operator = polyfp::o_none; this->defined = true; this->dtype = polyfp::p_uint8; this->uint8_value = val; } // Construct a signed 8-bit integer expression. expr(int8_t val) { this->etype = polyfp::e_val; this->_operator = polyfp::o_none; this->defined = true; this->dtype = polyfp::p_int8; this->int8_value = val; } // Construct an unsigned 16-bit integer expression. 
expr(uint16_t val) { this->defined = true; this->etype = polyfp::e_val; this->_operator = polyfp::o_none; this->dtype = polyfp::p_uint16; this->uint16_value = val; } expr(int16_t val) { this->defined = true; this->etype = polyfp::e_val; this->_operator = polyfp::o_none; this->dtype = polyfp::p_int16; this->int16_value = val; } expr(uint32_t val) { this->etype = polyfp::e_val; this->_operator = polyfp::o_none; this->defined = true; this->dtype = polyfp::p_uint32; this->uint32_value = val; } expr(int32_t val) { this->etype = polyfp::e_val; this->_operator = polyfp::o_none; this->defined = true; this->dtype = polyfp::p_int32; this->int32_value = val; } expr(uint64_t val) { this->etype = polyfp::e_val; this->_operator = polyfp::o_none; this->defined = true; this->dtype = polyfp::p_uint64; this->uint64_value = val; } expr(int64_t val) { this->etype = polyfp::e_val; this->_operator = polyfp::o_none; this->defined = true; this->dtype = polyfp::p_int64; this->int64_value = val; } expr(float val) { this->etype = polyfp::e_val; this->_operator = polyfp::o_none; this->defined = true; this->dtype = polyfp::p_float32; this->float32_value = val; } polyfp::expr copy() const; expr(double val) { this->etype = polyfp::e_val; this->_operator = polyfp::o_none; this->defined = true; this->dtype = polyfp::p_float64; this->float64_value = val; } uint8_t get_uint8_value() const { assert(this->get_expr_type() == polyfp::e_val); assert(this->get_data_type() == polyfp::p_uint8); return uint8_value; } int8_t get_int8_value() const { assert(this->get_expr_type() == polyfp::e_val); assert(this->get_data_type() == polyfp::p_int8); return int8_value; } uint16_t get_uint16_value() const { assert(this->get_expr_type() == polyfp::e_val); assert(this->get_data_type() == polyfp::p_uint16); return uint16_value; } int16_t get_int16_value() const { assert(this->get_expr_type() == polyfp::e_val); assert(this->get_data_type() == polyfp::p_int16); return int16_value; } uint32_t get_uint32_value() const { 
assert(this->get_expr_type() == polyfp::e_val); assert(this->get_data_type() == polyfp::p_uint32); return uint32_value; } int32_t get_int32_value() const { assert(this->get_expr_type() == polyfp::e_val); assert(this->get_data_type() == polyfp::p_int32); return int32_value; } uint64_t get_uint64_value() const { assert(this->get_expr_type() == polyfp::e_val); assert(this->get_data_type() == polyfp::p_uint64); return uint64_value; } int64_t get_int64_value() const { assert(this->get_expr_type() == polyfp::e_val); assert(this->get_data_type() == polyfp::p_int64); return int64_value; } float get_float32_value() const { assert(this->get_expr_type() == polyfp::e_val); assert(this->get_data_type() == polyfp::p_float32); return float32_value; } double get_float64_value() const { assert(this->get_expr_type() == polyfp::e_val); assert(this->get_data_type() == polyfp::p_float64); return float64_value; } int64_t get_int_val() const { assert(this->get_expr_type() == polyfp::e_val); int64_t result = 0; if (this->get_data_type() == polyfp::p_uint8) { result = this->get_uint8_value(); } else if (this->get_data_type() == polyfp::p_int8) { result = this->get_int8_value(); } else if (this->get_data_type() == polyfp::p_uint16) { result = this->get_uint16_value(); } else if (this->get_data_type() == polyfp::p_int16) { result = this->get_int16_value(); } else if (this->get_data_type() == polyfp::p_uint32) { result = this->get_uint32_value(); } else if (this->get_data_type() == polyfp::p_int32) { result = this->get_int32_value(); } else if (this->get_data_type() == polyfp::p_uint64) { result = this->get_uint64_value(); } else if (this->get_data_type() == polyfp::p_int64) { result = this->get_int64_value(); } else if (this->get_data_type() == polyfp::p_float32) { result = this->get_float32_value(); } else if (this->get_data_type() == polyfp::p_float64) { result = this->get_float64_value(); } else { ERROR("Calling get_int_val() on a non integer expression.", true); } return result; } double 
get_double_val() const { assert(this->get_expr_type() == polyfp::e_val); double result = 0; if (this->get_data_type() == polyfp::p_float32) { result = this->get_float32_value(); } else if (this->get_data_type() == polyfp::p_float64) { result = this->get_float64_value(); } else { ERROR("Calling get_double_val() on a non double expression.", true); } return result; } /** * Return the value of the \p i 'th operand of the expression. * \p i can be 0, 1 or 2. */ const polyfp::expr &get_operand(int i) const { assert(this->get_expr_type() == polyfp::e_op); assert((i < (int)this->op.size()) && "Operand index is out of bounds."); return this->op[i]; } // Return the number of arguments of the operator. int get_n_arg() const { assert(this->get_expr_type() == polyfp::e_op); return this->op.size(); } polyfp::expr_t get_expr_type() const { return etype; } polyfp::primitive_t get_data_type() const { return dtype; } const std::string &get_name() const { assert( (this->get_expr_type() == polyfp::e_var) || (this->get_op_type() == polyfp::o_access) || (this->get_op_type() == polyfp::o_placeholder)); return name; } void set_name(std::string &name) { assert((this->get_expr_type() == polyfp::e_var) || (this->get_op_type() == polyfp::o_access)); this->name = name; } polyfp::expr replace_op_in_expr(const std::string &to_replace, const std::string &replace_with) { if (this->name == to_replace) { this->name = replace_with; return *this; } for (int i = 0; i < this->op.size(); i++) { polyfp::expr operand = this->get_operand(i); this->op[i] = operand.replace_op_in_expr(to_replace, replace_with); } return *this; } // Get the type of the operator (polyfp::op_t) polyfp::op_t get_op_type() const { return _operator; } // e.g. 
For a placeholder access A[i+1,j], it will return {i+1, j} const std::vector &get_access() const { assert(this->get_expr_type() == polyfp::e_op); assert(this->get_op_type() == polyfp::o_access || this->get_op_type() == polyfp::o_placeholder); return access_vector; } // Get the number of dimensions in the access vector. int get_n_dim_access() const { assert(this->get_expr_type() == polyfp::e_op); assert(this->get_op_type() == polyfp::o_access); return access_vector.size(); } bool is_defined() const { return defined; } bool is_equal(polyfp::expr e) const { bool equal = true; if ((this->_operator != e._operator) || (this->op.size() != e.op.size()) || (this->access_vector.size() != e.access_vector.size()) || (this->defined != e.defined) || (this->name != e.name) || (this->dtype != e.dtype) || (this->etype != e.etype)) { equal = false; return equal; } for (int i = 0; i < this->access_vector.size(); i++) equal = equal && this->access_vector[i].is_equal(e.access_vector[i]); for (int i = 0; i < this->op.size(); i++) equal = equal && this->op[i].is_equal(e.op[i]); if ((this->etype == e_val) && (e.etype == e_val)) { if (this->get_int_val() != e.get_int_val()) equal = false; if ((this->get_data_type() == polyfp::p_float32) || (this->get_data_type() == polyfp::p_float64)) if (this->get_double_val() != e.get_double_val()) equal = false; } return equal; } bool is_integer() const { return this->get_expr_type() == e_val && (this->get_data_type() == p_uint8 || this->get_data_type() == p_uint16 || this->get_data_type() == p_uint32 || this->get_data_type() == p_uint64 || this->get_data_type() == p_int16 || this->get_data_type() == p_int32 || this->get_data_type() == p_int8 || this->get_data_type() == p_int64); } expr operator+(polyfp::expr other) const; expr operator-(polyfp::expr other) const; expr operator/(polyfp::expr other) const; expr operator*(polyfp::expr other) const; expr operator%(polyfp::expr other) const; expr operator>>(polyfp::expr other) const; // TODO: Extensions // 
Expression multiplied by (-1). polyfp::expr& operator=(polyfp::expr const &); void set_access(std::vector vector) { access_vector = vector; } void set_access_dimension(int i, polyfp::expr acc) { assert((i < (int)this->access_vector.size()) && "index is out of bounds."); access_vector[i] = acc; } void get_access_vector(std::vector &loads) const{ switch (this->etype){ case polyfp::e_op: { if (this->get_n_arg() > 0) { for (int i = 0; i < this->get_n_arg(); i++) { this->op[i].get_access_vector(loads); } } if ((this->get_op_type() == polyfp::o_access)) { // std::cout << "Access to " + this->get_name() + ". Access expressions:" << std::endl; loads.push_back(*this); } break; } case (polyfp::e_val): { // TODO: // if (this->get_data_type() == polyfp::p_uint8) // { // std::cout << "Value:" << this->get_uint8_value() << std::endl; // } // else if (this->get_data_type() == polyfp::p_int8) // { // std::cout << "Value:" << this->get_int8_value() << std::endl; // } // else if (this->get_data_type() == polyfp::p_uint16) // { // std::cout << "Value:" << this->get_uint16_value() << std::endl; // } // else if (this->get_data_type() == polyfp::p_int16) // { // std::cout << "Value:" << this->get_int16_value() << std::endl; // } // else if (this->get_data_type() == polyfp::p_uint32) // { // std::cout << "Value:" << this->get_uint32_value() << std::endl; // } // else if (this->get_data_type() == polyfp::p_int32) // { // std::cout << "Value:" << this->get_int32_value() << std::endl; // } // else if (this->get_data_type() == polyfp::p_uint64) // { // std::cout << "Value:" << this->get_uint64_value() << std::endl; // } // else if (this->get_data_type() == polyfp::p_int64) // { // std::cout << "Value:" << this->get_int64_value() << std::endl; // } // else if (this->get_data_type() == polyfp::p_float32) // { // std::cout << "Value:" << this->get_float32_value() << std::endl; // } // else if (this->get_data_type() == polyfp::p_float64) // { // std::cout << "Value:" << this->get_float64_value() 
<< std::endl; // } break; } case (polyfp::e_var): { // TODO: // std::cout << "Var name:" << this->get_name() << std::endl; // std::cout << "Expression value type:" << str_from_polyfp_type_primitive(this->dtype) << std::endl; break; } } } void dump(bool exhaustive) const { if (this->get_expr_type() != e_none) { if (exhaustive == true) { if (this->is_defined()) { std::cout << "Expression:" << std::endl; std::cout << "Expression type:" << str_from_polyfp_type_expr(this->etype) << std::endl; switch (this->etype) { case polyfp::e_op: { std::cout << "Expression operator type:" << str_polyfp_type_op(this->_operator) << std::endl; if (this->get_n_arg() > 0) { std::cout << "Number of operands:" << this->get_n_arg() << std::endl; std::cout << "Dumping the operands:" << std::endl; for (int i = 0; i < this->get_n_arg(); i++) { std::cout << "Operand " << std::to_string(i) << "." << std::endl; this->op[i].dump(exhaustive); } } if ((this->get_op_type() == polyfp::o_access)) { std::cout << "Access to " + this->get_name() + ". 
Access expressions:" << std::endl; for (const auto &e : this->get_access()) { e.dump(exhaustive); } } break; } case (polyfp::e_val): { std::cout << "Expression value type:" << str_from_polyfp_type_primitive(this->dtype) << std::endl; if (this->get_data_type() == polyfp::p_uint8) { std::cout << "Value:" << this->get_uint8_value() << std::endl; } else if (this->get_data_type() == polyfp::p_int8) { std::cout << "Value:" << this->get_int8_value() << std::endl; } else if (this->get_data_type() == polyfp::p_uint16) { std::cout << "Value:" << this->get_uint16_value() << std::endl; } else if (this->get_data_type() == polyfp::p_int16) { std::cout << "Value:" << this->get_int16_value() << std::endl; } else if (this->get_data_type() == polyfp::p_uint32) { std::cout << "Value:" << this->get_uint32_value() << std::endl; } else if (this->get_data_type() == polyfp::p_int32) { std::cout << "Value:" << this->get_int32_value() << std::endl; } else if (this->get_data_type() == polyfp::p_uint64) { std::cout << "Value:" << this->get_uint64_value() << std::endl; } else if (this->get_data_type() == polyfp::p_int64) { std::cout << "Value:" << this->get_int64_value() << std::endl; } else if (this->get_data_type() == polyfp::p_float32) { std::cout << "Value:" << this->get_float32_value() << std::endl; } else if (this->get_data_type() == polyfp::p_float64) { std::cout << "Value:" << this->get_float64_value() << std::endl; } break; } case (polyfp::e_var): { std::cout << "Var name:" << this->get_name() << std::endl; std::cout << "Expression value type:" << str_from_polyfp_type_primitive(this->dtype) << std::endl; break; } } } } else { std::cout << "dump expression"<to_str(); } } } bool is_constant() const { if (this->get_expr_type() == polyfp::e_val) return true; else return false; } int get_dependence_vector() const{ // TODO: a more general method to calculate dependence vector // Already supported: A(i+4,j-5)-> A(i,j-1) // Not supported: A(2*i,j), A(i+j, j+9) int temp; if 
(this->get_expr_type() == e_op){ switch (this->get_op_type()){ case polyfp::o_add: if ((this->get_operand(0).get_expr_type() == polyfp::e_val)){ temp = this->get_operand(0).get_int_val(); }else if((this->get_operand(1).get_expr_type() == polyfp::e_val)){ temp = this->get_operand(1).get_int_val(); }else{ std::cout<<"not supported type"<get_operand(0).get_expr_type() == polyfp::e_val)){ temp = -(this->get_operand(1).get_int_val()); }else if((this->get_operand(1).get_expr_type() == polyfp::e_val)){ temp = -(this->get_operand(1).get_int_val()); }else{ std::cout<<"not supported type"<get_expr_type() == e_var){ temp = 0; }else{ std::cout<<"not supported type"<get_expr_type() != e_none) { switch (this->etype) { case polyfp::e_op: { switch (this->get_op_type()) { case polyfp::o_max: return *this; case polyfp::o_min: return *this; case polyfp::o_add: this->get_operand(0).simplify(); this->get_operand(1).simplify(); if ((this->get_operand(0).get_expr_type() == polyfp::e_val) && (this->get_operand(1).get_expr_type() == polyfp::e_val)) if ((this->get_operand(0).get_data_type() == polyfp::p_int32)) return expr(this->get_operand(0).get_int_val() + this->get_operand(1).get_int_val()); case polyfp::o_sub: this->get_operand(0).simplify(); this->get_operand(1).simplify(); if ((this->get_operand(0).get_expr_type() == polyfp::e_val) && (this->get_operand(1).get_expr_type() == polyfp::e_val)) if ((this->get_operand(0).get_data_type() == polyfp::p_int32)) return expr(this->get_operand(0).get_int_val() - this->get_operand(1).get_int_val()); case polyfp::o_mul: this->get_operand(0).simplify(); this->get_operand(1).simplify(); if ((this->get_operand(0).get_expr_type() == polyfp::e_val) && (this->get_operand(1).get_expr_type() == polyfp::e_val)) if ((this->get_operand(0).get_data_type() == polyfp::p_int32)) return expr(this->get_operand(0).get_int_val() * this->get_operand(1).get_int_val()); case polyfp::o_div: return *this; case polyfp::o_mod: return *this; case polyfp::o_access: return 
*this; default: ERROR("Simplifying an unsupported polyfp expression.", 1); } break; } case (polyfp::e_val): { return *this; } case (polyfp::e_var): { return *this; } default: ERROR("Expression type not supported.", true); } } return *this; } #include std::string to_str() const { std::string str = std::string(""); if (this->get_expr_type() != e_none) { // std::cout<get_expr_type(); switch (this->etype) { case polyfp::e_op: { switch (this->get_op_type()) { case polyfp::o_max: str += "max(" + this->get_operand(0).to_str(); str += ", " + this->get_operand(1).to_str(); str += ")"; break; case polyfp::o_min: str += "min(" + this->get_operand(0).to_str(); str += ", " + this->get_operand(1).to_str(); str += ")"; break; case polyfp::o_add: str += "(" + this->get_operand(0).to_str(); str += " + " + this->get_operand(1).to_str(); str += ")"; break; case polyfp::o_sub: str += "(" + this->get_operand(0).to_str(); str += " - " + this->get_operand(1).to_str(); str += ")"; break; case polyfp::o_mul: str += "(" + this->get_operand(0).to_str(); str += " * " + this->get_operand(1).to_str(); str += ")"; break; case polyfp::o_div: str += "(" + this->get_operand(0).to_str(); str += " / " + this->get_operand(1).to_str(); str += ")"; break; case polyfp::o_mod: str += "(" + this->get_operand(0).to_str(); str += " % " + this->get_operand(1).to_str(); str += ")"; break; case polyfp::o_access: case polyfp::o_placeholder: str += this->get_name() + "("; for (int k = 0; k < this->get_access().size(); k++) { if (k != 0) { str += ", "; } str += this->get_access()[k].to_str(); } str += ")"; break; default: ERROR("Dumping an unsupported polyfp expression.", 1); } break; } case (polyfp::e_val): { if (this->get_data_type() == polyfp::p_uint8) { str += std::to_string((int)this->get_uint8_value()); } else if (this->get_data_type() == polyfp::p_int8) { str += std::to_string((int)this->get_int8_value()); } else if (this->get_data_type() == polyfp::p_uint16) { str += 
std::to_string(this->get_uint16_value()); } else if (this->get_data_type() == polyfp::p_int16) { str += std::to_string(this->get_int16_value()); } else if (this->get_data_type() == polyfp::p_uint32) { str += std::to_string(this->get_uint32_value()); } else if (this->get_data_type() == polyfp::p_int32) { str += std::to_string(this->get_int32_value()); } else if (this->get_data_type() == polyfp::p_uint64) { str += std::to_string(this->get_uint64_value()); } else if (this->get_data_type() == polyfp::p_int64) { str += std::to_string(this->get_int64_value()); } else if (this->get_data_type() == polyfp::p_float32) { str += std::to_string(this->get_float32_value()); } else if (this->get_data_type() == polyfp::p_float64) { str += std::to_string(this->get_float64_value()); } break; } case (polyfp::e_var): { str += this->get_name(); break; } default: ERROR("Expression type not supported.", true); } } return str; } }; class var: public polyfp::expr { friend compute; private: static std::unordered_map declared_vars; expr lower; expr upper; public: // Return the upper bound of this variable. 
expr get_upper() { return upper; } expr get_lower() { return lower; } var(std::string name); var(std::string name, polyfp::primitive_t type); var(std::string name, int lower_bound, int upper_bound) : var(name) { lower = expr((int32_t) lower_bound); upper = expr((int32_t) upper_bound); // flag = 0; } var(std::string name, expr lower_bound, expr upper_bound) : var(name) { lower = lower_bound; upper = upper_bound; // flag = 0; } var(std::string name, int lower_bound, expr upper_bound) : var(name) { lower = expr((int32_t) lower_bound); upper = upper_bound; // flag = 0; } var(): var(generate_new_variable_name()) {} void show(){ std::cout << "Saved variable " << this->name << " of type " << str_from_polyfp_type_primitive(this->dtype)< expr value_cast(primitive_t tT, cT val) { switch (tT) { case p_int8: return expr{static_cast(val)}; case p_uint8: return expr{static_cast(val)}; case p_int16: return expr{static_cast(val)}; case p_uint16: return expr{static_cast(val)}; case p_int32: return expr{static_cast(val)}; case p_uint32: return expr{static_cast(val)}; case p_int64: return expr{static_cast(val)}; case p_uint64: return expr{static_cast(val)}; case p_float32: return expr{static_cast(val)}; case p_float64: return expr{static_cast(val)}; default: throw std::invalid_argument{"Type not supported"}; } } template only_integral operator+(const polyfp::expr &e, T val) { return e + value_cast(e.get_data_type(), val); } template only_integral operator+(T val, const polyfp::expr &e) { return value_cast(e.get_data_type(), val) + e; } template only_integral operator-(const polyfp::expr &e, T val) { return e - value_cast(e.get_data_type(), val); } template only_integral operator-(T val, const polyfp::expr &e) { return value_cast(e.get_data_type(), val) - e; } template only_integral operator/(const polyfp::expr &e, T val) { return e / expr{val}; } template only_integral operator/(T val, const polyfp::expr &e) { return expr{val} / e; } template only_integral operator*(const 
polyfp::expr &e, T val) { return e * value_cast(e.get_data_type(), val); } template only_integral operator*(T val, const polyfp::expr &e) { return value_cast(e.get_data_type(), val) * e; } template only_integral operator%(const polyfp::expr &e, T val) { return e % expr{val}; } template only_integral operator%(T val, const polyfp::expr &e) { return expr{val} % e; } } #endif ================================================ FILE: include/polyhedral/function.h ================================================ #ifndef _H_polyfp_function_ #define _H_polyfp_function_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "scalehls/Transforms/Passes.h" #include "scalehls/Transforms/Utils.h" #include "scalehls/Transforms/Estimator.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/ToolOutputFile.h" #include "mlir/Dialect/Affine/Analysis/Utils.h" #include "mlir/Dialect/Affine/IR/AffineValueMap.h" #include "mlir/Support/FileUtilities.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/ADT/SmallPtrSet.h" #include "mlir/IR/Builders.h" #include "mlir/IR/IntegerSet.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h" #include "mlir/Dialect/Affine/LoopUtils.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Verifier.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/Affine/Passes.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/LLVMTypes.h" #include "mlir/Dialect/Math/IR/Math.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" #include "mlir/Target/LLVMIR/Import.h" #include "mlir/IR/Builders.h" #include "mlir/IR/MLIRContext.h" #include 
"mlir/IR/OpDefinition.h" #include "expr.h" #include "type.h" #include "codegen.h" #include "generator_isl.h" #include "placeholder.h" namespace polyfp{ class constant; class compute; class generator; class placeholder; isl_ast_node *for_code_generator_after_for(isl_ast_node *node, isl_ast_build *build, void *user); void gen_mlir(polyfp::function *fct, isl_ast_node *node, int level); class function{ friend constant; friend compute; friend generator; friend placeholder; private: std::string name; std::vector invariants; std::map constant_list; std::vector function_arguments; std::map placeholders_list; std::map fct_argument_list; std::map global_argument_list; bool fct_argument_added = false; std::vector, std::vector>> partition_map; // The isl context of the function. isl_ctx *ctx; // The isl AST generated by gen_isl_ast(). isl_ast_node *ast; // Contains all the computes of the function std::vector body; /** * TODO: Extend * Derived from Tiramisu: * The context set of the function, i.e. a set representing the * constraints over the parameters. * The parameters of a function are the function invariants (constants). */ isl_set *context_set; std::vector iterator_names; isl_union_set *get_trimmed_time_processor_domain() const; /** * Derived from Tiramisu: * This function iterates over the computes of the function. * It modifies the identity schedule of each computes in order to * make all the identity schedules have the same number of dimensions * in their ranges. * This is done by adding dimensions equal to 0 to the range of each * identity schedule that does not have enough dimensions. */ isl_union_map *get_aligned_identity_schedules() const; /** * Derived from Tiramisu: * This function first computes the identity schedules, * then it computes the maximal dimension among the dimensions * of the ranges of all the identity schedules. 
*/ int get_max_identity_schedules_range_dim() const; void rename_computations(); // Recursive function to perform the DFS step of dump_sched_graph. void dump_sched_graph_dfs(polyfp::compute *, std::unordered_set &); // Recursive function to perform the DFS step of is_sched_graph_tree. bool is_sched_graph_tree_dfs(polyfp::compute *, std::unordered_set &); protected: void dfs(int pos, int top, int end, int map[500][500], int n, int v[500],int stack[500]); polyfp::compute * update_latency(); int get_longest_path(); int get_longest_node(std::vector path); void add_computation(compute *cpt); void add_invariant(std::pair param); void add_placeholder(std::pair buf); const std::vector &get_iterator_names() const; // void add_iterator_name(const std::string &it_name); const std::vector &get_computations() const; /** TODO: remove * Derived from Tiramisu: * Return a set that represents the parameters of the function * (an ISL set that represents the parameters and constraints over * the parameters of the functions, a parameter is an invariant * of the function). This set is also known as the context of * the program. * An example of a context set is the following: * "[N,M]->{: M>0 and N>0}" * This context set indicates that the two parameters N and M * are strictly positive. */ isl_set *get_program_context() const; std::vector get_computation_by_name(std::string str) const; isl_ctx *get_isl_ctx() const; /** * Return the union of all the schedules * of the compute of the function. */ isl_union_map *get_schedule() const; /** * Return the union of all the iteration domains * of the computes of the function. */ isl_union_set *get_iteration_domain() const; /** * Return true if the usage of high level scheduling comments is valid; i.e. if * the scheduling relations formed using before, after, compute_at, etc.. form a tree. * * More specifically, it verifies that: * - There should be exactly one compute with no compute scheduled before it. 
* - Each other compute should have exactly one compute scheduled before it. */ bool is_sched_graph_tree(); /** * Modify the schedules of the computes of this function to reflect * the order specified using the high level scheduling commands. * * Commands like .after() do not directly modify the schedules * but rather modify the sched_graph. */ void gen_ordering_schedules(); /** * This functions iterates over the schedules of the function (the schedule * of each compute in the function) and computes the maximal dimension * among the dimensions of the ranges of all the schedules. */ int get_max_schedules_range_dim() const; /** * Stores all high level scheduling instructions between computes; i.e. if a user calls * for example c2.after(c1, L), sched_graph[&c1] would contain the key &c2, and * sched_graph[&c1][&c2] = L. */ std::unordered_map> sched_graph; std::unordered_map> sched_graph_reversed; /** * Return an ISL AST that represents this function. * The function gen_isl_ast() should be called before calling * this function. */ isl_ast_node *get_isl_ast() const; // Generate a mlir stmt that represents the function. 
void gen_mlir_stmt(); public: bool is_dataflowed = false; void evaluate_func(); std::unordered_set starting_computations; std::vector leader_computations; std::vector leaf_computations; std::map leader_computation_index; std::map latency_map; std::map all_latency_map; std::map resource_map; std::map> path_map; std::vector> paths; std::vector finish_list; bool consistent_flag = true; bool refused = false; void add_fct_argument(std::pair buf); void add_fct_argument(); void add_global_argument(std::pair buf); void check_loop_fusion(); int get_global_location(){ return global_location; } void set_global_location(int new_location){ this->global_location = new_location; } void dump_schedule(std::string path); long longest_path; long longest_node; long dsp_max; long dsp_usage; long best_dsp_usage = dsp_max; long best_latency; long current_latency; bool new_strategy = true; polyfp::compute * current_opt_comp; int global_location; bool one_compute; function(std::string name); /** * Derived from Tiramisu: * This method applies to the schedule of each compute * in the function. It makes the dimensions of the ranges of * all the schedules equal. This is done by adding dimensions * equal to 0 to the range of each schedule. * This function is called automatically when gen_isl_ast() * or gen_time_processor_domain() are called. */ void align_schedules(); const std::vector &get_body() const; const std::map &get_placeholders() const; const std::map &get_fct_arguments() const; const std::map &get_global_arguments() const; const std::map &get_invariants() const; const std::vector get_invariant_names() const; std::vector, std::vector>> get_partition_map(); void set_partition(std::string name, std::vector tile_factors, std::vector types); void dump_sched_graph(); isl_ast_node *get_isl_ast1() const; /** * Compute the graph of dependences between the computes of the function. 
* C[0] = 0 * D[1] = C[0] * D[2] = C[0] * {C[0] -> D[1]; C[0]->D[2]} */ isl_union_map *compute_dep_graph(); void gen_isl_ast(); /** * Generate the time-space domain of the function. * * In this representation, the logical time of execution and the * processor where the compute will be executed are both * specified. */ void gen_time_space_domain(); void gen_loop_location(); std::string get_name(); void collect_accesses(); std::map > > map_loadstores; void codegen(); void auto_DSE_loop_transformation(); void auto_DSE(std::string path); void auto_DSE_tile_size(polyfp::compute* comp, int factor,std::string path); void dependence_analysis(); void compute_dependency_graph(); /** * Dump the function on standard output (dump most of the fields of * polyfp::function).This is mainly useful for debugging. */ void dump(bool exhaustive) const; void gen_c_code() const; void trans(); }; } #endif ================================================ FILE: include/polyhedral/generator.h ================================================ #include #include #include #include #include #include #include #include #include #include #include #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h" #include "mlir/Dialect/Affine/LoopUtils.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/IntegerSet.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Verifier.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/Affine/Passes.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Math/IR/Math.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Target/LLVMIR/Import.h" #include "mlir/IR/Builders.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/OpDefinition.h" #include "mlir/Pass/PassManager.h" #include 
"mlir/Target/LLVMIR/Export.h" #include "mlir/Transforms/Passes.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "expr.h" #include "type.h" #include "codegen.h" #include "function.h" using llvm::SmallVector; using llvm::ArrayRef; namespace polyfp{ class function; class MLIRGenImpl { friend function; friend compute; private: mlir::ModuleOp theModule; /// The builder is a helper class to create IR inside a function. The builder /// is stateful, in particular it keeps an "insertion point": this is where /// the next operations will be introduced. mlir::OpBuilder builder; public: MLIRGenImpl(mlir::MLIRContext &context) : builder(&context) { theModule = mlir::ModuleOp::create(builder.getUnknownLoc()); } mlir::ModuleOp mlirGen1(const polyfp::function &fct, isl_ast_node *isl_node, int &level, bool flag, bool flag2, bool if_flag); //contains all loops std::vector ops; std::vector start_loops_position; // std::map argument_list; std::vector argument_list; std::map argument_map; std::map array_map; std::map get_argument_map(); std::map get_array_map(); std::vector values; std::vector allocs; std::vector funcs; std::vector get_funcs(); std::map name_map; mlir::ModuleOp getModule(); void a_print_expr(polyfp::expr polyfp_expr, polyfp::compute *comp, int level); // std::vector index_values; // SmallVector index_args; int get_iterator_location_from_name(polyfp::compute *comp,polyfp::expr polyfp_expr, std::vector &index_values); mlir::AffineExpr a_print_index(polyfp::expr polyfp_expr, polyfp::compute *comp, std::vector &index_values,int level); // std::vector add_op; // // std::vector> sum_op; // std::vector mul_op; // std::vector all_add_op; // std::vector all_mul_op; using value = std::variant; std::vector current_op; std::vector all_current_op; using AffineLoopBand = SmallVector; using TileList = SmallVector; }; } 
================================================ FILE: include/polyhedral/generator_isl.h ================================================ #ifndef _H_polyfp_function1_ #define _H_polyfp_function1_ #include "expr.h" #include "type.h" #include "compute.h" #include "placeholder.h" namespace polyfp{ class fucntion; class generator { friend function; friend compute; // friend placeholder; protected: /** * Compute the accesses of the RHS of the compute * \p comp and store them in the accesses vector. * If \p return_buffer_accesses is set to true, this function returns access functions to * buffers. Otherwise it returns access functions to computes. */ static void get_rhs_accesses(const polyfp::function *func, const polyfp::compute *comp, std::vector &accesses, bool return_buffer_accesses); /** * Derived from Tiramisu: * Analyze the \p access_expression and return a set of constraints * that correspond to the access pattern of the access_expression. * * access_dimension: * The dimension of the access. For example, the access * C0(i0, i1, i2) have three access dimensions: i0, i1 and i2. * access_expression: * The expression of the access. * This expression is parsed recursively (by calling get_constraint_for_access) * and is gradually used to update the constraint. * access_relation: * The access relation that represents the access. * cst: * The constraint that defines the access and that is being constructed. * Different calls to get_constraint_for_access modify this constraint * gradually until the final constraint is created. Only the final constraint * is added to the access_relation. * coeff: * The coefficient in which all the dimension coefficients of the constraint * are going to be multiplied. This coefficient is used to implement o_minus, * o_mul and o_sub. 
*/ static isl_constraint *get_constraint_for_access(int access_dimension, const polyfp::expr &access_expression, isl_map *access_relation, isl_constraint *cst, int coeff, const polyfp::function *fct); /** * Derived from Tiramisu: * Traverse a polyfp expression (\p exp) and extract the access relations * from the access operation passed in \p exp. The access relations are added * to the vector \p accesses. * The access relation is from the domain of the compute \p comp to the * domain of the compute accessed by the access operation. * If \p return_buffer_accesses = true, an access to a buffer is created * instead of an access to computes. */ static void traverse_expr_and_extract_accesses(const polyfp::function *fct, const polyfp::compute *comp, const polyfp::expr &exp, std::vector &accesses, bool return_buffer_accesses); public: // TODO }; /** * A class containing utility functions. */ class utility { public: /** * Derived from Tiramisu: * Traverse recursively the ISL AST tree * \p node represents the root of the tree to be traversed. * \p dim is the dimension of the loop from which the bounds have to be * extracted. * \p upper is a boolean that should be set to true to extract * the upper bound and false to extract the lower bound. */ static expr extract_bound_expression(isl_ast_node *ast, int dim, bool upper); /** * Derived from Tiramisu: * Return a polyfp::expr representing the bound of * the dimension \p dim in \p set. If \p upper is true * then this function returns the upper bound otherwise * it returns the lower bound. * * For example, assuming that * * S = {S[i,j]: 0<=i{S[i]} * * this function returns the string "N,M,K". 
*/ static std::string get_parameters_list(isl_set *set); }; } #endif ================================================ FILE: include/polyhedral/placeholder.h ================================================ #ifndef _H_polyfp_PLACEHOLDER_ #define _H_polyfp_PLACEHOLDER_ // #include "compute.h" #include "function.h" #include "expr.h" #include /** * A class that represents placeholders. * * placeholders have two use cases: * - used to store the results of computations, and * - used to represent input arguments to functions. */ namespace polyfp{ class compute; static std::string generate_new_p_operator_name() { static int counter = 0; return "p" + std::to_string(counter++); } class placeholder { friend compute; friend function; // friend generator; private: /** * The sizes of the dimensions of the placeholder. Assuming the following * placeholder buf[N0][N1][N2], dim_sizes should be {N0, N1, N2}. */ std::vector dim_sizes; /** * The polyfp function where this placeholder is declared or where the * placeholder is an argument. */ polyfp::function *fct; /** * The name of the placeholder. * placeholder names should not start with _ (an underscore). * Names starting with _ are reserved names. */ std::string name; /** * The type of the elements of the placeholder. */ polyfp::primitive_t type; protected: /** * Set the size of a dimension of the placeholder. */ void set_dim_size(int dim, int size); public: /** * \brief Default polyfp constructor */ placeholder(); /** * A polyfp placeholder is equivalent to an array in C. * * placeholders have two use cases: * - Used to store the results of computes, and * - Used to represent input arguments to functions. * * \p name is the name of the placeholder. * * \p dim_sizes is a vector of polyfp expressions that represent the * size of each dimension in the placeholder. * Assuming we want to declare the placeholder buf[N0][N1][N2], * then the vector of sizes should be {N0, N1, N2}. 
* placeholder dimensions in polyfp have the same semantics as in * C/C++. * * \p type is the type of the elements of the placeholder. * It must be a primitive type (i.e. p_uint8, p_uint16, ...). * Possible types are declared in \ref polyfp::primitive_t * (in type.h). * * \p fct is a pointer to a polyfp function where the placeholder is * declared or used. If this argument is not provided (which is * the common case), the function that was created automatically * during polyfp initialization will be used (we call that * function the "implicit function"). */ placeholder(std::string name, std::vector dim_sizes, polyfp::primitive_t type, polyfp::function *fct = global::get_implicit_function()); void dump(bool exhaustive) const; const std::string &get_name() const; // Get the number of dimensions of the placeholder. int get_n_dims() const; polyfp::primitive_t get_elements_type() const; void partition(std::vector factors, std::string type); void partition(std::vector factors, std::vector type); const std::vector &get_dim_sizes() const; template polyfp::expr operator()(Args... 
args) { // TODO move to cpp std::vector access_expressions{std::forward(args)...}; if (access_expressions.size() != this->get_n_dims()) { polyfp::str_dump("Error - Incorrect access: " + this->get_name() + "("); for (int i = 0; i < access_expressions.size(); i++) { polyfp::expr e = access_expressions[i]; e.dump(false); if (i != access_expressions.size() - 1) polyfp::str_dump(", "); } polyfp::str_dump(").\n"); polyfp::str_dump("The number of access dimensions does not match that used in the declaration of " + this->get_name() + ".\n\n"); exit(1); } return polyfp::expr(polyfp::o_access, this->get_name(), access_expressions, this->get_elements_type()); // } } operator expr(); }; } #endif ================================================ FILE: include/polyhedral/type.h ================================================ #ifndef _H_PolyFP_TYPE_ #define _H_PolyFP_TYPE_ #include #include namespace polyfp { // Type of expression enum expr_t { e_val, // literal value, like 1, 2.4, 10, ... e_var, // a variable of a primitive type (i.e., an identifier holding one value), e_op, // an operation: add, mul, div, ... e_none // undefined expression. The existence of an expression of e_none type means an error. 
}; enum primitive_t { p_uint8, p_uint16, p_uint32, p_uint64, p_int8, p_int16, p_int32, p_int64, p_float32, p_float64, // p_boolean, p_none }; // Type of operator enum op_t { o_add, o_sub, o_mul, o_div, o_mod, o_max, o_min, o_access, o_placeholder, o_none, }; } #endif ================================================ FILE: lib/CMakeLists.txt ================================================ add_subdirectory(polyhedral) # add_subdirectory(CAPI) # add_subdirectory(Standalone) # add_subdirectory(hello) ================================================ FILE: lib/polyhedral/CMakeLists.txt ================================================ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS) # include_directories(${PROJECT_SOURCE_DIR}/pybind11/include) # include_directories(/usr/include/python3.8) aux_source_directory(. DIR_LIB_SRCS) # 编译成静态库文件 # add_library(Functions ${DIR_LIB_SRCS}) add_library(Functions ${DIR_LIB_SRCS}) ================================================ FILE: lib/polyhedral/codegen.cpp ================================================ #include "codegen.h" namespace polyfp { std::vector function::get_computation_by_name(std::string name) const { assert(!name.empty()); std::vector res_comp; for (const auto &comp : this->get_computations()) { if (name == comp->get_name()) { res_comp.push_back(comp); } } if (res_comp.empty()) { polyfp::str_dump("Computation not found."); } else { // polyfp::str_dump("Computation found."); } return res_comp; } bool access_is_affine(const polyfp::expr &exp) { // We assume that the access is affine until we find the opposite. bool affine = true; // Traverse the expression tree and try to find expressions that are non-affine. 
if (exp.get_expr_type() == polyfp::e_val || exp.get_expr_type() == polyfp::e_var) { affine = true; } else if (exp.get_expr_type() == polyfp::e_op) { switch (exp.get_op_type()) { case polyfp::o_access: case polyfp::o_placeholder: affine = false; break; case polyfp::o_add: case polyfp::o_sub: affine = access_is_affine(exp.get_operand(0)) && access_is_affine(exp.get_operand(1)); break; case polyfp::o_max: case polyfp::o_min: case polyfp::o_mul: case polyfp::o_div: case polyfp::o_mod: break; default: ERROR("Unsupported polyfp expression passed to access_is_affine().", 1); } } return affine; } isl_ast_node *for_code_generator_after_for(isl_ast_node *node, isl_ast_build *build, void *user) { return node; } } ================================================ FILE: lib/polyhedral/compute.cpp ================================================ #include "compute.h" #include "core.h" #include namespace polyfp{ isl_ctx *polyfp::compute::get_ctx() const { return ctx; } isl_set *polyfp::compute::get_iteration_domain() const { assert(iteration_domain != NULL); return iteration_domain; } void polyfp::compute::set_iteration_domain(isl_set *domain) { this->iteration_domain = domain; } int polyfp::compute::get_iteration_domain_dimensions_number() { assert(iteration_domain != NULL); return isl_set_n_dim(this->iteration_domain); } isl_map *compute::get_schedule() const { return this->schedule; } isl_set *polyfp::compute::get_trimmed_time_processor_domain() { isl_set *tp_domain = isl_set_copy(this->get_time_processor_domain()); const char *name = isl_set_get_tuple_name(isl_set_copy(tp_domain)); isl_set *tp_domain_without_duplicate_dim = isl_set_project_out(isl_set_copy(tp_domain), isl_dim_set, 0, 1); tp_domain_without_duplicate_dim = isl_set_set_tuple_name(tp_domain_without_duplicate_dim, name); return tp_domain_without_duplicate_dim ; } void compute::name_unnamed_iteration_domain_dimensions() { isl_set *iter = this->get_iteration_domain(); assert(iter != NULL); for (int i = 0; i < 
this->get_iteration_domain_dimensions_number(); i++) { if (isl_set_has_dim_name(iter, isl_dim_set, i) == isl_bool_false) iter = isl_set_set_dim_name(iter, isl_dim_set, i, generate_new_variable_name().c_str()); } this->set_iteration_domain(iter); } void compute::name_unnamed_time_space_dimensions() { isl_map *sched = this->get_schedule(); assert(sched != NULL); for (int i = 0; i < this->get_loop_levels_number(); i++) { if (isl_map_has_dim_name(sched, isl_dim_out, loop_level_into_dynamic_dimension(i)) == isl_bool_false) sched = isl_map_set_dim_name(sched, isl_dim_out, loop_level_into_dynamic_dimension(i), generate_new_variable_name().c_str()); } this->set_schedule(sched); } isl_map *isl_map_add_dim_and_eq_constraint(isl_map *map, int dim_pos, int constant) { assert(map != NULL); assert(dim_pos >= 0); assert(dim_pos <= (signed int) isl_map_dim(map, isl_dim_out)); map = isl_map_insert_dims(map, isl_dim_out, dim_pos, 1); map = isl_map_set_tuple_name(map, isl_dim_out, isl_map_get_tuple_name(map, isl_dim_in)); isl_space *sp = isl_map_get_space(map); isl_local_space *lsp = isl_local_space_from_space(isl_space_copy(sp)); isl_constraint *cst = isl_constraint_alloc_equality(lsp); cst = isl_constraint_set_coefficient_si(cst, isl_dim_out, dim_pos, 1); cst = isl_constraint_set_constant_si(cst, (-1) * constant); map = isl_map_add_constraint(map, cst); return map; } isl_map *polyfp::compute::gen_identity_schedule_for_iteration_domain() { isl_space *sp = isl_set_get_space(this->get_iteration_domain()); isl_map *sched = isl_map_identity(isl_space_map_from_set(sp)); sched = isl_map_intersect_domain(sched, isl_set_copy(this->get_iteration_domain())); sched = isl_map_coalesce(sched); for (int i = 0; i < isl_space_dim(sp, isl_dim_out) + 1; i++) { sched = isl_map_add_dim_and_eq_constraint(sched, 2 * i, 0); } sched = isl_map_add_dim_and_eq_constraint(sched, 0, 0); return sched; } void compute::set_schedule(isl_map *map) { this->schedule = map; } void compute::set_schedule(std::string 
map_str) { assert(!map_str.empty()); assert(this->ctx != NULL); isl_map *map = isl_map_read_from_str(this->ctx, map_str.c_str()); assert(map != NULL); this->set_schedule(map); } void compute::dump_iteration_domain() const { isl_set_dump(this->get_iteration_domain()); } void compute::dump_schedule() const { polyfp::str_dump("Dumping the schedule of the computation " + this->get_name() + " : "); std::flush(std::cout); isl_map_dump(this->get_schedule()); } const polyfp::expr &polyfp::compute::get_expr() const { return expression; } void compute::dump() const { std::cout << std::endl << "Dumping the computation \"" + this->get_name() + "\" :" << std::endl; std::cout << "Iteration domain of the computation \"" << this->name << "\" : "; std::flush(std::cout); isl_set_dump(this->get_iteration_domain()); std::flush(std::cout); this->dump_schedule(); std::flush(std::cout); std::cout << "Expression of the computation : "; std::flush(std::cout); this->get_expr().dump(true); std::cout << std::endl; std::flush(std::cout); std::cout << "Access relation of the computation : "; std::flush(std::cout); isl_map_dump(this->get_access_relation()); if (this->get_access_relation() == NULL) { std::cout << "\n"; } std::flush(std::cout); if (this->get_time_processor_domain() != NULL) { std::cout << "Time-space domain " << std::endl; std::flush(std::cout); isl_set_dump(this->get_time_processor_domain()); } else { std::cout << "Time-space domain : NULL." 
<< std::endl; } polyfp::str_dump("\n"); polyfp::str_dump("\n"); } void polyfp::compute::set_identity_schedule_based_on_iteration_domain() { isl_map *sched = this->gen_identity_schedule_for_iteration_domain(); this->set_schedule(sched); } std::vector compute::get_iteration_domain_dimension_names() { isl_set *iter = this->get_iteration_domain(); assert(iter != NULL); std::vector result; for (int i = 0; i < this->get_iteration_domain_dimensions_number(); i++) { if (isl_set_has_dim_name(iter, isl_dim_set, i)) result.push_back(std::string(isl_set_get_dim_name(iter, isl_dim_set, i))); else { ERROR("All iteration domain dimensions must have " "a name.", true); } } assert(result.size() == this->get_iteration_domain_dimensions_number()); return result; } void compute::update_names(std::vector original_loop_level_names, std::vector new_names, int erase_from, int nb_loop_levels_to_erase) { this->final_loop_level_names.clear(); this->final_loop_level_names = this->final_loop_level_names_reserved; // // std::cout<<"original names: "<final_loop_level_names.clear(); // this->final_loop_level_names = original_loop_level_names; this->set_loop_level_names(original_loop_level_names); } void polyfp::compute::set_expression(const polyfp::expr &e) { // polyfp::expr modified_e = traverse_expr_and_replace_non_affine_accesses(this, e); // polyfp::str_dump("The original expression is: "); modified_e.dump(false); this->expression = e.copy(); } std::vector compute::get_loop_level_names() { // polyfp::str_dump("Collecting names of loop levels from the range of the schedule: ", isl_map_to_str(this->get_schedule())); std::vector names; std::string names_to_print_for_debugging = ""; for (int i = 0; i < this->get_loop_levels_number(); i++) { std::string dim_name = isl_map_get_dim_name(this->get_schedule(), isl_dim_out, loop_level_into_dynamic_dimension(i)); names.push_back(dim_name); names_to_print_for_debugging += dim_name + " "; } // polyfp::str_dump("Names of loop levels: " + 
names_to_print_for_debugging); return names; } std::vector polyfp::compute::get_placeholder_dims() { return placeholder_dims; } void polyfp::compute::set_placeholder_dims(std::vector temp) { this->placeholder_dims = temp ; } polyfp::function *polyfp::compute::get_function() const { return fct; } int compute::get_loop_levels_number() { assert(this->get_schedule() != NULL); int loop_levels_number = ((isl_map_dim(this->get_schedule(), isl_dim_out)) - 2)/2; return loop_levels_number; } void compute::set_loop_level_names(std::vector names) { assert(names.size() > 0); // polyfp::str_dump("Number of loop levels: " + std::to_string(this->get_loop_levels_number())); // polyfp::str_dump("Number of names to be set: " + std::to_string(names.size())); for (int i = 0; i < names.size(); i++) { if (isl_map_has_dim_name(this->get_schedule(), isl_dim_out, loop_level_into_dynamic_dimension(i)) == isl_bool_true) { this->schedule = isl_map_set_dim_name(this->get_schedule(), isl_dim_out, loop_level_into_dynamic_dimension(i), names[i].c_str()); // polyfp::str_dump("Setting the name of loop level " + std::to_string(i) + " into " + names[i].c_str()); } } // polyfp::str_dump("The schedule after renaming: ", isl_map_to_str(this->get_schedule())); } void compute::set_loop_level_names(std::vector loop_levels, std::vector names) { this->check_dimensions_validity(loop_levels); assert(names.size() > 0); assert(names.size() == loop_levels.size()); for (int i = 0; i < loop_levels.size(); i++) { if (loop_level_into_static_dimension(loop_levels[i]) <= isl_map_dim(this->get_schedule(), isl_dim_out)) { this->schedule = isl_map_set_dim_name(this->get_schedule(), isl_dim_out, loop_level_into_dynamic_dimension(loop_levels[i]), names[i].c_str()); // polyfp::str_dump("Setting the name of loop level " + std::to_string(loop_levels[i]) + " into " + names[i].c_str()); } } // polyfp::str_dump("The schedule after renaming: ", isl_map_to_str(this->get_schedule())); } isl_set 
*polyfp::compute::get_time_processor_domain() const { return time_processor_domain; } void polyfp::compute::set_access(isl_map *access) { assert(access != NULL); this->set_access(isl_map_to_str(access)); } void polyfp::compute::set_access(std::string access_str) { this->access = isl_map_read_from_str(this->ctx, access_str.c_str()); std::vector same_name_computations = this->get_function()->get_computation_by_name(this->get_name()); // TODO: Delete if (same_name_computations.size() > 1) for (auto c : same_name_computations) { c->access = isl_map_read_from_str(this->ctx, access_str.c_str()); } std::vector computations = this->get_function()->get_computation_by_name(this->get_name()); for (auto c : computations) if (isl_map_is_equal(this->get_access_relation(), c->get_access_relation()) == isl_bool_false) { ERROR("Computations that have the same name should also have the same access relation.", true); } assert(this->access != nullptr && "Set access failed"); } isl_map *polyfp::compute::get_access_relation() const { return access; } polyfp::placeholder *polyfp::compute::get_placeholder() { return this->plhd; } polyfp::expr polyfp::compute::get_placeholder_expr() { return this->plhd_expr; } std::vector compute::compute_buffer_size() { std::vector dim_sizes; // If the computation has an update, we first compute the union of all the // updates, then we compute the bounds of the union. 
for (int i = 0; i < this->get_iteration_domain_dimensions_number(); i++) { isl_set *union_iter_domain = isl_set_copy(this->get_iteration_domain()); // polyfp::str_dump("Extracting bounds of the following set:", isl_set_to_str(union_iter_domain)); polyfp::expr lower = utility::get_bound(union_iter_domain, i, false); polyfp::expr upper = utility::get_bound(union_iter_domain, i, true); polyfp::expr diff = (upper - lower + 1); dim_sizes.push_back(diff); } return dim_sizes; } std::map compute::get_access_map(){ return this->access_map; } std::map compute::get_tile_map(){ return this->tile_map; } std::map compute::get_tile_size_map(){ return this->tile_size_map; } std::map compute::get_directive_map(){ return this->directive_map;; } std::map compute::get_directive_tool_map(){ return this->directive_tool_map;; } void polyfp::compute::set_name(const std::string &n) { this->name = n; } isl_map *polyfp::compute::gen_identity_schedule_for_time_space_domain() { isl_set *tp_domain = this->get_trimmed_time_processor_domain(); isl_space *sp = isl_set_get_space(tp_domain); isl_map *sched = isl_map_identity(isl_space_map_from_set(sp)); sched = isl_map_intersect_domain( sched, isl_set_copy(this->get_trimmed_time_processor_domain())); sched = isl_map_set_tuple_name(sched, isl_dim_out, ""); sched = isl_map_coalesce(sched); return sched; } void compute::assert_names_not_assigned(std::vector dimensions) { for (auto const dim: dimensions) { int d = isl_map_find_dim_by_name(this->get_schedule(), isl_dim_out, dim.c_str()); if (d >= 0) { ERROR("Dimension " + dim + " is already in use.", true); } d = isl_map_find_dim_by_name(this->get_schedule(), isl_dim_in, dim.c_str()); if (d >= 0) { ERROR("Dimension " + dim + " is already in use.", true); } } } void compute::check_dimensions_validity(std::vector dimensions) { assert(dimensions.size() > 0); for (auto const dim: dimensions) { assert(dim >= compute::root_dimension); if (loop_level_into_dynamic_dimension(dim) >= 
isl_space_dim(isl_map_get_space(this->get_schedule()), isl_dim_out)) { ERROR("The dynamic dimension " + std::to_string(loop_level_into_dynamic_dimension(dim)) + " is not less than the number of dimensions of the " "time-space domain " + std::to_string(isl_space_dim(isl_map_get_space( this->get_schedule()), isl_dim_out)), true); } } } void compute::set_schedule_domain_dim_names(std::vector loop_levels, std::vector names) { this->check_dimensions_validity(loop_levels); assert(names.size() > 0); assert(names.size() == loop_levels.size()); for (int i = 0; i < loop_levels.size(); i++) { assert(loop_levels[i] <= isl_map_dim(this->get_schedule(), isl_dim_in)); this->schedule = isl_map_set_dim_name(this->get_schedule(), isl_dim_in, loop_levels[i], names[i].c_str()); } // polyfp::str_dump("The schedule after renaming: ", isl_map_to_str(this->get_schedule())); } void polyfp::compute::init_computation(std::string iteration_space_str, polyfp::function *fction, const polyfp::expr &e, polyfp::primitive_t t, polyfp::expr p) { // polyfp::str_dump("Constructing the computation: " + iteration_space_str); assert(iteration_space_str.length() > 0 && ("Empty iteration space")); access = NULL; time_processor_domain = NULL; predicate = polyfp::expr(); this->data_type = t; this->ctx = fction->get_isl_ctx(); //todo for(auto &kv : fction->get_placeholders()){ if(kv.first == p.get_name()) this->plhd = kv.second; } this->plhd_expr = p; this->plhd_expr.owner = this; placeholder_dims = p.get_access(); iteration_domain = isl_set_read_from_str(ctx, iteration_space_str.c_str()); //TODO name = std::string(isl_space_get_tuple_name(isl_set_get_space(iteration_domain), isl_dim_type::isl_dim_set)); number_of_dims = isl_set_dim(iteration_domain, isl_dim_type::isl_dim_set); // for (unsigned i = 0; i < number_of_dims; i++) { // if (isl_set_has_dim_name(iteration_domain, isl_dim_type::isl_dim_set, i)) { // std::string dim_name(isl_set_get_dim_name(iteration_domain, isl_dim_type::isl_dim_set, i)); // 
this->access_variables.push_back(make_pair(i, dim_name)); // } // } // for(auto &kv: access_variables){ // // std::cout<is_leader = true; this->is_top_parent = true; this->is_leaf = true; this->has_a_leader = false; fct->leader_computations.push_back(this); this->after_level = -2; fct->add_computation(this); this->set_identity_schedule_based_on_iteration_domain(); this->set_expression(e); std::vector nms = this->get_iteration_domain_dimension_names(); for (int i = 0; i< this->get_iteration_domain_dimensions_number(); i++) this->set_schedule_domain_dim_names({i}, {generate_new_variable_name()}); for (int i = 0; i< nms.size(); i++){ this->set_loop_level_names({i}, {nms[i]}); this->final_loop_level_names.push_back(nms[i]); this->final_loop_level_names_reserved.push_back(nms[i]); // if(fct->get_body().size() == 1){ // this->iterators_location_map.insert(std::make_pair(nms[i],i)); // fct->global_location = nms.size(); // } } } compute::compute(std::string name, std::vector iterator_variables, polyfp::expr e, primitive_t t, expr p) { this->iteration_variables = iterator_variables; std::string iteration_space_str = construct_iteration_domain(name, iterator_variables, predicate); // std::cout< iterator_variables, polyfp::expr e, expr p) : compute(name, iterator_variables, e, p_float64, p) {} compute::compute(std::string name, std::vector iterator_variables, int a, expr p) : compute(name, iterator_variables, expr((uint16_t) a), p_float64, p) {} std::vector compute::get_iteration_variables() { return this->iteration_variables; } std::string polyfp::compute::construct_iteration_domain(std::string name, std::vector iterator_variables, polyfp::expr predicate) { polyfp::function *function = global::get_implicit_function(); std::string iteration_space_str = ""; std::string comp_name = name; iteration_space_str += "{" + comp_name + "["; if (iterator_variables.size() == 0) iteration_space_str += "0"; else for (int i = 0; i < iterator_variables.size(); i++) { var iter = 
iterator_variables[i]; iteration_space_str += iter.get_name(); if (i < iterator_variables.size() - 1) iteration_space_str += ", "; } iteration_space_str += "] "; if (iterator_variables.size() != 0) iteration_space_str += ": "; if (predicate.is_defined()) iteration_space_str += predicate.to_str() + " and "; bool insert_and = false; for (int i = 0; i < iterator_variables.size(); i++) { var iter = iterator_variables[i]; if ((insert_and == true && (iter.lower.is_defined() || iter.upper.is_defined()))) { iteration_space_str += " and "; insert_and = false; } if (iter.lower.is_defined() || iter.upper.is_defined()) { iteration_space_str += iter.lower.to_str() + "<=" + iter.get_name() + "<" + iter.upper.to_str(); insert_and = true; } } iteration_space_str += "}"; return iteration_space_str; } const std::string &polyfp::compute::get_name() const { return name; } polyfp::primitive_t polyfp::compute::get_data_type() const { return data_type; } std::vector compute::get_loop_level_numbers_from_dimension_names( std::vector dim_names) { assert(dim_names.size() > 0); std::vector dim_numbers; for (auto const dim: dim_names) { assert(dim.size()>0); if (dim == "root") { int d = compute::root_dimension; dim_numbers.push_back(d); } else { int d = isl_map_find_dim_by_name(this->get_schedule(), isl_dim_out, dim.c_str()); // polyfp::str_dump("Searching in the range of ", isl_map_to_str(this->get_schedule())); if (d < 0) { ERROR("Dimension " + dim + " not found.", true); } // polyfp::str_dump("Corresponding loop level is " + std::to_string(dynamic_dimension_into_loop_level(d))); dim_numbers.push_back(dynamic_dimension_into_loop_level(d)); } } this->check_dimensions_validity(dim_numbers); return dim_numbers; } struct param_pack_1 { int in_dim; int out_constant; }; /** * Derived from Tiramisu: * Take a basic map as input, go through all of its constraints, * identifies the constraint of the static dimension param_pack_1.in_dim * (passed in user) and replace the value of 
param_pack_1.out_constant if * the static dimension is bigger than that value. */ isl_stat extract_static_dim_value_from_bmap(__isl_take isl_basic_map *bmap, void *user) { struct param_pack_1 *data = (struct param_pack_1 *) user; isl_constraint_list *list = isl_basic_map_get_constraint_list(bmap); int n_constraints = isl_constraint_list_n_constraint(list); for (int i = 0; i < n_constraints; i++) { isl_constraint *cst = isl_constraint_list_get_constraint(list, i); isl_val *val = isl_constraint_get_coefficient_val(cst, isl_dim_out, data->in_dim); if (isl_val_is_one(val)) // i.e., the coefficient of the dimension data->in_dim is 1 { isl_val *val2 = isl_constraint_get_constant_val(cst); int const_val = (-1) * isl_val_get_num_si(val2); data->out_constant = const_val; } } return isl_stat_ok; } // Derived from Tiramisu: // if multiple const values exist, choose the maximal value among them because we // want to use this value to know by how much we shift the computations back. // so we need to figure out the maximal const value and use it to shift the iterations // backward so that that iteration runs before the consumer. isl_stat extract_constant_value_from_bset(__isl_take isl_basic_set *bset, void *user) { struct param_pack_1 *data = (struct param_pack_1 *) user; isl_constraint_list *list = isl_basic_set_get_constraint_list(bset); int n_constraints = isl_constraint_list_n_constraint(list); for (int i = 0; i < n_constraints; i++) { isl_constraint *cst = isl_constraint_list_get_constraint(list, i); if (isl_constraint_is_equality(cst) && isl_constraint_involves_dims(cst, isl_dim_set, data->in_dim, 1)) { isl_val *val = isl_constraint_get_coefficient_val(cst, isl_dim_out, data->in_dim); assert(isl_val_is_one(val)); // assert that the coefficients of all the other dimension spaces are zero. 
isl_val *val2 = isl_constraint_get_constant_val(cst); int const_val = (-1) * isl_val_get_num_si(val2); data->out_constant = std::max(data->out_constant, const_val); } } return isl_stat_ok; } /** * Derived from Tiramisu: * Return the value of the static dimension. * For example, if we have a map M = {S0[i,j]->[0,0,i,1,j,2]; S0[i,j]->[1,0,i,1,j,3]} * and call isl_map_get_static_dim(M, 5, 1), it will return 3. */ int isl_map_get_static_dim(isl_map *map, int dim_pos) { assert(map != NULL); assert(dim_pos >= 0); assert(dim_pos <= (signed int) isl_map_dim(map, isl_dim_out)); // polyfp::str_dump("Getting the constant coefficient of ", isl_map_to_str(map)); // polyfp::str_dump(" at dimension "); // polyfp::str_dump(std::to_string(dim_pos)); struct param_pack_1 *data = (struct param_pack_1 *) malloc(sizeof(struct param_pack_1)); data->out_constant = 0; data->in_dim = dim_pos; isl_map_foreach_basic_map(isl_map_copy(map), &extract_static_dim_value_from_bmap, data); // polyfp::str_dump("The constant is: "); // polyfp::str_dump(std::to_string(data->out_constant)); return data->out_constant; } std::vector compute::get_loads(){ auto expr = this->get_expr(); std::vector loads; expr.get_access_vector(loads); return loads; } void compute::get_loads_stores() { auto s_loads =this->get_loads(); std::map> s_single_ls; std::vector s_stores; s_stores.push_back(this->get_placeholder_expr()); s_single_ls.insert(std::pair("load", s_loads)); s_single_ls.insert(std::pair("store", s_stores)); this->map_loadstores.insert(std::pair(-1,s_single_ls)); for (auto &edge: this->components) { // std::cout<<"dump_component:"+edge.first->get_name()<get_loads(); std::map> single_ls; std::vector stores; stores.push_back(edge.first->get_placeholder_expr()); // std::cout<<"component:"+std::to_string(loads.size())<get_placeholder_expr().get_name()<map_loadstores.insert(std::pair(edge.second,single_ls)); } } void compute::get_all_loadstores() { this->get_loads_stores(); for(auto &level: this->map_loadstores){ 
for(auto &map: level.second){ if(map.first == "load"){ for(auto &op: map.second){ if(this->load_map.find(op.get_name()) == this->load_map.end()){ this->load_map.insert(std::pair(op.get_name(), &op)); } this->load_vector.push_back(&op); // // std::cout<<"load_vector:"+op.get_name()<store_map.find(op.get_name()) == this->store_map.end()){ this->store_map.insert(std::pair(op.get_name(), &op)); } this->store_vector.push_back(&op); } } } } } void compute::check_loop_interchange(){ bool is_legal = true; // std::map new_order_map; std::map > new_order_map ; std::unordered_map final_dim_order; std::vector dims_no_dp; // std::cout<<"check_loop_interchange:"<map_dependence_vectors) { std::vector dims_no_dp; auto vectors = vector_list.second; auto dim_list = vector_list.first->get_access(); std::unordered_map new_vector_map; for(auto &vector: vectors) { int dims = vector.size(); bool has_zero = false; int zero_number = 0; for(int i=0; i::iterator iter=find(dims_no_dp.begin(),dims_no_dp.end(),name); if ( iter==dims_no_dp.end()){ dims_no_dp.push_back(name); } }else{ std::vector::iterator iter=find(dims_no_dp.begin(),dims_no_dp.end(),dim_list[i].get_name()); if ( iter==dims_no_dp.end()){ dims_no_dp.push_back(dim_list[i].get_name()); } } } } if(vector[i] < 0) { has_zero = true; zero_number += 1; is_legal = false; } if(vector[i] > 0) { has_zero = true; zero_number += 1; std::string dim_name; polyfp::expr temp_dim; if(dim_list[i].get_expr_type()==polyfp::e_op) { if (dim_list[i].get_operand(0).get_expr_type() == polyfp::e_var) { dim_name = dim_list[i].get_operand(0).get_name(); temp_dim = dim_list[i].get_operand(0); }else if(dim_list[i].get_operand(1).get_expr_type() == polyfp::e_var) { dim_name = dim_list[i].get_operand(1).get_name(); temp_dim = dim_list[i].get_operand(1); } }else { dim_name = dim_list[i].get_name(); temp_dim = dim_list[i]; } if (new_vector_map.find(&temp_dim) == new_vector_map.end()) { new_vector_map[&temp_dim] = vector[i]; }else 
if(new_vector_map[&temp_dim]>vector[i]){ new_vector_map[&temp_dim] = vector[i]; } std::vector::iterator iter=find(dims_no_dp.begin(),dims_no_dp.end(),dim_name); if ( iter!=dims_no_dp.end()) { iter = dims_no_dp.erase(iter); } } } } std::vector> tmp; std::vector dim_order; for (auto& i : new_vector_map) tmp.push_back(i); std::sort(tmp.begin(), tmp.end(), [=](std::pair& a, std::pair& b) { return a.second < b.second; }); // other dims should be moved to outer level first. // std::cout<<"new_vector_map:"+ std::to_string(new_vector_map.size())<get_iteration_variables()) { std::vector::iterator iter=find(dims_no_dp.begin(),dims_no_dp.end(),other_dim.get_name()); if(new_vector_map.find(&other_dim) == new_vector_map.end()&& iter==dims_no_dp.end()) { dim_order.push_back(&other_dim); } } // move dims that have loop carried dependencies. // remove all elements with value val for(auto &dim: tmp) { dim_order.push_back(dim.first); } new_order_map[vector_list.first] = dim_order; for(auto &kv :new_vector_map) { if (final_dim_order.find(kv.first->get_name()) == final_dim_order.end()&&final_dim_order.size()get_iteration_domain_dimensions_number()) { final_dim_order[kv.first->get_name()] = kv.second; }else if(final_dim_order[kv.first->get_name()]>kv.second) { final_dim_order[kv.first->get_name()] = kv.second; } } } // Decide a common order and detect conflicts // Define confilct: for all comps in the nested loop, // number of dims that need to be interchanged should not exceed total dims number-1 // gradually add computes until conflicts occur std::vector waiting_list; bool need_split = false; polyfp::compute *comp_to_split ; for(auto &dvectors: new_order_map) { for(auto &dvector:dvectors.second) { std::vector::iterator iter=find(waiting_list.begin(),waiting_list.end(),dvector->get_name()); if ( iter==waiting_list.end()) { waiting_list.push_back(dvector->get_name()); } if(waiting_list.size() == this->get_iteration_domain_dimensions_number()) { need_split = true; comp_to_split = 
dvectors.first->owner; } } } if(need_split == true && is_legal == true) { // TODO: if there is no dependency between comp_to_split and // other comps(its leader and component), split it from the nested loop int top_level = 0; // for(auto &dim: waiting_list){ // int level = this->get_loop_level_number_from_dimension_name(dim); // if(level!=0){ // comp_to_split->interchange(top_level,level); // } // } // comp_to_split->after(comp_to_split->leader,comp_to_split->leader->get_iteration_domain_dimensions_number()-1); comp_to_split->after(comp_to_split->leader, -1); comp_to_split->get_all_loadstores(); // comp->dump_components(); // comp->dump_loads_stores(); comp_to_split->dump_all_loadstores(); comp_to_split->compute_dependence_vectors(); comp_to_split->check_loop_interchange(); } if(need_split == false) { int top_level = 0; for(auto &dim: waiting_list) { int level = this->get_loop_level_number_from_dimension_name(dim); this->interchange(top_level,level); int count = level-top_level-1; if(count!=0){ for(int i=0; iinterchange(top_level+1+i,level); } } top_level+=1; } for(auto &map: this->components) { int dims = map.first->get_iteration_variables().size(); if(map.first->after_level==dims-1) { int top_level2 = 0; for(auto &dim: waiting_list) { int level = map.first->get_loop_level_number_from_dimension_name(dim); //TODO: Potential bugs here map.first->interchange(top_level2,level); int count = level-top_level2-1; if(count!=0) { for(int i=0; iinterchange(top_level2+1+i,level); } } map.first->after(map.first->leader,this->get_iteration_domain_dimensions_number()-1); top_level2+=1; } } } } } void compute::check_loop_skewing() { bool is_legal = false; int factor=1; if(this->map_dependence_vectors.size() == 1) { for(auto &vector_list : this->map_dependence_vectors) { std::vector dims_no_dp; auto vectors = vector_list.second; auto dim_list = vector_list.first->get_access(); std::unordered_map new_vector_map; // TODO: factors if(8>=vectors.size()&&vectors.size()>=2) { is_legal = 
true; factor = 1; } else if(8<=vectors.size()){ is_legal = true; factor = 2; }else{ // TODO } } auto iterators = this->get_iteration_variables(); int size = iterators.size(); std::map iterator_map; for(auto &iter: iterators) { int loc = this->get_loop_level_number_from_dimension_name(iter.get_name()); // std::cout<skew(iterator_map[1],iterator_map[2],1,factor,i0,j0); }else if(size==2) { this->skew(iterator_map[0],iterator_map[1],1,factor,i0,j0); } this->is_skewed_inDSE = true; } } } void compute::auto_loop_transformation() { this->check_loop_interchange(); this->check_loop_skewing(); } void compute::apply_opt_strategy(std::vector tile_size){ std::map iterator_map; this->set_schedule(this->original_schedule); this->set_loop_level_names(this->original_loop_level_name); this->directive_map.clear(); this->is_unrolled = false; this->unroll_factor.clear(); this->unroll_dimension.clear(); this->tile_map.clear(); this->tile_size_map.clear(); this->access_map.clear(); auto iterators = this->get_iteration_variables(); int size = iterators.size(); //TODO: SKEW MAP for(auto &iter: iterators) { int loc = this->get_loop_level_number_from_dimension_name(iter.get_name()); iterator_map[loc] = iter; } if(size >= 3) { var i0("i0"), j0("j0"),k0("k0"), i1("i1"), j1("j1"),k1("k1"); // TODO: Config file if(tile_size[0]<=64 && tile_size[1]<64 && tile_size[2]<64) { int temp_index = this->get_iteration_variables().size()-3; if(tile_size[2]==1 && tile_size[1]==1 && tile_size[0]==1) { // TODO }else { this->tile(iterator_map[temp_index],iterator_map[temp_index+1], iterator_map[temp_index+2],tile_size[0],tile_size[1],tile_size[2],i0, j0, k0, i1, j1, k1); } // std::cout<pipeline(k0,1); this->unroll(k1,-1); this->unroll(j1,-1); this->unroll(i1,-1); } if(tile_size[2]!=1 && tile_size[1]!=1 && tile_size[0]==1) { this->pipeline(k0,1); this->unroll(k1,-1); this->unroll(j1,-1); } if(tile_size[2]==1 && tile_size[1]==1 && tile_size[0]!=1) { this->pipeline(iterator_map[temp_index+2],1); // 
comp->unroll(k1,-1); // comp->unroll(j1,-1); this->unroll(i1,-1); } if(tile_size[2]!=1 && tile_size[1]==1 && tile_size[0]!=1) { this->pipeline(k0,1); this->unroll(k1,-1); this->unroll(i1,-1); } if(tile_size[2]==1 && tile_size[1]==1 && tile_size[0]==1) { int lower = stoi(iterator_map[temp_index+2].get_lower().to_str()); int upper = stoi(iterator_map[temp_index+2].get_upper().to_str()); int range = upper-lower; // TODO: Config if(range<=7) { this->pipeline(iterator_map[temp_index+1],1); this->unroll(iterator_map[temp_index+2],-1); } } if(tile_size[2]!=1 && tile_size[1]==1 && tile_size[0]==1) { this->pipeline(k0,1); this->unroll(k1,-1); } if(tile_size[2]==1 && tile_size[1]!=1 && tile_size[0]!=1) { int lower = stoi(iterator_map[temp_index+2].get_lower().to_str()); int upper = stoi(iterator_map[temp_index+2].get_upper().to_str()); int range = upper-lower; if(range<=6) { this->pipeline(j0,1); this->unroll(j1,-1); this->unroll(i1,-1); this->unroll(iterator_map[temp_index+2],-1); }else { this->pipeline(iterator_map[temp_index+2],1); this->unroll(j1,-1); this->unroll(i1,-1); } } for(auto &part:this->components) { part.first->set_schedule(part.first->original_schedule); part.first->set_loop_level_names(part.first->original_loop_level_name); part.first->tile(iterator_map[temp_index+0],iterator_map[temp_index+1], iterator_map[temp_index+2],tile_size[0],tile_size[1],tile_size[2],i0, j0, k0, i1, j1, k1); if(tile_size[2]==1 && tile_size[1]!=1 && tile_size[0]!=1) { if(part.first->after_level == 2) { part.first->after(this,j1); } else if(part.first->after_level == 0) { part.first->after(this,i0); part.first->pipeline(iterator_map[temp_index+2],1); } }else { if(part.first->after_level == 2) { part.first->after(this,k1); } else if (part.first->after_level == 0){ part.first->after(this,iterator_map[temp_index+0]); part.first->pipeline(iterator_map[temp_index+2],1); //TODO part.first->unroll(k1,-1); part.first->unroll(j1,-1); } } } } } else if(size == 2) { var i0("i0"), j0("j0"), 
i1("i1"), j1("j1"); // TODO: Config file if(tile_size[0]<64 && tile_size[1]<64) { this->tile(iterator_map[0],iterator_map[1],tile_size[0],tile_size[1],i0, j0, i1, j1); if(tile_size[1]!=1&&tile_size[0]!=1) { this->pipeline(j0,1); this->unroll(j1,-1); this->unroll(i1,-1); }else if(tile_size[1]==1&&tile_size[0]!=1) { this->pipeline(iterator_map[1],1); this->unroll(i1,-1); }else if(tile_size[0]==1&&tile_size[1]!=1) { this->pipeline(j0,1); this->unroll(j1,-1); } for(auto &part:this->components) { part.first->set_schedule(part.first->original_schedule); part.first->set_loop_level_names(part.first->original_loop_level_name); part.first->directive_map.clear(); part.first->is_unrolled = false; part.first->unroll_factor.clear(); part.first->unroll_dimension.clear(); part.first->tile_map.clear(); part.first->tile_size_map.clear(); part.first->access_map.clear(); part.first->tile(iterator_map[0],iterator_map[1],tile_size[0],tile_size[1],i0, j0, i1, j1); if(tile_size[1]!=1&&tile_size[0]!=1) { if(part.first->after_level == 1) { part.first->after(this,j1); } else if(part.first->after_level == 0) { part.first->after(this,i0); part.first->pipeline(j0,1); } } else if(tile_size[1]==1&&tile_size[0]!=1) { if(part.first->after_level == 1) { part.first->after(this,i1); } else if(part.first->after_level == 0) { part.first->pipeline(iterator_map[1],1); part.first->after(this,i0); } } else if(tile_size[0]==1&&tile_size[1]!=1) { if(part.first->after_level == 1) { part.first->after(this,j1); } else if(part.first->after_level == 0) { part.first->after(this,iterator_map[0]); part.first->pipeline(j0,1); part.first->unroll(j1,-1); } else if(part.first->after_level == 2) { part.first->after(this,j1); } } } } } } void compute::compute_dependence_vectors() { for(auto &store: this->store_vector) { auto store_index = store->get_access(); auto dims = store_index.size(); for(auto &load: this->load_vector) { auto load_index = load->get_access(); if(store->get_name() == load->get_name()) { // 
std::cout<<"array " + store->get_name()< vector_set; for(int i = 0; i < dims; i++) { auto vector_element = store_index[i].get_dependence_vector()-load_index[i].get_dependence_vector(); // std::cout<<"vector of dimension " + std::to_string(i)+"is: "+std::to_string(vector_element)<map_dependence_vectors[store].push_back(vector_set); } } } } void compute::dump_all_loadstores() { std::string result1 = "loads:"; std::string result2 = "stores:"; for(auto &op: this->load_map){ result1 += op.first +" "; } for(auto &op: this->store_map){ result2 += op.first +" "; } } void compute::interchange(polyfp::var L0_var, polyfp::var L1_var) { assert(L0_var.get_name().length() > 0); assert(L1_var.get_name().length() > 0); std::vector dimensions = this->get_loop_level_numbers_from_dimension_names({L0_var.get_name(), L1_var.get_name()}); this->check_dimensions_validity(dimensions); int L0 = dimensions[0]; int L1 = dimensions[1]; this->interchange(L0, L1); } void compute::interchange(int L0, int L1) { int inDim0 = loop_level_into_dynamic_dimension(L0); int inDim1 = loop_level_into_dynamic_dimension(L1); assert(inDim0 >= 0); assert(inDim0 < isl_space_dim(isl_map_get_space(this->get_schedule()), isl_dim_out)); assert(inDim1 >= 0); assert(inDim1 < isl_space_dim(isl_map_get_space(this->get_schedule()), isl_dim_out)); isl_map *schedule = this->get_schedule(); // polyfp::str_dump("Original schedule: ", isl_map_to_str(schedule)); // polyfp::str_dump("Interchanging the dimensions " + std::to_string( // L0) + " and " + std::to_string(L1)); int n_dims = isl_map_dim(schedule, isl_dim_out); std::string inDim0_str = isl_map_get_dim_name(schedule, isl_dim_out, inDim0); std::string inDim1_str = isl_map_get_dim_name(schedule, isl_dim_out, inDim1); std::vector dimensions; // Create a map for the duplicate schedule. 
std::string map = "{ " + this->get_name() + "["; for (int i = 0; i < n_dims; i++) { if (i == 0) { int duplicate_ID = isl_map_get_static_dim(schedule, 0); map = map + std::to_string(duplicate_ID); } else { if (isl_map_get_dim_name(schedule, isl_dim_out, i) == NULL) { isl_id *new_id = isl_id_alloc(this->get_ctx(), generate_new_variable_name().c_str(), NULL); schedule = isl_map_set_dim_id(schedule, isl_dim_out, i, new_id); } map = map + isl_map_get_dim_name(schedule, isl_dim_out, i); } if (i != n_dims - 1) { map = map + ","; } } map = map + "] ->" + this->get_name() + "["; for (int i = 0; i < n_dims; i++) { if (i == 0) { int duplicate_ID = isl_map_get_static_dim(schedule, 0); map = map + std::to_string(duplicate_ID); } else { if ((i != inDim0) && (i != inDim1)) { map = map + isl_map_get_dim_name(schedule, isl_dim_out, i); dimensions.push_back(isl_map_get_dim_id(schedule, isl_dim_out, i)); } else if (i == inDim0) { map = map + inDim1_str; isl_id *id1 = isl_id_alloc(this->get_ctx(), inDim1_str.c_str(), NULL); dimensions.push_back(id1); } else if (i == inDim1) { map = map + inDim0_str; isl_id *id1 = isl_id_alloc(this->get_ctx(), inDim0_str.c_str(), NULL); dimensions.push_back(id1); } } if (i != n_dims - 1) { map = map + ","; } } map = map + "]}"; // polyfp::str_dump(map.c_str()); isl_map *transformation_map = isl_map_read_from_str(this->get_ctx(), map.c_str()); transformation_map = isl_map_set_tuple_id( transformation_map, isl_dim_in, isl_map_get_tuple_id(isl_map_copy(schedule), isl_dim_out)); isl_id *id_range = isl_id_alloc(this->get_ctx(), this->get_name().c_str(), NULL); transformation_map = isl_map_set_tuple_id( transformation_map, isl_dim_out, id_range); // Check that the names of each dimension is well set for (int i = 1; i < isl_map_dim(transformation_map, isl_dim_in); i++) { isl_id *dim_id = isl_id_copy(dimensions[i - 1]); transformation_map = isl_map_set_dim_id(transformation_map, isl_dim_out, i, dim_id); assert(isl_map_has_dim_name(transformation_map, 
isl_dim_in, i)); assert(isl_map_has_dim_name(transformation_map, isl_dim_out, i)); } // polyfp::str_dump("Final transformation map : ", isl_map_to_str(transformation_map)); schedule = isl_map_apply_range(isl_map_copy(schedule), isl_map_copy(transformation_map)); // polyfp::str_dump("Schedule after interchange: ", isl_map_to_str(schedule)); this->set_schedule(schedule); } void compute::split(polyfp::var L0_var, int sizeX) { polyfp::var L0_outer = polyfp::var(generate_new_variable_name()); polyfp::var L0_inner = polyfp::var(generate_new_variable_name()); this->split(L0_var, sizeX, L0_outer, L0_inner); } void compute::split(polyfp::var L0_var, int sizeX, polyfp::var L0_outer, polyfp::var L0_inner) { // polyfp::str_dump("Schedule after interchange: "); assert(L0_var.get_name().length() > 0); std::vector original_loop_level_names = this->get_loop_level_names(); std::vector dimensions = this->get_loop_level_numbers_from_dimension_names({L0_var.get_name()}); // polyfp::str_dump("Scget_loop_level_numbers_from_dimension_nameshedule after interchange: "); this->check_dimensions_validity(dimensions); int L0 = dimensions[0]; this->assert_names_not_assigned({L0_outer.get_name(), L0_inner.get_name()}); this->split(L0, sizeX); this->set_loop_level_names({L0_outer.get_name(), L0_inner.get_name()}); // this->update_names(original_loop_level_names, {L0_outer.get_name(), L0_inner.get_name()}, L0, 1); // polyfp::str_dump(L0_outer.get_name()); // polyfp::str_dump(L0_inner.get_name()); } void compute::split(int L0, int sizeX) { int inDim0 = loop_level_into_dynamic_dimension(L0); assert(this->get_schedule() != NULL); assert(inDim0 >= 0); assert(inDim0 < isl_space_dim(isl_map_get_space(this->get_schedule()), isl_dim_out)); assert(sizeX >= 1); isl_map *schedule = this->get_schedule(); int duplicate_ID = isl_map_get_static_dim(schedule, 0); schedule = isl_map_copy(schedule); schedule = isl_map_set_tuple_id(schedule, isl_dim_out, isl_id_alloc(this->get_ctx(), this->get_name().c_str(), 
NULL)); // polyfp::str_dump("Original schedule: ", isl_map_to_str(schedule)); // polyfp::str_dump("Splitting dimension " + std::to_string(inDim0) // + " with split size " + std::to_string(sizeX)); std::string inDim0_str; std::string outDim0_str = generate_new_variable_name(); std::string static_dim_str = generate_new_variable_name(); std::string outDim1_str = generate_new_variable_name(); int n_dims = isl_map_dim(this->get_schedule(), isl_dim_out); std::vector dimensions; std::vector dimensions_str; std::string map = "{"; map = map + this->get_name() + "["; for (int i = 0; i < n_dims; i++) { if (i == 0) { std::string dim_str = generate_new_variable_name(); dimensions_str.push_back(dim_str); map = map + dim_str; } else { std::string dim_str = generate_new_variable_name(); dimensions_str.push_back(dim_str); map = map + dim_str; if (i == inDim0) { inDim0_str = dim_str; } } if (i != n_dims - 1) { map = map + ","; } } map = map + "] -> " + this->get_name() + "["; for (int i = 0; i < n_dims; i++) { if (i == 0) { map = map + dimensions_str[i]; dimensions.push_back(isl_id_alloc( this->get_ctx(), dimensions_str[i].c_str(), NULL)); } else if (i != inDim0) { map = map + dimensions_str[i]; dimensions.push_back(isl_id_alloc( this->get_ctx(), dimensions_str[i].c_str(), NULL)); } else { map = map + outDim0_str + ", " + static_dim_str + ", " + outDim1_str; isl_id *id0 = isl_id_alloc(this->get_ctx(), outDim0_str.c_str(), NULL); isl_id *id2 = isl_id_alloc(this->get_ctx(), static_dim_str.c_str(), NULL); isl_id *id1 = isl_id_alloc(this->get_ctx(), outDim1_str.c_str(), NULL); dimensions.push_back(id0); dimensions.push_back(id2); dimensions.push_back(id1); } if (i != n_dims - 1) { map = map + ","; } } map = map + "] : " + dimensions_str[0] + " = " + std::to_string(duplicate_ID) + " and " + outDim0_str + " = floor(" + inDim0_str + "/" + std::to_string(sizeX) + ") and " + outDim1_str + " = (" + inDim0_str + "%" + std::to_string(sizeX) + ") and " + static_dim_str + " = 0}"; // 
std::cout<get_ctx(), map.c_str()); for (int i = 0; i < dimensions.size(); i++) transformation_map = isl_map_set_dim_id( transformation_map, isl_dim_out, i, isl_id_copy(dimensions[i])); transformation_map = isl_map_set_tuple_id( transformation_map, isl_dim_in, isl_map_get_tuple_id(isl_map_copy(schedule), isl_dim_out)); isl_id *id_range = isl_id_alloc(this->get_ctx(), this->get_name().c_str(), NULL); transformation_map = isl_map_set_tuple_id(transformation_map, isl_dim_out, id_range); // polyfp::str_dump("Transformation map : ", isl_map_to_str(transformation_map)); schedule = isl_map_apply_range(isl_map_copy(schedule), isl_map_copy(transformation_map)); // polyfp::str_dump("Schedule after splitting: ", isl_map_to_str(schedule)); this->set_schedule(schedule); } void compute::tile(polyfp::var L0, polyfp::var L1, polyfp::var L2, int sizeX, int sizeY, int sizeZ) { assert(L0.get_name().length() > 0); assert(L1.get_name().length() > 0); assert(L2.get_name().length() > 0); polyfp::var L0_outer = polyfp::var(generate_new_variable_name()); polyfp::var L1_outer = polyfp::var(generate_new_variable_name()); polyfp::var L2_outer = polyfp::var(generate_new_variable_name()); polyfp::var L0_inner = polyfp::var(generate_new_variable_name()); polyfp::var L1_inner = polyfp::var(generate_new_variable_name()); polyfp::var L2_inner = polyfp::var(generate_new_variable_name()); this->tile(L0, L1, L2, sizeX, sizeY, sizeZ, L0_outer, L1_outer, L0_outer, L0_inner, L1_inner, L2_inner); } void compute::tile(polyfp::var L0, polyfp::var L1, int sizeX, int sizeY) { assert(L0.get_name().length() > 0); assert(L1.get_name().length() > 0); polyfp::var L0_outer = polyfp::var(generate_new_variable_name()); polyfp::var L1_outer = polyfp::var(generate_new_variable_name()); polyfp::var L0_inner = polyfp::var(generate_new_variable_name()); polyfp::var L1_inner = polyfp::var(generate_new_variable_name()); this->tile(L0, L1, sizeX, sizeY, L0_outer, L1_outer, L0_inner, L1_inner); } void compute::tile(polyfp::var 
L0, polyfp::var L1, polyfp::var L2, int sizeX, int sizeY, int sizeZ, polyfp::var L0_outer, polyfp::var L1_outer, polyfp::var L2_outer, polyfp::var L0_inner, polyfp::var L1_inner, polyfp::var L2_inner) { assert(L0.get_name().length() > 0); assert(L1.get_name().length() > 0); assert(L2.get_name().length() > 0); assert(L0_outer.get_name().length() > 0); assert(L1_outer.get_name().length() > 0); assert(L2_outer.get_name().length() > 0); assert(L0_inner.get_name().length() > 0); assert(L1_inner.get_name().length() > 0); assert(L2_inner.get_name().length() > 0); if(sizeX==0 &&sizeY==0&&sizeZ==0) { return; } this->assert_names_not_assigned({L0_outer.get_name(), L1_outer.get_name(), L2_outer.get_name(), L0_inner.get_name(), L1_inner.get_name(), L2_inner.get_name()}); std::vector original_loop_level_names = this->get_loop_level_names(); std::vector dimensions = this->get_loop_level_numbers_from_dimension_names({L0.get_name(), L1.get_name(), L2.get_name()}); assert(dimensions.size() == 3); this->tile(dimensions[0], dimensions[1], dimensions[2], sizeX, sizeY, sizeZ); if(sizeX == 1 && sizeY == 1 ) { this->update_names(original_loop_level_names, {L0.get_name(), L1.get_name(), L2_outer.get_name(), L2_inner.get_name()}, dimensions[0], 3); } else if(sizeX == 1 && sizeZ == 1 ) { this->update_names(original_loop_level_names, {L0.get_name(), L1_outer.get_name(), L2.get_name(), L1_inner.get_name()}, dimensions[0], 3); }else if(sizeY == 1 && sizeZ == 1 ) { this->update_names(original_loop_level_names, {L0_outer.get_name(), L1.get_name(), L2.get_name(), L0_inner.get_name()}, dimensions[0], 3); }else if(sizeX == 1) { this->update_names(original_loop_level_names, {L0.get_name(), L1_outer.get_name(), L2_outer.get_name(), L1_inner.get_name(), L2_inner.get_name()}, dimensions[0], 3); }else if(sizeY == 1) { this->update_names(original_loop_level_names, {L0_outer.get_name(), L1.get_name(), L2_outer.get_name(), L0_inner.get_name(), L2_inner.get_name()}, dimensions[0], 3); }else if(sizeZ == 1) { 
this->update_names(original_loop_level_names, {L0_outer.get_name(), L1_outer.get_name(), L2.get_name(), L0_inner.get_name(), L1_inner.get_name()}, dimensions[0], 3); }else{ this->update_names(original_loop_level_names, {L0_outer.get_name(), L1_outer.get_name(), L2_outer.get_name(), L0_inner.get_name(), L1_inner.get_name(), L2_inner.get_name()}, dimensions[0], 3); } this->access_map.insert(std::pair(L0.get_name(),L0_inner.get_name())); this->access_map.insert(std::pair(L1.get_name(),L1_inner.get_name())); this->access_map.insert(std::pair(L2.get_name(),L2_inner.get_name())); this->tile_map.insert(std::pair(L0_inner.get_name(),L0_outer.get_name())); this->tile_map.insert(std::pair(L1_inner.get_name(),L1_outer.get_name())); this->tile_map.insert(std::pair(L2_inner.get_name(),L2_outer.get_name())); this->tile_size_map.insert(std::pair(L0_inner.get_name(),sizeX)); this->tile_size_map.insert(std::pair(L1_inner.get_name(),sizeY)); this->tile_size_map.insert(std::pair(L2_inner.get_name(),sizeZ)); this->is_tiled = true; } void compute::tile(polyfp::var L0, polyfp::var L1, int sizeX, int sizeY, polyfp::var L0_outer, polyfp::var L1_outer, polyfp::var L0_inner, polyfp::var L1_inner) { assert(L0.get_name().length() > 0); assert(L1.get_name().length() > 0); assert(L0_outer.get_name().length() > 0); assert(L1_outer.get_name().length() > 0); assert(L0_inner.get_name().length() > 0); assert(L1_inner.get_name().length() > 0); std::vector original_loop_level_names = this->get_loop_level_names(); this->assert_names_not_assigned({L0_outer.get_name(), L1_outer.get_name(), L0_inner.get_name(), L1_inner.get_name()}); std::vector dimensions = this->get_loop_level_numbers_from_dimension_names({L0.get_name(), L1.get_name()}); assert(dimensions.size() == 2); this->tile(dimensions[0], dimensions[1], sizeX, sizeY); // Replace the original dimension name with new dimension names if(sizeX == 1) { this->update_names(original_loop_level_names, {L0.get_name(), L1_outer.get_name(), 
L1_inner.get_name()}, dimensions[0], 2); } else if(sizeY == 1) { this->update_names(original_loop_level_names, {L0_outer.get_name(), L1.get_name(), L0_inner.get_name()}, dimensions[0], 2); }else { this->update_names(original_loop_level_names, {L0_outer.get_name(), L1_outer.get_name(), L0_inner.get_name(),L1_inner.get_name()}, dimensions[0], 2); } this->access_map.insert(std::pair(L0.get_name(),L0_inner.get_name())); this->access_map.insert(std::pair(L1.get_name(),L1_inner.get_name())); this->tile_map.insert(std::pair(L0_inner.get_name(),L0_outer.get_name())); this->tile_map.insert(std::pair(L1_inner.get_name(),L1_outer.get_name())); this->tile_size_map.insert(std::pair(L0_inner.get_name(),sizeX)); this->tile_size_map.insert(std::pair(L1_inner.get_name(),sizeY)); this->is_tiled = true; } void compute::tile(int L0, int L1, int L2, int sizeX, int sizeY, int sizeZ) { // Check that the two dimensions are consecutive. // Tiling only applies on a consecutive band of loop dimensions. assert(L1 == L0 + 1); assert(L2 == L1 + 1); assert((sizeX > 0) && (sizeY > 0) && (sizeZ > 0)); assert(this->get_iteration_domain() != NULL); this->check_dimensions_validity({L0, L1, L2}); // Original loops // L0 // L1 // L2 this->split(L0, sizeX); // Split L0 into L0 and L0_prime // Compute the new L1 and the new L2 and the newly created L0 (called L0 prime) int L0_prime = L0 + 1; L1 = L1 + 1; L2 = L2 + 1; // Loop after transformation // L0 // L0_prime // L1 // L2 this->split(L1, sizeY); int L1_prime = L1 + 1; L2 = L2 + 1; // Loop after transformation // L0 // L0_prime // L1 // L1_prime // L2 this->split(L2, sizeZ); // Loop after transformation // L0 // L0_prime // L1 // L1_prime // L2 // L2_prime this->interchange(L0_prime, L1); // Change the position of L0_prime to the new position int temp = L0_prime; L0_prime = L1; L1 = temp; // Loop after transformation // L0 // L1 // L0_prime // L1_prime // L2 // L2_prime this->interchange(L0_prime, L2); // Change the position of L0_prime to the new 
position temp = L0_prime; L0_prime = L2; L2 = temp; // Loop after transformation // L0 // L1 // L2 // L1_prime // L0_prime // L2_prime this->interchange(L1_prime, L0_prime); // Loop after transformation // L0 // L1 // L2 // L0_prime // L1_prime // L2_prime } void compute::tile(int L0, int L1, int sizeX, int sizeY) { // Check that the two dimensions are consecutive. // Tiling only applies on a consecutive band of loop dimensions. assert(L1 == L0 + 1); assert((sizeX > 0) && (sizeY > 0)); assert(this->get_iteration_domain() != NULL); this->check_dimensions_validity({L0, L1}); if(sizeX != 1) { this->split(L0, sizeX); this->split(L1 + 1, sizeY); this->interchange(L0 + 1, L1 + 1); }else { this->split(L1, sizeY); } } void compute::skew(polyfp::var L0_var, polyfp::var L1_var, int f_i, int f_j , polyfp::var new_L0_var, polyfp::var new_L1_var) { assert(L0_var.get_name().length() > 0); assert(L1_var.get_name().length() > 0); assert(new_L0_var.get_name().length() > 0); assert(new_L1_var.get_name().length() > 0); this->assert_names_not_assigned({new_L0_var.get_name(), new_L1_var.get_name()}); std::vector original_loop_level_names = this->get_loop_level_names(); std::vector dimensions = this->get_loop_level_numbers_from_dimension_names({L0_var.get_name(), L1_var.get_name()}); this->check_dimensions_validity(dimensions); this->is_skewed = true; int L0 = dimensions[0]; int L1 = dimensions[1]; this->skew(L0, L1, f_i,f_j ); this->update_names(original_loop_level_names, {new_L0_var.get_name(), new_L1_var.get_name()}, dimensions[0], 2); this->access_map.insert(std::pair(L0_var.get_name(),new_L1_var.get_name())); this->access_map.insert(std::pair(L1_var.get_name(),new_L0_var.get_name())); this->iterator_to_skew = new_L1_var.get_name(); this->iterator_to_modify = new_L0_var.get_name(); this->skew_factor = f_j; } void compute::skew(int L0 , int L1 , int f_i , int f_j) { if (L0 + 1 != L1) { ERROR("Loop levels passed to angle_skew() should be consecutive. 
The first argument to angle_skew() should be the outer loop level.", true); } assert(f_j != 0); assert(f_i >= 0); int dim0 = loop_level_into_dynamic_dimension(L0); int dim1 = loop_level_into_dynamic_dimension(L1); assert(this->get_schedule() != NULL); assert(dim0 >= 0); assert(dim0 < isl_space_dim(isl_map_get_space(this->get_schedule()), isl_dim_out)); isl_map *schedule = this->get_schedule(); int duplicate_ID = isl_map_get_static_dim(schedule, 0); schedule = isl_map_copy(schedule); schedule = isl_map_set_tuple_id(schedule, isl_dim_out, isl_id_alloc(this->get_ctx(), this->get_name().c_str(), NULL)); // polyfp::str_dump("Original schedule: ", isl_map_to_str(schedule)); // polyfp::str_dump("Angle _ Skewing dimensions " + std::to_string(dim0) // + " and " + std::to_string(dim1)); std::string inDim0_str, inDim1_str; std::string outDim1_str = generate_new_variable_name(); std::string outDim0_str = generate_new_variable_name(); int n_dims = isl_map_dim(this->get_schedule(), isl_dim_out); std::vector dimensions; std::vector dimensions_str; std::string map = "{"; map = map + this->get_name() + "["; for (int i = 0; i < n_dims; i++) { if (i == 0) { std::string dim_str = generate_new_variable_name(); dimensions_str.push_back(dim_str); map = map + dim_str; } else { std::string dim_str = generate_new_variable_name(); dimensions_str.push_back(dim_str); map = map + dim_str; if (i == dim0) inDim0_str = dim_str; else if (i == dim1) inDim1_str = dim_str; } if (i != n_dims - 1) { map = map + ","; } } map = map + "] -> " + this->get_name() + "["; for (int i = 0; i < n_dims; i++) { if (i == 0) { map = map + dimensions_str[i]; dimensions.push_back(isl_id_alloc( this->get_ctx(), dimensions_str[i].c_str(), NULL)); } else if ((i != dim1) && (i!=dim0)) { map = map + dimensions_str[i]; dimensions.push_back(isl_id_alloc( this->get_ctx(), dimensions_str[i].c_str(), NULL)); } else // i==dim1 { if(i==dim1){ map = map + outDim1_str; isl_id *id0 = isl_id_alloc(this->get_ctx(), outDim1_str.c_str(), 
NULL); dimensions.push_back(id0); } else{// i== dim 0 map = map + outDim0_str; isl_id *id0 = isl_id_alloc(this->get_ctx(), outDim0_str.c_str(), NULL); dimensions.push_back(id0); } } if (i != n_dims - 1) { map = map + ","; } } // Computes gcd of f_i and f_j int n1 = abs(f_i); int n2 = abs(f_j); while(n1 != n2) { if(n1 > n2) n1 -= n2; else n2 -= n1; } // polyfp::str_dump("The gcd of f_i = "+std::to_string(f_i)+" and fj = "+std::to_string(f_j)+" is pgcd = "+std::to_string(n1)); // Update f_i and f_j to equivalent but prime between themselfs value f_i = f_i / n1; f_j = f_j / n1; int gamma = 0; int sigma = 1; bool found = false; if ((f_j == 1) || (f_i == 1)) { gamma = f_i - 1; sigma = 1; /* Since sigma = 1 then f_i - gamma * f_j = 1 & using the previous condition : - f_i = 1 : then gamma = 0 (f_i-1) is enough - f_j = 1 : then gamma = f_i -1 */ } else { if((f_j == - 1) && (f_i > 1)) { gamma = 1; sigma = 0; } else { //General case : solving the Linear Diophantine equation & finding basic solution (sigma & gamma) for : f_i* sigma - f_j*gamma = 1 int i =0; while((i < 100) && (!found)) { if (((sigma * f_i ) % abs(f_j)) == 1){ found = true; } else{ sigma ++; i++; } }; if(!found) { // Detect infinite loop and prevent it in case where f_i and f_j are not prime between themselfs ERROR(" Error in solving the Linear Diophantine equation f_i* sigma - f_j*gamma = 1 ", true); } gamma = ((sigma * f_i) - 1 ) / f_j; } } map = map + "] : " + dimensions_str[0] + " = " + std::to_string(duplicate_ID) + " and " + outDim0_str + " = (" + inDim0_str + "*"+std::to_string(f_i)+" + "+inDim1_str+"*"+std::to_string(f_j)+" ) and " +outDim1_str+" = ("+inDim0_str+"*"+std::to_string(gamma)+" + "+inDim1_str+"*"+std::to_string(sigma)+" ) }"; // polyfp::str_dump("Transformation angle map (string format) : " + map); isl_map *transformation_map = isl_map_read_from_str(this->get_ctx(), map.c_str()); for (int i = 0; i < dimensions.size(); i++) transformation_map = isl_map_set_dim_id( transformation_map, 
isl_dim_out, i, isl_id_copy(dimensions[i])); transformation_map = isl_map_set_tuple_id( transformation_map, isl_dim_in, isl_map_get_tuple_id(isl_map_copy(schedule), isl_dim_out)); isl_id *id_range = isl_id_alloc(this->get_ctx(), this->get_name().c_str(), NULL); transformation_map = isl_map_set_tuple_id(transformation_map, isl_dim_out, id_range); schedule = isl_map_apply_range(isl_map_copy(schedule), isl_map_copy(transformation_map)); // polyfp::str_dump("Schedule after transformation is : ", // isl_map_to_str(schedule)); this->set_schedule(schedule); } void polyfp::compute::after(compute &comp, polyfp::var level) { assert(level.get_name().length() > 0); std::vector dimensions = this->get_loop_level_numbers_from_dimension_names({level.get_name()}); assert(dimensions.size() == 1); int current_level = dimensions[0]; auto leader_dim_map = comp.iterators_location_map; this->after_level = current_level; this->ori_after_level = current_level; this->after(comp, dimensions[0]); } void polyfp::compute::after(compute *comp, polyfp::var level) { assert(level.get_name().length() > 0); std::vector dimensions = this->get_loop_level_numbers_from_dimension_names({level.get_name()}); assert(dimensions.size() == 1); int current_level = dimensions[0]; int counter = 0; auto leader_dim_map = comp->iterators_location_map; this->after_level = current_level; this->ori_after_level= current_level; this->after(comp, dimensions[0]); } void polyfp::compute::after(compute &comp, int level) { auto &graph = this->get_function()->sched_graph; auto &edges = graph[&comp]; auto level_it = edges.find(this); edges[this] = level; this->get_function()->starting_computations.erase(this); // todo // this->get_function()->sched_graph_reversed[this][&comp] = level; this->after_level = level; // this->ori_after_level= level; if(level != -1) { std::vector::iterator iter2 = this->get_function()->leader_computations.begin(); while(iter2 != this->get_function()->leader_computations.end()) { if(*iter2 == this) { 
iter2 = this->get_function()->leader_computations.erase(iter2); } else { iter2++; } } // this->get_function()->leader_computations.erase(this); this->is_leader = false; this->is_top_parent = false; this->has_a_leader = true; this->leader = ∁ int component_level = comp.components.size(); if(component_level !=0) { std::map::reverse_iterator iter = comp.components.rbegin(); component_level = iter->second+1; } auto iter = comp.components.find (this) ; if(iter != comp.components.end()) iter = comp.components.erase (iter); comp.components.insert(std::pair(this,component_level)); comp.update_leader_components(this); }else if(level == -1) { this->is_leader = true; this->has_a_leader = false; this->is_top_parent = false; this->leader = NULL; comp.is_leaf = false; auto iter = comp.components.find (this) ; if(iter != comp.components.end()){ iter = comp.components.erase (iter); comp.delete_leader_components(this); } std::vector::iterator iter2 = this->get_function()->leader_computations.begin(); while(iter2 != this->get_function()->leader_computations.end()) { if(*iter2 == this) { iter2 = this->get_function()->leader_computations.erase(iter2); } else { iter2++; } } this->get_function()->leader_computations.push_back(this); //TODO: check if it is in lead_comp list // int current_level = level; // int counter = 0; // auto dim_list = this->get_loop_level_names(); // auto leader_dim_map = comp.iterators_location_map; // for(int i=0; iglobal_location; // this->iterators_location_map.insert(std::make_pair(dim_list[counter],next_level)); // fct->global_location+=1; // counter+=1; // } } assert(this->get_function()->sched_graph_reversed[this].size() < 2 && "Node has more than one predecessor."); // polyfp::str_dump("sched_graph[" + comp.get_name() + ", " + // this->get_name() + "] = " + std::to_string(level)); } void polyfp::compute::after(compute *comp, int level) { // polyfp::str_dump("Scheduling " + this->get_name() + " to be executed after " + // comp.get_name() + " at level " + 
std::to_string(level)); auto &graph = this->get_function()->sched_graph; auto &edges = graph[comp]; auto level_it = edges.find(this); // if (level_it != edges.end()) // { // if (level_it->second > level) // { // level = level_it->second; // } // } edges[this] = level; this->get_function()->starting_computations.erase(this); this->after_level = level; if(level != -1) { std::vector::iterator iter2 = this->get_function()->leader_computations.begin(); while(iter2 != this->get_function()->leader_computations.end()) { if(*iter2 == this) { iter2 = this->get_function()->leader_computations.erase(iter2); } else { iter2++; } } this->is_leader = false; this->is_top_parent = false; this->has_a_leader = true; this->leader = comp; int component_level = comp->components.size(); if(component_level !=0) { std::map::reverse_iterator iter = comp->components.rbegin(); component_level = iter->second+1; } auto iter = comp->components.find (this) ; if(iter != comp->components.end()) iter = comp->components.erase (iter); comp->components.insert(std::pair(this,component_level)); comp->update_leader_components(this); }else if(level == -1) { this->is_leader = true; this->has_a_leader = false; this->is_top_parent = false; this->leader = NULL; comp->is_leaf = false; auto iter = comp->components.find (this) ; if(iter != comp->components.end()) { iter = comp->components.erase (iter); comp->delete_leader_components(this); } this->get_function()->leader_computations.push_back(this); } assert(this->get_function()->sched_graph_reversed[this].size() < 2 && "Node has more than one predecessor."); } void polyfp::compute::update_leader_components(polyfp::compute *comp) { if(this->has_a_leader) { int component_level = this->leader->components.size()+1; this->leader->components.insert(std::pair(comp,component_level)); this->leader->update_leader_components(comp); } } void polyfp::compute::delete_leader_components(polyfp::compute *comp) { if(this->has_a_leader){ auto iter = this->leader->components.find 
(this) ; if(iter != this->leader->components.end()){ iter = this->leader->components.erase (iter); } // this->leader->components.insert(std::pair(comp,component_level)); this->leader->update_leader_components(comp); } } void polyfp::compute::dump_components() { std::string result = ""; for (auto &edge: this->components) { result += edge.first->get_name() +"[" + std::to_string(edge.second )+ "]=>"; } result += this->get_name(); // std::cout<map_loadstores) { result += std::to_string(edge.first )+":[" ; for(auto &map: edge.second){ result+= map.first; for(auto &vec: map.second){ result+= vec.get_name(); } } result+=+ "]=>"; } result += "root"; // std::cout<align_schedules(); assert(this->get_schedule() != NULL); assert(dim < (signed int) isl_map_dim(this->get_schedule(), isl_dim_out)); assert(dim >= compute::root_dimension); isl_map *new_sched = NULL; for (int i = 1; i<=dim; i=i+2) { if (i < dim) { // Get the constant in comp, add +1 to it and set it to sched1 int order = isl_map_get_static_dim(comp.get_schedule(), i); new_sched = isl_map_copy(this->get_schedule()); new_sched = add_eq_to_schedule_map(i, 0, -1, order, new_sched); } else // (i == dim) { // Get the constant in comp, add +1 to it and set it to sched1 int order = isl_map_get_static_dim(comp.get_schedule(), i); new_sched = isl_map_copy(this->get_schedule()); new_sched = add_eq_to_schedule_map(i, 0, -1, order + 10, new_sched); } this->set_schedule(new_sched); } // polyfp::str_dump("Schedule adjusted: ", // isl_map_to_str(this->get_schedule())); } void polyfp::compute::pipeline(polyfp::expr dim, int II) { for(auto &kv: this->get_loop_level_names()){ if(dim.get_name() == kv){ int level = this->get_loop_level_number_from_dimension_name(kv); this->directive_map.insert(std::pair(kv,"pipeline")); std::string c_name = "c"+ std::to_string(level*2+1); this->directive_tool_map.insert(std::pair(kv,c_name)); this->II = II; } } } void polyfp::compute::unroll(polyfp::expr dim, int factor) { this->is_unrolled = true; 
std::string name = dim.get_name(); auto it = std::find_if(this->unroll_dimension.begin(), this->unroll_dimension.end(), [&](const auto &d) { return d.get_name() == name; }); if (it == this->unroll_dimension.end()) { this->unroll_factor.push_back(factor); this->unroll_dimension.push_back(dim); } // this->unroll_factor.push_back(factor); // this->unroll_dimension.push_back(dim); } const std::string polyfp::compute::get_unique_name() const { std::stringstream namestream; namestream << get_name(); namestream << "@"; namestream << (void *)this; return namestream.str(); } void polyfp::compute::gen_time_space_domain() { assert(this->get_iteration_domain() != NULL); assert(this->get_schedule() != NULL); isl_set *iter = isl_set_copy(this->get_iteration_domain()); iter = this->intersect_set_with_context(iter); time_processor_domain = isl_set_apply( iter, isl_map_copy(this->get_schedule())); // polyfp::str_dump("Schedule:", isl_map_to_str(this->get_schedule())); // polyfp::str_dump("Generated time-space domain:", isl_set_to_str(time_processor_domain)); } isl_set *compute::intersect_set_with_context(isl_set *set) { // Unify the space of the context and the "missing" set so that we can intersect them. 
isl_set *context = isl_set_copy(this->get_function()->get_program_context()); if (context != NULL) { isl_space *model = isl_set_get_space(isl_set_copy(context)); set = isl_set_align_params(set, isl_space_copy(model)); // polyfp::str_dump("Context: ", isl_set_to_str(context)); // polyfp::str_dump("Set after aligning its parameters with the context parameters: ", // isl_set_to_str (set)); isl_id *missing_id1 = NULL; if (isl_set_has_tuple_id(set) == isl_bool_true) { missing_id1 = isl_set_get_tuple_id(set); } else { std::string name = isl_set_get_tuple_name(set); assert(name.size() > 0); missing_id1 = isl_id_alloc(this->get_ctx(), name.c_str(), NULL); } int nb_dims = isl_set_dim(set, isl_dim_set); context = isl_set_add_dims(context, isl_dim_set, nb_dims); // isl_set_to_str (context); context = isl_set_set_tuple_id(context, isl_id_copy(missing_id1)); // isl_set_to_str (context); set = isl_set_intersect(set, isl_set_copy(context)); // isl_set_to_str (set); } return set; } } ================================================ FILE: lib/polyhedral/core.cpp ================================================ #include #include #include #include #include #include #include #include #include #include "core.h" #ifdef _WIN32 #include #endif namespace polyfp { // Used for the generation of new variable names. int id_counter = 0; static int next_dim_name = 0; primitive_t global::loop_iterator_type = p_int32; function *global::implicit_fct; std::unordered_map var::declared_vars; std::string generate_new_variable_name(); polyfp::expr traverse_expr_and_replace_non_affine_accesses(polyfp::compute *comp, const polyfp::expr &exp); void init(std::string fct_name) { function *fct = new function(fct_name); global::set_implicit_function(fct); init(); } void init() { global::set_default_polyfp_options(); } void codegen() { function *fct = global::get_implicit_function(); fct->codegen(); } /** * Derived from Tiramisu: * Transform the loop level into its corresponding dynamic schedule * dimension. 
/**
 * Derived from Tiramisu:
 * Map a loop level to the index of its dynamic schedule dimension.
 *
 * Schedule dimension 0 is the duplication dimension. After it, static and
 * dynamic dimensions alternate (static, dynamic, static, dynamic, ...):
 *
 *   Loop level               :    -1         0      1      2
 *   Schedule dimension number:        0, 1   2  3   4  5   6  7
 *   Schedule                 :       [0, 0, i1, 0, i2, 0, i3, 0]
 *
 * Loop level 0 therefore maps to dimension 2, level 1 to 4, and so on.
 */
int loop_level_into_dynamic_dimension(int level)
{
    // Each loop level occupies two dimensions (static + dynamic), and the
    // first dynamic dimension sits at index 2.
    const int dynamic_dim = 2 * (level + 1);
    return dynamic_dim;
}
* * Loop level : -1 0 1 2 * Schedule dimension number: 0, 1 2 3 4 5 6 7 * Schedule: [0, 0, i1, 0, i2, 0, i3, 0] */ int dynamic_dimension_into_loop_level(int dim) { assert(dim % 2 == 0); int level = (dim - 2)/2; return level; } std::string generate_new_variable_name() { return "t" + std::to_string(id_counter++); } std::string generate_new_computation_name() { return "C" + std::to_string(id_counter++); } std::string str_from_polyfp_type_expr(polyfp::expr_t type) { switch (type) { case polyfp::e_val: return "val"; case polyfp::e_op: return "op"; case polyfp::e_var: return "var"; default: ERROR("polyfp type not supported.", true); return ""; } } std::string str_from_polyfp_type_primitive(polyfp::primitive_t type) { switch (type) { case polyfp::p_uint8: return "uint8"; case polyfp::p_int8: return "int8"; case polyfp::p_uint16: return "uint16"; case polyfp::p_int16: return "int16"; case polyfp::p_uint32: return "uint32"; case polyfp::p_int32: return "int32"; case polyfp::p_uint64: return "uint64"; case polyfp::p_int64: return "int64"; case polyfp::p_float32: return "float32"; case polyfp::p_float64: return "float64"; default: ERROR("polyfp type not supported.", true); return ""; } } std::string str_polyfp_type_op(polyfp::op_t type) { switch (type) { case polyfp::o_max: return "max"; case polyfp::o_min: return "min"; case polyfp::o_add: return "add"; case polyfp::o_sub: return "sub"; case polyfp::o_mul: return "mul"; case polyfp::o_div: return "div"; case polyfp::o_mod: case polyfp::o_access: return "access"; default: // ERROR("polyfp op not supported.", true); return ""; } } isl_map *add_eq_to_schedule_map(int dim0, int in_dim_coefficient, int out_dim_coefficient, int const_conefficient, isl_map *sched) { // isl_map_to_str(sched); // std::to_string(const_conefficient); isl_map *identity = isl_set_identity(isl_map_range(isl_map_copy(sched))); identity = isl_map_universe(isl_map_get_space(identity)); isl_space *sp = isl_map_get_space(identity); isl_local_space *lsp = 
isl_local_space_from_space(isl_space_copy(sp)); // Create a transformation map that transforms the schedule. for (int i = 0; i < isl_map_dim (identity, isl_dim_out); i++) if (i == dim0) { isl_constraint *cst = isl_constraint_alloc_equality(isl_local_space_copy(lsp)); cst = isl_constraint_set_coefficient_si(cst, isl_dim_in, dim0, in_dim_coefficient); cst = isl_constraint_set_coefficient_si(cst, isl_dim_out, dim0, -out_dim_coefficient); // TODO: this should be inverted into const_conefficient. cst = isl_constraint_set_constant_si(cst, -const_conefficient); identity = isl_map_add_constraint(identity, cst); // isl_map_to_str(identity); } else { // Set equality constraints for dimensions isl_constraint *cst2 = isl_constraint_alloc_equality(isl_local_space_copy(lsp)); cst2 = isl_constraint_set_coefficient_si(cst2, isl_dim_in, i, 1); cst2 = isl_constraint_set_coefficient_si(cst2, isl_dim_out, i, -1); identity = isl_map_add_constraint(identity, cst2); } isl_map *final_identity = identity; // isl_map_to_str(final_identity); sched = isl_map_apply_range (sched, final_identity); // isl_map_to_str(sched); return sched; } } ================================================ FILE: lib/polyhedral/debug.cpp ================================================ #include #include namespace polyfp { int polyfp_indentation = 0; void str_dump(const std::string &str) { std::cout << str; } void str_dump(const std::string &str, const char *str2) { std::cout << str << " " << str2; } void str_dump(const char *str, const char *str2) { std::cout << str << " " << str2< // #include "function.h" namespace polyfp { polyfp::expr& polyfp::expr::operator=(polyfp::expr const & e) { this->_operator = e._operator; this->op = e.op; this->access_vector = e.access_vector; this->defined = e.defined; this->name = e.name; this->dtype = e.dtype; this->etype = e.etype; // Copy the integer value if (e.get_expr_type() == polyfp::e_val) { if (e.get_data_type() == polyfp::p_uint8) { this->uint8_value = 
e.get_uint8_value(); } else if (e.get_data_type() == polyfp::p_int8) { this->int8_value = e.get_int8_value(); } else if (e.get_data_type() == polyfp::p_uint16) { this->uint16_value = e.get_uint16_value(); } else if (e.get_data_type() == polyfp::p_int16) { this->int16_value = e.get_int16_value(); } else if (e.get_data_type() == polyfp::p_uint32) { this->uint32_value = e.get_uint32_value(); } else if (e.get_data_type() == polyfp::p_int32) { this->int32_value = e.get_int32_value(); } else if (e.get_data_type() == polyfp::p_uint64) { this->uint64_value = e.get_uint64_value(); } else if (e.get_data_type() == polyfp::p_int64) { this->int64_value = e.get_int64_value(); } else if (e.get_data_type() == polyfp::p_float32) { this->float32_value = e.get_float32_value(); } else if (e.get_data_type() == polyfp::p_float64) { this->float64_value = e.get_float64_value(); } } return *this; } // todo // polyfp::expr polyfp::expr::substitute(std::vector> substitutions) const // { // for (auto &substitution: substitutions) // if (this->is_equal(substitution.first)) // return substitution.second; // return apply_to_operands([&substitutions](const expr& e){ // return e.substitute(substitutions); // }); // } // polyfp::expr polyfp::expr::substitute_access(std::string original, std::string substitute) const { // expr && result = this->apply_to_operands([&original, &substitute](const expr& e){ // return e.substitute_access(original, substitute); // }); // if (result.get_op_type() == o_access && result.name == original) // { // result.name = substitute; // } // return result; // } polyfp::var::var(std::string name) { assert(!name.empty()); auto declared = var::declared_vars.find(name); if (declared != var::declared_vars.end()) { *this = declared->second; } else { this->name = name; this->etype = polyfp::e_var; this->dtype = global::get_loop_iterator_data_type(); // this->defined = true; if (true) { var::declared_vars.insert(std::make_pair(name, *this)); } } } polyfp::var::var(std::string 
name, polyfp::primitive_t type) { assert(!name.empty()); auto declared = var::declared_vars.find(name); if (declared != var::declared_vars.end()) { assert(declared->second.dtype == type); *this = declared->second; } else { this->name = name; this->etype = polyfp::e_var; this->dtype = type; // this->defined = true; if (true) { var::declared_vars.insert(std::make_pair(name, *this)); } } } polyfp::constant::constant(float value, polyfp::primitive_t t, polyfp::function *fct): float_value(value), func(fct), datatype(t){ this->name = global::generate_new_constant_name(); this->etype = polyfp::e_var; this->dtype = t; // fct->add_invariant(*this); fct->add_invariant(std::pair(name, this)); } polyfp::primitive_t constant::get_type() const { return dtype; } polyfp::p_max::p_max(polyfp::expr value1, polyfp::expr value2, polyfp::op_t o, polyfp::function *fct){ this->left_value = value1; this->right_value = value2; this->func = fct; this->_operator = o; this->etype = polyfp::e_op; this->op.push_back(value1); this->op.push_back(value2); // fct->add_invariant(*this); // fct->add_invariant(std::pair(name, this)); } polyfp::expr polyfp::expr::copy() const { return (*this); } expr polyfp::expr::operator+(polyfp::expr other) const { return polyfp::expr{o_add, *this, other}; } expr polyfp::expr::operator-(polyfp::expr other) const { return polyfp::expr{o_sub, *this, other}; } expr polyfp::expr::operator*(polyfp::expr other) const { return polyfp::expr{o_mul, *this, other}; } expr polyfp::expr::operator/(polyfp::expr other) const { return polyfp::expr{o_div, *this, other}; } expr polyfp::expr::operator%(polyfp::expr other) const { return polyfp::expr{o_mod, *this, other}; } // todo // expr memcpy(const buffer &from, const buffer &to) { // return expr(o_memcpy, var(p_void_ptr, from.get_name()), var(p_void_ptr, to.get_name())); // } // expr allocate(const buffer &b) // { // return expr{o_allocate, b.get_name()}; // } } ================================================ FILE: 
lib/polyhedral/function.cpp ================================================ #include "function.h" #include "generator.h" #include #include #include namespace polyfp{ isl_map *isl_map_align_range_dims(isl_map *map, int max_dim) { assert(map != NULL); int mdim = isl_map_dim(map, isl_dim_out); assert(max_dim >= mdim); // polyfp::str_dump("Input map:", isl_map_to_str(map)); const char *original_range_name = isl_map_get_tuple_name(map, isl_dim_out); map = isl_map_add_dims(map, isl_dim_out, max_dim - mdim); for (int i = mdim; i < max_dim; i++) { isl_space *sp = isl_map_get_space(map); isl_local_space *lsp = isl_local_space_from_space(sp); isl_constraint *cst = isl_constraint_alloc_equality(lsp); cst = isl_constraint_set_coefficient_si(cst, isl_dim_out, i, 1); map = isl_map_add_constraint(map, cst); } map = isl_map_set_tuple_name(map, isl_dim_out, original_range_name); // polyfp::str_dump("After alignment, map = ",isl_map_to_str(map)); return map; } function::function(std::string name) { this->name = name; this->ast = NULL; this->context_set = NULL; this->ctx = isl_ctx_alloc(); this->global_location = 0; }; isl_ctx *function::get_isl_ctx() const { return ctx; } const std::vector &function ::get_computations() const { return body; } const std::vector &function ::get_body() const { return body; } void polyfp::function::add_invariant(std::pair invar) { this->constant_list.insert(invar); } const std::map &function::get_invariants() const { return constant_list; } void polyfp::function::set_partition(std::string name, std::vector factors, std::vector types) { // std::vector types; // for (int dim = 0; dim < factors.size(); ++dim) { // types.push_back(type); // } std::tuple, std::vector> dims(name,factors,types); this->partition_map.push_back(dims); } std::vector, std::vector>> polyfp::function::get_partition_map() { return this->partition_map; } void polyfp::function::add_computation(compute *cpt) { assert(cpt != NULL); this->body.push_back(cpt); 
this->starting_computations.insert(cpt); } void polyfp::function::dump(bool s) const { if (s) { std::cout << "\n\nFunction \"" << this->name << "\"" << std::endl << std::endl; if (this->function_arguments.size() > 0) { std::cout << "Function arguments (polyfp buffers):" << std::endl; for (const auto &buf : this->function_arguments) { // buf->dump(s); } std::cout << std::endl; } // todo if (this->invariants.size() > 0) { std::cout << "Function invariants:" << std::endl; for (const auto &inv : this->invariants) { //todo // inv.dump(); } std::cout << std::endl; } if (this->get_program_context() != NULL) { std::cout << "Function context set: " << isl_set_to_str(this->get_program_context()) << std::endl; } std::cout << "Body " << std::endl; for (const auto &cpt : this->body) { cpt->dump(); } std::cout << std::endl; for (const auto &buf : this->placeholders_list) { std::cout << "Placeholder name: " << buf.second->get_name() << std::endl; buf.second->dump(false); } std::cout << std::endl << std::endl; } } int polyfp::function::get_max_identity_schedules_range_dim() const { int max_dim = 0; for (const auto &comp : this->get_computations()) { isl_map *sched = comp->gen_identity_schedule_for_time_space_domain(); int m = isl_map_dim(sched, isl_dim_out); max_dim = std::max(max_dim, m); } return max_dim; } const std::vector &function::get_iterator_names() const { return iterator_names; } isl_ast_node *function::get_isl_ast() const { assert((ast != NULL) && ("You should generate an isl ast first (gen_isl_ast()).")); return ast; } isl_ast_node *function::get_isl_ast1() const { assert((ast != NULL) && ("You should generate an isl ast first (gen_isl_ast()).")); return ast; } isl_union_set *polyfp::function::get_iteration_domain() const { isl_union_set *result = NULL; isl_space *space = NULL; if (!this->body.empty()) { space = isl_set_get_space(this->body[0]->get_iteration_domain()); } else { return NULL; } assert(space != NULL); result = isl_union_set_empty(space); for (const auto 
&cpt : this->body) { isl_set *cpt_iter_space = isl_set_copy(cpt->get_iteration_domain()); result = isl_union_set_union(isl_union_set_from_set(cpt_iter_space), result); } return result; } isl_union_map *polyfp::function::get_aligned_identity_schedules() const { isl_union_map *result; isl_space *space; if (this->body.empty() == false) { space = isl_map_get_space(this->body[0]->gen_identity_schedule_for_time_space_domain()); } else { return NULL; } assert(space != NULL); result = isl_union_map_empty(space); int max_dim = this->get_max_identity_schedules_range_dim(); for (const auto &comp : this->get_computations()) { isl_map *sched = comp->gen_identity_schedule_for_time_space_domain(); // polyfp::str_dump("Identity schedule for time space domain: ", isl_map_to_str(sched)); assert((sched != NULL) && "Identity schedule could not be computed"); sched = isl_map_align_range_dims(sched, max_dim); result = isl_union_map_union(result, isl_union_map_from_map(sched)); } return result; } void function::dump_sched_graph_dfs(compute * comp, std::unordered_set &visited) { // Do not visit anything that was already returned if (visited.find(comp) != visited.end()) return; visited.insert(comp); for (auto &edge: this->sched_graph[comp]) { const std::string level = ((edge.second == compute::root_dimension) ? 
"root" : std::to_string(edge.second)); polyfp::str_dump(comp->get_name() + "=[" + level + "]=>" + edge.first->get_name()); std::cout<<" "; dump_sched_graph_dfs(edge.first, visited); } } bool function::is_sched_graph_tree_dfs(compute * comp, std::unordered_set &visited) { // Do not visit anything that was already returned if (visited.find(comp) != visited.end()) return false; visited.insert(comp); for (auto &edge: this->sched_graph[comp]) { if (!is_sched_graph_tree_dfs(edge.first, visited)) return false; } return true; } bool function::is_sched_graph_tree() { if (this->starting_computations.size() != 1) { return false; } // Contains all nodes that have been visited std::unordered_set visited; for (auto &comp: this->starting_computations) { if (!is_sched_graph_tree_dfs(comp, visited)) { return false; } } return true; } void function::dump_sched_graph() { // polyfp::str_dump("Number of schedule graph roots is " + // std::to_string(this->starting_computations.size())); polyfp::str_dump("Number of schedule graph roots is " + std::to_string(this->starting_computations.size())); std::cout<starting_computations){ polyfp::str_dump(" * " + root->get_name()); std::cout< visited; polyfp::str_dump("Displaying schedule graph"); std::cout<starting_computations) { dump_sched_graph_dfs(comp, visited); } std::cout<is_sched_graph_tree()) { // polyfp::str_dump("this->is_sched_graph_tree(): true."); std::priority_queue level_to_check; std::unordered_map> level_queue; auto current_comp = *(this->starting_computations.begin()); bool comps_remain = true; while(comps_remain) { for (auto &edge: this->sched_graph[current_comp]) { if (level_queue[edge.second].size() == 0) level_to_check.push(edge.second); level_queue[edge.second].push_back(edge.first); } comps_remain = level_to_check.size() > 0; if (comps_remain) { int fuse_level = level_to_check.top(); auto next_comp = level_queue[fuse_level].front(); level_queue[fuse_level].pop_front(); next_comp->after_low_level((*current_comp), 
fuse_level); current_comp = next_comp; if (level_queue[fuse_level].size() == 0) level_to_check.pop(); } } } else { polyfp::str_dump("this->is_sched_graph_tree(): false."); } } int polyfp::function::get_max_schedules_range_dim() const { int max_dim = 0; for (const auto &comp : this->get_computations()) { isl_map *sched = comp->get_schedule(); int m = isl_map_dim(sched, isl_dim_out); max_dim = std::max(max_dim, m); } return max_dim; } isl_set *function::get_program_context() const { if (context_set != NULL) { return isl_set_copy(context_set); } else { return NULL; } } void polyfp::function::align_schedules() { int max_dim = this->get_max_schedules_range_dim(); for (auto &comp : this->get_computations()) { isl_map *dup_sched = comp->get_schedule(); assert((dup_sched != NULL) && "Schedules should be set before calling align_schedules"); dup_sched = isl_map_align_range_dims(dup_sched, max_dim); comp->set_schedule(dup_sched); // polyfp::str_dump("Generated time-space domain:", isl_map_to_str(dup_sched)); comp->name_unnamed_time_space_dimensions(); } } std::string function::get_name(){ return this->name; } void function::gen_time_space_domain() { this->gen_ordering_schedules(); this->align_schedules(); for (auto &comp : this->get_computations()) { comp->gen_time_space_domain(); } } void function::gen_loop_location() { auto leader_list = this->leader_computations; // std::cout<get_name() <<'\n'; if(a_leader->is_leader == true) { if(a_leader->after_level!= -2) { int level = a_leader->after_level; int current_level = level; int counter = 0; // auto dim_list = a_leader->get_loop_level_names(); auto dim_list = a_leader->final_loop_level_names; for(int i=0; iglobal_location; a_leader->iterators_location_map.insert(std::make_pair(dim_list[counter],next_level)); this->global_location+=1; counter+=1; } } else { auto nms = a_leader->final_loop_level_names; for (int i = 0; i< nms.size(); i++) { a_leader->iterators_location_map.insert(std::make_pair(nms[i],i)); this->global_location 
= nms.size(); } } } // std::cout <iterators_location_map.size()<<'\n'; // for(auto &map: a_leader->iterators_location_map){ // std::cout<components; // sort components by their value std::vector> temp; for (auto it = components.begin(); it != components.end(); it++) temp.push_back(std::make_pair(it->first, it->second)); std::sort(temp.begin(), temp.end(), [](const std::pair &x, const std::pair &y) -> int { return x.second < y.second; }); for (auto it = temp.begin(); it != temp.end(); it++) { // std::cout << it->first->get_name() << ':' << it->second << '\n'; // std::cout <first->after_level <<'\n'; auto comp = it->first; int level = comp->after_level; int current_level = level; int counter = 0; auto dim_list = comp->final_loop_level_names; auto leader_dim_map = comp->leader->iterators_location_map; if(level!=-1) { for(int i=0; iiterators_location_map.insert(std::make_pair(dim_list[counter],leader_dim_map[dim_list[counter]])); }else { // auto fct = global::get_implicit_function(); auto next_level = this->global_location; comp->iterators_location_map.insert(std::make_pair(dim_list[counter],next_level)); this->global_location += 1; } counter+=1; } }else{ // TODO } } } } isl_union_map *polyfp::function::get_schedule() const { isl_union_map *result = NULL; isl_space *space = NULL; if (!this->body.empty()) { space = isl_map_get_space(this->body[0]->get_schedule()); } else { return NULL; } assert(space != NULL); result = isl_union_map_empty(isl_space_copy(space)); for (const auto &cpt : this->body) { isl_map *m = isl_map_copy(cpt->get_schedule()); result = isl_union_map_union(isl_union_map_from_map(m), result); } result = isl_union_map_intersect_domain(result, this->get_iteration_domain()); return result; } isl_union_set *polyfp::function::get_trimmed_time_processor_domain() const { isl_union_set *result = NULL; isl_space *space = NULL; if (!this->body.empty()) { space = isl_set_get_space(this->body[0]->get_trimmed_time_processor_domain()); } else { return NULL; } 
assert(space != NULL); result = isl_union_set_empty(space); for (const auto &cpt : this->body) { isl_set *cpt_iter_space = isl_set_copy(cpt->get_trimmed_time_processor_domain()); result = isl_union_set_union(isl_union_set_from_set(cpt_iter_space), result); } return result; } const std::map &function::get_placeholders() const { return placeholders_list; } const std::map &function::get_fct_arguments() const { return fct_argument_list; } const std::map &function::get_global_arguments() const { return global_argument_list; } void function::add_placeholder(std::pair buf) { assert(!buf.first.empty() && ("Empty buffer name.")); assert((buf.second != NULL) && ("Empty buffer.")); this->placeholders_list.insert(buf); } void function::add_fct_argument(std::pair buf) { assert(!buf.first.empty() && ("Empty buffer name.")); assert((buf.second != NULL) && ("Empty buffer.")); this->fct_argument_list.insert(buf); } void function::add_global_argument(std::pair buf) { assert(!buf.first.empty() && ("Empty buffer name.")); assert((buf.second != NULL) && ("Empty buffer.")); this->global_argument_list.insert(buf); } void function::add_fct_argument() { this->fct_argument_added = true; } isl_union_map *polyfp::function::compute_dep_graph() { isl_union_map *result = NULL; for (const auto &consumer : this->get_computations()) { isl_union_map *accesses_union_map = NULL; std::vector < isl_map * > accesses_vector; generator::get_rhs_accesses(this, consumer, accesses_vector, false); if (!accesses_vector.empty()) { if (accesses_union_map == NULL) { isl_space *space = isl_map_get_space(accesses_vector[0]); assert(space != NULL); accesses_union_map = isl_union_map_empty(space); } for (size_t i = 0; i < accesses_vector.size(); ++i) { isl_map *reverse_access = isl_map_reverse(accesses_vector[i]); accesses_union_map = isl_union_map_union(isl_union_map_from_map(reverse_access), accesses_union_map); } //accesses_union_map = isl_union_map_intersect_range(accesses_union_map, 
isl_union_set_from_set(isl_set_copy(consumer->get_iteration_domain()))); //accesses_union_map = isl_union_map_intersect_domain(accesses_union_map, isl_union_set_from_set(isl_set_copy(consumer->get_iteration_domain()))); polyfp::str_dump("Accesses after filtering."); polyfp::str_dump(isl_union_map_to_str(accesses_union_map)); if (result == NULL) { result = isl_union_map_copy(accesses_union_map); isl_union_map_free(accesses_union_map); } else { result = isl_union_map_union(result, accesses_union_map); } } } if (result != NULL) { polyfp::str_dump(isl_union_map_to_str(result)); } else { polyfp::str_dump("Null."); } return result; } void function::gen_isl_ast() { // Check that time_processor representation has already been computed, assert(this->get_trimmed_time_processor_domain() != NULL); assert(this->get_aligned_identity_schedules() != NULL); isl_ctx *ctx = this->get_isl_ctx(); assert(ctx != NULL); isl_ast_build *ast_build; if (this->get_program_context() == NULL) { ast_build = isl_ast_build_alloc(ctx); } else { ast_build = isl_ast_build_from_context(isl_set_copy(this->get_program_context())); } isl_options_set_ast_build_atomic_upper_bound(ctx, 1); isl_options_get_ast_build_exploit_nested_bounds(ctx); isl_options_set_ast_build_group_coscheduled(ctx, 1); ast_build = isl_ast_build_set_after_each_for(ast_build, &polyfp::for_code_generator_after_for, NULL); // ast_build = isl_ast_build_set_at_each_domain(ast_build, &polyfp::generator::stmt_code_generator, // this); isl_id_list *iterators = isl_id_list_alloc(ctx, this->get_iterator_names().size()); if (this->get_iterator_names().size() > 0) { std::string name = generate_new_variable_name(); isl_id *id = isl_id_alloc(ctx, name.c_str(), NULL); iterators = isl_id_list_add(iterators, id); for (int i = 0; i < this->get_iterator_names().size(); i++) { name = this->get_iterator_names()[i]; id = isl_id_alloc(ctx, name.c_str(), NULL); iterators = isl_id_list_add(iterators, id); name = generate_new_variable_name(); id = 
isl_id_alloc(ctx, name.c_str(), NULL); iterators = isl_id_list_add(iterators, id); } ast_build = isl_ast_build_set_iterators(ast_build, iterators); } // Intersect the iteration domain with the domain of the schedule. isl_union_map *umap = isl_union_map_intersect_domain( isl_union_map_copy(this->get_aligned_identity_schedules()), isl_union_set_copy(this->get_trimmed_time_processor_domain())); // polyfp::str_dump("Schedule:", isl_union_map_to_str(this->get_schedule())); // polyfp::str_dump("Iteration domain:", // isl_union_set_to_str(this->get_iteration_domain())); // polyfp::str_dump("Trimmed Time-Processor domain:", // isl_union_set_to_str(this->get_trimmed_time_processor_domain())); // polyfp::str_dump("Trimmed Time-Processor aligned identity schedule:", // isl_union_map_to_str(this->get_aligned_identity_schedules())) ; // polyfp::str_dump("Identity schedule intersect trimmed Time-Processor domain:", // isl_union_map_to_str(umap)); const char *s; s = "[N,M,K] -> {s_2[i,j,k] -> [0, i, 0, j, 0, k, 10] : 0 <= i <= N and 0 <= j <= M and 0 <= k <= K; s_1[i, j, k] -> [0, i, 0, j, 0, k, 0] : 0 <= i <= N and 0 <= j <= M and 0 <= k <= 1 }"; // s_2[0, i, 0, j, 0, k, 0] -> [0, i' = i, 0, j' = j, 0, k' = k, 0] : 0 <= i <= 4095 and 0 <= j <= 4095 and 0 <= k <= 4095; // s_1[0, i, 0, j, 0, k, 10] -> [0, i' = i, 0, j' = j, 0, k' = k, 10] : 0 <= i <= 4095 and 0 <= j <= 4095 and 0 <= k <= 4095 isl_union_map *fmap = isl_union_map_read_from_str(ctx,s); this->ast = isl_ast_build_node_from_schedule_map(ast_build, umap); isl_ast_build_free(ast_build); } void polyfp::function::check_loop_fusion() { for (auto &comp: this->leader_computations) { // comp->get_loads_stores(); comp->load_vector.clear(); comp->store_vector.clear(); comp->map_loadstores.clear(); comp->get_all_loadstores(); // comp->dump_components(); // comp->dump_loads_stores(); comp->dump_all_loadstores(); } auto temp_computations = this->leader_computations; std::vector leader_list; int leader_num = temp_computations.size(); 
// for(int i=0; ileader_computation_index[comp_from]; // leader_list.push_back(comp_from_index); // } for(int i=0; iget_name()!=comp_second->get_name()) { bool has_edge = false; for(auto &store: comp_first->store_vector) { for(auto &load: comp_second->load_vector) { if(store->get_name() == load->get_name()) { has_edge = true; } } } if(has_edge == false) { auto ndim_first = comp_first->get_loop_levels_number(); auto ndim_second = comp_second->get_loop_levels_number(); auto dim_first = comp_first->get_iteration_variables(); auto dim_second = comp_second->get_iteration_variables(); bool is_legal = true; if(ndim_first == ndim_second) { for(int i=0; iafter(comp_first, ndim_first-1); comp_second->refused = true; this->refused = true; for(int i=0; iget_loop_level_names().size(); i++) { comp_second->temp_access_map.insert(std::pair(comp_second->get_loop_level_names()[i],comp_first->get_loop_level_names()[i])); } std::vector new_placeholder_index; auto temp_placeholder_index = comp_first->get_placeholder_dims(); auto original_placeholder_index = comp_second->get_placeholder_dims(); for(int i=0; iget_placeholder_dims()[i]; tvar.set_name(comp_second->temp_access_map[comp_second->get_placeholder_dims()[i].get_name()]); new_placeholder_index.push_back(tvar); // for(auto &kv: original_placeholder_index){ // if(kv.get_expr_type() == polyfp::e_op){ // }else{ // auto t = temp_placeholder_index[i].get_name(); // std::cout<set_placeholder_dims(new_placeholder_index); comp_second->set_loop_level_names(comp_first->get_loop_level_names()); } } } } } void polyfp::function::dependence_analysis() { auto temp_computations = this->leader_computations; for(auto &comp: temp_computations) { comp->compute_dependence_vectors(); comp->auto_loop_transformation(); } auto modified_computations = this->leader_computations; if(temp_computations.size()<=10) { this->check_loop_fusion(); } } void polyfp::function::dfs(int pos, int top, int end, int map[500][500], int n, int v[100],int 
stack[550])//从pos点开始访问 { // std::cout<<"DFSING"< path; for(i=0;ipaths.push_back(path); return; } v[pos]=1; stack[top++]=pos; for(i=1;i<=n;i++) { if(!v[i]&&map[pos][i]) this->dfs(i,top,end,map,n,v,stack); } v[pos]=0; top--; } void polyfp::function::compute_dependency_graph(){ int map[500][500]={0}; std::vector leader_list; int leader_num = this->leader_computations.size(); for(int i=0; ileader_computation_index[comp_from]; leader_list.push_back(comp_from_index); } for(int i=0; ileader_computation_index[comp_from]; for(int j=i; jleader_computation_index[comp_to]; if(comp_from->get_name()!=comp_to->get_name()) { bool has_edge = false; for(auto &store: comp_from->store_vector) { for(auto &load: comp_to->load_vector) { if(store->get_name() == load->get_name()) { has_edge = true; } } } if(has_edge == true) { map[comp_from_index][comp_to_index] = 1; std::vector::iterator it = find(leader_list.begin(), leader_list.end(), comp_to_index); if ( it!=leader_list.end()) { leader_list.erase(it); } } } } } std::vector leafs; for(auto &comp: this->leader_computations) { if(comp->is_leaf == true) { leafs.push_back(this->leader_computation_index[comp]); } } if(this->leader_computations.size()<=30) { for(auto &leader: leader_list){ for(auto &leaf: leafs){ int stack[550],v[500]={0},top=0,n=this->leader_computations.size(),start=leader,end=leaf; this->dfs(start,top,end,map,n,v,stack); } } } } void polyfp::function::auto_DSE(std::string path) { this->auto_DSE_loop_transformation(); for(auto &comp: this->leader_computations) { if(comp->is_skewed_inDSE == true) { this->dump_schedule(path); return; } } this->evaluate_func(); auto comp = this->update_latency(); this->best_latency = this->current_latency; this->best_dsp_usage = 9999; int factor = 1; this->auto_DSE_tile_size(comp,factor,path); std::vector temp; for(auto &comp: this->leader_computations) { comp->set_schedule(comp->original_schedule); comp->set_loop_level_names(comp->original_loop_level_name); comp->directive_map.clear(); 
comp->is_unrolled = false; comp->unroll_factor.clear(); comp->unroll_dimension.clear(); comp->tile_map.clear(); comp->tile_size_map.clear(); comp->access_map.clear(); comp->final_loop_level_names.clear(); comp->final_loop_level_names = comp->final_loop_level_names_reserved; if(comp->is_optimized == true) { if(comp->final_strategy.size()!=0) { comp->apply_opt_strategy(comp->final_strategy); } else { auto iterators = comp->get_iteration_variables(); int size = iterators.size(); std::map iterator_map; for(auto &iter: iterators) { int loc = comp->get_loop_level_number_from_dimension_name(iter.get_name()); iterator_map[loc] = iter; } if(size >= 3) { comp->pipeline(iterator_map[size-3+2],1); }else if(size == 2) { comp->pipeline(iterator_map[1],1); }else if(size == 1) { comp->pipeline(iterator_map[0],1); } } } else { auto iterators = comp->get_iteration_variables(); int size = iterators.size(); std::map iterator_map; for(auto &iter: iterators) { int loc = comp->get_loop_level_number_from_dimension_name(iter.get_name()); iterator_map[loc] = iter; } if(size >= 3) { comp->pipeline(iterator_map[size-3+2],1); } else if(size == 2) { comp->pipeline(iterator_map[1],1); } if(size == 1) { comp->pipeline(iterator_map[0],1); } } } this->dump_schedule(path); } void polyfp::function::auto_DSE_loop_transformation() { for (auto &comp: this->leader_computations) { comp->get_all_loadstores(); comp->dump_all_loadstores(); } this->dependence_analysis(); for (int i=0; ileader_computations.size(); i++) { this->leader_computation_index[leader_computations[i]] = i; this->leader_computations[i]->original_schedule = leader_computations[i]->get_schedule(); std::vector current_name_list = this->leader_computations[i]->get_loop_level_names(); int final_size = this->leader_computations[i]->final_loop_level_names.size(); int current_size = current_name_list.size(); if(final_size == current_size) { this->leader_computations[i]->final_loop_level_names = current_name_list; 
this->leader_computations[i]->final_loop_level_names_reserved = current_name_list; } else if(final_size < current_size) { for(int i=0; ileader_computations[i]->final_loop_level_names[i] = current_name_list[i]; this->leader_computations[i]->final_loop_level_names_reserved[i] = current_name_list[i]; } } this->leader_computations[i]->original_loop_level_name = leader_computations[i]->get_loop_level_names(); for(auto &part:this->leader_computations[i]->components) { part.first->original_loop_level_name = part.first->get_loop_level_names(); part.first->original_schedule = part.first->get_schedule(); } if(leader_computations[i]->is_leaf == true) { this->leaf_computations.push_back(leader_computations[i]); } } for(auto &comp:this->get_body()) { std::vector current_name_list = comp->get_loop_level_names(); int final_size = comp->final_loop_level_names.size(); int current_size = current_name_list.size(); if(final_size == current_size) { comp->final_loop_level_names = current_name_list; comp->final_loop_level_names_reserved = current_name_list; }else if(final_size < current_size){ for(int i=0; ifinal_loop_level_names[i] = current_name_list[i]; comp->final_loop_level_names_reserved[i] = current_name_list[i]; } } } if(this->leader_computations.size()<=30){ this->compute_dependency_graph(); } } void polyfp::function::dump_schedule(std::string path) { for(auto &comp: this->get_body()) { comp->iterators_location_map.clear(); this->global_location = 0; } this->gen_loop_location(); this->gen_time_space_domain(); this->gen_isl_ast(); mlir::MLIRContext context; auto manager = polyfp::MLIRGenImpl(context); int level = 0; context.disableMultithreading(); context.getOrLoadDialect(); context.getOrLoadDialect(); context.getOrLoadDialect(); context.getOrLoadDialect(); context.getOrLoadDialect(); context.getOrLoadDialect(); manager.mlirGen1(*this,this->get_isl_ast(),level,true, false, false); bool skew_flag = false; for(auto &comp : this->leader_computations) { if(comp->is_skewed_inDSE == 
true){ skew_flag = true; } int index = this->leader_computation_index[comp]; int position = manager.start_loops_position[index]; for(auto &comp : this->leader_computations) { for(auto &kv : comp->get_directive_map()) { if(kv.second == "pipeline") { int loc_2 = comp->get_loop_level_number_from_dimension_name(kv.first); int loc = comp->iterators_location_map[kv.first]; mlir::scalehls::setLoopDirective(manager.ops[loc], true, comp->II, false, false); for(int i=1; i<=loc_2; i++) { mlir::scalehls::setLoopDirective(manager.ops[loc-i], false, comp->II, false, true); } } } for(auto &sub_comps: comp->components) { auto sub_comp = sub_comps.first; for(auto &kv : sub_comp->get_directive_map()) { if(kv.second == "pipeline"){ int loc_2 = sub_comp->get_loop_level_number_from_dimension_name(kv.first); int loc = sub_comp->iterators_location_map[kv.first]; mlir::scalehls::setLoopDirective(manager.ops[loc], true, sub_comp->II, false, false); for(int i=1; i<=loc_2; i++) { mlir::scalehls::setLoopDirective(manager.ops[loc-i], false, sub_comp->II, false, true); } } } } } } auto map = manager.get_argument_map(); mlir::scalehls::setTopFuncAttr(manager.get_funcs()[0]); mlir::scalehls::applyFuncPreprocess(manager.get_funcs()[0], true); for(auto &comp: this->leader_computations) { auto iterators = comp->get_iteration_variables(); int size = iterators.size(); if(size==1) { var i0("i0"), j0("j0"),k0("k0"), i1("i1"), j1("j1"),k1("k1"); } if(comp->is_unrolled == true&&size!=1) { for(int i=0; iunroll_dimension.size(); i++) { int loc = comp->iterators_location_map[comp->unroll_dimension[i].get_name()]; if(comp->unroll_factor[i] != -1) { mlir::loopUnrollUpToFactor(manager.ops[loc],comp->unroll_factor[i]); }else{ mlir::loopUnrollFull(manager.ops[loc]); } } for(auto &sub_comps:comp->components) { auto sub_comp = sub_comps.first; for(int i=0; iunroll_dimension.size(); i++) { if(sub_comp->unroll_dimension.size()!=0) { int loc = sub_comp->iterators_location_map[sub_comp->unroll_dimension[i].get_name()]; 
if(sub_comp->unroll_factor[i] != -1) { mlir::loopUnrollUpToFactor(manager.ops[loc],sub_comp->unroll_factor[i]); } else { mlir::loopUnrollFull(manager.ops[loc]); } } } } } } mlir::scalehls::applyMemoryOpts(manager.get_funcs()[0]); mlir::scalehls::applyAutoArrayPartition(manager.get_funcs()[0]); if(this->refused == true){ //TODO: there exists a bug in the latest version of POM, and the DSE of partition factor is not working. // This may affect the best array partition results generated by POM. We will fix the bug as soon as possible. auto temp_p = this->get_placeholders(); auto temp_p_d = temp_p["A"]; if(temp_p_d->get_dim_sizes()[0] == 32) { this->set_partition("A",{16,16},{"cyclic","cyclic"}); }else if(temp_p_d->get_dim_sizes()[0] == 64) { this->set_partition("A",{32,32},{"cyclic","cyclic"}); }else { this->set_partition("A",{16,32},{"cyclic","cyclic"}); } auto map = manager.get_array_map(); for(auto &kv: this->get_partition_map()) { SmallVector kinds; SmallVector factors; for(auto &factor: std::get<1>(kv)) { factors.push_back(factor); } for(auto &type: std::get<2>(kv)) { if(type == "cyclic"){ kinds.push_back(mlir::scalehls::hls::PartitionKind::CYCLIC); }else if(type == "block"){ kinds.push_back(mlir::scalehls::hls::PartitionKind::BLOCK); }else if(type == "none"){ kinds.push_back(mlir::scalehls::hls::PartitionKind::NONE); } } mlir::scalehls::applyArrayPartition(manager.get_funcs()[0].getArgument(map[std::get<0>(kv)]), factors, kinds,/*updateFuncSignature=*/true); // manager.getModule().dump(); } } SmallVector factors; std::string errorMessage; std::string pwd = std::filesystem::current_path().parent_path(); auto configFile = mlir::openInputFile(pwd+"/samples/config.json", &errorMessage); if (!configFile) { llvm::errs() << errorMessage << "\n"; } auto config = llvm::json::parse(configFile->getBuffer()); if (!config) { llvm::errs() << "failed to parse the target spec json file\n"; } auto configObj = config.get().getAsObject(); if (!configObj) { llvm::errs() << "support 
an object in the target spec json file, found " "something else\n"; } unsigned maxDspNum =ceil(configObj->getInteger("dsp").getValueOr(220)); this->dsp_max = maxDspNum; llvm::StringMap latencyMap; mlir::scalehls::getLatencyMap(configObj, latencyMap); llvm::StringMap dspUsageMap; mlir::scalehls::getDspUsageMap(configObj, dspUsageMap); // TODO: Parameterize initial parallel factor, max DSE iteration, max unroll && partition factor int loc = 0; int total_dsp = 0; long total_latency = 0; if(manager.start_loops_position.size() == 0) { manager.start_loops_position.push_back(0); } if(skew_flag == false) { mlir::scalehls::ScaleHLSEstimator(latencyMap, dspUsageMap, true).estimateFunc(manager.funcs[0]); for(auto &loop: manager.start_loops_position) { mlir::scalehls::ScaleHLSEstimator(latencyMap, dspUsageMap, true).estimateLoop(manager.ops[loop],manager.funcs[0]); // manager.getModule().dump(); auto latency = mlir::scalehls::getTiming(manager.ops[loop]).getLatency(); auto dspNum = mlir::scalehls::getResource(manager.ops[loop]).getDsp(); } } auto module = manager.getModule(); // mlir::verify(module); // if (mlir::failed(mlir::verify(module))) { // module->emitError("module verification error"); // // module->dump(); // } // module->dump(); std::error_code error; std::string s = this->get_name(); std::string path1 = path+s+".mlir"; llvm::raw_fd_ostream os(path1, error); os << *module; // std::cout<<"Note: "+s+".cpp has been generated!"<get_body()){ comp->iterators_location_map.clear(); this->global_location = 0; } for(auto &comp: this->leader_computations){ if(comp->is_optimized == true && this->current_opt_comp!= NULL &&this->current_opt_comp->get_name()!=comp->get_name()) { comp->set_schedule(comp->original_schedule); comp->set_loop_level_names(comp->original_loop_level_name); comp->directive_map.clear(); comp->is_unrolled = false; comp->unroll_factor.clear(); comp->unroll_dimension.clear(); comp->tile_map.clear(); comp->tile_size_map.clear(); comp->access_map.clear(); 
comp->final_loop_level_names.clear(); comp->final_loop_level_names = comp->final_loop_level_names_reserved; if(comp->final_strategy.size()!=0) { comp->apply_opt_strategy(comp->final_strategy); }else { auto iterators = comp->get_iteration_variables(); int size = iterators.size(); std::map iterator_map; for(auto &iter: iterators) { int loc = comp->get_loop_level_number_from_dimension_name(iter.get_name()); // int loc = comp->iterators_location_map(iter.get_name()); // std::cout<= 3) { comp->pipeline(iterator_map[size-3+2],1); for(auto &sub_comps: comp->components) { auto sub_comp = sub_comps.first; //TODO, right pipeline level if(sub_comp->after_level !=2) { sub_comp->pipeline(iterator_map[size-3+2],1); } } } else if(size == 2) { comp->pipeline(iterator_map[1],1); for(auto &sub_comps: comp->components) { auto sub_comp = sub_comps.first; //TODO, right pipeline level if(sub_comp->after_level !=1) { sub_comp->pipeline(iterator_map[1],1); } } }else if(size == 1) { comp->pipeline(iterator_map[0],1); } } } else if(comp->is_optimized == true && this->current_opt_comp!= NULL &&this->current_opt_comp->get_name()==comp->get_name()) { comp->set_schedule(comp->original_schedule); comp->set_loop_level_names(comp->original_loop_level_name); comp->directive_map.clear(); comp->is_unrolled = false; comp->unroll_factor.clear(); comp->unroll_dimension.clear(); comp->tile_map.clear(); comp->tile_size_map.clear(); comp->access_map.clear(); comp->final_loop_level_names.clear(); comp->final_loop_level_names = comp->final_loop_level_names_reserved; if(comp->temp_strategy.size()!=0) { comp->apply_opt_strategy(comp->temp_strategy); } else if(comp->final_strategy.size()!=0) { comp->apply_opt_strategy(comp->final_strategy); } else { auto iterators = comp->get_iteration_variables(); int size = iterators.size(); std::map iterator_map; for(auto &iter: iterators) { int loc = comp->get_loop_level_number_from_dimension_name(iter.get_name()); // int loc = comp->iterators_location_map(iter.get_name()); 
// std::cout<= 3) { comp->pipeline(iterator_map[size-3+2],1); for(auto &sub_comps: comp->components) { auto sub_comp = sub_comps.first; // TODO, right pipeline level if(sub_comp->after_level !=2){ sub_comp->pipeline(iterator_map[size-3+2],1); } } }else if(size == 2) { comp->pipeline(iterator_map[1],1); for(auto &sub_comps: comp->components){ auto sub_comp = sub_comps.first; // TODO, right pipeline level if(sub_comp->after_level !=1) { sub_comp->pipeline(iterator_map[1],1); } } }else if(size == 1) { comp->pipeline(iterator_map[0],1); } } } else if(this->current_opt_comp!= NULL && this->current_opt_comp->get_name()!=comp->get_name()) { comp->set_schedule(comp->original_schedule); comp->set_loop_level_names(comp->original_loop_level_name); comp->directive_map.clear(); comp->is_unrolled = false; comp->unroll_factor.clear(); comp->unroll_dimension.clear(); comp->tile_map.clear(); comp->tile_size_map.clear(); comp->access_map.clear(); comp->final_loop_level_names.clear(); comp->final_loop_level_names = comp->final_loop_level_names_reserved; auto iterators = comp->get_iteration_variables(); int size = iterators.size(); std::map iterator_map; for(auto &iter: iterators) { int loc = comp->get_loop_level_number_from_dimension_name(iter.get_name()); iterator_map[loc] = iter; } if(size >= 3) { comp->pipeline(iterator_map[size-3+2],1); for(auto &sub_comps: comp->components){ auto sub_comp = sub_comps.first; // TODO, right pipeline level if(sub_comp->after_level !=2){ sub_comp->pipeline(iterator_map[size-3+2],1); } } }else if(size == 2) { comp->pipeline(iterator_map[1],1); for(auto &sub_comps: comp->components) { auto sub_comp = sub_comps.first; // TODO, right pipeline level if(sub_comp->after_level !=1) { sub_comp->pipeline(iterator_map[1],1); } } }else if(size == 1) { comp->pipeline(iterator_map[0],1); } } else if(this->current_opt_comp!= NULL && this->current_opt_comp->get_name()==comp->get_name()) { comp->set_schedule(comp->original_schedule); 
comp->set_loop_level_names(comp->original_loop_level_name); comp->directive_map.clear(); comp->is_unrolled = false; comp->unroll_factor.clear(); comp->unroll_dimension.clear(); comp->tile_map.clear(); comp->tile_size_map.clear(); comp->access_map.clear(); comp->final_loop_level_names.clear(); comp->final_loop_level_names = comp->final_loop_level_names_reserved; auto iterators = comp->get_iteration_variables(); int size = iterators.size(); std::map iterator_map; if(comp->temp_strategy.size()!=0) { comp->apply_opt_strategy(comp->temp_strategy); } else { auto iterators = comp->get_iteration_variables(); int size = iterators.size(); std::map iterator_map; for(auto &iter: iterators) { int loc = comp->get_loop_level_number_from_dimension_name(iter.get_name()); iterator_map[loc] = iter; } if(size >= 3) { comp->pipeline(iterator_map[size-3+2],1); for(auto &sub_comps: comp->components) { auto sub_comp = sub_comps.first; // TODO, right pipeline level if(sub_comp->after_level !=2) { sub_comp->pipeline(iterator_map[size-3+2],1); } } } else if(size == 2) { comp->pipeline(iterator_map[1],1); for(auto &sub_comps: comp->components){ auto sub_comp = sub_comps.first; // TODO, right pipeline level if(sub_comp->after_level !=1) { sub_comp->pipeline(iterator_map[1],1); } } } else if(size == 1) { comp->pipeline(iterator_map[0],1); } } } else{ // std::cout<< comp->get_name()+"evaluation initialization failed"<gen_loop_location(); this->gen_time_space_domain(); this->gen_isl_ast(); mlir::MLIRContext context; auto manager = polyfp::MLIRGenImpl(context); int level = 0; context.disableMultithreading(); context.getOrLoadDialect(); context.getOrLoadDialect(); context.getOrLoadDialect(); context.getOrLoadDialect(); context.getOrLoadDialect(); context.getOrLoadDialect(); manager.mlirGen1(*this,this->get_isl_ast(),level,true, false, false); for(auto &comp : this->leader_computations) { int index = this->leader_computation_index[comp]; int position = manager.start_loops_position[index]; //TODO: 
for(auto &comp : this->leader_computations) { for(auto &kv : comp->get_directive_map()) { if(kv.second == "pipeline") { int loc_2 = comp->get_loop_level_number_from_dimension_name(kv.first); int loc = comp->iterators_location_map[kv.first]; // index = loc + index; mlir::scalehls::setLoopDirective(manager.ops[loc], true, comp->II, false, false); for(int i=1; i<=loc_2; i++) { mlir::scalehls::setLoopDirective(manager.ops[loc-i], false, comp->II, false, true); } } } } } auto map = manager.get_argument_map(); mlir::scalehls::setTopFuncAttr(manager.get_funcs()[0]); mlir::scalehls::applyFuncPreprocess(manager.get_funcs()[0], true); for(auto &comp: this->leader_computations) { if(comp->is_unrolled == true) { for(int i=0; iunroll_dimension.size(); i++) { int loc = comp->iterators_location_map[comp->unroll_dimension[i].get_name()]; // loc = loc + bias; if(comp->unroll_factor[i] != -1) { mlir::loopUnrollUpToFactor(manager.ops[loc],comp->unroll_factor[i]); } else { mlir::loopUnrollFull(manager.ops[loc]); } } for(auto &sub_comps:comp->components) { auto sub_comp = sub_comps.first; for(int i=0; iunroll_dimension.size(); i++) { int loc = sub_comp->iterators_location_map[sub_comp->unroll_dimension[i].get_name()]; if(sub_comp->unroll_factor[i] != -1) { mlir::loopUnrollUpToFactor(manager.ops[loc],sub_comp->unroll_factor[i]); } else { mlir::loopUnrollFull(manager.ops[loc]); } } } } } mlir::scalehls::applyMemoryOpts(manager.get_funcs()[0]); mlir::scalehls::applyAutoArrayPartition(manager.get_funcs()[0]); SmallVector factors; std::string errorMessage; std::string pwd = std::filesystem::current_path().parent_path(); auto configFile = mlir::openInputFile(pwd+"/samples/config.json", &errorMessage); if (!configFile) { llvm::errs() << errorMessage << "\n"; } auto config = llvm::json::parse(configFile->getBuffer()); if (!config) { llvm::errs() << "failed to parse the target spec json file\n"; } auto configObj = config.get().getAsObject(); if (!configObj) { llvm::errs() << "support an object 
in the target spec json file, found " "something else\n"; } unsigned maxDspNum =ceil(configObj->getInteger("dsp").getValueOr(220)); this->dsp_max = maxDspNum; auto name = this->get_name(); // TODO: Vitis_HLS 2022.2 improved its scheduling methods // and two data paths of test_3mm can be executed in parallel. // Therefore, the actual DSP usage is twice that estimated by the cost model. // A profiler needs to be added to analyze the potential parallel datapath // and adjust the DSP usage accordingly. if(name.substr(0, 8) == "test_3mm") { this->dsp_max = this->dsp_max/2; } llvm::StringMap latencyMap; mlir::scalehls::getLatencyMap(configObj, latencyMap); llvm::StringMap dspUsageMap; mlir::scalehls::getDspUsageMap(configObj, dspUsageMap); int loc = 0; int total_dsp = 0; long total_latency = 0; if(manager.start_loops_position.size() == 0) { manager.start_loops_position.push_back(0); } bool consistent_flag_flag; for(auto &loop: manager.start_loops_position ) { mlir::scalehls::ScaleHLSEstimator(latencyMap, dspUsageMap, true).estimateLoop(manager.ops[loop],manager.funcs[0]); // manager.getModule().dump(); auto latency = mlir::scalehls::getTiming(manager.ops[loop]).getLatency(); // std::cout<<"latency: "+std::to_string(latency)<leader_computations[loc]->latency = latency; this->leader_computations[loc]->dsp = dspNum; this->leader_computations[loc]->minII = minII; if(this->leader_computations[loc]->best_latency>=latency) { this->leader_computations[loc]->best_latency = latency; } else { if(this->current_opt_comp->get_name()==this->leader_computations[loc]->get_name()) { consistent_flag_flag = true; } this->consistent_flag = false; } if(consistent_flag_flag==true) { this->consistent_flag = true; } // total_dsp+=dspNum; total_latency+=latency; // std::cout<<"total_latency: "+std::to_string(total_latency)<latency_map[loc] = latency; this->resource_map[loc] = dspNum; loc+=1; } mlir::scalehls::ScaleHLSEstimator(latencyMap, dspUsageMap, true).estimateFunc(manager.funcs[0]); 
total_dsp = mlir::scalehls::getResource(manager.funcs[0]).getDsp(); this->dsp_usage = total_dsp; this->current_latency = total_latency; // std::cout<<"current latency"+std::to_string(total_latency)<dsp_usage>this->dsp_max){ // this->new_strategy = false; // } // manager.getModule().dump(); } /* Design-space exploration over tile sizes for `comp`. Resets comp's schedule, loop-level names and directive, unroll, tile and access state to the originals; keeps only the three innermost iterators when there are more than three; sets scale = 16 * 2^(factor-1); then, for each candidate tile-size tuple, applies tile, pipeline and unroll, calls evaluate_func() and appends a row to the CSV file at path + function name + ".csv". A computation that cannot be improved further is pushed onto finish_list and the search recurses on another computation. NOTE(review): the branches guarded by `leader_computations.size() == -1` further down are marked TODO and look deliberately disabled - confirm. */ void polyfp::function::auto_DSE_tile_size(polyfp::compute *comp, int factor, std::string path) { // std::cout<<"Currently optimized compute: "<get_name()<set_schedule(comp->original_schedule); comp->set_loop_level_names(comp->original_loop_level_name); comp->directive_map.clear(); comp->is_unrolled = false; comp->unroll_factor.clear(); comp->unroll_dimension.clear(); comp->tile_map.clear(); comp->tile_size_map.clear(); comp->access_map.clear(); auto iterators = comp->get_iteration_variables(); std::vector temp_iterators; int temp_size = iterators.size(); if(temp_size>3) { int border = temp_size-3; for(auto &iter: iterators) { int loc = comp->get_loop_level_number_from_dimension_name(iter.get_name()); if(loc>=border) { temp_iterators.push_back(iter); } } iterators.clear(); iterators=temp_iterators; } std::vector dim_ranges; std::map> dim_tile_sizes; bool not_2_pow = false; int count = 0; for(auto &iter: iterators) { int lower = stoi(iter.get_lower().to_str()); int upper = stoi(iter.get_upper().to_str()); int range = upper-lower; dim_ranges.push_back(range); std::vector temp; if(range%32 != 0) { not_2_pow = true; for(int i=2; i iterator_map; int size = iterators.size(); scale = 16*pow(2,factor-1); for(auto &iter: iterators) { int loc = comp->get_loop_level_number_from_dimension_name(iter.get_name()); iterator_map[loc] = iter; } if(comp->is_optimized == true ) { if(comp->current_factor < comp->largest_factor && comp->opt_finished == false) { comp->current_factor+=1; factor = comp->current_factor; scale = 16*pow(2,comp->current_factor-1); } else{ this->finish_list.push_back(comp->get_name()); if(comp->current_strategy.size()!=0) { comp->final_strategy = comp->current_strategy; 
}else{ // TODO // std::cout<<"no final strategy"<leader_computations.size()!=1) { int path_index = this->get_longest_path(); std::vector current_longest_path = paths[path_index]; std::vector current_longest_path_latency; std::map current_longest_map; int num = current_longest_path.size(); for(int i=0; ilatency_map[current_longest_path[i]]; current_longest_path_latency.push_back(temp_latency); current_longest_map.insert(std::make_pair(temp_latency,current_longest_path[i])); } std::sort(current_longest_path_latency.begin(),current_longest_path_latency.end(),std::greater()); for(int i=0; ipath_map[path_index][node_index]; std::map::iterator it; polyfp::compute *comp; for( it= this->leader_computation_index.begin();it!=this->leader_computation_index.end();it++) { if(it->second==final_index) { comp = it->first; std::string name = comp->get_name(); if (std::find(finish_list.begin(), finish_list.end(), name) == finish_list.end()) { auto_DSE_tile_size(comp, 1,path); return; } } } } } return; } } else { comp->is_optimized = true; comp->current_factor = factor; } int factor1=1; int factor2=1; int factor3=1; std::vector> tilesize_list; std::vector current_design; std::vector final_design; // std::vector final_strategy; // std::vector current_strategy; // Print header row. 
std::string s = this->get_name(); std::string path1 = path+s+".csv"; std::ifstream ifs(path1,std::ios::in); char ch; ifs>>ch; std::ofstream myfile; myfile.open(path1,std::ios::app); if(ifs.eof()) { for (unsigned i = 0; i < size; ++i) { myfile << "l" << i << ","; } myfile << "cycle,dsp,ii\n"; } if(size >= 3) { // TODO, here 4 is desided by the scale if(not_2_pow == false) { //config: 5,3 for(int i = 0; i<5+factor; i++) { factor1 = pow(2,i); for(int j = 0; j<3+factor-i; j++) { factor2 = pow(2,j); factor3 = scale/factor2/factor1; tilesize_list.push_back({factor1,factor2,factor3}); // std::cout<<"tile factor: "; // std::cout< dim0 = dim_tile_sizes[0]; std::vector dim1 = dim_tile_sizes[1]; std::vector dim2 = dim_tile_sizes[2]; if(dim0.size()==0) { dim0.push_back(1); } if(dim1.size()==0) { dim1.push_back(1); } for(auto &size0: dim0) { for(auto &size1: dim1) { for(auto &size2: dim2) { tilesize_list.push_back({size0,size1,size2}); std::cout<<"tile factor: "; std::cout<current_factor=3; } bool larger_factor = true; if(larger_factor == true) { for(auto &tile_size: tilesize_list) { comp->set_schedule(comp->original_schedule); comp->set_loop_level_names(comp->original_loop_level_name); comp->directive_map.clear(); comp->is_unrolled = false; comp->unroll_factor.clear(); comp->unroll_dimension.clear(); comp->tile_map.clear(); comp->tile_size_map.clear(); comp->access_map.clear(); comp->opt_finished = false; var i0("i0"), j0("j0"),k0("k0"), i1("i1"), j1("j1"),k1("k1"); if(tile_size[0]<=3 && tile_size[1]<=16 && tile_size[2]<=16) { // if(tile_size[0]<=16 && tile_size[1]<32 && tile_size[2]<32){ // if(tile_size[0]<2 && tile_size[1]<4 && tile_size[2]<4){ int temp_index = comp->get_iteration_variables().size()-3; // std::cout<tile(iterator_map[temp_index],iterator_map[temp_index+1],iterator_map[temp_index+2],tile_size[0],tile_size[1],tile_size[2],i0, j0, k0, i1, j1, k1); } if(tile_size[2]!=1 && tile_size[1]!=1 && tile_size[0]!=1){ comp->pipeline(k0,1); comp->unroll(k1,-1); 
comp->unroll(j1,-1); comp->unroll(i1,-1); } if(tile_size[2]!=1 && tile_size[1]!=1 && tile_size[0]==1){ comp->pipeline(k0,1); comp->unroll(k1,-1); comp->unroll(j1,-1); } if(tile_size[2]!=1 && tile_size[1]==1 && tile_size[0]!=1){ comp->pipeline(k0,1); comp->unroll(k1,-1); comp->unroll(i1,-1); } if(tile_size[2]!=1 && tile_size[1]==1 && tile_size[0]==1){ comp->pipeline(k0,1); comp->unroll(k1,-1); // comp->unroll(i1,-1); } if(tile_size[2]==1 && tile_size[1]==1 && tile_size[0]==1){ int lower = stoi(iterator_map[temp_index+2].get_lower().to_str()); int upper = stoi(iterator_map[temp_index+2].get_upper().to_str()); int range = upper-lower; if(range<=7){ comp->pipeline(iterator_map[temp_index+1],1); comp->unroll(iterator_map[temp_index+2],-1); } } if(tile_size[2]==1 && tile_size[1]!=1 && tile_size[0]!=1){ int lower = stoi(iterator_map[temp_index+2].get_lower().to_str()); int upper = stoi(iterator_map[temp_index+2].get_upper().to_str()); int range = upper-lower; if(range<=6){ comp->pipeline(j0,1); comp->unroll(j1,-1); comp->unroll(i1,-1); comp->unroll(iterator_map[temp_index+2],-1); }else{ comp->pipeline(iterator_map[temp_index+2],1); comp->unroll(j1,-1); comp->unroll(i1,-1); } } for(auto &part:comp->components){ part.first->set_schedule(part.first->original_schedule); part.first->set_loop_level_names(part.first->original_loop_level_name); part.first->tile(iterator_map[temp_index+0],iterator_map[temp_index+1],iterator_map[temp_index+2],tile_size[0],tile_size[1],tile_size[2],i0, j0, k0, i1, j1, k1); if(tile_size[2]==1 && tile_size[1]!=1 && tile_size[0]!=1){ if(part.first->after_level == 2){ part.first->after(comp,j1); }else if(part.first->after_level == 0){ part.first->after(comp,i0); part.first->pipeline(iterator_map[temp_index+2],1); } // part.first->after(comp,j1); }else{ if(part.first->after_level == 2){ part.first->after(comp,k1); }else if(part.first->after_level == 0){ part.first->after(comp,iterator_map[temp_index+0]); 
part.first->pipeline(iterator_map[temp_index+2],1); //TODO part.first->unroll(k1,-1); part.first->unroll(j1,-1); } // part.first->after(comp,k1); } } int II = 1; this->current_opt_comp = comp; //TODO if(this->leader_computations.size() == -1){ this->evaluate_func(); if(this->current_latency < this->best_latency && this->dsp_max>= this->dsp_usage){ this->best_latency = this->current_latency; this->best_dsp_usage = this->dsp_usage; // std::cout<<"best_latency: "; // std::cout<dump_schedule(path); } }else { comp->temp_strategy = tile_size; this->evaluate_func(); auto latency = comp->latency; int dsp = comp->dsp; // std::cout<<"schedule: "+std::to_string(tile_size[0])+", "+std::to_string(tile_size[1])+", "+std::to_string(tile_size[2])+": "+std::to_string(latency)+": "+std::to_string(dsp)<update_latency(); // std::cout<<"after evaluation"<update_latency(); polyfp::compute * new_comp = NULL; if((this->current_latency < this->best_latency || this->consistent_flag == false) && this->dsp_max>=this->dsp_usage){ auto comp = this->update_latency(); int path_index = this->get_longest_path(); std::vector current_longest_path = paths[path_index]; std::vector current_longest_path_latency; std::map current_longest_map; int num = current_longest_path.size(); for(int i=0; ilatency_map[current_longest_path[i]]; current_longest_path_latency.push_back(temp_latency); current_longest_map.insert(std::make_pair(temp_latency,current_longest_path[i])); } std::sort(current_longest_path_latency.begin(),current_longest_path_latency.end(),std::greater()); bool comp_flag = false; for(int i=0; ipath_map[path_index][node_index]; // int final_index = current_longest_map[current_longest_path_latency[i]]; // std::cout<<"the final_index"+std::to_string(final_index); std::map::iterator it; polyfp::compute *comp1; for( it= this->leader_computation_index.begin();it!=this->leader_computation_index.end();it++) { if(it->second==final_index) { comp1 = it->first; std::string name = comp1->get_name(); if 
(std::find(finish_list.begin(), finish_list.end(), name) == finish_list.end()) { new_comp = comp1; comp_flag = true; break; } } } if(comp_flag == true) { break; } } if(new_comp == NULL) { return; } if(new_comp->get_name() != comp->get_name() && this->dsp_max>=this->dsp_usage) { this->best_latency = this->current_latency; final_design = tile_size; break; }else if(new_comp->get_name() == comp->get_name() &&this->current_latency < this->best_latency && this->dsp_max>= this->dsp_usage) { this->best_latency = this->current_latency; this->best_dsp_usage = this->dsp_usage; current_design = tile_size; long latency = comp->latency; int dsp = comp->dsp; }else{ // TODO } auto latency = comp->latency; int dsp = comp->dsp; } } auto latency = comp->latency; int dsp = comp->dsp; myfile << tile_size[0] << ","; myfile << tile_size[1] << ","; myfile << tile_size[2] << ","; myfile << latency<< ","; myfile << this->dsp_usage << ","; myfile << comp->minII << "\n"; } } if(final_design.size()!=0) { comp->final_strategy = final_design; comp->current_strategy = final_design; comp->apply_opt_strategy(comp->final_strategy); this->evaluate_func(); auto new_comp = this->update_latency(); auto_DSE_tile_size(new_comp, 1,path); } else if(current_design.size()!=0) { comp->current_strategy = current_design; comp->final_strategy = current_design; auto_DSE_tile_size(comp, 1,path); }else if(current_design.size()==0) { comp->opt_finished = true; auto_DSE_tile_size(comp, 1,path); } } myfile.close(); } else if(size == 2) { if(not_2_pow == false) { for(int j = 0; j<2+factor; j++) { factor1 = pow(2,j); factor2 = scale/factor1; tilesize_list.push_back({factor1,factor2}); } }else{ std::vector dim0 = dim_tile_sizes[0]; std::vector dim1 = dim_tile_sizes[1]; if(dim0.size()==0){ dim0.push_back(1); } for(auto &size0: dim0) { for(auto &size1: dim1) { tilesize_list.push_back({size0,size1}); } } comp->current_factor=3; } bool larger_factor = true; for(auto &tile_size: tilesize_list) { 
comp->set_schedule(comp->original_schedule); comp->set_loop_level_names(comp->original_loop_level_name); comp->directive_map.clear(); comp->is_unrolled = false; comp->unroll_factor.clear(); comp->unroll_dimension.clear(); comp->tile_map.clear(); comp->tile_size_map.clear(); comp->access_map.clear(); comp->opt_finished = false; var i0("i0"), j0("j0"), i1("i1"), j1("j1"); int lower1 = stoi(iterator_map[0].get_lower().to_str()); int upper1 = stoi(iterator_map[0].get_upper().to_str()); int range1 = upper1-lower1; int lower2 = stoi(iterator_map[1].get_lower().to_str()); int upper2 = stoi(iterator_map[1].get_upper().to_str()); int range2 = upper2-lower2; // if(tile_size[0]<=16 && tile_size[1]<=16){ if(tile_size[0]<32 && tile_size[1]<=32 && range1>tile_size[0] && range2>tile_size[1]) { // if(tile_size[0]<2 && tile_size[1]<4){ // for(auto &iter: comp) // std::cout<<"size1"<tile(iterator_map[0],iterator_map[1],tile_size[0],tile_size[1],i0, j0, i1, j1); if(tile_size[1]!=1&&tile_size[0]!=1) { comp->pipeline(j0,1); comp->unroll(j1,-1); comp->unroll(i1,-1); }else if(tile_size[1]==1&&tile_size[0]!=1) { comp->pipeline(iterator_map[1],1); comp->unroll(i1,-1); }else if(tile_size[0]==1&&tile_size[1]!=1) { comp->pipeline(j0,1); comp->unroll(j1,-1); } for(auto &part:comp->components) { part.first->set_schedule(part.first->original_schedule); part.first->set_loop_level_names(part.first->original_loop_level_name); part.first->directive_map.clear(); part.first->is_unrolled = false; part.first->unroll_factor.clear(); part.first->unroll_dimension.clear(); part.first->tile_map.clear(); part.first->tile_size_map.clear(); part.first->access_map.clear(); part.first->tile(iterator_map[0],iterator_map[1],tile_size[0],tile_size[1],i0, j0, i1, j1); if(tile_size[1]!=1&&tile_size[0]!=1) { if(part.first->after_level == 1) { part.first->after(comp,j1); }else if(part.first->after_level == 0) { part.first->pipeline(j0,1); part.first->after(comp,i0); // part.first->unroll(j1,-1); // 
part.first->unroll(i1,-1); } }else if(tile_size[1]==1&&tile_size[0]!=1) { if(part.first->after_level == 1) { part.first->after(comp,i1); }else if(part.first->after_level == 0) { part.first->after(comp,i0); part.first->pipeline(iterator_map[1],1); } // part.first->after(comp,i1); }else if(tile_size[0]==1&&tile_size[1]!=1) { if(part.first->after_level == 1) { // part.first->unroll(j1,-1); // std::cout<<"part.first->after(comp,j1); "<after(comp,j1); }else if(part.first->after_level == 0) { part.first->pipeline(j0,1); part.first->after(comp,iterator_map[0]); // std::cout<<"unroll dimension 2"<unroll(j1,-1); } } } this->current_opt_comp = comp; if(this->leader_computations.size() == -1) { this->evaluate_func(); if(this->current_latency <= this->best_latency && this->dsp_max>= this->dsp_usage) { this->best_latency = this->current_latency; this->best_dsp_usage = this->dsp_usage; this->dump_schedule(path); } if(this->dsp_max>this->dsp_usage) { larger_factor = true; // auto_DSE_tile_size(new_comp, factor); } }else { comp->temp_strategy = tile_size; this->evaluate_func(); long latency = comp->latency; int dsp = comp->dsp; polyfp::compute * new_comp = NULL; if(this->current_latency < this->best_latency && this->dsp_max>=this->dsp_usage) { auto comp = this->update_latency(); if(this->leader_computations.size()!=1) { int path_index = this->get_longest_path(); std::vector current_longest_path = paths[path_index]; std::vector current_longest_path_latency; std::map current_longest_map; int num = current_longest_path.size(); for(int i=0; ilatency_map[current_longest_path[i]]; current_longest_path_latency.push_back(temp_latency); current_longest_map.insert(std::make_pair(temp_latency,current_longest_path[i])); } std::sort(current_longest_path_latency.begin(),current_longest_path_latency.end(),std::greater()); bool comp_flag = false; for(int i=0; ipath_map[path_index][node_index]; std::map::iterator it; polyfp::compute *comp1; for( it= 
this->leader_computation_index.begin();it!=this->leader_computation_index.end();it++) { if(it->second==final_index) { comp1 = it->first; std::string name = comp1->get_name(); if (std::find(finish_list.begin(), finish_list.end(), name) == finish_list.end()){ new_comp = comp1; comp_flag = true; break; } } } if(comp_flag == true) { break; } } if(new_comp == NULL) { return; } if(new_comp->get_name() != comp->get_name() && this->dsp_max>=this->dsp_usage) { this->best_latency = this->current_latency; final_design = tile_size; break; }else if(new_comp->get_name() == comp->get_name() &&this->current_latency < this->best_latency && this->dsp_max>= this->dsp_usage) { this->best_latency = this->current_latency; this->best_dsp_usage = this->dsp_usage; current_design = tile_size; auto latency = comp->latency; int dsp = comp->dsp; }else{ // TODO } auto latency = comp->latency; int dsp = comp->dsp; }else{ new_comp = comp; if(new_comp->get_name() == comp->get_name() &&this->current_latency < this->best_latency && this->dsp_max>= this->dsp_usage) { this->best_latency = this->current_latency; this->best_dsp_usage = this->dsp_usage; current_design = tile_size; auto latency = comp->latency; int dsp = comp->dsp; }else { // TODO } auto latency = comp->latency; int dsp = comp->dsp; } } } auto latency = comp->latency; int dsp = comp->dsp; // TODO myfile << tile_size[0] << ","; myfile << tile_size[1] << ","; myfile << latency<< ","; myfile << this->dsp_usage << "\n"; } } if(final_design.size()!=0) { comp->final_strategy = final_design; comp->current_strategy = final_design; comp->apply_opt_strategy(comp->final_strategy); this->evaluate_func(); auto new_comp = this->update_latency(); auto_DSE_tile_size(new_comp, 1,path); }else if(current_design.size()!=0) { comp->current_strategy = current_design; auto_DSE_tile_size(comp, 1,path); }else if(current_design.size()==0||comp->current_factor == comp->largest_factor) { comp->opt_finished = true; auto_DSE_tile_size(comp, 1,path); } myfile.close(); 
} } /* Comparator passed to std::max_element: orders two pairs by their `second` member. */ bool cmp_value(const std::pair left, const std::pair right) { return left.second < right.second; } /* Returns the key of the all_latency_map entry whose value is largest, as ranked by cmp_value. NOTE(review): on an empty map std::max_element yields end() and `i->first` is invalid - callers must make sure the map is populated first. */ int polyfp::function::get_longest_path() { auto i= std::max_element(this->all_latency_map.begin(),this->all_latency_map.end(),cmp_value); return i->first; } /* Walks `path`, tracking the largest latency_map value seen so far (max_latency starts at 0) and the position j inside `path` where it occurred. NOTE(review): presumably returns that position (an index into `path`, not the node id itself) - confirm. */ int polyfp::function::get_longest_node(std::vector path) { long max_latency = 0; long index = 0; for(int j=0; jlatency_map[path[j]]){ max_latency = this->latency_map[path[j]]; index = j; } } // std::cout<<"longest node: "+std::to_string(max_latency)+";"+std::to_string(index)<paths.size(); i++) { std::string result = "Latency of path:"; long sum = 0; std::vector node_list; for(int j=0; jpaths[i].size(); j++) { result += std::to_string(this->latency_map[this->paths[i][j]]); result += ";"; sum+=this->latency_map[this->paths[i][j]]; node_list.push_back(this->paths[i][j]); } /* remember the node list of path i and its summed latency */ this->path_map.insert(std::make_pair(i,node_list)); result+=std::to_string(sum); this->all_latency_map[i] = sum; } // std::cout<<"this->all_latency_map.size()"<all_latency_map.size()<all_latency_map ){ // std::cout<all_latency_map.size()!=0) { /* slowest path, then slowest node on it, then the index used to look up the matching leader computation */ int path_index = this->get_longest_path(); int node_index = this->get_longest_node(this->paths[path_index]); int final_index = this->path_map[path_index][node_index]; this->longest_path = path_index; this->longest_node = node_index; // std::cout<<"path: "; // std::cout<::iterator it; for( it= this->leader_computation_index.begin();it!=this->leader_computation_index.end();it++) { if(it->second==final_index) comp = it->first; } } else { /* no path latencies recorded: fall back to the first computation in the body */ comp = this->get_body()[0]; } return comp; } /* Code-generation entry point. For every computation in the body it first brings final_loop_level_names and final_loop_level_names_reserved in line with the current loop-level names, then runs gen_loop_location, gen_time_space_domain, gen_isl_ast, gen_c_code and gen_mlir_stmt in that order. */ void polyfp::function::codegen() { for(auto &comp:this->get_body()) { std::vector current_name_list = comp->get_loop_level_names(); int final_size = comp->final_loop_level_names.size(); int current_size = current_name_list.size(); if(final_size == current_size) { comp->final_loop_level_names = current_name_list; comp->final_loop_level_names_reserved = current_name_list; }else if(final_size < current_size) { for(int i=0; 
ifinal_loop_level_names[i] = current_name_list[i]; comp->final_loop_level_names_reserved[i] = current_name_list[i]; } } } this->gen_loop_location(); this->gen_time_space_domain(); this->gen_isl_ast(); this->gen_c_code(); this->gen_mlir_stmt(); } /* Prints this function's ISL AST to stdout in ISL_FORMAT_C, preceded by a "C like code:" banner and surrounded by blank lines emitted through polyfp::str_dump. The isl_printer is created and freed inside this call. */ void polyfp::function::gen_c_code() const { polyfp::str_dump("\n\n"); polyfp::str_dump("\nC like code:\n"); isl_printer *p; p = isl_printer_to_file(this->get_isl_ctx(), stdout); p = isl_printer_set_output_format(p, ISL_FORMAT_C); p = isl_printer_print_ast_node(p, this->get_isl_ast()); isl_printer_free(p); polyfp::str_dump("\n\n"); } } ================================================ FILE: lib/polyhedral/generator.cpp ================================================ #include "generator.h" #include #include #include "scalehls/Transforms/Passes.h" #include "scalehls/Transforms/Utils.h" #include "scalehls/Transforms/Estimator.h" #include "llvm/Support/MemoryBuffer.h" #include "mlir/Dialect/Affine/Analysis/Utils.h" #include "mlir/Dialect/Affine/IR/AffineValueMap.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Support/FileUtilities.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/MemoryBuffer.h" namespace polyfp{ /* Returns the polyfp::compute pointer stored as the user data of the isl_id annotation attached to `node`. The id obtained from the node is freed before returning; the compute object itself is not touched. */ polyfp::compute *get_computation_annotated_in_a_node(isl_ast_node *node) { // Retrieve the computes of the node.
isl_id *comp_id = isl_ast_node_get_annotation(node); polyfp::compute *comp = (polyfp::compute *)isl_id_get_user(comp_id); isl_id_free(comp_id); return comp; } /* Accessor: returns argument_map by value. */ std::map polyfp::MLIRGenImpl::get_argument_map(){ return this->argument_map; } /* Accessor: returns array_map by value. */ std::map polyfp::MLIRGenImpl::get_array_map(){ return this->array_map; } /* Accessor: returns the funcs vector by value. */ std::vector polyfp::MLIRGenImpl::get_funcs(){ return this->funcs; } //TODO: find out what is the actual "loc" /* Resolves the iterator named by polyfp_expr to an entry of `ops`, appends that loop's induction variable to index_values if it is not already there, and returns the variable's position inside index_values. When the name is not one of comp's loop-level names it is first translated through comp's access map. NOTE(review): in that branch `loc` is never assigned if no access-map key matches, so ops[loc] would be indexed with an indeterminate value. */ int polyfp::MLIRGenImpl::get_iterator_location_from_name(polyfp::compute *comp, polyfp::expr polyfp_expr, std::vector &index_values) { auto name_set = comp->get_loop_level_names(); int loc; if (std::find(name_set.begin(), name_set.end(), polyfp_expr.get_name()) == name_set.end() ) { for (auto &kv2: comp->get_access_map()) { if(polyfp_expr.get_name()==kv2.first) { // loc = comp->get_loop_level_number_from_dimension_name(kv2.second); loc = comp->iterators_location_map[kv2.second]; } } mlir::Value value = ops[loc].getInductionVar(); if ( std::find(index_values.begin(), index_values.end(), value) == index_values.end()) { index_values.push_back(value); } loc = std::find(index_values.begin(), index_values.end(), value) - index_values.begin(); } else{ loc = comp->iterators_location_map[polyfp_expr.get_name()]; mlir::Value value = ops[loc].getInductionVar(); if ( std::find(index_values.begin(), index_values.end(), value) == index_values.end()) { index_values.push_back(value); } loc = std::find(index_values.begin(), index_values.end(), value) - index_values.begin(); } return loc; } mlir::ModuleOp polyfp::MLIRGenImpl::mlirGen1(const polyfp::function &fct, isl_ast_node *isl_node, int &level, bool flag, bool flag2, bool if_flag) { std::vector> generated_stmts; isl_ast_node *node=isl_node; if (isl_ast_node_get_type(node) == isl_ast_node_for) { isl_ast_expr *iter = isl_ast_node_for_get_iterator(node); isl_id *identifier = isl_ast_expr_get_id(iter); std::string name_str(isl_id_get_name(identifier)); name_map.insert(std::pair(level,name_str)); isl_ast_expr *init = 
isl_ast_node_for_get_init(node); // std::cout< lb_values; std::vector lb_args; std::vector ub_values; std::vector ub_args; isl_ast_expr *cond_upper = isl_ast_expr_get_op_arg(cond, 1); if (isl_ast_expr_get_type(init) == isl_ast_expr_int) { lb_int = isl_val_get_num_si(isl_ast_expr_get_val(init)); } else if (isl_ast_expr_get_type(init) == isl_ast_expr_op) { // std::cout<<"isl_ast_expr_op "<level) { index = start_loops_position[i-1]; break; } if(i == start_loops_position.size()-1 ) { index = start_loops_position[i]; break; } } for (auto &kv4: name_map) { if(name_str==kv4.second) { loc = kv4.first; } } mlir::Value value = ops[loc+index].getInductionVar(); lb_values.push_back(value); isl_id_free(identifier); } else { int nb = isl_ast_expr_get_op_n_arg(expr0); isl_ast_expr *expr0_0 = isl_ast_expr_get_op_arg(expr0, 0); isl_ast_expr *expr1_0 = isl_ast_expr_get_op_arg(expr0, 1); if (isl_ast_expr_get_type(expr0_0) == isl_ast_expr_id) { isl_id *identifier = isl_ast_expr_get_id(expr0_0); std::string name_str(isl_id_get_name(identifier)); int loc; int index = 0; for(int i=0; ilevel) { index = start_loops_position[i-1]; break; } if(i == start_loops_position.size()-1 ) { index = start_loops_position[i]; break; } } for (auto &kv4: name_map) { if(name_str==kv4.second) { loc = kv4.first; } } mlir::Value value = ops[loc+index].getInductionVar(); lb_values.push_back(value); isl_id_free(identifier); } if (isl_ast_expr_get_type(expr1) == isl_ast_expr_int) { div1 = isl_val_get_num_si(isl_ast_expr_get_val(expr1_0)); } } if (isl_ast_expr_get_type(expr1) == isl_ast_expr_int) { sub1 = isl_val_get_num_si(isl_ast_expr_get_val(expr1)); } else{ // TODO } //TODO: find right dimensions lb_args.push_back(builder.getAffineDimExpr(0).floorDiv(div1) - sub1); } else if(isl_ast_expr_get_op_type(expr_itr) == isl_ast_expr_op) { // std::cout<<"a division"< (lb_args),builder.getContext()); mlir::ValueRange lb_vr=llvm::makeArrayRef(lb_values); if (isl_ast_expr_get_type(cond_upper) == isl_ast_expr_int) { 
ub_int = isl_val_get_num_si(isl_ast_expr_get_val(cond_upper))+1; } else if (isl_ast_expr_get_type(cond_upper) == isl_ast_expr_op) { // std::cout<<"upper bound"<level) { index = start_loops_position[i-1]; break; } if(i == start_loops_position.size()-1 ) { index = start_loops_position[i]; break; } } mlir::Value value = ops[loc+index].getInductionVar(); ub_values.push_back(value); isl_id_free(identifier); } if (isl_ast_expr_get_type(expr1) == isl_ast_expr_int) { add1 = isl_val_get_num_si(isl_ast_expr_get_val(expr1))+1; } //TODO: find right dimensions ub_args.push_back(builder.getAffineDimExpr(0) - add1); } else if (isl_ast_expr_get_type(expr_itr) == isl_ast_expr_id) { isl_id *identifier = isl_ast_expr_get_id(expr_itr); std::string name_str(isl_id_get_name(identifier)); int loc; for (auto &kv4: name_map) { if(name_str==kv4.second) { loc = kv4.first; } } int index = 0; for(int i=0; ilevel) { index = start_loops_position[i-1]; break; } if(i == start_loops_position.size()-1 ) { index = start_loops_position[i]; break; } } mlir::Value value = ops[loc+index].getInductionVar(); ub_values.push_back(value); isl_id_free(identifier); ub_args.push_back(builder.getAffineDimExpr(0)); } else if(isl_ast_expr_get_type(expr_itr) == isl_ast_expr_op) { int nb = isl_ast_expr_get_op_n_arg(expr_itr); isl_ast_expr *expr0_0 = isl_ast_expr_get_op_arg(expr_itr, 0); isl_ast_expr *expr1_0 = isl_ast_expr_get_op_arg(expr_itr, 1); if (isl_ast_expr_get_type(expr0_0) == isl_ast_expr_id) { isl_id *identifier = isl_ast_expr_get_id(expr0_0); std::string name_str(isl_id_get_name(identifier)); int loc; int index = 0; for(int i=0; ilevel) { index = start_loops_position[i-1]; break; } if(i == start_loops_position.size()-1 ) { index = start_loops_position[i]; break; } } for (auto &kv4: name_map) { if(name_str==kv4.second) { loc = kv4.first; } } mlir::Value value = ops[loc+index].getInductionVar(); ub_values.push_back(value); isl_id_free(identifier); } if (isl_ast_expr_get_type(expr1_0) == isl_ast_expr_int) { 
div1 = isl_val_get_num_si(isl_ast_expr_get_val(expr1_0)); } ub_args.push_back(builder.getAffineDimExpr(0).floorDiv(div1)); } else { polyfp::str_dump("Transforming the following expression", (const char *)isl_ast_expr_to_C_str(expr_itr)); } isl_ast_expr_free(expr_itr); } } auto ub_map = mlir::AffineMap::get(1, 0, ArrayRef (ub_args),builder.getContext()); mlir::ValueRange ub_vr=llvm::makeArrayRef(ub_values); int step = isl_val_get_num_si(isl_ast_expr_get_val(inc)); if (isl_ast_node_get_type(node) == isl_ast_node_for) { if(level== 0) { start_loops_position.push_back(level); std::vector types; std::string name = polyfp::global::get_implicit_function()->get_name(); mlir::Location loc = builder.getUnknownLoc(); auto varLoc = loc; llvm::SmallVector operandTypes; llvm::SmallVector operandTypes_temp; llvm::SmallVector operandTypes_arg; mlir::Type t; mlir::MemRefType mr; for (auto &kv : fct.get_invariants()) { //TODO: more datatype if(kv.second->get_type() == polyfp::p_float64) { t= builder.getF64Type(); } if(kv.second->get_type() == polyfp::p_float32) { t= builder.getF32Type(); } if(kv.second->get_type() == polyfp::p_int32) { t= builder.getIntegerType(32); } operandTypes.push_back(t); argument_list.push_back(kv.first); operandTypes_temp.push_back(mr); } std::vector p_names; for (auto &kv : fct.get_fct_arguments()) { unsigned memspace = 0; auto size = kv.second->get_dim_sizes(); auto arg_type = kv.second->get_elements_type(); p_names.push_back(kv.first); if (arg_type == polyfp::p_uint8 || arg_type == polyfp::p_int8) { t= builder.getIntegerType(8); }else if(arg_type == polyfp::p_uint16 || arg_type == polyfp::p_int16) { t= builder.getIntegerType(16); }else if(arg_type == polyfp::p_uint32 || arg_type == polyfp::p_int32) { t= builder.getIntegerType(32); }else if(arg_type == polyfp::p_uint64 || arg_type == polyfp::p_int64) { t= builder.getIntegerType(64); }else if(arg_type == polyfp::p_float32) { t= builder.getF32Type(); }else if(arg_type == polyfp::p_float64) { t= 
builder.getF64Type(); } mr = mlir::MemRefType::get(llvm::makeArrayRef(size), t, {}, memspace); operandTypes.push_back(mr); argument_list.push_back(kv.first); array_map.insert(std::make_pair(kv.first,operandTypes.size()-1)); } for (auto &kv : fct.get_placeholders()) { unsigned memspace = 0; auto size = kv.second->get_dim_sizes(); auto arg_type = kv.second->get_elements_type(); // p_names.push_back(kv.first); if (arg_type == polyfp::p_uint8 || arg_type == polyfp::p_int8) { t= builder.getIntegerType(8); }else if(arg_type == polyfp::p_uint16 || arg_type == polyfp::p_int16) { t= builder.getIntegerType(16); }else if(arg_type == polyfp::p_uint32 || arg_type == polyfp::p_int32) { t= builder.getIntegerType(32); }else if(arg_type == polyfp::p_uint64 || arg_type == polyfp::p_int64) { t= builder.getIntegerType(64); }else if(arg_type == polyfp::p_float32) { t= builder.getF32Type(); }else if(arg_type == polyfp::p_float64) { t= builder.getF64Type(); } mr = mlir::MemRefType::get(llvm::makeArrayRef(size), t, {}, memspace); // auto op = builder.create(loc, mr); // op.dump(); operandTypes_temp.push_back(mr); // argument_list.push_back(kv.first); // array_map.insert(std::make_pair(kv.first,operandTypes_temp.size()-1)); } for (auto &kv : fct.get_global_arguments()) { unsigned memspace = 0; auto size = kv.second->get_dim_sizes(); auto arg_type = kv.second->get_elements_type(); p_names.push_back(kv.first); if (arg_type == polyfp::p_uint8 || arg_type == polyfp::p_int8) { t= builder.getIntegerType(8); }else if(arg_type == polyfp::p_uint16 || arg_type == polyfp::p_int16) { t= builder.getIntegerType(16); }else if(arg_type == polyfp::p_uint32 || arg_type == polyfp::p_int32) { t= builder.getIntegerType(32); }else if(arg_type == polyfp::p_uint64 || arg_type == polyfp::p_int64) { t= builder.getIntegerType(64); }else if(arg_type == polyfp::p_float32) { t= builder.getF32Type(); }else if(arg_type == polyfp::p_float64) { t= builder.getF64Type(); } mr = mlir::MemRefType::get(llvm::makeArrayRef(size), 
t, {}, memspace); // auto op = builder.create(loc, mr); // op.dump(); operandTypes_arg.push_back(mr); argument_list.push_back(kv.first); array_map.insert(std::make_pair(kv.first,operandTypes_temp.size()-1)); } if(flag ==true) { mlir::FuncOp myFunc = mlir::FuncOp::create(loc, /*name=*/name, /*type=*/builder.getFunctionType(operandTypes, {}), /*attrs=*/{}); auto &entryBlock = *myFunc.addEntryBlock(); builder.setInsertionPointToStart(&entryBlock); funcs.push_back(myFunc); theModule.push_back(myFunc); int index_pname = 0; int index_pname2 = 0; for(auto &arg: operandTypes_arg) { mlir::MemRefType mr = arg.dyn_cast(); auto value = builder.create(loc, mr); values.push_back(value); } for(auto &p_name: p_names) { if(index_pname<=fct.get_fct_arguments().size()) { auto mem = myFunc.getArgument(index_pname); index_pname+=1; argument_map.insert(std::pair(p_name,mem)); } else { auto mem = values[index_pname2]; index_pname2+=1; argument_map.insert(std::pair(p_name,mem)); } } // theModule.dump(); // mlir::MemRefType mr = operandTypes_temp[0].dyn_cast(); // auto op = builder.create(loc, mr); // for(auto &) //example of builder.getI16IntegerAttr(5) // mlir::Value arg1 = builder.create(builder.getUnknownLoc(),t, builder.getF32ArrayAttr(3.0)); // mlir::Value arg2 = builder.create(builder.getUnknownLoc(),t, builder.getF32ArrayAttr(6.7)); if(vbound_flag == false) { auto loop = builder.create(builder.getUnknownLoc(), lb_int, ub_int, step); ops.push_back(loop); builder.setInsertionPointAfter(ops[0]); auto return_op = builder.create(builder.getUnknownLoc(), ArrayRef()); builder.setInsertionPointToStart(loop.getBody()); }else { auto loop = builder.create(builder.getUnknownLoc(), lb_vr, lb_map, ub_vr, ub_map, step); ops.push_back(loop); builder.setInsertionPointAfter(ops[0]); auto return_op = builder.create(builder.getUnknownLoc(), ArrayRef()); builder.setInsertionPointToStart(loop.getBody()); } }else{ if(vbound_flag == false) { auto loop = builder.create(builder.getUnknownLoc(), lb_int, 
ub_int, step); ops.push_back(loop); builder.setInsertionPointAfter(ops[0]); builder.setInsertionPointToStart(loop.getBody()); } else { auto loop = builder.create(builder.getUnknownLoc(), lb_vr, lb_map, ub_vr, ub_map, step); ops.push_back(loop); builder.setInsertionPointAfter(ops[0]); builder.setInsertionPointToStart(loop.getBody()); } } } else { if(vbound_flag == false) { auto loop = builder.create(builder.getUnknownLoc(), lb_int, ub_int, step); ops.push_back(loop); builder.setInsertionPointAfter(ops[0]); builder.setInsertionPointToStart(loop.getBody()); } else { auto loop = builder.create(builder.getUnknownLoc(), lb_vr, lb_map, ub_vr, ub_map, step); ops.push_back(loop); builder.setInsertionPointAfter(ops[0]); builder.setInsertionPointToStart(loop.getBody()); } } } isl_ast_expr_free(init); isl_ast_expr_free(cond); isl_ast_expr_free(inc); isl_ast_node_free(body); isl_ast_expr_free(cond_upper); if (isl_ast_node_get_type(body) == isl_ast_node_for) { level = level+1; mlirGen1(fct,body,level,false,flag2,if_flag); } if (isl_ast_node_get_type(body) == isl_ast_node_user) { mlirGen1(fct,body,level,false,flag2,if_flag); } else if(isl_ast_node_get_type(body) == isl_ast_node_block) { mlirGen1(fct,body,level,false,flag2,if_flag); } else if(isl_ast_node_get_type(body) == isl_ast_node_if) { // TODO } else{ // TODO } } else if (isl_ast_node_get_type(node) == isl_ast_node_block) { // std::cout<<"enter a block node"<ops.size() == 0) { for (int i = 0; i <=isl_ast_node_list_n_ast_node(list) - 1; i++) { isl_ast_node *child = isl_ast_node_list_get_ast_node(list, i); if (isl_ast_node_get_type(child) == isl_ast_node_user) { mlirGen1(fct,child,current_level,false,true,if_flag); } else if (isl_ast_node_get_type(child) == isl_ast_node_for) { if(this->ops.size() != 0) { level = level + 1 ; int current_level = level; start_loops_position.push_back(level); mlirGen1(fct,child,level,false,true,if_flag); builder.setInsertionPointAfter(ops[current_level]); } else{ // 
start_loops_position.push_back(level); mlirGen1(fct,child,level,true, false, false); builder.setInsertionPointAfter(ops[current_level]); } } else if (isl_ast_node_get_type(child) == isl_ast_node_block) { mlirGen1(fct,child,level,false,true,if_flag); }else if (isl_ast_node_get_type(child) == isl_ast_node_if) { mlirGen1(fct,child,level,false,true,if_flag); }else{ // TODO } } } else { for (int i = 0; i <=isl_ast_node_list_n_ast_node(list) - 1; i++) { isl_ast_node *child = isl_ast_node_list_get_ast_node(list, i); if (isl_ast_node_get_type(child) == isl_ast_node_user) { mlirGen1(fct,child,current_level,false,true,if_flag); } else if (isl_ast_node_get_type(child) == isl_ast_node_for) { if(this->ops.size() != 0) { level = level + 1 ; int current_level = level; // start_loops_position.push_back(level); mlirGen1(fct,child,level,false,true,if_flag); builder.setInsertionPointAfter(ops[current_level]); } else { // start_loops_position.push_back(level); mlirGen1(fct,child,level,true, false, false); builder.setInsertionPointAfter(ops[current_level]); } } else if (isl_ast_node_get_type(child) == isl_ast_node_block) { mlirGen1(fct,child,level,false,true,if_flag); } else if (isl_ast_node_get_type(child) == isl_ast_node_if) { mlirGen1(fct,child,level,false,true,if_flag); } else{ // TODO } } } } else if (isl_ast_node_get_type(node) == isl_ast_node_user) { bool flag = true; isl_ast_expr *expr = isl_ast_node_user_get_expr(node); isl_ast_expr *arg = isl_ast_expr_get_op_arg(expr, 0); isl_id *id = isl_ast_expr_get_id(arg); std::string computation_name(isl_id_get_name(id)); isl_id_free(id); polyfp::compute *comp; int dim_number = 0; for (const auto &cpt : fct.get_body()) { if(cpt->get_name()==computation_name) { comp = cpt; } if(dim_number < cpt->get_loop_levels_number()) dim_number = cpt->get_loop_levels_number(); } auto polyfp_expr = comp->get_expr(); std::string p_name = comp->get_placeholder()->get_name(); int index_placeholder; int index_argument; for(int i=0; i 
placeholder_index_values; SmallVector placeholder_index_args; bool placeholder_index_flag = false; int count = 0; for (auto &kv: comp->get_placeholder_dims()) { int bias = 0; if(kv.get_expr_type() == polyfp::e_op) { //TODO, HANDLE loop skewing auto expr0 = kv.get_operand(0); auto expr1 = kv.get_operand(1); auto left_index = a_print_index(expr0,comp,placeholder_index_values,level); auto right_index = a_print_index(expr1,comp,placeholder_index_values,level); if(kv.get_op_type() == polyfp::o_sub) { placeholder_index_args.push_back(left_index - right_index); placeholder_index_flag = true; } else if(kv.get_op_type() == polyfp::o_add) { placeholder_index_args.push_back(left_index + right_index); placeholder_index_flag = true; } else if(kv.get_op_type() == polyfp::o_mul) { placeholder_index_args.push_back(left_index * right_index); placeholder_index_flag = true; } else if(kv.get_op_type() == polyfp::o_div) { placeholder_index_args.push_back(left_index.floorDiv(right_index)); placeholder_index_flag = true; } } else{ int loc =0; int loc_2 =0; std::string tile_name1; std::string tile_name2; int tile_size; auto name_set = comp->get_loop_level_names(); // std::cout<get_access_map()) { if(kv.get_name()==kv2.first) { tile_name1 = kv2.second; loc = comp->iterators_location_map[tile_name1]; } } mlir::Value value = ops[loc].getInductionVar(); if ( std::find(placeholder_index_values.begin(), placeholder_index_values.end(), value)== placeholder_index_values.end() ) { placeholder_index_values.push_back(value); } if(comp->is_tiled == true) { for (auto &kv3: comp->get_tile_map()) { if(tile_name1==kv3.first) { // loc_2 = comp->get_loop_level_number_from_dimension_name(kv3.second); loc = comp->iterators_location_map[kv3.second]; tile_name2 = kv3.second; } } for (auto &kv4: comp->get_tile_size_map()) { if(tile_name1==kv4.first) { tile_size = kv4.second; } } mlir::Value value2 = ops[loc].getInductionVar(); if ( std::find(placeholder_index_values.begin(), placeholder_index_values.end(), 
value2)== placeholder_index_values.end() ) { placeholder_index_values.push_back(value2); } //TODO: find the right dim int index_2 = std::find(placeholder_index_values.begin(), placeholder_index_values.end(), value2) - placeholder_index_values.begin(); int index_3 = std::find(placeholder_index_values.begin(), placeholder_index_values.end(), value) - placeholder_index_values.begin(); placeholder_index_args.push_back(builder.getAffineDimExpr(index_3)+builder.getAffineDimExpr(index_2)*tile_size); placeholder_index_flag = true; } else if(comp->is_skewed == true) { loc = comp->iterators_location_map[comp->iterator_to_skew]; mlir::Value value2 = ops[loc].getInductionVar(); if ( std::find(placeholder_index_values.begin(), placeholder_index_values.end(), value2)== placeholder_index_values.end() ) { placeholder_index_values.push_back(value2); } //TODO: find the right dim int index_2 = std::find(placeholder_index_values.begin(), placeholder_index_values.end(), value2) - placeholder_index_values.begin(); int index_3 = std::find(placeholder_index_values.begin(), placeholder_index_values.end(), value) - placeholder_index_values.begin(); // placeholder_index_args.push_back(builder.getAffineDimExpr(index_3)-builder.getAffineDimExpr(index_2)*2); // placeholder_index_args.push_back(builder.getAffineDimExpr(index_3)); if(tile_name1 == comp->iterator_to_modify) { placeholder_index_args.push_back(builder.getAffineDimExpr(index_3)-builder.getAffineDimExpr(index_2)*comp->skew_factor); }else { placeholder_index_args.push_back(builder.getAffineDimExpr(index_3)); } placeholder_index_flag = true; } } else{ loc = comp->iterators_location_map[kv.get_name()]; mlir::Value value = ops[loc].getInductionVar(); if (std::find(placeholder_index_values.begin(), placeholder_index_values.end(), value) == placeholder_index_values.end()) { placeholder_index_values.push_back(value); } //TODO: int index2 = std::find(placeholder_index_values.begin(), placeholder_index_values.end(), value) - 
placeholder_index_values.begin(); placeholder_index_args.push_back(builder.getAffineDimExpr(index2)); } } } auto placeholder_map = mlir::AffineMap::get(placeholder_index_values.size(), 0, ArrayRef (placeholder_index_args),builder.getContext()); mlir::ValueRange placeholder_vr=llvm::makeArrayRef(placeholder_index_values); mlir::Value mem; if(index_placeholder+1<=funcs[0].getNumArguments()) { mem = funcs[0].getArgument(index_placeholder); argument_map.insert(std::pair(p_name,mem)); } else{ mem = values[index_placeholder-funcs[0].getNumArguments()]; argument_map.insert(std::pair(p_name,mem)); } if (polyfp_expr.get_expr_type() == polyfp::e_var) { int index_argument; std::string arg_name = polyfp_expr.get_name(); for(int i=0; i ifOperands; ifOperands.push_back(value); SmallVector ifExprs; ifExprs.push_back(builder.getAffineDimExpr(0)); SmallVector ifEqFlags; ifEqFlags.push_back(true); const auto condition = mlir::IntegerSet::get(1, 0, ifExprs, ifEqFlags); auto ifOp =builder.create(builder.getUnknownLoc(), condition, ifOperands,/*withElseRegion=*/false); builder.setInsertionPointToStart(ifOp.getBody()); auto store1 = builder.create(builder.getUnknownLoc(), arg_1, mem, placeholder_vr); builder.setInsertionPointAfter(ifOp); } else { if(placeholder_index_flag == true) { auto store1 = builder.create(builder.getUnknownLoc(), arg_1, mem, placeholder_map, placeholder_vr); builder.setInsertionPointAfter(store1); }else { auto store1 = builder.create(builder.getUnknownLoc(), arg_1, mem, placeholder_vr); builder.setInsertionPointAfter(store1); } } } else if (polyfp_expr.get_expr_type() == polyfp::e_op && polyfp_expr.get_op_type() != polyfp::o_access && polyfp_expr.get_op_type() != polyfp::o_max ) { mlir::ValueRange indices = {}; auto a = polyfp_expr.get_operand(0); auto b = polyfp_expr.get_operand(1); mlir::BlockArgument left; mlir::BlockArgument right; mlir::arith::MulFOp allocSize_m; mlir::arith::AddFOp allocSize_a; // theModule.dump(); a_print_expr(polyfp_expr, comp, level); 
if(if_flag == true) { mlir::Value value = ops[2].getInductionVar(); SmallVector ifOperands; ifOperands.push_back(value); SmallVector ifExprs; ifExprs.push_back(builder.getAffineDimExpr(0)); SmallVector ifEqFlags; ifEqFlags.push_back(true); const auto condition = mlir::IntegerSet::get(1, 0, ifExprs, ifEqFlags); auto ifOp =builder.create(builder.getUnknownLoc(), condition, ifOperands,/*withElseRegion=*/false); builder.setInsertionPointToStart(ifOp.getBody()); //TODO: other arithmetic? sub, o_div auto the_op = all_current_op.back(); auto index = the_op.index(); if(index==0) { auto op_to_process = std::get<0>(the_op); auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_vr); } else if(index==1) { auto op_to_process = std::get<1>(the_op); auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_vr); }else if(index==2) { auto op_to_process = std::get<2>(the_op); auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_vr); }else if(index==3) { auto op_to_process = std::get<3>(the_op); auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_vr); } builder.setInsertionPointAfter(ifOp); }else{ auto the_op = all_current_op.back(); auto index = the_op.index(); if(index==0) { auto op_to_process = std::get<0>(the_op); if(placeholder_index_flag == true) { auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_map, placeholder_vr); builder.setInsertionPointAfter(store1); }else{ auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_vr); builder.setInsertionPointAfter(store1); } }else if(index==1) { auto op_to_process = std::get<1>(the_op); if(placeholder_index_flag == true) { auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_map, placeholder_vr); builder.setInsertionPointAfter(store1); }else { auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, 
mem, placeholder_vr); builder.setInsertionPointAfter(store1); } }else if(index==2) { auto op_to_process = std::get<2>(the_op); if(placeholder_index_flag == true) { auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_map, placeholder_vr); builder.setInsertionPointAfter(store1); } else { auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_vr); builder.setInsertionPointAfter(store1); } }else if(index==3) { auto op_to_process = std::get<3>(the_op); if(placeholder_index_flag == true) { auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_map, placeholder_vr); builder.setInsertionPointAfter(store1); } else { auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_vr); builder.setInsertionPointAfter(store1); } } } } else if(polyfp_expr.get_expr_type() == polyfp::e_op && polyfp_expr.get_op_type() == polyfp::o_access ) { std::string a_name = polyfp_expr.get_name(); int index_a; // if(comp->refused == true){ // a_name = comp->temp_access_map[a_name]; // } for(int i=0; i index_values; SmallVector index_args; bool index_flag = false; int count = 0; for (auto &kv: polyfp_expr.get_access()) { int bias = 0; if(kv.get_expr_type() == polyfp::e_op) { auto expr0 = kv.get_operand(0); auto expr1 = kv.get_operand(1); auto left_index = a_print_index(expr0,comp,index_values,level); auto right_index = a_print_index(expr1,comp,index_values,level); if(kv.get_op_type() == polyfp::o_sub) { index_args.push_back(left_index - right_index); index_flag = true; }else if(kv.get_op_type() == polyfp::o_add) { index_args.push_back(left_index + right_index); index_flag = true; }else if(kv.get_op_type() == polyfp::o_mul) { index_args.push_back(left_index * right_index); index_flag = true; }else if(kv.get_op_type() == polyfp::o_div) { index_args.push_back(left_index.floorDiv(right_index)); index_flag = true; } } else { int loc =0; int loc_2 =0; std::string tile_name1; 
std::string tile_name2; int tile_size; auto name_set = comp->get_loop_level_names(); //MOD3 auto t_name = kv.get_name(); if(comp->refused == true) { t_name = comp->temp_access_map[t_name]; } if ( std::find(name_set.begin(), name_set.end(), t_name) == name_set.end() ) { for (auto &kv2: comp->get_access_map()) { if(t_name==kv2.first){ tile_name1 = kv2.second; loc = comp->iterators_location_map[tile_name1]; // loc = comp->get_loop_level_number_from_dimension_name(kv2.second); } } mlir::Value value = ops[loc].getInductionVar(); if ( std::find(index_values.begin(), index_values.end(), value)== index_values.end() ) { index_values.push_back(value); } if(comp->is_tiled ==true) { for (auto &kv3: comp->get_tile_map()) { if(tile_name1==kv3.first) { loc = comp->iterators_location_map[kv3.second]; // loc = comp->get_loop_level_number_from_dimension_name(kv3.second); tile_name2 = kv3.second; } } for (auto &kv4: comp->get_tile_size_map()) { if(tile_name1==kv4.first){ tile_size = kv4.second; } } mlir::Value value2 = ops[loc].getInductionVar(); if ( std::find(index_values.begin(), index_values.end(), value2)== index_values.end() ) { index_values.push_back(value2); } //TODO int index_2 = std::find(index_values.begin(), index_values.end(), value2) - index_values.begin(); int index_3 = std::find(index_values.begin(), index_values.end(), value) - index_values.begin(); index_args.push_back(builder.getAffineDimExpr(index_3)+builder.getAffineDimExpr(index_2)*tile_size); } else if(comp->is_skewed == true) { loc = comp->iterators_location_map[comp->iterator_to_skew]; mlir::Value value2 = ops[loc].getInductionVar(); if ( std::find(index_values.begin(), index_values.end(), value2)== index_values.end() ) { index_values.push_back(value2); } //TODO: find the right dim int index_2 = std::find(index_values.begin(), index_values.end(), value2) - index_values.begin(); int index_3 = std::find(index_values.begin(), index_values.end(), value) - index_values.begin(); // 
placeholder_index_args.push_back(builder.getAffineDimExpr(index_3)-builder.getAffineDimExpr(index_2)*2); // placeholder_index_args.push_back(builder.getAffineDimExpr(index_3)); if(tile_name1 == comp->iterator_to_modify) { index_args.push_back(builder.getAffineDimExpr(index_3)-builder.getAffineDimExpr(index_2)*comp->skew_factor); } else { index_args.push_back(builder.getAffineDimExpr(index_3)); } index_flag = true; } else { index_args.push_back(builder.getAffineDimExpr(loc)); } index_flag = true; } else { if(comp->refused == true) { t_name = comp->temp_access_map[t_name]; } else { loc = comp->iterators_location_map[kv.get_name()]; } // loc = comp->get_loop_level_number_from_dimension_name(kv.get_name()); int index = 0; for(int i=0; ilevel&&start_loops_position[i-1]<=level) { index = start_loops_position[i-1]; break; } if(i == start_loops_position.size()-1 ) { index = start_loops_position[i]; break; } } mlir::Value value = ops[loc].getInductionVar(); if ( std::find(index_values.begin(), index_values.end(), value) == index_values.end()) { index_values.push_back(value); } //TODO int index_1 = std::find(index_values.begin(), index_values.end(), value) - index_values.begin(); index_args.push_back(builder.getAffineDimExpr(index_1)); } } } auto map = mlir::AffineMap::get(index_values.size(), 0, ArrayRef (index_args),builder.getContext()); mlir::ValueRange vr=llvm::makeArrayRef(index_values); //TODO.number of variables mlir::AffineLoadOp a; if(index_flag == true) { a = builder.create(builder.getUnknownLoc(), arg_a,map,vr); } else { a = builder.create(builder.getUnknownLoc(), arg_a ,vr); } if(if_flag == true) { mlir::Value value = ops[2].getInductionVar(); SmallVector ifOperands; ifOperands.push_back(value); SmallVector ifExprs; ifExprs.push_back(builder.getAffineDimExpr(0)); SmallVector ifEqFlags; ifEqFlags.push_back(true); const auto condition = mlir::IntegerSet::get(1, 0, ifExprs, ifEqFlags); auto ifOp =builder.create(builder.getUnknownLoc(), condition, 
ifOperands,/*withElseRegion=*/false); builder.setInsertionPointToStart(ifOp.getBody()); auto store1 = builder.create(builder.getUnknownLoc(), a, mem, vr); builder.setInsertionPointAfter(ifOp); } else { if(placeholder_index_flag == true) { auto store1 = builder.create(builder.getUnknownLoc(), a, mem, placeholder_map, placeholder_vr); } else { auto store1 = builder.create(builder.getUnknownLoc(), a, mem, placeholder_vr); } } } else if(polyfp_expr.get_op_type() == polyfp::o_max) { mlir::ValueRange indices = {}; auto a = polyfp_expr.get_operand(0); auto b = polyfp_expr.get_operand(1); mlir::BlockArgument left; mlir::BlockArgument right; // mlir::arith::MulFOp allocSize_m; // mlir::arith::AddFOp allocSize_a; // theModule.dump(); a_print_expr(polyfp_expr, comp, level); if(if_flag == true) { mlir::Value value = ops[2].getInductionVar(); SmallVector ifOperands; ifOperands.push_back(value); SmallVector ifExprs; ifExprs.push_back(builder.getAffineDimExpr(0)); SmallVector ifEqFlags; ifEqFlags.push_back(true); const auto condition = mlir::IntegerSet::get(1, 0, ifExprs, ifEqFlags); auto ifOp =builder.create(builder.getUnknownLoc(), condition, ifOperands,/*withElseRegion=*/false); builder.setInsertionPointToStart(ifOp.getBody()); //TODO: other arithmetic? 
sub, o_div auto the_op = all_current_op.back(); auto index = the_op.index(); if(index==0){ auto op_to_process = std::get<0>(the_op); auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_vr); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_vr); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_vr); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_vr); } builder.setInsertionPointAfter(ifOp); } else { auto the_op = all_current_op.back(); auto index = the_op.index(); if(index==0){ auto op_to_process = std::get<0>(the_op); if(placeholder_index_flag == true){ auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_map, placeholder_vr); builder.setInsertionPointAfter(store1); }else{ auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_vr); builder.setInsertionPointAfter(store1); } }else if(index==1){ auto op_to_process = std::get<1>(the_op); if(placeholder_index_flag == true){ auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_map, placeholder_vr); builder.setInsertionPointAfter(store1); }else{ auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_vr); builder.setInsertionPointAfter(store1); } }else if(index==2){ auto op_to_process = std::get<2>(the_op); if(placeholder_index_flag == true){ auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_map, placeholder_vr); builder.setInsertionPointAfter(store1); }else{ auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_vr); builder.setInsertionPointAfter(store1); } }else if(index==3){ auto op_to_process = 
std::get<3>(the_op); if(placeholder_index_flag == true){ auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_map, placeholder_vr); builder.setInsertionPointAfter(store1); }else{ auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_vr); builder.setInsertionPointAfter(store1); } } else if(index==4){ auto op_to_process = std::get<4>(the_op); if(placeholder_index_flag == true){ auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_map, placeholder_vr); builder.setInsertionPointAfter(store1); }else{ auto store1 = builder.create(builder.getUnknownLoc(), op_to_process, mem, placeholder_vr); builder.setInsertionPointAfter(store1); } } } } } else if (isl_ast_node_get_type(node) == isl_ast_node_if) { isl_ast_expr *cond = isl_ast_node_if_get_cond(node); isl_ast_node *if_stmt = isl_ast_node_if_get_then(node); isl_ast_node *else_stmt = isl_ast_node_if_get_else(node); mlirGen1(fct,if_stmt,level,false,true,true); } return theModule; }

/**
 * Translates an index expression into an mlir::AffineExpr.
 *
 *   - e_op:  both operands are translated recursively and combined for
 *            o_sub, o_add, o_mul and o_div (floor division).
 *   - e_var: the variable becomes an affine dimension that refers to a slot
 *            of `index_values`; tiled and skewed computations combine it
 *            with a second induction variable (see the branches below).
 *   - e_val: the value of get_int_val() becomes an affine constant.
 *
 * Induction variables the expression depends on are appended to
 * `index_values` when they are not there yet. Any other expression kind, or
 * an operator other than the four listed, yields a default-constructed
 * mlir::AffineExpr.
 *
 * @param polyfp_expr   index expression to translate
 * @param comp          computation whose iterator/tile/skew tables are read
 * @param index_values  in/out list of induction variables used as dimensions
 * @param level         only forwarded to the recursive calls
 */
mlir::AffineExpr polyfp::MLIRGenImpl::a_print_index(polyfp::expr polyfp_expr, polyfp::compute *comp, std::vector<mlir::Value> &index_values, int level)
{
    mlir::AffineExpr index_value;

    // Slot of `value` in index_values (index_values.size() when absent).
    auto position_of = [&index_values](mlir::Value value) -> int {
        return std::find(index_values.begin(), index_values.end(), value) - index_values.begin();
    };
    // Appends `value` to index_values unless it is already recorded.
    auto remember = [&index_values](mlir::Value value) {
        if (std::find(index_values.begin(), index_values.end(), value) == index_values.end())
        {
            index_values.push_back(value);
        }
    };

    if (polyfp_expr.get_expr_type() == polyfp::e_op)
    {
        auto expr0 = polyfp_expr.get_operand(0);
        auto expr1 = polyfp_expr.get_operand(1);
        auto left_index = a_print_index(expr0, comp, index_values, level);
        auto right_index = a_print_index(expr1, comp, index_values, level);

        if (polyfp_expr.get_op_type() == polyfp::o_sub)
        {
            index_value = left_index - right_index;
        }
        else if (polyfp_expr.get_op_type() == polyfp::o_add)
        {
            index_value = left_index + right_index;
        }
        else if (polyfp_expr.get_op_type() == polyfp::o_mul)
        {
            index_value = left_index * right_index;
        }
        else if (polyfp_expr.get_op_type() == polyfp::o_div)
        {
            index_value = left_index.floorDiv(right_index);
        }
    }
    else if (polyfp_expr.get_expr_type() == polyfp::e_var)
    {
        // This call also records the variable's induction variable in
        // index_values; its result is the slot used by the plain case.
        int loc = get_iterator_location_from_name(comp, polyfp_expr, index_values);
        auto name_set = comp->get_loop_level_names();
        const bool names_loop_level =
            std::find(name_set.begin(), name_set.end(), polyfp_expr.get_name()) != name_set.end();

        if (comp->is_tiled == true && !names_loop_level)
        {
            int loc2 = 0;
            // Unit factor keeps the expression well-defined when the tile-size
            // map has no entry for this iterator (previously an
            // uninitialised read).
            int tile_size = 1;
            std::string tile_name1;

            for (auto &kv2: comp->get_access_map())
            {
                if (polyfp_expr.get_name() == kv2.first)
                {
                    tile_name1 = kv2.second;
                    loc2 = comp->iterators_location_map[kv2.second];
                }
            }
            for (auto &kv3: comp->get_tile_map())
            {
                if (tile_name1 == kv3.first)
                {
                    loc = comp->iterators_location_map[kv3.second];
                }
            }
            for (auto &kv4: comp->get_tile_size_map())
            {
                if (tile_name1 == kv4.first)
                {
                    tile_size = kv4.second;
                }
            }

            mlir::Value value2 = ops[loc].getInductionVar();
            mlir::Value value3 = ops[loc2].getInductionVar();
            remember(value2);
            int index2 = position_of(value2);
            //TODO: index3 is supported here; other places may still have problems
            int index3 = position_of(value3);
            index_value = builder.getAffineDimExpr(index3) + builder.getAffineDimExpr(index2) * tile_size;
        }
        else if (comp->is_skewed == true)
        {
            std::string tile_name1;
            for (auto &kv2: comp->get_access_map())
            {
                if (polyfp_expr.get_name() == kv2.first)
                {
                    tile_name1 = kv2.second;
                    loc = comp->iterators_location_map[tile_name1];
                }
            }
            mlir::Value value = ops[loc].getInductionVar();
            remember(value);

            loc = comp->iterators_location_map[comp->iterator_to_skew];
            mlir::Value value2 = ops[loc].getInductionVar();
            remember(value2);

            //TODO:
            int index_2 = position_of(value2);
            int index_3 = position_of(value);
            if (tile_name1 == comp->iterator_to_modify)
            {
                // Only the modified iterator is offset by the skewed one.
                index_value = builder.getAffineDimExpr(index_3) - builder.getAffineDimExpr(index_2) * comp->skew_factor;
            }
            else
            {
                index_value = builder.getAffineDimExpr(index_3);
            }
        }
        else
        {
            index_value = builder.getAffineDimExpr(loc);
        }
    }
    else if (polyfp_expr.get_expr_type() == polyfp::e_val)
    {
        index_value = getAffineConstantExpr(polyfp_expr.get_int_val(), builder.getContext());
    }

    return index_value;
}

// Dump expression void polyfp::MLIRGenImpl::a_print_expr(polyfp::expr polyfp_expr, polyfp::compute *comp, int level){ auto left = polyfp_expr.get_operand(0); auto right = polyfp_expr.get_operand(1); if ((right.get_op_type() == polyfp::o_access || right.get_expr_type() == polyfp::e_var ) && (left.get_op_type() == polyfp::o_access || left.get_expr_type() == polyfp::e_var)) { // theModule.dump(); mlir::AffineLoadOp loadedRhs; mlir::AffineLoadOp loadedLhs; mlir::Value arg_left; mlir::Value arg_right; if(left.get_op_type() == polyfp::o_access) { std::string a_name = left.get_name(); int index_a; // if(comp->refused == true){ // a_name = comp->temp_access_map[a_name]; // } // std::cout< index_values; SmallVector index_args; bool index_flag = false; for (auto &kv: left.get_access()) { int bias = 0; if(kv.get_expr_type() == polyfp::e_op) { auto expr0 = kv.get_operand(0); auto expr1 = kv.get_operand(1); auto left_index = a_print_index(expr0,comp,index_values,level); auto right_index = a_print_index(expr1,comp,index_values,level); if(kv.get_op_type() == polyfp::o_sub){ index_args.push_back(left_index - right_index); index_flag = true; }else if(kv.get_op_type() == polyfp::o_add){ index_args.push_back(left_index + right_index); index_flag = true; }else if(kv.get_op_type() == polyfp::o_mul){ index_args.push_back(left_index * right_index);
index_flag = true; }else if(kv.get_op_type() == polyfp::o_div){ index_args.push_back(left_index.floorDiv(right_index)); index_flag = true; } } else { int loc =0; int loc_2 =0; std::string tile_name1; std::string tile_name2; int tile_size; auto name_set = comp->get_loop_level_names(); auto t_name = kv.get_name(); if(comp->refused == true) { t_name = comp->temp_access_map[t_name]; } if ( std::find(name_set.begin(), name_set.end(), t_name) == name_set.end() ) { for (auto &kv2: comp->get_access_map()) { if(t_name==kv2.first) { tile_name1 = kv2.second; loc = comp->iterators_location_map[kv2.second]; } } mlir::Value value = ops[loc].getInductionVar(); if ( std::find(index_values.begin(), index_values.end(), value) == index_values.end() ) { index_values.push_back(value); } if(comp->is_tiled ==true) { for (auto &kv3: comp->get_tile_map()){ if(tile_name1==kv3.first) { loc = comp->iterators_location_map[kv3.second]; tile_name2 = kv3.second; } } for (auto &kv4: comp->get_tile_size_map()) { if(tile_name1==kv4.first){ tile_size = kv4.second; } } mlir::Value value2 = ops[loc].getInductionVar(); if ( std::find(index_values.begin(), index_values.end(), value2)== index_values.end() ) { index_values.push_back(value2); } //TODO: int index_2 = std::find(index_values.begin(), index_values.end(), value2) - index_values.begin(); int index_3 = std::find(index_values.begin(), index_values.end(), value) - index_values.begin(); index_args.push_back(builder.getAffineDimExpr(index_3)+builder.getAffineDimExpr(index_2)*tile_size); } else if(comp->is_skewed == true) { loc = comp->iterators_location_map[comp->iterator_to_skew]; mlir::Value value2 = ops[loc].getInductionVar(); if ( std::find(index_values.begin(), index_values.end(), value2)== index_values.end() ) { index_values.push_back(value2); } //TODO: find the right dim int index_2 = std::find(index_values.begin(), index_values.end(), value2) - index_values.begin(); int index_3 = std::find(index_values.begin(), index_values.end(), value) - 
index_values.begin(); // placeholder_index_args.push_back(builder.getAffineDimExpr(index_3)-builder.getAffineDimExpr(index_2)*2); // placeholder_index_args.push_back(builder.getAffineDimExpr(index_3)); if(tile_name1 == comp->iterator_to_modify) { index_args.push_back(builder.getAffineDimExpr(index_3)-builder.getAffineDimExpr(index_2)*comp->skew_factor); } else { index_args.push_back(builder.getAffineDimExpr(index_3)); } index_flag = true; } else{ // TODO std::cout<<"something went wrong"<refused == true) { loc = comp->iterators_location_map[t_name]; } else { loc = comp->iterators_location_map[kv.get_name()]; } mlir::Value value = ops[loc].getInductionVar(); if ( std::find(index_values.begin(), index_values.end(), value) == index_values.end()) { index_values.push_back(value); } //TODO index_1 + index ? int index_1 = std::find(index_values.begin(), index_values.end(), value) - index_values.begin(); index_args.push_back(builder.getAffineDimExpr(index_1)); } } } auto map = mlir::AffineMap::get(index_values.size(), 0, ArrayRef (index_args),builder.getContext()); mlir::ValueRange vr=llvm::makeArrayRef(index_values); //TODO.number of variables mlir::AffineLoadOp a; if(index_flag == true) { loadedLhs = builder.create(builder.getUnknownLoc(), arg_a,map,vr); }else{ loadedLhs = builder.create(builder.getUnknownLoc(), arg_a ,vr); } } else if(left.get_expr_type() == polyfp::e_var){ int index_argument; std::string arg_name = left.get_name(); for(int i=0; i index_values; SmallVector index_args; bool index_flag = false; for (auto &kv: right.get_access()) { int bias = 0; if(kv.get_expr_type() == polyfp::e_op) { auto expr0 = kv.get_operand(0); auto expr1 = kv.get_operand(1); auto left_index = a_print_index(expr0,comp,index_values,level); auto right_index = a_print_index(expr1,comp,index_values,level); if(kv.get_op_type() == polyfp::o_sub){ index_args.push_back(left_index - right_index); index_flag = true; }else if(kv.get_op_type() == polyfp::o_add){ index_args.push_back(left_index + 
right_index); index_flag = true; }else if(kv.get_op_type() == polyfp::o_mul){ index_args.push_back(left_index * right_index); index_flag = true; }else if(kv.get_op_type() == polyfp::o_div){ index_args.push_back(left_index.floorDiv(right_index)); index_flag = true; } } else { int loc =0; int loc_2 =0; std::string tile_name1; std::string tile_name2; int tile_size; auto name_set = comp->get_loop_level_names(); auto t_name = kv.get_name(); if(comp->refused == true) { t_name = comp->temp_access_map[t_name]; } if ( std::find(name_set.begin(), name_set.end(), t_name) == name_set.end() ) { for (auto &kv2: comp->get_access_map()) { if(t_name==kv2.first) { tile_name1 = kv2.second; loc = comp->iterators_location_map[kv2.second]; // loc = comp->get_loop_level_number_from_dimension_name(kv2.second); } } int index = 0; for(int i=0; ilevel&&start_loops_position[i-1]<=level) { index = start_loops_position[i-1]; break; } if(i == start_loops_position.size()-1 ) { index = start_loops_position[i]; break; } } mlir::Value value = ops[loc].getInductionVar(); if ( std::find(index_values.begin(), index_values.end(), value)== index_values.end() ) { index_values.push_back(value); } if(comp->is_tiled ==true) { for (auto &kv3: comp->get_tile_map()) { if(tile_name1==kv3.first) { loc = comp->iterators_location_map[kv3.second]; // loc_2 = comp->get_loop_level_number_from_dimension_name(kv3.second); tile_name2 = kv3.second; } } for (auto &kv4: comp->get_tile_size_map()) { if(tile_name1==kv4.first) { tile_size = kv4.second; } } mlir::Value value2 = ops[loc].getInductionVar(); if ( std::find(index_values.begin(), index_values.end(), value2)== index_values.end() ) { index_values.push_back(value2); } int index_2 = std::find(index_values.begin(), index_values.end(), value2) - index_values.begin(); int index_3 = std::find(index_values.begin(), index_values.end(), value) - index_values.begin(); index_args.push_back(builder.getAffineDimExpr(index_3)+builder.getAffineDimExpr(index_2)*tile_size); } else 
if(comp->is_skewed == true) { loc = comp->iterators_location_map[comp->iterator_to_skew]; mlir::Value value2 = ops[loc].getInductionVar(); if ( std::find(index_values.begin(), index_values.end(), value2)== index_values.end() ) { index_values.push_back(value2); } //TODO: find the right dim int index_2 = std::find(index_values.begin(), index_values.end(), value2) - index_values.begin(); int index_3 = std::find(index_values.begin(), index_values.end(), value) - index_values.begin(); // placeholder_index_args.push_back(builder.getAffineDimExpr(index_3)-builder.getAffineDimExpr(index_2)*2); // placeholder_index_args.push_back(builder.getAffineDimExpr(index_3)); if(tile_name1 == comp->iterator_to_modify) { index_args.push_back(builder.getAffineDimExpr(index_3)-builder.getAffineDimExpr(index_2)*comp->skew_factor); }else{ index_args.push_back(builder.getAffineDimExpr(index_3)); } index_flag = true; } else { // TODO: std::cout<<"something went wrong"<refused == true) { loc = comp->iterators_location_map[t_name]; } else { loc = comp->iterators_location_map[kv.get_name()]; } // loc = comp->get_loop_level_number_from_dimension_name(kv.get_name()); int index = 0; for(int i=0; ilevel&&start_loops_position[i-1]<=level){ index = start_loops_position[i-1]; break; } if(i == start_loops_position.size()-1 ){ index = start_loops_position[i]; break; } } mlir::Value value = ops[loc].getInductionVar(); if(std::find(index_values.begin(), index_values.end(), value) == index_values.end()){ index_values.push_back(value); } int index_1 = std::find(index_values.begin(), index_values.end(), value) - index_values.begin(); index_args.push_back(builder.getAffineDimExpr(index_1)); } } } auto map = mlir::AffineMap::get(index_values.size(), 0, ArrayRef (index_args),builder.getContext()); mlir::ValueRange vr=llvm::makeArrayRef(index_values); //TODO.number of variables mlir::AffineLoadOp a; if(index_flag == true){ loadedRhs = builder.create(builder.getUnknownLoc(), arg_a,map,vr); }else{ loadedRhs = 
builder.create(builder.getUnknownLoc(), arg_a ,vr); } } else if(right.get_expr_type() == polyfp::e_var) { int index_argument; std::string arg_name = right.get_name(); for(int i=0; i(builder.getUnknownLoc(), loadedLhs, loadedRhs); current_op.push_back(add_1); all_current_op.push_back(add_1); }else if(polyfp_expr.get_op_type() == polyfp::o_mul) { auto mul_1 = builder.create(builder.getUnknownLoc(), loadedLhs, loadedRhs); current_op.push_back(mul_1); all_current_op.push_back(mul_1); }else if(polyfp_expr.get_op_type() == polyfp::o_sub) { auto sub_1 = builder.create(builder.getUnknownLoc(), loadedLhs, loadedRhs); current_op.push_back(sub_1); all_current_op.push_back(sub_1); }else if(polyfp_expr.get_op_type() == polyfp::o_div) { auto div_1 = builder.create(builder.getUnknownLoc(), loadedLhs, loadedRhs); current_op.push_back(div_1); all_current_op.push_back(div_1); }else if(polyfp_expr.get_op_type() == polyfp::o_max) { auto max_1 = builder.create(builder.getUnknownLoc(), loadedLhs, loadedRhs); current_op.push_back(max_1); all_current_op.push_back(max_1); // theModule.dump(); } } if (right.get_op_type() == polyfp::o_access && left.get_expr_type() == polyfp::e_var) { if(polyfp_expr.get_op_type() == polyfp::o_add){ auto add_1 = builder.create(builder.getUnknownLoc(), arg_left, loadedRhs); current_op.push_back(add_1); all_current_op.push_back(add_1); }else if(polyfp_expr.get_op_type() == polyfp::o_mul){ auto mul_1 = builder.create(builder.getUnknownLoc(), arg_left, loadedRhs); current_op.push_back(mul_1); all_current_op.push_back(mul_1); }else if(polyfp_expr.get_op_type() == polyfp::o_sub){ auto sub_1 = builder.create(builder.getUnknownLoc(), arg_left, loadedRhs); current_op.push_back(sub_1); all_current_op.push_back(sub_1); }else if(polyfp_expr.get_op_type() == polyfp::o_div){ auto div_1 = builder.create(builder.getUnknownLoc(), arg_left, loadedRhs); current_op.push_back(div_1); all_current_op.push_back(div_1); }else if(polyfp_expr.get_op_type() == polyfp::o_max){ auto max_1 
= builder.create(builder.getUnknownLoc(), arg_left, loadedRhs); current_op.push_back(max_1); all_current_op.push_back(max_1); } } if (left.get_op_type() == polyfp::o_access && right.get_expr_type() == polyfp::e_var) { if(polyfp_expr.get_op_type() == polyfp::o_add){ auto add_1 = builder.create(builder.getUnknownLoc(), loadedLhs, arg_right); current_op.push_back(add_1); all_current_op.push_back(add_1); }else if(polyfp_expr.get_op_type() == polyfp::o_mul){ auto mul_1 = builder.create(builder.getUnknownLoc(), loadedLhs, arg_right); current_op.push_back(mul_1); all_current_op.push_back(mul_1); }else if(polyfp_expr.get_op_type() == polyfp::o_sub){ auto sub_1 = builder.create(builder.getUnknownLoc(), loadedLhs, arg_right); current_op.push_back(sub_1); all_current_op.push_back(sub_1); }else if(polyfp_expr.get_op_type() == polyfp::o_div){ auto div_1 = builder.create(builder.getUnknownLoc(), loadedLhs, arg_right); current_op.push_back(div_1); all_current_op.push_back(div_1); }else if(polyfp_expr.get_op_type() == polyfp::o_max){ auto max_1 = builder.create(builder.getUnknownLoc(), loadedLhs, arg_right); current_op.push_back(max_1); all_current_op.push_back(max_1); } } if (left.get_expr_type() == polyfp::e_var && right.get_expr_type() == polyfp::e_var) { if(polyfp_expr.get_op_type() == polyfp::o_add){ auto add_1 = builder.create(builder.getUnknownLoc(), arg_left, arg_right); current_op.push_back(add_1); all_current_op.push_back(add_1); }else if(polyfp_expr.get_op_type() == polyfp::o_mul){ auto mul_1 = builder.create(builder.getUnknownLoc(), arg_left, arg_right); current_op.push_back(mul_1); all_current_op.push_back(mul_1); }else if(polyfp_expr.get_op_type() == polyfp::o_sub){ auto sub_1 = builder.create(builder.getUnknownLoc(), arg_left, arg_right); current_op.push_back(sub_1); all_current_op.push_back(sub_1); }else if(polyfp_expr.get_op_type() == polyfp::o_div){ auto div_1 = builder.create(builder.getUnknownLoc(), arg_left, arg_right); current_op.push_back(div_1); 
all_current_op.push_back(div_1); }else if(polyfp_expr.get_op_type() == polyfp::o_max){ auto max_1 = builder.create(builder.getUnknownLoc(), arg_left, arg_right); current_op.push_back(max_1); all_current_op.push_back(max_1); } } // theModule.dump(); } if ((right.get_op_type() == polyfp::o_access || right.get_expr_type() == polyfp::e_var ) && (left.get_op_type() != polyfp::o_access && left.get_expr_type() == polyfp::e_op)) { mlir::AffineLoadOp loadedRhs; a_print_expr(left,comp,level); if(right.get_op_type() == polyfp::o_access) { std::string a_name = right.get_name(); int index_a; for(int i=0; i index_values; SmallVector index_args; bool index_flag = false; for (auto &kv: right.get_access()) { int bias = 0; if(kv.get_expr_type() == polyfp::e_op) { auto expr0 = kv.get_operand(0); auto expr1 = kv.get_operand(1); auto left_index = a_print_index(expr0,comp,index_values,level); auto right_index = a_print_index(expr1,comp,index_values,level); if(kv.get_op_type() == polyfp::o_sub){ index_args.push_back(left_index - right_index); index_flag = true; }else if(kv.get_op_type() == polyfp::o_add){ index_args.push_back(left_index + right_index); index_flag = true; }else if(kv.get_op_type() == polyfp::o_mul){ index_args.push_back(left_index * right_index); index_flag = true; }else if(kv.get_op_type() == polyfp::o_div){ index_args.push_back(left_index.floorDiv(right_index)); index_flag = true; } } else { int loc =0; int loc_2 =0; std::string tile_name1; std::string tile_name2; int tile_size; auto name_set = comp->get_loop_level_names(); auto t_name = kv.get_name(); if(comp->refused == true) { t_name = comp->temp_access_map[t_name]; } if ( std::find(name_set.begin(), name_set.end(), t_name) == name_set.end() ) { for (auto &kv2: comp->get_access_map()) { if(t_name==kv2.first ){ tile_name1 = kv2.second; loc = comp->iterators_location_map[kv2.second]; // loc = comp->get_loop_level_number_from_dimension_name(kv2.second); } } int index = 0; for(int i=0; 
ilevel&&start_loops_position[i-1]<=level) { index = start_loops_position[i-1]; break; } if(i == start_loops_position.size()-1 ) { index = start_loops_position[i]; break; } } mlir::Value value = ops[loc].getInductionVar(); if(std::find(index_values.begin(), index_values.end(), value)== index_values.end() ) { index_values.push_back(value); } if(comp->is_tiled ==true) { for (auto &kv3: comp->get_tile_map()) { if(tile_name1==kv3.first){ loc = comp->iterators_location_map[kv3.second]; // loc_2 = comp->get_loop_level_number_from_dimension_name(kv3.second); tile_name2 = kv3.second; } } for (auto &kv4: comp->get_tile_size_map()) { if(tile_name1==kv4.first){ tile_size = kv4.second; } } mlir::Value value2 = ops[loc].getInductionVar(); if(std::find(index_values.begin(), index_values.end(), value2)== index_values.end() ) { index_values.push_back(value2); } //TODO int index_2 = std::find(index_values.begin(), index_values.end(), value2) - index_values.begin(); int index_3 = std::find(index_values.begin(), index_values.end(), value) - index_values.begin(); index_args.push_back(builder.getAffineDimExpr(index_3)+builder.getAffineDimExpr(index_2)*tile_size); } else if(comp->is_skewed == true) { loc = comp->iterators_location_map[comp->iterator_to_skew]; mlir::Value value2 = ops[loc].getInductionVar(); if ( std::find(index_values.begin(), index_values.end(), value2)== index_values.end() ) { index_values.push_back(value2); } //TODO: find the right dim int index_2 = std::find(index_values.begin(), index_values.end(), value2) - index_values.begin(); int index_3 = std::find(index_values.begin(), index_values.end(), value) - index_values.begin(); // placeholder_index_args.push_back(builder.getAffineDimExpr(index_3)-builder.getAffineDimExpr(index_2)*2); // placeholder_index_args.push_back(builder.getAffineDimExpr(index_3)); if(tile_name1 == comp->iterator_to_modify){ index_args.push_back(builder.getAffineDimExpr(index_3)-builder.getAffineDimExpr(index_2)*comp->skew_factor); }else{ 
index_args.push_back(builder.getAffineDimExpr(index_3)); } index_flag = true; } else{ // TODO } index_flag = true; }else{ if(comp->refused == true){ // t_name = comp->temp_access_map[t_name]; loc = comp->iterators_location_map[t_name]; }else{ loc = comp->iterators_location_map[kv.get_name()]; } // loc = comp->get_loop_level_number_from_dimension_name(kv.get_name()); int index = 0; for(int i=0; ilevel&&start_loops_position[i-1]<=level){ index = start_loops_position[i-1]; break; } if(i == start_loops_position.size()-1 ){ index = start_loops_position[i]; break; } } mlir::Value value = ops[loc].getInductionVar(); if ( std::find(index_values.begin(), index_values.end(), value) == index_values.end()){ index_values.push_back(value); } //TODO int index_2 = std::find(index_values.begin(), index_values.end(), value) - index_values.begin(); index_args.push_back(builder.getAffineDimExpr(index_2)); } } } auto map = mlir::AffineMap::get(index_values.size(), 0, ArrayRef (index_args),builder.getContext()); mlir::ValueRange vr=llvm::makeArrayRef(index_values); //TODO.number of variables mlir::AffineLoadOp a; if(index_flag == true){ loadedRhs = builder.create(builder.getUnknownLoc(), arg_a,map,vr); }else{ loadedRhs = builder.create(builder.getUnknownLoc(), arg_a ,vr); } auto the_op = all_current_op.back(); auto index = the_op.index(); if(polyfp_expr.get_op_type() == polyfp::o_add) { if(index==0){ auto op_to_process = std::get<0>(the_op); auto add_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(add_1); // add_1.dump(); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto add_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(add_1); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto add_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(add_1); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto add_1 = 
builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(add_1); } } else if(polyfp_expr.get_op_type() == polyfp::o_mul) { if(index==0){ auto op_to_process = std::get<0>(the_op); auto mul_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(mul_1); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto mul_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(mul_1); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto mul_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(mul_1); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto mul_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(mul_1); } } else if(polyfp_expr.get_op_type() == polyfp::o_sub) { if(index==0){ auto op_to_process = std::get<0>(the_op); auto sub_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(sub_1); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto sub_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(sub_1); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto sub_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(sub_1); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto sub_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(sub_1); } } else if(polyfp_expr.get_op_type() == polyfp::o_div) { if(index==0){ auto op_to_process = std::get<0>(the_op); auto div_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(div_1); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto div_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); 
all_current_op.push_back(div_1); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto div_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(div_1); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto div_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(div_1); } } else if(polyfp_expr.get_op_type() == polyfp::o_max) { if(index==0){ auto op_to_process = std::get<0>(the_op); auto max_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(max_1); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto max_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(max_1); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto max_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(max_1); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto max_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(max_1); } } } if(right.get_expr_type() == polyfp::e_var) { int index_argument; std::string arg_name = right.get_name(); for(int i=0; i(the_op); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,arg_right); all_current_op.push_back(add_1); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,arg_right); all_current_op.push_back(add_1); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,arg_right); all_current_op.push_back(add_1); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,arg_right); all_current_op.push_back(add_1); } } else if(polyfp_expr.get_op_type() == polyfp::o_mul) { if(index==0){ auto op_to_process = 
std::get<0>(the_op); auto mul_1 = builder.create(builder.getUnknownLoc(), op_to_process,arg_right); all_current_op.push_back(mul_1); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto mul_1 = builder.create(builder.getUnknownLoc(), op_to_process,arg_right); all_current_op.push_back(mul_1); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto mul_1 = builder.create(builder.getUnknownLoc(), op_to_process,arg_right); all_current_op.push_back(mul_1); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto mul_1 = builder.create(builder.getUnknownLoc(), op_to_process,arg_right); all_current_op.push_back(mul_1); } } else if(polyfp_expr.get_op_type() == polyfp::o_sub) { if(index==0){ auto op_to_process = std::get<0>(the_op); auto sub_1 = builder.create(builder.getUnknownLoc(), op_to_process,arg_right); all_current_op.push_back(sub_1); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto sub_1 = builder.create(builder.getUnknownLoc(), op_to_process,arg_right); all_current_op.push_back(sub_1); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto sub_1 = builder.create(builder.getUnknownLoc(), op_to_process,arg_right); all_current_op.push_back(sub_1); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto sub_1 = builder.create(builder.getUnknownLoc(), op_to_process,arg_right); all_current_op.push_back(sub_1); } } else if(polyfp_expr.get_op_type() == polyfp::o_div) { if(index==0){ auto op_to_process = std::get<0>(the_op); auto div_1 = builder.create(builder.getUnknownLoc(), op_to_process,arg_right); all_current_op.push_back(div_1); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto div_1 = builder.create(builder.getUnknownLoc(), op_to_process,arg_right); all_current_op.push_back(div_1); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto div_1 = builder.create(builder.getUnknownLoc(), op_to_process,arg_right); all_current_op.push_back(div_1); }else if(index==3){ 
auto op_to_process = std::get<3>(the_op); auto div_1 = builder.create(builder.getUnknownLoc(), op_to_process,arg_right); all_current_op.push_back(div_1); } } else if(polyfp_expr.get_op_type() == polyfp::o_max) { if(index==0){ auto op_to_process = std::get<0>(the_op); auto max_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(max_1); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto max_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(max_1); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto max_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(max_1); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto max_1 = builder.create(builder.getUnknownLoc(), loadedRhs, op_to_process); all_current_op.push_back(max_1); } } } // theModule.dump(); } if ((left.get_op_type() == polyfp::o_access || left.get_expr_type() == polyfp::e_var ) && (right.get_op_type() != polyfp::o_access && right.get_expr_type() == polyfp::e_op)) { mlir::AffineLoadOp loadedLhs; a_print_expr(right,comp,level); if(left.get_op_type() == polyfp::o_access){ std::string a_name = left.get_name(); int index_a; for(int i=0; i index_values; SmallVector index_args; bool index_flag = false; for (auto &kv: left.get_access()){ int bias = 0; if(kv.get_expr_type() == polyfp::e_op){ auto expr0 = kv.get_operand(0); auto expr1 = kv.get_operand(1); auto left_index = a_print_index(expr0,comp,index_values,level); auto right_index = a_print_index(expr1,comp,index_values,level); if(kv.get_op_type() == polyfp::o_sub){ index_args.push_back(left_index - right_index); index_flag = true; }else if(kv.get_op_type() == polyfp::o_add){ index_args.push_back(left_index + right_index); index_flag = true; }else if(kv.get_op_type() == polyfp::o_mul){ index_args.push_back(left_index * right_index); index_flag = true; }else if(kv.get_op_type() == 
polyfp::o_div){ index_args.push_back(left_index.floorDiv(right_index)); index_flag = true; } } else{ int loc =0; int loc_2 =0; std::string tile_name1; std::string tile_name2; int tile_size; auto name_set = comp->get_loop_level_names(); auto t_name = kv.get_name(); if(comp->refused == true){ t_name = comp->temp_access_map[t_name]; } //TODO int index = 0; for(int i=0; ilevel&&start_loops_position[i-1]<=level){ index = start_loops_position[i-1]; break; } if(i == start_loops_position.size()-1 ){ index = start_loops_position[i]; break; } } if(std::find(name_set.begin(), name_set.end(), t_name) == name_set.end()){ for(auto &kv2: comp->get_access_map()){ if(t_name==kv2.first){ tile_name1 = kv2.second; loc = comp->iterators_location_map[kv2.second]; // loc = comp->get_loop_level_number_from_dimension_name(kv2.second); } } mlir::Value value = ops[loc].getInductionVar(); if(std::find(index_values.begin(), index_values.end(), value)== index_values.end()){ index_values.push_back(value); } if(comp->is_tiled ==true){ for (auto &kv3: comp->get_tile_map()){ if(tile_name1==kv3.first){ loc = comp->iterators_location_map[kv3.second]; // loc_2 = comp->get_loop_level_number_from_dimension_name(kv3.second); tile_name2 = kv3.second; } } for (auto &kv4: comp->get_tile_size_map()){ if(tile_name1==kv4.first){ tile_size = kv4.second; } } mlir::Value value2 = ops[loc].getInductionVar(); if(std::find(index_values.begin(), index_values.end(), value2)== index_values.end() ){ index_values.push_back(value2); } //TODO int index_2 = std::find(index_values.begin(), index_values.end(), value2) - index_values.begin(); int index = std::find(index_values.begin(), index_values.end(), value) - index_values.begin(); index_args.push_back(builder.getAffineDimExpr(index)+builder.getAffineDimExpr(index_2)*tile_size); }else if(comp->is_skewed == true){ loc = comp->iterators_location_map[comp->iterator_to_skew]; mlir::Value value2 = ops[loc].getInductionVar(); if ( std::find(index_values.begin(), 
index_values.end(), value2)== index_values.end() ){ index_values.push_back(value2); } //TODO: find the right dim int index_2 = std::find(index_values.begin(), index_values.end(), value2) - index_values.begin(); int index_3 = std::find(index_values.begin(), index_values.end(), value) - index_values.begin(); // placeholder_index_args.push_back(builder.getAffineDimExpr(index_3)-builder.getAffineDimExpr(index_2)*2); // placeholder_index_args.push_back(builder.getAffineDimExpr(index_3)); if(tile_name1 == comp->iterator_to_modify){ index_args.push_back(builder.getAffineDimExpr(index_3)-builder.getAffineDimExpr(index_2)*comp->skew_factor); }else{ index_args.push_back(builder.getAffineDimExpr(index_3)); } index_flag = true; }else{ // TODO } index_flag = true; }else{ //TODO // loc = comp->get_loop_level_number_from_dimension_name(kv.get_name()); if(comp->refused == true){ // t_name = comp->temp_access_map[t_name]; loc = comp->iterators_location_map[t_name]; }else{ loc = comp->iterators_location_map[kv.get_name()]; } mlir::Value value = ops[loc].getInductionVar(); if(std::find(index_values.begin(), index_values.end(), value) == index_values.end()){ index_values.push_back(value); } //TODO int index_1 = std::find(index_values.begin(), index_values.end(), value) - index_values.begin(); index_args.push_back(builder.getAffineDimExpr(index_1)); } } } auto map = mlir::AffineMap::get(index_values.size(), 0, ArrayRef (index_args),builder.getContext()); mlir::ValueRange vr=llvm::makeArrayRef(index_values); //TODO.number of variables mlir::AffineLoadOp a; if(index_flag == true){ loadedLhs = builder.create(builder.getUnknownLoc(), arg_a,map,vr); }else{ loadedLhs = builder.create(builder.getUnknownLoc(), arg_a ,vr); } auto the_op = all_current_op.back(); auto index = the_op.index(); if(polyfp_expr.get_op_type() == polyfp::o_add){ if(index==0){ auto op_to_process = std::get<0>(the_op); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,loadedLhs); 
all_current_op.push_back(add_1); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,loadedLhs); all_current_op.push_back(add_1); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,loadedLhs); all_current_op.push_back(add_1); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,loadedLhs); all_current_op.push_back(add_1); } } else if(polyfp_expr.get_op_type() == polyfp::o_mul){ if(index==0){ auto op_to_process = std::get<0>(the_op); auto mul_1 = builder.create(builder.getUnknownLoc(), op_to_process,loadedLhs); all_current_op.push_back(mul_1); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto mul_1 = builder.create(builder.getUnknownLoc(), op_to_process,loadedLhs); all_current_op.push_back(mul_1); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto mul_1 = builder.create(builder.getUnknownLoc(), op_to_process,loadedLhs); all_current_op.push_back(mul_1); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto mul_1 = builder.create(builder.getUnknownLoc(), op_to_process,loadedLhs); all_current_op.push_back(mul_1); } } else if(polyfp_expr.get_op_type() == polyfp::o_sub){ if(index==0){ auto op_to_process = std::get<0>(the_op); auto sub_1 = builder.create(builder.getUnknownLoc(), op_to_process,loadedLhs); all_current_op.push_back(sub_1); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto sub_1 = builder.create(builder.getUnknownLoc(), op_to_process,loadedLhs); all_current_op.push_back(sub_1); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto sub_1 = builder.create(builder.getUnknownLoc(), op_to_process,loadedLhs); all_current_op.push_back(sub_1); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto sub_1 = builder.create(builder.getUnknownLoc(), 
op_to_process,loadedLhs); all_current_op.push_back(sub_1); } } else if(polyfp_expr.get_op_type() == polyfp::o_div){ if(index==0){ auto op_to_process = std::get<0>(the_op); auto div_1 = builder.create(builder.getUnknownLoc(), op_to_process,loadedLhs); all_current_op.push_back(div_1); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto div_1 = builder.create(builder.getUnknownLoc(), op_to_process,loadedLhs); all_current_op.push_back(div_1); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto div_1 = builder.create(builder.getUnknownLoc(), op_to_process,loadedLhs); all_current_op.push_back(div_1); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto div_1 = builder.create(builder.getUnknownLoc(), op_to_process,loadedLhs); all_current_op.push_back(div_1); } }else if(polyfp_expr.get_op_type() == polyfp::o_max){ if(index==0){ auto op_to_process = std::get<0>(the_op); auto max_1 = builder.create(builder.getUnknownLoc(), op_to_process,loadedLhs); all_current_op.push_back(max_1); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto max_1 = builder.create(builder.getUnknownLoc(), op_to_process,loadedLhs); all_current_op.push_back(max_1); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto max_1 = builder.create(builder.getUnknownLoc(), op_to_process,loadedLhs); all_current_op.push_back(max_1); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto max_1 = builder.create(builder.getUnknownLoc(), op_to_process,loadedLhs); all_current_op.push_back(max_1); } } } if(left.get_expr_type() == polyfp::e_var){ int index_argument; std::string arg_name = left.get_name(); for(int i=0; i(the_op); auto add_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); all_current_op.push_back(add_1); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto add_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); all_current_op.push_back(add_1); }else if(index==2){ auto 
op_to_process = std::get<2>(the_op); auto add_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); all_current_op.push_back(add_1); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto add_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); all_current_op.push_back(add_1); } } if(polyfp_expr.get_op_type() == polyfp::o_mul){ if(index==0){ auto op_to_process = std::get<0>(the_op); auto mul_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); all_current_op.push_back(mul_1); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto mul_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); all_current_op.push_back(mul_1); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto mul_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); all_current_op.push_back(mul_1); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto mul_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); all_current_op.push_back(mul_1); } } if(polyfp_expr.get_op_type() == polyfp::o_sub){ if(index==0){ auto op_to_process = std::get<0>(the_op); auto sub_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); all_current_op.push_back(sub_1); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto sub_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); all_current_op.push_back(sub_1); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto sub_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); all_current_op.push_back(sub_1); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto sub_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); all_current_op.push_back(sub_1); } } if(polyfp_expr.get_op_type() == polyfp::o_div){ if(index==0){ auto op_to_process = std::get<0>(the_op); auto div_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); 
all_current_op.push_back(div_1); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto div_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); all_current_op.push_back(div_1); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto div_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); all_current_op.push_back(div_1); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto div_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); all_current_op.push_back(div_1); } } else if(polyfp_expr.get_op_type() == polyfp::o_max){ if(index==0){ auto op_to_process = std::get<0>(the_op); auto max_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); all_current_op.push_back(max_1); }else if(index==1){ auto op_to_process = std::get<1>(the_op); auto max_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); all_current_op.push_back(max_1); }else if(index==2){ auto op_to_process = std::get<2>(the_op); auto max_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); all_current_op.push_back(max_1); }else if(index==3){ auto op_to_process = std::get<3>(the_op); auto max_1 = builder.create(builder.getUnknownLoc(), arg_left, op_to_process); all_current_op.push_back(max_1); } } } } if ((left.get_op_type() != polyfp::o_access && left.get_expr_type() == polyfp::e_op) && (right.get_op_type() != polyfp::o_access && right.get_expr_type() == polyfp::e_op)){ mlir::AffineLoadOp loadedLhs; a_print_expr(left,comp,level); a_print_expr(right,comp,level); auto the_op = all_current_op.back(); auto the_op2 = all_current_op[all_current_op.size()-2]; auto index = the_op.index(); auto index2 = the_op2.index(); //TODO, sub, mul ,div if(polyfp_expr.get_op_type() == polyfp::o_add){ if(index==0){ auto op_to_process = std::get<0>(the_op); if(index2==0){ auto op_to_process2 = std::get<0>(the_op2); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,op_to_process2); 
all_current_op.push_back(add_1); }else if(index2==1){ auto op_to_process2 = std::get<1>(the_op2); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,op_to_process2); all_current_op.push_back(add_1); } else if(index2==2){ auto op_to_process2 = std::get<2>(the_op2); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,op_to_process2); all_current_op.push_back(add_1); } else if(index2==3){ auto op_to_process2 = std::get<3>(the_op2); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,op_to_process2); all_current_op.push_back(add_1); } }else if(index==1){ auto op_to_process = std::get<1>(the_op); if(index2==0){ auto op_to_process2 = std::get<0>(the_op2); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,op_to_process2); all_current_op.push_back(add_1); }else if(index2==1){ auto op_to_process2 = std::get<1>(the_op2); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,op_to_process2); all_current_op.push_back(add_1); } else if(index2==2){ auto op_to_process2 = std::get<2>(the_op2); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,op_to_process2); all_current_op.push_back(add_1); } else if(index2==3){ auto op_to_process2 = std::get<3>(the_op2); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,op_to_process2); all_current_op.push_back(add_1); } }else if(index==2){ auto op_to_process = std::get<2>(the_op); if(index2==0){ auto op_to_process2 = std::get<0>(the_op2); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,op_to_process2); all_current_op.push_back(add_1); }else if(index2==1){ auto op_to_process2 = std::get<1>(the_op2); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,op_to_process2); all_current_op.push_back(add_1); } else if(index2==2){ auto op_to_process2 = std::get<2>(the_op2); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,op_to_process2); all_current_op.push_back(add_1); } else if(index2==3){ 
auto op_to_process2 = std::get<3>(the_op2); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,op_to_process2); all_current_op.push_back(add_1); } }else if(index==3){ auto op_to_process = std::get<3>(the_op); if(index2==0){ auto op_to_process2 = std::get<0>(the_op2); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,op_to_process2); all_current_op.push_back(add_1); }else if(index2==1){ auto op_to_process2 = std::get<1>(the_op2); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,op_to_process2); all_current_op.push_back(add_1); } else if(index2==2){ auto op_to_process2 = std::get<2>(the_op2); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,op_to_process2); all_current_op.push_back(add_1); } else if(index2==3){ auto op_to_process2 = std::get<3>(the_op2); auto add_1 = builder.create(builder.getUnknownLoc(), op_to_process,op_to_process2); all_current_op.push_back(add_1); } } } // if(polyfp_expr.get_op_type() == polyfp::o_add){ // auto add_1 = builder.create(builder.getUnknownLoc(), add_op[add_op.size()-2], add_op.back()); // all_current_op.push_back(add_1); // } // else if(polyfp_expr.get_op_type() == polyfp::o_mul){ // auto mul_1 = builder.create(builder.getUnknownLoc(), mul_op[mul_op.size()-2], mul_op.back()); // all_current_op.push_back(mul_1); // } // else if(polyfp_expr.get_op_type() == polyfp::o_sub){ // auto sub_1 = builder.create(builder.getUnknownLoc(), mul_op[mul_op.size()-2], mul_op.back()); // all_current_op.push_back(sub_1); // } // else if(polyfp_expr.get_op_type() == polyfp::o_div){ // auto div_1 = builder.create(builder.getUnknownLoc(), mul_op[mul_op.size()-2], mul_op.back()); // all_current_op.push_back(div_1); // } } } mlir::OwningOpRef mlirGen2(mlir::MLIRContext &context, polyfp::function &fct, isl_ast_node *node, int &level) { auto manager = MLIRGenImpl(context); manager.mlirGen1(fct,node,level,true, false, false); // manager.getModule().dump(); for(auto &comp : 
fct.leader_computations) { for(auto &kv : comp->get_directive_map()) { if(kv.second == "pipeline") { int loc_2 = comp->get_loop_level_number_from_dimension_name(kv.first); int loc = comp->iterators_location_map[kv.first]; // index = loc + index; mlir::scalehls::setLoopDirective(manager.ops[loc], true, comp->II, false, false); for(int i=1; i<=loc_2; i++) { mlir::scalehls::setLoopDirective(manager.ops[loc-i], false, comp->II, false, true); } } } } auto map = manager.get_array_map(); mlir::scalehls::setTopFuncAttr(manager.get_funcs()[0]); for(auto &kv: fct.get_partition_map()) { SmallVector kinds; SmallVector factors; for(auto &factor: std::get<1>(kv)) { factors.push_back(factor); } for(auto &type: std::get<2>(kv)) { if(type == "cyclic"){ kinds.push_back(mlir::scalehls::hls::PartitionKind::CYCLIC); }else if(type == "block"){ kinds.push_back(mlir::scalehls::hls::PartitionKind::BLOCK); }else if(type == "none"){ kinds.push_back(mlir::scalehls::hls::PartitionKind::NONE); } } mlir::scalehls::applyArrayPartition(manager.get_funcs()[0].getArgument(map[std::get<0>(kv)]), factors, kinds,/*updateFuncSignature=*/true); // manager.getModule().dump(); } // manager.getModule().dump(); // mlir::scalehls::applyFuncPreprocess(manager.get_funcs()[0], true); mlir::scalehls::applyFuncPreprocess(manager.get_funcs()[0], true); // mlir::scalehls::setFuncDirective(manager.get_funcs()[0], false, 1, true); for(auto &comp: fct.leader_computations) { if(comp->is_unrolled == true) { for(int i=0; iunroll_dimension.size(); i++) { // int bias = comp->get_loop_level_number_from_dimension_name(comp->unroll_dimension[i].get_name()); // int loc = fct.leader_computation_index[comp]; int loc = comp->iterators_location_map[comp->unroll_dimension[i].get_name()]; if(comp->unroll_factor[i] != -1) { mlir::loopUnrollUpToFactor(manager.ops[loc],comp->unroll_factor[i]); } else { mlir::loopUnrollFull(manager.ops[loc]); } } } } // mlir::scalehls::applyMemoryOpts(manager.get_funcs()[0]); manager.getModule().dump(); 
// Read target specification JSON file. std::string errorMessage; std::string pwd = std::filesystem::current_path().parent_path(); auto configFile = mlir::openInputFile(pwd+"/samples/config.json", &errorMessage); if (!configFile) { llvm::errs() << errorMessage << "\n"; } // Parse JSON file into memory. auto config = llvm::json::parse(configFile->getBuffer()); if (!config) { llvm::errs() << "failed to parse the target spec json file\n"; } auto configObj = config.get().getAsObject(); if (!configObj) { llvm::errs() << "support an object in the target spec json file, found " "something else\n"; } // Collect profiling latency and DSP usage data, where default values are // based on Xilinx PYNQ-Z1 board. llvm::StringMap latencyMap; mlir::scalehls::getLatencyMap(configObj, latencyMap); llvm::StringMap dspUsageMap; mlir::scalehls::getDspUsageMap(configObj, dspUsageMap); int dspNum; return manager.getModule(); } mlir::ModuleOp polyfp::MLIRGenImpl::getModule() { return this->theModule; } void gen_mlir(polyfp::function &fct, isl_ast_node *node, int &level) { mlir::MLIRContext context; context.disableMultithreading(); context.getOrLoadDialect(); context.getOrLoadDialect(); context.getOrLoadDialect(); context.getOrLoadDialect(); context.getOrLoadDialect(); mlir::OwningOpRef module = mlirGen2(context, fct, node, level); mlir::verify(*module, false); if (failed(mlir::verify(*module, false))) { module->emitError("module verification error"); } // module->dump(); std::error_code error; std::string s = fct.get_name(); std::string pwd = std::filesystem::current_path().parent_path(); std::string path = pwd+"/samples/"+s+"/"+s+".mlir"; llvm::raw_fd_ostream os(path, error); os << *module; } void function::gen_mlir_stmt(){ int level = 0; gen_mlir(*this,this->get_isl_ast(),level); } } ================================================ FILE: lib/polyhedral/generator_isl.cpp ================================================ #include "generator_isl.h" namespace polyfp{ polyfp::expr 
polyfp_expr_from_isl_ast_expr(isl_ast_expr *isl_expr); void generator::get_rhs_accesses(const polyfp::function *func, const polyfp::compute *comp, std::vector &accesses, bool return_buffer_accesses) { const polyfp::expr &rhs = comp->get_expr(); generator::traverse_expr_and_extract_accesses(func, comp, rhs, accesses, return_buffer_accesses); } isl_map *create_map_from_domain_and_range(isl_set *domain, isl_set *range) { // polyfp::str_dump("Domain:", isl_set_to_str(domain)); // polyfp::str_dump("Range:", isl_set_to_str(range)); // Extracting the spaces and aligning them isl_space *sp1 = isl_set_get_space(domain); isl_space *sp2 = isl_set_get_space(range); sp1 = isl_space_align_params(sp1, isl_space_copy(sp2)); sp2 = isl_space_align_params(sp2, isl_space_copy(sp1)); // Create the space access_domain -> sched_range. isl_space *sp = isl_space_map_from_domain_and_range( isl_space_copy(sp1), isl_space_copy(sp2)); isl_map *adapter = isl_map_universe(sp); polyfp::str_dump("Transformation map:", isl_map_to_str(adapter)); isl_space *sp_map = isl_map_get_space(adapter); isl_local_space *l_sp = isl_local_space_from_space(sp_map); // Add equality constraints. 
for (int i = 0; i < isl_space_dim(sp1, isl_dim_set); i++) { if (isl_space_has_dim_id(sp1, isl_dim_set, i) == true) { for (int j = 0; j < isl_space_dim (sp2, isl_dim_set); j++) { if (isl_space_has_dim_id(sp2, isl_dim_set, j) == true) { isl_id *id1 = isl_space_get_dim_id(sp1, isl_dim_set, i); isl_id *id2 = isl_space_get_dim_id(sp2, isl_dim_set, j); if (strcmp(isl_id_get_name(id1), isl_id_get_name(id2)) == 0) { isl_constraint *cst = isl_equality_alloc( isl_local_space_copy(l_sp)); cst = isl_constraint_set_coefficient_si(cst, isl_dim_in, i, 1); cst = isl_constraint_set_coefficient_si( cst, isl_dim_out, j, -1); adapter = isl_map_add_constraint(adapter, cst); } isl_id_free(id1); isl_id_free(id2); } } } } isl_space_free(sp1); isl_space_free(sp2); isl_local_space_free(l_sp); // polyfp::str_dump("Transformation map after adding equality constraints:", // isl_map_to_str(adapter))); return adapter; } isl_constraint *generator::get_constraint_for_access(int access_dimension, const polyfp::expr &access_expression, isl_map *access_relation, isl_constraint *cst, int coeff, const polyfp::function *fct) { if (access_expression.get_expr_type() == polyfp::e_val) { int64_t val = coeff * access_expression.get_int_val() - isl_val_get_num_si(isl_constraint_get_constant_val(cst)); cst = isl_constraint_set_constant_si(cst, -val); // polyfp::str_dump("Assigning -(coeff * access_expression.get_int_val() - isl_val_get_num_si(isl_constraint_get_constant_val(cst))) to the cst dimension. 
The value assigned is : " // + std::to_string(-val)); } else if (access_expression.get_expr_type() == polyfp::e_var) { assert(!access_expression.get_name().empty()); int dim0 = isl_space_find_dim_by_name(isl_map_get_space(access_relation), isl_dim_in, access_expression.get_name().c_str()); if (dim0 >= 0) { int current_coeff = -isl_val_get_num_si(isl_constraint_get_coefficient_val(cst, isl_dim_in, dim0)); coeff = current_coeff + coeff; cst = isl_constraint_set_coefficient_si(cst, isl_dim_in, dim0, -coeff); } else { access_relation = isl_map_add_dims(access_relation, isl_dim_param, 1); int pos = isl_map_dim(access_relation, isl_dim_param); isl_id *param_id = isl_id_alloc(fct->get_isl_ctx(), access_expression.get_name().c_str (), NULL); access_relation = isl_map_set_dim_id(access_relation, isl_dim_param, pos - 1, param_id); isl_local_space *ls2 = isl_local_space_from_space(isl_map_get_space(access_relation)); cst = isl_constraint_alloc_equality(ls2); cst = isl_constraint_set_coefficient_si(cst, isl_dim_param, pos - 1, -coeff); cst = isl_constraint_set_coefficient_si(cst, isl_dim_out, access_dimension, 1); } } else if (access_expression.get_expr_type() == polyfp::e_op) { if (access_expression.get_op_type() == polyfp::o_add) { polyfp::expr op0 = access_expression.get_operand(0); polyfp::expr op1 = access_expression.get_operand(1); cst = generator::get_constraint_for_access(access_dimension, op0, access_relation, cst, coeff, fct); isl_constraint_dump(cst); cst = generator::get_constraint_for_access(access_dimension, op1, access_relation, cst, coeff, fct); isl_constraint_dump(cst); } else if (access_expression.get_op_type() == polyfp::o_sub) { polyfp::expr op0 = access_expression.get_operand(0); polyfp::expr op1 = access_expression.get_operand(1); cst = generator::get_constraint_for_access(access_dimension, op0, access_relation, cst, coeff, fct); cst = generator::get_constraint_for_access(access_dimension, op1, access_relation, cst, -coeff, fct); } else if 
(access_expression.get_op_type() == polyfp::o_mul) { polyfp::expr op0 = access_expression.get_operand(0); polyfp::expr op1 = access_expression.get_operand(1); if (op0.get_expr_type() == polyfp::e_val) { coeff = coeff * op0.get_int_val(); cst = generator::get_constraint_for_access(access_dimension, op1, access_relation, cst, coeff, fct); } else if (op1.get_expr_type() == polyfp::e_val) { coeff = coeff * op1.get_int_val(); cst = generator::get_constraint_for_access(access_dimension, op0, access_relation, cst, coeff, fct); } } else { ERROR("Currently only Add, Sub, Minus, and Mul operations for accesses are supported for now.", true); } } return cst; } void generator::traverse_expr_and_extract_accesses(const polyfp::function *fct, const polyfp::compute *comp, const polyfp::expr &exp, std::vector &accesses, bool return_buffer_accesses) { assert(fct != NULL); assert(comp != NULL); if ((exp.get_expr_type() == polyfp::e_op) && ((exp.get_op_type() == polyfp::o_access) || (exp.get_op_type() == polyfp::o_placeholder))) { std::vector computations_vector = fct->get_computation_by_name(exp.get_name()); if (computations_vector.size() == 0) { // Search for update computations. computations_vector = fct->get_computation_by_name("_" + exp.get_name() + "_update_0"); assert((computations_vector.size() > 0) && "Computation not found."); } polyfp::compute *access_op_comp = computations_vector[0]; isl_set *lhs_comp_domain = isl_set_universe(isl_set_get_space(comp->get_iteration_domain())); isl_set *rhs_comp_domain = isl_set_universe(isl_set_get_space( access_op_comp->get_iteration_domain())); isl_map *access_map = create_map_from_domain_and_range(lhs_comp_domain, rhs_comp_domain); isl_set_free(lhs_comp_domain); isl_set_free(rhs_comp_domain); isl_map *access_to_comp = isl_map_universe(isl_map_get_space(access_map)); isl_map_free(access_map); // The dimension_number is a counter that indicates to which dimension // is the access associated. 
int access_dimension = 0; for (const auto &access : exp.get_access()) { isl_constraint *cst = isl_constraint_alloc_equality(isl_local_space_from_space(isl_map_get_space( access_to_comp))); cst = isl_constraint_set_coefficient_si(cst, isl_dim_out, access_dimension, 1); cst = generator::get_constraint_for_access(access_dimension, access, access_to_comp, cst, 1, fct); access_to_comp = isl_map_add_constraint(access_to_comp, cst); access_dimension++; } if (return_buffer_accesses) { isl_map *access_to_buff = isl_map_copy(access_op_comp->get_access_relation()); access_to_buff = isl_map_apply_range(isl_map_copy(access_to_comp), access_to_buff); accesses.push_back(access_to_buff); isl_map_free(access_to_comp); } else { accesses.push_back(access_to_comp); } } else if (exp.get_expr_type() == polyfp::e_op) { switch (exp.get_op_type()) { case polyfp::o_max: case polyfp::o_min: case polyfp::o_add: case polyfp::o_sub: case polyfp::o_mul: case polyfp::o_div: case polyfp::o_mod: default: ERROR("Extracting access function from an unsupported polyfp expression.", 1); } } } polyfp::expr utility::get_bound(isl_set *set, int dim, int upper) { assert(set != NULL); assert(dim >= 0); assert(dim < isl_space_dim(isl_set_get_space(set), isl_dim_set)); assert(isl_set_is_empty(set) == isl_bool_false); polyfp::expr e = polyfp::expr(); isl_ast_build *ast_build; isl_ctx *ctx = isl_set_get_ctx(set); ast_build = isl_ast_build_alloc(ctx); // Create identity map for set. isl_space *sp = isl_set_get_space(set); isl_map *sched = isl_map_identity(isl_space_copy(isl_space_map_from_set(sp))); sched = isl_map_set_tuple_name(sched, isl_dim_out, ""); // Generate the AST. isl_options_set_ast_build_atomic_upper_bound(ctx, 1); isl_options_get_ast_build_exploit_nested_bounds(ctx); isl_options_set_ast_build_group_coscheduled(ctx, 1); isl_options_set_ast_build_allow_else(ctx, 1); isl_options_set_ast_build_detect_min_max(ctx, 1); // Intersect the iteration domain with the domain of the schedule. 
isl_map *map = isl_map_intersect_domain( isl_map_copy(sched), isl_set_copy(set)); // Set iterator names int length = isl_map_dim(map, isl_dim_out); isl_id_list *iterators = isl_id_list_alloc(ctx, length); for (int i = 0; i < length; i++) { std::string name; if (isl_set_has_dim_name(set, isl_dim_set, i) == true) name = isl_set_get_dim_name(set, isl_dim_set, i); else name = generate_new_variable_name(); isl_id *id = isl_id_alloc(ctx, name.c_str(), NULL); iterators = isl_id_list_add(iterators, id); } ast_build = isl_ast_build_set_iterators(ast_build, iterators); isl_ast_node *node = isl_ast_build_node_from_schedule_map(ast_build, isl_union_map_from_map(map)); e = utility::extract_bound_expression(node, dim, upper); isl_ast_build_free(ast_build); assert(e.is_defined() && "The computed bound expression is undefined."); return e; } polyfp::expr utility::extract_bound_expression(isl_ast_node *node, int dim, bool upper) { assert(node != NULL); assert(dim >= 0); polyfp::expr result; if (isl_ast_node_get_type(node) == isl_ast_node_block) { ERROR("Currently Tiramisu does not support extracting bounds from blocks.", true); } else if (isl_ast_node_get_type(node) == isl_ast_node_for) { isl_ast_expr *init_bound = isl_ast_node_for_get_init(node); isl_ast_expr *upper_bound = isl_ast_node_for_get_cond(node); if (dim == 0) { if (upper) { isl_ast_expr *cond = isl_ast_node_for_get_cond(node); if (isl_ast_expr_get_op_type(cond) == isl_ast_op_lt) { // Create an expression of "1". isl_val *one = isl_val_one(isl_ast_node_get_ctx(node)); // Add 1 to the ISL ast upper bound to transform it into a strinct bound. 
result = polyfp_expr_from_isl_ast_expr(isl_ast_expr_sub(isl_ast_expr_get_op_arg(cond, 1), isl_ast_expr_from_val(one))); } else if (isl_ast_expr_get_op_type(cond) == isl_ast_op_le) { result = polyfp_expr_from_isl_ast_expr(isl_ast_expr_get_op_arg(cond, 1)); } } else { isl_ast_expr *init = isl_ast_node_for_get_init(node); result = polyfp_expr_from_isl_ast_expr(init); } } else { isl_ast_node *body = isl_ast_node_for_get_body(node); result = utility::extract_bound_expression(body, dim-1, upper); isl_ast_node_free(body); } assert(result.is_defined()); } else if (isl_ast_node_get_type(node) == isl_ast_node_user) { ERROR("Cannot extract bounds from a isl_ast_user node.", true); } else if (isl_ast_node_get_type(node) == isl_ast_node_if) { // polyfp::expr cond_bound = polyfp_expr_from_isl_ast_expr(isl_ast_node_if_get_cond(node)); polyfp::expr then_bound = utility::extract_bound_expression(isl_ast_node_if_get_then(node), dim, upper); polyfp::expr else_bound; if (isl_ast_node_if_has_else(node)) { // else_bound = utility::extract_bound_expression(isl_ast_node_if_get_else(node), dim, upper); // result = polyfp::expr(polyfp::o_s, cond_bound, then_bound, else_bound); ERROR("If Then Else is unsupported in bound extraction.", true); } else result = then_bound; //polyfp::expr(polyfp::o_cond, cond_bound, then_bound); } return result; } std::string utility::get_parameters_list(isl_set *set) { std::string list = ""; assert(set != NULL); for (int i = 0; i < isl_set_dim(set, isl_dim_param); i++) { list += isl_set_get_dim_name(set, isl_dim_param, i); if ((i != isl_set_dim(set, isl_dim_param) - 1)) { list += ","; } } return list; } polyfp::expr polyfp_expr_from_isl_ast_expr(isl_ast_expr *isl_expr) { polyfp::expr result; if (isl_ast_expr_get_type(isl_expr) == isl_ast_expr_int) { isl_val *init_val = isl_ast_expr_get_val(isl_expr); result = value_cast(polyfp::global::get_loop_iterator_data_type(), isl_val_get_num_si(init_val)); isl_val_free(init_val); } else if (isl_ast_expr_get_type(isl_expr) 
== isl_ast_expr_id) { isl_id *identifier = isl_ast_expr_get_id(isl_expr); std::string name_str(isl_id_get_name(identifier)); isl_id_free(identifier); // TODO // result = polyfp::var(polyfp::global::get_loop_iterator_data_type(), name_str); } else if (isl_ast_expr_get_type(isl_expr) == isl_ast_expr_op) { polyfp::expr op0, op1, op2; std::vector new_arguments; isl_ast_expr *expr0 = isl_ast_expr_get_op_arg(isl_expr, 0); op0 = polyfp_expr_from_isl_ast_expr(expr0); isl_ast_expr_free(expr0); if (isl_ast_expr_get_op_n_arg(isl_expr) > 1) { isl_ast_expr *expr1 = isl_ast_expr_get_op_arg(isl_expr, 1); op1 = polyfp_expr_from_isl_ast_expr(expr1); isl_ast_expr_free(expr1); } if (isl_ast_expr_get_op_n_arg(isl_expr) > 2) { isl_ast_expr *expr2 = isl_ast_expr_get_op_arg(isl_expr, 2); op2 = polyfp_expr_from_isl_ast_expr(expr2); isl_ast_expr_free(expr2); } switch (isl_ast_expr_get_op_type(isl_expr)) { case isl_ast_op_max: result = polyfp::expr(polyfp::o_max, op0, op1); break; case isl_ast_op_min: result = polyfp::expr(polyfp::o_min, op0, op1); break; case isl_ast_op_add: result = polyfp::expr(polyfp::o_add, op0, op1); break; case isl_ast_op_sub: result = polyfp::expr(polyfp::o_sub, op0, op1); break; case isl_ast_op_mul: result = polyfp::expr(polyfp::o_mul, op0, op1); break; case isl_ast_op_div: result = polyfp::expr(polyfp::o_div, op0, op1); break; default: polyfp::str_dump("Transforming the following expression", (const char *)isl_ast_expr_to_C_str(isl_expr)); polyfp::str_dump("\n"); ERROR("Translating an unsupported ISL expression into a Tiramisu expression.", 1); } } else { polyfp::str_dump("Transforming the following expression", (const char *)isl_ast_expr_to_C_str(isl_expr)); polyfp::str_dump("\n"); ERROR("Translating an unsupported ISL expression into a Tiramisu expression.", 1); } return result; } } ================================================ FILE: lib/polyhedral/placeholer.cpp ================================================ #include "placeholder.h" #include namespace 
polyfp{ polyfp::placeholder::placeholder(std::string name, std::vector dim_sizes, polyfp::primitive_t type, polyfp::function *fct): dim_sizes(dim_sizes), fct(fct), name(name), type(type) { if(fct->fct_argument_added == false) { fct->add_fct_argument(std::pair(name, this)); fct->add_placeholder(std::pair(name, this)); } else { fct->add_global_argument(std::pair(name, this)); fct->add_placeholder(std::pair(name, this)); } } void placeholder::partition(std::vector factors, std::string type){ //TODO: CHECK DIMENSIONS AND WARNING std::vector types; for (int dim = 0; dim < factors.size(); ++dim) { types.push_back(type); } this->fct->set_partition(this->get_name(),factors,types); } // TODO void placeholder::partition(std::vector factors, std::vector types){ //TODO: CHECK DIMENSIONS AND WARNING this->fct->set_partition(this->get_name(),factors,types); } const std::string &placeholder::get_name() const { return name; } int placeholder::get_n_dims() const { return this->get_dim_sizes().size(); } polyfp::primitive_t placeholder::get_elements_type() const { return type; } const std::vector &placeholder::get_dim_sizes() const { return dim_sizes; } void polyfp::placeholder::dump(bool exhaustive) const { if (exhaustive) { std::cout << "Buffer \"" << this->name << "\", Number of dimensions: " << this->get_n_dims() << std::endl; std::cout << "Dimension sizes: "; for (const auto &size : dim_sizes) { std::cout << " "; } std::cout << std::endl; std::cout << "Elements type: " << str_from_polyfp_type_primitive(this->type) << std::endl; std::cout << std::endl << std::endl; } } // const std::string &p_max::get_name() const // { // return name; // } // polyfp::p_max::p_max(polyfp::expr expr1, polyfp::expr expr2) // { // this->arg_list.push_back(expr1); // this->arg_list.push_back(expr2); // } // int p_max::get_n_args() const // { // return this->arg_list.size(); // } } ================================================ FILE: lib/polyhedral/test.cpp 
================================================ // #include #include int cf_test() { printf("hello python!\n"); return 0; } //////////////////////////////////// ================================================ FILE: results-gen.sh ================================================ start_time=$(date +"%s") echo "" echo ">>> Step 5. Collecting experimental results..." echo "" declare -A baseline_latency baseline_latency["gemm",32]=498753 baseline_latency["gemm",64]=3960961 baseline_latency["gemm",128]=31572225 baseline_latency["gemm",256]=252117505 baseline_latency["gemm",512]=2015101953 baseline_latency["gemm",1024]=19337840641 baseline_latency["gemm",2048]=154660769793 baseline_latency["gemm",4096]=1237118361601 baseline_latency["gemm",8192]=9896275755009 baseline_latency["bicg",32]=12353 baseline_latency["bicg",64]=49281 baseline_latency["bicg",128]=196865 baseline_latency["bicg",256]=786945 baseline_latency["bicg",512]=3146753 baseline_latency["bicg",1024]=14682113 baseline_latency["bicg",2048]=58724353 baseline_latency["bicg",4096]=234889217 baseline_latency["bicg",8192]=939540481 baseline_latency["gesummv",32]=12705 baseline_latency["gesummv",64]=49985 baseline_latency["gesummv",128]=198273 baseline_latency["gesummv",256]=789761 baseline_latency["gesummv",512]=3152385 baseline_latency["gesummv",1024]=14693377 baseline_latency["gesummv",2048]=58746881 baseline_latency["gesummv",4096]=234934273 baseline_latency["gesummv",8192]=939630593 baseline_latency["2mm",32]=697474 baseline_latency["2mm",64]=5542146 baseline_latency["2mm",128]=44188162 baseline_latency["2mm",256]=352912386 baseline_latency["2mm",512]=2820933634 baseline_latency["2mm",1024]=29004664834 baseline_latency["2mm",2048]=231982768130 baseline_latency["2mm",4096]=2199241375746 baseline_latency["2mm",8192]=17593058492418 baseline_latency["3mm",32]=1087683 baseline_latency["3mm",64]=8675715 baseline_latency["3mm",128]=69305091 baseline_latency["3mm",256]=554042883 baseline_latency["3mm",512]=4430760963 
baseline_latency["3mm",1024]=45106599939 baseline_latency["3mm",2048]=360815013891 baseline_latency["3mm",4096]=2886369042435 baseline_latency["3mm",8192]=23090348212227 baseline_latency["jacobi",4096]=804925441 baseline_latency["jacobi2d",4096]=4668429217793 baseline_latency["heat",4096]=385699841 baseline_latency["seidel",4096]=4050540986369 baseline_latency["blur",4096]=3981925375233 baseline_latency["edgeDetect",4096]=2882880170 baseline_latency["gaussian",4096]=8694375278 baseline_latency["blur",4096]=2983445186 baseline_latency["vgg16",512]=3727670833 baseline_latency["resnet",512]=6602277677 declare -A execution_times while read -r line; do if [[ $line =~ time\[(.*),(.*)\] ]]; then kernel=${BASH_REMATCH[1]} size=${BASH_REMATCH[2]} if [[ $line =~ ([0-9.]+)$ ]]; then time=${BASH_REMATCH[1]} time=$(printf "%.2f" $time) if [[ $time == .* ]]; then time="0$time" fi execution_times[$kernel,$size]=$time fi fi done < "build/execution_times.txt" result_file="experimental_results.csv" # rm -f $result_file if [ -f $result_file ]; then rm $result_file fi printf "/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////\n" >> $result_file printf "/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////\n" >> $result_file # printf "------------------------------Experimental Results----------------------------" >> $result_file printf "\n" >> $result_file printf "\n" >> $result_file printf ">>> Results for TABLE III: \n" >> $result_file printf "\n" >> $result_file printf "%-20s %-25s %-20s %-20s %-20s %-20s %-15s %-15s\n" "Kernel" "Latency" "DSP" "FF" "LUT" "Power" "II" "Execution Time" >> $result_file kernels=("gemm" "bicg" "gesummv" "2mm" "3mm" ) sizes=(4096) for kernel in "${kernels[@]}" do for size in "${sizes[@]}" do 
xml_file="samples/${kernel}/test_${kernel}_${size}/test_${kernel}_${size}/syn/report/csynth.xml" baseline=${baseline_latency[$kernel,$size]} best_case_latency=$(xmlstarlet sel -t -v "/profile/PerformanceEstimates/SummaryOfOverallLatency/Best-caseLatency" $xml_file) acceleration=$(awk "BEGIN {printf \"%.1f\", $baseline / $best_case_latency}") acceleration_str="$acceleration x" latency_str="$best_case_latency($acceleration_str)" lut_util=$(xmlstarlet sel -t -v "/profile/AreaEstimates/Resources/LUT" $xml_file) lut_avail=$(xmlstarlet sel -t -v "/profile/AreaEstimates/AvailableResources/LUT" $xml_file) lut_percent=$((($lut_util * 100) / $lut_avail)) lut_str="$lut_util($lut_percent%)" dsp_util=$(xmlstarlet sel -t -v "/profile/AreaEstimates/Resources/DSP" $xml_file) dsp_avail=$(xmlstarlet sel -t -v "/profile/AreaEstimates/AvailableResources/DSP" $xml_file) dsp_percent=$((($dsp_util * 100) / $dsp_avail)) dsp_str="$dsp_util($dsp_percent%)" ff_util=$(xmlstarlet sel -t -v "/profile/AreaEstimates/Resources/FF" $xml_file) ff_avail=$(xmlstarlet sel -t -v "/profile/AreaEstimates/AvailableResources/FF" $xml_file) ff_percent=$((($ff_util * 100) / $ff_avail)) ff_str="$ff_util($ff_percent%)" II=$(grep -m 1 -oP '(?<=).*?(?=<\/PipelineII>)' $xml_file) time=${execution_times[$kernel,$size]} time_str="$time s" rpt_file="samples/${kernel}/test_${kernel}_power/test_${kernel}_power/impl/verilog/project.runs/impl_1/bd_0_wrapper_power_routed.rpt" total_power=$(grep "Total On-Chip Power (W)" "$rpt_file" | awk -F'|' '{print $3}' | tr -d '[:space:]') power_str="$total_power W" # echo "Total On-Chip Power: $total_power W" printf "%-20s %-25s %-20s %-20s %-20s %-20s %-15s %-15s\n" "${kernel}_${size}" "${latency_str}" "$dsp_str" "$ff_str" "$lut_str" "$power_str" "$II" "$time_str" >> $result_file done done printf "\n" >> $result_file printf "\n" >> $result_file printf ">>> Results for Fig. 
12:\n" >> $result_file printf "\n" >> $result_file printf "%-20s %-25s %-20s %-20s %-20s %-15s %-15s\n" "Kernel" "Latency" "DSP" "FF" "LUT" "II" "Execution Time">> $result_file kernels=("gemm" "bicg" "gesummv" "2mm" "3mm" ) sizes=(32 64 128 256 512 1024 2048 4096 8192) for kernel in "${kernels[@]}" do for size in "${sizes[@]}" do xml_file="samples/${kernel}/test_${kernel}_${size}/test_${kernel}_${size}/syn/report/csynth.xml" baseline=${baseline_latency[$kernel,$size]} best_case_latency=$(xmlstarlet sel -t -v "/profile/PerformanceEstimates/SummaryOfOverallLatency/Best-caseLatency" $xml_file) acceleration=$(awk "BEGIN {printf \"%.1f\", $baseline / $best_case_latency}") acceleration_str="$acceleration x" latency_str="$best_case_latency($acceleration_str)" lut_util=$(xmlstarlet sel -t -v "/profile/AreaEstimates/Resources/LUT" $xml_file) lut_avail=$(xmlstarlet sel -t -v "/profile/AreaEstimates/AvailableResources/LUT" $xml_file) lut_percent=$((($lut_util * 100) / $lut_avail)) lut_str="$lut_util($lut_percent%)" dsp_util=$(xmlstarlet sel -t -v "/profile/AreaEstimates/Resources/DSP" $xml_file) dsp_avail=$(xmlstarlet sel -t -v "/profile/AreaEstimates/AvailableResources/DSP" $xml_file) dsp_percent=$((($dsp_util * 100) / $dsp_avail)) dsp_str="$dsp_util($dsp_percent%)" ff_util=$(xmlstarlet sel -t -v "/profile/AreaEstimates/Resources/FF" $xml_file) ff_avail=$(xmlstarlet sel -t -v "/profile/AreaEstimates/AvailableResources/FF" $xml_file) ff_percent=$((($ff_util * 100) / $ff_avail)) ff_str="$ff_util($ff_percent%)" II=$(grep -m 1 -oP '(?<=).*?(?=<\/PipelineII>)' $xml_file) time=${execution_times[$kernel,$size]} time_str="$time s" printf "%-20s %-25s %-20s %-20s %-20s %-15s %-15s\n" "${kernel}_${size}" "${latency_str}" "$dsp_str" "$ff_str" "$lut_str" "$II" "$time_str" >> $result_file done printf "\n" >> $result_file done printf "\n" >> $result_file printf "\n" >> $result_file printf ">>> Results for TABLE V and TABLE VII: \n" >> $result_file printf "\n" >> $result_file printf "%-20s 
%-25s %-20s %-20s %-20s %-15s %-15s\n" "Kernel" "Latency" "DSP" "FF" "LUT" "II" "Execution Time">> $result_file kernels=("vgg16" "resnet") sizes=(512) for kernel in "${kernels[@]}" do for size in "${sizes[@]}" do xml_file="samples/${kernel}/test_${kernel}_${size}/test_${kernel}_${size}/syn/report/csynth.xml" baseline=${baseline_latency[$kernel,$size]} best_case_latency=$(xmlstarlet sel -t -v "/profile/PerformanceEstimates/SummaryOfOverallLatency/Best-caseLatency" $xml_file) acceleration=$(awk "BEGIN {printf \"%.1f\", $baseline / $best_case_latency}") acceleration_str="$acceleration x" latency_str="$best_case_latency($acceleration_str)" lut_util=$(xmlstarlet sel -t -v "/profile/AreaEstimates/Resources/LUT" $xml_file) lut_avail=$(xmlstarlet sel -t -v "/profile/AreaEstimates/AvailableResources/LUT" $xml_file) lut_percent=$((($lut_util * 100) / $lut_avail)) lut_str="$lut_util($lut_percent%)" dsp_util=$(xmlstarlet sel -t -v "/profile/AreaEstimates/Resources/DSP" $xml_file) dsp_avail=$(xmlstarlet sel -t -v "/profile/AreaEstimates/AvailableResources/DSP" $xml_file) dsp_percent=$((($dsp_util * 100) / $dsp_avail)) dsp_str="$dsp_util($dsp_percent%)" ff_util=$(xmlstarlet sel -t -v "/profile/AreaEstimates/Resources/FF" $xml_file) ff_avail=$(xmlstarlet sel -t -v "/profile/AreaEstimates/AvailableResources/FF" $xml_file) ff_percent=$((($ff_util * 100) / $ff_avail)) ff_str="$ff_util($ff_percent%)" II=$(grep -m 1 -oP '(?<=).*?(?=<\/PipelineII>)' $xml_file) time=${execution_times[$kernel,$size]} time_str="$time s" printf "%-20s %-25s %-20s %-20s %-20s %-15s %-15s\n" "${kernel}_${size}" "${latency_str}" "$dsp_str" "$ff_str" "$lut_str" "$II" "$time_str">> $result_file done done kernels=("edgeDetect" "gaussian" "blur" "jacobi" "jacobi2d" "heat" "seidel") sizes=(4096) for kernel in "${kernels[@]}" do for size in "${sizes[@]}" do xml_file="samples/${kernel}/test_${kernel}_${size}/test_${kernel}_${size}/syn/report/csynth.xml" baseline=${baseline_latency[$kernel,$size]} 
best_case_latency=$(xmlstarlet sel -t -v "/profile/PerformanceEstimates/SummaryOfOverallLatency/Best-caseLatency" $xml_file) acceleration=$(awk "BEGIN {printf \"%.1f\", $baseline / $best_case_latency}") acceleration_str="$acceleration x" latency_str="$best_case_latency($acceleration_str)" lut_util=$(xmlstarlet sel -t -v "/profile/AreaEstimates/Resources/LUT" $xml_file) lut_avail=$(xmlstarlet sel -t -v "/profile/AreaEstimates/AvailableResources/LUT" $xml_file) lut_percent=$((($lut_util * 100) / $lut_avail)) lut_str="$lut_util($lut_percent%)" dsp_util=$(xmlstarlet sel -t -v "/profile/AreaEstimates/Resources/DSP" $xml_file) dsp_avail=$(xmlstarlet sel -t -v "/profile/AreaEstimates/AvailableResources/DSP" $xml_file) dsp_percent=$((($dsp_util * 100) / $dsp_avail)) dsp_str="$dsp_util($dsp_percent%)" ff_util=$(xmlstarlet sel -t -v "/profile/AreaEstimates/Resources/FF" $xml_file) ff_avail=$(xmlstarlet sel -t -v "/profile/AreaEstimates/AvailableResources/FF" $xml_file) ff_percent=$((($ff_util * 100) / $ff_avail)) ff_str="$ff_util($ff_percent%)" II=$(grep -m 1 -oP '(?<=).*?(?=<\/PipelineII>)' $xml_file) time=${execution_times[$kernel,$size]} time_str="$time s" printf "%-20s %-25s %-20s %-20s %-20s %-15s %-15s\n" "${kernel}_${size}" "${latency_str}" "$dsp_str" "$ff_str" "$lut_str" "$II" "$time_str">> $result_file done done printf "\n" >> $result_file printf ">>> Notes:\n" >> $result_file printf "1. The Resnet speedup may be slightly different from the speedup in the paper. This is because we have modified some of the codegen methods and the overall latency is affected: we use fewer resources and achieve a slightly lower speedup. Note that the speedup of VGG-16 is sightly better than the speedup in the paper. \n" >> $result_file printf "2. We are improving the optimization strategies for loops with small problem sizes and a final strategy for them have not been decided yet. So some of the small-problem-size results may be slightly different from the results in the paper. 
\n" >> $result_file
printf "\n" >> $result_file
printf "////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////\n" >> $result_file
#printf "////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////\n" >> $result_file
end_time=$(date +"%s")
execution_time=$(($end_time - $start_time))
echo ""
echo ">>> Step 5 has been finished!"
echo ">>> Step 5 Total Execution Time: $execution_time seconds"
echo ""
echo "The experimental results are collected in experimental_results.csv!"
================================================
FILE: run-code.sh
================================================
#!/usr/bin/env bash
# Step 2 of the artifact flow: build every testbench target, run each binary,
# and lower the MLIR file expected under samples/<kernel>/ to HLS C++ with
# scalehls-opt | scalehls-translate. The wall-clock time of each generation is
# appended to build/execution_times.txt as "time[<kernel>,<size>] <seconds>".
#
# Must be started from the repository root; needs cmake, bc and GNU sed/date.

start=$(date +"%s")
echo ""
echo ">>> Step 2. Compiling the object files and Generating the optimized HLS C code..."
echo ""

# Every relative path below assumes the build directory, so stop if it is missing.
cd build || { echo "Error: cannot enter ./build; run the build step first." >&2; exit 1; }
rm -f "execution_times.txt"

# generate_hls TARGET SUFFIX SIZE_LABEL WITH_QOR
# Runs ./bin/TARGET, then converts ../samples/TARGET/test_TARGET<SUFFIX>.mlir
# into the matching .cpp file. WITH_QOR=yes adds the QoR-estimation pass
# configured by ../samples/config.json. The elapsed time of both steps is
# recorded under the key "TARGET,SIZE_LABEL".
generate_hls() {
    local target=$1 suffix=$2 size_label=$3 with_qor=$4
    local stem="test_${target}${suffix}"
    local mlir_file="../samples/${target}/${stem}.mlir"
    local cpp_file="../samples/${target}/${stem}.cpp"
    local opt_args=(--scalehls-func-preprocess="top-func=${stem}" --cse -canonicalize)
    if [ "$with_qor" = yes ]; then
        opt_args+=(--scalehls-qor-estimation="target-spec=../samples/config.json")
    fi

    local t_begin t_end
    t_begin=$(date +%s.%N)
    ./bin/"$target"
    ../scalehls/build/bin/scalehls-opt "$mlir_file" "${opt_args[@]}" \
        | ../scalehls/build/bin/scalehls-translate -emit-hlscpp > "$cpp_file"
    t_end=$(date +%s.%N)
    echo "time[$target,$size_label] $(echo "$t_end - $t_begin" | bc)" >> execution_times.txt
}

# Build the fixed-size targets once up front.
targets=("edgeDetect" "gaussian" "blur" "vgg16" "resnet" "jacobi" "jacobi2d" "heat" "seidel")
for target in "${targets[@]}"
do
    cmake --build . --target "$target"
done

# DNN workloads: no size suffix in the file names, recorded under size 512.
targets=("vgg16" "resnet")
for target in "${targets[@]}"
do
    generate_hls "$target" "" 512 yes
    echo "The HLS C code of $target: test_${target}.cpp is generated!"
    echo ""
done

# Image-processing and stencil kernels at N=4096, without QoR estimation.
targets=("edgeDetect" "gaussian" "blur" "jacobi" "jacobi2d" "heat" "seidel")
N_value=4096
for target in "${targets[@]}"
do
    generate_hls "$target" "_$N_value" "$N_value" no
    echo "The HLS C code of $target: test_${target}_$N_value.cpp is generated!"
    echo ""
done

# Post-process the generated seidel code by inserting HLS pragmas.
file_path="../samples/seidel/test_seidel_4096.cpp"
if [ -f "$file_path" ]; then
    # Insert lines after the third for loop
    sed -i '/for (int v5 = max(0, ((v4 \/ 2) - 2046)); v5 < min(4094, (v4 \/ 2)); v5 += 1) {/a #pragma HLS PIPELINE II=1\n#pragma HLS LOOP_TRIPCOUNT avg=1366 max=1366 min=1366' "$file_path" \
        && \
    # Insert line before the first for loop
    sed -i '/for (int v3 = 0; v3 < 4096; v3 += 1) {/i #pragma HLS DEPENDENCE dependent=false type=inter variable=v1' "$file_path" \
        && echo "Lines inserted successfully into the file."
else
    echo "Warning: $file_path not found; seidel pragmas were not inserted." >&2
fi

N_values=(32 64 128 256 512 1024 2048 4096 8192)
targets=("2mm.cpp" "3mm.cpp" "gemm.cpp" "bicg.cpp" "gesummv.cpp")

# execute_command SOURCE_FILE N
# Rebuilds one PolyBench testbench for problem size N and generates its HLS
# code. The size is set by rewriting the "#define N" line of the testbench
# source, which is put back to 4096 afterwards. The pattern accepts any
# current value, so a run that was interrupted before the restore no longer
# leaves the substitution silently doing nothing.
execute_command() {
    local source_file=$1
    local N=$2
    local kernel=${source_file%.*}

    sed -i "s/#define N [0-9][0-9]*/#define N $N/" "../testbench/$source_file"
    cmake --build . --target "$kernel"
    generate_hls "$kernel" "_${N}" "$N" yes
    sed -i "s/#define N [0-9][0-9]*/#define N 4096/" "../testbench/$source_file"

    echo ""
    echo "The HLS C code of $source_file: test_${kernel}_${N}.cpp is generated!"
    echo ""
}

# The jobs run strictly one after another. The earlier "&" / "wait -n" loop
# with max_parallel=1 was already sequential, and real parallelism would race
# on the shared build tree and on the in-place edits of the testbench sources.
for N in "${N_values[@]}"
do
    for source_file in "${targets[@]}"
    do
        execute_command "$source_file" "$N"
    done
done

end=$(date +"%s")
execution=$(($end - $start))
echo ""
echo ">>> Step 2 has been finished!"
echo ">>> Step 2 Total Execution Time: $execution seconds" echo "" ================================================ FILE: samples/config.json ================================================ { "__max_init_parallel": "The maximum loop parallelism in the initial sampling", "max_init_parallel": 32, "__max_expl_parallel": "The maximum loop parallelism in the exploration", "max_expl_parallel": 128, "__max_loop_parallel": "The maximum unroll factor of each loop", "max_loop_parallel": 16, "__max_iter_num": "The maximum iteration number in the exploration", "frequency": "100MHz", "dsp": 220, "bram": 180, "dsp_usage": { "fadd": 2, "fmul": 3, "fdiv": 0, "fcmp": 0, "fexp": 7 }, "100MHz": { "fadd": 4, "fmul": 3, "fdiv": 15, "fcmp": 1, "fexp": 8, "iadd": 1, "imul": 2, "iadd_delay": 1.8, "imul_delay": 6.91, "fmul_delay": 5.7, "fdiv_delay": 6.07, "fcmp_delay": 6.4, "fexp_delay": 7.68 } } ================================================ FILE: tcl-gen.sh ================================================ #!/bin/bash start_time=$(date +"%s") echo "" echo ">>> Step 3. Generating scripts for running Vitis_HLS..." 
echo "" examples=("vgg16" "resnet") sizes=(512) for example in "${examples[@]}" do for size in "${sizes[@]}" do script_name="script_${size}.tcl" cat > "samples/${example}/${script_name}" < "samples/${example}/${script_name}" <> "samples/${example}/${script_name}" # fi # echo "close_project" >> "samples/${example}/${script_name}" # echo "exit" >> "samples/${example}/${script_name}" # done # done for example in "${examples[@]}" do for size in "${sizes[@]}" do script_name="script_${size}.tcl" cat > "samples/${example}/${script_name}" < "samples/${example}/${script_name}" <> "samples/${example}/${script_name}" fi echo "close_project" >> "samples/${example}/${script_name}" echo "exit" >> "samples/${example}/${script_name}" done done examples=("edgeDetect" "gaussian" "blur" "jacobi" "jacobi2d" "heat" "seidel") sizes=(4096) for example in "${examples[@]}" do for size in "${sizes[@]}" do script_name="script_${size}.tcl" cat > "samples/${example}/${script_name}" <>> Step 3 has been finished!" # echo "" end_time=$(date +"%s") execution_time=$(($end_time - $start_time)) echo "" echo ">>> Step 3 has been finished!" 
echo ">>> Step 3 Total Execution Time: $execution_time seconds"
echo ""
================================================
FILE: testbench/2mm.cpp
================================================
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include
#include "expr.h"
#include "compute.h"
#include "function.h"
#include "core.h"
// #include "mlir/IR/Attributes.h"
// Problem size. run-code.sh rewrites this exact "#define N <value>" line with
// sed to sweep sizes, so keep it on one line in this form.
#define N 4096
using namespace std;
using namespace polyfp;
// Testbench for the 2mm kernel: declares the kernel with the polyfp DSL and
// runs the automatic design-space exploration on it.
//
// Reading the last argument of each `compute` as the destination
// (TODO confirm against compute.h), the statements are:
//   s_1: temp(i,j) = scalar
//   s_2: temp(i,j) = temp(i,j) + alpha*A(i,k)*B(k,j)
//   s_3: D(i,j)    = D(i,j)*beta
//   s_4: D(i,j)    = D(i,j) + temp(i,k)*C(k,j)
int main(){
    // The name embeds N; run-code.sh looks for samples/2mm/test_2mm_<N>.mlir.
    std::string name = "test_2mm_"+std::to_string(N);
    init(name);
    auto *fct = global::get_implicit_function();
    // Iterators; the two numbers are presumably the loop bounds [0, N) — TODO confirm.
    var i("i", 0 ,N);
    var j("j", 0 ,N);
    var k("k", 0 ,N);
    // N x N float32 arrays.
    placeholder A("A",{N,N},p_float32);
    placeholder B("B",{N,N},p_float32);
    placeholder C("C",{N,N},p_float32);
    placeholder D("D",{N,N},p_float32);
    placeholder temp("temp",{N,N},p_float32);
    constant alpha(1.6);
    constant beta(3.7);
    // NOTE(review): temp is initialised with 3.7 (same value as beta) rather
    // than 0 — confirm this is intended for the accumulation in s_2.
    constant scalar(3.7);
    compute s_1("s_1",{i,j},scalar,temp(i,j));
    compute s_2("s_2",{i,j,k},temp(i,j)+alpha*A(i,k)*B(k,j),temp(i,j));
    compute s_3("s_3",{i,j},D(i,j)*beta,D(i,j));
    compute s_4("s_4",{i,j,k},D(i,j)+temp(i,k)*C(k,j),D(i,j));
    // Chain the statements s_1 -> s_2 -> s_3 -> s_4; -1 presumably means they
    // share no loop level — TODO confirm against compute::after.
    s_2.after(s_1,-1);
    s_3.after(s_2,-1);
    s_4.after(s_3,-1);
    // Manual schedule kept for reference (disabled; auto_DSE is used instead).
    // var i0("i0"), j0("j0"),k0("k0"), i1("i1"), j1("j1"),k1("k1");
    // s_2.tile(k,i,j,1,2,16,i0, j0, k0, i1, j1,k1);
    // s_4.tile(k,j,i,1,2,16,i0, j0, k0, i1, j1,k1);
    // s_2.unroll(k1,-1);
    // s_2.unroll(j1,-1);
    // s_2.unroll(i1,-1);
    // s_4.unroll(k1,-1);
    // s_4.unroll(j1,-1);
    // s_4.unroll(i1,-1);
    // s_1.pipeline(j,1);
    // s_1.pipeline(j,1);
    // s_2.pipeline(j,1);
    // s_3.pipeline(j,1);
    // s_4.pipeline(j,1);
    // A.partition({16,1},"cyclic");
    // B.partition({1,2},"cyclic");
    // C.partition({1,2},"cyclic");
    // D.partition({16,2},"cyclic");
    // temp.partition({16,2},"cyclic");
    // Parent of the working directory; run-code.sh starts the binary from
    // build/, so this is the repository root.
    std::string pwd = std::filesystem::current_path().parent_path();
    std::string path = pwd+"/samples/2mm/";
    fct->auto_DSE(path);
    // codegen();
}
================================================
FILE: 
testbench/3mm.cpp
================================================
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include
#include "expr.h"
#include "compute.h"
#include "function.h"
#include "core.h"
// #include "mlir/IR/Attributes.h"
// Problem size. run-code.sh rewrites this exact "#define N <value>" line with
// sed to sweep sizes, so keep it on one line in this form.
#define N 4096
using namespace std;
using namespace polyfp;
// Testbench for the 3mm kernel: declares the kernel with the polyfp DSL and
// runs the automatic design-space exploration on it.
//
// Reading the last argument of each `compute` as the destination
// (TODO confirm against compute.h), the statements are:
//   s_1/s_2: E = 0;  E(i,j) += A(i,k)*B(k,j)
//   s_3/s_4: F = 0;  F(i,j) += C(i,k)*D(k,j)
//   s_5/s_6: G = 0;  G(i,j) += E(i,k)*F(k,j)
int main(){
    // The name embeds N; run-code.sh looks for samples/3mm/test_3mm_<N>.mlir.
    std::string name = "test_3mm_"+std::to_string(N);
    init(name);
    auto *fct = global::get_implicit_function();
    // Iterators; the two numbers are presumably the loop bounds [0, N) — TODO confirm.
    var i("i", 0 ,N);
    var j("j", 0 ,N);
    var k("k", 0 ,N);
    // N x N float32 arrays.
    placeholder A("A",{N,N},p_float32);
    placeholder B("B",{N,N},p_float32);
    placeholder C("C",{N,N},p_float32);
    placeholder D("D",{N,N},p_float32);
    placeholder E("E",{N,N},p_float32);
    placeholder F("F",{N,N},p_float32);
    placeholder G("G",{N,N},p_float32);
    constant scalar(0);
    compute s_1("s_1",{i,j},scalar,E(i,j));
    compute s_2("s_2",{i,j,k},E(i,j)+A(i,k)*B(k,j),E(i,j));
    compute s_3("s_3",{i,j},scalar,F(i,j));
    compute s_4("s_4",{i,j,k},F(i,j)+C(i,k)*D(k,j),F(i,j));
    compute s_5("s_5",{i,j},scalar,G(i,j));
    compute s_6("s_6",{i,j,k},G(i,j)+E(i,k)*F(k,j),G(i,j));
    // Only referenced by the disabled manual schedule below.
    var i0("i0"), j0("j0"),k0("k0"), i1("i1"), j1("j1"),k1("k1");
    // Chain the statements s_1 -> ... -> s_6; -1 presumably means they share
    // no loop level — TODO confirm against compute::after.
    s_2.after(s_1,-1);
    s_3.after(s_2,-1);
    s_4.after(s_3,-1);
    s_5.after(s_4,-1);
    s_6.after(s_5,-1);
    // Alternative orderings and a manual schedule kept for reference (disabled).
    // s_3.after(s_1,j);
    // s_2.after(s_1,-1);
    // s_4.after(s_2,k);
    // s_2.tile(k,j,i,1,1,16,i0, j0, k0, i1, j1,k1);
    // s_6.tile(j,i,2,16,i0, j0, k0, i1, j1,k1);
    // s_2.unroll(k1,-1);
    // s_2.unroll(j1,-1);
    // s_2.unroll(i1,-1);
    // s_4.unroll(k1,-1);
    // s_4.unroll(j1,-1);
    // s_4.unroll(i1,-1);
    // s_2.pipeline(k0,1);
    // s_4.pipeline(k0,1);
    // A.partition({16,2},"cyclic");
    // B.partition({2,2},"cyclic");
    // C.partition({2,2},"cyclic");
    // D.partition({16,2},"cyclic");
    // temp.partition({16,2},"cyclic");
    // Parent of the working directory; run-code.sh starts the binary from
    // build/, so this is the repository root.
    std::string pwd = std::filesystem::current_path().parent_path();
    std::string path = pwd+"/samples/3mm/";
    fct->auto_DSE(path);
    // codegen();
}
================================================
FILE: 
testbench/bicg.cpp
================================================
#include "expr.h"
#include "compute.h"
#include "function.h"
#include "core.h"
// Problem size. run-code.sh rewrites this exact "#define N <value>" line with
// sed to sweep sizes, so keep it on one line in this form.
#define N 4096
#include
using namespace std;
using namespace polyfp;
// Testbench for the bicg kernel, declared with the polyfp DSL and handed to
// the automatic design-space exploration. Reading the last `compute`
// argument as the destination (TODO confirm against compute.h):
//   s_1: s(j) = s(j) + A(i,j)*r(i)
//   s_2: q(i) = q(i) + A(i,j)*p(j)
int main(){
    // The name embeds N; run-code.sh looks for samples/bicg/test_bicg_<N>.mlir.
    std::string name = "test_bicg_"+std::to_string(N);
    init(name);
    auto *fct = global::get_implicit_function();
    var i("i", 0 ,N);
    var j("j", 0 ,N);
    placeholder A("A",{N,N},p_float32);
    placeholder s("s",{N},p_float32);
    placeholder q("q",{N},p_float32);
    placeholder p("p",{N},p_float32);
    placeholder r("r",{N},p_float32);
    compute s_1("s_1",{i,j},s(j)+A(i,j)*r(i),s(j));
    compute s_2("s_2",{i,j},q(i)+A(i,j)*p(j),q(i));
    // compute s_2("s_2",{i,j},q(j)+A(j,i)*p(i),q(j));
    // s_2.interchange(i,j);
    // Order s_2 after s_1 at iterator j (presumably sharing the loop nest down
    // to j — TODO confirm against compute::after).
    s_2.after(s_1,j);
    // Only referenced by the disabled manual schedule below.
    var i0("i0"), j0("j0"),k0("k0"), i1("i1"), j1("j1"),k1("k1");
    // s_1.tile(i,j,1,32,i0, j0, i1, j1);
    // s_2.tile(i,j,1,32,i0, j0, i1, j1);
    // s_1.unroll(j1,-1);
    // s_2.unroll(j1,-1);
    // s_2.after(s_1,j1);
    // s_1.pipeline(j,1);
    // s.partition({32},"cyclic");
    // q.partition({32},"cyclic");
    // A.partition({16,16},"cyclic");
    // Parent of the working directory; run-code.sh starts the binary from
    // build/, so this is the repository root.
    std::string pwd = std::filesystem::current_path().parent_path();
    std::string path = pwd+"/samples/bicg/";
    fct->auto_DSE(path);
}
================================================
FILE: testbench/blur.cpp
================================================
#include "expr.h"
#include "compute.h"
#include "function.h"
#include "core.h"
#include
#define N 4096
using namespace std;
using namespace polyfp;
// Testbench for a two-pass 3-tap blur over an N x N x 3 image. Reading the
// last `compute` argument as the destination (TODO confirm):
//   s_1: bx(i,j,c) = (in(i,j,c) + in(i,j+1,c) + in(i,j+2,c)) / factor
//   s_2: by(i,j,c) = (bx(i,j,c) + bx(i+1,j,c) + bx(i+2,j,c)) / factor
int main(){
    std::string name = "test_blur_"+std::to_string(N);
    init(name);
    auto *fct = global::get_implicit_function();
    // The upper bound 4094 is hard-coded (it equals N-2 for N=4096), so the
    // +2 accesses stay inside the arrays only while N is 4096.
    var i("i", 0 ,4094);
    var j("j", 0 ,4094);
    var c("c", 0 ,3);
    placeholder bx("bx",{N,N,3},p_float32);
    placeholder by("by",{N,N,3},p_float32);
    placeholder in("in",{N,N,3},p_float32);
    constant factor(3.0);
    compute s_1("s_1",{i,j,c},(in(i,j,c)+in(i,j+1,c)+in(i,j+2,c))/factor,bx(i,j,c));
    // NOTE(review): s_2 reads bx rows i+1 and i+2 (up to 4095) while s_1 only
    // writes rows below 4094 — confirm the border handling is intended.
    compute s_2("s_2",{i,j,c},(bx(i,j,c)+bx(i+1,j,c)+bx(i+2,j,c))/factor,by(i,j,c));
    // Declared but not used by any statement in this file.
    var i0("i0"), j0("j0"),k0("k0"), i1("i1"), j1("j1"),k1("k1");
    s_2.after(s_1,-1);
    // Parent of the working directory (the repository root when started from build/).
    std::string pwd = std::filesystem::current_path().parent_path();
    std::string path = pwd+"/samples/blur/";
    fct->auto_DSE(path);
}
================================================
FILE: testbench/edgeDetect.cpp
================================================
#include "expr.h"
#include "compute.h"
#include "function.h"
#include "core.h"
#include
using namespace std;
using namespace polyfp;
// Only used to build the function name; the array extents below are literals.
#define N 4096
// Testbench for an edge-detection pipeline on a 4096 x 4096 x 3 image.
// Reading the last `compute` argument as the destination (TODO confirm):
//   s_1: temp = sum of the eight neighbours of a 3x3 window of src
//        (centre excluded) divided by factor
//   s_2: out  = temp(i+1,j+1) - temp(i+2,j) + temp(i+2,j+1) - temp(i+1,j)
int main(){
    std::string name = "test_edgeDetect_"+std::to_string(N);
    init(name);
    auto *fct = global::get_implicit_function();
    var i("i", 0 ,4094);
    var j("j", 0 ,4094);
    var c("c", 0 ,3);
    placeholder temp("temp",{4096,4096,3},p_float32);
    placeholder src("src",{4096,4096,3},p_float32);
    placeholder out("out",{4096,4096,3},p_float32);
    constant factor(8.0);
    compute s_1("s_1",{i,j,c},(src(i,j,c)+src(i,j+1,c)+src(i,j+2,c)+src(i+1,j,c)+src(i+1,j+2,c)+
                                src(i+2,j,c)+src(i+2,j+1,c)+src(i+2,j+2,c))/factor,temp(i,j,c));
    compute s_2("s_2",{i,j,c},temp(i+1,j+1,c)-temp(i+2,j,c)+
                               temp(i+2,j+1,c)-temp(i+1,j,c),out(i,j,c));
    s_2.after(s_1,-1);
    // Parent of the working directory (the repository root when started from build/).
    std::string pwd = std::filesystem::current_path().parent_path();
    std::string path = pwd+"/samples/edgeDetect/";
    fct->auto_DSE(path);
}
================================================
FILE: testbench/gaussian.cpp
================================================
#include "expr.h"
#include "compute.h"
#include "function.h"
#include "core.h"
#include
using namespace std;
using namespace polyfp;
// Only used to build the function name; the array extents below are literals.
#define N 4096
// Testbench for a separable 7-tap Gaussian filter on a 4096 x 4096 x 3 image.
// Reading the last `compute` argument as the destination (TODO confirm):
//   s_1: temp(q,w,cc) = 0
//   s_2: conv(q,w,cc) = 0
//   s_3: temp(q,w,cc) += src(q+r,w,cc) * kernelX(r)
//   s_4: conv(q,w,cc) += temp(q,w+e,cc) * kernelY(e)
int main(){
    std::string name = "test_gaussian_"+std::to_string(N);
    init(name);
    auto *fct = global::get_implicit_function();
    // With q, w below 4089 and r, e below 7, q+r and w+e stay below 4096.
    var q("q", 0 ,4089);
    var w("w", 0 ,4089);
    var cc("cc", 0 ,3);
    var r("r", 0 ,7);
    var e("e", 0 ,7);
    placeholder temp("temp",{4096,4096,3},p_float32);
    placeholder src("src",{4096,4096,3},p_float32);
    placeholder conv("conv",{4096,4096,3},p_float32);
    placeholder kernelX("kernelX",{7},p_float32);
    placeholder kernelY("kernelY",{7},p_float32);
    constant scalar(0);
    compute s_1("s_1",{q,w,cc},scalar,temp(q,w,cc));
    compute s_2("s_2",{q,w,cc},scalar,conv(q,w,cc));
    compute s_3("s_3",{q,w,cc,r},temp(q,w,cc)+src(q + r,w,cc)*kernelX(r),temp(q,w,cc));
    compute s_4("s_4",{q,w,cc,e},conv(q,w,cc)+temp(q,w+e,cc)*kernelY(e),conv(q,w,cc));
    // s_2 is ordered after s_1 at iterator cc (presumably sharing the loop
    // nest — TODO confirm); s_3 and s_4 follow with -1.
    s_2.after(s_1,cc);
    s_3.after(s_1,-1);
    s_4.after(s_3,-1);
    // Parent of the working directory (the repository root when started from build/).
    std::string pwd = std::filesystem::current_path().parent_path();
    std::string path = pwd+"/samples/gaussian/";
    fct->auto_DSE(path);
}
================================================
FILE: testbench/gemm.cpp
================================================
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include
#include "expr.h"
#include "compute.h"
#include "function.h"
#include "core.h"
// #include "mlir/IR/Attributes.h"
// Problem size. run-code.sh rewrites this exact "#define N <value>" line with
// sed to sweep sizes, so keep it on one line in this form.
#define N 4096
using namespace std;
using namespace polyfp;
// Testbench for the gemm kernel, declared with the polyfp DSL and handed to
// the automatic design-space exploration. Reading the last `compute`
// argument as the destination (TODO confirm against compute.h):
//   s_1: C(i,j) = C(i,j)*beta
//   s_2: C(i,j) = C(i,j) + alpha*A(i,k)*B(k,j)
int main(){
    // The name embeds N; run-code.sh looks for samples/gemm/test_gemm_<N>.mlir.
    std::string name = "test_gemm_"+std::to_string(N);
    init(name);
    auto fct = global::get_implicit_function();
    var i("i", 0 ,N);
    var j("j", 0 ,N);
    var k("k", 0 ,N);
    placeholder A("A",{N,N},p_float32);
    placeholder B("B",{N,N},p_float32);
    placeholder C("C",{N,N},p_float32);
    // NOTE(review): alpha and beta are default-constructed here, unlike the
    // other testbenches that pass a value — confirm what the default is.
    constant alpha;
    constant beta;
    compute s_1("s_1",{i,j},C(i,j)*beta,C(i,j));
    compute s_2("s_2",{i,j,k},C(i,j)+alpha*A(i,k)*B(k,j),C(i,j));
    // Only referenced by the disabled manual schedule below.
    var i0("i0"), j0("j0"),k0("k0"), i1("i1"), j1("j1"),k1("k1");
    s_2.after(s_1,-1);
    // Manual schedule kept for reference (disabled; auto_DSE is used instead).
    // s_2.tile(k,j,i,2,2,16,i0, j0, k0, i1, j1,k1);
    // s_2.unroll(k1,-1);
    // s_2.unroll(j1,-1);
    // s_2.unroll(i1,-1);
    // s_1.pipeline(j,1);
    // s_2.pipeline(j,1);
    // s_1.tile(k,j,i,2,2,16,i0, j0, k0, i1, j1,k1);
    // s_1.unroll(k1,-1);
    // s_1.unroll(j1,-1);
    // s_1.unroll(i1,-1);
    // s_1.pipeline(k0,1);
    // s.tile(i, j, 4, 4, i0, j0, i1, j1);
    // A.partition({16,2},"cyclic");
    // B.partition({2,2},"cyclic");
    // C.partition({16,2},"cyclic");
    // codegen();
    // Parent of the working directory; run-code.sh starts the binary from
    // build/, so this is the repository root.
    std::string pwd = std::filesystem::current_path().parent_path();
    std::string path = pwd+"/samples/gemm/";
    fct->auto_DSE(path);
}
================================================
FILE: testbench/gesummv.cpp
================================================
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include
#include "expr.h"
#include "compute.h"
#include "function.h"
#include "core.h"
// #include "mlir/IR/Attributes.h"
// Problem size. run-code.sh rewrites this exact "#define N <value>" line with
// sed to sweep sizes, so keep it on one line in this form.
#define N 4096
using namespace std;
using namespace polyfp;
// Testbench for the gesummv kernel, declared with the polyfp DSL and handed
// to the automatic design-space exploration. Reading the last `compute`
// argument as the destination (TODO confirm against compute.h):
//   s_1: temp(i) = temp(i) + A(i,j)*x(j)
//   s_2: y(i)    = y(i) + B(i,j)*x(j)
//   s_3: y(i)    = alpha*temp(i) + beta*y(i)
int main(){
    // The name embeds N; run-code.sh looks for samples/gesummv/test_gesummv_<N>.mlir.
    std::string name = "test_gesummv_"+std::to_string(N);
    init(name);
    auto *fct = global::get_implicit_function();
    var i("i", 0 ,N);
    var j("j", 0 ,N);
    // k is declared but not used by any statement below.
    var k("k", 0 ,N);
    placeholder temp("temp",{N},p_float32);
    placeholder A("A",{N,N},p_float32);
    placeholder B("B",{N,N},p_float32);
    placeholder x("x",{N},p_float32);
    placeholder y("y",{N},p_float32);
    constant alpha(1.6);
    constant beta(3.7);
    compute s_1("s_1",{i,j},temp(i)+A(i,j)*x(j),temp(i));
    compute s_2("s_2",{i,j},y(i)+B(i,j)*x(j),y(i));
    compute s_3("s_3",{i},alpha*temp(i)+beta*y(i),y(i));
    // s_2 is ordered after s_1 at iterator j (presumably sharing the loop
    // nest — TODO confirm); s_3 is ordered after s_1 with -1.
    s_2.after(s_1,j);
    s_3.after(s_1,-1);
    // Parent of the working directory; run-code.sh starts the binary from
    // build/, so this is the repository root.
    std::string pwd = std::filesystem::current_path().parent_path();
    std::string path = pwd+"/samples/gesummv/";
    fct->auto_DSE(path);
}
================================================
FILE: testbench/heat.cpp
================================================
#include "expr.h"
#include "compute.h"
#include "function.h"
#include "core.h"
#include
using namespace std;
using namespace polyfp;
// Only used to build the function name; the array extents below are literals.
#define N 4096
// Testbench for a 1-D heat stencil over 4096 elements, iterated with k.
// Reading the last `compute` argument as the destination (TODO confirm):
//   s_1: A(i+1) = (B(i) - factor2*B(i+1) + B(i+2)) * factor1
//   s_2: B(i+1) = A(i+1)
int main(){
    std::string name = "test_heat_"+std::to_string(N);
    init(name);
    auto *fct = global::get_implicit_function();
    var i("i", 0 ,4094);
    var k("k", 0 ,4096);
    placeholder A("A",{4096},p_float32);
    placeholder B("B",{4096},p_float32);
    constant factor1(0.125);
    constant factor2(2.0);
    compute s_1("s_1",{k,i},(B(i)-factor2*B(i+1)+B(i+2))*factor1,A(i+1));
    compute s_2("s_2",{k,i},A(i+1),B(i+1));
    // Declared but not used by any statement in this file.
    var i0("i0"), j0("j0"),k0("k0"), i1("i1"), j1("j1"),k1("k1");
    // Order s_2 after s_1 at iterator k (presumably inside the same k loop —
    // TODO confirm against compute::after).
    s_2.after(s_1,k);
    // Parent of the working directory (the repository root when started from build/).
    std::string pwd = std::filesystem::current_path().parent_path();
    std::string path = pwd+"/samples/heat/";
    fct->auto_DSE(path);
}
================================================
FILE: testbench/jacobi.cpp
================================================
#include "expr.h"
#include "compute.h"
#include "function.h"
#include "core.h"
#include
using namespace std;
using namespace polyfp;
// Only used to build the function name; the array extents below are literals.
#define N 4096
// Testbench for a 1-D Jacobi stencil over 4096 elements, iterated with t.
// Reading the last `compute` argument as the destination (TODO confirm):
//   s_1: A(i+1) = (B(i) + B(i+1) + B(i+2)) * factor
//   s_2: B(i+1) = (A(i) + A(i+1) + A(i+2)) * factor
int main(){
    std::string name = "test_jacobi_"+std::to_string(N);
    init(name);
    auto *fct = global::get_implicit_function();
    var i("i", 0 ,4094);
    var t("t", 0 ,4096);
    placeholder A("A",{4096},p_float32);
    placeholder B("B",{4096},p_float32);
    constant factor(0.33333);
    compute s_1("s_1",{t,i},(B(i)+B(i+1)+B(i+2))*factor,A(i+1));
    compute s_2("s_2",{t,i},(A(i)+A(i+1)+A(i+2))*factor,B(i+1));
    // Order s_2 after s_1 at iterator t (presumably inside the same t loop —
    // TODO confirm against compute::after).
    s_2.after(s_1,t);
    // Choice 1: Hand optimization for users with expertise
    var i0("i0"), j0("j0"),k0("k0"), i1("i1"), j1("j1"),k1("k1");
    // s_1.tile(t,i,1,2,t0,i0,t1,i1);
    // s_2.tile(t,i,1,2,t0,i0,t1,i1);
    // s_1.pipeline(i0,1);
    // s_2.pipeline(i0,1);
    // s_1.unroll(i1,2);
    // s_2.unroll(i1,2);
    // A.partition({4},"cyclic");
    // B.partition({4},"cyclic");
    // codegen();
    // Choice 2: Automatic DSE for users without expertise
    std::string pwd = std::filesystem::current_path().parent_path();
    std::string path = pwd+"/samples/jacobi/";
    fct->auto_DSE(path);
}
================================================
FILE: testbench/jacobi2d.cpp
================================================
#include "expr.h"
#include "compute.h"
#include "function.h"
#include "core.h"
#include
using namespace std;
using namespace polyfp;
#define N 4096
int main(){
    std::string name = "test_jacobi2d_"+std::to_string(N);
    init(name);
    auto *fct = global::get_implicit_function();
    var i("i", 0 ,4094);
    var j("j", 0 ,4094);
    // var j("j", 1 ,4095);
    var k("k", 0 ,4096);
    placeholder A("A",{4096,4096},p_float32);
    placeholder B("B",{4096,4096},p_float32);
    constant factor(0.2);
    compute s_1("s_1",{k,i,j},(A(i+1,j+1)+A(i+1,j)+A(i+1,j+2)+A(i+2,j+1)+A(i,j+1))*factor,B(i+1,j+1));
    compute 
s_2("s_2",{k,i,j},(B(i+1,j+1)+B(i+1,j)+B(i+1,j+2)+B(i+2,j+1)+B(i,j+1))*factor,A(i+1,j+1)); var i0("i0"), j0("j0"),k0("k0"), i1("i1"), j1("j1"),k1("k1"); s_2.after(s_1,k); std::string pwd = std::filesystem::current_path().parent_path(); std::string path = pwd+"/samples/jacobi2d/"; fct->auto_DSE(path); } ================================================ FILE: testbench/resnet18.cpp ================================================ #include "expr.h" #include "compute.h" #include "function.h" #include "core.h" #include using namespace std; using namespace polyfp; // #define K 4 // Size of convolution filter ( FOut xFIn x K x K) // #define N 32 // DATA_SET // polyfp::expr pmax(polyfp::expr left, polyfp::expr right){ // return expr(polyfp::o_max, left, right); // } int main(){ init("resnet18"); auto *fct = global::get_implicit_function(); var o("o", 0 ,64); var y("y", 0 ,32); var x("x", 0 ,32); var i("i", 0 ,3); var p("p", 0 ,3); var q("q", 0 ,3); // Block 1.1 placeholder filter("filter",{64,3,3,3},p_float32); placeholder fo1("fo1",{64,32,32},p_float32);//{0,64,32,32} placeholder input("input",{3,32,32},p_float32); constant scalar(0,p_float32); // omit initialisation of input and filter compute s_1("s_1",{o,y,x},scalar,fo1(o,y,x)); compute s_2("s_2",{o,y,x,i,p,q},fo1(o,y,x)+input(i,y+p,x+q)*filter(o,i,p,q),fo1(o,y,x)); // ReLU = max() compute s_3("s_3",{o,y,x},p_max(fo1(o,y,x),scalar),fo1(o,y,x)); s_2.after(s_1,-1); s_3.after(s_2,-1); var i2("i2", 0 ,64); placeholder fo2("fo2",{64,32,32},p_float32);//{0,64,32,32} placeholder filter2("filter2",{64,64,3,3},p_float32); compute s_4("s_4",{o,y,x},scalar,fo2(o,y,x)); compute s_5("s_5",{o,y,x,i2,p,q},fo2(o,y,x)+fo1(i2,y+p,x+q)*filter2(o,i2,p,q),fo2(o,y,x)); // ReLU = max() compute s_6("s_6",{o,y,x},p_max(fo2(o,y,x),scalar),fo2(o,y,x)); s_4.after(s_3,x); s_5.after(s_4,-1); s_6.after(s_5,-1); placeholder fo3("fo3",{64,32,32},p_float32);//{0,64,32,32} compute s_7("s_7",{o,y,x},scalar,fo3(o,y,x)); compute 
s_8("s_8",{o,y,x,i2,p,q},fo3(o,y,x)+fo2(i2,y+p,x+q)*filter2(o,i2,p,q),fo3(o,y,x)); // Residual placeholder fo4("fo4",{64,32,32},p_float32); compute s_9("s_9",{o,y,x},fo3(o,y,x)+fo1(o,y,x),fo4(o,y,x)); // ReLU = max() compute s_10("s_10",{o,y,x},p_max(fo4(o,y,x),scalar),fo4(o,y,x)); s_7.after(s_6,x); s_8.after(s_7,-1); s_9.after(s_8,-1); s_10.after(s_9,-1); // Block 1.2 placeholder fo5("fo5",{64,32,32},p_float32);//{0,64,32,32} compute s_11("s_11",{o,y,x},scalar,fo5(o,y,x)); compute s_12("s_12",{o,y,x,i2,p,q},fo5(o,y,x)+fo4(i2,y+p,x+q)*filter2(o,i2,p,q),fo5(o,y,x)); // ReLU = max() compute s_13("s_13",{o,y,x},p_max(fo5(o,y,x),scalar),fo5(o,y,x)); s_11.after(s_10,x); s_12.after(s_11,-1); s_13.after(s_12,-1); placeholder fo6("fo6",{64,32,32},p_float32); compute s_14("s_14",{o,y,x},scalar,fo6(o,y,x)); compute s_15("s_15",{o,y,x,i2,p,q},fo6(o,y,x)+fo5(i2,y+p,x+q)*filter2(o,i2,p,q),fo6(o,y,x)); // Residual placeholder fo7("fo7",{64,32,32},p_float32); compute s_16("s_16",{o,y,x},fo6(o,y,x)+fo4(o,y,x),fo7(o,y,x)); // ReLU = max() compute s_17("s_17",{o,y,x},p_max(fo7(o,y,x),scalar),fo7(o,y,x)); s_14.after(s_13,x); s_15.after(s_14,-1); s_16.after(s_15,-1); s_17.after(s_16,-1); // Block 2.1 var o2("o2", 0 ,128); var y2("y2", 0 ,16); var x2("x2", 0 ,16); placeholder fo8("fo8",{128,16,16},p_float32);//{0,64,32,32} placeholder filter3("filter3",{128,64,3,3},p_float32); compute s_18("s_18",{o2,y2,x2},scalar,fo8(o2,y2,x2)); compute s_19("s_19",{o2,y2,x2,i2,p,q},fo8(o2,y2,x2)+fo7(i2,y2*2+p,x2*2+q)*filter3(o2,i2,p,q),fo8(o2,y2,x2)); // ReLU = max() compute s_20("s_20",{o2,y2,x2},p_max(fo8(o2,y2,x2),scalar),fo8(o2,y2,x2)); s_18.after(s_17,-1); s_19.after(s_18,-1); s_20.after(s_19,-1); var i3("i3", 0 ,128); placeholder fo9("fo9",{128,16,16},p_float32);//{0,64,32,32} placeholder filter4("filter4",{128,128,3,3},p_float32); compute s_21("s_21",{o2,y2,x2},scalar,fo9(o2,y2,x2)); compute s_22("s_22",{o2,y2,x2,i3,p,q},fo9(o2,y2,x2)+fo8(i3,y2+p,x2+q)*filter4(o2,i3,p,q),fo9(o2,y2,x2)); // 
transform placeholder fo10("fo10",{128,16,16},p_float32);//{0,64,32,32} placeholder temp1("temp1",{128,64},p_float32); compute s_23("s_23",{o2,y2,x2,i2},fo7(i2,y2*2,x2*2)*temp1(o2,i2)+fo10(o2,y2,x2),fo10(o2,y2,x2)); // Residual placeholder fo11("fo11",{128,16,16},p_float32);//{0,64,32,32} compute s_24("s_24",{o2,y2,x2},fo10(o2,y2,x2)+fo9(o2,y2,x2),fo11(o2,y2,x2)); // ReLU = max() compute s_25("s_25",{o2,y2,x2},p_max(fo11(o2,y2,x2),scalar),fo11(o2,y2,x2)); s_21.after(s_20,x2); s_22.after(s_21,-1); s_23.after(s_22,-1); s_24.after(s_23,-1); s_25.after(s_24,-1); // Block 2.2 placeholder fo12("fo12",{128,16,16},p_float32);//{0,64,32,32} compute s_26("s_26",{o2,y2,x2},scalar,fo12(o2,y2,x2)); compute s_27("s_27",{o2,y2,x2,i3,p,q},fo12(o2,y2,x2)+fo11(i3,y2+p,x2+q)*filter4(o2,i3,p,q),fo12(o2,y2,x2)); // ReLU = max() compute s_28("s_28",{o2,y2,x2},p_max(fo12(o2,y2,x2),scalar),fo12(o2,y2,x2)); placeholder fo13("fo13",{128,16,16},p_float32);//{0,64,32,32} compute s_29("s_29",{o2,y2,x2},scalar,fo13(o2,y2,x2)); compute s_30("s_30",{o2,y2,x2,i3,p,q},fo13(o2,y2,x2)+fo12(i3,y2+p,x2+q)*filter4(o2,i3,p,q),fo13(o2,y2,x2)); // Residual placeholder fo14("fo14",{128,16,16},p_float32);//{0,64,32,32} compute s_31("s_31",{o2,y2,x2},fo13(o2,y2,x2)+fo11(o2,y2,x2),fo14(o2,y2,x2)); // ReLU = max() compute s_32("s_32",{o2,y2,x2},p_max(fo14(o2,y2,x2),scalar),fo14(o2,y2,x2)); s_26.after(s_25,x2); s_27.after(s_26,-1); s_28.after(s_27,-1); s_29.after(s_28,x2); s_30.after(s_29,-1); s_31.after(s_30,-1); s_32.after(s_31,-1); // Block 3.1 var o3("o3", 0 ,256); var y3("y3", 0 ,8); var x3("x3", 0 ,8); placeholder fo15("fo15",{256,8,8},p_float32);//{0,64,32,32} placeholder filter5("filter5",{256,128,3,3},p_float32); compute s_33("s_33",{o3,y3,x3},scalar,fo15(o3,y3,x3)); compute s_34("s_34",{o3,y3,x3,i3,p,q},fo15(o3,y3,x3)+fo14(i3,y3*2+p,x3*2+q)*filter5(o3,i3,p,q),fo15(o3,y3,x3)); // ReLU = max() compute s_35("s_35",{o3,y3,x3},p_max(fo15(o3,y3,x3),scalar),fo15(o3,y3,x3)); s_33.after(s_32,-1); 
s_34.after(s_33,-1); s_35.after(s_34,-1); var i4("i4", 0 ,256); placeholder fo16("fo16",{256,8,8},p_float32);//{0,64,32,32} placeholder filter6("filter6",{256,256,3,3},p_float32); compute s_36("s_36",{o3,y3,x3},scalar,fo16(o3,y3,x3)); compute s_37("s_37",{o3,y3,x3,i4,p,q},fo16(o3,y3,x3)+fo15(i4,y3+p,x3+q)*filter6(o3,i4,p,q),fo16(o3,y3,x3)); // transform placeholder fo17("fo17",{256,8,8},p_float32);//{0,64,32,32} placeholder temp2("temp2",{256,128},p_float32); compute s_38("s_38",{o3,y3,x3,i3},fo14(i3,y3*2,x3*2)*temp2(o3,i3)+fo17(o3,y3,x3),fo17(o3,y3,x3)); // Residual placeholder fo18("fo18",{256,8,8},p_float32);//{0,64,32,32} compute s_39("s_39",{o3,y3,x3},fo17(o3,y3,x3)+fo16(o3,y3,x3),fo18(o3,y3,x3)); // ReLU = max() compute s_40("s_40",{o3,y3,x3},p_max(fo18(o3,y3,x3),scalar),fo18(o3,y3,x3)); s_36.after(s_35,x3); s_37.after(s_36,-1); s_38.after(s_37,-1); s_39.after(s_38,-1); s_40.after(s_39,-1); // Block 3.2 placeholder fo19("fo19",{256,8,8},p_float32);//{0,64,32,32} compute s_41("s_41",{o3,y3,x3},scalar,fo19(o3,y3,x3)); compute s_42("s_42",{o3,y3,x3,i4,p,q},fo19(o3,y3,x3)+fo18(i4,y3+p,x3+q)*filter6(o3,i4,p,q),fo19(o3,y3,x3)); // ReLU = max() compute s_43("s_43",{o3,y3,x3},p_max(fo19(o3,y3,x3),scalar),fo19(o3,y3,x3)); placeholder fo20("fo20",{256,8,8},p_float32);//{0,64,32,32} compute s_44("s_44",{o3,y3,x3},scalar,fo20(o3,y3,x3)); compute s_45("s_45",{o3,y3,x3,i4,p,q},fo20(o3,y3,x3)+fo19(i4,y3+p,x3+q)*filter6(o3,i4,p,q),fo20(o3,y3,x3)); // Residual placeholder fo21("fo21",{256,8,8},p_float32);//{0,64,32,32} compute s_46("s_46",{o3,y3,x3},fo20(o3,y3,x3)+fo18(o3,y3,x3),fo21(o3,y3,x3)); // ReLU = max() compute s_47("s_47",{o3,y3,x3},p_max(fo21(o3,y3,x3),scalar),fo21(o3,y3,x3)); s_41.after(s_40,x3); s_42.after(s_41,-1); s_43.after(s_42,-1); s_44.after(s_43,-1); s_45.after(s_44,-1); s_46.after(s_45,-1); s_47.after(s_46,-1); // Block 4.1 var o4("o4", 0 ,512); var y4("y4", 0 ,4); var x4("x4", 0 ,4); placeholder fo22("fo22",{512,4,4},p_float32);//{0,64,32,32} placeholder 
filter7("filter7",{512,256,3,3},p_float32); compute s_48("s_48",{o4,y4,x4},scalar,fo22(o4,y4,x4)); compute s_49("s_49",{o4,y4,x4,i4,p,q},fo22(o4,y4,x4)+fo21(i4,y4*2+p,x4*2+q)*filter7(o4,i4,p,q),fo22(o4,y4,x4)); // ReLU = max() compute s_50("s_50",{o4,y4,x4},p_max(fo22(o4,y4,x4),scalar),fo22(o4,y4,x4)); s_48.after(s_47,-1); s_49.after(s_48,-1); s_50.after(s_49,-1); var i5("i5", 0 ,512); placeholder fo23("fo23",{512,4,4},p_float32);//{0,64,32,32} placeholder filter8("filter8",{512,512,3,3},p_float32); compute s_51("s_51",{o4,y4,x4},scalar,fo23(o4,y4,x4)); compute s_52("s_52",{o4,y4,x4,i5,p,q},fo23(o4,y4,x4)+fo22(i5,y4+p,x4+q)*filter8(o4,i5,p,q),fo23(o4,y4,x4)); // transform placeholder fo24("fo24",{512,4,4},p_float32);//{0,64,32,32} placeholder temp3("temp3",{512,256},p_float32); compute s_53("s_53",{o4,y4,x4,i4},fo21(i4,y4*2,x4*2)*temp3(o4,i4)+fo24(o4,y4,x4),fo24(o4,y4,x4)); // Residual placeholder fo25("fo25",{512,4,4},p_float32);//{0,64,32,32} compute s_54("s_54",{o4,y4,x4},fo24(o4,y4,x4)+fo23(o4,y4,x4),fo25(o4,y4,x4)); // ReLU = max() compute s_55("s_55",{o4,y4,x4},p_max(fo25(o4,y4,x4),scalar),fo25(o4,y4,x4)); s_51.after(s_50,x4); s_52.after(s_51,-1); s_53.after(s_52,-1); s_54.after(s_53,-1); s_55.after(s_54,-1); // Block 4.2 placeholder fo26("fo26",{512,4,4},p_float32);//{0,64,32,32} compute s_56("s_56",{o4,y4,x4},scalar,fo26(o4,y4,x4)); compute s_57("s_57",{o4,y4,x4,i5,p,q},fo26(o4,y4,x4)+fo25(i5,y4+p,x4+q)*filter8(o4,i5,p,q),fo26(o4,y4,x4)); // ReLU = max() compute s_58("s_58",{o4,y4,x4},p_max(fo26(o4,y4,x4),scalar),fo26(o4,y4,x4)); placeholder fo27("fo27",{512,4,4},p_float32);//{0,64,32,32} compute s_59("s_59",{o4,y4,x4},scalar,fo27(o4,y4,x4)); compute s_60("s_60",{o4,y4,x4,i5,p,q},fo27(o4,y4,x4)+fo26(i5,y4+p,x4+q)*filter8(o4,i5,p,q),fo27(o4,y4,x4)); // Residual placeholder fo28("fo28",{512,4,4},p_float32);//{0,64,32,32} compute s_61("s_61",{o4,y4,x4},fo27(o4,y4,x4)+fo25(o4,y4,x4),fo28(o4,y4,x4)); // ReLU = max() compute 
s_62("s_62",{o4,y4,x4},p_max(fo28(o4,y4,x4),scalar),fo28(o4,y4,x4)); s_56.after(s_55,x4); s_57.after(s_56,-1); s_58.after(s_57,-1); s_59.after(s_58,x4); s_60.after(s_59,-1); s_61.after(s_60,-1); s_62.after(s_61,-1); fct->auto_DSE_loop_transformation(); int count=0; for(auto &comp: fct->leader_computations){ auto iterators = comp->get_iteration_variables(); int size = iterators.size(); if(size>=6){ comp->apply_opt_strategy({4,1,1}); } if(size==4){ if(count!=0){ comp->apply_opt_strategy({2,1,1}); count+=1; } count+=1; } } std::string pwd = std::filesystem::current_path().parent_path(); std::string path = pwd+"/samples/resnet18/"; fct->dump_schedule(path); } ================================================ FILE: testbench/seidel.cpp ================================================ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "expr.h" #include "compute.h" #include "function.h" #include "core.h" // #include "mlir/IR/Attributes.h" using namespace std; using namespace polyfp; int main(){ init("test_seidel_4096"); auto *fct = global::get_implicit_function(); var i("i", 0 ,4094); var j("j", 0 ,4094); var k("k", 0 ,4096); placeholder A("A",{4096,4096},p_float32); placeholder B("B",{4096,4096},p_float32); constant factor(9); compute s_1("s_1",{k,i,j},(A(i,j+1)+A(i,j)+A(i,j+2)+A(i+1,j)+A(i+1,j+1)+A(i+1,j+2)+A(i+2,j)+A(i+2,j+1)+A(i+2,j+2))/factor,A(i+1,j+1)); var i0("i0"), j0("j0"),k0("k0"), i1("i1"), j1("j1"),k1("k1"); std::string pwd = std::filesystem::current_path().parent_path(); std::string path = pwd+"/samples/seidel/"; fct->auto_DSE(path); } } ================================================ FILE: testbench/vgg16.cpp ================================================ #include "expr.h" #include "compute.h" #include "function.h" #include "core.h" #include using namespace std; using namespace polyfp; #define K 4 // Size of convolution filter ( 
FOut xFIn x K x K) #define N 32 // DATA_SET int main(){ init("test_vgg16"); auto *fct = global::get_implicit_function(); var o("o", 0 ,64); var y("y", 0 ,32); var x("x", 0 ,32); var i("i", 0 ,3); var p("p", 0 ,3); var q("q", 0 ,3); placeholder filter("filter",{64,3,3,3},p_float32); placeholder fo1("fo1",{64,32,32},p_float32);//{0,64,32,32} placeholder input("input",{3,34,34},p_float32); // placeholder temp("temp",{3,32,32},p_float32); // placeholder relu("relu",{64,32,32},p_float32); // placeholder input1("input1",{64,34,34},p_float32); constant scalar(0,p_float32); // omit initialisation of input and filter compute s_1("s_1",{o,y,x},scalar,fo1(o,y,x)); compute s_2("s_2",{o,y,x,i,p,q},fo1(o,y,x)+input(i,y+p,x+q)*filter(o,i,p,q),fo1(o,y,x)); // ReLU = max() compute s_3("s_3",{o,y,x},p_max(fo1(o,y,x),scalar),fo1(o,y,x)); s_2.after(s_1,-1); s_3.after(s_2,-1); var i2("i2", 0 ,64); placeholder fo2("fo2",{64,32,32},p_float32);//{0,64,32,32} placeholder filter2("filter2",{64,64,3,3},p_float32); compute s_4("s_4",{o,y,x},scalar,fo2(o,y,x)); compute s_5("s_5",{o,y,x,i2,p,q},fo2(o,y,x)+fo1(i2,y+p,x+q)*filter2(o,i2,p,q),fo2(o,y,x)); // ReLU = max() compute s_6("s_6",{o,y,x},p_max(fo2(o,y,x),scalar),fo2(o,y,x)); s_4.after(s_3,x); s_5.after(s_4,-1); s_6.after(s_5,-1); var y2("y2", 0 ,16); var x2("x2", 0 ,16); placeholder fo3("fo3",{64,16,16},p_float32);//{0,64,32,32} compute s_7("s_7",{o,y2,x2},scalar,fo3(o,y2,x2)); // o3(o,y2,x2) = Max(o2(o,y2*2+p,x2*2+q), o3(o,y2,x2)); compute s_8("s_8",{o,y2,x2,p,q},fo2(o,y2*2+p,x2*2+q),fo3(o,y2,x2)); s_7.after(s_6,-1); s_8.after(s_7,-1); // Block 2 var o2("o2", 0 ,128); placeholder fo4("fo4",{128,16,16},p_float32);//{0,64,32,32} placeholder filter3("filter3",{128,64,3,3},p_float32); compute s_9("s_9",{o2,y2,x2},scalar,fo4(o2,y2,x2)); compute s_10("s_10",{o2,y2,x2,i2,p,q},fo4(o2,y2,x2)+fo3(i2,y2+p,x2+q)*filter3(o2,i2,p,q),fo4(o2,y2,x2)); //ReLu = max() compute s_11("s_11",{o2,y2,x2},p_max(fo4(o2,y2,x2),scalar),fo4(o2,y2,x2)); 
s_9.after(s_8,-1); s_10.after(s_9,-1); s_11.after(s_10,-1); var i3("i3", 0 ,128); placeholder fo5("fo5",{128,16,16},p_float32);//{0,64,32,32} placeholder filter4("filter4",{128,128,3,3},p_float32); compute s_12("s_12",{o2,y2,x2},scalar,fo5(o2,y2,x2)); compute s_13("s_13",{o2,y2,x2,i3,p,q},fo5(o2,y2,x2)+fo4(i3,y2+p,x2+q)*filter4(o2,i3,p,q),fo5(o2,y2,x2)); //ReLu = max() compute s_14("s_14",{o2,y2,x2},p_max(fo5(o2,y2,x2),scalar),fo5(o2,y2,x2)); s_12.after(s_11,x2); s_13.after(s_12,-1); s_14.after(s_13,-1); var y3("y3", 0 ,8); var x3("x3", 0 ,8); placeholder fo6("fo6",{128,8,8},p_float32);//{0,64,32,32} compute s_15("s_15",{o2,y3,x3},scalar,fo6(o2,y3,x3)); // o3(o,y2,x2) = Max(o2(o,y2*2+p,x2*2+q), o3(o,y2,x2)); compute s_16("s_16",{o2,y3,x3,p,q},fo5(o2,y3*2+p,x3*2+q),fo6(o2,y3,x3)); s_15.after(s_14,-1); s_16.after(s_15,-1); // Block 3 var o3("o3", 0 ,256); placeholder fo7("fo7",{256,8,8},p_float32);//{0,64,32,32} placeholder filter5("filter5",{256,128,3,3},p_float32); compute s_17("s_17",{o3,y3,x3},scalar,fo7(o3,y3,x3)); compute s_18("s_18",{o3,y3,x3,i3,p,q},fo7(o3,y3,x3)+fo6(i3,y3+p,x3+q)*filter5(o3,i3,p,q),fo7(o3,y3,x3)); //ReLu = max() compute s_19("s_19",{o3,y3,x3},p_max(fo7(o3,y3,x3),scalar),fo7(o3,y3,x3)); s_17.after(s_16,-1); s_18.after(s_17,-1); s_19.after(s_18,-1); var i4("i4", 0 ,256); placeholder fo8("fo8",{256,8,8},p_float32);//{0,64,32,32} placeholder fo9("fo9",{256,8,8},p_float32);//{0,64,32,32} placeholder filter6("filter6",{256,256,3,3},p_float32); compute s_20("s_20",{o3,y3,x3},scalar,fo8(o3,y3,x3)); compute s_21("s_21",{o3,y3,x3,i4,p,q},fo8(o3,y3,x3)+fo7(i4,y3+p,x3+q)*filter6(o3,i4,p,q),fo8(o3,y3,x3)); //ReLu = max() compute s_22("s_22",{o3,y3,x3},p_max(fo8(o3,y3,x3),scalar),fo8(o3,y3,x3)); compute s_23("s_23",{o3,y3,x3},scalar,fo9(o3,y3,x3)); compute s_24("s_24",{o3,y3,x3,i4,p,q},fo9(o3,y3,x3)+fo8(i4,y3+p,x3+q)*filter6(o3,i4,p,q),fo9(o3,y3,x3)); //ReLu = max() compute s_25("s_25",{o3,y3,x3},p_max(fo9(o3,y3,x3),scalar),fo9(o3,y3,x3)); 
s_20.after(s_19,x3); s_21.after(s_20,-1); s_22.after(s_21,-1); s_23.after(s_22,-1); s_24.after(s_23,-1); s_25.after(s_24,-1); var y4("y4", 0 ,4); var x4("x4", 0 ,4); placeholder fo10("fo10",{256,4,4},p_float32);//{0,64,32,32} compute s_26("s_26",{o3,y4,x4},scalar,fo10(o3,y4,x4)); // o3(o,y2,x2) = Max(o2(o,y2*2+p,x2*2+q), o3(o,y2,x2)); compute s_27("s_27",{o3,y4,x4,p,q},fo9(o3,y4*2+p,x4*2+q),fo10(o3,y4,x4)); s_26.after(s_25,-1); s_27.after(s_26,-1); // Block 4 var o4("o4", 0 ,512); placeholder fo11("fo11",{512,4,4},p_float32);//{0,64,32,32} placeholder filter7("filter7",{512,256,3,3},p_float32); compute s_28("s_28",{o4,y4,x4},scalar,fo11(o4,y4,x4)); compute s_29("s_29",{o4,y4,x4,i4,p,q},fo11(o4,y4,x4)+fo10(i4,y4+p,x4+q)*filter7(o4,i4,p,q),fo11(o4,y4,x4)); //ReLu = max() compute s_30("s_30",{o4,y4,x4},p_max(fo11(o4,y4,x4),scalar),fo11(o4,y4,x4)); s_28.after(s_27,-1); s_29.after(s_28,-1); s_30.after(s_29,-1); var i5("i5", 0 ,512); placeholder fo12("fo12",{512,8,8},p_float32);//{0,64,32,32} placeholder fo13("fo13",{512,8,8},p_float32);//{0,64,32,32} placeholder filter8("filter8",{512,512,3,3},p_float32); compute s_31("s_31",{o4,y4,x4},scalar,fo12(o4,y4,x4)); compute s_32("s_32",{o4,y4,x4,i5,p,q},fo12(o4,y4,x4)+fo11(i5,y4+p,x4+q)*filter8(o4,i5,p,q),fo12(o4,y4,x4)); //ReLu = max() compute s_33("s_33",{o4,y4,x4},p_max(fo12(o4,y4,x4),scalar),fo12(o4,y4,x4)); compute s_34("s_34",{o4,y4,x4},scalar,fo13(o4,y4,x4)); compute s_35("s_35",{o4,y4,x4,i5,p,q},fo13(o4,y4,x4)+fo12(i5,y4+p,x4+q)*filter8(o4,i5,p,q),fo13(o4,y4,x4)); //ReLu = max() compute s_36("s_36",{o4,y4,x4},p_max(fo13(o4,y4,x4),scalar),fo13(o4,y4,x4)); s_31.after(s_30,x4); s_32.after(s_31,-1); s_33.after(s_32,-1); s_34.after(s_33,x4); s_35.after(s_34,-1); s_36.after(s_35,-1); var y5("y5", 0 ,2); var x5("x5", 0 ,2); placeholder fo14("fo14",{512,2,2},p_float32);//{0,64,32,32} compute s_37("s_37",{o4,y5,x5},scalar,fo14(o4,y5,x5)); // o3(o,y2,x2) = Max(o2(o,y2*2+p,x2*2+q), o3(o,y2,x2)); compute 
s_38("s_38",{o4,y5,x5,p,q},fo13(o4,y5*2+p,x5*2+q),fo14(o4,y5,x5)); s_37.after(s_36,-1); s_38.after(s_37,-1); // Block 5 var o5("o5", 0 ,512); placeholder fo15("fo15",{512,2,2},p_float32);//{0,64,32,32} placeholder filter9("filter9",{512,512,3,3},p_float32); compute s_39("s_39",{o5,y5,x5},scalar,fo15(o5,y5,x5)); compute s_40("s_40",{o5,y5,x5,i5,p,q},fo15(o5,y5,x5)+fo14(i5,y5+p,x5+q)*filter9(o5,i5,p,q),fo15(o5,y5,x5)); //ReLu = max() compute s_41("s_41",{o5,y5,x5},p_max(fo15(o5,y5,x5),scalar),fo15(o5,y5,x5)); s_39.after(s_38,-1); s_40.after(s_39,-1); s_41.after(s_40,-1); placeholder fo16("fo16",{512,2,2},p_float32);//{0,64,32,32} placeholder fo17("fo17",{512,2,2},p_float32);//{0,64,32,32} compute s_42("s_42",{o5,y5,x5},scalar,fo16(o5,y5,x5)); compute s_43("s_43",{o5,y5,x5,i5,p,q},fo16(o5,y5,x5)+fo15(i5,y5+p,x5+q)*filter9(o5,i5,p,q),fo16(o5,y5,x5)); //ReLu = max() compute s_44("s_44",{o5,y5,x5},p_max(fo16(o5,y5,x5),scalar),fo16(o5,y5,x5)); compute s_45("s_45",{o5,y5,x5},scalar,fo17(o5,y5,x5)); compute s_46("s_46",{o5,y5,x5,i5,p,q},fo17(o5,y5,x5)+fo16(i5,y5+p,x5+q)*filter9(o5,i5,p,q),fo17(o5,y5,x5)); //ReLu = max() compute s_47("s_47",{o5,y5,x5},p_max(fo17(o5,y5,x5),scalar),fo17(o5,y5,x5)); s_42.after(s_41,x5); s_43.after(s_42,-1); s_44.after(s_43,-1); s_45.after(s_44,-1); s_46.after(s_45,-1); s_47.after(s_46,-1); // var y5("y5", 0 ,2); // var x5("x5", 0 ,2); placeholder fo18("fo18",{512},p_float32);//{0,64,32,32} compute s_48("s_48",{o5},scalar,fo18(o5)); // o3(o,y2,x2) = Max(o2(o,y2*2+p,x2*2+q), o3(o,y2,x2)); compute s_49("s_49",{o5,p,q},fo17(o5,2+p,2+q),fo18(o5)); s_48.after(s_47,-1); s_49.after(s_48,-1); fct->auto_DSE_loop_transformation(); for(auto &comp: fct->leader_computations){ auto iterators = comp->get_iteration_variables(); int size = iterators.size(); if(size>=6){ comp->apply_opt_strategy({8,1,1}); } } std::string pwd = std::filesystem::current_path().parent_path(); std::string path = pwd+"/samples/vgg16/"; fct->dump_schedule(path); } 
================================================
FILE: vitis-reports.sh
================================================
#!/bin/bash
# Step 4 of the flow: run Vitis HLS on the Tcl scripts found under
# samples/<example>/, at most $max_parallel runs at a time, and report the
# total wall-clock time. Must be started from the directory that contains
# samples/. Requires bash >= 4.3 for `wait -n`.

start_time=$(date +"%s")
echo ""
echo ">>> Step 4. Synthesising the optimized HLS C code..."
echo ""

export LD_PRELOAD=/lib/x86_64-linux-gnu/libudev.so.1

# execute_tcl <example> <size>
# Runs samples/<example>/script_<size>.tcl with vitis_hls.
execute_tcl() {
    local example=$1
    local size=$2
    local script_name="script_${size}.tcl"
    # The subshell keeps the directory change local; if the sample directory is
    # missing, vitis_hls is not started in the wrong place.
    (
        cd "samples/${example}" || exit 1
        vitis_hls -f "$script_name"
    )
}

# execute_tcl2 <example> <size>
# Runs samples/<example>/script_power.tcl with vitis_hls. The size argument is
# accepted for symmetry with execute_tcl but does not select the script.
execute_tcl2() {
    local example=$1
    local size=$2
    local script_name="script_power.tcl"
    (
        cd "samples/${example}" || exit 1
        vitis_hls -f "$script_name"
    )
}

max_parallel=20

# run_batch <runner>
# Starts <runner> <example> <size> in the background for every combination of
# the global arrays `examples` and `sizes`, keeps at most $max_parallel of them
# running, and returns once the whole batch has finished.
run_batch() {
    local runner=$1
    local example size
    for example in "${examples[@]}"
    do
        for size in "${sizes[@]}"
        do
            "$runner" "$example" "$size" &
            # Count running jobs only, so finished ones do not trip the limit.
            if (( $(jobs -rp | wc -l) >= max_parallel )); then
                wait -n
            fi
        done
    done
    wait
}

examples=("edgeDetect" "gaussian" "blur" "jacobi" "jacobi2d" "heat" "seidel")
sizes=(4096)
run_batch execute_tcl

# NOTE(review): testbench/resnet18.cpp uses the path samples/resnet18/ while
# this list says "resnet" - confirm which directory holds script_512.tcl.
examples=("vgg16" "resnet")
sizes=(512)
run_batch execute_tcl

sizes=(32 64 128 256 512 1024 2048 4096 8192)
examples=("2mm" "3mm" "gemm" "bicg" "gesummv")
run_batch execute_tcl

examples=("2mm" "3mm" "gemm" "gesummv")
sizes=(4096)
run_batch execute_tcl2

examples=("bicg")
sizes=(4096)
run_batch execute_tcl2

cd /usr/src/workspace

end_time=$(date +"%s")
execution_time=$((end_time - start_time))
echo ""
echo ">>> Step 4 has been finished!"
echo ">>> Step 4 Total Execution Time: $execution_time seconds"
echo ""