Repository: sgmarz/osblog Branch: master Commit: 76715da537e8 Files: 198 Total size: 811.8 KB Directory structure: gitextract_71jihpoi/ ├── LICENSE ├── README.md ├── assembly/ │ ├── .gitignore │ ├── intrin.S │ └── intrin.c └── risc_v/ ├── .cargo/ │ └── config ├── .gitignore ├── BUILD.md ├── Cargo.toml ├── chapters/ │ ├── ch0/ │ │ ├── .build.config │ │ ├── .cargo/ │ │ │ └── config │ │ ├── Cargo.toml │ │ ├── Makefile │ │ ├── do.sh │ │ └── virt.lds │ ├── ch1/ │ │ ├── .cargo/ │ │ │ └── config │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── Makefile │ │ ├── make_hdd.sh │ │ └── src/ │ │ ├── asm/ │ │ │ ├── boot.S │ │ │ └── trap.S │ │ ├── lds/ │ │ │ └── virt.lds │ │ └── lib.rs │ ├── ch2/ │ │ ├── .cargo/ │ │ │ └── config │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── Makefile │ │ ├── make_hdd.sh │ │ └── src/ │ │ ├── asm/ │ │ │ ├── boot.S │ │ │ └── trap.S │ │ ├── lds/ │ │ │ └── virt.lds │ │ ├── lib.rs │ │ └── uart.rs │ ├── ch3/ │ │ ├── .cargo/ │ │ │ └── config │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── Makefile │ │ ├── make_hdd.sh │ │ └── src/ │ │ ├── asm/ │ │ │ ├── boot.S │ │ │ ├── mem.S │ │ │ └── trap.S │ │ ├── kmem.rs │ │ ├── lds/ │ │ │ └── virt.lds │ │ ├── lib.rs │ │ ├── page.rs │ │ └── uart.rs │ ├── ch4/ │ │ ├── .cargo/ │ │ │ └── config │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── Makefile │ │ ├── make_hdd.sh │ │ └── src/ │ │ ├── asm/ │ │ │ ├── boot.S │ │ │ ├── mem.S │ │ │ └── trap.S │ │ ├── cpu.rs │ │ ├── kmem.rs │ │ ├── lds/ │ │ │ └── virt.lds │ │ ├── lib.rs │ │ ├── page.rs │ │ ├── trap.rs │ │ └── uart.rs │ ├── ch5/ │ │ ├── .cargo/ │ │ │ └── config │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── Makefile │ │ ├── make_hdd.sh │ │ └── src/ │ │ ├── asm/ │ │ │ ├── boot.S │ │ │ ├── mem.S │ │ │ └── trap.S │ │ ├── cpu.rs │ │ ├── kmem.rs │ │ ├── lds/ │ │ │ └── virt.lds │ │ ├── lib.rs │ │ ├── page.rs │ │ ├── plic.rs │ │ ├── trap.rs │ │ └── uart.rs │ ├── ch6/ │ │ ├── .cargo/ │ │ │ └── config │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── Makefile │ │ ├── make_hdd.sh │ │ └── src/ │ 
│ ├── asm/ │ │ │ ├── boot.S │ │ │ ├── mem.S │ │ │ └── trap.S │ │ ├── cpu.rs │ │ ├── kmem.rs │ │ ├── lds/ │ │ │ └── virt.lds │ │ ├── lib.rs │ │ ├── page.rs │ │ ├── plic.rs │ │ ├── process.rs │ │ ├── trap.rs │ │ └── uart.rs │ ├── ch7/ │ │ ├── .cargo/ │ │ │ └── config │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── Makefile │ │ ├── make_hdd.sh │ │ └── src/ │ │ ├── asm/ │ │ │ ├── boot.S │ │ │ ├── mem.S │ │ │ └── trap.S │ │ ├── cpu.rs │ │ ├── kmem.rs │ │ ├── lds/ │ │ │ └── virt.lds │ │ ├── lib.rs │ │ ├── page.rs │ │ ├── plic.rs │ │ ├── process.rs │ │ ├── sched.rs │ │ ├── syscall.rs │ │ ├── trap.rs │ │ └── uart.rs │ ├── ch8/ │ │ ├── .cargo/ │ │ │ └── config │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── Makefile │ │ ├── make_hdd.sh │ │ └── src/ │ │ ├── asm/ │ │ │ ├── boot.S │ │ │ ├── mem.S │ │ │ └── trap.S │ │ ├── cpu.rs │ │ ├── kmem.rs │ │ ├── lds/ │ │ │ └── virt.lds │ │ ├── lib.rs │ │ ├── page.rs │ │ ├── plic.rs │ │ ├── process.rs │ │ ├── sched.rs │ │ ├── syscall.rs │ │ ├── trap.rs │ │ └── uart.rs │ └── ch9/ │ ├── .cargo/ │ │ └── config │ ├── .gitignore │ ├── Cargo.toml │ ├── Makefile │ ├── make_hdd.sh │ └── src/ │ ├── asm/ │ │ ├── boot.S │ │ ├── mem.S │ │ └── trap.S │ ├── block.rs │ ├── cpu.rs │ ├── kmem.rs │ ├── lds/ │ │ └── virt.lds │ ├── lib.rs │ ├── page.rs │ ├── plic.rs │ ├── process.rs │ ├── rng.rs │ ├── sched.rs │ ├── syscall.rs │ ├── trap.rs │ ├── uart.rs │ └── virtio.rs ├── src/ │ ├── asm/ │ │ ├── boot.S │ │ ├── mem.S │ │ └── trap.S │ ├── assembly.rs │ ├── block.rs │ ├── buffer.rs │ ├── console.rs │ ├── cpu.rs │ ├── elf.rs │ ├── fs.rs │ ├── gpu.rs │ ├── input.rs │ ├── kmem.rs │ ├── lds/ │ │ └── virt.lds │ ├── lock.rs │ ├── main.rs │ ├── page.rs │ ├── plic.rs │ ├── process.rs │ ├── rng.rs │ ├── sched.rs │ ├── syscall.rs │ ├── test.rs │ ├── trap.rs │ ├── uart.rs │ ├── vfs.rs │ └── virtio.rs └── userspace/ ├── .gitignore ├── Makefile ├── fb.cpp ├── helloworld.cpp ├── input-event-codes.h ├── shell.cpp ├── sleepy.cpp ├── startlib/ │ ├── .gitignore │ ├── Makefile │ 
├── linker.lds │ ├── printf.cpp │ ├── printf.h │ ├── start.S │ ├── syscall.S │ └── syscall.h └── upload.sh ================================================ FILE CONTENTS ================================================ ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2019 Stephen Marz Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: README.md ================================================ # osblog The Adventures of OS # RISC-V OS in Rust - risc_v/src - contains RISC-V OS in Rust - risc_v/src/asm - contains assembly portions - risc_v/userspace - contains C++ userspace programs ================================================ FILE: assembly/.gitignore ================================================ * !*.S !*.c !.gitignore !Makefile ================================================ FILE: assembly/intrin.S ================================================ .intel_syntax noprefix .section .text .global calc_asm calc_asm: # rdi rsi rdx rcx movupd xmm0, [rsi + 0] movupd xmm1, [rsi + 16] movupd xmm2, [rsi + 32] movupd xmm3, [rsi + 48] movupd xmm4, [rdx] mulps xmm0, xmm4 mulps xmm1, xmm4 mulps xmm2, xmm4 mulps xmm3, xmm4 haddps xmm0, xmm0 haddps xmm0, xmm0 haddps xmm1, xmm1 haddps xmm1, xmm1 haddps xmm2, xmm2 haddps xmm2, xmm2 haddps xmm3, xmm3 haddps xmm3, xmm3 movss [rdi + 0], xmm0 movss [rdi + 4], xmm1 movss [rdi + 8], xmm2 movss [rdi + 12], xmm3 ret ================================================ FILE: assembly/intrin.c ================================================ #include <stdio.h> #include <x86intrin.h> void calc_intrin(float result[], float matrix[], float vector[]); void calc_asm(float result[], float matrix[], float vector[]); int main() { int row, col; float vec[] = {1.0, 10.0, 100.0, 1000.0}; float mat[] = {2.0, 0.0, 0.0, 0.0, 0.0, 2.2, 0.0, 0.0, 0.0, 0.0, 22.2, 0.0, 0.0, 0.0, 0.0, 22.22}; float result[4]; calc_intrin(result, mat, vec); printf("%5.3f %5.3f %5.3f %5.3f\n", result[0], result[1], result[2], result[3]); calc_asm(result, mat, vec); printf("%5.3f %5.3f %5.3f %5.3f\n", result[0], result[1], result[2], result[3]); return 0; } void calc_intrin(float result[], float matrix[], float vector[]) { int row; __m128 vec = _mm_loadu_ps(vector); for (row = 0;row < 4;row++) { __m128 rowvec = _mm_loadu_ps(&matrix[row * 4]); __m128 rowvec2 = _mm_mul_ps(vec, 
rowvec); __m128 rowvec3 = _mm_hadd_ps(rowvec2, rowvec2); __m128 rowvec4 = _mm_hadd_ps(rowvec3, rowvec3); _mm_store_ss(&result[row], rowvec4); } } ================================================ FILE: risc_v/.cargo/config ================================================ [build] target = "riscv64gc-unknown-none-elf" rustflags = ['-Clink-arg=-Tsrc/lds/virt.lds'] [target.riscv64gc-unknown-none-elf] runner = "qemu-system-riscv64 -machine virt -cpu rv64 -d guest_errors,unimp -smp 4 -m 128M -drive if=none,format=raw,file=hdd.dsk,id=foo -device virtio-blk-device,scsi=off,drive=foo -serial mon:stdio -bios none -device virtio-rng-device -device virtio-gpu-device -device virtio-net-device -device virtio-tablet-device -device virtio-keyboard-device -kernel " ================================================ FILE: risc_v/.gitignore ================================================ os.elf target/* Cargo.lock hdd.dsk ================================================ FILE: risc_v/BUILD.md ================================================ # PREREQS You will need to install the riscv64gc target using rustup as well as cargo-binutils using cargo. * rustup target add riscv64gc-unknown-none-elf * cargo install cargo-binutils # BUILDING Edit .cargo/config to match your host's configuration. The runner will execute when you type `cargo run`. Type `cargo build` to start the build process. Type `cargo run` to run using the runner provided in .cargo/config # RELEASE BUILDS Release builds turn on the optimizer and make it run much quicker. To run release builds, add `--release` as a parameter to cargo: * cargo build --release * cargo run --release # HARD DRIVE FILE To run this as I have it configured, you'll need a hard drive file called hdd.dsk in this directory. You can create an empty one by typing the following. 
* fallocate -l 32M hdd.dsk ================================================ FILE: risc_v/Cargo.toml ================================================ [package] name = "sos" version = "0.1.0" authors = ["Stephen Marz "] edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [profile.dev] opt-level = 0 lto = false [profile.release] opt-level = 3 lto = true codegen-units = 1 [dependencies] ================================================ FILE: risc_v/chapters/ch0/.build.config ================================================ USE_SUDO="sudo" JOBS="10" LINUX_VER="git" BUILD_ROOT="${PWD}" TOOLCHAIN_ROOT="" BUILD_BINUTILS="${BUILD_ROOT}/build-binutils" BUILD_GCC_S1="${BUILD_ROOT}/build-gcc-s1" BUILD_GCC_S2="${BUILD_ROOT}/build-gcc-s2" BUILD_GLIBC_S1="${BUILD_ROOT}/build-glibc-s1" BUILD_GLIBC_S2="${BUILD_ROOT}/build-glibc-s2" BUILD_QEMU="${BUILD_ROOT}/build-qemu" ARCH="riscv" BITS="64" TAG="_1" ABI="lp64" ISA="rv64g" TARGET="${ARCH}${BITS}-unknown-linux-gnu" LIB_HEADER="linux.h" BUILD_LINUX_ARCH=$ARCH BUILD_LINUX="${BUILD_ROOT}/linux-${LINUX_VER}" BUILD_LINUX_HEADERS="${BUILD_ROOT}/build-${TARGET}-linux-headers" PREFIX="${TOOLCHAIN_ROOT}/opt/${ARCH}${BITS}${TAG}" SYSROOT="${PREFIX}/sysroot" PATH="${PREFIX}/bin:${PATH}" ================================================ FILE: risc_v/chapters/ch0/.cargo/config ================================================ [build] target = "riscv64gc-unknown-none-elf" [target.riscv64gc-unknown-none-elf] linker = "riscv64-unknown-linux-gnu-gcc" ================================================ FILE: risc_v/chapters/ch0/Cargo.toml ================================================ [package] name = "sos" version = "0.1.0" authors = ["Stephen Marz "] edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] crate-type = ["staticlib"] [dependencies] ================================================ FILE: risc_v/chapters/ch0/Makefile 
================================================ ##### ## BUILD ##### CC=riscv64-unknown-linux-gnu-g++ CFLAGS=-Wall -Wextra -pedantic -Wextra -O0 -g -std=c++17 CFLAGS+=-static -ffreestanding -nostdlib -fno-rtti -fno-exceptions CFLAGS+=-march=rv64gc -mabi=lp64 INCLUDES= LINKER_SCRIPT=-Tsrc/lds/virt.lds TYPE=debug RUST_TARGET=./target/riscv64gc-unknown-none-elf/$(TYPE) LIBS=-L$(RUST_TARGET) SOURCES_ASM=$(wildcard src/asm/*.S) LIB=-lsos -lgcc OUT=os.elf ##### ## QEMU ##### QEMU=qemu-system-riscv64 MACH=virt CPU=rv64 CPUS=4 MEM=128M DRIVE=hdd.dsk all: cargo build $(CC) $(CFLAGS) $(LINKER_SCRIPT) $(INCLUDES) -o $(OUT) $(SOURCES_ASM) $(LIBS) $(LIB) run: all $(QEMU) -machine $(MACH) -cpu $(CPU) -smp $(CPUS) -m $(MEM) -nographic -serial mon:stdio -bios none -kernel $(OUT) -drive if=none,format=raw,file=$(DRIVE),id=foo -device virtio-blk-device,scsi=off,drive=foo .PHONY: clean clean: cargo clean rm -f $(OUT) ================================================ FILE: risc_v/chapters/ch0/do.sh ================================================ #!/bin/bash # For building cross compilers # Use this at your own risk! # I make no warranties or guarantees with this script! # Stephen Marz # 15 Jan 2018 . ./.build.config if [ $# -eq 0 ]; then echo "Must provide a number" echo "0 - Binutils" echo "1 - GCC Stage 1" echo "2 - Linux Headers" echo "3 - GLIBC Headers" echo "4 - GLIBC" echo "5 - GCC Stage 2" echo "6 - QEMU" echo "7 - Libs and Links" echo "Add 90 if you just want to build that one stage" echo "99 - Clean" exit 99 else ARG=$1 fi #Build BINUTILS if [ $ARG -le 0 -o $ARG -eq 90 ]; then echo "+-+-+-+ BINUTILS +-+-+-+" mkdir -p ${BUILD_BINUTILS} cd ${BUILD_BINUTILS} ${BUILD_ROOT}/binutils-gdb/configure --target=${TARGET} --prefix=${PREFIX} --with-sysroot=${SYSROOT} --disable-multilib --disable-werror --disable-nls --with-expat=yes --enable-gdb > ${BUILD_ROOT}/binutils.log 2>&1 if [ $? 
-ne 0 ]; then echo "Error configuring BINUTILS" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/binutils.log exit 1 fi cd ${BUILD_ROOT} make -C ${BUILD_BINUTILS} -j${JOBS} >> ${BUILD_ROOT}/binutils.log 2>&1 if [ $? -ne 0 ]; then echo "Error building BINUTILS" echo "~~~~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/binutils.log exit 1 fi ${USE_SUDO} make -C ${BUILD_BINUTILS} install >> ${BUILD_ROOT}/binutils.log 2>&1 if [ $? -ne 0 ]; then echo "Error installing BINUTILS" echo "~~~~~~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/binutils.log exit 1 fi fi #Build GCC Stage 1 if [ $ARG -le 1 -o $ARG -eq 91 ]; then echo "+-+-+-+ GCC STAGE 1 +-+-+-+" sed -i "s|\"/lib/ld-linux-${ARCH}|\"${SYSROOT}/lib/ld-linux-${ARCH}|" ${BUILD_ROOT}/gcc/gcc/config/${ARCH}/${LIB_HEADER} mkdir -p ${BUILD_GCC_S1} cd ${BUILD_GCC_S1} ${BUILD_ROOT}/gcc/configure --target=${TARGET} --prefix=${PREFIX} --with-sysroot=${SYSROOT} --with-newlib --without-headers --disable-shared --disable-threads --with-system-zlib --enable-tls --enable-languages=c --disable-libatomic --disable-libmudflap --disable-libssp --disable-libquadmath --disable-libgomp --disable-nls --disable-bootstrap --enable-checking=yes --disable-multilib --with-abi=${ABI} --with-arch=${ISA} > ${BUILD_ROOT}/gccs1.log 2>&1 if [ $? -ne 0 ]; then echo "Error configuring GCC stage 1" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/gccs1.log exit 2 fi cd ${BUILD_ROOT} make -j${JOBS} -C ${BUILD_GCC_S1} >> ${BUILD_ROOT}/gccs1.log 2>&1 if [ $? -ne 0 ]; then echo "Error building GCC stage 1" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/gccs1.log exit 2 fi ${USE_SUDO} make -C ${BUILD_GCC_S1} install >> ${BUILD_ROOT}/gccs1.log 2>&1 if [ $? -ne 0 ]; then echo "Error installing GCC stage 1" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/gccs1.log exit 2 fi fi #Build Linux Headers if [ $ARG -le 2 -o $ARG -eq 92 ]; then echo "+-+-+-+ LINUX HEADERS +-+-+-+" if [ ! 
-x ${BUILD_ROOT}/linux-${LINUX_VER} ]; then tar xf ${BUILD_ROOT}/linux-${LINUX_VER}.tar.xz -C ${BUILD_ROOT} > ${BUILD_ROOT}/linhdr.log 2>&1 fi if [ $? -ne 0 ]; then echo "Error unpacking Linux Headers" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/linhdr.log exit 3 fi make ARCH=${BUILD_LINUX_ARCH} INSTALL_HDR_PATH=${BUILD_LINUX_HEADERS} -C ${BUILD_LINUX} defconfig >> ${BUILD_ROOT}/linhdr.log 2>&1 if [ $? -ne 0 ]; then echo "Error configuring Linux Headers" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/linhdr.log exit 3 fi make ARCH=${BUILD_LINUX_ARCH} INSTALL_HDR_PATH=${BUILD_LINUX_HEADERS} -C ${BUILD_LINUX} headers_install >> ${BUILD_ROOT}/linhdr.log 2>&1 if [ $? -ne 0 ]; then echo "Error installing Linux Headers" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/linhdr.log exit 3 fi fi if [ $ARG -le 3 -o $ARG -eq 93 ]; then #Build GLIBC Headers echo "+-+-+-+ GLIBC HEADERS +-+-+-+" mkdir -p ${BUILD_GLIBC_S1} cd ${BUILD_GLIBC_S1} ${BUILD_ROOT}/glibc/configure --host=${TARGET} --prefix=${SYSROOT}/usr --enable-shared --with-headers=${BUILD_LINUX_HEADERS}/include --disable-multilib --enable-kernel=3.0.0 > ${BUILD_ROOT}/glibchdr.log 2>&1 if [ $? -ne 0 ]; then echo "Error configuring GLIBC headers" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/glibchdr.log exit 4 fi cd ${BUILD_ROOT} ${USE_SUDO} make -C ${BUILD_GLIBC_S1} install-headers >> ${BUILD_ROOT}/glibchdr.log 2>&1 if [ $? -ne 0 ]; then echo "Error installing GLIBC headers" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/glibchdr.log exit 4 fi ${USE_SUDO} cp -a ${BUILD_LINUX_HEADERS}/include/* ${SYSROOT}/usr/include/ >> ${BUILD_ROOT}/glibchdr.log 2>&1 if [ $? 
-ne 0 ]; then echo "Error copying include files" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/glibchdr.log exit 4 fi fi if [ $ARG -le 4 -o $ARG -eq 94 ]; then #Build GLIBC echo "+-+-+-+ GLIBC +-+-+-+" mkdir -p ${BUILD_GLIBC_S2} cd ${BUILD_GLIBC_S2} ${BUILD_ROOT}/glibc/configure --host=${TARGET} --prefix=/usr --disable-werror --enable-tls --disable-nls --enable-shared --enable-obsolete-rpc --with-headers=${SYSROOT}/usr/include --disable-multilib --enable-kernel=3.0.0 > ${BUILD_ROOT}/glibc.log 2>&1 if [ $? -ne 0 ]; then echo "Error configuring GLIBC" echo "~~~~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/glibc.log exit 5 fi cd ${BUILD_ROOT} make -C ${BUILD_GLIBC_S2} -j${JOBS} >> ${BUILD_ROOT}/glibc.log 2>&1 if [ $? -ne 0 ]; then echo "Error building GLIBC" echo "~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/glibc.log exit 5 fi ${USE_SUDO} make -C ${BUILD_GLIBC_S2} install install_root=${SYSROOT} >> ${BUILD_ROOT}/glibc.log 2>&1 if [ $? -ne 0 ]; then echo "Error installing GLIBC" echo "~~~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/glibc.log exit 5 fi ${USE_SUDO} ln -s ${SYSROOT}/lib64 ${SYSROOT}/lib fi if [ $ARG -le 5 -o $ARG -eq 95 ]; then #Build GCC Stage 2 echo "+-+-+-+ GCC STAGE 2 +-+-+-+" mkdir -p ${BUILD_GCC_S2} cd ${BUILD_GCC_S2} ${BUILD_ROOT}/gcc/configure --target=${TARGET} --prefix=${PREFIX} --with-sysroot=${SYSROOT} --with-system-zlib --enable-shared --enable-tls --enable-languages=c,c++ --disable-libmudflap --disable-libssp --disable-libquadmath --disable-nls --disable-bootstrap --disable-multilib --enable-checking=yes --with-abi=${ABI} > ${BUILD_ROOT}/gccs2.log 2>&1 if [ $? -ne 0 ]; then echo "Error configuring GCC stage 2" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/gccs2.log exit 6 fi cd ${BUILD_ROOT} make -C ${BUILD_GCC_S2} -j${JOBS} >> ${BUILD_ROOT}/gccs2.log 2>&1 if [ $? 
-ne 0 ]; then echo "Error building GCC stage 2" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/gccs2.log exit 6 fi ${USE_SUDO} make -C ${BUILD_GCC_S2} install >> ${BUILD_ROOT}/gccs2.log 2>&1 if [ $? -ne 0 ]; then echo "Error installing GCC stage 2" echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/gccs2.log exit 6 fi ${USE_SUDO} cp -a ${PREFIX}/${TARGET}/lib* ${SYSROOT} if [ $? -ne 0 ]; then echo "Error copying libraries" echo "~~~~~~~~~~~~~~~~~~~~~~~" exit 6 fi fi if [ $ARG -le 6 -o $ARG -eq 96 ]; then #Build QEMU echo "+-+-+-+ QEMU +-+-+-+" mkdir -p ${BUILD_QEMU} cd ${BUILD_QEMU} ${BUILD_ROOT}/qemu/configure --prefix=${PREFIX} --interp-prefix=${SYSROOT} --target-list=riscv32-linux-user,riscv32-softmmu,${ARCH}${BITS}-linux-user,${ARCH}${BITS}-softmmu --enable-jemalloc --disable-werror > ${BUILD_ROOT}/qemu.log 2>&1 if [ $? -ne 0 ]; then echo "Error configuring QEMU" echo "~~~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/qemu.log exit 7 fi cd ${BUILD_ROOT} make -C ${BUILD_QEMU} -j${JOBS} >> ${BUILD_ROOT}/qemu.log 2>&1 if [ $? -ne 0 ]; then echo "Error building QEMU" echo "~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/qemu.log exit 7 fi ${USE_SUDO} make -C ${BUILD_QEMU} install >> ${BUILD_ROOT}/qemu.log 2>&1 if [ $? 
-ne 0 ]; then echo "Error installing QEMU" echo "~~~~~~~~~~~~~~~~~~~~~" cat ${BUILD_ROOT}/qemu.log exit 7 fi fi if [ $ARG -le 7 -o $ARG -eq 97 ]; then #Make Symlinks echo "+-+-+-+ SYMLINKS +-+-+-+" ${USE_SUDO} ln -s ${PREFIX}/bin/${TARGET}-gcc ${PREFIX}/bin/${ARCH}${BITS}-gcc ${USE_SUDO} ln -s ${PREFIX}/bin/${TARGET}-g++ ${PREFIX}/bin/${ARCH}${BITS}-g++ ${USE_SUDO} ln -s ${PREFIX}/bin/${TARGET}-objdump ${PREFIX}/bin/${ARCH}${BITS}-objdump ${USE_SUDO} ln -s ${PREFIX}/bin/${TARGET}-gdb ${PREFIX}/bin/${ARCH}${BITS}-gdb #Copy Libraries echo "+-+-+-+ COPY LIBRARIES +-+-+-+" ${USE_SUDO} cp -a ${SYSROOT}/lib/* ${SYSROOT}/usr/lib${BITS}/${ABI}/ fi if [ $ARG -eq 99 ]; then echo "+-+-+-+ CLEANING +-+-+-+" ${USE_SUDO} rm -fr ${BUILD_BINUTILS} ${USE_SUDO} rm -fr ${BUILD_GCC_S1} ${USE_SUDO} rm -fr ${BUILD_LINUX_HEADERS} ${USE_SUDO} rm -fr ${BUILD_GLIBC_S1} ${USE_SUDO} rm -fr ${BUILD_GLIBC_S2} ${USE_SUDO} rm -fr ${BUILD_GCC_S2} ${USE_SUDO} rm -fr ${BUILD_QEMU} rm -fr *.log fi echo "+-+-+-+ !! DONE !! +-+-+-+" ================================================ FILE: risc_v/chapters/ch0/virt.lds ================================================ /* virt.lds Linker script for outputting to RISC-V QEMU "virt" machine. Stephen Marz 6 October 2019 */ /* riscv is the name of the architecture that the linker understands for any RISC-V target (64-bit or 32-bit). We will further refine this by using -mabi=lp64 and -march=rv64gc */ OUTPUT_ARCH( "riscv" ) /* We're setting our entry point to a symbol called _start which is inside of boot.S. This essentially stores the address of _start as the "entry point", or where CPU instructions should start executing. In the rest of this script, we are going to place _start right at the beginning of 0x8000_0000 because this is where the virtual machine and many RISC-V boards will start executing. */ ENTRY( _start ) /* The MEMORY section will explain that we have "ram" that contains a section that is 'w' (writeable), 'x' (executable), and 'a' (allocatable). 
We use '!' to invert 'r' (read-only) and 'i' (initialized). We don't want our memory to be read-only, and we're stating that it is NOT initialized at the beginning. The ORIGIN is the memory address 0x8000_0000. If we look at the virt spec or the specification for the RISC-V HiFive Unleashed, this is the starting memory address for our code. Side note: There might be other boot ROMs at different addresses, but their job is to get to this point. Finally LENGTH = 128M tells the linker that we have 128 megabyte of RAM. The linker will double check this to make sure everything can fit. The HiFive Unleashed has a lot more RAM than this, but for the virtual machine, I went with 128M since I think that's enough RAM for now. We can provide other pieces of memory, such as QSPI, or ROM, but we're telling the linker script here that we have one pool of RAM. */ MEMORY { ram (wxa!ri) : ORIGIN = 0x80000000, LENGTH = 128M } /* PHDRS is short for "program headers", which we specify three here: text - CPU instructions (executable sections) data - Global, initialized variables bss - Global, uninitialized variables (all will be set to 0 by boot.S) The command PT_LOAD tells the linker that these sections will be loaded from the file into memory. We can actually stuff all of these into a single program header, but by splitting it up into three, we can actually use the other PT_* commands such as PT_DYNAMIC, PT_INTERP, PT_NULL to tell the linker where to find additional information. However, for our purposes, every section will be loaded from the program headers. */ PHDRS { text PT_LOAD; data PT_LOAD; bss PT_LOAD; } /* We are now going to organize the memory based on which section it is in. In assembly, we can change the section with the ".section" directive. However, in C++ and Rust, CPU instructions go into text, global constants go into rodata, global initialized variables go into data, and global uninitialized variables go into bss. 
*/ SECTIONS { /* The first part of our RAM layout will be the text section. Since our CPU instructions are here, and our memory starts at 0x8000_0000, we need our entry point to line up here. */ .text : { /* PROVIDE allows me to access a symbol called _text_start so I know where the text section starts in the operating system. This should not move, but it is here for convenience. The period '.' tells the linker to set _text_start to the CURRENT location ('.' = current memory location). This current memory location moves as we add things. */ PROVIDE(_text_start = .); /* We are going to layout all text sections here, starting with .text.init. The asterisk in front of the parentheses means to match the .text.init section of ANY object file. Otherwise, we can specify which object file should contain the .text.init section, for example, boot.o(.text.init) would specifically put the .text.init section of our bootloader here. Because we might want to change the name of our files, we'll leave it with a *. Inside the parentheses is the name of the section. I created my own called .text.init to make 100% sure that the _start is put right at the beginning. The linker will lay this out in the order it receives it: .text.init first all .text sections next any .text.* sections last .text.* means to match anything after .text. If we didn't already specify .text.init, this would've matched here. The assembler and linker can place things in "special" text sections, so we match any we might come across here. */ *(.text.init) *(.text .text.*) /* Again, with PROVIDE, we're providing a readable symbol called _text_end, which is set to the memory address AFTER .text.init, .text, and .text.*'s have been added. */ PROVIDE(_text_end = .); /* The portion after the right brace is in an odd format. However, this is telling the linker what memory portion to put it in. We labeled our RAM, ram, with the constraints that it is writeable, allocatable, and executable. 
The linker will make sure with this that we can do all of those things. >ram - This just tells the linker script to put this entire section (.text) into the ram region of memory. To my knowledge, the '>' does not mean "greater than". Instead, it is a symbol to let the linker know we want to put this in ram. AT>ram - This sets the LMA (load memory address) region to the same thing. LMA is the final translation of a VMA (virtual memory address). With this linker script, we're loading everything into its physical location. We'll let the kernel copy and sort out the virtual memory. That's why >ram and AT>ram are continually the same thing. :text - This tells the linker script to put this into the :text program header. We've only defined three: text, data, and bss. In this case, we're telling the linker script to go into the text section. */ } >ram AT>ram :text /* The global pointer allows the linker to position global variables and constants into independent positions relative to the gp (global pointer) register. The globals start after the text sections and are only relevant to the rodata, data, and bss sections. */ PROVIDE(_global_pointer = .); /* Most compilers create a rodata (read only data) section for global constants. However, we're going to place ours in the text section. We can actually put this in :data, but since the .text section is read-only, we can place it there. NOTE: This doesn't actually do anything, yet. The actual "protection" cannot be done at link time. Instead, when we program the memory management unit (MMU), we will be able to choose which bits (R=read, W=write, X=execute) we want each memory segment to be able to do. */ .rodata : { PROVIDE(_rodata_start = .); *(.rodata .rodata.*) PROVIDE(_rodata_end = .); /* Again, we're placing the rodata section in the memory segment "ram" and we're putting it in the :text program header. We don't have one for rodata anyway. */ } >ram AT>ram :text .data : { /* . 
= ALIGN(4096) tells the linker to align the current memory location (which is 0x8000_0000 + text section + rodata section) to 4096 bytes. This is because our paging system's resolution is 4,096 bytes or 4 KiB. */ . = ALIGN(4096); PROVIDE(_data_start = .); /* sdata and data are essentially the same thing. However, compilers usually use the sdata sections for shorter, quicker loading sections. So, usually critical data is loaded there. However, we're loading all of this in one fell swoop. So, we're looking to put all of the following sections under the umbrella .data: .sdata .sdata.[anything] .data .data.[anything] ...in that order. */ *(.sdata .sdata.*) *(.data .data.*) PROVIDE(_data_end = .); } >ram AT>ram :data .bss : { PROVIDE(_bss_start = .); *(.sbss .sbss.*) *(.bss .bss.*) PROVIDE(_bss_end = .); } >ram AT>ram :bss /* The following will be helpful when we allocate the kernel stack (_stack) and determine where the heap begins and ends (_heap_start and _heap_start + _heap_size). When we do memory allocation, we can use these symbols. We use the symbols instead of hard-coding an address because this is a floating target. As we add code, the heap moves farther down the memory and gets shorter. _memory_start will be set to 0x8000_0000 here. We use ORIGIN(ram) so that it will take whatever we set the origin of ram to. Otherwise, we'd have to change it more than once if we ever stray away from 0x8000_0000 as our entry point. */ PROVIDE(_stack = _bss_end + 0x80000); PROVIDE(_memory_end = ORIGIN(ram) + LENGTH(ram)); /* Finally, our heap starts right after the kernel stack. This heap will be used mainly to dole out memory for user-space applications. However, in some circumstances, it will be used for kernel memory as well. We don't align here because we let the kernel determine how it wants to do this. */ PROVIDE(_heap_start = _stack); PROVIDE(_heap_size = _memory_end - _stack); } ================================================ FILE: risc_v/chapters/ch1/.cargo/config ================================================ [build] target = "riscv64gc-unknown-none-elf" [target.riscv64gc-unknown-none-elf] linker = "riscv64-unknown-linux-gnu-gcc" ================================================ FILE: risc_v/chapters/ch1/.gitignore ================================================ os.elf target/* Cargo.lock hdd.dsk ================================================ FILE: risc_v/chapters/ch1/Cargo.toml ================================================ [package] name = "sos" version = "0.1.0" authors = ["Stephen Marz "] edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] crate-type = ["staticlib"] [dependencies] ================================================ FILE: risc_v/chapters/ch1/Makefile ================================================ ##### ## BUILD ##### CC=riscv64-unknown-linux-gnu-g++ CFLAGS=-Wall -Wextra -pedantic -Wextra -O0 -g -std=c++17 CFLAGS+=-static -ffreestanding -nostdlib -fno-rtti -fno-exceptions CFLAGS+=-march=rv64gc -mabi=lp64 INCLUDES= LINKER_SCRIPT=-Tsrc/lds/virt.lds TYPE=debug RUST_TARGET=./target/riscv64gc-unknown-none-elf/$(TYPE) LIBS=-L$(RUST_TARGET) SOURCES_ASM=$(wildcard src/asm/*.S) LIB=-lsos -lgcc OUT=os.elf ##### ## QEMU ##### QEMU=qemu-system-riscv64 MACH=virt CPU=rv64 CPUS=4 MEM=128M DRIVE=hdd.dsk all: cargo build $(CC) $(CFLAGS) $(LINKER_SCRIPT) $(INCLUDES) -o $(OUT) $(SOURCES_ASM) $(LIBS) $(LIB) run: 
*/ PROVIDE(_stack = _bss_end + 0x80000); PROVIDE(_memory_end = ORIGIN(ram) + LENGTH(ram)); /* Finally, our heap starts right after the kernel stack. This heap will be used mainly to dole out memory for user-space applications. However, in some circumstances, it will be used for kernel memory as well. We don't align here because we let the kernel determine how it wants to do this. */ PROVIDE(_heap_start = _stack); PROVIDE(_heap_size = _memory_end - _stack); } ================================================ FILE: risc_v/chapters/ch1/.cargo/config ================================================ [build] target = "riscv64gc-unknown-none-elf" [target.riscv64gc-unknown-none-elf] linker = "riscv64-unknown-linux-gnu-gcc" ================================================ FILE: risc_v/chapters/ch1/.gitignore ================================================ os.elf target/* Cargo.lock hdd.dsk ================================================ FILE: risc_v/chapters/ch1/Cargo.toml ================================================ [package] name = "sos" version = "0.1.0" authors = ["Stephen Marz "] edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] crate-type = ["staticlib"] [dependencies] ================================================ FILE: risc_v/chapters/ch1/Makefile ================================================ ##### ## BUILD ##### CC=riscv64-unknown-linux-gnu-g++ CFLAGS=-Wall -Wextra -pedantic -Wextra -O0 -g -std=c++17 CFLAGS+=-static -ffreestanding -nostdlib -fno-rtti -fno-exceptions CFLAGS+=-march=rv64gc -mabi=lp64 INCLUDES= LINKER_SCRIPT=-Tsrc/lds/virt.lds TYPE=debug RUST_TARGET=./target/riscv64gc-unknown-none-elf/$(TYPE) LIBS=-L$(RUST_TARGET) SOURCES_ASM=$(wildcard src/asm/*.S) LIB=-lsos -lgcc OUT=os.elf ##### ## QEMU ##### QEMU=qemu-system-riscv64 MACH=virt CPU=rv64 CPUS=4 MEM=128M DRIVE=hdd.dsk all: cargo build $(CC) $(CFLAGS) $(LINKER_SCRIPT) $(INCLUDES) -o $(OUT) $(SOURCES_ASM) $(LIBS) $(LIB) run: 
all $(QEMU) -machine $(MACH) -cpu $(CPU) -smp $(CPUS) -m $(MEM) -nographic -serial mon:stdio -bios none -kernel $(OUT) -drive if=none,format=raw,file=$(DRIVE),id=foo -device virtio-blk-device,scsi=off,drive=foo .PHONY: clean clean: cargo clean rm -f $(OUT) ================================================ FILE: risc_v/chapters/ch1/make_hdd.sh ================================================ #!/bin/sh dd if=/dev/zero of=hdd.dsk bs=1M count=32 ================================================ FILE: risc_v/chapters/ch1/src/asm/boot.S ================================================ # boot.S # bootloader for SoS # Stephen Marz # 8 February 2019 # Disable generation of compressed instructions. .option norvc # Define a .data section. .section .data # Define a .text.init section. .section .text.init # Execution starts here. .global _start _start: # Any hardware threads (hart) that are not bootstrapping # need to wait for an IPI csrr t0, mhartid bnez t0, 3f # SATP should be zero, but let's make sure csrw satp, zero # Disable linker instruction relaxation for the `la` instruction below. # This disallows the assembler from assuming that `gp` is already initialized. # This causes the value stored in `gp` to be calculated from `pc`. .option push .option norelax la gp, _global_pointer .option pop # Set all bytes in the BSS section to zero. la a0, _bss_start la a1, _bss_end bgeu a0, a1, 2f 1: sd zero, (a0) addi a0, a0, 8 bltu a0, a1, 1b 2: # Control registers, set the stack, mstatus, mepc, # and mtvec to return to the main function. # li t5, 0xffff; # csrw medeleg, t5 # csrw mideleg, t5 la sp, _stack # Setting `mstatus` register: # 0b11 << 11: Machine's previous protection mode is 3 (MPP=3). # 1 << 7 : Machine's previous interrupt-enable bit is 1 (MPIE=1). # 1 << 3 : Machine's interrupt-enable bit is 1 (MIE=1). li t0, (0b11 << 11) | (1 << 7) | (1 << 3) csrw mstatus, t0 # Machine's exception program counter (MEPC) is set to `kmain`. 
la t1, kmain csrw mepc, t1 # Machine's trap vector base address is set to `asm_trap_vector`. la t2, asm_trap_vector csrw mtvec, t2 # Setting Machine's interrupt-enable bits (`mie` register): # 1 << 3 : Machine's M-mode software interrupt-enable bit is 1 (MSIE=1). # 1 << 7 : Machine's timer interrupt-enable bit is 1 (MTIE=1). # 1 << 11: Machine's external interrupt-enable bit is 1 (MEIE=1). li t3, (1 << 3) | (1 << 7) | (1 << 11) csrw mie, t3 # Set the return address to infinitely wait for interrupts. la ra, 4f # We use mret here so that the mstatus register is properly updated. mret 3: # Parked harts go here. We need to set these # to only awaken if it receives a software interrupt, # which we're going to call the SIPI (Software Intra-Processor Interrupt). # We only use these to run user-space programs, although this may # change. 4: wfi j 4b ================================================ FILE: risc_v/chapters/ch1/src/asm/trap.S ================================================ # trap.S # In the future our trap vector will go here. .global asm_trap_vector # This will be our trap vector when we start # handling interrupts. asm_trap_vector: mret ================================================ FILE: risc_v/chapters/ch1/src/lds/virt.lds ================================================ OUTPUT_ARCH( "riscv" ) ENTRY( _start ) MEMORY { ram (wxa!ri) : ORIGIN = 0x80000000, LENGTH = 128M } PHDRS { text PT_LOAD; data PT_LOAD; bss PT_LOAD; } SECTIONS { .text : { PROVIDE(_text_start = .); *(.text.init) *(.text .text.*) PROVIDE(_text_end = .); } >ram AT>ram :text PROVIDE(_global_pointer = .); .rodata : { PROVIDE(_rodata_start = .); *(.rodata .rodata.*) PROVIDE(_rodata_end = .); } >ram AT>ram :text .data : { . 
// `eh_personality` is required by the compiler for stack unwinding machinery,
// but since we never unwind (we abort on panic), it can be a no-op.
#[no_mangle]
extern "C" fn eh_personality() {}

// The panic handler is invoked whenever Rust code panics. Since we are
// `#![no_std]`, we must supply our own. It prints the panic location and
// message (when available) over `print!`, then parks the hart forever.
#[panic_handler]
fn panic(info: &core::panic::PanicInfo) -> ! {
	print!("Aborting: ");
	if let Some(p) = info.location() {
		// We have location info: report file, line, and the panic message.
		// NOTE(review): `info.message().unwrap()` will itself panic if the
		// message is absent — acceptable here since we're already aborting.
		println!(
			"line {}, file {}: {}",
			p.line(),
			p.file(),
			info.message().unwrap()
		);
	}
	else {
		println!("no information available.");
	}
	abort();
}

// Park this hart forever. `wfi` (wait for interrupt) idles the core in a
// low-power state; the infinite loop satisfies the `-> !` (never returns)
// contract. Uses the pre-2021 `asm!` syntax enabled by `#![feature(asm)]`.
#[no_mangle]
extern "C" fn abort() -> ! {
	loop {
		unsafe {
			asm!("wfi"::::"volatile");
		}
	}
}
} // /////////////////////////////////// // / RUST MODULES // /////////////////////////////////// ================================================ FILE: risc_v/chapters/ch2/.cargo/config ================================================ [build] target = "riscv64gc-unknown-none-elf" [target.riscv64gc-unknown-none-elf] linker = "riscv64-unknown-linux-gnu-gcc" ================================================ FILE: risc_v/chapters/ch2/.gitignore ================================================ os.elf target/* Cargo.lock hdd.dsk ================================================ FILE: risc_v/chapters/ch2/Cargo.toml ================================================ [package] name = "sos" version = "0.1.0" authors = ["Stephen Marz "] edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] crate-type = ["staticlib"] [dependencies] ================================================ FILE: risc_v/chapters/ch2/Makefile ================================================ ##### ## BUILD ##### CC=riscv64-unknown-linux-gnu-g++ CFLAGS=-Wall -Wextra -pedantic -Wextra -O0 -g -std=c++17 CFLAGS+=-static -ffreestanding -nostdlib -fno-rtti -fno-exceptions CFLAGS+=-march=rv64gc -mabi=lp64 INCLUDES= LINKER_SCRIPT=-Tsrc/lds/virt.lds TYPE=debug RUST_TARGET=./target/riscv64gc-unknown-none-elf/$(TYPE) LIBS=-L$(RUST_TARGET) SOURCES_ASM=$(wildcard src/asm/*.S) LIB=-lsos -lgcc OUT=os.elf ##### ## QEMU ##### QEMU=qemu-system-riscv64 MACH=virt CPU=rv64 CPUS=4 MEM=128M DRIVE=hdd.dsk all: cargo build $(CC) $(CFLAGS) $(LINKER_SCRIPT) $(INCLUDES) -o $(OUT) $(SOURCES_ASM) $(LIBS) $(LIB) run: all $(QEMU) -machine $(MACH) -cpu $(CPU) -smp $(CPUS) -m $(MEM) -nographic -serial mon:stdio -bios none -kernel $(OUT) -drive if=none,format=raw,file=$(DRIVE),id=foo -device virtio-blk-device,scsi=off,drive=foo .PHONY: clean clean: cargo clean rm -f $(OUT) ================================================ FILE: risc_v/chapters/ch2/make_hdd.sh 
================================================ #!/bin/sh dd if=/dev/zero of=hdd.dsk bs=1M count=32 ================================================ FILE: risc_v/chapters/ch2/src/asm/boot.S ================================================ # boot.S # bootloader for SoS # Stephen Marz # 8 February 2019 .option norvc .section .data .section .text.init .global _start _start: # Any hardware threads (hart) that are not bootstrapping # need to wait for an IPI csrr t0, mhartid bnez t0, 3f # SATP should be zero, but let's make sure csrw satp, zero .option push .option norelax la gp, _global_pointer .option pop # The BSS section is expected to be zero la a0, _bss_start la a1, _bss_end bgeu a0, a1, 2f 1: sd zero, (a0) addi a0, a0, 8 bltu a0, a1, 1b 2: # Control registers, set the stack, mstatus, mepc, # and mtvec to return to the main function. # li t5, 0xffff; # csrw medeleg, t5 # csrw mideleg, t5 la sp, _stack # We use mret here so that the mstatus register # is properly updated. li t0, (0b11 << 11) | (1 << 7) | (1 << 3) csrw mstatus, t0 la t1, kmain csrw mepc, t1 la t2, asm_trap_vector csrw mtvec, t2 li t3, (1 << 3) | (1 << 7) | (1 << 11) csrw mie, t3 la ra, 4f mret 3: # Parked harts go here. We need to set these # to only awaken if it receives a software interrupt, # which we're going to call the SIPI (Software Intra-Processor Interrupt). # We only use these to run user-space programs, although this may # change. 4: wfi j 4b ================================================ FILE: risc_v/chapters/ch2/src/asm/trap.S ================================================ # trap.S # In the future our trap vector will go here. .global asm_trap_vector # This will be our trap vector when we start # handling interrupts. asm_trap_vector: mret ================================================ FILE: risc_v/chapters/ch2/src/lds/virt.lds ================================================ /* virt.lds Linker script for outputting to RISC-V QEMU "virt" machine. 
Finally LENGTH = 128M tells the linker that we have 128 megabytes of RAM. The linker will double check this to make sure everything can fit.
*/ MEMORY { ram (wxa!ri) : ORIGIN = 0x80000000, LENGTH = 128M } /* PHDRS is short for "program headers", which we specify three here: text - CPU instructions (executable sections) data - Global, initialized variables bss - Global, uninitialized variables (all will be set to 0 by boot.S) The command PT_LOAD tells the linker that these sections will be loaded from the file into memory. We can actually stuff all of these into a single program header, but by splitting it up into three, we can actually use the other PT_* commands such as PT_DYNAMIC, PT_INTERP, PT_NULL to tell the linker where to find additional information. However, for our purposes, every section will be loaded from the program headers. */ PHDRS { text PT_LOAD; data PT_LOAD; bss PT_LOAD; } /* We are now going to organize the memory based on which section it is in. In assembly, we can change the section with the ".section" directive. However, in C++ and Rust, CPU instructions go into text, global constants go into rodata, global initialized variables go into data, and global uninitialized variables go into bss. */ SECTIONS { /* The first part of our RAM layout will be the text section. Since our CPU instructions are here, and our memory starts at 0x8000_0000, we need our entry point to line up here. */ .text : { /* PROVIDE allows me to access a symbol called _text_start so I know where the text section starts in the operating system. This should not move, but it is here for convenience. The period '.' tells the linker to set _text_start to the CURRENT location ('.' = current memory location). This current memory location moves as we add things. */ PROVIDE(_text_start = .); /* We are going to layout all text sections here, starting with .text.init. The asterisk in front of the parentheses means to match the .text.init section of ANY object file. 
Otherwise, we can specify which object file should contain the .text.init section, for example, boot.o(.text.init) would specifically put the .text.init section of our bootloader here. Because we might want to change the name of our files, we'll leave it with a *. Inside the parentheses is the name of the section. I created my own called .text.init to make 100% sure that the _start is put right at the beginning. The linker will lay this out in the order it receives it: .text.init first all .text sections next any .text.* sections last .text.* means to match anything after .text. If we didn't already specify .text.init, this would've matched here. The assembler and linker can place things in "special" text sections, so we match any we might come across here. */ *(.text.init) *(.text .text.*) /* Again, with PROVIDE, we're providing a readable symbol called _text_end, which is set to the memory address AFTER .text.init, .text, and .text.*'s have been added. */ PROVIDE(_text_end = .); /* The portion after the right brace is in an odd format. However, this is telling the linker what memory portion to put it in. We labeled our RAM, ram, with the constraints that it is writeable, allocatable, and executable. The linker will make sure with this that we can do all of those things. >ram - This just tells the linker script to put this entire section (.text) into the ram region of memory. To my knowledge, the '>' does not mean "greater than". Instead, it is a symbol to let the linker know we want to put this in ram. AT>ram - This sets the LMA (load memory address) region to the same thing. LMA is the final translation of a VMA (virtual memory address). With this linker script, we're loading everything into its physical location. We'll let the kernel copy and sort out the virtual memory. That's why >ram and AT>ram are continually the same thing. :text - This tells the linker script to put this into the :text program header. We've only defined three: text, data, and bss. 
In this case, we're telling the linker script to go into the text section. */ } >ram AT>ram :text /* The global pointer allows the linker to position global variables and constants into independent positions relative to the gp (global pointer) register. The globals start after the text sections and are only relevant to the rodata, data, and bss sections. */ PROVIDE(_global_pointer = .); /* Most compilers create a rodata (read only data) section for global constants. However, we're going to place ours in the text section. We can actually put this in :data, but since the .text section is read-only, we can place it there. NOTE: This doesn't actually do anything, yet. The actual "protection" cannot be done at link time. Instead, when we program the memory management unit (MMU), we will be able to choose which bits (R=read, W=write, X=execute) we want each memory segment to be able to do. */ .rodata : { PROVIDE(_rodata_start = .); *(.rodata .rodata.*) PROVIDE(_rodata_end = .); /* Again, we're placing the rodata section in the memory segment "ram" and we're putting it in the :text program header. We don't have one for rodata anyway. */ } >ram AT>ram :text .data : { /* . = ALIGN(4096) tells the linker to align the current memory location (which is 0x8000_0000 + text section + rodata section) to 4096 bytes. This is because our paging system's resolution is 4,096 bytes or 4 KiB. */ . = ALIGN(4096); PROVIDE(_data_start = .); /* sdata and data are essentially the same thing. However, compilers usually use the sdata sections for shorter, quicker loading sections. So, usually critical data is loaded there. However, we're loading all of this in one fell swoop. So, we're looking to put all of the following sections under the umbrella .data: .sdata .sdata.[anything] .data .data.[anything] ...in that order. 
The following will be helpful when we allocate the kernel stack (_stack) and determine where the heap begins and ends (_heap_start and _heap_start + _heap_size). When we do memory allocation, we can use these symbols. We use the symbols instead of hard-coding an address because this is a floating target. As we add code, the heap moves farther down the memory and gets shorter. _memory_start will be set to 0x8000_0000 here. We use ORIGIN(ram) so that it will take whatever we set the origin of ram to. Otherwise, we'd have to change it more than once if we ever stray away from 0x8000_0000 as our entry point. */ PROVIDE(_memory_start = ORIGIN(ram)); /* Our kernel stack starts at the end of the bss segment (_bss_end). However, we're allocating 0x80000 bytes (512 KiB) to our kernel stack. This should be PLENTY of space. The reason we add the memory is because the stack grows from higher memory to lower memory (bottom to top). Therefore we set the stack at the very bottom of its allocated slot. When we go to allocate from the stack, we'll subtract the number of bytes we need. */ PROVIDE(_stack = _bss_end + 0x80000);
*/ PROVIDE(_heap_start = _stack); PROVIDE(_heap_size = _memory_end - _stack); } ================================================ FILE: risc_v/chapters/ch2/src/lib.rs ================================================ // Steve Operating System // Stephen Marz // 21 Sep 2019 #![no_std] #![feature(panic_info_message,asm)] // /////////////////////////////////// // / RUST MACROS // /////////////////////////////////// #[macro_export] macro_rules! print { ($($args:tt)+) => ({ use core::fmt::Write; let _ = write!(crate::uart::Uart::new(0x1000_0000), $($args)+); }); } #[macro_export] macro_rules! println { () => ({ print!("\r\n") }); ($fmt:expr) => ({ print!(concat!($fmt, "\r\n")) }); ($fmt:expr, $($args:tt)+) => ({ print!(concat!($fmt, "\r\n"), $($args)+) }); } // /////////////////////////////////// // / LANGUAGE STRUCTURES / FUNCTIONS // /////////////////////////////////// #[no_mangle] extern "C" fn eh_personality() {} #[panic_handler] fn panic(info: &core::panic::PanicInfo) -> ! { print!("Aborting: "); if let Some(p) = info.location() { println!( "line {}, file {}: {}", p.line(), p.file(), info.message().unwrap() ); } else { println!("no information available."); } abort(); } #[no_mangle] extern "C" fn abort() -> ! { loop { unsafe { asm!("wfi"::::"volatile"); } } } // /////////////////////////////////// // / CONSTANTS // /////////////////////////////////// // /////////////////////////////////// // / ENTRY POINT // /////////////////////////////////// #[no_mangle] extern "C" fn kmain() { // Main should initialize all sub-systems and get // ready to start scheduling. The last thing this // should do is start the timer. // Let's try using our newly minted UART by initializing it first. // The UART is sitting at MMIO address 0x1000_0000, so for testing // now, lets connect to it and see if we can initialize it and write // to it. let mut my_uart = uart::Uart::new(0x1000_0000); my_uart.init(); // Now test println! macro! 
/// Driver for an NS16550A-compatible UART exposed over MMIO.
/// `base_address` is the physical address of the first UART register
/// (0x1000_0000 on QEMU's virt machine).
pub struct Uart {
	base_address: usize,
}

impl Write for Uart {
	/// Hooking into `core::fmt::Write` lets `write!`/`print!` format into
	/// the UART. Each byte of the string is pushed through `put`.
	fn write_str(&mut self, out: &str) -> Result<(), Error> {
		for c in out.bytes() {
			self.put(c);
		}
		Ok(())
	}
}

impl Uart {
	/// Create a handle to the UART at `base_address`. No hardware is touched
	/// until `init` is called.
	pub fn new(base_address: usize) -> Self {
		Uart { base_address }
	}

	/// Program the UART: 8-bit words, FIFO on, receive interrupts on, and
	/// (for demonstration) a 2400-baud divisor.
	pub fn init(&mut self) {
		let ptr = self.base_address as *mut u8;
		unsafe {
			// Set the word length, which are bits 0 and 1 of the line
			// control register (LCR), at base_address + 3. We could write
			// the value 3 (0b11) directly, but spelling out both bits makes
			// it clear we are setting two individual fields.
			//           Word 0     Word 1
			//           ~~~~~~     ~~~~~~
			ptr.add(3).write_volatile((1 << 0) | (1 << 1));

			// Enable the FIFO, bit index 0 of the FIFO control register
			// (FCR, offset 2).
			ptr.add(2).write_volatile(1 << 0);

			// Enable receiver buffer interrupts, bit index 0 of the
			// interrupt enable register (IER, offset 1).
			ptr.add(1).write_volatile(1 << 0);

			// If we cared about the divisor, the code below would set the
			// divisor from a global clock rate of 22.729 MHz down to a
			// signaling rate of 2400 baud. The formula given in the
			// NS16550A specification for calculating the divisor is:
			//   divisor = ceil( clock_hz / (baud_sps x 16) )
			// Substituting our values:
			//   divisor = ceil( 22_729_000 / (2400 x 16) )
			//           = ceil( 22_729_000 / 38_400 )
			//           = ceil( 591.901 ) = 592
			// The divisor register is 16 bits, so we split 592 into two
			// bytes. Under QEMU this doesn't actually change anything, but
			// it demonstrates what the divisor does on real hardware.
			let divisor: u16 = 592;
			let divisor_least: u8 = (divisor & 0xff).try_into().unwrap();
			let divisor_most: u8 = (divisor >> 8).try_into().unwrap();

			// The divisor latch registers DLL and DLM share base addresses
			// 0 and 1 with RBR/THR and IER. To reach them we open the
			// "divisor latch" by setting the Divisor Latch Access Bit
			// (DLAB), bit index 7 of the LCR (offset 3).
			let lcr = ptr.add(3).read_volatile();
			ptr.add(3).write_volatile(lcr | 1 << 7);

			// With DLAB set, offsets 0 and 1 now address DLL and DLM.
			ptr.add(0).write_volatile(divisor_least);
			ptr.add(1).write_volatile(divisor_most);

			// Close the divisor latch (clear DLAB) by restoring the
			// original LCR so offsets 0/1 address RBR/THR/IER again.
			ptr.add(3).write_volatile(lcr);
		}
	}

	/// Write one byte to the transmitter holding register (offset 0).
	pub fn put(&mut self, c: u8) {
		let ptr = self.base_address as *mut u8;
		unsafe {
			ptr.add(0).write_volatile(c);
		}
	}

	/// Non-blocking read of one byte from the receiver buffer register.
	/// Returns `None` when the line status register (offset 5) reports that
	/// no data is ready (DR bit == 0).
	pub fn get(&mut self) -> Option<u8> {
		let ptr = self.base_address as *mut u8;
		unsafe {
			if ptr.add(5).read_volatile() & 1 == 0 {
				// The DR bit is 0, meaning no data.
				None
			}
			else {
				// The DR bit is 1, meaning data is waiting!
				Some(ptr.add(0).read_volatile())
			}
		}
	}
}
if=/dev/zero of=hdd.dsk bs=1M count=32 ================================================ FILE: risc_v/chapters/ch3/src/asm/boot.S ================================================ # boot.S # bootloader for SoS # Stephen Marz # 8 February 2019 # Disable generation of compressed instructions. .option norvc # Define a .data section. .section .data # Define a .text.init section. .section .text.init # Execution starts here. .global _start _start: # Any hardware threads (hart) that are not bootstrapping # need to wait for an IPI csrr t0, mhartid bnez t0, 3f # SATP should be zero, but let's make sure csrw satp, zero # Disable linker instruction relaxation for the `la` instruction below. # This disallows the assembler from assuming that `gp` is already initialized. # This causes the value stored in `gp` to be calculated from `pc`. .option push .option norelax la gp, _global_pointer .option pop # Set all bytes in the BSS section to zero. la a0, _bss_start la a1, _bss_end bgeu a0, a1, 2f 1: sd zero, (a0) addi a0, a0, 8 bltu a0, a1, 1b 2: # Control registers, set the stack, mstatus, mepc, # and mtvec to return to the main function. # li t5, 0xffff; # csrw medeleg, t5 # csrw mideleg, t5 # The stack grows from bottom to top, so we put the stack pointer # to the very end of the stack range. la sp, _stack_end # Setting `mstatus` register: # 0b11 << 11: Machine's previous protection mode is 3 (MPP=3). li t0, 0b11 << 11 csrw mstatus, t0 # Machine's exception program counter (MEPC) is set to `kinit`. la t1, kinit csrw mepc, t1 # Machine's trap vector base address is set to `asm_trap_vector`. la t2, asm_trap_vector csrw mtvec, t2 # Set the return address to get us into supervisor mode la ra, 2f # We use mret here so that the mstatus register is properly updated. mret 2: # We set the return address (ra above) to this label. When kinit() is finished # in Rust, it will return here. 
# Setting `sstatus` (supervisor status) register: # 1 << 8 : Supervisor's previous protection mode is 1 (SPP=1 [Supervisor]). # 1 << 5 : Supervisor's previous interrupt-enable bit is 1 (SPIE=1 [Enabled]). # 1 << 1 : Supervisor's interrupt-enable bit will be set to 1 after sret. # We set the "previous" bits because the sret will write the current bits # with the previous bits. li t0, (1 << 8) | (1 << 5) csrw sstatus, t0 la t1, kmain csrw sepc, t1 # Setting `mideleg` (machine interrupt delegate) register: # 1 << 1 : Software interrupt delegated to supervisor mode # 1 << 5 : Timer interrupt delegated to supervisor mode # 1 << 9 : External interrupt delegated to supervisor mode # By default all traps (interrupts or exceptions) automatically # cause an elevation to the machine privilege mode (mode 3). # When we delegate, we're telling the CPU to only elevate to # the supervisor privilege mode (mode 1) li t2, (1 << 1) | (1 << 5) | (1 << 9) csrw mideleg, t2 # Setting `sie` (supervisor interrupt enable) register: # This register takes the same bits as mideleg # 1 << 1 : Supervisor software interrupt enable (SSIE=1 [Enabled]) # 1 << 5 : Supervisor timer interrupt enable (STIE=1 [Enabled]) # 1 << 9 : Supervisor external interrupt enable (SEIE=1 [Enabled]) csrw sie, t2 # Setting `stvec` (supervisor trap vector) register: # Essentially this is a function pointer, but the last two bits can be 00 or 01 # 00 : All exceptions set pc to BASE # 01 : Asynchronous interrupts set pc to BASE + 4 x scause la t3, asm_trap_vector csrw stvec, t3 # kinit() is required to return back the SATP value (including MODE) via a0 csrw satp, a0 # Force the CPU to take our SATP register. # To be efficient, if the address space identifier (ASID) portion of SATP is already # in cache, it will just grab whatever's in cache. However, that means if we've updated # it in memory, it will be the old table. 
So, sfence.vma will ensure that the MMU always # grabs a fresh copy of the SATP register and associated tables. sfence.vma # sret will put us in supervisor mode and re-enable interrupts sret 3: # Parked harts go here. We need to set these # to only awaken if it receives a software interrupt, # which we're going to call the SIPI (Software Intra-Processor Interrupt). # We call the SIPI by writing the software interrupt into the Core Local Interruptor (CLINT) # Which is calculated by: base_address + hart * 4 # where base address is 0x0200_0000 (MMIO CLINT base address) # We only use additional harts to run user-space programs, although this may # change. 4: wfi j 4b ================================================ FILE: risc_v/chapters/ch3/src/asm/mem.S ================================================ // mem.S // Importation of linker symbols .section .rodata .global HEAP_START HEAP_START: .dword _heap_start .global HEAP_SIZE HEAP_SIZE: .dword _heap_size .global TEXT_START TEXT_START: .dword _text_start .global TEXT_END TEXT_END: .dword _text_end .global DATA_START DATA_START: .dword _data_start .global DATA_END DATA_END: .dword _data_end .global RODATA_START RODATA_START: .dword _rodata_start .global RODATA_END RODATA_END: .dword _rodata_end .global BSS_START BSS_START: .dword _bss_start .global BSS_END BSS_END: .dword _bss_end .global KERNEL_STACK_START KERNEL_STACK_START: .dword _stack_start .global KERNEL_STACK_END KERNEL_STACK_END: .dword _stack_end .section .data .global KERNEL_TABLE KERNEL_TABLE: .dword 0 ================================================ FILE: risc_v/chapters/ch3/src/asm/trap.S ================================================ # trap.S # In the future our trap vector will go here. .global asm_trap_vector # This will be our trap vector when we start # handling interrupts. 
#[repr(usize)]
enum AllocListFlags {
	// The top bit of the 64-bit size/flags word marks a chunk as taken.
	Taken = 1 << 63,
}

impl AllocListFlags {
	/// Numeric value of this flag, for masking against `flags_size`.
	pub fn val(self) -> usize {
		self as usize
	}
}

/// Header preceding every heap chunk. The single `flags_size` word packs
/// the chunk size (low bits) together with the taken flag (top bit).
struct AllocList {
	pub flags_size: usize,
}

impl AllocList {
	/// True when the taken bit is set on this chunk.
	pub fn is_taken(&self) -> bool {
		self.flags_size & AllocListFlags::Taken.val() != 0
	}

	/// True when the taken bit is clear (the chunk is available).
	pub fn is_free(&self) -> bool {
		self.flags_size & AllocListFlags::Taken.val() == 0
	}

	/// Mark this chunk as taken without disturbing the stored size.
	pub fn set_taken(&mut self) {
		self.flags_size |= AllocListFlags::Taken.val();
	}

	/// Mark this chunk as free without disturbing the stored size.
	pub fn set_free(&mut self) {
		self.flags_size &= !AllocListFlags::Taken.val();
	}

	/// Store a new size, preserving whatever taken/free state the chunk
	/// already had. The size is masked so it can never clobber the flag bit.
	pub fn set_size(&mut self, sz: usize) {
		let taken_bit = self.flags_size & AllocListFlags::Taken.val();
		self.flags_size = (sz & !AllocListFlags::Taken.val()) | taken_bit;
	}

	/// Retrieve the chunk size with the flag bit stripped off.
	pub fn get_size(&self) -> usize {
		self.flags_size & !AllocListFlags::Taken.val()
	}
}
pub fn init() { unsafe { // Allocate 64 kernel pages (64 * 4096 = 262 KiB) let k_alloc = zalloc(64); assert!(!k_alloc.is_null()); KMEM_ALLOC = 64; KMEM_HEAD = k_alloc as *mut AllocList; (*KMEM_HEAD).set_free(); (*KMEM_HEAD).set_size(KMEM_ALLOC * PAGE_SIZE); KMEM_PAGE_TABLE = zalloc(1) as *mut Table; } } /// Allocate sub-page level allocation based on bytes and zero the memory pub fn kzmalloc(sz: usize) -> *mut u8 { let size = align_val(sz, 3); let ret = kmalloc(size); if !ret.is_null() { for i in 0..size { unsafe { (*ret.add(i)) = 0; } } } ret } /// Allocate sub-page level allocation based on bytes pub fn kmalloc(sz: usize) -> *mut u8 { unsafe { let size = align_val(sz, 3) + size_of::(); let mut head = KMEM_HEAD; // .add() uses pointer arithmetic, so we type-cast into a u8 // so that we multiply by an absolute size (KMEM_ALLOC * // PAGE_SIZE). let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { if (*head).is_free() && size <= (*head).get_size() { let chunk_size = (*head).get_size(); let rem = chunk_size - size; (*head).set_taken(); if rem > size_of::() { let next = (head as *mut u8).add(size) as *mut AllocList; // There is space remaining here. (*next).set_free(); (*next).set_size(rem); (*head).set_size(size); } else { // If we get here, take the entire chunk (*head).set_size(chunk_size); } return head.add(1) as *mut u8; } else { // If we get here, what we saw wasn't a free // chunk, move on to the next. head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } // If we get here, we didn't find any free chunks--i.e. there isn't // enough memory for this. TODO: Add on-demand page allocation. null_mut() } /// Free a sub-page level allocation pub fn kfree(ptr: *mut u8) { unsafe { if !ptr.is_null() { let p = (ptr as *mut AllocList).offset(-1); if (*p).is_taken() { (*p).set_free(); } // After we free, see if we can combine adjacent free // spots to see if we can reduce fragmentation. 
coalesce(); } } } /// Merge smaller chunks into a bigger chunk pub fn coalesce() { unsafe { let mut head = KMEM_HEAD; let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { let next = (head as *mut u8).add((*head).get_size()) as *mut AllocList; if (*head).get_size() == 0 { // If this happens, then we have a bad heap // (double free or something). However, that // will cause an infinite loop since the next // pointer will never move beyond the current // location. break; } else if next >= tail { // We calculated the next by using the size // given as get_size(), however this could push // us past the tail. In that case, the size is // wrong, hence we break and stop doing what we // need to do. break; } else if (*head).is_free() && (*next).is_free() { // This means we have adjacent blocks needing to // be freed. So, we combine them into one // allocation. (*head).set_size( (*head).get_size() + (*next).get_size(), ); } // If we get here, we might've moved. Recalculate new // head. head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } /// For debugging purposes, print the kmem table pub fn print_table() { unsafe { let mut head = KMEM_HEAD; let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { println!( "{:p}: Length = {:<10} Taken = {}", head, (*head).get_size(), (*head).is_taken() ); head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } // /////////////////////////////////// // / GLOBAL ALLOCATOR // /////////////////////////////////// // The global allocator allows us to use the data structures // in the core library, such as a linked list or B-tree. // We want to use these sparingly since we have a coarse-grained // allocator. use core::alloc::{GlobalAlloc, Layout}; // The global allocator is a static constant to a global allocator // structure. 
We don't need any members because we're using this // structure just to implement alloc and dealloc. struct OsGlobalAlloc; unsafe impl GlobalAlloc for OsGlobalAlloc { unsafe fn alloc(&self, layout: Layout) -> *mut u8 { // We align to the next page size so that when // we divide by PAGE_SIZE, we get exactly the number // of pages necessary. kzmalloc(layout.size()) } unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) { // We ignore layout since our allocator uses ptr_start -> last // to determine the span of an allocation. kfree(ptr); } } #[global_allocator] /// Technically, we don't need the {} at the end, but it /// reveals that we're creating a new structure and not just /// copying a value. static GA: OsGlobalAlloc = OsGlobalAlloc {}; #[alloc_error_handler] /// If for some reason alloc() in the global allocator gets null_mut(), /// then we come here. This is a divergent function, so we call panic to /// let the tester know what's going on. pub fn alloc_error(l: Layout) -> ! { panic!( "Allocator failed to allocate {} bytes with {}-byte alignment.", l.size(), l.align() ); } ================================================ FILE: risc_v/chapters/ch3/src/lds/virt.lds ================================================ /* virt.lds Linker script for outputting to RISC-V QEMU "virt" machine. Stephen Marz 6 October 2019 */ /* riscv is the name of the architecture that the linker understands for any RISC-V target (64-bit or 32-bit). We will further refine this by using -mabi=lp64 and -march=rv64gc */ OUTPUT_ARCH( "riscv" ) /* We're setting our entry point to a symbol called _start which is inside of boot.S. This essentially stores the address of _start as the "entry point", or where CPU instructions should start executing. In the rest of this script, we are going to place _start right at the beginning of 0x8000_0000 because this is where the virtual machine and many RISC-V boards will start executing. 
*/ ENTRY( _start ) /* The MEMORY section will explain that we have "ram" that contains a section that is 'w' (writeable), 'x' (executable), and 'a' (allocatable). We use '!' to invert 'r' (read-only) and 'i' (initialized). We don't want our memory to be read-only, and we're stating that it is NOT initialized at the beginning. The ORIGIN is the memory address 0x8000_0000. If we look at the virt spec or the specification for the RISC-V HiFive Unleashed, this is the starting memory address for our code. Side note: There might be other boot ROMs at different addresses, but their job is to get to this point. Finally LENGTH = 128M tells the linker that we have 128 megabyte of RAM. The linker will double check this to make sure everything can fit. The HiFive Unleashed has a lot more RAM than this, but for the virtual machine, I went with 128M since I think that's enough RAM for now. We can provide other pieces of memory, such as QSPI, or ROM, but we're telling the linker script here that we have one pool of RAM. */ MEMORY { ram (wxa!ri) : ORIGIN = 0x80000000, LENGTH = 128M } /* PHDRS is short for "program headers", which we specify three here: text - CPU instructions (executable sections) data - Global, initialized variables bss - Global, uninitialized variables (all will be set to 0 by boot.S) The command PT_LOAD tells the linker that these sections will be loaded from the file into memory. We can actually stuff all of these into a single program header, but by splitting it up into three, we can actually use the other PT_* commands such as PT_DYNAMIC, PT_INTERP, PT_NULL to tell the linker where to find additional information. However, for our purposes, every section will be loaded from the program headers. */ PHDRS { text PT_LOAD; data PT_LOAD; bss PT_LOAD; } /* We are now going to organize the memory based on which section it is in. In assembly, we can change the section with the ".section" directive. 
However, in C++ and Rust, CPU instructions go into text, global constants go into rodata, global initialized variables go into data, and global uninitialized variables go into bss. */ SECTIONS { /* The first part of our RAM layout will be the text section. Since our CPU instructions are here, and our memory starts at 0x8000_0000, we need our entry point to line up here. */ .text : { /* PROVIDE allows me to access a symbol called _text_start so I know where the text section starts in the operating system. This should not move, but it is here for convenience. The period '.' tells the linker to set _text_start to the CURRENT location ('.' = current memory location). This current memory location moves as we add things. */ PROVIDE(_text_start = .); /* We are going to layout all text sections here, starting with .text.init. The asterisk in front of the parentheses means to match the .text.init section of ANY object file. Otherwise, we can specify which object file should contain the .text.init section, for example, boot.o(.text.init) would specifically put the .text.init section of our bootloader here. Because we might want to change the name of our files, we'll leave it with a *. Inside the parentheses is the name of the section. I created my own called .text.init to make 100% sure that the _start is put right at the beginning. The linker will lay this out in the order it receives it: .text.init first all .text sections next any .text.* sections last .text.* means to match anything after .text. If we didn't already specify .text.init, this would've matched here. The assembler and linker can place things in "special" text sections, so we match any we might come across here. */ *(.text.init) *(.text .text.*) /* Again, with PROVIDE, we're providing a readable symbol called _text_end, which is set to the memory address AFTER .text.init, .text, and .text.*'s have been added. */ PROVIDE(_text_end = .); /* The portion after the right brace is in an odd format. 
However, this is telling the linker what memory portion to put it in. We labeled our RAM, ram, with the constraints that it is writeable, allocatable, and executable. The linker will make sure with this that we can do all of those things. >ram - This just tells the linker script to put this entire section (.text) into the ram region of memory. To my knowledge, the '>' does not mean "greater than". Instead, it is a symbol to let the linker know we want to put this in ram. AT>ram - This sets the LMA (load memory address) region to the same thing. LMA is the final translation of a VMA (virtual memory address). With this linker script, we're loading everything into its physical location. We'll let the kernel copy and sort out the virtual memory. That's why >ram and AT>ram are continually the same thing. :text - This tells the linker script to put this into the :text program header. We've only defined three: text, data, and bss. In this case, we're telling the linker script to go into the text section. */ } >ram AT>ram :text /* The global pointer allows the linker to position global variables and constants into independent positions relative to the gp (global pointer) register. The globals start after the text sections and are only relevant to the rodata, data, and bss sections. */ PROVIDE(_global_pointer = .); /* Most compilers create a rodata (read only data) section for global constants. However, we're going to place ours in the text section. We can actually put this in :data, but since the .text section is read-only, we can place it there. NOTE: This doesn't actually do anything, yet. The actual "protection" cannot be done at link time. Instead, when we program the memory management unit (MMU), we will be able to choose which bits (R=read, W=write, X=execute) we want each memory segment to be able to do. 
*/ .rodata : { PROVIDE(_rodata_start = .); *(.rodata .rodata.*) PROVIDE(_rodata_end = .); /* Again, we're placing the rodata section in the memory segment "ram" and we're putting it in the :text program header. We don't have one for rodata anyway. */ } >ram AT>ram :text .data : { /* . = ALIGN(4096) tells the linker to align the current memory location (which is 0x8000_0000 + text section + rodata section) to 4096 bytes. This is because our paging system's resolution is 4,096 bytes or 4 KiB. */ . = ALIGN(4096); PROVIDE(_data_start = .); /* sdata and data are essentially the same thing. However, compilers usually use the sdata sections for shorter, quicker loading sections. So, usually critical data is loaded there. However, we're loading all of this in one fell swoop. So, we're looking to put all of the following sections under the umbrella .data: .sdata .sdata.[anything] .data .data.[anything] ...in that order. */ *(.sdata .sdata.*) *(.data .data.*) PROVIDE(_data_end = .); } >ram AT>ram :data .bss : { PROVIDE(_bss_start = .); *(.sbss .sbss.*) *(.bss .bss.*) PROVIDE(_bss_end = .); } >ram AT>ram :bss /* The following will be helpful when we allocate the kernel stack (_stack) and determine where the heap begnis and ends (_heap_start and _heap_start + _heap_size)/ When we do memory allocation, we can use these symbols. We use the symbols instead of hard-coding an address because this is a floating target. As we add code, the heap moves farther down the memory and gets shorter. _memory_start will be set to 0x8000_0000 here. We use ORIGIN(ram) so that it will take whatever we set the origin of ram to. Otherwise, we'd have to change it more than once if we ever stray away from 0x8000_0000 as our entry point. */ PROVIDE(_memory_start = ORIGIN(ram)); /* Our kernel stack starts at the end of the bss segment (_bss_end). However, we're allocating 0x80000 bytes (524 KiB) to our kernel stack. This should be PLENTY of space. 
The reason we add the memory is because the stack grows from higher memory to lower memory (bottom to top). Therefore we set the stack at the very bottom of its allocated slot. When we go to allocate from the stack, we'll subtract the number of bytes we need. */ PROVIDE(_stack_start = _bss_end); PROVIDE(_stack_end = _stack_start + 0x8000); PROVIDE(_memory_end = ORIGIN(ram) + LENGTH(ram)); /* Finally, our heap starts right after the kernel stack. This heap will be used mainly to dole out memory for user-space applications. However, in some circumstances, it will be used for kernel memory as well. We don't align here because we let the kernel determine how it wants to do this. */ PROVIDE(_heap_start = _stack_end); PROVIDE(_heap_size = _memory_end - _heap_start); } ================================================ FILE: risc_v/chapters/ch3/src/lib.rs ================================================ // Steve Operating System // Stephen Marz // 21 Sep 2019 #![no_std] #![feature(panic_info_message, asm, allocator_api, alloc_error_handler, alloc_prelude, const_raw_ptr_to_usize_cast)] #[macro_use] extern crate alloc; // This is experimental and requires alloc_prelude as a feature use alloc::prelude::v1::*; // /////////////////////////////////// // / RUST MACROS // /////////////////////////////////// #[macro_export] macro_rules! print { ($($args:tt)+) => ({ use core::fmt::Write; let _ = write!(crate::uart::Uart::new(0x1000_0000), $($args)+); }); } #[macro_export] macro_rules! println { () => ({ print!("\r\n") }); ($fmt:expr) => ({ print!(concat!($fmt, "\r\n")) }); ($fmt:expr, $($args:tt)+) => ({ print!(concat!($fmt, "\r\n"), $($args)+) }); } // /////////////////////////////////// // / LANGUAGE STRUCTURES / FUNCTIONS // /////////////////////////////////// #[no_mangle] extern "C" fn eh_personality() {} #[panic_handler] fn panic(info: &core::panic::PanicInfo) -> ! 
{ print!("Aborting: "); if let Some(p) = info.location() { println!( "line {}, file {}: {}", p.line(), p.file(), info.message().unwrap() ); } else { println!("no information available."); } abort(); } #[no_mangle] extern "C" fn abort() -> ! { loop { unsafe { asm!("wfi"::::"volatile"); } } } // /////////////////////////////////// // / CONSTANTS // /////////////////////////////////// // const STR_Y: &str = "\x1b[38;2;79;221;13m✓\x1b[m"; // const STR_N: &str = "\x1b[38;2;221;41;13m✘\x1b[m"; // The following symbols come from asm/mem.S. We can use // the symbols directly, but the address of the symbols // themselves are their values, which can cause issues. // Instead, I created doubleword values in mem.S in the .rodata and .data // sections. extern "C" { static TEXT_START: usize; static TEXT_END: usize; static DATA_START: usize; static DATA_END: usize; static RODATA_START: usize; static RODATA_END: usize; static BSS_START: usize; static BSS_END: usize; static KERNEL_STACK_START: usize; static KERNEL_STACK_END: usize; static HEAP_START: usize; static HEAP_SIZE: usize; static mut KERNEL_TABLE: usize; } /// Identity map range /// Takes a contiguous allocation of memory and maps it using PAGE_SIZE /// This assumes that start <= end pub fn id_map_range(root: &mut page::Table, start: usize, end: usize, bits: i64) { let mut memaddr = start & !(page::PAGE_SIZE - 1); let num_kb_pages = (page::align_val(end, 12) - memaddr) / page::PAGE_SIZE; // I named this num_kb_pages for future expansion when // I decide to allow for GiB (2^30) and 2MiB (2^21) page // sizes. However, the overlapping memory regions are causing // nightmares. for _ in 0..num_kb_pages { page::map(root, memaddr, memaddr, bits, 0); memaddr += 1 << 12; } } // /////////////////////////////////// // / ENTRY POINT // /////////////////////////////////// #[no_mangle] extern "C" fn kinit() -> usize { // We created kinit, which runs in super-duper mode // 3 called "machine mode". 
// The job of kinit() is to get us into supervisor mode // as soon as possible. // Interrupts are disabled for the duration of kinit() uart::Uart::new(0x1000_0000).init(); page::init(); kmem::init(); // Map heap allocations let root_ptr = kmem::get_page_table(); let root_u = root_ptr as usize; let mut root = unsafe { root_ptr.as_mut().unwrap() }; let kheap_head = kmem::get_head() as usize; let total_pages = kmem::get_num_allocations(); println!(); println!(); unsafe { println!("TEXT: 0x{:x} -> 0x{:x}", TEXT_START, TEXT_END); println!("RODATA: 0x{:x} -> 0x{:x}", RODATA_START, RODATA_END); println!("DATA: 0x{:x} -> 0x{:x}", DATA_START, DATA_END); println!("BSS: 0x{:x} -> 0x{:x}", BSS_START, BSS_END); println!("STACK: 0x{:x} -> 0x{:x}", KERNEL_STACK_START, KERNEL_STACK_END); println!("HEAP: 0x{:x} -> 0x{:x}", kheap_head, kheap_head + total_pages * 4096); } id_map_range( &mut root, kheap_head, kheap_head + total_pages * 4096, page::EntryBits::ReadWrite.val(), ); unsafe { // Map heap descriptors let num_pages = HEAP_SIZE / page::PAGE_SIZE; id_map_range(&mut root, HEAP_START, HEAP_START + num_pages, page::EntryBits::ReadWrite.val() ); // Map executable section id_map_range( &mut root, TEXT_START, TEXT_END, page::EntryBits::ReadExecute.val(), ); // Map rodata section // We put the ROdata section into the text section, so they can // potentially overlap however, we only care that it's read // only. 
id_map_range( &mut root, RODATA_START, RODATA_END, page::EntryBits::ReadExecute.val(), ); // Map data section id_map_range( &mut root, DATA_START, DATA_END, page::EntryBits::ReadWrite.val(), ); // Map bss section id_map_range( &mut root, BSS_START, BSS_END, page::EntryBits::ReadWrite.val(), ); // Map kernel stack id_map_range( &mut root, KERNEL_STACK_START, KERNEL_STACK_END, page::EntryBits::ReadWrite.val(), ); } // UART page::map( &mut root, 0x1000_0000, 0x1000_0000, page::EntryBits::ReadWrite.val(), 0 ); // CLINT // -> MSIP page::map( &mut root, 0x0200_0000, 0x0200_0000, page::EntryBits::ReadWrite.val(), 0 ); // -> MTIMECMP page::map( &mut root, 0x0200_b000, 0x0200_b000, page::EntryBits::ReadWrite.val(), 0 ); // -> MTIME page::map( &mut root, 0x0200_c000, 0x0200_c000, page::EntryBits::ReadWrite.val(), 0 ); // PLIC id_map_range( &mut root, 0x0c00_0000, 0x0c00_2000, page::EntryBits::ReadWrite.val(), ); id_map_range( &mut root, 0x0c20_0000, 0x0c20_8000, page::EntryBits::ReadWrite.val(), ); page::print_page_allocations(); // The following shows how we're going to walk to translate a virtual // address into a physical address. We will use this whenever a user // space application requires services. Since the user space application // only knows virtual addresses, we have to translate silently behind // the scenes. let p = 0x8005_7000 as usize; let m = page::virt_to_phys(&root, p).unwrap_or(0); println!("Walk 0x{:x} = 0x{:x}", p, m); // When we return from here, we'll go back to boot.S and switch into // supervisor mode We will return the SATP register to be written when // we return. root_u is the root page table's address. When stored into // the SATP register, this is divided by 4 KiB (right shift by 12 bits). // We enable the MMU by setting mode 8. Bits 63, 62, 61, 60 determine // the mode. // 0 = Bare (no translation) // 8 = Sv39 // 9 = Sv48 unsafe { // We have to store the kernel's table. 
The tables will be moved back // and forth between the kernel's table and user applicatons' tables. KERNEL_TABLE = root_u; } // table / 4096 Sv39 mode (root_u >> 12) | (8 << 60) } #[no_mangle] extern "C" fn kmain() { // kmain() starts in supervisor mode. So, we should have the trap // vector setup and the MMU turned on when we get here. // We initialized my_uart in machine mode under kinit for debugging // prints, but this just grabs a pointer to it. let mut my_uart = uart::Uart::new(0x1000_0000); // Create a new scope so that we can test the global allocator and // deallocator { // We have the global allocator, so let's see if that works! let k = Box::::new(100); println!("Boxed value = {}", *k); kmem::print_table(); // The following comes from the Rust documentation: // some bytes, in a vector let sparkle_heart = vec![240, 159, 146, 150]; // We know these bytes are valid, so we'll use `unwrap()`. let sparkle_heart = String::from_utf8(sparkle_heart).unwrap(); println!("String = {}", sparkle_heart); } // If we get here, the Box, vec, and String should all be freed since // they go out of scope. This calls their "Drop" trait. // Now see if we can read stuff: // Usually we can use #[test] modules in Rust, but it would convolute // the task at hand, and it requires us to create the testing harness // since the embedded testing system is part of the "std" library. 
loop { if let Some(c) = my_uart.get() { match c { 8 => { // This is a backspace, so we // essentially have to write a space and // backup again: print!("{} {}", 8 as char, 8 as char); }, 10 | 13 => { // Newline or carriage-return println!(); }, _ => { print!("{}", c as char); }, } } } } // /////////////////////////////////// // / RUST MODULES // /////////////////////////////////// pub mod kmem; pub mod page; pub mod uart; ================================================ FILE: risc_v/chapters/ch3/src/page.rs ================================================ // page.rs // Memory routines // Stephen Marz // 6 October 2019 use core::{mem::size_of, ptr::null_mut}; // //////////////////////////////// // // Allocation routines // //////////////////////////////// extern "C" { static HEAP_START: usize; static HEAP_SIZE: usize; } // We will use ALLOC_START to mark the start of the actual // memory we can dish out. static mut ALLOC_START: usize = 0; const PAGE_ORDER: usize = 12; pub const PAGE_SIZE: usize = 1 << 12; /// Align (set to a multiple of some power of two) /// This takes an order which is the exponent to 2^order /// Therefore, all alignments must be made as a power of two. /// This function always rounds up. pub const fn align_val(val: usize, order: usize) -> usize { let o = (1usize << order) - 1; (val + o) & !o } #[repr(u8)] pub enum PageBits { Empty = 0, Taken = 1 << 0, Last = 1 << 1, } impl PageBits { // We convert PageBits to a u8 a lot, so this is // for convenience. pub fn val(self) -> u8 { self as u8 } } // Each page is described by the Page structure. Linux does this // as well, where each 4096-byte chunk of memory has a structure // associated with it. However, there structure is much larger. pub struct Page { flags: u8, } impl Page { // If this page has been marked as the final allocation, // this function returns true. Otherwise, it returns false. 
pub fn is_last(&self) -> bool { if self.flags & PageBits::Last.val() != 0 { true } else { false } } // If the page is marked as being taken (allocated), then // this function returns true. Otherwise, it returns false. pub fn is_taken(&self) -> bool { if self.flags & PageBits::Taken.val() != 0 { true } else { false } } // This is the opposite of is_taken(). pub fn is_free(&self) -> bool { !self.is_taken() } // Clear the Page structure and all associated allocations. pub fn clear(&mut self) { self.flags = PageBits::Empty.val(); } // Set a certain flag. We ran into trouble here since PageBits // is an enumeration and we haven't implemented the BitOr Trait // on it. pub fn set_flag(&mut self, flag: PageBits) { self.flags |= flag.val(); } pub fn clear_flag(&mut self, flag: PageBits) { self.flags &= !(flag.val()); } } /// Initialize the allocation system. There are several ways that we can /// implement the page allocator: /// 1. Free list (singly linked list where it starts at the first free /// allocation) 2. Bookkeeping list (structure contains a taken and length) /// 3. Allocate one Page structure per 4096 bytes (this is what I chose) /// 4. Others pub fn init() { unsafe { let num_pages = HEAP_SIZE / PAGE_SIZE; let ptr = HEAP_START as *mut Page; // Clear all pages to make sure that they aren't accidentally // taken for i in 0..num_pages { (*ptr.add(i)).clear(); } // Determine where the actual useful memory starts. This will be // after all Page structures. We also must align the ALLOC_START // to a page-boundary (PAGE_SIZE = 4096). 
ALLOC_START = // (HEAP_START + num_pages * size_of::() + PAGE_SIZE - 1) // & !(PAGE_SIZE - 1); ALLOC_START = align_val( HEAP_START + num_pages * size_of::(), PAGE_ORDER, ); } } /// Allocate a page or multiple pages /// pages: the number of PAGE_SIZE pages to allocate pub fn alloc(pages: usize) -> *mut u8 { // We have to find a contiguous allocation of pages assert!(pages > 0); unsafe { // We create a Page structure for each page on the heap. We // actually might have more since HEAP_SIZE moves and so does // the size of our structure, but we'll only waste a few bytes. let num_pages = HEAP_SIZE / PAGE_SIZE; let ptr = HEAP_START as *mut Page; for i in 0..num_pages - pages { let mut found = false; // Check to see if this Page is free. If so, we have our // first candidate memory address. if (*ptr.add(i)).is_free() { // It was FREE! Yay! found = true; for j in i..i + pages { // Now check to see if we have a // contiguous allocation for all of the // request pages. If not, we should // check somewhere else. if (*ptr.add(j)).is_taken() { found = false; break; } } } // We've checked to see if there are enough contiguous // pages to form what we need. If we couldn't, found // will be false, otherwise it will be true, which means // we've found valid memory we can allocate. if found { for k in i..i + pages - 1 { (*ptr.add(k)).set_flag(PageBits::Taken); } // The marker for the last page is // PageBits::Last This lets us know when we've // hit the end of this particular allocation. (*ptr.add(i+pages-1)).set_flag(PageBits::Taken); (*ptr.add(i+pages-1)).set_flag(PageBits::Last); // The Page structures themselves aren't the // useful memory. Instead, there is 1 Page // structure per 4096 bytes starting at // ALLOC_START. return (ALLOC_START + PAGE_SIZE * i) as *mut u8; } } } // If we get here, that means that no contiguous allocation was // found. 
null_mut() } /// Allocate and zero a page or multiple pages /// pages: the number of pages to allocate /// Each page is PAGE_SIZE which is calculated as 1 << PAGE_ORDER /// On RISC-V, this typically will be 4,096 bytes. pub fn zalloc(pages: usize) -> *mut u8 { // Allocate and zero a page. // First, let's get the allocation let ret = alloc(pages); if !ret.is_null() { let size = (PAGE_SIZE * pages) / 8; let big_ptr = ret as *mut u64; for i in 0..size { // We use big_ptr so that we can force an // sd (store doubleword) instruction rather than // the sb. This means 8x fewer stores than before. // Typically we have to be concerned about remaining // bytes, but fortunately 4096 % 8 = 0, so we // won't have any remaining bytes. unsafe { (*big_ptr.add(i)) = 0; } } } ret } /// Deallocate a page by its pointer /// The way we've structured this, it will automatically coalesce /// contiguous pages. pub fn dealloc(ptr: *mut u8) { // Make sure we don't try to free a null pointer. assert!(!ptr.is_null()); unsafe { let addr = HEAP_START + (ptr as usize - ALLOC_START) / PAGE_SIZE; // Make sure that the address makes sense. The address we // calculate here is the page structure, not the HEAP address! assert!(addr >= HEAP_START && addr < HEAP_START + HEAP_SIZE); let mut p = addr as *mut Page; // Keep clearing pages until we hit the last page. while (*p).is_taken() && !(*p).is_last() { (*p).clear(); p = p.add(1); } // If the following assertion fails, it is most likely // caused by a double-free. assert!( (*p).is_last() == true, "Possible double-free detected! (Not taken found \ before last)" ); // If we get here, we've taken care of all previous pages and // we are on the last page. (*p).clear(); } } /// Print all page allocations /// This is mainly used for debugging. 
pub fn print_page_allocations() { unsafe { let num_pages = HEAP_SIZE / PAGE_SIZE; let mut beg = HEAP_START as *const Page; let end = beg.add(num_pages); let alloc_beg = ALLOC_START; let alloc_end = ALLOC_START + num_pages * PAGE_SIZE; println!(); println!( "PAGE ALLOCATION TABLE\nMETA: {:p} -> {:p}\nPHYS: \ 0x{:x} -> 0x{:x}", beg, end, alloc_beg, alloc_end ); println!("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"); let mut num = 0; while beg < end { if (*beg).is_taken() { let start = beg as usize; let memaddr = ALLOC_START + (start - HEAP_START) * PAGE_SIZE; print!("0x{:x} => ", memaddr); loop { num += 1; if (*beg).is_last() { let end = beg as usize; let memaddr = ALLOC_START + (end - HEAP_START) * PAGE_SIZE + PAGE_SIZE - 1; print!( "0x{:x}: {:>3} page(s)", memaddr, (end - start + 1) ); println!("."); break; } beg = beg.add(1); } } beg = beg.add(1); } println!("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"); println!( "Allocated: {:>5} pages ({:>9} bytes).", num, num * PAGE_SIZE ); println!( "Free : {:>5} pages ({:>9} bytes).", num_pages - num, (num_pages - num) * PAGE_SIZE ); println!(); } } // //////////////////////////////// // // MMU Routines // //////////////////////////////// // Represent (repr) our entry bits as // unsigned 64-bit integers. #[repr(i64)] #[derive(Copy, Clone)] pub enum EntryBits { None = 0, Valid = 1 << 0, Read = 1 << 1, Write = 1 << 2, Execute = 1 << 3, User = 1 << 4, Global = 1 << 5, Access = 1 << 6, Dirty = 1 << 7, // Convenience combinations ReadWrite = 1 << 1 | 1 << 2, ReadExecute = 1 << 1 | 1 << 3, ReadWriteExecute = 1 << 1 | 1 << 2 | 1 << 3, // User Convenience Combinations UserReadWrite = 1 << 1 | 1 << 2 | 1 << 4, UserReadExecute = 1 << 1 | 1 << 3 | 1 << 4, UserReadWriteExecute = 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4, } // Helper functions to convert the enumeration // into an i64, which is what our page table // entries will be. impl EntryBits { pub fn val(self) -> i64 { self as i64 } } // A single entry. 
We're using an i64 so that
// this will sign-extend rather than zero-extend
// since RISC-V requires that the reserved sections
// take on the most significant bit.
pub struct Entry {
	pub entry: i64,
}

// The Entry structure describes one of the 512 entries per table, which is
// described in the RISC-V privileged spec Figure 4.18.
impl Entry {
	pub fn is_valid(&self) -> bool {
		self.get_entry() & EntryBits::Valid.val() != 0
	}

	// The first bit (bit index #0) is the V bit for
	// valid.
	pub fn is_invalid(&self) -> bool {
		!self.is_valid()
	}

	// A leaf has one or more RWX bits set
	// (0xe masks bits 1..3 = Read | Write | Execute).
	pub fn is_leaf(&self) -> bool {
		self.get_entry() & 0xe != 0
	}

	// A branch is any valid entry that is not a leaf: it points at
	// the next-level table rather than at physical memory.
	pub fn is_branch(&self) -> bool {
		!self.is_leaf()
	}

	pub fn set_entry(&mut self, entry: i64) {
		self.entry = entry;
	}

	pub fn get_entry(&self) -> i64 {
		self.entry
	}
}

// Table represents a single table, which contains 512 (2^9), 64-bit entries.
pub struct Table {
	pub entries: [Entry; 512],
}

impl Table {
	// Number of entries in a Sv39 table (fixed by the spec).
	pub fn len() -> usize {
		512
	}
}

/// Map a virtual address to a physical address using 4096-byte page
/// size.
/// root: a mutable reference to the root Table
/// vaddr: The virtual address to map
/// paddr: The physical address to map
/// bits: An OR'd bitset containing the bits the leaf should have.
///       The bits should contain only the following:
///          Read, Write, Execute, User, and/or Global
///       The bits MUST include one or more of the following:
///          Read, Write, Execute
///       The valid bit automatically gets added.
/// level: the table level where the leaf is written (0 = 4 KiB pages).
///        NOTE(review): for level > 0 the PPN bits below that level are
///        presumably expected to be zero for a valid superpage — confirm.
pub fn map(root: &mut Table, vaddr: usize, paddr: usize, bits: i64, level: usize) {
	// Make sure that Read, Write, or Execute have been provided
	// otherwise, we'll leak memory and always create a page fault.
	assert!(bits & 0xe != 0);
	// Extract out each VPN from the virtual address
	// On the virtual address, each VPN is exactly 9 bits,
	// which is why we use the mask 0x1ff = 0b1_1111_1111 (9 bits)
	let vpn = [
	           // VPN[0] = vaddr[20:12]
	           (vaddr >> 12) & 0x1ff,
	           // VPN[1] = vaddr[29:21]
	           (vaddr >> 21) & 0x1ff,
	           // VPN[2] = vaddr[38:30]
	           (vaddr >> 30) & 0x1ff,
	];

	// Just like the virtual address, extract the physical address
	// numbers (PPN). However, PPN[2] is different in that it stores
	// 26 bits instead of 9. Therefore, we use,
	// 0x3ff_ffff = 0b11_1111_1111_1111_1111_1111_1111 (26 bits).
	let ppn = [
	           // PPN[0] = paddr[20:12]
	           (paddr >> 12) & 0x1ff,
	           // PPN[1] = paddr[29:21]
	           (paddr >> 21) & 0x1ff,
	           // PPN[2] = paddr[55:30]
	           (paddr >> 30) & 0x3ff_ffff,
	];
	// We will use this as a floating reference so that we can set
	// individual entries as we walk the table.
	let mut v = &mut root.entries[vpn[2]];
	// Now, we're going to traverse the page table and set the bits
	// properly. We expect the root to be valid, however we're required to
	// create anything beyond the root.
	// In Rust, we create a range iterator using the .. operator.
	// The .rev() will reverse the iteration since we need to start with
	// VPN[2] The .. operator is inclusive on start but exclusive on end.
	// So, (0..2) will iterate 0 and 1.
	for i in (level..2).rev() {
		if !v.is_valid() {
			// Allocate a page for the missing intermediate table.
			let page = zalloc(1);
			// The page is already aligned by 4,096, so store it
			// directly The page is stored in the entry shifted
			// right by 2 places.
			v.set_entry(
			            (page as i64 >> 2)
			            | EntryBits::Valid.val(),
			);
		}
		// Recover the next table's physical address: mask off the
		// flag bits (low 10) and undo the >>2 storage shift.
		let entry = ((v.get_entry() & !0x3ff) << 2) as *mut Entry;
		v = unsafe { entry.add(vpn[i]).as_mut().unwrap() };
	}
	// When we get here, we should be at VPN[0] and v should be pointing to
	// our entry.
	// The entry structure is Figure 4.18 in the RISC-V Privileged
	// Specification
	let entry = (ppn[2] << 28) as i64 |   // PPN[2] = [53:28]
	            (ppn[1] << 19) as i64 |   // PPN[1] = [27:19]
	            (ppn[0] << 10) as i64 |   // PPN[0] = [18:10]
	            bits |                    // Specified bits, such as User, Read, Write, etc
	            EntryBits::Valid.val();   // Valid bit
	// Set the entry. V should be set to the correct pointer by the loop
	// above.
	v.set_entry(entry);
}

/// Unmaps and frees all memory associated with a table.
/// root: The root table to start freeing.
/// NOTE: This does NOT free root directly. This must be
/// freed manually.
/// The reason we don't free the root is because it is
/// usually embedded into the Process structure.
pub fn unmap(root: &mut Table) {
	// Start with level 2
	for lv2 in 0..Table::len() {
		let ref entry_lv2 = root.entries[lv2];
		if entry_lv2.is_valid() && entry_lv2.is_branch() {
			// This is a valid entry, so drill down and free.
			let memaddr_lv1 = (entry_lv2.get_entry() & !0x3ff) << 2;
			let table_lv1 = unsafe {
				// Make table_lv1 a mutable reference instead of
				// a pointer.
				(memaddr_lv1 as *mut Table).as_mut().unwrap()
			};
			for lv1 in 0..Table::len() {
				let ref entry_lv1 = table_lv1.entries[lv1];
				if entry_lv1.is_valid() && entry_lv1.is_branch()
				{
					let memaddr_lv0 = (entry_lv1.get_entry()
					                   & !0x3ff) << 2;
					// The next level is level 0, which
					// cannot have branches, therefore,
					// we free here.
					dealloc(memaddr_lv0 as *mut u8);
				}
			}
			dealloc(memaddr_lv1 as *mut u8);
		}
	}
}

/// Walk the page table to convert a virtual address to a
/// physical address.
/// If a page fault would occur, this returns None
/// Otherwise, it returns Some with the physical address.
pub fn virt_to_phys(root: &Table, vaddr: usize) -> Option { // Walk the page table pointed to by root let vpn = [ // VPN[0] = vaddr[20:12] (vaddr >> 12) & 0x1ff, // VPN[1] = vaddr[29:21] (vaddr >> 21) & 0x1ff, // VPN[2] = vaddr[38:30] (vaddr >> 30) & 0x1ff, ]; let mut v = &root.entries[vpn[2]]; for i in (0..=2).rev() { if v.is_invalid() { // This is an invalid entry, page fault. break; } else if v.is_leaf() { // According to RISC-V, a leaf can be at any level. // The offset mask masks off the PPN. Each PPN is 9 // bits and they start at bit #12. So, our formula // 12 + i * 9 let off_mask = (1 << (12 + i * 9)) - 1; let vaddr_pgoff = vaddr & off_mask; let addr = ((v.get_entry() << 2) as usize) & !off_mask; return Some(addr | vaddr_pgoff); } // Set v to the next entry which is pointed to by this // entry. However, the address was shifted right by 2 places // when stored in the page table entry, so we shift it left // to get it back into place. let entry = ((v.get_entry() & !0x3ff) << 2) as *const Entry; // We do i - 1 here, however we should get None or Some() above // before we do 0 - 1 = -1. v = unsafe { entry.add(vpn[i - 1]).as_ref().unwrap() }; } // If we get here, we've exhausted all valid tables and haven't // found a leaf. 
None } ================================================ FILE: risc_v/chapters/ch3/src/uart.rs ================================================ // uart.rs // UART routines and driver use core::convert::TryInto; use core::fmt::Write; use core::fmt::Error; pub struct Uart { base_address: usize, } impl Write for Uart { fn write_str(&mut self, out: &str) -> Result<(), Error> { for c in out.bytes() { self.put(c); } Ok(()) } } impl Uart { pub fn new(base_address: usize) -> Self { Uart { base_address } } pub fn init(&mut self) { let ptr = self.base_address as *mut u8; unsafe { // First, set the word length, which // are bits 0 and 1 of the line control register (LCR) // which is at base_address + 3 // We can easily write the value 3 here or 0b11, but I'm // extending it so that it is clear we're setting two individual // fields // Word 0 Word 1 // ~~~~~~ ~~~~~~ let lcr: u8 = (1 << 0) | (1 << 1); ptr.add(3).write_volatile(lcr); // Now, enable the FIFO, which is bit index 0 of the FIFO // control register (FCR at offset 2). // Again, we can just write 1 here, but when we use left shift, // it's easier to see that we're trying to write bit index #0. ptr.add(2).write_volatile(1 << 0); // Enable receiver buffer interrupts, which is at bit index // 0 of the interrupt enable register (IER at offset 1). ptr.add(1).write_volatile(1 << 0); // If we cared about the divisor, the code below would set the divisor // from a global clock rate of 22.729 MHz (22,729,000 cycles per second) // to a signaling rate of 2400 (BAUD). We usually have much faster signalling // rates nowadays, but this demonstrates what the divisor actually does. 
// The formula given in the NS16500A specification for calculating the divisor // is: // divisor = ceil( (clock_hz) / (baud_sps x 16) ) // So, we substitute our values and get: // divisor = ceil( 22_729_000 / (2400 x 16) ) // divisor = ceil( 22_729_000 / 38_400 ) // divisor = ceil( 591.901 ) = 592 // The divisor register is two bytes (16 bits), so we need to split the value // 592 into two bytes. Typically, we would calculate this based on measuring // the clock rate, but again, for our purposes [qemu], this doesn't really do // anything. let divisor: u16 = 592; let divisor_least: u8 = (divisor & 0xff).try_into().unwrap(); let divisor_most: u8 = (divisor >> 8).try_into().unwrap(); // Notice that the divisor register DLL (divisor latch least) and DLM (divisor latch most) // have the same base address as the receiver/transmitter and the interrupt enable register. // To change what the base address points to, we open the "divisor latch" by writing 1 into // the Divisor Latch Access Bit (DLAB), which is bit index 7 of the Line Control Register (LCR) // which is at base_address + 3. ptr.add(3).write_volatile(lcr | 1 << 7); // Now, base addresses 0 and 1 point to DLL and DLM, respectively. // Put the lower 8 bits of the divisor into DLL ptr.add(0).write_volatile(divisor_least); ptr.add(1).write_volatile(divisor_most); // Now that we've written the divisor, we never have to touch this again. In hardware, this // will divide the global clock (22.729 MHz) into one suitable for 2,400 signals per second. // So, to once again get access to the RBR/THR/IER registers, we need to close the DLAB bit // by clearing it to 0. ptr.add(3).write_volatile(lcr); } } pub fn put(&mut self, c: u8) { let ptr = self.base_address as *mut u8; unsafe { ptr.add(0).write_volatile(c); } } pub fn get(&mut self) -> Option { let ptr = self.base_address as *mut u8; unsafe { if ptr.add(5).read_volatile() & 1 == 0 { // The DR bit is 0, meaning no data None } else { // The DR bit is 1, meaning data! 
Some(ptr.add(0).read_volatile()) } } } } ================================================ FILE: risc_v/chapters/ch4/.cargo/config ================================================ [build] target = "riscv64gc-unknown-none-elf" [target.riscv64gc-unknown-none-elf] linker = "riscv64-unknown-linux-gnu-gcc" ================================================ FILE: risc_v/chapters/ch4/.gitignore ================================================ os.elf target/* Cargo.lock hdd.dsk ================================================ FILE: risc_v/chapters/ch4/Cargo.toml ================================================ [package] name = "sos" version = "0.1.0" authors = ["Stephen Marz "] edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] crate-type = ["staticlib"] [dependencies] ================================================ FILE: risc_v/chapters/ch4/Makefile ================================================ ##### ## BUILD ##### CC=riscv64-unknown-linux-gnu-gcc CFLAGS=-Wall -Wextra -pedantic -Wextra -O0 -g CFLAGS+=-static -ffreestanding -nostdlib -fno-rtti -fno-exceptions CFLAGS+=-march=rv64gc -mabi=lp64 INCLUDES= LINKER_SCRIPT=-Tsrc/lds/virt.lds TYPE=debug RUST_TARGET=./target/riscv64gc-unknown-none-elf/$(TYPE) LIBS=-L$(RUST_TARGET) SOURCES_ASM=$(wildcard src/asm/*.S) LIB=-lsos -lgcc OUT=os.elf ##### ## QEMU ##### QEMU=qemu-system-riscv64 MACH=virt CPU=rv64 CPUS=4 MEM=128M DISK=hdd.dsk # DRIVE= -drive if=none,format=raw,file=$(DISK),id=foo -device virtio-blk-device,scsi=off,drive=foo DRIVE= all: cargo build $(CC) $(CFLAGS) $(LINKER_SCRIPT) $(INCLUDES) -o $(OUT) $(SOURCES_ASM) $(LIBS) $(LIB) run: all $(QEMU) -machine $(MACH) -cpu $(CPU) -smp $(CPUS) -m $(MEM) $(DRIVE) -nographic -serial mon:stdio -bios none -kernel $(OUT) .PHONY: clean clean: cargo clean rm -f $(OUT) ================================================ FILE: risc_v/chapters/ch4/make_hdd.sh ================================================ #!/bin/sh dd 
if=/dev/zero of=hdd.dsk bs=1M count=32 ================================================ FILE: risc_v/chapters/ch4/src/asm/boot.S ================================================ # boot.S # bootloader for SoS # Stephen Marz # 8 February 2019 # Disable generation of compressed instructions. .option norvc # Define a .text.init section. The .text.init is put at the # starting address so that the entry _start is put at the RISC-V # address 0x8000_0000. .section .text.init # Execution starts here. .global _start _start: # Disable linker instruction relaxation for the `la` instruction below. # This disallows the assembler from assuming that `gp` is already initialized. # This causes the value stored in `gp` to be calculated from `pc`. # The job of the global pointer is to give the linker the ability to address # memory relative to GP instead of as an absolute address. .option push .option norelax la gp, _global_pointer .option pop # SATP should be zero, but let's make sure. Each HART has its own # SATP register. csrw satp, zero # Any hardware threads (hart) that are not bootstrapping # need to wait for an IPI csrr t0, mhartid bnez t0, 3f # Set all bytes in the BSS section to zero. la a0, _bss_start la a1, _bss_end bgeu a0, a1, 2f 1: sd zero, (a0) addi a0, a0, 8 bltu a0, a1, 1b 2: # The stack grows from bottom to top, so we put the stack pointer # to the very end of the stack range. la sp, _stack_end # Setting `mstatus` register: # 0b01 << 11: Machine's previous protection mode is 2 (MPP=2). li t0, 0b11 << 11 csrw mstatus, t0 # Do not allow interrupts while running kinit csrw mie, zero # Machine's exception program counter (MEPC) is set to `kinit`. la t1, kinit csrw mepc, t1 # Set the return address to get us into supervisor mode la ra, 2f # We use mret here so that the mstatus register is properly updated. mret 2: # We set the return address (ra above) to this label. When kinit() is finished # in Rust, it will return here. 
# Setting `mstatus` (supervisor status) register: # 0b01 << 11 : Previous protection mode is 1 (MPP=01 [Supervisor]). # 1 << 7 : Previous machine interrupt-enable bit is 1 (MPIE=1 [Enabled]) # 1 << 5 : Previous interrupt-enable bit is 1 (SPIE=1 [Enabled]). # We set the "previous" bits because the mret will write the current bits # with the previous bits. li t0, (0b01 << 11) | (1 << 7) | (1 << 5) csrw mstatus, t0 # Machine's trap vector base address is set to `m_trap_vector`, for # "machine" trap vector. la t2, m_trap_vector csrw mtvec, t2 # Setting `stvec` (supervisor trap vector) register: # Essentially this is a function pointer, but the last two bits can be 00 or 01 # 00 : All exceptions set pc to BASE # 01 : Asynchronous interrupts set pc to BASE + 4 x scause # la t3, s_trap_vector # csrw stvec, t3 # Jump to kmain. We put the MPP = 01 for supervisor mode, so after # mret, we will jump to kmain in supervisor mode. la t1, kmain csrw mepc, t1 # Setting `sie` (supervisor interrupt enable) register: # This register takes the same bits as mideleg # 1 << 1 : Supervisor software interrupt enable (SSIE=1 [Enabled]) # 1 << 5 : Supervisor timer interrupt enable (STIE=1 [Enabled]) # 1 << 9 : Supervisor external interrupt enable (SEIE=1 [Enabled]) # 0xaaa = MEIP/SEIP and MTIP/STIP and MSIP/SSIP li t2, 0x888 csrw mie, t2 mret 3: # Parked harts go here. We need to set these # to only awaken if it receives a software interrupt, # which we're going to call the SIPI (Software Intra-Processor Interrupt). # We call the SIPI by writing the software interrupt into the Core Local Interruptor (CLINT) # Which is calculated by: base_address + hart * 4 # where base address is 0x0200_0000 (MMIO CLINT base address) # We only use additional harts to run user-space programs, although this may # change. # We divide up the stack so the harts aren't clobbering one another. 
la sp, _stack_end li t0, 0x10000 csrr a0, mhartid mul t0, t0, a0 sub sp, sp, t0 # The parked harts will be put into machine mode with interrupts enabled. li t0, 0b11 << 11 | (1 << 7) csrw mstatus, t0 # Allow for MSIP (Software interrupt). We will write the MSIP from hart #0 to # awaken these parked harts. li t3, (1 << 3) csrw mie, t3 # Machine's exception program counter (MEPC) is set to the Rust initialization # code and waiting loop. la t1, kinit_hart csrw mepc, t1 # Machine's trap vector base address is set to `m_trap_vector`, for # "machine" trap vector. The Rust initialization routines will give each # hart its own trap frame. We can use the same trap function and distinguish # between each hart by looking at the trap frame. la t2, m_trap_vector csrw mtvec, t2 # Whenever our hart is done initializing, we want it to return to the waiting # loop, which is just below mret. la ra, 4f # We use mret here so that the mstatus register is properly updated. mret 4: # wfi = wait for interrupt. This is a hint to the harts to shut everything needed # down. However, the RISC-V specification allows for wfi to do nothing. Anyway, # with QEMU, this will save some CPU! 
wfi j 4b ================================================ FILE: risc_v/chapters/ch4/src/asm/mem.S ================================================ // mem.S // Importation of linker symbols .section .rodata .global HEAP_START HEAP_START: .dword _heap_start .global HEAP_SIZE HEAP_SIZE: .dword _heap_size .global TEXT_START TEXT_START: .dword _text_start .global TEXT_END TEXT_END: .dword _text_end .global DATA_START DATA_START: .dword _data_start .global DATA_END DATA_END: .dword _data_end .global RODATA_START RODATA_START: .dword _rodata_start .global RODATA_END RODATA_END: .dword _rodata_end .global BSS_START BSS_START: .dword _bss_start .global BSS_END BSS_END: .dword _bss_end .global KERNEL_STACK_START KERNEL_STACK_START: .dword _stack_start .global KERNEL_STACK_END KERNEL_STACK_END: .dword _stack_end ================================================ FILE: risc_v/chapters/ch4/src/asm/trap.S ================================================ # trap.S # Trap handler and global context # Steve Operating System # Stephen Marz # 24 February 2019 .option norvc .altmacro .set NUM_GP_REGS, 32 # Number of registers per context .set NUM_FP_REGS, 32 .set REG_SIZE, 8 # Register size (in bytes) .set MAX_CPUS, 8 # Maximum number of CPUs # Use macros for saving and restoring multiple registers .macro save_gp i, basereg=t6 sd x\i, ((\i)*REG_SIZE)(\basereg) .endm .macro load_gp i, basereg=t6 ld x\i, ((\i)*REG_SIZE)(\basereg) .endm .macro save_fp i, basereg=t6 fsd f\i, ((NUM_GP_REGS+(\i))*REG_SIZE)(\basereg) .endm .macro load_fp i, basereg=t6 fld f\i, ((NUM_GP_REGS+(\i))*REG_SIZE)(\basereg) .endm .section .text .global m_trap_vector # This must be aligned by 4 since the last two bits # of the mtvec register do not contribute to the address # of this vector. .align 4 m_trap_vector: # All registers are volatile here, we need to save them # before we do anything. csrrw t6, mscratch, t6 # csrrw will atomically swap t6 into mscratch and the old # value of mscratch into t6. 
This is nice because we just # switched values and didn't destroy anything -- all atomically! # in cpu.rs we have a structure of: # 32 gp regs 0 # 32 fp regs 256 # SATP register 512 # Trap stack 520 # CPU HARTID 528 # We use t6 as the temporary register because it is the very # bottom register (x31) .set i, 1 .rept 30 save_gp %i .set i, i+1 .endr # Save the actual t6 register, which we swapped into # mscratch mv t5, t6 csrr t6, mscratch save_gp 31, t5 # Restore the kernel trap frame into mscratch csrw mscratch, t5 # Get ready to go into Rust (trap.rs) # We don't want to write into the user's stack or whomever # messed with us here. csrr a0, mepc csrr a1, mtval csrr a2, mcause csrr a3, mhartid csrr a4, mstatus mv a5, t5 ld sp, 520(a5) call m_trap # When we get here, we've returned from m_trap, restore registers # and return. # m_trap will return the return address via a0. csrw mepc, a0 # Now load the trap frame back into t6 csrr t6, mscratch # Restore all GP registers .set i, 1 .rept 31 load_gp %i .set i, i+1 .endr # Since we ran this loop 31 times starting with i = 1, # the last one loaded t6 back to its original value. 
mret .global make_syscall make_syscall: ecall ret ================================================ FILE: risc_v/chapters/ch4/src/cpu.rs ================================================ // cpu.rs // CPU and CPU-related routines // Also contains the kernel's trap frame // Stephen Marz // 14 October 2019 use core::ptr::null_mut; #[repr(usize)] pub enum SatpMode { Off = 0, Sv39 = 8, Sv48 = 9, } #[repr(C)] #[derive(Clone, Copy)] pub struct TrapFrame { pub regs: [usize; 32], // 0 - 255 pub fregs: [usize; 32], // 256 - 511 pub satp: usize, // 512 - 519 pub trap_stack: *mut u8, // 520 pub hartid: usize, // 528 } impl TrapFrame { pub const fn zero() -> Self { TrapFrame { regs: [0; 32], fregs: [0; 32], satp: 0, trap_stack: null_mut(), hartid: 0, } } } pub static mut KERNEL_TRAP_FRAME: [TrapFrame; 8] = [TrapFrame::zero(); 8]; pub const fn build_satp(mode: SatpMode, asid: usize, addr: usize) -> usize { (mode as usize) << 60 | (asid & 0xffff) << 44 | (addr >> 12) & 0xff_ffff_ffff } pub fn mhartid_read() -> usize { unsafe { let rval; asm!("csrr $0, mhartid" :"=r"(rval)); rval } } pub fn mstatus_write(val: usize) { unsafe { asm!("csrw mstatus, $0" ::"r"(val)); } } pub fn mstatus_read() -> usize { unsafe { let rval; asm!("csrr $0, mstatus":"=r"(rval)); rval } } pub fn stvec_write(val: usize) { unsafe { asm!("csrw stvec, $0" ::"r"(val)); } } pub fn stvec_read() -> usize { unsafe { let rval; asm!("csrr $0, stvec" :"=r"(rval)); rval } } pub fn mscratch_write(val: usize) { unsafe { asm!("csrw mscratch, $0" ::"r"(val)); } } pub fn mscratch_read() -> usize { unsafe { let rval; asm!("csrr $0, mscratch" : "=r"(rval)); rval } } pub fn mscratch_swap(to: usize) -> usize { unsafe { let from; asm!("csrrw $0, mscratch, $1" : "=r"(from) : "r"(to)); from } } pub fn sscratch_write(val: usize) { unsafe { asm!("csrw sscratch, $0" ::"r"(val)); } } pub fn sscratch_read() -> usize { unsafe { let rval; asm!("csrr $0, sscratch" : "=r"(rval)); rval } } pub fn sscratch_swap(to: usize) -> usize { unsafe { 
let from; asm!("csrrw $0, sscratch, $1" : "=r"(from) : "r"(to)); from } } pub fn sepc_write(val: usize) { unsafe { asm!("csrw sepc, $0" :: "r"(val)); } } pub fn sepc_read() -> usize { unsafe { let rval; asm!("csrr $0, sepc" :"=r"(rval)); rval } } pub fn satp_write(val: usize) { unsafe { asm!("csrw satp, $0" :: "r"(val)); } } pub fn satp_read() -> usize { unsafe { let rval; asm!("csrr $0, satp" :"=r"(rval)); rval } } pub fn satp_fence(vaddr: usize, asid: usize) { unsafe { asm!("sfence.vma $0, $1" :: "r"(vaddr), "r"(asid)); } } pub fn satp_fence_asid(asid: usize) { unsafe { asm!("sfence.vma zero, $0" :: "r"(asid)); } } ================================================ FILE: risc_v/chapters/ch4/src/kmem.rs ================================================ // kmem.rs // Sub-page level: malloc-like allocation system // Stephen Marz // 7 October 2019 use crate::page::{align_val, zalloc, Table, PAGE_SIZE}; use core::{mem::size_of, ptr::null_mut}; #[repr(usize)] enum AllocListFlags { Taken = 1 << 63, } impl AllocListFlags { pub fn val(self) -> usize { self as usize } } struct AllocList { pub flags_size: usize, } impl AllocList { pub fn is_taken(&self) -> bool { self.flags_size & AllocListFlags::Taken.val() != 0 } pub fn is_free(&self) -> bool { !self.is_taken() } pub fn set_taken(&mut self) { self.flags_size |= AllocListFlags::Taken.val(); } pub fn set_free(&mut self) { self.flags_size &= !AllocListFlags::Taken.val(); } pub fn set_size(&mut self, sz: usize) { let k = self.is_taken(); self.flags_size = sz & !AllocListFlags::Taken.val(); if k { self.flags_size |= AllocListFlags::Taken.val(); } } pub fn get_size(&self) -> usize { self.flags_size & !AllocListFlags::Taken.val() } } // This is the head of the allocation. We start here when // we search for a free memory location. static mut KMEM_HEAD: *mut AllocList = null_mut(); // In the future, we will have on-demand pages // so, we need to keep track of our memory footprint to // see if we actually need to allocate more. 
static mut KMEM_ALLOC: usize = 0; static mut KMEM_PAGE_TABLE: *mut Table = null_mut(); // These functions are safe helpers around an unsafe // operation. pub fn get_head() -> *mut u8 { unsafe { KMEM_HEAD as *mut u8 } } pub fn get_page_table() -> *mut Table { unsafe { KMEM_PAGE_TABLE as *mut Table } } pub fn get_num_allocations() -> usize { unsafe { KMEM_ALLOC } } /// Initialize kernel's memory /// This is not to be used to allocate memory /// for user processes. If that's the case, use /// alloc/dealloc from the page crate. pub fn init() { unsafe { // Allocate kernel pages (KMEM_ALLOC) KMEM_ALLOC = 512; let k_alloc = zalloc(KMEM_ALLOC); assert!(!k_alloc.is_null()); KMEM_HEAD = k_alloc as *mut AllocList; (*KMEM_HEAD).set_free(); (*KMEM_HEAD).set_size(KMEM_ALLOC * PAGE_SIZE); KMEM_PAGE_TABLE = zalloc(1) as *mut Table; } } /// Allocate sub-page level allocation based on bytes and zero the memory pub fn kzmalloc(sz: usize) -> *mut u8 { let size = align_val(sz, 3); let ret = kmalloc(size); if !ret.is_null() { for i in 0..size { unsafe { (*ret.add(i)) = 0; } } } ret } /// Allocate sub-page level allocation based on bytes pub fn kmalloc(sz: usize) -> *mut u8 { unsafe { let size = align_val(sz, 3) + size_of::(); let mut head = KMEM_HEAD; // .add() uses pointer arithmetic, so we type-cast into a u8 // so that we multiply by an absolute size (KMEM_ALLOC * // PAGE_SIZE). let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { if (*head).is_free() && size <= (*head).get_size() { let chunk_size = (*head).get_size(); let rem = chunk_size - size; (*head).set_taken(); if rem > size_of::() { let next = (head as *mut u8).add(size) as *mut AllocList; // There is space remaining here. 
(*next).set_free(); (*next).set_size(rem); (*head).set_size(size); } else { // If we get here, take the entire chunk (*head).set_size(chunk_size); } return head.add(1) as *mut u8; } else { // If we get here, what we saw wasn't a free // chunk, move on to the next. head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } // If we get here, we didn't find any free chunks--i.e. there isn't // enough memory for this. TODO: Add on-demand page allocation. null_mut() } /// Free a sub-page level allocation pub fn kfree(ptr: *mut u8) { unsafe { if !ptr.is_null() { let p = (ptr as *mut AllocList).offset(-1); if (*p).is_taken() { (*p).set_free(); } // After we free, see if we can combine adjacent free // spots to see if we can reduce fragmentation. coalesce(); } } } /// Merge smaller chunks into a bigger chunk pub fn coalesce() { unsafe { let mut head = KMEM_HEAD; let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { let next = (head as *mut u8).add((*head).get_size()) as *mut AllocList; if (*head).get_size() == 0 { // If this happens, then we have a bad heap // (double free or something). However, that // will cause an infinite loop since the next // pointer will never move beyond the current // location. break; } else if next >= tail { // We calculated the next by using the size // given as get_size(), however this could push // us past the tail. In that case, the size is // wrong, hence we break and stop doing what we // need to do. break; } else if (*head).is_free() && (*next).is_free() { // This means we have adjacent blocks needing to // be freed. So, we combine them into one // allocation. (*head).set_size( (*head).get_size() + (*next).get_size(), ); } // If we get here, we might've moved. Recalculate new // head. 
head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } /// For debugging purposes, print the kmem table pub fn print_table() { unsafe { let mut head = KMEM_HEAD; let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { println!( "{:p}: Length = {:<10} Taken = {}", head, (*head).get_size(), (*head).is_taken() ); head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } // /////////////////////////////////// // / GLOBAL ALLOCATOR // /////////////////////////////////// // The global allocator allows us to use the data structures // in the core library, such as a linked list or B-tree. // We want to use these sparingly since we have a coarse-grained // allocator. use core::alloc::{GlobalAlloc, Layout}; // The global allocator is a static constant to a global allocator // structure. We don't need any members because we're using this // structure just to implement alloc and dealloc. struct OsGlobalAlloc; unsafe impl GlobalAlloc for OsGlobalAlloc { unsafe fn alloc(&self, layout: Layout) -> *mut u8 { // We align to the next page size so that when // we divide by PAGE_SIZE, we get exactly the number // of pages necessary. kzmalloc(layout.size()) } unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) { // We ignore layout since our allocator uses ptr_start -> last // to determine the span of an allocation. kfree(ptr); } } #[global_allocator] /// Technically, we don't need the {} at the end, but it /// reveals that we're creating a new structure and not just /// copying a value. static GA: OsGlobalAlloc = OsGlobalAlloc {}; #[alloc_error_handler] /// If for some reason alloc() in the global allocator gets null_mut(), /// then we come here. This is a divergent function, so we call panic to /// let the tester know what's going on. pub fn alloc_error(l: Layout) -> ! 
{ panic!( "Allocator failed to allocate {} bytes with {}-byte alignment.", l.size(), l.align() ); } ================================================ FILE: risc_v/chapters/ch4/src/lds/virt.lds ================================================ /* virt.lds Linker script for outputting to RISC-V QEMU "virt" machine. Stephen Marz 6 October 2019 */ /* riscv is the name of the architecture that the linker understands for any RISC-V target (64-bit or 32-bit). We will further refine this by using -mabi=lp64 and -march=rv64gc */ OUTPUT_ARCH( "riscv" ) /* We're setting our entry point to a symbol called _start which is inside of boot.S. This essentially stores the address of _start as the "entry point", or where CPU instructions should start executing. In the rest of this script, we are going to place _start right at the beginning of 0x8000_0000 because this is where the virtual machine and many RISC-V boards will start executing. */ ENTRY( _start ) /* The MEMORY section will explain that we have "ram" that contains a section that is 'w' (writeable), 'x' (executable), and 'a' (allocatable). We use '!' to invert 'r' (read-only) and 'i' (initialized). We don't want our memory to be read-only, and we're stating that it is NOT initialized at the beginning. The ORIGIN is the memory address 0x8000_0000. If we look at the virt spec or the specification for the RISC-V HiFive Unleashed, this is the starting memory address for our code. Side note: There might be other boot ROMs at different addresses, but their job is to get to this point. Finally LENGTH = 128M tells the linker that we have 128 megabyte of RAM. The linker will double check this to make sure everything can fit. The HiFive Unleashed has a lot more RAM than this, but for the virtual machine, I went with 128M since I think that's enough RAM for now. We can provide other pieces of memory, such as QSPI, or ROM, but we're telling the linker script here that we have one pool of RAM. 
*/ MEMORY { ram (wxa!ri) : ORIGIN = 0x80000000, LENGTH = 128M } /* PHDRS is short for "program headers", which we specify three here: text - CPU instructions (executable sections) data - Global, initialized variables bss - Global, uninitialized variables (all will be set to 0 by boot.S) The command PT_LOAD tells the linker that these sections will be loaded from the file into memory. We can actually stuff all of these into a single program header, but by splitting it up into three, we can actually use the other PT_* commands such as PT_DYNAMIC, PT_INTERP, PT_NULL to tell the linker where to find additional information. However, for our purposes, every section will be loaded from the program headers. */ PHDRS { text PT_LOAD; data PT_LOAD; bss PT_LOAD; } /* We are now going to organize the memory based on which section it is in. In assembly, we can change the section with the ".section" directive. However, in C++ and Rust, CPU instructions go into text, global constants go into rodata, global initialized variables go into data, and global uninitialized variables go into bss. */ SECTIONS { /* The first part of our RAM layout will be the text section. Since our CPU instructions are here, and our memory starts at 0x8000_0000, we need our entry point to line up here. */ .text : { /* PROVIDE allows me to access a symbol called _text_start so I know where the text section starts in the operating system. This should not move, but it is here for convenience. The period '.' tells the linker to set _text_start to the CURRENT location ('.' = current memory location). This current memory location moves as we add things. */ PROVIDE(_text_start = .); /* We are going to layout all text sections here, starting with .text.init. The asterisk in front of the parentheses means to match the .text.init section of ANY object file. 
Otherwise, we can specify which object file should contain the .text.init section, for example, boot.o(.text.init) would specifically put the .text.init section of our bootloader here. Because we might want to change the name of our files, we'll leave it with a *. Inside the parentheses is the name of the section. I created my own called .text.init to make 100% sure that the _start is put right at the beginning. The linker will lay this out in the order it receives it: .text.init first all .text sections next any .text.* sections last .text.* means to match anything after .text. If we didn't already specify .text.init, this would've matched here. The assembler and linker can place things in "special" text sections, so we match any we might come across here. */ *(.text.init) *(.text .text.*) /* Again, with PROVIDE, we're providing a readable symbol called _text_end, which is set to the memory address AFTER .text.init, .text, and .text.*'s have been added. */ PROVIDE(_text_end = .); /* The portion after the right brace is in an odd format. However, this is telling the linker what memory portion to put it in. We labeled our RAM, ram, with the constraints that it is writeable, allocatable, and executable. The linker will make sure with this that we can do all of those things. >ram - This just tells the linker script to put this entire section (.text) into the ram region of memory. To my knowledge, the '>' does not mean "greater than". Instead, it is a symbol to let the linker know we want to put this in ram. AT>ram - This sets the LMA (load memory address) region to the same thing. LMA is the final translation of a VMA (virtual memory address). With this linker script, we're loading everything into its physical location. We'll let the kernel copy and sort out the virtual memory. That's why >ram and AT>ram are continually the same thing. :text - This tells the linker script to put this into the :text program header. We've only defined three: text, data, and bss. 
In this case, we're telling the linker script to go into the text section. */ } >ram AT>ram :text /* The global pointer allows the linker to position global variables and constants into independent positions relative to the gp (global pointer) register. The globals start after the text sections and are only relevant to the rodata, data, and bss sections. */ PROVIDE(_global_pointer = .); /* Most compilers create a rodata (read only data) section for global constants. However, we're going to place ours in the text section. We can actually put this in :data, but since the .text section is read-only, we can place it there. NOTE: This doesn't actually do anything, yet. The actual "protection" cannot be done at link time. Instead, when we program the memory management unit (MMU), we will be able to choose which bits (R=read, W=write, X=execute) we want each memory segment to be able to do. */ .rodata : { PROVIDE(_rodata_start = .); *(.rodata .rodata.*) PROVIDE(_rodata_end = .); /* Again, we're placing the rodata section in the memory segment "ram" and we're putting it in the :text program header. We don't have one for rodata anyway. */ } >ram AT>ram :text .data : { /* . = ALIGN(4096) tells the linker to align the current memory location (which is 0x8000_0000 + text section + rodata section) to 4096 bytes. This is because our paging system's resolution is 4,096 bytes or 4 KiB. */ . = ALIGN(4096); PROVIDE(_data_start = .); /* sdata and data are essentially the same thing. However, compilers usually use the sdata sections for shorter, quicker loading sections. So, usually critical data is loaded there. However, we're loading all of this in one fell swoop. So, we're looking to put all of the following sections under the umbrella .data: .sdata .sdata.[anything] .data .data.[anything] ...in that order. 
*/
    *(.sdata .sdata.*) *(.data .data.*)
    PROVIDE(_data_end = .);
  } >ram AT>ram :data
  .bss : {
    PROVIDE(_bss_start = .);
    *(.sbss .sbss.*) *(.bss .bss.*)
    PROVIDE(_bss_end = .);
  } >ram AT>ram :bss
  /*
     The following will be helpful when we allocate the kernel stack (_stack) and
     determine where the heap begins and ends (_heap_start and _heap_start +
     _heap_size). When we do memory allocation, we can use these symbols.

     We use the symbols instead of hard-coding an address because this is a floating target.
     As we add code, the heap moves farther down the memory and gets shorter.

     _memory_start will be set to 0x8000_0000 here. We use ORIGIN(ram) so that it will take
     whatever we set the origin of ram to. Otherwise, we'd have to change it more than once
     if we ever stray away from 0x8000_0000 as our entry point.
  */
  PROVIDE(_memory_start = ORIGIN(ram));
  /*
     Our kernel stack starts at the end of the bss segment (_bss_end). However, we're allocating
     0x80000 bytes (512 KiB) to our kernel stack. This should be PLENTY of space. The reason
     we add the memory is because the stack grows from higher memory to lower memory (bottom to top).
     Therefore we set the stack at the very bottom of its allocated slot.
     When we go to allocate from the stack, we'll subtract the number of bytes we need.
  */
  PROVIDE(_stack_start = _bss_end);
  PROVIDE(_stack_end = _stack_start + 0x8000);
  PROVIDE(_memory_end = ORIGIN(ram) + LENGTH(ram));
  /*
     Finally, our heap starts right after the kernel stack. This heap will be used mainly
     to dole out memory for user-space applications. However, in some circumstances, it will
     be used for kernel memory as well.

     We don't align here because we let the kernel determine how it wants to do this.
*/ PROVIDE(_heap_start = _stack_end); PROVIDE(_heap_size = _memory_end - _heap_start); } ================================================ FILE: risc_v/chapters/ch4/src/lib.rs ================================================ // Steve Operating System // Stephen Marz // 21 Sep 2019 #![no_std] #![feature(panic_info_message, asm, allocator_api, alloc_error_handler, alloc_prelude, const_raw_ptr_to_usize_cast)] #[macro_use] extern crate alloc; // This is experimental and requires alloc_prelude as a feature use alloc::prelude::v1::*; // /////////////////////////////////// // / RUST MACROS // /////////////////////////////////// #[macro_export] macro_rules! print { ($($args:tt)+) => ({ use core::fmt::Write; let _ = write!(crate::uart::Uart::new(0x1000_0000), $($args)+); }); } #[macro_export] macro_rules! println { () => ({ print!("\r\n") }); ($fmt:expr) => ({ print!(concat!($fmt, "\r\n")) }); ($fmt:expr, $($args:tt)+) => ({ print!(concat!($fmt, "\r\n"), $($args)+) }); } // /////////////////////////////////// // / LANGUAGE STRUCTURES / FUNCTIONS // /////////////////////////////////// #[no_mangle] extern "C" fn eh_personality() {} #[panic_handler] fn panic(info: &core::panic::PanicInfo) -> ! { print!("Aborting: "); if let Some(p) = info.location() { println!( "line {}, file {}: {}", p.line(), p.file(), info.message().unwrap() ); } else { println!("no information available."); } abort(); } #[no_mangle] extern "C" fn abort() -> ! { loop { unsafe { asm!("wfi"::::"volatile"); } } } // /////////////////////////////////// // / CONSTANTS // /////////////////////////////////// // const STR_Y: &str = "\x1b[38;2;79;221;13m✓\x1b[m"; // const STR_N: &str = "\x1b[38;2;221;41;13m✘\x1b[m"; // The following symbols come from asm/mem.S. We can use // the symbols directly, but the address of the symbols // themselves are their values, which can cause issues. // Instead, I created doubleword values in mem.S in the .rodata and .data // sections. 
extern "C" {
	// Symbols exported from asm/mem.S. As the comment above notes,
	// each is stored there as a doubleword VALUE (the address itself),
	// so reading the static yields the address directly instead of
	// having to take the address of a raw link symbol.
	static TEXT_START: usize;
	static TEXT_END: usize;
	static DATA_START: usize;
	static DATA_END: usize;
	static RODATA_START: usize;
	static RODATA_END: usize;
	static BSS_START: usize;
	static BSS_END: usize;
	static KERNEL_STACK_START: usize;
	static KERNEL_STACK_END: usize;
	static HEAP_START: usize;
	static HEAP_SIZE: usize;
}

/// Identity map range
/// Takes a contiguous allocation of memory and maps it using PAGE_SIZE
/// This assumes that start <= end
/// root:  the root page table the mappings are installed into
/// start: first byte of the region (rounded DOWN to a 4 KiB boundary)
/// end:   one past the last byte (rounded UP via align_val to 4 KiB)
/// bits:  the EntryBits value each leaf entry should carry (R/W/X/...)
pub fn id_map_range(root: &mut page::Table,
                    start: usize,
                    end: usize,
                    bits: i64)
{
	// Mask off the low 12 bits so we begin on a page boundary.
	let mut memaddr = start & !(page::PAGE_SIZE - 1);
	// Round `end` up to the next page, then count 4 KiB pages covered.
	let num_kb_pages =
		(page::align_val(end, 12) - memaddr) / page::PAGE_SIZE;

	// I named this num_kb_pages for future expansion when
	// I decide to allow for GiB (2^30) and 2MiB (2^21) page
	// sizes. However, the overlapping memory regions are causing
	// nightmares.
	for _ in 0..num_kb_pages {
		// vaddr == paddr here (identity map); level 0 => 4 KiB leaf.
		page::map(root, memaddr, memaddr, bits, 0);
		memaddr += 1 << 12;
	}
}

// ///////////////////////////////////
// / ENTRY POINT
// ///////////////////////////////////
#[no_mangle]
extern "C" fn kinit() {
	// We created kinit, which runs in super-duper mode
	// 3 called "machine mode".
	// The job of kinit() is to get us into supervisor mode
	// as soon as possible.
// Interrupts are disabled for the duration of kinit() uart::Uart::new(0x1000_0000).init(); page::init(); kmem::init(); // Map heap allocations let root_ptr = kmem::get_page_table(); let root_u = root_ptr as usize; let mut root = unsafe { root_ptr.as_mut().unwrap() }; let kheap_head = kmem::get_head() as usize; let total_pages = kmem::get_num_allocations(); println!(); println!(); unsafe { println!("TEXT: 0x{:x} -> 0x{:x}", TEXT_START, TEXT_END); println!("RODATA: 0x{:x} -> 0x{:x}", RODATA_START, RODATA_END); println!("DATA: 0x{:x} -> 0x{:x}", DATA_START, DATA_END); println!("BSS: 0x{:x} -> 0x{:x}", BSS_START, BSS_END); println!( "STACK: 0x{:x} -> 0x{:x}", KERNEL_STACK_START, KERNEL_STACK_END ); println!( "HEAP: 0x{:x} -> 0x{:x}", kheap_head, kheap_head + total_pages * page::PAGE_SIZE ); } id_map_range( &mut root, kheap_head, kheap_head + total_pages * page::PAGE_SIZE, page::EntryBits::ReadWrite.val(), ); // Using statics is inherently unsafe. unsafe { // Map heap descriptors let num_pages = HEAP_SIZE / page::PAGE_SIZE; id_map_range( &mut root, HEAP_START, HEAP_START + num_pages, page::EntryBits::ReadWrite.val(), ); // Map executable section id_map_range( &mut root, TEXT_START, TEXT_END, page::EntryBits::ReadExecute.val(), ); // Map rodata section // We put the ROdata section into the text section, so they can // potentially overlap however, we only care that it's read // only. 
id_map_range( &mut root, RODATA_START, RODATA_END, page::EntryBits::ReadExecute.val(), ); // Map data section id_map_range( &mut root, DATA_START, DATA_END, page::EntryBits::ReadWrite.val(), ); // Map bss section id_map_range( &mut root, BSS_START, BSS_END, page::EntryBits::ReadWrite.val(), ); // Map kernel stack id_map_range( &mut root, KERNEL_STACK_START, KERNEL_STACK_END, page::EntryBits::ReadWrite.val(), ); } // UART id_map_range( &mut root, 0x1000_0000, 0x1000_0100, page::EntryBits::ReadWrite.val(), ); // CLINT // -> MSIP id_map_range( &mut root, 0x0200_0000, 0x0200_ffff, page::EntryBits::ReadWrite.val(), ); // PLIC id_map_range( &mut root, 0x0c00_0000, 0x0c00_2000, page::EntryBits::ReadWrite.val(), ); id_map_range( &mut root, 0x0c20_0000, 0x0c20_8000, page::EntryBits::ReadWrite.val(), ); // When we return from here, we'll go back to boot.S and switch into // supervisor mode We will return the SATP register to be written when // we return. root_u is the root page table's address. When stored into // the SATP register, this is divided by 4 KiB (right shift by 12 bits). // We enable the MMU by setting mode 8. Bits 63, 62, 61, 60 determine // the mode. // 0 = Bare (no translation) // 8 = Sv39 // 9 = Sv48 // build_satp has these parameters: mode, asid, page table address. let satp_value = cpu::build_satp(cpu::SatpMode::Sv39, 0, root_u); unsafe { // We have to store the kernel's table. The tables will be moved // back and forth between the kernel's table and user // applicatons' tables. Note that we're writing the physical address // of the trap frame. cpu::mscratch_write( (&mut cpu::KERNEL_TRAP_FRAME[0] as *mut cpu::TrapFrame) as usize, ); cpu::sscratch_write(cpu::mscratch_read()); cpu::KERNEL_TRAP_FRAME[0].satp = satp_value; // Move the stack pointer to the very bottom. The stack is // actually in a non-mapped page. The stack is decrement-before // push and increment after pop. Therefore, the stack will be // allocated (decremented) before it is stored. 
cpu::KERNEL_TRAP_FRAME[0].trap_stack = page::zalloc(1).add(page::PAGE_SIZE); id_map_range( &mut root, cpu::KERNEL_TRAP_FRAME[0].trap_stack .sub(page::PAGE_SIZE,) as usize, cpu::KERNEL_TRAP_FRAME[0].trap_stack as usize, page::EntryBits::ReadWrite.val(), ); // The trap frame itself is stored in the mscratch register. id_map_range( &mut root, cpu::mscratch_read(), cpu::mscratch_read() + core::mem::size_of::(), page::EntryBits::ReadWrite.val(), ); page::print_page_allocations(); let p = cpu::KERNEL_TRAP_FRAME[0].trap_stack as usize - 1; let m = page::virt_to_phys(&root, p).unwrap_or(0); println!("Walk 0x{:x} = 0x{:x}", p, m); } // The following shows how we're going to walk to translate a virtual // address into a physical address. We will use this whenever a user // space application requires services. Since the user space application // only knows virtual addresses, we have to translate silently behind // the scenes. println!("Setting 0x{:x}", satp_value); println!("Scratch reg = 0x{:x}", cpu::mscratch_read()); cpu::satp_write(satp_value); cpu::satp_fence_asid(0); } #[no_mangle] extern "C" fn kinit_hart(hartid: usize) { // All non-0 harts initialize here. unsafe { // We have to store the kernel's table. The tables will be moved // back and forth between the kernel's table and user // applicatons' tables. cpu::mscratch_write( (&mut cpu::KERNEL_TRAP_FRAME[hartid] as *mut cpu::TrapFrame) as usize, ); // Copy the same mscratch over to the supervisor version of the // same register. cpu::sscratch_write(cpu::mscratch_read()); cpu::KERNEL_TRAP_FRAME[hartid].hartid = hartid; // We can't do the following until zalloc() is locked, but we // don't have locks, yet :( cpu::KERNEL_TRAP_FRAME[hartid].satp // = cpu::KERNEL_TRAP_FRAME[0].satp; // cpu::KERNEL_TRAP_FRAME[hartid].trap_stack = page::zalloc(1); } } #[no_mangle] extern "C" fn kmain() { // kmain() starts in supervisor mode. So, we should have the trap // vector setup and the MMU turned on when we get here. 
// We initialized my_uart in machine mode under kinit for debugging // prints, but this just grabs a pointer to it. let mut my_uart = uart::Uart::new(0x1000_0000); // Create a new scope so that we can test the global allocator and // deallocator { // We have the global allocator, so let's see if that works! let k = Box::::new(100); println!("Boxed value = {}", *k); // The following comes from the Rust documentation: // some bytes, in a vector let sparkle_heart = vec![240, 159, 146, 150]; // We know these bytes are valid, so we'll use `unwrap()`. // This will MOVE the vector. let sparkle_heart = String::from_utf8(sparkle_heart).unwrap(); println!("String = {}", sparkle_heart); println!("\n\nAllocations of a box, vector, and string"); kmem::print_table(); } println!("\n\nEverything should now be free:"); kmem::print_table(); unsafe { // Set the next machine timer to fire. let mtimecmp = 0x0200_4000 as *mut u64; let mtime = 0x0200_bff8 as *const u64; // The frequency given by QEMU is 10_000_000 Hz, so this sets // the next interrupt to fire one second from now. mtimecmp.write_volatile(mtime.read_volatile() + 10_000_000); // Let's cause a page fault and see what happens. This should trap // to m_trap under trap.rs let v = 0x0 as *mut u64; v.write_volatile(0); } // If we get here, the Box, vec, and String should all be freed since // they go out of scope. This calls their "Drop" trait. // Now see if we can read stuff: // Usually we can use #[test] modules in Rust, but it would convolute // the task at hand, and it requires us to create the testing harness // since the embedded testing system is part of the "std" library. 
loop { if let Some(c) = my_uart.get() { match c { 8 => { // This is a backspace, so we // essentially have to write a space and // backup again: print!("{} {}", 8 as char, 8 as char); }, 10 | 13 => { // Newline or carriage-return println!(); }, _ => { print!("{}", c as char); }, } } } } // /////////////////////////////////// // / RUST MODULES // /////////////////////////////////// pub mod cpu; pub mod kmem; pub mod page; pub mod trap; pub mod uart; ================================================ FILE: risc_v/chapters/ch4/src/page.rs ================================================ // page.rs // Memory routines // Stephen Marz // 6 October 2019 use core::{mem::size_of, ptr::null_mut}; // //////////////////////////////// // // Allocation routines // //////////////////////////////// extern "C" { static HEAP_START: usize; static HEAP_SIZE: usize; } // We will use ALLOC_START to mark the start of the actual // memory we can dish out. static mut ALLOC_START: usize = 0; const PAGE_ORDER: usize = 12; pub const PAGE_SIZE: usize = 1 << 12; /// Align (set to a multiple of some power of two) /// This takes an order which is the exponent to 2^order /// Therefore, all alignments must be made as a power of two. /// This function always rounds up. pub const fn align_val(val: usize, order: usize) -> usize { let o = (1usize << order) - 1; (val + o) & !o } #[repr(u8)] pub enum PageBits { Empty = 0, Taken = 1 << 0, Last = 1 << 1, } impl PageBits { // We convert PageBits to a u8 a lot, so this is // for convenience. pub fn val(self) -> u8 { self as u8 } } // Each page is described by the Page structure. Linux does this // as well, where each 4096-byte chunk of memory has a structure // associated with it. However, there structure is much larger. pub struct Page { flags: u8, } impl Page { // If this page has been marked as the final allocation, // this function returns true. Otherwise, it returns false. 
pub fn is_last(&self) -> bool { if self.flags & PageBits::Last.val() != 0 { true } else { false } } // If the page is marked as being taken (allocated), then // this function returns true. Otherwise, it returns false. pub fn is_taken(&self) -> bool { if self.flags & PageBits::Taken.val() != 0 { true } else { false } } // This is the opposite of is_taken(). pub fn is_free(&self) -> bool { !self.is_taken() } // Clear the Page structure and all associated allocations. pub fn clear(&mut self) { self.flags = PageBits::Empty.val(); } // Set a certain flag. We ran into trouble here since PageBits // is an enumeration and we haven't implemented the BitOr Trait // on it. pub fn set_flag(&mut self, flag: PageBits) { self.flags |= flag.val(); } pub fn clear_flag(&mut self, flag: PageBits) { self.flags &= !(flag.val()); } } /// Initialize the allocation system. There are several ways that we can /// implement the page allocator: /// 1. Free list (singly linked list where it starts at the first free /// allocation) 2. Bookkeeping list (structure contains a taken and length) /// 3. Allocate one Page structure per 4096 bytes (this is what I chose) /// 4. Others pub fn init() { unsafe { // let desc_per_page = PAGE_SIZE / size_of::(); let num_pages = HEAP_SIZE / PAGE_SIZE; // let num_desc_pages = num_pages / desc_per_page; let ptr = HEAP_START as *mut Page; // Clear all pages to make sure that they aren't accidentally // taken for i in 0..num_pages { (*ptr.add(i)).clear(); } // Determine where the actual useful memory starts. This will be // after all Page structures. We also must align the ALLOC_START // to a page-boundary (PAGE_SIZE = 4096). 
ALLOC_START = // (HEAP_START + num_pages * size_of::() + PAGE_SIZE - 1) // & !(PAGE_SIZE - 1); ALLOC_START = align_val( HEAP_START + num_pages * size_of::(), PAGE_ORDER, ); } } /// Allocate a page or multiple pages /// pages: the number of PAGE_SIZE pages to allocate pub fn alloc(pages: usize) -> *mut u8 { // We have to find a contiguous allocation of pages assert!(pages > 0); unsafe { // We create a Page structure for each page on the heap. We // actually might have more since HEAP_SIZE moves and so does // the size of our structure, but we'll only waste a few bytes. let num_pages = HEAP_SIZE / PAGE_SIZE; let ptr = HEAP_START as *mut Page; for i in 0..num_pages - pages { let mut found = false; // Check to see if this Page is free. If so, we have our // first candidate memory address. if (*ptr.add(i)).is_free() { // It was FREE! Yay! found = true; for j in i..i + pages { // Now check to see if we have a // contiguous allocation for all of the // request pages. If not, we should // check somewhere else. if (*ptr.add(j)).is_taken() { found = false; break; } } } // We've checked to see if there are enough contiguous // pages to form what we need. If we couldn't, found // will be false, otherwise it will be true, which means // we've found valid memory we can allocate. if found { for k in i..i + pages - 1 { (*ptr.add(k)).set_flag(PageBits::Taken); } // The marker for the last page is // PageBits::Last This lets us know when we've // hit the end of this particular allocation. (*ptr.add(i+pages-1)).set_flag(PageBits::Taken); (*ptr.add(i+pages-1)).set_flag(PageBits::Last); // The Page structures themselves aren't the // useful memory. Instead, there is 1 Page // structure per 4096 bytes starting at // ALLOC_START. return (ALLOC_START + PAGE_SIZE * i) as *mut u8; } } } // If we get here, that means that no contiguous allocation was // found. 
null_mut() } /// Allocate and zero a page or multiple pages /// pages: the number of pages to allocate /// Each page is PAGE_SIZE which is calculated as 1 << PAGE_ORDER /// On RISC-V, this typically will be 4,096 bytes. pub fn zalloc(pages: usize) -> *mut u8 { // Allocate and zero a page. // First, let's get the allocation let ret = alloc(pages); if !ret.is_null() { let size = (PAGE_SIZE * pages) / 8; let big_ptr = ret as *mut u64; for i in 0..size { // We use big_ptr so that we can force an // sd (store doubleword) instruction rather than // the sb. This means 8x fewer stores than before. // Typically we have to be concerned about remaining // bytes, but fortunately 4096 % 8 = 0, so we // won't have any remaining bytes. unsafe { (*big_ptr.add(i)) = 0; } } } ret } /// Deallocate a page by its pointer /// The way we've structured this, it will automatically coalesce /// contiguous pages. pub fn dealloc(ptr: *mut u8) { // Make sure we don't try to free a null pointer. assert!(!ptr.is_null()); unsafe { let addr = HEAP_START + (ptr as usize - ALLOC_START) / PAGE_SIZE; // Make sure that the address makes sense. The address we // calculate here is the page structure, not the HEAP address! assert!(addr >= HEAP_START && addr < HEAP_START + HEAP_SIZE); let mut p = addr as *mut Page; // Keep clearing pages until we hit the last page. while (*p).is_taken() && !(*p).is_last() { (*p).clear(); p = p.add(1); } // If the following assertion fails, it is most likely // caused by a double-free. assert!( (*p).is_last() == true, "Possible double-free detected! (Not taken found \ before last)" ); // If we get here, we've taken care of all previous pages and // we are on the last page. (*p).clear(); } } /// Print all page allocations /// This is mainly used for debugging. 
pub fn print_page_allocations() {
	unsafe {
		// Descriptors below ALLOC_START are bookkeeping only; exclude
		// that region so num_pages counts just the dole-out-able pages.
		let num_pages = (HEAP_SIZE - (ALLOC_START - HEAP_START)) / PAGE_SIZE;
		let mut beg = HEAP_START as *const Page;
		let end = beg.add(num_pages);
		let alloc_beg = ALLOC_START;
		let alloc_end = ALLOC_START + num_pages * PAGE_SIZE;
		println!();
		println!(
		         "PAGE ALLOCATION TABLE\nMETA: {:p} -> {:p}\nPHYS: \
		          0x{:x} -> 0x{:x}",
		         beg, end, alloc_beg, alloc_end
		);
		println!("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
		let mut num = 0;
		while beg < end {
			if (*beg).is_taken() {
				let start = beg as usize;
				// Page is a single u8, so the descriptor offset
				// (start - HEAP_START) IS the page index.
				let memaddr = ALLOC_START
				              + (start - HEAP_START) * PAGE_SIZE;
				print!("0x{:x} => ", memaddr);
				// Walk forward through this one allocation until
				// the descriptor flagged Last closes it out.
				loop {
					num += 1;
					if (*beg).is_last() {
						let end = beg as usize;
						let memaddr =
							ALLOC_START
							+ (end - HEAP_START) * PAGE_SIZE
							+ PAGE_SIZE - 1;
						print!(
						       "0x{:x}: {:>3} page(s)",
						       memaddr,
						       (end - start + 1)
						);
						println!(".");
						break;
					}
					beg = beg.add(1);
				}
			}
			beg = beg.add(1);
		}
		println!("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
		println!(
		         "Allocated: {:>6} pages ({:>10} bytes).",
		         num,
		         num * PAGE_SIZE
		);
		println!(
		         "Free : {:>6} pages ({:>10} bytes).",
		         num_pages - num,
		         (num_pages - num) * PAGE_SIZE
		);
		println!();
	}
}

// ////////////////////////////////
// // MMU Routines
// ////////////////////////////////

// Represent (repr) our entry bits as SIGNED 64-bit integers (i64) —
// not unsigned as an earlier comment claimed — because the reserved
// upper bits of a page table entry must sign-extend (see the note on
// the Entry structure below).
#[repr(i64)]
#[derive(Copy, Clone)]
pub enum EntryBits {
	None = 0,
	Valid = 1 << 0,
	Read = 1 << 1,
	Write = 1 << 2,
	Execute = 1 << 3,
	User = 1 << 4,
	Global = 1 << 5,
	Access = 1 << 6,
	Dirty = 1 << 7,

	// Convenience combinations
	ReadWrite = 1 << 1 | 1 << 2,
	ReadExecute = 1 << 1 | 1 << 3,
	ReadWriteExecute = 1 << 1 | 1 << 2 | 1 << 3,

	// User Convenience Combinations
	UserReadWrite = 1 << 1 | 1 << 2 | 1 << 4,
	UserReadExecute = 1 << 1 | 1 << 3 | 1 << 4,
	UserReadWriteExecute = 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4,
}

// Helper functions to convert the enumeration
// into an i64, which is what our page table
// entries will be.
impl EntryBits {
	pub fn val(self) -> i64 {
		self as i64
	}
}

// A single entry.
We're using an i64 so that // this will sign-extend rather than zero-extend // since RISC-V requires that the reserved sections // take on the most significant bit. pub struct Entry { pub entry: i64, } // The Entry structure describes one of the 512 entries per table, which is // described in the RISC-V privileged spec Figure 4.18. impl Entry { pub fn is_valid(&self) -> bool { self.get_entry() & EntryBits::Valid.val() != 0 } // The first bit (bit index #0) is the V bit for // valid. pub fn is_invalid(&self) -> bool { !self.is_valid() } // A leaf has one or more RWX bits set pub fn is_leaf(&self) -> bool { self.get_entry() & 0xe != 0 } pub fn is_branch(&self) -> bool { !self.is_leaf() } pub fn set_entry(&mut self, entry: i64) { self.entry = entry; } pub fn get_entry(&self) -> i64 { self.entry } } // Table represents a single table, which contains 512 (2^9), 64-bit entries. pub struct Table { pub entries: [Entry; 512], } impl Table { pub fn len() -> usize { 512 } } /// Map a virtual address to a physical address using 4096-byte page /// size. /// root: a mutable reference to the root Table /// vaddr: The virtual address to map /// paddr: The physical address to map /// bits: An OR'd bitset containing the bits the leaf should have. /// The bits should contain only the following: /// Read, Write, Execute, User, and/or Global /// The bits MUST include one or more of the following: /// Read, Write, Execute /// The valid bit automatically gets added. pub fn map(root: &mut Table, vaddr: usize, paddr: usize, bits: i64, level: usize) { // Make sure that Read, Write, or Execute have been provided // otherwise, we'll leak memory and always create a page fault. 
assert!(bits & 0xe != 0); // Extract out each VPN from the virtual address // On the virtual address, each VPN is exactly 9 bits, // which is why we use the mask 0x1ff = 0b1_1111_1111 (9 bits) let vpn = [ // VPN[0] = vaddr[20:12] (vaddr >> 12) & 0x1ff, // VPN[1] = vaddr[29:21] (vaddr >> 21) & 0x1ff, // VPN[2] = vaddr[38:30] (vaddr >> 30) & 0x1ff, ]; // Just like the virtual address, extract the physical address // numbers (PPN). However, PPN[2] is different in that it stores // 26 bits instead of 9. Therefore, we use, // 0x3ff_ffff = 0b11_1111_1111_1111_1111_1111_1111 (26 bits). let ppn = [ // PPN[0] = paddr[20:12] (paddr >> 12) & 0x1ff, // PPN[1] = paddr[29:21] (paddr >> 21) & 0x1ff, // PPN[2] = paddr[55:30] (paddr >> 30) & 0x3ff_ffff, ]; // We will use this as a floating reference so that we can set // individual entries as we walk the table. let mut v = &mut root.entries[vpn[2]]; // Now, we're going to traverse the page table and set the bits // properly. We expect the root to be valid, however we're required to // create anything beyond the root. // In Rust, we create a range iterator using the .. operator. // The .rev() will reverse the iteration since we need to start with // VPN[2] The .. operator is inclusive on start but exclusive on end. // So, (0..2) will iterate 0 and 1. for i in (level..2).rev() { if !v.is_valid() { // Allocate a page let page = zalloc(1); // The page is already aligned by 4,096, so store it // directly The page is stored in the entry shifted // right by 2 places. v.set_entry( (page as i64 >> 2) | EntryBits::Valid.val(), ); } let entry = ((v.get_entry() & !0x3ff) << 2) as *mut Entry; v = unsafe { entry.add(vpn[i]).as_mut().unwrap() }; } // When we get here, we should be at VPN[0] and v should be pointing to // our entry. 
// The entry structure is Figure 4.18 in the RISC-V Privileged // Specification let entry = (ppn[2] << 28) as i64 | // PPN[2] = [53:28] (ppn[1] << 19) as i64 | // PPN[1] = [27:19] (ppn[0] << 10) as i64 | // PPN[0] = [18:10] bits | // Specified bits, such as User, Read, Write, etc EntryBits::Valid.val() | // Valid bit EntryBits::Dirty.val() | // Some machines require this to =1 EntryBits::Access.val() // Just like dirty, some machines require this ; // Set the entry. V should be set to the correct pointer by the loop // above. v.set_entry(entry); } /// Unmaps and frees all memory associated with a table. /// root: The root table to start freeing. /// NOTE: This does NOT free root directly. This must be /// freed manually. /// The reason we don't free the root is because it is /// usually embedded into the Process structure. pub fn unmap(root: &mut Table) { // Start with level 2 for lv2 in 0..Table::len() { let ref entry_lv2 = root.entries[lv2]; if entry_lv2.is_valid() && entry_lv2.is_branch() { // This is a valid entry, so drill down and free. let memaddr_lv1 = (entry_lv2.get_entry() & !0x3ff) << 2; let table_lv1 = unsafe { // Make table_lv1 a mutable reference instead of // a pointer. (memaddr_lv1 as *mut Table).as_mut().unwrap() }; for lv1 in 0..Table::len() { let ref entry_lv1 = table_lv1.entries[lv1]; if entry_lv1.is_valid() && entry_lv1.is_branch() { let memaddr_lv0 = (entry_lv1.get_entry() & !0x3ff) << 2; // The next level is level 0, which // cannot have branches, therefore, // we free here. dealloc(memaddr_lv0 as *mut u8); } } dealloc(memaddr_lv1 as *mut u8); } } } /// Walk the page table to convert a virtual address to a /// physical address. /// If a page fault would occur, this returns None /// Otherwise, it returns Some with the physical address. 
pub fn virt_to_phys(root: &Table, vaddr: usize) -> Option<usize> {
	// NOTE(review): the return type's generic parameter was lost in
	// extraction (`-> Option {`); restored to `Option<usize>` — the
	// function returns the translated physical address.
	// Walk the page table pointed to by root
	let vpn = [
	           // VPN[0] = vaddr[20:12]
	           (vaddr >> 12) & 0x1ff,
	           // VPN[1] = vaddr[29:21]
	           (vaddr >> 21) & 0x1ff,
	           // VPN[2] = vaddr[38:30]
	           (vaddr >> 30) & 0x1ff,
	];

	let mut v = &root.entries[vpn[2]];
	for i in (0..=2).rev() {
		if v.is_invalid() {
			// This is an invalid entry, page fault.
			break;
		}
		else if v.is_leaf() {
			// According to RISC-V, a leaf can be at any level.
			// The offset mask masks off the PPN. Each PPN is 9
			// bits and they start at bit #12. So, our formula
			// 12 + i * 9
			let off_mask = (1 << (12 + i * 9)) - 1;
			let vaddr_pgoff = vaddr & off_mask;
			let addr = ((v.get_entry() << 2) as usize) & !off_mask;
			return Some(addr | vaddr_pgoff);
		}
		// Set v to the next entry which is pointed to by this
		// entry. However, the address was shifted right by 2 places
		// when stored in the page table entry, so we shift it left
		// to get it back into place.
		let entry = ((v.get_entry() & !0x3ff) << 2) as *const Entry;
		// We do i - 1 here, however we should get None or Some() above
		// before we do 0 - 1 = -1.
		v = unsafe { entry.add(vpn[i - 1]).as_ref().unwrap() };
	}
	// If we get here, we've exhausted all valid tables and haven't
	// found a leaf.
	None
}


================================================
FILE: risc_v/chapters/ch4/src/trap.rs
================================================

// trap.rs
// Trap routines
// Stephen Marz
// 10 October 2019

use crate::cpu::TrapFrame;

#[no_mangle]
extern "C" fn m_trap(epc: usize,
                     tval: usize,
                     cause: usize,
                     hart: usize,
                     status: usize,
                     frame: &mut TrapFrame)
                     -> usize
{
	// We're going to handle all traps in machine mode. RISC-V lets
	// us delegate to supervisor mode, but switching out SATP (virtual memory)
	// gets hairy.
	// Bit 63 of mcause is 1 for interrupts (asynchronous) and 0 for
	// exceptions (synchronous). The comparison is already a bool, so
	// the `if … { true } else { false }` wrapper was dropped
	// (clippy: needless_bool).
	let is_async = cause >> 63 & 1 == 1;
	// The cause contains the type of trap (sync, async) as well as the cause
	// number. So, here we narrow down just the cause number.
let cause_num = cause & 0xfff; let mut return_pc = epc; if is_async { // Asynchronous trap match cause_num { 3 => { // Machine software println!("Machine software interrupt CPU#{}", hart); }, 7 => unsafe { // Machine timer let mtimecmp = 0x0200_4000 as *mut u64; let mtime = 0x0200_bff8 as *const u64; // The frequency given by QEMU is 10_000_000 Hz, so this sets // the next interrupt to fire one second from now. mtimecmp.write_volatile(mtime.read_volatile() + 10_000_000); }, 11 => { // Machine external (interrupt from Platform Interrupt Controller (PLIC)) println!("Machine external interrupt CPU#{}", hart); }, _ => { panic!("Unhandled async trap CPU#{} -> {}\n", hart, cause_num); } } } else { // Synchronous trap match cause_num { 2 => { // Illegal instruction panic!("Illegal instruction CPU#{} -> 0x{:08x}: 0x{:08x}\n", hart, epc, tval); }, 8 => { // Environment (system) call from User mode println!("E-call from User mode! CPU#{} -> 0x{:08x}", hart, epc); return_pc += 4; }, 9 => { // Environment (system) call from Supervisor mode println!("E-call from Supervisor mode! CPU#{} -> 0x{:08x}", hart, epc); return_pc += 4; }, 11 => { // Environment (system) call from Machine mode panic!("E-call from Machine mode! 
CPU#{} -> 0x{:08x}\n", hart, epc); }, // Page faults 12 => { // Instruction page fault println!("Instruction page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval); return_pc += 4; }, 13 => { // Load page fault println!("Load page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval); return_pc += 4; }, 15 => { // Store page fault println!("Store page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval); return_pc += 4; }, _ => { panic!("Unhandled sync trap CPU#{} -> {}\n", hart, cause_num); } } }; // Finally, return the updated program counter return_pc } ================================================ FILE: risc_v/chapters/ch4/src/uart.rs ================================================ // uart.rs // UART routines and driver use core::{convert::TryInto, fmt::{Error, Write}}; pub struct Uart { base_address: usize, } impl Write for Uart { fn write_str(&mut self, out: &str) -> Result<(), Error> { for c in out.bytes() { self.put(c); } Ok(()) } } impl Uart { pub fn new(base_address: usize) -> Self { Uart { base_address } } pub fn init(&mut self) { let ptr = self.base_address as *mut u8; unsafe { // First, set the word length, which // are bits 0 and 1 of the line control register (LCR) // which is at base_address + 3 // We can easily write the value 3 here or 0b11, but I'm // extending it so that it is clear we're setting two // individual fields // Word 0 Word 1 // ~~~~~~ ~~~~~~ let lcr: u8 = (1 << 0) | (1 << 1); ptr.add(3).write_volatile(lcr); // Now, enable the FIFO, which is bit index 0 of the // FIFO control register (FCR at offset 2). // Again, we can just write 1 here, but when we use left // shift, it's easier to see that we're trying to write // bit index #0. ptr.add(2).write_volatile(1 << 0); // Enable receiver buffer interrupts, which is at bit // index 0 of the interrupt enable register (IER at // offset 1). 
ptr.add(1).write_volatile(1 << 0); // If we cared about the divisor, the code below would // set the divisor from a global clock rate of 22.729 // MHz (22,729,000 cycles per second) to a signaling // rate of 2400 (BAUD). We usually have much faster // signalling rates nowadays, but this demonstrates what // the divisor actually does. The formula given in the // NS16500A specification for calculating the divisor // is: // divisor = ceil( (clock_hz) / (baud_sps x 16) ) // So, we substitute our values and get: // divisor = ceil( 22_729_000 / (2400 x 16) ) // divisor = ceil( 22_729_000 / 38_400 ) // divisor = ceil( 591.901 ) = 592 // The divisor register is two bytes (16 bits), so we // need to split the value 592 into two bytes. // Typically, we would calculate this based on measuring // the clock rate, but again, for our purposes [qemu], // this doesn't really do anything. let divisor: u16 = 592; let divisor_least: u8 = (divisor & 0xff).try_into().unwrap(); let divisor_most: u8 = (divisor >> 8).try_into().unwrap(); // Notice that the divisor register DLL (divisor latch // least) and DLM (divisor latch most) have the same // base address as the receiver/transmitter and the // interrupt enable register. To change what the base // address points to, we open the "divisor latch" by // writing 1 into the Divisor Latch Access Bit (DLAB), // which is bit index 7 of the Line Control Register // (LCR) which is at base_address + 3. ptr.add(3).write_volatile(lcr | 1 << 7); // Now, base addresses 0 and 1 point to DLL and DLM, // respectively. Put the lower 8 bits of the divisor // into DLL ptr.add(0).write_volatile(divisor_least); ptr.add(1).write_volatile(divisor_most); // Now that we've written the divisor, we never have to // touch this again. In hardware, this will divide the // global clock (22.729 MHz) into one suitable for 2,400 // signals per second. So, to once again get access to // the RBR/THR/IER registers, we need to close the DLAB // bit by clearing it to 0. 
ptr.add(3).write_volatile(lcr); } } pub fn put(&mut self, c: u8) { let ptr = self.base_address as *mut u8; unsafe { ptr.add(0).write_volatile(c); } } pub fn get(&mut self) -> Option { let ptr = self.base_address as *mut u8; unsafe { if ptr.add(5).read_volatile() & 1 == 0 { // The DR bit is 0, meaning no data None } else { // The DR bit is 1, meaning data! Some(ptr.add(0).read_volatile()) } } } } ================================================ FILE: risc_v/chapters/ch5/.cargo/config ================================================ [build] target = "riscv64gc-unknown-none-elf" [target.riscv64gc-unknown-none-elf] linker = "riscv64-unknown-linux-gnu-gcc" ================================================ FILE: risc_v/chapters/ch5/.gitignore ================================================ os.elf target/* Cargo.lock hdd.dsk ================================================ FILE: risc_v/chapters/ch5/Cargo.toml ================================================ [package] name = "sos" version = "0.1.0" authors = ["Stephen Marz "] edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] crate-type = ["staticlib"] [dependencies] ================================================ FILE: risc_v/chapters/ch5/Makefile ================================================ ##### ## BUILD ##### CC=riscv64-unknown-linux-gnu-gcc CFLAGS=-Wall -Wextra -pedantic -Wextra -O0 -g CFLAGS+=-static -ffreestanding -nostdlib -fno-rtti -fno-exceptions CFLAGS+=-march=rv64gc -mabi=lp64 INCLUDES= LINKER_SCRIPT=-Tsrc/lds/virt.lds TYPE=debug RUST_TARGET=./target/riscv64gc-unknown-none-elf/$(TYPE) LIBS=-L$(RUST_TARGET) SOURCES_ASM=$(wildcard src/asm/*.S) LIB=-lsos -lgcc OUT=os.elf ##### ## QEMU ##### QEMU=qemu-system-riscv64 MACH=virt CPU=rv64 CPUS=4 MEM=128M DISK=hdd.dsk # DRIVE= -drive if=none,format=raw,file=$(DISK),id=foo -device virtio-blk-device,scsi=off,drive=foo DRIVE= all: cargo build $(CC) $(CFLAGS) $(LINKER_SCRIPT) $(INCLUDES) -o $(OUT) 
$(SOURCES_ASM) $(LIBS) $(LIB) run: all $(QEMU) -machine $(MACH) -cpu $(CPU) -smp $(CPUS) -m $(MEM) $(DRIVE) -nographic -serial mon:stdio -bios none -kernel $(OUT) .PHONY: clean clean: cargo clean rm -f $(OUT) ================================================ FILE: risc_v/chapters/ch5/make_hdd.sh ================================================ #!/bin/sh dd if=/dev/zero of=hdd.dsk bs=1M count=32 ================================================ FILE: risc_v/chapters/ch5/src/asm/boot.S ================================================ # boot.S # bootloader for SoS # Stephen Marz # 8 February 2019 # Disable generation of compressed instructions. .option norvc # Define a .text.init section. The .text.init is put at the # starting address so that the entry _start is put at the RISC-V # address 0x8000_0000. .section .text.init # Execution starts here. .global _start _start: # Disable linker instruction relaxation for the `la` instruction below. # This disallows the assembler from assuming that `gp` is already initialized. # This causes the value stored in `gp` to be calculated from `pc`. # The job of the global pointer is to give the linker the ability to address # memory relative to GP instead of as an absolute address. .option push .option norelax la gp, _global_pointer .option pop # SATP should be zero, but let's make sure. Each HART has its own # SATP register. csrw satp, zero # Any hardware threads (hart) that are not bootstrapping # need to wait for an IPI csrr t0, mhartid bnez t0, 3f # Set all bytes in the BSS section to zero. la a0, _bss_start la a1, _bss_end bgeu a0, a1, 2f 1: sd zero, (a0) addi a0, a0, 8 bltu a0, a1, 1b 2: # The stack grows from bottom to top, so we put the stack pointer # to the very end of the stack range. la sp, _stack_end # Setting `mstatus` register: # 0b01 << 11: Machine's previous protection mode is 2 (MPP=2). 
li t0, 0b11 << 11 csrw mstatus, t0 # Do not allow interrupts while running kinit csrw mie, zero # Machine's exception program counter (MEPC) is set to `kinit`. la t1, kinit csrw mepc, t1 # Set the return address to get us into supervisor mode la ra, 2f # We use mret here so that the mstatus register is properly updated. mret 2: # We set the return address (ra above) to this label. When kinit() is finished # in Rust, it will return here. # Setting `mstatus` (supervisor status) register: # 0b01 << 11 : Previous protection mode is 1 (MPP=01 [Supervisor]). # 1 << 7 : Previous machine interrupt-enable bit is 1 (MPIE=1 [Enabled]) # 1 << 5 : Previous interrupt-enable bit is 1 (SPIE=1 [Enabled]). # We set the "previous" bits because the mret will write the current bits # with the previous bits. li t0, (0b01 << 11) | (1 << 7) | (1 << 5) csrw mstatus, t0 # Machine's trap vector base address is set to `m_trap_vector`, for # "machine" trap vector. la t2, m_trap_vector csrw mtvec, t2 # Setting `stvec` (supervisor trap vector) register: # Essentially this is a function pointer, but the last two bits can be 00 or 01 # 00 : All exceptions set pc to BASE # 01 : Asynchronous interrupts set pc to BASE + 4 x scause # la t3, s_trap_vector # csrw stvec, t3 # Jump to kmain. We put the MPP = 01 for supervisor mode, so after # mret, we will jump to kmain in supervisor mode. la t1, kmain csrw mepc, t1 # Setting `sie` (supervisor interrupt enable) register: # This register takes the same bits as mideleg # 1 << 1 : Supervisor software interrupt enable (SSIE=1 [Enabled]) # 1 << 5 : Supervisor timer interrupt enable (STIE=1 [Enabled]) # 1 << 9 : Supervisor external interrupt enable (SEIE=1 [Enabled]) # 0xaaa = MEIP/SEIP and MTIP/STIP and MSIP/SSIP li t2, 0xaaa csrw mie, t2 la ra, 4f mret 3: # Parked harts go here. We need to set these # to only awaken if it receives a software interrupt, # which we're going to call the SIPI (Software Intra-Processor Interrupt). 
# We call the SIPI by writing the software interrupt into the Core Local Interruptor (CLINT) # Which is calculated by: base_address + hart * 4 # where base address is 0x0200_0000 (MMIO CLINT base address) # We only use additional harts to run user-space programs, although this may # change. # We divide up the stack so the harts aren't clobbering one another. la sp, _stack_end li t0, 0x10000 csrr a0, mhartid mul t0, t0, a0 sub sp, sp, t0 # The parked harts will be put into machine mode with interrupts enabled. li t0, 0b11 << 11 | (1 << 7) csrw mstatus, t0 # Allow for MSIP (Software interrupt). We will write the MSIP from hart #0 to # awaken these parked harts. li t3, (1 << 3) csrw mie, t3 # Machine's exception program counter (MEPC) is set to the Rust initialization # code and waiting loop. la t1, kinit_hart csrw mepc, t1 # Machine's trap vector base address is set to `m_trap_vector`, for # "machine" trap vector. The Rust initialization routines will give each # hart its own trap frame. We can use the same trap function and distinguish # between each hart by looking at the trap frame. la t2, m_trap_vector csrw mtvec, t2 # Whenever our hart is done initializing, we want it to return to the waiting # loop, which is just below mret. la ra, 4f # We use mret here so that the mstatus register is properly updated. mret 4: # wfi = wait for interrupt. This is a hint to the harts to shut everything needed # down. However, the RISC-V specification allows for wfi to do nothing. Anyway, # with QEMU, this will save some CPU! 
wfi j 4b ================================================ FILE: risc_v/chapters/ch5/src/asm/mem.S ================================================ // mem.S // Importation of linker symbols .section .rodata .global HEAP_START HEAP_START: .dword _heap_start .global HEAP_SIZE HEAP_SIZE: .dword _heap_size .global TEXT_START TEXT_START: .dword _text_start .global TEXT_END TEXT_END: .dword _text_end .global DATA_START DATA_START: .dword _data_start .global DATA_END DATA_END: .dword _data_end .global RODATA_START RODATA_START: .dword _rodata_start .global RODATA_END RODATA_END: .dword _rodata_end .global BSS_START BSS_START: .dword _bss_start .global BSS_END BSS_END: .dword _bss_end .global KERNEL_STACK_START KERNEL_STACK_START: .dword _stack_start .global KERNEL_STACK_END KERNEL_STACK_END: .dword _stack_end ================================================ FILE: risc_v/chapters/ch5/src/asm/trap.S ================================================ # trap.S # Trap handler and global context # Steve Operating System # Stephen Marz # 24 February 2019 .option norvc .altmacro .set NUM_GP_REGS, 32 # Number of registers per context .set NUM_FP_REGS, 32 .set REG_SIZE, 8 # Register size (in bytes) .set MAX_CPUS, 8 # Maximum number of CPUs # Use macros for saving and restoring multiple registers .macro save_gp i, basereg=t6 sd x\i, ((\i)*REG_SIZE)(\basereg) .endm .macro load_gp i, basereg=t6 ld x\i, ((\i)*REG_SIZE)(\basereg) .endm .macro save_fp i, basereg=t6 fsd f\i, ((NUM_GP_REGS+(\i))*REG_SIZE)(\basereg) .endm .macro load_fp i, basereg=t6 fld f\i, ((NUM_GP_REGS+(\i))*REG_SIZE)(\basereg) .endm .section .text .global m_trap_vector # This must be aligned by 4 since the last two bits # of the mtvec register do not contribute to the address # of this vector. .align 4 m_trap_vector: # All registers are volatile here, we need to save them # before we do anything. csrrw t6, mscratch, t6 # csrrw will atomically swap t6 into mscratch and the old # value of mscratch into t6. 
This is nice because we just # switched values and didn't destroy anything -- all atomically! # in cpu.rs we have a structure of: # 32 gp regs 0 # 32 fp regs 256 # SATP register 512 # Trap stack 520 # CPU HARTID 528 # We use t6 as the temporary register because it is the very # bottom register (x31) .set i, 1 .rept 30 save_gp %i .set i, i+1 .endr # Save the actual t6 register, which we swapped into # mscratch mv t5, t6 csrr t6, mscratch save_gp 31, t5 # Restore the kernel trap frame into mscratch csrw mscratch, t5 # Get ready to go into Rust (trap.rs) # We don't want to write into the user's stack or whomever # messed with us here. csrr a0, mepc csrr a1, mtval csrr a2, mcause csrr a3, mhartid csrr a4, mstatus mv a5, t5 ld sp, 520(a5) call m_trap # When we get here, we've returned from m_trap, restore registers # and return. # m_trap will return the return address via a0. csrw mepc, a0 # Now load the trap frame back into t6 csrr t6, mscratch # Restore all GP registers .set i, 1 .rept 31 load_gp %i .set i, i+1 .endr # Since we ran this loop 31 times starting with i = 1, # the last one loaded t6 back to its original value. mret .global make_syscall make_syscall: ecall ret ================================================ FILE: risc_v/chapters/ch5/src/cpu.rs ================================================ // cpu.rs // CPU and CPU-related routines // Also contains the kernel's trap frame // Stephen Marz // 14 October 2019 use core::ptr::null_mut; /// In 64-bit mode, we're given three different modes for the MMU: /// 0 - The MMU is off -- no protection and no translation PA = VA /// 8 - This is Sv39 mode -- 39-bit virtual addresses /// 9 - This is Sv48 mode -- 48-bit virtual addresses #[repr(usize)] pub enum SatpMode { Off = 0, Sv39 = 8, Sv48 = 9, } /// The trap frame is set into a structure /// and packed into each hart's mscratch register. /// This allows for quick reference and full /// context switch handling. 
#[repr(C)]
#[derive(Clone, Copy)]
pub struct TrapFrame {
	// NOTE: trap.S (m_trap_vector) addresses these fields by hard-coded
	// byte offsets (gp regs at 0, fp regs at 256, satp at 512, trap
	// stack at 520, hartid at 528 -- see `ld sp, 520(a5)`), so the field
	// order and the #[repr(C)] layout must not change.
	pub regs:       [usize; 32], // General-purpose registers x0..x31 (bytes 0 - 255)
	pub fregs:      [usize; 32], // Floating-point registers f0..f31 (bytes 256 - 511)
	pub satp:       usize,       // Saved SATP (bytes 512 - 519)
	pub trap_stack: *mut u8,     // Stack used while handling a trap (byte 520)
	pub hartid:     usize,       // Hart that owns this frame (byte 528)
}

/// Rust requires that we initialize our structures
/// because of the move semantics. What'll happen below
/// is Rust will construct a new TrapFrame and move it
/// out of the zero() function below. Rust contains two
/// different "selfs" where self can refer to the object
/// in memory or Self (capital S) which refers to the
/// data type of the structure. In the case below, this
/// is TrapFrame.
impl TrapFrame {
	/// Build a fully zeroed trap frame. This is a `const fn` so it can
	/// be used in static initializers.
	pub const fn zero() -> Self {
		TrapFrame { regs:       [0; 32],
		            fregs:      [0; 32],
		            satp:       0,
		            trap_stack: null_mut(),
		            hartid:     0, }
	}
}

/// The global kernel trap frame stores 8 separate
/// frames -- one per CPU hart. We will switch these
/// in and out and store a dormant trap frame with
/// the process itself.
pub static mut KERNEL_TRAP_FRAME: [TrapFrame; 8] =
	[TrapFrame::zero(); 8];

/// The SATP register contains three fields: mode, address space id, and
/// the first level table address (level 2 for Sv39). This function
/// helps make the 64-bit register contents based on those three
/// fields.
pub const fn build_satp(mode: SatpMode, asid: usize, addr: usize) -> usize {
	// Per the RISC-V privileged specification, SATP (RV64) packs:
	//   MODE -> bits [63:60]
	//   ASID -> bits [59:44] (16 bits, hence the 0xffff mask)
	//   PPN  -> bits [43:0]  (addr >> 12, masked to 44 bits)
	(mode as usize) << 60
	| (asid & 0xffff) << 44
	| (addr >> 12) & 0xff_ffff_ffff
}

// The accessors below wrap privileged CSR instructions in inline
// assembly. NOTE(review): these use the legacy LLVM-style `asm!`
// operand syntax (constraints after a `:`), which appears to require a
// nightly toolchain with #![feature(asm)] from this era -- confirm the
// pinned toolchain before attempting to modernize them.

/// Read the mhartid CSR (ID of the currently executing hart).
pub fn mhartid_read() -> usize {
	unsafe {
		let rval;
		asm!("csrr $0, mhartid" :"=r"(rval));
		rval
	}
}

/// Read the mie (machine interrupt enable) CSR.
pub fn mie_read() -> usize {
	unsafe {
		let rval;
		asm!("csrr $0, mie" :"=r"(rval));
		rval
	}
}

/// Write the mie (machine interrupt enable) CSR.
pub fn mie_write(val: usize) {
	unsafe {
		asm!("csrw mie, $0" :: "r"(val));
	}
}

/// Write the mstatus (machine status) CSR.
pub fn mstatus_write(val: usize) {
	unsafe {
		asm!("csrw mstatus, $0" ::"r"(val));
	}
}

/// Read the mstatus (machine status) CSR.
pub fn mstatus_read() -> usize {
	unsafe {
		let rval;
		asm!("csrr $0, mstatus":"=r"(rval));
		rval
	}
}

/// Write the stvec (supervisor trap vector) CSR.
pub fn stvec_write(val: usize) {
	unsafe {
		asm!("csrw stvec, $0" ::"r"(val));
	}
}

/// Read the stvec (supervisor trap vector) CSR.
pub fn stvec_read() -> usize {
	unsafe {
		let rval;
		asm!("csrr $0, stvec" :"=r"(rval));
		rval
	}
}

/// Write the mscratch CSR (holds the kernel trap frame pointer).
pub fn mscratch_write(val: usize) {
	unsafe {
		asm!("csrw mscratch, $0" ::"r"(val));
	}
}

/// Read the mscratch CSR.
pub fn mscratch_read() -> usize {
	unsafe {
		let rval;
		asm!("csrr $0, mscratch" : "=r"(rval));
		rval
	}
}

/// Atomically swap `to` into mscratch, returning the old value.
pub fn mscratch_swap(to: usize) -> usize {
	unsafe {
		let from;
		asm!("csrrw $0, mscratch, $1" : "=r"(from) : "r"(to));
		from
	}
}

/// Write the sscratch CSR (supervisor counterpart of mscratch).
pub fn sscratch_write(val: usize) {
	unsafe {
		asm!("csrw sscratch, $0" ::"r"(val));
	}
}

/// Read the sscratch CSR.
pub fn sscratch_read() -> usize {
	unsafe {
		let rval;
		asm!("csrr $0, sscratch" : "=r"(rval));
		rval
	}
}

/// Atomically swap `to` into sscratch, returning the old value.
pub fn sscratch_swap(to: usize) -> usize {
	unsafe {
		let from;
		asm!("csrrw $0, sscratch, $1" : "=r"(from) : "r"(to));
		from
	}
}

/// Write the sepc (supervisor exception program counter) CSR.
pub fn sepc_write(val: usize) {
	unsafe {
		asm!("csrw sepc, $0" :: "r"(val));
	}
}

/// Read the sepc (supervisor exception program counter) CSR.
pub fn sepc_read() -> usize {
	unsafe {
		let rval;
		asm!("csrr $0, sepc" :"=r"(rval));
		rval
	}
}

/// Write the satp (address translation and protection) CSR.
pub fn satp_write(val: usize) {
	unsafe {
		asm!("csrw satp, $0" :: "r"(val));
	}
}

/// Read the satp (address translation and protection) CSR.
pub fn satp_read() -> usize {
	unsafe {
		let rval;
		asm!("csrr $0, satp" :"=r"(rval));
		rval
	}
}

/// Take a hammer to the page tables and synchronize
/// all of them. This essentially flushes the entire
/// TLB.
/// Issue `sfence.vma vaddr, asid` to synchronize translations for the
/// given virtual address within the given address space.
pub fn satp_fence(vaddr: usize, asid: usize) {
	unsafe {
		asm!("sfence.vma $0, $1" :: "r"(vaddr), "r"(asid));
	}
}

/// Synchronize based on the address space identifier
/// This allows us to fence a particular process rather
/// than the entire TLB.
/// The RISC-V documentation calls this a TLB flush +.
/// Since there are other memory routines involved, they
/// didn't call it a TLB flush, but it is much like
/// Intel/AMD's invtlb [] instruction.
pub fn satp_fence_asid(asid: usize) {
	unsafe {
		asm!("sfence.vma zero, $0" :: "r"(asid));
	}
}

================================================
FILE: risc_v/chapters/ch5/src/kmem.rs
================================================
// kmem.rs
// Sub-page level: malloc-like allocation system
// Stephen Marz
// 7 October 2019

use crate::page::{align_val, zalloc, Table, PAGE_SIZE};
use core::{mem::size_of, ptr::null_mut};

#[repr(usize)]
enum AllocListFlags {
	// Bit 63 of AllocList::flags_size marks a chunk as taken; the
	// remaining low bits hold the chunk size in bytes.
	Taken = 1 << 63,
}

impl AllocListFlags {
	// Convenience accessor for the flag's integer value.
	pub fn val(self) -> usize {
		self as usize
	}
}

// Header placed at the front of every chunk in the kernel heap.
// flags_size packs the taken bit (bit 63) with the chunk size.
struct AllocList {
	pub flags_size: usize,
}

impl AllocList {
	// True when the taken bit is set.
	pub fn is_taken(&self) -> bool {
		self.flags_size & AllocListFlags::Taken.val() != 0
	}

	// True when the taken bit is clear.
	pub fn is_free(&self) -> bool {
		!self.is_taken()
	}

	// Set the taken bit, leaving the size bits untouched.
	pub fn set_taken(&mut self) {
		self.flags_size |= AllocListFlags::Taken.val();
	}

	// Clear the taken bit, leaving the size bits untouched.
	pub fn set_free(&mut self) {
		self.flags_size &= !AllocListFlags::Taken.val();
	}

	// Store a new size while preserving the current taken bit.
	pub fn set_size(&mut self, sz: usize) {
		let k = self.is_taken();
		self.flags_size = sz & !AllocListFlags::Taken.val();
		if k {
			self.flags_size |= AllocListFlags::Taken.val();
		}
	}

	// Chunk size in bytes (taken bit masked off).
	pub fn get_size(&self) -> usize {
		self.flags_size & !AllocListFlags::Taken.val()
	}
}

// This is the head of the allocation. We start here when
// we search for a free memory location.
static mut KMEM_HEAD: *mut AllocList = null_mut();
// In the future, we will have on-demand pages
// so, we need to keep track of our memory footprint to
static mut KMEM_ALLOC: usize = 0; static mut KMEM_PAGE_TABLE: *mut Table = null_mut(); // These functions are safe helpers around an unsafe // operation. pub fn get_head() -> *mut u8 { unsafe { KMEM_HEAD as *mut u8 } } pub fn get_page_table() -> *mut Table { unsafe { KMEM_PAGE_TABLE as *mut Table } } pub fn get_num_allocations() -> usize { unsafe { KMEM_ALLOC } } /// Initialize kernel's memory /// This is not to be used to allocate memory /// for user processes. If that's the case, use /// alloc/dealloc from the page crate. pub fn init() { unsafe { // Allocate kernel pages (KMEM_ALLOC) KMEM_ALLOC = 512; let k_alloc = zalloc(KMEM_ALLOC); assert!(!k_alloc.is_null()); KMEM_HEAD = k_alloc as *mut AllocList; (*KMEM_HEAD).set_free(); (*KMEM_HEAD).set_size(KMEM_ALLOC * PAGE_SIZE); KMEM_PAGE_TABLE = zalloc(1) as *mut Table; } } /// Allocate sub-page level allocation based on bytes and zero the memory pub fn kzmalloc(sz: usize) -> *mut u8 { let size = align_val(sz, 3); let ret = kmalloc(size); if !ret.is_null() { for i in 0..size { unsafe { (*ret.add(i)) = 0; } } } ret } /// Allocate sub-page level allocation based on bytes pub fn kmalloc(sz: usize) -> *mut u8 { unsafe { let size = align_val(sz, 3) + size_of::(); let mut head = KMEM_HEAD; // .add() uses pointer arithmetic, so we type-cast into a u8 // so that we multiply by an absolute size (KMEM_ALLOC * // PAGE_SIZE). let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { if (*head).is_free() && size <= (*head).get_size() { let chunk_size = (*head).get_size(); let rem = chunk_size - size; (*head).set_taken(); if rem > size_of::() { let next = (head as *mut u8).add(size) as *mut AllocList; // There is space remaining here. 
(*next).set_free(); (*next).set_size(rem); (*head).set_size(size); } else { // If we get here, take the entire chunk (*head).set_size(chunk_size); } return head.add(1) as *mut u8; } else { // If we get here, what we saw wasn't a free // chunk, move on to the next. head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } // If we get here, we didn't find any free chunks--i.e. there isn't // enough memory for this. TODO: Add on-demand page allocation. null_mut() } /// Free a sub-page level allocation pub fn kfree(ptr: *mut u8) { unsafe { if !ptr.is_null() { let p = (ptr as *mut AllocList).offset(-1); if (*p).is_taken() { (*p).set_free(); } // After we free, see if we can combine adjacent free // spots to see if we can reduce fragmentation. coalesce(); } } } /// Merge smaller chunks into a bigger chunk pub fn coalesce() { unsafe { let mut head = KMEM_HEAD; let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { let next = (head as *mut u8).add((*head).get_size()) as *mut AllocList; if (*head).get_size() == 0 { // If this happens, then we have a bad heap // (double free or something). However, that // will cause an infinite loop since the next // pointer will never move beyond the current // location. break; } else if next >= tail { // We calculated the next by using the size // given as get_size(), however this could push // us past the tail. In that case, the size is // wrong, hence we break and stop doing what we // need to do. break; } else if (*head).is_free() && (*next).is_free() { // This means we have adjacent blocks needing to // be freed. So, we combine them into one // allocation. (*head).set_size( (*head).get_size() + (*next).get_size(), ); } // If we get here, we might've moved. Recalculate new // head. 
head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } /// For debugging purposes, print the kmem table pub fn print_table() { unsafe { let mut head = KMEM_HEAD; let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { println!( "{:p}: Length = {:<10} Taken = {}", head, (*head).get_size(), (*head).is_taken() ); head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } // /////////////////////////////////// // / GLOBAL ALLOCATOR // /////////////////////////////////// // The global allocator allows us to use the data structures // in the core library, such as a linked list or B-tree. // We want to use these sparingly since we have a coarse-grained // allocator. use core::alloc::{GlobalAlloc, Layout}; // The global allocator is a static constant to a global allocator // structure. We don't need any members because we're using this // structure just to implement alloc and dealloc. struct OsGlobalAlloc; unsafe impl GlobalAlloc for OsGlobalAlloc { unsafe fn alloc(&self, layout: Layout) -> *mut u8 { // We align to the next page size so that when // we divide by PAGE_SIZE, we get exactly the number // of pages necessary. kzmalloc(layout.size()) } unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) { // We ignore layout since our allocator uses ptr_start -> last // to determine the span of an allocation. kfree(ptr); } } #[global_allocator] /// Technically, we don't need the {} at the end, but it /// reveals that we're creating a new structure and not just /// copying a value. static GA: OsGlobalAlloc = OsGlobalAlloc {}; #[alloc_error_handler] /// If for some reason alloc() in the global allocator gets null_mut(), /// then we come here. This is a divergent function, so we call panic to /// let the tester know what's going on. pub fn alloc_error(l: Layout) -> ! 
{ panic!( "Allocator failed to allocate {} bytes with {}-byte alignment.", l.size(), l.align() ); } ================================================ FILE: risc_v/chapters/ch5/src/lds/virt.lds ================================================ /* virt.lds Linker script for outputting to RISC-V QEMU "virt" machine. Stephen Marz 6 October 2019 */ /* riscv is the name of the architecture that the linker understands for any RISC-V target (64-bit or 32-bit). We will further refine this by using -mabi=lp64 and -march=rv64gc */ OUTPUT_ARCH( "riscv" ) /* We're setting our entry point to a symbol called _start which is inside of boot.S. This essentially stores the address of _start as the "entry point", or where CPU instructions should start executing. In the rest of this script, we are going to place _start right at the beginning of 0x8000_0000 because this is where the virtual machine and many RISC-V boards will start executing. */ ENTRY( _start ) /* The MEMORY section will explain that we have "ram" that contains a section that is 'w' (writeable), 'x' (executable), and 'a' (allocatable). We use '!' to invert 'r' (read-only) and 'i' (initialized). We don't want our memory to be read-only, and we're stating that it is NOT initialized at the beginning. The ORIGIN is the memory address 0x8000_0000. If we look at the virt spec or the specification for the RISC-V HiFive Unleashed, this is the starting memory address for our code. Side note: There might be other boot ROMs at different addresses, but their job is to get to this point. Finally LENGTH = 128M tells the linker that we have 128 megabyte of RAM. The linker will double check this to make sure everything can fit. The HiFive Unleashed has a lot more RAM than this, but for the virtual machine, I went with 128M since I think that's enough RAM for now. We can provide other pieces of memory, such as QSPI, or ROM, but we're telling the linker script here that we have one pool of RAM. 
*/
/*
  ram region: writable (w), executable (x), allocatable (a), NOT read-only (!r)
  and NOT initialized (!i). QEMU's virt machine places RAM at 0x8000_0000;
  we give ourselves 128 MiB of it.
*/
MEMORY
{
  ram   (wxa!ri) : ORIGIN = 0x80000000, LENGTH = 128M
}
/*
  PHDRS = "program headers". We declare three, one per loadable segment:
    text - CPU instructions (executable sections)
    data - global, initialized variables
    bss  - global, uninitialized variables (zeroed by boot.S)
  PT_LOAD marks each as a segment that is loaded from the file into memory.
*/
PHDRS
{
  text PT_LOAD;
  data PT_LOAD;
  bss PT_LOAD;
}
/*
  Lay out RAM section by section. Assembly selects sections with the
  ".section" directive; compiled code puts instructions into .text,
  constants into .rodata, initialized globals into .data, and
  uninitialized globals into .bss.
*/
SECTIONS
{
  /*
    The text section must sit at the very start of RAM (0x8000_0000) so the
    entry point lines up with where the CPU starts executing.
  */
  .text : {
    /*
      PROVIDE creates a symbol the OS can reference. '.' is the current
      location counter, so _text_start records where this section begins.
    */
    PROVIDE(_text_start = .);
    /*
      *(.text.init) matches the .text.init section of ANY object file; we
      use our own .text.init section to guarantee _start is placed first.
      After that come all .text sections, then any .text.* sections the
      assembler or compiler may emit.
    */
    *(.text.init) *(.text .text.*)
    /* _text_end is the address just past everything matched above. */
    PROVIDE(_text_end = .);
    /*
      >ram      - place this output section in the "ram" memory region (VMA).
      AT>ram    - load it there too (LMA == VMA); we load everything at its
                  physical address and let the kernel sort out virtual memory.
      :text     - assign it to the "text" program header declared in PHDRS.
    */
  } >ram AT>ram :text
  /*
    The global pointer lets the linker relax accesses to globals into
    gp-relative addressing. It only matters for rodata, data, and bss.
  */
  PROVIDE(_global_pointer = .);
  /*
    Read-only data. We place it in the :text program header since we have no
    dedicated rodata header. NOTE: link-time placement gives no actual
    protection; the R/W/X permissions are enforced later by the MMU.
  */
  .rodata : {
    PROVIDE(_rodata_start = .);
    *(.rodata .rodata.*)
    PROVIDE(_rodata_end = .);
  } >ram AT>ram :text
  .data : {
    /*
      Align to 4096 bytes, since that is the resolution (page size) of our
      paging system.
    */
    . = ALIGN(4096);
    PROVIDE(_data_start = .);
    /*
      Compilers use the "small data" (.sdata) sections for data that benefits
      from quick, gp-relative access. We load small data first, then regular
      data, all under one umbrella.
    */
    *(.sdata .sdata.*) *(.data .data.*)
    PROVIDE(_data_end = .);
  } >ram AT>ram :data
  .bss : {
    PROVIDE(_bss_start = .);
    *(.sbss .sbss.*) *(.bss .bss.*)
    PROVIDE(_bss_end = .);
  } >ram AT>ram :bss
  /*
    Symbols used by the kernel for memory allocation: the kernel stack
    (_stack_start/_stack_end) and the heap (_heap_start, _heap_size).
    We use symbols instead of hard-coded addresses because these are floating
    targets: as code grows, the heap moves down and shrinks.

    _memory_start is ORIGIN(ram) (0x8000_0000 today) so we only ever have to
    change the origin in one place.
  */
  PROVIDE(_memory_start = ORIGIN(ram));
  /*
    The kernel stack starts at the end of bss and is given 0x8000 bytes
    (32 KiB). The stack grows downward (decrement-before-push), so the
    stack pointer is initialized to _stack_end, the high end of the slot.
  */
  PROVIDE(_stack_start = _bss_end);
  PROVIDE(_stack_end = _stack_start + 0x8000);
  PROVIDE(_memory_end = ORIGIN(ram) + LENGTH(ram));
  /*
    Finally, the heap begins right after the kernel stack and runs to the end
    of memory (_heap_start .. _heap_start + _heap_size). It mainly doles out
    memory for user-space applications, but the kernel uses it too. We do not
    align it here; the kernel decides how it wants to do this.
*/
PROVIDE(_heap_start = _stack_end);
PROVIDE(_heap_size = _memory_end - _heap_start);
}


================================================
FILE: risc_v/chapters/ch5/src/lib.rs
================================================
// Steve Operating System
// Stephen Marz
// 21 Sep 2019
#![no_std]
#![feature(panic_info_message,
           asm,
           allocator_api,
           alloc_error_handler,
           alloc_prelude,
           const_raw_ptr_to_usize_cast)]

#[macro_use]
extern crate alloc;
// This is experimental and requires alloc_prelude as a feature
use alloc::prelude::v1::*;

// ///////////////////////////////////
// / RUST MACROS
// ///////////////////////////////////
#[macro_export]
macro_rules! print
{
	($($args:tt)+) => ({
			use core::fmt::Write;
			// Each print constructs a fresh Uart wrapper over the
			// memory-mapped UART at 0x1000_0000 and ignores write errors.
			let _ = write!(crate::uart::Uart::new(0x1000_0000), $($args)+);
			});
}
#[macro_export]
macro_rules! println
{
	// println!() -> just a CRLF; with a format string (and optional
	// arguments) the CRLF is appended at compile time via concat!.
	() => ({
		   print!("\r\n")
		   });
	($fmt:expr) => ({
			print!(concat!($fmt, "\r\n"))
			});
	($fmt:expr, $($args:tt)+) => ({
			print!(concat!($fmt, "\r\n"), $($args)+)
			});
}

// ///////////////////////////////////
// / LANGUAGE STRUCTURES / FUNCTIONS
// ///////////////////////////////////
// Required by the compiler for stack-unwinding personality; we never unwind.
#[no_mangle]
extern "C" fn eh_personality() {}

// Panic handler: report the panic location and message over the UART, then
// park the hart forever in abort().
#[panic_handler]
fn panic(info: &core::panic::PanicInfo) -> ! {
	print!("Aborting: ");
	if let Some(p) = info.location() {
		println!(
		         "line {}, file {}: {}",
		         p.line(),
		         p.file(),
		         info.message().unwrap()
		);
	}
	else {
		println!("no information available.");
	}
	abort();
}

// Spin forever, waiting for interrupts (wfi keeps the hart low-power).
#[no_mangle]
extern "C" fn abort() -> ! {
	loop {
		unsafe {
			asm!("wfi"::::"volatile");
		}
	}
}

// ///////////////////////////////////
// / CONSTANTS
// ///////////////////////////////////
// const STR_Y: &str = "\x1b[38;2;79;221;13m✓\x1b[m";
// const STR_N: &str = "\x1b[38;2;221;41;13m✘\x1b[m";

// The following symbols come from asm/mem.S. We can use
// the symbols directly, but the address of the symbols
// themselves are their values, which can cause issues.
// Instead, I created doubleword values in mem.S in the .rodata and .data
// sections.
extern "C" { static TEXT_START: usize; static TEXT_END: usize; static DATA_START: usize; static DATA_END: usize; static RODATA_START: usize; static RODATA_END: usize; static BSS_START: usize; static BSS_END: usize; static KERNEL_STACK_START: usize; static KERNEL_STACK_END: usize; static HEAP_START: usize; static HEAP_SIZE: usize; } /// Identity map range /// Takes a contiguous allocation of memory and maps it using PAGE_SIZE /// This assumes that start <= end pub fn id_map_range(root: &mut page::Table, start: usize, end: usize, bits: i64) { let mut memaddr = start & !(page::PAGE_SIZE - 1); let num_kb_pages = (page::align_val(end, 12) - memaddr) / page::PAGE_SIZE; // I named this num_kb_pages for future expansion when // I decide to allow for GiB (2^30) and 2MiB (2^21) page // sizes. However, the overlapping memory regions are causing // nightmares. for _ in 0..num_kb_pages { page::map(root, memaddr, memaddr, bits, 0); memaddr += 1 << 12; } } // /////////////////////////////////// // / ENTRY POINT // /////////////////////////////////// #[no_mangle] extern "C" fn kinit() { // We created kinit, which runs in super-duper mode // 3 called "machine mode". // The job of kinit() is to get us into supervisor mode // as soon as possible. 
// Interrupts are disabled for the duration of kinit() uart::Uart::new(0x1000_0000).init(); page::init(); kmem::init(); // Map heap allocations let root_ptr = kmem::get_page_table(); let root_u = root_ptr as usize; let mut root = unsafe { root_ptr.as_mut().unwrap() }; let kheap_head = kmem::get_head() as usize; let total_pages = kmem::get_num_allocations(); println!(); println!(); unsafe { println!("TEXT: 0x{:x} -> 0x{:x}", TEXT_START, TEXT_END); println!("RODATA: 0x{:x} -> 0x{:x}", RODATA_START, RODATA_END); println!("DATA: 0x{:x} -> 0x{:x}", DATA_START, DATA_END); println!("BSS: 0x{:x} -> 0x{:x}", BSS_START, BSS_END); println!( "STACK: 0x{:x} -> 0x{:x}", KERNEL_STACK_START, KERNEL_STACK_END ); println!( "HEAP: 0x{:x} -> 0x{:x}", kheap_head, kheap_head + total_pages * page::PAGE_SIZE ); } id_map_range( &mut root, kheap_head, kheap_head + total_pages * page::PAGE_SIZE, page::EntryBits::ReadWrite.val(), ); // Using statics is inherently unsafe. unsafe { // Map heap descriptors let num_pages = HEAP_SIZE / page::PAGE_SIZE; id_map_range( &mut root, HEAP_START, HEAP_START + num_pages, page::EntryBits::ReadWrite.val(), ); // Map executable section id_map_range( &mut root, TEXT_START, TEXT_END, page::EntryBits::ReadExecute.val(), ); // Map rodata section // We put the ROdata section into the text section, so they can // potentially overlap however, we only care that it's read // only. 
id_map_range( &mut root, RODATA_START, RODATA_END, page::EntryBits::ReadExecute.val(), ); // Map data section id_map_range( &mut root, DATA_START, DATA_END, page::EntryBits::ReadWrite.val(), ); // Map bss section id_map_range( &mut root, BSS_START, BSS_END, page::EntryBits::ReadWrite.val(), ); // Map kernel stack id_map_range( &mut root, KERNEL_STACK_START, KERNEL_STACK_END, page::EntryBits::ReadWrite.val(), ); } // UART id_map_range( &mut root, 0x1000_0000, 0x1000_0100, page::EntryBits::ReadWrite.val(), ); // CLINT // -> MSIP id_map_range( &mut root, 0x0200_0000, 0x0200_ffff, page::EntryBits::ReadWrite.val(), ); // PLIC id_map_range( &mut root, 0x0c00_0000, 0x0c00_2001, page::EntryBits::ReadWrite.val(), ); id_map_range( &mut root, 0x0c20_0000, 0x0c20_8001, page::EntryBits::ReadWrite.val(), ); // When we return from here, we'll go back to boot.S and switch into // supervisor mode We will return the SATP register to be written when // we return. root_u is the root page table's address. When stored into // the SATP register, this is divided by 4 KiB (right shift by 12 bits). // We enable the MMU by setting mode 8. Bits 63, 62, 61, 60 determine // the mode. // 0 = Bare (no translation) // 8 = Sv39 // 9 = Sv48 // build_satp has these parameters: mode, asid, page table address. let satp_value = cpu::build_satp(cpu::SatpMode::Sv39, 0, root_u); unsafe { // We have to store the kernel's table. The tables will be moved // back and forth between the kernel's table and user // applicatons' tables. Note that we're writing the physical address // of the trap frame. cpu::mscratch_write( (&mut cpu::KERNEL_TRAP_FRAME[0] as *mut cpu::TrapFrame) as usize, ); cpu::sscratch_write(cpu::mscratch_read()); cpu::KERNEL_TRAP_FRAME[0].satp = satp_value; // Move the stack pointer to the very bottom. The stack is // actually in a non-mapped page. The stack is decrement-before // push and increment after pop. Therefore, the stack will be // allocated (decremented) before it is stored. 
cpu::KERNEL_TRAP_FRAME[0].trap_stack = page::zalloc(1).add(page::PAGE_SIZE); id_map_range( &mut root, cpu::KERNEL_TRAP_FRAME[0].trap_stack .sub(page::PAGE_SIZE,) as usize, cpu::KERNEL_TRAP_FRAME[0].trap_stack as usize, page::EntryBits::ReadWrite.val(), ); // The trap frame itself is stored in the mscratch register. id_map_range( &mut root, cpu::mscratch_read(), cpu::mscratch_read() + core::mem::size_of::(), page::EntryBits::ReadWrite.val(), ); page::print_page_allocations(); let p = cpu::KERNEL_TRAP_FRAME[0].trap_stack as usize - 1; let m = page::virt_to_phys(&root, p).unwrap_or(0); println!("Walk 0x{:x} = 0x{:x}", p, m); } // The following shows how we're going to walk to translate a virtual // address into a physical address. We will use this whenever a user // space application requires services. Since the user space application // only knows virtual addresses, we have to translate silently behind // the scenes. println!("Setting 0x{:x}", satp_value); println!("Scratch reg = 0x{:x}", cpu::mscratch_read()); cpu::satp_write(satp_value); cpu::satp_fence_asid(0); } #[no_mangle] extern "C" fn kinit_hart(hartid: usize) { // All non-0 harts initialize here. unsafe { // We have to store the kernel's table. The tables will be moved // back and forth between the kernel's table and user // applicatons' tables. cpu::mscratch_write( (&mut cpu::KERNEL_TRAP_FRAME[hartid] as *mut cpu::TrapFrame) as usize, ); // Copy the same mscratch over to the supervisor version of the // same register. cpu::sscratch_write(cpu::mscratch_read()); cpu::KERNEL_TRAP_FRAME[hartid].hartid = hartid; // We can't do the following until zalloc() is locked, but we // don't have locks, yet :( cpu::KERNEL_TRAP_FRAME[hartid].satp // = cpu::KERNEL_TRAP_FRAME[0].satp; // cpu::KERNEL_TRAP_FRAME[hartid].trap_stack = page::zalloc(1); } } #[no_mangle] extern "C" fn kmain() { // kmain() starts in supervisor mode. So, we should have the trap // vector setup and the MMU turned on when we get here. 
// We initialized my_uart in machine mode under kinit for debugging // prints, but this just grabs a pointer to it. let mut my_uart = uart::Uart::new(0x1000_0000); // Create a new scope so that we can test the global allocator and // deallocator { // We have the global allocator, so let's see if that works! let k = Box::::new(100); println!("Boxed value = {}", *k); // The following comes from the Rust documentation: // some bytes, in a vector let sparkle_heart = vec![240, 159, 146, 150]; // We know these bytes are valid, so we'll use `unwrap()`. // This will MOVE the vector. let sparkle_heart = String::from_utf8(sparkle_heart).unwrap(); println!("String = {}", sparkle_heart); println!("\n\nAllocations of a box, vector, and string"); kmem::print_table(); } println!("\n\nEverything should now be free:"); kmem::print_table(); unsafe { // Set the next machine timer to fire. let mtimecmp = 0x0200_4000 as *mut u64; let mtime = 0x0200_bff8 as *const u64; // The frequency given by QEMU is 10_000_000 Hz, so this sets // the next interrupt to fire one second from now. mtimecmp.write_volatile(mtime.read_volatile() + 10_000_000); // Let's cause a page fault and see what happens. This should trap // to m_trap under trap.rs let v = 0x0 as *mut u64; v.write_volatile(0); } // If we get here, the Box, vec, and String should all be freed since // they go out of scope. This calls their "Drop" trait. // Let's set up the interrupt system via the PLIC. We have to set the threshold to something // that won't mask all interrupts. println!("Setting up interrupts and PLIC..."); // We lower the threshold wall so our interrupts can jump over it. plic::set_threshold(0); // VIRTIO = [1..8] // UART0 = 10 // PCIE = [32..35] // Enable the UART interrupt. 
plic::enable(10); plic::set_priority(10, 1); println!("UART interrupts have been enabled and are awaiting your command"); } // /////////////////////////////////// // / RUST MODULES // /////////////////////////////////// pub mod cpu; pub mod kmem; pub mod page; pub mod plic; pub mod trap; pub mod uart; ================================================ FILE: risc_v/chapters/ch5/src/page.rs ================================================ // page.rs // Memory routines // Stephen Marz // 6 October 2019 use core::{mem::size_of, ptr::null_mut}; // //////////////////////////////// // // Allocation routines // //////////////////////////////// extern "C" { static HEAP_START: usize; static HEAP_SIZE: usize; } // We will use ALLOC_START to mark the start of the actual // memory we can dish out. static mut ALLOC_START: usize = 0; const PAGE_ORDER: usize = 12; pub const PAGE_SIZE: usize = 1 << 12; /// Align (set to a multiple of some power of two) /// This takes an order which is the exponent to 2^order /// Therefore, all alignments must be made as a power of two. /// This function always rounds up. pub const fn align_val(val: usize, order: usize) -> usize { let o = (1usize << order) - 1; (val + o) & !o } #[repr(u8)] pub enum PageBits { Empty = 0, Taken = 1 << 0, Last = 1 << 1, } impl PageBits { // We convert PageBits to a u8 a lot, so this is // for convenience. pub fn val(self) -> u8 { self as u8 } } // Each page is described by the Page structure. Linux does this // as well, where each 4096-byte chunk of memory has a structure // associated with it. However, there structure is much larger. pub struct Page { flags: u8, } impl Page { // If this page has been marked as the final allocation, // this function returns true. Otherwise, it returns false. pub fn is_last(&self) -> bool { if self.flags & PageBits::Last.val() != 0 { true } else { false } } // If the page is marked as being taken (allocated), then // this function returns true. Otherwise, it returns false. 
pub fn is_taken(&self) -> bool { if self.flags & PageBits::Taken.val() != 0 { true } else { false } } // This is the opposite of is_taken(). pub fn is_free(&self) -> bool { !self.is_taken() } // Clear the Page structure and all associated allocations. pub fn clear(&mut self) { self.flags = PageBits::Empty.val(); } // Set a certain flag. We ran into trouble here since PageBits // is an enumeration and we haven't implemented the BitOr Trait // on it. pub fn set_flag(&mut self, flag: PageBits) { self.flags |= flag.val(); } pub fn clear_flag(&mut self, flag: PageBits) { self.flags &= !(flag.val()); } } /// Initialize the allocation system. There are several ways that we can /// implement the page allocator: /// 1. Free list (singly linked list where it starts at the first free /// allocation) 2. Bookkeeping list (structure contains a taken and length) /// 3. Allocate one Page structure per 4096 bytes (this is what I chose) /// 4. Others pub fn init() { unsafe { // let desc_per_page = PAGE_SIZE / size_of::(); let num_pages = HEAP_SIZE / PAGE_SIZE; // let num_desc_pages = num_pages / desc_per_page; let ptr = HEAP_START as *mut Page; // Clear all pages to make sure that they aren't accidentally // taken for i in 0..num_pages { (*ptr.add(i)).clear(); } // Determine where the actual useful memory starts. This will be // after all Page structures. We also must align the ALLOC_START // to a page-boundary (PAGE_SIZE = 4096). ALLOC_START = // (HEAP_START + num_pages * size_of::() + PAGE_SIZE - 1) // & !(PAGE_SIZE - 1); ALLOC_START = align_val( HEAP_START + num_pages * size_of::(), PAGE_ORDER, ); } } /// Allocate a page or multiple pages /// pages: the number of PAGE_SIZE pages to allocate pub fn alloc(pages: usize) -> *mut u8 { // We have to find a contiguous allocation of pages assert!(pages > 0); unsafe { // We create a Page structure for each page on the heap. 
We // actually might have more since HEAP_SIZE moves and so does // the size of our structure, but we'll only waste a few bytes. let num_pages = HEAP_SIZE / PAGE_SIZE; let ptr = HEAP_START as *mut Page; for i in 0..num_pages - pages { let mut found = false; // Check to see if this Page is free. If so, we have our // first candidate memory address. if (*ptr.add(i)).is_free() { // It was FREE! Yay! found = true; for j in i..i + pages { // Now check to see if we have a // contiguous allocation for all of the // request pages. If not, we should // check somewhere else. if (*ptr.add(j)).is_taken() { found = false; break; } } } // We've checked to see if there are enough contiguous // pages to form what we need. If we couldn't, found // will be false, otherwise it will be true, which means // we've found valid memory we can allocate. if found { for k in i..i + pages - 1 { (*ptr.add(k)).set_flag(PageBits::Taken); } // The marker for the last page is // PageBits::Last This lets us know when we've // hit the end of this particular allocation. (*ptr.add(i+pages-1)).set_flag(PageBits::Taken); (*ptr.add(i+pages-1)).set_flag(PageBits::Last); // The Page structures themselves aren't the // useful memory. Instead, there is 1 Page // structure per 4096 bytes starting at // ALLOC_START. return (ALLOC_START + PAGE_SIZE * i) as *mut u8; } } } // If we get here, that means that no contiguous allocation was // found. null_mut() } /// Allocate and zero a page or multiple pages /// pages: the number of pages to allocate /// Each page is PAGE_SIZE which is calculated as 1 << PAGE_ORDER /// On RISC-V, this typically will be 4,096 bytes. pub fn zalloc(pages: usize) -> *mut u8 { // Allocate and zero a page. // First, let's get the allocation let ret = alloc(pages); if !ret.is_null() { let size = (PAGE_SIZE * pages) / 8; let big_ptr = ret as *mut u64; for i in 0..size { // We use big_ptr so that we can force an // sd (store doubleword) instruction rather than // the sb. 
This means 8x fewer stores than before. // Typically we have to be concerned about remaining // bytes, but fortunately 4096 % 8 = 0, so we // won't have any remaining bytes. unsafe { (*big_ptr.add(i)) = 0; } } } ret } /// Deallocate a page by its pointer /// The way we've structured this, it will automatically coalesce /// contiguous pages. pub fn dealloc(ptr: *mut u8) { // Make sure we don't try to free a null pointer. assert!(!ptr.is_null()); unsafe { let addr = HEAP_START + (ptr as usize - ALLOC_START) / PAGE_SIZE; // Make sure that the address makes sense. The address we // calculate here is the page structure, not the HEAP address! assert!(addr >= HEAP_START && addr < HEAP_START + HEAP_SIZE); let mut p = addr as *mut Page; // Keep clearing pages until we hit the last page. while (*p).is_taken() && !(*p).is_last() { (*p).clear(); p = p.add(1); } // If the following assertion fails, it is most likely // caused by a double-free. assert!( (*p).is_last() == true, "Possible double-free detected! (Not taken found \ before last)" ); // If we get here, we've taken care of all previous pages and // we are on the last page. (*p).clear(); } } /// Print all page allocations /// This is mainly used for debugging. 
pub fn print_page_allocations() { unsafe { let num_pages = (HEAP_SIZE - (ALLOC_START - HEAP_START)) / PAGE_SIZE; let mut beg = HEAP_START as *const Page; let end = beg.add(num_pages); let alloc_beg = ALLOC_START; let alloc_end = ALLOC_START + num_pages * PAGE_SIZE; println!(); println!( "PAGE ALLOCATION TABLE\nMETA: {:p} -> {:p}\nPHYS: \ 0x{:x} -> 0x{:x}", beg, end, alloc_beg, alloc_end ); println!("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"); let mut num = 0; while beg < end { if (*beg).is_taken() { let start = beg as usize; let memaddr = ALLOC_START + (start - HEAP_START) * PAGE_SIZE; print!("0x{:x} => ", memaddr); loop { num += 1; if (*beg).is_last() { let end = beg as usize; let memaddr = ALLOC_START + (end - HEAP_START) * PAGE_SIZE + PAGE_SIZE - 1; print!( "0x{:x}: {:>3} page(s)", memaddr, (end - start + 1) ); println!("."); break; } beg = beg.add(1); } } beg = beg.add(1); } println!("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"); println!( "Allocated: {:>6} pages ({:>10} bytes).", num, num * PAGE_SIZE ); println!( "Free : {:>6} pages ({:>10} bytes).", num_pages - num, (num_pages - num) * PAGE_SIZE ); println!(); } } // //////////////////////////////// // // MMU Routines // //////////////////////////////// // Represent (repr) our entry bits as // unsigned 64-bit integers. #[repr(i64)] #[derive(Copy, Clone)] pub enum EntryBits { None = 0, Valid = 1 << 0, Read = 1 << 1, Write = 1 << 2, Execute = 1 << 3, User = 1 << 4, Global = 1 << 5, Access = 1 << 6, Dirty = 1 << 7, // Convenience combinations ReadWrite = 1 << 1 | 1 << 2, ReadExecute = 1 << 1 | 1 << 3, ReadWriteExecute = 1 << 1 | 1 << 2 | 1 << 3, // User Convenience Combinations UserReadWrite = 1 << 1 | 1 << 2 | 1 << 4, UserReadExecute = 1 << 1 | 1 << 3 | 1 << 4, UserReadWriteExecute = 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4, } // Helper functions to convert the enumeration // into an i64, which is what our page table // entries will be. impl EntryBits { pub fn val(self) -> i64 { self as i64 } } // A single entry. 
We're using an i64 so that // this will sign-extend rather than zero-extend // since RISC-V requires that the reserved sections // take on the most significant bit. pub struct Entry { pub entry: i64, } // The Entry structure describes one of the 512 entries per table, which is // described in the RISC-V privileged spec Figure 4.18. impl Entry { pub fn is_valid(&self) -> bool { self.get_entry() & EntryBits::Valid.val() != 0 } // The first bit (bit index #0) is the V bit for // valid. pub fn is_invalid(&self) -> bool { !self.is_valid() } // A leaf has one or more RWX bits set pub fn is_leaf(&self) -> bool { self.get_entry() & 0xe != 0 } pub fn is_branch(&self) -> bool { !self.is_leaf() } pub fn set_entry(&mut self, entry: i64) { self.entry = entry; } pub fn get_entry(&self) -> i64 { self.entry } } // Table represents a single table, which contains 512 (2^9), 64-bit entries. pub struct Table { pub entries: [Entry; 512], } impl Table { pub fn len() -> usize { 512 } } /// Map a virtual address to a physical address using 4096-byte page /// size. /// root: a mutable reference to the root Table /// vaddr: The virtual address to map /// paddr: The physical address to map /// bits: An OR'd bitset containing the bits the leaf should have. /// The bits should contain only the following: /// Read, Write, Execute, User, and/or Global /// The bits MUST include one or more of the following: /// Read, Write, Execute /// The valid bit automatically gets added. pub fn map(root: &mut Table, vaddr: usize, paddr: usize, bits: i64, level: usize) { // Make sure that Read, Write, or Execute have been provided // otherwise, we'll leak memory and always create a page fault. 
assert!(bits & 0xe != 0); // Extract out each VPN from the virtual address // On the virtual address, each VPN is exactly 9 bits, // which is why we use the mask 0x1ff = 0b1_1111_1111 (9 bits) let vpn = [ // VPN[0] = vaddr[20:12] (vaddr >> 12) & 0x1ff, // VPN[1] = vaddr[29:21] (vaddr >> 21) & 0x1ff, // VPN[2] = vaddr[38:30] (vaddr >> 30) & 0x1ff, ]; // Just like the virtual address, extract the physical address // numbers (PPN). However, PPN[2] is different in that it stores // 26 bits instead of 9. Therefore, we use, // 0x3ff_ffff = 0b11_1111_1111_1111_1111_1111_1111 (26 bits). let ppn = [ // PPN[0] = paddr[20:12] (paddr >> 12) & 0x1ff, // PPN[1] = paddr[29:21] (paddr >> 21) & 0x1ff, // PPN[2] = paddr[55:30] (paddr >> 30) & 0x3ff_ffff, ]; // We will use this as a floating reference so that we can set // individual entries as we walk the table. let mut v = &mut root.entries[vpn[2]]; // Now, we're going to traverse the page table and set the bits // properly. We expect the root to be valid, however we're required to // create anything beyond the root. // In Rust, we create a range iterator using the .. operator. // The .rev() will reverse the iteration since we need to start with // VPN[2] The .. operator is inclusive on start but exclusive on end. // So, (0..2) will iterate 0 and 1. for i in (level..2).rev() { if !v.is_valid() { // Allocate a page let page = zalloc(1); // The page is already aligned by 4,096, so store it // directly The page is stored in the entry shifted // right by 2 places. v.set_entry( (page as i64 >> 2) | EntryBits::Valid.val(), ); } let entry = ((v.get_entry() & !0x3ff) << 2) as *mut Entry; v = unsafe { entry.add(vpn[i]).as_mut().unwrap() }; } // When we get here, we should be at VPN[0] and v should be pointing to // our entry. 
// The entry structure is Figure 4.18 in the RISC-V Privileged // Specification let entry = (ppn[2] << 28) as i64 | // PPN[2] = [53:28] (ppn[1] << 19) as i64 | // PPN[1] = [27:19] (ppn[0] << 10) as i64 | // PPN[0] = [18:10] bits | // Specified bits, such as User, Read, Write, etc EntryBits::Valid.val() | // Valid bit EntryBits::Dirty.val() | // Some machines require this to =1 EntryBits::Access.val() // Just like dirty, some machines require this ; // Set the entry. V should be set to the correct pointer by the loop // above. v.set_entry(entry); } /// Unmaps and frees all memory associated with a table. /// root: The root table to start freeing. /// NOTE: This does NOT free root directly. This must be /// freed manually. /// The reason we don't free the root is because it is /// usually embedded into the Process structure. pub fn unmap(root: &mut Table) { // Start with level 2 for lv2 in 0..Table::len() { let ref entry_lv2 = root.entries[lv2]; if entry_lv2.is_valid() && entry_lv2.is_branch() { // This is a valid entry, so drill down and free. let memaddr_lv1 = (entry_lv2.get_entry() & !0x3ff) << 2; let table_lv1 = unsafe { // Make table_lv1 a mutable reference instead of // a pointer. (memaddr_lv1 as *mut Table).as_mut().unwrap() }; for lv1 in 0..Table::len() { let ref entry_lv1 = table_lv1.entries[lv1]; if entry_lv1.is_valid() && entry_lv1.is_branch() { let memaddr_lv0 = (entry_lv1.get_entry() & !0x3ff) << 2; // The next level is level 0, which // cannot have branches, therefore, // we free here. dealloc(memaddr_lv0 as *mut u8); } } dealloc(memaddr_lv1 as *mut u8); } } } /// Walk the page table to convert a virtual address to a /// physical address. /// If a page fault would occur, this returns None /// Otherwise, it returns Some with the physical address. 
pub fn virt_to_phys(root: &Table, vaddr: usize) -> Option { // Walk the page table pointed to by root let vpn = [ // VPN[0] = vaddr[20:12] (vaddr >> 12) & 0x1ff, // VPN[1] = vaddr[29:21] (vaddr >> 21) & 0x1ff, // VPN[2] = vaddr[38:30] (vaddr >> 30) & 0x1ff, ]; let mut v = &root.entries[vpn[2]]; for i in (0..=2).rev() { if v.is_invalid() { // This is an invalid entry, page fault. break; } else if v.is_leaf() { // According to RISC-V, a leaf can be at any level. // The offset mask masks off the PPN. Each PPN is 9 // bits and they start at bit #12. So, our formula // 12 + i * 9 let off_mask = (1 << (12 + i * 9)) - 1; let vaddr_pgoff = vaddr & off_mask; let addr = ((v.get_entry() << 2) as usize) & !off_mask; return Some(addr | vaddr_pgoff); } // Set v to the next entry which is pointed to by this // entry. However, the address was shifted right by 2 places // when stored in the page table entry, so we shift it left // to get it back into place. let entry = ((v.get_entry() & !0x3ff) << 2) as *const Entry; // We do i - 1 here, however we should get None or Some() above // before we do 0 - 1 = -1. v = unsafe { entry.add(vpn[i - 1]).as_ref().unwrap() }; } // If we get here, we've exhausted all valid tables and haven't // found a leaf. None } ================================================ FILE: risc_v/chapters/ch5/src/plic.rs ================================================ // plic.rs // Platform Level Interrupt Controller (PLIC) // Stephen Marz // 1 Nov 2019 const PLIC_PRIORITY: usize = 0x0c00_0000; const PLIC_PENDING: usize = 0x0c00_1000; const PLIC_INT_ENABLE: usize = 0x0c00_2000; const PLIC_THRESHOLD: usize = 0x0c20_0000; const PLIC_CLAIM: usize = 0x0c20_0004; // Each register is 4-bytes (u32) // The PLIC is an external interrupt controller. The one // used by QEMU virt is the same as the SiFive PLIC. 
// https://sifive.cdn.prismic.io/sifive%2F834354f0-08e6-423c-bf1f-0cb58ef14061_fu540-c000-v1.0.pdf // Chapter 10 explains the priority, pending, interrupt enable, threshold and claims // The virt machine has the following external interrupts (from Qemu source): // Interrupt 0 is a "null" interrupt and is hardwired to 0. // VIRTIO = [1..8] // UART0 = 10 // PCIE = [32..35] /// Get the next available interrupt. This is the "claim" process. /// The plic will automatically sort by priority and hand us the /// ID of the interrupt. For example, if the UART is interrupting /// and it's next, we will get the value 10. pub fn next() -> Option { let claim_reg = PLIC_CLAIM as *const u32; let claim_no; // The claim register is filled with the highest-priority, enabled interrupt. unsafe { claim_no = claim_reg.read_volatile(); } if claim_no == 0 { // The interrupt 0 is hardwired to 0, which tells us that there is no // interrupt to claim, hence we return None. None } else { // If we get here, we've gotten a non-0 interrupt. Some(claim_no) } } /// Complete a pending interrupt by id. The id should come /// from the next() function above. pub fn complete(id: u32) { let complete_reg = PLIC_CLAIM as *mut u32; unsafe { // We actually write a u32 into the entire complete_register. // This is the same register as the claim register, but it can // differentiate based on whether we're reading or writing. complete_reg.write_volatile(id); } } /// Set the global threshold. The threshold can be a value [0..7]. /// The PLIC will mask any interrupts at or below the given threshold. /// This means that a threshold of 7 will mask ALL interrupts and /// a threshold of 0 will allow ALL interrupts. pub fn set_threshold(tsh: u8) { // We do tsh because we're using a u8, but our maximum number // is a 3-bit 0b111. So, we and with 7 (0b111) to just get the // last three bits. 
let actual_tsh = tsh & 7; let tsh_reg = PLIC_THRESHOLD as *mut u32; unsafe { tsh_reg.write_volatile(actual_tsh as u32); } } /// See if a given interrupt id is pending. pub fn is_pending(id: u32) -> bool { let pend = PLIC_PENDING as *const u32; let actual_id = 1 << id; let pend_ids; unsafe { pend_ids = pend.read_volatile(); } actual_id & pend_ids != 0 } /// Enable a given interrupt id pub fn enable(id: u32) { let enables = PLIC_INT_ENABLE as *mut u32; let actual_id = 1 << id; unsafe { // Unlike the complete and claim registers, the plic_int_enable // register is a bitset where the id is the bit index. The register // is a 32-bit register, so that gives us enables for interrupts // 31 through 1 (0 is hardwired to 0). enables.write_volatile(enables.read_volatile() | actual_id); } } /// Set a given interrupt priority to the given priority. /// The priority must be [0..7] pub fn set_priority(id: u32, prio: u8) { let actual_prio = prio as u32 & 7; let prio_reg = PLIC_PRIORITY as *mut u32; unsafe { // The offset for the interrupt id is: // PLIC_PRIORITY + 4 * id // Since we're using pointer arithmetic on a u32 type, // it will automatically multiply the id by 4. prio_reg.add(id as usize).write_volatile(actual_prio); } } ================================================ FILE: risc_v/chapters/ch5/src/trap.rs ================================================ // trap.rs // Trap routines // Stephen Marz // 10 October 2019 use crate::cpu::TrapFrame; use crate::{plic, uart}; #[no_mangle] /// The m_trap stands for "machine trap". Right now, we are handling /// all traps at machine mode. In this mode, we can figure out what's /// going on and send a trap where it needs to be. Remember, in machine /// mode and in this trap, interrupts are disabled and the MMU is off. extern "C" fn m_trap(epc: usize, tval: usize, cause: usize, hart: usize, status: usize, frame: &mut TrapFrame) -> usize { // We're going to handle all traps in machine mode. 
RISC-V lets // us delegate to supervisor mode, but switching out SATP (virtual memory) // gets hairy. let is_async = { if cause >> 63 & 1 == 1 { true } else { false } }; // The cause contains the type of trap (sync, async) as well as the cause // number. So, here we narrow down just the cause number. let cause_num = cause & 0xfff; let mut return_pc = epc; if is_async { // Asynchronous trap match cause_num { 3 => { // Machine software println!("Machine software interrupt CPU#{}", hart); }, 7 => unsafe { // Machine timer let mtimecmp = 0x0200_4000 as *mut u64; let mtime = 0x0200_bff8 as *const u64; // The frequency given by QEMU is 10_000_000 Hz, so this sets // the next interrupt to fire one second from now. mtimecmp.write_volatile(mtime.read_volatile() + 10_000_000); }, 11 => { // Machine external (interrupt from Platform Interrupt Controller (PLIC)) // println!("Machine external interrupt CPU#{}", hart); // We will check the next interrupt. If the interrupt isn't available, this will // give us None. However, that would mean we got a spurious interrupt, unless we // get an interrupt from a non-PLIC source. This is the main reason that the PLIC // hardwires the id 0 to 0, so that we can use it as an error case. if let Some(interrupt) = plic::next() { // If we get here, we've got an interrupt from the claim register. The PLIC will // automatically prioritize the next interrupt, so when we get it from claim, it // will be the next in priority order. match interrupt { 10 => { // Interrupt 10 is the UART interrupt. // We would typically set this to be handled out of the interrupt context, // but we're testing here! C'mon! // We haven't yet used the singleton pattern for my_uart, but remember, this // just simply wraps 0x1000_0000 (UART). let mut my_uart = uart::Uart::new(0x1000_0000); // If we get here, the UART better have something! If not, what happened?? if let Some(c) = my_uart.get() { // If you recognize this code, it used to be in the lib.rs under kmain(). 
That // was because we needed to poll for UART data. Now that we have interrupts, // here it goes! match c { 8 => { // This is a backspace, so we // essentially have to write a space and // backup again: print!("{} {}", 8 as char, 8 as char); }, 10 | 13 => { // Newline or carriage-return println!(); }, _ => { print!("{}", c as char); }, } } }, // Non-UART interrupts go here and do nothing. _ => { println!("Non-UART external interrupt: {}", interrupt); } } // We've claimed it, so now say that we've handled it. This resets the interrupt pending // and allows the UART to interrupt again. Otherwise, the UART will get "stuck". plic::complete(interrupt); } }, _ => { panic!("Unhandled async trap CPU#{} -> {}\n", hart, cause_num); } } } else { // Synchronous trap match cause_num { 2 => { // Illegal instruction panic!("Illegal instruction CPU#{} -> 0x{:08x}: 0x{:08x}\n", hart, epc, tval); }, 8 => { // Environment (system) call from User mode println!("E-call from User mode! CPU#{} -> 0x{:08x}", hart, epc); return_pc += 4; }, 9 => { // Environment (system) call from Supervisor mode println!("E-call from Supervisor mode! CPU#{} -> 0x{:08x}", hart, epc); return_pc += 4; }, 11 => { // Environment (system) call from Machine mode panic!("E-call from Machine mode! 
CPU#{} -> 0x{:08x}\n", hart, epc); }, // Page faults 12 => { // Instruction page fault println!("Instruction page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval); return_pc += 4; }, 13 => { // Load page fault println!("Load page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval); return_pc += 4; }, 15 => { // Store page fault println!("Store page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval); return_pc += 4; }, _ => { panic!("Unhandled sync trap CPU#{} -> {}\n", hart, cause_num); } } }; // Finally, return the updated program counter return_pc } ================================================ FILE: risc_v/chapters/ch5/src/uart.rs ================================================ // uart.rs // UART routines and driver use core::{convert::TryInto, fmt::{Error, Write}}; pub struct Uart { base_address: usize, } impl Write for Uart { fn write_str(&mut self, out: &str) -> Result<(), Error> { for c in out.bytes() { self.put(c); } Ok(()) } } impl Uart { pub fn new(base_address: usize) -> Self { Uart { base_address } } pub fn init(&mut self) { let ptr = self.base_address as *mut u8; unsafe { // First, set the word length, which // are bits 0 and 1 of the line control register (LCR) // which is at base_address + 3 // We can easily write the value 3 here or 0b11, but I'm // extending it so that it is clear we're setting two // individual fields // Word 0 Word 1 // ~~~~~~ ~~~~~~ let lcr: u8 = (1 << 0) | (1 << 1); ptr.add(3).write_volatile(lcr); // Now, enable the FIFO, which is bit index 0 of the // FIFO control register (FCR at offset 2). // Again, we can just write 1 here, but when we use left // shift, it's easier to see that we're trying to write // bit index #0. ptr.add(2).write_volatile(1 << 0); // Enable receiver buffer interrupts, which is at bit // index 0 of the interrupt enable register (IER at // offset 1). 
ptr.add(1).write_volatile(1 << 0); // If we cared about the divisor, the code below would // set the divisor from a global clock rate of 22.729 // MHz (22,729,000 cycles per second) to a signaling // rate of 2400 (BAUD). We usually have much faster // signalling rates nowadays, but this demonstrates what // the divisor actually does. The formula given in the // NS16500A specification for calculating the divisor // is: // divisor = ceil( (clock_hz) / (baud_sps x 16) ) // So, we substitute our values and get: // divisor = ceil( 22_729_000 / (2400 x 16) ) // divisor = ceil( 22_729_000 / 38_400 ) // divisor = ceil( 591.901 ) = 592 // The divisor register is two bytes (16 bits), so we // need to split the value 592 into two bytes. // Typically, we would calculate this based on measuring // the clock rate, but again, for our purposes [qemu], // this doesn't really do anything. let divisor: u16 = 592; let divisor_least: u8 = (divisor & 0xff).try_into().unwrap(); let divisor_most: u8 = (divisor >> 8).try_into().unwrap(); // Notice that the divisor register DLL (divisor latch // least) and DLM (divisor latch most) have the same // base address as the receiver/transmitter and the // interrupt enable register. To change what the base // address points to, we open the "divisor latch" by // writing 1 into the Divisor Latch Access Bit (DLAB), // which is bit index 7 of the Line Control Register // (LCR) which is at base_address + 3. ptr.add(3).write_volatile(lcr | 1 << 7); // Now, base addresses 0 and 1 point to DLL and DLM, // respectively. Put the lower 8 bits of the divisor // into DLL ptr.add(0).write_volatile(divisor_least); ptr.add(1).write_volatile(divisor_most); // Now that we've written the divisor, we never have to // touch this again. In hardware, this will divide the // global clock (22.729 MHz) into one suitable for 2,400 // signals per second. So, to once again get access to // the RBR/THR/IER registers, we need to close the DLAB // bit by clearing it to 0. 
ptr.add(3).write_volatile(lcr); } } pub fn put(&mut self, c: u8) { let ptr = self.base_address as *mut u8; unsafe { ptr.add(0).write_volatile(c); } } pub fn get(&mut self) -> Option<u8> { let ptr = self.base_address as *mut u8; unsafe { if ptr.add(5).read_volatile() & 1 == 0 { // The DR bit is 0, meaning no data None } else { // The DR bit is 1, meaning data! Some(ptr.add(0).read_volatile()) } } } } ================================================ FILE: risc_v/chapters/ch6/.cargo/config ================================================ [build] target = "riscv64gc-unknown-none-elf" [target.riscv64gc-unknown-none-elf] linker = "riscv64-unknown-linux-gnu-gcc" ================================================ FILE: risc_v/chapters/ch6/.gitignore ================================================ os.elf target/* Cargo.lock hdd.dsk ================================================ FILE: risc_v/chapters/ch6/Cargo.toml ================================================ [package] name = "sos" version = "0.1.0" authors = ["Stephen Marz "] edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] crate-type = ["staticlib"] [dependencies] ================================================ FILE: risc_v/chapters/ch6/Makefile ================================================ ##### ## BUILD ##### CC=riscv64-unknown-linux-gnu-gcc CFLAGS=-Wall -Wextra -pedantic -Wextra -O0 -g CFLAGS+=-static -ffreestanding -nostdlib -fno-rtti -fno-exceptions CFLAGS+=-march=rv64gc -mabi=lp64 INCLUDES= LINKER_SCRIPT=-Tsrc/lds/virt.lds TYPE=debug RUST_TARGET=./target/riscv64gc-unknown-none-elf/$(TYPE) LIBS=-L$(RUST_TARGET) SOURCES_ASM=$(wildcard src/asm/*.S) LIB=-lsos -lgcc OUT=os.elf ##### ## QEMU ##### QEMU=qemu-system-riscv64 MACH=virt CPU=rv64 CPUS=4 MEM=128M DISK=hdd.dsk # DRIVE= -drive if=none,format=raw,file=$(DISK),id=foo -device virtio-blk-device,scsi=off,drive=foo DRIVE= all: cargo build $(CC) $(CFLAGS) $(LINKER_SCRIPT) $(INCLUDES) -o $(OUT)
$(SOURCES_ASM) $(LIBS) $(LIB) run: all $(QEMU) -machine $(MACH) -cpu $(CPU) -smp $(CPUS) -m $(MEM) $(DRIVE) -nographic -serial mon:stdio -bios none -kernel $(OUT) .PHONY: clean clean: cargo clean rm -f $(OUT) ================================================ FILE: risc_v/chapters/ch6/make_hdd.sh ================================================ #!/bin/sh dd if=/dev/zero of=hdd.dsk bs=1M count=32 ================================================ FILE: risc_v/chapters/ch6/src/asm/boot.S ================================================ # boot.S # bootloader for SoS # Stephen Marz # 8 February 2019 # Disable generation of compressed instructions. .option norvc # Define a .text.init section. The .text.init is put at the # starting address so that the entry _start is put at the RISC-V # address 0x8000_0000. .section .text.init # Execution starts here. .global _start _start: # Disable linker instruction relaxation for the `la` instruction below. # This disallows the assembler from assuming that `gp` is already initialized. # This causes the value stored in `gp` to be calculated from `pc`. # The job of the global pointer is to give the linker the ability to address # memory relative to GP instead of as an absolute address. .option push .option norelax la gp, _global_pointer .option pop # SATP should be zero, but let's make sure. Each HART has its own # SATP register. csrw satp, zero # Any hardware threads (hart) that are not bootstrapping # need to wait for an IPI csrr t0, mhartid bnez t0, 3f # Set all bytes in the BSS section to zero. la a0, _bss_start la a1, _bss_end bgeu a0, a1, 2f 1: sd zero, (a0) addi a0, a0, 8 bltu a0, a1, 1b 2: # The stack grows from bottom to top, so we put the stack pointer # to the very end of the stack range. la sp, _stack_end # Setting `mstatus` register: # 0b01 << 11: Machine's previous protection mode is 2 (MPP=2). 
li t0, 0b11 << 11 csrw mstatus, t0 # Do not allow interrupts while running kinit csrw mie, zero # Machine's exception program counter (MEPC) is set to `kinit`. la t1, kinit csrw mepc, t1 # Set the return address to get us into supervisor mode la ra, 2f # We use mret here so that the mstatus register is properly updated. mret 2: # We set the return address (ra above) to this label. When kinit() is finished # in Rust, it will return here. # Setting `mstatus` (supervisor status) register: # 0b01 << 11 : Previous protection mode is 1 (MPP=01 [Supervisor]). # 1 << 7 : Previous machine interrupt-enable bit is 1 (MPIE=1 [Enabled]) # 1 << 5 : Previous interrupt-enable bit is 1 (SPIE=1 [Enabled]). # We set the "previous" bits because the mret will write the current bits # with the previous bits. li t0, (0b00 << 11) | (1 << 7) | (1 << 5) csrw mstatus, t0 # Machine's trap vector base address is set to `m_trap_vector`, for # "machine" trap vector. la t2, m_trap_vector csrw mtvec, t2 # Jump to first process. We put the MPP = 00 for user mode, so after # mret, we will jump to the first process' addresss in user mode. csrw mepc, a0 li t2, 0xaaa csrw mie, t2 la ra, 4f mret 3: # Parked harts go here. We need to set these # to only awaken if it receives a software interrupt, # which we're going to call the SIPI (Software Intra-Processor Interrupt). # We call the SIPI by writing the software interrupt into the Core Local Interruptor (CLINT) # Which is calculated by: base_address + hart * 4 # where base address is 0x0200_0000 (MMIO CLINT base address) # We only use additional harts to run user-space programs, although this may # change. # We divide up the stack so the harts aren't clobbering one another. la sp, _stack_end li t0, 0x10000 csrr a0, mhartid mul t0, t0, a0 sub sp, sp, t0 # The parked harts will be put into machine mode with interrupts enabled. li t0, 0b11 << 11 | (1 << 7) csrw mstatus, t0 # Allow for MSIP (Software interrupt). 
We will write the MSIP from hart #0 to # awaken these parked harts. li t3, (1 << 3) csrw mie, t3 # Machine's exception program counter (MEPC) is set to the Rust initialization # code and waiting loop. la t1, kinit_hart csrw mepc, t1 # Machine's trap vector base address is set to `m_trap_vector`, for # "machine" trap vector. The Rust initialization routines will give each # hart its own trap frame. We can use the same trap function and distinguish # between each hart by looking at the trap frame. la t2, m_trap_vector csrw mtvec, t2 # Whenever our hart is done initializing, we want it to return to the waiting # loop, which is just below mret. la ra, 4f # We use mret here so that the mstatus register is properly updated. mret 4: # wfi = wait for interrupt. This is a hint to the harts to shut everything needed # down. However, the RISC-V specification allows for wfi to do nothing. Anyway, # with QEMU, this will save some CPU! wfi j 4b ================================================ FILE: risc_v/chapters/ch6/src/asm/mem.S ================================================ // mem.S // Importation of linker symbols .section .rodata .global HEAP_START HEAP_START: .dword _heap_start .global HEAP_SIZE HEAP_SIZE: .dword _heap_size .global TEXT_START TEXT_START: .dword _text_start .global TEXT_END TEXT_END: .dword _text_end .global DATA_START DATA_START: .dword _data_start .global DATA_END DATA_END: .dword _data_end .global RODATA_START RODATA_START: .dword _rodata_start .global RODATA_END RODATA_END: .dword _rodata_end .global BSS_START BSS_START: .dword _bss_start .global BSS_END BSS_END: .dword _bss_end .global KERNEL_STACK_START KERNEL_STACK_START: .dword _stack_start .global KERNEL_STACK_END KERNEL_STACK_END: .dword _stack_end ================================================ FILE: risc_v/chapters/ch6/src/asm/trap.S ================================================ # trap.S # Trap handler and global context # Steve Operating System # Stephen Marz # 24 February 2019 .option 
norvc .altmacro .set NUM_GP_REGS, 32 # Number of registers per context .set NUM_FP_REGS, 32 .set REG_SIZE, 8 # Register size (in bytes) .set MAX_CPUS, 8 # Maximum number of CPUs # Use macros for saving and restoring multiple registers .macro save_gp i, basereg=t6 sd x\i, ((\i)*REG_SIZE)(\basereg) .endm .macro load_gp i, basereg=t6 ld x\i, ((\i)*REG_SIZE)(\basereg) .endm .macro save_fp i, basereg=t6 fsd f\i, ((NUM_GP_REGS+(\i))*REG_SIZE)(\basereg) .endm .macro load_fp i, basereg=t6 fld f\i, ((NUM_GP_REGS+(\i))*REG_SIZE)(\basereg) .endm .section .text .global m_trap_vector # This must be aligned by 4 since the last two bits # of the mtvec register do not contribute to the address # of this vector. .align 4 m_trap_vector: # All registers are volatile here, we need to save them # before we do anything. csrrw t6, mscratch, t6 # csrrw will atomically swap t6 into mscratch and the old # value of mscratch into t6. This is nice because we just # switched values and didn't destroy anything -- all atomically! # in cpu.rs we have a structure of: # 32 gp regs 0 # 32 fp regs 256 # SATP register 512 # Trap stack 520 # CPU HARTID 528 # We use t6 as the temporary register because it is the very # bottom register (x31) .set i, 1 .rept 30 save_gp %i .set i, i+1 .endr # Save the actual t6 register, which we swapped into # mscratch mv t5, t6 csrr t6, mscratch save_gp 31, t5 # Restore the kernel trap frame into mscratch csrw mscratch, t5 # Get ready to go into Rust (trap.rs) # We don't want to write into the user's stack or whomever # messed with us here. csrr a0, mepc csrr a1, mtval csrr a2, mcause csrr a3, mhartid csrr a4, mstatus mv a5, t5 ld sp, 520(a5) call m_trap # When we get here, we've returned from m_trap, restore registers # and return. # m_trap will return the return address via a0. 
csrw mepc, a0 # Now load the trap frame back into t6 csrr t6, mscratch # Restore all GP registers .set i, 1 .rept 31 load_gp %i .set i, i+1 .endr # Since we ran this loop 31 times starting with i = 1, # the last one loaded t6 back to its original value. mret .global make_syscall make_syscall: ecall ret ================================================ FILE: risc_v/chapters/ch6/src/cpu.rs ================================================ // cpu.rs // CPU and CPU-related routines // Also contains the kernel's trap frame // Stephen Marz // 14 October 2019 use core::ptr::null_mut; /// In 64-bit mode, we're given three different modes for the MMU: /// 0 - The MMU is off -- no protection and no translation PA = VA /// 8 - This is Sv39 mode -- 39-bit virtual addresses /// 9 - This is Sv48 mode -- 48-bit virtual addresses #[repr(usize)] pub enum SatpMode { Off = 0, Sv39 = 8, Sv48 = 9, } /// The trap frame is set into a structure /// and packed into each hart's mscratch register. /// This allows for quick reference and full /// context switch handling. #[repr(C)] #[derive(Clone, Copy)] pub struct TrapFrame { pub regs: [usize; 32], // 0 - 255 pub fregs: [usize; 32], // 256 - 511 pub satp: usize, // 512 - 519 pub trap_stack: *mut u8, // 520 pub hartid: usize, // 528 } /// Rust requires that we initialize our structures /// because of the move semantics. What'll happen below /// is Rust will construct a new TrapFrame and move it /// out of the zero() function below. Rust contains two /// different "selfs" where self can refer to the object /// in memory or Self (capital S) which refers to the /// data type of the structure. In the case below, this /// is TrapFrame. impl TrapFrame { pub const fn zero() -> Self { TrapFrame { regs: [0; 32], fregs: [0; 32], satp: 0, trap_stack: null_mut(), hartid: 0, } } } /// The global kernel trap frame stores 8 separate /// frames -- one per CPU hart. We will switch these /// in and out and store a dormant trap frame with /// the process itself. 
pub static mut KERNEL_TRAP_FRAME: [TrapFrame; 8] = [TrapFrame::zero(); 8]; /// The SATP register contains three fields: mode, address space id, and /// the first level table address (level 2 for Sv39). This function /// helps make the 64-bit register contents based on those three /// fields. pub const fn build_satp(mode: SatpMode, asid: usize, addr: usize) -> usize { (mode as usize) << 60 | (asid & 0xffff) << 44 | (addr >> 12) & 0xff_ffff_ffff } pub fn mhartid_read() -> usize { unsafe { let rval; asm!("csrr $0, mhartid" :"=r"(rval)); rval } } pub fn mie_read() -> usize { unsafe { let rval; asm!("csrr $0, mie" :"=r"(rval)); rval } } pub fn mie_write(val: usize) { unsafe { asm!("csrw mie, $0" :: "r"(val)); } } pub fn mstatus_write(val: usize) { unsafe { asm!("csrw mstatus, $0" ::"r"(val)); } } pub fn mstatus_read() -> usize { unsafe { let rval; asm!("csrr $0, mstatus":"=r"(rval)); rval } } pub fn stvec_write(val: usize) { unsafe { asm!("csrw stvec, $0" ::"r"(val)); } } pub fn stvec_read() -> usize { unsafe { let rval; asm!("csrr $0, stvec" :"=r"(rval)); rval } } pub fn mscratch_write(val: usize) { unsafe { asm!("csrw mscratch, $0" ::"r"(val)); } } pub fn mscratch_read() -> usize { unsafe { let rval; asm!("csrr $0, mscratch" : "=r"(rval)); rval } } pub fn mscratch_swap(to: usize) -> usize { unsafe { let from; asm!("csrrw $0, mscratch, $1" : "=r"(from) : "r"(to)); from } } pub fn sscratch_write(val: usize) { unsafe { asm!("csrw sscratch, $0" ::"r"(val)); } } pub fn sscratch_read() -> usize { unsafe { let rval; asm!("csrr $0, sscratch" : "=r"(rval)); rval } } pub fn sscratch_swap(to: usize) -> usize { unsafe { let from; asm!("csrrw $0, sscratch, $1" : "=r"(from) : "r"(to)); from } } pub fn sepc_write(val: usize) { unsafe { asm!("csrw sepc, $0" :: "r"(val)); } } pub fn sepc_read() -> usize { unsafe { let rval; asm!("csrr $0, sepc" :"=r"(rval)); rval } } pub fn satp_write(val: usize) { unsafe { asm!("csrw satp, $0" :: "r"(val)); } } pub fn satp_read() -> usize { unsafe { 
let rval; asm!("csrr $0, satp" :"=r"(rval)); rval } } /// Take a hammer to the page tables and synchronize /// all of them. This essentially flushes the entire /// TLB. pub fn satp_fence(vaddr: usize, asid: usize) { unsafe { asm!("sfence.vma $0, $1" :: "r"(vaddr), "r"(asid)); } } /// Synchronize based on the address space identifier /// This allows us to fence a particular process rather /// than the entire TLB. /// The RISC-V documentation calls this a TLB flush +. /// Since there are other memory routines involved, they /// didn't call it a TLB flush, but it is much like /// Intel/AMD's invtlb [] instruction. pub fn satp_fence_asid(asid: usize) { unsafe { asm!("sfence.vma zero, $0" :: "r"(asid)); } } ================================================ FILE: risc_v/chapters/ch6/src/kmem.rs ================================================ // kmem.rs // Sub-page level: malloc-like allocation system // Stephen Marz // 7 October 2019 use crate::page::{align_val, zalloc, Table, PAGE_SIZE}; use core::{mem::size_of, ptr::null_mut}; #[repr(usize)] enum AllocListFlags { Taken = 1 << 63, } impl AllocListFlags { pub fn val(self) -> usize { self as usize } } struct AllocList { pub flags_size: usize, } impl AllocList { pub fn is_taken(&self) -> bool { self.flags_size & AllocListFlags::Taken.val() != 0 } pub fn is_free(&self) -> bool { !self.is_taken() } pub fn set_taken(&mut self) { self.flags_size |= AllocListFlags::Taken.val(); } pub fn set_free(&mut self) { self.flags_size &= !AllocListFlags::Taken.val(); } pub fn set_size(&mut self, sz: usize) { let k = self.is_taken(); self.flags_size = sz & !AllocListFlags::Taken.val(); if k { self.flags_size |= AllocListFlags::Taken.val(); } } pub fn get_size(&self) -> usize { self.flags_size & !AllocListFlags::Taken.val() } } // This is the head of the allocation. We start here when // we search for a free memory location. 
static mut KMEM_HEAD: *mut AllocList = null_mut(); // In the future, we will have on-demand pages // so, we need to keep track of our memory footprint to // see if we actually need to allocate more. static mut KMEM_ALLOC: usize = 0; static mut KMEM_PAGE_TABLE: *mut Table = null_mut(); // These functions are safe helpers around an unsafe // operation. pub fn get_head() -> *mut u8 { unsafe { KMEM_HEAD as *mut u8 } } pub fn get_page_table() -> *mut Table { unsafe { KMEM_PAGE_TABLE as *mut Table } } pub fn get_num_allocations() -> usize { unsafe { KMEM_ALLOC } } /// Initialize kernel's memory /// This is not to be used to allocate memory /// for user processes. If that's the case, use /// alloc/dealloc from the page crate. pub fn init() { unsafe { // Allocate kernel pages (KMEM_ALLOC) KMEM_ALLOC = 512; let k_alloc = zalloc(KMEM_ALLOC); assert!(!k_alloc.is_null()); KMEM_HEAD = k_alloc as *mut AllocList; (*KMEM_HEAD).set_free(); (*KMEM_HEAD).set_size(KMEM_ALLOC * PAGE_SIZE); KMEM_PAGE_TABLE = zalloc(1) as *mut Table; } } /// Allocate sub-page level allocation based on bytes and zero the memory pub fn kzmalloc(sz: usize) -> *mut u8 { let size = align_val(sz, 3); let ret = kmalloc(size); if !ret.is_null() { for i in 0..size { unsafe { (*ret.add(i)) = 0; } } } ret } /// Allocate sub-page level allocation based on bytes pub fn kmalloc(sz: usize) -> *mut u8 { unsafe { let size = align_val(sz, 3) + size_of::<AllocList>(); let mut head = KMEM_HEAD; // .add() uses pointer arithmetic, so we type-cast into a u8 // so that we multiply by an absolute size (KMEM_ALLOC * // PAGE_SIZE). let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { if (*head).is_free() && size <= (*head).get_size() { let chunk_size = (*head).get_size(); let rem = chunk_size - size; (*head).set_taken(); if rem > size_of::<AllocList>() { let next = (head as *mut u8).add(size) as *mut AllocList; // There is space remaining here.
(*next).set_free(); (*next).set_size(rem); (*head).set_size(size); } else { // If we get here, take the entire chunk (*head).set_size(chunk_size); } return head.add(1) as *mut u8; } else { // If we get here, what we saw wasn't a free // chunk, move on to the next. head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } // If we get here, we didn't find any free chunks--i.e. there isn't // enough memory for this. TODO: Add on-demand page allocation. null_mut() } /// Free a sub-page level allocation pub fn kfree(ptr: *mut u8) { unsafe { if !ptr.is_null() { let p = (ptr as *mut AllocList).offset(-1); if (*p).is_taken() { (*p).set_free(); } // After we free, see if we can combine adjacent free // spots to see if we can reduce fragmentation. coalesce(); } } } /// Merge smaller chunks into a bigger chunk pub fn coalesce() { unsafe { let mut head = KMEM_HEAD; let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { let next = (head as *mut u8).add((*head).get_size()) as *mut AllocList; if (*head).get_size() == 0 { // If this happens, then we have a bad heap // (double free or something). However, that // will cause an infinite loop since the next // pointer will never move beyond the current // location. break; } else if next >= tail { // We calculated the next by using the size // given as get_size(), however this could push // us past the tail. In that case, the size is // wrong, hence we break and stop doing what we // need to do. break; } else if (*head).is_free() && (*next).is_free() { // This means we have adjacent blocks needing to // be freed. So, we combine them into one // allocation. (*head).set_size( (*head).get_size() + (*next).get_size(), ); } // If we get here, we might've moved. Recalculate new // head. 
head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } /// For debugging purposes, print the kmem table pub fn print_table() { unsafe { let mut head = KMEM_HEAD; let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { println!( "{:p}: Length = {:<10} Taken = {}", head, (*head).get_size(), (*head).is_taken() ); head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } // /////////////////////////////////// // / GLOBAL ALLOCATOR // /////////////////////////////////// // The global allocator allows us to use the data structures // in the core library, such as a linked list or B-tree. // We want to use these sparingly since we have a coarse-grained // allocator. use core::alloc::{GlobalAlloc, Layout}; // The global allocator is a static constant to a global allocator // structure. We don't need any members because we're using this // structure just to implement alloc and dealloc. struct OsGlobalAlloc; unsafe impl GlobalAlloc for OsGlobalAlloc { unsafe fn alloc(&self, layout: Layout) -> *mut u8 { // We align to the next page size so that when // we divide by PAGE_SIZE, we get exactly the number // of pages necessary. kzmalloc(layout.size()) } unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) { // We ignore layout since our allocator uses ptr_start -> last // to determine the span of an allocation. kfree(ptr); } } #[global_allocator] /// Technically, we don't need the {} at the end, but it /// reveals that we're creating a new structure and not just /// copying a value. static GA: OsGlobalAlloc = OsGlobalAlloc {}; #[alloc_error_handler] /// If for some reason alloc() in the global allocator gets null_mut(), /// then we come here. This is a divergent function, so we call panic to /// let the tester know what's going on. pub fn alloc_error(l: Layout) -> ! 
{ panic!( "Allocator failed to allocate {} bytes with {}-byte alignment.", l.size(), l.align() ); } ================================================ FILE: risc_v/chapters/ch6/src/lds/virt.lds ================================================ /* virt.lds Linker script for outputting to RISC-V QEMU "virt" machine. Stephen Marz 6 October 2019 */ /* riscv is the name of the architecture that the linker understands for any RISC-V target (64-bit or 32-bit). We will further refine this by using -mabi=lp64 and -march=rv64gc */ OUTPUT_ARCH( "riscv" ) /* We're setting our entry point to a symbol called _start which is inside of boot.S. This essentially stores the address of _start as the "entry point", or where CPU instructions should start executing. In the rest of this script, we are going to place _start right at the beginning of 0x8000_0000 because this is where the virtual machine and many RISC-V boards will start executing. */ ENTRY( _start ) /* The MEMORY section will explain that we have "ram" that contains a section that is 'w' (writeable), 'x' (executable), and 'a' (allocatable). We use '!' to invert 'r' (read-only) and 'i' (initialized). We don't want our memory to be read-only, and we're stating that it is NOT initialized at the beginning. The ORIGIN is the memory address 0x8000_0000. If we look at the virt spec or the specification for the RISC-V HiFive Unleashed, this is the starting memory address for our code. Side note: There might be other boot ROMs at different addresses, but their job is to get to this point. Finally LENGTH = 128M tells the linker that we have 128 megabyte of RAM. The linker will double check this to make sure everything can fit. The HiFive Unleashed has a lot more RAM than this, but for the virtual machine, I went with 128M since I think that's enough RAM for now. We can provide other pieces of memory, such as QSPI, or ROM, but we're telling the linker script here that we have one pool of RAM. 
*/ MEMORY { ram (wxa!ri) : ORIGIN = 0x80000000, LENGTH = 128M } /* PHDRS is short for "program headers", which we specify three here: text - CPU instructions (executable sections) data - Global, initialized variables bss - Global, uninitialized variables (all will be set to 0 by boot.S) The command PT_LOAD tells the linker that these sections will be loaded from the file into memory. We can actually stuff all of these into a single program header, but by splitting it up into three, we can actually use the other PT_* commands such as PT_DYNAMIC, PT_INTERP, PT_NULL to tell the linker where to find additional information. However, for our purposes, every section will be loaded from the program headers. */ PHDRS { text PT_LOAD; data PT_LOAD; bss PT_LOAD; } /* We are now going to organize the memory based on which section it is in. In assembly, we can change the section with the ".section" directive. However, in C++ and Rust, CPU instructions go into text, global constants go into rodata, global initialized variables go into data, and global uninitialized variables go into bss. */ SECTIONS { /* The first part of our RAM layout will be the text section. Since our CPU instructions are here, and our memory starts at 0x8000_0000, we need our entry point to line up here. */ .text : { /* PROVIDE allows me to access a symbol called _text_start so I know where the text section starts in the operating system. This should not move, but it is here for convenience. The period '.' tells the linker to set _text_start to the CURRENT location ('.' = current memory location). This current memory location moves as we add things. */ PROVIDE(_text_start = .); /* We are going to layout all text sections here, starting with .text.init. The asterisk in front of the parentheses means to match the .text.init section of ANY object file. 
Otherwise, we can specify which object file should contain the .text.init section, for example, boot.o(.text.init) would specifically put the .text.init section of our bootloader here. Because we might want to change the name of our files, we'll leave it with a *. Inside the parentheses is the name of the section. I created my own called .text.init to make 100% sure that the _start is put right at the beginning. The linker will lay this out in the order it receives it: .text.init first all .text sections next any .text.* sections last .text.* means to match anything after .text. If we didn't already specify .text.init, this would've matched here. The assembler and linker can place things in "special" text sections, so we match any we might come across here. */ *(.text.init) *(.text .text.*) /* Again, with PROVIDE, we're providing a readable symbol called _text_end, which is set to the memory address AFTER .text.init, .text, and .text.*'s have been added. */ PROVIDE(_text_end = .); /* The portion after the right brace is in an odd format. However, this is telling the linker what memory portion to put it in. We labeled our RAM, ram, with the constraints that it is writeable, allocatable, and executable. The linker will make sure with this that we can do all of those things. >ram - This just tells the linker script to put this entire section (.text) into the ram region of memory. To my knowledge, the '>' does not mean "greater than". Instead, it is a symbol to let the linker know we want to put this in ram. AT>ram - This sets the LMA (load memory address) region to the same thing. LMA is the final translation of a VMA (virtual memory address). With this linker script, we're loading everything into its physical location. We'll let the kernel copy and sort out the virtual memory. That's why >ram and AT>ram are continually the same thing. :text - This tells the linker script to put this into the :text program header. We've only defined three: text, data, and bss. 
In this case, we're telling the linker script to go into the text section. */ } >ram AT>ram :text /* The global pointer allows the linker to position global variables and constants into independent positions relative to the gp (global pointer) register. The globals start after the text sections and are only relevant to the rodata, data, and bss sections. */ PROVIDE(_global_pointer = .); /* Most compilers create a rodata (read only data) section for global constants. However, we're going to place ours in the text section. We can actually put this in :data, but since the .text section is read-only, we can place it there. NOTE: This doesn't actually do anything, yet. The actual "protection" cannot be done at link time. Instead, when we program the memory management unit (MMU), we will be able to choose which bits (R=read, W=write, X=execute) we want each memory segment to be able to do. */ .rodata : { PROVIDE(_rodata_start = .); *(.rodata .rodata.*) PROVIDE(_rodata_end = .); /* Again, we're placing the rodata section in the memory segment "ram" and we're putting it in the :text program header. We don't have one for rodata anyway. */ } >ram AT>ram :text .data : { /* . = ALIGN(4096) tells the linker to align the current memory location (which is 0x8000_0000 + text section + rodata section) to 4096 bytes. This is because our paging system's resolution is 4,096 bytes or 4 KiB. */ . = ALIGN(4096); PROVIDE(_data_start = .); /* sdata and data are essentially the same thing. However, compilers usually use the sdata sections for shorter, quicker loading sections. So, usually critical data is loaded there. However, we're loading all of this in one fell swoop. So, we're looking to put all of the following sections under the umbrella .data: .sdata .sdata.[anything] .data .data.[anything] ...in that order. 
*/
    *(.sdata .sdata.*) *(.data .data.*)
    PROVIDE(_data_end = .);
  } >ram AT>ram :data

  .bss : {
    PROVIDE(_bss_start = .);
    *(.sbss .sbss.*) *(.bss .bss.*)
    PROVIDE(_bss_end = .);
  } >ram AT>ram :bss

  /*
     The following will be helpful when we allocate the kernel stack (_stack)
     and determine where the heap begins and ends (_heap_start and
     _heap_start + _heap_size). When we do memory allocation, we can use these
     symbols.

     We use the symbols instead of hard-coding an address because this is a
     floating target. As we add code, the heap moves farther down the memory
     and gets shorter.

     _memory_start will be set to 0x8000_0000 here. We use ORIGIN(ram) so that
     it will take whatever we set the origin of ram to. Otherwise, we'd have to
     change it more than once if we ever stray away from 0x8000_0000 as our
     entry point.
  */
  PROVIDE(_memory_start = ORIGIN(ram));
  /*
     Our kernel stack starts at the end of the bss segment (_bss_end). However,
     we're allocating 0x8000 bytes (32 KiB) to our kernel stack. This should be
     PLENTY of space. The reason we add the memory is because the stack grows
     from higher memory to lower memory (bottom to top). Therefore we set the
     stack at the very bottom of its allocated slot. When we go to allocate
     from the stack, we'll subtract the number of bytes we need.
  */
  PROVIDE(_stack_start = _bss_end);
  PROVIDE(_stack_end = _stack_start + 0x8000);
  PROVIDE(_memory_end = ORIGIN(ram) + LENGTH(ram));

  /*
     Finally, our heap starts right after the kernel stack. This heap will be
     used mainly to dole out memory for user-space applications. However, in
     some circumstances, it will be used for kernel memory as well.

     We don't align here because we let the kernel determine how it wants to
     do this.
*/ PROVIDE(_heap_start = _stack_end); PROVIDE(_heap_size = _memory_end - _heap_start); } ================================================ FILE: risc_v/chapters/ch6/src/lib.rs ================================================ // Steve Operating System // Stephen Marz // 21 Sep 2019 #![no_std] #![feature(panic_info_message, asm, allocator_api, alloc_error_handler, alloc_prelude, const_raw_ptr_to_usize_cast)] // #[macro_use] extern crate alloc; // This is experimental and requires alloc_prelude as a feature // use alloc::prelude::v1::*; // /////////////////////////////////// // / RUST MACROS // /////////////////////////////////// #[macro_export] macro_rules! print { ($($args:tt)+) => ({ use core::fmt::Write; let _ = write!(crate::uart::Uart::new(0x1000_0000), $($args)+); }); } #[macro_export] macro_rules! println { () => ({ print!("\r\n") }); ($fmt:expr) => ({ print!(concat!($fmt, "\r\n")) }); ($fmt:expr, $($args:tt)+) => ({ print!(concat!($fmt, "\r\n"), $($args)+) }); } // /////////////////////////////////// // / LANGUAGE STRUCTURES / FUNCTIONS // /////////////////////////////////// #[no_mangle] extern "C" fn eh_personality() {} #[panic_handler] fn panic(info: &core::panic::PanicInfo) -> ! { print!("Aborting: "); if let Some(p) = info.location() { println!( "line {}, file {}: {}", p.line(), p.file(), info.message().unwrap() ); } else { println!("no information available."); } abort(); } #[no_mangle] extern "C" fn abort() -> ! { loop { unsafe { asm!("wfi"::::"volatile"); } } } // /////////////////////////////////// // / CONSTANTS // /////////////////////////////////// // const STR_Y: &str = "\x1b[38;2;79;221;13m✓\x1b[m"; // const STR_N: &str = "\x1b[38;2;221;41;13m✘\x1b[m"; // The following symbols come from asm/mem.S. We can use // the symbols directly, but the address of the symbols // themselves are their values, which can cause issues. // Instead, I created doubleword values in mem.S in the .rodata and .data // sections. 
/* extern "C" { static TEXT_START: usize; static TEXT_END: usize; static DATA_START: usize; static DATA_END: usize; static RODATA_START: usize; static RODATA_END: usize; static BSS_START: usize; static BSS_END: usize; static KERNEL_STACK_START: usize; static KERNEL_STACK_END: usize; static HEAP_START: usize; static HEAP_SIZE: usize; } */ /// Identity map range /// Takes a contiguous allocation of memory and maps it using PAGE_SIZE /// This assumes that start <= end pub fn id_map_range(root: &mut page::Table, start: usize, end: usize, bits: i64) { let mut memaddr = start & !(page::PAGE_SIZE - 1); let num_kb_pages = (page::align_val(end, 12) - memaddr) / page::PAGE_SIZE; // I named this num_kb_pages for future expansion when // I decide to allow for GiB (2^30) and 2MiB (2^21) page // sizes. However, the overlapping memory regions are causing // nightmares. for _ in 0..num_kb_pages { page::map(root, memaddr, memaddr, bits, 0); memaddr += 1 << 12; } } // /////////////////////////////////// // / ENTRY POINT // /////////////////////////////////// #[no_mangle] extern "C" fn kinit() -> usize { uart::Uart::new(0x1000_0000).init(); page::init(); kmem::init(); let ret = process::init(); println!("Init process created at address 0x{:08x}", ret); // We lower the threshold wall so our interrupts can jump over it. plic::set_threshold(0); // VIRTIO = [1..8] // UART0 = 10 // PCIE = [32..35] // Enable the UART interrupt. plic::enable(10); plic::set_priority(10, 1); println!("UART interrupts have been enabled and are awaiting your command."); println!("Getting ready for first process."); println!("Issuing the first context-switch timer."); unsafe { let mtimecmp = 0x0200_4000 as *mut u64; let mtime = 0x0200_bff8 as *const u64; // The frequency given by QEMU is 10_000_000 Hz, so this sets // the next interrupt to fire one second from now. mtimecmp.write_volatile(mtime.read_volatile() + 10_000_000); } // When we return, we put the return value into mepc and start there. 
This // should be init's starting point. ret } #[no_mangle] extern "C" fn kinit_hart(hartid: usize) { // All non-0 harts initialize here. unsafe { // We have to store the kernel's table. The tables will be moved // back and forth between the kernel's table and user // applicatons' tables. cpu::mscratch_write( (&mut cpu::KERNEL_TRAP_FRAME[hartid] as *mut cpu::TrapFrame) as usize, ); // Copy the same mscratch over to the supervisor version of the // same register. cpu::sscratch_write(cpu::mscratch_read()); cpu::KERNEL_TRAP_FRAME[hartid].hartid = hartid; // We can't do the following until zalloc() is locked, but we // don't have locks, yet :( cpu::KERNEL_TRAP_FRAME[hartid].satp // = cpu::KERNEL_TRAP_FRAME[0].satp; // cpu::KERNEL_TRAP_FRAME[hartid].trap_stack = page::zalloc(1); } } // /////////////////////////////////// // / RUST MODULES // /////////////////////////////////// pub mod cpu; pub mod kmem; pub mod page; pub mod plic; pub mod process; pub mod trap; pub mod uart; ================================================ FILE: risc_v/chapters/ch6/src/page.rs ================================================ // page.rs // Memory routines // Stephen Marz // 6 October 2019 use core::{mem::size_of, ptr::null_mut}; // //////////////////////////////// // // Allocation routines // //////////////////////////////// extern "C" { static HEAP_START: usize; static HEAP_SIZE: usize; } // We will use ALLOC_START to mark the start of the actual // memory we can dish out. static mut ALLOC_START: usize = 0; const PAGE_ORDER: usize = 12; pub const PAGE_SIZE: usize = 1 << 12; /// Align (set to a multiple of some power of two) /// This takes an order which is the exponent to 2^order /// Therefore, all alignments must be made as a power of two. /// This function always rounds up. 
pub const fn align_val(val: usize, order: usize) -> usize {
	// o is a mask of the low `order` bits (e.g. order=12 -> 0xfff).
	let o = (1usize << order) - 1;
	// Adding the mask then clearing the low bits rounds UP to the
	// next multiple of 2^order (values already aligned are unchanged).
	(val + o) & !o
}

#[repr(u8)]
pub enum PageBits {
	Empty = 0,
	Taken = 1 << 0,
	Last = 1 << 1,
}

impl PageBits {
	// We convert PageBits to a u8 a lot, so this is
	// for convenience.
	pub fn val(self) -> u8 {
		self as u8
	}
}

// Each page is described by the Page structure. Linux does this
// as well, where each 4096-byte chunk of memory has a structure
// associated with it. However, their structure is much larger.
pub struct Page {
	flags: u8,
}

impl Page {
	// If this page has been marked as the final allocation,
	// this function returns true. Otherwise, it returns false.
	pub fn is_last(&self) -> bool {
		self.flags & PageBits::Last.val() != 0
	}

	// If the page is marked as being taken (allocated), then
	// this function returns true. Otherwise, it returns false.
	pub fn is_taken(&self) -> bool {
		self.flags & PageBits::Taken.val() != 0
	}

	// This is the opposite of is_taken().
	pub fn is_free(&self) -> bool {
		!self.is_taken()
	}

	// Clear the Page structure and all associated allocations.
	pub fn clear(&mut self) {
		self.flags = PageBits::Empty.val();
	}

	// Set a certain flag. We ran into trouble here since PageBits
	// is an enumeration and we haven't implemented the BitOr Trait
	// on it.
	pub fn set_flag(&mut self, flag: PageBits) {
		self.flags |= flag.val();
	}

	pub fn clear_flag(&mut self, flag: PageBits) {
		self.flags &= !(flag.val());
	}
}

// Initialize the allocation system. There are several ways that we can
// implement the page allocator:
// 1. Free list (singly linked list where it starts at the first free
//    allocation)
// 2. Bookkeeping list (structure contains a taken and length)
// 3. Allocate one Page structure per 4096 bytes (this is what I chose)
// 4.
Others pub fn init() { unsafe { // let desc_per_page = PAGE_SIZE / size_of::(); let num_pages = HEAP_SIZE / PAGE_SIZE; // let num_desc_pages = num_pages / desc_per_page; let ptr = HEAP_START as *mut Page; // Clear all pages to make sure that they aren't accidentally // taken for i in 0..num_pages { (*ptr.add(i)).clear(); } // Determine where the actual useful memory starts. This will be // after all Page structures. We also must align the ALLOC_START // to a page-boundary (PAGE_SIZE = 4096). ALLOC_START = // (HEAP_START + num_pages * size_of::() + PAGE_SIZE - 1) // & !(PAGE_SIZE - 1); ALLOC_START = align_val( HEAP_START + num_pages * size_of::(), PAGE_ORDER, ); } } /// Allocate a page or multiple pages /// pages: the number of PAGE_SIZE pages to allocate pub fn alloc(pages: usize) -> *mut u8 { // We have to find a contiguous allocation of pages assert!(pages > 0); unsafe { // We create a Page structure for each page on the heap. We // actually might have more since HEAP_SIZE moves and so does // the size of our structure, but we'll only waste a few bytes. let num_pages = HEAP_SIZE / PAGE_SIZE; let ptr = HEAP_START as *mut Page; for i in 0..num_pages - pages { let mut found = false; // Check to see if this Page is free. If so, we have our // first candidate memory address. if (*ptr.add(i)).is_free() { // It was FREE! Yay! found = true; for j in i..i + pages { // Now check to see if we have a // contiguous allocation for all of the // request pages. If not, we should // check somewhere else. if (*ptr.add(j)).is_taken() { found = false; break; } } } // We've checked to see if there are enough contiguous // pages to form what we need. If we couldn't, found // will be false, otherwise it will be true, which means // we've found valid memory we can allocate. if found { for k in i..i + pages - 1 { (*ptr.add(k)).set_flag(PageBits::Taken); } // The marker for the last page is // PageBits::Last This lets us know when we've // hit the end of this particular allocation. 
(*ptr.add(i+pages-1)).set_flag(PageBits::Taken); (*ptr.add(i+pages-1)).set_flag(PageBits::Last); // The Page structures themselves aren't the // useful memory. Instead, there is 1 Page // structure per 4096 bytes starting at // ALLOC_START. return (ALLOC_START + PAGE_SIZE * i) as *mut u8; } } } // If we get here, that means that no contiguous allocation was // found. null_mut() } /// Allocate and zero a page or multiple pages /// pages: the number of pages to allocate /// Each page is PAGE_SIZE which is calculated as 1 << PAGE_ORDER /// On RISC-V, this typically will be 4,096 bytes. pub fn zalloc(pages: usize) -> *mut u8 { // Allocate and zero a page. // First, let's get the allocation let ret = alloc(pages); if !ret.is_null() { let size = (PAGE_SIZE * pages) / 8; let big_ptr = ret as *mut u64; for i in 0..size { // We use big_ptr so that we can force an // sd (store doubleword) instruction rather than // the sb. This means 8x fewer stores than before. // Typically we have to be concerned about remaining // bytes, but fortunately 4096 % 8 = 0, so we // won't have any remaining bytes. unsafe { (*big_ptr.add(i)) = 0; } } } ret } /// Deallocate a page by its pointer /// The way we've structured this, it will automatically coalesce /// contiguous pages. pub fn dealloc(ptr: *mut u8) { // Make sure we don't try to free a null pointer. assert!(!ptr.is_null()); unsafe { let addr = HEAP_START + (ptr as usize - ALLOC_START) / PAGE_SIZE; // Make sure that the address makes sense. The address we // calculate here is the page structure, not the HEAP address! assert!(addr >= HEAP_START && addr < HEAP_START + HEAP_SIZE); let mut p = addr as *mut Page; // Keep clearing pages until we hit the last page. while (*p).is_taken() && !(*p).is_last() { (*p).clear(); p = p.add(1); } // If the following assertion fails, it is most likely // caused by a double-free. assert!( (*p).is_last() == true, "Possible double-free detected! 
(Not taken found \ before last)" ); // If we get here, we've taken care of all previous pages and // we are on the last page. (*p).clear(); } } /// Print all page allocations /// This is mainly used for debugging. pub fn print_page_allocations() { unsafe { let num_pages = (HEAP_SIZE - (ALLOC_START - HEAP_START)) / PAGE_SIZE; let mut beg = HEAP_START as *const Page; let end = beg.add(num_pages); let alloc_beg = ALLOC_START; let alloc_end = ALLOC_START + num_pages * PAGE_SIZE; println!(); println!( "PAGE ALLOCATION TABLE\nMETA: {:p} -> {:p}\nPHYS: \ 0x{:x} -> 0x{:x}", beg, end, alloc_beg, alloc_end ); println!("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"); let mut num = 0; while beg < end { if (*beg).is_taken() { let start = beg as usize; let memaddr = ALLOC_START + (start - HEAP_START) * PAGE_SIZE; print!("0x{:x} => ", memaddr); loop { num += 1; if (*beg).is_last() { let end = beg as usize; let memaddr = ALLOC_START + (end - HEAP_START) * PAGE_SIZE + PAGE_SIZE - 1; print!( "0x{:x}: {:>3} page(s)", memaddr, (end - start + 1) ); println!("."); break; } beg = beg.add(1); } } beg = beg.add(1); } println!("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"); println!( "Allocated: {:>6} pages ({:>10} bytes).", num, num * PAGE_SIZE ); println!( "Free : {:>6} pages ({:>10} bytes).", num_pages - num, (num_pages - num) * PAGE_SIZE ); println!(); } } // //////////////////////////////// // // MMU Routines // //////////////////////////////// // Represent (repr) our entry bits as // unsigned 64-bit integers. 
#[repr(i64)]
#[derive(Copy, Clone)]
pub enum EntryBits {
	None = 0,
	Valid = 1 << 0,
	Read = 1 << 1,
	Write = 1 << 2,
	Execute = 1 << 3,
	User = 1 << 4,
	Global = 1 << 5,
	Access = 1 << 6,
	Dirty = 1 << 7,

	// Convenience combinations
	ReadWrite = 1 << 1 | 1 << 2,
	ReadExecute = 1 << 1 | 1 << 3,
	ReadWriteExecute = 1 << 1 | 1 << 2 | 1 << 3,

	// User Convenience Combinations
	UserReadWrite = 1 << 1 | 1 << 2 | 1 << 4,
	UserReadExecute = 1 << 1 | 1 << 3 | 1 << 4,
	UserReadWriteExecute = 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4,
}

// Helper functions to convert the enumeration
// into an i64, which is what our page table
// entries will be.
impl EntryBits {
	pub fn val(self) -> i64 {
		self as i64
	}
}

// A single entry. We're using an i64 so that
// this will sign-extend rather than zero-extend
// since RISC-V requires that the reserved sections
// take on the most significant bit.
pub struct Entry {
	pub entry: i64,
}

// The Entry structure describes one of the 512 entries per table, which is
// described in the RISC-V privileged spec Figure 4.18.
impl Entry {
	// The first bit (bit index #0) is the V bit for
	// valid.
	pub fn is_valid(&self) -> bool {
		self.get_entry() & EntryBits::Valid.val() != 0
	}

	pub fn is_invalid(&self) -> bool {
		!self.is_valid()
	}

	// A leaf has one or more RWX bits set (0xe masks bits 1..3).
	pub fn is_leaf(&self) -> bool {
		self.get_entry() & 0xe != 0
	}

	// A branch (pointer to the next-level table) is any valid,
	// non-leaf entry.
	pub fn is_branch(&self) -> bool {
		!self.is_leaf()
	}

	pub fn set_entry(&mut self, entry: i64) {
		self.entry = entry;
	}

	pub fn get_entry(&self) -> i64 {
		self.entry
	}
}

// Table represents a single table, which contains 512 (2^9), 64-bit entries.
pub struct Table {
	pub entries: [Entry; 512],
}

impl Table {
	pub fn len() -> usize {
		512
	}
}

/// Map a virtual address to a physical address using 4096-byte page
/// size.
/// root: a mutable reference to the root Table
/// vaddr: The virtual address to map
/// paddr: The physical address to map
/// bits: An OR'd bitset containing the bits the leaf should have.
/// The bits should contain only the following:
///   Read, Write, Execute, User, and/or Global
/// The bits MUST include one or more of the following:
///   Read, Write, Execute
/// The valid bit automatically gets added.
pub fn map(root: &mut Table, vaddr: usize, paddr: usize, bits: i64, level: usize) {
	// Make sure that Read, Write, or Execute have been provided
	// otherwise, we'll leak memory and always create a page fault.
	assert!(bits & 0xe != 0);
	// Extract out each VPN from the virtual address
	// On the virtual address, each VPN is exactly 9 bits,
	// which is why we use the mask 0x1ff = 0b1_1111_1111 (9 bits)
	let vpn = [
	           // VPN[0] = vaddr[20:12]
	           (vaddr >> 12) & 0x1ff,
	           // VPN[1] = vaddr[29:21]
	           (vaddr >> 21) & 0x1ff,
	           // VPN[2] = vaddr[38:30]
	           (vaddr >> 30) & 0x1ff,
	];
	// Just like the virtual address, extract the physical address
	// numbers (PPN). However, PPN[2] is different in that it stores
	// 26 bits instead of 9. Therefore, we use,
	// 0x3ff_ffff = 0b11_1111_1111_1111_1111_1111_1111 (26 bits).
	let ppn = [
	           // PPN[0] = paddr[20:12]
	           (paddr >> 12) & 0x1ff,
	           // PPN[1] = paddr[29:21]
	           (paddr >> 21) & 0x1ff,
	           // PPN[2] = paddr[55:30]
	           (paddr >> 30) & 0x3ff_ffff,
	];
	// We will use this as a floating reference so that we can set
	// individual entries as we walk the table.
	let mut v = &mut root.entries[vpn[2]];
	// Now, we're going to traverse the page table and set the bits
	// properly. We expect the root to be valid, however we're required to
	// create anything beyond the root.
	// In Rust, we create a range iterator using the .. operator.
	// The .rev() will reverse the iteration since we need to start with
	// VPN[2]. The .. operator is inclusive on start but exclusive on end.
	// So, (0..2) will iterate 0 and 1.
	for i in (level..2).rev() {
		if !v.is_valid() {
			// Allocate a page for the missing next-level table.
			let page = zalloc(1);
			// The page is already aligned by 4,096, so store it
			// directly. The page is stored in the entry shifted
			// right by 2 places (PPN starts at PTE bit 10, but the
			// address's page offset is 12 bits: 12 - 10 = 2).
			v.set_entry(
			            (page as i64 >> 2)
			            | EntryBits::Valid.val(),
			);
		}
		// Recover the next-level table's address (shift the PPN back
		// left by 2) and descend to the entry selected by vpn[i].
		let entry = ((v.get_entry() & !0x3ff) << 2) as *mut Entry;
		v = unsafe { entry.add(vpn[i]).as_mut().unwrap() };
	}
	// When we get here, we should be at VPN[0] and v should be pointing to
	// our entry.
	// The entry structure is Figure 4.18 in the RISC-V Privileged
	// Specification
	let entry = (ppn[2] << 28) as i64 |   // PPN[2] = [53:28]
	            (ppn[1] << 19) as i64 |   // PPN[1] = [27:19]
	            (ppn[0] << 10) as i64 |   // PPN[0] = [18:10]
	            bits |                    // Specified bits, such as User, Read, Write, etc
	            EntryBits::Valid.val() |  // Valid bit
	            EntryBits::Dirty.val() |  // Some machines require this to =1
	            EntryBits::Access.val()   // Just like dirty, some machines require this
	            ;
	// Set the entry. V should be set to the correct pointer by the loop
	// above.
	v.set_entry(entry);
}

/// Unmaps and frees all memory associated with a table.
/// root: The root table to start freeing.
/// NOTE: This does NOT free root directly. This must be
/// freed manually.
/// The reason we don't free the root is because it is
/// usually embedded into the Process structure.
pub fn unmap(root: &mut Table) {
	// Start with level 2
	for lv2 in 0..Table::len() {
		let ref entry_lv2 = root.entries[lv2];
		if entry_lv2.is_valid() && entry_lv2.is_branch() {
			// This is a valid entry, so drill down and free.
			let memaddr_lv1 = (entry_lv2.get_entry() & !0x3ff) << 2;
			let table_lv1 = unsafe {
				// Make table_lv1 a mutable reference instead of
				// a pointer.
				(memaddr_lv1 as *mut Table).as_mut().unwrap()
			};
			for lv1 in 0..Table::len() {
				let ref entry_lv1 = table_lv1.entries[lv1];
				if entry_lv1.is_valid() && entry_lv1.is_branch()
				{
					let memaddr_lv0 = (entry_lv1.get_entry()
					                   & !0x3ff) << 2;
					// The next level is level 0, which
					// cannot have branches, therefore,
					// we free here.
					dealloc(memaddr_lv0 as *mut u8);
				}
			}
			dealloc(memaddr_lv1 as *mut u8);
		}
	}
}

/// Walk the page table to convert a virtual address to a
/// physical address.
/// If a page fault would occur, this returns None /// Otherwise, it returns Some with the physical address. pub fn virt_to_phys(root: &Table, vaddr: usize) -> Option { // Walk the page table pointed to by root let vpn = [ // VPN[0] = vaddr[20:12] (vaddr >> 12) & 0x1ff, // VPN[1] = vaddr[29:21] (vaddr >> 21) & 0x1ff, // VPN[2] = vaddr[38:30] (vaddr >> 30) & 0x1ff, ]; let mut v = &root.entries[vpn[2]]; for i in (0..=2).rev() { if v.is_invalid() { // This is an invalid entry, page fault. break; } else if v.is_leaf() { // According to RISC-V, a leaf can be at any level. // The offset mask masks off the PPN. Each PPN is 9 // bits and they start at bit #12. So, our formula // 12 + i * 9 let off_mask = (1 << (12 + i * 9)) - 1; let vaddr_pgoff = vaddr & off_mask; let addr = ((v.get_entry() << 2) as usize) & !off_mask; return Some(addr | vaddr_pgoff); } // Set v to the next entry which is pointed to by this // entry. However, the address was shifted right by 2 places // when stored in the page table entry, so we shift it left // to get it back into place. let entry = ((v.get_entry() & !0x3ff) << 2) as *const Entry; // We do i - 1 here, however we should get None or Some() above // before we do 0 - 1 = -1. v = unsafe { entry.add(vpn[i - 1]).as_ref().unwrap() }; } // If we get here, we've exhausted all valid tables and haven't // found a leaf. None } ================================================ FILE: risc_v/chapters/ch6/src/plic.rs ================================================ // plic.rs // Platform Level Interrupt Controller (PLIC) // Stephen Marz // 1 Nov 2019 const PLIC_PRIORITY: usize = 0x0c00_0000; const PLIC_PENDING: usize = 0x0c00_1000; const PLIC_INT_ENABLE: usize = 0x0c00_2000; const PLIC_THRESHOLD: usize = 0x0c20_0000; const PLIC_CLAIM: usize = 0x0c20_0004; // Each register is 4-bytes (u32) // The PLIC is an external interrupt controller. The one // used by QEMU virt is the same as the SiFive PLIC. 
// https://sifive.cdn.prismic.io/sifive%2F834354f0-08e6-423c-bf1f-0cb58ef14061_fu540-c000-v1.0.pdf // Chapter 10 explains the priority, pending, interrupt enable, threshold and claims // The virt machine has the following external interrupts (from Qemu source): // Interrupt 0 is a "null" interrupt and is hardwired to 0. // VIRTIO = [1..8] // UART0 = 10 // PCIE = [32..35] /// Get the next available interrupt. This is the "claim" process. /// The plic will automatically sort by priority and hand us the /// ID of the interrupt. For example, if the UART is interrupting /// and it's next, we will get the value 10. pub fn next() -> Option { let claim_reg = PLIC_CLAIM as *const u32; let claim_no; // The claim register is filled with the highest-priority, enabled interrupt. unsafe { claim_no = claim_reg.read_volatile(); } if claim_no == 0 { // The interrupt 0 is hardwired to 0, which tells us that there is no // interrupt to claim, hence we return None. None } else { // If we get here, we've gotten a non-0 interrupt. Some(claim_no) } } /// Complete a pending interrupt by id. The id should come /// from the next() function above. pub fn complete(id: u32) { let complete_reg = PLIC_CLAIM as *mut u32; unsafe { // We actually write a u32 into the entire complete_register. // This is the same register as the claim register, but it can // differentiate based on whether we're reading or writing. complete_reg.write_volatile(id); } } /// Set the global threshold. The threshold can be a value [0..7]. /// The PLIC will mask any interrupts at or below the given threshold. /// This means that a threshold of 7 will mask ALL interrupts and /// a threshold of 0 will allow ALL interrupts. pub fn set_threshold(tsh: u8) { // We do tsh because we're using a u8, but our maximum number // is a 3-bit 0b111. So, we and with 7 (0b111) to just get the // last three bits. 
let actual_tsh = tsh & 7; let tsh_reg = PLIC_THRESHOLD as *mut u32; unsafe { tsh_reg.write_volatile(actual_tsh as u32); } } /// See if a given interrupt id is pending. pub fn is_pending(id: u32) -> bool { let pend = PLIC_PENDING as *const u32; let actual_id = 1 << id; let pend_ids; unsafe { pend_ids = pend.read_volatile(); } actual_id & pend_ids != 0 } /// Enable a given interrupt id pub fn enable(id: u32) { let enables = PLIC_INT_ENABLE as *mut u32; let actual_id = 1 << id; unsafe { // Unlike the complete and claim registers, the plic_int_enable // register is a bitset where the id is the bit index. The register // is a 32-bit register, so that gives us enables for interrupts // 31 through 1 (0 is hardwired to 0). enables.write_volatile(enables.read_volatile() | actual_id); } } /// Set a given interrupt priority to the given priority. /// The priority must be [0..7] pub fn set_priority(id: u32, prio: u8) { let actual_prio = prio as u32 & 7; let prio_reg = PLIC_PRIORITY as *mut u32; unsafe { // The offset for the interrupt id is: // PLIC_PRIORITY + 4 * id // Since we're using pointer arithmetic on a u32 type, // it will automatically multiply the id by 4. prio_reg.add(id as usize).write_volatile(actual_prio); } } ================================================ FILE: risc_v/chapters/ch6/src/process.rs ================================================ // process.rs // Kernel and user processes // Stephen Marz // 27 Nov 2019 use crate::{cpu::{build_satp, mscratch_write, satp_fence_asid, satp_write, SatpMode, TrapFrame}, page::{alloc, dealloc, map, unmap, zalloc, EntryBits, Table, PAGE_SIZE}}; use alloc::collections::vec_deque::VecDeque; // How many pages are we going to give a process for their // stack? const STACK_PAGES: usize = 2; // We want to adjust the stack to be at the bottom of the memory allocation // regardless of where it is on the kernel heap. const STACK_ADDR: usize = 0xf_0000_0000; // All processes will have a defined starting point in virtual memory. 
const PROCESS_STARTING_ADDR: usize = 0x2000_0000; // Here, we store a process list. It uses the global allocator // that we made before and its job is to store all processes. // We will have this list OWN the process. So, anytime we want // the process, we will consult the process list. // Using an Option here is one method of creating a "lazy static". // Rust requires that all statics be initialized, but all // initializations must be at compile-time. We cannot allocate // a VecDeque at compile time, so we are somewhat forced to // do this. static mut PROCESS_LIST: Option> = None; // We can search through the process list to get a new PID, but // it's probably easier and faster just to increase the pid: static mut NEXT_PID: u16 = 1; /// We will eventually move this function out of here, but its /// job is just to take a slot in the process list. fn init_process() { // We can't do much here until we have system calls because // we're running in User space. loop {} } /// Add a process given a function address and then /// push it onto the LinkedList. Uses Process::new_default /// to create a new stack, etc. pub fn add_process_default(pr: fn()) { unsafe { // This is the Rust-ism that really trips up C++ programmers. // PROCESS_LIST is wrapped in an Option<> enumeration, which // means that the Option owns the Deque. We can only borrow from // it or move ownership to us. In this case, we choose the // latter, where we move ownership to us, add a process, and // then move ownership back to the PROCESS_LIST. // This allows mutual exclusion as anyone else trying to grab // the process list will get None rather than the Deque. if let Some(mut pl) = PROCESS_LIST.take() { // .take() will replace PROCESS_LIST with None and give // us the only copy of the Deque. let p = Process::new_default(pr); pl.push_back(p); // Now, we no longer need the owned Deque, so we hand it // back by replacing the PROCESS_LIST's None with the // Some(pl). 
PROCESS_LIST.replace(pl); } // TODO: When we get to multi-hart processing, we need to keep // trying to grab the process list. We can do this with an // atomic instruction. but right now, we're a single-processor // computer. } } /// This should only be called once, and its job is to create /// the init process. Right now, this process is in the kernel, /// but later, it should call the shell. pub fn init() -> usize { unsafe { PROCESS_LIST = Some(VecDeque::with_capacity(5)); add_process_default(init_process); // Ugh....Rust is giving me fits over here! // I just want a memory address to the trap frame, but // due to the borrow rules of Rust, I'm fighting here. So, // instead, let's move the value out of PROCESS_LIST, get // the address, and then move it right back in. let pl = PROCESS_LIST.take().unwrap(); let p = pl.front().unwrap().frame; let frame = &p as *const TrapFrame as usize; mscratch_write(frame); satp_write(build_satp( SatpMode::Sv39, 1, pl.front().unwrap().root as usize, ),); // Synchronize PID 1. We use ASID as the PID. satp_fence_asid(1); // Put the process list back in the global. PROCESS_LIST.replace(pl); // Return the first instruction's address to execute. // Since we use the MMU, all start here. PROCESS_STARTING_ADDR } } // Our process must be able to sleep, wait, or run. // Running - means that when the scheduler finds this process, it can run it. // Sleeping - means that the process is waiting on a certain amount of time. // Waiting - means that the process is waiting on I/O // Dead - We should never get here, but we can flag a process as Dead and clean // it out of the list later. pub enum ProcessState { Running, Sleeping, Waiting, Dead, } // Let's represent this in C ABI. We do this // because we need to access some of the fields // in assembly. Rust gets to choose how it orders // the fields unless we represent the structure in // C-style ABI. 
#[repr(C)] pub struct Process { frame: TrapFrame, stack: *mut u8, program_counter: usize, pid: u16, root: *mut Table, state: ProcessState, data: ProcessData, } impl Process { pub fn new_default(func: fn()) -> Self { let func_addr = func as usize; // We will convert NEXT_PID below into an atomic increment when // we start getting into multi-hart processing. For now, we want // a process. Get it to work, then improve it! let mut ret_proc = Process { frame: TrapFrame::zero(), stack: alloc(STACK_PAGES), program_counter: PROCESS_STARTING_ADDR, pid: unsafe { NEXT_PID }, root: zalloc(1) as *mut Table, state: ProcessState::Waiting, data: ProcessData::zero(), }; unsafe { NEXT_PID += 1; } // Now we move the stack pointer to the bottom of the // allocation. The spec shows that register x2 (2) is the stack // pointer. // We could use ret_proc.stack.add, but that's an unsafe // function which would require an unsafe block. So, convert it // to usize first and then add PAGE_SIZE is better. // We also need to set the stack adjustment so that it is at the // bottom of the memory and far away from heap allocations. ret_proc.frame.regs[2] = STACK_ADDR + PAGE_SIZE * STACK_PAGES; // Map the stack on the MMU let pt; unsafe { pt = &mut *ret_proc.root; } let saddr = ret_proc.stack as usize; // We need to map the stack onto the user process' virtual // memory This gets a little hairy because we need to also map // the function code too. for i in 0..STACK_PAGES { let addr = i * PAGE_SIZE; map( pt, STACK_ADDR + addr, saddr + addr, EntryBits::UserReadWrite.val(), 0, ); } // Map the program counter on the MMU map( pt, PROCESS_STARTING_ADDR, func_addr, EntryBits::UserReadExecute.val(), 0, ); map( pt, PROCESS_STARTING_ADDR + 0x1001, func_addr + 0x1001, EntryBits::UserReadExecute.val(), 0, ); ret_proc } } impl Drop for Process { /// Since we're storing ownership of a Process in the linked list, /// we can cause it to deallocate automatically when it is removed. 
fn drop(&mut self) { // We allocate the stack as a page. dealloc(self.stack); // This is unsafe, but it's at the drop stage, so we won't // be using this again. unsafe { // Remember that unmap unmaps all levels of page tables // except for the root. It also deallocates the memory // associated with the tables. unmap(&mut *self.root); } dealloc(self.root as *mut u8); } } // The private data in a process contains information // that is relevant to where we are, including the path // and open file descriptors. pub struct ProcessData { cwd_path: [u8; 128], } // This is private data that we can query with system calls. // If we want to implement CFQ (completely fair queuing), which // is a per-process block queuing algorithm, we can put that here. impl ProcessData { pub fn zero() -> Self { ProcessData { cwd_path: [0; 128], } } } ================================================ FILE: risc_v/chapters/ch6/src/trap.rs ================================================ // trap.rs // Trap routines // Stephen Marz // 10 October 2019 use crate::cpu::TrapFrame; use crate::{plic, uart}; #[no_mangle] /// The m_trap stands for "machine trap". Right now, we are handling /// all traps at machine mode. In this mode, we can figure out what's /// going on and send a trap where it needs to be. Remember, in machine /// mode and in this trap, interrupts are disabled and the MMU is off. extern "C" fn m_trap(epc: usize, tval: usize, cause: usize, hart: usize, status: usize, frame: *mut TrapFrame) -> usize { // We're going to handle all traps in machine mode. RISC-V lets // us delegate to supervisor mode, but switching out SATP (virtual memory) // gets hairy. let is_async = { if cause >> 63 & 1 == 1 { true } else { false } }; // The cause contains the type of trap (sync, async) as well as the cause // number. So, here we narrow down just the cause number. 
let cause_num = cause & 0xfff; let mut return_pc = epc; if is_async { // Asynchronous trap match cause_num { 3 => { // Machine software println!("Machine software interrupt CPU#{}", hart); }, 7 => unsafe { // This is the context-switch timer. // We would typically invoke the scheduler here to pick another // process to run. // Machine timer println!("CTX"); let mtimecmp = 0x0200_4000 as *mut u64; let mtime = 0x0200_bff8 as *const u64; // The frequency given by QEMU is 10_000_000 Hz, so this sets // the next interrupt to fire one second from now. // This is much too slow for normal operations, but it gives us // a visual of what's happening behind the scenes. mtimecmp.write_volatile(mtime.read_volatile() + 10_000_000); }, 11 => { // Machine external (interrupt from Platform Interrupt Controller (PLIC)) // println!("Machine external interrupt CPU#{}", hart); // We will check the next interrupt. If the interrupt isn't available, this will // give us None. However, that would mean we got a spurious interrupt, unless we // get an interrupt from a non-PLIC source. This is the main reason that the PLIC // hardwires the id 0 to 0, so that we can use it as an error case. if let Some(interrupt) = plic::next() { // If we get here, we've got an interrupt from the claim register. The PLIC will // automatically prioritize the next interrupt, so when we get it from claim, it // will be the next in priority order. match interrupt { 10 => { // Interrupt 10 is the UART interrupt. // We would typically set this to be handled out of the interrupt context, // but we're testing here! C'mon! // We haven't yet used the singleton pattern for my_uart, but remember, this // just simply wraps 0x1000_0000 (UART). let mut my_uart = uart::Uart::new(0x1000_0000); // If we get here, the UART better have something! If not, what happened?? if let Some(c) = my_uart.get() { // If you recognize this code, it used to be in the lib.rs under kmain(). That // was because we needed to poll for UART data. 
Now that we have interrupts, // here it goes! match c { 8 => { // This is a backspace, so we // essentially have to write a space and // backup again: print!("{} {}", 8 as char, 8 as char); }, 10 | 13 => { // Newline or carriage-return println!(); }, _ => { print!("{}", c as char); }, } } }, // Non-UART interrupts go here and do nothing. _ => { println!("Non-UART external interrupt: {}", interrupt); } } // We've claimed it, so now say that we've handled it. This resets the interrupt pending // and allows the UART to interrupt again. Otherwise, the UART will get "stuck". plic::complete(interrupt); } }, _ => { panic!("Unhandled async trap CPU#{} -> {}\n", hart, cause_num); } } } else { // Synchronous trap match cause_num { 2 => { // Illegal instruction panic!("Illegal instruction CPU#{} -> 0x{:08x}: 0x{:08x}\n", hart, epc, tval); }, 8 => { // Environment (system) call from User mode println!("E-call from User mode! CPU#{} -> 0x{:08x}", hart, epc); return_pc += 4; }, 9 => { // Environment (system) call from Supervisor mode println!("E-call from Supervisor mode! CPU#{} -> 0x{:08x}", hart, epc); return_pc += 4; }, 11 => { // Environment (system) call from Machine mode panic!("E-call from Machine mode! 
CPU#{} -> 0x{:08x}\n", hart, epc); }, // Page faults 12 => { // Instruction page fault println!("Instruction page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval); return_pc += 4; }, 13 => { // Load page fault println!("Load page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval); return_pc += 4; }, 15 => { // Store page fault println!("Store page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval); return_pc += 4; }, _ => { panic!("Unhandled sync trap CPU#{} -> {}\n", hart, cause_num); } } }; // Finally, return the updated program counter return_pc } ================================================ FILE: risc_v/chapters/ch6/src/uart.rs ================================================ // uart.rs // UART routines and driver use core::{convert::TryInto, fmt::{Error, Write}}; pub struct Uart { base_address: usize, } impl Write for Uart { fn write_str(&mut self, out: &str) -> Result<(), Error> { for c in out.bytes() { self.put(c); } Ok(()) } } impl Uart { pub fn new(base_address: usize) -> Self { Uart { base_address } } pub fn init(&mut self) { let ptr = self.base_address as *mut u8; unsafe { // First, set the word length, which // are bits 0 and 1 of the line control register (LCR) // which is at base_address + 3 // We can easily write the value 3 here or 0b11, but I'm // extending it so that it is clear we're setting two // individual fields // Word 0 Word 1 // ~~~~~~ ~~~~~~ let lcr: u8 = (1 << 0) | (1 << 1); ptr.add(3).write_volatile(lcr); // Now, enable the FIFO, which is bit index 0 of the // FIFO control register (FCR at offset 2). // Again, we can just write 1 here, but when we use left // shift, it's easier to see that we're trying to write // bit index #0. ptr.add(2).write_volatile(1 << 0); // Enable receiver buffer interrupts, which is at bit // index 0 of the interrupt enable register (IER at // offset 1). 
ptr.add(1).write_volatile(1 << 0); // If we cared about the divisor, the code below would // set the divisor from a global clock rate of 22.729 // MHz (22,729,000 cycles per second) to a signaling // rate of 2400 (BAUD). We usually have much faster // signalling rates nowadays, but this demonstrates what // the divisor actually does. The formula given in the // NS16500A specification for calculating the divisor // is: // divisor = ceil( (clock_hz) / (baud_sps x 16) ) // So, we substitute our values and get: // divisor = ceil( 22_729_000 / (2400 x 16) ) // divisor = ceil( 22_729_000 / 38_400 ) // divisor = ceil( 591.901 ) = 592 // The divisor register is two bytes (16 bits), so we // need to split the value 592 into two bytes. // Typically, we would calculate this based on measuring // the clock rate, but again, for our purposes [qemu], // this doesn't really do anything. let divisor: u16 = 592; let divisor_least: u8 = (divisor & 0xff).try_into().unwrap(); let divisor_most: u8 = (divisor >> 8).try_into().unwrap(); // Notice that the divisor register DLL (divisor latch // least) and DLM (divisor latch most) have the same // base address as the receiver/transmitter and the // interrupt enable register. To change what the base // address points to, we open the "divisor latch" by // writing 1 into the Divisor Latch Access Bit (DLAB), // which is bit index 7 of the Line Control Register // (LCR) which is at base_address + 3. ptr.add(3).write_volatile(lcr | 1 << 7); // Now, base addresses 0 and 1 point to DLL and DLM, // respectively. Put the lower 8 bits of the divisor // into DLL ptr.add(0).write_volatile(divisor_least); ptr.add(1).write_volatile(divisor_most); // Now that we've written the divisor, we never have to // touch this again. In hardware, this will divide the // global clock (22.729 MHz) into one suitable for 2,400 // signals per second. So, to once again get access to // the RBR/THR/IER registers, we need to close the DLAB // bit by clearing it to 0. 
ptr.add(3).write_volatile(lcr); } } pub fn put(&mut self, c: u8) { let ptr = self.base_address as *mut u8; loop { // Wait until previous data is flushed if unsafe { ptr.add(5).read_volatile() } & (1 << 5) != 0 { break; } } unsafe { // Write data ptr.add(0).write_volatile(c); } } pub fn get(&mut self) -> Option { let ptr = self.base_address as *mut u8; unsafe { if ptr.add(5).read_volatile() & 1 == 0 { // The DR bit is 0, meaning no data None } else { // The DR bit is 1, meaning data! Some(ptr.add(0).read_volatile()) } } } } ================================================ FILE: risc_v/chapters/ch7/.cargo/config ================================================ [build] target = "riscv64gc-unknown-none-elf" [target.riscv64gc-unknown-none-elf] linker = "riscv64-unknown-linux-gnu-gcc" ================================================ FILE: risc_v/chapters/ch7/.gitignore ================================================ os.elf target/* Cargo.lock hdd.dsk ================================================ FILE: risc_v/chapters/ch7/Cargo.toml ================================================ [package] name = "sos" version = "0.1.0" authors = ["Stephen Marz "] edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] crate-type = ["staticlib"] [dependencies] ================================================ FILE: risc_v/chapters/ch7/Makefile ================================================ ##### ## BUILD ##### CC=riscv64-unknown-linux-gnu-gcc CFLAGS=-Wall -Wextra -pedantic -Wextra -O0 -g CFLAGS+=-static -ffreestanding -nostdlib -fno-rtti -fno-exceptions CFLAGS+=-march=rv64gc -mabi=lp64 INCLUDES= LINKER_SCRIPT=-Tsrc/lds/virt.lds TYPE=debug RUST_TARGET=./target/riscv64gc-unknown-none-elf/$(TYPE) LIBS=-L$(RUST_TARGET) SOURCES_ASM=$(wildcard src/asm/*.S) LIB=-lsos -lgcc OUT=os.elf ##### ## QEMU ##### QEMU=qemu-system-riscv64 MACH=virt CPU=rv64 CPUS=4 MEM=128M DISK=hdd.dsk # DRIVE= -drive 
if=none,format=raw,file=$(DISK),id=foo -device virtio-blk-device,scsi=off,drive=foo DRIVE= all: cargo build $(CC) $(CFLAGS) $(LINKER_SCRIPT) $(INCLUDES) -o $(OUT) $(SOURCES_ASM) $(LIBS) $(LIB) run: all $(QEMU) -machine $(MACH) -cpu $(CPU) -smp $(CPUS) -m $(MEM) $(DRIVE) -nographic -serial mon:stdio -bios none -kernel $(OUT) .PHONY: clean clean: cargo clean rm -f $(OUT) ================================================ FILE: risc_v/chapters/ch7/make_hdd.sh ================================================ #!/bin/sh dd if=/dev/zero of=hdd.dsk bs=1M count=32 ================================================ FILE: risc_v/chapters/ch7/src/asm/boot.S ================================================ # boot.S # bootloader for SoS # Stephen Marz # 8 February 2019 # Disable generation of compressed instructions. .option norvc # Define a .text.init section. The .text.init is put at the # starting address so that the entry _start is put at the RISC-V # address 0x8000_0000. .section .text.init # Execution starts here. .global _start _start: # Disable linker instruction relaxation for the `la` instruction below. # This disallows the assembler from assuming that `gp` is already initialized. # This causes the value stored in `gp` to be calculated from `pc`. # The job of the global pointer is to give the linker the ability to address # memory relative to GP instead of as an absolute address. .option push .option norelax la gp, _global_pointer .option pop # SATP should be zero, but let's make sure. Each HART has its own # SATP register. csrw satp, zero # Any hardware threads (hart) that are not bootstrapping # need to wait for an IPI csrr t0, mhartid bnez t0, 3f # Set all bytes in the BSS section to zero. la a0, _bss_start la a1, _bss_end bgeu a0, a1, 2f 1: sd zero, (a0) addi a0, a0, 8 bltu a0, a1, 1b 2: # The stack grows from bottom to top, so we put the stack pointer # to the very end of the stack range. 
la sp, _stack_end # Setting `mstatus` register: # 0b01 << 11: Machine's previous protection mode is 2 (MPP=2). li t0, 0b11 << 11 csrw mstatus, t0 # Do not allow interrupts while running kinit csrw mie, zero # Machine's exception program counter (MEPC) is set to `kinit`. la t1, kinit csrw mepc, t1 # Set the return address to get us into supervisor mode la ra, 2f # We use mret here so that the mstatus register is properly updated. mret 2: # We set the return address (ra above) to this label. When kinit() is finished # in Rust, it will return here. # Setting `mstatus` (supervisor status) register: # 0b01 << 11 : Previous protection mode is 1 (MPP=01 [Supervisor]). # 1 << 7 : Previous machine interrupt-enable bit is 1 (MPIE=1 [Enabled]) # 1 << 5 : Previous interrupt-enable bit is 1 (SPIE=1 [Enabled]). # We set the "previous" bits because the mret will write the current bits # with the previous bits. li t0, (0b00 << 11) | (1 << 7) | (1 << 5) csrw mstatus, t0 # Machine's trap vector base address is set to `m_trap_vector`, for # "machine" trap vector. la t2, m_trap_vector csrw mtvec, t2 # Jump to first process. We put the MPP = 00 for user mode, so after # mret, we will jump to the first process' addresss in user mode. csrw mepc, a0 li t2, 0xaaa csrw mie, t2 la ra, 4f mret 3: # Parked harts go here. We need to set these # to only awaken if it receives a software interrupt, # which we're going to call the SIPI (Software Intra-Processor Interrupt). # We call the SIPI by writing the software interrupt into the Core Local Interruptor (CLINT) # Which is calculated by: base_address + hart * 4 # where base address is 0x0200_0000 (MMIO CLINT base address) # We only use additional harts to run user-space programs, although this may # change. # We divide up the stack so the harts aren't clobbering one another. la sp, _stack_end li t0, 0x10000 csrr a0, mhartid mul t0, t0, a0 sub sp, sp, t0 # The parked harts will be put into machine mode with interrupts enabled. 
li t0, 0b11 << 11 | (1 << 7) csrw mstatus, t0 # Allow for MSIP (Software interrupt). We will write the MSIP from hart #0 to # awaken these parked harts. li t3, (1 << 3) csrw mie, t3 # Machine's exception program counter (MEPC) is set to the Rust initialization # code and waiting loop. la t1, kinit_hart csrw mepc, t1 # Machine's trap vector base address is set to `m_trap_vector`, for # "machine" trap vector. The Rust initialization routines will give each # hart its own trap frame. We can use the same trap function and distinguish # between each hart by looking at the trap frame. la t2, m_trap_vector csrw mtvec, t2 # Whenever our hart is done initializing, we want it to return to the waiting # loop, which is just below mret. la ra, 4f # We use mret here so that the mstatus register is properly updated. mret 4: # wfi = wait for interrupt. This is a hint to the harts to shut everything needed # down. However, the RISC-V specification allows for wfi to do nothing. Anyway, # with QEMU, this will save some CPU! 
wfi j 4b ================================================ FILE: risc_v/chapters/ch7/src/asm/mem.S ================================================ // mem.S // Importation of linker symbols .section .rodata .global HEAP_START HEAP_START: .dword _heap_start .global HEAP_SIZE HEAP_SIZE: .dword _heap_size .global TEXT_START TEXT_START: .dword _text_start .global TEXT_END TEXT_END: .dword _text_end .global DATA_START DATA_START: .dword _data_start .global DATA_END DATA_END: .dword _data_end .global RODATA_START RODATA_START: .dword _rodata_start .global RODATA_END RODATA_END: .dword _rodata_end .global BSS_START BSS_START: .dword _bss_start .global BSS_END BSS_END: .dword _bss_end .global KERNEL_STACK_START KERNEL_STACK_START: .dword _stack_start .global KERNEL_STACK_END KERNEL_STACK_END: .dword _stack_end ================================================ FILE: risc_v/chapters/ch7/src/asm/trap.S ================================================ # trap.S # Trap handler and global context # Steve Operating System # Stephen Marz # 24 February 2019 .option norvc .altmacro .set NUM_GP_REGS, 32 # Number of registers per context .set NUM_FP_REGS, 32 .set REG_SIZE, 8 # Register size (in bytes) .set MAX_CPUS, 8 # Maximum number of CPUs # Use macros for saving and restoring multiple registers .macro save_gp i, basereg=t6 sd x\i, ((\i)*REG_SIZE)(\basereg) .endm .macro load_gp i, basereg=t6 ld x\i, ((\i)*REG_SIZE)(\basereg) .endm .macro save_fp i, basereg=t6 fsd f\i, ((NUM_GP_REGS+(\i))*REG_SIZE)(\basereg) .endm .macro load_fp i, basereg=t6 fld f\i, ((NUM_GP_REGS+(\i))*REG_SIZE)(\basereg) .endm .section .text .global m_trap_vector # This must be aligned by 4 since the last two bits # of the mtvec register do not contribute to the address # of this vector. .align 4 m_trap_vector: # All registers are volatile here, we need to save them # before we do anything. csrrw t6, mscratch, t6 # csrrw will atomically swap t6 into mscratch and the old # value of mscratch into t6. 
This is nice because we just # switched values and didn't destroy anything -- all atomically! # in cpu.rs we have a structure of: # 32 gp regs 0 # 32 fp regs 256 # SATP register 512 # Trap stack 520 # CPU HARTID 528 # We use t6 as the temporary register because it is the very # bottom register (x31) .set i, 1 .rept 30 save_gp %i .set i, i+1 .endr # Save the actual t6 register, which we swapped into # mscratch mv t5, t6 csrr t6, mscratch save_gp 31, t5 # Restore the kernel trap frame into mscratch csrw mscratch, t5 # Get ready to go into Rust (trap.rs) # We don't want to write into the user's stack or whomever # messed with us here. csrr a0, mepc csrr a1, mtval csrr a2, mcause csrr a3, mhartid csrr a4, mstatus mv a5, t5 ld sp, 520(a5) call m_trap # When we get here, we've returned from m_trap, restore registers # and return. # m_trap will return the return address via a0. csrw mepc, a0 # Now load the trap frame back into t6 csrr t6, mscratch # Restore all GP registers .set i, 1 .rept 31 load_gp %i .set i, i+1 .endr # Since we ran this loop 31 times starting with i = 1, # the last one loaded t6 back to its original value. mret .global make_syscall make_syscall: ecall ret ================================================ FILE: risc_v/chapters/ch7/src/cpu.rs ================================================ // cpu.rs // CPU and CPU-related routines // Also contains the kernel's trap frame // Stephen Marz // 14 October 2019 use core::ptr::null_mut; /// In 64-bit mode, we're given three different modes for the MMU: /// 0 - The MMU is off -- no protection and no translation PA = VA /// 8 - This is Sv39 mode -- 39-bit virtual addresses /// 9 - This is Sv48 mode -- 48-bit virtual addresses #[repr(usize)] pub enum SatpMode { Off = 0, Sv39 = 8, Sv48 = 9, } /// The trap frame is set into a structure /// and packed into each hart's mscratch register. /// This allows for quick reference and full /// context switch handling. 
// The trap frame is accessed from assembly (trap.S) by fixed byte
// offsets, so it must be repr(C): Rust may not reorder the fields.
#[repr(C)]
#[derive(Clone, Copy)]
pub struct TrapFrame {
	pub regs:       [usize; 32], // byte offsets 0 - 255 (x0..x31)
	pub fregs:      [usize; 32], // byte offsets 256 - 511 (f0..f31)
	pub satp:       usize,       // byte offsets 512 - 519
	pub trap_stack: *mut u8,     // byte offset 520 (stack used while trapping)
	pub hartid:     usize,       // byte offset 528
}

/// Rust requires that we initialize our structures
/// because of the move semantics. What'll happen below
/// is Rust will construct a new TrapFrame and move it
/// out of the zero() function below. Rust contains two
/// different "selfs" where self can refer to the object
/// in memory or Self (capital S) which refers to the
/// data type of the structure. In the case below, this
/// is TrapFrame.
impl TrapFrame {
	/// Build an all-zero TrapFrame. This is a `const fn` so it can be
	/// used in static initializers (see KERNEL_TRAP_FRAME below).
	pub const fn zero() -> Self {
		TrapFrame { regs:       [0; 32],
		            fregs:      [0; 32],
		            satp:       0,
		            trap_stack: null_mut(),
		            hartid:     0, }
	}
}

/// The global kernel trap frame stores 8 separate
/// frames -- one per CPU hart. We will switch these
/// in and out and store a dormant trap frame with
/// the process itself.
pub static mut KERNEL_TRAP_FRAME: [TrapFrame; 8] =
	[TrapFrame::zero(); 8];

/// The SATP register contains three fields: mode, address space id, and
/// the first level table address (level 2 for Sv39). This function
/// helps make the 64-bit register contents based on those three
/// fields.
pub const fn build_satp(mode: SatpMode, asid: usize, addr: usize) -> usize {
	// Layout of the 64-bit satp value:
	//   mode  -> bits 63:60
	//   asid  -> bits 59:44 (masked to 16 bits)
	//   PPN   -> bits 43:0 (physical address >> 12, masked to 44 bits)
	(mode as usize) << 60
	| (asid & 0xffff) << 44
	| (addr >> 12) & 0xff_ffff_ffff
}

// NOTE(review): the asm! invocations below use the pre-2020
// "llvm_asm"-style syntax ($0 placeholders with :output:input sections).
// This only builds on the older nightly toolchain this chapter targets;
// do not port them piecemeal to the new asm! syntax.

/// Read the hart (hardware thread) id from the mhartid CSR.
pub fn mhartid_read() -> usize {
	unsafe {
		let rval;
		asm!("csrr $0, mhartid" :"=r"(rval));
		rval
	}
}

/// Read the machine interrupt-enable (mie) CSR.
pub fn mie_read() -> usize {
	unsafe {
		let rval;
		asm!("csrr $0, mie" :"=r"(rval));
		rval
	}
}

/// Write the machine interrupt-enable (mie) CSR.
pub fn mie_write(val: usize) {
	unsafe {
		asm!("csrw mie, $0" :: "r"(val));
	}
}

/// Write the machine status (mstatus) CSR.
pub fn mstatus_write(val: usize) {
	unsafe {
		asm!("csrw mstatus, $0" ::"r"(val));
	}
}

/// Read the machine status (mstatus) CSR.
pub fn mstatus_read() -> usize {
	unsafe {
		let rval;
		asm!("csrr $0, mstatus":"=r"(rval));
		rval
	}
}

/// Write the supervisor trap vector (stvec) CSR.
pub fn stvec_write(val: usize) {
	unsafe {
		asm!("csrw stvec, $0" ::"r"(val));
	}
}

/// Read the supervisor trap vector (stvec) CSR.
pub fn stvec_read() -> usize {
	unsafe {
		let rval;
		asm!("csrr $0, stvec" :"=r"(rval));
		rval
	}
}

/// Write the machine scratch (mscratch) CSR. The kernel stores a
/// pointer to the active trap frame here (see trap.S).
pub fn mscratch_write(val: usize) {
	unsafe {
		asm!("csrw mscratch, $0" ::"r"(val));
	}
}

/// Read the machine scratch (mscratch) CSR.
pub fn mscratch_read() -> usize {
	unsafe {
		let rval;
		asm!("csrr $0, mscratch" : "=r"(rval));
		rval
	}
}

/// Atomically swap `to` into mscratch, returning the old value.
pub fn mscratch_swap(to: usize) -> usize {
	unsafe {
		let from;
		asm!("csrrw $0, mscratch, $1" : "=r"(from) : "r"(to));
		from
	}
}

/// Write the supervisor scratch (sscratch) CSR.
pub fn sscratch_write(val: usize) {
	unsafe {
		asm!("csrw sscratch, $0" ::"r"(val));
	}
}

/// Read the supervisor scratch (sscratch) CSR.
pub fn sscratch_read() -> usize {
	unsafe {
		let rval;
		asm!("csrr $0, sscratch" : "=r"(rval));
		rval
	}
}

/// Atomically swap `to` into sscratch, returning the old value.
pub fn sscratch_swap(to: usize) -> usize {
	unsafe {
		let from;
		asm!("csrrw $0, sscratch, $1" : "=r"(from) : "r"(to));
		from
	}
}

/// Write the supervisor exception program counter (sepc) CSR.
pub fn sepc_write(val: usize) {
	unsafe {
		asm!("csrw sepc, $0" :: "r"(val));
	}
}

/// Read the supervisor exception program counter (sepc) CSR.
pub fn sepc_read() -> usize {
	unsafe {
		let rval;
		asm!("csrr $0, sepc" :"=r"(rval));
		rval
	}
}

/// Write the satp CSR (see build_satp above for the field layout).
pub fn satp_write(val: usize) {
	unsafe {
		asm!("csrw satp, $0" :: "r"(val));
	}
}

/// Read the satp CSR.
pub fn satp_read() -> usize {
	unsafe {
		let rval;
		asm!("csrr $0, satp" :"=r"(rval));
		rval
	}
}

/// Take a hammer to the page tables and synchronize
/// all of them. This essentially flushes the entire
/// TLB.
pub fn satp_fence(vaddr: usize, asid: usize) {
	unsafe {
		// sfence.vma with both operands fences translations for the
		// given virtual address and address space id.
		asm!("sfence.vma $0, $1" :: "r"(vaddr), "r"(asid));
	}
}

/// Synchronize based on the address space identifier
/// This allows us to fence a particular process rather
/// than the entire TLB.
/// The RISC-V documentation calls this a TLB flush +.
/// Since there are other memory routines involved, they
/// didn't call it a TLB flush, but it is much like
/// Intel/AMD's invtlb [] instruction.
pub fn satp_fence_asid(asid: usize) {
	unsafe {
		// zero for the vaddr operand means "all addresses" in this ASID.
		asm!("sfence.vma zero, $0" :: "r"(asid));
	}
}


================================================
FILE: risc_v/chapters/ch7/src/kmem.rs
================================================
// kmem.rs
// Sub-page level: malloc-like allocation system
// Stephen Marz
// 7 October 2019

use crate::page::{align_val, zalloc, Table, PAGE_SIZE};
use core::{mem::size_of, ptr::null_mut};

// Flag bits packed into the top of an AllocList's flags_size word.
#[repr(usize)]
enum AllocListFlags {
	// Bit 63 marks a chunk as taken; the lower 63 bits hold its size.
	Taken = 1 << 63,
}

impl AllocListFlags {
	// Convert the variant to its underlying usize bit pattern.
	pub fn val(self) -> usize {
		self as usize
	}
}

// Header placed at the front of every allocation chunk. A single word
// packs the Taken flag (bit 63) together with the chunk size.
struct AllocList {
	pub flags_size: usize,
}

impl AllocList {
	// True when the Taken bit is set.
	pub fn is_taken(&self) -> bool {
		self.flags_size & AllocListFlags::Taken.val() != 0
	}

	// True when the Taken bit is clear.
	pub fn is_free(&self) -> bool {
		!self.is_taken()
	}

	// Set the Taken bit, leaving the size bits untouched.
	pub fn set_taken(&mut self) {
		self.flags_size |= AllocListFlags::Taken.val();
	}

	// Clear the Taken bit, leaving the size bits untouched.
	pub fn set_free(&mut self) {
		self.flags_size &= !AllocListFlags::Taken.val();
	}

	// Store a new size while preserving the current Taken bit.
	pub fn set_size(&mut self, sz: usize) {
		let k = self.is_taken();
		self.flags_size = sz & !AllocListFlags::Taken.val();
		if k {
			self.flags_size |= AllocListFlags::Taken.val();
		}
	}

	// Size of this chunk with the flag bit masked off.
	pub fn get_size(&self) -> usize {
		self.flags_size & !AllocListFlags::Taken.val()
	}
}

// This is the head of the allocation. We start here when
// we search for a free memory location.
static mut KMEM_HEAD: *mut AllocList = null_mut();
// In the future, we will have on-demand pages
// so, we need to keep track of our memory footprint to
// see if we actually need to allocate more.
static mut KMEM_ALLOC: usize = 0; static mut KMEM_PAGE_TABLE: *mut Table = null_mut(); // These functions are safe helpers around an unsafe // operation. pub fn get_head() -> *mut u8 { unsafe { KMEM_HEAD as *mut u8 } } pub fn get_page_table() -> *mut Table { unsafe { KMEM_PAGE_TABLE as *mut Table } } pub fn get_num_allocations() -> usize { unsafe { KMEM_ALLOC } } /// Initialize kernel's memory /// This is not to be used to allocate memory /// for user processes. If that's the case, use /// alloc/dealloc from the page crate. pub fn init() { unsafe { // Allocate kernel pages (KMEM_ALLOC) KMEM_ALLOC = 512; let k_alloc = zalloc(KMEM_ALLOC); assert!(!k_alloc.is_null()); KMEM_HEAD = k_alloc as *mut AllocList; (*KMEM_HEAD).set_free(); (*KMEM_HEAD).set_size(KMEM_ALLOC * PAGE_SIZE); KMEM_PAGE_TABLE = zalloc(1) as *mut Table; } } /// Allocate sub-page level allocation based on bytes and zero the memory pub fn kzmalloc(sz: usize) -> *mut u8 { let size = align_val(sz, 3); let ret = kmalloc(size); if !ret.is_null() { for i in 0..size { unsafe { (*ret.add(i)) = 0; } } } ret } /// Allocate sub-page level allocation based on bytes pub fn kmalloc(sz: usize) -> *mut u8 { unsafe { let size = align_val(sz, 3) + size_of::(); let mut head = KMEM_HEAD; // .add() uses pointer arithmetic, so we type-cast into a u8 // so that we multiply by an absolute size (KMEM_ALLOC * // PAGE_SIZE). let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { if (*head).is_free() && size <= (*head).get_size() { let chunk_size = (*head).get_size(); let rem = chunk_size - size; (*head).set_taken(); if rem > size_of::() { let next = (head as *mut u8).add(size) as *mut AllocList; // There is space remaining here. 
(*next).set_free(); (*next).set_size(rem); (*head).set_size(size); } else { // If we get here, take the entire chunk (*head).set_size(chunk_size); } return head.add(1) as *mut u8; } else { // If we get here, what we saw wasn't a free // chunk, move on to the next. head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } // If we get here, we didn't find any free chunks--i.e. there isn't // enough memory for this. TODO: Add on-demand page allocation. null_mut() } /// Free a sub-page level allocation pub fn kfree(ptr: *mut u8) { unsafe { if !ptr.is_null() { let p = (ptr as *mut AllocList).offset(-1); if (*p).is_taken() { (*p).set_free(); } // After we free, see if we can combine adjacent free // spots to see if we can reduce fragmentation. coalesce(); } } } /// Merge smaller chunks into a bigger chunk pub fn coalesce() { unsafe { let mut head = KMEM_HEAD; let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { let next = (head as *mut u8).add((*head).get_size()) as *mut AllocList; if (*head).get_size() == 0 { // If this happens, then we have a bad heap // (double free or something). However, that // will cause an infinite loop since the next // pointer will never move beyond the current // location. break; } else if next >= tail { // We calculated the next by using the size // given as get_size(), however this could push // us past the tail. In that case, the size is // wrong, hence we break and stop doing what we // need to do. break; } else if (*head).is_free() && (*next).is_free() { // This means we have adjacent blocks needing to // be freed. So, we combine them into one // allocation. (*head).set_size( (*head).get_size() + (*next).get_size(), ); } // If we get here, we might've moved. Recalculate new // head. 
head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } /// For debugging purposes, print the kmem table pub fn print_table() { unsafe { let mut head = KMEM_HEAD; let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { println!( "{:p}: Length = {:<10} Taken = {}", head, (*head).get_size(), (*head).is_taken() ); head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } // /////////////////////////////////// // / GLOBAL ALLOCATOR // /////////////////////////////////// // The global allocator allows us to use the data structures // in the core library, such as a linked list or B-tree. // We want to use these sparingly since we have a coarse-grained // allocator. use core::alloc::{GlobalAlloc, Layout}; // The global allocator is a static constant to a global allocator // structure. We don't need any members because we're using this // structure just to implement alloc and dealloc. struct OsGlobalAlloc; unsafe impl GlobalAlloc for OsGlobalAlloc { unsafe fn alloc(&self, layout: Layout) -> *mut u8 { // We align to the next page size so that when // we divide by PAGE_SIZE, we get exactly the number // of pages necessary. kzmalloc(layout.size()) } unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) { // We ignore layout since our allocator uses ptr_start -> last // to determine the span of an allocation. kfree(ptr); } } #[global_allocator] /// Technically, we don't need the {} at the end, but it /// reveals that we're creating a new structure and not just /// copying a value. static GA: OsGlobalAlloc = OsGlobalAlloc {}; #[alloc_error_handler] /// If for some reason alloc() in the global allocator gets null_mut(), /// then we come here. This is a divergent function, so we call panic to /// let the tester know what's going on. pub fn alloc_error(l: Layout) -> ! 
{ panic!( "Allocator failed to allocate {} bytes with {}-byte alignment.", l.size(), l.align() ); } ================================================ FILE: risc_v/chapters/ch7/src/lds/virt.lds ================================================ /* virt.lds Linker script for outputting to RISC-V QEMU "virt" machine. Stephen Marz 6 October 2019 */ /* riscv is the name of the architecture that the linker understands for any RISC-V target (64-bit or 32-bit). We will further refine this by using -mabi=lp64 and -march=rv64gc */ OUTPUT_ARCH( "riscv" ) /* We're setting our entry point to a symbol called _start which is inside of boot.S. This essentially stores the address of _start as the "entry point", or where CPU instructions should start executing. In the rest of this script, we are going to place _start right at the beginning of 0x8000_0000 because this is where the virtual machine and many RISC-V boards will start executing. */ ENTRY( _start ) /* The MEMORY section will explain that we have "ram" that contains a section that is 'w' (writeable), 'x' (executable), and 'a' (allocatable). We use '!' to invert 'r' (read-only) and 'i' (initialized). We don't want our memory to be read-only, and we're stating that it is NOT initialized at the beginning. The ORIGIN is the memory address 0x8000_0000. If we look at the virt spec or the specification for the RISC-V HiFive Unleashed, this is the starting memory address for our code. Side note: There might be other boot ROMs at different addresses, but their job is to get to this point. Finally LENGTH = 128M tells the linker that we have 128 megabyte of RAM. The linker will double check this to make sure everything can fit. The HiFive Unleashed has a lot more RAM than this, but for the virtual machine, I went with 128M since I think that's enough RAM for now. We can provide other pieces of memory, such as QSPI, or ROM, but we're telling the linker script here that we have one pool of RAM. 
*/ MEMORY { ram (wxa!ri) : ORIGIN = 0x80000000, LENGTH = 128M } /* PHDRS is short for "program headers", which we specify three here: text - CPU instructions (executable sections) data - Global, initialized variables bss - Global, uninitialized variables (all will be set to 0 by boot.S) The command PT_LOAD tells the linker that these sections will be loaded from the file into memory. We can actually stuff all of these into a single program header, but by splitting it up into three, we can actually use the other PT_* commands such as PT_DYNAMIC, PT_INTERP, PT_NULL to tell the linker where to find additional information. However, for our purposes, every section will be loaded from the program headers. */ PHDRS { text PT_LOAD; data PT_LOAD; bss PT_LOAD; } /* We are now going to organize the memory based on which section it is in. In assembly, we can change the section with the ".section" directive. However, in C++ and Rust, CPU instructions go into text, global constants go into rodata, global initialized variables go into data, and global uninitialized variables go into bss. */ SECTIONS { /* The first part of our RAM layout will be the text section. Since our CPU instructions are here, and our memory starts at 0x8000_0000, we need our entry point to line up here. */ .text : { /* PROVIDE allows me to access a symbol called _text_start so I know where the text section starts in the operating system. This should not move, but it is here for convenience. The period '.' tells the linker to set _text_start to the CURRENT location ('.' = current memory location). This current memory location moves as we add things. */ PROVIDE(_text_start = .); /* We are going to layout all text sections here, starting with .text.init. The asterisk in front of the parentheses means to match the .text.init section of ANY object file. 
Otherwise, we can specify which object file should contain the .text.init section, for example, boot.o(.text.init) would specifically put the .text.init section of our bootloader here. Because we might want to change the name of our files, we'll leave it with a *. Inside the parentheses is the name of the section. I created my own called .text.init to make 100% sure that the _start is put right at the beginning. The linker will lay this out in the order it receives it: .text.init first all .text sections next any .text.* sections last .text.* means to match anything after .text. If we didn't already specify .text.init, this would've matched here. The assembler and linker can place things in "special" text sections, so we match any we might come across here. */ *(.text.init) *(.text .text.*) /* Again, with PROVIDE, we're providing a readable symbol called _text_end, which is set to the memory address AFTER .text.init, .text, and .text.*'s have been added. */ PROVIDE(_text_end = .); /* The portion after the right brace is in an odd format. However, this is telling the linker what memory portion to put it in. We labeled our RAM, ram, with the constraints that it is writeable, allocatable, and executable. The linker will make sure with this that we can do all of those things. >ram - This just tells the linker script to put this entire section (.text) into the ram region of memory. To my knowledge, the '>' does not mean "greater than". Instead, it is a symbol to let the linker know we want to put this in ram. AT>ram - This sets the LMA (load memory address) region to the same thing. LMA is the final translation of a VMA (virtual memory address). With this linker script, we're loading everything into its physical location. We'll let the kernel copy and sort out the virtual memory. That's why >ram and AT>ram are continually the same thing. :text - This tells the linker script to put this into the :text program header. We've only defined three: text, data, and bss. 
In this case, we're telling the linker script to go into the text section. */ } >ram AT>ram :text /* The global pointer allows the linker to position global variables and constants into independent positions relative to the gp (global pointer) register. The globals start after the text sections and are only relevant to the rodata, data, and bss sections. */ PROVIDE(_global_pointer = .); /* Most compilers create a rodata (read only data) section for global constants. However, we're going to place ours in the text section. We can actually put this in :data, but since the .text section is read-only, we can place it there. NOTE: This doesn't actually do anything, yet. The actual "protection" cannot be done at link time. Instead, when we program the memory management unit (MMU), we will be able to choose which bits (R=read, W=write, X=execute) we want each memory segment to be able to do. */ .rodata : { PROVIDE(_rodata_start = .); *(.rodata .rodata.*) PROVIDE(_rodata_end = .); /* Again, we're placing the rodata section in the memory segment "ram" and we're putting it in the :text program header. We don't have one for rodata anyway. */ } >ram AT>ram :text .data : { /* . = ALIGN(4096) tells the linker to align the current memory location (which is 0x8000_0000 + text section + rodata section) to 4096 bytes. This is because our paging system's resolution is 4,096 bytes or 4 KiB. */ . = ALIGN(4096); PROVIDE(_data_start = .); /* sdata and data are essentially the same thing. However, compilers usually use the sdata sections for shorter, quicker loading sections. So, usually critical data is loaded there. However, we're loading all of this in one fell swoop. So, we're looking to put all of the following sections under the umbrella .data: .sdata .sdata.[anything] .data .data.[anything] ...in that order. 
  */
    *(.sdata .sdata.*)
    *(.data .data.*)
    PROVIDE(_data_end = .);
  } >ram AT>ram :data

  .bss : {
    PROVIDE(_bss_start = .);
    *(.sbss .sbss.*)
    *(.bss .bss.*)
    PROVIDE(_bss_end = .);
  } >ram AT>ram :bss

  /* The following symbols are helpful when we allocate the kernel stack
     (_stack) and determine where the heap begins and ends (_heap_start
     and _heap_start + _heap_size). When we do memory allocation, we can
     use these symbols.

     We use the symbols instead of hard-coding an address because this is
     a floating target. As we add code, the heap moves farther down the
     memory and gets shorter.

     _memory_start will be set to 0x8000_0000 here. We use ORIGIN(ram) so
     that it will take whatever we set the origin of ram to. Otherwise,
     we'd have to change it more than once if we ever stray away from
     0x8000_0000 as our entry point.
  */
  PROVIDE(_memory_start = ORIGIN(ram));

  /* Our kernel stack starts at the end of the bss segment (_bss_end).
     The expression below reserves 0x8000 bytes (32 KiB) for the kernel
     stack.

     NOTE(review): this comment previously claimed 0x80000 bytes
     (512 KiB) while the code reserves 0x8000 — confirm which stack size
     is intended.

     The stack grows from higher memory to lower memory (bottom to top),
     therefore we set the stack pointer at the very bottom of its
     allocated slot. When we go to allocate from the stack, we'll
     subtract the number of bytes we need.
  */
  PROVIDE(_stack_start = _bss_end);
  PROVIDE(_stack_end = _stack_start + 0x8000);
  PROVIDE(_memory_end = ORIGIN(ram) + LENGTH(ram));

  /* Finally, our heap starts right after the kernel stack. This heap
     will be used mainly to dole out memory for user-space applications.
     However, in some circumstances, it will be used for kernel memory
     as well.

     We don't align here because we let the kernel determine how it
     wants to do this.
*/ PROVIDE(_heap_start = _stack_end); PROVIDE(_heap_size = _memory_end - _heap_start); } ================================================ FILE: risc_v/chapters/ch7/src/lib.rs ================================================ // Steve Operating System // Stephen Marz // 21 Sep 2019 #![no_std] #![feature(panic_info_message, asm, allocator_api, alloc_error_handler, alloc_prelude, const_raw_ptr_to_usize_cast)] // #[macro_use] extern crate alloc; // This is experimental and requires alloc_prelude as a feature // use alloc::prelude::v1::*; // /////////////////////////////////// // / RUST MACROS // /////////////////////////////////// #[macro_export] macro_rules! print { ($($args:tt)+) => ({ use core::fmt::Write; let _ = write!(crate::uart::Uart::new(0x1000_0000), $($args)+); }); } #[macro_export] macro_rules! println { () => ({ print!("\r\n") }); ($fmt:expr) => ({ print!(concat!($fmt, "\r\n")) }); ($fmt:expr, $($args:tt)+) => ({ print!(concat!($fmt, "\r\n"), $($args)+) }); } // /////////////////////////////////// // / LANGUAGE STRUCTURES / FUNCTIONS // /////////////////////////////////// #[no_mangle] extern "C" fn eh_personality() {} #[panic_handler] fn panic(info: &core::panic::PanicInfo) -> ! { print!("Aborting: "); if let Some(p) = info.location() { println!( "line {}, file {}: {}", p.line(), p.file(), info.message().unwrap() ); } else { println!("no information available."); } abort(); } #[no_mangle] extern "C" fn abort() -> ! { loop { unsafe { asm!("wfi"::::"volatile"); } } } // /////////////////////////////////// // / CONSTANTS // /////////////////////////////////// // const STR_Y: &str = "\x1b[38;2;79;221;13m✓\x1b[m"; // const STR_N: &str = "\x1b[38;2;221;41;13m✘\x1b[m"; // The following symbols come from asm/mem.S. We can use // the symbols directly, but the address of the symbols // themselves are their values, which can cause issues. // Instead, I created doubleword values in mem.S in the .rodata and .data // sections. 
/* extern "C" { static TEXT_START: usize; static TEXT_END: usize; static DATA_START: usize; static DATA_END: usize; static RODATA_START: usize; static RODATA_END: usize; static BSS_START: usize; static BSS_END: usize; static KERNEL_STACK_START: usize; static KERNEL_STACK_END: usize; static HEAP_START: usize; static HEAP_SIZE: usize; } */ /// Identity map range /// Takes a contiguous allocation of memory and maps it using PAGE_SIZE /// This assumes that start <= end pub fn id_map_range(root: &mut page::Table, start: usize, end: usize, bits: i64) { let mut memaddr = start & !(page::PAGE_SIZE - 1); let num_kb_pages = (page::align_val(end, 12) - memaddr) / page::PAGE_SIZE; // I named this num_kb_pages for future expansion when // I decide to allow for GiB (2^30) and 2MiB (2^21) page // sizes. However, the overlapping memory regions are causing // nightmares. for _ in 0..num_kb_pages { page::map(root, memaddr, memaddr, bits, 0); memaddr += 1 << 12; } } // /////////////////////////////////// // / ENTRY POINT // /////////////////////////////////// #[no_mangle] extern "C" fn kinit() -> usize { uart::Uart::new(0x1000_0000).init(); page::init(); kmem::init(); let ret = process::init(); println!("Init process created at address 0x{:08x}", ret); // We lower the threshold wall so our interrupts can jump over it. plic::set_threshold(0); // VIRTIO = [1..8] // UART0 = 10 // PCIE = [32..35] // Enable the UART interrupt. plic::enable(10); plic::set_priority(10, 1); println!("UART interrupts have been enabled and are awaiting your command."); println!("Getting ready for first process."); println!("Issuing the first context-switch timer."); unsafe { let mtimecmp = 0x0200_4000 as *mut u64; let mtime = 0x0200_bff8 as *const u64; // The frequency given by QEMU is 10_000_000 Hz, so this sets // the next interrupt to fire one second from now. mtimecmp.write_volatile(mtime.read_volatile() + 10_000_000); } // When we return, we put the return value into mepc and start there. 
This // should be init's starting point. ret } #[no_mangle] extern "C" fn kinit_hart(hartid: usize) { // All non-0 harts initialize here. unsafe { // We have to store the kernel's table. The tables will be moved // back and forth between the kernel's table and user // applicatons' tables. cpu::mscratch_write( (&mut cpu::KERNEL_TRAP_FRAME[hartid] as *mut cpu::TrapFrame) as usize, ); // Copy the same mscratch over to the supervisor version of the // same register. cpu::sscratch_write(cpu::mscratch_read()); cpu::KERNEL_TRAP_FRAME[hartid].hartid = hartid; // We can't do the following until zalloc() is locked, but we // don't have locks, yet :( cpu::KERNEL_TRAP_FRAME[hartid].satp // = cpu::KERNEL_TRAP_FRAME[0].satp; // cpu::KERNEL_TRAP_FRAME[hartid].trap_stack = page::zalloc(1); } } // /////////////////////////////////// // / RUST MODULES // /////////////////////////////////// pub mod cpu; pub mod kmem; pub mod page; pub mod plic; pub mod process; pub mod syscall; pub mod trap; pub mod uart; ================================================ FILE: risc_v/chapters/ch7/src/page.rs ================================================ // page.rs // Memory routines // Stephen Marz // 6 October 2019 use core::{mem::size_of, ptr::null_mut}; // //////////////////////////////// // // Allocation routines // //////////////////////////////// extern "C" { static HEAP_START: usize; static HEAP_SIZE: usize; } // We will use ALLOC_START to mark the start of the actual // memory we can dish out. static mut ALLOC_START: usize = 0; const PAGE_ORDER: usize = 12; pub const PAGE_SIZE: usize = 1 << 12; /// Align (set to a multiple of some power of two) /// This takes an order which is the exponent to 2^order /// Therefore, all alignments must be made as a power of two. /// This function always rounds up. 
/// Align `val` up to the next multiple of 2^`order`.
/// The order is the exponent, so all alignments are powers of two.
/// This function always rounds up: align_val(1, 12) == 4096 and
/// align_val(4096, 12) == 4096.
pub const fn align_val(val: usize, order: usize) -> usize {
	let o = (1usize << order) - 1;
	(val + o) & !o
}

#[repr(u8)]
pub enum PageBits {
	/// No flags set: the page is free.
	Empty = 0,
	/// The page is allocated.
	Taken = 1 << 0,
	/// The page is the final page of a contiguous allocation.
	Last = 1 << 1,
}

impl PageBits {
	// We convert PageBits to a u8 a lot, so this is
	// for convenience.
	pub fn val(self) -> u8 {
		self as u8
	}
}

// Each page is described by the Page structure. Linux does this
// as well, where each 4096-byte chunk of memory has a structure
// associated with it. However, their structure is much larger.
pub struct Page {
	flags: u8,
}

impl Page {
	/// True if this page has been marked as the final allocation.
	pub fn is_last(&self) -> bool {
		self.flags & PageBits::Last.val() != 0
	}

	/// True if the page is marked as being taken (allocated).
	pub fn is_taken(&self) -> bool {
		self.flags & PageBits::Taken.val() != 0
	}

	/// The opposite of is_taken().
	pub fn is_free(&self) -> bool {
		!self.is_taken()
	}

	/// Clear the Page structure, marking the page free.
	pub fn clear(&mut self) {
		self.flags = PageBits::Empty.val();
	}

	/// Set a single flag bit. PageBits does not implement the BitOr
	/// trait, so flags are applied one at a time.
	pub fn set_flag(&mut self, flag: PageBits) {
		self.flags |= flag.val();
	}

	/// Clear a single flag bit.
	pub fn clear_flag(&mut self, flag: PageBits) {
		self.flags &= !(flag.val());
	}
}

// Initialize the allocation system. There are several ways that we can
// implement the page allocator:
// 1. Free list (singly linked list where it starts at the first free
//    allocation)
// 2. Bookkeeping list (structure contains a taken and length)
// 3. Allocate one Page structure per 4096 bytes (this is what I chose)
// 4.
Others pub fn init() { unsafe { // let desc_per_page = PAGE_SIZE / size_of::(); let num_pages = HEAP_SIZE / PAGE_SIZE; // let num_desc_pages = num_pages / desc_per_page; let ptr = HEAP_START as *mut Page; // Clear all pages to make sure that they aren't accidentally // taken for i in 0..num_pages { (*ptr.add(i)).clear(); } // Determine where the actual useful memory starts. This will be // after all Page structures. We also must align the ALLOC_START // to a page-boundary (PAGE_SIZE = 4096). ALLOC_START = // (HEAP_START + num_pages * size_of::() + PAGE_SIZE - 1) // & !(PAGE_SIZE - 1); ALLOC_START = align_val( HEAP_START + num_pages * size_of::(), PAGE_ORDER, ); } } /// Allocate a page or multiple pages /// pages: the number of PAGE_SIZE pages to allocate pub fn alloc(pages: usize) -> *mut u8 { // We have to find a contiguous allocation of pages assert!(pages > 0); unsafe { // We create a Page structure for each page on the heap. We // actually might have more since HEAP_SIZE moves and so does // the size of our structure, but we'll only waste a few bytes. let num_pages = HEAP_SIZE / PAGE_SIZE; let ptr = HEAP_START as *mut Page; for i in 0..num_pages - pages { let mut found = false; // Check to see if this Page is free. If so, we have our // first candidate memory address. if (*ptr.add(i)).is_free() { // It was FREE! Yay! found = true; for j in i..i + pages { // Now check to see if we have a // contiguous allocation for all of the // request pages. If not, we should // check somewhere else. if (*ptr.add(j)).is_taken() { found = false; break; } } } // We've checked to see if there are enough contiguous // pages to form what we need. If we couldn't, found // will be false, otherwise it will be true, which means // we've found valid memory we can allocate. if found { for k in i..i + pages - 1 { (*ptr.add(k)).set_flag(PageBits::Taken); } // The marker for the last page is // PageBits::Last This lets us know when we've // hit the end of this particular allocation. 
(*ptr.add(i+pages-1)).set_flag(PageBits::Taken); (*ptr.add(i+pages-1)).set_flag(PageBits::Last); // The Page structures themselves aren't the // useful memory. Instead, there is 1 Page // structure per 4096 bytes starting at // ALLOC_START. return (ALLOC_START + PAGE_SIZE * i) as *mut u8; } } } // If we get here, that means that no contiguous allocation was // found. null_mut() } /// Allocate and zero a page or multiple pages /// pages: the number of pages to allocate /// Each page is PAGE_SIZE which is calculated as 1 << PAGE_ORDER /// On RISC-V, this typically will be 4,096 bytes. pub fn zalloc(pages: usize) -> *mut u8 { // Allocate and zero a page. // First, let's get the allocation let ret = alloc(pages); if !ret.is_null() { let size = (PAGE_SIZE * pages) / 8; let big_ptr = ret as *mut u64; for i in 0..size { // We use big_ptr so that we can force an // sd (store doubleword) instruction rather than // the sb. This means 8x fewer stores than before. // Typically we have to be concerned about remaining // bytes, but fortunately 4096 % 8 = 0, so we // won't have any remaining bytes. unsafe { (*big_ptr.add(i)) = 0; } } } ret } /// Deallocate a page by its pointer /// The way we've structured this, it will automatically coalesce /// contiguous pages. pub fn dealloc(ptr: *mut u8) { // Make sure we don't try to free a null pointer. assert!(!ptr.is_null()); unsafe { let addr = HEAP_START + (ptr as usize - ALLOC_START) / PAGE_SIZE; // Make sure that the address makes sense. The address we // calculate here is the page structure, not the HEAP address! assert!(addr >= HEAP_START && addr < HEAP_START + HEAP_SIZE); let mut p = addr as *mut Page; // Keep clearing pages until we hit the last page. while (*p).is_taken() && !(*p).is_last() { (*p).clear(); p = p.add(1); } // If the following assertion fails, it is most likely // caused by a double-free. assert!( (*p).is_last() == true, "Possible double-free detected! 
(Not taken found \ before last)" ); // If we get here, we've taken care of all previous pages and // we are on the last page. (*p).clear(); } } /// Print all page allocations /// This is mainly used for debugging. pub fn print_page_allocations() { unsafe { let num_pages = (HEAP_SIZE - (ALLOC_START - HEAP_START)) / PAGE_SIZE; let mut beg = HEAP_START as *const Page; let end = beg.add(num_pages); let alloc_beg = ALLOC_START; let alloc_end = ALLOC_START + num_pages * PAGE_SIZE; println!(); println!( "PAGE ALLOCATION TABLE\nMETA: {:p} -> {:p}\nPHYS: \ 0x{:x} -> 0x{:x}", beg, end, alloc_beg, alloc_end ); println!("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"); let mut num = 0; while beg < end { if (*beg).is_taken() { let start = beg as usize; let memaddr = ALLOC_START + (start - HEAP_START) * PAGE_SIZE; print!("0x{:x} => ", memaddr); loop { num += 1; if (*beg).is_last() { let end = beg as usize; let memaddr = ALLOC_START + (end - HEAP_START) * PAGE_SIZE + PAGE_SIZE - 1; print!( "0x{:x}: {:>3} page(s)", memaddr, (end - start + 1) ); println!("."); break; } beg = beg.add(1); } } beg = beg.add(1); } println!("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"); println!( "Allocated: {:>6} pages ({:>10} bytes).", num, num * PAGE_SIZE ); println!( "Free : {:>6} pages ({:>10} bytes).", num_pages - num, (num_pages - num) * PAGE_SIZE ); println!(); } } // //////////////////////////////// // // MMU Routines // //////////////////////////////// // Represent (repr) our entry bits as // unsigned 64-bit integers. 
#[repr(i64)]
#[derive(Copy, Clone)]
pub enum EntryBits {
	None = 0,
	Valid = 1 << 0,
	Read = 1 << 1,
	Write = 1 << 2,
	Execute = 1 << 3,
	User = 1 << 4,
	Global = 1 << 5,
	Access = 1 << 6,
	Dirty = 1 << 7,

	// Convenience combinations
	ReadWrite = 1 << 1 | 1 << 2,
	ReadExecute = 1 << 1 | 1 << 3,
	ReadWriteExecute = 1 << 1 | 1 << 2 | 1 << 3,

	// User convenience combinations
	UserReadWrite = 1 << 1 | 1 << 2 | 1 << 4,
	UserReadExecute = 1 << 1 | 1 << 3 | 1 << 4,
	UserReadWriteExecute = 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4,
}

impl EntryBits {
	/// Page-table entries are stored as i64, so convert the flag for
	/// OR-ing and masking.
	pub fn val(self) -> i64 {
		self as i64
	}
}

/// A single page-table entry (RISC-V privileged spec, Figure 4.18).
/// We use an i64 so shifts sign-extend rather than zero-extend, since
/// RISC-V requires the reserved high bits to take on the most
/// significant bit.
pub struct Entry {
	pub entry: i64,
}

impl Entry {
	/// Bit index #0 is the V (valid) bit.
	pub fn is_valid(&self) -> bool {
		self.entry & EntryBits::Valid.val() != 0
	}

	pub fn is_invalid(&self) -> bool {
		!self.is_valid()
	}

	/// A leaf has one or more of the R, W, X bits set (mask 0xe);
	/// otherwise the entry points at the next table level.
	pub fn is_leaf(&self) -> bool {
		self.entry & 0xe != 0
	}

	pub fn is_branch(&self) -> bool {
		!self.is_leaf()
	}

	pub fn set_entry(&mut self, entry: i64) {
		self.entry = entry;
	}

	pub fn get_entry(&self) -> i64 {
		self.entry
	}
}

/// A page table: 512 (2^9) eight-byte entries.
pub struct Table {
	pub entries: [Entry; 512],
}

impl Table {
	pub fn len() -> usize {
		512
	}
}

// Map a virtual address to a physical address using 4096-byte page
// size.
// root: a mutable reference to the root Table
// vaddr: The virtual address to map
// paddr: The physical address to map
// bits: An OR'd bitset containing the bits the leaf should have.
/// The bits should contain only the following: /// Read, Write, Execute, User, and/or Global /// The bits MUST include one or more of the following: /// Read, Write, Execute /// The valid bit automatically gets added. pub fn map(root: &mut Table, vaddr: usize, paddr: usize, bits: i64, level: usize) { // Make sure that Read, Write, or Execute have been provided // otherwise, we'll leak memory and always create a page fault. assert!(bits & 0xe != 0); // Extract out each VPN from the virtual address // On the virtual address, each VPN is exactly 9 bits, // which is why we use the mask 0x1ff = 0b1_1111_1111 (9 bits) let vpn = [ // VPN[0] = vaddr[20:12] (vaddr >> 12) & 0x1ff, // VPN[1] = vaddr[29:21] (vaddr >> 21) & 0x1ff, // VPN[2] = vaddr[38:30] (vaddr >> 30) & 0x1ff, ]; // Just like the virtual address, extract the physical address // numbers (PPN). However, PPN[2] is different in that it stores // 26 bits instead of 9. Therefore, we use, // 0x3ff_ffff = 0b11_1111_1111_1111_1111_1111_1111 (26 bits). let ppn = [ // PPN[0] = paddr[20:12] (paddr >> 12) & 0x1ff, // PPN[1] = paddr[29:21] (paddr >> 21) & 0x1ff, // PPN[2] = paddr[55:30] (paddr >> 30) & 0x3ff_ffff, ]; // We will use this as a floating reference so that we can set // individual entries as we walk the table. let mut v = &mut root.entries[vpn[2]]; // Now, we're going to traverse the page table and set the bits // properly. We expect the root to be valid, however we're required to // create anything beyond the root. // In Rust, we create a range iterator using the .. operator. // The .rev() will reverse the iteration since we need to start with // VPN[2] The .. operator is inclusive on start but exclusive on end. // So, (0..2) will iterate 0 and 1. for i in (level..2).rev() { if !v.is_valid() { // Allocate a page let page = zalloc(1); // The page is already aligned by 4,096, so store it // directly The page is stored in the entry shifted // right by 2 places. 
v.set_entry( (page as i64 >> 2) | EntryBits::Valid.val(), ); } let entry = ((v.get_entry() & !0x3ff) << 2) as *mut Entry; v = unsafe { entry.add(vpn[i]).as_mut().unwrap() }; } // When we get here, we should be at VPN[0] and v should be pointing to // our entry. // The entry structure is Figure 4.18 in the RISC-V Privileged // Specification let entry = (ppn[2] << 28) as i64 | // PPN[2] = [53:28] (ppn[1] << 19) as i64 | // PPN[1] = [27:19] (ppn[0] << 10) as i64 | // PPN[0] = [18:10] bits | // Specified bits, such as User, Read, Write, etc EntryBits::Valid.val() | // Valid bit EntryBits::Dirty.val() | // Some machines require this to =1 EntryBits::Access.val() // Just like dirty, some machines require this ; // Set the entry. V should be set to the correct pointer by the loop // above. v.set_entry(entry); } /// Unmaps and frees all memory associated with a table. /// root: The root table to start freeing. /// NOTE: This does NOT free root directly. This must be /// freed manually. /// The reason we don't free the root is because it is /// usually embedded into the Process structure. pub fn unmap(root: &mut Table) { // Start with level 2 for lv2 in 0..Table::len() { let ref entry_lv2 = root.entries[lv2]; if entry_lv2.is_valid() && entry_lv2.is_branch() { // This is a valid entry, so drill down and free. let memaddr_lv1 = (entry_lv2.get_entry() & !0x3ff) << 2; let table_lv1 = unsafe { // Make table_lv1 a mutable reference instead of // a pointer. (memaddr_lv1 as *mut Table).as_mut().unwrap() }; for lv1 in 0..Table::len() { let ref entry_lv1 = table_lv1.entries[lv1]; if entry_lv1.is_valid() && entry_lv1.is_branch() { let memaddr_lv0 = (entry_lv1.get_entry() & !0x3ff) << 2; // The next level is level 0, which // cannot have branches, therefore, // we free here. dealloc(memaddr_lv0 as *mut u8); } } dealloc(memaddr_lv1 as *mut u8); } } } /// Walk the page table to convert a virtual address to a /// physical address. 
/// If a page fault would occur, this returns None /// Otherwise, it returns Some with the physical address. pub fn virt_to_phys(root: &Table, vaddr: usize) -> Option { // Walk the page table pointed to by root let vpn = [ // VPN[0] = vaddr[20:12] (vaddr >> 12) & 0x1ff, // VPN[1] = vaddr[29:21] (vaddr >> 21) & 0x1ff, // VPN[2] = vaddr[38:30] (vaddr >> 30) & 0x1ff, ]; let mut v = &root.entries[vpn[2]]; for i in (0..=2).rev() { if v.is_invalid() { // This is an invalid entry, page fault. break; } else if v.is_leaf() { // According to RISC-V, a leaf can be at any level. // The offset mask masks off the PPN. Each PPN is 9 // bits and they start at bit #12. So, our formula // 12 + i * 9 let off_mask = (1 << (12 + i * 9)) - 1; let vaddr_pgoff = vaddr & off_mask; let addr = ((v.get_entry() << 2) as usize) & !off_mask; return Some(addr | vaddr_pgoff); } // Set v to the next entry which is pointed to by this // entry. However, the address was shifted right by 2 places // when stored in the page table entry, so we shift it left // to get it back into place. let entry = ((v.get_entry() & !0x3ff) << 2) as *const Entry; // We do i - 1 here, however we should get None or Some() above // before we do 0 - 1 = -1. v = unsafe { entry.add(vpn[i - 1]).as_ref().unwrap() }; } // If we get here, we've exhausted all valid tables and haven't // found a leaf. None } ================================================ FILE: risc_v/chapters/ch7/src/plic.rs ================================================ // plic.rs // Platform Level Interrupt Controller (PLIC) // Stephen Marz // 1 Nov 2019 const PLIC_PRIORITY: usize = 0x0c00_0000; const PLIC_PENDING: usize = 0x0c00_1000; const PLIC_INT_ENABLE: usize = 0x0c00_2000; const PLIC_THRESHOLD: usize = 0x0c20_0000; const PLIC_CLAIM: usize = 0x0c20_0004; // Each register is 4-bytes (u32) // The PLIC is an external interrupt controller. The one // used by QEMU virt is the same as the SiFive PLIC. 
// https://sifive.cdn.prismic.io/sifive%2F834354f0-08e6-423c-bf1f-0cb58ef14061_fu540-c000-v1.0.pdf // Chapter 10 explains the priority, pending, interrupt enable, threshold and claims // The virt machine has the following external interrupts (from Qemu source): // Interrupt 0 is a "null" interrupt and is hardwired to 0. // VIRTIO = [1..8] // UART0 = 10 // PCIE = [32..35] /// Get the next available interrupt. This is the "claim" process. /// The plic will automatically sort by priority and hand us the /// ID of the interrupt. For example, if the UART is interrupting /// and it's next, we will get the value 10. pub fn next() -> Option { let claim_reg = PLIC_CLAIM as *const u32; let claim_no; // The claim register is filled with the highest-priority, enabled interrupt. unsafe { claim_no = claim_reg.read_volatile(); } if claim_no == 0 { // The interrupt 0 is hardwired to 0, which tells us that there is no // interrupt to claim, hence we return None. None } else { // If we get here, we've gotten a non-0 interrupt. Some(claim_no) } } /// Complete a pending interrupt by id. The id should come /// from the next() function above. pub fn complete(id: u32) { let complete_reg = PLIC_CLAIM as *mut u32; unsafe { // We actually write a u32 into the entire complete_register. // This is the same register as the claim register, but it can // differentiate based on whether we're reading or writing. complete_reg.write_volatile(id); } } /// Set the global threshold. The threshold can be a value [0..7]. /// The PLIC will mask any interrupts at or below the given threshold. /// This means that a threshold of 7 will mask ALL interrupts and /// a threshold of 0 will allow ALL interrupts. pub fn set_threshold(tsh: u8) { // We do tsh because we're using a u8, but our maximum number // is a 3-bit 0b111. So, we and with 7 (0b111) to just get the // last three bits. 
    let actual_tsh = tsh & 7;
    let tsh_reg = PLIC_THRESHOLD as *mut u32;
    unsafe {
        tsh_reg.write_volatile(actual_tsh as u32);
    }
}

/// See if a given interrupt id is pending.
// NOTE(review): `1 << id` overflows a u32 for ids >= 32 (the PCIE
// range listed above), and this only reads the first pending word —
// only ids 0..=31 are handled here. Confirm whether ids >= 32 are
// ever queried.
pub fn is_pending(id: u32) -> bool {
    let pend = PLIC_PENDING as *const u32;
    // Bit index within the pending bitset for this interrupt id.
    let actual_id = 1 << id;
    let pend_ids;
    unsafe {
        pend_ids = pend.read_volatile();
    }
    actual_id & pend_ids != 0
}

/// Enable a given interrupt id
pub fn enable(id: u32) {
    let enables = PLIC_INT_ENABLE as *mut u32;
    let actual_id = 1 << id;
    unsafe {
        // Unlike the complete and claim registers, the plic_int_enable
        // register is a bitset where the id is the bit index. The register
        // is a 32-bit register, so that gives us enables for interrupts
        // 31 through 1 (0 is hardwired to 0).
        // Read-modify-write so previously enabled ids stay enabled.
        enables.write_volatile(enables.read_volatile() | actual_id);
    }
}

/// Set a given interrupt priority to the given priority.
/// The priority must be [0..7]
pub fn set_priority(id: u32, prio: u8) {
    // Priorities are 3 bits wide; mask to the valid range.
    let actual_prio = prio as u32 & 7;
    let prio_reg = PLIC_PRIORITY as *mut u32;
    unsafe {
        // The offset for the interrupt id is:
        // PLIC_PRIORITY + 4 * id
        // Since we're using pointer arithmetic on a u32 type,
        // it will automatically multiply the id by 4.
        prio_reg.add(id as usize).write_volatile(actual_prio);
    }
}

================================================ FILE: risc_v/chapters/ch7/src/process.rs ================================================
// process.rs
// Kernel and user processes
// Stephen Marz
// 27 Nov 2019

use crate::{cpu::{build_satp, mscratch_write, satp_fence_asid, satp_write, SatpMode, TrapFrame},
            page::{alloc, dealloc, map, unmap, zalloc, EntryBits, Table, PAGE_SIZE}};
use alloc::collections::vec_deque::VecDeque;

// How many pages are we going to give a process for their
// stack?
const STACK_PAGES: usize = 2;
// We want to adjust the stack to be at the bottom of the memory allocation
// regardless of where it is on the kernel heap.
const STACK_ADDR: usize = 0xf_0000_0000;
// All processes will have a defined starting point in virtual memory.
const PROCESS_STARTING_ADDR: usize = 0x2000_0000; // Here, we store a process list. It uses the global allocator // that we made before and its job is to store all processes. // We will have this list OWN the process. So, anytime we want // the process, we will consult the process list. // Using an Option here is one method of creating a "lazy static". // Rust requires that all statics be initialized, but all // initializations must be at compile-time. We cannot allocate // a VecDeque at compile time, so we are somewhat forced to // do this. pub static mut PROCESS_LIST: Option> = None; // We can search through the process list to get a new PID, but // it's probably easier and faster just to increase the pid: static mut NEXT_PID: u16 = 1; /// We will eventually move this function out of here, but its /// job is just to take a slot in the process list. fn init_process() { // We can't do much here until we have system calls because // we're running in User space. loop {} } /// Add a process given a function address and then /// push it onto the LinkedList. Uses Process::new_default /// to create a new stack, etc. pub fn add_process_default(pr: fn()) { unsafe { // This is the Rust-ism that really trips up C++ programmers. // PROCESS_LIST is wrapped in an Option<> enumeration, which // means that the Option owns the Deque. We can only borrow from // it or move ownership to us. In this case, we choose the // latter, where we move ownership to us, add a process, and // then move ownership back to the PROCESS_LIST. // This allows mutual exclusion as anyone else trying to grab // the process list will get None rather than the Deque. if let Some(mut pl) = PROCESS_LIST.take() { // .take() will replace PROCESS_LIST with None and give // us the only copy of the Deque. let p = Process::new_default(pr); pl.push_back(p); // Now, we no longer need the owned Deque, so we hand it // back by replacing the PROCESS_LIST's None with the // Some(pl). 
PROCESS_LIST.replace(pl); } // TODO: When we get to multi-hart processing, we need to keep // trying to grab the process list. We can do this with an // atomic instruction. but right now, we're a single-processor // computer. } } /// This should only be called once, and its job is to create /// the init process. Right now, this process is in the kernel, /// but later, it should call the shell. pub fn init() -> usize { unsafe { PROCESS_LIST = Some(VecDeque::with_capacity(5)); add_process_default(init_process); // Ugh....Rust is giving me fits over here! // I just want a memory address to the trap frame, but // due to the borrow rules of Rust, I'm fighting here. So, // instead, let's move the value out of PROCESS_LIST, get // the address, and then move it right back in. let pl = PROCESS_LIST.take().unwrap(); let p = pl.front().unwrap().frame; let frame = &p as *const TrapFrame as usize; mscratch_write(frame); satp_write(build_satp( SatpMode::Sv39, 1, pl.front().unwrap().root as usize, ),); // Synchronize PID 1. We use ASID as the PID. satp_fence_asid(1); // Put the process list back in the global. PROCESS_LIST.replace(pl); // Return the first instruction's address to execute. // Since we use the MMU, all start here. PROCESS_STARTING_ADDR } } // Our process must be able to sleep, wait, or run. // Running - means that when the scheduler finds this process, it can run it. // Sleeping - means that the process is waiting on a certain amount of time. // Waiting - means that the process is waiting on I/O // Dead - We should never get here, but we can flag a process as Dead and clean // it out of the list later. pub enum ProcessState { Running, Sleeping, Waiting, Dead, } // Let's represent this in C ABI. We do this // because we need to access some of the fields // in assembly. Rust gets to choose how it orders // the fields unless we represent the structure in // C-style ABI. 
#[repr(C)]
pub struct Process {
    frame: TrapFrame,          // Saved registers; mscratch points here when running.
    stack: *mut u8,            // Physical base of the STACK_PAGES-page stack allocation.
    program_counter: usize,
    pid: u16,
    root: *mut Table,          // Root of this process' Sv39 page table.
    state: ProcessState,
    data: ProcessData,
}

impl Process {
    /// Build a new process whose entry point is `func`, with a fresh
    /// stack and page table. The stack and entry code are mapped into
    /// the process' virtual address space at STACK_ADDR and
    /// PROCESS_STARTING_ADDR respectively.
    pub fn new_default(func: fn()) -> Self {
        let func_addr = func as usize;
        // We will convert NEXT_PID below into an atomic increment when
        // we start getting into multi-hart processing. For now, we want
        // a process. Get it to work, then improve it!
        let mut ret_proc = Process {
            frame: TrapFrame::zero(),
            stack: alloc(STACK_PAGES),
            program_counter: PROCESS_STARTING_ADDR,
            pid: unsafe { NEXT_PID },
            root: zalloc(1) as *mut Table,
            state: ProcessState::Waiting,
            data: ProcessData::zero(),
        };
        unsafe {
            NEXT_PID += 1;
        }
        // Now we move the stack pointer to the bottom of the
        // allocation. The spec shows that register x2 (2) is the stack
        // pointer.
        // We could use ret_proc.stack.add, but that's an unsafe
        // function which would require an unsafe block. So, convert it
        // to usize first and then add PAGE_SIZE is better.
        // We also need to set the stack adjustment so that it is at the
        // bottom of the memory and far away from heap allocations.
        ret_proc.frame.regs[2] = STACK_ADDR + PAGE_SIZE * STACK_PAGES;
        // Map the stack on the MMU
        let pt;
        unsafe {
            pt = &mut *ret_proc.root;
        }
        let saddr = ret_proc.stack as usize;
        // We need to map the stack onto the user process' virtual
        // memory. This gets a little hairy because we need to also map
        // the function code too.
        for i in 0..STACK_PAGES {
            let addr = i * PAGE_SIZE;
            map(
                pt,
                STACK_ADDR + addr,
                saddr + addr,
                EntryBits::UserReadWrite.val(),
                0,
            );
        }
        // Map the program counter on the MMU
        map(
            pt,
            PROCESS_STARTING_ADDR,
            func_addr,
            EntryBits::UserReadExecute.val(),
            0,
        );
        // FIX: was `+ 0x1001` on both addresses. map() extracts VPN/PPN
        // by shifting, so an unaligned offset relies on the low bits
        // being discarded -- and picks the WRONG physical page whenever
        // func_addr's low 12 bits are 0xfff. Use the page-aligned
        // offset of one PAGE_SIZE to map the second code page.
        map(
            pt,
            PROCESS_STARTING_ADDR + 0x1000,
            func_addr + 0x1000,
            EntryBits::UserReadExecute.val(),
            0,
        );
        ret_proc
    }
}

impl Drop for Process {
    /// Since we're storing ownership of a Process in the linked list,
    /// we can cause it to deallocate automatically when it is removed.
fn drop(&mut self) { // We allocate the stack as a page. dealloc(self.stack); // This is unsafe, but it's at the drop stage, so we won't // be using this again. unsafe { // Remember that unmap unmaps all levels of page tables // except for the root. It also deallocates the memory // associated with the tables. unmap(&mut *self.root); } dealloc(self.root as *mut u8); } } // The private data in a process contains information // that is relevant to where we are, including the path // and open file descriptors. pub struct ProcessData { cwd_path: [u8; 128], } // This is private data that we can query with system calls. // If we want to implement CFQ (completely fair queuing), which // is a per-process block queuing algorithm, we can put that here. impl ProcessData { pub fn zero() -> Self { ProcessData { cwd_path: [0; 128], } } } ================================================ FILE: risc_v/chapters/ch7/src/sched.rs ================================================ // sched.rs // Simple process scheduler // Stephen Marz // 27 Dec 2019 use crate::process::{PROCESS_LIST, Process}; use crate::cpu::mscratch_write; pub fn schedule() { } pub fn switch_to(from: &mut Process, to: &mut Process) { } ================================================ FILE: risc_v/chapters/ch7/src/syscall.rs ================================================ // syscall.rs // System calls // Stephen Marz // 3 Jan 2020 use crate::cpu::TrapFrame; pub fn do_syscall(mepc: usize, frame: *mut TrapFrame) -> usize { let syscall_number; unsafe { // A0 is X10, so it's register number 10. 
syscall_number = (*frame).regs[10]; } match syscall_number { 0 => { // Exit mepc + 4 }, _ => { print!("Unknown syscall number {}", syscall_number); mepc + 4 } } } ================================================ FILE: risc_v/chapters/ch7/src/trap.rs ================================================ // trap.rs // Trap routines // Stephen Marz // 10 October 2019 use crate::cpu::TrapFrame; use crate::{plic, uart}; use crate::syscall::do_syscall; #[no_mangle] /// The m_trap stands for "machine trap". Right now, we are handling /// all traps at machine mode. In this mode, we can figure out what's /// going on and send a trap where it needs to be. Remember, in machine /// mode and in this trap, interrupts are disabled and the MMU is off. extern "C" fn m_trap(epc: usize, tval: usize, cause: usize, hart: usize, status: usize, frame: *mut TrapFrame) -> usize { // We're going to handle all traps in machine mode. RISC-V lets // us delegate to supervisor mode, but switching out SATP (virtual memory) // gets hairy. let is_async = { if cause >> 63 & 1 == 1 { true } else { false } }; // The cause contains the type of trap (sync, async) as well as the cause // number. So, here we narrow down just the cause number. let cause_num = cause & 0xfff; let mut return_pc = epc; if is_async { // Asynchronous trap match cause_num { 3 => { // Machine software println!("Machine software interrupt CPU#{}", hart); }, 7 => unsafe { // This is the context-switch timer. // We would typically invoke the scheduler here to pick another // process to run. // Machine timer println!("CTX"); let mtimecmp = 0x0200_4000 as *mut u64; let mtime = 0x0200_bff8 as *const u64; // The frequency given by QEMU is 10_000_000 Hz, so this sets // the next interrupt to fire one second from now. // This is much too slow for normal operations, but it gives us // a visual of what's happening behind the scenes. 
mtimecmp.write_volatile(mtime.read_volatile() + 10_000_000); }, 11 => { // Machine external (interrupt from Platform Interrupt Controller (PLIC)) // println!("Machine external interrupt CPU#{}", hart); // We will check the next interrupt. If the interrupt isn't available, this will // give us None. However, that would mean we got a spurious interrupt, unless we // get an interrupt from a non-PLIC source. This is the main reason that the PLIC // hardwires the id 0 to 0, so that we can use it as an error case. if let Some(interrupt) = plic::next() { // If we get here, we've got an interrupt from the claim register. The PLIC will // automatically prioritize the next interrupt, so when we get it from claim, it // will be the next in priority order. match interrupt { 10 => { // Interrupt 10 is the UART interrupt. // We would typically set this to be handled out of the interrupt context, // but we're testing here! C'mon! // We haven't yet used the singleton pattern for my_uart, but remember, this // just simply wraps 0x1000_0000 (UART). let mut my_uart = uart::Uart::new(0x1000_0000); // If we get here, the UART better have something! If not, what happened?? if let Some(c) = my_uart.get() { // If you recognize this code, it used to be in the lib.rs under kmain(). That // was because we needed to poll for UART data. Now that we have interrupts, // here it goes! match c { 8 => { // This is a backspace, so we // essentially have to write a space and // backup again: print!("{} {}", 8 as char, 8 as char); }, 10 | 13 => { // Newline or carriage-return println!(); }, _ => { print!("{}", c as char); }, } } }, // Non-UART interrupts go here and do nothing. _ => { println!("Non-UART external interrupt: {}", interrupt); } } // We've claimed it, so now say that we've handled it. This resets the interrupt pending // and allows the UART to interrupt again. Otherwise, the UART will get "stuck". 
plic::complete(interrupt); } }, _ => { panic!("Unhandled async trap CPU#{} -> {}\n", hart, cause_num); } } } else { // Synchronous trap match cause_num { 2 => { // Illegal instruction panic!("Illegal instruction CPU#{} -> 0x{:08x}: 0x{:08x}\n", hart, epc, tval); }, 8 => { // Environment (system) call from User mode println!("E-call from User mode! CPU#{} -> 0x{:08x}", hart, epc); return_pc = do_syscall(return_pc, frame); }, 9 => { // Environment (system) call from Supervisor mode println!("E-call from Supervisor mode! CPU#{} -> 0x{:08x}", hart, epc); return_pc = do_syscall(return_pc, frame); }, 11 => { // Environment (system) call from Machine mode panic!("E-call from Machine mode! CPU#{} -> 0x{:08x}\n", hart, epc); }, // Page faults 12 => { // Instruction page fault println!("Instruction page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval); return_pc += 4; }, 13 => { // Load page fault println!("Load page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval); return_pc += 4; }, 15 => { // Store page fault println!("Store page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval); return_pc += 4; }, _ => { panic!("Unhandled sync trap CPU#{} -> {}\n", hart, cause_num); } } }; // Finally, return the updated program counter return_pc } ================================================ FILE: risc_v/chapters/ch7/src/uart.rs ================================================ // uart.rs // UART routines and driver use core::{convert::TryInto, fmt::{Error, Write}}; pub struct Uart { base_address: usize, } impl Write for Uart { fn write_str(&mut self, out: &str) -> Result<(), Error> { for c in out.bytes() { self.put(c); } Ok(()) } } impl Uart { pub fn new(base_address: usize) -> Self { Uart { base_address } } pub fn init(&mut self) { let ptr = self.base_address as *mut u8; unsafe { // First, set the word length, which // are bits 0 and 1 of the line control register (LCR) // which is at base_address + 3 // We can easily write the value 3 here or 0b11, but I'm // 
extending it so that it is clear we're setting two // individual fields // Word 0 Word 1 // ~~~~~~ ~~~~~~ let lcr: u8 = (1 << 0) | (1 << 1); ptr.add(3).write_volatile(lcr); // Now, enable the FIFO, which is bit index 0 of the // FIFO control register (FCR at offset 2). // Again, we can just write 1 here, but when we use left // shift, it's easier to see that we're trying to write // bit index #0. ptr.add(2).write_volatile(1 << 0); // Enable receiver buffer interrupts, which is at bit // index 0 of the interrupt enable register (IER at // offset 1). ptr.add(1).write_volatile(1 << 0); // If we cared about the divisor, the code below would // set the divisor from a global clock rate of 22.729 // MHz (22,729,000 cycles per second) to a signaling // rate of 2400 (BAUD). We usually have much faster // signalling rates nowadays, but this demonstrates what // the divisor actually does. The formula given in the // NS16500A specification for calculating the divisor // is: // divisor = ceil( (clock_hz) / (baud_sps x 16) ) // So, we substitute our values and get: // divisor = ceil( 22_729_000 / (2400 x 16) ) // divisor = ceil( 22_729_000 / 38_400 ) // divisor = ceil( 591.901 ) = 592 // The divisor register is two bytes (16 bits), so we // need to split the value 592 into two bytes. // Typically, we would calculate this based on measuring // the clock rate, but again, for our purposes [qemu], // this doesn't really do anything. let divisor: u16 = 592; let divisor_least: u8 = (divisor & 0xff).try_into().unwrap(); let divisor_most: u8 = (divisor >> 8).try_into().unwrap(); // Notice that the divisor register DLL (divisor latch // least) and DLM (divisor latch most) have the same // base address as the receiver/transmitter and the // interrupt enable register. To change what the base // address points to, we open the "divisor latch" by // writing 1 into the Divisor Latch Access Bit (DLAB), // which is bit index 7 of the Line Control Register // (LCR) which is at base_address + 3. 
ptr.add(3).write_volatile(lcr | 1 << 7); // Now, base addresses 0 and 1 point to DLL and DLM, // respectively. Put the lower 8 bits of the divisor // into DLL ptr.add(0).write_volatile(divisor_least); ptr.add(1).write_volatile(divisor_most); // Now that we've written the divisor, we never have to // touch this again. In hardware, this will divide the // global clock (22.729 MHz) into one suitable for 2,400 // signals per second. So, to once again get access to // the RBR/THR/IER registers, we need to close the DLAB // bit by clearing it to 0. ptr.add(3).write_volatile(lcr); } } pub fn put(&mut self, c: u8) { let ptr = self.base_address as *mut u8; unsafe { ptr.add(0).write_volatile(c); } } pub fn get(&mut self) -> Option { let ptr = self.base_address as *mut u8; unsafe { if ptr.add(5).read_volatile() & 1 == 0 { // The DR bit is 0, meaning no data None } else { // The DR bit is 1, meaning data! Some(ptr.add(0).read_volatile()) } } } } ================================================ FILE: risc_v/chapters/ch8/.cargo/config ================================================ [build] target = "riscv64gc-unknown-none-elf" [target.riscv64gc-unknown-none-elf] linker = "riscv64-unknown-linux-gnu-gcc" ================================================ FILE: risc_v/chapters/ch8/.gitignore ================================================ os.elf target/* ================================================ FILE: risc_v/chapters/ch8/Cargo.toml ================================================ [package] name = "sos" version = "0.1.0" authors = ["Stephen Marz "] edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] crate-type = ["staticlib"] [dependencies] ================================================ FILE: risc_v/chapters/ch8/Makefile ================================================ ##### ## BUILD ##### CC=riscv64-unknown-linux-gnu-gcc CFLAGS=-Wall -Wextra -pedantic -Wextra -O0 -g CFLAGS+=-static -ffreestanding -nostdlib 
-fno-rtti -fno-exceptions CFLAGS+=-march=rv64gc -mabi=lp64 INCLUDES= LINKER_SCRIPT=-Tsrc/lds/virt.lds TYPE=debug RUST_TARGET=./target/riscv64gc-unknown-none-elf/$(TYPE) LIBS=-L$(RUST_TARGET) SOURCES_ASM=$(wildcard src/asm/*.S) LIB=-lsos -lgcc OUT=os.elf ##### ## QEMU ##### QEMU=qemu-system-riscv64 MACH=virt CPU=rv64 CPUS=4 MEM=128M DISK=hdd.dsk # DRIVE= -drive if=none,format=raw,file=$(DISK),id=foo -device virtio-blk-device,scsi=off,drive=foo DRIVE= all: cargo build $(CC) $(CFLAGS) $(LINKER_SCRIPT) $(INCLUDES) -o $(OUT) $(SOURCES_ASM) $(LIBS) $(LIB) run: all $(QEMU) -machine $(MACH) -cpu $(CPU) -smp $(CPUS) -m $(MEM) $(DRIVE) -nographic -serial mon:stdio -bios none -kernel $(OUT) .PHONY: clean clean: cargo clean rm -f $(OUT) ================================================ FILE: risc_v/chapters/ch8/make_hdd.sh ================================================ #!/bin/sh dd if=/dev/zero of=hdd.dsk bs=1M count=32 ================================================ FILE: risc_v/chapters/ch8/src/asm/boot.S ================================================ # boot.S # bootloader for SoS # Stephen Marz # 8 February 2019 # Disable generation of compressed instructions. .option norvc # Define a .text.init section. The .text.init is put at the # starting address so that the entry _start is put at the RISC-V # address 0x8000_0000. .section .text.init # Execution starts here. .global _start _start: # Disable linker instruction relaxation for the `la` instruction below. # This disallows the assembler from assuming that `gp` is already initialized. # This causes the value stored in `gp` to be calculated from `pc`. # The job of the global pointer is to give the linker the ability to address # memory relative to GP instead of as an absolute address. .option push .option norelax la gp, _global_pointer .option pop # SATP should be zero, but let's make sure. Each HART has its own # SATP register. 
csrw satp, zero # Any hardware threads (hart) that are not bootstrapping # need to wait for an IPI csrr t0, mhartid bnez t0, 3f # Set all bytes in the BSS section to zero. la a0, _bss_start la a1, _bss_end bgeu a0, a1, 2f 1: sd zero, (a0) addi a0, a0, 8 bltu a0, a1, 1b 2: # The stack grows from bottom to top, so we put the stack pointer # to the very end of the stack range. la sp, _stack_end # Setting `mstatus` register: # 0b01 << 11: Machine's previous protection mode is 2 (MPP=2). li t0, 0b11 << 11 csrw mstatus, t0 # Do not allow interrupts while running kinit csrw mie, zero # Machine's exception program counter (MEPC) is set to `kinit`. la t1, kinit csrw mepc, t1 # Set the return address to get us into supervisor mode la ra, 2f # We use mret here so that the mstatus register is properly updated. mret 2: # We set the return address (ra above) to this label. When kinit() is finished # in Rust, it will return here. # Setting `mstatus` (supervisor status) register: # 0b01 << 11 : Previous protection mode is 1 (MPP=01 [Supervisor]). # 1 << 7 : Previous machine interrupt-enable bit is 1 (MPIE=1 [Enabled]) # 1 << 5 : Previous interrupt-enable bit is 1 (SPIE=1 [Enabled]). # We set the "previous" bits because the mret will write the current bits # with the previous bits. li t0, (0b00 << 11) | (1 << 7) | (1 << 5) csrw mstatus, t0 # Machine's trap vector base address is set to `m_trap_vector`, for # "machine" trap vector. la t2, m_trap_vector csrw mtvec, t2 # Jump to first process. We put the MPP = 00 for user mode, so after # mret, we will jump to the first process' addresss in user mode. la ra, 4f mret 3: # Parked harts go here. We need to set these # to only awaken if it receives a software interrupt, # which we're going to call the SIPI (Software Intra-Processor Interrupt). 
# We call the SIPI by writing the software interrupt into the Core Local Interruptor (CLINT) # Which is calculated by: base_address + hart * 4 # where base address is 0x0200_0000 (MMIO CLINT base address) # We only use additional harts to run user-space programs, although this may # change. # We divide up the stack so the harts aren't clobbering one another. la sp, _stack_end li t0, 0x10000 csrr a0, mhartid mul t0, t0, a0 sub sp, sp, t0 # The parked harts will be put into machine mode with interrupts enabled. li t0, 0b11 << 11 | (1 << 7) csrw mstatus, t0 # Allow for MSIP (Software interrupt). We will write the MSIP from hart #0 to # awaken these parked harts. li t3, (1 << 3) csrw mie, t3 # Machine's exception program counter (MEPC) is set to the Rust initialization # code and waiting loop. la t1, kinit_hart csrw mepc, t1 # Machine's trap vector base address is set to `m_trap_vector`, for # "machine" trap vector. The Rust initialization routines will give each # hart its own trap frame. We can use the same trap function and distinguish # between each hart by looking at the trap frame. la t2, m_trap_vector csrw mtvec, t2 # Whenever our hart is done initializing, we want it to return to the waiting # loop, which is just below mret. la ra, 4f # We use mret here so that the mstatus register is properly updated. mret 4: # wfi = wait for interrupt. This is a hint to the harts to shut everything needed # down. However, the RISC-V specification allows for wfi to do nothing. Anyway, # with QEMU, this will save some CPU! 
wfi j 4b ================================================ FILE: risc_v/chapters/ch8/src/asm/mem.S ================================================ // mem.S // Importation of linker symbols .section .rodata .global HEAP_START HEAP_START: .dword _heap_start .global HEAP_SIZE HEAP_SIZE: .dword _heap_size .global TEXT_START TEXT_START: .dword _text_start .global TEXT_END TEXT_END: .dword _text_end .global DATA_START DATA_START: .dword _data_start .global DATA_END DATA_END: .dword _data_end .global RODATA_START RODATA_START: .dword _rodata_start .global RODATA_END RODATA_END: .dword _rodata_end .global BSS_START BSS_START: .dword _bss_start .global BSS_END BSS_END: .dword _bss_end .global KERNEL_STACK_START KERNEL_STACK_START: .dword _stack_start .global KERNEL_STACK_END KERNEL_STACK_END: .dword _stack_end ================================================ FILE: risc_v/chapters/ch8/src/asm/trap.S ================================================ # trap.S # Trap handler and global context # Steve Operating System # Stephen Marz # 24 February 2019 .option norvc .altmacro .set NUM_GP_REGS, 32 # Number of registers per context .set REG_SIZE, 8 # Register size (in bytes) # Use macros for saving and restoring multiple registers .macro save_gp i, basereg=t6 sd x\i, ((\i)*REG_SIZE)(\basereg) .endm .macro load_gp i, basereg=t6 ld x\i, ((\i)*REG_SIZE)(\basereg) .endm .macro save_fp i, basereg=t6 fsd f\i, ((NUM_GP_REGS+(\i))*REG_SIZE)(\basereg) .endm .macro load_fp i, basereg=t6 fld f\i, ((NUM_GP_REGS+(\i))*REG_SIZE)(\basereg) .endm .section .text .global m_trap_vector # This must be aligned by 4 since the last two bits # of the mtvec register do not contribute to the address # of this vector. .align 4 m_trap_vector: # All registers are volatile here, we need to save them # before we do anything. csrrw t6, mscratch, t6 # csrrw will atomically swap t6 into mscratch and the old # value of mscratch into t6. 
This is nice because we just # switched values and didn't destroy anything -- all atomically! # in cpu.rs we have a structure of: # 32 gp regs 0 # 32 fp regs 256 # SATP register 512 # Trap stack 520 # CPU HARTID 528 # We use t6 as the temporary register because it is the very # bottom register (x31) .set i, 0 .rept 31 save_gp %i .set i, i+1 .endr # Save the actual t6 register, which we swapped into # mscratch mv t5, t6 csrr t6, mscratch save_gp 31, t5 # Restore the kernel trap frame into mscratch csrw mscratch, t5 # Get ready to go into Rust (trap.rs) # We don't want to write into the user's stack or whomever # messed with us here. # csrw mie, zero csrr a0, mepc csrr a1, mtval csrr a2, mcause csrr a3, mhartid csrr a4, mstatus csrr a5, mscratch la t0, KERNEL_STACK_END ld sp, 0(t0) call m_trap # When we get here, we've returned from m_trap, restore registers # and return. # m_trap will return the return address via a0. csrw mepc, a0 # Now load the trap frame back into t6 csrr t6, mscratch # Restore all GP registers .set i, 1 .rept 31 load_gp %i .set i, i+1 .endr # Since we ran this loop 31 times starting with i = 1, # the last one loaded t6 back to its original value. mret .global switch_to_user switch_to_user: # a0 - Frame address # a1 - Program counter # a2 - SATP Register csrw mscratch, a0 # 1 << 7 is MPIE # Since user mode is 00, we don't need to set anything # in MPP (bits 12:11) li t0, 1 << 7 | 1 << 5 csrw mstatus, t0 csrw mepc, a1 csrw satp, a2 li t1, 0xaaa csrw mie, t1 la t2, m_trap_vector csrw mtvec, t2 # This fence forces the MMU to flush the TLB. However, since # we're using the PID as the address space identifier, we might # only need this when we create a process. Right now, this ensures # correctness, however it isn't the most efficient. sfence.vma # A0 is the context frame, so we need to reload it back # and mret so we can start running the program. mv t6, a0 .set i, 1 .rept 31 load_gp %i, t6 .set i, i+1 .endr # j . 
mret .global make_syscall make_syscall: ecall ret ================================================ FILE: risc_v/chapters/ch8/src/cpu.rs ================================================ // cpu.rs // CPU and CPU-related routines // Also contains the kernel's trap frame // Stephen Marz // 14 October 2019 use core::ptr::null_mut; /// In 64-bit mode, we're given three different modes for the MMU: /// 0 - The MMU is off -- no protection and no translation PA = VA /// 8 - This is Sv39 mode -- 39-bit virtual addresses /// 9 - This is Sv48 mode -- 48-bit virtual addresses #[repr(usize)] pub enum SatpMode { Off = 0, Sv39 = 8, Sv48 = 9, } /// The trap frame is set into a structure /// and packed into each hart's mscratch register. /// This allows for quick reference and full /// context switch handling. #[repr(C)] #[derive(Clone, Copy)] pub struct TrapFrame { pub regs: [usize; 32], // 0 - 255 pub fregs: [usize; 32], // 256 - 511 pub satp: usize, // 512 - 519 pub trap_stack: *mut u8, // 520 pub hartid: usize, // 528 } /// Rust requires that we initialize our structures /// because of the move semantics. What'll happen below /// is Rust will construct a new TrapFrame and move it /// out of the zero() function below. Rust contains two /// different "selfs" where self can refer to the object /// in memory or Self (capital S) which refers to the /// data type of the structure. In the case below, this /// is TrapFrame. impl TrapFrame { pub const fn zero() -> Self { TrapFrame { regs: [0; 32], fregs: [0; 32], satp: 0, trap_stack: null_mut(), hartid: 0, } } } /// The global kernel trap frame stores 8 separate /// frames -- one per CPU hart. We will switch these /// in and out and store a dormant trap frame with /// the process itself. pub static mut KERNEL_TRAP_FRAME: [TrapFrame; 8] = [TrapFrame::zero(); 8]; /// The SATP register contains three fields: mode, address space id, and /// the first level table address (level 2 for Sv39). 
This function /// helps make the 64-bit register contents based on those three /// fields. pub const fn build_satp(mode: SatpMode, asid: usize, addr: usize) -> usize { (mode as usize) << 60 | (asid & 0xffff) << 44 | (addr >> 12) & 0xff_ffff_ffff } pub fn mhartid_read() -> usize { unsafe { let rval; asm!("csrr $0, mhartid" :"=r"(rval)); rval } } pub fn mie_read() -> usize { unsafe { let rval; asm!("csrr $0, mie" :"=r"(rval)); rval } } pub fn mie_write(val: usize) { unsafe { asm!("csrw mie, $0" :: "r"(val)); } } pub fn mstatus_write(val: usize) { unsafe { asm!("csrw mstatus, $0" ::"r"(val)); } } pub fn mstatus_read() -> usize { unsafe { let rval; asm!("csrr $0, mstatus":"=r"(rval)); rval } } pub fn stvec_write(val: usize) { unsafe { asm!("csrw stvec, $0" ::"r"(val)); } } pub fn stvec_read() -> usize { unsafe { let rval; asm!("csrr $0, stvec" :"=r"(rval)); rval } } pub fn mscratch_write(val: usize) { unsafe { asm!("csrw mscratch, $0" ::"r"(val)); } } pub fn mscratch_read() -> usize { unsafe { let rval; asm!("csrr $0, mscratch" : "=r"(rval)); rval } } pub fn mscratch_swap(to: usize) -> usize { unsafe { let from; asm!("csrrw $0, mscratch, $1" : "=r"(from) : "r"(to)); from } } pub fn sscratch_write(val: usize) { unsafe { asm!("csrw sscratch, $0" ::"r"(val)); } } pub fn sscratch_read() -> usize { unsafe { let rval; asm!("csrr $0, sscratch" : "=r"(rval)); rval } } pub fn sscratch_swap(to: usize) -> usize { unsafe { let from; asm!("csrrw $0, sscratch, $1" : "=r"(from) : "r"(to)); from } } pub fn mepc_write(val: usize) { unsafe { asm!("csrw mepc, $0" :: "r"(val)); } } pub fn mepc_read() -> usize { unsafe { let rval; asm!("csrr $0, mepc" :"=r"(rval)); rval } } pub fn sepc_write(val: usize) { unsafe { asm!("csrw sepc, $0" :: "r"(val)); } } pub fn sepc_read() -> usize { unsafe { let rval; asm!("csrr $0, sepc" :"=r"(rval)); rval } } pub fn satp_write(val: usize) { unsafe { asm!("csrw satp, $0" :: "r"(val)); } } pub fn satp_read() -> usize { unsafe { let rval; asm!("csrr $0, 
satp" :"=r"(rval)); rval } } /// Take a hammer to the page tables and synchronize /// all of them. This essentially flushes the entire /// TLB. pub fn satp_fence(vaddr: usize, asid: usize) { unsafe { asm!("sfence.vma $0, $1" :: "r"(vaddr), "r"(asid)); } } /// Synchronize based on the address space identifier /// This allows us to fence a particular process rather /// than the entire TLB. /// The RISC-V documentation calls this a TLB flush +. /// Since there are other memory routines involved, they /// didn't call it a TLB flush, but it is much like /// Intel/AMD's invtlb [] instruction. pub fn satp_fence_asid(asid: usize) { unsafe { asm!("sfence.vma zero, $0" :: "r"(asid)); } } ================================================ FILE: risc_v/chapters/ch8/src/kmem.rs ================================================ // kmem.rs // Sub-page level: malloc-like allocation system // Stephen Marz // 7 October 2019 use crate::page::{align_val, zalloc, Table, PAGE_SIZE}; use core::{mem::size_of, ptr::null_mut}; #[repr(usize)] enum AllocListFlags { Taken = 1 << 63, } impl AllocListFlags { pub fn val(self) -> usize { self as usize } } struct AllocList { pub flags_size: usize, } impl AllocList { pub fn is_taken(&self) -> bool { self.flags_size & AllocListFlags::Taken.val() != 0 } pub fn is_free(&self) -> bool { !self.is_taken() } pub fn set_taken(&mut self) { self.flags_size |= AllocListFlags::Taken.val(); } pub fn set_free(&mut self) { self.flags_size &= !AllocListFlags::Taken.val(); } pub fn set_size(&mut self, sz: usize) { let k = self.is_taken(); self.flags_size = sz & !AllocListFlags::Taken.val(); if k { self.flags_size |= AllocListFlags::Taken.val(); } } pub fn get_size(&self) -> usize { self.flags_size & !AllocListFlags::Taken.val() } } // This is the head of the allocation. We start here when // we search for a free memory location. 
static mut KMEM_HEAD: *mut AllocList = null_mut(); // In the future, we will have on-demand pages // so, we need to keep track of our memory footprint to // see if we actually need to allocate more. static mut KMEM_ALLOC: usize = 0; static mut KMEM_PAGE_TABLE: *mut Table = null_mut(); // These functions are safe helpers around an unsafe // operation. pub fn get_head() -> *mut u8 { unsafe { KMEM_HEAD as *mut u8 } } pub fn get_page_table() -> *mut Table { unsafe { KMEM_PAGE_TABLE as *mut Table } } pub fn get_num_allocations() -> usize { unsafe { KMEM_ALLOC } } /// Initialize kernel's memory /// This is not to be used to allocate memory /// for user processes. If that's the case, use /// alloc/dealloc from the page crate. pub fn init() { unsafe { // Allocate kernel pages (KMEM_ALLOC) KMEM_ALLOC = 512; let k_alloc = zalloc(KMEM_ALLOC); assert!(!k_alloc.is_null()); KMEM_HEAD = k_alloc as *mut AllocList; (*KMEM_HEAD).set_free(); (*KMEM_HEAD).set_size(KMEM_ALLOC * PAGE_SIZE); KMEM_PAGE_TABLE = zalloc(1) as *mut Table; } } /// Allocate sub-page level allocation based on bytes and zero the memory pub fn kzmalloc(sz: usize) -> *mut u8 { let size = align_val(sz, 3); let ret = kmalloc(size); if !ret.is_null() { for i in 0..size { unsafe { (*ret.add(i)) = 0; } } } ret } /// Allocate sub-page level allocation based on bytes pub fn kmalloc(sz: usize) -> *mut u8 { unsafe { let size = align_val(sz, 3) + size_of::(); let mut head = KMEM_HEAD; // .add() uses pointer arithmetic, so we type-cast into a u8 // so that we multiply by an absolute size (KMEM_ALLOC * // PAGE_SIZE). let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { if (*head).is_free() && size <= (*head).get_size() { let chunk_size = (*head).get_size(); let rem = chunk_size - size; (*head).set_taken(); if rem > size_of::() { let next = (head as *mut u8).add(size) as *mut AllocList; // There is space remaining here. 
(*next).set_free(); (*next).set_size(rem); (*head).set_size(size); } else { // If we get here, take the entire chunk (*head).set_size(chunk_size); } return head.add(1) as *mut u8; } else { // If we get here, what we saw wasn't a free // chunk, move on to the next. head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } // If we get here, we didn't find any free chunks--i.e. there isn't // enough memory for this. TODO: Add on-demand page allocation. null_mut() } /// Free a sub-page level allocation pub fn kfree(ptr: *mut u8) { unsafe { if !ptr.is_null() { let p = (ptr as *mut AllocList).offset(-1); if (*p).is_taken() { (*p).set_free(); } // After we free, see if we can combine adjacent free // spots to see if we can reduce fragmentation. coalesce(); } } } /// Merge smaller chunks into a bigger chunk pub fn coalesce() { unsafe { let mut head = KMEM_HEAD; let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { let next = (head as *mut u8).add((*head).get_size()) as *mut AllocList; if (*head).get_size() == 0 { // If this happens, then we have a bad heap // (double free or something). However, that // will cause an infinite loop since the next // pointer will never move beyond the current // location. break; } else if next >= tail { // We calculated the next by using the size // given as get_size(), however this could push // us past the tail. In that case, the size is // wrong, hence we break and stop doing what we // need to do. break; } else if (*head).is_free() && (*next).is_free() { // This means we have adjacent blocks needing to // be freed. So, we combine them into one // allocation. (*head).set_size( (*head).get_size() + (*next).get_size(), ); } // If we get here, we might've moved. Recalculate new // head. 
head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } /// For debugging purposes, print the kmem table pub fn print_table() { unsafe { let mut head = KMEM_HEAD; let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { println!( "{:p}: Length = {:<10} Taken = {}", head, (*head).get_size(), (*head).is_taken() ); head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } // /////////////////////////////////// // / GLOBAL ALLOCATOR // /////////////////////////////////// // The global allocator allows us to use the data structures // in the core library, such as a linked list or B-tree. // We want to use these sparingly since we have a coarse-grained // allocator. use core::alloc::{GlobalAlloc, Layout}; // The global allocator is a static constant to a global allocator // structure. We don't need any members because we're using this // structure just to implement alloc and dealloc. struct OsGlobalAlloc; unsafe impl GlobalAlloc for OsGlobalAlloc { unsafe fn alloc(&self, layout: Layout) -> *mut u8 { // We align to the next page size so that when // we divide by PAGE_SIZE, we get exactly the number // of pages necessary. kzmalloc(layout.size()) } unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) { // We ignore layout since our allocator uses ptr_start -> last // to determine the span of an allocation. kfree(ptr); } } #[global_allocator] /// Technically, we don't need the {} at the end, but it /// reveals that we're creating a new structure and not just /// copying a value. static GA: OsGlobalAlloc = OsGlobalAlloc {}; #[alloc_error_handler] /// If for some reason alloc() in the global allocator gets null_mut(), /// then we come here. This is a divergent function, so we call panic to /// let the tester know what's going on. pub fn alloc_error(l: Layout) -> ! 
{ panic!( "Allocator failed to allocate {} bytes with {}-byte alignment.", l.size(), l.align() ); } ================================================ FILE: risc_v/chapters/ch8/src/lds/virt.lds ================================================ /* virt.lds Linker script for outputting to RISC-V QEMU "virt" machine. Stephen Marz 6 October 2019 */ /* riscv is the name of the architecture that the linker understands for any RISC-V target (64-bit or 32-bit). We will further refine this by using -mabi=lp64 and -march=rv64gc */ OUTPUT_ARCH( "riscv" ) /* We're setting our entry point to a symbol called _start which is inside of boot.S. This essentially stores the address of _start as the "entry point", or where CPU instructions should start executing. In the rest of this script, we are going to place _start right at the beginning of 0x8000_0000 because this is where the virtual machine and many RISC-V boards will start executing. */ ENTRY( _start ) /* The MEMORY section will explain that we have "ram" that contains a section that is 'w' (writeable), 'x' (executable), and 'a' (allocatable). We use '!' to invert 'r' (read-only) and 'i' (initialized). We don't want our memory to be read-only, and we're stating that it is NOT initialized at the beginning. The ORIGIN is the memory address 0x8000_0000. If we look at the virt spec or the specification for the RISC-V HiFive Unleashed, this is the starting memory address for our code. Side note: There might be other boot ROMs at different addresses, but their job is to get to this point. Finally LENGTH = 128M tells the linker that we have 128 megabyte of RAM. The linker will double check this to make sure everything can fit. The HiFive Unleashed has a lot more RAM than this, but for the virtual machine, I went with 128M since I think that's enough RAM for now. We can provide other pieces of memory, such as QSPI, or ROM, but we're telling the linker script here that we have one pool of RAM. 
*/ MEMORY { ram (wxa!ri) : ORIGIN = 0x80000000, LENGTH = 128M } /* PHDRS is short for "program headers", which we specify three here: text - CPU instructions (executable sections) data - Global, initialized variables bss - Global, uninitialized variables (all will be set to 0 by boot.S) The command PT_LOAD tells the linker that these sections will be loaded from the file into memory. We can actually stuff all of these into a single program header, but by splitting it up into three, we can actually use the other PT_* commands such as PT_DYNAMIC, PT_INTERP, PT_NULL to tell the linker where to find additional information. However, for our purposes, every section will be loaded from the program headers. */ PHDRS { text PT_LOAD; data PT_LOAD; bss PT_LOAD; } /* We are now going to organize the memory based on which section it is in. In assembly, we can change the section with the ".section" directive. However, in C++ and Rust, CPU instructions go into text, global constants go into rodata, global initialized variables go into data, and global uninitialized variables go into bss. */ SECTIONS { /* The first part of our RAM layout will be the text section. Since our CPU instructions are here, and our memory starts at 0x8000_0000, we need our entry point to line up here. */ .text : { /* PROVIDE allows me to access a symbol called _text_start so I know where the text section starts in the operating system. This should not move, but it is here for convenience. The period '.' tells the linker to set _text_start to the CURRENT location ('.' = current memory location). This current memory location moves as we add things. */ PROVIDE(_text_start = .); /* We are going to layout all text sections here, starting with .text.init. The asterisk in front of the parentheses means to match the .text.init section of ANY object file. 
Otherwise, we can specify which object file should contain the .text.init section, for example, boot.o(.text.init) would specifically put the .text.init section of our bootloader here. Because we might want to change the name of our files, we'll leave it with a *. Inside the parentheses is the name of the section. I created my own called .text.init to make 100% sure that the _start is put right at the beginning. The linker will lay this out in the order it receives it: .text.init first all .text sections next any .text.* sections last .text.* means to match anything after .text. If we didn't already specify .text.init, this would've matched here. The assembler and linker can place things in "special" text sections, so we match any we might come across here. */ *(.text.init) *(.text .text.*) /* Again, with PROVIDE, we're providing a readable symbol called _text_end, which is set to the memory address AFTER .text.init, .text, and .text.*'s have been added. */ PROVIDE(_text_end = .); /* The portion after the right brace is in an odd format. However, this is telling the linker what memory portion to put it in. We labeled our RAM, ram, with the constraints that it is writeable, allocatable, and executable. The linker will make sure with this that we can do all of those things. >ram - This just tells the linker script to put this entire section (.text) into the ram region of memory. To my knowledge, the '>' does not mean "greater than". Instead, it is a symbol to let the linker know we want to put this in ram. AT>ram - This sets the LMA (load memory address) region to the same thing. LMA is the final translation of a VMA (virtual memory address). With this linker script, we're loading everything into its physical location. We'll let the kernel copy and sort out the virtual memory. That's why >ram and AT>ram are continually the same thing. :text - This tells the linker script to put this into the :text program header. We've only defined three: text, data, and bss. 
In this case, we're telling the linker script to go into the text section. */ } >ram AT>ram :text /* The global pointer allows the linker to position global variables and constants into independent positions relative to the gp (global pointer) register. The globals start after the text sections and are only relevant to the rodata, data, and bss sections. */ PROVIDE(_global_pointer = .); /* Most compilers create a rodata (read only data) section for global constants. However, we're going to place ours in the text section. We can actually put this in :data, but since the .text section is read-only, we can place it there. NOTE: This doesn't actually do anything, yet. The actual "protection" cannot be done at link time. Instead, when we program the memory management unit (MMU), we will be able to choose which bits (R=read, W=write, X=execute) we want each memory segment to be able to do. */ .rodata : { PROVIDE(_rodata_start = .); *(.rodata .rodata.*) PROVIDE(_rodata_end = .); /* Again, we're placing the rodata section in the memory segment "ram" and we're putting it in the :text program header. We don't have one for rodata anyway. */ } >ram AT>ram :text .data : { /* . = ALIGN(4096) tells the linker to align the current memory location (which is 0x8000_0000 + text section + rodata section) to 4096 bytes. This is because our paging system's resolution is 4,096 bytes or 4 KiB. */ . = ALIGN(4096); PROVIDE(_data_start = .); /* sdata and data are essentially the same thing. However, compilers usually use the sdata sections for shorter, quicker loading sections. So, usually critical data is loaded there. However, we're loading all of this in one fell swoop. So, we're looking to put all of the following sections under the umbrella .data: .sdata .sdata.[anything] .data .data.[anything] ...in that order. 
  */
  *(.sdata .sdata.*) *(.data .data.*)
  PROVIDE(_data_end = .);
  } >ram AT>ram :data

  .bss : {
    PROVIDE(_bss_start = .);
    *(.sbss .sbss.*) *(.bss .bss.*)
    PROVIDE(_bss_end = .);
  } >ram AT>ram :bss

  /*
    The following will be helpful when we allocate the kernel stack (_stack)
    and determine where the heap begins and ends (_heap_start and
    _heap_start + _heap_size). When we do memory allocation, we can use these
    symbols.

    We use the symbols instead of hard-coding an address because this is a
    floating target. As we add code, the heap moves farther down the memory
    and gets shorter.

    _memory_start will be set to 0x8000_0000 here. We use ORIGIN(ram) so that
    it will take whatever we set the origin of ram to. Otherwise, we'd have to
    change it more than once if we ever stray away from 0x8000_0000 as our
    entry point.
  */
  PROVIDE(_memory_start = ORIGIN(ram));

  /*
    Our kernel stack starts at the end of the bss segment (_bss_end). However,
    we're allocating 0x8000 bytes (32 KiB) to our kernel stack. This should be
    PLENTY of space. The reason we add the memory is because the stack grows
    from higher memory to lower memory (bottom to top). Therefore we set the
    stack at the very bottom of its allocated slot. When we go to allocate
    from the stack, we'll subtract the number of bytes we need.

    NOTE(review): earlier revisions reserved 0x80000 (512 KiB) here; confirm
    that the smaller 32 KiB stack below is intentional.
  */
  PROVIDE(_stack_start = _bss_end);
  PROVIDE(_stack_end = _stack_start + 0x8000);
  PROVIDE(_memory_end = ORIGIN(ram) + LENGTH(ram));

  /*
    Finally, our heap starts right after the kernel stack. This heap will be
    used mainly to dole out memory for user-space applications. However, in
    some circumstances, it will be used for kernel memory as well.

    We don't align here because we let the kernel determine how it wants to
    do this.
*/ PROVIDE(_heap_start = _stack_end); PROVIDE(_heap_size = _memory_end - _heap_start); } ================================================ FILE: risc_v/chapters/ch8/src/lib.rs ================================================ // Steve Operating System // Stephen Marz // 21 Sep 2019 #![no_std] #![feature(panic_info_message, asm, allocator_api, alloc_error_handler, alloc_prelude, const_raw_ptr_to_usize_cast)] // #[macro_use] extern crate alloc; // This is experimental and requires alloc_prelude as a feature // use alloc::prelude::v1::*; // /////////////////////////////////// // / RUST MACROS // /////////////////////////////////// #[macro_export] macro_rules! print { ($($args:tt)+) => ({ use core::fmt::Write; let _ = write!(crate::uart::Uart::new(0x1000_0000), $($args)+); }); } #[macro_export] macro_rules! println { () => ({ print!("\r\n") }); ($fmt:expr) => ({ print!(concat!($fmt, "\r\n")) }); ($fmt:expr, $($args:tt)+) => ({ print!(concat!($fmt, "\r\n"), $($args)+) }); } // /////////////////////////////////// // / LANGUAGE STRUCTURES / FUNCTIONS // /////////////////////////////////// #[no_mangle] extern "C" fn eh_personality() {} #[panic_handler] fn panic(info: &core::panic::PanicInfo) -> ! { print!("Aborting: "); if let Some(p) = info.location() { println!( "line {}, file {}: {}", p.line(), p.file(), info.message().unwrap() ); } else { println!("no information available."); } abort(); } #[no_mangle] extern "C" fn abort() -> ! { loop { unsafe { asm!("wfi"::::"volatile"); } } } // /////////////////////////////////// // / CONSTANTS // /////////////////////////////////// // const STR_Y: &str = "\x1b[38;2;79;221;13m✓\x1b[m"; // const STR_N: &str = "\x1b[38;2;221;41;13m✘\x1b[m"; // The following symbols come from asm/mem.S. We can use // the symbols directly, but the address of the symbols // themselves are their values, which can cause issues. // Instead, I created doubleword values in mem.S in the .rodata and .data // sections. 
/*
extern "C" {
	static TEXT_START: usize;
	static TEXT_END: usize;
	static DATA_START: usize;
	static DATA_END: usize;
	static RODATA_START: usize;
	static RODATA_END: usize;
	static BSS_START: usize;
	static BSS_END: usize;
	static KERNEL_STACK_START: usize;
	static KERNEL_STACK_END: usize;
	static HEAP_START: usize;
	static HEAP_SIZE: usize;
}
*/

/// Identity map range
/// Takes a contiguous allocation of memory and maps it using PAGE_SIZE
/// This assumes that start <= end
/// `bits` is the OR'd EntryBits (R/W/X/U/G) for every leaf created.
pub fn id_map_range(root: &mut page::Table,
                    start: usize,
                    end: usize,
                    bits: i64)
{
	// Round start DOWN to a page boundary so the whole range is covered.
	let mut memaddr = start & !(page::PAGE_SIZE - 1);
	let num_kb_pages =
		(page::align_val(end, 12) - memaddr) / page::PAGE_SIZE;

	// I named this num_kb_pages for future expansion when
	// I decide to allow for GiB (2^30) and 2MiB (2^21) page
	// sizes. However, the overlapping memory regions are causing
	// nightmares.
	for _ in 0..num_kb_pages {
		page::map(root, memaddr, memaddr, bits, 0);
		memaddr += 1 << 12;
	}
}

extern "C" {
	// Defined in asm/trap.S; loads the frame, sets mepc/satp, and mrets
	// into user mode. It does not return.
	fn switch_to_user(frame: usize, mepc: usize, satp: usize) -> !;
}

// ///////////////////////////////////
// / ENTRY POINT
// ///////////////////////////////////
#[no_mangle]
extern "C" fn kinit() {
	uart::Uart::new(0x1000_0000).init();
	page::init();
	kmem::init();
	let ret = process::init();
	println!("Init process created at address 0x{:08x}", ret);
	// We lower the threshold wall so our interrupts can jump over it.
	plic::set_threshold(0);
	// VIRTIO = [1..8]
	// UART0 = 10
	// PCIE = [32..35]
	// Enable the UART interrupt.
	plic::enable(10);
	plic::set_priority(10, 1);
	println!("UART interrupts have been enabled and are awaiting your command.");
	println!("Getting ready for first process.");
	println!("Issuing the first context-switch timer.");
	unsafe {
		// CLINT mtimecmp / mtime registers on QEMU's virt machine.
		let mtimecmp = 0x0200_4000 as *mut u64;
		let mtime = 0x0200_bff8 as *const u64;
		// The frequency given by QEMU is 10_000_000 Hz, so adding
		// 1_000_000 ticks sets the next interrupt to fire 0.1 seconds
		// (not one second) from now.
		mtimecmp.write_volatile(mtime.read_volatile() + 1_000_000);
	}
	let (frame, mepc, satp) = sched::schedule();
	unsafe {
		switch_to_user(frame, mepc, satp);
	}
	// switch_to_user will not return, so we should never get here
	println!("WE DIDN'T SCHEDULE?! THIS ISN'T RIGHT!");
}

#[no_mangle]
extern "C" fn kinit_hart(hartid: usize) {
	// All non-0 harts initialize here.
	unsafe {
		// We have to store the kernel's table. The tables will be moved
		// back and forth between the kernel's table and user
		// applicatons' tables.
		cpu::mscratch_write(
		                    (&mut cpu::KERNEL_TRAP_FRAME[hartid]
		                     as *mut cpu::TrapFrame)
		                    as usize,
		);
		// Copy the same mscratch over to the supervisor version of the
		// same register.
		cpu::sscratch_write(cpu::mscratch_read());
		cpu::KERNEL_TRAP_FRAME[hartid].hartid = hartid;
		// We can't do the following until zalloc() is locked, but we
		// don't have locks, yet :(
		// cpu::KERNEL_TRAP_FRAME[hartid].satp
		// = cpu::KERNEL_TRAP_FRAME[0].satp;
		// cpu::KERNEL_TRAP_FRAME[hartid].trap_stack = page::zalloc(1);
	}
}

// ///////////////////////////////////
// / RUST MODULES
// ///////////////////////////////////

pub mod cpu;
pub mod kmem;
pub mod page;
pub mod plic;
pub mod process;
pub mod sched;
pub mod syscall;
pub mod trap;
pub mod uart;
================================================ FILE: risc_v/chapters/ch8/src/page.rs ================================================
// page.rs
// Memory routines
// Stephen Marz
// 6 October 2019

use core::{mem::size_of, ptr::null_mut};

// ////////////////////////////////
// // Allocation routines
// ////////////////////////////////
extern "C" {
	// Provided by asm/mem.S (values of the linker symbols
	// _heap_start and _heap_size).
	static HEAP_START: usize;
	static HEAP_SIZE: usize;
}

// We will use ALLOC_START to mark the start of the actual
// memory we can dish out.
static mut ALLOC_START: usize = 0;
const PAGE_ORDER: usize = 12;
pub const PAGE_SIZE: usize = 1 << 12;

/// Round `val` up to the nearest multiple of 2^`order`.
/// All alignments are therefore powers of two; this never rounds down.
pub const fn align_val(val: usize, order: usize) -> usize {
	let mask = (1usize << order) - 1;
	(val + mask) & !mask
}

#[repr(u8)]
pub enum PageBits {
	Empty = 0,
	Taken = 1 << 0,
	Last = 1 << 1,
}

impl PageBits {
	/// Convenience conversion to the underlying u8, since we OR and
	/// AND these bits against Page::flags constantly.
	pub fn val(self) -> u8 {
		self as u8
	}
}

/// Per-page bookkeeping record: one Page structure describes one
/// 4096-byte chunk of the heap (Linux keeps a similar, much larger,
/// struct page per frame).
pub struct Page {
	flags: u8,
}

impl Page {
	/// True when this page is the final page of an allocation.
	pub fn is_last(&self) -> bool {
		self.flags & PageBits::Last.val() != 0
	}

	/// True when this page is currently allocated.
	pub fn is_taken(&self) -> bool {
		self.flags & PageBits::Taken.val() != 0
	}

	/// Opposite of is_taken().
	pub fn is_free(&self) -> bool {
		!self.is_taken()
	}

	/// Reset this page's bookkeeping to the empty state.
	pub fn clear(&mut self) {
		self.flags = PageBits::Empty.val();
	}

	/// OR a flag bit in. (PageBits has no BitOr impl, so we convert
	/// through val() instead.)
	pub fn set_flag(&mut self, flag: PageBits) {
		self.flags |= flag.val();
	}

	/// Clear a flag bit.
	pub fn clear_flag(&mut self, flag: PageBits) {
		self.flags &= !(flag.val());
	}
}

/// Initialize the allocation system. There are several ways that we can
/// implement the page allocator:
/// 1. Free list (singly linked list where it starts at the first free
/// allocation) 2.
Bookkeeping list (structure contains a taken and length) /// 3. Allocate one Page structure per 4096 bytes (this is what I chose) /// 4. Others pub fn init() { unsafe { // let desc_per_page = PAGE_SIZE / size_of::(); let num_pages = HEAP_SIZE / PAGE_SIZE; // let num_desc_pages = num_pages / desc_per_page; let ptr = HEAP_START as *mut Page; // Clear all pages to make sure that they aren't accidentally // taken for i in 0..num_pages { (*ptr.add(i)).clear(); } // Determine where the actual useful memory starts. This will be // after all Page structures. We also must align the ALLOC_START // to a page-boundary (PAGE_SIZE = 4096). ALLOC_START = // (HEAP_START + num_pages * size_of::() + PAGE_SIZE - 1) // & !(PAGE_SIZE - 1); ALLOC_START = align_val( HEAP_START + num_pages * size_of::(), PAGE_ORDER, ); } } /// Allocate a page or multiple pages /// pages: the number of PAGE_SIZE pages to allocate pub fn alloc(pages: usize) -> *mut u8 { // We have to find a contiguous allocation of pages assert!(pages > 0); unsafe { // We create a Page structure for each page on the heap. We // actually might have more since HEAP_SIZE moves and so does // the size of our structure, but we'll only waste a few bytes. let num_pages = HEAP_SIZE / PAGE_SIZE; let ptr = HEAP_START as *mut Page; for i in 0..num_pages - pages { let mut found = false; // Check to see if this Page is free. If so, we have our // first candidate memory address. if (*ptr.add(i)).is_free() { // It was FREE! Yay! found = true; for j in i..i + pages { // Now check to see if we have a // contiguous allocation for all of the // request pages. If not, we should // check somewhere else. if (*ptr.add(j)).is_taken() { found = false; break; } } } // We've checked to see if there are enough contiguous // pages to form what we need. If we couldn't, found // will be false, otherwise it will be true, which means // we've found valid memory we can allocate. 
if found { for k in i..i + pages - 1 { (*ptr.add(k)).set_flag(PageBits::Taken); } // The marker for the last page is // PageBits::Last This lets us know when we've // hit the end of this particular allocation. (*ptr.add(i+pages-1)).set_flag(PageBits::Taken); (*ptr.add(i+pages-1)).set_flag(PageBits::Last); // The Page structures themselves aren't the // useful memory. Instead, there is 1 Page // structure per 4096 bytes starting at // ALLOC_START. return (ALLOC_START + PAGE_SIZE * i) as *mut u8; } } } // If we get here, that means that no contiguous allocation was // found. null_mut() } /// Allocate and zero a page or multiple pages /// pages: the number of pages to allocate /// Each page is PAGE_SIZE which is calculated as 1 << PAGE_ORDER /// On RISC-V, this typically will be 4,096 bytes. pub fn zalloc(pages: usize) -> *mut u8 { // Allocate and zero a page. // First, let's get the allocation let ret = alloc(pages); if !ret.is_null() { let size = (PAGE_SIZE * pages) / 8; let big_ptr = ret as *mut u64; for i in 0..size { // We use big_ptr so that we can force an // sd (store doubleword) instruction rather than // the sb. This means 8x fewer stores than before. // Typically we have to be concerned about remaining // bytes, but fortunately 4096 % 8 = 0, so we // won't have any remaining bytes. unsafe { (*big_ptr.add(i)) = 0; } } } ret } /// Deallocate a page by its pointer /// The way we've structured this, it will automatically coalesce /// contiguous pages. pub fn dealloc(ptr: *mut u8) { // Make sure we don't try to free a null pointer. assert!(!ptr.is_null()); unsafe { let addr = HEAP_START + (ptr as usize - ALLOC_START) / PAGE_SIZE; // Make sure that the address makes sense. The address we // calculate here is the page structure, not the HEAP address! assert!(addr >= HEAP_START && addr < HEAP_START + HEAP_SIZE); let mut p = addr as *mut Page; // Keep clearing pages until we hit the last page. 
while (*p).is_taken() && !(*p).is_last() { (*p).clear(); p = p.add(1); } // If the following assertion fails, it is most likely // caused by a double-free. assert!( (*p).is_last() == true, "Possible double-free detected! (Not taken found \ before last)" ); // If we get here, we've taken care of all previous pages and // we are on the last page. (*p).clear(); } } /// Print all page allocations /// This is mainly used for debugging. pub fn print_page_allocations() { unsafe { let num_pages = (HEAP_SIZE - (ALLOC_START - HEAP_START)) / PAGE_SIZE; let mut beg = HEAP_START as *const Page; let end = beg.add(num_pages); let alloc_beg = ALLOC_START; let alloc_end = ALLOC_START + num_pages * PAGE_SIZE; println!(); println!( "PAGE ALLOCATION TABLE\nMETA: {:p} -> {:p}\nPHYS: \ 0x{:x} -> 0x{:x}", beg, end, alloc_beg, alloc_end ); println!("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"); let mut num = 0; while beg < end { if (*beg).is_taken() { let start = beg as usize; let memaddr = ALLOC_START + (start - HEAP_START) * PAGE_SIZE; print!("0x{:x} => ", memaddr); loop { num += 1; if (*beg).is_last() { let end = beg as usize; let memaddr = ALLOC_START + (end - HEAP_START) * PAGE_SIZE + PAGE_SIZE - 1; print!( "0x{:x}: {:>3} page(s)", memaddr, (end - start + 1) ); println!("."); break; } beg = beg.add(1); } } beg = beg.add(1); } println!("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"); println!( "Allocated: {:>6} pages ({:>10} bytes).", num, num * PAGE_SIZE ); println!( "Free : {:>6} pages ({:>10} bytes).", num_pages - num, (num_pages - num) * PAGE_SIZE ); println!(); } } // //////////////////////////////// // // MMU Routines // //////////////////////////////// // Represent (repr) our entry bits as // unsigned 64-bit integers. 
#[repr(i64)] #[derive(Copy, Clone)] pub enum EntryBits { None = 0, Valid = 1 << 0, Read = 1 << 1, Write = 1 << 2, Execute = 1 << 3, User = 1 << 4, Global = 1 << 5, Access = 1 << 6, Dirty = 1 << 7, // Convenience combinations ReadWrite = 1 << 1 | 1 << 2, ReadExecute = 1 << 1 | 1 << 3, ReadWriteExecute = 1 << 1 | 1 << 2 | 1 << 3, // User Convenience Combinations UserReadWrite = 1 << 1 | 1 << 2 | 1 << 4, UserReadExecute = 1 << 1 | 1 << 3 | 1 << 4, UserReadWriteExecute = 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4, } // Helper functions to convert the enumeration // into an i64, which is what our page table // entries will be. impl EntryBits { pub fn val(self) -> i64 { self as i64 } } // A single entry. We're using an i64 so that // this will sign-extend rather than zero-extend // since RISC-V requires that the reserved sections // take on the most significant bit. pub struct Entry { pub entry: i64, } // The Entry structure describes one of the 512 entries per table, which is // described in the RISC-V privileged spec Figure 4.18. impl Entry { pub fn is_valid(&self) -> bool { self.get_entry() & EntryBits::Valid.val() != 0 } // The first bit (bit index #0) is the V bit for // valid. pub fn is_invalid(&self) -> bool { !self.is_valid() } // A leaf has one or more RWX bits set pub fn is_leaf(&self) -> bool { self.get_entry() & 0xe != 0 } pub fn is_branch(&self) -> bool { !self.is_leaf() } pub fn set_entry(&mut self, entry: i64) { self.entry = entry; } pub fn get_entry(&self) -> i64 { self.entry } } // Table represents a single table, which contains 512 (2^9), 64-bit entries. pub struct Table { pub entries: [Entry; 512], } impl Table { pub fn len() -> usize { 512 } } /// Map a virtual address to a physical address using 4096-byte page /// size. /// root: a mutable reference to the root Table /// vaddr: The virtual address to map /// paddr: The physical address to map /// bits: An OR'd bitset containing the bits the leaf should have. 
/// The bits should contain only the following:
///   Read, Write, Execute, User, and/or Global
/// The bits MUST include one or more of the following:
///   Read, Write, Execute
/// The valid bit automatically gets added.
/// level: which level the leaf should be placed at (0 = 4 KiB pages;
/// higher levels produce superpage leaves).
pub fn map(root: &mut Table, vaddr: usize, paddr: usize, bits: i64, level: usize) {
	// Make sure that Read, Write, or Execute have been provided
	// otherwise, we'll leak memory and always create a page fault.
	assert!(bits & 0xe != 0);
	// Extract out each VPN from the virtual address
	// On the virtual address, each VPN is exactly 9 bits,
	// which is why we use the mask 0x1ff = 0b1_1111_1111 (9 bits)
	let vpn = [
	           // VPN[0] = vaddr[20:12]
	           (vaddr >> 12) & 0x1ff,
	           // VPN[1] = vaddr[29:21]
	           (vaddr >> 21) & 0x1ff,
	           // VPN[2] = vaddr[38:30]
	           (vaddr >> 30) & 0x1ff,
	];
	// Just like the virtual address, extract the physical address
	// numbers (PPN). However, PPN[2] is different in that it stores
	// 26 bits instead of 9. Therefore, we use,
	// 0x3ff_ffff = 0b11_1111_1111_1111_1111_1111_1111 (26 bits).
	let ppn = [
	           // PPN[0] = paddr[20:12]
	           (paddr >> 12) & 0x1ff,
	           // PPN[1] = paddr[29:21]
	           (paddr >> 21) & 0x1ff,
	           // PPN[2] = paddr[55:30]
	           (paddr >> 30) & 0x3ff_ffff,
	];
	// We will use this as a floating reference so that we can set
	// individual entries as we walk the table.
	let mut v = &mut root.entries[vpn[2]];
	// Now, we're going to traverse the page table and set the bits
	// properly. We expect the root to be valid, however we're required to
	// create anything beyond the root.
	// In Rust, we create a range iterator using the .. operator.
	// The .rev() will reverse the iteration since we need to start with
	// VPN[2] The .. operator is inclusive on start but exclusive on end.
	// So, (0..2) will iterate 0 and 1.
	for i in (level..2).rev() {
		if !v.is_valid() {
			// Allocate a page to hold the next-level table.
			let page = zalloc(1);
			// The page is already aligned by 4,096, so store it
			// directly The page is stored in the entry shifted
			// right by 2 places (PPN starts at PTE bit 10, not 12).
			v.set_entry((page as i64 >> 2) | EntryBits::Valid.val(),);
		}
		// Follow the branch: PTE -> physical address of next table,
		// then index it with this level's VPN.
		let entry = ((v.get_entry() & !0x3ff) << 2) as *mut Entry;
		v = unsafe { entry.add(vpn[i]).as_mut().unwrap() };
	}
	// When we get here, we should be at VPN[0] and v should be pointing to
	// our entry.
	// The entry structure is Figure 4.18 in the RISC-V Privileged
	// Specification
	let entry = (ppn[2] << 28) as i64 |   // PPN[2] = [53:28]
	            (ppn[1] << 19) as i64 |   // PPN[1] = [27:19]
	            (ppn[0] << 10) as i64 |   // PPN[0] = [18:10]
	            bits |                    // Specified bits, such as User, Read, Write, etc
	            EntryBits::Valid.val() |  // Valid bit
	            EntryBits::Dirty.val() |  // Some machines require this to =1
	            EntryBits::Access.val()   // Just like dirty, some machines require this
	            ;
	// Set the entry. V should be set to the correct pointer by the loop
	// above.
	v.set_entry(entry);
}

/// Unmaps and frees all memory associated with a table.
/// root: The root table to start freeing.
/// NOTE: This does NOT free root directly. This must be
/// freed manually.
/// The reason we don't free the root is because it is
/// usually embedded into the Process structure.
pub fn unmap(root: &mut Table) {
	// Start with level 2
	for lv2 in 0..Table::len() {
		let ref entry_lv2 = root.entries[lv2];
		if entry_lv2.is_valid() && entry_lv2.is_branch() {
			// This is a valid entry, so drill down and free.
			let memaddr_lv1 = (entry_lv2.get_entry() & !0x3ff) << 2;
			let table_lv1 = unsafe {
				// Make table_lv1 a mutable reference instead of
				// a pointer.
				(memaddr_lv1 as *mut Table).as_mut().unwrap()
			};
			for lv1 in 0..Table::len() {
				let ref entry_lv1 = table_lv1.entries[lv1];
				if entry_lv1.is_valid() && entry_lv1.is_branch() {
					let memaddr_lv0 = (entry_lv1.get_entry() & !0x3ff) << 2;
					// The next level is level 0, which
					// cannot have branches, therefore,
					// we free here.
					dealloc(memaddr_lv0 as *mut u8);
				}
			}
			dealloc(memaddr_lv1 as *mut u8);
		}
	}
}

/// Walk the page table to convert a virtual address to a
/// physical address.
/// If a page fault would occur, this returns None /// Otherwise, it returns Some with the physical address. pub fn virt_to_phys(root: &Table, vaddr: usize) -> Option { // Walk the page table pointed to by root let vpn = [ // VPN[0] = vaddr[20:12] (vaddr >> 12) & 0x1ff, // VPN[1] = vaddr[29:21] (vaddr >> 21) & 0x1ff, // VPN[2] = vaddr[38:30] (vaddr >> 30) & 0x1ff, ]; let mut v = &root.entries[vpn[2]]; for i in (0..=2).rev() { if v.is_invalid() { // This is an invalid entry, page fault. break; } else if v.is_leaf() { // According to RISC-V, a leaf can be at any level. // The offset mask masks off the PPN. Each PPN is 9 // bits and they start at bit #12. So, our formula // 12 + i * 9 let off_mask = (1 << (12 + i * 9)) - 1; let vaddr_pgoff = vaddr & off_mask; let addr = ((v.get_entry() << 2) as usize) & !off_mask; return Some(addr | vaddr_pgoff); } // Set v to the next entry which is pointed to by this // entry. However, the address was shifted right by 2 places // when stored in the page table entry, so we shift it left // to get it back into place. let entry = ((v.get_entry() & !0x3ff) << 2) as *const Entry; // We do i - 1 here, however we should get None or Some() above // before we do 0 - 1 = -1. v = unsafe { entry.add(vpn[i - 1]).as_ref().unwrap() }; } // If we get here, we've exhausted all valid tables and haven't // found a leaf. None } ================================================ FILE: risc_v/chapters/ch8/src/plic.rs ================================================ // plic.rs // Platform Level Interrupt Controller (PLIC) // Stephen Marz // 1 Nov 2019 const PLIC_PRIORITY: usize = 0x0c00_0000; const PLIC_PENDING: usize = 0x0c00_1000; const PLIC_INT_ENABLE: usize = 0x0c00_2000; const PLIC_THRESHOLD: usize = 0x0c20_0000; const PLIC_CLAIM: usize = 0x0c20_0004; // Each register is 4-bytes (u32) // The PLIC is an external interrupt controller. The one // used by QEMU virt is the same as the SiFive PLIC. 
// https://sifive.cdn.prismic.io/sifive%2F834354f0-08e6-423c-bf1f-0cb58ef14061_fu540-c000-v1.0.pdf // Chapter 10 explains the priority, pending, interrupt enable, threshold and claims // The virt machine has the following external interrupts (from Qemu source): // Interrupt 0 is a "null" interrupt and is hardwired to 0. // VIRTIO = [1..8] // UART0 = 10 // PCIE = [32..35] /// Get the next available interrupt. This is the "claim" process. /// The plic will automatically sort by priority and hand us the /// ID of the interrupt. For example, if the UART is interrupting /// and it's next, we will get the value 10. pub fn next() -> Option { let claim_reg = PLIC_CLAIM as *const u32; let claim_no; // The claim register is filled with the highest-priority, enabled interrupt. unsafe { claim_no = claim_reg.read_volatile(); } if claim_no == 0 { // The interrupt 0 is hardwired to 0, which tells us that there is no // interrupt to claim, hence we return None. None } else { // If we get here, we've gotten a non-0 interrupt. Some(claim_no) } } /// Complete a pending interrupt by id. The id should come /// from the next() function above. pub fn complete(id: u32) { let complete_reg = PLIC_CLAIM as *mut u32; unsafe { // We actually write a u32 into the entire complete_register. // This is the same register as the claim register, but it can // differentiate based on whether we're reading or writing. complete_reg.write_volatile(id); } } /// Set the global threshold. The threshold can be a value [0..7]. /// The PLIC will mask any interrupts at or below the given threshold. /// This means that a threshold of 7 will mask ALL interrupts and /// a threshold of 0 will allow ALL interrupts. pub fn set_threshold(tsh: u8) { // We do tsh because we're using a u8, but our maximum number // is a 3-bit 0b111. So, we and with 7 (0b111) to just get the // last three bits. 
let actual_tsh = tsh & 7; let tsh_reg = PLIC_THRESHOLD as *mut u32; unsafe { tsh_reg.write_volatile(actual_tsh as u32); } } /// See if a given interrupt id is pending. pub fn is_pending(id: u32) -> bool { let pend = PLIC_PENDING as *const u32; let actual_id = 1 << id; let pend_ids; unsafe { pend_ids = pend.read_volatile(); } actual_id & pend_ids != 0 } /// Enable a given interrupt id pub fn enable(id: u32) { let enables = PLIC_INT_ENABLE as *mut u32; let actual_id = 1 << id; unsafe { // Unlike the complete and claim registers, the plic_int_enable // register is a bitset where the id is the bit index. The register // is a 32-bit register, so that gives us enables for interrupts // 31 through 1 (0 is hardwired to 0). enables.write_volatile(enables.read_volatile() | actual_id); } } /// Set a given interrupt priority to the given priority. /// The priority must be [0..7] pub fn set_priority(id: u32, prio: u8) { let actual_prio = prio as u32 & 7; let prio_reg = PLIC_PRIORITY as *mut u32; unsafe { // The offset for the interrupt id is: // PLIC_PRIORITY + 4 * id // Since we're using pointer arithmetic on a u32 type, // it will automatically multiply the id by 4. prio_reg.add(id as usize).write_volatile(actual_prio); } } ================================================ FILE: risc_v/chapters/ch8/src/process.rs ================================================ // process.rs // Kernel and user processes // Stephen Marz // 27 Nov 2019 use crate::{cpu::TrapFrame, page::{alloc, dealloc, map, unmap, zalloc, EntryBits, Table, PAGE_SIZE}}; use alloc::collections::vec_deque::VecDeque; // How many pages are we going to give a process for their // stack? const STACK_PAGES: usize = 2; // We want to adjust the stack to be at the bottom of the memory allocation // regardless of where it is on the kernel heap. const STACK_ADDR: usize = 0x1_0000_0000; // All processes will have a defined starting point in virtual memory. 
const PROCESS_STARTING_ADDR: usize = 0x8000_0000; // Here, we store a process list. It uses the global allocator // that we made before and its job is to store all processes. // We will have this list OWN the process. So, anytime we want // the process, we will consult the process list. // Using an Option here is one method of creating a "lazy static". // Rust requires that all statics be initialized, but all // initializations must be at compile-time. We cannot allocate // a VecDeque at compile time, so we are somewhat forced to // do this. pub static mut PROCESS_LIST: Option> = None; // We can search through the process list to get a new PID, but // it's probably easier and faster just to increase the pid: static mut NEXT_PID: u16 = 1; extern "C" { fn make_syscall(a: usize) -> usize; } /// We will eventually move this function out of here, but its /// job is just to take a slot in the process list. fn init_process() { // We can't do much here until we have system calls because // we're running in User space. let mut i: usize = 0; loop { i += 1; if i > 70_000_000 { unsafe { make_syscall(1); } i = 0; } } } /// Add a process given a function address and then /// push it onto the LinkedList. Uses Process::new_default /// to create a new stack, etc. pub fn add_process_default(pr: fn()) { unsafe { // This is the Rust-ism that really trips up C++ programmers. // PROCESS_LIST is wrapped in an Option<> enumeration, which // means that the Option owns the Deque. We can only borrow from // it or move ownership to us. In this case, we choose the // latter, where we move ownership to us, add a process, and // then move ownership back to the PROCESS_LIST. // This allows mutual exclusion as anyone else trying to grab // the process list will get None rather than the Deque. if let Some(mut pl) = PROCESS_LIST.take() { // .take() will replace PROCESS_LIST with None and give // us the only copy of the Deque. 
let p = Process::new_default(pr); pl.push_back(p); // Now, we no longer need the owned Deque, so we hand it // back by replacing the PROCESS_LIST's None with the // Some(pl). PROCESS_LIST.replace(pl); } // TODO: When we get to multi-hart processing, we need to keep // trying to grab the process list. We can do this with an // atomic instruction. but right now, we're a single-processor // computer. } } /// This should only be called once, and its job is to create /// the init process. Right now, this process is in the kernel, /// but later, it should call the shell. pub fn init() -> usize { unsafe { PROCESS_LIST = Some(VecDeque::with_capacity(15)); add_process_default(init_process); // Ugh....Rust is giving me fits over here! // I just want a memory address to the trap frame, but // due to the borrow rules of Rust, I'm fighting here. So, // instead, let's move the value out of PROCESS_LIST, get // the address, and then move it right back in. let pl = PROCESS_LIST.take().unwrap(); let p = pl.front().unwrap().frame; let func_vaddr = pl.front().unwrap().program_counter; let frame = p as *const TrapFrame as usize; println!("Init's frame is at 0x{:08x}", frame); // Put the process list back in the global. PROCESS_LIST.replace(pl); // Return the first instruction's address to execute. // Since we use the MMU, all start here. func_vaddr } } // Our process must be able to sleep, wait, or run. // Running - means that when the scheduler finds this process, it can run it. // Sleeping - means that the process is waiting on a certain amount of time. // Waiting - means that the process is waiting on I/O // Dead - We should never get here, but we can flag a process as Dead and clean // it out of the list later. #[repr(u8)] pub enum ProcessState { Running, Sleeping, Waiting, Dead, } // Let's represent this in C ABI. We do this // because we need to access some of the fields // in assembly. 
// Rust gets to choose how it orders
// the fields unless we represent the structure in
// C-style ABI.
#[repr(C)]
pub struct Process {
	frame: *mut TrapFrame,    // saved registers (written/read by trap.S)
	stack: *mut u8,           // kernel-heap allocation backing the user stack
	program_counter: usize,   // virtual address of the first instruction
	pid: u16,                 // process identifier, from NEXT_PID
	root: *mut Table,         // root of this process' page table
	state: ProcessState,      // Running/Sleeping/Waiting/Dead
	data: ProcessData,        // private per-process data (cwd, etc.)
	sleep_until: usize,       // wake-up deadline; meaning set elsewhere -- TODO confirm units
}

impl Process {
	/// Address of the trap frame, for handing to assembly.
	pub fn get_frame_address(&self) -> usize {
		self.frame as usize
	}

	pub fn get_program_counter(&self) -> usize {
		self.program_counter
	}

	/// Address of the root page table (used to build SATP).
	pub fn get_table_address(&self) -> usize {
		self.root as usize
	}

	pub fn get_state(&self) -> &ProcessState {
		&self.state
	}

	pub fn get_pid(&self) -> u16 {
		self.pid
	}

	pub fn get_sleep_until(&self) -> usize {
		self.sleep_until
	}

	/// Create a process that starts executing at `func`, with a fresh
	/// trap frame, stack, and page table, all mapped for U-mode.
	pub fn new_default(func: fn()) -> Self {
		let func_addr = func as usize;
		// Identity virtual address for now (no relocation).
		let func_vaddr = func_addr; //- 0x6000_0000;
		// println!("func_addr = {:x} -> {:x}", func_addr, func_vaddr);
		// We will convert NEXT_PID below into an atomic increment when
		// we start getting into multi-hart processing. For now, we want
		// a process. Get it to work, then improve it!
		let mut ret_proc =
			Process { frame: zalloc(1) as *mut TrapFrame,
			          stack: alloc(STACK_PAGES),
			          program_counter: func_vaddr,
			          pid: unsafe { NEXT_PID },
			          root: zalloc(1) as *mut Table,
			          state: ProcessState::Running,
			          data: ProcessData::zero(),
			          sleep_until: 0 };
		unsafe {
			NEXT_PID += 1;
		}
		// Now we move the stack pointer to the bottom of the
		// allocation. The spec shows that register x2 (2) is the stack
		// pointer.
		// We could use ret_proc.stack.add, but that's an unsafe
		// function which would require an unsafe block. So, convert it
		// to usize first and then add PAGE_SIZE is better.
		// We also need to set the stack adjustment so that it is at the
		// bottom of the memory and far away from heap allocations.
		let saddr = ret_proc.stack as usize;
		unsafe {
			// x2 (sp) points one past the top of the mapped stack region.
			(*ret_proc.frame).regs[2] = STACK_ADDR + PAGE_SIZE * STACK_PAGES;
		}
		// Map the stack on the MMU
		let pt;
		unsafe {
			pt = &mut *ret_proc.root;
		}
		// We need to map the stack onto the user process' virtual
		// memory This gets a little hairy because we need to also map
		// the function code too.
		for i in 0..STACK_PAGES {
			let addr = i * PAGE_SIZE;
			map(pt, STACK_ADDR + addr, saddr + addr, EntryBits::UserReadWrite.val(), 0,);
			println!("Set stack from 0x{:016x} -> 0x{:016x}", STACK_ADDR + addr, saddr + addr);
		}
		// Map the program counter on the MMU and other bits.
		// 101 pages (0..=100) around the entry point -- a blunt
		// over-mapping until we load programs from disk.
		for i in 0..=100 {
			let modifier = i * 0x1000;
			map(pt, func_vaddr + modifier, func_addr + modifier, EntryBits::UserReadWriteExecute.val(), 0,);
		}
		// This is the make_syscall function
		// The reason we need this is because we're running a process
		// that is inside of the kernel. When we start loading from a block
		// devices, we can load the instructions anywhere in memory.
		map(pt, 0x8000_0000, 0x8000_0000, EntryBits::UserReadExecute.val(), 0);
		ret_proc
	}
}

impl Drop for Process {
	/// Since we're storing ownership of a Process in the linked list,
	/// we can cause it to deallocate automatically when it is removed.
	fn drop(&mut self) {
		// We allocate the stack as a page.
		dealloc(self.stack);
		// This is unsafe, but it's at the drop stage, so we won't
		// be using this again.
		unsafe {
			// Remember that unmap unmaps all levels of page tables
			// except for the root. It also deallocates the memory
			// associated with the tables.
			unmap(&mut *self.root);
		}
		dealloc(self.root as *mut u8);
	}
}

// The private data in a process contains information
// that is relevant to where we are, including the path
// and open file descriptors.
pub struct ProcessData {
	cwd_path: [u8; 128],  // current working directory path (NUL-padded bytes)
}

// This is private data that we can query with system calls.
// If we want to implement CFQ (completely fair queuing), which
// is a per-process block queuing algorithm, we can put that here.
impl ProcessData {
	/// Return a zero-initialized ProcessData.
	pub fn zero() -> Self {
		ProcessData { cwd_path: [0; 128], }
	}
}


================================================
FILE: risc_v/chapters/ch8/src/sched.rs
================================================
// sched.rs
// Simple process scheduler
// Stephen Marz
// 27 Dec 2019

use crate::{process::{ProcessState, PROCESS_LIST}};

/// Round-robin scheduler. Rotates the process list and, if the new
/// front process is Running, returns (frame address, program counter,
/// satp value) for it. Returns (0, 0, 0) when nothing is runnable.
pub fn schedule() -> (usize, usize, usize) {
	unsafe {
		if let Some(mut pl) = PROCESS_LIST.take() {
			// Move the previous front process to the back.
			pl.rotate_left(1);
			let mut frame_addr: usize = 0;
			let mut mepc: usize = 0;
			let mut satp: usize = 0;
			let mut pid: usize = 0;
			if let Some(prc) = pl.front() {
				match prc.get_state() {
					ProcessState::Running => {
						frame_addr = prc.get_frame_address();
						mepc = prc.get_program_counter();
						// Page table address >> 12 is the PPN field of SATP.
						satp = prc.get_table_address() >> 12;
						pid = prc.get_pid() as usize;
					},
					ProcessState::Sleeping => {
					},
					_ => {},
				}
			}
			println!("Scheduling {}", pid);
			PROCESS_LIST.replace(pl);
			if frame_addr != 0 {
				// MODE 8 is 39-bit virtual address MMU
				// I'm using the PID as the address space identifier to hopefully
				// help with (not?) flushing the TLB whenever we switch processes.
				if satp != 0 {
					return (frame_addr, mepc, (8 << 60) | (pid << 44) | satp);
				}
				else {
					return (frame_addr, mepc, 0);
				}
			}
		}
	}
	(0, 0, 0)
}


================================================
FILE: risc_v/chapters/ch8/src/syscall.rs
================================================
// syscall.rs
// System calls
// Stephen Marz
// 3 Jan 2020

use crate::cpu::TrapFrame;

/// Dispatch a system call. The syscall number is taken from register
/// A0 (x10) of the saved trap frame; returns the program counter the
/// trap handler should resume at (always mepc + 4 for now).
pub fn do_syscall(mepc: usize, frame: *mut TrapFrame) -> usize {
	let syscall_number;
	unsafe {
		// A0 is X10, so it's register number 10.
		syscall_number = (*frame).regs[10];
		// for i in 0..32 {
		//     print!("regs[{:02}] = 0x{:08x}  ", i, (*frame).regs[i]);
		//     if (i+1) % 4 == 0 {
		//         println!();
		//     }
		// }
	}
	match syscall_number {
		0 => {
			// Exit
			// Currently, we cannot kill a process, it runs forever. We will delete
			// the process later and free the resources, but for now, we want to get
			// used to how processes will be scheduled on the CPU.
			mepc + 4
		},
		1 => {
			println!("Test syscall");
			mepc + 4
		},
		_ => {
			println!("Unknown syscall number {}", syscall_number);
			mepc + 4
		}
	}
}


================================================
FILE: risc_v/chapters/ch8/src/trap.rs
================================================
// trap.rs
// Trap routines
// Stephen Marz
// 10 October 2019

use crate::cpu::TrapFrame;
use crate::{plic, uart};
use crate::syscall::do_syscall;
use crate::sched::schedule;

// Provided by assembly; never returns -- restores the given trap
// frame and mret's into the process.
extern "C" {
	fn switch_to_user(frame: usize, mepc: usize, satp: usize) -> !;
}

#[no_mangle]
/// The m_trap stands for "machine trap". Right now, we are handling
/// all traps at machine mode. In this mode, we can figure out what's
/// going on and send a trap where it needs to be. Remember, in machine
/// mode and in this trap, interrupts are disabled and the MMU is off.
/// Returns the program counter to resume at.
extern "C" fn m_trap(epc: usize, tval: usize, cause: usize, hart: usize, status: usize, frame: *mut TrapFrame) -> usize {
	// We're going to handle all traps in machine mode. RISC-V lets
	// us delegate to supervisor mode, but switching out SATP (virtual memory)
	// gets hairy.
	// Bit 63 of mcause distinguishes interrupts (1) from exceptions (0).
	let is_async = {
		if cause >> 63 & 1 == 1 {
			true
		}
		else {
			false
		}
	};
	// The cause contains the type of trap (sync, async) as well as the cause
	// number. So, here we narrow down just the cause number.
	let cause_num = cause & 0xfff;
	let mut return_pc = epc;
	if is_async {
		// Asynchronous trap
		match cause_num {
			3 => {
				// Machine software
				println!("Machine software interrupt CPU#{}", hart);
			},
			7 => unsafe {
				// This is the context-switch timer.
				// We would typically invoke the scheduler here to pick another
				// process to run.
				// Machine timer
				// println!("CTX");
				let (frame, mepc, satp) = schedule();
				let mtimecmp = 0x0200_4000 as *mut u64;
				let mtime = 0x0200_bff8 as *const u64;
				// The frequency given by QEMU is 10_000_000 Hz, so this sets
				// the next interrupt to fire one second from now.
				// This is much too slow for normal operations, but it gives us
				// a visual of what's happening behind the scenes.
				mtimecmp.write_volatile(mtime.read_volatile() + 10_000_000);
				// NOTE: switch_to_user never returns; the rest of
				// m_trap is skipped on this path.
				unsafe {
					switch_to_user(frame, mepc, satp);
				}
			},
			11 => {
				// Machine external (interrupt from Platform Interrupt Controller (PLIC))
				// println!("Machine external interrupt CPU#{}", hart);
				// We will check the next interrupt. If the interrupt isn't available, this will
				// give us None. However, that would mean we got a spurious interrupt, unless we
				// get an interrupt from a non-PLIC source. This is the main reason that the PLIC
				// hardwires the id 0 to 0, so that we can use it as an error case.
				if let Some(interrupt) = plic::next() {
					// If we get here, we've got an interrupt from the claim register. The PLIC will
					// automatically prioritize the next interrupt, so when we get it from claim, it
					// will be the next in priority order.
					match interrupt {
						10 => {
							// Interrupt 10 is the UART interrupt.
							// We would typically set this to be handled out of the interrupt context,
							// but we're testing here! C'mon!
							// We haven't yet used the singleton pattern for my_uart, but remember, this
							// just simply wraps 0x1000_0000 (UART).
							let mut my_uart = uart::Uart::new(0x1000_0000);
							// If we get here, the UART better have something! If not, what happened??
							if let Some(c) = my_uart.get() {
								// If you recognize this code, it used to be in the lib.rs under kmain(). That
								// was because we needed to poll for UART data. Now that we have interrupts,
								// here it goes!
								match c {
									8 => {
										// This is a backspace, so we
										// essentially have to write a space and
										// backup again:
										print!("{} {}", 8 as char, 8 as char);
									},
									10 | 13 => {
										// Newline or carriage-return
										println!();
									},
									_ => {
										print!("{}", c as char);
									},
								}
							}
						},
						// Non-UART interrupts go here and do nothing.
						_ => {
							println!("Non-UART external interrupt: {}", interrupt);
						}
					}
					// We've claimed it, so now say that we've handled it.
					// This resets the interrupt pending
					// and allows the UART to interrupt again. Otherwise, the UART will get "stuck".
					plic::complete(interrupt);
				}
			},
			_ => {
				panic!("Unhandled async trap CPU#{} -> {}\n", hart, cause_num);
			}
		}
	}
	else {
		// Synchronous trap
		match cause_num {
			2 => {
				// Illegal instruction
				panic!("Illegal instruction CPU#{} -> 0x{:08x}: 0x{:08x}\n", hart, epc, tval);
				// We need while trues here until we have a functioning "delete from scheduler"
				// NOTE(review): unreachable -- panic! diverges before this loop.
				while true {}
			},
			8 => {
				// Environment (system) call from User mode
				// println!("E-call from User mode! CPU#{} -> 0x{:08x}", hart, epc);
				return_pc = do_syscall(return_pc, frame);
			},
			9 => {
				// Environment (system) call from Supervisor mode
				println!("E-call from Supervisor mode! CPU#{} -> 0x{:08x}", hart, epc);
				return_pc = do_syscall(return_pc, frame);
			},
			11 => {
				// Environment (system) call from Machine mode
				panic!("E-call from Machine mode! CPU#{} -> 0x{:08x}\n", hart, epc);
			},
			// Page faults
			12 => {
				// Instruction page fault
				println!("Instruction page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval);
				// We need while trues here until we have a functioning "delete from scheduler"
				while true {}
				// NOTE(review): unreachable while the loop above remains.
				return_pc += 4;
			},
			13 => {
				// Load page fault
				println!("Load page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval);
				// We need while trues here until we have a functioning "delete from scheduler"
				while true {}
				return_pc += 4;
			},
			15 => {
				// Store page fault
				println!("Store page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval);
				// We need while trues here until we have a functioning "delete from scheduler"
				while true {}
				return_pc += 4;
			},
			_ => {
				panic!("Unhandled sync trap CPU#{} -> {}\n", hart, cause_num);
			}
		}
	};
	// Finally, return the updated program counter
	return_pc
}


================================================
FILE: risc_v/chapters/ch8/src/uart.rs
================================================
// uart.rs
// UART routines and driver

use core::{convert::TryInto, fmt::{Error, Write}};

// MMIO driver for the NS16550-compatible UART on the QEMU virt machine.
pub struct Uart {
base_address: usize, } impl Write for Uart { fn write_str(&mut self, out: &str) -> Result<(), Error> { for c in out.bytes() { self.put(c); } Ok(()) } } impl Uart { pub fn new(base_address: usize) -> Self { Uart { base_address } } pub fn init(&mut self) { let ptr = self.base_address as *mut u8; unsafe { // First, set the word length, which // are bits 0 and 1 of the line control register (LCR) // which is at base_address + 3 // We can easily write the value 3 here or 0b11, but I'm // extending it so that it is clear we're setting two // individual fields // Word 0 Word 1 // ~~~~~~ ~~~~~~ let lcr: u8 = (1 << 0) | (1 << 1); ptr.add(3).write_volatile(lcr); // Now, enable the FIFO, which is bit index 0 of the // FIFO control register (FCR at offset 2). // Again, we can just write 1 here, but when we use left // shift, it's easier to see that we're trying to write // bit index #0. ptr.add(2).write_volatile(1 << 0); // Enable receiver buffer interrupts, which is at bit // index 0 of the interrupt enable register (IER at // offset 1). ptr.add(1).write_volatile(1 << 0); // If we cared about the divisor, the code below would // set the divisor from a global clock rate of 22.729 // MHz (22,729,000 cycles per second) to a signaling // rate of 2400 (BAUD). We usually have much faster // signalling rates nowadays, but this demonstrates what // the divisor actually does. The formula given in the // NS16500A specification for calculating the divisor // is: // divisor = ceil( (clock_hz) / (baud_sps x 16) ) // So, we substitute our values and get: // divisor = ceil( 22_729_000 / (2400 x 16) ) // divisor = ceil( 22_729_000 / 38_400 ) // divisor = ceil( 591.901 ) = 592 // The divisor register is two bytes (16 bits), so we // need to split the value 592 into two bytes. // Typically, we would calculate this based on measuring // the clock rate, but again, for our purposes [qemu], // this doesn't really do anything. 
let divisor: u16 = 592; let divisor_least: u8 = (divisor & 0xff).try_into().unwrap(); let divisor_most: u8 = (divisor >> 8).try_into().unwrap(); // Notice that the divisor register DLL (divisor latch // least) and DLM (divisor latch most) have the same // base address as the receiver/transmitter and the // interrupt enable register. To change what the base // address points to, we open the "divisor latch" by // writing 1 into the Divisor Latch Access Bit (DLAB), // which is bit index 7 of the Line Control Register // (LCR) which is at base_address + 3. ptr.add(3).write_volatile(lcr | 1 << 7); // Now, base addresses 0 and 1 point to DLL and DLM, // respectively. Put the lower 8 bits of the divisor // into DLL ptr.add(0).write_volatile(divisor_least); ptr.add(1).write_volatile(divisor_most); // Now that we've written the divisor, we never have to // touch this again. In hardware, this will divide the // global clock (22.729 MHz) into one suitable for 2,400 // signals per second. So, to once again get access to // the RBR/THR/IER registers, we need to close the DLAB // bit by clearing it to 0. ptr.add(3).write_volatile(lcr); } } pub fn put(&mut self, c: u8) { let ptr = self.base_address as *mut u8; unsafe { ptr.add(0).write_volatile(c); } } pub fn get(&mut self) -> Option { let ptr = self.base_address as *mut u8; unsafe { if ptr.add(5).read_volatile() & 1 == 0 { // The DR bit is 0, meaning no data None } else { // The DR bit is 1, meaning data! 
Some(ptr.add(0).read_volatile()) } } } } ================================================ FILE: risc_v/chapters/ch9/.cargo/config ================================================ [build] target = "riscv64gc-unknown-none-elf" [target.riscv64gc-unknown-none-elf] linker = "riscv64-unknown-linux-gnu-gcc" ================================================ FILE: risc_v/chapters/ch9/.gitignore ================================================ os.elf target/* Cargo.lock hdd.dsk ================================================ FILE: risc_v/chapters/ch9/Cargo.toml ================================================ [package] name = "sos" version = "0.1.0" authors = ["Stephen Marz "] edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] crate-type = ["staticlib"] [dependencies] ================================================ FILE: risc_v/chapters/ch9/Makefile ================================================ ##### ## BUILD ##### CC=riscv64-unknown-linux-gnu-gcc CFLAGS=-Wall -Wextra -pedantic -Wextra -O0 -g CFLAGS+=-static -ffreestanding -nostdlib -fno-rtti -fno-exceptions CFLAGS+=-march=rv64gc -mabi=lp64 INCLUDES= LINKER_SCRIPT=-Tsrc/lds/virt.lds TYPE=debug RUST_TARGET=./target/riscv64gc-unknown-none-elf/$(TYPE) LIBS=-L$(RUST_TARGET) SOURCES_ASM=$(wildcard src/asm/*.S) LIB=-lsos -lgcc OUT=os.elf ##### ## QEMU ##### QEMU=qemu-system-riscv64 MACH=virt CPU=rv64 CPUS=4 MEM=128M DISK=hdd.dsk DRIVE= -drive if=none,format=raw,file=$(DISK),id=foo -device virtio-blk-device,scsi=off,drive=foo OPTS=-nographic -serial mon:stdio -bios none -device virtio-rng-device -device virtio-gpu-device OPTS+=-device virtio-net-device -device virtio-tablet-device -device virtio-keyboard-device #DRIVE= all: cargo build $(CC) $(CFLAGS) $(LINKER_SCRIPT) $(INCLUDES) -o $(OUT) $(SOURCES_ASM) $(LIBS) $(LIB) run: all $(QEMU) -machine $(MACH) -cpu $(CPU) -smp $(CPUS) -m $(MEM) $(DRIVE) $(OPTS) -kernel $(OUT) .PHONY: clean clean: cargo clean rm -f 
$(OUT) ================================================ FILE: risc_v/chapters/ch9/make_hdd.sh ================================================ #!/bin/sh dd if=/dev/urandom of=hdd.dsk bs=1M count=32 ================================================ FILE: risc_v/chapters/ch9/src/asm/boot.S ================================================ # boot.S # bootloader for SoS # Stephen Marz # 8 February 2019 # Disable generation of compressed instructions. .option norvc # Define a .text.init section. The .text.init is put at the # starting address so that the entry _start is put at the RISC-V # address 0x8000_0000. .section .text.init # Execution starts here. .global _start _start: # Disable linker instruction relaxation for the `la` instruction below. # This disallows the assembler from assuming that `gp` is already initialized. # This causes the value stored in `gp` to be calculated from `pc`. # The job of the global pointer is to give the linker the ability to address # memory relative to GP instead of as an absolute address. .option push .option norelax la gp, _global_pointer .option pop # SATP should be zero, but let's make sure. Each HART has its own # SATP register. csrw satp, zero # Any hardware threads (hart) that are not bootstrapping # need to wait for an IPI csrr t0, mhartid bnez t0, 3f # Set all bytes in the BSS section to zero. la a0, _bss_start la a1, _bss_end bgeu a0, a1, 2f 1: sd zero, (a0) addi a0, a0, 8 bltu a0, a1, 1b 2: # The stack grows from bottom to top, so we put the stack pointer # to the very end of the stack range. la sp, _stack_end # Setting `mstatus` register: # 0b01 << 11: Machine's previous protection mode is 2 (MPP=2). li t0, 0b11 << 11 csrw mstatus, t0 # Do not allow interrupts while running kinit csrw mie, zero # Machine's exception program counter (MEPC) is set to `kinit`. la t1, kinit csrw mepc, t1 # Set the return address to get us into supervisor mode la ra, 2f # We use mret here so that the mstatus register is properly updated. 
mret 2: # We set the return address (ra above) to this label. When kinit() is finished # in Rust, it will return here. # Setting `mstatus` (supervisor status) register: # 0b01 << 11 : Previous protection mode is 1 (MPP=01 [Supervisor]). # 1 << 7 : Previous machine interrupt-enable bit is 1 (MPIE=1 [Enabled]) # 1 << 5 : Previous interrupt-enable bit is 1 (SPIE=1 [Enabled]). # We set the "previous" bits because the mret will write the current bits # with the previous bits. li t0, (0b00 << 11) | (1 << 7) | (1 << 5) csrw mstatus, t0 # Machine's trap vector base address is set to `m_trap_vector`, for # "machine" trap vector. la t2, m_trap_vector csrw mtvec, t2 # Jump to first process. We put the MPP = 00 for user mode, so after # mret, we will jump to the first process' addresss in user mode. la ra, 4f mret 3: # Parked harts go here. We need to set these # to only awaken if it receives a software interrupt, # which we're going to call the SIPI (Software Intra-Processor Interrupt). # We call the SIPI by writing the software interrupt into the Core Local Interruptor (CLINT) # Which is calculated by: base_address + hart * 4 # where base address is 0x0200_0000 (MMIO CLINT base address) # We only use additional harts to run user-space programs, although this may # change. # We divide up the stack so the harts aren't clobbering one another. la sp, _stack_end li t0, 0x10000 csrr a0, mhartid mul t0, t0, a0 sub sp, sp, t0 # The parked harts will be put into machine mode with interrupts enabled. li t0, 0b11 << 11 | (1 << 7) csrw mstatus, t0 # Allow for MSIP (Software interrupt). We will write the MSIP from hart #0 to # awaken these parked harts. li t3, (1 << 3) csrw mie, t3 # Machine's exception program counter (MEPC) is set to the Rust initialization # code and waiting loop. la t1, kinit_hart csrw mepc, t1 # Machine's trap vector base address is set to `m_trap_vector`, for # "machine" trap vector. The Rust initialization routines will give each # hart its own trap frame. 
We can use the same trap function and distinguish # between each hart by looking at the trap frame. la t2, m_trap_vector csrw mtvec, t2 # Whenever our hart is done initializing, we want it to return to the waiting # loop, which is just below mret. la ra, 4f # We use mret here so that the mstatus register is properly updated. mret 4: # wfi = wait for interrupt. This is a hint to the harts to shut everything needed # down. However, the RISC-V specification allows for wfi to do nothing. Anyway, # with QEMU, this will save some CPU! wfi j 4b ================================================ FILE: risc_v/chapters/ch9/src/asm/mem.S ================================================ // mem.S // Importation of linker symbols .section .rodata .global HEAP_START HEAP_START: .dword _heap_start .global HEAP_SIZE HEAP_SIZE: .dword _heap_size .global TEXT_START TEXT_START: .dword _text_start .global TEXT_END TEXT_END: .dword _text_end .global DATA_START DATA_START: .dword _data_start .global DATA_END DATA_END: .dword _data_end .global RODATA_START RODATA_START: .dword _rodata_start .global RODATA_END RODATA_END: .dword _rodata_end .global BSS_START BSS_START: .dword _bss_start .global BSS_END BSS_END: .dword _bss_end .global KERNEL_STACK_START KERNEL_STACK_START: .dword _stack_start .global KERNEL_STACK_END KERNEL_STACK_END: .dword _stack_end ================================================ FILE: risc_v/chapters/ch9/src/asm/trap.S ================================================ # trap.S # Trap handler and global context # Steve Operating System # Stephen Marz # 24 February 2019 .option norvc .altmacro .set NUM_GP_REGS, 32 # Number of registers per context .set REG_SIZE, 8 # Register size (in bytes) # Use macros for saving and restoring multiple registers .macro save_gp i, basereg=t6 sd x\i, ((\i)*REG_SIZE)(\basereg) .endm .macro load_gp i, basereg=t6 ld x\i, ((\i)*REG_SIZE)(\basereg) .endm .macro save_fp i, basereg=t6 fsd f\i, ((NUM_GP_REGS+(\i))*REG_SIZE)(\basereg) .endm .macro 
load_fp i, basereg=t6 fld f\i, ((NUM_GP_REGS+(\i))*REG_SIZE)(\basereg) .endm .section .text .global m_trap_vector # This must be aligned by 4 since the last two bits # of the mtvec register do not contribute to the address # of this vector. .align 4 m_trap_vector: # All registers are volatile here, we need to save them # before we do anything. csrrw t6, mscratch, t6 # csrrw will atomically swap t6 into mscratch and the old # value of mscratch into t6. This is nice because we just # switched values and didn't destroy anything -- all atomically! # in cpu.rs we have a structure of: # 32 gp regs 0 # 32 fp regs 256 # SATP register 512 # Trap stack 520 # CPU HARTID 528 # We use t6 as the temporary register because it is the very # bottom register (x31) .set i, 0 .rept 31 save_gp %i .set i, i+1 .endr # Save the actual t6 register, which we swapped into # mscratch mv t5, t6 csrr t6, mscratch save_gp 31, t5 # Restore the kernel trap frame into mscratch csrw mscratch, t5 # csrw mie, zero # Get ready to go into Rust (trap.rs) # We don't want to write into the user's stack or whomever # messed with us here. # csrw mie, zero csrr a0, mepc sd a0, 520(t5) csrr a1, mtval csrr a2, mcause csrr a3, mhartid csrr a4, mstatus csrr a5, mscratch la t0, KERNEL_STACK_END ld sp, 0(t0) call m_trap # When we get here, we've returned from m_trap, restore registers # and return. # m_trap will return the return address via a0. csrw mepc, a0 # Now load the trap frame back into t6 csrr t6, mscratch # Restore all GP registers .set i, 1 .rept 31 load_gp %i .set i, i+1 .endr # Since we ran this loop 31 times starting with i = 1, # the last one loaded t6 back to its original value. 
mret .global switch_to_user switch_to_user: # a0 - Frame address # a1 - Program counter # a2 - SATP Register csrw mscratch, a0 // Load program counter ld a1, 520(a0) // Load satp ld a2, 512(a0) # 1 << 7 is MPIE # Since user mode is 00, we don't need to set anything # in MPP (bits 12:11) li t0, 1 << 7 | 1 << 5 csrw mstatus, t0 csrw mepc, a1 csrw satp, a2 li t1, 0xaaa csrw mie, t1 la t2, m_trap_vector csrw mtvec, t2 # This fence forces the MMU to flush the TLB. However, since # we're using the PID as the address space identifier, we might # only need this when we create a process. Right now, this ensures # correctness, however it isn't the most efficient. # sfence.vma # A0 is the context frame, so we need to reload it back # and mret so we can start running the program. mv t6, a0 .set i, 1 .rept 31 load_gp %i, t6 .set i, i+1 .endr # j . mret .global make_syscall make_syscall: ecall ret ================================================ FILE: risc_v/chapters/ch9/src/block.rs ================================================ // block.rs // Block device using VirtIO protocol // Stephen Marz // 10 March 2020 use crate::{kmem::{kfree, kmalloc}, page::{zalloc, PAGE_SIZE}, virtio, virtio::{Descriptor, MmioOffsets, Queue, StatusField, VIRTIO_RING_SIZE}}; use core::mem::size_of; #[repr(C)] pub struct Geometry { cylinders: u16, heads: u8, sectors: u8, } #[repr(C)] pub struct Topology { physical_block_exp: u8, alignment_offset: u8, min_io_size: u16, opt_io_size: u32, } // There is a configuration space for VirtIO that begins // at offset 0x100 and continues to the size of the configuration. // The structure below represents the configuration for a // block device. Really, all that this OS cares about is the // capacity. 
#[repr(C)]
pub struct Config {
	// NOTE(review): this layout mirrors the virtio block device
	// configuration space starting at MMIO offset 0x100 -- confirm
	// field order against the virtio spec revision this driver targets.
	// Only `capacity` is actually consumed by this OS (per the comment
	// preceding this struct).
	capacity: u64,
	size_max: u32,
	seg_max: u32,
	geometry: Geometry,
	blk_size: u32,
	topology: Topology,
	writeback: u8,
	unused0: [u8; 3],
	max_discard_sector: u32,
	max_discard_seg: u32,
	discard_sector_alignment: u32,
	max_write_zeroes_sectors: u32,
	max_write_zeroes_seg: u32,
	write_zeroes_may_unmap: u8,
	unused1: [u8; 3],
}

// The header/data/status is a block request
// packet. We send the header to tell the direction
// (blktype: IN/OUT) and then the starting sector
// we want to read. Then, we put the data buffer
// as the Data structure and finally an 8-bit
// status. The device will write one of three values
// in here: 0 = success, 1 = io error, 2 = unsupported
// operation.
#[repr(C)]
pub struct Header {
	// Request type: one of the VIRTIO_BLK_T_* constants
	// (block_op writes VIRTIO_BLK_T_IN or VIRTIO_BLK_T_OUT here).
	blktype: u32,
	// Always written as 0 by block_op.
	reserved: u32,
	// Starting sector of the transfer; block_op computes this as
	// offset / 512, so sectors are 512-byte units.
	sector: u64,
}

#[repr(C)]
pub struct Data {
	// Raw pointer to the caller-supplied transfer buffer.
	data: *mut u8,
}

#[repr(C)]
pub struct Status {
	// Completion code written by the device. block_op pre-fills this
	// with 111 so a still-111 value means the device never wrote it;
	// the device writes one of the VIRTIO_BLK_S_* values (0 = OK,
	// 1 = I/O error, 2 = unsupported).
	status: u8,
}

#[repr(C)]
pub struct Request {
	header: Header,
	data: Data,
	status: Status,
	// NOTE(review): appears intended to record the head descriptor
	// index of this request's chain; the visible code path never
	// assigns it -- verify before relying on it.
	head: u16,
}

// Internal block device structure
// We keep our own used_idx and idx for
// descriptors. There is a shared index, but that
// tells us or the device if we've kept up with where
// we are for the available (us) or used (device) ring.
pub struct BlockDevice { queue: *mut Queue, dev: *mut u32, idx: u16, ack_used_idx: u16, read_only: bool, } // Type values pub const VIRTIO_BLK_T_IN: u32 = 0; pub const VIRTIO_BLK_T_OUT: u32 = 1; pub const VIRTIO_BLK_T_FLUSH: u32 = 4; pub const VIRTIO_BLK_T_DISCARD: u32 = 11; pub const VIRTIO_BLK_T_WRITE_ZEROES: u32 = 13; // Status values pub const VIRTIO_BLK_S_OK: u8 = 0; pub const VIRTIO_BLK_S_IOERR: u8 = 1; pub const VIRTIO_BLK_S_UNSUPP: u8 = 2; // Feature bits pub const VIRTIO_BLK_F_SIZE_MAX: u32 = 1; pub const VIRTIO_BLK_F_SEG_MAX: u32 = 2; pub const VIRTIO_BLK_F_GEOMETRY: u32 = 4; pub const VIRTIO_BLK_F_RO: u32 = 5; pub const VIRTIO_BLK_F_BLK_SIZE: u32 = 6; pub const VIRTIO_BLK_F_FLUSH: u32 = 9; pub const VIRTIO_BLK_F_TOPOLOGY: u32 = 10; pub const VIRTIO_BLK_F_CONFIG_WCE: u32 = 11; pub const VIRTIO_BLK_F_DISCARD: u32 = 13; pub const VIRTIO_BLK_F_WRITE_ZEROES: u32 = 14; // Much like with processes, Rust requires some initialization // when we declare a static. In this case, we use the Option // value type to signal that the variable exists, but not the // queue itself. We will replace this with an actual queue when // we initialize the block system. static mut BLOCK_DEVICES: [Option; 8] = [None, None, None, None, None, None, None, None]; pub fn setup_block_device(ptr: *mut u32) -> bool { unsafe { // We can get the index of the device based on its address. // 0x1000_1000 is index 0 // 0x1000_2000 is index 1 // ... // 0x1000_8000 is index 7 // To get the number that changes over, we shift right 12 places (3 hex digits) let idx = (ptr as usize - virtio::MMIO_VIRTIO_START) >> 12; // [Driver] Device Initialization // 1. Reset the device (write 0 into status) ptr.add(MmioOffsets::Status.scale32()).write_volatile(0); let mut status_bits = StatusField::Acknowledge.val32(); // 2. Set ACKNOWLEDGE status bit ptr.add(MmioOffsets::Status.scale32()).write_volatile(status_bits); // 3. 
Set the DRIVER status bit status_bits |= StatusField::DriverOk.val32(); ptr.add(MmioOffsets::Status.scale32()).write_volatile(status_bits); // 4. Read device feature bits, write subset of feature // bits understood by OS and driver to the device. let host_features = ptr.add(MmioOffsets::HostFeatures.scale32()).read_volatile(); let guest_features = host_features & !(1 << VIRTIO_BLK_F_RO); let ro = host_features & (1 << VIRTIO_BLK_F_RO) != 0; ptr.add(MmioOffsets::GuestFeatures.scale32()).write_volatile(guest_features); // 5. Set the FEATURES_OK status bit status_bits |= StatusField::FeaturesOk.val32(); ptr.add(MmioOffsets::Status.scale32()).write_volatile(status_bits); // 6. Re-read status to ensure FEATURES_OK is still set. // Otherwise, it doesn't support our features. let status_ok = ptr.add(MmioOffsets::Status.scale32()).read_volatile(); // If the status field no longer has features_ok set, // that means that the device couldn't accept // the features that we request. Therefore, this is // considered a "failed" state. if false == StatusField::features_ok(status_ok) { print!("features fail..."); ptr.add(MmioOffsets::Status.scale32()).write_volatile(StatusField::Failed.val32()); return false; } // 7. Perform device-specific setup. // Set the queue num. We have to make sure that the // queue size is valid because the device can only take // a certain size. let qnmax = ptr.add(MmioOffsets::QueueNumMax.scale32()).read_volatile(); ptr.add(MmioOffsets::QueueNum.scale32()).write_volatile(VIRTIO_RING_SIZE as u32); if VIRTIO_RING_SIZE as u32 > qnmax { print!("queue size fail..."); return false; } // First, if the block device array is empty, create it! // We add 4095 to round this up and then do an integer // divide to truncate the decimal. We don't add 4096, // because if it is exactly 4096 bytes, we would get two // pages, not one. let num_pages = (size_of::() + PAGE_SIZE - 1) / PAGE_SIZE; // println!("np = {}", num_pages); // We allocate a page for each device. 
This will the the // descriptor where we can communicate with the block // device. We will still use an MMIO register (in // particular, QueueNotify) to actually tell the device // we put something in memory. We also have to be // careful with memory ordering. We don't want to // issue a notify before all memory writes have // finished. We will look at that later, but we need // what is called a memory "fence" or barrier. ptr.add(MmioOffsets::QueueSel.scale32()).write_volatile(0); // Alignment is very important here. This is the memory address // alignment between the available and used rings. If this is wrong, // then we and the device will refer to different memory addresses // and hence get the wrong data in the used ring. // ptr.add(MmioOffsets::QueueAlign.scale32()).write_volatile(2); let queue_ptr = zalloc(num_pages) as *mut Queue; let queue_pfn = queue_ptr as u32; ptr.add(MmioOffsets::GuestPageSize.scale32()).write_volatile(PAGE_SIZE as u32); // QueuePFN is a physical page number, however it // appears for QEMU we have to write the entire memory // address. This is a physical memory address where we // (the OS) and the block device have in common for // making and receiving requests. ptr.add(MmioOffsets::QueuePfn.scale32()).write_volatile(queue_pfn / PAGE_SIZE as u32); // We need to store all of this data as a "BlockDevice" // structure We will be referring to this structure when // making block requests AND when handling responses. let bd = BlockDevice { queue: queue_ptr, dev: ptr, idx: 0, ack_used_idx: 0, read_only: ro, }; BLOCK_DEVICES[idx] = Some(bd); // 8. Set the DRIVER_OK status bit. Device is now "live" status_bits |= StatusField::DriverOk.val32(); ptr.add(MmioOffsets::Status.scale32()).write_volatile(status_bits); true } } pub fn fill_next_descriptor(bd: &mut BlockDevice, desc: Descriptor) -> u16 { unsafe { // The ring structure increments here first. This allows us to skip // index 0, which then in the used ring will show that .id > 0. 
This // is one way to error check. We will eventually get back to 0 as // this index is cyclical. However, it shows if the first read/write // actually works. bd.idx = (bd.idx + 1) % VIRTIO_RING_SIZE as u16; (*bd.queue).desc[bd.idx as usize] = desc; if (*bd.queue).desc[bd.idx as usize].flags & virtio::VIRTIO_DESC_F_NEXT != 0 { // If the next flag is set, we need another descriptor. (*bd.queue).desc[bd.idx as usize].next = (bd.idx + 1) % VIRTIO_RING_SIZE as u16; } bd.idx } } /// This is now a common block operation for both reads and writes. Therefore, /// when one thing needs to change, we can change it for both reads and writes. /// There is a lot of error checking that I haven't done. The block device reads /// sectors at a time, which are 512 bytes. Therefore, our buffer must be capable /// of storing multiples of 512 bytes depending on the size. The size is also /// a multiple of 512, but we don't really check that. /// We DO however, check that we aren't writing to an R/O device. This would /// cause a I/O error if we tried to write to a R/O device. pub fn block_op(dev: usize, buffer: *mut u8, size: u32, offset: u64, write: bool) { unsafe { if let Some(bdev) = BLOCK_DEVICES[dev - 1].as_mut() { // Check to see if we are trying to write to a read only device. if true == bdev.read_only && true == write { println!("Trying to write to read/only!"); return; } let sector = offset / 512; // TODO: Before we get here, we are NOT allowed to schedule a read or // write OUTSIDE of the disk's size. So, we can read capacity from // the configuration space to ensure we stay within bounds. let blk_request_size = size_of::(); let blk_request = kmalloc(blk_request_size) as *mut Request; let desc = Descriptor { addr: &(*blk_request).header as *const Header as u64, len: size_of::
() as u32, flags: virtio::VIRTIO_DESC_F_NEXT, next: 0, }; let head_idx = fill_next_descriptor(bdev, desc); (*blk_request).header.sector = sector; // A write is an "out" direction, whereas a read is an "in" direction. (*blk_request).header.blktype = if true == write { VIRTIO_BLK_T_OUT } else { VIRTIO_BLK_T_IN }; // We put 111 in the status. Whenever the device finishes, it will write into // status. If we read status and it is 111, we know that it wasn't written to by // the device. (*blk_request).data.data = buffer; (*blk_request).header.reserved = 0; (*blk_request).status.status = 111; let desc = Descriptor { addr: buffer as u64, len: size, flags: virtio::VIRTIO_DESC_F_NEXT | if false == write { virtio::VIRTIO_DESC_F_WRITE } else { 0 }, next: 0, }; let _data_idx = fill_next_descriptor(bdev, desc); let desc = Descriptor { addr: &(*blk_request).status as *const Status as u64, len: size_of::() as u32, flags: virtio::VIRTIO_DESC_F_WRITE, next: 0, }; let _status_idx = fill_next_descriptor(bdev, desc); (*bdev.queue).avail.ring[(*bdev.queue).avail.idx as usize] = head_idx; (*bdev.queue).avail.idx = ((*bdev.queue).avail.idx + 1) % virtio::VIRTIO_RING_SIZE as u16; // The only queue a block device has is 0, which is the request // queue. bdev.dev.add(MmioOffsets::QueueNotify.scale32()).write_volatile(0); } } } pub fn read(dev: usize, buffer: *mut u8, size: u32, offset: u64) { block_op(dev, buffer, size, offset, false); } pub fn write(dev: usize, buffer: *mut u8, size: u32, offset: u64) { block_op(dev, buffer, size, offset, true); } /// Here we handle block specific interrupts. Here, we need to check /// the used ring and wind it up until we've handled everything. /// This is how the device tells us that it's finished a request. pub fn pending(bd: &mut BlockDevice) { // Here we need to check the used ring and then free the resources // given by the descriptor id. 
unsafe { let ref queue = *bd.queue; while bd.ack_used_idx != queue.used.idx { let ref elem = queue.used.ring[bd.ack_used_idx as usize]; bd.ack_used_idx = (bd.ack_used_idx + 1) % VIRTIO_RING_SIZE as u16; let rq = queue.desc[elem.id as usize].addr as *const Request; kfree(rq as *mut u8); // TODO: Awaken the process that will need this I/O. This is // the purpose of the waiting state. } } } /// The trap code will route PLIC interrupts 1..=8 for virtio devices. When /// virtio determines that this is a block device, it sends it here. pub fn handle_interrupt(idx: usize) { unsafe { if let Some(bdev) = BLOCK_DEVICES[idx].as_mut() { pending(bdev); } else { println!("Invalid block device for interrupt {}", idx + 1); } } } ================================================ FILE: risc_v/chapters/ch9/src/cpu.rs ================================================ // cpu.rs // CPU and CPU-related routines // Also contains the kernel's trap frame // Stephen Marz // 14 October 2019 // The frequency of QEMU is 10 MHz pub const FREQ: u64 = 10_000_000; // Let's do this 250 times per second for switching pub const CONTEXT_SWITCH_TIME: u64 = FREQ / 250; /// In 64-bit mode, we're given three different modes for the MMU: /// 0 - The MMU is off -- no protection and no translation PA = VA /// 8 - This is Sv39 mode -- 39-bit virtual addresses /// 9 - This is Sv48 mode -- 48-bit virtual addresses #[repr(usize)] pub enum SatpMode { Off = 0, Sv39 = 8, Sv48 = 9, } /// The trap frame is set into a structure /// and packed into each hart's mscratch register. /// This allows for quick reference and full /// context switch handling. #[repr(C)] #[derive(Clone, Copy)] pub struct TrapFrame { pub regs: [usize; 32], // 0 - 255 pub fregs: [usize; 32], // 256 - 511 pub satp: usize, // 512 - 519 pub pc: usize, // 520 pub hartid: usize, // 528 pub qm: usize, // 536 } /// Rust requires that we initialize our structures /// because of the move semantics. 
What'll happen below /// is Rust will construct a new TrapFrame and move it /// out of the zero() function below. Rust contains two /// different "selfs" where self can refer to the object /// in memory or Self (capital S) which refers to the /// data type of the structure. In the case below, this /// is TrapFrame. impl TrapFrame { pub const fn new() -> Self { TrapFrame { regs: [0; 32], fregs: [0; 32], satp: 0, pc: 0, hartid: 0, qm: 1, } } } /// The SATP register contains three fields: mode, address space id, and /// the first level table address (level 2 for Sv39). This function /// helps make the 64-bit register contents based on those three /// fields. pub const fn build_satp(mode: SatpMode, asid: usize, addr: usize) -> usize { (mode as usize) << 60 | (asid & 0xffff) << 44 | (addr >> 12) & 0xff_ffff_ffff } pub fn mhartid_read() -> usize { unsafe { let rval; asm!("csrr $0, mhartid" :"=r"(rval)); rval } } pub fn mie_read() -> usize { unsafe { let rval; asm!("csrr $0, mie" :"=r"(rval)); rval } } pub fn mie_write(val: usize) { unsafe { asm!("csrw mie, $0" :: "r"(val)); } } pub fn mstatus_write(val: usize) { unsafe { asm!("csrw mstatus, $0" ::"r"(val)); } } pub fn mstatus_read() -> usize { unsafe { let rval; asm!("csrr $0, mstatus":"=r"(rval)); rval } } pub fn stvec_write(val: usize) { unsafe { asm!("csrw stvec, $0" ::"r"(val)); } } pub fn stvec_read() -> usize { unsafe { let rval; asm!("csrr $0, stvec" :"=r"(rval)); rval } } pub fn mscratch_write(val: usize) { unsafe { asm!("csrw mscratch, $0" ::"r"(val)); } } pub fn mscratch_read() -> usize { unsafe { let rval; asm!("csrr $0, mscratch" : "=r"(rval)); rval } } pub fn mscratch_swap(to: usize) -> usize { unsafe { let from; asm!("csrrw $0, mscratch, $1" : "=r"(from) : "r"(to)); from } } pub fn sscratch_write(val: usize) { unsafe { asm!("csrw sscratch, $0" ::"r"(val)); } } pub fn sscratch_read() -> usize { unsafe { let rval; asm!("csrr $0, sscratch" : "=r"(rval)); rval } } pub fn sscratch_swap(to: usize) -> usize { 
unsafe { let from; asm!("csrrw $0, sscratch, $1" : "=r"(from) : "r"(to)); from } } pub fn mepc_write(val: usize) { unsafe { asm!("csrw mepc, $0" :: "r"(val)); } } pub fn mepc_read() -> usize { unsafe { let rval; asm!("csrr $0, mepc" :"=r"(rval)); rval } } pub fn sepc_write(val: usize) { unsafe { asm!("csrw sepc, $0" :: "r"(val)); } } pub fn sepc_read() -> usize { unsafe { let rval; asm!("csrr $0, sepc" :"=r"(rval)); rval } } pub fn satp_write(val: usize) { unsafe { asm!("csrw satp, $0" :: "r"(val)); } } pub fn satp_read() -> usize { unsafe { let rval; asm!("csrr $0, satp" :"=r"(rval)); rval } } /// Take a hammer to the page tables and synchronize /// all of them. This essentially flushes the entire /// TLB. pub fn satp_fence(vaddr: usize, asid: usize) { unsafe { asm!("sfence.vma $0, $1" :: "r"(vaddr), "r"(asid)); } } /// Synchronize based on the address space identifier /// This allows us to fence a particular process rather /// than the entire TLB. /// The RISC-V documentation calls this a TLB flush +. /// Since there are other memory routines involved, they /// didn't call it a TLB flush, but it is much like /// Intel/AMD's invtlb [] instruction. 
pub fn satp_fence_asid(asid: usize) {
	unsafe {
		// sfence.vma with rs1 = zero and rs2 = asid: flush all
		// translations tagged with this address-space identifier.
		asm!("sfence.vma zero, $0" :: "r"(asid));
	}
}

================================================
FILE: risc_v/chapters/ch9/src/kmem.rs
================================================
// kmem.rs
// Sub-page level: malloc-like allocation system
// Stephen Marz
// 7 October 2019

use crate::page::{align_val, zalloc, Table, PAGE_SIZE};
use core::{mem::size_of, ptr::null_mut};

#[repr(usize)]
enum AllocListFlags {
	// Bit 63 of AllocList::flags_size marks the chunk as taken;
	// the remaining low bits hold the chunk's size in bytes.
	Taken = 1 << 63,
}
impl AllocListFlags {
	// Convenience conversion of the flag variant to its usize mask.
	pub fn val(self) -> usize {
		self as usize
	}
}

// Header placed at the front of every kernel-heap chunk. The size
// recorded here includes this header itself (kmalloc adds
// size_of::<AllocList>() before storing it).
struct AllocList {
	pub flags_size: usize,
}
impl AllocList {
	// True when the taken bit (bit 63) is set.
	pub fn is_taken(&self) -> bool {
		self.flags_size & AllocListFlags::Taken.val() != 0
	}

	pub fn is_free(&self) -> bool {
		!self.is_taken()
	}

	pub fn set_taken(&mut self) {
		self.flags_size |= AllocListFlags::Taken.val();
	}

	pub fn set_free(&mut self) {
		self.flags_size &= !AllocListFlags::Taken.val();
	}

	// Store a new size while preserving the current taken bit.
	pub fn set_size(&mut self, sz: usize) {
		let k = self.is_taken();
		self.flags_size = sz & !AllocListFlags::Taken.val();
		if k {
			self.flags_size |= AllocListFlags::Taken.val();
		}
	}

	// Size of this chunk in bytes (taken bit masked off).
	pub fn get_size(&self) -> usize {
		self.flags_size & !AllocListFlags::Taken.val()
	}
}

// This is the head of the allocation. We start here when
// we search for a free memory location.
static mut KMEM_HEAD: *mut AllocList = null_mut();
// In the future, we will have on-demand pages
// so, we need to keep track of our memory footprint to
// see if we actually need to allocate more.
// (Measured in pages; init() sets this to 512.)
static mut KMEM_ALLOC: usize = 0;
static mut KMEM_PAGE_TABLE: *mut Table = null_mut();

// These functions are safe helpers around an unsafe
// operation (reading the mutable statics above).
pub fn get_head() -> *mut u8 {
	unsafe { KMEM_HEAD as *mut u8 }
}

pub fn get_page_table() -> *mut Table {
	unsafe { KMEM_PAGE_TABLE as *mut Table }
}

pub fn get_num_allocations() -> usize {
	unsafe { KMEM_ALLOC }
}

/// Initialize kernel's memory
/// This is not to be used to allocate memory
/// for user processes. If that's the case, use
/// alloc/dealloc from the page crate.
pub fn init() { unsafe { // Allocate kernel pages (KMEM_ALLOC) KMEM_ALLOC = 512; let k_alloc = zalloc(KMEM_ALLOC); assert!(!k_alloc.is_null()); KMEM_HEAD = k_alloc as *mut AllocList; (*KMEM_HEAD).set_free(); (*KMEM_HEAD).set_size(KMEM_ALLOC * PAGE_SIZE); KMEM_PAGE_TABLE = zalloc(1) as *mut Table; } } /// Allocate sub-page level allocation based on bytes and zero the memory pub fn kzmalloc(sz: usize) -> *mut u8 { let size = align_val(sz, 3); let ret = kmalloc(size); if !ret.is_null() { for i in 0..size { unsafe { (*ret.add(i)) = 0; } } } ret } /// Allocate sub-page level allocation based on bytes pub fn kmalloc(sz: usize) -> *mut u8 { unsafe { let size = align_val(sz, 3) + size_of::(); let mut head = KMEM_HEAD; // .add() uses pointer arithmetic, so we type-cast into a u8 // so that we multiply by an absolute size (KMEM_ALLOC * // PAGE_SIZE). let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { if (*head).is_free() && size <= (*head).get_size() { let chunk_size = (*head).get_size(); let rem = chunk_size - size; (*head).set_taken(); if rem > size_of::() { let next = (head as *mut u8).add(size) as *mut AllocList; // There is space remaining here. (*next).set_free(); (*next).set_size(rem); (*head).set_size(size); } else { // If we get here, take the entire chunk (*head).set_size(chunk_size); } return head.add(1) as *mut u8; } else { // If we get here, what we saw wasn't a free // chunk, move on to the next. head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } // If we get here, we didn't find any free chunks--i.e. there isn't // enough memory for this. TODO: Add on-demand page allocation. null_mut() } /// Free a sub-page level allocation pub fn kfree(ptr: *mut u8) { unsafe { if !ptr.is_null() { let p = (ptr as *mut AllocList).offset(-1); if (*p).is_taken() { (*p).set_free(); } // After we free, see if we can combine adjacent free // spots to see if we can reduce fragmentation. 
coalesce(); } } } /// Merge smaller chunks into a bigger chunk pub fn coalesce() { unsafe { let mut head = KMEM_HEAD; let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { let next = (head as *mut u8).add((*head).get_size()) as *mut AllocList; if (*head).get_size() == 0 { // If this happens, then we have a bad heap // (double free or something). However, that // will cause an infinite loop since the next // pointer will never move beyond the current // location. break; } else if next >= tail { // We calculated the next by using the size // given as get_size(), however this could push // us past the tail. In that case, the size is // wrong, hence we break and stop doing what we // need to do. break; } else if (*head).is_free() && (*next).is_free() { // This means we have adjacent blocks needing to // be freed. So, we combine them into one // allocation. (*head).set_size( (*head).get_size() + (*next).get_size(), ); } // If we get here, we might've moved. Recalculate new // head. head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } /// For debugging purposes, print the kmem table pub fn print_table() { unsafe { let mut head = KMEM_HEAD; let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE) as *mut AllocList; while head < tail { println!( "{:p}: Length = {:<10} Taken = {}", head, (*head).get_size(), (*head).is_taken() ); head = (head as *mut u8).add((*head).get_size()) as *mut AllocList; } } } // /////////////////////////////////// // / GLOBAL ALLOCATOR // /////////////////////////////////// // The global allocator allows us to use the data structures // in the core library, such as a linked list or B-tree. // We want to use these sparingly since we have a coarse-grained // allocator. use core::alloc::{GlobalAlloc, Layout}; // The global allocator is a static constant to a global allocator // structure. 
// We don't need any members because we're using this
// structure just to implement alloc and dealloc.
struct OsGlobalAlloc;

unsafe impl GlobalAlloc for OsGlobalAlloc {
	unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
		// NOTE(review): despite what an earlier comment claimed, this
		// does not round up to a page boundary. kzmalloc() aligns the
		// request to 8 bytes and hands back zeroed, sub-page-grained
		// memory from the kernel byte allocator.
		kzmalloc(layout.size())
	}

	unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) {
		// We ignore layout since our allocator uses ptr_start -> last
		// to determine the span of an allocation.
		kfree(ptr);
	}
}

#[global_allocator]
/// Technically, we don't need the {} at the end, but it
/// reveals that we're creating a new structure and not just
/// copying a value.
static GA: OsGlobalAlloc = OsGlobalAlloc {};

#[alloc_error_handler]
/// If for some reason alloc() in the global allocator gets null_mut(),
/// then we come here. This is a divergent function, so we call panic to
/// let the tester know what's going on.
pub fn alloc_error(l: Layout) -> ! {
	panic!(
	       "Allocator failed to allocate {} bytes with {}-byte alignment.",
	       l.size(),
	       l.align()
	);
}
================================================ FILE: risc_v/chapters/ch9/src/lds/virt.lds ================================================
/*
  virt.lds
  Linker script for outputting to RISC-V QEMU "virt" machine.
  Stephen Marz
  6 October 2019
*/

/*
  riscv is the name of the architecture that the linker understands
  for any RISC-V target (64-bit or 32-bit). We will further refine
  this by using -mabi=lp64 and -march=rv64gc
*/
OUTPUT_ARCH( "riscv" )

/*
We're setting our entry point to a symbol
called _start which is inside of boot.S. This
essentially stores the address of _start as the
"entry point", or where CPU instructions should start
executing.

In the rest of this script, we are going to place _start
right at the beginning of 0x8000_0000 because this is where
the virtual machine and many RISC-V boards will start executing.
*/ ENTRY( _start ) /* The MEMORY section will explain that we have "ram" that contains a section that is 'w' (writeable), 'x' (executable), and 'a' (allocatable). We use '!' to invert 'r' (read-only) and 'i' (initialized). We don't want our memory to be read-only, and we're stating that it is NOT initialized at the beginning. The ORIGIN is the memory address 0x8000_0000. If we look at the virt spec or the specification for the RISC-V HiFive Unleashed, this is the starting memory address for our code. Side note: There might be other boot ROMs at different addresses, but their job is to get to this point. Finally LENGTH = 128M tells the linker that we have 128 megabyte of RAM. The linker will double check this to make sure everything can fit. The HiFive Unleashed has a lot more RAM than this, but for the virtual machine, I went with 128M since I think that's enough RAM for now. We can provide other pieces of memory, such as QSPI, or ROM, but we're telling the linker script here that we have one pool of RAM. */ MEMORY { ram (wxa!ri) : ORIGIN = 0x80000000, LENGTH = 128M } /* PHDRS is short for "program headers", which we specify three here: text - CPU instructions (executable sections) data - Global, initialized variables bss - Global, uninitialized variables (all will be set to 0 by boot.S) The command PT_LOAD tells the linker that these sections will be loaded from the file into memory. We can actually stuff all of these into a single program header, but by splitting it up into three, we can actually use the other PT_* commands such as PT_DYNAMIC, PT_INTERP, PT_NULL to tell the linker where to find additional information. However, for our purposes, every section will be loaded from the program headers. */ PHDRS { text PT_LOAD; data PT_LOAD; bss PT_LOAD; } /* We are now going to organize the memory based on which section it is in. In assembly, we can change the section with the ".section" directive. 
However, in C++ and Rust, CPU instructions go into text, global constants go into rodata, global initialized variables go into data, and global uninitialized variables go into bss. */ SECTIONS { /* The first part of our RAM layout will be the text section. Since our CPU instructions are here, and our memory starts at 0x8000_0000, we need our entry point to line up here. */ .text : { /* PROVIDE allows me to access a symbol called _text_start so I know where the text section starts in the operating system. This should not move, but it is here for convenience. The period '.' tells the linker to set _text_start to the CURRENT location ('.' = current memory location). This current memory location moves as we add things. */ PROVIDE(_text_start = .); /* We are going to layout all text sections here, starting with .text.init. The asterisk in front of the parentheses means to match the .text.init section of ANY object file. Otherwise, we can specify which object file should contain the .text.init section, for example, boot.o(.text.init) would specifically put the .text.init section of our bootloader here. Because we might want to change the name of our files, we'll leave it with a *. Inside the parentheses is the name of the section. I created my own called .text.init to make 100% sure that the _start is put right at the beginning. The linker will lay this out in the order it receives it: .text.init first all .text sections next any .text.* sections last .text.* means to match anything after .text. If we didn't already specify .text.init, this would've matched here. The assembler and linker can place things in "special" text sections, so we match any we might come across here. */ *(.text.init) *(.text .text.*) /* Again, with PROVIDE, we're providing a readable symbol called _text_end, which is set to the memory address AFTER .text.init, .text, and .text.*'s have been added. */ PROVIDE(_text_end = .); /* The portion after the right brace is in an odd format. 
However, this is telling the linker what memory portion to put it in. We labeled our RAM, ram, with the constraints that it is writeable, allocatable, and executable. The linker will make sure with this that we can do all of those things. >ram - This just tells the linker script to put this entire section (.text) into the ram region of memory. To my knowledge, the '>' does not mean "greater than". Instead, it is a symbol to let the linker know we want to put this in ram. AT>ram - This sets the LMA (load memory address) region to the same thing. LMA is the final translation of a VMA (virtual memory address). With this linker script, we're loading everything into its physical location. We'll let the kernel copy and sort out the virtual memory. That's why >ram and AT>ram are continually the same thing. :text - This tells the linker script to put this into the :text program header. We've only defined three: text, data, and bss. In this case, we're telling the linker script to go into the text section. */ } >ram AT>ram :text /* The global pointer allows the linker to position global variables and constants into independent positions relative to the gp (global pointer) register. The globals start after the text sections and are only relevant to the rodata, data, and bss sections. */ PROVIDE(_global_pointer = .); /* Most compilers create a rodata (read only data) section for global constants. However, we're going to place ours in the text section. We can actually put this in :data, but since the .text section is read-only, we can place it there. NOTE: This doesn't actually do anything, yet. The actual "protection" cannot be done at link time. Instead, when we program the memory management unit (MMU), we will be able to choose which bits (R=read, W=write, X=execute) we want each memory segment to be able to do. 
*/ .rodata : { PROVIDE(_rodata_start = .); *(.rodata .rodata.*) PROVIDE(_rodata_end = .); /* Again, we're placing the rodata section in the memory segment "ram" and we're putting it in the :text program header. We don't have one for rodata anyway. */ } >ram AT>ram :text .data : { /* . = ALIGN(4096) tells the linker to align the current memory location (which is 0x8000_0000 + text section + rodata section) to 4096 bytes. This is because our paging system's resolution is 4,096 bytes or 4 KiB. */ . = ALIGN(4096); PROVIDE(_data_start = .); /* sdata and data are essentially the same thing. However, compilers usually use the sdata sections for shorter, quicker loading sections. So, usually critical data is loaded there. However, we're loading all of this in one fell swoop. So, we're looking to put all of the following sections under the umbrella .data: .sdata .sdata.[anything] .data .data.[anything] ...in that order. */ *(.sdata .sdata.*) *(.data .data.*) PROVIDE(_data_end = .); } >ram AT>ram :data .bss : { PROVIDE(_bss_start = .); *(.sbss .sbss.*) *(.bss .bss.*) PROVIDE(_bss_end = .); } >ram AT>ram :bss /* The following will be helpful when we allocate the kernel stack (_stack) and determine where the heap begnis and ends (_heap_start and _heap_start + _heap_size)/ When we do memory allocation, we can use these symbols. We use the symbols instead of hard-coding an address because this is a floating target. As we add code, the heap moves farther down the memory and gets shorter. _memory_start will be set to 0x8000_0000 here. We use ORIGIN(ram) so that it will take whatever we set the origin of ram to. Otherwise, we'd have to change it more than once if we ever stray away from 0x8000_0000 as our entry point. */ PROVIDE(_memory_start = ORIGIN(ram)); /* Our kernel stack starts at the end of the bss segment (_bss_end). However, we're allocating 0x80000 bytes (524 KiB) to our kernel stack. This should be PLENTY of space. 
The reason we add the memory is because the stack grows from higher memory to lower memory (bottom to top). Therefore we set the stack at the very bottom of its allocated slot. When we go to allocate from the stack, we'll subtract the number of bytes we need. */ PROVIDE(_stack_start = _bss_end); PROVIDE(_stack_end = _stack_start + 0x8000); PROVIDE(_memory_end = ORIGIN(ram) + LENGTH(ram)); /* Finally, our heap starts right after the kernel stack. This heap will be used mainly to dole out memory for user-space applications. However, in some circumstances, it will be used for kernel memory as well. We don't align here because we let the kernel determine how it wants to do this. */ PROVIDE(_heap_start = _stack_end); PROVIDE(_heap_size = _memory_end - _heap_start); } ================================================ FILE: risc_v/chapters/ch9/src/lib.rs ================================================ // Steve Operating System // Stephen Marz // 21 Sep 2019 #![no_std] #![feature(panic_info_message, asm, allocator_api, alloc_error_handler, alloc_prelude, const_raw_ptr_to_usize_cast)] // #[macro_use] extern crate alloc; // This is experimental and requires alloc_prelude as a feature // use alloc::prelude::v1::*; // /////////////////////////////////// // / RUST MACROS // /////////////////////////////////// #[macro_export] macro_rules! print { ($($args:tt)+) => ({ use core::fmt::Write; let _ = write!(crate::uart::Uart::new(0x1000_0000), $($args)+); }); } #[macro_export] macro_rules! println { () => ({ print!("\r\n") }); ($fmt:expr) => ({ print!(concat!($fmt, "\r\n")) }); ($fmt:expr, $($args:tt)+) => ({ print!(concat!($fmt, "\r\n"), $($args)+) }); } // /////////////////////////////////// // / LANGUAGE STRUCTURES / FUNCTIONS // /////////////////////////////////// #[no_mangle] extern "C" fn eh_personality() {} #[panic_handler] fn panic(info: &core::panic::PanicInfo) -> ! 
{ print!("Aborting: "); if let Some(p) = info.location() { println!( "line {}, file {}: {}", p.line(), p.file(), info.message().unwrap() ); } else { println!("no information available."); } abort(); } #[no_mangle] extern "C" fn abort() -> ! { loop { unsafe { asm!("wfi"::::"volatile"); } } } // /////////////////////////////////// // / CONSTANTS // /////////////////////////////////// // const STR_Y: &str = "\x1b[38;2;79;221;13m✓\x1b[m"; // const STR_N: &str = "\x1b[38;2;221;41;13m✘\x1b[m"; // The following symbols come from asm/mem.S. We can use // the symbols directly, but the address of the symbols // themselves are their values, which can cause issues. // Instead, I created doubleword values in mem.S in the .rodata and .data // sections. // extern "C" { // static TEXT_START: usize; // static TEXT_END: usize; // static DATA_START: usize; // static DATA_END: usize; // static RODATA_START: usize; // static RODATA_END: usize; // static BSS_START: usize; // static BSS_END: usize; // static KERNEL_STACK_START: usize; // static KERNEL_STACK_END: usize; // static HEAP_START: usize; // static HEAP_SIZE: usize; // } /// Identity map range /// Takes a contiguous allocation of memory and maps it using PAGE_SIZE /// This assumes that start <= end pub fn id_map_range(root: &mut page::Table, start: usize, end: usize, bits: i64) { let mut memaddr = start & !(page::PAGE_SIZE - 1); let num_kb_pages = (page::align_val(end, 12) - memaddr) / page::PAGE_SIZE; // I named this num_kb_pages for future expansion when // I decide to allow for GiB (2^30) and 2MiB (2^21) page // sizes. However, the overlapping memory regions are causing // nightmares. for _ in 0..num_kb_pages { page::map(root, memaddr, memaddr, bits, 0); memaddr += 1 << 12; } } extern "C" { fn switch_to_user(frame: usize) -> !; } fn rust_switch_to_user(frame: usize) -> ! 
{ unsafe { switch_to_user(frame); } } // /////////////////////////////////// // / ENTRY POINT // /////////////////////////////////// #[no_mangle] extern "C" fn kinit() { uart::Uart::new(0x1000_0000).init(); page::init(); kmem::init(); process::init(); // We lower the threshold wall so our interrupts can jump over it. // Any priority > 0 will be able to be "heard" plic::set_threshold(0); // VIRTIO = [1..8] // UART0 = 10 // PCIE = [32..35] // Enable PLIC interrupts. for i in 1..=10 { plic::enable(i); plic::set_priority(i, 1); } // Set up virtio. This requires a working heap and page-grained allocator. virtio::probe(); // This just tests the block device. We know that it connects backwards (8, 7, ..., 1). let buffer = kmem::kmalloc(1024); // Offset 1024 is the first block, which is the superblock. In the minix 3 file system, the first // block is the "boot block", which in our case will be 0. block::read(8, buffer, 512, 1024); let mut i = 0; loop { if i > 100_000_000 { break; } i += 1; } println!("Test hdd.dsk:"); unsafe { print!(" "); for i in 0..16 { print!("{:02x} ", buffer.add(i).read()); } println!(); print!(" "); for i in 0..16 { print!("{:02x} ", buffer.add(16+i).read()); } println!(); print!(" "); for i in 0..16 { print!("{:02x} ", buffer.add(32+i).read()); } println!(); print!(" "); for i in 0..16 { print!("{:02x} ", buffer.add(48+i).read()); } println!(); buffer.add(0).write(0xaa); buffer.add(1).write(0xbb); buffer.add(2).write(0x7a); } block::write(8, buffer, 512, 0); // Free the testing buffer. kmem::kfree(buffer); // We schedule the next context switch using a multiplier of 1 trap::schedule_next_context_switch(1); rust_switch_to_user(sched::schedule()); // switch_to_user will not return, so we should never get here } #[no_mangle] extern "C" fn kinit_hart(_hartid: usize) { // We aren't going to do anything here until we get SMP going. // All non-0 harts initialize here. 
}

// ///////////////////////////////////
// / RUST MODULES
// ///////////////////////////////////

pub mod block;
pub mod cpu;
pub mod kmem;
pub mod page;
pub mod plic;
pub mod process;
pub mod rng;
pub mod sched;
pub mod syscall;
pub mod trap;
pub mod uart;
pub mod virtio;
================================================ FILE: risc_v/chapters/ch9/src/page.rs ================================================
// page.rs
// Memory routines
// Stephen Marz
// 6 October 2019

use core::{mem::size_of, ptr::null_mut};

// ////////////////////////////////
// // Allocation routines
// ////////////////////////////////
extern "C" {
	static HEAP_START: usize;
	static HEAP_SIZE: usize;
}

// We will use ALLOC_START to mark the start of the actual
// memory we can dish out.
static mut ALLOC_START: usize = 0;
const PAGE_ORDER: usize = 12;
pub const PAGE_SIZE: usize = 1 << 12;

/// Align (set to a multiple of some power of two)
/// This takes an order which is the exponent to 2^order
/// Therefore, all alignments must be made as a power of two.
/// This function always rounds up.
pub const fn align_val(val: usize, order: usize) -> usize {
	let o = (1usize << order) - 1;
	(val + o) & !o
}

#[repr(u8)]
pub enum PageBits {
	Empty = 0,
	Taken = 1 << 0,
	Last = 1 << 1,
}

impl PageBits {
	// We convert PageBits to a u8 a lot, so this is
	// for convenience.
	pub fn val(self) -> u8 {
		self as u8
	}
}

// Each page is described by the Page structure. Linux does this
// as well, where each 4096-byte chunk of memory has a structure
// associated with it. However, their structure is much larger.
pub struct Page {
	flags: u8,
}

impl Page {
	// If this page has been marked as the final allocation,
	// this function returns true. Otherwise, it returns false.
	pub fn is_last(&self) -> bool {
		if self.flags & PageBits::Last.val() != 0 {
			true
		}
		else {
			false
		}
	}

	// If the page is marked as being taken (allocated), then
	// this function returns true. Otherwise, it returns false.
	pub fn is_taken(&self) -> bool {
		if self.flags & PageBits::Taken.val() != 0 {
			true
		}
		else {
			false
		}
	}

	// This is the opposite of is_taken().
	pub fn is_free(&self) -> bool {
		!self.is_taken()
	}

	// Clear the Page structure and all associated allocations.
	pub fn clear(&mut self) {
		self.flags = PageBits::Empty.val();
	}

	// Set a certain flag. We ran into trouble here since PageBits
	// is an enumeration and we haven't implemented the BitOr Trait
	// on it.
	pub fn set_flag(&mut self, flag: PageBits) {
		self.flags |= flag.val();
	}

	pub fn clear_flag(&mut self, flag: PageBits) {
		self.flags &= !(flag.val());
	}
}

/// Initialize the allocation system. There are several ways that we can
/// implement the page allocator:
/// 1. Free list (singly linked list where it starts at the first free
/// allocation) 2. Bookkeeping list (structure contains a taken and length)
/// 3. Allocate one Page structure per 4096 bytes (this is what I chose)
/// 4. Others
pub fn init() {
	unsafe {
		// let desc_per_page = PAGE_SIZE / size_of::<Page>();
		let num_pages = HEAP_SIZE / PAGE_SIZE;
		// let num_desc_pages = num_pages / desc_per_page;
		let ptr = HEAP_START as *mut Page;
		// Clear all pages to make sure that they aren't accidentally
		// taken
		for i in 0..num_pages {
			(*ptr.add(i)).clear();
		}
		// Determine where the actual useful memory starts. This will be
		// after all Page structures. We also must align the ALLOC_START
		// to a page-boundary (PAGE_SIZE = 4096).
		// FIX: the generic argument of size_of was stripped in
		// extraction; the bookkeeping area is num_pages Page structs.
		// ALLOC_START = (HEAP_START + num_pages * size_of::<Page>()
		//                + PAGE_SIZE - 1) & !(PAGE_SIZE - 1);
		ALLOC_START = align_val(
		                        HEAP_START
		                        + num_pages * size_of::<Page>(),
		                        PAGE_ORDER,
		);
	}
}

/// Allocate a page or multiple pages
/// pages: the number of PAGE_SIZE pages to allocate
pub fn alloc(pages: usize) -> *mut u8 {
	// We have to find a contiguous allocation of pages
	assert!(pages > 0);
	unsafe {
		// We create a Page structure for each page on the heap. We
		// actually might have more since HEAP_SIZE moves and so does
		// the size of our structure, but we'll only waste a few bytes.
		let num_pages = HEAP_SIZE / PAGE_SIZE;
		let ptr = HEAP_START as *mut Page;
		// FIX: the range must be inclusive; with an exclusive upper
		// bound the final window of exactly `pages` free pages could
		// never be handed out.
		for i in 0..=num_pages - pages {
			let mut found = false;
			// Check to see if this Page is free. If so, we have our
			// first candidate memory address.
			if (*ptr.add(i)).is_free() {
				// It was FREE! Yay!
				found = true;
				for j in i..i + pages {
					// Now check to see if we have a
					// contiguous allocation for all of the
					// request pages. If not, we should
					// check somewhere else.
					if (*ptr.add(j)).is_taken() {
						found = false;
						break;
					}
				}
			}
			// We've checked to see if there are enough contiguous
			// pages to form what we need. If we couldn't, found
			// will be false, otherwise it will be true, which means
			// we've found valid memory we can allocate.
			if found {
				for k in i..i + pages - 1 {
					(*ptr.add(k)).set_flag(PageBits::Taken);
				}
				// The marker for the last page is
				// PageBits::Last This lets us know when we've
				// hit the end of this particular allocation.
				(*ptr.add(i+pages-1)).set_flag(PageBits::Taken);
				(*ptr.add(i+pages-1)).set_flag(PageBits::Last);
				// The Page structures themselves aren't the
				// useful memory. Instead, there is 1 Page
				// structure per 4096 bytes starting at
				// ALLOC_START.
				return (ALLOC_START + PAGE_SIZE * i) as *mut u8;
			}
		}
	}
	// If we get here, that means that no contiguous allocation was
	// found.
	null_mut()
}

/// Allocate and zero a page or multiple pages
/// pages: the number of pages to allocate
/// Each page is PAGE_SIZE which is calculated as 1 << PAGE_ORDER
/// On RISC-V, this typically will be 4,096 bytes.
pub fn zalloc(pages: usize) -> *mut u8 {
	// Allocate and zero a page.
	// First, let's get the allocation
	let ret = alloc(pages);
	if !ret.is_null() {
		let size = (PAGE_SIZE * pages) / 8;
		let big_ptr = ret as *mut u64;
		for i in 0..size {
			// We use big_ptr so that we can force an
			// sd (store doubleword) instruction rather than
			// the sb. This means 8x fewer stores than before.
			// Typically we have to be concerned about remaining
			// bytes, but fortunately 4096 % 8 = 0, so we
			// won't have any remaining bytes.
			unsafe {
				(*big_ptr.add(i)) = 0;
			}
		}
	}
	ret
}

/// Deallocate a page by its pointer
/// The way we've structured this, it will automatically coalesce
/// contiguous pages.
pub fn dealloc(ptr: *mut u8) {
	// Make sure we don't try to free a null pointer.
	assert!(!ptr.is_null());
	unsafe {
		let addr =
			HEAP_START + (ptr as usize - ALLOC_START) / PAGE_SIZE;
		// Make sure that the address makes sense. The address we
		// calculate here is the page structure, not the HEAP address!
		assert!(addr >= HEAP_START && addr < HEAP_START + HEAP_SIZE);
		let mut p = addr as *mut Page;
		// Keep clearing pages until we hit the last page.
		while (*p).is_taken() && !(*p).is_last() {
			(*p).clear();
			p = p.add(1);
		}
		// If the following assertion fails, it is most likely
		// caused by a double-free.
		assert!(
		        (*p).is_last() == true,
		        "Possible double-free detected! (Not taken found \
		         before last)"
		);
		// If we get here, we've taken care of all previous pages and
		// we are on the last page.
		(*p).clear();
	}
}

/// Print all page allocations
/// This is mainly used for debugging.
pub fn print_page_allocations() { unsafe { let num_pages = (HEAP_SIZE - (ALLOC_START - HEAP_START)) / PAGE_SIZE; let mut beg = HEAP_START as *const Page; let end = beg.add(num_pages); let alloc_beg = ALLOC_START; let alloc_end = ALLOC_START + num_pages * PAGE_SIZE; println!(); println!( "PAGE ALLOCATION TABLE\nMETA: {:p} -> {:p}\nPHYS: \ 0x{:x} -> 0x{:x}", beg, end, alloc_beg, alloc_end ); println!("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"); let mut num = 0; while beg < end { if (*beg).is_taken() { let start = beg as usize; let memaddr = ALLOC_START + (start - HEAP_START) * PAGE_SIZE; print!("0x{:x} => ", memaddr); loop { num += 1; if (*beg).is_last() { let end = beg as usize; let memaddr = ALLOC_START + (end - HEAP_START) * PAGE_SIZE + PAGE_SIZE - 1; print!( "0x{:x}: {:>3} page(s)", memaddr, (end - start + 1) ); println!("."); break; } beg = beg.add(1); } } beg = beg.add(1); } println!("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"); println!( "Allocated: {:>6} pages ({:>10} bytes).", num, num * PAGE_SIZE ); println!( "Free : {:>6} pages ({:>10} bytes).", num_pages - num, (num_pages - num) * PAGE_SIZE ); println!(); } } // //////////////////////////////// // // MMU Routines // //////////////////////////////// // Represent (repr) our entry bits as // unsigned 64-bit integers. #[repr(i64)] #[derive(Copy, Clone)] pub enum EntryBits { None = 0, Valid = 1 << 0, Read = 1 << 1, Write = 1 << 2, Execute = 1 << 3, User = 1 << 4, Global = 1 << 5, Access = 1 << 6, Dirty = 1 << 7, // Convenience combinations ReadWrite = 1 << 1 | 1 << 2, ReadExecute = 1 << 1 | 1 << 3, ReadWriteExecute = 1 << 1 | 1 << 2 | 1 << 3, // User Convenience Combinations UserReadWrite = 1 << 1 | 1 << 2 | 1 << 4, UserReadExecute = 1 << 1 | 1 << 3 | 1 << 4, UserReadWriteExecute = 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4, } // Helper functions to convert the enumeration // into an i64, which is what our page table // entries will be. impl EntryBits { pub fn val(self) -> i64 { self as i64 } } // A single entry. 
We're using an i64 so that // this will sign-extend rather than zero-extend // since RISC-V requires that the reserved sections // take on the most significant bit. pub struct Entry { pub entry: i64, } // The Entry structure describes one of the 512 entries per table, which is // described in the RISC-V privileged spec Figure 4.18. impl Entry { pub fn is_valid(&self) -> bool { self.get_entry() & EntryBits::Valid.val() != 0 } // The first bit (bit index #0) is the V bit for // valid. pub fn is_invalid(&self) -> bool { !self.is_valid() } // A leaf has one or more RWX bits set pub fn is_leaf(&self) -> bool { self.get_entry() & 0xe != 0 } pub fn is_branch(&self) -> bool { !self.is_leaf() } pub fn set_entry(&mut self, entry: i64) { self.entry = entry; } pub fn get_entry(&self) -> i64 { self.entry } } // Table represents a single table, which contains 512 (2^9), 64-bit entries. pub struct Table { pub entries: [Entry; 512], } impl Table { pub fn len() -> usize { 512 } } /// Map a virtual address to a physical address using 4096-byte page /// size. /// root: a mutable reference to the root Table /// vaddr: The virtual address to map /// paddr: The physical address to map /// bits: An OR'd bitset containing the bits the leaf should have. /// The bits should contain only the following: /// Read, Write, Execute, User, and/or Global /// The bits MUST include one or more of the following: /// Read, Write, Execute /// The valid bit automatically gets added. pub fn map(root: &mut Table, vaddr: usize, paddr: usize, bits: i64, level: usize) { // Make sure that Read, Write, or Execute have been provided // otherwise, we'll leak memory and always create a page fault. 
assert!(bits & 0xe != 0); // Extract out each VPN from the virtual address // On the virtual address, each VPN is exactly 9 bits, // which is why we use the mask 0x1ff = 0b1_1111_1111 (9 bits) let vpn = [ // VPN[0] = vaddr[20:12] (vaddr >> 12) & 0x1ff, // VPN[1] = vaddr[29:21] (vaddr >> 21) & 0x1ff, // VPN[2] = vaddr[38:30] (vaddr >> 30) & 0x1ff, ]; // Just like the virtual address, extract the physical address // numbers (PPN). However, PPN[2] is different in that it stores // 26 bits instead of 9. Therefore, we use, // 0x3ff_ffff = 0b11_1111_1111_1111_1111_1111_1111 (26 bits). let ppn = [ // PPN[0] = paddr[20:12] (paddr >> 12) & 0x1ff, // PPN[1] = paddr[29:21] (paddr >> 21) & 0x1ff, // PPN[2] = paddr[55:30] (paddr >> 30) & 0x3ff_ffff, ]; // We will use this as a floating reference so that we can set // individual entries as we walk the table. let mut v = &mut root.entries[vpn[2]]; // Now, we're going to traverse the page table and set the bits // properly. We expect the root to be valid, however we're required to // create anything beyond the root. // In Rust, we create a range iterator using the .. operator. // The .rev() will reverse the iteration since we need to start with // VPN[2] The .. operator is inclusive on start but exclusive on end. // So, (0..2) will iterate 0 and 1. for i in (level..2).rev() { if !v.is_valid() { // Allocate a page let page = zalloc(1); // The page is already aligned by 4,096, so store it // directly The page is stored in the entry shifted // right by 2 places. v.set_entry( (page as i64 >> 2) | EntryBits::Valid.val(), ); } let entry = ((v.get_entry() & !0x3ff) << 2) as *mut Entry; v = unsafe { entry.add(vpn[i]).as_mut().unwrap() }; } // When we get here, we should be at VPN[0] and v should be pointing to // our entry. 
// The entry structure is Figure 4.18 in the RISC-V Privileged // Specification let entry = (ppn[2] << 28) as i64 | // PPN[2] = [53:28] (ppn[1] << 19) as i64 | // PPN[1] = [27:19] (ppn[0] << 10) as i64 | // PPN[0] = [18:10] bits | // Specified bits, such as User, Read, Write, etc EntryBits::Valid.val() | // Valid bit EntryBits::Dirty.val() | // Some machines require this to =1 EntryBits::Access.val() // Just like dirty, some machines require this ; // Set the entry. V should be set to the correct pointer by the loop // above. v.set_entry(entry); } /// Unmaps and frees all memory associated with a table. /// root: The root table to start freeing. /// NOTE: This does NOT free root directly. This must be /// freed manually. /// The reason we don't free the root is because it is /// usually embedded into the Process structure. pub fn unmap(root: &mut Table) { // Start with level 2 for lv2 in 0..Table::len() { let ref entry_lv2 = root.entries[lv2]; if entry_lv2.is_valid() && entry_lv2.is_branch() { // This is a valid entry, so drill down and free. let memaddr_lv1 = (entry_lv2.get_entry() & !0x3ff) << 2; let table_lv1 = unsafe { // Make table_lv1 a mutable reference instead of // a pointer. (memaddr_lv1 as *mut Table).as_mut().unwrap() }; for lv1 in 0..Table::len() { let ref entry_lv1 = table_lv1.entries[lv1]; if entry_lv1.is_valid() && entry_lv1.is_branch() { let memaddr_lv0 = (entry_lv1.get_entry() & !0x3ff) << 2; // The next level is level 0, which // cannot have branches, therefore, // we free here. dealloc(memaddr_lv0 as *mut u8); } } dealloc(memaddr_lv1 as *mut u8); } } } /// Walk the page table to convert a virtual address to a /// physical address. /// If a page fault would occur, this returns None /// Otherwise, it returns Some with the physical address. 
pub fn virt_to_phys(root: &Table, vaddr: usize) -> Option { // Walk the page table pointed to by root let vpn = [ // VPN[0] = vaddr[20:12] (vaddr >> 12) & 0x1ff, // VPN[1] = vaddr[29:21] (vaddr >> 21) & 0x1ff, // VPN[2] = vaddr[38:30] (vaddr >> 30) & 0x1ff, ]; let mut v = &root.entries[vpn[2]]; for i in (0..=2).rev() { if v.is_invalid() { // This is an invalid entry, page fault. break; } else if v.is_leaf() { // According to RISC-V, a leaf can be at any level. // The offset mask masks off the PPN. Each PPN is 9 // bits and they start at bit #12. So, our formula // 12 + i * 9 let off_mask = (1 << (12 + i * 9)) - 1; let vaddr_pgoff = vaddr & off_mask; let addr = ((v.get_entry() << 2) as usize) & !off_mask; return Some(addr | vaddr_pgoff); } // Set v to the next entry which is pointed to by this // entry. However, the address was shifted right by 2 places // when stored in the page table entry, so we shift it left // to get it back into place. let entry = ((v.get_entry() & !0x3ff) << 2) as *const Entry; // We do i - 1 here, however we should get None or Some() above // before we do 0 - 1 = -1. v = unsafe { entry.add(vpn[i - 1]).as_ref().unwrap() }; } // If we get here, we've exhausted all valid tables and haven't // found a leaf. None } ================================================ FILE: risc_v/chapters/ch9/src/plic.rs ================================================ // plic.rs // Platform Level Interrupt Controller (PLIC) // Stephen Marz // 1 Nov 2019 use crate::uart::Uart; use crate::virtio; const PLIC_PRIORITY: usize = 0x0c00_0000; const PLIC_PENDING: usize = 0x0c00_1000; const PLIC_INT_ENABLE: usize = 0x0c00_2000; const PLIC_THRESHOLD: usize = 0x0c20_0000; const PLIC_CLAIM: usize = 0x0c20_0004; // Each register is 4-bytes (u32) // The PLIC is an external interrupt controller. The one // used by QEMU virt is the same as the SiFive PLIC. 
// https://sifive.cdn.prismic.io/sifive%2F834354f0-08e6-423c-bf1f-0cb58ef14061_fu540-c000-v1.0.pdf // Chapter 10 explains the priority, pending, interrupt enable, threshold and claims // The virt machine has the following external interrupts (from Qemu source): // Interrupt 0 is a "null" interrupt and is hardwired to 0. // VIRTIO = [1..8] // UART0 = 10 // PCIE = [32..35] /// Get the next available interrupt. This is the "claim" process. /// The plic will automatically sort by priority and hand us the /// ID of the interrupt. For example, if the UART is interrupting /// and it's next, we will get the value 10. pub fn next() -> Option { let claim_reg = PLIC_CLAIM as *const u32; let claim_no; // The claim register is filled with the highest-priority, enabled interrupt. unsafe { claim_no = claim_reg.read_volatile(); } if claim_no == 0 { // The interrupt 0 is hardwired to 0, which tells us that there is no // interrupt to claim, hence we return None. None } else { // If we get here, we've gotten a non-0 interrupt. Some(claim_no) } } /// Complete a pending interrupt by id. The id should come /// from the next() function above. pub fn complete(id: u32) { let complete_reg = PLIC_CLAIM as *mut u32; unsafe { // We actually write a u32 into the entire complete_register. // This is the same register as the claim register, but it can // differentiate based on whether we're reading or writing. complete_reg.write_volatile(id); } } /// Set the global threshold. The threshold can be a value [0..7]. /// The PLIC will mask any interrupts at or below the given threshold. /// This means that a threshold of 7 will mask ALL interrupts and /// a threshold of 0 will allow ALL interrupts. pub fn set_threshold(tsh: u8) { // We do tsh because we're using a u8, but our maximum number // is a 3-bit 0b111. So, we and with 7 (0b111) to just get the // last three bits. 
let actual_tsh = tsh & 7; let tsh_reg = PLIC_THRESHOLD as *mut u32; unsafe { tsh_reg.write_volatile(actual_tsh as u32); } } /// See if a given interrupt id is pending. pub fn is_pending(id: u32) -> bool { let pend = PLIC_PENDING as *const u32; let actual_id = 1 << id; let pend_ids; unsafe { pend_ids = pend.read_volatile(); } actual_id & pend_ids != 0 } /// Enable a given interrupt id pub fn enable(id: u32) { let enables = PLIC_INT_ENABLE as *mut u32; let actual_id = 1 << id; unsafe { // Unlike the complete and claim registers, the plic_int_enable // register is a bitset where the id is the bit index. The register // is a 32-bit register, so that gives us enables for interrupts // 31 through 1 (0 is hardwired to 0). enables.write_volatile(enables.read_volatile() | actual_id); } } /// Set a given interrupt priority to the given priority. /// The priority must be [0..7] pub fn set_priority(id: u32, prio: u8) { let actual_prio = prio as u32 & 7; let prio_reg = PLIC_PRIORITY as *mut u32; unsafe { // The offset for the interrupt id is: // PLIC_PRIORITY + 4 * id // Since we're using pointer arithmetic on a u32 type, // it will automatically multiply the id by 4. prio_reg.add(id as usize).write_volatile(actual_prio); } } pub fn handle_interrupt() { if let Some(interrupt) = next() { // If we get here, we've got an interrupt from the claim register. The PLIC will // automatically prioritize the next interrupt, so when we get it from claim, it // will be the next in priority order. match interrupt { 1..=8 => { virtio::handle_interrupt(interrupt); }, 10 => { // Interrupt 10 is the UART interrupt. // We would typically set this to be handled out of the interrupt context, // but we're testing here! C'mon! // We haven't yet used the singleton pattern for my_uart, but remember, this // just simply wraps 0x1000_0000 (UART). let mut my_uart = Uart::new(0x1000_0000); // If we get here, the UART better have something! If not, what happened?? 
if let Some(c) = my_uart.get() { // If you recognize this code, it used to be in the lib.rs under kmain(). That // was because we needed to poll for UART data. Now that we have interrupts, // here it goes! match c { 8 => { // This is a backspace, so we // essentially have to write a space and // backup again: print!("{} {}", 8 as char, 8 as char); }, 10 | 13 => { // Newline or carriage-return println!(); }, _ => { print!("{}", c as char); }, } } }, // Non-UART interrupts go here and do nothing. _ => { println!("Non-UART external interrupt: {}", interrupt); } } // We've claimed it, so now say that we've handled it. This resets the interrupt pending // and allows the UART to interrupt again. Otherwise, the UART will get "stuck". complete(interrupt); } } ================================================ FILE: risc_v/chapters/ch9/src/process.rs ================================================ // process.rs // Kernel and user processes // Stephen Marz // 27 Nov 2019 use crate::{cpu::{TrapFrame, satp_fence_asid, build_satp, SatpMode}, page::{alloc, dealloc, map, unmap, zalloc, EntryBits, Table, PAGE_SIZE}}; use alloc::collections::vec_deque::VecDeque; // How many pages are we going to give a process for their // stack? const STACK_PAGES: usize = 2; // We want to adjust the stack to be at the bottom of the memory allocation // regardless of where it is on the kernel heap. const STACK_ADDR: usize = 0x1_0000_0000; // All processes will have a defined starting point in virtual memory. // We will use this later when we load processes from disk. // const PROCESS_STARTING_ADDR: usize = 0x2000_0000; // Here, we store a process list. It uses the global allocator // that we made before and its job is to store all processes. // We will have this list OWN the process. So, anytime we want // the process, we will consult the process list. // Using an Option here is one method of creating a "lazy static". 
// Rust requires that all statics be initialized, but all // initializations must be at compile-time. We cannot allocate // a VecDeque at compile time, so we are somewhat forced to // do this. pub static mut PROCESS_LIST: Option> = None; // We can search through the process list to get a new PID, but // it's probably easier and faster just to increase the pid: static mut NEXT_PID: u16 = 1; extern "C" { fn make_syscall(a: usize) -> usize; } /// We will eventually move this function out of here, but its /// job is just to take a slot in the process list. fn init_process() { // We can't do much here until we have system calls because // we're running in User space. let mut i: usize = 0; loop { i += 1; if i > 100_000_000 { unsafe { make_syscall(1); } i = 0; } } } /// Add a process given a function address and then /// push it onto the LinkedList. Uses Process::new_default /// to create a new stack, etc. pub fn add_process_default(pr: fn()) { unsafe { // This is the Rust-ism that really trips up C++ programmers. // PROCESS_LIST is wrapped in an Option<> enumeration, which // means that the Option owns the Deque. We can only borrow from // it or move ownership to us. In this case, we choose the // latter, where we move ownership to us, add a process, and // then move ownership back to the PROCESS_LIST. // This allows mutual exclusion as anyone else trying to grab // the process list will get None rather than the Deque. if let Some(mut pl) = PROCESS_LIST.take() { // .take() will replace PROCESS_LIST with None and give // us the only copy of the Deque. let p = Process::new_default(pr); pl.push_back(p); // Now, we no longer need the owned Deque, so we hand it // back by replacing the PROCESS_LIST's None with the // Some(pl). PROCESS_LIST.replace(pl); } // TODO: When we get to multi-hart processing, we need to keep // trying to grab the process list. We can do this with an // atomic instruction. but right now, we're a single-processor // computer. 
} } /// This should only be called once, and its job is to create /// the init process. Right now, this process is in the kernel, /// but later, it should call the shell. pub fn init() -> usize { unsafe { PROCESS_LIST = Some(VecDeque::with_capacity(15)); add_process_default(init_process); // Ugh....Rust is giving me fits over here! // I just want a memory address to the trap frame, but // due to the borrow rules of Rust, I'm fighting here. So, // instead, let's move the value out of PROCESS_LIST, get // the address, and then move it right back in. let pl = PROCESS_LIST.take().unwrap(); let p = pl.front().unwrap().frame; // let frame = p as *const TrapFrame as usize; // println!("Init's frame is at 0x{:08x}", frame); // Put the process list back in the global. PROCESS_LIST.replace(pl); // Return the first instruction's address to execute. // Since we use the MMU, all start here. (*p).pc } } // Our process must be able to sleep, wait, or run. // Running - means that when the scheduler finds this process, it can run it. // Sleeping - means that the process is waiting on a certain amount of time. // Waiting - means that the process is waiting on I/O // Dead - We should never get here, but we can flag a process as Dead and clean // it out of the list later. #[repr(u8)] pub enum ProcessState { Running, Sleeping, Waiting, Dead, } // Let's represent this in C ABI. We do this // because we need to access some of the fields // in assembly. Rust gets to choose how it orders // the fields unless we represent the structure in // C-style ABI. 
#[repr(C)] pub struct Process { frame: *mut TrapFrame, stack: *mut u8, pid: u16, root: *mut Table, state: ProcessState, data: ProcessData, sleep_until: usize, } impl Process { pub fn get_frame_address(&self) -> usize { self.frame as usize } pub fn get_program_counter(&self) -> usize { unsafe { (*self.frame).pc } } pub fn get_table_address(&self) -> usize { self.root as usize } pub fn get_state(&self) -> &ProcessState { &self.state } pub fn get_pid(&self) -> u16 { self.pid } pub fn get_sleep_until(&self) -> usize { self.sleep_until } pub fn new_default(func: fn()) -> Self { let func_addr = func as usize; let func_vaddr = func_addr; //- 0x6000_0000; // println!("func_addr = {:x} -> {:x}", func_addr, func_vaddr); // We will convert NEXT_PID below into an atomic increment when // we start getting into multi-hart processing. For now, we want // a process. Get it to work, then improve it! let mut ret_proc = Process { frame: zalloc(1) as *mut TrapFrame, stack: alloc(STACK_PAGES), pid: unsafe { NEXT_PID }, root: zalloc(1) as *mut Table, state: ProcessState::Running, data: ProcessData::zero(), sleep_until: 0 }; unsafe { satp_fence_asid(NEXT_PID as usize); NEXT_PID += 1; } // Now we move the stack pointer to the bottom of the // allocation. The spec shows that register x2 (2) is the stack // pointer. // We could use ret_proc.stack.add, but that's an unsafe // function which would require an unsafe block. So, convert it // to usize first and then add PAGE_SIZE is better. // We also need to set the stack adjustment so that it is at the // bottom of the memory and far away from heap allocations. 
let saddr = ret_proc.stack as usize; unsafe { (*ret_proc.frame).pc = func_vaddr; (*ret_proc.frame).regs[2] = STACK_ADDR + PAGE_SIZE * STACK_PAGES; } // Map the stack on the MMU let pt; unsafe { pt = &mut *ret_proc.root; (*ret_proc.frame).satp = build_satp(SatpMode::Sv39, ret_proc.pid as usize, ret_proc.root as usize); } // We need to map the stack onto the user process' virtual // memory This gets a little hairy because we need to also map // the function code too. for i in 0..STACK_PAGES { let addr = i * PAGE_SIZE; map( pt, STACK_ADDR + addr, saddr + addr, EntryBits::UserReadWrite.val(), 0, ); // println!("Set stack from 0x{:016x} -> 0x{:016x}", STACK_ADDR + addr, saddr + addr); } // Map the program counter on the MMU and other bits for i in 0..=100 { let modifier = i * 0x1000; map( pt, func_vaddr + modifier, func_addr + modifier, EntryBits::UserReadWriteExecute.val(), 0, ); } // This is the make_syscall function // The reason we need this is because we're running a process // that is inside of the kernel. When we start loading from a block // devices, we can load the instructions anywhere in memory. map(pt, 0x8000_0000, 0x8000_0000, EntryBits::UserReadExecute.val(), 0); ret_proc } } impl Drop for Process { /// Since we're storing ownership of a Process in the linked list, /// we can cause it to deallocate automatically when it is removed. fn drop(&mut self) { // We allocate the stack as a page. dealloc(self.stack); // This is unsafe, but it's at the drop stage, so we won't // be using this again. unsafe { // Remember that unmap unmaps all levels of page tables // except for the root. It also deallocates the memory // associated with the tables. unmap(&mut *self.root); } dealloc(self.root as *mut u8); } } // The private data in a process contains information // that is relevant to where we are, including the path // and open file descriptors. // We will allow dead code for now until we have a need for the // private process data. 
This is essentially our resource control block (RCB). #[allow(dead_code)] pub struct ProcessData { cwd_path: [u8; 128], } // This is private data that we can query with system calls. // If we want to implement CFQ (completely fair queuing), which // is a per-process block queuing algorithm, we can put that here. impl ProcessData { pub fn zero() -> Self { ProcessData { cwd_path: [0; 128], } } } ================================================ FILE: risc_v/chapters/ch9/src/rng.rs ================================================ // rng.rs // Random number generator using VirtIO // Stephen Marz // 16 March 2020 pub fn setup_entropy_device(_ptr: *mut u32) -> bool { false } ================================================ FILE: risc_v/chapters/ch9/src/sched.rs ================================================ // sched.rs // Simple process scheduler // Stephen Marz // 27 Dec 2019 use crate::process::{ProcessState, PROCESS_LIST}; pub fn schedule() -> usize { unsafe { if let Some(mut pl) = PROCESS_LIST.take() { pl.rotate_left(1); let mut frame_addr: usize = 0; // let mut mepc: usize = 0; // let mut satp: usize = 0; // let mut pid: usize = 0; if let Some(prc) = pl.front() { match prc.get_state() { ProcessState::Running => { frame_addr = prc.get_frame_address(); // satp = prc.get_table_address(); // pid = prc.get_pid() as usize; }, ProcessState::Sleeping => {}, _ => {}, } } // println!("Scheduling {}", pid); PROCESS_LIST.replace(pl); if frame_addr != 0 { // MODE 8 is 39-bit virtual address MMU // I'm using the PID as the address space // identifier to hopefully help with (not?) // flushing the TLB whenever we switch // processes. 
return frame_addr; } } } 0 } ================================================ FILE: risc_v/chapters/ch9/src/syscall.rs ================================================ // syscall.rs // System calls // Stephen Marz // 3 Jan 2020 use crate::cpu::TrapFrame; pub fn do_syscall(mepc: usize, frame: *mut TrapFrame) -> usize { let syscall_number; unsafe { // A0 is X10, so it's register number 10. syscall_number = (*frame).regs[10]; // for i in 0..32 { // print!("regs[{:02}] = 0x{:08x} ", i, (*frame).regs[i]); // if (i+1) % 4 == 0 { // println!(); // } // } } match syscall_number { 0 => { // Exit // Currently, we cannot kill a process, it runs forever. We will delete // the process later and free the resources, but for now, we want to get // used to how processes will be scheduled on the CPU. mepc + 4 }, 1 => { println!("Test syscall"); mepc + 4 }, _ => { println!("Unknown syscall number {}", syscall_number); mepc + 4 } } } ================================================ FILE: risc_v/chapters/ch9/src/trap.rs ================================================ // trap.rs // Trap routines // Stephen Marz // 10 October 2019 use crate::cpu::{CONTEXT_SWITCH_TIME, TrapFrame}; use crate::plic; use crate::syscall::do_syscall; use crate::sched::schedule; use crate::rust_switch_to_user; #[no_mangle] /// The m_trap stands for "machine trap". Right now, we are handling /// all traps at machine mode. In this mode, we can figure out what's /// going on and send a trap where it needs to be. Remember, in machine /// mode and in this trap, interrupts are disabled and the MMU is off. extern "C" fn m_trap(epc: usize, tval: usize, cause: usize, hart: usize, _status: usize, frame: *mut TrapFrame) -> usize { // We're going to handle all traps in machine mode. RISC-V lets // us delegate to supervisor mode, but switching out SATP (virtual memory) // gets hairy. 
let is_async = { if cause >> 63 & 1 == 1 { true } else { false } }; // The cause contains the type of trap (sync, async) as well as the cause // number. So, here we narrow down just the cause number. let cause_num = cause & 0xfff; let mut return_pc = epc; if is_async { // Asynchronous trap match cause_num { 3 => { // Machine software println!("Machine software interrupt CPU #{}", hart); }, 7 => { // This is the context-switch timer. // We would typically invoke the scheduler here to pick another // process to run. // Machine timer // println!("CTX"); let frame = schedule(); schedule_next_context_switch(1); rust_switch_to_user(frame); }, 11 => { // Machine external (interrupt from Platform Interrupt Controller (PLIC)) // println!("Machine external interrupt CPU#{}", hart); // We will check the next interrupt. If the interrupt isn't available, this will // give us None. However, that would mean we got a spurious interrupt, unless we // get an interrupt from a non-PLIC source. This is the main reason that the PLIC // hardwires the id 0 to 0, so that we can use it as an error case. plic::handle_interrupt(); }, _ => { panic!("Unhandled async trap CPU#{} -> {}\n", hart, cause_num); } } } else { // Synchronous trap match cause_num { 2 => { // Illegal instruction panic!("Illegal instruction CPU#{} -> 0x{:08x}: 0x{:08x}\n", hart, epc, tval); // We need while trues here until we have a functioning "delete from scheduler" // I use while true because Rust will warn us that it looks stupid. // This is what I want so that I remember to remove this and replace // them later. while true {} }, 8 => { // Environment (system) call from User mode // println!("E-call from User mode! CPU#{} -> 0x{:08x}", hart, epc); return_pc = do_syscall(return_pc, frame); }, 9 => { // Environment (system) call from Supervisor mode println!("E-call from Supervisor mode! 
CPU#{} -> 0x{:08x}", hart, epc); return_pc = do_syscall(return_pc, frame); }, 11 => { // Environment (system) call from Machine mode panic!("E-call from Machine mode! CPU#{} -> 0x{:08x}\n", hart, epc); }, // Page faults 12 => { // Instruction page fault println!("Instruction page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval); // We need while trues here until we have a functioning "delete from scheduler" while true {} return_pc += 4; }, 13 => { // Load page fault println!("Load page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval); // We need while trues here until we have a functioning "delete from scheduler" while true {} return_pc += 4; }, 15 => { // Store page fault println!("Store page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval); // We need while trues here until we have a functioning "delete from scheduler" while true {} return_pc += 4; }, _ => { panic!("Unhandled sync trap CPU#{} -> {}\n", hart, cause_num); } } }; // Finally, return the updated program counter return_pc } pub const MMIO_MTIMECMP: *mut u64 = 0x0200_4000usize as *mut u64; pub const MMIO_MTIME: *const u64 = 0x0200_BFF8 as *const u64; pub fn schedule_next_context_switch(qm: u16) { // This is much too slow for normal operations, but it gives us // a visual of what's happening behind the scenes. 
unsafe { MMIO_MTIMECMP.write_volatile(MMIO_MTIME.read_volatile().wrapping_add(CONTEXT_SWITCH_TIME * qm as u64)); } } ================================================ FILE: risc_v/chapters/ch9/src/uart.rs ================================================ // uart.rs // UART routines and driver use core::{convert::TryInto, fmt::{Error, Write}}; pub struct Uart { base_address: usize, } impl Write for Uart { fn write_str(&mut self, out: &str) -> Result<(), Error> { for c in out.bytes() { self.put(c); } Ok(()) } } impl Uart { pub fn new(base_address: usize) -> Self { Uart { base_address } } pub fn init(&mut self) { let ptr = self.base_address as *mut u8; unsafe { // First, set the word length, which // are bits 0 and 1 of the line control register (LCR) // which is at base_address + 3 // We can easily write the value 3 here or 0b11, but I'm // extending it so that it is clear we're setting two // individual fields // Word 0 Word 1 // ~~~~~~ ~~~~~~ let lcr: u8 = (1 << 0) | (1 << 1); ptr.add(3).write_volatile(lcr); // Now, enable the FIFO, which is bit index 0 of the // FIFO control register (FCR at offset 2). // Again, we can just write 1 here, but when we use left // shift, it's easier to see that we're trying to write // bit index #0. ptr.add(2).write_volatile(1 << 0); // Enable receiver buffer interrupts, which is at bit // index 0 of the interrupt enable register (IER at // offset 1). ptr.add(1).write_volatile(1 << 0); // If we cared about the divisor, the code below would // set the divisor from a global clock rate of 22.729 // MHz (22,729,000 cycles per second) to a signaling // rate of 2400 (BAUD). We usually have much faster // signalling rates nowadays, but this demonstrates what // the divisor actually does. 
The formula given in the // NS16500A specification for calculating the divisor // is: // divisor = ceil( (clock_hz) / (baud_sps x 16) ) // So, we substitute our values and get: // divisor = ceil( 22_729_000 / (2400 x 16) ) // divisor = ceil( 22_729_000 / 38_400 ) // divisor = ceil( 591.901 ) = 592 // The divisor register is two bytes (16 bits), so we // need to split the value 592 into two bytes. // Typically, we would calculate this based on measuring // the clock rate, but again, for our purposes [qemu], // this doesn't really do anything. let divisor: u16 = 592; let divisor_least: u8 = (divisor & 0xff).try_into().unwrap(); let divisor_most: u8 = (divisor >> 8).try_into().unwrap(); // Notice that the divisor register DLL (divisor latch // least) and DLM (divisor latch most) have the same // base address as the receiver/transmitter and the // interrupt enable register. To change what the base // address points to, we open the "divisor latch" by // writing 1 into the Divisor Latch Access Bit (DLAB), // which is bit index 7 of the Line Control Register // (LCR) which is at base_address + 3. ptr.add(3).write_volatile(lcr | 1 << 7); // Now, base addresses 0 and 1 point to DLL and DLM, // respectively. Put the lower 8 bits of the divisor // into DLL ptr.add(0).write_volatile(divisor_least); ptr.add(1).write_volatile(divisor_most); // Now that we've written the divisor, we never have to // touch this again. In hardware, this will divide the // global clock (22.729 MHz) into one suitable for 2,400 // signals per second. So, to once again get access to // the RBR/THR/IER registers, we need to close the DLAB // bit by clearing it to 0. 
ptr.add(3).write_volatile(lcr); } } pub fn put(&mut self, c: u8) { let ptr = self.base_address as *mut u8; unsafe { ptr.add(0).write_volatile(c); } } pub fn get(&mut self) -> Option { let ptr = self.base_address as *mut u8; unsafe { if ptr.add(5).read_volatile() & 1 == 0 { // The DR bit is 0, meaning no data None } else { // The DR bit is 1, meaning data! Some(ptr.add(0).read_volatile()) } } } } ================================================ FILE: risc_v/chapters/ch9/src/virtio.rs ================================================ // virtio.rs // VirtIO routines for the VirtIO protocol // Stephen Marz // 10 March 2020 use crate::{block, block::setup_block_device, page::PAGE_SIZE}; use crate::rng::setup_entropy_device; use core::mem::size_of; // Flags // Descriptor flags have VIRTIO_DESC_F as a prefix // Available flags have VIRTIO_AVAIL_F pub const VIRTIO_DESC_F_NEXT: u16 = 1; pub const VIRTIO_DESC_F_WRITE: u16 = 2; pub const VIRTIO_DESC_F_INDIRECT: u16 = 4; pub const VIRTIO_AVAIL_F_NO_INTERRUPT: u16 = 1; pub const VIRTIO_USED_F_NO_NOTIFY: u16 = 1; // According to the documentation, this must be a power // of 2 for the new style. So, I'm changing this to use // 1 << instead because that will enforce this standard. pub const VIRTIO_RING_SIZE: usize = 1 << 7; // VirtIO structures // The descriptor holds the data that we need to send to // the device. The address is a physical address and NOT // a virtual address. The len is in bytes and the flags are // specified above. Any descriptor can be chained, hence the // next field, but only if the F_NEXT flag is specified. 
#[repr(C)] pub struct Descriptor { pub addr: u64, pub len: u32, pub flags: u16, pub next: u16, } #[repr(C)] pub struct Available { pub flags: u16, pub idx: u16, pub ring: [u16; VIRTIO_RING_SIZE], pub event: u16, } #[repr(C)] pub struct UsedElem { pub id: u32, pub len: u32, } #[repr(C)] pub struct Used { pub flags: u16, pub idx: u16, pub ring: [UsedElem; VIRTIO_RING_SIZE], pub event: u16, } #[repr(C)] pub struct Queue { pub desc: [Descriptor; VIRTIO_RING_SIZE], pub avail: Available, // Calculating padding, we need the used ring to start on a page boundary. We take the page size, subtract the // amount the descriptor ring takes then subtract the available structure and ring. pub padding0: [u8; PAGE_SIZE - size_of::() * VIRTIO_RING_SIZE - size_of::()], pub used: Used, } // The MMIO transport is "legacy" in QEMU, so these registers represent // the legacy interface. #[repr(usize)] pub enum MmioOffsets { MagicValue = 0x000, Version = 0x004, DeviceId = 0x008, VendorId = 0x00c, HostFeatures = 0x010, HostFeaturesSel = 0x014, GuestFeatures = 0x020, GuestFeaturesSel = 0x024, GuestPageSize = 0x028, QueueSel = 0x030, QueueNumMax = 0x034, QueueNum = 0x038, QueueAlign = 0x03c, QueuePfn = 0x040, QueueNotify = 0x050, InterruptStatus = 0x060, InterruptAck = 0x064, Status = 0x070, Config = 0x100, } #[repr(usize)] pub enum DeviceTypes { None = 0, Network = 1, Block = 2, Console = 3, Entropy = 4, Gpu = 16, Input = 18, Memory = 24, } // Enumerations in Rust aren't easy to convert back // and forth. Furthermore, we're going to use a u32 // pointer, so we need to "undo" the scaling that // Rust will do with the .add() function. impl MmioOffsets { pub fn val(self) -> usize { self as usize } pub fn scaled(self, scale: usize) -> usize { self.val() / scale } pub fn scale32(self) -> usize { self.scaled(4) } } pub enum StatusField { Acknowledge = 1, Driver = 2, Failed = 128, FeaturesOk = 8, DriverOk = 4, DeviceNeedsReset = 64, } // The status field will be compared to the status register. 
So, // I've made some helper functions to checking that register easier. impl StatusField { pub fn val(self) -> usize { self as usize } pub fn val32(self) -> u32 { self as u32 } pub fn test(sf: u32, bit: StatusField) -> bool { sf & bit.val32() != 0 } pub fn is_failed(sf: u32) -> bool { StatusField::test(sf, StatusField::Failed) } pub fn needs_reset(sf: u32) -> bool { StatusField::test(sf, StatusField::DeviceNeedsReset) } pub fn driver_ok(sf: u32) -> bool { StatusField::test(sf, StatusField::DriverOk) } pub fn features_ok(sf: u32) -> bool { StatusField::test(sf, StatusField::FeaturesOk) } } // We probably shouldn't put these here, but it'll help // with probing the bus, etc. These are architecture specific // which is why I say that. pub const MMIO_VIRTIO_START: usize = 0x1000_1000; pub const MMIO_VIRTIO_END: usize = 0x1000_8000; pub const MMIO_VIRTIO_STRIDE: usize = 0x1000; pub const MMIO_VIRTIO_MAGIC: u32 = 0x74_72_69_76; // The VirtioDevice is essentially a structure we can put into an array // to determine what virtio devices are attached to the system. Right now, // we're using the 1..=8 linearity of the VirtIO devices on QEMU to help // with reducing the data structure itself. Otherwise, we might be forced // to use an MMIO pointer. pub struct VirtioDevice { pub devtype: DeviceTypes, } impl VirtioDevice { pub const fn new() -> Self { VirtioDevice { devtype: DeviceTypes::None, } } pub const fn new_with(devtype: DeviceTypes) -> Self { VirtioDevice { devtype } } } static mut VIRTIO_DEVICES: [Option; 8] = [None, None, None, None, None, None, None, None]; /// Probe the VirtIO bus for devices that might be /// out there. pub fn probe() { // Rust's for loop uses an Iterator object, which now has a step_by // modifier to change how much it steps. Also recall that ..= means up // to AND including MMIO_VIRTIO_END. 
for addr in (MMIO_VIRTIO_START..=MMIO_VIRTIO_END).step_by(MMIO_VIRTIO_STRIDE) { print!("Virtio probing 0x{:08x}...", addr); let magicvalue; let deviceid; let ptr = addr as *mut u32; unsafe { magicvalue = ptr.read_volatile(); deviceid = ptr.add(2).read_volatile(); } // 0x74_72_69_76 is "virt" in little endian, so in reality // it is triv. All VirtIO devices have this attached to the // MagicValue register (offset 0x000) if MMIO_VIRTIO_MAGIC != magicvalue { println!("not virtio."); } // If we are a virtio device, we now need to see if anything // is actually attached to it. The DeviceID register will // contain what type of device this is. If this value is 0, // then it is not connected. else if 0 == deviceid { println!("not connected."); } // If we get here, we have a connected virtio device. Now we have // to figure out what kind it is so we can do device-specific setup. else { match deviceid { // DeviceID 1 is a network device 1 => { print!("network device..."); if false == setup_network_device(ptr) { println!("setup failed."); } else { println!("setup succeeded!"); } }, // DeviceID 2 is a block device 2 => { print!("block device..."); if false == setup_block_device(ptr) { println!("setup failed."); } else { let idx = (addr - MMIO_VIRTIO_START) >> 12; unsafe { VIRTIO_DEVICES[idx] = Some(VirtioDevice::new_with(DeviceTypes::Block)); } println!("setup succeeded!"); } }, // DeviceID 4 is a random number generator device 4 => { print!("entropy device..."); if false == setup_entropy_device(ptr) { println!("setup failed."); } else { println!("setup succeeded!"); } }, // DeviceID 16 is a GPU device 16 => { print!("GPU device..."); if false == setup_gpu_device(ptr) { println!("setup failed."); } else { println!("setup succeeded!"); } }, // DeviceID 18 is an input device 18 => { print!("input device..."); if false == setup_input_device(ptr) { println!("setup failed."); } else { println!("setup succeeded!"); } }, _ => println!("unknown device type."), } } } } pub fn 
setup_network_device(_ptr: *mut u32) -> bool { false } pub fn setup_gpu_device(_ptr: *mut u32) -> bool { false } pub fn setup_input_device(_ptr: *mut u32) -> bool { false } // The External pin (PLIC) trap will lead us here if it is // determined that interrupts 1..=8 are what caused the interrupt. // In here, we try to figure out where to direct the interrupt // and then handle it. pub fn handle_interrupt(interrupt: u32) { let idx = interrupt as usize - 1; unsafe { if let Some(vd) = &VIRTIO_DEVICES[idx] { match vd.devtype { DeviceTypes::Block => { block::handle_interrupt(idx); }, _ => { println!("Invalid device generated interrupt!"); }, } } else { println!("Spurious interrupt {}", interrupt); } } } ================================================ FILE: risc_v/src/asm/boot.S ================================================ # boot.S # bootloader for SoS # Stephen Marz # 8 February 2019 # Disable generation of compressed instructions. .option norvc # Define a .text.init section. The .text.init is put at the # starting address so that the entry _start is put at the RISC-V # address 0x8000_0000. .section .text.init # Execution starts here. .global _start _start: # Disable linker instruction relaxation for the `la` instruction below. # This disallows the assembler from assuming that `gp` is already initialized. # This causes the value stored in `gp` to be calculated from `pc`. # The job of the global pointer is to give the linker the ability to address # memory relative to GP instead of as an absolute address. .option push .option norelax la gp, _global_pointer .option pop # SATP should be zero, but let's make sure. Each HART has its own # SATP register. csrw satp, zero # Any hardware threads (hart) that are not bootstrapping # need to wait for an IPI csrr t0, mhartid bnez t0, 3f # Set all bytes in the BSS section to zero. 
	la		a0, _bss_start
	la		a1, _bss_end
	bgeu	a0, a1, 2f
1:
	sd		zero, (a0)
	addi	a0, a0, 8
	bltu	a0, a1, 1b
2:
	# The stack grows from bottom to top, so we put the stack pointer
	# to the very end of the stack range.
	la		sp, _stack_end
	# Setting `mstatus` register:
	# 0b11 << 11: Machine's previous protection mode is 3 (MPP=3 [Machine]).
	# NOTE(review): the original comment claimed MPP=2; the code sets 0b11,
	# which is machine mode (kinit runs in machine mode).
	# 1 << 13   : FS=01, the floating-point unit's "Initial" state (the FS
	#             field occupies mstatus bits 14:13; trap.S tests it there).
	li		t0, 0b11 << 11 | (1 << 13)
	csrw	mstatus, t0
	# Do not allow interrupts while running kinit
	csrw	mie, zero
	# Machine's exception program counter (MEPC) is set to `kinit`.
	la		t1, kinit
	csrw	mepc, t1
	# Set the return address to get us into supervisor mode
	la		ra, 2f
	# We use mret here so that the mstatus register is properly updated.
	mret
2:
	# We set the return address (ra above) to this label. When kinit() is finished
	# in Rust, it will return here.
	# Setting `mstatus` (supervisor status) register:
	# 0b00 << 11 : Previous protection mode is 0 (MPP=00 [User]).
	# NOTE(review): the original comment said Supervisor (0b01); the code sets
	# 0b00, consistent with the "jump to first process in user mode" comment below.
	# 1 << 7 : Previous machine interrupt-enable bit is 1 (MPIE=1 [Enabled])
	# 1 << 5 : Previous interrupt-enable bit is 1 (SPIE=1 [Enabled]).
	# 1 << 13: FS=01 (floating point "Initial" state).
	# We set the "previous" bits because the mret will write the current bits
	# with the previous bits.
	li		t0, (0b00 << 11) | (1 << 7) | (1 << 5) | (1 << 13)
	csrw	mstatus, t0
	# Machine's trap vector base address is set to `m_trap_vector`, for
	# "machine" trap vector.
	la		t2, m_trap_vector
	csrw	mtvec, t2
	# Jump to first process. We put the MPP = 00 for user mode, so after
	# mret, we will jump to the first process' addresss in user mode.
	la		ra, 4f
	mret
3:
	# Parked harts go here. We need to set these
	# to only awaken if it receives a software interrupt,
	# which we're going to call the SIPI (Software Intra-Processor Interrupt).
	# We call the SIPI by writing the software interrupt into the Core Local Interruptor (CLINT)
	# Which is calculated by: base_address + hart * 4
	# where base address is 0x0200_0000 (MMIO CLINT base address)
	# We only use additional harts to run user-space programs, although this may
	# change.
	# We divide up the stack so the harts aren't clobbering one another.
	# Each hart gets a 64 KiB (0x10000) slice below _stack_end, indexed by mhartid.
	la		sp, _stack_end
	li		t0, 0x10000
	csrr	a0, mhartid
	mul		t0, t0, a0
	sub		sp, sp, t0
	# The parked harts will be put into machine mode with interrupts enabled.
	li		t0, 0b11 << 11 | (1 << 7) | (1 << 13)
	csrw	mstatus, t0
	# Allow for MSIP (Software interrupt). We will write the MSIP from hart #0 to
	# awaken these parked harts.
	li		t3, (1 << 3)
	csrw	mie, t3
	# Machine's exception program counter (MEPC) is set to the Rust initialization
	# code and waiting loop.
	la		t1, kinit_hart
	csrw	mepc, t1
	# Machine's trap vector base address is set to `m_trap_vector`, for
	# "machine" trap vector. The Rust initialization routines will give each
	# hart its own trap frame. We can use the same trap function and distinguish
	# between each hart by looking at the trap frame.
	la		t2, m_trap_vector
	csrw	mtvec, t2
	# Whenever our hart is done initializing, we want it to return to the waiting
	# loop, which is just below mret.
	la		ra, 4f
	# We use mret here so that the mstatus register is properly updated.
	mret
4:
	# wfi = wait for interrupt. This is a hint to the harts to shut everything needed
	# down. However, the RISC-V specification allows for wfi to do nothing. Anyway,
	# with QEMU, this will save some CPU!
	wfi
	j		4b
================================================ FILE: risc_v/src/asm/mem.S ================================================
// mem.S
// Importation of linker symbols
// Each linker-script symbol is re-exported as a 64-bit constant in .rodata
// so that Rust code can read it (see the `extern` declarations in the kernel).
.section .rodata
.global HEAP_START
HEAP_START: .dword _heap_start

.global HEAP_SIZE
HEAP_SIZE: .dword _heap_size

.global TEXT_START
TEXT_START: .dword _text_start

.global TEXT_END
TEXT_END: .dword _text_end

.global DATA_START
DATA_START: .dword _data_start

.global DATA_END
DATA_END: .dword _data_end

.global RODATA_START
RODATA_START: .dword _rodata_start

.global RODATA_END
RODATA_END: .dword _rodata_end

.global BSS_START
BSS_START: .dword _bss_start

.global BSS_END
BSS_END: .dword _bss_end

.global KERNEL_STACK_START
KERNEL_STACK_START: .dword _stack_start

.global KERNEL_STACK_END
KERNEL_STACK_END: .dword _stack_end
================================================ FILE: risc_v/src/asm/trap.S ================================================
# trap.S
# Trap handler and global context
# Steve Operating System
# Stephen Marz
# 24 February 2019
.option norvc
.altmacro
.set NUM_GP_REGS, 32  # Number of registers per context
.set REG_SIZE, 8   # Register size (in bytes)

# Use macros for saving and restoring multiple registers
# save_gp/load_gp store or fetch general-purpose register x\i at offset
# i*8 from \basereg; save_fp/load_fp do the same for f\i, offset past
# the 32 GP slots (matching TrapFrame's regs/fregs layout in cpu.rs).
.macro save_gp i, basereg=t6
	sd	x\i, ((\i)*REG_SIZE)(\basereg)
.endm
.macro load_gp i, basereg=t6
	ld	x\i, ((\i)*REG_SIZE)(\basereg)
.endm
.macro save_fp i, basereg=t6
	fsd	f\i, ((NUM_GP_REGS+(\i))*REG_SIZE)(\basereg)
.endm
.macro load_fp i, basereg=t6
	fld	f\i, ((NUM_GP_REGS+(\i))*REG_SIZE)(\basereg)
.endm

.section .text
.global m_trap_vector
# This must be aligned by 4 since the last two bits
# of the mtvec register do not contribute to the address
# of this vector.
.align 4
m_trap_vector:
	# All registers are volatile here, we need to save them
	# before we do anything.
	csrrw	t6, mscratch, t6
	# csrrw will atomically swap t6 into mscratch and the old
	# value of mscratch into t6. This is nice because we just
	# switched values and didn't destroy anything -- all atomically!
	# in cpu.rs we have a structure of:
	# 32 gp regs		0
	# 32 fp regs		256
	# We use t6 as the temporary register because it is the very
	# bottom register (x31)
	.set 	i, 0
	.rept	31
		save_gp	%i
		.set	i, i+1
	.endr

	# Save the actual t6 register, which we swapped into
	# mscratch
	mv		t5, t6
	csrr	t6, mscratch
	save_gp 31, t5

	# Restore the kernel trap frame into mscratch
	csrw	mscratch, t5

	# mstatus bits 14:13 are the FS field; only save the floating-point
	# registers when FS == 3 (Dirty, i.e. FP state has been modified).
	csrr	t1, mstatus
	srli	t0, t1, 13
	andi	t0, t0, 3
	li		t3, 3
	bne		t0, t3, 1f
	# Save floating point registers
	.set 	i, 0
	.rept	32
		save_fp	%i, t5
		.set	i, i+1
	.endr
1:
	# Get ready to go into Rust (trap.rs)
	# We don't want to write into the user's stack or whomever
	# messed with us here.
	# csrw	mie, zero
	csrr	a0, mepc
	# Offset 520 is the `pc` field of TrapFrame (see cpu.rs).
	sd		a0, 520(t5)
	csrr	a1, mtval
	csrr	a2, mcause
	csrr	a3, mhartid
	csrr	a4, mstatus
	csrr	a5, mscratch
	la		t0, KERNEL_STACK_END
	ld		sp, 0(t0)
	call	m_trap

	# When we get here, we've returned from m_trap, restore registers
	# and return.
	# m_trap will return the return address via a0.
	csrw	mepc, a0

	# Now load the trap frame back into t6
	csrr	t6, mscratch

	# As above: only restore FP state if FS == 3 (Dirty).
	csrr	t1, mstatus
	srli	t0, t1, 13
	andi	t0, t0, 3
	li		t3, 3
	bne		t0, t3, 1f
	.set	i, 0
	.rept	32
		load_fp %i
		.set i, i+1
	.endr
1:
	# Restore all GP registers
	.set	i, 1
	.rept	31
		load_gp	%i
		.set	i, i+1
	.endr
	# Since we ran this loop 31 times starting with i = 1,
	# the last one loaded t6 back to its original value.
	mret

.global switch_to_user
switch_to_user:
	# a0 - Frame address
	# a1 - Program counter
	# a2 - SATP Register
	csrw	mscratch, a0

	# Load program counter
	ld		a1, 520(a0)
	# Load satp
	ld		a2, 512(a0)
	# Load processor mode
	ld		a3, 552(a0)
	# Pid
	# ld		a4, 544(a0)

	# 1 << 7 is MPIE
	# Since user mode is 00, we don't need to set anything
	# in MPP (bits 12:11)
	li		t0, 1 << 7 | 1 << 5 | 1 << 13
	# Combine enable bits with mode bits.
	slli	a3, a3, 11
	or		t0, t0, a3
	csrw	mstatus, t0
	csrw	mepc, a1
	csrw	satp, a2
	li		t1, 0xaaa
	csrw	mie, t1
	la		t2, m_trap_vector
	csrw	mtvec, t2
	# This fence forces the MMU to flush the TLB. However, since
	# we're using the PID as the address space identifier, we might
	# only need this when we create a process. Right now, this ensures
	# correctness, however it isn't the most efficient.
	# sfence.vma
	# A0 is the context frame, so we need to reload it back
	# and mret so we can start running the program.
	# NOTE(review): unlike m_trap_vector, the FP restore below is
	# unconditional (no FS check), and the `1:` label is unused here.
	mv		t6, a0
	.set	i, 0
	.rept	32
		load_fp %i
		.set i, i+1
	.endr
1:
	.set	i, 1
	.rept	31
		load_gp %i, t6
		.set	i, i+1
	.endr
	mret

.global make_syscall
make_syscall:
	# We're setting this up to work with libgloss
	# They want a7 to be the system call number and all parameters
	# in a0 - a5
	mv	a7, a0
	mv	a0, a1
	mv	a1, a2
	mv	a2, a3
	mv	a3, a4
	mv	a4, a5
	mv	a5, a6
	ecall
	ret
================================================ FILE: risc_v/src/assembly.rs ================================================
// assembly.rs
// Assembly imports module
// Stephen Marz
// 20 April 2020

// This came from the Rust book documenting global_asm!.
// They show using include_str! with it to
// import a full assembly file, which is what I want here.
global_asm!(include_str!("asm/boot.S"));
global_asm!(include_str!("asm/mem.S"));
global_asm!(include_str!("asm/trap.S"));
================================================ FILE: risc_v/src/block.rs ================================================
// block.rs
// Block device using VirtIO protocol
// Stephen Marz
// 10 March 2020

use crate::{kmem::{kfree, kmalloc}, page::{zalloc, PAGE_SIZE}, process::{add_kernel_process_args, get_by_pid, set_running, set_waiting}, virtio, virtio::{Descriptor, MmioOffsets, Queue, StatusField, VIRTIO_RING_SIZE}};
use core::mem::size_of;
use alloc::boxed::Box;

// Disk geometry reported in the device configuration space.
#[repr(C)]
pub struct Geometry {
	cylinders: u16,
	heads:     u8,
	sectors:   u8,
}

// I/O topology hints reported in the device configuration space.
#[repr(C)]
pub struct Topology {
	physical_block_exp: u8,
	alignment_offset:   u8,
	min_io_size:        u16,
	opt_io_size:        u32,
}

// There is a configuration space for VirtIO that begins
// at offset 0x100 and continues to the size of the configuration.
// The structure below represents the configuration for a // block device. Really, all that this OS cares about is the // capacity. #[repr(C)] pub struct Config { capacity: u64, size_max: u32, seg_max: u32, geometry: Geometry, blk_size: u32, topology: Topology, writeback: u8, unused0: [u8; 3], max_discard_sector: u32, max_discard_seg: u32, discard_sector_alignment: u32, max_write_zeroes_sectors: u32, max_write_zeroes_seg: u32, write_zeroes_may_unmap: u8, unused1: [u8; 3], } // The header/data/status is a block request // packet. We send the header to tell the direction // (blktype: IN/OUT) and then the starting sector // we want to read. Then, we put the data buffer // as the Data structure and finally an 8-bit // status. The device will write one of three values // in here: 0 = success, 1 = io error, 2 = unsupported // operation. #[repr(C)] pub struct Header { blktype: u32, reserved: u32, sector: u64, } #[repr(C)] pub struct Data { data: *mut u8, } #[repr(C)] pub struct Status { status: u8, } #[repr(C)] pub struct Request { header: Header, data: Data, status: Status, head: u16, // Do not change anything above this line. // This is the PID of watcher. We store the PID // because it is possible that the process DIES // before we get here. If we used a pointer, we // may dereference invalid memory. watcher: u16, } // Internal block device structure // We keep our own used_idx and idx for // descriptors. There is a shared index, but that // tells us or the device if we've kept up with where // we are for the available (us) or used (device) ring. 
pub struct BlockDevice { queue: *mut Queue, dev: *mut u32, idx: u16, ack_used_idx: u16, read_only: bool, } // Type values pub const VIRTIO_BLK_T_IN: u32 = 0; pub const VIRTIO_BLK_T_OUT: u32 = 1; pub const VIRTIO_BLK_T_FLUSH: u32 = 4; pub const VIRTIO_BLK_T_DISCARD: u32 = 11; pub const VIRTIO_BLK_T_WRITE_ZEROES: u32 = 13; // Status values pub const VIRTIO_BLK_S_OK: u8 = 0; pub const VIRTIO_BLK_S_IOERR: u8 = 1; pub const VIRTIO_BLK_S_UNSUPP: u8 = 2; // Feature bits pub const VIRTIO_BLK_F_SIZE_MAX: u32 = 1; pub const VIRTIO_BLK_F_SEG_MAX: u32 = 2; pub const VIRTIO_BLK_F_GEOMETRY: u32 = 4; pub const VIRTIO_BLK_F_RO: u32 = 5; pub const VIRTIO_BLK_F_BLK_SIZE: u32 = 6; pub const VIRTIO_BLK_F_FLUSH: u32 = 9; pub const VIRTIO_BLK_F_TOPOLOGY: u32 = 10; pub const VIRTIO_BLK_F_CONFIG_WCE: u32 = 11; pub const VIRTIO_BLK_F_DISCARD: u32 = 13; pub const VIRTIO_BLK_F_WRITE_ZEROES: u32 = 14; // We might get several types of errors, but they can be enumerated here. pub enum BlockErrors { Success = 0, BlockDeviceNotFound, InvalidArgument, ReadOnly, } // Much like with processes, Rust requires some initialization // when we declare a static. In this case, we use the Option // value type to signal that the variable exists, but not the // queue itself. We will replace this with an actual queue when // we initialize the block system. static mut BLOCK_DEVICES: [Option; 8] = [None, None, None, None, None, None, None, None]; pub fn setup_block_device(ptr: *mut u32) -> bool { unsafe { // We can get the index of the device based on its address. // 0x1000_1000 is index 0 // 0x1000_2000 is index 1 // ... // 0x1000_8000 is index 7 // To get the number that changes over, we shift right 12 places // (3 hex digits) let idx = (ptr as usize - virtio::MMIO_VIRTIO_START) >> 12; // [Driver] Device Initialization // 1. Reset the device (write 0 into status) ptr.add(MmioOffsets::Status.scale32()).write_volatile(0); let mut status_bits = StatusField::Acknowledge.val32(); // 2. 
		// Set ACKNOWLEDGE status bit
		ptr.add(MmioOffsets::Status.scale32())
			.write_volatile(status_bits);
		// 3. Set the DRIVER status bit
		// NOTE(review): the comment says DRIVER, but the code ORs in
		// StatusField::DriverOk. In the virtio spec DRIVER (bit 1) and
		// DRIVER_OK (bit 2) are distinct status bits -- confirm whether
		// StatusField::Driver was intended here (QEMU tolerates this).
		status_bits |= StatusField::DriverOk.val32();
		ptr.add(MmioOffsets::Status.scale32())
			.write_volatile(status_bits);
		// 4. Read device feature bits, write subset of feature
		// bits understood by OS and driver to the device.
		let host_features = ptr.add(MmioOffsets::HostFeatures.scale32())
			.read_volatile();
		// We accept everything offered except read-only; instead we
		// remember read-only in `ro` and enforce it ourselves in block_op.
		let guest_features = host_features & !(1 << VIRTIO_BLK_F_RO);
		let ro = host_features & (1 << VIRTIO_BLK_F_RO) != 0;
		ptr.add(MmioOffsets::GuestFeatures.scale32())
			.write_volatile(guest_features);
		// 5. Set the FEATURES_OK status bit
		status_bits |= StatusField::FeaturesOk.val32();
		ptr.add(MmioOffsets::Status.scale32())
			.write_volatile(status_bits);
		// 6. Re-read status to ensure FEATURES_OK is still set.
		// Otherwise, it doesn't support our features.
		let status_ok = ptr.add(MmioOffsets::Status.scale32()).read_volatile();
		// If the status field no longer has features_ok set,
		// that means that the device couldn't accept
		// the features that we request. Therefore, this is
		// considered a "failed" state.
		if false == StatusField::features_ok(status_ok) {
			print!("features fail...");
			ptr.add(MmioOffsets::Status.scale32())
				.write_volatile(StatusField::Failed.val32());
			return false;
		}
		// 7. Perform device-specific setup.
		// Set the queue num. We have to make sure that the
		// queue size is valid because the device can only take
		// a certain size.
		let qnmax = ptr.add(MmioOffsets::QueueNumMax.scale32())
			.read_volatile();
		ptr.add(MmioOffsets::QueueNum.scale32())
			.write_volatile(VIRTIO_RING_SIZE as u32);
		if VIRTIO_RING_SIZE as u32 > qnmax {
			print!("queue size fail...");
			return false;
		}
		// First, if the block device array is empty, create it!
		// We add 4095 to round this up and then do an integer
		// divide to truncate the decimal. We don't add 4096,
		// because if it is exactly 4096 bytes, we would get two
		// pages, not one.
		// NOTE(review): the type parameter of size_of was stripped during
		// extraction -- this is presumably size_of::<Queue>(); verify
		// against the repository before building.
		let num_pages = (size_of::() + PAGE_SIZE - 1) / PAGE_SIZE;
		// println!("np = {}", num_pages);
		// We allocate a page for each device. This will the the
		// descriptor where we can communicate with the block
		// device. We will still use an MMIO register (in
		// particular, QueueNotify) to actually tell the device
		// we put something in memory. We also have to be
		// careful with memory ordering. We don't want to
		// issue a notify before all memory writes have
		// finished. We will look at that later, but we need
		// what is called a memory "fence" or barrier.
		ptr.add(MmioOffsets::QueueSel.scale32()).write_volatile(0);
		// Alignment is very important here. This is the memory address
		// alignment between the available and used rings. If this is
		// wrong, then we and the device will refer to different memory
		// addresses and hence get the wrong data in the used ring.
		// ptr.add(MmioOffsets::QueueAlign.scale32()).write_volatile(2);
		let queue_ptr = zalloc(num_pages) as *mut Queue;
		let queue_pfn = queue_ptr as u32;
		ptr.add(MmioOffsets::GuestPageSize.scale32())
			.write_volatile(PAGE_SIZE as u32);
		// QueuePFN is a physical page number, however it
		// appears for QEMU we have to write the entire memory
		// address. This is a physical memory address where we
		// (the OS) and the block device have in common for
		// making and receiving requests.
		ptr.add(MmioOffsets::QueuePfn.scale32())
			.write_volatile(queue_pfn / PAGE_SIZE as u32);
		// We need to store all of this data as a "BlockDevice"
		// structure We will be referring to this structure when
		// making block requests AND when handling responses.
		let bd = BlockDevice {
			queue: queue_ptr,
			dev: ptr,
			idx: 0,
			ack_used_idx: 0,
			read_only: ro,
		};
		BLOCK_DEVICES[idx] = Some(bd);
		// 8. Set the DRIVER_OK status bit.
		// Device is now "live"
		status_bits |= StatusField::DriverOk.val32();
		ptr.add(MmioOffsets::Status.scale32())
			.write_volatile(status_bits);
		true
	}
}

/// Place `desc` into the next free descriptor slot of `bd`'s virtqueue
/// and return that slot's index. If the descriptor chains (NEXT flag),
/// its `next` field is pointed at the following slot.
pub fn fill_next_descriptor(bd: &mut BlockDevice, desc: Descriptor) -> u16 {
	unsafe {
		// The ring structure increments here first. This allows us to
		// skip index 0, which then in the used ring will show that .id
		// > 0. This is one way to error check. We will eventually get
		// back to 0 as this index is cyclical. However, it shows if the
		// first read/write actually works.
		bd.idx = (bd.idx + 1) % VIRTIO_RING_SIZE as u16;
		(*bd.queue).desc[bd.idx as usize] = desc;
		if (*bd.queue).desc[bd.idx as usize].flags & virtio::VIRTIO_DESC_F_NEXT != 0 {
			// If the next flag is set, we need another descriptor.
			(*bd.queue).desc[bd.idx as usize].next = (bd.idx + 1) % VIRTIO_RING_SIZE as u16;
		}
		bd.idx
	}
}

/// This is now a common block operation for both reads and writes. Therefore,
/// when one thing needs to change, we can change it for both reads and writes.
/// There is a lot of error checking that I haven't done. The block device reads
/// sectors at a time, which are 512 bytes. Therefore, our buffer must be
/// capable of storing multiples of 512 bytes depending on the size. The size is
/// also a multiple of 512, but we don't really check that.
/// We DO however, check that we aren't writing to an R/O device. This would
/// cause a I/O error if we tried to write to a R/O device.
// NOTE(review): the return type's parameters were stripped during extraction;
// given `Ok(size)` (u32) and `Err(BlockErrors::...)`, this is presumably
// Result<u32, BlockErrors> -- verify against the repository.
pub fn block_op(dev: usize, buffer: *mut u8, size: u32, offset: u64, write: bool, watcher: u16) -> Result {
	unsafe {
		if let Some(bdev) = BLOCK_DEVICES[dev - 1].as_mut() {
			// Check to see if we are trying to write to a read only
			// device.
			if bdev.read_only && write {
				println!("Trying to write to read/only!");
				return Err(BlockErrors::ReadOnly);
			}
			if size % 512 != 0 {
				return Err(BlockErrors::InvalidArgument);
			}
			let sector = offset / 512;
			// TODO: Before we get here, we are NOT allowed to
			// schedule a read or write OUTSIDE of the disk's size.
			// So, we can read capacity from the configuration space
			// to ensure we stay within bounds.
			// NOTE(review): the size_of type parameters below were stripped
			// during extraction (presumably ::<Request>, ::<Header>, ::<Status>).
			let blk_request_size = size_of::();
			let blk_request = kmalloc(blk_request_size) as *mut Request;
			// Descriptor 1 of 3: the request header (read-only to the device).
			let desc = Descriptor {
				addr: &(*blk_request).header as *const Header as u64,
				len: size_of::() as u32,
				flags: virtio::VIRTIO_DESC_F_NEXT,
				next: 0,
			};
			let head_idx = fill_next_descriptor(bdev, desc);
			(*blk_request).header.sector = sector;
			// A write is an "out" direction, whereas a read is an
			// "in" direction.
			(*blk_request).header.blktype = if write {
				VIRTIO_BLK_T_OUT
			} else {
				VIRTIO_BLK_T_IN
			};
			// We put 111 in the status. Whenever the device
			// finishes, it will write into status. If we read
			// status and it is 111, we know that it wasn't written
			// to by the device.
			(*blk_request).data.data = buffer;
			(*blk_request).header.reserved = 0;
			(*blk_request).status.status = 111;
			(*blk_request).watcher = watcher;
			// Descriptor 2 of 3: the data buffer; device-writable on reads.
			let desc = Descriptor {
				addr: buffer as u64,
				len: size,
				flags: virtio::VIRTIO_DESC_F_NEXT
					| if !write {
						virtio::VIRTIO_DESC_F_WRITE
					} else {
						0
					},
				next: 0,
			};
			let _data_idx = fill_next_descriptor(bdev, desc);
			// Descriptor 3 of 3: the status byte, always device-writable.
			let desc = Descriptor {
				addr: &(*blk_request).status as *const Status as u64,
				len: size_of::() as u32,
				flags: virtio::VIRTIO_DESC_F_WRITE,
				next: 0,
			};
			let _status_idx = fill_next_descriptor(bdev, desc);
			// Publish the chain head in the available ring, then bump idx.
			(*bdev.queue).avail.ring[(*bdev.queue).avail.idx as usize % virtio::VIRTIO_RING_SIZE] = head_idx;
			(*bdev.queue).avail.idx = (*bdev.queue).avail.idx.wrapping_add(1);
			// The only queue a block device has is 0, which is the
			// request queue.
			bdev.dev
				.add(MmioOffsets::QueueNotify.scale32())
				.write_volatile(0);
			Ok(size)
		} else {
			Err(BlockErrors::BlockDeviceNotFound)
		}
	}
}

// Convenience wrapper: a read with no watcher process (watcher = 0).
pub fn read(dev: usize, buffer: *mut u8, size: u32, offset: u64) -> Result {
	block_op(dev, buffer, size, offset, false, 0)
}

// Convenience wrapper: a write with no watcher process (watcher = 0).
pub fn write(dev: usize, buffer: *mut u8, size: u32, offset: u64) -> Result {
	block_op(dev, buffer, size, offset, true, 0)
}

/// Here we handle block specific interrupts. Here, we need to check
/// the used ring and wind it up until we've handled everything.
/// This is how the device tells us that it's finished a request.
pub fn pending(bd: &mut BlockDevice) {
	// Here we need to check the used ring and then free the resources
	// given by the descriptor id.
	unsafe {
		let ref queue = *bd.queue;
		// Walk the used ring from where we last stopped (ack_used_idx)
		// up to the device's current position (used.idx).
		while bd.ack_used_idx != queue.used.idx {
			let ref elem = queue.used.ring
				[bd.ack_used_idx as usize % VIRTIO_RING_SIZE];
			bd.ack_used_idx = bd.ack_used_idx.wrapping_add(1);
			// Requests stay resident on the heap until this
			// function, so we can recapture the address here
			let rq = queue.desc[elem.id as usize].addr as *const Request;
			// A process might be waiting for this interrupt. Awaken
			// the process attached here.
			let pid_of_watcher = (*rq).watcher;
			// A PID of 0 means that we don't have a watcher.
			if pid_of_watcher > 0 {
				set_running(pid_of_watcher);
				let proc = get_by_pid(pid_of_watcher);
				// regs[10] is a0: hand the device's status byte back as
				// the watcher's system-call return value.
				(*(*proc).frame).regs[10] = (*rq).status.status as usize;
				// TODO: Set GpA0 to the value of the return
				// status.
			}
			kfree(rq as *mut u8);
		}
	}
}

/// The trap code will route PLIC interrupts 1..=8 for virtio devices. When
/// virtio determines that this is a block device, it sends it here.
pub fn handle_interrupt(idx: usize) {
	unsafe {
		if let Some(bdev) = BLOCK_DEVICES[idx].as_mut() {
			pending(bdev);
		} else {
			println!(
				"Invalid block device for interrupt {}",
				idx + 1
			);
		}
	}
}

// ///////////////////////////////////////////////
// //  BLOCK PROCESSES (KERNEL PROCESSES)
// ///////////////////////////////////////////////
// Arguments handed to a kernel I/O process through a raw Box pointer.
struct ProcArgs {
	pub pid:    u16,
	pub dev:    usize,
	pub buffer: *mut u8,
	pub size:   u32,
	pub offset: u64,
}

/// Kernel-process entry point for an asynchronous read. `args_addr` is a
/// raw `Box<ProcArgs>` produced by process_read; reconstituting the Box
/// here ensures the arguments are freed when this function returns.
fn read_proc(args_addr: usize) {
	let args = unsafe { Box::from_raw(args_addr as *mut ProcArgs) };
	let _ = block_op(
		args.dev,
		args.buffer,
		args.size,
		args.offset,
		false,
		args.pid,
	);
	// This should be handled by the RA now.
	// syscall_exit();
}

/// Put `pid` to sleep and spawn a kernel process that issues the read;
/// the watcher PID is woken by pending() when the device finishes.
pub fn process_read(pid: u16, dev: usize, buffer: *mut u8, size: u32, offset: u64) {
	// println!("Block read {}, {}, 0x{:x}, {}, {}", pid, dev, buffer as
	// usize, size, offset);
	let args = ProcArgs {
		pid,
		dev,
		buffer,
		size,
		offset,
	};
	let boxed_args = Box::new(args);
	set_waiting(pid);
	let _ = add_kernel_process_args(
		read_proc,
		Box::into_raw(boxed_args) as usize,
	);
}

// Kernel-process entry point for an asynchronous write; mirrors read_proc.
fn write_proc(args_addr: usize) {
	let args = unsafe { Box::from_raw(args_addr as *mut ProcArgs) };
	let _ = block_op(
		args.dev,
		args.buffer,
		args.size,
		args.offset,
		true,
		args.pid,
	);
	// syscall_exit();
}

/// Put `pid` to sleep and spawn a kernel process that issues the write.
pub fn process_write(pid: u16, dev: usize, buffer: *mut u8, size: u32, offset: u64) {
	let args = ProcArgs {
		pid,
		dev,
		buffer,
		size,
		offset,
	};
	let boxed_args = Box::new(args);
	set_waiting(pid);
	let _ = add_kernel_process_args(
		write_proc,
		Box::into_raw(boxed_args) as usize,
	);
}
================================================ FILE: risc_v/src/buffer.rs ================================================
// buffer.rs
// BlockBuffer is so useful, we put it here instead
// of in the file system.
// Stephen Marz

use crate::{cpu::memcpy, kmem::{kmalloc, kfree}};
use core::{ptr::null_mut, ops::{Index, IndexMut}};

// We need a Buffer that can automatically be created and destroyed
// in the lifetime of our read and write functions. In C, this would entail
// goto statements that "unravel" all of the allocations that we made. Take
// a look at the read() function to see why I thought this way would be better.
pub struct Buffer { buffer: *mut u8, len: usize } impl Buffer { pub fn new(sz: usize) -> Self { Self { buffer: kmalloc(sz), len: sz } } pub fn get_mut(&mut self) -> *mut u8 { self.buffer } pub fn get(&self) -> *const u8 { self.buffer } pub fn len(&self) -> usize { self.len } } impl Default for Buffer { fn default() -> Self { Self::new(1024) } } impl Index for Buffer { type Output = u8; fn index(&self, idx: usize) -> &Self::Output { unsafe { self.get().add(idx).as_ref().unwrap() } } } impl IndexMut for Buffer { fn index_mut(&mut self, idx: usize) -> &mut Self::Output { unsafe { self.get_mut().add(idx).as_mut().unwrap() } } } impl Clone for Buffer { fn clone(&self) -> Self { let mut new = Self { buffer: kmalloc(self.len()), len: self.len() }; unsafe { memcpy(new.get_mut(), self.get(), self.len()); } new } } // This is why we have the Buffer. Instead of having to unwind // all other buffers, we drop here when the block buffer goes out of scope. impl Drop for Buffer { fn drop(&mut self) { if !self.buffer.is_null() { kfree(self.buffer); self.buffer = null_mut(); } } } ================================================ FILE: risc_v/src/console.rs ================================================ // console.rs // Console utilities for buffering // Stephen Marz // 4 June 2020 use alloc::collections::VecDeque; use crate::lock::Mutex; use crate::process::{get_by_pid, set_running}; pub static mut IN_BUFFER: Option> = None; pub static mut OUT_BUFFER: Option> = None; pub static mut IN_LOCK: Mutex = Mutex::new(); pub static mut OUT_LOCK: Mutex = Mutex::new(); pub const DEFAULT_OUT_BUFFER_SIZE: usize = 10_000; pub const DEFAULT_IN_BUFFER_SIZE: usize = 1_000; pub static mut CONSOLE_QUEUE: Option> = None; pub fn init() { unsafe { IN_BUFFER.replace(VecDeque::with_capacity(DEFAULT_IN_BUFFER_SIZE)); OUT_BUFFER.replace(VecDeque::with_capacity(DEFAULT_OUT_BUFFER_SIZE)); } } /// Push a u8 (character) onto the output buffer /// If the buffer is full, silently drop. 
pub fn push_stdout(c: u8) {
	unsafe {
		OUT_LOCK.spin_lock();
		// take()/replace() gives us exclusive ownership of the buffer
		// while we mutate it, then puts it back.
		if let Some(mut buf) = OUT_BUFFER.take() {
			if buf.len() < DEFAULT_OUT_BUFFER_SIZE {
				buf.push_back(c);
			}
			OUT_BUFFER.replace(buf);
		}
		OUT_LOCK.unlock();
	}
}

/// Pop one byte from the output buffer; returns 0 if it is empty
/// or not yet initialized.
pub fn pop_stdout() -> u8 {
	let mut ret = None;
	unsafe {
		OUT_LOCK.spin_lock();
		if let Some(mut buf) = OUT_BUFFER.take() {
			ret = buf.pop_front();
			OUT_BUFFER.replace(buf);
		}
		OUT_LOCK.unlock();
	}
	ret.unwrap_or(0)
}

/// Push a byte onto the input buffer. On newline (10) or vertical
/// tab (11), wake every process parked in CONSOLE_QUEUE.
pub fn push_stdin(c: u8) {
	unsafe {
		IN_LOCK.spin_lock();
		if let Some(mut buf) = IN_BUFFER.take() {
			if buf.len() < DEFAULT_IN_BUFFER_SIZE {
				buf.push_back(c);
				if c == 10 || c == 11 {
					if let Some(mut q) = CONSOLE_QUEUE.take() {
						for i in q.drain(..) {
							set_running(i);
							// We also need to put stuff in here.
						}
						CONSOLE_QUEUE.replace(q);
					}
				}
			}
			IN_BUFFER.replace(buf);
		}
		IN_LOCK.unlock();
	}
}

/// Pop one byte from the input buffer; returns 0 if it is empty
/// or not yet initialized.
pub fn pop_stdin() -> u8 {
	let mut ret = None;
	unsafe {
		IN_LOCK.spin_lock();
		if let Some(mut buf) = IN_BUFFER.take() {
			ret = buf.pop_front();
			IN_BUFFER.replace(buf);
		}
		IN_LOCK.unlock();
	}
	ret.unwrap_or(0)
}

/// Park `pid` until the next end-of-line arrives on stdin (see push_stdin).
pub fn push_queue(pid: u16) {
	unsafe {
		if let Some(mut q) = CONSOLE_QUEUE.take() {
			q.push_back(pid);
			CONSOLE_QUEUE.replace(q);
		}
	}
}
================================================ FILE: risc_v/src/cpu.rs ================================================
// cpu.rs
// CPU and CPU-related routines
// Also contains the kernel's trap frame
// Stephen Marz
// 14 October 2019

// The frequency of QEMU is 10 MHz
pub const FREQ: u64 = 10_000_000;
// Context-switch interval: FREQ / 500 ticks, i.e. 500 switches per second.
// NOTE(review): the original comment said 250 per second, which contradicts
// the divisor of 500 below.
pub const CONTEXT_SWITCH_TIME: u64 = FREQ / 500;

/// In 64-bit mode, we're given three different modes for the MMU:
///  0 - The MMU is off -- no protection and no translation PA = VA
///  8 - This is Sv39 mode -- 39-bit virtual addresses
///  9 - This is Sv48 mode -- 48-bit virtual addresses
#[repr(usize)]
pub enum SatpMode {
	Off = 0,
	Sv39 = 8,
	Sv48 = 9,
}

// RISC-V privilege levels as stored in the trap frame's `mode` field.
#[repr(usize)]
pub enum CpuMode {
	User = 0,
	Supervisor = 1,
	Machine = 3,
}

// ABI names for the 32 general-purpose registers, in x0..x31 order,
// so regs[] in TrapFrame can be indexed symbolically.
#[repr(usize)]
pub enum Registers {
	Zero = 0,
	Ra,
	Sp,
	Gp,
	Tp,
	T0,
	T1,
	T2,
	S0,
	S1,
	A0, /* 10 */
	A1,
	A2,
	A3,
	A4,
	A5,
	A6,
	A7,
	S2,
	S3,
	S4, /* 20 */
	S5,
	S6,
	S7,
	S8,
	S9,
	S10,
	S11,
	T3,
	T4,
	T5, /* 30 */
	T6
}

// Convenience: convert a Registers variant into a regs[] index.
pub const fn gp(r: Registers) -> usize {
	r as usize
}

// Floating point registers (ABI names for f0..f31, in order).
#[repr(usize)]
pub enum FRegisters {
	Ft0,
	Ft1,
	Ft2,
	Ft3,
	Ft4,
	Ft5,
	Ft6,
	Ft7,
	Fs0,
	Fs1,
	Fa0, /* 10 */
	Fa1,
	Fa2,
	Fa3,
	Fa4,
	Fa5,
	Fa6,
	Fa7,
	Fs2,
	Fs3,
	Fs4, /* 20 */
	Fs5,
	Fs6,
	Fs7,
	Fs8,
	Fs9,
	Fs10,
	Fs11,
	Ft8,
	Ft9,
	Ft10, /* 30 */
	Ft11
}

/// The trap frame is set into a structure
/// and packed into each hart's mscratch register.
/// This allows for quick reference and full
/// context switch handling.
/// To make offsets easier, everything will be a usize (8 bytes)
/// The byte offsets in the comments below are relied upon by trap.S
/// (e.g. `sd a0, 520(t5)` stores into `pc`).
#[repr(C)]
#[derive(Clone, Copy)]
pub struct TrapFrame {
	pub regs:   [usize; 32], // 0 - 255
	pub fregs:  [usize; 32], // 256 - 511
	pub satp:   usize,       // 512 - 519
	pub pc:     usize,       // 520
	pub hartid: usize,       // 528
	pub qm:     usize,       // 536
	pub pid:    usize,       // 544
	pub mode:   usize,       // 552
}

/// Rust requires that we initialize our structures
/// because of the move semantics. What'll happen below
/// is Rust will construct a new TrapFrame and move it
/// out of the zero() function below. Rust contains two
/// different "selfs" where self can refer to the object
/// in memory or Self (capital S) which refers to the
/// data type of the structure. In the case below, this
/// is TrapFrame.
impl TrapFrame {
	pub const fn new() -> Self {
		TrapFrame {
			regs: [0; 32],
			fregs: [0; 32],
			satp: 0,
			pc: 0,
			hartid: 0,
			qm: 1,
			pid: 0,
			mode: 0,
		}
	}
}

/// The SATP register contains three fields: mode, address space id, and
/// the first level table address (level 2 for Sv39). This function
/// helps make the 64-bit register contents based on those three
/// fields.
pub const fn build_satp(mode: SatpMode, asid: usize, addr: usize) -> usize {
	(mode as usize) << 60
	| (asid & 0xffff) << 44
	| (addr >> 12) & 0xff_ffff_ffff
}

// The accessors below each read or write one machine/supervisor CSR via
// inline assembly. NOTE(review): llvm_asm! was removed from modern Rust;
// this code targets the older nightly toolchain the project pins.
pub fn mhartid_read() -> usize {
	unsafe {
		let rval;
		llvm_asm!("csrr $0, mhartid" :"=r"(rval));
		rval
	}
}

pub fn mie_read() -> usize {
	unsafe {
		let rval;
		llvm_asm!("csrr $0, mie" :"=r"(rval));
		rval
	}
}

pub fn mie_write(val: usize) {
	unsafe {
		llvm_asm!("csrw mie, $0" :: "r"(val));
	}
}

pub fn mstatus_write(val: usize) {
	unsafe {
		llvm_asm!("csrw mstatus, $0" ::"r"(val));
	}
}

pub fn mstatus_read() -> usize {
	unsafe {
		let rval;
		llvm_asm!("csrr $0, mstatus":"=r"(rval));
		rval
	}
}

pub fn stvec_write(val: usize) {
	unsafe {
		llvm_asm!("csrw stvec, $0" ::"r"(val));
	}
}

pub fn stvec_read() -> usize {
	unsafe {
		let rval;
		llvm_asm!("csrr $0, stvec" :"=r"(rval));
		rval
	}
}

pub fn mscratch_write(val: usize) {
	unsafe {
		llvm_asm!("csrw mscratch, $0" ::"r"(val));
	}
}

pub fn mscratch_read() -> usize {
	unsafe {
		let rval;
		llvm_asm!("csrr $0, mscratch" : "=r"(rval));
		rval
	}
}

pub fn mscratch_swap(to: usize) -> usize {
	unsafe {
		let from;
		llvm_asm!("csrrw $0, mscratch, $1" : "=r"(from) : "r"(to));
		from
	}
}

pub fn sscratch_write(val: usize) {
	unsafe {
		llvm_asm!("csrw sscratch, $0" ::"r"(val));
	}
}

pub fn sscratch_read() -> usize {
	unsafe {
		let rval;
		llvm_asm!("csrr $0, sscratch" : "=r"(rval));
		rval
	}
}

pub fn sscratch_swap(to: usize) -> usize {
	unsafe {
		let from;
		llvm_asm!("csrrw $0, sscratch, $1" : "=r"(from) : "r"(to));
		from
	}
}

pub fn mepc_write(val: usize) {
	unsafe {
		llvm_asm!("csrw mepc, $0" :: "r"(val));
	}
}

pub fn mepc_read() -> usize {
	unsafe {
		let rval;
		llvm_asm!("csrr $0, mepc" :"=r"(rval));
		rval
	}
}

pub fn sepc_write(val: usize) {
	unsafe {
		llvm_asm!("csrw sepc, $0" :: "r"(val));
	}
}

pub fn sepc_read() -> usize {
	unsafe {
		let rval;
		llvm_asm!("csrr $0, sepc" :"=r"(rval));
		rval
	}
}

pub fn satp_write(val: usize) {
	unsafe {
		llvm_asm!("csrw satp, $0" :: "r"(val));
	}
}

pub fn satp_read() -> usize { unsafe { let rval; llvm_asm!("csrr $0,
satp" :"=r"(rval)); rval } }

/// Take a hammer to the page tables and synchronize
/// all of them. This essentially flushes the entire
/// TLB.
pub fn satp_fence(vaddr: usize, asid: usize) {
	unsafe {
		llvm_asm!("sfence.vma $0, $1" :: "r"(vaddr), "r"(asid));
	}
}

/// Synchronize based on the address space identifier
/// This allows us to fence a particular process rather
/// than the entire TLB.
/// The RISC-V documentation calls this a TLB flush +.
/// Since there are other memory routines involved, they
/// didn't call it a TLB flush, but it is much like
/// Intel/AMD's invtlb [] instruction.
pub fn satp_fence_asid(asid: usize) {
	unsafe {
		llvm_asm!("sfence.vma zero, $0" :: "r"(asid));
	}
}

// QEMU's CLINT exposes the 64-bit mtime counter at this MMIO address.
const MMIO_MTIME: *const u64 = 0x0200_BFF8 as *const u64;

/// Read the current machine-time counter.
pub fn get_mtime() -> usize {
	unsafe { (*MMIO_MTIME) as usize }
}

/// Copy `bytes` bytes from `src` to `dest`. Copies 8 bytes at a time
/// while possible, then the remaining tail one byte at a time.
/// Safety: `dest` and `src` must be valid for `bytes` bytes and must
/// not overlap; `dest`/`src` alignment is assumed adequate for u64 loads.
pub unsafe fn memcpy(dest: *mut u8, src: *const u8, bytes: usize) {
	// Bulk phase: whole 8-byte words.
	let bytes_as_8 = bytes / 8;
	let dest_as_8 = dest as *mut u64;
	let src_as_8 = src as *const u64;
	for i in 0..bytes_as_8 {
		*(dest_as_8.add(i)) = *(src_as_8.add(i));
	}
	// Tail phase: copy the leftover bytes individually.
	// FIX: the original loop was `for i in bytes_completed..bytes_remaining`
	// where bytes_remaining = bytes - bytes_completed. That range is empty
	// whenever bytes_completed >= bytes_remaining (e.g. bytes = 10 gives
	// 8..2), so the last `bytes % 8` bytes were silently never copied.
	// The tail indices are bytes_completed..bytes.
	let bytes_completed = bytes_as_8 * 8;
	for i in bytes_completed..bytes {
		*(dest.add(i)) = *(src.add(i));
	}
}

/// Dumps the registers of a given trap frame. This is NOT the
/// current CPU registers!
pub fn dump_registers(frame: *const TrapFrame) {
	print!(" ");
	for i in 1..32 {
		if i % 4 == 0 {
			println!();
			print!(" ");
		}
		print!("x{:2}:{:08x} ", i, unsafe { (*frame).regs[i] });
	}
	println!();
}
================================================ FILE: risc_v/src/elf.rs ================================================
// elf.rs
// Routines for reading and parsing ELF
// (Executable and Linkable Format) files.
// 26-April-2020 // Stephen Marz use crate::{buffer::Buffer, cpu::{build_satp, memcpy, satp_fence_asid, CpuMode, Registers, SatpMode, TrapFrame}, page::{map, zalloc, EntryBits, Table, PAGE_SIZE}, process::{Process, ProcessData, ProcessState, NEXT_PID, STACK_ADDR, STACK_PAGES}}; use alloc::collections::VecDeque; // Every ELF file starts with ELF "magic", which is a sequence of four bytes 0x7f followed by capital ELF, which is 0x45, 0x4c, and 0x46 respectively. pub const MAGIC: u32 = 0x464c_457f; /// The ELF header contains information about placement and numbers of the important sections within our file. #[repr(C)] #[derive(Copy, Clone)] pub struct Header { pub magic: u32, pub bitsize: u8, pub endian: u8, pub ident_abi_version: u8, pub target_platform: u8, pub abi_version: u8, pub padding: [u8; 7], pub obj_type: u16, pub machine: u16, // 0xf3 for RISC-V pub version: u32, pub entry_addr: usize, pub phoff: usize, pub shoff: usize, pub flags: u32, pub ehsize: u16, pub phentsize: u16, pub phnum: u16, pub shentsize: u16, pub shnum: u16, pub shstrndx: u16 } #[repr(C)] #[derive(Copy, Clone)] pub struct ProgramHeader { pub seg_type: u32, pub flags: u32, pub off: usize, pub vaddr: usize, pub paddr: usize, pub filesz: usize, pub memsz: usize, pub align: usize } pub const TYPE_EXEC: u16 = 2; pub const PROG_READ: u32 = 4; pub const PROG_WRITE: u32 = 2; pub const PROG_EXECUTE: u32 = 1; pub const MACHINE_RISCV: u16 = 0xf3; pub const PH_SEG_TYPE_NULL: u32 = 0; pub const PH_SEG_TYPE_LOAD: u32 = 1; pub const PH_SEG_TYPE_DYNAMIC: u32 = 2; pub const PH_SEG_TYPE_INTERP: u32 = 3; pub const PH_SEG_TYPE_NOTE: u32 = 4; pub struct Program { pub header: ProgramHeader, pub data: Buffer } pub enum LoadErrors { Magic, Machine, TypeExec, FileRead } pub struct File { pub header: Header, pub programs: VecDeque } impl File { pub fn load(buffer: &Buffer) -> Result { let elf_hdr; unsafe { // Load the ELF elf_hdr = (buffer.get() as *const Header).as_ref().unwrap(); } // The ELF magic is 0x75, followed 
by ELF if elf_hdr.magic != MAGIC { return Err(LoadErrors::Magic); } // We need to make sure we're built for RISC-V if elf_hdr.machine != MACHINE_RISCV { return Err(LoadErrors::Machine); } // ELF has several types. However, we can only load // executables. if elf_hdr.obj_type != TYPE_EXEC { return Err(LoadErrors::TypeExec); } let ph_tab = unsafe { buffer.get().add(elf_hdr.phoff) } as *const ProgramHeader; // There are phnum number of program headers. We need to go through // each one and load it into memory, if necessary. let mut ret = Self { header: *elf_hdr, programs: VecDeque::new() }; for i in 0..elf_hdr.phnum as usize { unsafe { let ph = ph_tab.add(i).as_ref().unwrap(); // If the segment isn't marked as LOAD (loaded into memory), // then there is no point to this. Most executables use a LOAD // type for their program headers. if ph.seg_type != PH_SEG_TYPE_LOAD { continue; } // If there's nothing in this section, don't load it. if ph.memsz == 0 { continue; } let mut ph_buffer = Buffer::new(ph.memsz); memcpy(ph_buffer.get_mut(), buffer.get().add(ph.off), ph.memsz); ret.programs.push_back(Program { header: *ph, data: ph_buffer }); } } Ok(ret) } // load pub fn load_proc(buffer: &Buffer) -> Result { let elf_fl = Self::load(&buffer); if elf_fl.is_err() { return Err(elf_fl.err().unwrap()); } let elf_fl = elf_fl.ok().unwrap(); let mut sz = 0usize; // Get the size, in memory, that we're going to need for the program storage. for p in elf_fl.programs.iter() { sz += p.header.memsz; } // We add two pages since we could possibly split the front and back pages, hence // necessitating the need for two extra pages. This can get wasteful, but for now // if we don't do this, we could end up mapping into the MMU table! let program_pages = (sz + PAGE_SIZE * 2) / PAGE_SIZE; // I did this to demonstrate the expressive nature of Rust. Kinda cool, no? 
let my_pid = unsafe { let p = NEXT_PID + 1; NEXT_PID += 1; p }; let mut my_proc = Process { frame: zalloc(1) as *mut TrapFrame, stack: zalloc(STACK_PAGES), pid: my_pid, mmu_table: zalloc(1) as *mut Table, state: ProcessState::Running, data: ProcessData::new(), sleep_until: 0, program: zalloc(program_pages), brk: 0, }; let program_mem = my_proc.program; let table = unsafe { my_proc.mmu_table.as_mut().unwrap() }; // The ELF has several "program headers". This usually mimics the .text, // .rodata, .data, and .bss sections, but not necessarily. // What we do here is map the program headers into the process' page // table. for p in elf_fl.programs.iter() { // The program header table starts where the ELF header says it is // given by the field phoff (program header offset). // Copy the buffer we got from the filesystem into the program // memory we're going to map to the user. The memsz field in the // program header tells us how many bytes will need to be loaded. // The ph.off is the offset to load this into. unsafe { memcpy(program_mem.add(p.header.off), p.data.get(), p.header.memsz); } // We start off with the user bit set. let mut bits = EntryBits::User.val(); // This sucks, but we check each bit in the flags to see // if we need to add it to the PH permissions. if p.header.flags & PROG_EXECUTE != 0 { bits |= EntryBits::Execute.val(); } if p.header.flags & PROG_READ != 0 { bits |= EntryBits::Read.val(); } if p.header.flags & PROG_WRITE != 0 { bits |= EntryBits::Write.val(); } // Now we map the program counter. The virtual address // is provided in the ELF program header. let pages = (p.header.memsz + PAGE_SIZE) / PAGE_SIZE; for i in 0..pages { let vaddr = p.header.vaddr + i * PAGE_SIZE; // The ELF specifies a paddr, but not when we // use the vaddr! let paddr = program_mem as usize + p.header.off + i * PAGE_SIZE; // There is no checking here! This is very dangerous, and I have already // been bitten by it. 
I mapped too far and mapped userspace into the MMU // table, which is AWFUL! map(table, vaddr, paddr, bits, 0); if vaddr > my_proc.brk { my_proc.brk = vaddr; } // println!("DEBUG: Map 0x{:08x} to 0x{:08x} {:02x}", vaddr, paddr, bits); } my_proc.brk += 0x1000; } // This will map all of the program pages. Notice that in linker.lds in // userspace we set the entry point address to 0x2000_0000. This is the // same address as PROCESS_STARTING_ADDR, and they must match. // Map the stack let ptr = my_proc.stack as *mut u8; for i in 0..STACK_PAGES { let vaddr = STACK_ADDR + i * PAGE_SIZE; let paddr = ptr as usize + i * PAGE_SIZE; // We create the stack. We don't load a stack from the disk. // This is why I don't need to make the stack executable. map(table, vaddr, paddr, EntryBits::UserReadWrite.val(), 0); } // Set everything up in the trap frame unsafe { // The program counter is a virtual memory address and is loaded // into mepc when we execute mret. (*my_proc.frame).pc = elf_fl.header.entry_addr; // Stack pointer. The stack starts at the bottom and works its // way up, so we have to set the stack pointer to the bottom. (*my_proc.frame).regs[Registers::Sp as usize] = STACK_ADDR as usize + STACK_PAGES * PAGE_SIZE - 0x1000; // USER MODE! This is how we set what'll go into mstatus when we // run the process. (*my_proc.frame).mode = CpuMode::User as usize; (*my_proc.frame).pid = my_proc.pid as usize; // The SATP register is used for the MMU, so we need to // map our table into that register. The switch_to_user // function will load .satp into the actual register // when the time comes. (*my_proc.frame).satp = build_satp(SatpMode::Sv39, my_proc.pid as usize, my_proc.mmu_table as usize); } // The ASID field of the SATP register is only 16-bits, and we reserved // 0 for the kernel, even though we run the kernel in machine mode for // now. Since we don't reuse PIDs, this means that we can only spawn // 65534 processes. 
satp_fence_asid(my_pid as usize); Ok(my_proc) } } ================================================ FILE: risc_v/src/fs.rs ================================================ // minixfs.rs // Minix 3 Filesystem Implementation // Stephen Marz // 16 March 2020 use crate::{cpu::Registers, process::{add_kernel_process_args, get_by_pid, set_running, set_waiting}, syscall::syscall_block_read}; use crate::{buffer::Buffer, cpu::memcpy}; use alloc::{boxed::Box, collections::BTreeMap, string::String}; use core::mem::size_of; pub const MAGIC: u16 = 0x4d5a; pub const BLOCK_SIZE: u32 = 1024; pub const NUM_IPTRS: usize = BLOCK_SIZE as usize / 4; pub const S_IFDIR: u16 = 0o040_000; pub const S_IFREG: u16 = 0o100_000; /// The superblock describes the file system on the disk. It gives /// us all the information we need to read the file system and navigate /// the file system, including where to find the inodes and zones (blocks). #[repr(C)] pub struct SuperBlock { pub ninodes: u32, pub pad0: u16, pub imap_blocks: u16, pub zmap_blocks: u16, pub first_data_zone: u16, pub log_zone_size: u16, pub pad1: u16, pub max_size: u32, pub zones: u32, pub magic: u16, pub pad2: u16, pub block_size: u16, pub disk_version: u8 } /// An inode stores the "meta-data" to a file. The mode stores the permissions /// AND type of file. This is how we differentiate a directory from a file. A file /// size is in here too, which tells us how many blocks we need to read. Finally, the /// zones array points to where we can find the blocks, which is where the data /// is contained for the file. #[repr(C)] #[derive(Copy, Clone)] pub struct Inode { pub mode: u16, pub nlinks: u16, pub uid: u16, pub gid: u16, pub size: u32, pub atime: u32, pub mtime: u32, pub ctime: u32, pub zones: [u32; 10] } /// Notice that an inode does not contain the name of a file. This is because /// more than one file name may refer to the same inode. 
These are called "hard links" /// Instead, a DirEntry essentially associates a file name with an inode as shown in /// the structure below. #[repr(C)] pub struct DirEntry { pub inode: u32, pub name: [u8; 60] } /// The MinixFileSystem implements the FileSystem trait for the VFS. pub struct MinixFileSystem; // The plan for this in the future is to have a single inode cache. What we // will do is have a cache of Node structures which will combine the Inode // with the block drive. static mut MFS_INODE_CACHE: [Option>; 8] = [None, None, None, None, None, None, None, None]; impl MinixFileSystem { /// Inodes are the meta-data of a file, including the mode (permissions and type) and /// the file's size. They are stored above the data zones, but to figure out where we /// need to go to get the inode, we first need the superblock, which is where we can /// find all of the information about the filesystem itself. pub fn get_inode(bdev: usize, inode_num: u32) -> Option { // When we read, everything needs to be a multiple of a sector (512 bytes) // So, we need to have memory available that's at least 512 bytes, even if // we only want 10 bytes or 32 bytes (size of an Inode). let mut buffer = Buffer::new(1024); // Here is a little memory trick. We have a reference and it will refer to the // top portion of our buffer. Since we won't be using the super block and inode // simultaneously, we can overlap the memory regions. // For Rust-ers, I'm showing two ways here. The first way is to get a reference // from a pointer. You will see the &* a lot in Rust for references. Rust // makes dereferencing a pointer cumbersome, which lends to not using them. let super_block = unsafe { &*(buffer.get_mut() as *mut SuperBlock) }; // I opted for a pointer here instead of a reference because we will be offsetting the inode by a certain amount. let inode = buffer.get_mut() as *mut Inode; // Read from the block device. 
The size is 1 sector (512 bytes) and our offset is past // the boot block (first 1024 bytes). This is where the superblock sits. syc_read(bdev, buffer.get_mut(), 512, 1024); if super_block.magic == MAGIC { // If we get here, we successfully read what we think is the super block. // The math here is 2 - one for the boot block, one for the super block. Then we // have to skip the bitmaps blocks. We have a certain number of inode map blocks (imap) // and zone map blocks (zmap). // The inode comes to us as a NUMBER, not an index. So, we need to subtract 1. let inode_offset = (2 + super_block.imap_blocks + super_block.zmap_blocks) as usize * BLOCK_SIZE as usize + ((inode_num as usize - 1) / (BLOCK_SIZE as usize / size_of::())) * BLOCK_SIZE as usize; // Now, we read the inode itself. // The block driver requires that our offset be a multiple of 512. We do that with the // inode_offset. However, we're going to be reading a group of inodes. syc_read(bdev, buffer.get_mut(), 1024, inode_offset as u32); // There are 1024 / size_of() inodes in each read that we can do. However, we need to figure out which inode in that group we need to read. We just take the % of this to find out. let read_this_node = (inode_num as usize - 1) % (BLOCK_SIZE as usize / size_of::()); // We copy the inode over. This might not be the best thing since the Inode will // eventually have to change after writing. return unsafe { Some(*(inode.add(read_this_node))) }; } // If we get here, some result wasn't OK. Either the super block // or the inode itself. None } } impl MinixFileSystem { /// Init is where we would cache the superblock and inode to avoid having to read /// it over and over again, like we do for read right now. 
fn cache_at(btm: &mut BTreeMap, cwd: &String, inode_num: u32, bdev: usize) { let ino = Self::get_inode(bdev, inode_num).unwrap(); let mut buf = Buffer::new(((ino.size + BLOCK_SIZE - 1) & !BLOCK_SIZE) as usize); let dirents = buf.get() as *const DirEntry; let sz = Self::read(bdev, &ino, buf.get_mut(), BLOCK_SIZE, 0); let num_dirents = sz as usize / size_of::(); // We start at 2 because the first two entries are . and .. for i in 2..num_dirents { unsafe { let ref d = *dirents.add(i); let d_ino = Self::get_inode(bdev, d.inode).unwrap(); let mut new_cwd = String::with_capacity(120); for i in cwd.bytes() { new_cwd.push(i as char); } // Add a directory separator between this inode and the next. // If we're the root (inode 1), we don't want to double up the // frontslash, so only do it for non-roots. if inode_num != 1 { new_cwd.push('/'); } for i in 0..60 { if d.name[i] == 0 { break; } new_cwd.push(d.name[i] as char); } new_cwd.shrink_to_fit(); if d_ino.mode & S_IFDIR != 0 { // This is a directory, cache these. This is a recursive call, // which I don't really like. Self::cache_at(btm, &new_cwd, d.inode, bdev); } else { btm.insert(new_cwd, d_ino); } } } } // Run this ONLY in a process! pub fn init(bdev: usize) { if unsafe { MFS_INODE_CACHE[bdev - 1].is_none() } { let mut btm = BTreeMap::new(); let cwd = String::from("/"); // Let's look at the root (inode #1) Self::cache_at(&mut btm, &cwd, 1, bdev); unsafe { MFS_INODE_CACHE[bdev - 1] = Some(btm); } } else { println!("KERNEL: Initialized an already initialized filesystem {}", bdev); } } /// The goal of open is to traverse the path given by path. If we cache the inodes /// in RAM, it might make this much quicker. For now, this doesn't do anything since /// we're just testing read based on if we know the Inode we're looking for. 
pub fn open(bdev: usize, path: &str) -> Result { if let Some(cache) = unsafe { MFS_INODE_CACHE[bdev - 1].take() } { let ret; if let Some(inode) = cache.get(path) { ret = Ok(*inode); } else { ret = Err(FsError::FileNotFound); } unsafe { MFS_INODE_CACHE[bdev - 1].replace(cache); } ret } else { Err(FsError::FileNotFound) } } pub fn read(bdev: usize, inode: &Inode, buffer: *mut u8, size: u32, offset: u32) -> u32 { // Our strategy here is to use blocks to see when we need to start reading // based on the offset. That's offset_block. Then, the actual byte within // that block that we need is offset_byte. let mut blocks_seen = 0u32; let offset_block = offset / BLOCK_SIZE; let mut offset_byte = offset % BLOCK_SIZE; // First, the _size parameter (now in bytes_left) is the size of the buffer, not // necessarily the size of the file. If our buffer is bigger than the file, we're OK. // If our buffer is smaller than the file, then we can only read up to the buffer size. let mut bytes_left = if size > inode.size { inode.size } else { size }; let mut bytes_read = 0u32; // The block buffer automatically drops when we quit early due to an error or we've read enough. This will be the holding port when we go out and read a block. Recall that even if we want 10 bytes, we have to read the entire block (really only 512 bytes of the block) first. So, we use the block_buffer as the middle man, which is then copied into the buffer. let mut block_buffer = Buffer::new(BLOCK_SIZE as usize); // Triply indirect zones point to a block of pointers (BLOCK_SIZE / 4). Each one of those pointers points to another block of pointers (BLOCK_SIZE / 4). Each one of those pointers yet again points to another block of pointers (BLOCK_SIZE / 4). This is why we have indirect, iindirect (doubly), and iiindirect (triply). 
let mut indirect_buffer = Buffer::new(BLOCK_SIZE as usize); let mut iindirect_buffer = Buffer::new(BLOCK_SIZE as usize); let mut iiindirect_buffer = Buffer::new(BLOCK_SIZE as usize); // I put the pointers *const u32 here. That means we will allocate the indirect, doubly indirect, and triply indirect even for small files. I initially had these in their respective scopes, but that required us to recreate the indirect buffer for doubly indirect and both the indirect and doubly indirect buffers for the triply indirect. Not sure which is better, but I probably wasted brain cells on this. let izones = indirect_buffer.get() as *const u32; let iizones = iindirect_buffer.get() as *const u32; let iiizones = iiindirect_buffer.get() as *const u32; // //////////////////////////////////////////// // // DIRECT ZONES // //////////////////////////////////////////// // In Rust, our for loop automatically "declares" i from 0 to < 7. The syntax // 0..7 means 0 through to 7 but not including 7. If we want to include 7, we // would use the syntax 0..=7. for i in 0..7 { // There are 7 direct zones in the Minix 3 file system. So, we can just read them one by one. Any zone that has the value 0 is skipped and we check the next zones. This might happen as we start writing and truncating. if inode.zones[i] == 0 { continue; } // We really use this to keep track of when we need to actually start reading // But an if statement probably takes more time than just incrementing it. if offset_block <= blocks_seen { // If we get here, then our offset is within our window that we want to see. // We need to go to the direct pointer's index. That'll give us a block INDEX. // That makes it easy since all we have to do is multiply the block size // by whatever we get. If it's 0, we skip it and move on. let zone_offset = inode.zones[i] * BLOCK_SIZE; // We read the zone, which is where the data is located. The zone offset is simply the block // size times the zone number. This makes it really easy to read! 
syc_read(bdev, block_buffer.get_mut(), BLOCK_SIZE, zone_offset); // There's a little bit of math to see how much we need to read. We don't want to read // more than the buffer passed in can handle, and we don't want to read if we haven't // taken care of the offset. For example, an offset of 10000 with a size of 2 means we // can only read bytes 10,000 and 10,001. let read_this_many = if BLOCK_SIZE - offset_byte > bytes_left { bytes_left } else { BLOCK_SIZE - offset_byte }; // Once again, here we actually copy the bytes into the final destination, the buffer. This memcpy // is written in cpu.rs. unsafe { memcpy(buffer.add(bytes_read as usize), block_buffer.get().add(offset_byte as usize), read_this_many as usize); } // Regardless of whether we have an offset or not, we reset the offset byte back to 0. This // probably will get set to 0 many times, but who cares? offset_byte = 0; // Reset the statistics to see how many bytes we've read versus how many are left. bytes_read += read_this_many; bytes_left -= read_this_many; // If no more bytes are left, then we're done. if bytes_left == 0 { return bytes_read; } } // The blocks_seen is for the offset. We need to skip a certain number of blocks FIRST before getting // to the offset. The reason we need to read the zones is because we need to skip zones of 0, and they // do not contribute as a "seen" block. blocks_seen += 1; } // //////////////////////////////////////////// // // SINGLY INDIRECT ZONES // //////////////////////////////////////////// // Each indirect zone is a list of pointers, each 4 bytes. These then // point to zones where the data can be found. Just like with the direct zones, // we need to make sure the zone isn't 0. A zone of 0 means skip it. if inode.zones[7] != 0 { syc_read(bdev, indirect_buffer.get_mut(), BLOCK_SIZE, BLOCK_SIZE * inode.zones[7]); let izones = indirect_buffer.get() as *const u32; for i in 0..NUM_IPTRS { // Where do I put unsafe? 
Dereferencing the pointers and memcpy are the unsafe functions. unsafe { if izones.add(i).read() != 0 { if offset_block <= blocks_seen { syc_read(bdev, block_buffer.get_mut(), BLOCK_SIZE, BLOCK_SIZE * izones.add(i).read()); let read_this_many = if BLOCK_SIZE - offset_byte > bytes_left { bytes_left } else { BLOCK_SIZE - offset_byte }; memcpy(buffer.add(bytes_read as usize), block_buffer.get().add(offset_byte as usize), read_this_many as usize); bytes_read += read_this_many; bytes_left -= read_this_many; offset_byte = 0; if bytes_left == 0 { return bytes_read; } } blocks_seen += 1; } } } } // //////////////////////////////////////////// // // DOUBLY INDIRECT ZONES // //////////////////////////////////////////// if inode.zones[8] != 0 { syc_read(bdev, indirect_buffer.get_mut(), BLOCK_SIZE, BLOCK_SIZE * inode.zones[8]); unsafe { for i in 0..NUM_IPTRS { if izones.add(i).read() != 0 { syc_read(bdev, iindirect_buffer.get_mut(), BLOCK_SIZE, BLOCK_SIZE * izones.add(i).read()); for j in 0..NUM_IPTRS { if iizones.add(j).read() != 0 { // Notice that this inner code is the same for all end-zone pointers. I'm thinking about // moving this out of here into a function of its own, but that might make it harder // to follow. 
if offset_block <= blocks_seen { syc_read(bdev, block_buffer.get_mut(), BLOCK_SIZE, BLOCK_SIZE * iizones.add(j).read()); let read_this_many = if BLOCK_SIZE - offset_byte > bytes_left { bytes_left } else { BLOCK_SIZE - offset_byte }; memcpy( buffer.add(bytes_read as usize), block_buffer.get().add(offset_byte as usize), read_this_many as usize ); bytes_read += read_this_many; bytes_left -= read_this_many; offset_byte = 0; if bytes_left == 0 { return bytes_read; } } blocks_seen += 1; } } } } } } // //////////////////////////////////////////// // // TRIPLY INDIRECT ZONES // //////////////////////////////////////////// if inode.zones[9] != 0 { syc_read(bdev, indirect_buffer.get_mut(), BLOCK_SIZE, BLOCK_SIZE * inode.zones[9]); unsafe { for i in 0..NUM_IPTRS { if izones.add(i).read() != 0 { syc_read(bdev, iindirect_buffer.get_mut(), BLOCK_SIZE, BLOCK_SIZE * izones.add(i).read()); for j in 0..NUM_IPTRS { if iizones.add(j).read() != 0 { syc_read(bdev, iiindirect_buffer.get_mut(), BLOCK_SIZE, BLOCK_SIZE * iizones.add(j).read()); for k in 0..NUM_IPTRS { if iiizones.add(k).read() != 0 { // Hey look! This again. if offset_block <= blocks_seen { syc_read(bdev, block_buffer.get_mut(), BLOCK_SIZE, BLOCK_SIZE * iiizones.add(k).read()); let read_this_many = if BLOCK_SIZE - offset_byte > bytes_left { bytes_left } else { BLOCK_SIZE - offset_byte }; memcpy( buffer.add(bytes_read as usize), block_buffer.get().add(offset_byte as usize), read_this_many as usize ); bytes_read += read_this_many; bytes_left -= read_this_many; offset_byte = 0; if bytes_left == 0 { return bytes_read; } } blocks_seen += 1; } } } } } } } } // Anyone else love this stairstep style? I probably should put the pointers in a function by themselves, // but I think that'll make it more difficult to see what's actually happening. 
bytes_read } pub fn write(&mut self, _desc: &Inode, _buffer: *const u8, _offset: u32, _size: u32) -> u32 { 0 } pub fn stat(&self, inode: &Inode) -> Stat { Stat { mode: inode.mode, size: inode.size, uid: inode.uid, gid: inode.gid } } } /// This is a wrapper function around the syscall_block_read. This allows me to do /// other things before I call the system call (or after). However, all the things I /// wanted to do are no longer there, so this is a worthless function. fn syc_read(bdev: usize, buffer: *mut u8, size: u32, offset: u32) -> u8 { syscall_block_read(bdev, buffer, size, offset) } // We have to start a process when reading from a file since the block // device will block. We only want to block in a process context, not an // interrupt context. struct ProcArgs { pub pid: u16, pub dev: usize, pub buffer: *mut u8, pub size: u32, pub offset: u32, pub node: u32 } // This is the actual code ran inside of the read process. fn read_proc(args_addr: usize) { let args = unsafe { Box::from_raw(args_addr as *mut ProcArgs) }; // Start the read! Since we're in a kernel process, we can block by putting this // process into a waiting state and wait until the block driver returns. let inode = MinixFileSystem::get_inode(args.dev, args.node); let bytes = MinixFileSystem::read(args.dev, &inode.unwrap(), args.buffer, args.size, args.offset); // Let's write the return result into regs[10], which is A0. unsafe { let ptr = get_by_pid(args.pid); if !ptr.is_null() { (*(*ptr).frame).regs[Registers::A0 as usize] = bytes as usize; } } // This is the process making the system call. The system itself spawns another process // which goes out to the block device. Since we're passed the read call, we need to awaken // the process and get it ready to go. The only thing this process needs to clean up is the // tfree(), but the user process doesn't care about that. 
set_running(args.pid); } /// System calls will call process_read, which will spawn off a kernel process to read /// the requested data. pub fn process_read(pid: u16, dev: usize, node: u32, buffer: *mut u8, size: u32, offset: u32) { // println!("FS read {}, {}, 0x{:x}, {}, {}", pid, dev, buffer as usize, size, offset); let args = ProcArgs { pid, dev, buffer, size, offset, node }; let boxed_args = Box::new(args); set_waiting(pid); let _ = add_kernel_process_args(read_proc, Box::into_raw(boxed_args) as usize); } /// Stats on a file. This generally mimics an inode /// since that's the information we want anyway. /// However, inodes are filesystem specific, and we /// want a more generic stat. pub struct Stat { pub mode: u16, pub size: u32, pub uid: u16, pub gid: u16 } pub enum FsError { Success, FileNotFound, Permission, IsFile, IsDirectory } ================================================ FILE: risc_v/src/gpu.rs ================================================ // gpu.rs // Graphics stuff // Stephen Marz // 12 May 2020 #![allow(dead_code)] use crate::{page::{zalloc, PAGE_SIZE}, kmem::{kmalloc, kfree}, virtio, virtio::{MmioOffsets, Queue, StatusField, VIRTIO_RING_SIZE, Descriptor, VIRTIO_DESC_F_WRITE, VIRTIO_DESC_F_NEXT}}; use core::{mem::size_of, ptr::null_mut}; // use alloc::boxed::Box; const F_VIRGL: u32 = 0; const F_EDID: u32 = 1; const EVENT_DISPLAY: u32 = 1 << 0; #[repr(C)] struct Config { //events_read signals pending events to the driver. The driver MUST NOT write to this field. // events_clear clears pending events in the device. Writing a ’1’ into a bit will clear the corresponding bit in events_read mimicking write-to-clear behavior. //num_scanouts specifies the maximum number of scanouts supported by the device. Minimum value is 1, maximum value is 16. 
events_read: u32, events_clear: u32, num_scanouts: u32, reserved: u32, } #[repr(u32)] enum CtrlType { // 2d commands CmdGetDisplayInfo = 0x0100, CmdResourceCreate2d, CmdResourceUref, CmdSetScanout, CmdResourceFlush, CmdTransferToHost2d, CmdResourceAttachBacking, CmdResourceDetachBacking, CmdGetCapsetInfo, CmdGetCapset, CmdGetEdid, // cursor commands CmdUpdateCursor = 0x0300, CmdMoveCursor, // success responses RespOkNoData = 0x1100, RespOkDisplayInfo, RespOkCapsetInfo, RespOkCapset, RespOkEdid, // error responses RespErrUnspec = 0x1200, RespErrOutOfMemory, RespErrInvalidScanoutId, RespErrInvalidResourceId, RespErrInvalidContextId, RespErrInvalidParameter, } const FLAG_FENCE: u32 = 1 << 0; #[repr(C)] struct CtrlHeader { ctrl_type: CtrlType, flags: u32, fence_id: u64, ctx_id: u32, padding: u32 } const MAX_SCANOUTS: usize = 16; #[repr(C)] #[derive(Clone, Copy)] pub struct Rect { pub x: u32, pub y: u32, pub width: u32, pub height: u32, } impl Rect { pub const fn new(x: u32, y: u32, width: u32, height: u32) -> Self { Self { x, y, width, height } } } #[repr(C)] struct DisplayOne { r: Rect, enabled: u32, flags: u32, } #[repr(C)] struct RespDisplayInfo { hdr: CtrlHeader, pmodes: [DisplayOne; MAX_SCANOUTS], } #[repr(C)] struct GetEdid { hdr: CtrlHeader, scanout: u32, padding: u32, } #[repr(C)] struct RespEdid { hdr: CtrlHeader, size: u32, padding: u32, edid: [u8; 1024], } #[repr(u32)] enum Formats { B8G8R8A8Unorm = 1, B8G8R8X8Unorm = 2, A8R8G8B8Unorm = 3, X8R8G8B8Unorm = 4, R8G8B8A8Unorm = 67, X8B8G8R8Unorm = 68, A8B8G8R8Unorm = 121, R8G8B8X8Unorm = 134, } #[repr(C)] struct ResourceCreate2d { hdr: CtrlHeader, resource_id: u32, format: Formats, width: u32, height: u32, } #[repr(C)] struct ResourceUnref { hdr: CtrlHeader, resource_id: u32, padding: u32, } #[repr(C)] struct SetScanout { hdr: CtrlHeader, r: Rect, scanout_id: u32, resource_id: u32, } #[repr(C)] struct ResourceFlush { hdr: CtrlHeader, r: Rect, resource_id: u32, padding: u32, } #[repr(C)] struct TransferToHost2d { 
hdr: CtrlHeader, r: Rect, offset: u64, resource_id: u32, padding: u32, } #[repr(C)] struct AttachBacking { hdr: CtrlHeader, resource_id: u32, nr_entries: u32, } #[repr(C)] struct MemEntry { addr: u64, length: u32, padding: u32, } #[repr(C)] struct DetachBacking { hdr: CtrlHeader, resource_id: u32, padding: u32, } #[repr(C)] struct CursorPos { scanout_id: u32, x: u32, y: u32, padding: u32, } #[repr(C)] struct UpdateCursor { hdr: CtrlHeader, pos: CursorPos, resource_id: u32, hot_x: u32, hot_y: u32, padding: u32, } #[derive(Clone, Copy)] pub struct Pixel { pub r: u8, pub g: u8, pub b: u8, pub a: u8, } impl Pixel { pub const fn new(r: u8, g: u8, b: u8, a: u8) -> Self { Self { r, g, b, a } } } // This is not in the specification, but this makes // it easier for us to do just a single kfree. struct Request { request: RqT, response: RpT, } impl Request { pub fn new(request: RqT) -> *mut Self { let sz = size_of::() + size_of::(); let ptr = kmalloc(sz) as *mut Self; unsafe { (*ptr).request = request; } ptr } } struct Request3 { request: RqT, mementries: RmT, response: RpT, } impl Request3 { pub fn new(request: RqT, meminfo: RmT) -> *mut Self { let sz = size_of::() + size_of::() + size_of::(); let ptr = kmalloc(sz) as *mut Self; unsafe { (*ptr).request = request; (*ptr).mementries = meminfo; } ptr } } pub struct Device { queue: *mut Queue, dev: *mut u32, idx: u16, ack_used_idx: u16, framebuffer: *mut Pixel, width: u32, height: u32, } impl Device { pub const fn new() -> Self { Self { queue: null_mut(), dev: null_mut(), idx: 0, ack_used_idx: 0, framebuffer: null_mut(), width: 640, height: 480 } } pub fn get_framebuffer(&self) -> *mut Pixel { self.framebuffer } pub fn get_width(&self) -> u32 { self.width } pub fn get_height(&self) -> u32 { self.height } } pub static mut GPU_DEVICES: [Option; 8] = [ None, None, None, None, None, None, None, None, ]; pub fn fill_rect(dev: &mut Device, rect: Rect, color: Pixel) { for row in rect.y..(rect.y+rect.height) { for col in 
rect.x..(rect.x+rect.width) { let byte = row as usize * dev.width as usize + col as usize; unsafe { dev.framebuffer.add(byte).write(color); } } } } pub fn stroke_rect(dev: &mut Device, rect: Rect, color: Pixel, size: u32) { // Essentially fill the four sides. // Top fill_rect(dev, Rect::new( rect.x, rect.y, rect.width, size ), color); // Bottom fill_rect(dev, Rect::new( rect.x, rect.y+rect.height, rect.width, size ), color); // Left fill_rect(dev, Rect::new( rect.x, rect.y, size, rect.height ), color); // Right fill_rect(dev, Rect::new( rect.x+rect.width, rect.y, size, rect.height+size ), color); } pub fn init(gdev: usize) { if let Some(mut dev) = unsafe { GPU_DEVICES[gdev-1].take() } { // Put some crap in the framebuffer: // First clear the buffer to white? fill_rect(&mut dev, Rect::new(0, 0, 640, 480), Pixel::new(2, 2, 2, 255)); // fill_rect(&mut dev, Rect::new(15, 15, 200, 200), Pixel::new(255, 130, 0, 255)); // stroke_rect(&mut dev, Rect::new( 255, 15, 150, 150), Pixel::new( 0, 0, 0, 255), 5); // draw_cosine(&mut dev, Rect::new(0, 300, 550, 60), Pixel::new(255, 15, 15, 255)); // //// STEP 1: Create a host resource using create 2d let rq = Request::new(ResourceCreate2d { hdr: CtrlHeader { ctrl_type: CtrlType::CmdResourceCreate2d, flags: 0, fence_id: 0, ctx_id: 0, padding: 0, }, resource_id: 1, format: Formats::R8G8B8A8Unorm, width: dev.width, height: dev.height, }); let desc_c2d = Descriptor { addr: unsafe { &(*rq).request as *const ResourceCreate2d as u64 }, len: size_of::() as u32, flags: VIRTIO_DESC_F_NEXT, next: (dev.idx + 1) % VIRTIO_RING_SIZE as u16, }; let desc_c2d_resp = Descriptor { addr: unsafe { &(*rq).response as *const CtrlHeader as u64 }, len: size_of::() as u32, flags: VIRTIO_DESC_F_WRITE, next: 0, }; unsafe { let head = dev.idx; (*dev.queue).desc[dev.idx as usize] = desc_c2d; dev.idx = (dev.idx + 1) % VIRTIO_RING_SIZE as u16; (*dev.queue).desc[dev.idx as usize] = desc_c2d_resp; dev.idx = (dev.idx + 1) % VIRTIO_RING_SIZE as u16; 
(*dev.queue).avail.ring[(*dev.queue).avail.idx as usize % VIRTIO_RING_SIZE] = head; (*dev.queue).avail.idx = (*dev.queue).avail.idx.wrapping_add(1); } // //// STEP 2: Attach backing let rq = Request3::new(AttachBacking { hdr: CtrlHeader { ctrl_type: CtrlType::CmdResourceAttachBacking, flags: 0, fence_id: 0, ctx_id: 0, padding: 0, }, resource_id: 1, nr_entries: 1, }, MemEntry { addr: dev.framebuffer as u64, length: dev.width * dev.height * size_of::() as u32, padding: 0, } ); let desc_ab = Descriptor { addr: unsafe { &(*rq).request as *const AttachBacking as u64 }, len: size_of::() as u32, flags: VIRTIO_DESC_F_NEXT, next: (dev.idx + 1) % VIRTIO_RING_SIZE as u16, }; let desc_ab_mementry = Descriptor { addr: unsafe { &(*rq).mementries as *const MemEntry as u64 }, len: size_of::() as u32, flags: VIRTIO_DESC_F_NEXT, next: (dev.idx + 2) % VIRTIO_RING_SIZE as u16, }; let desc_ab_resp = Descriptor { addr: unsafe { &(*rq).response as *const CtrlHeader as u64 }, len: size_of::() as u32, flags: VIRTIO_DESC_F_WRITE, next: 0, }; unsafe { let head = dev.idx; (*dev.queue).desc[dev.idx as usize] = desc_ab; dev.idx = (dev.idx + 1) % VIRTIO_RING_SIZE as u16; (*dev.queue).desc[dev.idx as usize] = desc_ab_mementry; dev.idx = (dev.idx + 1) % VIRTIO_RING_SIZE as u16; (*dev.queue).desc[dev.idx as usize] = desc_ab_resp; dev.idx = (dev.idx + 1) % VIRTIO_RING_SIZE as u16; (*dev.queue).avail.ring[(*dev.queue).avail.idx as usize % VIRTIO_RING_SIZE] = head; (*dev.queue).avail.idx = (*dev.queue).avail.idx.wrapping_add(1); } // //// STEP 3: Set scanout let rq = Request::new(SetScanout { hdr: CtrlHeader { ctrl_type: CtrlType::CmdSetScanout, flags: 0, fence_id: 0, ctx_id: 0, padding: 0, }, r: Rect::new(0, 0, dev.width, dev.height), resource_id: 1, scanout_id: 0, }); let desc_sso = Descriptor { addr: unsafe { &(*rq).request as *const SetScanout as u64 }, len: size_of::() as u32, flags: VIRTIO_DESC_F_NEXT, next: (dev.idx + 1) % VIRTIO_RING_SIZE as u16, }; let desc_sso_resp = Descriptor { addr: 
unsafe { &(*rq).response as *const CtrlHeader as u64 }, len: size_of::() as u32, flags: VIRTIO_DESC_F_WRITE, next: 0, }; unsafe { let head = dev.idx; (*dev.queue).desc[dev.idx as usize] = desc_sso; dev.idx = (dev.idx + 1) % VIRTIO_RING_SIZE as u16; (*dev.queue).desc[dev.idx as usize] = desc_sso_resp; dev.idx = (dev.idx + 1) % VIRTIO_RING_SIZE as u16; (*dev.queue).avail.ring[(*dev.queue).avail.idx as usize % VIRTIO_RING_SIZE] = head; (*dev.queue).avail.idx = (*dev.queue).avail.idx.wrapping_add(1); } // //// STEP 4: Transfer to host let rq = Request::new(TransferToHost2d { hdr: CtrlHeader { ctrl_type: CtrlType::CmdTransferToHost2d, flags: 0, fence_id: 0, ctx_id: 0, padding: 0, }, r: Rect::new(0, 0, dev.width, dev.height), offset: 0, resource_id: 1, padding: 0, }); let desc_t2h = Descriptor { addr: unsafe { &(*rq).request as *const TransferToHost2d as u64 }, len: size_of::() as u32, flags: VIRTIO_DESC_F_NEXT, next: (dev.idx + 1) % VIRTIO_RING_SIZE as u16, }; let desc_t2h_resp = Descriptor { addr: unsafe { &(*rq).response as *const CtrlHeader as u64 }, len: size_of::() as u32, flags: VIRTIO_DESC_F_WRITE, next: 0, }; unsafe { let head = dev.idx; (*dev.queue).desc[dev.idx as usize] = desc_t2h; dev.idx = (dev.idx + 1) % VIRTIO_RING_SIZE as u16; (*dev.queue).desc[dev.idx as usize] = desc_t2h_resp; dev.idx = (dev.idx + 1) % VIRTIO_RING_SIZE as u16; (*dev.queue).avail.ring[(*dev.queue).avail.idx as usize % VIRTIO_RING_SIZE] = head; (*dev.queue).avail.idx = (*dev.queue).avail.idx.wrapping_add(1); } // Step 5: Flush let rq = Request::new(ResourceFlush { hdr: CtrlHeader { ctrl_type: CtrlType::CmdResourceFlush, flags: 0, fence_id: 0, ctx_id: 0, padding: 0, }, r: Rect::new(0, 0, dev.width, dev.height), resource_id: 1, padding: 0, }); let desc_rf = Descriptor { addr: unsafe { &(*rq).request as *const ResourceFlush as u64 }, len: size_of::() as u32, flags: VIRTIO_DESC_F_NEXT, next: (dev.idx + 1) % VIRTIO_RING_SIZE as u16, }; let desc_rf_resp = Descriptor { addr: unsafe { 
&(*rq).response as *const CtrlHeader as u64 }, len: size_of::() as u32, flags: VIRTIO_DESC_F_WRITE, next: 0, }; unsafe { let head = dev.idx; (*dev.queue).desc[dev.idx as usize] = desc_rf; dev.idx = (dev.idx + 1) % VIRTIO_RING_SIZE as u16; (*dev.queue).desc[dev.idx as usize] = desc_rf_resp; dev.idx = (dev.idx + 1) % VIRTIO_RING_SIZE as u16; (*dev.queue).avail.ring[(*dev.queue).avail.idx as usize % VIRTIO_RING_SIZE] = head; (*dev.queue).avail.idx = (*dev.queue).avail.idx.wrapping_add(1); } // Run Queue unsafe { dev.dev .add(MmioOffsets::QueueNotify.scale32()) .write_volatile(0); GPU_DEVICES[gdev-1].replace(dev); } } } /// Invalidate and transfer a rectangular portion of the screen. /// I found out that width and height are actually x2, y2...oh well. pub fn transfer(gdev: usize, x: u32, y: u32, width: u32, height: u32) { if let Some(mut dev) = unsafe { GPU_DEVICES[gdev-1].take() } { let rq = Request::new(TransferToHost2d { hdr: CtrlHeader { ctrl_type: CtrlType::CmdTransferToHost2d, flags: 0, fence_id: 0, ctx_id: 0, padding: 0, }, r: Rect::new(x, y, width, height), offset: 0, resource_id: 1, padding: 0, }); let desc_t2h = Descriptor { addr: unsafe { &(*rq).request as *const TransferToHost2d as u64 }, len: size_of::() as u32, flags: VIRTIO_DESC_F_NEXT, next: (dev.idx + 1) % VIRTIO_RING_SIZE as u16, }; let desc_t2h_resp = Descriptor { addr: unsafe { &(*rq).response as *const CtrlHeader as u64 }, len: size_of::() as u32, flags: VIRTIO_DESC_F_WRITE, next: 0, }; unsafe { let head = dev.idx; (*dev.queue).desc[dev.idx as usize] = desc_t2h; dev.idx = (dev.idx + 1) % VIRTIO_RING_SIZE as u16; (*dev.queue).desc[dev.idx as usize] = desc_t2h_resp; dev.idx = (dev.idx + 1) % VIRTIO_RING_SIZE as u16; (*dev.queue).avail.ring[(*dev.queue).avail.idx as usize % VIRTIO_RING_SIZE] = head; (*dev.queue).avail.idx = (*dev.queue).avail.idx.wrapping_add(1); } // Step 5: Flush let rq = Request::new(ResourceFlush { hdr: CtrlHeader { ctrl_type: CtrlType::CmdResourceFlush, flags: 0, fence_id: 0, 
ctx_id: 0, padding: 0, }, r: Rect::new(x, y, width, height), resource_id: 1, padding: 0, }); let desc_rf = Descriptor { addr: unsafe { &(*rq).request as *const ResourceFlush as u64 }, len: size_of::() as u32, flags: VIRTIO_DESC_F_NEXT, next: (dev.idx + 1) % VIRTIO_RING_SIZE as u16, }; let desc_rf_resp = Descriptor { addr: unsafe { &(*rq).response as *const CtrlHeader as u64 }, len: size_of::() as u32, flags: VIRTIO_DESC_F_WRITE, next: 0, }; unsafe { let head = dev.idx; (*dev.queue).desc[dev.idx as usize] = desc_rf; dev.idx = (dev.idx + 1) % VIRTIO_RING_SIZE as u16; (*dev.queue).desc[dev.idx as usize] = desc_rf_resp; dev.idx = (dev.idx + 1) % VIRTIO_RING_SIZE as u16; (*dev.queue).avail.ring[(*dev.queue).avail.idx as usize % VIRTIO_RING_SIZE] = head; (*dev.queue).avail.idx = (*dev.queue).avail.idx.wrapping_add(1); } // Run Queue unsafe { dev.dev .add(MmioOffsets::QueueNotify.scale32()) .write_volatile(0); GPU_DEVICES[gdev-1].replace(dev); } } } pub fn setup_gpu_device(ptr: *mut u32) -> bool { unsafe { // We can get the index of the device based on its address. // 0x1000_1000 is index 0 // 0x1000_2000 is index 1 // ... // 0x1000_8000 is index 7 // To get the number that changes over, we shift right 12 places (3 hex digits) let idx = (ptr as usize - virtio::MMIO_VIRTIO_START) >> 12; // [Driver] Device Initialization // 1. Reset the device (write 0 into status) ptr.add(MmioOffsets::Status.scale32()).write_volatile(0); let mut status_bits = StatusField::Acknowledge.val32(); // 2. Set ACKNOWLEDGE status bit ptr.add(MmioOffsets::Status.scale32()).write_volatile(status_bits); // 3. Set the DRIVER status bit status_bits |= StatusField::DriverOk.val32(); ptr.add(MmioOffsets::Status.scale32()).write_volatile(status_bits); // 4. Read device feature bits, write subset of feature // bits understood by OS and driver to the device. 
let host_features = ptr.add(MmioOffsets::HostFeatures.scale32()).read_volatile(); ptr.add(MmioOffsets::GuestFeatures.scale32()).write_volatile(host_features); // 5. Set the FEATURES_OK status bit status_bits |= StatusField::FeaturesOk.val32(); ptr.add(MmioOffsets::Status.scale32()).write_volatile(status_bits); // 6. Re-read status to ensure FEATURES_OK is still set. // Otherwise, it doesn't support our features. let status_ok = ptr.add(MmioOffsets::Status.scale32()).read_volatile(); // If the status field no longer has features_ok set, // that means that the device couldn't accept // the features that we request. Therefore, this is // considered a "failed" state. if false == StatusField::features_ok(status_ok) { print!("features fail..."); ptr.add(MmioOffsets::Status.scale32()).write_volatile(StatusField::Failed.val32()); return false; } // 7. Perform device-specific setup. // Set the queue num. We have to make sure that the // queue size is valid because the device can only take // a certain size. let qnmax = ptr.add(MmioOffsets::QueueNumMax.scale32()).read_volatile(); ptr.add(MmioOffsets::QueueNum.scale32()).write_volatile(VIRTIO_RING_SIZE as u32); if VIRTIO_RING_SIZE as u32 > qnmax { print!("queue size fail..."); return false; } // First, if the block device array is empty, create it! // We add 4095 to round this up and then do an integer // divide to truncate the decimal. We don't add 4096, // because if it is exactly 4096 bytes, we would get two // pages, not one. let num_pages = (size_of::() + PAGE_SIZE - 1) / PAGE_SIZE; // println!("np = {}", num_pages); // We allocate a page for each device. This will the the // descriptor where we can communicate with the block // device. We will still use an MMIO register (in // particular, QueueNotify) to actually tell the device // we put something in memory. We also have to be // careful with memory ordering. We don't want to // issue a notify before all memory writes have // finished. 
We will look at that later, but we need // what is called a memory "fence" or barrier. ptr.add(MmioOffsets::QueueSel.scale32()).write_volatile(0); // TODO: Set up queue #1 (cursorq) // Alignment is very important here. This is the memory address // alignment between the available and used rings. If this is wrong, // then we and the device will refer to different memory addresses // and hence get the wrong data in the used ring. // ptr.add(MmioOffsets::QueueAlign.scale32()).write_volatile(2); let queue_ptr = zalloc(num_pages) as *mut Queue; let queue_pfn = queue_ptr as u32; ptr.add(MmioOffsets::GuestPageSize.scale32()).write_volatile(PAGE_SIZE as u32); // QueuePFN is a physical page number, however it // appears for QEMU we have to write the entire memory // address. This is a physical memory address where we // (the OS) and the block device have in common for // making and receiving requests. ptr.add(MmioOffsets::QueuePfn.scale32()).write_volatile(queue_pfn / PAGE_SIZE as u32); // 8. Set the DRIVER_OK status bit. Device is now "live" status_bits |= StatusField::DriverOk.val32(); ptr.add(MmioOffsets::Status.scale32()).write_volatile(status_bits); // We are going to give the framebuffer to user space, so this needs to be page aligned // so that we can map it into the user space's MMU. This is why we don't want kmalloc here! let num_pages = (PAGE_SIZE * 2+640*480*size_of::())/PAGE_SIZE; let page_alloc = zalloc(num_pages) as *mut Pixel; let dev = Device { queue: queue_ptr, dev: ptr, idx: 0, ack_used_idx: 0, framebuffer: page_alloc, width: 640, height: 480, }; GPU_DEVICES[idx] = Some(dev); true } } pub fn pending(dev: &mut Device) { // Here we need to check the used ring and then free the resources // given by the descriptor id. 
unsafe { let ref queue = *dev.queue; while dev.ack_used_idx != queue.used.idx { let ref elem = queue.used.ring [dev.ack_used_idx as usize % VIRTIO_RING_SIZE]; // println!("Ack {}, elem {}, len {}", dev.ack_used_idx, elem.id, elem.len); let ref desc = queue.desc[elem.id as usize]; // Requests stay resident on the heap until this // function, so we can recapture the address here kfree(desc.addr as *mut u8); dev.ack_used_idx = dev.ack_used_idx.wrapping_add(1); } } } pub fn handle_interrupt(idx: usize) { unsafe { if let Some(bdev) = GPU_DEVICES[idx].as_mut() { pending(bdev); } else { println!( "Invalid GPU device for interrupt {}", idx + 1 ); } } } ================================================ FILE: risc_v/src/input.rs ================================================ // input.rs // Input handling. // Stephen Marz use crate::virtio::{Queue, MmioOffsets, MMIO_VIRTIO_START, StatusField, VIRTIO_RING_SIZE, Descriptor, VIRTIO_DESC_F_WRITE, VIRTIO_F_RING_EVENT_IDX}; use crate::kmem::kmalloc; use crate::page::{PAGE_SIZE, zalloc}; use core::mem::size_of; use alloc::collections::VecDeque; pub static mut ABS_EVENTS: Option> = None; // pub static mut ABS_OBSERVERS: Option> = None; pub static mut KEY_EVENTS: Option> = None; // pub static mut KEY_OBSERVERS: Option> = None; const EVENT_BUFFER_ELEMENTS: usize = 64; pub enum InputType { None, Abs(u32, u32, u32, u32, u32), Key(u32, u32) } #[repr(C)] #[derive(Copy, Clone)] pub struct Event { pub event_type: EventType, pub code: u16, pub value: u32, } #[repr(u8)] #[derive(Copy, Clone)] pub enum ConfigSelect { UNSET = 0x00, IdName = 0x01, IdSerial = 0x02, IdDevids = 0x03, PropBits = 0x10, EvBits = 0x11, AbsInfo = 0x12, } #[repr(C)] #[derive(Clone, Copy)] pub struct AbsInfo { pub min: u32, pub max: u32, pub fuzz: u32, pub flat: u32, pub res: u32, } #[repr(C)] #[derive(Clone, Copy)] pub struct DevIds { pub bustype: u16, pub vendor: u16, pub product: u16, pub version: u16, } #[repr(C)] #[derive(Clone, Copy)] pub union ConfigUnion { pub 
string: [u8; 128], pub bitmap: [i8; 128], pub abs: AbsInfo, pub ids: DevIds, } #[repr(C)] #[derive(Clone, Copy)] pub struct Config { pub select: ConfigSelect, pub subsel: u8, pub size: u8, reserved: [u8; 5], pub config: ConfigUnion, } #[repr(u16)] #[derive(Copy, Clone)] pub enum EventType { Syn = 0x00, Key = 0x01, Rel = 0x02, Abs = 0x03, Msc = 0x04, Sw = 0x05, Led = 0x11, Snd = 0x12, Rep = 0x14, Ff = 0x15, Pwr = 0x16, FfStatus = 0x17, Max = 0x1f, } const EVENT_SIZE: usize = size_of::(); pub struct Device { event_queue: *mut Queue, status_queue: *mut Queue, event_idx: u16, event_ack_used_idx: u16, event_buffer: *mut Event, status_ack_used_idx: u16, } pub static mut INPUT_DEVICES: [Option; 8] = [ None, None, None, None, None, None, None, None, ]; pub fn setup_input_device(ptr: *mut u32) -> bool { unsafe { // We can get the index of the device based on its address. // 0x1000_1000 is index 0 // 0x1000_2000 is index 1 // ... // 0x1000_8000 is index 7 // To get the number that changes over, we shift right 12 places (3 hex digits) let idx = (ptr as usize - MMIO_VIRTIO_START) >> 12; // [Driver] Device Initialization // 1. Reset the device (write 0 into status) ptr.add(MmioOffsets::Status.scale32()).write_volatile(0); let mut status_bits = StatusField::Acknowledge.val32(); // 2. Set ACKNOWLEDGE status bit ptr.add(MmioOffsets::Status.scale32()).write_volatile(status_bits); // 3. Set the DRIVER status bit status_bits |= StatusField::DriverOk.val32(); ptr.add(MmioOffsets::Status.scale32()).write_volatile(status_bits); // 4. Read device feature bits, write subset of feature // bits understood by OS and driver to the device. let mut host_features = ptr.add(MmioOffsets::HostFeatures.scale32()).read_volatile(); // Turn off EVENT_IDX host_features &= !(1 << VIRTIO_F_RING_EVENT_IDX); ptr.add(MmioOffsets::GuestFeatures.scale32()).write_volatile(host_features); // 5. 
Set the FEATURES_OK status bit status_bits |= StatusField::FeaturesOk.val32(); ptr.add(MmioOffsets::Status.scale32()).write_volatile(status_bits); // 6. Re-read status to ensure FEATURES_OK is still set. // Otherwise, it doesn't support our features. let status_ok = ptr.add(MmioOffsets::Status.scale32()).read_volatile(); // If the status field no longer has features_ok set, // that means that the device couldn't accept // the features that we request. Therefore, this is // considered a "failed" state. if false == StatusField::features_ok(status_ok) { print!("features fail..."); ptr.add(MmioOffsets::Status.scale32()).write_volatile(StatusField::Failed.val32()); return false; } // 7. Perform device-specific setup. // Set the queue num. We have to make sure that the // queue size is valid because the device can only take // a certain size. let qnmax = ptr.add(MmioOffsets::QueueNumMax.scale32()).read_volatile(); ptr.add(MmioOffsets::QueueNum.scale32()).write_volatile(VIRTIO_RING_SIZE as u32); if VIRTIO_RING_SIZE as u32 > qnmax { print!("queue size fail..."); return false; } // First, if the block device array is empty, create it! // We add 4095 to round this up and then do an integer // divide to truncate the decimal. We don't add 4096, // because if it is exactly 4096 bytes, we would get two // pages, not one. let num_pages = (size_of::() + PAGE_SIZE - 1) / PAGE_SIZE; // println!("np = {}", num_pages); // We allocate a page for each device. This will the the // descriptor where we can communicate with the block // device. We will still use an MMIO register (in // particular, QueueNotify) to actually tell the device // we put something in memory. We also have to be // careful with memory ordering. We don't want to // issue a notify before all memory writes have // finished. We will look at that later, but we need // what is called a memory "fence" or barrier. ptr.add(MmioOffsets::QueueSel.scale32()).write_volatile(0); // Alignment is very important here. 
This is the memory address // alignment between the available and used rings. If this is wrong, // then we and the device will refer to different memory addresses // and hence get the wrong data in the used ring. // ptr.add(MmioOffsets::QueueAlign.scale32()).write_volatile(2); let event_queue_ptr = zalloc(num_pages) as *mut Queue; let queue_pfn = event_queue_ptr as u32; ptr.add(MmioOffsets::GuestPageSize.scale32()).write_volatile(PAGE_SIZE as u32); ptr.add(MmioOffsets::QueuePfn.scale32()).write_volatile(queue_pfn / PAGE_SIZE as u32); // Status queue ptr.add(MmioOffsets::QueueSel.scale32()).write_volatile(1); // Alignment is very important here. This is the memory address // alignment between the available and used rings. If this is wrong, // then we and the device will refer to different memory addresses // and hence get the wrong data in the used ring. // ptr.add(MmioOffsets::QueueAlign.scale32()).write_volatile(2); let status_queue_ptr = zalloc(num_pages) as *mut Queue; let queue_pfn = status_queue_ptr as u32; ptr.add(MmioOffsets::GuestPageSize.scale32()).write_volatile(PAGE_SIZE as u32); ptr.add(MmioOffsets::QueuePfn.scale32()).write_volatile(queue_pfn / PAGE_SIZE as u32); // 8. Set the DRIVER_OK status bit. 
Device is now "live" status_bits |= StatusField::DriverOk.val32();
ptr.add(MmioOffsets::Status.scale32()).write_volatile(status_bits);
// let config_ptr = ptr.add(MmioOffsets::Config.scale32()) as *mut Config;
// let mut config = config_ptr.read_volatile();
// config.select = ConfigSelect::AbsInfo;
// config.subsel = 0;
// config_ptr.write_volatile(config);
// let id = config_ptr.read_volatile().config.abs;
// println!("Min: {}, Max: {}, fuzz: {}, flat: {}, res: {}", id.min, id.max, id.fuzz, id.flat, id.res);
// Build the driver-side device record. The event buffer holds
// EVENT_BUFFER_ELEMENTS fixed-size Event slots that are handed to the
// device one descriptor at a time below.
let mut dev = Device {
	event_queue: event_queue_ptr,
	status_queue: status_queue_ptr,
	status_ack_used_idx: 0,
	event_idx: 0,
	event_ack_used_idx: 0,
	event_buffer: kmalloc(EVENT_SIZE * EVENT_BUFFER_ELEMENTS) as *mut Event,
};
// Prime the event queue: every buffer is offered to the device so it
// has somewhere to write incoming input events.
for i in 0..EVENT_BUFFER_ELEMENTS {
	repopulate_event(&mut dev, i);
}
INPUT_DEVICES[idx] = Some(dev);
ABS_EVENTS = Some(VecDeque::with_capacity(100));
// ABS_OBSERVERS = Some(VecDeque::new());
KEY_EVENTS = Some(VecDeque::with_capacity(10));
// KEY_OBSERVERS = Some(VecDeque::new());
true
}
}

/// Offer event-buffer slot `buffer` back to the device through the
/// event queue so the device can fill it with a future input event.
/// Marked unsafe: dereferences the raw queue pointer and mutates the
/// shared available ring.
unsafe fn repopulate_event(dev: &mut Device, buffer: usize) {
	// Populate eventq with buffers, these must be at least the size of struct virtio_input_event.
	// VIRTIO_DESC_F_WRITE: the DEVICE writes into this buffer, we only read it.
	let desc = Descriptor {
		addr: dev.event_buffer.add(buffer) as u64,
		len: EVENT_SIZE as u32,
		flags: VIRTIO_DESC_F_WRITE,
		next: 0
	};
	let head = dev.event_idx as u16;
	(*dev.event_queue).desc[dev.event_idx as usize] = desc;
	// Descriptor indices wrap at the ring size.
	dev.event_idx = (dev.event_idx + 1) % VIRTIO_RING_SIZE as u16;
	// Publish the descriptor head in the available ring, then bump the
	// available index (wrapping u16 arithmetic, per the virtio spec).
	(*dev.event_queue).avail.ring[(*dev.event_queue).avail.idx as usize % VIRTIO_RING_SIZE] = head;
	(*dev.event_queue).avail.idx = (*dev.event_queue).avail.idx.wrapping_add(1);
}

fn pending(dev: &mut Device) {
	// Here we need to check the used ring and then free the resources
	// given by the descriptor id.
unsafe {
	// Check the event queue first
	let ref queue = *dev.event_queue;
	while dev.event_ack_used_idx != queue.used.idx {
		let ref elem = queue.used.ring[dev.event_ack_used_idx as usize % VIRTIO_RING_SIZE];
		let ref desc = queue.desc[elem.id as usize];
		// The device wrote a virtio input event into the buffer this
		// descriptor points at; reinterpret those bytes as our Event.
		let event = (desc.addr as *const Event).as_ref().unwrap();
		// print!("EAck {}, elem {}, len {}, addr 0x{:08x}: ", dev.event_ack_used_idx, elem.id, elem.len, desc.addr as usize);
		// println!("Type = {:x}, Code = {:x}, Value = {:x}", event.event_type, event.code, event.value);
		// Hand the buffer straight back to the device for reuse.
		repopulate_event(dev, elem.id as usize);
		dev.event_ack_used_idx = dev.event_ack_used_idx.wrapping_add(1);
		// Route the event to the matching global queue; other event
		// types (Syn, Rel, ...) are dropped.
		match event.event_type {
			EventType::Abs => {
				let mut ev = ABS_EVENTS.take().unwrap();
				ev.push_back(*event);
				ABS_EVENTS.replace(ev);
			},
			EventType::Key => {
				let mut ev = KEY_EVENTS.take().unwrap();
				ev.push_back(*event);
				KEY_EVENTS.replace(ev);
			},
			_ => {
			}
		}
	}
	// Next, the status queue
	let ref queue = *dev.status_queue;
	while dev.status_ack_used_idx != queue.used.idx {
		let ref elem = queue.used.ring[dev.status_ack_used_idx as usize % VIRTIO_RING_SIZE];
		print!("SAck {}, elem {}, len {}: ", dev.status_ack_used_idx, elem.id, elem.len);
		let ref desc = queue.desc[elem.id as usize];
		let event = (desc.addr as *const Event).as_ref().unwrap();
		println!("Type = {:x}, Code = {:x}, Value = {:x}", event.event_type as u8, event.code, event.value);
		dev.status_ack_used_idx = dev.status_ack_used_idx.wrapping_add(1);
	}
}
}

/// PLIC entry point for the input device in MMIO slot `idx` (zero-based).
pub fn handle_interrupt(idx: usize) {
	unsafe {
		if let Some(bdev) = INPUT_DEVICES[idx].as_mut() {
			pending(bdev);
		}
		else {
			println!(
			         "Invalid input device for interrupt {}",
			         idx + 1
			);
		}
	}
}


================================================
FILE: risc_v/src/kmem.rs
================================================
// kmem.rs
// Sub-page level: malloc-like allocation system
// Stephen Marz
// 7 October 2019

use crate::page::{align_val, zalloc, Table, PAGE_SIZE};
use core::{mem::size_of, ptr::null_mut};

#[repr(usize)]
enum AllocListFlags {
	// The top bit of flags_size marks a chunk as taken.
	Taken = 1
<< 63,
}

impl AllocListFlags {
	// Convert the flag variant into its raw usize bit pattern.
	pub fn val(self) -> usize {
		self as usize
	}
}

// A free-list header embedded directly in front of each allocation.
// The top bit of flags_size is the "taken" flag; the remaining bits
// store the chunk size (which includes this header).
struct AllocList {
	pub flags_size: usize,
}

impl AllocList {
	// True if the top (Taken) bit is set.
	pub fn is_taken(&self) -> bool {
		self.flags_size & AllocListFlags::Taken.val() != 0
	}

	pub fn is_free(&self) -> bool {
		!self.is_taken()
	}

	pub fn set_taken(&mut self) {
		self.flags_size |= AllocListFlags::Taken.val();
	}

	pub fn set_free(&mut self) {
		self.flags_size &= !AllocListFlags::Taken.val();
	}

	// Store a new size while preserving the current taken/free state.
	pub fn set_size(&mut self, sz: usize) {
		let k = self.is_taken();
		self.flags_size = sz & !AllocListFlags::Taken.val();
		if k {
			self.flags_size |= AllocListFlags::Taken.val();
		}
	}

	// Size of this chunk with the taken bit masked off.
	pub fn get_size(&self) -> usize {
		self.flags_size & !AllocListFlags::Taken.val()
	}
}

// This is the head of the allocation. We start here when
// we search for a free memory location.
static mut KMEM_HEAD: *mut AllocList = null_mut();
// In the future, we will have on-demand pages
// so, we need to keep track of our memory footprint to
// see if we actually need to allocate more.
static mut KMEM_ALLOC: usize = 0;
static mut KMEM_PAGE_TABLE: *mut Table = null_mut();

// These functions are safe helpers around an unsafe
// operation.
pub fn get_head() -> *mut u8 {
	unsafe { KMEM_HEAD as *mut u8 }
}

pub fn get_page_table() -> *mut Table {
	unsafe { KMEM_PAGE_TABLE as *mut Table }
}

pub fn get_num_allocations() -> usize {
	unsafe { KMEM_ALLOC }
}

/// Initialize kernel's memory
/// This is not to be used to allocate memory
/// for user processes. If that's the case, use
/// alloc/dealloc from the page crate.
pub fn init() {
	unsafe {
		// Allocate kernel pages (KMEM_ALLOC). The whole 2048-page
		// region starts as one giant free chunk.
		KMEM_ALLOC = 2048;
		let k_alloc = zalloc(KMEM_ALLOC);
		assert!(!k_alloc.is_null());
		KMEM_HEAD = k_alloc as *mut AllocList;
		(*KMEM_HEAD).set_free();
		(*KMEM_HEAD).set_size(KMEM_ALLOC * PAGE_SIZE);
		KMEM_PAGE_TABLE = zalloc(1) as *mut Table;
	}
}

/// Allocate sub-page level allocation based on bytes and zero the memory
pub fn kzmalloc(sz: usize) -> *mut u8 {
	let size = align_val(sz, 3);
	let ret = kmalloc(size);
	if !ret.is_null() {
		for i in 0..size {
			unsafe {
				(*ret.add(i)) = 0;
			}
		}
	}
	ret
}

/// Allocate sub-page level allocation based on bytes.
/// Returns null_mut() when no sufficiently large free chunk exists.
/// NOTE: the extraction stripped the generic arguments on size_of; they
/// are restored here as size_of::<AllocList>() so the header is counted.
pub fn kmalloc(sz: usize) -> *mut u8 {
	unsafe {
		// Round the request up to an 8-byte multiple and reserve room
		// for the AllocList header that precedes every chunk.
		let size = align_val(sz, 3) + size_of::<AllocList>();
		let mut head = KMEM_HEAD;
		// .add() uses pointer arithmetic, so we type-cast into a u8
		// so that we multiply by an absolute size (KMEM_ALLOC *
		// PAGE_SIZE).
		let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE)
		           as *mut AllocList;

		// First-fit scan over the chunk list.
		while head < tail {
			if (*head).is_free() && size <= (*head).get_size() {
				let chunk_size = (*head).get_size();
				let rem = chunk_size - size;
				(*head).set_taken();
				if rem > size_of::<AllocList>() {
					// There is space remaining here: split
					// off the remainder as a new free chunk.
					let next = (head as *mut u8).add(size)
					           as *mut AllocList;
					(*next).set_free();
					(*next).set_size(rem);
					(*head).set_size(size);
				}
				else {
					// If we get here, take the entire chunk
					(*head).set_size(chunk_size);
				}
				// The caller's memory starts just past the header.
				return head.add(1) as *mut u8;
			}
			else {
				// If we get here, what we saw wasn't a free
				// chunk, move on to the next.
				head = (head as *mut u8).add((*head).get_size())
				       as *mut AllocList;
			}
		}
	}
	// If we get here, we didn't find any free chunks--i.e. there isn't
	// enough memory for this. TODO: Add on-demand page allocation.
	null_mut()
}

/// Free a sub-page level allocation
pub fn kfree(ptr: *mut u8) {
	unsafe {
		if !ptr.is_null() {
			// Step back over the header that precedes the allocation.
			let p = (ptr as *mut AllocList).offset(-1);
			if (*p).is_taken() {
				(*p).set_free();
			}
			// After we free, see if we can combine adjacent free
			// spots to see if we can reduce fragmentation.
coalesce();
		}
	}
}

/// Merge smaller chunks into a bigger chunk
pub fn coalesce() {
	unsafe {
		let mut head = KMEM_HEAD;
		let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE)
		           as *mut AllocList;

		while head < tail {
			let next = (head as *mut u8).add((*head).get_size())
			           as *mut AllocList;
			if (*head).get_size() == 0 {
				// If this happens, then we have a bad heap
				// (double free or something). However, that
				// will cause an infinite loop since the next
				// pointer will never move beyond the current
				// location.
				break;
			}
			else if next >= tail {
				// We calculated the next by using the size
				// given as get_size(), however this could push
				// us past the tail. In that case, the size is
				// wrong, hence we break and stop doing what we
				// need to do.
				break;
			}
			else if (*head).is_free() && (*next).is_free() {
				// This means we have adjacent blocks needing to
				// be freed. So, we combine them into one
				// allocation.
				(*head).set_size(
				                 (*head).get_size()
				                 + (*next).get_size(),
				);
			}
			// If we get here, we might've moved. Recalculate new
			// head.
			head = (head as *mut u8).add((*head).get_size())
			       as *mut AllocList;
		}
	}
}

/// For debugging purposes, print the kmem table
pub fn print_table() {
	unsafe {
		let mut head = KMEM_HEAD;
		let tail = (KMEM_HEAD as *mut u8).add(KMEM_ALLOC * PAGE_SIZE)
		           as *mut AllocList;
		while head < tail {
			println!(
			         "{:p}: Length = {:<10} Taken = {}",
			         head,
			         (*head).get_size(),
			         (*head).is_taken()
			);
			head = (head as *mut u8).add((*head).get_size())
			       as *mut AllocList;
		}
	}
}

// ///////////////////////////////////
// / GLOBAL ALLOCATOR
// ///////////////////////////////////

// The global allocator allows us to use the data structures
// in the core library, such as a linked list or B-tree.
// We want to use these sparingly since we have a coarse-grained
// allocator.
use core::alloc::{GlobalAlloc, Layout};

// The global allocator is a static constant to a global allocator
// structure.
We don't need any members because we're using this
// structure just to implement alloc and dealloc.
struct OsGlobalAlloc;

unsafe impl GlobalAlloc for OsGlobalAlloc {
	// NOTE(review): layout.align() is ignored here; kzmalloc aligns to
	// 8 bytes (align_val(sz, 3)) — confirm no caller needs stricter
	// alignment.
	unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
		// We align to the next page size so that when
		// we divide by PAGE_SIZE, we get exactly the number
		// of pages necessary.
		kzmalloc(layout.size())
	}

	unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) {
		// We ignore layout since our allocator uses ptr_start -> last
		// to determine the span of an allocation.
		kfree(ptr);
	}
}

#[global_allocator]
/// Technically, we don't need the {} at the end, but it
/// reveals that we're creating a new structure and not just
/// copying a value.
static GA: OsGlobalAlloc = OsGlobalAlloc {};

#[alloc_error_handler]
/// If for some reason alloc() in the global allocator gets null_mut(),
/// then we come here. This is a divergent function, so we call panic to
/// let the tester know what's going on.
pub fn alloc_error(l: Layout) -> ! {
	panic!(
	       "Allocator failed to allocate {} bytes with {}-byte alignment.",
	       l.size(),
	       l.align()
	);
}


================================================
FILE: risc_v/src/lds/virt.lds
================================================
/*
  virt.lds
  Linker script for outputting to RISC-V QEMU "virt" machine.
  Stephen Marz
  6 October 2019
*/

/*
  riscv is the name of the architecture that the linker understands
  for any RISC-V target (64-bit or 32-bit).

  We will further refine this by using -mabi=lp64 and -march=rv64gc
*/
OUTPUT_ARCH( "riscv" )

/*
We're setting our entry point to a symbol
called _start which is inside of boot.S. This
essentially stores the address of _start as the
"entry point", or where CPU instructions should start
executing.

In the rest of this script, we are going to place _start
right at the beginning of 0x8000_0000 because this is where
the virtual machine and many RISC-V boards will start
executing.
*/ ENTRY( _start ) /* The MEMORY section will explain that we have "ram" that contains a section that is 'w' (writeable), 'x' (executable), and 'a' (allocatable). We use '!' to invert 'r' (read-only) and 'i' (initialized). We don't want our memory to be read-only, and we're stating that it is NOT initialized at the beginning. The ORIGIN is the memory address 0x8000_0000. If we look at the virt spec or the specification for the RISC-V HiFive Unleashed, this is the starting memory address for our code. Side note: There might be other boot ROMs at different addresses, but their job is to get to this point. Finally LENGTH = 128M tells the linker that we have 128 megabyte of RAM. The linker will double check this to make sure everything can fit. The HiFive Unleashed has a lot more RAM than this, but for the virtual machine, I went with 128M since I think that's enough RAM for now. We can provide other pieces of memory, such as QSPI, or ROM, but we're telling the linker script here that we have one pool of RAM. */ MEMORY { ram (wxa) : ORIGIN = 0x80000000, LENGTH = 128M } /* PHDRS is short for "program headers", which we specify three here: text - CPU instructions (executable sections) data - Global, initialized variables bss - Global, uninitialized variables (all will be set to 0 by boot.S) The command PT_LOAD tells the linker that these sections will be loaded from the file into memory. We can actually stuff all of these into a single program header, but by splitting it up into three, we can actually use the other PT_* commands such as PT_DYNAMIC, PT_INTERP, PT_NULL to tell the linker where to find additional information. However, for our purposes, every section will be loaded from the program headers. */ PHDRS { text PT_LOAD; data PT_LOAD; bss PT_LOAD; } /* We are now going to organize the memory based on which section it is in. In assembly, we can change the section with the ".section" directive. 
However, in C++ and Rust, CPU instructions go into text, global constants go into rodata, global initialized variables go into data, and global uninitialized variables go into bss. */ SECTIONS { /* The first part of our RAM layout will be the text section. Since our CPU instructions are here, and our memory starts at 0x8000_0000, we need our entry point to line up here. */ .text : { /* PROVIDE allows me to access a symbol called _text_start so I know where the text section starts in the operating system. This should not move, but it is here for convenience. The period '.' tells the linker to set _text_start to the CURRENT location ('.' = current memory location). This current memory location moves as we add things. */ PROVIDE(_text_start = .); /* We are going to layout all text sections here, starting with .text.init. The asterisk in front of the parentheses means to match the .text.init section of ANY object file. Otherwise, we can specify which object file should contain the .text.init section, for example, boot.o(.text.init) would specifically put the .text.init section of our bootloader here. Because we might want to change the name of our files, we'll leave it with a *. Inside the parentheses is the name of the section. I created my own called .text.init to make 100% sure that the _start is put right at the beginning. The linker will lay this out in the order it receives it: .text.init first all .text sections next any .text.* sections last .text.* means to match anything after .text. If we didn't already specify .text.init, this would've matched here. The assembler and linker can place things in "special" text sections, so we match any we might come across here. */ *(.text.init) *(.text .text.*) /* Again, with PROVIDE, we're providing a readable symbol called _text_end, which is set to the memory address AFTER .text.init, .text, and .text.*'s have been added. */ PROVIDE(_text_end = .); /* The portion after the right brace is in an odd format. 
However, this is telling the linker what memory portion to put it in. We labeled our RAM, ram, with the constraints that it is writeable, allocatable, and executable. The linker will make sure with this that we can do all of those things. >ram - This just tells the linker script to put this entire section (.text) into the ram region of memory. To my knowledge, the '>' does not mean "greater than". Instead, it is a symbol to let the linker know we want to put this in ram. AT>ram - This sets the LMA (load memory address) region to the same thing. LMA is the final translation of a VMA (virtual memory address). With this linker script, we're loading everything into its physical location. We'll let the kernel copy and sort out the virtual memory. That's why >ram and AT>ram are continually the same thing. :text - This tells the linker script to put this into the :text program header. We've only defined three: text, data, and bss. In this case, we're telling the linker script to go into the text section. */ } >ram AT>ram :text /* The global pointer allows the linker to position global variables and constants into independent positions relative to the gp (global pointer) register. The globals start after the text sections and are only relevant to the rodata, data, and bss sections. */ PROVIDE(_global_pointer = .); /* Most compilers create a rodata (read only data) section for global constants. However, we're going to place ours in the text section. We can actually put this in :data, but since the .text section is read-only, we can place it there. NOTE: This doesn't actually do anything, yet. The actual "protection" cannot be done at link time. Instead, when we program the memory management unit (MMU), we will be able to choose which bits (R=read, W=write, X=execute) we want each memory segment to be able to do. 
*/ .rodata : { PROVIDE(_rodata_start = .); *(.rodata .rodata.*) PROVIDE(_rodata_end = .); /* Again, we're placing the rodata section in the memory segment "ram" and we're putting it in the :text program header. We don't have one for rodata anyway. */ } >ram AT>ram :text .data : { /* . = ALIGN(4096) tells the linker to align the current memory location (which is 0x8000_0000 + text section + rodata section) to 4096 bytes. This is because our paging system's resolution is 4,096 bytes or 4 KiB. */ . = ALIGN(4096); PROVIDE(_data_start = .); /* sdata and data are essentially the same thing. However, compilers usually use the sdata sections for shorter, quicker loading sections. So, usually critical data is loaded there. However, we're loading all of this in one fell swoop. So, we're looking to put all of the following sections under the umbrella .data: .sdata .sdata.[anything] .data .data.[anything] ...in that order. */ *(.sdata .sdata.*) *(.data .data.*) PROVIDE(_data_end = .); } >ram AT>ram :data .bss : { PROVIDE(_bss_start = .); *(.sbss .sbss.*) *(.bss .bss.*) PROVIDE(_bss_end = .); } >ram AT>ram :bss /* The following will be helpful when we allocate the kernel stack (_stack) and determine where the heap begnis and ends (_heap_start and _heap_start + _heap_size)/ When we do memory allocation, we can use these symbols. We use the symbols instead of hard-coding an address because this is a floating target. As we add code, the heap moves farther down the memory and gets shorter. _memory_start will be set to 0x8000_0000 here. We use ORIGIN(ram) so that it will take whatever we set the origin of ram to. Otherwise, we'd have to change it more than once if we ever stray away from 0x8000_0000 as our entry point. */ PROVIDE(_memory_start = ORIGIN(ram)); /* Our kernel stack starts at the end of the bss segment (_bss_end). However, we're allocating 0x80000 bytes (524 KiB) to our kernel stack. This should be PLENTY of space. 
The reason we add the memory is because the stack grows from higher memory to lower memory (bottom to top). Therefore we set the stack at the very bottom of its allocated slot. When we go to allocate from the stack, we'll subtract the number of bytes we need. */ PROVIDE(_stack_start = _bss_end); PROVIDE(_stack_end = _stack_start + 0x80000); PROVIDE(_memory_end = ORIGIN(ram) + LENGTH(ram)); /* Finally, our heap starts right after the kernel stack. This heap will be used mainly to dole out memory for user-space applications. However, in some circumstances, it will be used for kernel memory as well. We don't align here because we let the kernel determine how it wants to do this. */ PROVIDE(_heap_start = _stack_end); PROVIDE(_heap_size = _memory_end - _heap_start); } ================================================ FILE: risc_v/src/lock.rs ================================================ // lock.rs // Locking routines // Stephen Marz // 26 Apr 2020 use crate::syscall::syscall_sleep; pub const DEFAULT_LOCK_SLEEP: usize = 10000; #[repr(u32)] pub enum MutexState { Unlocked = 0, Locked = 1 } #[repr(C)] pub struct Mutex { state: MutexState } impl<'a> Mutex { pub const fn new() -> Self { Self { state: MutexState::Unlocked } } pub fn val(&'a self) -> &'a MutexState { &self.state } /// Try to lock the Mutex. If the mutex is already locked, this function returns false, otherwise it will return true if the mutex was acquired. pub fn try_lock(&mut self) -> bool { unsafe { let state: MutexState; llvm_asm!("amoswap.w.aq $0, $1, ($2)\n" : "=r"(state) : "r"(1), "r"(self) :: "volatile"); match state { // amoswap returns the OLD state of the lock. If it was already locked, we didn't acquire it. MutexState::Locked => false, MutexState::Unlocked => true } } } /// Do NOT sleep lock inside of an interrupt context! /// Never use a sleep lock for the process list. Sleeping requires /// the process list to function, so you'll deadlock if you do. 
pub fn sleep_lock(&mut self) { while !self.try_lock() { syscall_sleep(DEFAULT_LOCK_SLEEP); } } /// Can safely be used inside of an interrupt context. pub fn spin_lock(&mut self) { while !self.try_lock() {} } /// Unlock a mutex without regard for its previous state. pub fn unlock(&mut self) { unsafe { llvm_asm!("amoswap.w.rl zero, zero, ($0)" :: "r"(self) :: "volatile"); } } } ================================================ FILE: risc_v/src/main.rs ================================================ // Steve Operating System // Stephen Marz // 21 Sep 2019 #![no_main] #![no_std] #![feature(panic_info_message, asm, llvm_asm, global_asm, allocator_api, alloc_error_handler, alloc_prelude, const_raw_ptr_to_usize_cast, lang_items)] #[lang = "eh_personality"] extern fn eh_personality() {} // #[macro_use] extern crate alloc; // This is experimental and requires alloc_prelude as a feature // use alloc::prelude::v1::*; // /////////////////////////////////// // / RUST MACROS // /////////////////////////////////// #[macro_export] macro_rules! print { ($($args:tt)+) => ({ use core::fmt::Write; let _ = write!(crate::uart::Uart::new(0x1000_0000), $($args)+); }); } #[macro_export] macro_rules! println { () => ({ print!("\r\n") }); ($fmt:expr) => ({ print!(concat!($fmt, "\r\n")) }); ($fmt:expr, $($args:tt)+) => ({ print!(concat!($fmt, "\r\n"), $($args)+) }); } // /////////////////////////////////// // / LANGUAGE STRUCTURES / FUNCTIONS // /////////////////////////////////// #[panic_handler] fn panic(info: &core::panic::PanicInfo) -> ! { print!("Aborting: "); if let Some(p) = info.location() { println!( "line {}, file {}: {}", p.line(), p.file(), info.message().unwrap() ); } else { println!("no information available."); } abort(); } #[no_mangle] extern "C" fn abort() -> ! { loop { unsafe { llvm_asm!("wfi"::::"volatile"); } } } extern "C" { fn switch_to_user(frame: usize) -> !; } /// Switch to user is an assembly function that loads /// a frame. 
Since it will jump to another program counter, /// it will never return back here. We don't care if we leak /// the stack, since we will recapture the stack during m_trap. fn rust_switch_to_user(frame: usize) -> ! { unsafe { switch_to_user(frame); } } // /////////////////////////////////// // / ENTRY POINT // /////////////////////////////////// #[no_mangle] extern "C" fn kinit() { uart::Uart::new(0x1000_0000).init(); page::init(); kmem::init(); process::init(); // We lower the threshold wall so our interrupts can jump over it. // Any priority > 0 will be able to be "heard" plic::set_threshold(0); // VIRTIO = [1..8] // UART0 = 10 // PCIE = [32..35] // Enable PLIC interrupts. for i in 1..=10 { plic::enable(i); plic::set_priority(i, 1); } // Set up virtio. This requires a working heap and page-grained allocator. virtio::probe(); console::init(); process::add_kernel_process(test::test); // Get the GPU going gpu::init(6); // We schedule the next context switch using a multiplier of 1 // Block testing code removed. trap::schedule_next_context_switch(1); rust_switch_to_user(sched::schedule()); // switch_to_user will not return, so we should never get here } #[no_mangle] extern "C" fn kinit_hart(_hartid: usize) { // We aren't going to do anything here until we get SMP going. // All non-0 harts initialize here. 
} // /////////////////////////////////// // / RUST MODULES // /////////////////////////////////// pub mod assembly; pub mod block; pub mod buffer; pub mod console; pub mod cpu; pub mod elf; pub mod fs; pub mod gpu; pub mod input; pub mod kmem; pub mod lock; pub mod page; pub mod plic; pub mod process; pub mod rng; pub mod sched; pub mod syscall; pub mod trap; pub mod uart; pub mod vfs; pub mod virtio; pub mod test; ================================================ FILE: risc_v/src/page.rs ================================================ // page.rs // Memory routines // Stephen Marz // 6 October 2019 use core::{mem::size_of, ptr::null_mut}; // //////////////////////////////// // // Allocation routines // //////////////////////////////// extern "C" { static HEAP_START: usize; static HEAP_SIZE: usize; } // We will use ALLOC_START to mark the start of the actual // memory we can dish out. static mut ALLOC_START: usize = 0; const PAGE_ORDER: usize = 12; pub const PAGE_SIZE: usize = 1 << 12; /// Align (set to a multiple of some power of two) /// This takes an order which is the exponent to 2^order /// Therefore, all alignments must be made as a power of two. /// This function always rounds up. pub const fn align_val(val: usize, order: usize) -> usize { let o = (1usize << order) - 1; (val + o) & !o } #[repr(u8)] pub enum PageBits { Empty = 0, Taken = 1 << 0, Last = 1 << 1, } impl PageBits { // We convert PageBits to a u8 a lot, so this is // for convenience. pub fn val(self) -> u8 { self as u8 } } // Each page is described by the Page structure. Linux does this // as well, where each 4096-byte chunk of memory has a structure // associated with it. However, there structure is much larger. pub struct Page { flags: u8, } impl Page { // If this page has been marked as the final allocation, // this function returns true. Otherwise, it returns false. 
pub fn is_last(&self) -> bool { self.flags & PageBits::Last.val() != 0 } // If the page is marked as being taken (allocated), then // this function returns true. Otherwise, it returns false. pub fn is_taken(&self) -> bool { self.flags & PageBits::Taken.val() != 0 } // This is the opposite of is_taken(). pub fn is_free(&self) -> bool { !self.is_taken() } // Clear the Page structure and all associated allocations. pub fn clear(&mut self) { self.flags = PageBits::Empty.val(); } // Set a certain flag. We ran into trouble here since PageBits // is an enumeration and we haven't implemented the BitOr Trait // on it. pub fn set_flag(&mut self, flag: PageBits) { self.flags |= flag.val(); } pub fn clear_flag(&mut self, flag: PageBits) { self.flags &= !(flag.val()); } } /// Initialize the allocation system. There are several ways that we can /// implement the page allocator: /// 1. Free list (singly linked list where it starts at the first free /// allocation) 2. Bookkeeping list (structure contains a taken and length) /// 3. Allocate one Page structure per 4096 bytes (this is what I chose) /// 4. Others pub fn init() { unsafe { // let desc_per_page = PAGE_SIZE / size_of::(); let num_pages = HEAP_SIZE / PAGE_SIZE; // let num_desc_pages = num_pages / desc_per_page; let ptr = HEAP_START as *mut Page; // Clear all pages to make sure that they aren't accidentally // taken for i in 0..num_pages { (*ptr.add(i)).clear(); } // Determine where the actual useful memory starts. This will be // after all Page structures. We also must align the ALLOC_START // to a page-boundary (PAGE_SIZE = 4096). 
ALLOC_START = // (HEAP_START + num_pages * size_of::() + PAGE_SIZE - 1) // & !(PAGE_SIZE - 1); ALLOC_START = align_val( HEAP_START + num_pages * size_of::(), PAGE_ORDER, ); } } /// Allocate a page or multiple pages /// pages: the number of PAGE_SIZE pages to allocate pub fn alloc(pages: usize) -> *mut u8 { // We have to find a contiguous allocation of pages assert!(pages > 0); unsafe { // We create a Page structure for each page on the heap. We // actually might have more since HEAP_SIZE moves and so does // the size of our structure, but we'll only waste a few bytes. let num_pages = HEAP_SIZE / PAGE_SIZE; let ptr = HEAP_START as *mut Page; for i in 0..num_pages - pages { let mut found = false; // Check to see if this Page is free. If so, we have our // first candidate memory address. if (*ptr.add(i)).is_free() { // It was FREE! Yay! found = true; for j in i..i + pages { // Now check to see if we have a // contiguous allocation for all of the // request pages. If not, we should // check somewhere else. if (*ptr.add(j)).is_taken() { found = false; break; } } } // We've checked to see if there are enough contiguous // pages to form what we need. If we couldn't, found // will be false, otherwise it will be true, which means // we've found valid memory we can allocate. if found { for k in i..i + pages - 1 { (*ptr.add(k)).set_flag(PageBits::Taken); } // The marker for the last page is // PageBits::Last This lets us know when we've // hit the end of this particular allocation. (*ptr.add(i+pages-1)).set_flag(PageBits::Taken); (*ptr.add(i+pages-1)).set_flag(PageBits::Last); // The Page structures themselves aren't the // useful memory. Instead, there is 1 Page // structure per 4096 bytes starting at // ALLOC_START. return (ALLOC_START + PAGE_SIZE * i) as *mut u8; } } } // If we get here, that means that no contiguous allocation was // found. 
null_mut() } /// Allocate and zero a page or multiple pages /// pages: the number of pages to allocate /// Each page is PAGE_SIZE which is calculated as 1 << PAGE_ORDER /// On RISC-V, this typically will be 4,096 bytes. pub fn zalloc(pages: usize) -> *mut u8 { // Allocate and zero a page. // First, let's get the allocation let ret = alloc(pages); if !ret.is_null() { let size = (PAGE_SIZE * pages) / 8; let big_ptr = ret as *mut u64; for i in 0..size { // We use big_ptr so that we can force an // sd (store doubleword) instruction rather than // the sb. This means 8x fewer stores than before. // Typically we have to be concerned about remaining // bytes, but fortunately 4096 % 8 = 0, so we // won't have any remaining bytes. unsafe { (*big_ptr.add(i)) = 0; } } } ret } /// Deallocate a page by its pointer /// The way we've structured this, it will automatically coalesce /// contiguous pages. pub fn dealloc(ptr: *mut u8) { // Make sure we don't try to free a null pointer. assert!(!ptr.is_null()); unsafe { let addr = HEAP_START + (ptr as usize - ALLOC_START) / PAGE_SIZE; // Make sure that the address makes sense. The address we // calculate here is the page structure, not the HEAP address! assert!(addr >= HEAP_START && addr < ALLOC_START); let mut p = addr as *mut Page; // println!("PTR in is {:p}, addr is 0x{:x}", ptr, addr); assert!((*p).is_taken(), "Freeing a non-taken page?"); // Keep clearing pages until we hit the last page. while (*p).is_taken() && !(*p).is_last() { (*p).clear(); p = p.add(1); } // If the following assertion fails, it is most likely // caused by a double-free. assert!( (*p).is_last() == true, "Possible double-free detected! (Not taken found \ before last)" ); // If we get here, we've taken care of all previous pages and // we are on the last page. (*p).clear(); } } /// Print all page allocations /// This is mainly used for debugging. 
// ////////////////////////////////
// // MMU Routines
// ////////////////////////////////

/// Bits of an Sv39 page-table entry (RISC-V privileged spec,
/// Figure 4.18), plus pre-OR'd convenience combinations.
#[repr(usize)]
#[derive(Copy, Clone)]
pub enum EntryBits {
	None = 0,
	Valid = 1 << 0,
	Read = 1 << 1,
	Write = 1 << 2,
	Execute = 1 << 3,
	User = 1 << 4,
	Global = 1 << 5,
	Access = 1 << 6,
	Dirty = 1 << 7,

	// Convenience combinations
	ReadWrite = 1 << 1 | 1 << 2,
	ReadExecute = 1 << 1 | 1 << 3,
	ReadWriteExecute = 1 << 1 | 1 << 2 | 1 << 3,

	// User convenience combinations
	UserReadWrite = 1 << 1 | 1 << 2 | 1 << 4,
	UserReadExecute = 1 << 1 | 1 << 3 | 1 << 4,
	UserReadWriteExecute = 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4,
}

impl EntryBits {
	/// Raw value of the bit pattern, for OR-ing into entries.
	pub fn val(self) -> usize {
		self as usize
	}
}

/// One of the 512 entries of a page table (spec Figure 4.18).
pub struct Entry {
	pub entry: usize,
}

impl Entry {
	/// Bit 0 is the V (valid) bit.
	pub fn is_valid(&self) -> bool {
		self.get_entry() & EntryBits::Valid.val() != 0
	}

	pub fn is_invalid(&self) -> bool {
		!self.is_valid()
	}

	/// A leaf has one or more of the R/W/X bits (mask 0xe) set;
	/// otherwise the entry points at the next table level.
	pub fn is_leaf(&self) -> bool {
		self.get_entry() & 0xe != 0
	}

	pub fn is_branch(&self) -> bool {
		!self.is_leaf()
	}

	pub fn set_entry(&mut self, entry: usize) {
		self.entry = entry;
	}

	pub fn get_entry(&self) -> usize {
		self.entry
	}
}

/// A single page table: 512 (2^9) entries of 64 bits each.
pub struct Table {
	pub entries: [Entry; 512],
}

impl Table {
	pub fn len() -> usize {
		512
	}
}
assert!(bits & 0xe != 0); // Extract out each VPN from the virtual address // On the virtual address, each VPN is exactly 9 bits, // which is why we use the mask 0x1ff = 0b1_1111_1111 (9 bits) let vpn = [ // VPN[0] = vaddr[20:12] (vaddr >> 12) & 0x1ff, // VPN[1] = vaddr[29:21] (vaddr >> 21) & 0x1ff, // VPN[2] = vaddr[38:30] (vaddr >> 30) & 0x1ff, ]; // Just like the virtual address, extract the physical address // numbers (PPN). However, PPN[2] is different in that it stores // 26 bits instead of 9. Therefore, we use, // 0x3ff_ffff = 0b11_1111_1111_1111_1111_1111_1111 (26 bits). let ppn = [ // PPN[0] = paddr[20:12] (paddr >> 12) & 0x1ff, // PPN[1] = paddr[29:21] (paddr >> 21) & 0x1ff, // PPN[2] = paddr[55:30] (paddr >> 30) & 0x3ff_ffff, ]; // We will use this as a floating reference so that we can set // individual entries as we walk the table. let mut v = &mut root.entries[vpn[2]]; // Now, we're going to traverse the page table and set the bits // properly. We expect the root to be valid, however we're required to // create anything beyond the root. // In Rust, we create a range iterator using the .. operator. // The .rev() will reverse the iteration since we need to start with // VPN[2] The .. operator is inclusive on start but exclusive on end. // So, (0..2) will iterate 0 and 1. for i in (level..2).rev() { if !v.is_valid() { // Allocate a page let page = zalloc(1); // The page is already aligned by 4,096, so store it // directly The page is stored in the entry shifted // right by 2 places. v.set_entry( (page as usize >> 2) | EntryBits::Valid.val(), ); } let entry = ((v.get_entry() & !0x3ff) << 2) as *mut Entry; v = unsafe { entry.add(vpn[i]).as_mut().unwrap() }; } // When we get here, we should be at VPN[0] and v should be pointing to // our entry. 
// The entry structure is Figure 4.18 in the RISC-V Privileged // Specification let entry = (ppn[2] << 28) | // PPN[2] = [53:28] (ppn[1] << 19) | // PPN[1] = [27:19] (ppn[0] << 10) | // PPN[0] = [18:10] bits | // Specified bits, such as User, Read, Write, etc EntryBits::Valid.val() | // Valid bit EntryBits::Dirty.val() | // Some machines require this to =1 EntryBits::Access.val() // Just like dirty, some machines require this ; // Set the entry. V should be set to the correct pointer by the loop // above. v.set_entry(entry); } /// Unmaps and frees all memory associated with a table. /// root: The root table to start freeing. /// NOTE: This does NOT free root directly. This must be /// freed manually. /// The reason we don't free the root is because it is /// usually embedded into the Process structure. pub fn unmap(root: &mut Table) { // Start with level 2 for lv2 in 0..Table::len() { let ref entry_lv2 = root.entries[lv2]; if entry_lv2.is_valid() && entry_lv2.is_branch() { // This is a valid entry, so drill down and free. let memaddr_lv1 = (entry_lv2.get_entry() & !0x3ff) << 2; let table_lv1 = unsafe { // Make table_lv1 a mutable reference instead of // a pointer. (memaddr_lv1 as *mut Table).as_mut().unwrap() }; for lv1 in 0..Table::len() { let ref entry_lv1 = table_lv1.entries[lv1]; if entry_lv1.is_valid() && entry_lv1.is_branch() { let memaddr_lv0 = (entry_lv1.get_entry() & !0x3ff) << 2; // The next level is level 0, which // cannot have branches, therefore, // we free here. dealloc(memaddr_lv0 as *mut u8); } } dealloc(memaddr_lv1 as *mut u8); } } } /// Walk the page table to convert a virtual address to a /// physical address. /// If a page fault would occur, this returns None /// Otherwise, it returns Some with the physical address. 
pub fn virt_to_phys(root: &Table, vaddr: usize) -> Option { // Walk the page table pointed to by root let vpn = [ // VPN[0] = vaddr[20:12] (vaddr >> 12) & 0x1ff, // VPN[1] = vaddr[29:21] (vaddr >> 21) & 0x1ff, // VPN[2] = vaddr[38:30] (vaddr >> 30) & 0x1ff, ]; let mut v = &root.entries[vpn[2]]; for i in (0..=2).rev() { if v.is_invalid() { // This is an invalid entry, page fault. break; } else if v.is_leaf() { // According to RISC-V, a leaf can be at any level. // The offset mask masks off the PPN. Each PPN is 9 // bits and they start at bit #12. So, our formula // 12 + i * 9 let off_mask = (1 << (12 + i * 9)) - 1; let vaddr_pgoff = vaddr & off_mask; let addr = ((v.get_entry() << 2) as usize) & !off_mask; return Some(addr | vaddr_pgoff); } // Set v to the next entry which is pointed to by this // entry. However, the address was shifted right by 2 places // when stored in the page table entry, so we shift it left // to get it back into place. let entry = ((v.get_entry() & !0x3ff) << 2) as *const Entry; // We do i - 1 here, however we should get None or Some() above // before we do 0 - 1 = -1. v = unsafe { entry.add(vpn[i - 1]).as_ref().unwrap() }; } // If we get here, we've exhausted all valid tables and haven't // found a leaf. None } ================================================ FILE: risc_v/src/plic.rs ================================================ // plic.rs // Platform Level Interrupt Controller (PLIC) // Stephen Marz // 1 Nov 2019 use crate::uart; use crate::virtio; const PLIC_PRIORITY: usize = 0x0c00_0000; const PLIC_PENDING: usize = 0x0c00_1000; const PLIC_INT_ENABLE: usize = 0x0c00_2000; const PLIC_THRESHOLD: usize = 0x0c20_0000; const PLIC_CLAIM: usize = 0x0c20_0004; // Each register is 4-bytes (u32) // The PLIC is an external interrupt controller. The one // used by QEMU virt is the same as the SiFive PLIC. 
// https://sifive.cdn.prismic.io/sifive%2F834354f0-08e6-423c-bf1f-0cb58ef14061_fu540-c000-v1.0.pdf // Chapter 10 explains the priority, pending, interrupt enable, threshold and claims // The virt machine has the following external interrupts (from Qemu source): // Interrupt 0 is a "null" interrupt and is hardwired to 0. // VIRTIO = [1..8] // UART0 = 10 // PCIE = [32..35] /// Get the next available interrupt. This is the "claim" process. /// The plic will automatically sort by priority and hand us the /// ID of the interrupt. For example, if the UART is interrupting /// and it's next, we will get the value 10. pub fn next() -> Option { let claim_reg = PLIC_CLAIM as *const u32; let claim_no; // The claim register is filled with the highest-priority, enabled interrupt. unsafe { claim_no = claim_reg.read_volatile(); } if claim_no == 0 { // The interrupt 0 is hardwired to 0, which tells us that there is no // interrupt to claim, hence we return None. None } else { // If we get here, we've gotten a non-0 interrupt. Some(claim_no) } } /// Complete a pending interrupt by id. The id should come /// from the next() function above. pub fn complete(id: u32) { let complete_reg = PLIC_CLAIM as *mut u32; unsafe { // We actually write a u32 into the entire complete_register. // This is the same register as the claim register, but it can // differentiate based on whether we're reading or writing. complete_reg.write_volatile(id); } } /// Set the global threshold. The threshold can be a value [0..7]. /// The PLIC will mask any interrupts at or below the given threshold. /// This means that a threshold of 7 will mask ALL interrupts and /// a threshold of 0 will allow ALL interrupts. pub fn set_threshold(tsh: u8) { // We do tsh because we're using a u8, but our maximum number // is a 3-bit 0b111. So, we and with 7 (0b111) to just get the // last three bits. 
let actual_tsh = tsh & 7; let tsh_reg = PLIC_THRESHOLD as *mut u32; unsafe { tsh_reg.write_volatile(actual_tsh as u32); } } /// See if a given interrupt id is pending. pub fn is_pending(id: u32) -> bool { let pend = PLIC_PENDING as *const u32; let actual_id = 1 << id; let pend_ids; unsafe { pend_ids = pend.read_volatile(); } actual_id & pend_ids != 0 } /// Enable a given interrupt id pub fn enable(id: u32) { let enables = PLIC_INT_ENABLE as *mut u32; let actual_id = 1 << id; unsafe { // Unlike the complete and claim registers, the plic_int_enable // register is a bitset where the id is the bit index. The register // is a 32-bit register, so that gives us enables for interrupts // 31 through 1 (0 is hardwired to 0). enables.write_volatile(enables.read_volatile() | actual_id); } } /// Set a given interrupt priority to the given priority. /// The priority must be [0..7] pub fn set_priority(id: u32, prio: u8) { let actual_prio = prio as u32 & 7; let prio_reg = PLIC_PRIORITY as *mut u32; unsafe { // The offset for the interrupt id is: // PLIC_PRIORITY + 4 * id // Since we're using pointer arithmetic on a u32 type, // it will automatically multiply the id by 4. prio_reg.add(id as usize).write_volatile(actual_prio); } } pub fn handle_interrupt() { if let Some(interrupt) = next() { // If we get here, we've got an interrupt from the claim register. The PLIC will // automatically prioritize the next interrupt, so when we get it from claim, it // will be the next in priority order. match interrupt { 1..=8 => { virtio::handle_interrupt(interrupt); } 10 => { // Interrupt 10 is the UART interrupt. uart::handle_interrupt(); } _ => { println!("Unknown external interrupt: {}", interrupt); } } // We've claimed it, so now say that we've handled it. This resets the interrupt pending // and allows the UART to interrupt again. Otherwise, the UART will get "stuck". 
complete(interrupt); } } ================================================ FILE: risc_v/src/process.rs ================================================ // process.rs // Kernel and user processes // Stephen Marz // 27 Nov 2019 use crate::{cpu::{get_mtime, CpuMode, TrapFrame, Registers}, fs::Inode, page::{dealloc, unmap, zalloc, Table}, syscall::{syscall_exit, syscall_yield}}; use alloc::{string::String, collections::{vec_deque::VecDeque, BTreeMap}}; use core::ptr::null_mut; use crate::lock::Mutex; // How many pages are we going to give a process for their // stack? pub const STACK_PAGES: usize = 35; // We want to adjust the stack to be at the bottom of the memory allocation // regardless of where it is on the kernel heap. pub const STACK_ADDR: usize = 0x1_0000_0000; // All processes will have a defined starting point in virtual memory. // We will use this later when we load processes from disk. pub const PROCESS_STARTING_ADDR: usize = 0x2000_0000; // Here, we store a process list. It uses the global allocator // that we made before and its job is to store all processes. // We will have this list OWN the process. So, anytime we want // the process, we will consult the process list. // Using an Option here is one method of creating a "lazy static". // Rust requires that all statics be initialized, but all // initializations must be at compile-time. We cannot allocate // a VecDeque at compile time, so we are somewhat forced to // do this. pub static mut PROCESS_LIST: Option> = None; pub static mut PROCESS_LIST_MUTEX: Mutex = Mutex::new(); // We can search through the process list to get a new PID, but // it's probably easier and faster just to increase the pid: pub static mut NEXT_PID: u16 = 1; // The following set_* and get_by_pid functions are C-style functions // They probably need to be re-written in a more Rusty style, but for // now they are how we control processes by PID. /// Set a process' state to running. This doesn't do any checks. 
/// If this PID is not found, this returns false. Otherwise, it
/// returns true.
pub fn set_running(pid: u16) -> bool {
	update_process(pid, |proc| {
		proc.state = ProcessState::Running;
	})
}

/// Set a process' state to waiting. This doesn't do any checks.
/// If this PID is not found, this returns false. Otherwise, it
/// returns true.
pub fn set_waiting(pid: u16) -> bool {
	update_process(pid, |proc| {
		proc.state = ProcessState::Waiting;
	})
}

/// Sleep a process for `duration` timer ticks. The process is put
/// in the Sleeping state and its wake-up time is set relative to
/// the current mtime. Returns false if the PID is not found.
pub fn set_sleeping(pid: u16, duration: usize) -> bool {
	update_process(pid, |proc| {
		proc.state = ProcessState::Sleeping;
		proc.sleep_until = get_mtime() + duration;
	})
}

/// Find the process with the given `pid` in the global process list
/// and apply `f` to it. Returns true if a matching process was found.
/// Yes, this is O(n). A better idea here would be a static list
/// of process pointers.
fn update_process<F>(pid: u16, mut f: F) -> bool
	where F: FnMut(&mut Process)
{
	let mut retval = false;
	unsafe {
		// .take() moves the Deque out of the Option, which acts as
		// a crude form of mutual exclusion: anyone else who takes
		// while we hold it sees None.
		if let Some(mut pl) = PROCESS_LIST.take() {
			for proc in pl.iter_mut() {
				if proc.pid == pid {
					f(proc);
					retval = true;
					break;
				}
			}
			// Now, we no longer need the owned Deque, so we hand it
			// back by replacing the PROCESS_LIST's None with the
			// Some(pl).
			PROCESS_LIST.replace(pl);
		}
	}
	retval
}

/// Delete a process given by pid. If this process doesn't exist,
/// this function does nothing.
pub fn delete_process(pid: u16) { unsafe { if let Some(mut pl) = PROCESS_LIST.take() { for i in 0..pl.len() { let p = pl.get_mut(i).unwrap(); if (*(*p).frame).pid as u16 == pid { // When the structure gets dropped, all // of the allocations get deallocated. pl.remove(i); break; } } // Now, we no longer need the owned Deque, so we hand it // back by replacing the PROCESS_LIST's None with the // Some(pl). PROCESS_LIST.replace(pl); } } } /// Get a process by PID. Since we leak the process list, this is /// unsafe since the process can be deleted and we'll still have a pointer. pub unsafe fn get_by_pid(pid: u16) -> *mut Process { let mut ret = null_mut(); if let Some(mut pl) = PROCESS_LIST.take() { for i in pl.iter_mut() { if (*(i.frame)).pid as u16 == pid { ret = i as *mut Process; break; } } PROCESS_LIST.replace(pl); } ret } /// We will eventually move this function out of here, but its /// job is just to take a slot in the process list. fn init_process() { // We can't do much here until we have system calls because // we're running in User space. println!("Init process started..."); loop { // Alright, I forgot. We cannot put init to sleep since the // scheduler will loop until it finds a process to run. Since // the scheduler is called in an interrupt context, nothing else // can happen until a process becomes available. syscall_yield(); } } /// Add a kernel process. pub fn add_kernel_process(func: fn()) -> u16 { // This is the Rust-ism that really trips up C++ programmers. // PROCESS_LIST is wrapped in an Option<> enumeration, which // means that the Option owns the Deque. We can only borrow from // it or move ownership to us. In this case, we choose the // latter, where we move ownership to us, add a process, and // then move ownership back to the PROCESS_LIST. // This allows mutual exclusion as anyone else trying to grab // the process list will get None rather than the Deque. 
// .take() will replace PROCESS_LIST with None and give // us the only copy of the Deque. let func_addr = func as usize; let func_vaddr = func_addr; //- 0x6000_0000; // println!("func_addr = {:x} -> {:x}", func_addr, func_vaddr); // We will convert NEXT_PID below into an atomic increment when // we start getting into multi-hart processing. For now, we want // a process. Get it to work, then improve it! let my_pid = unsafe { NEXT_PID }; let mut ret_proc = Process { frame: zalloc(1) as *mut TrapFrame, stack: zalloc(STACK_PAGES), pid: my_pid, mmu_table: zalloc(1) as *mut Table, state: ProcessState::Running, data: ProcessData::new(), sleep_until: 0, program: null_mut(), brk: 0, }; unsafe { NEXT_PID += 1; } // Now we move the stack pointer to the bottom of the // allocation. The spec shows that register x2 (2) is the stack // pointer. // We could use ret_proc.stack.add, but that's an unsafe // function which would require an unsafe block. So, convert it // to usize first and then add PAGE_SIZE is better. // We also need to set the stack adjustment so that it is at the // bottom of the memory and far away from heap allocations. unsafe { (*ret_proc.frame).pc = func_vaddr; // 1 is the return address register. This makes it so we // don't have to do syscall_exit() when a kernel process // finishes. (*ret_proc.frame).regs[Registers::Ra as usize] = ra_delete_proc as usize; (*ret_proc.frame).regs[Registers::Sp as usize] = ret_proc.stack as usize + STACK_PAGES * 4096; (*ret_proc.frame).mode = CpuMode::Machine as usize; (*ret_proc.frame).pid = ret_proc.pid as usize; } if let Some(mut pl) = unsafe { PROCESS_LIST.take() } { pl.push_back(ret_proc); // Now, we no longer need the owned Deque, so we hand it // back by replacing the PROCESS_LIST's None with the // Some(pl). unsafe { PROCESS_LIST.replace(pl); } my_pid } else { unsafe { PROCESS_LIST_MUTEX.unlock(); } // TODO: When we get to multi-hart processing, we need to keep // trying to grab the process list. 
We can do this with an // atomic instruction. but right now, we're a single-processor // computer. 0 } } /// A kernel process is just a function inside of the kernel. Each /// function will perform a "ret" or return through the return address /// (ra) register. This function address is what it will return to, which /// in turn calls exit. If we don't exit, the process will most likely /// fault. fn ra_delete_proc() { syscall_exit(); } /// This is the same as the add_kernel_process function, except you can pass /// arguments. Typically, this will be a memory address on the heap where /// arguments can be found. pub fn add_kernel_process_args(func: fn(args_ptr: usize), args: usize) -> u16 { // This is the Rust-ism that really trips up C++ programmers. // PROCESS_LIST is wrapped in an Option<> enumeration, which // means that the Option owns the Deque. We can only borrow from // it or move ownership to us. In this case, we choose the // latter, where we move ownership to us, add a process, and // then move ownership back to the PROCESS_LIST. // This allows mutual exclusion as anyone else trying to grab // the process list will get None rather than the Deque. unsafe {PROCESS_LIST_MUTEX.spin_lock(); } if let Some(mut pl) = unsafe { PROCESS_LIST.take() } { // .take() will replace PROCESS_LIST with None and give // us the only copy of the Deque. let func_addr = func as usize; let func_vaddr = func_addr; //- 0x6000_0000; // println!("func_addr = {:x} -> {:x}", func_addr, func_vaddr); // We will convert NEXT_PID below into an atomic increment when // we start getting into multi-hart processing. For now, we want // a process. Get it to work, then improve it! 
let my_pid = unsafe { NEXT_PID }; let mut ret_proc = Process { frame: zalloc(1) as *mut TrapFrame, stack: zalloc(STACK_PAGES), pid: my_pid, mmu_table: zalloc(1) as *mut Table, state: ProcessState::Running, data: ProcessData::new(), sleep_until: 0, program: null_mut(), brk: 0, }; unsafe { NEXT_PID += 1; } // Now we move the stack pointer to the bottom of the // allocation. The spec shows that register x2 (2) is the stack // pointer. // We could use ret_proc.stack.add, but that's an unsafe // function which would require an unsafe block. So, convert it // to usize first and then add PAGE_SIZE is better. // We also need to set the stack adjustment so that it is at the // bottom of the memory and far away from heap allocations. unsafe { (*ret_proc.frame).pc = func_vaddr; (*ret_proc.frame).regs[Registers::A0 as usize] = args; // 1 is the return address register. This makes it so we // don't have to do syscall_exit() when a kernel process // finishes. (*ret_proc.frame).regs[Registers::Ra as usize] = ra_delete_proc as usize; (*ret_proc.frame).regs[Registers::Sp as usize] = ret_proc.stack as usize + STACK_PAGES * 4096; (*ret_proc.frame).mode = CpuMode::Machine as usize; (*ret_proc.frame).pid = ret_proc.pid as usize; } pl.push_back(ret_proc); // Now, we no longer need the owned Deque, so we hand it // back by replacing the PROCESS_LIST's None with the // Some(pl). unsafe { PROCESS_LIST.replace(pl); PROCESS_LIST_MUTEX.unlock(); } my_pid } else { unsafe { PROCESS_LIST_MUTEX.unlock(); } // TODO: When we get to multi-hart processing, we need to keep // trying to grab the process list. We can do this with an // atomic instruction. but right now, we're a single-processor // computer. 0 } } /// This should only be called once, and its job is to create /// the init process. Right now, this process is in the kernel, /// but later, it should call the shell. 
pub fn init() -> usize { unsafe { PROCESS_LIST_MUTEX.spin_lock(); PROCESS_LIST = Some(VecDeque::with_capacity(15)); // add_process_default(init_process); add_kernel_process(init_process); // Ugh....Rust is giving me fits over here! // I just want a memory address to the trap frame, but // due to the borrow rules of Rust, I'm fighting here. So, // instead, let's move the value out of PROCESS_LIST, get // the address, and then move it right back in. let pl = PROCESS_LIST.take().unwrap(); let p = pl.front().unwrap().frame; // let frame = p as *const TrapFrame as usize; // println!("Init's frame is at 0x{:08x}", frame); // Put the process list back in the global. PROCESS_LIST.replace(pl); PROCESS_LIST_MUTEX.unlock(); // Return the first instruction's address to execute. // Since we use the MMU, all start here. (*p).pc } } // Our process must be able to sleep, wait, or run. // Running - means that when the scheduler finds this process, it can run it. // Sleeping - means that the process is waiting on a certain amount of time. // Waiting - means that the process is waiting on I/O // Dead - We should never get here, but we can flag a process as Dead and clean // it out of the list later. pub enum ProcessState { Running, Sleeping, Waiting, Dead, } pub struct Process { pub frame: *mut TrapFrame, pub stack: *mut u8, pub pid: u16, pub mmu_table: *mut Table, pub state: ProcessState, pub data: ProcessData, pub sleep_until: usize, pub program: *mut u8, pub brk: usize, } impl Drop for Process { /// Since we're storing ownership of a Process in the linked list, /// we can cause it to deallocate automatically when it is removed. fn drop(&mut self) { // We allocate the stack as a page. dealloc(self.stack); // This is unsafe, but it's at the drop stage, so we won't // be using this again. unsafe { // Remember that unmap unmaps all levels of page tables // except for the root. It also deallocates the memory // associated with the tables. 
unmap(&mut *self.mmu_table); } dealloc(self.mmu_table as *mut u8); dealloc(self.frame as *mut u8); for i in self.data.pages.drain(..) { dealloc(i as *mut u8); } // Kernel processes don't have a program, instead the program is linked // directly in the kernel. if !self.program.is_null() { dealloc(self.program); } } } pub enum Descriptor { File(Inode), Device(usize), Framebuffer, ButtonEvents, AbsoluteEvents, Console, Network, Unknown, } // The private data in a process contains information // that is relevant to where we are, including the path // and open file descriptors. // We will allow dead code for now until we have a need for the // private process data. This is essentially our resource control block (RCB). #[allow(dead_code)] pub struct ProcessData { pub environ: BTreeMap, pub fdesc: BTreeMap, pub cwd: String, pub pages: VecDeque, } // This is private data that we can query with system calls. // If we want to implement CFQ (completely fair queuing), which // is a per-process block queuing algorithm, we can put that here. 
impl ProcessData {
	/// Create an empty resource control block rooted at "/".
	pub fn new() -> Self {
		ProcessData {
			environ: BTreeMap::new(),
			fdesc:   BTreeMap::new(),
			cwd:     String::from("/"),
			pages:   VecDeque::new(),
		}
	}
}

================================================
FILE: risc_v/src/rng.rs
================================================
// rng.rs
// Random number generator using VirtIO
// Stephen Marz
// 16 March 2020

#![allow(dead_code)]

use crate::{kmem::{kfree, kmalloc},
            page::{zalloc, PAGE_SIZE},
            virtio,
            virtio::{Descriptor, MmioOffsets, Queue, StatusField, VIRTIO_RING_SIZE}};
use core::{mem::size_of, ptr::null_mut};

/// Driver state for one virtio entropy (RNG) device.
pub struct EntropyDevice {
	queue:        *mut Queue,
	dev:          *mut u32,
	idx:          u16,
	ack_used_idx: u16,
}

impl EntropyDevice {
	pub const fn new() -> Self {
		EntropyDevice { queue:        null_mut(),
		                dev:          null_mut(),
		                idx:          0,
		                ack_used_idx: 0, }
	}
}

// One slot per possible virtio MMIO device (0x1000_1000..0x1000_8000).
static mut ENTROPY_DEVICES: [Option<EntropyDevice>; 8] =
	[None, None, None, None, None, None, None, None];

/// Initialize a virtio entropy device found at MMIO address `ptr`,
/// following the virtio driver initialization sequence. Returns true
/// on success, false if the device rejects our features or queue size.
pub fn setup_entropy_device(ptr: *mut u32) -> bool {
	unsafe {
		// We can get the index of the device based on its address.
		// 0x1000_1000 is index 0
		// 0x1000_2000 is index 1
		// ...
		// 0x1000_8000 is index 7
		// To get the number that changes over, we shift right 12 places (3 hex digits)
		let idx = (ptr as usize - virtio::MMIO_VIRTIO_START) >> 12;

		// [Driver] Device Initialization
		// 1. Reset the device (write 0 into status)
		ptr.add(MmioOffsets::Status.scale32()).write_volatile(0);
		let mut status_bits = StatusField::Acknowledge.val32();
		// 2. Set ACKNOWLEDGE status bit
		ptr.add(MmioOffsets::Status.scale32()).write_volatile(status_bits);
		// 3. Set the DRIVER status bit
		// NOTE(review): the code ORs in DriverOk here rather than a
		// separate Driver bit — confirm against the virtio spec's
		// intended step-3 bit.
		status_bits |= StatusField::DriverOk.val32();
		ptr.add(MmioOffsets::Status.scale32()).write_volatile(status_bits);
		// 4. Read device feature bits, write subset of feature
		// bits understood by OS and driver to the device.
		let host_features = ptr.add(MmioOffsets::HostFeatures.scale32()).read_volatile();
		ptr.add(MmioOffsets::GuestFeatures.scale32()).write_volatile(host_features);
		// 5. Set the FEATURES_OK status bit
		status_bits |= StatusField::FeaturesOk.val32();
		ptr.add(MmioOffsets::Status.scale32()).write_volatile(status_bits);
		// 6. Re-read status to ensure FEATURES_OK is still set.
		// Otherwise, it doesn't support our features.
		let status_ok = ptr.add(MmioOffsets::Status.scale32()).read_volatile();
		// If the status field no longer has features_ok set,
		// that means that the device couldn't accept
		// the features that we request. Therefore, this is
		// considered a "failed" state.
		if false == StatusField::features_ok(status_ok) {
			print!("features fail...");
			ptr.add(MmioOffsets::Status.scale32()).write_volatile(StatusField::Failed.val32());
			return false;
		}
		// 7. Perform device-specific setup.
		// Set the queue num. We have to make sure that the
		// queue size is valid because the device can only take
		// a certain size.
		let qnmax = ptr.add(MmioOffsets::QueueNumMax.scale32()).read_volatile();
		ptr.add(MmioOffsets::QueueNum.scale32()).write_volatile(VIRTIO_RING_SIZE as u32);
		if VIRTIO_RING_SIZE as u32 > qnmax {
			print!("queue size fail...");
			return false;
		}
		// We add PAGE_SIZE - 1 to round up and then do an integer
		// divide to truncate the decimal. We don't add PAGE_SIZE,
		// because if it is exactly PAGE_SIZE bytes, we would get two
		// pages, not one.
		let num_pages = (size_of::<Queue>() + PAGE_SIZE - 1) / PAGE_SIZE;
		// println!("np = {}", num_pages);
		// We allocate a page for each device. This will be the
		// descriptor where we can communicate with the device. We
		// will still use an MMIO register (in particular, QueueNotify)
		// to actually tell the device we put something in memory. We
		// also have to be careful with memory ordering. We don't want
		// to issue a notify before all memory writes have finished.
		// We will look at that later, but we need what is called a
		// memory "fence" or barrier.
		ptr.add(MmioOffsets::QueueSel.scale32()).write_volatile(0);
		// Alignment is very important here. This is the memory address
		// alignment between the available and used rings. If this is wrong,
		// then we and the device will refer to different memory addresses
		// and hence get the wrong data in the used ring.
		// ptr.add(MmioOffsets::QueueAlign.scale32()).write_volatile(2);
		let queue_ptr = zalloc(num_pages) as *mut Queue;
		let queue_pfn = queue_ptr as u32;
		ptr.add(MmioOffsets::GuestPageSize.scale32()).write_volatile(PAGE_SIZE as u32);
		// QueuePFN is a physical page number, however it
		// appears for QEMU we have to write the entire memory
		// address. This is a physical memory address where we
		// (the OS) and the device have in common for
		// making and receiving requests.
		ptr.add(MmioOffsets::QueuePfn.scale32()).write_volatile(queue_pfn / PAGE_SIZE as u32);
		// 8. Set the DRIVER_OK status bit. Device is now "live"
		status_bits |= StatusField::DriverOk.val32();
		ptr.add(MmioOffsets::Status.scale32()).write_volatile(status_bits);

		let rngdev = EntropyDevice { queue:        queue_ptr,
		                             dev:          ptr,
		                             idx:          0,
		                             ack_used_idx: 0, };
		ENTROPY_DEVICES[idx] = Some(rngdev);
		true
	}
}

/// Request a random number from the first configured entropy device.
/// NOTE(review): this is a stub. It builds a descriptor but never
/// places it in the device's queue or notifies the device, and it
/// always returns u64::MAX (0u64.wrapping_sub(1)) regardless.
pub fn get_random() -> u64 {
	unsafe {
		for i in ENTROPY_DEVICES.iter() {
			if let Some(_edev) = i {
				let ptr = kmalloc(8);
				let _desc = Descriptor { addr:  ptr as u64,
				                         len:   8,
				                         flags: virtio::VIRTIO_DESC_F_WRITE,
				                         next:  0, };
				let _val = *ptr as u64;
				kfree(ptr);
				break;
			}
		}
	}
	0u64.wrapping_sub(1)
}

================================================
FILE: risc_v/src/sched.rs
================================================
// sched.rs
// Simple process scheduler
// Stephen Marz
// 27 Dec 2019

use crate::process::{ProcessState, PROCESS_LIST, PROCESS_LIST_MUTEX};
use crate::cpu::get_mtime;

/// Pick the next process to run and return the address of its trap
/// frame. Returns 0 when the process-list lock is held elsewhere
/// (meaning "nobody else got scheduled"); otherwise rotates the
/// process list until it finds a Running process or a Sleeping
/// process whose wake-up time has passed.
pub fn schedule() -> usize {
	let mut frame_addr: usize = 0x1111;
	unsafe {
		// If we can't get the lock, then usually this means a kernel
		// process has the lock. So, we return 0. This has a special
		// meaning to whomever calls the scheduler to say "nobody else
		// got scheduled"
		if PROCESS_LIST_MUTEX.try_lock() == false {
			return 0;
		}
		if let Some(mut pl) = PROCESS_LIST.take() {
			// Rust allows us to label loops so that break statements can be
			// targeted.
			'procfindloop: loop {
				pl.rotate_left(1);
				if let Some(prc) = pl.front_mut() {
					match prc.state {
						ProcessState::Running => {
							frame_addr = prc.frame as usize;
							break 'procfindloop;
						},
						ProcessState::Sleeping => {
							// Awaken sleeping processes whose sleep until is in
							// the past.
							if prc.sleep_until <= get_mtime() {
								prc.state = ProcessState::Running;
								frame_addr = prc.frame as usize;
								break 'procfindloop;
							}
						},
						_ => {},
					}
				}
			}
			PROCESS_LIST.replace(pl);
		}
		else {
			println!("could not take process list");
		}
		PROCESS_LIST_MUTEX.unlock();
	}
	frame_addr
}

================================================
FILE: risc_v/src/syscall.rs
================================================
// syscall.rs
// System calls
// Stephen Marz
// 3 Jan 2020

use crate::{block::block_op,
            buffer::Buffer,
            cpu::{dump_registers, Registers, TrapFrame, gp},
            elf,
            fs,
            gpu,
            input::{Event, ABS_EVENTS, KEY_EVENTS},
            page::{map, virt_to_phys, EntryBits, Table, PAGE_SIZE, zalloc},
            process::{add_kernel_process_args, delete_process, get_by_pid, set_sleeping,
                      set_waiting, PROCESS_LIST, PROCESS_LIST_MUTEX, Descriptor}};
use crate::console::{IN_LOCK, IN_BUFFER, push_queue};
use alloc::{boxed::Box, string::String};

/// do_syscall is called from trap.rs to invoke a system call. No discernment is
/// made here whether this is a U-mode, S-mode, or M-mode system call.
/// Since we can't do anything unless we dereference the passed pointer,
/// I went ahead and made the entire function unsafe.
/// If we return 0 from this function, the m_trap function will schedule
/// the next process--consider this a yield. A non-0 is the program counter
/// we want to go back to.
pub unsafe fn do_syscall(mepc: usize, frame: *mut TrapFrame) { // Libgloss expects the system call number in A7, so let's follow // their lead. // A7 is X17, so it's register number 17. let syscall_number = (*frame).regs[gp(Registers::A7)]; // skip the ecall (*frame).pc = mepc + 4; match syscall_number { 93 | 94 => { // exit and exit_group delete_process((*frame).pid as u16); } 1 => { //yield // We don't do anything, but we don't want to print "unknown system call" } 2 => { // Easy putchar print!("{}", (*frame).regs[Registers::A0 as usize] as u8 as char); } 8 => { dump_registers(frame); } 10 => { // Sleep set_sleeping((*frame).pid as u16, (*frame).regs[Registers::A0 as usize]); } 11 => { // execv // A0 = path // A1 = argv let mut path_addr = (*frame).regs[Registers::A0 as usize]; // If the MMU is turned on, translate. if (*frame).satp >> 60 != 0 { let p = get_by_pid((*frame).pid as u16); let table = ((*p).mmu_table).as_ref().unwrap(); path_addr = virt_to_phys(table, path_addr).unwrap(); } // Our path address here is now a physical address. If it came in virtual, // it is now physical. let path_bytes = path_addr as *const u8; let mut path = String::new(); let mut iterator: usize = 0; // I really have to figure out how to change an array of bytes // to a string. For now, this is very C-style and mimics strcpy. loop { let ch = *path_bytes.add(iterator); if ch == 0 { break; } iterator += 1; path.push(ch as char); } // See if we can find the path. if let Ok(inode) = fs::MinixFileSystem::open(8, &path) { let inode_heap = Box::new(inode); // The Box above moves the Inode to a new memory location on the heap. // This needs to be on the heap since we are about to hand over control // to a kernel process. // THERE is an issue here. If we fail somewhere inside the kernel process, // we shouldn't delete our process here. However, since this is asynchronous // our process will still get deleted and the error won't be reported. 
// We have to make sure we relinquish Box control here by using into_raw. // Otherwise, the Box will free the memory associated with this inode. add_kernel_process_args(exec_func, Box::into_raw(inode_heap) as usize); // This deletes us, which is what we want. delete_process((*frame).pid as u16); } else { // If we get here, the path couldn't be found, or for some reason // open failed. So, we return -1 and move on. println!("Could not open path '{}'.", path); (*frame).regs[Registers::A0 as usize] = -1isize as usize; } } 17 => { //getcwd let mut buf = (*frame).regs[gp(Registers::A0)] as *mut u8; let size = (*frame).regs[gp(Registers::A1)]; let process = get_by_pid((*frame).pid as u16).as_ref().unwrap(); let mut iter = 0usize; if (*frame).satp >> 60 != 0 { let table = ((*process).mmu_table).as_mut().unwrap(); let paddr = virt_to_phys(table, buf as usize); if let Some(bufaddr) = paddr { buf = bufaddr as *mut u8; } else { (*frame).regs[gp(Registers::A0)] = -1isize as usize; return; } } for i in process.data.cwd.as_bytes() { if iter == 0 || iter >= size { break; } buf.add(iter).write(*i); iter += 1; } } 48 => { // #define SYS_faccessat 48 (*frame).regs[gp(Registers::A0)] = -1isize as usize; } 57 => { // #define SYS_close 57 let fd = (*frame).regs[gp(Registers::A0)] as u16; let process = get_by_pid((*frame).pid as u16).as_mut().unwrap(); if process.data.fdesc.contains_key(&fd) { process.data.fdesc.remove(&fd); (*frame).regs[gp(Registers::A0)] = 0; } else { (*frame).regs[gp(Registers::A0)] = -1isize as usize; } // Flush? } 63 => { // sys_read let fd = (*frame).regs[gp(Registers::A0)] as u16; let mut buf = (*frame).regs[gp(Registers::A1)] as *mut u8; let size = (*frame).regs[gp(Registers::A2)]; let process = get_by_pid((*frame).pid as u16).as_mut().unwrap(); let mut ret = 0usize; // If we return 0, the trap handler will schedule // another process. 
if fd == 0 { // stdin IN_LOCK.spin_lock(); if let Some(mut inb) = IN_BUFFER.take() { let num_elements = if inb.len() >= size { size } else { inb.len() }; let mut buf_ptr = buf as *mut u8; if num_elements == 0 { push_queue((*frame).pid as u16); set_waiting((*frame).pid as u16); } else { for i in inb.drain(0..num_elements) { if (*frame).satp >> 60 != 0 { let table = ((*process).mmu_table).as_mut().unwrap(); let buf_addr = virt_to_phys(table, buf as usize); if buf_addr.is_none() { break; } buf_ptr = buf_addr.unwrap() as *mut u8; buf_ptr.write(i); ret += 1; println!("R: {}", ret); } buf = buf.add(1); buf_ptr = buf_ptr.add(1); } } IN_BUFFER.replace(inb); } IN_LOCK.unlock(); } (*frame).regs[gp(Registers::A0)] = ret; } 64 => { // sys_write let fd = (*frame).regs[gp(Registers::A0)] as u16; let buf = (*frame).regs[gp(Registers::A1)] as *const u8; let size = (*frame).regs[gp(Registers::A2)]; let process = get_by_pid((*frame).pid as u16).as_ref().unwrap(); if fd == 1 || fd == 2 { // stdout / stderr // println!("WRITE {}, 0x{:08x}, {}", fd, bu/f as usize, size); let mut iter = 0; for i in 0..size { iter += 1; if (*frame).satp >> 60 != 0 { let table = ((*process).mmu_table).as_mut().unwrap(); // We don't need to do the following until we reach a page boundary, // however that code isn't written, yet. 
let paddr = virt_to_phys(table, buf.add(i) as usize); if let Some(bufaddr) = paddr { print!("{}", *(bufaddr as *const u8) as char); } else { break; } } } (*frame).regs[gp(Registers::A0)] = iter as usize; } else { let descriptor = process.data.fdesc.get(&fd); if descriptor.is_none() { (*frame).regs[gp(Registers::A0)] = 0; return; } else { let descriptor = descriptor.unwrap(); match descriptor { Descriptor::Framebuffer => { } Descriptor::File(inode) => { } _ => { // unsupported (*frame).regs[gp(Registers::A0)] = 0; } } } } } 66 => { (*frame).regs[gp(Registers::A0)] = -1isize as usize; } // #define SYS_fstat 80 80 => { // int fstat(int filedes, struct stat *buf) (*frame).regs[gp(Registers::A0)] = 0; } 172 => { // A0 = pid (*frame).regs[Registers::A0 as usize] = (*frame).pid; } 180 => { set_waiting((*frame).pid as u16); let _ = block_op( (*frame).regs[Registers::A0 as usize], (*frame).regs[Registers::A1 as usize] as *mut u8, (*frame).regs[Registers::A2 as usize] as u32, (*frame).regs[Registers::A3 as usize] as u64, false, (*frame).pid as u16 ); } 214 => { // brk // #define SYS_brk 214 // void *brk(void *addr); let addr = (*frame).regs[gp(Registers::A0)]; let process = get_by_pid((*frame).pid as u16).as_mut().unwrap(); // println!("Break move from 0x{:08x} to 0x{:08x}", process.brk, addr); if addr > process.brk { if (*frame).satp >> 60 != 0 { let table = ((*process).mmu_table).as_mut().unwrap(); let diff = (addr + PAGE_SIZE - process.brk) / PAGE_SIZE; for i in 0..diff { let new_addr = zalloc(1) as usize; process.data.pages.push_back(new_addr); map(table, process.brk + (i << 12), new_addr, EntryBits::UserReadWrite.val(), 0); } } process.brk = addr; } (*frame).regs[gp(Registers::A0)] = process.brk; } // System calls 1000 and above are "special" system calls for our OS. I'll // try to mimic the normal system calls below 1000 so that this OS is compatible // with libraries. 
1000 => { // get framebuffer // syscall_get_framebuffer(device) let dev = (*frame).regs[Registers::A0 as usize]; (*frame).regs[Registers::A0 as usize] = 0; if dev > 0 && dev <= 8 { if let Some(p) = gpu::GPU_DEVICES[dev - 1].take() { let ptr = p.get_framebuffer() as usize; if (*frame).satp >> 60 != 0 { let process = get_by_pid((*frame).pid as u16); let table = ((*process).mmu_table).as_mut().unwrap(); let num_pages = (p.get_width() * p.get_height() * 4) as usize / PAGE_SIZE; for i in 0..num_pages { let vaddr = 0x3000_0000 + (i << 12); let paddr = ptr + (i << 12); map(table, vaddr, paddr, EntryBits::UserReadWrite as usize, 0); } gpu::GPU_DEVICES[dev - 1].replace(p); } (*frame).regs[Registers::A0 as usize] = 0x3000_0000; } } } 1001 => { // transfer rectangle and invalidate let dev = (*frame).regs[Registers::A0 as usize]; let x = (*frame).regs[Registers::A1 as usize] as u32; let y = (*frame).regs[Registers::A2 as usize] as u32; let width = (*frame).regs[Registers::A3 as usize] as u32; let height = (*frame).regs[Registers::A4 as usize] as u32; gpu::transfer(dev, x, y, width, height); } 1002 => { // wait for keyboard events let mut ev = KEY_EVENTS.take().unwrap(); let max_events = (*frame).regs[Registers::A1 as usize]; let vaddr = (*frame).regs[Registers::A0 as usize] as *const Event; if (*frame).satp >> 60 != 0 { let process = get_by_pid((*frame).pid as u16); let table = (*process).mmu_table.as_mut().unwrap(); (*frame).regs[Registers::A0 as usize] = 0; let num_events = if max_events <= ev.len() { max_events } else { ev.len() }; for i in 0..num_events { let paddr = virt_to_phys(table, vaddr.add(i) as usize); if paddr.is_none() { break; } let paddr = paddr.unwrap() as *mut Event; *paddr = ev.pop_front().unwrap(); (*frame).regs[Registers::A0 as usize] += 1; } } KEY_EVENTS.replace(ev); } 1004 => { // wait for abs events let mut ev = ABS_EVENTS.take().unwrap(); let max_events = (*frame).regs[Registers::A1 as usize]; let vaddr = (*frame).regs[Registers::A0 as usize] as *const 
Event; if (*frame).satp >> 60 != 0 { let process = get_by_pid((*frame).pid as u16); let table = ((*process).mmu_table as *mut Table).as_mut().unwrap(); (*frame).regs[Registers::A0 as usize] = 0; for i in 0..if max_events <= ev.len() { max_events } else { ev.len() } { let paddr = virt_to_phys(table, vaddr.add(i) as usize); if paddr.is_none() { break; } let paddr = paddr.unwrap() as *mut Event; *paddr = ev.pop_front().unwrap(); (*frame).regs[Registers::A0 as usize] += 1; } } ABS_EVENTS.replace(ev); } 1024 => { // #define SYS_open 1024 let mut path = (*frame).regs[gp(Registers::A0)]; let _perm = (*frame).regs[gp(Registers::A1)]; let process = get_by_pid((*frame).pid as u16).as_mut().unwrap(); if (*frame).satp >> 60 != 0 { let table = process.mmu_table.as_mut().unwrap(); let paddr = virt_to_phys(table, path); if paddr.is_none() { (*frame).regs[gp(Registers::A0)] = -1isize as usize; return; } path = paddr.unwrap(); } let path_ptr = path as *const u8; let mut str_path = String::new(); for i in 0..256 { let c = path_ptr.add(i).read(); if c == 0 { break; } str_path.push(c as char); } // Allocate a blank file descriptor let mut max_fd = 2; for k in process.data.fdesc.keys() { if *k > max_fd { max_fd = *k; } } max_fd += 1; match str_path.as_str() { "/dev/fb" => { // framebuffer process.data.fdesc.insert(max_fd, Descriptor::Framebuffer); } "/dev/butev" => { process.data.fdesc.insert(max_fd, Descriptor::ButtonEvents); } "/dev/absev" => { process.data.fdesc.insert(max_fd, Descriptor::AbsoluteEvents); } _ => { let res = fs::MinixFileSystem::open(8, &str_path); if res.is_err() { (*frame).regs[gp(Registers::A0)] = -1isize as usize; return; } else { let inode = res.ok().unwrap(); process.data.fdesc.insert(max_fd, Descriptor::File(inode)); } } } (*frame).regs[gp(Registers::A0)] = max_fd as usize; } 1062 => { // gettime (*frame).regs[Registers::A0 as usize] = crate::cpu::get_mtime(); } _ => { println!("Unknown syscall number {}", syscall_number); } } } extern "C" { fn 
make_syscall(sysno: usize, arg0: usize, arg1: usize, arg2: usize, arg3: usize, arg4: usize, arg5: usize) -> usize;
}

/// Thin safe-looking wrapper over the assembly `make_syscall` stub.
/// Libgloss convention: system call number in A7, arguments in A0..A5.
fn do_make_syscall(sysno: usize, arg0: usize, arg1: usize, arg2: usize, arg3: usize, arg4: usize, arg5: usize) -> usize {
	unsafe { make_syscall(sysno, arg0, arg1, arg2, arg3, arg4, arg5) }
}

/// Yield the CPU (system call 1). The handler does nothing but
/// returning 0 causes the trap handler to schedule another process.
pub fn syscall_yield() {
	let _ = do_make_syscall(1, 0, 0, 0, 0, 0, 0);
}

/// Exit the calling process (system call 93, libgloss SYS_exit).
pub fn syscall_exit() {
	let _ = do_make_syscall(93, 0, 0, 0, 0, 0, 0);
}

/// Replace the current process image with the program at `path`
/// (system call 11). `path` must be a NUL-terminated byte string.
pub fn syscall_execv(path: *const u8, argv: usize) -> usize {
	do_make_syscall(11, path as usize, argv, 0, 0, 0, 0)
}

/// Read from the filesystem (system call 63).
/// NOTE(review): this passes (dev, inode, buffer, size, offset) in
/// A0..A4, but the visible sys_read (63) handler reads fd/buf/size
/// from A0..A2 — the argument layout looks mismatched; verify.
pub fn syscall_fs_read(dev: usize, inode: u32, buffer: *mut u8, size: u32, offset: u32) -> usize {
	do_make_syscall(63, dev, inode as usize, buffer as usize, size as usize, offset as usize, 0)
}

/// Read raw blocks from a block device (system call 180). The
/// handler puts the caller in the Waiting state until the block
/// operation completes.
pub fn syscall_block_read(dev: usize, buffer: *mut u8, size: u32, offset: u32) -> u8 {
	do_make_syscall(180, dev, buffer as usize, size as usize, offset as usize, 0, 0) as u8
}

/// Sleep the calling process for `duration` ticks (system call 10).
pub fn syscall_sleep(duration: usize) {
	let _ = do_make_syscall(10, duration, 0, 0, 0, 0, 0);
}

/// Return the calling process' PID (system call 172, SYS_getpid).
pub fn syscall_get_pid() -> u16 {
	do_make_syscall(172, 0, 0, 0, 0, 0, 0) as u16
}

/// This is a helper function ran as a process in kernel space
/// to finish loading and executing a process.
fn exec_func(args: usize) {
	unsafe {
		// We got the inode from the syscall. Its Box rid itself of control, so
		// we take control back here. The Box now owns the Inode and will complete
		// freeing the heap memory allocated for it.
		let inode = Box::from_raw(args as *mut fs::Inode);
		let mut buffer = Buffer::new(inode.size as usize);
		// This is why we need to be in a process context. The read() call may sleep as it
		// waits for the block driver to return.
		fs::MinixFileSystem::read(8, &inode, buffer.get_mut(), inode.size, 0);
		// Now we have the data, so the following will load the ELF file and give us a process.
		let proc = elf::File::load_proc(&buffer);
		if proc.is_err() {
			println!("Failed to launch process.");
		}
		else {
			let process = proc.ok().unwrap();
			// If we hold this lock, we can still be preempted, but the scheduler will
			// return control to us. This required us to use try_lock in the scheduler.
			PROCESS_LIST_MUTEX.sleep_lock();
			if let Some(mut proc_list) = PROCESS_LIST.take() {
				proc_list.push_back(process);
				PROCESS_LIST.replace(proc_list);
			}
			PROCESS_LIST_MUTEX.unlock();
		}
	}
}

// These system call numbers come from libgloss so that we can use newlib
// for our system calls.
// Libgloss wants the system call number in A7 and arguments in A0..A6
// #define SYS_dup 23
// #define SYS_fcntl 25
// #define SYS_faccessat 48
// #define SYS_chdir 49
// #define SYS_openat 56
// #define SYS_getdents 61
// #define SYS_lseek 62
// #define SYS_read 63
// #define SYS_pread 67
// #define SYS_pwrite 68
// #define SYS_fstatat 79
// #define SYS_kill 129
// #define SYS_rt_sigaction 134
// #define SYS_times 153
// #define SYS_uname 160
// #define SYS_gettimeofday 169
// #define SYS_getpid 172
// #define SYS_getuid 174
// #define SYS_geteuid 175
// #define SYS_getgid 176
// #define SYS_getegid 177
// #define SYS_munmap 215
// #define SYS_mremap 216
// #define SYS_mmap 222
// #define SYS_link 1025
// #define SYS_unlink 1026
// #define SYS_mkdir 1030
// #define SYS_access 1033
// #define SYS_stat 1038
// #define SYS_lstat 1039
// #define SYS_time 1062
// #define SYS_getmainvars 2011

================================================
FILE: risc_v/src/test.rs
================================================
// test.rs

use crate::fs::MinixFileSystem;
use crate::syscall;

/// Test block will load raw binaries into memory to execute them. This function
/// will load ELF files and try to execute them.
pub fn test() {
	// The majority of the testing code needs to move into a system call (execv maybe?)
MinixFileSystem::init(8); let path = "/shell\0".as_bytes().as_ptr(); syscall::syscall_execv(path,0); println!("I should never get here, execv should destroy our process."); } ================================================ FILE: risc_v/src/trap.rs ================================================ // trap.rs // Trap routines // Stephen Marz // 10 October 2019 use crate::{cpu::{TrapFrame, CONTEXT_SWITCH_TIME}, plic, process::delete_process, rust_switch_to_user, sched::schedule, syscall::do_syscall}; #[no_mangle] /// The m_trap stands for "machine trap". Right now, we are handling /// all traps at machine mode. In this mode, we can figure out what's /// going on and send a trap where it needs to be. Remember, in machine /// mode and in this trap, interrupts are disabled and the MMU is off. extern "C" fn m_trap(epc: usize, tval: usize, cause: usize, hart: usize, _status: usize, frame: *mut TrapFrame) -> usize { // We're going to handle all traps in machine mode. RISC-V lets // us delegate to supervisor mode, but switching out SATP (virtual memory) // gets hairy. let is_async = { if cause >> 63 & 1 == 1 { true } else { false } }; // The cause contains the type of trap (sync, async) as well as the cause // number. So, here we narrow down just the cause number. let cause_num = cause & 0xfff; let mut return_pc = epc; if is_async { // Asynchronous trap match cause_num { 3 => { // We will use this to awaken our other CPUs so they can process // processes. println!("Machine software interrupt CPU #{}", hart); } 7 => { // This is the context-switch timer. // We would typically invoke the scheduler here to pick another // process to run. // Machine timer let new_frame = schedule(); schedule_next_context_switch(1); if new_frame != 0 { rust_switch_to_user(new_frame); } } 11 => { // Machine external (interrupt from Platform Interrupt Controller (PLIC)) // println!("Machine external interrupt CPU#{}", hart); // We will check the next interrupt. 
If the interrupt isn't available, this will // give us None. However, that would mean we got a spurious interrupt, unless we // get an interrupt from a non-PLIC source. This is the main reason that the PLIC // hardwires the id 0 to 0, so that we can use it as an error case. plic::handle_interrupt(); } _ => { panic!("Unhandled async trap CPU#{} -> {}\n", hart, cause_num); } } } else { // Synchronous trap match cause_num { 2 => unsafe { // Illegal instruction println!("Illegal instruction CPU#{} -> 0x{:08x}: 0x{:08x}\n", hart, epc, tval); // We need while trues here until we have a functioning "delete from scheduler" // I use while true because Rust will warn us that it looks stupid. // This is what I want so that I remember to remove this and replace // them later. delete_process((*frame).pid as u16); let frame = schedule(); schedule_next_context_switch(1); rust_switch_to_user(frame); } 3 => { // breakpoint println!("BKPT\n\n"); return_pc += 2; } 7 => unsafe { println!("Error with pid {}, at PC 0x{:08x}, mepc 0x{:08x}", (*frame).pid, (*frame).pc, epc); delete_process((*frame).pid as u16); let frame = schedule(); schedule_next_context_switch(1); rust_switch_to_user(frame); } 8 | 9 | 11 => unsafe { // Environment (system) call from User, Supervisor, and Machine modes // println!("E-call from User mode! 
CPU#{} -> 0x{:08x}", hart, epc); do_syscall(return_pc, frame); let frame = schedule(); schedule_next_context_switch(1); rust_switch_to_user(frame); } // Page faults 12 => unsafe { // Instruction page fault println!("Instruction page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval); delete_process((*frame).pid as u16); let frame = schedule(); schedule_next_context_switch(1); rust_switch_to_user(frame); } 13 => unsafe { // Load page fault println!("Load page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval); delete_process((*frame).pid as u16); let frame = schedule(); schedule_next_context_switch(1); rust_switch_to_user(frame); } 15 => unsafe { // Store page fault println!("Store page fault CPU#{} -> 0x{:08x}: 0x{:08x}", hart, epc, tval); delete_process((*frame).pid as u16); let frame = schedule(); schedule_next_context_switch(1); rust_switch_to_user(frame); } _ => { panic!( "Unhandled sync trap {}. CPU#{} -> 0x{:08x}: 0x{:08x}\n", cause_num, hart, epc, tval ); } } }; // Finally, return the updated program counter return_pc } pub const MMIO_MTIMECMP: *mut u64 = 0x0200_4000usize as *mut u64; pub const MMIO_MTIME: *const u64 = 0x0200_BFF8 as *const u64; pub fn schedule_next_context_switch(qm: u16) { unsafe { MMIO_MTIMECMP.write_volatile(MMIO_MTIME.read_volatile().wrapping_add(CONTEXT_SWITCH_TIME * qm as u64)); } } ================================================ FILE: risc_v/src/uart.rs ================================================ // uart.rs // UART routines and driver use core::{convert::TryInto, fmt::{Error, Write}}; use crate::console::push_stdin; pub struct Uart { base_address: usize, } impl Write for Uart { fn write_str(&mut self, out: &str) -> Result<(), Error> { for c in out.bytes() { self.put(c); } Ok(()) } } impl Uart { pub fn new(base_address: usize) -> Self { Uart { base_address } } pub fn init(&mut self) { let ptr = self.base_address as *mut u8; unsafe { // First, set the word length, which // are bits 0 and 1 of the line control register (LCR) 
// which is at base_address + 3 // We can easily write the value 3 here or 0b11, but I'm // extending it so that it is clear we're setting two // individual fields // Word 0 Word 1 // ~~~~~~ ~~~~~~ let lcr: u8 = (1 << 0) | (1 << 1); ptr.add(3).write_volatile(lcr); // Now, enable the FIFO, which is bit index 0 of the // FIFO control register (FCR at offset 2). // Again, we can just write 1 here, but when we use left // shift, it's easier to see that we're trying to write // bit index #0. ptr.add(2).write_volatile(1 << 0); // Enable receiver buffer interrupts, which is at bit // index 0 of the interrupt enable register (IER at // offset 1). ptr.add(1).write_volatile(1 << 0); // If we cared about the divisor, the code below would // set the divisor from a global clock rate of 22.729 // MHz (22,729,000 cycles per second) to a signaling // rate of 2400 (BAUD). We usually have much faster // signalling rates nowadays, but this demonstrates what // the divisor actually does. The formula given in the // NS16500A specification for calculating the divisor // is: // divisor = ceil( (clock_hz) / (baud_sps x 16) ) // So, we substitute our values and get: // divisor = ceil( 22_729_000 / (2400 x 16) ) // divisor = ceil( 22_729_000 / 38_400 ) // divisor = ceil( 591.901 ) = 592 // The divisor register is two bytes (16 bits), so we // need to split the value 592 into two bytes. // Typically, we would calculate this based on measuring // the clock rate, but again, for our purposes [qemu], // this doesn't really do anything. let divisor: u16 = 592; let divisor_least: u8 = (divisor & 0xff).try_into().unwrap(); let divisor_most: u8 = (divisor >> 8).try_into().unwrap(); // Notice that the divisor register DLL (divisor latch // least) and DLM (divisor latch most) have the same // base address as the receiver/transmitter and the // interrupt enable register. 
To change what the base // address points to, we open the "divisor latch" by // writing 1 into the Divisor Latch Access Bit (DLAB), // which is bit index 7 of the Line Control Register // (LCR) which is at base_address + 3. ptr.add(3).write_volatile(lcr | 1 << 7); // Now, base addresses 0 and 1 point to DLL and DLM, // respectively. Put the lower 8 bits of the divisor // into DLL ptr.add(0).write_volatile(divisor_least); ptr.add(1).write_volatile(divisor_most); // Now that we've written the divisor, we never have to // touch this again. In hardware, this will divide the // global clock (22.729 MHz) into one suitable for 2,400 // signals per second. So, to once again get access to // the RBR/THR/IER registers, we need to close the DLAB // bit by clearing it to 0. ptr.add(3).write_volatile(lcr); } } pub fn put(&mut self, c: u8) { let ptr = self.base_address as *mut u8; unsafe { ptr.add(0).write_volatile(c); } } pub fn get(&mut self) -> Option { let ptr = self.base_address as *mut u8; unsafe { if ptr.add(5).read_volatile() & 1 == 0 { // The DR bit is 0, meaning no data None } else { // The DR bit is 1, meaning data! Some(ptr.add(0).read_volatile()) } } } } pub fn handle_interrupt() { // We would typically set this to be handled out of the interrupt context, // but we're testing here! C'mon! // We haven't yet used the singleton pattern for my_uart, but remember, this // just simply wraps 0x1000_0000 (UART). let mut my_uart = Uart::new(0x1000_0000); // If we get here, the UART better have something! If not, what happened?? if let Some(c) = my_uart.get() { // If you recognize this code, it used to be in the lib.rs under kmain(). That // was because we needed to poll for UART data. Now that we have interrupts, // here it goes! 
push_stdin(c); match c { 8 => { // This is a backspace, so we // essentially have to write a space and // backup again: print!("{} {}", 8 as char, 8 as char); }, 10 | 13 => { // Newline or carriage-return println!(); }, _ => { print!("{}", c as char); }, } } } ================================================ FILE: risc_v/src/vfs.rs ================================================ // vfs.rs // Virtual File System // Stephen Marz // 4 June 2020 ================================================ FILE: risc_v/src/virtio.rs ================================================ // virtio.rs // VirtIO routines for the VirtIO protocol // Stephen Marz // 10 March 2020 use crate::{block, block::setup_block_device, page::PAGE_SIZE}; use crate::rng::setup_entropy_device; use crate::{gpu, gpu::setup_gpu_device}; use crate::{input, input::setup_input_device}; use core::mem::size_of; // Flags // Descriptor flags have VIRTIO_DESC_F as a prefix // Available flags have VIRTIO_AVAIL_F pub const VIRTIO_F_RING_INDIRECT_DESC: u32 = 28; pub const VIRTIO_F_RING_EVENT_IDX: u32 = 29; pub const VIRTIO_F_VERSION_1: u32 = 32; pub const VIRTIO_DESC_F_NEXT: u16 = 1; pub const VIRTIO_DESC_F_WRITE: u16 = 2; pub const VIRTIO_DESC_F_INDIRECT: u16 = 4; pub const VIRTIO_AVAIL_F_NO_INTERRUPT: u16 = 1; pub const VIRTIO_USED_F_NO_NOTIFY: u16 = 1; // According to the documentation, this must be a power // of 2 for the new style. So, I'm changing this to use // 1 << instead because that will enforce this standard. pub const VIRTIO_RING_SIZE: usize = 1 << 7; // VirtIO structures // The descriptor holds the data that we need to send to // the device. The address is a physical address and NOT // a virtual address. The len is in bytes and the flags are // specified above. Any descriptor can be chained, hence the // next field, but only if the F_NEXT flag is specified. 
#[repr(C)] pub struct Descriptor { pub addr: u64, pub len: u32, pub flags: u16, pub next: u16, } #[repr(C)] pub struct Available { pub flags: u16, pub idx: u16, pub ring: [u16; VIRTIO_RING_SIZE], pub event: u16, } #[repr(C)] pub struct UsedElem { pub id: u32, pub len: u32, } #[repr(C)] pub struct Used { pub flags: u16, pub idx: u16, pub ring: [UsedElem; VIRTIO_RING_SIZE], pub event: u16, } #[repr(C)] pub struct Queue { pub desc: [Descriptor; VIRTIO_RING_SIZE], pub avail: Available, // Calculating padding, we need the used ring to start on a page boundary. We take the page size, subtract the // amount the descriptor ring takes then subtract the available structure and ring. pub padding0: [u8; PAGE_SIZE - size_of::() * VIRTIO_RING_SIZE - size_of::()], pub used: Used, } // The MMIO transport is "legacy" in QEMU, so these registers represent // the legacy interface. #[repr(usize)] pub enum MmioOffsets { MagicValue = 0x000, Version = 0x004, DeviceId = 0x008, VendorId = 0x00c, HostFeatures = 0x010, HostFeaturesSel = 0x014, GuestFeatures = 0x020, GuestFeaturesSel = 0x024, GuestPageSize = 0x028, QueueSel = 0x030, QueueNumMax = 0x034, QueueNum = 0x038, QueueAlign = 0x03c, QueuePfn = 0x040, QueueNotify = 0x050, InterruptStatus = 0x060, InterruptAck = 0x064, Status = 0x070, Config = 0x100, } // This currently isn't used, but if anyone wants to try their hand at putting a structure // to the MMIO address space, you can use the following. Remember that this is volatile! 
// Memory-mapped view of a legacy VirtIO device's register file.
// Unused for now (we access registers via raw pointer offsets instead),
// but kept as documentation of the layout. The rsvN fields pad over gaps
// between registers. Remember: all access to this must be volatile!
#[repr(C)]
pub struct MmioDevice {
	magic_value: u32,
	version: u32,
	device_id: u32,
	vendor_id: u32,
	host_features: u32,
	host_features_sel: u32,
	rsv1: [u8; 8],
	guest_features: u32,
	guest_features_sel: u32,
	guest_page_size: u32,
	rsv2: [u8; 4],
	queue_sel: u32,
	queue_num_max: u32,
	queue_num: u32,
	queue_align: u32,
	queue_pfn: u64,
	rsv3: [u8; 8],
	queue_notify: u32,
	rsv4: [u8; 12],
	interrupt_status: u32,
	interrupt_ack: u32,
	rsv5: [u8; 8],
	status: u32,
	//rsv6: [u8; 140],
	//uint32_t config[1];
	// The config space starts at 0x100, but it is device dependent.
}

// Device type as reported by the DeviceId MMIO register. A value of
// None (0) means nothing is attached behind that transport.
#[repr(usize)]
pub enum DeviceTypes {
	None = 0,
	Network = 1,
	Block = 2,
	Console = 3,
	Entropy = 4,
	Gpu = 16,
	Input = 18,
	Memory = 24,
}

// Enumerations in Rust aren't easy to convert back
// and forth. Furthermore, we're going to use a u32
// pointer, so we need to "undo" the scaling that
// Rust will do with the .add() function.
impl MmioOffsets {
	// Raw byte offset of this register.
	pub fn val(self) -> usize {
		self as usize
	}

	// Byte offset divided by `scale` — used to index a typed pointer
	// whose element size is `scale` bytes.
	pub fn scaled(self, scale: usize) -> usize {
		self.val() / scale
	}

	// Convenience for u32 (4-byte) pointers, the common case here.
	pub fn scale32(self) -> usize {
		self.scaled(4)
	}
}

// Bit flags for the device Status register. Note the variants are listed
// out of numeric order; each value is the spec-defined bit for that state.
pub enum StatusField {
	Acknowledge = 1,
	Driver = 2,
	Failed = 128,
	FeaturesOk = 8,
	DriverOk = 4,
	DeviceNeedsReset = 64,
}

// The status field will be compared to the status register. So,
// I've made some helper functions to checking that register easier.
impl StatusField {
	pub fn val(self) -> usize {
		self as usize
	}

	pub fn val32(self) -> u32 {
		self as u32
	}

	// True if `bit` is set in the raw status register value `sf`.
	pub fn test(sf: u32, bit: StatusField) -> bool {
		sf & bit.val32() != 0
	}

	pub fn is_failed(sf: u32) -> bool {
		StatusField::test(sf, StatusField::Failed)
	}

	pub fn needs_reset(sf: u32) -> bool {
		StatusField::test(sf, StatusField::DeviceNeedsReset)
	}

	pub fn driver_ok(sf: u32) -> bool {
		StatusField::test(sf, StatusField::DriverOk)
	}

	pub fn features_ok(sf: u32) -> bool {
		StatusField::test(sf, StatusField::FeaturesOk)
	}
}

// We probably shouldn't put these here, but it'll help
// with probing the bus, etc. These are architecture specific
// which is why I say that.
pub const MMIO_VIRTIO_START: usize = 0x1000_1000; pub const MMIO_VIRTIO_END: usize = 0x1000_8000; pub const MMIO_VIRTIO_STRIDE: usize = 0x1000; pub const MMIO_VIRTIO_MAGIC: u32 = 0x74_72_69_76; // The VirtioDevice is essentially a structure we can put into an array // to determine what virtio devices are attached to the system. Right now, // we're using the 1..=8 linearity of the VirtIO devices on QEMU to help // with reducing the data structure itself. Otherwise, we might be forced // to use an MMIO pointer. pub struct VirtioDevice { pub devtype: DeviceTypes, } impl VirtioDevice { pub const fn new() -> Self { VirtioDevice { devtype: DeviceTypes::None, } } pub const fn new_with(devtype: DeviceTypes) -> Self { VirtioDevice { devtype } } } static mut VIRTIO_DEVICES: [Option; 8] = [None, None, None, None, None, None, None, None]; /// Probe the VirtIO bus for devices that might be /// out there. pub fn probe() { // Rust's for loop uses an Iterator object, which now has a step_by // modifier to change how much it steps. Also recall that ..= means up // to AND including MMIO_VIRTIO_END. for addr in (MMIO_VIRTIO_START..=MMIO_VIRTIO_END).step_by(MMIO_VIRTIO_STRIDE) { print!("Virtio probing 0x{:08x}...", addr); let magicvalue; let deviceid; let ptr = addr as *mut u32; unsafe { magicvalue = ptr.read_volatile(); deviceid = ptr.add(2).read_volatile(); } // 0x74_72_69_76 is "virt" in little endian, so in reality // it is triv. All VirtIO devices have this attached to the // MagicValue register (offset 0x000) if MMIO_VIRTIO_MAGIC != magicvalue { println!("not virtio."); } // If we are a virtio device, we now need to see if anything // is actually attached to it. The DeviceID register will // contain what type of device this is. If this value is 0, // then it is not connected. else if 0 == deviceid { println!("not connected."); } // If we get here, we have a connected virtio device. Now we have // to figure out what kind it is so we can do device-specific setup. 
else { match deviceid { // DeviceID 1 is a network device 1 => { print!("network device..."); if false == setup_network_device(ptr) { println!("setup failed."); } else { println!("setup succeeded!"); } }, // DeviceID 2 is a block device 2 => { print!("block device..."); if false == setup_block_device(ptr) { println!("setup failed."); } else { let idx = (addr - MMIO_VIRTIO_START) >> 12; unsafe { VIRTIO_DEVICES[idx] = Some(VirtioDevice::new_with(DeviceTypes::Block)); } println!("setup succeeded!"); } }, // DeviceID 4 is a random number generator device 4 => { print!("entropy device..."); if false == setup_entropy_device(ptr) { println!("setup failed."); } else { println!("setup succeeded!"); } }, // DeviceID 16 is a GPU device 16 => { print!("GPU device..."); if false == setup_gpu_device(ptr) { println!("setup failed."); } else { let idx = (addr - MMIO_VIRTIO_START) >> 12; unsafe { VIRTIO_DEVICES[idx] = Some(VirtioDevice::new_with(DeviceTypes::Gpu)); } println!("setup succeeded!"); } }, // DeviceID 18 is an input device 18 => { print!("input device..."); if false == setup_input_device(ptr) { println!("setup failed."); } else { let idx = (addr - MMIO_VIRTIO_START) >> 12; unsafe { VIRTIO_DEVICES[idx] = Some(VirtioDevice::new_with(DeviceTypes::Input)); } println!("setup succeeded!"); } }, _ => println!("unknown device type."), } } } } pub fn setup_network_device(_ptr: *mut u32) -> bool { false } // The External pin (PLIC) trap will lead us here if it is // determined that interrupts 1..=8 are what caused the interrupt. // In here, we try to figure out where to direct the interrupt // and then handle it. 
pub fn handle_interrupt(interrupt: u32) { let idx = interrupt as usize - 1; unsafe { if let Some(vd) = &VIRTIO_DEVICES[idx] { match vd.devtype { DeviceTypes::Block => { block::handle_interrupt(idx); }, DeviceTypes::Gpu => { gpu::handle_interrupt(idx); }, DeviceTypes::Input => { input::handle_interrupt(idx); }, _ => { println!("Invalid device generated interrupt!"); }, } } else { println!("Spurious interrupt {}", interrupt); } } } ================================================ FILE: risc_v/userspace/.gitignore ================================================ helloworld helloworld.elf sleepy sleepy.elf shell shell.elf fb fb.elf ================================================ FILE: risc_v/userspace/Makefile ================================================ CROSS=riscv64-unknown-elf- CXX=g++ CXXFLAGS=-Wall -O3 -static -I. SOURCES=$(wildcard *.cpp) OUT=$(patsubst %.cpp,%,$(SOURCES)) all: $(OUT) %: %.cpp Makefile $(CROSS)$(CXX) $(CXXFLAGS) -o $@ $< clean: rm -f $(OUT) ================================================ FILE: risc_v/userspace/fb.cpp ================================================ #include #include #include #include #include #include #define MAX_EVENTS 100 #define min(x, y) ((x < y) ? x : y) #define max(x, y) ((x > y) ? 
x : y) using u8 = unsigned char; using i8 = signed char; using u16 = unsigned short; using i16 = signed short; using u32 = unsigned int; using i32 = signed int; using u64 = unsigned long; using i64 = signed long; using f64 = double; using f32 = float; struct Pixel { u8 r; u8 g; u8 b; u8 a; }; struct Event { u16 event_type; u16 code; u32 value; }; void fill_rect(Pixel *fb, u32 x, u32 y, u32 width, u32 height, Pixel &color); void stroke_rect(Pixel *fb, u32 x, u32 y, u32 width, u32 height, Pixel &color, u32 size); void set_pixel(Pixel *fb, u32 x, u32 y, Pixel &color); void draw_cosine(Pixel *fb, u32 x, u32 y, u32 width, u32 height, Pixel &color); void draw_circle(Pixel *fb, u32 x, u32 y, f64 r, Pixel &color); const u64 noevt_slptm = 10000; const u64 evt_slptm = 10000; #define FB_DEV "/dev/fb" #define BUT_DEV "/dev/butev" #define ABS_DEV "/dev/absev" struct Rect { u32 x; u32 y; u32 width; u32 height; }; constexpr u32 lerp(u32 val, u32 mx1, u32 mx2) { f64 r = val / static_cast(mx1); return r * mx2; } int main() { Event *events = new Event[100]; bool pressed = false; int fb = open(FB_DEV, O_RDWR); int but = open(BUT_DEV, O_RDONLY); int abs = open(ABS_DEV, O_RDONLY); if (fb < 0) { printf("Unable to open framebuffer %s.\n", FB_DEV); return -1; } if (but < 0) { printf("Unable to open button events %s.\n", BUT_DEV); return -1; } if (abs < 0) { printf("Unable to open absolute events %s.\n", ABS_DEV); return -1; } close(fb); close(but); close(abs); delete [] events; return 0; } void set_pixel(Pixel *fb, u32 x, u32 y, Pixel &color) { if (x < 640 && y < 480) { fb[y * 640 + x] = color; } } void fill_rect(Pixel *fb, u32 x, u32 y, u32 width, u32 height, Pixel &color) { for (u32 row = y; row < (y+height);row++) { for (u32 col = x; col < (x+width);col++) { set_pixel(fb, col, row, color); } } } void stroke_rect(Pixel *fb, u32 x, u32 y, u32 width, u32 height, Pixel &color, u32 size) { // Essentially fill the four sides. 
// Top fill_rect(fb, x, y, width, size, color); // Bottom fill_rect(fb, x, y + height, width, size, color); // Left fill_rect(fb, x, y, size, height, color); // Right fill_rect(fb, x + width, y, size, height + size, color); } void draw_cosine(Pixel *fb, u32 x, u32 y, u32 width, u32 height, Pixel &color) { for (u32 i = 1; i <= width;i++) { f64 fy = -cos(i % 360); f64 yy = fy / 2.0 * height; u32 nx = x + i; u32 ny = yy + y; // printf("Cos %u = %lf, x: %u, y: %u\n", (i % 360), fy, nx, ny); fill_rect(fb, nx, ny, 2, 2, color); } } void draw_circle(Pixel *fb, u32 x, u32 y, f64 r, Pixel &color) { } ================================================ FILE: risc_v/userspace/helloworld.cpp ================================================ #include #include const int SIZE = 100000; double myarray[SIZE]; int another_array[5] = {1, 2, 3, 4, 5}; int main() { printf("I'm a C++ program, and I'm running in user space. How about a big, Hello World\n"); printf("My array is at 0x%p\n", myarray); printf("I'm going to start crunching some numbers, so gimme a minute.\n"); for (int i = 0;i < SIZE;i++) { myarray[i] = another_array[i % 5]; } for (int i = 0;i < SIZE;i++) { myarray[i % SIZE] += cos(i); } printf("Ok, I'm done crunching. Wanna see myarray[0]? It's %lf\n", myarray[0]); return 0; } ================================================ FILE: risc_v/userspace/input-event-codes.h ================================================ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * Input event codes * * *** IMPORTANT *** * This file is not only included from C-code but also from devicetree source * files. As such this file MUST only contain comments and defines. * * Copyright (c) 1999-2002 Vojtech Pavlik * Copyright (c) 2015 Hans de Goede * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published by * the Free Software Foundation. 
*/ #ifndef _UAPI_INPUT_EVENT_CODES_H #define _UAPI_INPUT_EVENT_CODES_H /* * Device properties and quirks */ #define INPUT_PROP_POINTER 0x00 /* needs a pointer */ #define INPUT_PROP_DIRECT 0x01 /* direct input devices */ #define INPUT_PROP_BUTTONPAD 0x02 /* has button(s) under pad */ #define INPUT_PROP_SEMI_MT 0x03 /* touch rectangle only */ #define INPUT_PROP_TOPBUTTONPAD 0x04 /* softbuttons at top of pad */ #define INPUT_PROP_POINTING_STICK 0x05 /* is a pointing stick */ #define INPUT_PROP_ACCELEROMETER 0x06 /* has accelerometer */ #define INPUT_PROP_MAX 0x1f #define INPUT_PROP_CNT (INPUT_PROP_MAX + 1) /* * Event types */ #define EV_SYN 0x00 #define EV_KEY 0x01 #define EV_REL 0x02 #define EV_ABS 0x03 #define EV_MSC 0x04 #define EV_SW 0x05 #define EV_LED 0x11 #define EV_SND 0x12 #define EV_REP 0x14 #define EV_FF 0x15 #define EV_PWR 0x16 #define EV_FF_STATUS 0x17 #define EV_MAX 0x1f #define EV_CNT (EV_MAX+1) /* * Synchronization events. */ #define SYN_REPORT 0 #define SYN_CONFIG 1 #define SYN_MT_REPORT 2 #define SYN_DROPPED 3 #define SYN_MAX 0xf #define SYN_CNT (SYN_MAX+1) /* * Keys and buttons * * Most of the keys/buttons are modeled after USB HUT 1.12 * (see http://www.usb.org/developers/hidpage). 
* Abbreviations in the comments: * AC - Application Control * AL - Application Launch Button * SC - System Control */ #define KEY_RESERVED 0 #define KEY_ESC 1 #define KEY_1 2 #define KEY_2 3 #define KEY_3 4 #define KEY_4 5 #define KEY_5 6 #define KEY_6 7 #define KEY_7 8 #define KEY_8 9 #define KEY_9 10 #define KEY_0 11 #define KEY_MINUS 12 #define KEY_EQUAL 13 #define KEY_BACKSPACE 14 #define KEY_TAB 15 #define KEY_Q 16 #define KEY_W 17 #define KEY_E 18 #define KEY_R 19 #define KEY_T 20 #define KEY_Y 21 #define KEY_U 22 #define KEY_I 23 #define KEY_O 24 #define KEY_P 25 #define KEY_LEFTBRACE 26 #define KEY_RIGHTBRACE 27 #define KEY_ENTER 28 #define KEY_LEFTCTRL 29 #define KEY_A 30 #define KEY_S 31 #define KEY_D 32 #define KEY_F 33 #define KEY_G 34 #define KEY_H 35 #define KEY_J 36 #define KEY_K 37 #define KEY_L 38 #define KEY_SEMICOLON 39 #define KEY_APOSTROPHE 40 #define KEY_GRAVE 41 #define KEY_LEFTSHIFT 42 #define KEY_BACKSLASH 43 #define KEY_Z 44 #define KEY_X 45 #define KEY_C 46 #define KEY_V 47 #define KEY_B 48 #define KEY_N 49 #define KEY_M 50 #define KEY_COMMA 51 #define KEY_DOT 52 #define KEY_SLASH 53 #define KEY_RIGHTSHIFT 54 #define KEY_KPASTERISK 55 #define KEY_LEFTALT 56 #define KEY_SPACE 57 #define KEY_CAPSLOCK 58 #define KEY_F1 59 #define KEY_F2 60 #define KEY_F3 61 #define KEY_F4 62 #define KEY_F5 63 #define KEY_F6 64 #define KEY_F7 65 #define KEY_F8 66 #define KEY_F9 67 #define KEY_F10 68 #define KEY_NUMLOCK 69 #define KEY_SCROLLLOCK 70 #define KEY_KP7 71 #define KEY_KP8 72 #define KEY_KP9 73 #define KEY_KPMINUS 74 #define KEY_KP4 75 #define KEY_KP5 76 #define KEY_KP6 77 #define KEY_KPPLUS 78 #define KEY_KP1 79 #define KEY_KP2 80 #define KEY_KP3 81 #define KEY_KP0 82 #define KEY_KPDOT 83 #define KEY_ZENKAKUHANKAKU 85 #define KEY_102ND 86 #define KEY_F11 87 #define KEY_F12 88 #define KEY_RO 89 #define KEY_KATAKANA 90 #define KEY_HIRAGANA 91 #define KEY_HENKAN 92 #define KEY_KATAKANAHIRAGANA 93 #define KEY_MUHENKAN 94 #define KEY_KPJPCOMMA 95 #define 
KEY_KPENTER 96 #define KEY_RIGHTCTRL 97 #define KEY_KPSLASH 98 #define KEY_SYSRQ 99 #define KEY_RIGHTALT 100 #define KEY_LINEFEED 101 #define KEY_HOME 102 #define KEY_UP 103 #define KEY_PAGEUP 104 #define KEY_LEFT 105 #define KEY_RIGHT 106 #define KEY_END 107 #define KEY_DOWN 108 #define KEY_PAGEDOWN 109 #define KEY_INSERT 110 #define KEY_DELETE 111 #define KEY_MACRO 112 #define KEY_MUTE 113 #define KEY_VOLUMEDOWN 114 #define KEY_VOLUMEUP 115 #define KEY_POWER 116 /* SC System Power Down */ #define KEY_KPEQUAL 117 #define KEY_KPPLUSMINUS 118 #define KEY_PAUSE 119 #define KEY_SCALE 120 /* AL Compiz Scale (Expose) */ #define KEY_KPCOMMA 121 #define KEY_HANGEUL 122 #define KEY_HANGUEL KEY_HANGEUL #define KEY_HANJA 123 #define KEY_YEN 124 #define KEY_LEFTMETA 125 #define KEY_RIGHTMETA 126 #define KEY_COMPOSE 127 #define KEY_STOP 128 /* AC Stop */ #define KEY_AGAIN 129 #define KEY_PROPS 130 /* AC Properties */ #define KEY_UNDO 131 /* AC Undo */ #define KEY_FRONT 132 #define KEY_COPY 133 /* AC Copy */ #define KEY_OPEN 134 /* AC Open */ #define KEY_PASTE 135 /* AC Paste */ #define KEY_FIND 136 /* AC Search */ #define KEY_CUT 137 /* AC Cut */ #define KEY_HELP 138 /* AL Integrated Help Center */ #define KEY_MENU 139 /* Menu (show menu) */ #define KEY_CALC 140 /* AL Calculator */ #define KEY_SETUP 141 #define KEY_SLEEP 142 /* SC System Sleep */ #define KEY_WAKEUP 143 /* System Wake Up */ #define KEY_FILE 144 /* AL Local Machine Browser */ #define KEY_SENDFILE 145 #define KEY_DELETEFILE 146 #define KEY_XFER 147 #define KEY_PROG1 148 #define KEY_PROG2 149 #define KEY_WWW 150 /* AL Internet Browser */ #define KEY_MSDOS 151 #define KEY_COFFEE 152 /* AL Terminal Lock/Screensaver */ #define KEY_SCREENLOCK KEY_COFFEE #define KEY_ROTATE_DISPLAY 153 /* Display orientation for e.g. 
tablets */ #define KEY_DIRECTION KEY_ROTATE_DISPLAY #define KEY_CYCLEWINDOWS 154 #define KEY_MAIL 155 #define KEY_BOOKMARKS 156 /* AC Bookmarks */ #define KEY_COMPUTER 157 #define KEY_BACK 158 /* AC Back */ #define KEY_FORWARD 159 /* AC Forward */ #define KEY_CLOSECD 160 #define KEY_EJECTCD 161 #define KEY_EJECTCLOSECD 162 #define KEY_NEXTSONG 163 #define KEY_PLAYPAUSE 164 #define KEY_PREVIOUSSONG 165 #define KEY_STOPCD 166 #define KEY_RECORD 167 #define KEY_REWIND 168 #define KEY_PHONE 169 /* Media Select Telephone */ #define KEY_ISO 170 #define KEY_CONFIG 171 /* AL Consumer Control Configuration */ #define KEY_HOMEPAGE 172 /* AC Home */ #define KEY_REFRESH 173 /* AC Refresh */ #define KEY_EXIT 174 /* AC Exit */ #define KEY_MOVE 175 #define KEY_EDIT 176 #define KEY_SCROLLUP 177 #define KEY_SCROLLDOWN 178 #define KEY_KPLEFTPAREN 179 #define KEY_KPRIGHTPAREN 180 #define KEY_NEW 181 /* AC New */ #define KEY_REDO 182 /* AC Redo/Repeat */ #define KEY_F13 183 #define KEY_F14 184 #define KEY_F15 185 #define KEY_F16 186 #define KEY_F17 187 #define KEY_F18 188 #define KEY_F19 189 #define KEY_F20 190 #define KEY_F21 191 #define KEY_F22 192 #define KEY_F23 193 #define KEY_F24 194 #define KEY_PLAYCD 200 #define KEY_PAUSECD 201 #define KEY_PROG3 202 #define KEY_PROG4 203 #define KEY_DASHBOARD 204 /* AL Dashboard */ #define KEY_SUSPEND 205 #define KEY_CLOSE 206 /* AC Close */ #define KEY_PLAY 207 #define KEY_FASTFORWARD 208 #define KEY_BASSBOOST 209 #define KEY_PRINT 210 /* AC Print */ #define KEY_HP 211 #define KEY_CAMERA 212 #define KEY_SOUND 213 #define KEY_QUESTION 214 #define KEY_EMAIL 215 #define KEY_CHAT 216 #define KEY_SEARCH 217 #define KEY_CONNECT 218 #define KEY_FINANCE 219 /* AL Checkbook/Finance */ #define KEY_SPORT 220 #define KEY_SHOP 221 #define KEY_ALTERASE 222 #define KEY_CANCEL 223 /* AC Cancel */ #define KEY_BRIGHTNESSDOWN 224 #define KEY_BRIGHTNESSUP 225 #define KEY_MEDIA 226 #define KEY_SWITCHVIDEOMODE 227 /* Cycle between available video outputs 
(Monitor/LCD/TV-out/etc) */ #define KEY_KBDILLUMTOGGLE 228 #define KEY_KBDILLUMDOWN 229 #define KEY_KBDILLUMUP 230 #define KEY_SEND 231 /* AC Send */ #define KEY_REPLY 232 /* AC Reply */ #define KEY_FORWARDMAIL 233 /* AC Forward Msg */ #define KEY_SAVE 234 /* AC Save */ #define KEY_DOCUMENTS 235 #define KEY_BATTERY 236 #define KEY_BLUETOOTH 237 #define KEY_WLAN 238 #define KEY_UWB 239 #define KEY_UNKNOWN 240 #define KEY_VIDEO_NEXT 241 /* drive next video source */ #define KEY_VIDEO_PREV 242 /* drive previous video source */ #define KEY_BRIGHTNESS_CYCLE 243 /* brightness up, after max is min */ #define KEY_BRIGHTNESS_AUTO 244 /* Set Auto Brightness: manual brightness control is off, rely on ambient */ #define KEY_BRIGHTNESS_ZERO KEY_BRIGHTNESS_AUTO #define KEY_DISPLAY_OFF 245 /* display device to off state */ #define KEY_WWAN 246 /* Wireless WAN (LTE, UMTS, GSM, etc.) */ #define KEY_WIMAX KEY_WWAN #define KEY_RFKILL 247 /* Key that controls all radios */ #define KEY_MICMUTE 248 /* Mute / unmute the microphone */ /* Code 255 is reserved for special needs of AT keyboard driver */ #define BTN_MISC 0x100 #define BTN_0 0x100 #define BTN_1 0x101 #define BTN_2 0x102 #define BTN_3 0x103 #define BTN_4 0x104 #define BTN_5 0x105 #define BTN_6 0x106 #define BTN_7 0x107 #define BTN_8 0x108 #define BTN_9 0x109 #define BTN_MOUSE 0x110 #define BTN_LEFT 0x110 #define BTN_RIGHT 0x111 #define BTN_MIDDLE 0x112 #define BTN_SIDE 0x113 #define BTN_EXTRA 0x114 #define BTN_FORWARD 0x115 #define BTN_BACK 0x116 #define BTN_TASK 0x117 #define BTN_JOYSTICK 0x120 #define BTN_TRIGGER 0x120 #define BTN_THUMB 0x121 #define BTN_THUMB2 0x122 #define BTN_TOP 0x123 #define BTN_TOP2 0x124 #define BTN_PINKIE 0x125 #define BTN_BASE 0x126 #define BTN_BASE2 0x127 #define BTN_BASE3 0x128 #define BTN_BASE4 0x129 #define BTN_BASE5 0x12a #define BTN_BASE6 0x12b #define BTN_DEAD 0x12f #define BTN_GAMEPAD 0x130 #define BTN_SOUTH 0x130 #define BTN_A BTN_SOUTH #define BTN_EAST 0x131 #define BTN_B BTN_EAST #define 
BTN_C 0x132 #define BTN_NORTH 0x133 #define BTN_X BTN_NORTH #define BTN_WEST 0x134 #define BTN_Y BTN_WEST #define BTN_Z 0x135 #define BTN_TL 0x136 #define BTN_TR 0x137 #define BTN_TL2 0x138 #define BTN_TR2 0x139 #define BTN_SELECT 0x13a #define BTN_START 0x13b #define BTN_MODE 0x13c #define BTN_THUMBL 0x13d #define BTN_THUMBR 0x13e #define BTN_DIGI 0x140 #define BTN_TOOL_PEN 0x140 #define BTN_TOOL_RUBBER 0x141 #define BTN_TOOL_BRUSH 0x142 #define BTN_TOOL_PENCIL 0x143 #define BTN_TOOL_AIRBRUSH 0x144 #define BTN_TOOL_FINGER 0x145 #define BTN_TOOL_MOUSE 0x146 #define BTN_TOOL_LENS 0x147 #define BTN_TOOL_QUINTTAP 0x148 /* Five fingers on trackpad */ #define BTN_STYLUS3 0x149 #define BTN_TOUCH 0x14a #define BTN_STYLUS 0x14b #define BTN_STYLUS2 0x14c #define BTN_TOOL_DOUBLETAP 0x14d #define BTN_TOOL_TRIPLETAP 0x14e #define BTN_TOOL_QUADTAP 0x14f /* Four fingers on trackpad */ #define BTN_WHEEL 0x150 #define BTN_GEAR_DOWN 0x150 #define BTN_GEAR_UP 0x151 #define KEY_OK 0x160 #define KEY_SELECT 0x161 #define KEY_GOTO 0x162 #define KEY_CLEAR 0x163 #define KEY_POWER2 0x164 #define KEY_OPTION 0x165 #define KEY_INFO 0x166 /* AL OEM Features/Tips/Tutorial */ #define KEY_TIME 0x167 #define KEY_VENDOR 0x168 #define KEY_ARCHIVE 0x169 #define KEY_PROGRAM 0x16a /* Media Select Program Guide */ #define KEY_CHANNEL 0x16b #define KEY_FAVORITES 0x16c #define KEY_EPG 0x16d #define KEY_PVR 0x16e /* Media Select Home */ #define KEY_MHP 0x16f #define KEY_LANGUAGE 0x170 #define KEY_TITLE 0x171 #define KEY_SUBTITLE 0x172 #define KEY_ANGLE 0x173 #define KEY_FULL_SCREEN 0x174 /* AC View Toggle */ #define KEY_ZOOM KEY_FULL_SCREEN #define KEY_MODE 0x175 #define KEY_KEYBOARD 0x176 #define KEY_ASPECT_RATIO 0x177 /* HUTRR37: Aspect */ #define KEY_SCREEN KEY_ASPECT_RATIO #define KEY_PC 0x178 /* Media Select Computer */ #define KEY_TV 0x179 /* Media Select TV */ #define KEY_TV2 0x17a /* Media Select Cable */ #define KEY_VCR 0x17b /* Media Select VCR */ #define KEY_VCR2 0x17c /* VCR Plus */ #define 
KEY_SAT 0x17d /* Media Select Satellite */ #define KEY_SAT2 0x17e #define KEY_CD 0x17f /* Media Select CD */ #define KEY_TAPE 0x180 /* Media Select Tape */ #define KEY_RADIO 0x181 #define KEY_TUNER 0x182 /* Media Select Tuner */ #define KEY_PLAYER 0x183 #define KEY_TEXT 0x184 #define KEY_DVD 0x185 /* Media Select DVD */ #define KEY_AUX 0x186 #define KEY_MP3 0x187 #define KEY_AUDIO 0x188 /* AL Audio Browser */ #define KEY_VIDEO 0x189 /* AL Movie Browser */ #define KEY_DIRECTORY 0x18a #define KEY_LIST 0x18b #define KEY_MEMO 0x18c /* Media Select Messages */ #define KEY_CALENDAR 0x18d #define KEY_RED 0x18e #define KEY_GREEN 0x18f #define KEY_YELLOW 0x190 #define KEY_BLUE 0x191 #define KEY_CHANNELUP 0x192 /* Channel Increment */ #define KEY_CHANNELDOWN 0x193 /* Channel Decrement */ #define KEY_FIRST 0x194 #define KEY_LAST 0x195 /* Recall Last */ #define KEY_AB 0x196 #define KEY_NEXT 0x197 #define KEY_RESTART 0x198 #define KEY_SLOW 0x199 #define KEY_SHUFFLE 0x19a #define KEY_BREAK 0x19b #define KEY_PREVIOUS 0x19c #define KEY_DIGITS 0x19d #define KEY_TEEN 0x19e #define KEY_TWEN 0x19f #define KEY_VIDEOPHONE 0x1a0 /* Media Select Video Phone */ #define KEY_GAMES 0x1a1 /* Media Select Games */ #define KEY_ZOOMIN 0x1a2 /* AC Zoom In */ #define KEY_ZOOMOUT 0x1a3 /* AC Zoom Out */ #define KEY_ZOOMRESET 0x1a4 /* AC Zoom */ #define KEY_WORDPROCESSOR 0x1a5 /* AL Word Processor */ #define KEY_EDITOR 0x1a6 /* AL Text Editor */ #define KEY_SPREADSHEET 0x1a7 /* AL Spreadsheet */ #define KEY_GRAPHICSEDITOR 0x1a8 /* AL Graphics Editor */ #define KEY_PRESENTATION 0x1a9 /* AL Presentation App */ #define KEY_DATABASE 0x1aa /* AL Database App */ #define KEY_NEWS 0x1ab /* AL Newsreader */ #define KEY_VOICEMAIL 0x1ac /* AL Voicemail */ #define KEY_ADDRESSBOOK 0x1ad /* AL Contacts/Address Book */ #define KEY_MESSENGER 0x1ae /* AL Instant Messaging */ #define KEY_DISPLAYTOGGLE 0x1af /* Turn display (LCD) on and off */ #define KEY_BRIGHTNESS_TOGGLE KEY_DISPLAYTOGGLE #define KEY_SPELLCHECK 0x1b0 
/* AL Spell Check */ #define KEY_LOGOFF 0x1b1 /* AL Logoff */ #define KEY_DOLLAR 0x1b2 #define KEY_EURO 0x1b3 #define KEY_FRAMEBACK 0x1b4 /* Consumer - transport controls */ #define KEY_FRAMEFORWARD 0x1b5 #define KEY_CONTEXT_MENU 0x1b6 /* GenDesc - system context menu */ #define KEY_MEDIA_REPEAT 0x1b7 /* Consumer - transport control */ #define KEY_10CHANNELSUP 0x1b8 /* 10 channels up (10+) */ #define KEY_10CHANNELSDOWN 0x1b9 /* 10 channels down (10-) */ #define KEY_IMAGES 0x1ba /* AL Image Browser */ #define KEY_DEL_EOL 0x1c0 #define KEY_DEL_EOS 0x1c1 #define KEY_INS_LINE 0x1c2 #define KEY_DEL_LINE 0x1c3 #define KEY_FN 0x1d0 #define KEY_FN_ESC 0x1d1 #define KEY_FN_F1 0x1d2 #define KEY_FN_F2 0x1d3 #define KEY_FN_F3 0x1d4 #define KEY_FN_F4 0x1d5 #define KEY_FN_F5 0x1d6 #define KEY_FN_F6 0x1d7 #define KEY_FN_F7 0x1d8 #define KEY_FN_F8 0x1d9 #define KEY_FN_F9 0x1da #define KEY_FN_F10 0x1db #define KEY_FN_F11 0x1dc #define KEY_FN_F12 0x1dd #define KEY_FN_1 0x1de #define KEY_FN_2 0x1df #define KEY_FN_D 0x1e0 #define KEY_FN_E 0x1e1 #define KEY_FN_F 0x1e2 #define KEY_FN_S 0x1e3 #define KEY_FN_B 0x1e4 #define KEY_BRL_DOT1 0x1f1 #define KEY_BRL_DOT2 0x1f2 #define KEY_BRL_DOT3 0x1f3 #define KEY_BRL_DOT4 0x1f4 #define KEY_BRL_DOT5 0x1f5 #define KEY_BRL_DOT6 0x1f6 #define KEY_BRL_DOT7 0x1f7 #define KEY_BRL_DOT8 0x1f8 #define KEY_BRL_DOT9 0x1f9 #define KEY_BRL_DOT10 0x1fa #define KEY_NUMERIC_0 0x200 /* used by phones, remote controls, */ #define KEY_NUMERIC_1 0x201 /* and other keypads */ #define KEY_NUMERIC_2 0x202 #define KEY_NUMERIC_3 0x203 #define KEY_NUMERIC_4 0x204 #define KEY_NUMERIC_5 0x205 #define KEY_NUMERIC_6 0x206 #define KEY_NUMERIC_7 0x207 #define KEY_NUMERIC_8 0x208 #define KEY_NUMERIC_9 0x209 #define KEY_NUMERIC_STAR 0x20a #define KEY_NUMERIC_POUND 0x20b #define KEY_NUMERIC_A 0x20c /* Phone key A - HUT Telephony 0xb9 */ #define KEY_NUMERIC_B 0x20d #define KEY_NUMERIC_C 0x20e #define KEY_NUMERIC_D 0x20f #define KEY_CAMERA_FOCUS 0x210 #define KEY_WPS_BUTTON 0x211 
/* WiFi Protected Setup key */ #define KEY_TOUCHPAD_TOGGLE 0x212 /* Request switch touchpad on or off */ #define KEY_TOUCHPAD_ON 0x213 #define KEY_TOUCHPAD_OFF 0x214 #define KEY_CAMERA_ZOOMIN 0x215 #define KEY_CAMERA_ZOOMOUT 0x216 #define KEY_CAMERA_UP 0x217 #define KEY_CAMERA_DOWN 0x218 #define KEY_CAMERA_LEFT 0x219 #define KEY_CAMERA_RIGHT 0x21a #define KEY_ATTENDANT_ON 0x21b #define KEY_ATTENDANT_OFF 0x21c #define KEY_ATTENDANT_TOGGLE 0x21d /* Attendant call on or off */ #define KEY_LIGHTS_TOGGLE 0x21e /* Reading light on or off */ #define BTN_DPAD_UP 0x220 #define BTN_DPAD_DOWN 0x221 #define BTN_DPAD_LEFT 0x222 #define BTN_DPAD_RIGHT 0x223 #define KEY_ALS_TOGGLE 0x230 /* Ambient light sensor */ #define KEY_ROTATE_LOCK_TOGGLE 0x231 /* Display rotation lock */ #define KEY_BUTTONCONFIG 0x240 /* AL Button Configuration */ #define KEY_TASKMANAGER 0x241 /* AL Task/Project Manager */ #define KEY_JOURNAL 0x242 /* AL Log/Journal/Timecard */ #define KEY_CONTROLPANEL 0x243 /* AL Control Panel */ #define KEY_APPSELECT 0x244 /* AL Select Task/Application */ #define KEY_SCREENSAVER 0x245 /* AL Screen Saver */ #define KEY_VOICECOMMAND 0x246 /* Listening Voice Command */ #define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */ #define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ #define KEY_KBDINPUTASSIST_PREV 0x260 #define KEY_KBDINPUTASSIST_NEXT 0x261 #define KEY_KBDINPUTASSIST_PREVGROUP 0x262 #define KEY_KBDINPUTASSIST_NEXTGROUP 0x263 #define KEY_KBDINPUTASSIST_ACCEPT 0x264 #define KEY_KBDINPUTASSIST_CANCEL 0x265 /* Diagonal movement keys */ #define KEY_RIGHT_UP 0x266 #define KEY_RIGHT_DOWN 0x267 #define KEY_LEFT_UP 0x268 #define KEY_LEFT_DOWN 0x269 #define KEY_ROOT_MENU 0x26a /* Show Device's Root Menu */ /* Show Top Menu of the Media (e.g. 
DVD) */ #define KEY_MEDIA_TOP_MENU 0x26b #define KEY_NUMERIC_11 0x26c #define KEY_NUMERIC_12 0x26d /* * Toggle Audio Description: refers to an audio service that helps blind and * visually impaired consumers understand the action in a program. Note: in * some countries this is referred to as "Video Description". */ #define KEY_AUDIO_DESC 0x26e #define KEY_3D_MODE 0x26f #define KEY_NEXT_FAVORITE 0x270 #define KEY_STOP_RECORD 0x271 #define KEY_PAUSE_RECORD 0x272 #define KEY_VOD 0x273 /* Video on Demand */ #define KEY_UNMUTE 0x274 #define KEY_FASTREVERSE 0x275 #define KEY_SLOWREVERSE 0x276 /* * Control a data application associated with the currently viewed channel, * e.g. teletext or data broadcast application (MHEG, MHP, HbbTV, etc.) */ #define KEY_DATA 0x277 #define KEY_ONSCREEN_KEYBOARD 0x278 #define BTN_TRIGGER_HAPPY 0x2c0 #define BTN_TRIGGER_HAPPY1 0x2c0 #define BTN_TRIGGER_HAPPY2 0x2c1 #define BTN_TRIGGER_HAPPY3 0x2c2 #define BTN_TRIGGER_HAPPY4 0x2c3 #define BTN_TRIGGER_HAPPY5 0x2c4 #define BTN_TRIGGER_HAPPY6 0x2c5 #define BTN_TRIGGER_HAPPY7 0x2c6 #define BTN_TRIGGER_HAPPY8 0x2c7 #define BTN_TRIGGER_HAPPY9 0x2c8 #define BTN_TRIGGER_HAPPY10 0x2c9 #define BTN_TRIGGER_HAPPY11 0x2ca #define BTN_TRIGGER_HAPPY12 0x2cb #define BTN_TRIGGER_HAPPY13 0x2cc #define BTN_TRIGGER_HAPPY14 0x2cd #define BTN_TRIGGER_HAPPY15 0x2ce #define BTN_TRIGGER_HAPPY16 0x2cf #define BTN_TRIGGER_HAPPY17 0x2d0 #define BTN_TRIGGER_HAPPY18 0x2d1 #define BTN_TRIGGER_HAPPY19 0x2d2 #define BTN_TRIGGER_HAPPY20 0x2d3 #define BTN_TRIGGER_HAPPY21 0x2d4 #define BTN_TRIGGER_HAPPY22 0x2d5 #define BTN_TRIGGER_HAPPY23 0x2d6 #define BTN_TRIGGER_HAPPY24 0x2d7 #define BTN_TRIGGER_HAPPY25 0x2d8 #define BTN_TRIGGER_HAPPY26 0x2d9 #define BTN_TRIGGER_HAPPY27 0x2da #define BTN_TRIGGER_HAPPY28 0x2db #define BTN_TRIGGER_HAPPY29 0x2dc #define BTN_TRIGGER_HAPPY30 0x2dd #define BTN_TRIGGER_HAPPY31 0x2de #define BTN_TRIGGER_HAPPY32 0x2df #define BTN_TRIGGER_HAPPY33 0x2e0 #define BTN_TRIGGER_HAPPY34 0x2e1 #define 
BTN_TRIGGER_HAPPY35 0x2e2 #define BTN_TRIGGER_HAPPY36 0x2e3 #define BTN_TRIGGER_HAPPY37 0x2e4 #define BTN_TRIGGER_HAPPY38 0x2e5 #define BTN_TRIGGER_HAPPY39 0x2e6 #define BTN_TRIGGER_HAPPY40 0x2e7 /* We avoid low common keys in module aliases so they don't get huge. */ #define KEY_MIN_INTERESTING KEY_MUTE #define KEY_MAX 0x2ff #define KEY_CNT (KEY_MAX+1) /* * Relative axes */ #define REL_X 0x00 #define REL_Y 0x01 #define REL_Z 0x02 #define REL_RX 0x03 #define REL_RY 0x04 #define REL_RZ 0x05 #define REL_HWHEEL 0x06 #define REL_DIAL 0x07 #define REL_WHEEL 0x08 #define REL_MISC 0x09 /* * 0x0a is reserved and should not be used in input drivers. * It was used by HID as REL_MISC+1 and userspace needs to detect if * the next REL_* event is correct or is just REL_MISC + n. * We define here REL_RESERVED so userspace can rely on it and detect * the situation described above. */ #define REL_RESERVED 0x0a #define REL_WHEEL_HI_RES 0x0b #define REL_HWHEEL_HI_RES 0x0c #define REL_MAX 0x0f #define REL_CNT (REL_MAX+1) /* * Absolute axes */ #define ABS_X 0x00 #define ABS_Y 0x01 #define ABS_Z 0x02 #define ABS_RX 0x03 #define ABS_RY 0x04 #define ABS_RZ 0x05 #define ABS_THROTTLE 0x06 #define ABS_RUDDER 0x07 #define ABS_WHEEL 0x08 #define ABS_GAS 0x09 #define ABS_BRAKE 0x0a #define ABS_HAT0X 0x10 #define ABS_HAT0Y 0x11 #define ABS_HAT1X 0x12 #define ABS_HAT1Y 0x13 #define ABS_HAT2X 0x14 #define ABS_HAT2Y 0x15 #define ABS_HAT3X 0x16 #define ABS_HAT3Y 0x17 #define ABS_PRESSURE 0x18 #define ABS_DISTANCE 0x19 #define ABS_TILT_X 0x1a #define ABS_TILT_Y 0x1b #define ABS_TOOL_WIDTH 0x1c #define ABS_VOLUME 0x20 #define ABS_MISC 0x28 /* * 0x2e is reserved and should not be used in input drivers. * It was used by HID as ABS_MISC+6 and userspace needs to detect if * the next ABS_* event is correct or is just ABS_MISC + n. * We define here ABS_RESERVED so userspace can rely on it and detect * the situation described above. 
*/ #define ABS_RESERVED 0x2e #define ABS_MT_SLOT 0x2f /* MT slot being modified */ #define ABS_MT_TOUCH_MAJOR 0x30 /* Major axis of touching ellipse */ #define ABS_MT_TOUCH_MINOR 0x31 /* Minor axis (omit if circular) */ #define ABS_MT_WIDTH_MAJOR 0x32 /* Major axis of approaching ellipse */ #define ABS_MT_WIDTH_MINOR 0x33 /* Minor axis (omit if circular) */ #define ABS_MT_ORIENTATION 0x34 /* Ellipse orientation */ #define ABS_MT_POSITION_X 0x35 /* Center X touch position */ #define ABS_MT_POSITION_Y 0x36 /* Center Y touch position */ #define ABS_MT_TOOL_TYPE 0x37 /* Type of touching device */ #define ABS_MT_BLOB_ID 0x38 /* Group a set of packets as a blob */ #define ABS_MT_TRACKING_ID 0x39 /* Unique ID of initiated contact */ #define ABS_MT_PRESSURE 0x3a /* Pressure on contact area */ #define ABS_MT_DISTANCE 0x3b /* Contact hover distance */ #define ABS_MT_TOOL_X 0x3c /* Center X tool position */ #define ABS_MT_TOOL_Y 0x3d /* Center Y tool position */ #define ABS_MAX 0x3f #define ABS_CNT (ABS_MAX+1) /* * Switch events */ #define SW_LID 0x00 /* set = lid shut */ #define SW_TABLET_MODE 0x01 /* set = tablet mode */ #define SW_HEADPHONE_INSERT 0x02 /* set = inserted */ #define SW_RFKILL_ALL 0x03 /* rfkill master switch, type "any" set = radio enabled */ #define SW_RADIO SW_RFKILL_ALL /* deprecated */ #define SW_MICROPHONE_INSERT 0x04 /* set = inserted */ #define SW_DOCK 0x05 /* set = plugged into dock */ #define SW_LINEOUT_INSERT 0x06 /* set = inserted */ #define SW_JACK_PHYSICAL_INSERT 0x07 /* set = mechanical switch set */ #define SW_VIDEOOUT_INSERT 0x08 /* set = inserted */ #define SW_CAMERA_LENS_COVER 0x09 /* set = lens covered */ #define SW_KEYPAD_SLIDE 0x0a /* set = keypad slide out */ #define SW_FRONT_PROXIMITY 0x0b /* set = front proximity sensor active */ #define SW_ROTATE_LOCK 0x0c /* set = rotate locked/disabled */ #define SW_LINEIN_INSERT 0x0d /* set = inserted */ #define SW_MUTE_DEVICE 0x0e /* set = device disabled */ #define SW_PEN_INSERTED 0x0f /* set = 
pen inserted */ #define SW_MAX 0x0f #define SW_CNT (SW_MAX+1) /* * Misc events */ #define MSC_SERIAL 0x00 #define MSC_PULSELED 0x01 #define MSC_GESTURE 0x02 #define MSC_RAW 0x03 #define MSC_SCAN 0x04 #define MSC_TIMESTAMP 0x05 #define MSC_MAX 0x07 #define MSC_CNT (MSC_MAX+1) /* * LEDs */ #define LED_NUML 0x00 #define LED_CAPSL 0x01 #define LED_SCROLLL 0x02 #define LED_COMPOSE 0x03 #define LED_KANA 0x04 #define LED_SLEEP 0x05 #define LED_SUSPEND 0x06 #define LED_MUTE 0x07 #define LED_MISC 0x08 #define LED_MAIL 0x09 #define LED_CHARGING 0x0a #define LED_MAX 0x0f #define LED_CNT (LED_MAX+1) /* * Autorepeat values */ #define REP_DELAY 0x00 #define REP_PERIOD 0x01 #define REP_MAX 0x01 #define REP_CNT (REP_MAX+1) /* * Sounds */ #define SND_CLICK 0x00 #define SND_BELL 0x01 #define SND_TONE 0x02 #define SND_MAX 0x07 #define SND_CNT (SND_MAX+1) #endif ================================================ FILE: risc_v/userspace/shell.cpp ================================================ #include #include int main() { printf("Started shell.\n"); char data[100]; while (1) { printf("Enter value: "); int r = read(0, data, 100); if (r > 0) { printf("Got %s\n", data); } } return 0; } ================================================ FILE: risc_v/userspace/sleepy.cpp ================================================ #include int main() { printf("I'm going to bed.\nYou can watch me sleep for 100 switches using 'top'\n"); return 0; } ================================================ FILE: risc_v/userspace/startlib/.gitignore ================================================ *.a *.o ================================================ FILE: risc_v/userspace/startlib/Makefile ================================================ CROSS=riscv64-unknown-linux-gnu- CXX=g++ OBJCOPY=objcopy AR=ar CXXFLAGS=-Wall -O0 -ffreestanding -nostartfiles -nostdlib -I. 
-march=rv64g -mabi=lp64d OUT=libstart.a SOURCES_S=$(wildcard *.S) SOURCES_CPP=$(wildcard *.cpp) OBJS=$(patsubst %.S,%.o,$(SOURCES_S)) $(patsubst %.cpp,%.o,$(SOURCES_CPP)) all: $(OUT) $(OUT): $(OBJS) Makefile rm -f $(OUT) $(AR) rcv $(OUT) $(OBJS) %.o: %.S $(CROSS)$(CXX) $(CXXFLAGS) -c $< -o $@ %.o: %.cpp $(CROSS)$(CXX) $(CXXFLAGS) -c $< -o $@ .PHONY: clean clean: rm -f $(OUT) $(OBJS) ================================================ FILE: risc_v/userspace/startlib/linker.lds ================================================ OUTPUT_ARCH( "riscv" ) ENTRY( _start ) MEMORY { ram (wxa!ri) : ORIGIN = 0x20000000, LENGTH = 128M } PHDRS { text PT_LOAD; rodata PT_LOAD; data PT_LOAD; bss PT_LOAD; } SECTIONS { .text : { PROVIDE(_text_start = .); *(.text.init) *(.text .text.*) PROVIDE(_text_end = .); } >ram AT>ram :text PROVIDE(_global_pointer = .); .rodata : { PROVIDE(_rodata_start = .); *(.rodata .rodata.*) PROVIDE(_rodata_end = .); } >ram AT>ram :rodata .data : { PROVIDE(_data_start = .); *(.sdata .sdata.*) *(.data .data.*) PROVIDE(_data_end = .); } >ram AT>ram :data .bss :{ PROVIDE(_bss_start = .); *(.sbss .sbss.*) *(.bss .bss.*) PROVIDE(_bss_end = .); } >ram AT>ram :bss PROVIDE(_memory_start = ORIGIN(ram)); PROVIDE(_stack = _bss_end + 0x80000); PROVIDE(_memory_end = ORIGIN(ram) + LENGTH(ram)); PROVIDE(_heap_start = _stack); PROVIDE(_heap_size = _memory_end - _stack); } ================================================ FILE: risc_v/userspace/startlib/printf.cpp ================================================ /////////////////////////////////////////////////////////////////////////////// // \author (c) Marco Paland (info@paland.com) // 2014-2019, PALANDesign Hannover, Germany // // \license The MIT License (MIT) // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, 
merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. // // \brief Tiny printf, sprintf and (v)snprintf implementation, optimized for speed on // embedded systems with a very limited resources. These routines are thread // safe and reentrant! // Use this instead of the bloated standard/newlib printf cause these use // malloc for printf (and may not be thread safe). 
// /////////////////////////////////////////////////////////////////////////////// #include #include #include #include // #define USE_DIRECT_UART // // 'ntoa' conversion buffer size, this must be big enough to hold one converted // numeric number including padded zeros (dynamically created on stack) // default: 32 byte #ifndef PRINTF_NTOA_BUFFER_SIZE #define PRINTF_NTOA_BUFFER_SIZE 32U #endif // 'ftoa' conversion buffer size, this must be big enough to hold one converted // float number including padded zeros (dynamically created on stack) // default: 32 byte #ifndef PRINTF_FTOA_BUFFER_SIZE #define PRINTF_FTOA_BUFFER_SIZE 32U #endif // support for the floating point type (%f) // default: activated //#ifndef PRINTF_DISABLE_SUPPORT_FLOAT #define PRINTF_SUPPORT_FLOAT //#endif // support for exponential floating point notation (%e/%g) // default: activated //#ifndef PRINTF_DISABLE_SUPPORT_EXPONENTIAL #define PRINTF_SUPPORT_EXPONENTIAL //#endif // define the default floating point precision // default: 6 digits #ifndef PRINTF_DEFAULT_FLOAT_PRECISION #define PRINTF_DEFAULT_FLOAT_PRECISION 6U #endif // define the largest float suitable to print with %f // default: 1e9 #ifndef PRINTF_MAX_FLOAT #define PRINTF_MAX_FLOAT 1e9 #endif // support for the long long types (%llu or %p) // default: activated // #ifndef PRINTF_DISABLE_SUPPORT_LONG_LONG #define PRINTF_SUPPORT_LONG_LONG // #endif // support for the ptrdiff_t type (%t) // ptrdiff_t is normally defined in as long or long long type // default: activated // #ifndef PRINTF_DISABLE_SUPPORT_PTRDIFF_T #define PRINTF_SUPPORT_PTRDIFF_T // #endif /////////////////////////////////////////////////////////////////////////////// // internal flag definitions #define FLAGS_ZEROPAD (1U << 0U) #define FLAGS_LEFT (1U << 1U) #define FLAGS_PLUS (1U << 2U) #define FLAGS_SPACE (1U << 3U) #define FLAGS_HASH (1U << 4U) #define FLAGS_UPPERCASE (1U << 5U) #define FLAGS_CHAR (1U << 6U) #define FLAGS_SHORT (1U << 7U) #define FLAGS_LONG (1U << 8U) 
#define FLAGS_LONG_LONG (1U << 9U)
#define FLAGS_PRECISION (1U << 10U)
#define FLAGS_ADAPT_EXP (1U << 11U)

// import float.h for DBL_MAX
// NOTE(review): the header names of the two #include directives below were
// lost in extraction (the <...> part is missing) -- restore them from the
// upstream sources (the first is presumably <float.h>; verify the second).
#if defined(PRINTF_SUPPORT_FLOAT)
#include
#endif
#include

// output function type: single-character sink shared by every formatter
// below. 'buffer' is an opaque destination, 'idx' the write position,
// 'maxlen' the output cap.
typedef void (*out_fct_type)(char character, void *buffer, size_t idx, size_t maxlen);

// wrapper (used as buffer) for output function type
typedef struct {
	void (*fct)(char character, void *arg);
	void *arg;
} out_fct_wrap_type;

// internal buffer output: store into a char array, silently dropping
// characters once 'maxlen' is reached (snprintf-style truncation).
static inline void _out_buffer(char character, void *buffer, size_t idx, size_t maxlen)
{
	if (idx < maxlen) {
		((char *)buffer)[idx] = character;
	}
}

// Low-level console output for printf(): forwards one character via the
// put-char system call (syscall_put_char is defined elsewhere in this
// userspace library).
void _putchar(char c)
{
	syscall_put_char(c);
}

// internal null output: discards everything (used when vsnprintf is given
// a NULL buffer and only the would-be length is wanted).
static inline void _out_null(char character, void *buffer, size_t idx, size_t maxlen)
{
	(void)character;
	(void)buffer;
	(void)idx;
	(void)maxlen;
}

// internal _putchar wrapper: character sink for printf() proper.
static inline void _out_char(char character, void *buffer, size_t idx, size_t maxlen)
{
	(void)buffer;
	(void)idx;
	(void)maxlen;
	if (character) {
		_putchar(character);
	}
}

// internal output function wrapper: character sink for fctprintf-style use.
static inline void _out_fct(char character, void *buffer, size_t idx, size_t maxlen)
{
	(void)idx;
	(void)maxlen;
	if (character) {
		// buffer is the output fct pointer
		((out_fct_wrap_type *)buffer)->fct(character, ((out_fct_wrap_type *)buffer)->arg);
	}
}

// internal secure strlen
// \return The length of the string (excluding the terminating 0) limited by 'maxsize'
static inline unsigned int _strnlen_s(const char *str, size_t maxsize)
{
	const char *s;
	for (s = str; *s && maxsize--; ++s)
		;
	return (unsigned int)(s - str);
}

// internal test if char is a digit (0-9)
// \return true if char is a digit
static inline bool _is_digit(char ch)
{
	return (ch >= '0') && (ch <= '9');
}

// internal ASCII string to unsigned int conversion
// Advances *str past the digits it consumes.
static unsigned int _atoi(const char **str)
{
	unsigned int i = 0U;
	while (_is_digit(**str)) {
		i = i * 10U + (unsigned int)(*((*str)++) - '0');
	}
	return i;
}

// output the specified string in reverse, taking care of any zero-padding
// 'buf' holds the digits in REVERSE order; this emits them last-to-first,
// space-padding to 'width' on the left (default) or right (FLAGS_LEFT).
static size_t _out_rev(out_fct_type out, char *buffer, size_t idx, size_t maxlen, const char *buf, size_t len, unsigned int width, unsigned int flags)
{
	const size_t start_idx = idx;

	// pad spaces up to given width
	if (!(flags & FLAGS_LEFT) && !(flags & FLAGS_ZEROPAD)) {
		for (size_t i = len; i < width; i++) {
			out(' ', buffer, idx++, maxlen);
		}
	}

	// reverse string
	while (len) {
		out(buf[--len], buffer, idx++, maxlen);
	}

	// append pad spaces up to given width
	if (flags & FLAGS_LEFT) {
		while (idx - start_idx < width) {
			out(' ', buffer, idx++, maxlen);
		}
	}

	return idx;
}

// internal itoa format: apply zero padding, the '#' (hash) base prefix and
// the sign character to a reversed digit string in 'buf', then emit it via
// _out_rev. Because 'buf' is reversed, prefixes are appended ('x' then '0').
static size_t _ntoa_format(out_fct_type out, char *buffer, size_t idx, size_t maxlen, char *buf, size_t len, bool negative, unsigned int base, unsigned int prec, unsigned int width, unsigned int flags)
{
	// pad leading zeros
	if (!(flags & FLAGS_LEFT)) {
		if (width && (flags & FLAGS_ZEROPAD) && (negative || (flags & (FLAGS_PLUS | FLAGS_SPACE)))) {
			// reserve one column for the sign character added below
			width--;
		}
		while ((len < prec) && (len < PRINTF_NTOA_BUFFER_SIZE)) {
			buf[len++] = '0';
		}
		while ((flags & FLAGS_ZEROPAD) && (len < width) && (len < PRINTF_NTOA_BUFFER_SIZE)) {
			buf[len++] = '0';
		}
	}

	// handle hash
	if (flags & FLAGS_HASH) {
		if (!(flags & FLAGS_PRECISION) && len && ((len == prec) || (len == width))) {
			// drop pad character(s) to make room for the base prefix
			len--;
			if (len && (base == 16U)) {
				len--;
			}
		}
		if ((base == 16U) && !(flags & FLAGS_UPPERCASE) && (len < PRINTF_NTOA_BUFFER_SIZE)) {
			buf[len++] = 'x';
		}
		else if ((base == 16U) && (flags & FLAGS_UPPERCASE) && (len < PRINTF_NTOA_BUFFER_SIZE)) {
			buf[len++] = 'X';
		}
		else if ((base == 2U) && (len < PRINTF_NTOA_BUFFER_SIZE)) {
			buf[len++] = 'b';
		}
		if (len < PRINTF_NTOA_BUFFER_SIZE) {
			buf[len++] = '0';
		}
	}

	if (len < PRINTF_NTOA_BUFFER_SIZE) {
		if (negative) {
			buf[len++] = '-';
		}
		else if (flags & FLAGS_PLUS) {
			buf[len++] = '+'; // ignore the space if the '+' exists
		}
		else if (flags & FLAGS_SPACE) {
			buf[len++] = ' ';
		}
	}

	return _out_rev(out, buffer, idx, maxlen, buf, len, width, flags);
}
// internal
// itoa for 'long' type: convert 'value' to digits (built in REVERSE order)
// in the requested base, then delegate padding/prefix/sign to _ntoa_format.
static size_t _ntoa_long(out_fct_type out, char *buffer, size_t idx, size_t maxlen, unsigned long value, bool negative, unsigned long base, unsigned int prec, unsigned int width, unsigned int flags)
{
	char buf[PRINTF_NTOA_BUFFER_SIZE];
	size_t len = 0U;

	// no hash for 0 values
	if (!value) {
		flags &= ~FLAGS_HASH;
	}

	// write if precision != 0 and value is != 0
	if (!(flags & FLAGS_PRECISION) || value) {
		do {
			const char digit = (char)(value % base);
			buf[len++] = digit < 10 ? '0' + digit : (flags & FLAGS_UPPERCASE ? 'A' : 'a') + digit - 10;
			value /= base;
		} while (value && (len < PRINTF_NTOA_BUFFER_SIZE));
	}

	return _ntoa_format(out, buffer, idx, maxlen, buf, len, negative, (unsigned int)base, prec, width, flags);
}

// internal itoa for 'long long' type
// Identical in structure to _ntoa_long, but on 'unsigned long long'.
#if defined(PRINTF_SUPPORT_LONG_LONG)
static size_t _ntoa_long_long(out_fct_type out, char *buffer, size_t idx, size_t maxlen, unsigned long long value, bool negative, unsigned long long base, unsigned int prec, unsigned int width, unsigned int flags)
{
	char buf[PRINTF_NTOA_BUFFER_SIZE];
	size_t len = 0U;

	// no hash for 0 values
	if (!value) {
		flags &= ~FLAGS_HASH;
	}

	// write if precision != 0 and value is != 0
	if (!(flags & FLAGS_PRECISION) || value) {
		do {
			const char digit = (char)(value % base);
			buf[len++] = digit < 10 ? '0' + digit : (flags & FLAGS_UPPERCASE ? 'A' : 'a') + digit - 10;
			value /= base;
		} while (value && (len < PRINTF_NTOA_BUFFER_SIZE));
	}

	return _ntoa_format(out, buffer, idx, maxlen, buf, len, negative, (unsigned int)base, prec, width, flags);
}
#endif // PRINTF_SUPPORT_LONG_LONG

#if defined(PRINTF_SUPPORT_FLOAT)

#if defined(PRINTF_SUPPORT_EXPONENTIAL)
// forward declaration so that _ftoa can switch to exp notation for values > PRINTF_MAX_FLOAT
static size_t _etoa(out_fct_type out, char *buffer, size_t idx, size_t maxlen, double value, unsigned int prec, unsigned int width, unsigned int flags);
#endif

// internal ftoa for fixed decimal floating point
// NOTE: digit strings handed to _out_rev are built in REVERSE order, which
// is why the infinity literals below read "fni"/"fni-"/"fni+" ("inf" reversed;
// "nan" is a palindrome so it appears unreversed).
static size_t _ftoa(out_fct_type out, char *buffer, size_t idx, size_t maxlen, double value, unsigned int prec, unsigned int width, unsigned int flags)
{
	char buf[PRINTF_FTOA_BUFFER_SIZE];
	size_t len = 0U;
	double diff = 0.0;

	// powers of 10
	static const double pow10[] = {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000};

	// test for special values
	// (value != value) is the portable NaN test -- NaN compares unequal to itself
	if (value != value)
		return _out_rev(out, buffer, idx, maxlen, "nan", 3, width, flags);
	if (value < -DBL_MAX)
		return _out_rev(out, buffer, idx, maxlen, "fni-", 4, width, flags);
	if (value > DBL_MAX)
		return _out_rev(out, buffer, idx, maxlen, (flags & FLAGS_PLUS) ? "fni+" : "fni", (flags & FLAGS_PLUS) ?
4U : 3U, width, flags);

	// test for very large values
	// standard printf behavior is to print EVERY whole number digit -- which could be 100s of characters overflowing your buffers == bad
	if ((value > PRINTF_MAX_FLOAT) || (value < -PRINTF_MAX_FLOAT)) {
#if defined(PRINTF_SUPPORT_EXPONENTIAL)
		return _etoa(out, buffer, idx, maxlen, value, prec, width, flags);
#else
		return 0U;
#endif
	}

	// test for negative
	bool negative = false;
	if (value < 0) {
		negative = true;
		value = 0 - value;
	}

	// set default precision, if not set explicitly
	if (!(flags & FLAGS_PRECISION)) {
		prec = PRINTF_DEFAULT_FLOAT_PRECISION;
	}

	// limit precision to 9, cause a prec >= 10 can lead to overflow errors
	while ((len < PRINTF_FTOA_BUFFER_SIZE) && (prec > 9U)) {
		buf[len++] = '0';
		prec--;
	}

	// split into whole part and scaled fractional part
	int whole = (int)value;
	double tmp = (value - whole) * pow10[prec];
	unsigned long frac = (unsigned long)tmp;
	diff = tmp - frac;

	if (diff > 0.5) {
		++frac;
		// handle rollover, e.g. case 0.99 with prec 1 is 1.0
		if (frac >= pow10[prec]) {
			frac = 0;
			++whole;
		}
	}
	else if (diff < 0.5) {
		// round down: nothing to do
	}
	else if ((frac == 0U) || (frac & 1U)) {
		// if halfway, round up if odd OR if last digit is 0
		++frac;
	}

	if (prec == 0U) {
		diff = value - (double)whole;
		if ((!(diff < 0.5) || (diff > 0.5)) && (whole & 1)) {
			// exactly 0.5 and ODD, then round up
			// 1.5 -> 2, but 2.5 -> 2
			++whole;
		}
	}
	else {
		unsigned int count = prec;
		// now do fractional part, as an unsigned number
		while (len < PRINTF_FTOA_BUFFER_SIZE) {
			--count;
			buf[len++] = (char)(48U + (frac % 10U));
			if (!(frac /= 10U)) {
				break;
			}
		}
		// add extra 0s
		while ((len < PRINTF_FTOA_BUFFER_SIZE) && (count-- > 0U)) {
			buf[len++] = '0';
		}
		if (len < PRINTF_FTOA_BUFFER_SIZE) {
			// add decimal
			buf[len++] = '.';
		}
	}

	// do whole part, number is reversed
	while (len < PRINTF_FTOA_BUFFER_SIZE) {
		buf[len++] = (char)(48 + (whole % 10));
		if (!(whole /= 10)) {
			break;
		}
	}

	// pad leading zeros
	if (!(flags & FLAGS_LEFT) && (flags & FLAGS_ZEROPAD)) {
		if (width && (negative || (flags & (FLAGS_PLUS | FLAGS_SPACE)))) {
			width--;
		}
		while ((len < width) && (len < PRINTF_FTOA_BUFFER_SIZE)) {
			buf[len++] = '0';
		}
	}

	if (len < PRINTF_FTOA_BUFFER_SIZE) {
		if (negative) {
			buf[len++] = '-';
		}
		else if (flags & FLAGS_PLUS) {
			buf[len++] = '+'; // ignore the space if the '+' exists
		}
		else if (flags & FLAGS_SPACE) {
			buf[len++] = ' ';
		}
	}

	return _out_rev(out, buffer, idx, maxlen, buf, len, width, flags);
}

#if defined(PRINTF_SUPPORT_EXPONENTIAL)
// internal ftoa variant for exponential floating-point type, contributed by Martijn Jasperse
static size_t _etoa(out_fct_type out, char *buffer, size_t idx, size_t maxlen, double value, unsigned int prec, unsigned int width, unsigned int flags)
{
	// check for NaN and special values
	if ((value != value) || (value > DBL_MAX) || (value < -DBL_MAX)) {
		return _ftoa(out, buffer, idx, maxlen, value, prec, width, flags);
	}

	// determine the sign
	const bool negative = value < 0;
	if (negative) {
		value = -value;
	}

	// default precision
	if (!(flags & FLAGS_PRECISION)) {
		prec = PRINTF_DEFAULT_FLOAT_PRECISION;
	}

	// determine the decimal exponent
	// based on the algorithm by David Gay (https://www.ampl.com/netlib/fp/dtoa.c)
	// The union reinterprets the IEEE-754 binary64 bit pattern to read/write
	// the biased exponent field directly.
	union {
		uint64_t U;
		double F;
	} conv;

	conv.F = value;
	int exp2 = (int)((conv.U >> 52U) & 0x07FFU) - 1023; // effectively log2
	conv.U = (conv.U & ((1ULL << 52U) - 1U)) | (1023ULL << 52U); // drop the exponent so conv.F is now in [1,2)
	// now approximate log10 from the log2 integer part and an expansion of ln around 1.5
	int expval = (int)(0.1760912590558 + exp2 * 0.301029995663981 + (conv.F - 1.5) * 0.289529654602168);
	// now we want to compute 10^expval but we want to be sure it won't overflow
	exp2 = (int)(expval * 3.321928094887362 + 0.5);
	const double z = expval * 2.302585092994046 - exp2 * 0.6931471805599453;
	const double z2 = z * z;
	conv.U = (uint64_t)(exp2 + 1023) << 52U;
	// compute exp(z) using continued fractions, see https://en.wikipedia.org/wiki/Exponential_function#Continued_fractions_for_ex
	conv.F *= 1 + 2 * z / (2 - z + (z2 / (6 + (z2 / (10 + z2 / 14)))));

	// correct for rounding errors
	if (value < conv.F) {
		expval--;
		conv.F /= 10;
	}

	// the exponent format is "%+03d" and largest value is "307", so set aside 4-5 characters
	unsigned int minwidth = ((expval < 100) && (expval > -100)) ? 4U : 5U;

	// in "%g" mode, "prec" is the number of *significant figures* not decimals
	if (flags & FLAGS_ADAPT_EXP) {
		// do we want to fall-back to "%f" mode?
		if ((value >= 1e-4) && (value < 1e6)) {
			if ((int)prec > expval) {
				prec = (unsigned)((int)prec - expval - 1);
			}
			else {
				prec = 0;
			}
			flags |= FLAGS_PRECISION; // make sure _ftoa respects precision
			// no characters in exponent
			minwidth = 0U;
			expval = 0;
		}
		else {
			// we use one sigfig for the whole part
			if ((prec > 0) && (flags & FLAGS_PRECISION)) {
				--prec;
			}
		}
	}

	// will everything fit?
	unsigned int fwidth = width;
	if (width > minwidth) {
		// we didn't fall-back so subtract the characters required for the exponent
		fwidth -= minwidth;
	}
	else {
		// not enough characters, so go back to default sizing
		fwidth = 0U;
	}
	if ((flags & FLAGS_LEFT) && minwidth) {
		// if we're padding on the right, DON'T pad the floating part
		fwidth = 0U;
	}

	// rescale the float value
	if (expval) {
		value /= conv.F;
	}

	// output the floating part
	const size_t start_idx = idx;
	idx = _ftoa(out, buffer, idx, maxlen, negative ? -value : value, prec, fwidth, flags & ~FLAGS_ADAPT_EXP);

	// output the exponent part
	if (minwidth) {
		// output the exponential symbol
		out((flags & FLAGS_UPPERCASE) ? 'E' : 'e', buffer, idx++, maxlen);
		// output the exponent value
		idx = _ntoa_long(out, buffer, idx, maxlen, (expval < 0) ? -expval : expval, expval < 0, 10, 0, minwidth - 1, FLAGS_ZEROPAD | FLAGS_PLUS);
		// might need to right-pad spaces
		if (flags & FLAGS_LEFT) {
			while (idx - start_idx < width)
				out(' ', buffer, idx++, maxlen);
		}
	}
	return idx;
}
#endif // PRINTF_SUPPORT_EXPONENTIAL
#endif // PRINTF_SUPPORT_FLOAT

// internal vsnprintf
// Core format-string interpreter: parses %[flags][width][.precision][length]
// and dispatches to the _ntoa/_ftoa/_etoa helpers above.
static int _vsnprintf(out_fct_type out, char *buffer, const size_t maxlen, const char *format, va_list va)
{
	unsigned int flags, width, precision, n;
	size_t idx = 0U;

	if (!buffer) {
		// use null output function
		out = _out_null;
	}

	while (*format) {
		// format specifier?  %[flags][width][.precision][length]
		if (*format != '%') {
			// no
			out(*format, buffer, idx++, maxlen);
			format++;
			continue;
		}
		else {
			// yes, evaluate it
			format++;
		}

		// evaluate flags
		flags = 0U;
		do {
			switch (*format) {
			case '0': flags |= FLAGS_ZEROPAD; format++; n = 1U; break;
			case '-': flags |= FLAGS_LEFT; format++; n = 1U; break;
			case '+': flags |= FLAGS_PLUS; format++; n = 1U; break;
			case ' ': flags |= FLAGS_SPACE; format++; n = 1U; break;
			case '#': flags |= FLAGS_HASH; format++; n = 1U; break;
			default: n = 0U; break;
			}
		} while (n);

		// evaluate width field
		width = 0U;
		if (_is_digit(*format)) {
			width = _atoi(&format);
		}
		else if (*format == '*') {
			const int w = va_arg(va, int);
			if (w < 0) {
				flags |= FLAGS_LEFT; // reverse padding
				width = (unsigned int)-w;
			}
			else {
				width = (unsigned int)w;
			}
			format++;
		}

		// evaluate precision field
		precision = 0U;
		if (*format == '.') {
			flags |= FLAGS_PRECISION;
			format++;
			if (_is_digit(*format)) {
				precision = _atoi(&format);
			}
			else if (*format == '*') {
				const int prec = (int)va_arg(va, int);
				precision = prec > 0 ?
(unsigned int)prec : 0U; format++; } } // evaluate length field switch (*format) { case 'l': flags |= FLAGS_LONG; format++; if (*format == 'l') { flags |= FLAGS_LONG_LONG; format++; } break; case 'h': flags |= FLAGS_SHORT; format++; if (*format == 'h') { flags |= FLAGS_CHAR; format++; } break; #if defined(PRINTF_SUPPORT_PTRDIFF_T) case 't': flags |= (sizeof(ptrdiff_t) == sizeof(long) ? FLAGS_LONG : FLAGS_LONG_LONG); format++; break; #endif case 'j': flags |= (sizeof(intmax_t) == sizeof(long) ? FLAGS_LONG : FLAGS_LONG_LONG); format++; break; case 'z': flags |= (sizeof(size_t) == sizeof(long) ? FLAGS_LONG : FLAGS_LONG_LONG); format++; break; default: break; } // evaluate specifier switch (*format) { case 'd': case 'i': case 'u': case 'x': case 'X': case 'o': case 'b': { // set the base unsigned int base; if (*format == 'x' || *format == 'X') { base = 16U; } else if (*format == 'o') { base = 8U; } else if (*format == 'b') { base = 2U; } else { base = 10U; flags &= ~FLAGS_HASH; // no hash for dec format } // uppercase if (*format == 'X') { flags |= FLAGS_UPPERCASE; } // no plus or space flag for u, x, X, o, b if ((*format != 'i') && (*format != 'd')) { flags &= ~(FLAGS_PLUS | FLAGS_SPACE); } // ignore '0' flag when precision is given if (flags & FLAGS_PRECISION) { flags &= ~FLAGS_ZEROPAD; } // convert the integer if ((*format == 'i') || (*format == 'd')) { // signed if (flags & FLAGS_LONG_LONG) { #if defined(PRINTF_SUPPORT_LONG_LONG) const long long value = va_arg(va, long long); idx = _ntoa_long_long(out, buffer, idx, maxlen, (unsigned long long)(value > 0 ? value : 0 - value), value < 0, base, precision, width, flags); #endif } else if (flags & FLAGS_LONG) { const long value = va_arg(va, long); idx = _ntoa_long(out, buffer, idx, maxlen, (unsigned long)(value > 0 ? value : 0 - value), value < 0, base, precision, width, flags); } else { const int value = (flags & FLAGS_CHAR) ? (char)va_arg(va, int) : (flags & FLAGS_SHORT) ? 
(short int)va_arg(va, int) : va_arg(va, int); idx = _ntoa_long(out, buffer, idx, maxlen, (unsigned int)(value > 0 ? value : 0 - value), value < 0, base, precision, width, flags); } } else { // unsigned if (flags & FLAGS_LONG_LONG) { #if defined(PRINTF_SUPPORT_LONG_LONG) idx = _ntoa_long_long(out, buffer, idx, maxlen, va_arg(va, unsigned long long), false, base, precision, width, flags); #endif } else if (flags & FLAGS_LONG) { idx = _ntoa_long(out, buffer, idx, maxlen, va_arg(va, unsigned long), false, base, precision, width, flags); } else { const unsigned int value = (flags & FLAGS_CHAR) ? (unsigned char)va_arg(va, unsigned int) : (flags & FLAGS_SHORT) ? (unsigned short int)va_arg(va, unsigned int) : va_arg(va, unsigned int); idx = _ntoa_long(out, buffer, idx, maxlen, value, false, base, precision, width, flags); } } format++; break; } #if defined(PRINTF_SUPPORT_FLOAT) case 'f': case 'F': if (*format == 'F') flags |= FLAGS_UPPERCASE; idx = _ftoa(out, buffer, idx, maxlen, va_arg(va, double), precision, width, flags); format++; break; #if defined(PRINTF_SUPPORT_EXPONENTIAL) case 'e': case 'E': case 'g': case 'G': if ((*format == 'g') || (*format == 'G')) flags |= FLAGS_ADAPT_EXP; if ((*format == 'E') || (*format == 'G')) flags |= FLAGS_UPPERCASE; idx = _etoa(out, buffer, idx, maxlen, va_arg(va, double), precision, width, flags); format++; break; #endif // PRINTF_SUPPORT_EXPONENTIAL #endif // PRINTF_SUPPORT_FLOAT case 'c': { unsigned int l = 1U; // pre padding if (!(flags & FLAGS_LEFT)) { while (l++ < width) { out(' ', buffer, idx++, maxlen); } } // char output out((char)va_arg(va, int), buffer, idx++, maxlen); // post padding if (flags & FLAGS_LEFT) { while (l++ < width) { out(' ', buffer, idx++, maxlen); } } format++; break; } case 's': { const char *p = va_arg(va, char *); unsigned int l = _strnlen_s(p, precision ? precision : (size_t)-1); // pre padding if (flags & FLAGS_PRECISION) { l = (l < precision ? 
l : precision); } if (!(flags & FLAGS_LEFT)) { while (l++ < width) { out(' ', buffer, idx++, maxlen); } } // string output while ((*p != 0) && (!(flags & FLAGS_PRECISION) || precision--)) { out(*(p++), buffer, idx++, maxlen); } // post padding if (flags & FLAGS_LEFT) { while (l++ < width) { out(' ', buffer, idx++, maxlen); } } format++; break; } case 'p': { width = sizeof(void *) * 2U; flags |= FLAGS_ZEROPAD | FLAGS_UPPERCASE; #if defined(PRINTF_SUPPORT_LONG_LONG) const bool is_ll = sizeof(uintptr_t) == sizeof(long long); if (is_ll) { idx = _ntoa_long_long(out, buffer, idx, maxlen, (uintptr_t)va_arg(va, void *), false, 16U, precision, width, flags); } else { #endif idx = _ntoa_long(out, buffer, idx, maxlen, (unsigned long)((uintptr_t)va_arg(va, void *)), false, 16U, precision, width, flags); #if defined(PRINTF_SUPPORT_LONG_LONG) } #endif format++; break; } case '%': out('%', buffer, idx++, maxlen); format++; break; default: out(*format, buffer, idx++, maxlen); format++; break; } } // termination out((char)0, buffer, idx < maxlen ? idx : maxlen - 1U, maxlen); // return written chars without terminating \0 return (int)idx; } /////////////////////////////////////////////////////////////////////////////// int printf(const char *format, ...) { va_list va; va_start(va, format); char buffer[1]; const int ret = _vsnprintf(_out_char, buffer, (size_t)-1, format, va); va_end(va); return ret; } int sprintf(char *buffer, const char *format, ...) { va_list va; va_start(va, format); const int ret = _vsnprintf(_out_buffer, buffer, (size_t)-1, format, va); va_end(va); return ret; } int snprintf(char *buffer, size_t count, const char *format, ...) 
{ va_list va; va_start(va, format); const int ret = _vsnprintf(_out_buffer, buffer, count, format, va); va_end(va); return ret; } int vprintf_(const char *format, va_list va) { char buffer[1]; return _vsnprintf(_out_char, buffer, (size_t)-1, format, va); } int vsnprintf_(char *buffer, size_t count, const char *format, va_list va) { return _vsnprintf(_out_buffer, buffer, count, format, va); } int fctprintf(void (*out)(char character, void *arg), void *arg, const char *format, ...) { va_list va; va_start(va, format); const out_fct_wrap_type out_fct_wrap = {out, arg}; const int ret = _vsnprintf(_out_fct, (char *)(uintptr_t)&out_fct_wrap, (size_t)-1, format, va); va_end(va); return ret; } ================================================ FILE: risc_v/userspace/startlib/printf.h ================================================ /////////////////////////////////////////////////////////////////////////////// // \author (c) Marco Paland (info@paland.com) // 2014-2019, PALANDesign Hannover, Germany // // \license The MIT License (MIT) // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
// IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// \brief Tiny printf, sprintf and snprintf implementation, optimized for speed on
//        embedded systems with a very limited resources.
//        Use this instead of bloated standard/newlib printf.
//        These routines are thread safe and reentrant.
//
///////////////////////////////////////////////////////////////////////////////

#pragma once
#ifndef _PRINTF_H_
#define _PRINTF_H_

// FIX(review): the two includes had lost their header names (they read just
// "#include"). The declarations below require va_list (<stdarg.h>) and
// size_t (<stddef.h>).
#include <stdarg.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Output a character to a custom device like UART, used by the printf() function
 * This function is declared here only. You have to write your custom implementation somewhere
 * \param character Character to output
 */
void _putchar(char character);

/**
 * Tiny printf implementation
 * You have to implement _putchar if you use printf()
 * To avoid conflicts with the regular printf() API it is overridden by macro defines
 * and internal underscore-appended functions like printf_() are used
 * \param format A string that specifies the format of the output
 * \return The number of characters that are written into the array, not counting the terminating null character
 */
//int cosc361_printf(const char* format, ...);
int printf(const char *format, ...);

/**
 * Tiny sprintf implementation
 * Due to security reasons (buffer overflow) YOU SHOULD CONSIDER USING (V)SNPRINTF INSTEAD!
 * \param buffer A pointer to the buffer where to store the formatted string. MUST be big enough to store the output!
 * \param format A string that specifies the format of the output
 * \return The number of characters that are WRITTEN into the buffer, not counting the terminating null character
 */
int sprintf(char* buffer, const char* format, ...);

/**
 * Tiny snprintf/vsnprintf implementation
 * \param buffer A pointer to the buffer where to store the formatted string
 * \param count The maximum number of characters to store in the buffer, including a terminating null character
 * \param format A string that specifies the format of the output
 * \param va A value identifying a variable arguments list
 * \return The number of characters that COULD have been written into the buffer, not counting the terminating
 *         null character. A value equal or larger than count indicates truncation. Only when the returned value
 *         is non-negative and less than count, the string has been completely written.
 */
int snprintf(char* buffer, size_t count, const char* format, ...);
// FIX(review): the implementation file defines vsnprintf_() (with a trailing
// underscore), but this header declared plain vsnprintf(), which has no
// definition and would fail at link time. Map the standard name onto the
// implementation, mirroring the existing vprintf/vprintf_ macro below.
#define vsnprintf vsnprintf_
int vsnprintf_(char* buffer, size_t count, const char* format, va_list va);

/**
 * Tiny vprintf implementation
 * \param format A string that specifies the format of the output
 * \param va A value identifying a variable arguments list
 * \return The number of characters that are WRITTEN into the buffer, not counting the terminating null character
 */
#define vprintf vprintf_
int vprintf_(const char* format, va_list va);

/**
 * printf with output function
 * You may use this as dynamic alternative to printf() with its fixed _putchar() output
 * \param out An output function which takes one character and an argument pointer
 * \param arg An argument pointer for user data passed to output function
 * \param format A string that specifies the format of the output
 * \return The number of characters that are sent to the output function, not counting the terminating null character
 */
int fctprintf(void (*out)(char character, void* arg), void* arg, const char* format, ...);

#ifdef __cplusplus
}
#endif

#endif // _PRINTF_H_
================================================
FILE: risc_v/userspace/startlib/start.S
================================================
# Userspace C runtime entry point: set up the global pointer, run main(),
# then issue the exit system call.
.section .text
.global _start
_start:
.option push
.option norelax
	# gp must be loaded without relaxation, otherwise the linker would
	# relax this very sequence against the gp it is initializing.
	la	gp, __global_pointer$
.option pop
	call	main
	# Exit system call after main.
	# FIX(review): make_syscall takes the syscall number in a0 and the first
	# argument in a1; previously a1 was never set, so the exit status was
	# whatever garbage a1 held. Pass main's return value (in a0) as the status.
	mv	a1, a0
	li	a0, 93
	j	make_syscall
.type _start, function
.size _start, .-_start


================================================
FILE: risc_v/userspace/startlib/syscall.S
================================================
# Shuffle the C calling convention (sysno in a0, args in a1..a6) into the
# syscall convention (sysno in a7, args in a0..a5), then trap into the kernel.
.section .text
.global make_syscall
make_syscall:
	mv	a7, a0
	mv	a0, a1
	mv	a1, a2
	mv	a2, a3
	mv	a3, a4
	mv	a4, a5
	mv	a5, a6
	ecall
	ret
.type make_syscall, function
.size make_syscall, .-make_syscall


================================================
FILE: risc_v/userspace/startlib/syscall.h
================================================
#pragma once

// C++ header (default arguments): raw syscall entry implemented in syscall.S.
// Returns the kernel's result (meaning depends on sysno).
extern "C" {
	unsigned long make_syscall(unsigned long sysno,
	                           unsigned long a1=0,
	                           unsigned long a2=0,
	                           unsigned long a3=0,
	                           unsigned long a4=0,
	                           unsigned long a5=0,
	                           unsigned long a6=0);
}

// Convenience wrappers for this kernel's syscall numbers.
// FIX(review): macro arguments are now fully parenthesized; previously
// "(unsigned long)x" bound only to the first token of an expression argument
// (e.g. syscall_put_char('a' + 1) cast only 'a').
#define syscall_exit()                  make_syscall(93)
#define syscall_get_char()              make_syscall(1)
#define syscall_put_char(x)             make_syscall(2, ((unsigned long)(x)))
#define syscall_yield()                 make_syscall(9)
#define syscall_sleep(x)                make_syscall(10, ((unsigned long)(x)))
#define syscall_get_fb(x)               make_syscall(1000, ((unsigned long)(x)))
#define syscall_inv_rect(d, x, y, w, h) make_syscall(1001, ((unsigned long)(d)), ((unsigned long)(x)), ((unsigned long)(y)), ((unsigned long)(w)), ((unsigned long)(h)))
#define syscall_get_key(x, y)           make_syscall(1002, ((unsigned long)(x)), ((unsigned long)(y)))
#define syscall_get_abs(x, y)           make_syscall(1004, ((unsigned long)(x)), ((unsigned long)(y)))
#define syscall_get_time()              make_syscall(1062)


================================================
FILE: risc_v/userspace/upload.sh
================================================
#!/bin/sh
# Copy one file onto the course hard-disk image (../hdd.dsk) by loop-mounting it.
# Must be run as root (losetup/mount).
if [ $# -ne 1 ]; then
	# FIX(review): typo "provied" -> "provided"
	echo "You provided $# parameters, need 1"
	exit 1
fi
# FIX(review): quote "$1" everywhere so filenames with spaces work.
if [ ! -r "$1" ]; then
	echo "Unknown file $1"
	exit 2
fi
# FIX(review): $UID is a bash/zsh variable, not POSIX sh; under dash it is
# empty and "[ -ne 0 ]" is an error. Use id -u instead.
if [ "$(id -u)" -ne 0 ]; then
	echo "You are not running as root, this might not work."
fi
losetup /dev/loop0 ../hdd.dsk
mount /dev/loop0 /mnt
cp "$1" /mnt
umount /dev/loop0
losetup -d /dev/loop0