Repository: mit-pdos/xv6-riscv Branch: riscv Commit: 5474d4bf72fd Files: 78 Total size: 259.4 KB Directory structure: gitextract_csx8z9op/ ├── .dir-locals.el ├── .editorconfig ├── .gdbinit.tmpl-riscv ├── .gitignore ├── LICENSE ├── Makefile ├── README ├── kernel/ │ ├── bio.c │ ├── buf.h │ ├── console.c │ ├── defs.h │ ├── elf.h │ ├── entry.S │ ├── exec.c │ ├── fcntl.h │ ├── file.c │ ├── file.h │ ├── fs.c │ ├── fs.h │ ├── kalloc.c │ ├── kernel.ld │ ├── kernelvec.S │ ├── log.c │ ├── main.c │ ├── memlayout.h │ ├── param.h │ ├── pipe.c │ ├── plic.c │ ├── printf.c │ ├── proc.c │ ├── proc.h │ ├── riscv.h │ ├── sleeplock.c │ ├── sleeplock.h │ ├── spinlock.c │ ├── spinlock.h │ ├── start.c │ ├── stat.h │ ├── string.c │ ├── swtch.S │ ├── syscall.c │ ├── syscall.h │ ├── sysfile.c │ ├── sysproc.c │ ├── trampoline.S │ ├── trap.c │ ├── types.h │ ├── uart.c │ ├── virtio.h │ ├── virtio_disk.c │ ├── vm.c │ └── vm.h ├── test-xv6.py └── user/ ├── cat.c ├── dorphan.c ├── echo.c ├── forktest.c ├── forphan.c ├── grep.c ├── grind.c ├── init.c ├── kill.c ├── ln.c ├── logstress.c ├── ls.c ├── mkdir.c ├── printf.c ├── rm.c ├── sh.c ├── stressfs.c ├── ulib.c ├── umalloc.c ├── user.h ├── user.ld ├── usertests.c ├── usys.pl ├── wc.c └── zombie.c ================================================ FILE CONTENTS ================================================ ================================================ FILE: .dir-locals.el ================================================ ((c-mode (indent-tabs-mode . nil) (c-file-style . "bsd") (c-basic-offset . 
2))) ================================================ FILE: .editorconfig ================================================ ; https://editorconfig.org root = true [*] end_of_line = lf insert_final_newline = true indent_style = space indent_size = 4 [*.{c,h}] indent_size = 2 [*.S] indent_size = 8 [*.ld] indent_size = 2 [Makefile] indent_style = tab indent_size = 8 ================================================ FILE: .gdbinit.tmpl-riscv ================================================ set confirm off set architecture riscv:rv64 target remote 127.0.0.1:1234 symbol-file kernel/kernel set disassemble-next-line auto set riscv use-compressed-breakpoints yes ================================================ FILE: .gitignore ================================================ *~ _* *.o *.d *.asm *.sym *.img vectors.S bootblock entryother initcode initcode.out kernelmemfs mkfs kernel/kernel user/usys.S .gdbinit TAGS ================================================ FILE: LICENSE ================================================ The xv6 software is: Copyright (c) 2006-2024 Frans Kaashoek, Robert Morris, Russ Cox, Massachusetts Institute of Technology Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: Makefile ================================================ K=kernel U=user OBJS = \ $K/entry.o \ $K/start.o \ $K/console.o \ $K/printf.o \ $K/uart.o \ $K/kalloc.o \ $K/spinlock.o \ $K/string.o \ $K/main.o \ $K/vm.o \ $K/proc.o \ $K/swtch.o \ $K/trampoline.o \ $K/trap.o \ $K/syscall.o \ $K/sysproc.o \ $K/bio.o \ $K/fs.o \ $K/log.o \ $K/sleeplock.o \ $K/file.o \ $K/pipe.o \ $K/exec.o \ $K/sysfile.o \ $K/kernelvec.o \ $K/plic.o \ $K/virtio_disk.o # riscv64-unknown-elf- or riscv64-linux-gnu- # perhaps in /opt/riscv/bin #TOOLPREFIX = # Try to infer the correct TOOLPREFIX if not set ifndef TOOLPREFIX TOOLPREFIX := $(shell if riscv64-unknown-elf-objdump -i 2>&1 | grep 'elf64-big' >/dev/null 2>&1; \ then echo 'riscv64-unknown-elf-'; \ elif riscv64-elf-objdump -i 2>&1 | grep 'elf64-big' >/dev/null 2>&1; \ then echo 'riscv64-elf-'; \ elif riscv64-none-elf-objdump -i 2>&1 | grep 'elf64-big' >/dev/null 2>&1; \ then echo 'riscv64-none-elf-'; \ elif riscv64-linux-gnu-objdump -i 2>&1 | grep 'elf64-big' >/dev/null 2>&1; \ then echo 'riscv64-linux-gnu-'; \ elif riscv64-unknown-linux-gnu-objdump -i 2>&1 | grep 'elf64-big' >/dev/null 2>&1; \ then echo 'riscv64-unknown-linux-gnu-'; \ else echo "***" 1>&2; \ echo "*** Error: Couldn't find a riscv64 version of GCC/binutils." 1>&2; \ echo "*** To turn off this error, run 'gmake TOOLPREFIX= ...'." 
1>&2; \ echo "***" 1>&2; exit 1; fi) endif QEMU = qemu-system-riscv64 MIN_QEMU_VERSION = 7.2 CC = $(TOOLPREFIX)gcc AS = $(TOOLPREFIX)gas LD = $(TOOLPREFIX)ld OBJCOPY = $(TOOLPREFIX)objcopy OBJDUMP = $(TOOLPREFIX)objdump CFLAGS = -Wall -Werror -Wno-unknown-attributes -O -fno-omit-frame-pointer -ggdb -gdwarf-2 CFLAGS += -march=rv64gc CFLAGS += -MD CFLAGS += -mcmodel=medany CFLAGS += -ffreestanding CFLAGS += -fno-common -nostdlib CFLAGS += -fno-builtin-strncpy -fno-builtin-strncmp -fno-builtin-strlen -fno-builtin-memset CFLAGS += -fno-builtin-memmove -fno-builtin-memcmp -fno-builtin-log -fno-builtin-bzero CFLAGS += -fno-builtin-strchr -fno-builtin-exit -fno-builtin-malloc -fno-builtin-putc CFLAGS += -fno-builtin-free CFLAGS += -fno-builtin-memcpy -Wno-main CFLAGS += -fno-builtin-printf -fno-builtin-fprintf -fno-builtin-vprintf CFLAGS += -I. CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector) # Disable PIE when possible (for Ubuntu 16.10 toolchain) ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]no-pie'),) CFLAGS += -fno-pie -no-pie endif ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]nopie'),) CFLAGS += -fno-pie -nopie endif LDFLAGS = -z max-page-size=4096 $K/kernel: $(OBJS) $K/kernel.ld $(LD) $(LDFLAGS) -T $K/kernel.ld -o $K/kernel $(OBJS) $(OBJDUMP) -S $K/kernel > $K/kernel.asm $(OBJDUMP) -t $K/kernel | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > $K/kernel.sym $K/%.o: $K/%.S $(CC) -march=rv64gc -g -c -o $@ $< tags: $(OBJS) etags kernel/*.S kernel/*.c ULIB = $U/ulib.o $U/usys.o $U/printf.o $U/umalloc.o _%: %.o $(ULIB) $U/user.ld $(LD) $(LDFLAGS) -T $U/user.ld -o $@ $< $(ULIB) $(OBJDUMP) -S $@ > $*.asm $(OBJDUMP) -t $@ | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > $*.sym $U/usys.S : $U/usys.pl perl $U/usys.pl > $U/usys.S $U/usys.o : $U/usys.S $(CC) $(CFLAGS) -c -o $U/usys.o $U/usys.S $U/_forktest: $U/forktest.o $(ULIB) # forktest has less library code linked in - needs to be small # in 
order to be able to max out the proc table. $(LD) $(LDFLAGS) -N -e main -Ttext 0 -o $U/_forktest $U/forktest.o $U/ulib.o $U/usys.o $(OBJDUMP) -S $U/_forktest > $U/forktest.asm mkfs/mkfs: mkfs/mkfs.c $K/fs.h $K/param.h gcc -Wno-unknown-attributes -I. -o mkfs/mkfs mkfs/mkfs.c # Prevent deletion of intermediate files, e.g. cat.o, after first build, so # that disk image changes after first build are persistent until clean. More # details: # http://www.gnu.org/software/make/manual/html_node/Chained-Rules.html .PRECIOUS: %.o UPROGS=\ $U/_cat\ $U/_echo\ $U/_forktest\ $U/_grep\ $U/_init\ $U/_kill\ $U/_ln\ $U/_ls\ $U/_mkdir\ $U/_rm\ $U/_sh\ $U/_stressfs\ $U/_usertests\ $U/_grind\ $U/_wc\ $U/_zombie\ $U/_logstress\ $U/_forphan\ $U/_dorphan\ fs.img: mkfs/mkfs README $(UPROGS) mkfs/mkfs fs.img README $(UPROGS) -include kernel/*.d user/*.d clean: rm -f *.tex *.dvi *.idx *.aux *.log *.ind *.ilg \ */*.o */*.d */*.asm */*.sym \ $K/kernel fs.img \ mkfs/mkfs .gdbinit \ $U/usys.S \ $(UPROGS) # try to generate a unique GDB port GDBPORT = $(shell expr `id -u` % 5000 + 25000) # QEMU's gdb stub command line changed in 0.11 QEMUGDB = $(shell if $(QEMU) -help | grep -q '^-gdb'; \ then echo "-gdb tcp::$(GDBPORT)"; \ else echo "-s -p $(GDBPORT)"; fi) ifndef CPUS CPUS := 3 endif QEMUOPTS = -machine virt -bios none -kernel $K/kernel -m 128M -smp $(CPUS) -nographic QEMUOPTS += -global virtio-mmio.force-legacy=false QEMUOPTS += -drive file=fs.img,if=none,format=raw,id=x0 QEMUOPTS += -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0 qemu: check-qemu-version $K/kernel fs.img $(QEMU) $(QEMUOPTS) .gdbinit: .gdbinit.tmpl-riscv sed "s/:1234/:$(GDBPORT)/" < $^ > $@ qemu-gdb: $K/kernel .gdbinit fs.img @echo "*** Now run 'gdb' in another window." 
1>&2 $(QEMU) $(QEMUOPTS) -S $(QEMUGDB) print-gdbport: @echo $(GDBPORT) QEMU_VERSION := $(shell $(QEMU) --version | head -n 1 | sed -E 's/^QEMU emulator version ([0-9]+\.[0-9]+)\..*/\1/') check-qemu-version: @if [ "$(shell echo "$(QEMU_VERSION) >= $(MIN_QEMU_VERSION)" | bc)" -eq 0 ]; then \ echo "ERROR: Need qemu version >= $(MIN_QEMU_VERSION)"; \ exit 1; \ fi ================================================ FILE: README ================================================ xv6 is a re-implementation of Dennis Ritchie's and Ken Thompson's Unix Version 6 (v6). xv6 loosely follows the structure and style of v6, but is implemented for a modern RISC-V multiprocessor using ANSI C. ACKNOWLEDGMENTS xv6 is inspired by John Lions's Commentary on UNIX 6th Edition (Peer to Peer Communications; ISBN: 1-57398-013-7; 1st edition (June 14, 2000)). See also https://pdos.csail.mit.edu/6.1810/, which provides pointers to on-line resources for v6. The following people have made contributions: Russ Cox (context switching, locking), Cliff Frey (MP), Xiao Yu (MP), Nickolai Zeldovich, and Austin Clements. 
We are also grateful for the bug reports and patches contributed by Abhinavpatel00, Takahiro Aoyagi, Marcelo Arroyo, Hirbod Behnam, Silas Boyd-Wickizer, Anton Burtsev, carlclone, Ian Chen, clivezeng, Dan Cross, Cody Cutler, Mike CAT, Tej Chajed, Asami Doi, Wenyang Duan, echtwerner, eyalz800, Nelson Elhage, Saar Ettinger, Alice Ferrazzi, Nathaniel Filardo, flespark, Peter Froehlich, Yakir Goaron, Shivam Handa, Matt Harvey, Bryan Henry, jaichenhengjie, Jim Huang, Matúš Jókay, John Jolly, Alexander Kapshuk, Anders Kaseorg, kehao95, Wolfgang Keller, Jungwoo Kim, Jonathan Kimmitt, Eddie Kohler, Vadim Kolontsov, Austin Liew, l0stman, Pavan Maddamsetti, Imbar Marinescu, Yandong Mao, Matan Shabtay, Hitoshi Mitake, Carmi Merimovich, mes900903, Mark Morrissey, mtasm, Joel Nider, Hayato Ohhashi, OptimisticSide, papparapa, phosphagos, Harry Porter, Greg Price, Zheng qhuo, Quancheng, RayAndrew, Jude Rich, segfault, Ayan Shafqat, Eldar Sehayek, Yongming Shen, Fumiya Shigemitsu, snoire, Taojie, Cam Tenny, tyfkda, Warren Toomey, Stephen Tu, Alissa Tung, Rafael Ubal, unicornx, Amane Uehara, Pablo Ventura, Luc Videau, Xi Wang, WaheedHafez, Keiichi Watanabe, Lucas Wolf, Nicolas Wolovick, wxdao, Grant Wu, x653, Andy Zhang, Jindong Zhang, Icenowy Zheng, ZhUyU1997, and Zou Chang Wei. ERROR REPORTS Please send errors and suggestions to Frans Kaashoek and Robert Morris (kaashoek,rtm@mit.edu). The main purpose of xv6 is as a teaching operating system for MIT's 6.1810, so we are more interested in simplifications and clarifications than new features. BUILDING AND RUNNING XV6 You will need a RISC-V "newlib" tool chain from https://github.com/riscv/riscv-gnu-toolchain, and qemu compiled for riscv64-softmmu. Once they are installed, and in your shell search path, you can run "make qemu". ================================================ FILE: kernel/bio.c ================================================ // Buffer cache.
// // The buffer cache is a linked list of buf structures holding // cached copies of disk block contents. Caching disk blocks // in memory reduces the number of disk reads and also provides // a synchronization point for disk blocks used by multiple processes. // // Interface: // * To get a buffer for a particular disk block, call bread. // * After changing buffer data, call bwrite to write it to disk. // * When done with the buffer, call brelse. // * Do not use the buffer after calling brelse. // * Only one process at a time can use a buffer, // so do not keep them longer than necessary. #include "types.h" #include "param.h" #include "spinlock.h" #include "sleeplock.h" #include "riscv.h" #include "defs.h" #include "fs.h" #include "buf.h" struct { struct spinlock lock; struct buf buf[NBUF]; // Linked list of all buffers, through prev/next. // Sorted by how recently the buffer was used. // head.next is most recent, head.prev is least. struct buf head; } bcache; void binit(void) { struct buf *b; initlock(&bcache.lock, "bcache"); // Create linked list of buffers bcache.head.prev = &bcache.head; bcache.head.next = &bcache.head; for(b = bcache.buf; b < bcache.buf+NBUF; b++){ b->next = bcache.head.next; b->prev = &bcache.head; initsleeplock(&b->lock, "buffer"); bcache.head.next->prev = b; bcache.head.next = b; } } // Look through buffer cache for block on device dev. // If not found, allocate a buffer. // In either case, return locked buffer. static struct buf* bget(uint dev, uint blockno) { struct buf *b; acquire(&bcache.lock); // Is the block already cached? for(b = bcache.head.next; b != &bcache.head; b = b->next){ if(b->dev == dev && b->blockno == blockno){ b->refcnt++; release(&bcache.lock); acquiresleep(&b->lock); return b; } } // Not cached. // Recycle the least recently used (LRU) unused buffer. 
for(b = bcache.head.prev; b != &bcache.head; b = b->prev){ if(b->refcnt == 0) { b->dev = dev; b->blockno = blockno; b->valid = 0; b->refcnt = 1; release(&bcache.lock); acquiresleep(&b->lock); return b; } } panic("bget: no buffers"); } // Return a locked buf with the contents of the indicated block. struct buf* bread(uint dev, uint blockno) { struct buf *b; b = bget(dev, blockno); if(!b->valid) { virtio_disk_rw(b, 0); b->valid = 1; } return b; } // Write b's contents to disk. Must be locked. void bwrite(struct buf *b) { if(!holdingsleep(&b->lock)) panic("bwrite"); virtio_disk_rw(b, 1); } // Release a locked buffer. // Move to the head of the most-recently-used list. void brelse(struct buf *b) { if(!holdingsleep(&b->lock)) panic("brelse"); releasesleep(&b->lock); acquire(&bcache.lock); b->refcnt--; if (b->refcnt == 0) { // no one is waiting for it. b->next->prev = b->prev; b->prev->next = b->next; b->next = bcache.head.next; b->prev = &bcache.head; bcache.head.next->prev = b; bcache.head.next = b; } release(&bcache.lock); } void bpin(struct buf *b) { acquire(&bcache.lock); b->refcnt++; release(&bcache.lock); } void bunpin(struct buf *b) { acquire(&bcache.lock); b->refcnt--; release(&bcache.lock); } ================================================ FILE: kernel/buf.h ================================================ struct buf { int valid; // has data been read from disk? int disk; // does disk "own" buf? uint dev; uint blockno; struct sleeplock lock; uint refcnt; struct buf *prev; // LRU cache list struct buf *next; uchar data[BSIZE]; }; ================================================ FILE: kernel/console.c ================================================ // // Console input and output, to the uart. // Reads are line at a time. 
// Implements special input characters: // newline -- end of line // control-h -- backspace // control-u -- kill line // control-d -- end of file // control-p -- print process list // #include #include "types.h" #include "param.h" #include "spinlock.h" #include "sleeplock.h" #include "fs.h" #include "file.h" #include "memlayout.h" #include "riscv.h" #include "defs.h" #include "proc.h" #define BACKSPACE 0x100 // erase the last output character #define C(x) ((x)-'@') // Control-x // // send one character to the uart, but don't use // interrupts or sleep(). safe to be called from // interrupts, e.g. by printf and to echo input // characters. // void consputc(int c) { if(c == BACKSPACE){ // if the user typed backspace, overwrite with a space. uartputc_sync('\b'); uartputc_sync(' '); uartputc_sync('\b'); } else { uartputc_sync(c); } } struct { struct spinlock lock; // input circular buffer #define INPUT_BUF_SIZE 128 char buf[INPUT_BUF_SIZE]; uint r; // Read index uint w; // Write index uint e; // Edit index } cons; // // user write() system calls to the console go here. // uses sleep() and UART interrupts. // int consolewrite(int user_src, uint64 src, int n) { char buf[32]; // move batches from user space to uart. int i = 0; while(i < n){ int nn = sizeof(buf); if(nn > n - i) nn = n - i; if(either_copyin(buf, user_src, src+i, nn) == -1) break; uartwrite(buf, nn); i += nn; } return i; } // // user read()s from the console go here. // copy (up to) a whole input line to dst. // user_dst indicates whether dst is a user // or kernel address. // int consoleread(int user_dst, uint64 dst, int n) { uint target; int c; char cbuf; target = n; acquire(&cons.lock); while(n > 0){ // wait until interrupt handler has put some // input into cons.buffer. 
while(cons.r == cons.w){ if(killed(myproc())){ release(&cons.lock); return -1; } sleep(&cons.r, &cons.lock); } c = cons.buf[cons.r++ % INPUT_BUF_SIZE]; if(c == C('D')){ // end-of-file if(n < target){ // Save ^D for next time, to make sure // caller gets a 0-byte result. cons.r--; } break; } // copy the input byte to the user-space buffer. cbuf = c; if(either_copyout(user_dst, dst, &cbuf, 1) == -1) break; dst++; --n; if(c == '\n'){ // a whole line has arrived, return to // the user-level read(). break; } } release(&cons.lock); return target - n; } // // the console input interrupt handler. // uartintr() calls this for each input character. // do erase/kill processing, append to cons.buf, // wake up consoleread() if a whole line has arrived. // void consoleintr(int c) { acquire(&cons.lock); switch(c){ case C('P'): // Print process list. procdump(); break; case C('U'): // Kill line. while(cons.e != cons.w && cons.buf[(cons.e-1) % INPUT_BUF_SIZE] != '\n'){ cons.e--; consputc(BACKSPACE); } break; case C('H'): // Backspace case '\x7f': // Delete key if(cons.e != cons.w){ cons.e--; consputc(BACKSPACE); } break; default: if(c != 0 && cons.e-cons.r < INPUT_BUF_SIZE){ c = (c == '\r') ? '\n' : c; // echo back to the user. consputc(c); // store for consumption by consoleread(). cons.buf[cons.e++ % INPUT_BUF_SIZE] = c; if(c == '\n' || c == C('D') || cons.e-cons.r == INPUT_BUF_SIZE){ // wake up consoleread() if a whole line (or end-of-file) // has arrived. cons.w = cons.e; wakeup(&cons.r); } } break; } release(&cons.lock); } void consoleinit(void) { initlock(&cons.lock, "cons"); uartinit(); // connect read and write system calls // to consoleread and consolewrite. 
devsw[CONSOLE].read = consoleread; devsw[CONSOLE].write = consolewrite; } ================================================ FILE: kernel/defs.h ================================================ struct buf; struct context; struct file; struct inode; struct pipe; struct proc; struct spinlock; struct sleeplock; struct stat; struct superblock; // bio.c void binit(void); struct buf* bread(uint, uint); void brelse(struct buf*); void bwrite(struct buf*); void bpin(struct buf*); void bunpin(struct buf*); // console.c void consoleinit(void); void consoleintr(int); void consputc(int); // exec.c int kexec(char*, char**); // file.c struct file* filealloc(void); void fileclose(struct file*); struct file* filedup(struct file*); void fileinit(void); int fileread(struct file*, uint64, int n); int filestat(struct file*, uint64 addr); int filewrite(struct file*, uint64, int n); // fs.c void fsinit(int); int dirlink(struct inode*, char*, uint); struct inode* dirlookup(struct inode*, char*, uint*); struct inode* ialloc(uint, short); struct inode* idup(struct inode*); void iinit(); void ilock(struct inode*); void iput(struct inode*); void iunlock(struct inode*); void iunlockput(struct inode*); void iupdate(struct inode*); int namecmp(const char*, const char*); struct inode* namei(char*); struct inode* nameiparent(char*, char*); int readi(struct inode*, int, uint64, uint, uint); void stati(struct inode*, struct stat*); int writei(struct inode*, int, uint64, uint, uint); void itrunc(struct inode*); void ireclaim(int); // kalloc.c void* kalloc(void); void kfree(void *); void kinit(void); // log.c void initlog(int, struct superblock*); void log_write(struct buf*); void begin_op(void); void end_op(void); // pipe.c int pipealloc(struct file**, struct file**); void pipeclose(struct pipe*, int); int piperead(struct pipe*, uint64, int); int pipewrite(struct pipe*, uint64, int); // printf.c int printf(char*, ...) 
__attribute__ ((format (printf, 1, 2))); void panic(char*) __attribute__((noreturn)); void printfinit(void); // proc.c int cpuid(void); void kexit(int); int kfork(void); int growproc(int); void proc_mapstacks(pagetable_t); pagetable_t proc_pagetable(struct proc *); void proc_freepagetable(pagetable_t, uint64); int kkill(int); int killed(struct proc*); void setkilled(struct proc*); struct cpu* mycpu(void); struct proc* myproc(); void procinit(void); void scheduler(void) __attribute__((noreturn)); void sched(void); void sleep(void*, struct spinlock*); void userinit(void); int kwait(uint64); void wakeup(void*); void yield(void); int either_copyout(int user_dst, uint64 dst, void *src, uint64 len); int either_copyin(void *dst, int user_src, uint64 src, uint64 len); void procdump(void); // swtch.S void swtch(struct context*, struct context*); // spinlock.c void acquire(struct spinlock*); int holding(struct spinlock*); void initlock(struct spinlock*, char*); void release(struct spinlock*); void push_off(void); void pop_off(void); // sleeplock.c void acquiresleep(struct sleeplock*); void releasesleep(struct sleeplock*); int holdingsleep(struct sleeplock*); void initsleeplock(struct sleeplock*, char*); // string.c int memcmp(const void*, const void*, uint); void* memmove(void*, const void*, uint); void* memset(void*, int, uint); char* safestrcpy(char*, const char*, int); int strlen(const char*); int strncmp(const char*, const char*, uint); char* strncpy(char*, const char*, int); // syscall.c void argint(int, int*); int argstr(int, char*, int); void argaddr(int, uint64 *); int fetchstr(uint64, char*, int); int fetchaddr(uint64, uint64*); void syscall(); // trap.c extern uint ticks; void trapinit(void); void trapinithart(void); extern struct spinlock tickslock; void prepare_return(void); // uart.c void uartinit(void); void uartintr(void); void uartwrite(char [], int); void uartputc_sync(int); int uartgetc(void); // vm.c void kvminit(void); void kvminithart(void); void 
kvmmap(pagetable_t, uint64, uint64, uint64, int); int mappages(pagetable_t, uint64, uint64, uint64, int); pagetable_t uvmcreate(void); uint64 uvmalloc(pagetable_t, uint64, uint64, int); uint64 uvmdealloc(pagetable_t, uint64, uint64); int uvmcopy(pagetable_t, pagetable_t, uint64); void uvmfree(pagetable_t, uint64); void uvmunmap(pagetable_t, uint64, uint64, int); void uvmclear(pagetable_t, uint64); pte_t * walk(pagetable_t, uint64, int); uint64 walkaddr(pagetable_t, uint64); int copyout(pagetable_t, uint64, char *, uint64); int copyin(pagetable_t, char *, uint64, uint64); int copyinstr(pagetable_t, char *, uint64, uint64); int ismapped(pagetable_t, uint64); uint64 vmfault(pagetable_t, uint64, int); // plic.c void plicinit(void); void plicinithart(void); int plic_claim(void); void plic_complete(int); // virtio_disk.c void virtio_disk_init(void); void virtio_disk_rw(struct buf *, int); void virtio_disk_intr(void); // number of elements in fixed-size array #define NELEM(x) (sizeof(x)/sizeof((x)[0])) ================================================ FILE: kernel/elf.h ================================================ // Format of an ELF executable file #define ELF_MAGIC 0x464C457FU // "\x7FELF" in little endian // File header struct elfhdr { uint magic; // must equal ELF_MAGIC uchar elf[12]; ushort type; ushort machine; uint version; uint64 entry; uint64 phoff; uint64 shoff; uint flags; ushort ehsize; ushort phentsize; ushort phnum; ushort shentsize; ushort shnum; ushort shstrndx; }; // Program section header struct proghdr { uint32 type; uint32 flags; uint64 off; uint64 vaddr; uint64 paddr; uint64 filesz; uint64 memsz; uint64 align; }; // Values for Proghdr type #define ELF_PROG_LOAD 1 // Flag bits for Proghdr flags #define ELF_PROG_FLAG_EXEC 1 #define ELF_PROG_FLAG_WRITE 2 #define ELF_PROG_FLAG_READ 4 ================================================ FILE: kernel/entry.S ================================================ # qemu -kernel loads the kernel at 0x80000000 # and 
causes each hart (i.e. CPU) to jump there. # kernel.ld causes the following code to # be placed at 0x80000000. .section .text .global _entry _entry: # set up a stack for C. # stack0 is declared in start.c, # with a 4096-byte stack per CPU. # sp = stack0 + ((hartid + 1) * 4096) la sp, stack0 li a0, 1024*4 csrr a1, mhartid addi a1, a1, 1 mul a0, a0, a1 add sp, sp, a0 # jump to start() in start.c call start spin: j spin ================================================ FILE: kernel/exec.c ================================================ #include "types.h" #include "param.h" #include "memlayout.h" #include "riscv.h" #include "spinlock.h" #include "proc.h" #include "defs.h" #include "elf.h" static int loadseg(pde_t *, uint64, struct inode *, uint, uint); // map ELF permissions to PTE permission bits. int flags2perm(int flags) { int perm = 0; if(flags & 0x1) perm = PTE_X; if(flags & 0x2) perm |= PTE_W; return perm; } // // the implementation of the exec() system call // int kexec(char *path, char **argv) { char *s, *last; int i, off; uint64 argc, sz = 0, sp, ustack[MAXARG], stackbase; struct elfhdr elf; struct inode *ip; struct proghdr ph; pagetable_t pagetable = 0, oldpagetable; struct proc *p = myproc(); begin_op(); // Open the executable file. if((ip = namei(path)) == 0){ end_op(); return -1; } ilock(ip); // Read the ELF header. if(readi(ip, 0, (uint64)&elf, 0, sizeof(elf)) != sizeof(elf)) goto bad; // Is this really an ELF file? if(elf.magic != ELF_MAGIC) goto bad; if((pagetable = proc_pagetable(p)) == 0) goto bad; // Load program into memory. for(i=0, off=elf.phoff; isz; // Allocate some pages at the next page boundary. // Make the first inaccessible as a stack guard. // Use the rest as the user stack. 
sz = PGROUNDUP(sz); uint64 sz1; if((sz1 = uvmalloc(pagetable, sz, sz + (USERSTACK+1)*PGSIZE, PTE_W)) == 0) goto bad; sz = sz1; uvmclear(pagetable, sz-(USERSTACK+1)*PGSIZE); sp = sz; stackbase = sp - USERSTACK*PGSIZE; // Copy argument strings into new stack, remember their // addresses in ustack[]. for(argc = 0; argv[argc]; argc++) { if(argc >= MAXARG) goto bad; sp -= strlen(argv[argc]) + 1; sp -= sp % 16; // riscv sp must be 16-byte aligned if(sp < stackbase) goto bad; if(copyout(pagetable, sp, argv[argc], strlen(argv[argc]) + 1) < 0) goto bad; ustack[argc] = sp; } ustack[argc] = 0; // push a copy of ustack[], the array of argv[] pointers. sp -= (argc+1) * sizeof(uint64); sp -= sp % 16; if(sp < stackbase) goto bad; if(copyout(pagetable, sp, (char *)ustack, (argc+1)*sizeof(uint64)) < 0) goto bad; // a0 and a1 contain arguments to user main(argc, argv) // argc is returned via the system call return // value, which goes in a0. p->trapframe->a1 = sp; // Save program name for debugging. for(last=s=path; *s; s++) if(*s == '/') last = s+1; safestrcpy(p->name, last, sizeof(p->name)); // Commit to the user image. oldpagetable = p->pagetable; p->pagetable = pagetable; p->sz = sz; p->trapframe->epc = elf.entry; // initial program counter = ulib.c:start() p->trapframe->sp = sp; // initial stack pointer proc_freepagetable(oldpagetable, oldsz); return argc; // this ends up in a0, the first argument to main(argc, argv) bad: if(pagetable) proc_freepagetable(pagetable, sz); if(ip){ iunlockput(ip); end_op(); } return -1; } // Load an ELF program segment into pagetable at virtual address va. // va must be page-aligned // and the pages from va to va+sz must already be mapped. // Returns 0 on success, -1 on failure. 
static int loadseg(pagetable_t pagetable, uint64 va, struct inode *ip, uint offset, uint sz) { uint i, n; uint64 pa; for(i = 0; i < sz; i += PGSIZE){ pa = walkaddr(pagetable, va + i); if(pa == 0) panic("loadseg: address should exist"); if(sz - i < PGSIZE) n = sz - i; else n = PGSIZE; if(readi(ip, 0, (uint64)pa, offset+i, n) != n) return -1; } return 0; } ================================================ FILE: kernel/fcntl.h ================================================ #define O_RDONLY 0x000 #define O_WRONLY 0x001 #define O_RDWR 0x002 #define O_CREATE 0x200 #define O_TRUNC 0x400 ================================================ FILE: kernel/file.c ================================================ // // Support functions for system calls that involve file descriptors. // #include "types.h" #include "riscv.h" #include "defs.h" #include "param.h" #include "fs.h" #include "spinlock.h" #include "sleeplock.h" #include "file.h" #include "stat.h" #include "proc.h" struct devsw devsw[NDEV]; struct { struct spinlock lock; struct file file[NFILE]; } ftable; void fileinit(void) { initlock(&ftable.lock, "ftable"); } // Allocate a file structure. struct file* filealloc(void) { struct file *f; acquire(&ftable.lock); for(f = ftable.file; f < ftable.file + NFILE; f++){ if(f->ref == 0){ f->ref = 1; release(&ftable.lock); return f; } } release(&ftable.lock); return 0; } // Increment ref count for file f. struct file* filedup(struct file *f) { acquire(&ftable.lock); if(f->ref < 1) panic("filedup"); f->ref++; release(&ftable.lock); return f; } // Close file f. (Decrement ref count, close when reaches 0.) 
void fileclose(struct file *f) { struct file ff; acquire(&ftable.lock); if(f->ref < 1) panic("fileclose"); if(--f->ref > 0){ release(&ftable.lock); return; } ff = *f; f->ref = 0; f->type = FD_NONE; release(&ftable.lock); if(ff.type == FD_PIPE){ pipeclose(ff.pipe, ff.writable); } else if(ff.type == FD_INODE || ff.type == FD_DEVICE){ begin_op(); iput(ff.ip); end_op(); } } // Get metadata about file f. // addr is a user virtual address, pointing to a struct stat. int filestat(struct file *f, uint64 addr) { struct proc *p = myproc(); struct stat st; if(f->type == FD_INODE || f->type == FD_DEVICE){ ilock(f->ip); stati(f->ip, &st); iunlock(f->ip); if(copyout(p->pagetable, addr, (char *)&st, sizeof(st)) < 0) return -1; return 0; } return -1; } // Read from file f. // addr is a user virtual address. int fileread(struct file *f, uint64 addr, int n) { int r = 0; if(f->readable == 0) return -1; if(f->type == FD_PIPE){ r = piperead(f->pipe, addr, n); } else if(f->type == FD_DEVICE){ if(f->major < 0 || f->major >= NDEV || !devsw[f->major].read) return -1; r = devsw[f->major].read(1, addr, n); } else if(f->type == FD_INODE){ ilock(f->ip); if((r = readi(f->ip, 1, addr, f->off, n)) > 0) f->off += r; iunlock(f->ip); } else { panic("fileread"); } return r; } // Write to file f. // addr is a user virtual address. int filewrite(struct file *f, uint64 addr, int n) { int r, ret = 0; if(f->writable == 0) return -1; if(f->type == FD_PIPE){ ret = pipewrite(f->pipe, addr, n); } else if(f->type == FD_DEVICE){ if(f->major < 0 || f->major >= NDEV || !devsw[f->major].write) return -1; ret = devsw[f->major].write(1, addr, n); } else if(f->type == FD_INODE){ // write a few blocks at a time to avoid exceeding // the maximum log transaction size, including // i-node, indirect block, allocation blocks, // and 2 blocks of slop for non-aligned writes. 
int max = ((MAXOPBLOCKS-1-1-2) / 2) * BSIZE; int i = 0; while(i < n){ int n1 = n - i; if(n1 > max) n1 = max; begin_op(); ilock(f->ip); if ((r = writei(f->ip, 1, addr + i, f->off, n1)) > 0) f->off += r; iunlock(f->ip); end_op(); if(r != n1){ // error from writei break; } i += r; } ret = (i == n ? n : -1); } else { panic("filewrite"); } return ret; } ================================================ FILE: kernel/file.h ================================================ struct file { enum { FD_NONE, FD_PIPE, FD_INODE, FD_DEVICE } type; int ref; // reference count char readable; char writable; struct pipe *pipe; // FD_PIPE struct inode *ip; // FD_INODE and FD_DEVICE uint off; // FD_INODE short major; // FD_DEVICE }; #define major(dev) ((dev) >> 16 & 0xFFFF) #define minor(dev) ((dev) & 0xFFFF) #define mkdev(m,n) ((uint)((m)<<16| (n))) // in-memory copy of an inode struct inode { uint dev; // Device number uint inum; // Inode number int ref; // Reference count struct sleeplock lock; // protects everything below here int valid; // inode has been read from disk? short type; // copy of disk inode short major; short minor; short nlink; uint size; uint addrs[NDIRECT+1]; }; // map major device number to device functions. struct devsw { int (*read)(int, uint64, int); int (*write)(int, uint64, int); }; extern struct devsw devsw[]; #define CONSOLE 1 ================================================ FILE: kernel/fs.c ================================================ // File system implementation. Five layers: // + Blocks: allocator for raw disk blocks. // + Log: crash recovery for multi-step updates. // + Files: inode allocator, reading, writing, metadata. // + Directories: inode with special contents (list of other inodes!) // + Names: paths like /usr/rtm/xv6/fs.c for convenient naming. // // This file contains the low-level file system manipulation // routines. The (higher-level) system call implementations // are in sysfile.c. 
#include "types.h"
#include "riscv.h"
#include "defs.h"
#include "param.h"
#include "stat.h"
#include "spinlock.h"
#include "proc.h"
#include "sleeplock.h"
#include "fs.h"
#include "buf.h"
#include "file.h"

#define min(a, b) ((a) < (b) ? (a) : (b))
// there should be one superblock per disk device, but we run with
// only one device
struct superblock sb;

// Copy the on-disk super block (block 1 of dev) into *sb.
static void
readsb(int dev, struct superblock *sb)
{
  struct buf *blk = bread(dev, 1);

  memmove(sb, blk->data, sizeof(*sb));
  brelse(blk);
}

// Bring up the file system on dev: load and check the super block,
// initialize the log, then reclaim orphaned inodes.
void
fsinit(int dev)
{
  readsb(dev, &sb);
  if(sb.magic != FSMAGIC)
    panic("invalid file system");
  initlog(dev, &sb);
  ireclaim(dev);
}

// Fill block bno of dev with zeroes, through the log.
static void
bzero(int dev, int bno)
{
  struct buf *blk = bread(dev, bno);

  memset(blk->data, 0, BSIZE);
  log_write(blk);
  brelse(blk);
}

// Blocks.

// Allocate a zeroed disk block.
// returns 0 if out of disk space.
static uint
balloc(uint dev)
{
  int base, bit, mask;
  struct buf *bp = 0;

  // Walk the free bitmap one bitmap block (BPB bits) at a time.
  for(base = 0; base < sb.size; base += BPB){
    bp = bread(dev, BBLOCK(base, sb));
    for(bit = 0; bit < BPB && base + bit < sb.size; bit++){
      mask = 1 << (bit % 8);
      if((bp->data[bit/8] & mask) != 0)
        continue;                 // block already in use

      bp->data[bit/8] |= mask;    // claim it
      log_write(bp);
      brelse(bp);
      bzero(dev, base + bit);
      return base + bit;
    }
    brelse(bp);
  }
  printf("balloc: out of blocks\n");
  return 0;
}

// Free a disk block.
// Panics if the block's bitmap bit is already clear.
static void
bfree(int dev, uint b)
{
  struct buf *bp = bread(dev, BBLOCK(b, sb));
  int bit = b % BPB;
  int mask = 1 << (bit % 8);

  if((bp->data[bit/8] & mask) == 0)
    panic("freeing free block");
  bp->data[bit/8] &= ~mask;
  log_write(bp);
  brelse(bp);
}

// Inodes.
//
// An inode describes a single unnamed file.
// The inode disk structure holds metadata: the file's type,
// its size, the number of links referring to it, and the
// list of blocks holding the file's content.
//
// The inodes are laid out sequentially on disk at block
// sb.inodestart. Each inode has a number, indicating its
// position on the disk.
// // The kernel keeps a table of in-use inodes in memory // to provide a place for synchronizing access // to inodes used by multiple processes. The in-memory // inodes include book-keeping information that is // not stored on disk: ip->ref and ip->valid. // // An inode and its in-memory representation go through a // sequence of states before they can be used by the // rest of the file system code. // // * Allocation: an inode is allocated if its type (on disk) // is non-zero. ialloc() allocates, and iput() frees if // the reference and link counts have fallen to zero. // // * Referencing in table: an entry in the inode table // is free if ip->ref is zero. Otherwise ip->ref tracks // the number of in-memory pointers to the entry (open // files and current directories). iget() finds or // creates a table entry and increments its ref; iput() // decrements ref. // // * Valid: the information (type, size, &c) in an inode // table entry is only correct when ip->valid is 1. // ilock() reads the inode from // the disk and sets ip->valid, while iput() clears // ip->valid if ip->ref has fallen to zero. // // * Locked: file system code may only examine and modify // the information in an inode and its content if it // has first locked the inode. // // Thus a typical sequence is: // ip = iget(dev, inum) // ilock(ip) // ... examine and modify ip->xxx ... // iunlock(ip) // iput(ip) // // ilock() is separate from iget() so that system calls can // get a long-term reference to an inode (as for an open file) // and only lock it for short periods (e.g., in read()). // The separation also helps avoid deadlock and races during // pathname lookup. iget() increments ip->ref so that the inode // stays in the table and pointers to it remain valid. // // Many internal file system functions expect the caller to // have locked the inodes involved; this lets callers create // multi-step atomic operations. // // The itable.lock spin-lock protects the allocation of itable // entries. 
Since ip->ref indicates whether an entry is free, // and ip->dev and ip->inum indicate which i-node an entry // holds, one must hold itable.lock while using any of those fields. // // An ip->lock sleep-lock protects all ip-> fields other than ref, // dev, and inum. One must hold ip->lock in order to // read or write that inode's ip->valid, ip->size, ip->type, &c. struct { struct spinlock lock; struct inode inode[NINODE]; } itable; void iinit() { int i = 0; initlock(&itable.lock, "itable"); for(i = 0; i < NINODE; i++) { initsleeplock(&itable.inode[i].lock, "inode"); } } static struct inode* iget(uint dev, uint inum); // Allocate an inode on device dev. // Mark it as allocated by giving it type type. // Returns an unlocked but allocated and referenced inode, // or NULL if there is no free inode. struct inode* ialloc(uint dev, short type) { int inum; struct buf *bp; struct dinode *dip; for(inum = 1; inum < sb.ninodes; inum++){ bp = bread(dev, IBLOCK(inum, sb)); dip = (struct dinode*)bp->data + inum%IPB; if(dip->type == 0){ // a free inode memset(dip, 0, sizeof(*dip)); dip->type = type; log_write(bp); // mark it allocated on the disk brelse(bp); return iget(dev, inum); } brelse(bp); } printf("ialloc: no inodes\n"); return 0; } // Copy a modified in-memory inode to disk. // Must be called after every change to an ip->xxx field // that lives on disk. // Caller must hold ip->lock. void iupdate(struct inode *ip) { struct buf *bp; struct dinode *dip; bp = bread(ip->dev, IBLOCK(ip->inum, sb)); dip = (struct dinode*)bp->data + ip->inum%IPB; dip->type = ip->type; dip->major = ip->major; dip->minor = ip->minor; dip->nlink = ip->nlink; dip->size = ip->size; memmove(dip->addrs, ip->addrs, sizeof(ip->addrs)); log_write(bp); brelse(bp); } // Find the inode with number inum on device dev // and return the in-memory copy. Does not lock // the inode and does not read it from disk. 
static struct inode*
iget(uint dev, uint inum)
{
  struct inode *ip, *empty;

  acquire(&itable.lock);

  // Is the inode already in the table?
  empty = 0;
  for(ip = &itable.inode[0]; ip < &itable.inode[NINODE]; ip++){
    if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
      // Found a live entry for (dev, inum): share it.
      ip->ref++;
      release(&itable.lock);
      return ip;
    }
    if(empty == 0 && ip->ref == 0)    // Remember empty slot.
      empty = ip;
  }

  // Recycle an inode entry.
  if(empty == 0)
    panic("iget: no inodes");

  // Claim the first free slot. valid = 0 makes the next ilock()
  // load the inode from disk.
  ip = empty;
  ip->dev = dev;
  ip->inum = inum;
  ip->ref = 1;
  ip->valid = 0;
  release(&itable.lock);

  return ip;
}

// Increment reference count for ip.
// Returns ip to enable ip = idup(ip1) idiom.
struct inode*
idup(struct inode *ip)
{
  acquire(&itable.lock);
  ip->ref++;
  release(&itable.lock);
  return ip;
}

// Lock the given inode.
// Reads the inode from disk if necessary.
// Panics if ip is null, has no references, or the on-disk inode
// turns out to have type 0.
void
ilock(struct inode *ip)
{
  struct buf *bp;
  struct dinode *dip;

  if(ip == 0 || ip->ref < 1)
    panic("ilock");

  acquiresleep(&ip->lock);

  if(ip->valid == 0){
    // First lock since iget(): copy the on-disk inode into the entry.
    bp = bread(ip->dev, IBLOCK(ip->inum, sb));
    dip = (struct dinode*)bp->data + ip->inum%IPB;
    ip->type = dip->type;
    ip->major = dip->major;
    ip->minor = dip->minor;
    ip->nlink = dip->nlink;
    ip->size = dip->size;
    memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
    brelse(bp);
    ip->valid = 1;
    if(ip->type == 0)
      panic("ilock: no type");
  }
}

// Unlock the given inode.
// Panics if ip is null, is not locked by the caller, or has no
// references.
void
iunlock(struct inode *ip)
{
  if(ip == 0 || !holdingsleep(&ip->lock) || ip->ref < 1)
    panic("iunlock");

  releasesleep(&ip->lock);
}

// Drop a reference to an in-memory inode.
// If that was the last reference, the inode table entry can
// be recycled.
// If that was the last reference and the inode has no links
// to it, free the inode (and its content) on disk.
// All calls to iput() must be inside a transaction in
// case it has to free the inode.
void
iput(struct inode *ip)
{
  acquire(&itable.lock);

  if(ip->ref == 1 && ip->valid && ip->nlink == 0){
    // inode has no links and no other references: truncate and free.

    // ip->ref == 1 means no other process can have ip locked,
    // so this acquiresleep() won't block (or deadlock).
    acquiresleep(&ip->lock);

    // itable.lock is released while itrunc()/iupdate() run and is
    // re-acquired before ref is decremented.
    release(&itable.lock);

    itrunc(ip);
    ip->type = 0;     // type 0 marks the on-disk inode free (see ialloc)
    iupdate(ip);
    ip->valid = 0;

    releasesleep(&ip->lock);

    acquire(&itable.lock);
  }

  ip->ref--;
  release(&itable.lock);
}

// Common idiom: unlock, then put.
void
iunlockput(struct inode *ip)
{
  iunlock(ip);
  iput(ip);
}

// Scan every on-disk inode of dev and reclaim orphans: inodes that
// are allocated (type != 0) but have nlink == 0. Each orphan is
// fetched, locked and unlocked once, then released with iput()
// inside a transaction.
void
ireclaim(int dev)
{
  for (int inum = 1; inum < sb.ninodes; inum++) {
    struct inode *ip = 0;
    struct buf *bp = bread(dev, IBLOCK(inum, sb));
    struct dinode *dip = (struct dinode *)bp->data + inum % IPB;
    if (dip->type != 0 && dip->nlink == 0) {  // is an orphaned inode
      printf("ireclaim: orphaned inode %d\n", inum);
      ip = iget(dev, inum);
    }
    // Release the buffer before starting the transaction below.
    brelse(bp);
    if (ip) {
      begin_op();
      ilock(ip);      // loads the inode, setting ip->valid
      iunlock(ip);
      iput(ip);       // ref == 1, valid, nlink == 0: truncates and frees
      end_op();
    }
  }
}

// Inode content
//
// The content (data) associated with each inode is stored
// in blocks on the disk. The first NDIRECT block numbers
// are listed in ip->addrs[].  The next NINDIRECT blocks are
// listed in block ip->addrs[NDIRECT].

// Return the disk block address of the nth block in inode ip.
// If there is no such block, bmap allocates one.
// returns 0 if out of disk space.
// Panics if bn is beyond NDIRECT + NINDIRECT.
static uint
bmap(struct inode *ip, uint bn)
{
  uint addr, *a;
  struct buf *bp;

  if(bn < NDIRECT){
    // Direct block: the address lives in the inode itself.
    if((addr = ip->addrs[bn]) == 0){
      addr = balloc(ip->dev);
      if(addr == 0)
        return 0;
      ip->addrs[bn] = addr;
    }
    return addr;
  }
  bn -= NDIRECT;

  if(bn < NINDIRECT){
    // Load indirect block, allocating if necessary.
    if((addr = ip->addrs[NDIRECT]) == 0){
      addr = balloc(ip->dev);
      if(addr == 0)
        return 0;
      ip->addrs[NDIRECT] = addr;
    }
    bp = bread(ip->dev, addr);
    a = (uint*)bp->data;
    if((addr = a[bn]) == 0){
      addr = balloc(ip->dev);
      if(addr){
        // Record the new data block in the indirect block.
        a[bn] = addr;
        log_write(bp);
      }
    }
    brelse(bp);
    return addr;
  }

  panic("bmap: out of range");
}

// Truncate inode (discard contents).
// Caller must hold ip->lock.
void itrunc(struct inode *ip) { int i, j; struct buf *bp; uint *a; for(i = 0; i < NDIRECT; i++){ if(ip->addrs[i]){ bfree(ip->dev, ip->addrs[i]); ip->addrs[i] = 0; } } if(ip->addrs[NDIRECT]){ bp = bread(ip->dev, ip->addrs[NDIRECT]); a = (uint*)bp->data; for(j = 0; j < NINDIRECT; j++){ if(a[j]) bfree(ip->dev, a[j]); } brelse(bp); bfree(ip->dev, ip->addrs[NDIRECT]); ip->addrs[NDIRECT] = 0; } ip->size = 0; iupdate(ip); } // Copy stat information from inode. // Caller must hold ip->lock. void stati(struct inode *ip, struct stat *st) { st->dev = ip->dev; st->ino = ip->inum; st->type = ip->type; st->nlink = ip->nlink; st->size = ip->size; } // Read data from inode. // Caller must hold ip->lock. // If user_dst==1, then dst is a user virtual address; // otherwise, dst is a kernel address. int readi(struct inode *ip, int user_dst, uint64 dst, uint off, uint n) { uint tot, m; struct buf *bp; if(off > ip->size || off + n < off) return 0; if(off + n > ip->size) n = ip->size - off; for(tot=0; totdev, addr); m = min(n - tot, BSIZE - off%BSIZE); if(either_copyout(user_dst, dst, bp->data + (off % BSIZE), m) == -1) { brelse(bp); tot = -1; break; } brelse(bp); } return tot; } // Write data to inode. // Caller must hold ip->lock. // If user_src==1, then src is a user virtual address; // otherwise, src is a kernel address. // Returns the number of bytes successfully written. // If the return value is less than the requested n, // there was an error of some kind. 
int writei(struct inode *ip, int user_src, uint64 src, uint off, uint n) { uint tot, m; struct buf *bp; if(off > ip->size || off + n < off) return -1; if(off + n > MAXFILE*BSIZE) return -1; for(tot=0; totdev, addr); m = min(n - tot, BSIZE - off%BSIZE); if(either_copyin(bp->data + (off % BSIZE), user_src, src, m) == -1) { brelse(bp); break; } log_write(bp); brelse(bp); } if(off > ip->size) ip->size = off; // write the i-node back to disk even if the size didn't change // because the loop above might have called bmap() and added a new // block to ip->addrs[]. iupdate(ip); return tot; } // Directories int namecmp(const char *s, const char *t) { return strncmp(s, t, DIRSIZ); } // Look for a directory entry in a directory. // If found, set *poff to byte offset of entry. struct inode* dirlookup(struct inode *dp, char *name, uint *poff) { uint off, inum; struct dirent de; if(dp->type != T_DIR) panic("dirlookup not DIR"); for(off = 0; off < dp->size; off += sizeof(de)){ if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) panic("dirlookup read"); if(de.inum == 0) continue; if(namecmp(name, de.name) == 0){ // entry matches path element if(poff) *poff = off; inum = de.inum; return iget(dp->dev, inum); } } return 0; } // Write a new directory entry (name, inum) into the directory dp. // Returns 0 on success, -1 on failure (e.g. out of disk blocks). int dirlink(struct inode *dp, char *name, uint inum) { int off; struct dirent de; struct inode *ip; // Check that name is not present. if((ip = dirlookup(dp, name, 0)) != 0){ iput(ip); return -1; } // Look for an empty dirent. for(off = 0; off < dp->size; off += sizeof(de)){ if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) panic("dirlink read"); if(de.inum == 0) break; } strncpy(de.name, name, DIRSIZ); de.inum = inum; if(writei(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) return -1; return 0; } // Paths // Copy the next path element from path into name. 
// Return a pointer to the element following the copied one. // The returned path has no leading slashes, // so the caller can check *path=='\0' to see if the name is the last one. // If no name to remove, return 0. // // Examples: // skipelem("a/bb/c", name) = "bb/c", setting name = "a" // skipelem("///a//bb", name) = "bb", setting name = "a" // skipelem("a", name) = "", setting name = "a" // skipelem("", name) = skipelem("////", name) = 0 // static char* skipelem(char *path, char *name) { char *s; int len; while(*path == '/') path++; if(*path == 0) return 0; s = path; while(*path != '/' && *path != 0) path++; len = path - s; if(len >= DIRSIZ) memmove(name, s, DIRSIZ); else { memmove(name, s, len); name[len] = 0; } while(*path == '/') path++; return path; } // Look up and return the inode for a path name. // If parent != 0, return the inode for the parent and copy the final // path element into name, which must have room for DIRSIZ bytes. // Must be called inside a transaction since it calls iput(). static struct inode* namex(char *path, int nameiparent, char *name) { struct inode *ip, *next; if(*path == '/') ip = iget(ROOTDEV, ROOTINO); else ip = idup(myproc()->cwd); while((path = skipelem(path, name)) != 0){ ilock(ip); if(ip->type != T_DIR){ iunlockput(ip); return 0; } if(nameiparent && *path == '\0'){ // Stop one level early. iunlock(ip); return ip; } if((next = dirlookup(ip, name, 0)) == 0){ iunlockput(ip); return 0; } iunlockput(ip); ip = next; } if(nameiparent){ iput(ip); return 0; } return ip; } struct inode* namei(char *path) { char name[DIRSIZ]; return namex(path, 0, name); } struct inode* nameiparent(char *path, char *name) { return namex(path, 1, name); } ================================================ FILE: kernel/fs.h ================================================ // On-disk file system format. // Both the kernel and user programs use this header file. 
#define ROOTINO 1 // root i-number #define BSIZE 1024 // block size // Disk layout: // [ boot block | super block | log | inode blocks | // free bit map | data blocks] // // mkfs computes the super block and builds an initial file system. The // super block describes the disk layout: struct superblock { uint magic; // Must be FSMAGIC uint size; // Size of file system image (blocks) uint nblocks; // Number of data blocks uint ninodes; // Number of inodes. uint nlog; // Number of log blocks uint logstart; // Block number of first log block uint inodestart; // Block number of first inode block uint bmapstart; // Block number of first free map block }; #define FSMAGIC 0x10203040 #define NDIRECT 12 #define NINDIRECT (BSIZE / sizeof(uint)) #define MAXFILE (NDIRECT + NINDIRECT) // On-disk inode structure struct dinode { short type; // File type short major; // Major device number (T_DEVICE only) short minor; // Minor device number (T_DEVICE only) short nlink; // Number of links to inode in file system uint size; // Size of file (bytes) uint addrs[NDIRECT+1]; // Data block addresses }; // Inodes per block. #define IPB (BSIZE / sizeof(struct dinode)) // Block containing inode i #define IBLOCK(i, sb) ((i) / IPB + sb.inodestart) // Bitmap bits per block #define BPB (BSIZE*8) // Block of free map containing bit for block b #define BBLOCK(b, sb) ((b)/BPB + sb.bmapstart) // Directory is a file containing a sequence of dirent structures. #define DIRSIZ 14 // The name field may have DIRSIZ characters and not end in a NUL // character. struct dirent { ushort inum; char name[DIRSIZ] __attribute__((nonstring)); }; ================================================ FILE: kernel/kalloc.c ================================================ // Physical memory allocator, for user processes, // kernel stacks, page-table pages, // and pipe buffers. Allocates whole 4096-byte pages. 
#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "spinlock.h"
#include "riscv.h"
#include "defs.h"

void freerange(void *pa_start, void *pa_end);

extern char end[]; // first address after kernel.
                   // defined by kernel.ld.

// A free page; the link is stored in the page itself.
struct run {
  struct run *next;
};

// The free list and the lock protecting it.
struct {
  struct spinlock lock;
  struct run *freelist;
} kmem;

// Initialize the allocator: hand every page between the end of the
// kernel image and PHYSTOP to the free list.
void
kinit()
{
  initlock(&kmem.lock, "kmem");
  freerange(end, (void*)PHYSTOP);
}

// Free every whole page in [pa_start, pa_end); pa_start is rounded
// up to a page boundary first.
void
freerange(void *pa_start, void *pa_end)
{
  char *p;
  p = (char*)PGROUNDUP((uint64)pa_start);
  for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE)
    kfree(p);
}

// Free the page of physical memory pointed at by pa,
// which normally should have been returned by a
// call to kalloc().  (The exception is when
// initializing the allocator; see kinit above.)
// Panics if pa is not page-aligned or lies outside [end, PHYSTOP).
void
kfree(void *pa)
{
  struct run *r;

  if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
    panic("kfree");

  // Fill with junk to catch dangling refs.
  memset(pa, 1, PGSIZE);

  r = (struct run*)pa;

  // Push the page onto the front of the free list.
  acquire(&kmem.lock);
  r->next = kmem.freelist;
  kmem.freelist = r;
  release(&kmem.lock);
}

// Allocate one 4096-byte page of physical memory.
// Returns a pointer that the kernel can use.
// Returns 0 if the memory cannot be allocated.
void *
kalloc(void)
{
  struct run *r;

  // Pop the first page off the free list, if any.
  acquire(&kmem.lock);
  r = kmem.freelist;
  if(r)
    kmem.freelist = r->next;
  release(&kmem.lock);

  if(r)
    memset((char*)r, 5, PGSIZE); // fill with junk
  return (void*)r;
}


================================================
FILE: kernel/kernel.ld
================================================
OUTPUT_ARCH( "riscv" )
ENTRY( _entry )

SECTIONS
{
  /*
   * ensure that entry.S / _entry is at 0x80000000,
   * where qemu's -kernel jumps.
   */
  . = 0x80000000;

  .text : {
    kernel/entry.o(_entry)
    *(.text .text.*)
    . = ALIGN(0x1000);
    _trampoline = .;
    *(trampsec)
    . = ALIGN(0x1000);
    ASSERT(. - _trampoline == 0x1000, "error: trampoline larger than one page");
    PROVIDE(etext = .);
  }

  .rodata : {
    . = ALIGN(16);
    *(.srodata .srodata.*) /* do not need to distinguish this from .rodata */
    . = ALIGN(16);
    *(.rodata .rodata.*)
  }

  .data : {
    . = ALIGN(16);
    *(.sdata .sdata.*) /* do not need to distinguish this from .data */
    . = ALIGN(16);
    *(.data .data.*)
  }

  .bss : {
    . = ALIGN(16);
    *(.sbss .sbss.*) /* do not need to distinguish this from .bss */
    . = ALIGN(16);
    *(.bss .bss.*)
  }

  /* end: first address after the kernel image */
  PROVIDE(end = .);
}


================================================
FILE: kernel/kernelvec.S
================================================
        #
        # interrupts and exceptions while in supervisor
        # mode come here.
        #
        # the current stack is a kernel stack.
        # push registers, call kerneltrap().
        # when kerneltrap() returns, restore registers, return.
        #
.globl kerneltrap
.globl kernelvec
.align 4
kernelvec:
        # make room to save registers.
        addi sp, sp, -256

        # save caller-saved registers.
        sd ra, 0(sp)
        # sd sp, 8(sp)
        sd gp, 16(sp)
        sd tp, 24(sp)
        sd t0, 32(sp)
        sd t1, 40(sp)
        sd t2, 48(sp)
        sd a0, 72(sp)
        sd a1, 80(sp)
        sd a2, 88(sp)
        sd a3, 96(sp)
        sd a4, 104(sp)
        sd a5, 112(sp)
        sd a6, 120(sp)
        sd a7, 128(sp)
        sd t3, 216(sp)
        sd t4, 224(sp)
        sd t5, 232(sp)
        sd t6, 240(sp)

        # call the C trap handler in trap.c
        call kerneltrap

        # restore registers.
        ld ra, 0(sp)
        # ld sp, 8(sp)
        ld gp, 16(sp)
        # not tp (contains hartid), in case we moved CPUs
        ld t0, 32(sp)
        ld t1, 40(sp)
        ld t2, 48(sp)
        ld a0, 72(sp)
        ld a1, 80(sp)
        ld a2, 88(sp)
        ld a3, 96(sp)
        ld a4, 104(sp)
        ld a5, 112(sp)
        ld a6, 120(sp)
        ld a7, 128(sp)
        ld t3, 216(sp)
        ld t4, 224(sp)
        ld t5, 232(sp)
        ld t6, 240(sp)

        addi sp, sp, 256

        # return to whatever we were doing in the kernel.
        sret


================================================
FILE: kernel/log.c
================================================
#include "types.h"
#include "riscv.h"
#include "defs.h"
#include "param.h"
#include "spinlock.h"
#include "sleeplock.h"
#include "fs.h"
#include "buf.h"

// Simple logging that allows concurrent FS system calls.
//
// A log transaction contains the updates of multiple FS system
// calls.
// The logging system only commits when there are
// no FS system calls active. Thus there is never
// any reasoning required about whether a commit might
// write an uncommitted system call's updates to disk.
//
// A system call should call begin_op()/end_op() to mark
// its start and end. Usually begin_op() just increments
// the count of in-progress FS system calls and returns.
// But if it thinks the log is close to running out, it
// sleeps until the last outstanding end_op() commits.
//
// The log is a physical re-do log containing disk blocks.
// The on-disk log format:
//   header block, containing block #s for block A, B, C, ...
//   block A
//   block B
//   block C
//   ...
// Log appends are synchronous.

// Contents of the header block, used for both the on-disk header block
// and to keep track in memory of logged block# before commit.
struct logheader {
  int n;                   // number of valid entries in block[]
  int block[LOGBLOCKS];    // home block number of each logged block
};

struct log {
  struct spinlock lock;
  int start;       // block number of the log header (sb->logstart)
  int outstanding; // how many FS sys calls are executing.
  int committing;  // in commit(), please wait.
  int dev;
  struct logheader lh;
};
struct log log;

static void recover_from_log(void);
static void commit();

// Set up the log for device dev from the super block and replay any
// committed transaction found on disk.
void
initlog(int dev, struct superblock *sb)
{
  // The header must fit in one disk block.
  if (sizeof(struct logheader) >= BSIZE)
    panic("initlog: too big logheader");

  initlock(&log.lock, "log");
  log.start = sb->logstart;
  log.dev = dev;
  recover_from_log();
}

// Copy committed blocks from log to their home location
static void
install_trans(int recovering)
{
  int tail;

  for (tail = 0; tail < log.lh.n; tail++) {
    if(recovering) {
      printf("recovering tail %d dst %d\n", tail, log.lh.block[tail]);
    }
    struct buf *lbuf = bread(log.dev, log.start+tail+1); // read log block
    struct buf *dbuf = bread(log.dev, log.lh.block[tail]); // read dst
    memmove(dbuf->data, lbuf->data, BSIZE);  // copy block to dst
    bwrite(dbuf);  // write dst to disk
    // Outside recovery the buffer was pinned by log_write(); undo that.
    if(recovering == 0)
      bunpin(dbuf);
    brelse(lbuf);
    brelse(dbuf);
  }
}

// Read the log header from disk into the in-memory log header
static void
read_head(void)
{
  struct buf *buf = bread(log.dev, log.start);
  struct logheader *lh = (struct logheader *) (buf->data);
  int i;
  log.lh.n = lh->n;
  for (i = 0; i < log.lh.n; i++) {
    log.lh.block[i] = lh->block[i];
  }
  brelse(buf);
}

// Write in-memory log header to disk.
// This is the true point at which the
// current transaction commits.
static void
write_head(void)
{
  struct buf *buf = bread(log.dev, log.start);
  struct logheader *hb = (struct logheader *) (buf->data);
  int i;
  hb->n = log.lh.n;
  for (i = 0; i < log.lh.n; i++) {
    hb->block[i] = log.lh.block[i];
  }
  bwrite(buf);
  brelse(buf);
}

// Replay the on-disk log, then mark it empty.
static void
recover_from_log(void)
{
  read_head();
  install_trans(1); // if committed, copy from log to disk
  log.lh.n = 0;
  write_head(); // clear the log
}

// called at the start of each FS system call.
void
begin_op(void)
{
  acquire(&log.lock);
  while(1){
    if(log.committing){
      sleep(&log, &log.lock);
    } else if(log.lh.n + (log.outstanding+1)*MAXOPBLOCKS > LOGBLOCKS){
      // this op might exhaust log space; wait for commit.
      sleep(&log, &log.lock);
    } else {
      log.outstanding += 1;
      release(&log.lock);
      break;
    }
  }
}

// called at the end of each FS system call.
// commits if this was the last outstanding operation.
void
end_op(void)
{
  int do_commit = 0;

  acquire(&log.lock);
  log.outstanding -= 1;
  if(log.committing)
    panic("log.committing");
  if(log.outstanding == 0){
    do_commit = 1;
    log.committing = 1;
  } else {
    // begin_op() may be waiting for log space,
    // and decrementing log.outstanding has decreased
    // the amount of reserved space.
    wakeup(&log);
  }
  release(&log.lock);

  if(do_commit){
    // call commit w/o holding locks, since not allowed
    // to sleep with locks.
    commit();
    acquire(&log.lock);
    log.committing = 0;
    wakeup(&log);
    release(&log.lock);
  }
}

// Copy modified blocks from cache to log.
static void
write_log(void)
{
  int tail;

  for (tail = 0; tail < log.lh.n; tail++) {
    struct buf *to = bread(log.dev, log.start+tail+1); // log block
    struct buf *from = bread(log.dev, log.lh.block[tail]); // cache block
    memmove(to->data, from->data, BSIZE);
    bwrite(to);  // write the log
    brelse(from);
    brelse(to);
  }
}

// Commit the current transaction, if it logged any blocks.
static void
commit()
{
  if (log.lh.n > 0) {
    write_log();     // Write modified blocks from cache to log
    write_head();    // Write header to disk -- the real commit
    install_trans(0); // Now install writes to home locations
    log.lh.n = 0;
    write_head();    // Erase the transaction from the log
  }
}

// Caller has modified b->data and is done with the buffer.
// Record the block number and pin in the cache by increasing refcnt.
// commit()/write_log() will do the disk write.
//
// log_write() replaces bwrite(); a typical use is:
//   bp = bread(...)
// modify bp->data[] // log_write(bp) // brelse(bp) void log_write(struct buf *b) { int i; acquire(&log.lock); if (log.lh.n >= LOGBLOCKS) panic("too big a transaction"); if (log.outstanding < 1) panic("log_write outside of trans"); for (i = 0; i < log.lh.n; i++) { if (log.lh.block[i] == b->blockno) // log absorption break; } log.lh.block[i] = b->blockno; if (i == log.lh.n) { // Add new block to log? bpin(b); log.lh.n++; } release(&log.lock); } ================================================ FILE: kernel/main.c ================================================ #include "types.h" #include "param.h" #include "memlayout.h" #include "riscv.h" #include "defs.h" volatile static int started = 0; // start() jumps here in supervisor mode on all CPUs. void main() { if(cpuid() == 0){ consoleinit(); printfinit(); printf("\n"); printf("xv6 kernel is booting\n"); printf("\n"); kinit(); // physical page allocator kvminit(); // create kernel page table kvminithart(); // turn on paging procinit(); // process table trapinit(); // trap vectors trapinithart(); // install kernel trap vector plicinit(); // set up interrupt controller plicinithart(); // ask PLIC for device interrupts binit(); // buffer cache iinit(); // inode table fileinit(); // file table virtio_disk_init(); // emulated hard disk userinit(); // first user process __sync_synchronize(); started = 1; } else { while(started == 0) ; __sync_synchronize(); printf("hart %d starting\n", cpuid()); kvminithart(); // turn on paging trapinithart(); // install kernel trap vector plicinithart(); // ask PLIC for device interrupts } scheduler(); } ================================================ FILE: kernel/memlayout.h ================================================ // Physical memory layout // qemu -machine virt is set up like this, // based on qemu's hw/riscv/virt.c: // // 00001000 -- boot ROM, provided by qemu // 02000000 -- CLINT // 0C000000 -- PLIC // 10000000 -- uart0 // 10001000 -- virtio disk // 80000000 -- qemu's boot ROM 
//             loads the kernel here,
//             then jumps here.
// unused RAM after 80000000.

// the kernel uses physical memory thus:
// 80000000 -- entry.S, then kernel text and data
// end -- start of kernel page allocation area
// PHYSTOP -- end RAM used by the kernel

// qemu puts UART registers here in physical memory.
#define UART0 0x10000000L
#define UART0_IRQ 10

// virtio mmio interface
#define VIRTIO0 0x10001000
#define VIRTIO0_IRQ 1

// qemu puts platform-level interrupt controller (PLIC) here.
// The per-hart macros compute a register address from the hart number.
#define PLIC 0x0c000000L
#define PLIC_PRIORITY (PLIC + 0x0)
#define PLIC_PENDING (PLIC + 0x1000)
#define PLIC_SENABLE(hart) (PLIC + 0x2080 + (hart)*0x100)
#define PLIC_SPRIORITY(hart) (PLIC + 0x201000 + (hart)*0x2000)
#define PLIC_SCLAIM(hart) (PLIC + 0x201004 + (hart)*0x2000)

// the kernel expects there to be RAM
// for use by the kernel and user pages
// from physical address 0x80000000 to PHYSTOP.
// (PHYSTOP is 128 MB above KERNBASE.)
#define KERNBASE 0x80000000L
#define PHYSTOP (KERNBASE + 128*1024*1024)

// map the trampoline page to the highest address,
// in both user and kernel space.
#define TRAMPOLINE (MAXVA - PGSIZE)

// map kernel stacks beneath the trampoline,
// each surrounded by invalid guard pages.
// (each index p is given a 2*PGSIZE slot)
#define KSTACK(p) (TRAMPOLINE - ((p)+1)* 2*PGSIZE)

// User memory layout.
// Address zero first:
//   text
//   original data and bss
//   fixed-size stack
//   expandable heap
//   ...
// TRAPFRAME (p->trapframe, used by the trampoline) // TRAMPOLINE (the same page as in the kernel) #define TRAPFRAME (TRAMPOLINE - PGSIZE) ================================================ FILE: kernel/param.h ================================================ #define NPROC 64 // maximum number of processes #define NCPU 8 // maximum number of CPUs #define NOFILE 16 // open files per process #define NFILE 100 // open files per system #define NINODE 50 // maximum number of active i-nodes #define NDEV 10 // maximum major device number #define ROOTDEV 1 // device number of file system root disk #define MAXARG 32 // max exec arguments #define MAXOPBLOCKS 10 // max # of blocks any FS op writes #define LOGBLOCKS (MAXOPBLOCKS*3) // max data blocks in on-disk log #define NBUF (MAXOPBLOCKS*3) // size of disk block cache #define FSSIZE 2000 // size of file system in blocks #define MAXPATH 128 // maximum file path name #define USERSTACK 1 // user stack pages ================================================ FILE: kernel/pipe.c ================================================ #include "types.h" #include "riscv.h" #include "defs.h" #include "param.h" #include "spinlock.h" #include "proc.h" #include "fs.h" #include "sleeplock.h" #include "file.h" #define PIPESIZE 512 struct pipe { struct spinlock lock; char data[PIPESIZE]; uint nread; // number of bytes read uint nwrite; // number of bytes written int readopen; // read fd is still open int writeopen; // write fd is still open }; int pipealloc(struct file **f0, struct file **f1) { struct pipe *pi; pi = 0; *f0 = *f1 = 0; if((*f0 = filealloc()) == 0 || (*f1 = filealloc()) == 0) goto bad; if((pi = (struct pipe*)kalloc()) == 0) goto bad; pi->readopen = 1; pi->writeopen = 1; pi->nwrite = 0; pi->nread = 0; initlock(&pi->lock, "pipe"); (*f0)->type = FD_PIPE; (*f0)->readable = 1; (*f0)->writable = 0; (*f0)->pipe = pi; (*f1)->type = FD_PIPE; (*f1)->readable = 0; (*f1)->writable = 1; (*f1)->pipe = pi; return 0; bad: if(pi) kfree((char*)pi); 
if(*f0) fileclose(*f0); if(*f1) fileclose(*f1); return -1; } void pipeclose(struct pipe *pi, int writable) { acquire(&pi->lock); if(writable){ pi->writeopen = 0; wakeup(&pi->nread); } else { pi->readopen = 0; wakeup(&pi->nwrite); } if(pi->readopen == 0 && pi->writeopen == 0){ release(&pi->lock); kfree((char*)pi); } else release(&pi->lock); } int pipewrite(struct pipe *pi, uint64 addr, int n) { int i = 0; struct proc *pr = myproc(); acquire(&pi->lock); while(i < n){ if(pi->readopen == 0 || killed(pr)){ release(&pi->lock); return -1; } if(pi->nwrite == pi->nread + PIPESIZE){ //DOC: pipewrite-full wakeup(&pi->nread); sleep(&pi->nwrite, &pi->lock); } else { char ch; if(copyin(pr->pagetable, &ch, addr + i, 1) == -1) break; pi->data[pi->nwrite++ % PIPESIZE] = ch; i++; } } wakeup(&pi->nread); release(&pi->lock); return i; } int piperead(struct pipe *pi, uint64 addr, int n) { int i; struct proc *pr = myproc(); char ch; acquire(&pi->lock); while(pi->nread == pi->nwrite && pi->writeopen){ //DOC: pipe-empty if(killed(pr)){ release(&pi->lock); return -1; } sleep(&pi->nread, &pi->lock); //DOC: piperead-sleep } for(i = 0; i < n; i++){ //DOC: piperead-copy if(pi->nread == pi->nwrite) break; ch = pi->data[pi->nread % PIPESIZE]; if(copyout(pr->pagetable, addr + i, &ch, 1) == -1) { if(i == 0) i = -1; break; } pi->nread++; } wakeup(&pi->nwrite); //DOC: piperead-wakeup release(&pi->lock); return i; } ================================================ FILE: kernel/plic.c ================================================ #include "types.h" #include "param.h" #include "memlayout.h" #include "riscv.h" #include "defs.h" // // the riscv Platform Level Interrupt Controller (PLIC). // void plicinit(void) { // set desired IRQ priorities non-zero (otherwise disabled). *(uint32*)(PLIC + UART0_IRQ*4) = 1; *(uint32*)(PLIC + VIRTIO0_IRQ*4) = 1; } void plicinithart(void) { int hart = cpuid(); // set enable bits for this hart's S-mode // for the uart and virtio disk. 
*(uint32*)PLIC_SENABLE(hart) = (1 << UART0_IRQ) | (1 << VIRTIO0_IRQ); // set this hart's S-mode priority threshold to 0. *(uint32*)PLIC_SPRIORITY(hart) = 0; } // ask the PLIC what interrupt we should serve. int plic_claim(void) { int hart = cpuid(); int irq = *(uint32*)PLIC_SCLAIM(hart); return irq; } // tell the PLIC we've served this IRQ. void plic_complete(int irq) { int hart = cpuid(); *(uint32*)PLIC_SCLAIM(hart) = irq; } ================================================ FILE: kernel/printf.c ================================================ // // formatted console output -- printf, panic. // #include #include "types.h" #include "param.h" #include "spinlock.h" #include "sleeplock.h" #include "fs.h" #include "file.h" #include "memlayout.h" #include "riscv.h" #include "defs.h" #include "proc.h" volatile int panicking = 0; // printing a panic message volatile int panicked = 0; // spinning forever at end of a panic // lock to avoid interleaving concurrent printf's. static struct { struct spinlock lock; } pr; static char digits[] = "0123456789abcdef"; static void printint(long long xx, int base, int sign) { char buf[20]; int i; unsigned long long x; if(sign && (sign = (xx < 0))) x = -xx; else x = xx; i = 0; do { buf[i++] = digits[x % base]; } while((x /= base) != 0); if(sign) buf[i++] = '-'; while(--i >= 0) consputc(buf[i]); } static void printptr(uint64 x) { int i; consputc('0'); consputc('x'); for (i = 0; i < (sizeof(uint64) * 2); i++, x <<= 4) consputc(digits[x >> (sizeof(uint64) * 8 - 4)]); } // Print to the console. int printf(char *fmt, ...) 
{ va_list ap; int i, cx, c0, c1, c2; char *s; if(panicking == 0) acquire(&pr.lock); va_start(ap, fmt); for(i = 0; (cx = fmt[i] & 0xff) != 0; i++){ if(cx != '%'){ consputc(cx); continue; } i++; c0 = fmt[i+0] & 0xff; c1 = c2 = 0; if(c0) c1 = fmt[i+1] & 0xff; if(c1) c2 = fmt[i+2] & 0xff; if(c0 == 'd'){ printint(va_arg(ap, int), 10, 1); } else if(c0 == 'l' && c1 == 'd'){ printint(va_arg(ap, uint64), 10, 1); i += 1; } else if(c0 == 'l' && c1 == 'l' && c2 == 'd'){ printint(va_arg(ap, uint64), 10, 1); i += 2; } else if(c0 == 'u'){ printint(va_arg(ap, uint32), 10, 0); } else if(c0 == 'l' && c1 == 'u'){ printint(va_arg(ap, uint64), 10, 0); i += 1; } else if(c0 == 'l' && c1 == 'l' && c2 == 'u'){ printint(va_arg(ap, uint64), 10, 0); i += 2; } else if(c0 == 'x'){ printint(va_arg(ap, uint32), 16, 0); } else if(c0 == 'l' && c1 == 'x'){ printint(va_arg(ap, uint64), 16, 0); i += 1; } else if(c0 == 'l' && c1 == 'l' && c2 == 'x'){ printint(va_arg(ap, uint64), 16, 0); i += 2; } else if(c0 == 'p'){ printptr(va_arg(ap, uint64)); } else if(c0 == 'c'){ consputc(va_arg(ap, uint)); } else if(c0 == 's'){ if((s = va_arg(ap, char*)) == 0) s = "(null)"; for(; *s; s++) consputc(*s); } else if(c0 == '%'){ consputc('%'); } else if(c0 == 0){ break; } else { // Print unknown % sequence to draw attention. 
consputc('%'); consputc(c0); } } va_end(ap); if(panicking == 0) release(&pr.lock); return 0; } void panic(char *s) { panicking = 1; printf("panic: "); printf("%s\n", s); panicked = 1; // freeze uart output from other CPUs for(;;) ; } void printfinit(void) { initlock(&pr.lock, "pr"); } ================================================ FILE: kernel/proc.c ================================================ #include "types.h" #include "param.h" #include "memlayout.h" #include "riscv.h" #include "spinlock.h" #include "proc.h" #include "defs.h" struct cpu cpus[NCPU]; struct proc proc[NPROC]; struct proc *initproc; int nextpid = 1; struct spinlock pid_lock; extern void forkret(void); static void freeproc(struct proc *p); extern char trampoline[]; // trampoline.S // helps ensure that wakeups of wait()ing // parents are not lost. helps obey the // memory model when using p->parent. // must be acquired before any p->lock. struct spinlock wait_lock; // Allocate a page for each process's kernel stack. // Map it high in memory, followed by an invalid // guard page. void proc_mapstacks(pagetable_t kpgtbl) { struct proc *p; for(p = proc; p < &proc[NPROC]; p++) { char *pa = kalloc(); if(pa == 0) panic("kalloc"); uint64 va = KSTACK((int) (p - proc)); kvmmap(kpgtbl, va, (uint64)pa, PGSIZE, PTE_R | PTE_W); } } // initialize the proc table. void procinit(void) { struct proc *p; initlock(&pid_lock, "nextpid"); initlock(&wait_lock, "wait_lock"); for(p = proc; p < &proc[NPROC]; p++) { initlock(&p->lock, "proc"); p->state = UNUSED; p->kstack = KSTACK((int) (p - proc)); } } // Must be called with interrupts disabled, // to prevent race with process being moved // to a different CPU. int cpuid() { int id = r_tp(); return id; } // Return this CPU's cpu struct. // Interrupts must be disabled. struct cpu* mycpu(void) { int id = cpuid(); struct cpu *c = &cpus[id]; return c; } // Return the current struct proc *, or zero if none. 
struct proc* myproc(void) { push_off(); struct cpu *c = mycpu(); struct proc *p = c->proc; pop_off(); return p; } int allocpid() { int pid; acquire(&pid_lock); pid = nextpid; nextpid = nextpid + 1; release(&pid_lock); return pid; } // Look in the process table for an UNUSED proc. // If found, initialize state required to run in the kernel, // and return with p->lock held. // If there are no free procs, or a memory allocation fails, return 0. static struct proc* allocproc(void) { struct proc *p; for(p = proc; p < &proc[NPROC]; p++) { acquire(&p->lock); if(p->state == UNUSED) { goto found; } else { release(&p->lock); } } return 0; found: p->pid = allocpid(); p->state = USED; // Allocate a trapframe page. if((p->trapframe = (struct trapframe *)kalloc()) == 0){ freeproc(p); release(&p->lock); return 0; } // An empty user page table. p->pagetable = proc_pagetable(p); if(p->pagetable == 0){ freeproc(p); release(&p->lock); return 0; } // Set up new context to start executing at forkret, // which returns to user space. memset(&p->context, 0, sizeof(p->context)); p->context.ra = (uint64)forkret; p->context.sp = p->kstack + PGSIZE; return p; } // free a proc structure and the data hanging from it, // including user pages. // p->lock must be held. static void freeproc(struct proc *p) { if(p->trapframe) kfree((void*)p->trapframe); p->trapframe = 0; if(p->pagetable) proc_freepagetable(p->pagetable, p->sz); p->pagetable = 0; p->sz = 0; p->pid = 0; p->parent = 0; p->name[0] = 0; p->chan = 0; p->killed = 0; p->xstate = 0; p->state = UNUSED; } // Create a user page table for a given process, with no user memory, // but with trampoline and trapframe pages. pagetable_t proc_pagetable(struct proc *p) { pagetable_t pagetable; // An empty page table. pagetable = uvmcreate(); if(pagetable == 0) return 0; // map the trampoline code (for system call return) // at the highest user virtual address. // only the supervisor uses it, on the way // to/from user space, so not PTE_U. 
if(mappages(pagetable, TRAMPOLINE, PGSIZE, (uint64)trampoline, PTE_R | PTE_X) < 0){ uvmfree(pagetable, 0); return 0; } // map the trapframe page just below the trampoline page, for // trampoline.S. if(mappages(pagetable, TRAPFRAME, PGSIZE, (uint64)(p->trapframe), PTE_R | PTE_W) < 0){ uvmunmap(pagetable, TRAMPOLINE, 1, 0); uvmfree(pagetable, 0); return 0; } return pagetable; } // Free a process's page table, and free the // physical memory it refers to. void proc_freepagetable(pagetable_t pagetable, uint64 sz) { uvmunmap(pagetable, TRAMPOLINE, 1, 0); uvmunmap(pagetable, TRAPFRAME, 1, 0); uvmfree(pagetable, sz); } // Set up first user process. void userinit(void) { struct proc *p; p = allocproc(); initproc = p; p->cwd = namei("/"); p->state = RUNNABLE; release(&p->lock); } // Grow or shrink user memory by n bytes. // Return 0 on success, -1 on failure. int growproc(int n) { uint64 sz; struct proc *p = myproc(); sz = p->sz; if(n > 0){ if(sz + n > TRAPFRAME) { return -1; } if((sz = uvmalloc(p->pagetable, sz, sz + n, PTE_W)) == 0) { return -1; } } else if(n < 0){ sz = uvmdealloc(p->pagetable, sz, sz + n); } p->sz = sz; return 0; } // Create a new process, copying the parent. // Sets up child kernel stack to return as if from fork() system call. int kfork(void) { int i, pid; struct proc *np; struct proc *p = myproc(); // Allocate process. if((np = allocproc()) == 0){ return -1; } // Copy user memory from parent to child. if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0){ freeproc(np); release(&np->lock); return -1; } np->sz = p->sz; // copy saved user registers. *(np->trapframe) = *(p->trapframe); // Cause fork to return 0 in the child. np->trapframe->a0 = 0; // increment reference counts on open file descriptors. 
for(i = 0; i < NOFILE; i++) if(p->ofile[i]) np->ofile[i] = filedup(p->ofile[i]); np->cwd = idup(p->cwd); safestrcpy(np->name, p->name, sizeof(p->name)); pid = np->pid; release(&np->lock); acquire(&wait_lock); np->parent = p; release(&wait_lock); acquire(&np->lock); np->state = RUNNABLE; release(&np->lock); return pid; } // Pass p's abandoned children to init. // Caller must hold wait_lock. void reparent(struct proc *p) { struct proc *pp; for(pp = proc; pp < &proc[NPROC]; pp++){ if(pp->parent == p){ pp->parent = initproc; wakeup(initproc); } } } // Exit the current process. Does not return. // An exited process remains in the zombie state // until its parent calls wait(). void kexit(int status) { struct proc *p = myproc(); if(p == initproc) panic("init exiting"); // Close all open files. for(int fd = 0; fd < NOFILE; fd++){ if(p->ofile[fd]){ struct file *f = p->ofile[fd]; fileclose(f); p->ofile[fd] = 0; } } begin_op(); iput(p->cwd); end_op(); p->cwd = 0; acquire(&wait_lock); // Give any children to init. reparent(p); // Parent might be sleeping in wait(). wakeup(p->parent); acquire(&p->lock); p->xstate = status; p->state = ZOMBIE; release(&wait_lock); // Jump into the scheduler, never to return. sched(); panic("zombie exit"); } // Wait for a child process to exit and return its pid. // Return -1 if this process has no children. int kwait(uint64 addr) { struct proc *pp; int havekids, pid; struct proc *p = myproc(); acquire(&wait_lock); for(;;){ // Scan through table looking for exited children. havekids = 0; for(pp = proc; pp < &proc[NPROC]; pp++){ if(pp->parent == p){ // make sure the child isn't still in exit() or swtch(). acquire(&pp->lock); havekids = 1; if(pp->state == ZOMBIE){ // Found one. 
pid = pp->pid; if(addr != 0 && copyout(p->pagetable, addr, (char *)&pp->xstate, sizeof(pp->xstate)) < 0) { release(&pp->lock); release(&wait_lock); return -1; } freeproc(pp); release(&pp->lock); release(&wait_lock); return pid; } release(&pp->lock); } } // No point waiting if we don't have any children. if(!havekids || killed(p)){ release(&wait_lock); return -1; } // Wait for a child to exit. sleep(p, &wait_lock); //DOC: wait-sleep } } // Per-CPU process scheduler. // Each CPU calls scheduler() after setting itself up. // Scheduler never returns. It loops, doing: // - choose a process to run. // - swtch to start running that process. // - eventually that process transfers control // via swtch back to the scheduler. void scheduler(void) { struct proc *p; struct cpu *c = mycpu(); c->proc = 0; for(;;){ // The most recent process to run may have had interrupts // turned off; enable them to avoid a deadlock if all // processes are waiting. Then turn them back off // to avoid a possible race between an interrupt // and wfi. intr_on(); intr_off(); int found = 0; for(p = proc; p < &proc[NPROC]; p++) { acquire(&p->lock); if(p->state == RUNNABLE) { // Switch to chosen process. It is the process's job // to release its lock and then reacquire it // before jumping back to us. p->state = RUNNING; c->proc = p; swtch(&c->context, &p->context); // Process is done running for now. // It should have changed its p->state before coming back. c->proc = 0; found = 1; } release(&p->lock); } if(found == 0) { // nothing to run; stop running on this core until an interrupt. asm volatile("wfi"); } } } // Switch to scheduler. Must hold only p->lock // and have changed proc->state. Saves and restores // intena because intena is a property of this // kernel thread, not this CPU. It should // be proc->intena and proc->noff, but that would // break in the few places where a lock is held but // there's no process. 
void sched(void) { int intena; struct proc *p = myproc(); if(!holding(&p->lock)) panic("sched p->lock"); if(mycpu()->noff != 1) panic("sched locks"); if(p->state == RUNNING) panic("sched RUNNING"); if(intr_get()) panic("sched interruptible"); intena = mycpu()->intena; swtch(&p->context, &mycpu()->context); mycpu()->intena = intena; } // Give up the CPU for one scheduling round. void yield(void) { struct proc *p = myproc(); acquire(&p->lock); p->state = RUNNABLE; sched(); release(&p->lock); } // A fork child's very first scheduling by scheduler() // will swtch to forkret. void forkret(void) { extern char userret[]; static int first = 1; struct proc *p = myproc(); // Still holding p->lock from scheduler. release(&p->lock); if (first) { // File system initialization must be run in the context of a // regular process (e.g., because it calls sleep), and thus cannot // be run from main(). fsinit(ROOTDEV); first = 0; // ensure other cores see first=0. __sync_synchronize(); // We can invoke kexec() now that file system is initialized. // Put the return value (argc) of kexec into a0. p->trapframe->a0 = kexec("/init", (char *[]){ "/init", 0 }); if (p->trapframe->a0 == -1) { panic("exec"); } } // return to user space, mimicing usertrap()'s return. prepare_return(); uint64 satp = MAKE_SATP(p->pagetable); uint64 trampoline_userret = TRAMPOLINE + (userret - trampoline); ((void (*)(uint64))trampoline_userret)(satp); } // Sleep on channel chan, releasing condition lock lk. // Re-acquires lk when awakened. void sleep(void *chan, struct spinlock *lk) { struct proc *p = myproc(); // Must acquire p->lock in order to // change p->state and then call sched. // Once we hold p->lock, we can be // guaranteed that we won't miss any wakeup // (wakeup locks p->lock), // so it's okay to release lk. acquire(&p->lock); //DOC: sleeplock1 release(lk); // Go to sleep. p->chan = chan; p->state = SLEEPING; sched(); // Tidy up. p->chan = 0; // Reacquire original lock. 
release(&p->lock); acquire(lk); } // Wake up all processes sleeping on channel chan. // Caller should hold the condition lock. void wakeup(void *chan) { struct proc *p; for(p = proc; p < &proc[NPROC]; p++) { if(p != myproc()){ acquire(&p->lock); if(p->state == SLEEPING && p->chan == chan) { p->state = RUNNABLE; } release(&p->lock); } } } // Kill the process with the given pid. // The victim won't exit until it tries to return // to user space (see usertrap() in trap.c). int kkill(int pid) { struct proc *p; for(p = proc; p < &proc[NPROC]; p++){ acquire(&p->lock); if(p->pid == pid){ p->killed = 1; if(p->state == SLEEPING){ // Wake process from sleep(). p->state = RUNNABLE; } release(&p->lock); return 0; } release(&p->lock); } return -1; } void setkilled(struct proc *p) { acquire(&p->lock); p->killed = 1; release(&p->lock); } int killed(struct proc *p) { int k; acquire(&p->lock); k = p->killed; release(&p->lock); return k; } // Copy to either a user address, or kernel address, // depending on usr_dst. // Returns 0 on success, -1 on error. int either_copyout(int user_dst, uint64 dst, void *src, uint64 len) { struct proc *p = myproc(); if(user_dst){ return copyout(p->pagetable, dst, src, len); } else { memmove((char *)dst, src, len); return 0; } } // Copy from either a user address, or kernel address, // depending on usr_src. // Returns 0 on success, -1 on error. int either_copyin(void *dst, int user_src, uint64 src, uint64 len) { struct proc *p = myproc(); if(user_src){ return copyin(p->pagetable, dst, src, len); } else { memmove(dst, (char*)src, len); return 0; } } // Print a process listing to console. For debugging. // Runs when user types ^P on console. // No lock to avoid wedging a stuck machine further. 
void procdump(void) { static char *states[] = { [UNUSED] "unused", [USED] "used", [SLEEPING] "sleep ", [RUNNABLE] "runble", [RUNNING] "run ", [ZOMBIE] "zombie" }; struct proc *p; char *state; printf("\n"); for(p = proc; p < &proc[NPROC]; p++){ if(p->state == UNUSED) continue; if(p->state >= 0 && p->state < NELEM(states) && states[p->state]) state = states[p->state]; else state = "???"; printf("%d %s %s", p->pid, state, p->name); printf("\n"); } } ================================================ FILE: kernel/proc.h ================================================ // Saved registers for kernel context switches. struct context { uint64 ra; uint64 sp; // callee-saved uint64 s0; uint64 s1; uint64 s2; uint64 s3; uint64 s4; uint64 s5; uint64 s6; uint64 s7; uint64 s8; uint64 s9; uint64 s10; uint64 s11; }; // Per-CPU state. struct cpu { struct proc *proc; // The process running on this cpu, or null. struct context context; // swtch() here to enter scheduler(). int noff; // Depth of push_off() nesting. int intena; // Were interrupts enabled before push_off()? }; extern struct cpu cpus[NCPU]; // per-process data for the trap handling code in trampoline.S. // sits in a page by itself just under the trampoline page in the // user page table. not specially mapped in the kernel page table. // uservec in trampoline.S saves user registers in the trapframe, // then initializes registers from the trapframe's // kernel_sp, kernel_hartid, kernel_satp, and jumps to kernel_trap. // usertrapret() and userret in trampoline.S set up // the trapframe's kernel_*, restore user registers from the // trapframe, switch to the user page table, and enter user space. // the trapframe includes callee-saved user registers like s0-s11 because the // return-to-user path via usertrapret() doesn't return through // the entire kernel call stack. 
struct trapframe { /* 0 */ uint64 kernel_satp; // kernel page table /* 8 */ uint64 kernel_sp; // top of process's kernel stack /* 16 */ uint64 kernel_trap; // usertrap() /* 24 */ uint64 epc; // saved user program counter /* 32 */ uint64 kernel_hartid; // saved kernel tp /* 40 */ uint64 ra; /* 48 */ uint64 sp; /* 56 */ uint64 gp; /* 64 */ uint64 tp; /* 72 */ uint64 t0; /* 80 */ uint64 t1; /* 88 */ uint64 t2; /* 96 */ uint64 s0; /* 104 */ uint64 s1; /* 112 */ uint64 a0; /* 120 */ uint64 a1; /* 128 */ uint64 a2; /* 136 */ uint64 a3; /* 144 */ uint64 a4; /* 152 */ uint64 a5; /* 160 */ uint64 a6; /* 168 */ uint64 a7; /* 176 */ uint64 s2; /* 184 */ uint64 s3; /* 192 */ uint64 s4; /* 200 */ uint64 s5; /* 208 */ uint64 s6; /* 216 */ uint64 s7; /* 224 */ uint64 s8; /* 232 */ uint64 s9; /* 240 */ uint64 s10; /* 248 */ uint64 s11; /* 256 */ uint64 t3; /* 264 */ uint64 t4; /* 272 */ uint64 t5; /* 280 */ uint64 t6; }; enum procstate { UNUSED, USED, SLEEPING, RUNNABLE, RUNNING, ZOMBIE }; // Per-process state struct proc { struct spinlock lock; // p->lock must be held when using these: enum procstate state; // Process state void *chan; // If non-zero, sleeping on chan int killed; // If non-zero, have been killed int xstate; // Exit status to be returned to parent's wait int pid; // Process ID // wait_lock must be held when using this: struct proc *parent; // Parent process // these are private to the process, so p->lock need not be held. uint64 kstack; // Virtual address of kernel stack uint64 sz; // Size of process memory (bytes) pagetable_t pagetable; // User page table struct trapframe *trapframe; // data page for trampoline.S struct context context; // swtch() here to run process struct file *ofile[NOFILE]; // Open files struct inode *cwd; // Current directory char name[16]; // Process name (debugging) }; ================================================ FILE: kernel/riscv.h ================================================ #ifndef __ASSEMBLER__ // which hart (core) is this? 
static inline uint64
r_mhartid()
{
  uint64 x;
  asm volatile("csrr %0, mhartid" : "=r" (x) );
  return x;
}

// Machine Status Register, mstatus

#define MSTATUS_MPP_MASK (3L << 11) // previous mode.
#define MSTATUS_MPP_M (3L << 11)
#define MSTATUS_MPP_S (1L << 11)
#define MSTATUS_MPP_U (0L << 11)

static inline uint64
r_mstatus()
{
  uint64 x;
  asm volatile("csrr %0, mstatus" : "=r" (x) );
  return x;
}

static inline void
w_mstatus(uint64 x)
{
  asm volatile("csrw mstatus, %0" : : "r" (x));
}

// machine exception program counter, holds the
// instruction address to which a return from
// exception will go.
static inline void
w_mepc(uint64 x)
{
  asm volatile("csrw mepc, %0" : : "r" (x));
}

// Supervisor Status Register, sstatus

#define SSTATUS_SPP (1L << 8)  // Previous mode, 1=Supervisor, 0=User
#define SSTATUS_SPIE (1L << 5) // Supervisor Previous Interrupt Enable
#define SSTATUS_UPIE (1L << 4) // User Previous Interrupt Enable
#define SSTATUS_SIE (1L << 1)  // Supervisor Interrupt Enable
#define SSTATUS_UIE (1L << 0)  // User Interrupt Enable

static inline uint64
r_sstatus()
{
  uint64 x;
  asm volatile("csrr %0, sstatus" : "=r" (x) );
  return x;
}

static inline void
w_sstatus(uint64 x)
{
  asm volatile("csrw sstatus, %0" : : "r" (x));
}

// Supervisor Interrupt Pending
static inline uint64
r_sip()
{
  uint64 x;
  asm volatile("csrr %0, sip" : "=r" (x) );
  return x;
}

static inline void
w_sip(uint64 x)
{
  asm volatile("csrw sip, %0" : : "r" (x));
}

// Supervisor Interrupt Enable
#define SIE_SEIE (1L << 9) // external
#define SIE_STIE (1L << 5) // timer
static inline uint64
r_sie()
{
  uint64 x;
  asm volatile("csrr %0, sie" : "=r" (x) );
  return x;
}

static inline void
w_sie(uint64 x)
{
  asm volatile("csrw sie, %0" : : "r" (x));
}

// Machine-mode Interrupt Enable
#define MIE_STIE (1L << 5)  // supervisor timer
static inline uint64
r_mie()
{
  uint64 x;
  asm volatile("csrr %0, mie" : "=r" (x) );
  return x;
}

static inline void
w_mie(uint64 x)
{
  asm volatile("csrw mie, %0" : : "r" (x));
}

// supervisor
exception program counter, holds the // instruction address to which a return from // exception will go. static inline void w_sepc(uint64 x) { asm volatile("csrw sepc, %0" : : "r" (x)); } static inline uint64 r_sepc() { uint64 x; asm volatile("csrr %0, sepc" : "=r" (x) ); return x; } // Machine Exception Delegation static inline uint64 r_medeleg() { uint64 x; asm volatile("csrr %0, medeleg" : "=r" (x) ); return x; } static inline void w_medeleg(uint64 x) { asm volatile("csrw medeleg, %0" : : "r" (x)); } // Machine Interrupt Delegation static inline uint64 r_mideleg() { uint64 x; asm volatile("csrr %0, mideleg" : "=r" (x) ); return x; } static inline void w_mideleg(uint64 x) { asm volatile("csrw mideleg, %0" : : "r" (x)); } // Supervisor Trap-Vector Base Address // low two bits are mode. static inline void w_stvec(uint64 x) { asm volatile("csrw stvec, %0" : : "r" (x)); } static inline uint64 r_stvec() { uint64 x; asm volatile("csrr %0, stvec" : "=r" (x) ); return x; } // Supervisor Timer Comparison Register static inline uint64 r_stimecmp() { uint64 x; // asm volatile("csrr %0, stimecmp" : "=r" (x) ); asm volatile("csrr %0, 0x14d" : "=r" (x) ); return x; } static inline void w_stimecmp(uint64 x) { // asm volatile("csrw stimecmp, %0" : : "r" (x)); asm volatile("csrw 0x14d, %0" : : "r" (x)); } // Machine Environment Configuration Register static inline uint64 r_menvcfg() { uint64 x; // asm volatile("csrr %0, menvcfg" : "=r" (x) ); asm volatile("csrr %0, 0x30a" : "=r" (x) ); return x; } static inline void w_menvcfg(uint64 x) { // asm volatile("csrw menvcfg, %0" : : "r" (x)); asm volatile("csrw 0x30a, %0" : : "r" (x)); } // Physical Memory Protection static inline void w_pmpcfg0(uint64 x) { asm volatile("csrw pmpcfg0, %0" : : "r" (x)); } static inline void w_pmpaddr0(uint64 x) { asm volatile("csrw pmpaddr0, %0" : : "r" (x)); } // use riscv's sv39 page table scheme. 
#define SATP_SV39 (8L << 60) #define MAKE_SATP(pagetable) (SATP_SV39 | (((uint64)pagetable) >> 12)) // supervisor address translation and protection; // holds the address of the page table. static inline void w_satp(uint64 x) { asm volatile("csrw satp, %0" : : "r" (x)); } static inline uint64 r_satp() { uint64 x; asm volatile("csrr %0, satp" : "=r" (x) ); return x; } // Supervisor Trap Cause static inline uint64 r_scause() { uint64 x; asm volatile("csrr %0, scause" : "=r" (x) ); return x; } // Supervisor Trap Value static inline uint64 r_stval() { uint64 x; asm volatile("csrr %0, stval" : "=r" (x) ); return x; } // Machine-mode Counter-Enable static inline void w_mcounteren(uint64 x) { asm volatile("csrw mcounteren, %0" : : "r" (x)); } static inline uint64 r_mcounteren() { uint64 x; asm volatile("csrr %0, mcounteren" : "=r" (x) ); return x; } // machine-mode cycle counter static inline uint64 r_time() { uint64 x; asm volatile("csrr %0, time" : "=r" (x) ); return x; } // enable device interrupts static inline void intr_on() { w_sstatus(r_sstatus() | SSTATUS_SIE); } // disable device interrupts static inline void intr_off() { w_sstatus(r_sstatus() & ~SSTATUS_SIE); } // are device interrupts enabled? static inline int intr_get() { uint64 x = r_sstatus(); return (x & SSTATUS_SIE) != 0; } static inline uint64 r_sp() { uint64 x; asm volatile("mv %0, sp" : "=r" (x) ); return x; } // read and write tp, the thread pointer, which xv6 uses to hold // this core's hartid (core number), the index into cpus[]. static inline uint64 r_tp() { uint64 x; asm volatile("mv %0, tp" : "=r" (x) ); return x; } static inline void w_tp(uint64 x) { asm volatile("mv tp, %0" : : "r" (x)); } static inline uint64 r_ra() { uint64 x; asm volatile("mv %0, ra" : "=r" (x) ); return x; } // flush the TLB. static inline void sfence_vma() { // the zero, zero means flush all TLB entries. 
asm volatile("sfence.vma zero, zero"); } typedef uint64 pte_t; typedef uint64 *pagetable_t; // 512 PTEs #endif // __ASSEMBLER__ #define PGSIZE 4096 // bytes per page #define PGSHIFT 12 // bits of offset within a page #define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1)) #define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1)) #define PTE_V (1L << 0) // valid #define PTE_R (1L << 1) #define PTE_W (1L << 2) #define PTE_X (1L << 3) #define PTE_U (1L << 4) // user can access // shift a physical address to the right place for a PTE. #define PA2PTE(pa) ((((uint64)pa) >> 12) << 10) #define PTE2PA(pte) (((pte) >> 10) << 12) #define PTE_FLAGS(pte) ((pte) & 0x3FF) // extract the three 9-bit page table indices from a virtual address. #define PXMASK 0x1FF // 9 bits #define PXSHIFT(level) (PGSHIFT+(9*(level))) #define PX(level, va) ((((uint64) (va)) >> PXSHIFT(level)) & PXMASK) // one beyond the highest possible virtual address. // MAXVA is actually one bit less than the max allowed by // Sv39, to avoid having to sign-extend virtual addresses // that have the high bit set. 
#define MAXVA (1L << (9 + 9 + 9 + 12 - 1)) ================================================ FILE: kernel/sleeplock.c ================================================ // Sleeping locks #include "types.h" #include "riscv.h" #include "defs.h" #include "param.h" #include "memlayout.h" #include "spinlock.h" #include "proc.h" #include "sleeplock.h" void initsleeplock(struct sleeplock *lk, char *name) { initlock(&lk->lk, "sleep lock"); lk->name = name; lk->locked = 0; lk->pid = 0; } void acquiresleep(struct sleeplock *lk) { acquire(&lk->lk); while (lk->locked) { sleep(lk, &lk->lk); } lk->locked = 1; lk->pid = myproc()->pid; release(&lk->lk); } void releasesleep(struct sleeplock *lk) { acquire(&lk->lk); lk->locked = 0; lk->pid = 0; wakeup(lk); release(&lk->lk); } int holdingsleep(struct sleeplock *lk) { int r; acquire(&lk->lk); r = lk->locked && (lk->pid == myproc()->pid); release(&lk->lk); return r; } ================================================ FILE: kernel/sleeplock.h ================================================ // Long-term locks for processes struct sleeplock { uint locked; // Is the lock held? struct spinlock lk; // spinlock protecting this sleep lock // For debugging: char *name; // Name of lock. int pid; // Process holding lock }; ================================================ FILE: kernel/spinlock.c ================================================ // Mutual exclusion spin locks. #include "types.h" #include "param.h" #include "memlayout.h" #include "spinlock.h" #include "riscv.h" #include "proc.h" #include "defs.h" void initlock(struct spinlock *lk, char *name) { lk->name = name; lk->locked = 0; lk->cpu = 0; } // Acquire the lock. // Loops (spins) until the lock is acquired. void acquire(struct spinlock *lk) { push_off(); // disable interrupts to avoid deadlock. 
if(holding(lk)) panic("acquire"); // On RISC-V, sync_lock_test_and_set turns into an atomic swap: // a5 = 1 // s1 = &lk->locked // amoswap.w.aq a5, a5, (s1) while(__sync_lock_test_and_set(&lk->locked, 1) != 0) ; // Tell the C compiler and the processor to not move loads or stores // past this point, to ensure that the critical section's memory // references happen strictly after the lock is acquired. // On RISC-V, this emits a fence instruction. __sync_synchronize(); // Record info about lock acquisition for holding() and debugging. lk->cpu = mycpu(); } // Release the lock. void release(struct spinlock *lk) { if(!holding(lk)) panic("release"); lk->cpu = 0; // Tell the C compiler and the CPU to not move loads or stores // past this point, to ensure that all the stores in the critical // section are visible to other CPUs before the lock is released, // and that loads in the critical section occur strictly before // the lock is released. // On RISC-V, this emits a fence instruction. __sync_synchronize(); // Release the lock, equivalent to lk->locked = 0. // This code doesn't use a C assignment, since the C standard // implies that an assignment might be implemented with // multiple store instructions. // On RISC-V, sync_lock_release turns into an atomic swap: // s1 = &lk->locked // amoswap.w zero, zero, (s1) __sync_lock_release(&lk->locked); pop_off(); } // Check whether this cpu is holding the lock. // Interrupts must be off. int holding(struct spinlock *lk) { int r; r = (lk->locked && lk->cpu == mycpu()); return r; } // push_off/pop_off are like intr_off()/intr_on() except that they are matched: // it takes two pop_off()s to undo two push_off()s. Also, if interrupts // are initially off, then push_off, pop_off leaves them off. void push_off(void) { int old = intr_get(); // disable interrupts to prevent an involuntary context // switch while using mycpu(). 
intr_off(); if(mycpu()->noff == 0) mycpu()->intena = old; mycpu()->noff += 1; } void pop_off(void) { struct cpu *c = mycpu(); if(intr_get()) panic("pop_off - interruptible"); if(c->noff < 1) panic("pop_off"); c->noff -= 1; if(c->noff == 0 && c->intena) intr_on(); } ================================================ FILE: kernel/spinlock.h ================================================ // Mutual exclusion lock. struct spinlock { uint locked; // Is the lock held? // For debugging: char *name; // Name of lock. struct cpu *cpu; // The cpu holding the lock. }; ================================================ FILE: kernel/start.c ================================================ #include "types.h" #include "param.h" #include "memlayout.h" #include "riscv.h" #include "defs.h" void main(); void timerinit(); // entry.S needs one stack per CPU. __attribute__ ((aligned (16))) char stack0[4096 * NCPU]; // entry.S jumps here in machine mode on stack0. void start() { // set M Previous Privilege mode to Supervisor, for mret. unsigned long x = r_mstatus(); x &= ~MSTATUS_MPP_MASK; x |= MSTATUS_MPP_S; w_mstatus(x); // set M Exception Program Counter to main, for mret. // requires gcc -mcmodel=medany w_mepc((uint64)main); // disable paging for now. w_satp(0); // delegate all interrupts and exceptions to supervisor mode. w_medeleg(0xffff); w_mideleg(0xffff); w_sie(r_sie() | SIE_SEIE | SIE_STIE); // configure Physical Memory Protection to give supervisor mode // access to all of physical memory. w_pmpaddr0(0x3fffffffffffffull); w_pmpcfg0(0xf); // ask for clock interrupts. timerinit(); // keep each CPU's hartid in its tp register, for cpuid(). int id = r_mhartid(); w_tp(id); // switch to supervisor mode and jump to main(). asm volatile("mret"); } // ask each hart to generate timer interrupts. void timerinit() { // enable supervisor-mode timer interrupts. w_mie(r_mie() | MIE_STIE); // enable the sstc extension (i.e. stimecmp). 
w_menvcfg(r_menvcfg() | (1L << 63)); // allow supervisor to use stimecmp and time. w_mcounteren(r_mcounteren() | 2); // ask for the very first timer interrupt. w_stimecmp(r_time() + 1000000); } ================================================ FILE: kernel/stat.h ================================================ #define T_DIR 1 // Directory #define T_FILE 2 // File #define T_DEVICE 3 // Device struct stat { int dev; // File system's disk device uint ino; // Inode number short type; // Type of file short nlink; // Number of links to file uint64 size; // Size of file in bytes }; ================================================ FILE: kernel/string.c ================================================ #include "types.h" void* memset(void *dst, int c, uint n) { char *cdst = (char *) dst; int i; for(i = 0; i < n; i++){ cdst[i] = c; } return dst; } int memcmp(const void *v1, const void *v2, uint n) { const uchar *s1, *s2; s1 = v1; s2 = v2; while(n-- > 0){ if(*s1 != *s2) return *s1 - *s2; s1++, s2++; } return 0; } void* memmove(void *dst, const void *src, uint n) { const char *s; char *d; if(n == 0) return dst; s = src; d = dst; if(s < d && s + n > d){ s += n; d += n; while(n-- > 0) *--d = *--s; } else while(n-- > 0) *d++ = *s++; return dst; } // memcpy exists to placate GCC. Use memmove. void* memcpy(void *dst, const void *src, uint n) { return memmove(dst, src, n); } int strncmp(const char *p, const char *q, uint n) { while(n > 0 && *p && *p == *q) n--, p++, q++; if(n == 0) return 0; return (uchar)*p - (uchar)*q; } char* strncpy(char *s, const char *t, int n) { char *os; os = s; while(n-- > 0 && (*s++ = *t++) != 0) ; while(n-- > 0) *s++ = 0; return os; } // Like strncpy but guaranteed to NUL-terminate. 
// Copies at most n-1 characters of t into s and always writes a
// terminating 0, except that nothing is written when n <= 0.
// Returns s.
char*
safestrcpy(char *s, const char *t, int n)
{
  char *os;

  os = s;
  if(n <= 0)
    return os;
  // pre-decrement reserves one byte for the terminator below.
  while(--n > 0 && (*s++ = *t++) != 0)
    ;
  *s = 0;
  return os;
}

// Return the number of characters in s before the terminating 0.
int
strlen(const char *s)
{
  int n;

  for(n = 0; s[n]; n++)
    ;
  return n;
}

================================================
FILE: kernel/swtch.S
================================================
# Context switch
#
#   void swtch(struct context *old, struct context *new);
# 
# Save current registers in old. Load from new.	


.globl swtch
swtch:
        # store ra, sp and s0-s11 at offsets 0..104 from a0 (old).
        sd ra, 0(a0)
        sd sp, 8(a0)
        sd s0, 16(a0)
        sd s1, 24(a0)
        sd s2, 32(a0)
        sd s3, 40(a0)
        sd s4, 48(a0)
        sd s5, 56(a0)
        sd s6, 64(a0)
        sd s7, 72(a0)
        sd s8, 80(a0)
        sd s9, 88(a0)
        sd s10, 96(a0)
        sd s11, 104(a0)

        # load the same registers from the same offsets from a1 (new).
        ld ra, 0(a1)
        ld sp, 8(a1)
        ld s0, 16(a1)
        ld s1, 24(a1)
        ld s2, 32(a1)
        ld s3, 40(a1)
        ld s4, 48(a1)
        ld s5, 56(a1)
        ld s6, 64(a1)
        ld s7, 72(a1)
        ld s8, 80(a1)
        ld s9, 88(a1)
        ld s10, 96(a1)
        ld s11, 104(a1)
        
        # returns to the ra just loaded from new.
        ret

	

================================================
FILE: kernel/syscall.c
================================================
#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "riscv.h"
#include "spinlock.h"
#include "proc.h"
#include "syscall.h"
#include "defs.h"

// Fetch the uint64 at addr from the current process.
// Stores it through ip. Returns 0 on success, -1 if the address range
// is not below p->sz or copyin() fails.
int
fetchaddr(uint64 addr, uint64 *ip)
{
  struct proc *p = myproc();
  if(addr >= p->sz || addr+sizeof(uint64) > p->sz) // both tests needed, in case of overflow
    return -1;
  if(copyin(p->pagetable, (char *)ip, addr, sizeof(*ip)) != 0)
    return -1;
  return 0;
}

// Fetch the nul-terminated string at addr from the current process.
// Returns length of string, not including nul, or -1 for error.
int
fetchstr(uint64 addr, char *buf, int max)
{
  struct proc *cur = myproc();

  // copyinstr() reports failure with a negative value.
  if(copyinstr(cur->pagetable, buf, addr, max) >= 0)
    return strlen(buf);
  return -1;
}

// Return the raw value of system call argument n (0..5), read from
// the a0..a5 slots of the current process's trapframe.
// Panics for any other n.
static uint64
argraw(int n)
{
  struct proc *cur = myproc();

  if(n == 0)
    return cur->trapframe->a0;
  if(n == 1)
    return cur->trapframe->a1;
  if(n == 2)
    return cur->trapframe->a2;
  if(n == 3)
    return cur->trapframe->a3;
  if(n == 4)
    return cur->trapframe->a4;
  if(n == 5)
    return cur->trapframe->a5;

  panic("argraw");
  return -1;
}

// Fetch the nth 32-bit system call argument.
// The raw 64-bit register value is converted to int on assignment.
void
argint(int n, int *ip)
{
  uint64 raw = argraw(n);

  *ip = raw;
}

// Retrieve an argument as a pointer.
// Doesn't check for legality, since
// copyin/copyout will do that.
void
argaddr(int n, uint64 *ip)
{
  uint64 raw = argraw(n);

  *ip = raw;
}

// Fetch the nth word-sized system call argument as a null-terminated string.
// Copies into buf, at most max.
// Returns whatever fetchstr() returns: the string length computed
// with strlen() (so not counting the nul) if OK, -1 if error.
int
argstr(int n, char *buf, int max)
{
  uint64 uaddr = argraw(n);

  return fetchstr(uaddr, buf, max);
}

// Prototypes for the functions that handle system calls.
extern uint64 sys_fork(void);
extern uint64 sys_exit(void);
extern uint64 sys_wait(void);
extern uint64 sys_pipe(void);
extern uint64 sys_read(void);
extern uint64 sys_kill(void);
extern uint64 sys_exec(void);
extern uint64 sys_fstat(void);
extern uint64 sys_chdir(void);
extern uint64 sys_dup(void);
extern uint64 sys_getpid(void);
extern uint64 sys_sbrk(void);
extern uint64 sys_pause(void);
extern uint64 sys_uptime(void);
extern uint64 sys_open(void);
extern uint64 sys_write(void);
extern uint64 sys_mknod(void);
extern uint64 sys_unlink(void);
extern uint64 sys_link(void);
extern uint64 sys_mkdir(void);
extern uint64 sys_close(void);

// An array mapping syscall numbers from syscall.h
// to the function that handles the system call.
static uint64 (*syscalls[])(void) = { [SYS_fork] sys_fork, [SYS_exit] sys_exit, [SYS_wait] sys_wait, [SYS_pipe] sys_pipe, [SYS_read] sys_read, [SYS_kill] sys_kill, [SYS_exec] sys_exec, [SYS_fstat] sys_fstat, [SYS_chdir] sys_chdir, [SYS_dup] sys_dup, [SYS_getpid] sys_getpid, [SYS_sbrk] sys_sbrk, [SYS_pause] sys_pause, [SYS_uptime] sys_uptime, [SYS_open] sys_open, [SYS_write] sys_write, [SYS_mknod] sys_mknod, [SYS_unlink] sys_unlink, [SYS_link] sys_link, [SYS_mkdir] sys_mkdir, [SYS_close] sys_close, }; void syscall(void) { int num; struct proc *p = myproc(); num = p->trapframe->a7; if(num > 0 && num < NELEM(syscalls) && syscalls[num]) { // Use num to lookup the system call function for num, call it, // and store its return value in p->trapframe->a0 p->trapframe->a0 = syscalls[num](); } else { printf("%d %s: unknown sys call %d\n", p->pid, p->name, num); p->trapframe->a0 = -1; } } ================================================ FILE: kernel/syscall.h ================================================ // System call numbers #define SYS_fork 1 #define SYS_exit 2 #define SYS_wait 3 #define SYS_pipe 4 #define SYS_read 5 #define SYS_kill 6 #define SYS_exec 7 #define SYS_fstat 8 #define SYS_chdir 9 #define SYS_dup 10 #define SYS_getpid 11 #define SYS_sbrk 12 #define SYS_pause 13 #define SYS_uptime 14 #define SYS_open 15 #define SYS_write 16 #define SYS_mknod 17 #define SYS_unlink 18 #define SYS_link 19 #define SYS_mkdir 20 #define SYS_close 21 ================================================ FILE: kernel/sysfile.c ================================================ // // File-system system calls. // Mostly argument checking, since we don't trust // user code, and calls into file.c and fs.c. 
// #include "types.h" #include "riscv.h" #include "defs.h" #include "param.h" #include "stat.h" #include "spinlock.h" #include "proc.h" #include "fs.h" #include "sleeplock.h" #include "file.h" #include "fcntl.h" // Fetch the nth word-sized system call argument as a file descriptor // and return both the descriptor and the corresponding struct file. static int argfd(int n, int *pfd, struct file **pf) { int fd; struct file *f; argint(n, &fd); if(fd < 0 || fd >= NOFILE || (f=myproc()->ofile[fd]) == 0) return -1; if(pfd) *pfd = fd; if(pf) *pf = f; return 0; } // Allocate a file descriptor for the given file. // Takes over file reference from caller on success. static int fdalloc(struct file *f) { int fd; struct proc *p = myproc(); for(fd = 0; fd < NOFILE; fd++){ if(p->ofile[fd] == 0){ p->ofile[fd] = f; return fd; } } return -1; } uint64 sys_dup(void) { struct file *f; int fd; if(argfd(0, 0, &f) < 0) return -1; if((fd=fdalloc(f)) < 0) return -1; filedup(f); return fd; } uint64 sys_read(void) { struct file *f; int n; uint64 p; argaddr(1, &p); argint(2, &n); if(argfd(0, 0, &f) < 0) return -1; return fileread(f, p, n); } uint64 sys_write(void) { struct file *f; int n; uint64 p; argaddr(1, &p); argint(2, &n); if(argfd(0, 0, &f) < 0) return -1; return filewrite(f, p, n); } uint64 sys_close(void) { int fd; struct file *f; if(argfd(0, &fd, &f) < 0) return -1; myproc()->ofile[fd] = 0; fileclose(f); return 0; } uint64 sys_fstat(void) { struct file *f; uint64 st; // user pointer to struct stat argaddr(1, &st); if(argfd(0, 0, &f) < 0) return -1; return filestat(f, st); } // Create the path new as a link to the same inode as old. 
uint64 sys_link(void) { char name[DIRSIZ], new[MAXPATH], old[MAXPATH]; struct inode *dp, *ip; if(argstr(0, old, MAXPATH) < 0 || argstr(1, new, MAXPATH) < 0) return -1; begin_op(); if((ip = namei(old)) == 0){ end_op(); return -1; } ilock(ip); if(ip->type == T_DIR){ iunlockput(ip); end_op(); return -1; } ip->nlink++; iupdate(ip); iunlock(ip); if((dp = nameiparent(new, name)) == 0) goto bad; ilock(dp); if(dp->dev != ip->dev || dirlink(dp, name, ip->inum) < 0){ iunlockput(dp); goto bad; } iunlockput(dp); iput(ip); end_op(); return 0; bad: ilock(ip); ip->nlink--; iupdate(ip); iunlockput(ip); end_op(); return -1; } // Is the directory dp empty except for "." and ".." ? static int isdirempty(struct inode *dp) { int off; struct dirent de; for(off=2*sizeof(de); offsize; off+=sizeof(de)){ if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) panic("isdirempty: readi"); if(de.inum != 0) return 0; } return 1; } uint64 sys_unlink(void) { struct inode *ip, *dp; struct dirent de; char name[DIRSIZ], path[MAXPATH]; uint off; if(argstr(0, path, MAXPATH) < 0) return -1; begin_op(); if((dp = nameiparent(path, name)) == 0){ end_op(); return -1; } ilock(dp); // Cannot unlink "." or "..". 
if(namecmp(name, ".") == 0 || namecmp(name, "..") == 0) goto bad; if((ip = dirlookup(dp, name, &off)) == 0) goto bad; ilock(ip); if(ip->nlink < 1) panic("unlink: nlink < 1"); if(ip->type == T_DIR && !isdirempty(ip)){ iunlockput(ip); goto bad; } memset(&de, 0, sizeof(de)); if(writei(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) panic("unlink: writei"); if(ip->type == T_DIR){ dp->nlink--; iupdate(dp); } iunlockput(dp); ip->nlink--; iupdate(ip); iunlockput(ip); end_op(); return 0; bad: iunlockput(dp); end_op(); return -1; } static struct inode* create(char *path, short type, short major, short minor) { struct inode *ip, *dp; char name[DIRSIZ]; if((dp = nameiparent(path, name)) == 0) return 0; ilock(dp); if((ip = dirlookup(dp, name, 0)) != 0){ iunlockput(dp); ilock(ip); if(type == T_FILE && (ip->type == T_FILE || ip->type == T_DEVICE)) return ip; iunlockput(ip); return 0; } if((ip = ialloc(dp->dev, type)) == 0){ iunlockput(dp); return 0; } ilock(ip); ip->major = major; ip->minor = minor; ip->nlink = 1; iupdate(ip); if(type == T_DIR){ // Create . and .. entries. // No ip->nlink++ for ".": avoid cyclic ref count. if(dirlink(ip, ".", ip->inum) < 0 || dirlink(ip, "..", dp->inum) < 0) goto fail; } if(dirlink(dp, name, ip->inum) < 0) goto fail; if(type == T_DIR){ // now that success is guaranteed: dp->nlink++; // for ".." iupdate(dp); } iunlockput(dp); return ip; fail: // something went wrong. de-allocate ip. 
ip->nlink = 0; iupdate(ip); iunlockput(ip); iunlockput(dp); return 0; } uint64 sys_open(void) { char path[MAXPATH]; int fd, omode; struct file *f; struct inode *ip; int n; argint(1, &omode); if((n = argstr(0, path, MAXPATH)) < 0) return -1; begin_op(); if(omode & O_CREATE){ ip = create(path, T_FILE, 0, 0); if(ip == 0){ end_op(); return -1; } } else { if((ip = namei(path)) == 0){ end_op(); return -1; } ilock(ip); if(ip->type == T_DIR && omode != O_RDONLY){ iunlockput(ip); end_op(); return -1; } } if(ip->type == T_DEVICE && (ip->major < 0 || ip->major >= NDEV)){ iunlockput(ip); end_op(); return -1; } if((f = filealloc()) == 0 || (fd = fdalloc(f)) < 0){ if(f) fileclose(f); iunlockput(ip); end_op(); return -1; } if(ip->type == T_DEVICE){ f->type = FD_DEVICE; f->major = ip->major; } else { f->type = FD_INODE; f->off = 0; } f->ip = ip; f->readable = !(omode & O_WRONLY); f->writable = (omode & O_WRONLY) || (omode & O_RDWR); if((omode & O_TRUNC) && ip->type == T_FILE){ itrunc(ip); } iunlock(ip); end_op(); return fd; } uint64 sys_mkdir(void) { char path[MAXPATH]; struct inode *ip; begin_op(); if(argstr(0, path, MAXPATH) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0){ end_op(); return -1; } iunlockput(ip); end_op(); return 0; } uint64 sys_mknod(void) { struct inode *ip; char path[MAXPATH]; int major, minor; begin_op(); argint(1, &major); argint(2, &minor); if((argstr(0, path, MAXPATH)) < 0 || (ip = create(path, T_DEVICE, major, minor)) == 0){ end_op(); return -1; } iunlockput(ip); end_op(); return 0; } uint64 sys_chdir(void) { char path[MAXPATH]; struct inode *ip; struct proc *p = myproc(); begin_op(); if(argstr(0, path, MAXPATH) < 0 || (ip = namei(path)) == 0){ end_op(); return -1; } ilock(ip); if(ip->type != T_DIR){ iunlockput(ip); end_op(); return -1; } iunlock(ip); iput(p->cwd); end_op(); p->cwd = ip; return 0; } uint64 sys_exec(void) { char path[MAXPATH], *argv[MAXARG]; int i; uint64 uargv, uarg; argaddr(1, &uargv); if(argstr(0, path, MAXPATH) < 0) { return -1; } 
memset(argv, 0, sizeof(argv)); for(i=0;; i++){ if(i >= NELEM(argv)){ goto bad; } if(fetchaddr(uargv+sizeof(uint64)*i, (uint64*)&uarg) < 0){ goto bad; } if(uarg == 0){ argv[i] = 0; break; } argv[i] = kalloc(); if(argv[i] == 0) goto bad; if(fetchstr(uarg, argv[i], PGSIZE) < 0) goto bad; } int ret = kexec(path, argv); for(i = 0; i < NELEM(argv) && argv[i] != 0; i++) kfree(argv[i]); return ret; bad: for(i = 0; i < NELEM(argv) && argv[i] != 0; i++) kfree(argv[i]); return -1; } uint64 sys_pipe(void) { uint64 fdarray; // user pointer to array of two integers struct file *rf, *wf; int fd0, fd1; struct proc *p = myproc(); argaddr(0, &fdarray); if(pipealloc(&rf, &wf) < 0) return -1; fd0 = -1; if((fd0 = fdalloc(rf)) < 0 || (fd1 = fdalloc(wf)) < 0){ if(fd0 >= 0) p->ofile[fd0] = 0; fileclose(rf); fileclose(wf); return -1; } if(copyout(p->pagetable, fdarray, (char*)&fd0, sizeof(fd0)) < 0 || copyout(p->pagetable, fdarray+sizeof(fd0), (char *)&fd1, sizeof(fd1)) < 0){ p->ofile[fd0] = 0; p->ofile[fd1] = 0; fileclose(rf); fileclose(wf); return -1; } return 0; } ================================================ FILE: kernel/sysproc.c ================================================ #include "types.h" #include "riscv.h" #include "defs.h" #include "param.h" #include "memlayout.h" #include "spinlock.h" #include "proc.h" #include "vm.h" uint64 sys_exit(void) { int n; argint(0, &n); kexit(n); return 0; // not reached } uint64 sys_getpid(void) { return myproc()->pid; } uint64 sys_fork(void) { return kfork(); } uint64 sys_wait(void) { uint64 p; argaddr(0, &p); return kwait(p); } uint64 sys_sbrk(void) { uint64 addr; int t; int n; argint(0, &n); argint(1, &t); addr = myproc()->sz; if(t == SBRK_EAGER || n < 0) { if(growproc(n) < 0) { return -1; } } else { // Lazily allocate memory for this process: increase its memory // size but don't allocate memory. If the processes uses the // memory, vmfault() will allocate it. 
if(addr + n < addr) return -1; if(addr + n > TRAPFRAME) return -1; myproc()->sz += n; } return addr; } uint64 sys_pause(void) { int n; uint ticks0; argint(0, &n); if(n < 0) n = 0; acquire(&tickslock); ticks0 = ticks; while(ticks - ticks0 < n){ if(killed(myproc())){ release(&tickslock); return -1; } sleep(&ticks, &tickslock); } release(&tickslock); return 0; } uint64 sys_kill(void) { int pid; argint(0, &pid); return kkill(pid); } // return how many clock tick interrupts have occurred // since start. uint64 sys_uptime(void) { uint xticks; acquire(&tickslock); xticks = ticks; release(&tickslock); return xticks; } ================================================ FILE: kernel/trampoline.S ================================================ # # low-level code to handle traps from user space into # the kernel, and returns from kernel to user. # # the kernel maps the page holding this code # at the same virtual address (TRAMPOLINE) # in user and kernel space so that it continues # to work when it switches page tables. # kernel.ld causes this code to start at # a page boundary. # #include "riscv.h" #include "memlayout.h" .section trampsec .globl trampoline .globl usertrap trampoline: .align 4 .globl uservec uservec: # # trap.c sets stvec to point here, so # traps from user space start here, # in supervisor mode, but with a # user page table. # # save user a0 in sscratch so # a0 can be used to get at TRAPFRAME. csrw sscratch, a0 # each process has a separate p->trapframe memory area, # but it's mapped to the same virtual address # (TRAPFRAME) in every process's user page table. 
li a0, TRAPFRAME # save the user registers in TRAPFRAME sd ra, 40(a0) sd sp, 48(a0) sd gp, 56(a0) sd tp, 64(a0) sd t0, 72(a0) sd t1, 80(a0) sd t2, 88(a0) sd s0, 96(a0) sd s1, 104(a0) sd a1, 120(a0) sd a2, 128(a0) sd a3, 136(a0) sd a4, 144(a0) sd a5, 152(a0) sd a6, 160(a0) sd a7, 168(a0) sd s2, 176(a0) sd s3, 184(a0) sd s4, 192(a0) sd s5, 200(a0) sd s6, 208(a0) sd s7, 216(a0) sd s8, 224(a0) sd s9, 232(a0) sd s10, 240(a0) sd s11, 248(a0) sd t3, 256(a0) sd t4, 264(a0) sd t5, 272(a0) sd t6, 280(a0) # save the user a0 in p->trapframe->a0 csrr t0, sscratch sd t0, 112(a0) # initialize kernel stack pointer, from p->trapframe->kernel_sp ld sp, 8(a0) # make tp hold the current hartid, from p->trapframe->kernel_hartid ld tp, 32(a0) # load the address of usertrap(), from p->trapframe->kernel_trap ld t0, 16(a0) # fetch the kernel page table address, from p->trapframe->kernel_satp. ld t1, 0(a0) # wait for any previous memory operations to complete, so that # they use the user page table. sfence.vma zero, zero # install the kernel page table. csrw satp, t1 # flush now-stale user entries from the TLB. sfence.vma zero, zero # call usertrap() jalr t0 .globl userret userret: # usertrap() returns here, with user satp in a0. # return from kernel to user. # switch to the user page table. sfence.vma zero, zero csrw satp, a0 sfence.vma zero, zero li a0, TRAPFRAME # restore all but a0 from TRAPFRAME ld ra, 40(a0) ld sp, 48(a0) ld gp, 56(a0) ld tp, 64(a0) ld t0, 72(a0) ld t1, 80(a0) ld t2, 88(a0) ld s0, 96(a0) ld s1, 104(a0) ld a1, 120(a0) ld a2, 128(a0) ld a3, 136(a0) ld a4, 144(a0) ld a5, 152(a0) ld a6, 160(a0) ld a7, 168(a0) ld s2, 176(a0) ld s3, 184(a0) ld s4, 192(a0) ld s5, 200(a0) ld s6, 208(a0) ld s7, 216(a0) ld s8, 224(a0) ld s9, 232(a0) ld s10, 240(a0) ld s11, 248(a0) ld t3, 256(a0) ld t4, 264(a0) ld t5, 272(a0) ld t6, 280(a0) # restore user a0 ld a0, 112(a0) # return to user mode and user pc. # usertrapret() set up sstatus and sepc. 
sret ================================================ FILE: kernel/trap.c ================================================ #include "types.h" #include "param.h" #include "memlayout.h" #include "riscv.h" #include "spinlock.h" #include "proc.h" #include "defs.h" struct spinlock tickslock; uint ticks; extern char trampoline[], uservec[]; // in kernelvec.S, calls kerneltrap(). void kernelvec(); extern int devintr(); void trapinit(void) { initlock(&tickslock, "time"); } // set up to take exceptions and traps while in the kernel. void trapinithart(void) { w_stvec((uint64)kernelvec); } // // handle an interrupt, exception, or system call from user space. // called from, and returns to, trampoline.S // return value is user satp for trampoline.S to switch to. // uint64 usertrap(void) { int which_dev = 0; if((r_sstatus() & SSTATUS_SPP) != 0) panic("usertrap: not from user mode"); // send interrupts and exceptions to kerneltrap(), // since we're now in the kernel. w_stvec((uint64)kernelvec); //DOC: kernelvec struct proc *p = myproc(); // save user program counter. p->trapframe->epc = r_sepc(); if(r_scause() == 8){ // system call if(killed(p)) kexit(-1); // sepc points to the ecall instruction, // but we want to return to the next instruction. p->trapframe->epc += 4; // an interrupt will change sepc, scause, and sstatus, // so enable only now that we're done with those registers. intr_on(); syscall(); } else if((which_dev = devintr()) != 0){ // ok } else if((r_scause() == 15 || r_scause() == 13) && vmfault(p->pagetable, r_stval(), (r_scause() == 13)? 1 : 0) != 0) { // page fault on lazily-allocated page } else { printf("usertrap(): unexpected scause 0x%lx pid=%d\n", r_scause(), p->pid); printf(" sepc=0x%lx stval=0x%lx\n", r_sepc(), r_stval()); setkilled(p); } if(killed(p)) kexit(-1); // give up the CPU if this is a timer interrupt. 
if(which_dev == 2) yield(); prepare_return(); // the user page table to switch to, for trampoline.S uint64 satp = MAKE_SATP(p->pagetable); // return to trampoline.S; satp value in a0. return satp; } // // set up trapframe and control registers for a return to user space // void prepare_return(void) { struct proc *p = myproc(); // we're about to switch the destination of traps from // kerneltrap() to usertrap(). because a trap from kernel // code to usertrap would be a disaster, turn off interrupts. intr_off(); // send syscalls, interrupts, and exceptions to uservec in trampoline.S uint64 trampoline_uservec = TRAMPOLINE + (uservec - trampoline); w_stvec(trampoline_uservec); // set up trapframe values that uservec will need when // the process next traps into the kernel. p->trapframe->kernel_satp = r_satp(); // kernel page table p->trapframe->kernel_sp = p->kstack + PGSIZE; // process's kernel stack p->trapframe->kernel_trap = (uint64)usertrap; p->trapframe->kernel_hartid = r_tp(); // hartid for cpuid() // set up the registers that trampoline.S's sret will use // to get to user space. // set S Previous Privilege mode to User. unsigned long x = r_sstatus(); x &= ~SSTATUS_SPP; // clear SPP to 0 for user mode x |= SSTATUS_SPIE; // enable interrupts in user mode w_sstatus(x); // set S Exception Program Counter to the saved user pc. w_sepc(p->trapframe->epc); } // interrupts and exceptions from kernel code go here via kernelvec, // on whatever the current kernel stack is. void kerneltrap() { int which_dev = 0; uint64 sepc = r_sepc(); uint64 sstatus = r_sstatus(); uint64 scause = r_scause(); if((sstatus & SSTATUS_SPP) == 0) panic("kerneltrap: not from supervisor mode"); if(intr_get() != 0) panic("kerneltrap: interrupts enabled"); if((which_dev = devintr()) == 0){ // interrupt or trap from an unknown source printf("scause=0x%lx sepc=0x%lx stval=0x%lx\n", scause, r_sepc(), r_stval()); panic("kerneltrap"); } // give up the CPU if this is a timer interrupt. 
if(which_dev == 2 && myproc() != 0) yield(); // the yield() may have caused some traps to occur, // so restore trap registers for use by kernelvec.S's sepc instruction. w_sepc(sepc); w_sstatus(sstatus); } void clockintr() { if(cpuid() == 0){ acquire(&tickslock); ticks++; wakeup(&ticks); release(&tickslock); } // ask for the next timer interrupt. this also clears // the interrupt request. 1000000 is about a tenth // of a second. w_stimecmp(r_time() + 1000000); } // check if it's an external interrupt or software interrupt, // and handle it. // returns 2 if timer interrupt, // 1 if other device, // 0 if not recognized. int devintr() { uint64 scause = r_scause(); if(scause == 0x8000000000000009L){ // this is a supervisor external interrupt, via PLIC. // irq indicates which device interrupted. int irq = plic_claim(); if(irq == UART0_IRQ){ uartintr(); } else if(irq == VIRTIO0_IRQ){ virtio_disk_intr(); } else if(irq){ printf("unexpected interrupt irq=%d\n", irq); } // the PLIC allows each device to raise at most one // interrupt at a time; tell the PLIC the device is // now allowed to interrupt again. if(irq) plic_complete(irq); return 1; } else if(scause == 0x8000000000000005L){ // timer interrupt. clockintr(); return 2; } else { return 0; } } ================================================ FILE: kernel/types.h ================================================ typedef unsigned int uint; typedef unsigned short ushort; typedef unsigned char uchar; typedef unsigned char uint8; typedef unsigned short uint16; typedef unsigned int uint32; typedef unsigned long uint64; typedef uint64 pde_t; ================================================ FILE: kernel/uart.c ================================================ // // low-level driver for 16550a UART. // #include "types.h" #include "param.h" #include "memlayout.h" #include "riscv.h" #include "spinlock.h" #include "proc.h" #include "defs.h" // the UART control registers are memory-mapped // at address UART0. 
this macro returns the // address of one of the registers. #define Reg(reg) ((volatile unsigned char *)(UART0 + (reg))) #define ReadReg(reg) (*(Reg(reg))) #define WriteReg(reg, v) (*(Reg(reg)) = (v)) // the UART control registers. // some have different meanings for read vs write. // see http://byterunner.com/16550.html #define RHR 0 // receive holding register (for input bytes) #define THR 0 // transmit holding register (for output bytes) #define IER 1 // interrupt enable register #define IER_RX_ENABLE (1<<0) #define IER_TX_ENABLE (1<<1) #define FCR 2 // FIFO control register #define FCR_FIFO_ENABLE (1<<0) #define FCR_FIFO_CLEAR (3<<1) // clear the content of the two FIFOs #define ISR 2 // interrupt status register #define LCR 3 // line control register #define LCR_EIGHT_BITS (3<<0) #define LCR_BAUD_LATCH (1<<7) // special mode to set baud rate #define LSR 5 // line status register #define LSR_RX_READY (1<<0) // input is waiting to be read from RHR #define LSR_TX_IDLE (1<<5) // THR can accept another character to send // for sending threads to synchronize with uart "ready" interrupts. static struct spinlock tx_lock; static int tx_busy; // is the UART busy sending? static int tx_chan; // &tx_chan is the "wait channel" extern volatile int panicking; // from printf.c extern volatile int panicked; // from printf.c void uartinit(void) { // disable interrupts. WriteReg(IER, 0x00); // special mode to set baud rate. WriteReg(LCR, LCR_BAUD_LATCH); // LSB for baud rate of 38.4K. WriteReg(0, 0x03); // MSB for baud rate of 38.4K. WriteReg(1, 0x00); // leave set-baud mode, // and set word length to 8 bits, no parity. WriteReg(LCR, LCR_EIGHT_BITS); // reset and enable FIFOs. WriteReg(FCR, FCR_FIFO_ENABLE | FCR_FIFO_CLEAR); // enable transmit and receive interrupts. WriteReg(IER, IER_TX_ENABLE | IER_RX_ENABLE); initlock(&tx_lock, "uart"); } // transmit buf[] to the uart. it blocks if the // uart is busy, so it cannot be called from // interrupts, only from write() system calls. 
// sends the n bytes of buf one at a time, holding tx_lock throughout
// (sleep() is passed tx_lock while waiting).
void
uartwrite(char buf[], int n)
{
  acquire(&tx_lock);

  int i = 0;
  while(i < n){ 
    while(tx_busy != 0){
      // wait for a UART transmit-complete interrupt
      // to set tx_busy to 0.
      sleep(&tx_chan, &tx_lock);
    }   
      
    WriteReg(THR, buf[i]);
    i += 1;
    // mark the UART busy until uartintr() clears the flag.
    tx_busy = 1;
  }

  release(&tx_lock);
}


// write a byte to the uart without using
// interrupts, for use by kernel printf() and
// to echo characters. it spins waiting for the uart's
// output register to be empty.
void
uartputc_sync(int c)
{
  // push_off()/pop_off() are skipped once panicking is set.
  if(panicking == 0)
    push_off();

  // once panicked is set, spin here forever instead of writing.
  if(panicked){
    for(;;)
      ;
  }

  // wait for UART to set Transmit Holding Empty in LSR.
  while((ReadReg(LSR) & LSR_TX_IDLE) == 0)
    ;
  WriteReg(THR, c);

  if(panicking == 0)
    pop_off();
}

// try to read one input character from the UART.
// return -1 if none is waiting.
int
uartgetc(void)
{
  if(ReadReg(LSR) & LSR_RX_READY){
    // input data is ready.
    return ReadReg(RHR);
  } else {
    return -1;
  }
}

// handle a uart interrupt, raised because input has
// arrived, or the uart is ready for more output, or
// both. called from devintr().
void
uartintr(void)
{
  ReadReg(ISR); // acknowledge the interrupt

  acquire(&tx_lock);
  if(ReadReg(LSR) & LSR_TX_IDLE){
    // UART finished transmitting; wake up sending thread.
    tx_busy = 0;
    wakeup(&tx_chan);
  }
  release(&tx_lock);

  // read and process incoming characters, if any.
  // each character is handed to consoleintr().
  while(1){
    int c = uartgetc();
    if(c == -1)
      break;
    consoleintr(c);
  }
}

================================================
FILE: kernel/virtio.h
================================================
//
// virtio device definitions.
// for both the mmio interface, and virtio descriptors.
// only tested with qemu.
//
// the virtio spec:
// https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.pdf
//

// virtio mmio control registers, mapped starting at 0x10001000.
// from qemu virtio_mmio.h #define VIRTIO_MMIO_MAGIC_VALUE 0x000 // 0x74726976 #define VIRTIO_MMIO_VERSION 0x004 // version; should be 2 #define VIRTIO_MMIO_DEVICE_ID 0x008 // device type; 1 is net, 2 is disk #define VIRTIO_MMIO_VENDOR_ID 0x00c // 0x554d4551 #define VIRTIO_MMIO_DEVICE_FEATURES 0x010 #define VIRTIO_MMIO_DRIVER_FEATURES 0x020 #define VIRTIO_MMIO_QUEUE_SEL 0x030 // select queue, write-only #define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034 // max size of current queue, read-only #define VIRTIO_MMIO_QUEUE_NUM 0x038 // size of current queue, write-only #define VIRTIO_MMIO_QUEUE_READY 0x044 // ready bit #define VIRTIO_MMIO_QUEUE_NOTIFY 0x050 // write-only #define VIRTIO_MMIO_INTERRUPT_STATUS 0x060 // read-only #define VIRTIO_MMIO_INTERRUPT_ACK 0x064 // write-only #define VIRTIO_MMIO_STATUS 0x070 // read/write #define VIRTIO_MMIO_QUEUE_DESC_LOW 0x080 // physical address for descriptor table, write-only #define VIRTIO_MMIO_QUEUE_DESC_HIGH 0x084 #define VIRTIO_MMIO_DRIVER_DESC_LOW 0x090 // physical address for available ring, write-only #define VIRTIO_MMIO_DRIVER_DESC_HIGH 0x094 #define VIRTIO_MMIO_DEVICE_DESC_LOW 0x0a0 // physical address for used ring, write-only #define VIRTIO_MMIO_DEVICE_DESC_HIGH 0x0a4 // status register bits, from qemu virtio_config.h #define VIRTIO_CONFIG_S_ACKNOWLEDGE 1 #define VIRTIO_CONFIG_S_DRIVER 2 #define VIRTIO_CONFIG_S_DRIVER_OK 4 #define VIRTIO_CONFIG_S_FEATURES_OK 8 // device feature bits #define VIRTIO_BLK_F_RO 5 /* Disk is read-only */ #define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */ #define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */ #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ #define VIRTIO_F_ANY_LAYOUT 27 #define VIRTIO_RING_F_INDIRECT_DESC 28 #define VIRTIO_RING_F_EVENT_IDX 29 // this many virtio descriptors. // must be a power of two. #define NUM 8 // a single descriptor, from the spec. 
struct virtq_desc {
  uint64 addr;
  uint32 len;
  uint16 flags;  // VRING_DESC_F_* bits
  uint16 next;   // index of the next descriptor when VRING_DESC_F_NEXT is set
};
#define VRING_DESC_F_NEXT  1 // chained with another descriptor
#define VRING_DESC_F_WRITE 2 // device writes (vs read)

// the (entire) avail ring, from the spec.
struct virtq_avail {
  uint16 flags; // always zero
  uint16 idx;   // driver will write ring[idx] next
  uint16 ring[NUM]; // descriptor numbers of chain heads
  uint16 unused;
};

// one entry in the "used" ring, with which the
// device tells the driver about completed requests.
struct virtq_used_elem {
  uint32 id;   // index of start of completed descriptor chain
  uint32 len;
};

// the used ring: written by the device, read by the driver.
struct virtq_used {
  uint16 flags; // always zero
  uint16 idx;   // device increments when it adds a ring[] entry
  struct virtq_used_elem ring[NUM];
};

// these are specific to virtio block devices, e.g. disks,
// described in Section 5.2 of the spec.

#define VIRTIO_BLK_T_IN  0 // read the disk
#define VIRTIO_BLK_T_OUT 1 // write the disk

// the format of the first descriptor in a disk request.
// to be followed by two more descriptors containing
// the block, and a one-byte status.
struct virtio_blk_req {
  uint32 type; // VIRTIO_BLK_T_IN or ..._OUT
  uint32 reserved;
  uint64 sector;
};

================================================
FILE: kernel/virtio_disk.c
================================================
//
// driver for qemu's virtio disk device.
// uses qemu's mmio interface to virtio.
//
// qemu ... -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0
//

#include "types.h"
#include "riscv.h"
#include "defs.h"
#include "param.h"
#include "memlayout.h"
#include "spinlock.h"
#include "sleeplock.h"
#include "fs.h"
#include "buf.h"
#include "virtio.h"

// the address of virtio mmio register r.
#define R(r) ((volatile uint32 *)(VIRTIO0 + (r)))

static struct disk {
  // a set (not a ring) of DMA descriptors, with which the
  // driver tells the device where to read and write individual
  // disk operations. there are NUM descriptors.
  // most commands consist of a "chain" (a linked list) of a couple of
  // these descriptors.
  struct virtq_desc *desc;

  // a ring in which the driver writes descriptor numbers
  // that the driver would like the device to process. it only
  // includes the head descriptor of each chain. the ring has
  // NUM elements.
  struct virtq_avail *avail;

  // a ring in which the device writes descriptor numbers that
  // the device has finished processing (just the head of each chain).
  // there are NUM used ring entries.
  struct virtq_used *used;

  // our own book-keeping.
  char free[NUM];  // is a descriptor free?
  uint16 used_idx; // we've looked this far in used[2..NUM].

  // track info about in-flight operations,
  // for use when completion interrupt arrives.
  // indexed by first descriptor index of chain.
  struct {
    struct buf *b;
    char status;
  } info[NUM];

  // disk command headers.
  // one-for-one with descriptors, for convenience.
  struct virtio_blk_req ops[NUM];

  struct spinlock vdisk_lock;

} disk;

// find and initialize the virtio disk: verify the identity
// registers, step through the status handshake, negotiate
// features, set up queue 0 with NUM descriptors, and mark the
// driver ready. panics if any step fails.
void
virtio_disk_init(void)
{
  uint32 status = 0;

  initlock(&disk.vdisk_lock, "virtio_disk");

  if(*R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 ||
     *R(VIRTIO_MMIO_VERSION) != 2 ||
     *R(VIRTIO_MMIO_DEVICE_ID) != 2 ||
     *R(VIRTIO_MMIO_VENDOR_ID) != 0x554d4551){
    panic("could not find virtio disk");
  }

  // reset device
  *R(VIRTIO_MMIO_STATUS) = status;

  // set ACKNOWLEDGE status bit
  status |= VIRTIO_CONFIG_S_ACKNOWLEDGE;
  *R(VIRTIO_MMIO_STATUS) = status;

  // set DRIVER status bit
  status |= VIRTIO_CONFIG_S_DRIVER;
  *R(VIRTIO_MMIO_STATUS) = status;

  // negotiate features
  // start from what the device offers and clear the bits
  // this driver does not want before writing the result back.
  uint64 features = *R(VIRTIO_MMIO_DEVICE_FEATURES);
  features &= ~(1 << VIRTIO_BLK_F_RO);
  features &= ~(1 << VIRTIO_BLK_F_SCSI);
  features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE);
  features &= ~(1 << VIRTIO_BLK_F_MQ);
  features &= ~(1 << VIRTIO_F_ANY_LAYOUT);
  features &= ~(1 << VIRTIO_RING_F_EVENT_IDX);
  features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
  *R(VIRTIO_MMIO_DRIVER_FEATURES) = features;

  // tell device that feature negotiation is complete.
  status |= VIRTIO_CONFIG_S_FEATURES_OK;
  *R(VIRTIO_MMIO_STATUS) = status;

  // re-read status to ensure FEATURES_OK is set.
  status = *R(VIRTIO_MMIO_STATUS);
  if(!(status & VIRTIO_CONFIG_S_FEATURES_OK))
    panic("virtio disk FEATURES_OK unset");

  // initialize queue 0.
  *R(VIRTIO_MMIO_QUEUE_SEL) = 0;

  // ensure queue 0 is not in use.
  if(*R(VIRTIO_MMIO_QUEUE_READY))
    panic("virtio disk should not be ready");

  // check maximum queue size.
  uint32 max = *R(VIRTIO_MMIO_QUEUE_NUM_MAX);
  if(max == 0)
    panic("virtio disk has no queue 0");
  if(max < NUM)
    panic("virtio disk max queue too short");

  // allocate and zero queue memory.
  // each of the three areas gets its own page from kalloc().
  disk.desc = kalloc();
  disk.avail = kalloc();
  disk.used = kalloc();
  if(!disk.desc || !disk.avail || !disk.used)
    panic("virtio disk kalloc");
  memset(disk.desc, 0, PGSIZE);
  memset(disk.avail, 0, PGSIZE);
  memset(disk.used, 0, PGSIZE);

  // set queue size.
  *R(VIRTIO_MMIO_QUEUE_NUM) = NUM;

  // write physical addresses.
  // the registers are 32 bits wide, so each 64-bit address
  // is written as a low half and a high half.
  *R(VIRTIO_MMIO_QUEUE_DESC_LOW) = (uint64)disk.desc;
  *R(VIRTIO_MMIO_QUEUE_DESC_HIGH) = (uint64)disk.desc >> 32;
  *R(VIRTIO_MMIO_DRIVER_DESC_LOW) = (uint64)disk.avail;
  *R(VIRTIO_MMIO_DRIVER_DESC_HIGH) = (uint64)disk.avail >> 32;
  *R(VIRTIO_MMIO_DEVICE_DESC_LOW) = (uint64)disk.used;
  *R(VIRTIO_MMIO_DEVICE_DESC_HIGH) = (uint64)disk.used >> 32;

  // queue is ready.
  *R(VIRTIO_MMIO_QUEUE_READY) = 0x1;

  // all NUM descriptors start out unused.
  for(int i = 0; i < NUM; i++)
    disk.free[i] = 1;

  // tell device we're completely ready.
  status |= VIRTIO_CONFIG_S_DRIVER_OK;
  *R(VIRTIO_MMIO_STATUS) = status;

  // plic.c and trap.c arrange for interrupts from VIRTIO0_IRQ.
}

// find a free descriptor, mark it non-free, return its index.
// returns -1 if all NUM descriptors are in use.
static int
alloc_desc()
{
  for(int i = 0; i < NUM; i++){
    if(disk.free[i]){
      disk.free[i] = 0;
      return i;
    }
  }
  return -1;
}

// mark a descriptor as free.
static void
free_desc(int i)
{
  if(i >= NUM)
    panic("free_desc 1");
  if(disk.free[i])
    panic("free_desc 2");
  disk.desc[i].addr = 0;
  disk.desc[i].len = 0;
  disk.desc[i].flags = 0;
  disk.desc[i].next = 0;
  disk.free[i] = 1;
  // virtio_disk_rw() sleeps on &disk.free[0] when it
  // cannot get three descriptors.
  wakeup(&disk.free[0]);
}

// free a chain of descriptors.
static void
free_chain(int i)
{
  while(1){
    // read flags and next before free_desc() zeroes them.
    int flag = disk.desc[i].flags;
    int nxt = disk.desc[i].next;
    free_desc(i);
    if(flag & VRING_DESC_F_NEXT)
      i = nxt;
    else
      break;
  }
}

// allocate three descriptors (they need not be contiguous).
// disk transfers always use three descriptors.
// returns 0 on success; on failure frees any descriptors
// already taken and returns -1.
static int
alloc3_desc(int *idx)
{
  for(int i = 0; i < 3; i++){
    idx[i] = alloc_desc();
    if(idx[i] < 0){
      for(int j = 0; j < i; j++)
        free_desc(idx[j]);
      return -1;
    }
  }
  return 0;
}

// read (write == 0) or write (write != 0) the block described
// by b, using a chain of three descriptors. sleeps until
// virtio_disk_intr() sets b->disk to 0.
void
virtio_disk_rw(struct buf *b, int write)
{
  // convert the block number to units of 512 bytes.
  uint64 sector = b->blockno * (BSIZE / 512);

  acquire(&disk.vdisk_lock);

  // the spec's Section 5.2 says that legacy block operations use
  // three descriptors: one for type/reserved/sector, one for the
  // data, one for a 1-byte status result.

  // allocate the three descriptors.
  int idx[3];
  while(1){
    if(alloc3_desc(idx) == 0) {
      break;
    }
    sleep(&disk.free[0], &disk.vdisk_lock);
  }

  // format the three descriptors.
  // qemu's virtio-blk.c reads them.

  struct virtio_blk_req *buf0 = &disk.ops[idx[0]];

  if(write)
    buf0->type = VIRTIO_BLK_T_OUT; // write the disk
  else
    buf0->type = VIRTIO_BLK_T_IN; // read the disk
  buf0->reserved = 0;
  buf0->sector = sector;

  // descriptor 0: the request header.
  disk.desc[idx[0]].addr = (uint64) buf0;
  disk.desc[idx[0]].len = sizeof(struct virtio_blk_req);
  disk.desc[idx[0]].flags = VRING_DESC_F_NEXT;
  disk.desc[idx[0]].next = idx[1];

  // descriptor 1: the block data.
  disk.desc[idx[1]].addr = (uint64) b->data;
  disk.desc[idx[1]].len = BSIZE;
  if(write)
    disk.desc[idx[1]].flags = 0; // device reads b->data
  else
    disk.desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data
  disk.desc[idx[1]].flags |= VRING_DESC_F_NEXT;
  disk.desc[idx[1]].next = idx[2];

  // descriptor 2: the one-byte status, end of the chain.
  disk.info[idx[0]].status = 0xff; // device writes 0 on success
  disk.desc[idx[2]].addr = (uint64) &disk.info[idx[0]].status;
  disk.desc[idx[2]].len = 1;
  disk.desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status
  disk.desc[idx[2]].next = 0;

  // record struct buf for virtio_disk_intr().
  b->disk = 1;
  disk.info[idx[0]].b = b;

  // tell the device the first index in our chain of descriptors.
  disk.avail->ring[disk.avail->idx % NUM] = idx[0];

  __sync_synchronize();

  // tell the device another avail ring entry is available.
  disk.avail->idx += 1; // not % NUM ...

  __sync_synchronize();

  *R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number

  // Wait for virtio_disk_intr() to say request has finished.
  while(b->disk == 1) {
    sleep(b, &disk.vdisk_lock);
  }

  disk.info[idx[0]].b = 0;
  free_chain(idx[0]);

  release(&disk.vdisk_lock);
}

// handle a virtio disk interrupt: acknowledge it, then for each
// new used-ring entry check the status byte, mark the buf as
// done and wake the thread sleeping on it. panics on a non-zero
// status.
void
virtio_disk_intr()
{
  acquire(&disk.vdisk_lock);

  // the device won't raise another interrupt until we tell it
  // we've seen this interrupt, which the following line does.
  // this may race with the device writing new entries to
  // the "used" ring, in which case we may process the new
  // completion entries in this interrupt, and have nothing to do
  // in the next interrupt, which is harmless.
  *R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3;

  __sync_synchronize();

  // the device increments disk.used->idx when it
  // adds an entry to the used ring.

  while(disk.used_idx != disk.used->idx){
    __sync_synchronize();
    int id = disk.used->ring[disk.used_idx % NUM].id;

    if(disk.info[id].status != 0)
      panic("virtio_disk_intr status");

    struct buf *b = disk.info[id].b;
    b->disk = 0;   // disk is done with buf
    wakeup(b);

    disk.used_idx += 1;
  }

  release(&disk.vdisk_lock);
}

================================================
FILE: kernel/vm.c
================================================
#include "param.h"
#include "types.h"
#include "memlayout.h"
#include "elf.h"
#include "riscv.h"
#include "defs.h"
#include "spinlock.h"
#include "proc.h"
#include "fs.h"

/*
 * the kernel's page table.
 */
pagetable_t kernel_pagetable;

extern char etext[];  // kernel.ld sets this to end of kernel code.

extern char trampoline[]; // trampoline.S

// Make a direct-map page table for the kernel.
// Every mapping except the trampoline uses va == pa.
pagetable_t
kvmmake(void)
{
  pagetable_t kpgtbl;

  kpgtbl = (pagetable_t) kalloc();
  memset(kpgtbl, 0, PGSIZE);

  // uart registers
  kvmmap(kpgtbl, UART0, UART0, PGSIZE, PTE_R | PTE_W);

  // virtio mmio disk interface
  kvmmap(kpgtbl, VIRTIO0, VIRTIO0, PGSIZE, PTE_R | PTE_W);

  // PLIC
  kvmmap(kpgtbl, PLIC, PLIC, 0x4000000, PTE_R | PTE_W);

  // map kernel text executable and read-only.
  kvmmap(kpgtbl, KERNBASE, KERNBASE, (uint64)etext-KERNBASE, PTE_R | PTE_X);

  // map kernel data and the physical RAM we'll make use of.
  kvmmap(kpgtbl, (uint64)etext, (uint64)etext, PHYSTOP-(uint64)etext, PTE_R | PTE_W);

  // map the trampoline for trap entry/exit to
  // the highest virtual address in the kernel.
  kvmmap(kpgtbl, TRAMPOLINE, (uint64)trampoline, PGSIZE, PTE_R | PTE_X);

  // allocate and map a kernel stack for each process.
  proc_mapstacks(kpgtbl);

  return kpgtbl;
}

// add a mapping to the kernel page table.
// only used when booting.
// does not flush TLB or enable paging.
void
kvmmap(pagetable_t kpgtbl, uint64 va, uint64 pa, uint64 sz, int perm)
{
  // note the argument order: mappages() takes size before pa.
  if(mappages(kpgtbl, va, sz, pa, perm) != 0)
    panic("kvmmap");
}

// Initialize the kernel_pagetable, shared by all CPUs.
void
kvminit(void)
{
  kernel_pagetable = kvmmake();
}

// Switch the current CPU's h/w page table register to
// the kernel's page table, and enable paging.
void
kvminithart()
{
  // wait for any previous writes to the page table memory to finish.
  sfence_vma();

  w_satp(MAKE_SATP(kernel_pagetable));

  // flush stale entries from the TLB.
  sfence_vma();
}

// Return the address of the PTE in page table pagetable
// that corresponds to virtual address va.  If alloc!=0,
// create any required page-table pages.
// Returns 0 if a needed page-table page is missing and
// alloc is 0, or if kalloc() fails. Panics if va >= MAXVA.
//
// The risc-v Sv39 scheme has three levels of page-table
// pages. A page-table page contains 512 64-bit PTEs.
// A 64-bit virtual address is split into five fields:
//   39..63 -- must be zero.
//   30..38 -- 9 bits of level-2 index.
//   21..29 -- 9 bits of level-1 index.
//   12..20 -- 9 bits of level-0 index.
//    0..11 -- 12 bits of byte offset within the page.
pte_t *
walk(pagetable_t pagetable, uint64 va, int alloc)
{
  if(va >= MAXVA)
    panic("walk");

  // descend through levels 2 and 1; the level-0 PTE is
  // returned below without checking whether it is valid.
  for(int level = 2; level > 0; level--) {
    pte_t *pte = &pagetable[PX(level, va)];
    if(*pte & PTE_V) {
      pagetable = (pagetable_t)PTE2PA(*pte);
    } else {
      if(!alloc || (pagetable = (pde_t*)kalloc()) == 0)
        return 0;
      memset(pagetable, 0, PGSIZE);
      *pte = PA2PTE(pagetable) | PTE_V;
    }
  }
  return &pagetable[PX(0, va)];
}

// Look up a virtual address, return the physical address,
// or 0 if not mapped.
// Can only be used to look up user pages.
uint64
walkaddr(pagetable_t pagetable, uint64 va)
{
  pte_t *pte;
  uint64 pa;

  if(va >= MAXVA)
    return 0;

  pte = walk(pagetable, va, 0);
  if(pte == 0)
    return 0;
  if((*pte & PTE_V) == 0)
    return 0;
  // a valid PTE without PTE_U is treated as not mapped.
  if((*pte & PTE_U) == 0)
    return 0;
  pa = PTE2PA(*pte);
  return pa;
}

// Create PTEs for virtual addresses starting at va that refer to
// physical addresses starting at pa.
// va and size MUST be page-aligned.
// Returns 0 on success, -1 if walk() couldn't // allocate a needed page-table page. int mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm) { uint64 a, last; pte_t *pte; if((va % PGSIZE) != 0) panic("mappages: va not aligned"); if((size % PGSIZE) != 0) panic("mappages: size not aligned"); if(size == 0) panic("mappages: size"); a = va; last = va + size - PGSIZE; for(;;){ if((pte = walk(pagetable, a, 1)) == 0) return -1; if(*pte & PTE_V) panic("mappages: remap"); *pte = PA2PTE(pa) | perm | PTE_V; if(a == last) break; a += PGSIZE; pa += PGSIZE; } return 0; } // create an empty user page table. // returns 0 if out of memory. pagetable_t uvmcreate() { pagetable_t pagetable; pagetable = (pagetable_t) kalloc(); if(pagetable == 0) return 0; memset(pagetable, 0, PGSIZE); return pagetable; } // Remove npages of mappings starting from va. va must be // page-aligned. It's OK if the mappings don't exist. // Optionally free the physical memory. void uvmunmap(pagetable_t pagetable, uint64 va, uint64 npages, int do_free) { uint64 a; pte_t *pte; if((va % PGSIZE) != 0) panic("uvmunmap: not aligned"); for(a = va; a < va + npages*PGSIZE; a += PGSIZE){ if((pte = walk(pagetable, a, 0)) == 0) // leaf page table entry allocated? continue; if((*pte & PTE_V) == 0) // has physical page been allocated? continue; if(do_free){ uint64 pa = PTE2PA(*pte); kfree((void*)pa); } *pte = 0; } } // Allocate PTEs and physical memory to grow a process from oldsz to // newsz, which need not be page aligned. Returns new size or 0 on error. 
uint64
uvmalloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz, int xperm)
{
  char *mem;
  uint64 a;

  // asked to shrink: nothing to allocate.
  if(newsz < oldsz)
    return oldsz;

  oldsz = PGROUNDUP(oldsz);
  for(a = oldsz; a < newsz; a += PGSIZE){
    mem = kalloc();
    if(mem == 0){
      // undo the pages mapped so far.
      uvmdealloc(pagetable, a, oldsz);
      return 0;
    }
    memset(mem, 0, PGSIZE);
    // every page is mapped readable and user-accessible,
    // plus whatever extra bits the caller passed in xperm.
    if(mappages(pagetable, a, PGSIZE, (uint64)mem, PTE_R|PTE_U|xperm) != 0){
      kfree(mem);
      uvmdealloc(pagetable, a, oldsz);
      return 0;
    }
  }
  return newsz;
}

// Deallocate user pages to bring the process size from oldsz to
// newsz.  oldsz and newsz need not be page-aligned, nor does newsz
// need to be less than oldsz.  oldsz can be larger than the actual
// process size.  Returns the new process size.
uint64
uvmdealloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)
{
  if(newsz >= oldsz)
    return oldsz;

  // only unmap when the shrink crosses at least one page boundary.
  if(PGROUNDUP(newsz) < PGROUNDUP(oldsz)){
    int npages = (PGROUNDUP(oldsz) - PGROUNDUP(newsz)) / PGSIZE;
    uvmunmap(pagetable, PGROUNDUP(newsz), npages, 1);
  }

  return newsz;
}

// Recursively free page-table pages.
// All leaf mappings must already have been removed.
void
freewalk(pagetable_t pagetable)
{
  // there are 2^9 = 512 PTEs in a page table.
  for(int i = 0; i < 512; i++){
    pte_t pte = pagetable[i];
    // a valid PTE with none of R/W/X set is treated as a
    // pointer to a lower-level table.
    if((pte & PTE_V) && (pte & (PTE_R|PTE_W|PTE_X)) == 0){
      // this PTE points to a lower-level page table.
      uint64 child = PTE2PA(pte);
      freewalk((pagetable_t)child);
      pagetable[i] = 0;
    } else if(pte & PTE_V){
      panic("freewalk: leaf");
    }
  }
  kfree((void*)pagetable);
}

// Free user memory pages,
// then free page-table pages.
void
uvmfree(pagetable_t pagetable, uint64 sz)
{
  if(sz > 0)
    uvmunmap(pagetable, 0, PGROUNDUP(sz)/PGSIZE, 1);
  freewalk(pagetable);
}

// Given a parent process's page table, copy
// its memory into a child's page table.
// Copies both the page table and the
// physical memory.
// returns 0 on success, -1 on failure.
// frees any allocated pages on failure.
int
uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
{
  pte_t *pte;
  uint64 pa, i;
  uint flags;
  char *mem;

  for(i = 0; i < sz; i += PGSIZE){
    if((pte = walk(old, i, 0)) == 0)
      continue;   // page table entry hasn't been allocated
    if((*pte & PTE_V) == 0)
      continue;   // physical page hasn't been allocated
    pa = PTE2PA(*pte);
    // the child's page gets the same flag bits as the parent's.
    flags = PTE_FLAGS(*pte);
    if((mem = kalloc()) == 0)
      goto err;
    memmove(mem, (char*)pa, PGSIZE);
    if(mappages(new, i, PGSIZE, (uint64)mem, flags) != 0){
      kfree(mem);
      goto err;
    }
  }
  return 0;

 err:
  // unmap and free the pages copied before the failure.
  uvmunmap(new, 0, i / PGSIZE, 1);
  return -1;
}

// mark a PTE invalid for user access.
// used by exec for the user stack guard page.
void
uvmclear(pagetable_t pagetable, uint64 va)
{
  pte_t *pte;

  pte = walk(pagetable, va, 0);
  if(pte == 0)
    panic("uvmclear");
  *pte &= ~PTE_U;
}

// Copy from kernel to user.
// Copy len bytes from src to virtual address dstva in a given page table.
// Return 0 on success, -1 on error.
int
copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
{
  uint64 n, va0, pa0;
  pte_t *pte;

  // copy one destination page at a time.
  while(len > 0){
    va0 = PGROUNDDOWN(dstva);
    if(va0 >= MAXVA)
      return -1;

    pa0 = walkaddr(pagetable, va0);
    if(pa0 == 0) {
      // not mapped: let vmfault() try to allocate the page.
      if((pa0 = vmfault(pagetable, va0, 0)) == 0) {
        return -1;
      }
    }

    pte = walk(pagetable, va0, 0);
    // forbid copyout over read-only user text pages.
    if((*pte & PTE_W) == 0)
      return -1;

    // n is the number of bytes left in this page, capped at len.
    n = PGSIZE - (dstva - va0);
    if(n > len)
      n = len;
    memmove((void *)(pa0 + (dstva - va0)), src, n);

    len -= n;
    src += n;
    dstva = va0 + PGSIZE;
  }
  return 0;
}

// Copy from user to kernel.
// Copy len bytes to dst from virtual address srcva in a given page table.
// Return 0 on success, -1 on error.
int
copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len)
{
  uint64 n, va0, pa0;

  // copy one source page at a time.
  while(len > 0){
    va0 = PGROUNDDOWN(srcva);
    pa0 = walkaddr(pagetable, va0);
    if(pa0 == 0) {
      // not mapped: let vmfault() try to allocate the page.
      if((pa0 = vmfault(pagetable, va0, 0)) == 0) {
        return -1;
      }
    }
    // n is the number of bytes left in this page, capped at len.
    n = PGSIZE - (srcva - va0);
    if(n > len)
      n = len;
    memmove(dst, (void *)(pa0 + (srcva - va0)), n);

    len -= n;
    dst += n;
    srcva = va0 + PGSIZE;
  }
  return 0;
}

// Copy a null-terminated string from user to kernel.
// Copy bytes to dst from virtual address srcva in a given page table,
// until a '\0', or max.
// Return 0 on success, -1 on error.
// Unlike copyin(), this does not call vmfault(): a page that
// walkaddr() reports as unmapped makes it return -1.
int
copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max)
{
  uint64 n, va0, pa0;
  int got_null = 0;

  while(got_null == 0 && max > 0){
    va0 = PGROUNDDOWN(srcva);
    pa0 = walkaddr(pagetable, va0);
    if(pa0 == 0)
      return -1;
    n = PGSIZE - (srcva - va0);
    if(n > max)
      n = max;

    // copy byte by byte within this page, stopping at '\0'.
    char *p = (char *) (pa0 + (srcva - va0));
    while(n > 0){
      if(*p == '\0'){
        *dst = '\0';
        got_null = 1;
        break;
      } else {
        *dst = *p;
      }
      --n;
      --max;
      p++;
      dst++;
    }

    srcva = va0 + PGSIZE;
  }
  // running out of max before seeing '\0' is an error.
  if(got_null){
    return 0;
  } else {
    return -1;
  }
}

// allocate and map user memory if process is referencing a page
// that was lazily allocated in sys_sbrk().
// returns 0 if va is invalid or already mapped, or if
// out of physical memory, and physical address if successful.
uint64 vmfault(pagetable_t pagetable, uint64 va, int read) { uint64 mem; struct proc *p = myproc(); if (va >= p->sz) return 0; va = PGROUNDDOWN(va); if(ismapped(pagetable, va)) { return 0; } mem = (uint64) kalloc(); if(mem == 0) return 0; memset((void *) mem, 0, PGSIZE); if (mappages(p->pagetable, va, PGSIZE, mem, PTE_W|PTE_U|PTE_R) != 0) { kfree((void *)mem); return 0; } return mem; } int ismapped(pagetable_t pagetable, uint64 va) { pte_t *pte = walk(pagetable, va, 0); if (pte == 0) { return 0; } if (*pte & PTE_V){ return 1; } return 0; } ================================================ FILE: kernel/vm.h ================================================ #define SBRK_EAGER 1 #define SBRK_LAZY 2 ================================================ FILE: test-xv6.py ================================================ #!/usr/bin/env python3 # # python script that tests xv6 without having to boot it and type to its shell # # ./test-xv6.py usertests (runs usertests) # ./test-xv6.py -q usertests (runs the quick tests of usertests) # ./test-xv6.py crash (runs the crash tests) # ./test-xv6.py log (runs the log crash test) import argparse, os, inspect, re, signal, subprocess, sys, time from subprocess import run parser = argparse.ArgumentParser() parser.add_argument('testrex', help="test name or regular expression") parser.add_argument("-q", action='store_true', help="usertests quick") args = parser.parse_args() class QEMU(object): def __init__(self, reset=False): if reset: self.build_xv6() self.reset_fs() q = ["make", "qemu"] self.proc = subprocess.Popen(q, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) self.output = "" self.outbytes = bytearray() time.sleep(1) def reset_fs(self): try: run(["rm", "fs.img"], check=True) run(["make", "fs.img"], check=True) except subprocess.CalledProcessError as e: print(f"Command failed with exit code {e.returncode}") def build_xv6(self): try: run(["make", "kernel/kernel"], check=True) except subprocess.CalledProcessError 
as e: print(f"Command failed with exit code {e.returncode}") def save_output(self): try: with open("test-xv6.out", "w") as f: f.write(self.out) f.close() except OSError as e: print("Provided a bad results path. Error:", e) def cmd(self, c): if isinstance(c, str): c = c.encode('utf-8') self.proc.stdin.write(c) self.proc.stdin.flush() def crash(self): ps = run(['ps', '-opid', '--no-headers', '--ppid', str(self.proc.pid)], stdout=subprocess.PIPE, encoding='utf8') kids = [int(line) for line in ps.stdout.splitlines()] if len(kids) == 0: print("no qemu") os.exit(1) print("kill", kids[0]) os.kill(kids[0], signal.SIGKILL) def stop(self): self.proc.terminate() def read(self): buf = os.read(self.proc.stdout.fileno(), 4096) self.outbytes.extend(buf) self.output = self.outbytes.decode("utf-8", "replace") def lines(self): return self.output.splitlines() def error(self): print("FAIL: match failed", regexps) self.save_output() self.stop() sys.exit(1) def match(self, *regexps, exit=True): lines = self.lines() last = -1 for i, line in enumerate(lines): if any(re.match(r, line) for r in regexps): print(line) last = i if last == -1 and exit: self.error() l = "" if last >= 0: l = lines[last] return last >= 0, l def monitor(self, *regexps, progress="", timeout): deadline = time.time() + timeout while True: time.sleep(1) timeleft = deadline - time.time() if timeleft < 0: self.error() self.read() ok, _ = self.match(*regexps, exit=False) if ok: return ok, line = self.match(progress, exit=False) if ok: print(line) def crash_log(): q = QEMU(True) q.cmd("logstress f0 f1 f2 f3 f4 f5\n") time.sleep(2) q.crash() q.stop() def recover_log(): q = QEMU() time.sleep(2) q.read() ok, _ = q.match('^recovering', exit=False) if ok: q.cmd("ls\n") time.sleep(2) q.read() q.match('f5') q.stop() return ok def forphan(): q = QEMU(True) q.cmd("forphan\n") time.sleep(5) q.read() q.match('wait') q.crash() q.stop() def dorphan(): q = QEMU(True) q.cmd("dorphan\n") time.sleep(5) q.read() q.match('wait') q.crash() 
q.stop() def recover_orphan(): q = QEMU() time.sleep(2) q.read() q.match('^ireclaim') q.stop() def test_log(): print("Test recovery of log") for i in range(5): crash_log() ok = recover_log() if ok: print("OK") return print("log attempt ", i+1) print("FAIL") sys.exit(1) def test_forphan(): print("Test recovery of an orphaned file") forphan() recover_orphan() print("OK") def test_dorphan(): print("Test recovery of an orphaned file") dorphan() recover_orphan() print("OK") def test_crash(): test_log() test_forphan() test_dorphan() def test_usertests(test=""): timeout = 600 opt = "" if args.q: opt = " -q" timeout = 300 elif test != "": opt += " " + test q = QEMU(True) q.cmd("usertests" + opt + "\n") q.monitor('^ALL TESTS PASSED', progress='test', timeout=timeout) q.stop() def main(): print(args) rex = r'%s' % args.testrex funcs = [(obj,name) for name,obj in inspect.getmembers(sys.modules[__name__]) if (inspect.isfunction(obj) and name.startswith('test'))] none = True for (f,n) in funcs: if re.search(rex, n): none = False f() if none: test_usertests(test=args.testrex) main() ================================================ FILE: user/cat.c ================================================ #include "kernel/types.h" #include "kernel/fcntl.h" #include "user/user.h" char buf[512]; void cat(int fd) { int n; while((n = read(fd, buf, sizeof(buf))) > 0) { if (write(1, buf, n) != n) { fprintf(2, "cat: write error\n"); exit(1); } } if(n < 0){ fprintf(2, "cat: read error\n"); exit(1); } } int main(int argc, char *argv[]) { int fd, i; if(argc <= 1){ cat(0); exit(0); } for(i = 1; i < argc; i++){ if((fd = open(argv[i], O_RDONLY)) < 0){ fprintf(2, "cat: cannot open %s\n", argv[i]); exit(1); } cat(fd); close(fd); } exit(0); } ================================================ FILE: user/dorphan.c ================================================ #include "kernel/types.h" #include "kernel/stat.h" #include "kernel/fcntl.h" #include "user/user.h" // Create an orphaned directory and check if 
test-xv6.py recovers it. #define BUFSZ 500 char buf[BUFSZ]; int main(int argc, char **argv) { char *s = argv[0]; if(mkdir("dd") != 0){ printf("%s: mkdir dd failed\n", s); exit(1); } if(chdir("dd") != 0){ printf("%s: chdir dd failed\n", s); exit(1); } if (unlink("../dd") < 0) { printf("%s: unlink failed\n", s); exit(1); } printf("wait for kill and reclaim\n"); // sit around until killed for(;;) pause(1000); } ================================================ FILE: user/echo.c ================================================ #include "kernel/types.h" #include "kernel/stat.h" #include "user/user.h" int main(int argc, char *argv[]) { int i; for(i = 1; i < argc; i++){ write(1, argv[i], strlen(argv[i])); if(i + 1 < argc){ write(1, " ", 1); } else { write(1, "\n", 1); } } exit(0); } ================================================ FILE: user/forktest.c ================================================ // Test that fork fails gracefully. // Tiny executable so that the limit can be filling the proc table. #include "kernel/types.h" #include "kernel/stat.h" #include "user/user.h" #define N 1000 void print(const char *s) { write(1, s, strlen(s)); } void forktest(void) { int n, pid; print("fork test\n"); for(n=0; n 0; n--){ if(wait(0) < 0){ print("wait stopped early\n"); exit(1); } } if(wait(0) != -1){ print("wait got too many\n"); exit(1); } print("fork test OK\n"); } int main(void) { forktest(); exit(0); } ================================================ FILE: user/forphan.c ================================================ #include "kernel/types.h" #include "kernel/stat.h" #include "kernel/fcntl.h" #include "user/user.h" // Create an orphaned file and check if test-xv6.py recovers it. 
#define BUFSZ 500 char buf[BUFSZ]; int main(int argc, char **argv) { int fd = 0; char *s = argv[0]; struct stat st; char *ff = "file0"; if ((fd = open(ff, O_CREATE|O_WRONLY)) < 0) { printf("%s: open failed\n", s); exit(1); } if(fstat(fd, &st) < 0){ fprintf(2, "%s: cannot stat %s\n", s, "ff"); exit(1); } if (unlink(ff) < 0) { printf("%s: unlink failed\n", s); exit(1); } if (open(ff, O_RDONLY) != -1) { printf("%s: open successed\n", s); exit(1); } printf("wait for kill and reclaim %d\n", st.ino); // sit around until killed for(;;) pause(1000); } ================================================ FILE: user/grep.c ================================================ // Simple grep. Only supports ^ . * $ operators. #include "kernel/types.h" #include "kernel/stat.h" #include "kernel/fcntl.h" #include "user/user.h" char buf[1024]; int match(char*, char*); void grep(char *pattern, int fd) { int n, m; char *p, *q; m = 0; while((n = read(fd, buf+m, sizeof(buf)-m-1)) > 0){ m += n; buf[m] = '\0'; p = buf; while((q = strchr(p, '\n')) != 0){ *q = 0; if(match(pattern, p)){ *q = '\n'; write(1, p, q+1 - p); } p = q+1; } if(m > 0){ m -= p - buf; memmove(buf, p, m); } } } int main(int argc, char *argv[]) { int fd, i; char *pattern; if(argc <= 1){ fprintf(2, "usage: grep pattern [file ...]\n"); exit(1); } pattern = argv[1]; if(argc <= 2){ grep(pattern, 0); exit(0); } for(i = 2; i < argc; i++){ if((fd = open(argv[i], O_RDONLY)) < 0){ printf("grep: cannot open %s\n", argv[i]); exit(1); } grep(pattern, fd); close(fd); } exit(0); } // Regexp matcher from Kernighan & Pike, // The Practice of Programming, Chapter 9, or // https://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html int matchhere(char*, char*); int matchstar(int, char*, char*); int match(char *re, char *text) { if(re[0] == '^') return matchhere(re+1, text); do{ // must look at empty string if(matchhere(re, text)) return 1; }while(*text++ != '\0'); return 0; } // matchhere: search for re at beginning of text int 
matchhere(char *re, char *text) { if(re[0] == '\0') return 1; if(re[1] == '*') return matchstar(re[0], re+2, text); if(re[0] == '$' && re[1] == '\0') return *text == '\0'; if(*text!='\0' && (re[0]=='.' || re[0]==*text)) return matchhere(re+1, text+1); return 0; } // matchstar: search for c*re at beginning of text int matchstar(int c, char *re, char *text) { do{ // a * matches zero or more instances if(matchhere(re, text)) return 1; }while(*text!='\0' && (*text++==c || c=='.')); return 0; } ================================================ FILE: user/grind.c ================================================ // // run random system calls in parallel forever. // #include "kernel/param.h" #include "kernel/types.h" #include "kernel/stat.h" #include "user/user.h" #include "kernel/fs.h" #include "kernel/fcntl.h" #include "kernel/syscall.h" #include "kernel/memlayout.h" #include "kernel/riscv.h" // from FreeBSD. int do_rand(unsigned long *ctx) { /* * Compute x = (7^5 * x) mod (2^31 - 1) * without overflowing 31 bits: * (2^31 - 1) = 127773 * (7^5) + 2836 * From "Random number generators: good ones are hard to find", * Park and Miller, Communications of the ACM, vol. 31, no. 10, * October 1988, p. 1195. */ long hi, lo, x; /* Transform to [1, 0x7ffffffe] range. */ x = (*ctx % 0x7ffffffe) + 1; hi = x / 127773; lo = x % 127773; x = 16807 * lo - 2836 * hi; if (x < 0) x += 0x7fffffff; /* Transform to [0, 0x7ffffffd] range. 
*/ x--; *ctx = x; return (x); } unsigned long rand_next = 1; int rand(void) { return (do_rand(&rand_next)); } void go(int which_child) { int fd = -1; static char buf[999]; char *break0 = sbrk(0); uint64 iters = 0; mkdir("grindir"); if(chdir("grindir") != 0){ printf("grind: chdir grindir failed\n"); exit(1); } chdir("/"); while(1){ iters++; if((iters % 500) == 0) write(1, which_child?"B":"A", 1); int what = rand() % 23; if(what == 1){ close(open("grindir/../a", O_CREATE|O_RDWR)); } else if(what == 2){ close(open("grindir/../grindir/../b", O_CREATE|O_RDWR)); } else if(what == 3){ unlink("grindir/../a"); } else if(what == 4){ if(chdir("grindir") != 0){ printf("grind: chdir grindir failed\n"); exit(1); } unlink("../b"); chdir("/"); } else if(what == 5){ close(fd); fd = open("/grindir/../a", O_CREATE|O_RDWR); } else if(what == 6){ close(fd); fd = open("/./grindir/./../b", O_CREATE|O_RDWR); } else if(what == 7){ write(fd, buf, sizeof(buf)); } else if(what == 8){ read(fd, buf, sizeof(buf)); } else if(what == 9){ mkdir("grindir/../a"); close(open("a/../a/./a", O_CREATE|O_RDWR)); unlink("a/a"); } else if(what == 10){ mkdir("/../b"); close(open("grindir/../b/b", O_CREATE|O_RDWR)); unlink("b/b"); } else if(what == 11){ unlink("b"); link("../grindir/./../a", "../b"); } else if(what == 12){ unlink("../grindir/../a"); link(".././b", "/grindir/../a"); } else if(what == 13){ int pid = fork(); if(pid == 0){ exit(0); } else if(pid < 0){ printf("grind: fork failed\n"); exit(1); } wait(0); } else if(what == 14){ int pid = fork(); if(pid == 0){ fork(); fork(); exit(0); } else if(pid < 0){ printf("grind: fork failed\n"); exit(1); } wait(0); } else if(what == 15){ sbrk(6011); } else if(what == 16){ if(sbrk(0) > break0) sbrk(-(sbrk(0) - break0)); } else if(what == 17){ int pid = fork(); if(pid == 0){ close(open("a", O_CREATE|O_RDWR)); exit(0); } else if(pid < 0){ printf("grind: fork failed\n"); exit(1); } if(chdir("../grindir/..") != 0){ printf("grind: chdir failed\n"); exit(1); } 
kill(pid); wait(0); } else if(what == 18){ int pid = fork(); if(pid == 0){ kill(getpid()); exit(0); } else if(pid < 0){ printf("grind: fork failed\n"); exit(1); } wait(0); } else if(what == 19){ int fds[2]; if(pipe(fds) < 0){ printf("grind: pipe failed\n"); exit(1); } int pid = fork(); if(pid == 0){ fork(); fork(); if(write(fds[1], "x", 1) != 1) printf("grind: pipe write failed\n"); char c; if(read(fds[0], &c, 1) != 1) printf("grind: pipe read failed\n"); exit(0); } else if(pid < 0){ printf("grind: fork failed\n"); exit(1); } close(fds[0]); close(fds[1]); wait(0); } else if(what == 20){ int pid = fork(); if(pid == 0){ unlink("a"); mkdir("a"); chdir("a"); unlink("../a"); fd = open("x", O_CREATE|O_RDWR); unlink("x"); exit(0); } else if(pid < 0){ printf("grind: fork failed\n"); exit(1); } wait(0); } else if(what == 21){ unlink("c"); // should always succeed. check that there are free i-nodes, // file descriptors, blocks. int fd1 = open("c", O_CREATE|O_RDWR); if(fd1 < 0){ printf("grind: create c failed\n"); exit(1); } if(write(fd1, "x", 1) != 1){ printf("grind: write c failed\n"); exit(1); } struct stat st; if(fstat(fd1, &st) != 0){ printf("grind: fstat failed\n"); exit(1); } if(st.size != 1){ printf("grind: fstat reports wrong size %d\n", (int)st.size); exit(1); } if(st.ino > 200){ printf("grind: fstat reports crazy i-number %d\n", st.ino); exit(1); } close(fd1); unlink("c"); } else if(what == 22){ // echo hi | cat int aa[2], bb[2]; if(pipe(aa) < 0){ fprintf(2, "grind: pipe failed\n"); exit(1); } if(pipe(bb) < 0){ fprintf(2, "grind: pipe failed\n"); exit(1); } int pid1 = fork(); if(pid1 == 0){ close(bb[0]); close(bb[1]); close(aa[0]); close(1); if(dup(aa[1]) != 1){ fprintf(2, "grind: dup failed\n"); exit(1); } close(aa[1]); char *args[3] = { "echo", "hi", 0 }; exec("grindir/../echo", args); fprintf(2, "grind: echo: not found\n"); exit(2); } else if(pid1 < 0){ fprintf(2, "grind: fork failed\n"); exit(3); } int pid2 = fork(); if(pid2 == 0){ close(aa[1]); close(bb[0]); 
close(0); if(dup(aa[0]) != 0){ fprintf(2, "grind: dup failed\n"); exit(4); } close(aa[0]); close(1); if(dup(bb[1]) != 1){ fprintf(2, "grind: dup failed\n"); exit(5); } close(bb[1]); char *args[2] = { "cat", 0 }; exec("/cat", args); fprintf(2, "grind: cat: not found\n"); exit(6); } else if(pid2 < 0){ fprintf(2, "grind: fork failed\n"); exit(7); } close(aa[0]); close(aa[1]); close(bb[1]); char buf[4] = { 0, 0, 0, 0 }; read(bb[0], buf+0, 1); read(bb[0], buf+1, 1); read(bb[0], buf+2, 1); close(bb[0]); int st1, st2; wait(&st1); wait(&st2); if(st1 != 0 || st2 != 0 || strcmp(buf, "hi\n") != 0){ printf("grind: exec pipeline failed %d %d \"%s\"\n", st1, st2, buf); exit(1); } } } } void iter() { unlink("a"); unlink("b"); int pid1 = fork(); if(pid1 < 0){ printf("grind: fork failed\n"); exit(1); } if(pid1 == 0){ rand_next ^= 31; go(0); exit(0); } int pid2 = fork(); if(pid2 < 0){ printf("grind: fork failed\n"); exit(1); } if(pid2 == 0){ rand_next ^= 7177; go(1); exit(0); } int st1 = -1; wait(&st1); if(st1 != 0){ kill(pid1); kill(pid2); } int st2 = -1; wait(&st2); exit(0); } int main() { while(1){ int pid = fork(); if(pid == 0){ iter(); exit(0); } if(pid > 0){ wait(0); } pause(20); rand_next += 1; } } ================================================ FILE: user/init.c ================================================ // init: The initial user-level program #include "kernel/types.h" #include "kernel/stat.h" #include "kernel/spinlock.h" #include "kernel/sleeplock.h" #include "kernel/fs.h" #include "kernel/file.h" #include "user/user.h" #include "kernel/fcntl.h" char *argv[] = { "sh", 0 }; int main(void) { int pid, wpid; if(open("console", O_RDWR) < 0){ mknod("console", CONSOLE, 0); open("console", O_RDWR); } dup(0); // stdout dup(0); // stderr for(;;){ printf("init: starting sh\n"); pid = fork(); if(pid < 0){ printf("init: fork failed\n"); exit(1); } if(pid == 0){ exec("sh", argv); printf("init: exec sh failed\n"); exit(1); } for(;;){ // this call to wait() returns if the shell 
exits, // or if a parentless process exits. wpid = wait((int *) 0); if(wpid == pid){ // the shell exited; restart it. break; } else if(wpid < 0){ printf("init: wait returned an error\n"); exit(1); } else { // it was a parentless process; do nothing. } } } } ================================================ FILE: user/kill.c ================================================ #include "kernel/types.h" #include "kernel/stat.h" #include "user/user.h" int main(int argc, char **argv) { int i; if(argc < 2){ fprintf(2, "usage: kill pid...\n"); exit(1); } for(i=1; i= path && *p != '/'; p--) ; p++; // Return blank-padded name. if(strlen(p) >= DIRSIZ) return p; memmove(buf, p, strlen(p)); memset(buf+strlen(p), ' ', DIRSIZ-strlen(p)); buf[sizeof(buf)-1] = '\0'; return buf; } void ls(char *path) { char buf[512], *p; int fd; struct dirent de; struct stat st; if((fd = open(path, O_RDONLY)) < 0){ fprintf(2, "ls: cannot open %s\n", path); return; } if(fstat(fd, &st) < 0){ fprintf(2, "ls: cannot stat %s\n", path); close(fd); return; } switch(st.type){ case T_DEVICE: case T_FILE: printf("%s %d %d %d\n", fmtname(path), st.type, st.ino, (int) st.size); break; case T_DIR: if(strlen(path) + 1 + DIRSIZ + 1 > sizeof buf){ printf("ls: path too long\n"); break; } strcpy(buf, path); p = buf+strlen(buf); *p++ = '/'; while(read(fd, &de, sizeof(de)) == sizeof(de)){ if(de.inum == 0) continue; memmove(p, de.name, DIRSIZ); p[DIRSIZ] = 0; if(stat(buf, &st) < 0){ printf("ls: cannot stat %s\n", buf); continue; } printf("%s %d %d %d\n", fmtname(buf), st.type, st.ino, (int) st.size); } break; } close(fd); } int main(int argc, char *argv[]) { int i; if(argc < 2){ ls("."); exit(0); } for(i=1; i static char digits[] = "0123456789ABCDEF"; static void putc(int fd, char c) { write(fd, &c, 1); } static void printint(int fd, long long xx, int base, int sgn) { char buf[20]; int i, neg; unsigned long long x; neg = 0; if(sgn && xx < 0){ neg = 1; x = -xx; } else { x = xx; } i = 0; do{ buf[i++] = digits[x % base]; 
}while((x /= base) != 0); if(neg) buf[i++] = '-'; while(--i >= 0) putc(fd, buf[i]); } static void printptr(int fd, uint64 x) { int i; putc(fd, '0'); putc(fd, 'x'); for (i = 0; i < (sizeof(uint64) * 2); i++, x <<= 4) putc(fd, digits[x >> (sizeof(uint64) * 8 - 4)]); } // Print to the given fd. Only understands %d, %x, %p, %c, %s. void vprintf(int fd, const char *fmt, va_list ap) { char *s; int c0, c1, c2, i, state; state = 0; for(i = 0; fmt[i]; i++){ c0 = fmt[i] & 0xff; if(state == 0){ if(c0 == '%'){ state = '%'; } else { putc(fd, c0); } } else if(state == '%'){ c1 = c2 = 0; if(c0) c1 = fmt[i+1] & 0xff; if(c1) c2 = fmt[i+2] & 0xff; if(c0 == 'd'){ printint(fd, va_arg(ap, int), 10, 1); } else if(c0 == 'l' && c1 == 'd'){ printint(fd, va_arg(ap, uint64), 10, 1); i += 1; } else if(c0 == 'l' && c1 == 'l' && c2 == 'd'){ printint(fd, va_arg(ap, uint64), 10, 1); i += 2; } else if(c0 == 'u'){ printint(fd, va_arg(ap, uint32), 10, 0); } else if(c0 == 'l' && c1 == 'u'){ printint(fd, va_arg(ap, uint64), 10, 0); i += 1; } else if(c0 == 'l' && c1 == 'l' && c2 == 'u'){ printint(fd, va_arg(ap, uint64), 10, 0); i += 2; } else if(c0 == 'x'){ printint(fd, va_arg(ap, uint32), 16, 0); } else if(c0 == 'l' && c1 == 'x'){ printint(fd, va_arg(ap, uint64), 16, 0); i += 1; } else if(c0 == 'l' && c1 == 'l' && c2 == 'x'){ printint(fd, va_arg(ap, uint64), 16, 0); i += 2; } else if(c0 == 'p'){ printptr(fd, va_arg(ap, uint64)); } else if(c0 == 'c'){ putc(fd, va_arg(ap, uint32)); } else if(c0 == 's'){ if((s = va_arg(ap, char*)) == 0) s = "(null)"; for(; *s; s++) putc(fd, *s); } else if(c0 == '%'){ putc(fd, '%'); } else { // Unknown % sequence. Print it to draw attention. putc(fd, '%'); putc(fd, c0); } state = 0; } } } void fprintf(int fd, const char *fmt, ...) { va_list ap; va_start(ap, fmt); vprintf(fd, fmt, ap); } void printf(const char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); vprintf(1, fmt, ap); } ================================================ FILE: user/rm.c ================================================ #include "kernel/types.h" #include "kernel/stat.h" #include "user/user.h" int main(int argc, char *argv[]) { int i; if(argc < 2){ fprintf(2, "Usage: rm files...\n"); exit(1); } for(i = 1; i < argc; i++){ if(unlink(argv[i]) < 0){ fprintf(2, "rm: %s failed to delete\n", argv[i]); break; } } exit(0); } ================================================ FILE: user/sh.c ================================================ // Shell. #include "kernel/types.h" #include "user/user.h" #include "kernel/fcntl.h" // Parsed command representation #define EXEC 1 #define REDIR 2 #define PIPE 3 #define LIST 4 #define BACK 5 #define MAXARGS 10 struct cmd { int type; }; struct execcmd { int type; char *argv[MAXARGS]; char *eargv[MAXARGS]; }; struct redircmd { int type; struct cmd *cmd; char *file; char *efile; int mode; int fd; }; struct pipecmd { int type; struct cmd *left; struct cmd *right; }; struct listcmd { int type; struct cmd *left; struct cmd *right; }; struct backcmd { int type; struct cmd *cmd; }; int fork1(void); // Fork but panics on failure. void panic(char*); struct cmd *parsecmd(char*); void runcmd(struct cmd*) __attribute__((noreturn)); // Execute cmd. Never returns. 
void runcmd(struct cmd *cmd) { int p[2]; struct backcmd *bcmd; struct execcmd *ecmd; struct listcmd *lcmd; struct pipecmd *pcmd; struct redircmd *rcmd; if(cmd == 0) exit(1); switch(cmd->type){ default: panic("runcmd"); case EXEC: ecmd = (struct execcmd*)cmd; if(ecmd->argv[0] == 0) exit(1); exec(ecmd->argv[0], ecmd->argv); fprintf(2, "exec %s failed\n", ecmd->argv[0]); break; case REDIR: rcmd = (struct redircmd*)cmd; close(rcmd->fd); if(open(rcmd->file, rcmd->mode) < 0){ fprintf(2, "open %s failed\n", rcmd->file); exit(1); } runcmd(rcmd->cmd); break; case LIST: lcmd = (struct listcmd*)cmd; if(fork1() == 0) runcmd(lcmd->left); wait(0); runcmd(lcmd->right); break; case PIPE: pcmd = (struct pipecmd*)cmd; if(pipe(p) < 0) panic("pipe"); if(fork1() == 0){ close(1); dup(p[1]); close(p[0]); close(p[1]); runcmd(pcmd->left); } if(fork1() == 0){ close(0); dup(p[0]); close(p[0]); close(p[1]); runcmd(pcmd->right); } close(p[0]); close(p[1]); wait(0); wait(0); break; case BACK: bcmd = (struct backcmd*)cmd; if(fork1() == 0) runcmd(bcmd->cmd); break; } exit(0); } int getcmd(char *buf, int nbuf) { write(2, "$ ", 2); memset(buf, 0, nbuf); gets(buf, nbuf); if(buf[0] == 0) // EOF return -1; return 0; } int main(void) { static char buf[100]; int fd; // Ensure that three file descriptors are open. while((fd = open("console", O_RDWR)) >= 0){ if(fd >= 3){ close(fd); break; } } // Read and run input commands. while(getcmd(buf, sizeof(buf)) >= 0){ char *cmd = buf; while (*cmd == ' ' || *cmd == '\t') cmd++; if (*cmd == '\n') // is a blank command continue; if(cmd[0] == 'c' && cmd[1] == 'd' && cmd[2] == ' '){ // Chdir must be called by the parent, not the child. cmd[strlen(cmd)-1] = 0; // chop \n if(chdir(cmd+3) < 0) fprintf(2, "cannot cd %s\n", cmd+3); } else { if(fork1() == 0) runcmd(parsecmd(cmd)); wait(0); } } exit(0); } void panic(char *s) { fprintf(2, "%s\n", s); exit(1); } int fork1(void) { int pid; pid = fork(); if(pid == -1) panic("fork"); return pid; } //PAGEBREAK! 
// Constructors struct cmd* execcmd(void) { struct execcmd *cmd; cmd = malloc(sizeof(*cmd)); memset(cmd, 0, sizeof(*cmd)); cmd->type = EXEC; return (struct cmd*)cmd; } struct cmd* redircmd(struct cmd *subcmd, char *file, char *efile, int mode, int fd) { struct redircmd *cmd; cmd = malloc(sizeof(*cmd)); memset(cmd, 0, sizeof(*cmd)); cmd->type = REDIR; cmd->cmd = subcmd; cmd->file = file; cmd->efile = efile; cmd->mode = mode; cmd->fd = fd; return (struct cmd*)cmd; } struct cmd* pipecmd(struct cmd *left, struct cmd *right) { struct pipecmd *cmd; cmd = malloc(sizeof(*cmd)); memset(cmd, 0, sizeof(*cmd)); cmd->type = PIPE; cmd->left = left; cmd->right = right; return (struct cmd*)cmd; } struct cmd* listcmd(struct cmd *left, struct cmd *right) { struct listcmd *cmd; cmd = malloc(sizeof(*cmd)); memset(cmd, 0, sizeof(*cmd)); cmd->type = LIST; cmd->left = left; cmd->right = right; return (struct cmd*)cmd; } struct cmd* backcmd(struct cmd *subcmd) { struct backcmd *cmd; cmd = malloc(sizeof(*cmd)); memset(cmd, 0, sizeof(*cmd)); cmd->type = BACK; cmd->cmd = subcmd; return (struct cmd*)cmd; } //PAGEBREAK! 
// Parsing char whitespace[] = " \t\r\n\v"; char symbols[] = "<|>&;()"; int gettoken(char **ps, char *es, char **q, char **eq) { char *s; int ret; s = *ps; while(s < es && strchr(whitespace, *s)) s++; if(q) *q = s; ret = *s; switch(*s){ case 0: break; case '|': case '(': case ')': case ';': case '&': case '<': s++; break; case '>': s++; if(*s == '>'){ ret = '+'; s++; } break; default: ret = 'a'; while(s < es && !strchr(whitespace, *s) && !strchr(symbols, *s)) s++; break; } if(eq) *eq = s; while(s < es && strchr(whitespace, *s)) s++; *ps = s; return ret; } int peek(char **ps, char *es, char *toks) { char *s; s = *ps; while(s < es && strchr(whitespace, *s)) s++; *ps = s; return *s && strchr(toks, *s); } struct cmd *parseline(char**, char*); struct cmd *parsepipe(char**, char*); struct cmd *parseexec(char**, char*); struct cmd *nulterminate(struct cmd*); struct cmd* parsecmd(char *s) { char *es; struct cmd *cmd; es = s + strlen(s); cmd = parseline(&s, es); peek(&s, es, ""); if(s != es){ fprintf(2, "leftovers: %s\n", s); panic("syntax"); } nulterminate(cmd); return cmd; } struct cmd* parseline(char **ps, char *es) { struct cmd *cmd; cmd = parsepipe(ps, es); while(peek(ps, es, "&")){ gettoken(ps, es, 0, 0); cmd = backcmd(cmd); } if(peek(ps, es, ";")){ gettoken(ps, es, 0, 0); cmd = listcmd(cmd, parseline(ps, es)); } return cmd; } struct cmd* parsepipe(char **ps, char *es) { struct cmd *cmd; cmd = parseexec(ps, es); if(peek(ps, es, "|")){ gettoken(ps, es, 0, 0); cmd = pipecmd(cmd, parsepipe(ps, es)); } return cmd; } struct cmd* parseredirs(struct cmd *cmd, char **ps, char *es) { int tok; char *q, *eq; while(peek(ps, es, "<>")){ tok = gettoken(ps, es, 0, 0); if(gettoken(ps, es, &q, &eq) != 'a') panic("missing file for redirection"); switch(tok){ case '<': cmd = redircmd(cmd, q, eq, O_RDONLY, 0); break; case '>': cmd = redircmd(cmd, q, eq, O_WRONLY|O_CREATE|O_TRUNC, 1); break; case '+': // >> cmd = redircmd(cmd, q, eq, O_WRONLY|O_CREATE, 1); break; } } return cmd; } struct 
cmd* parseblock(char **ps, char *es) { struct cmd *cmd; if(!peek(ps, es, "(")) panic("parseblock"); gettoken(ps, es, 0, 0); cmd = parseline(ps, es); if(!peek(ps, es, ")")) panic("syntax - missing )"); gettoken(ps, es, 0, 0); cmd = parseredirs(cmd, ps, es); return cmd; } struct cmd* parseexec(char **ps, char *es) { char *q, *eq; int tok, argc; struct execcmd *cmd; struct cmd *ret; if(peek(ps, es, "(")) return parseblock(ps, es); ret = execcmd(); cmd = (struct execcmd*)ret; argc = 0; ret = parseredirs(ret, ps, es); while(!peek(ps, es, "|)&;")){ if((tok=gettoken(ps, es, &q, &eq)) == 0) break; if(tok != 'a') panic("syntax"); cmd->argv[argc] = q; cmd->eargv[argc] = eq; argc++; if(argc >= MAXARGS) panic("too many args"); ret = parseredirs(ret, ps, es); } cmd->argv[argc] = 0; cmd->eargv[argc] = 0; return ret; } // NUL-terminate all the counted strings. struct cmd* nulterminate(struct cmd *cmd) { int i; struct backcmd *bcmd; struct execcmd *ecmd; struct listcmd *lcmd; struct pipecmd *pcmd; struct redircmd *rcmd; if(cmd == 0) return 0; switch(cmd->type){ case EXEC: ecmd = (struct execcmd*)cmd; for(i=0; ecmd->argv[i]; i++) *ecmd->eargv[i] = 0; break; case REDIR: rcmd = (struct redircmd*)cmd; nulterminate(rcmd->cmd); *rcmd->efile = 0; break; case PIPE: pcmd = (struct pipecmd*)cmd; nulterminate(pcmd->left); nulterminate(pcmd->right); break; case LIST: lcmd = (struct listcmd*)cmd; nulterminate(lcmd->left); nulterminate(lcmd->right); break; case BACK: bcmd = (struct backcmd*)cmd; nulterminate(bcmd->cmd); break; } return cmd; } ================================================ FILE: user/stressfs.c ================================================ // Demonstrate that moving the "acquire" in iderw after the loop that // appends to the idequeue results in a race. // For this to work, you should also add a spin within iderw's // idequeue traversal loop. 
Adding the following demonstrated a panic // after about 5 runs of stressfs in QEMU on a 2.1GHz CPU: // for (i = 0; i < 40000; i++) // asm volatile(""); #include "kernel/types.h" #include "kernel/stat.h" #include "user/user.h" #include "kernel/fs.h" #include "kernel/fcntl.h" int main(int argc, char *argv[]) { int fd, i; char path[] = "stressfs0"; char data[512]; printf("stressfs starting\n"); memset(data, 'a', sizeof(data)); for(i = 0; i < 4; i++) if(fork() > 0) break; printf("write %d\n", i); path[8] += i; fd = open(path, O_CREATE | O_RDWR); for(i = 0; i < 20; i++) // printf(fd, "%d\n", i); write(fd, data, sizeof(data)); close(fd); printf("read\n"); fd = open(path, O_RDONLY); for (i = 0; i < 20; i++) read(fd, data, sizeof(data)); close(fd); wait(0); exit(0); } ================================================ FILE: user/ulib.c ================================================ #include "kernel/types.h" #include "kernel/stat.h" #include "kernel/fcntl.h" #include "kernel/riscv.h" #include "kernel/vm.h" #include "user/user.h" // // wrapper so that it's OK if main() does not call exit(). 
// void start(int argc, char **argv) { int r; extern int main(int argc, char **argv); r = main(argc, argv); exit(r); } char* strcpy(char *s, const char *t) { char *os; os = s; while((*s++ = *t++) != 0) ; return os; } int strcmp(const char *p, const char *q) { while(*p && *p == *q) p++, q++; return (uchar)*p - (uchar)*q; } uint strlen(const char *s) { int n; for(n = 0; s[n]; n++) ; return n; } void* memset(void *dst, int c, uint n) { char *cdst = (char *) dst; int i; for(i = 0; i < n; i++){ cdst[i] = c; } return dst; } char* strchr(const char *s, char c) { for(; *s; s++) if(*s == c) return (char*)s; return 0; } char* gets(char *buf, int max) { int i, cc; char c; for(i=0; i+1 < max; ){ cc = read(0, &c, 1); if(cc < 1) break; buf[i++] = c; if(c == '\n' || c == '\r') break; } buf[i] = '\0'; return buf; } int stat(const char *n, struct stat *st) { int fd; int r; fd = open(n, O_RDONLY); if(fd < 0) return -1; r = fstat(fd, st); close(fd); return r; } int atoi(const char *s) { int n; n = 0; while('0' <= *s && *s <= '9') n = n*10 + *s++ - '0'; return n; } void* memmove(void *vdst, const void *vsrc, int n) { char *dst; const char *src; dst = vdst; src = vsrc; if (src > dst) { while(n-- > 0) *dst++ = *src++; } else { dst += n; src += n; while(n-- > 0) *--dst = *--src; } return vdst; } int memcmp(const void *s1, const void *s2, uint n) { const char *p1 = s1, *p2 = s2; while (n-- > 0) { if (*p1 != *p2) { return *p1 - *p2; } p1++; p2++; } return 0; } void * memcpy(void *dst, const void *src, uint n) { return memmove(dst, src, n); } char * sbrk(int n) { return sys_sbrk(n, SBRK_EAGER); } char * sbrklazy(int n) { return sys_sbrk(n, SBRK_LAZY); } ================================================ FILE: user/umalloc.c ================================================ #include "kernel/types.h" #include "kernel/stat.h" #include "user/user.h" #include "kernel/param.h" // Memory allocator by Kernighan and Ritchie, // The C programming Language, 2nd ed. Section 8.7. 
typedef long Align; union header { struct { union header *ptr; uint size; } s; Align x; }; typedef union header Header; static Header base; static Header *freep; void free(void *ap) { Header *bp, *p; bp = (Header*)ap - 1; for(p = freep; !(bp > p && bp < p->s.ptr); p = p->s.ptr) if(p >= p->s.ptr && (bp > p || bp < p->s.ptr)) break; if(bp + bp->s.size == p->s.ptr){ bp->s.size += p->s.ptr->s.size; bp->s.ptr = p->s.ptr->s.ptr; } else bp->s.ptr = p->s.ptr; if(p + p->s.size == bp){ p->s.size += bp->s.size; p->s.ptr = bp->s.ptr; } else p->s.ptr = bp; freep = p; } static Header* morecore(uint nu) { char *p; Header *hp; if(nu < 4096) nu = 4096; p = sbrk(nu * sizeof(Header)); if(p == SBRK_ERROR) return 0; hp = (Header*)p; hp->s.size = nu; free((void*)(hp + 1)); return freep; } void* malloc(uint nbytes) { Header *p, *prevp; uint nunits; nunits = (nbytes + sizeof(Header) - 1)/sizeof(Header) + 1; if((prevp = freep) == 0){ base.s.ptr = freep = prevp = &base; base.s.size = 0; } for(p = prevp->s.ptr; ; prevp = p, p = p->s.ptr){ if(p->s.size >= nunits){ if(p->s.size == nunits) prevp->s.ptr = p->s.ptr; else { p->s.size -= nunits; p += p->s.size; p->s.size = nunits; } freep = prevp; return (void*)(p + 1); } if(p == freep) if((p = morecore(nunits)) == 0) return 0; } } ================================================ FILE: user/user.h ================================================ #define SBRK_ERROR ((char *)-1) struct stat; // system calls int fork(void); int exit(int) __attribute__((noreturn)); int wait(int*); int pipe(int*); int write(int, const void*, int); int read(int, void*, int); int close(int); int kill(int); int exec(const char*, char**); int open(const char*, int); int mknod(const char*, short, short); int unlink(const char*); int fstat(int fd, struct stat*); int link(const char*, const char*); int mkdir(const char*); int chdir(const char*); int dup(int); int getpid(void); char* sys_sbrk(int,int); int pause(int); int uptime(void); // ulib.c int stat(const char*, struct 
stat*); char* strcpy(char*, const char*); void *memmove(void*, const void*, int); char* strchr(const char*, char c); int strcmp(const char*, const char*); char* gets(char*, int max); uint strlen(const char*); void* memset(void*, int, uint); int atoi(const char*); int memcmp(const void *, const void *, uint); void *memcpy(void *, const void *, uint); char* sbrk(int); char* sbrklazy(int); // printf.c void fprintf(int, const char*, ...) __attribute__ ((format (printf, 2, 3))); void printf(const char*, ...) __attribute__ ((format (printf, 1, 2))); // umalloc.c void* malloc(uint); void free(void*); ================================================ FILE: user/user.ld ================================================ OUTPUT_ARCH( "riscv" ) SECTIONS { . = 0x0; .text : { *(.text .text.*) } .rodata : { . = ALIGN(16); *(.srodata .srodata.*) /* do not need to distinguish this from .rodata */ . = ALIGN(16); *(.rodata .rodata.*) } .eh_frame : { *(.eh_frame) *(.eh_frame.*) } . = ALIGN(0x1000); .data : { . = ALIGN(16); *(.sdata .sdata.*) /* do not need to distinguish this from .data */ . = ALIGN(16); *(.data .data.*) } .bss : { . = ALIGN(16); *(.sbss .sbss.*) /* do not need to distinguish this from .bss */ . = ALIGN(16); *(.bss .bss.*) } PROVIDE(end = .); } ================================================ FILE: user/usertests.c ================================================ #include "kernel/param.h" #include "kernel/types.h" #include "kernel/stat.h" #include "user/user.h" #include "kernel/fs.h" #include "kernel/fcntl.h" #include "kernel/syscall.h" #include "kernel/memlayout.h" #include "kernel/riscv.h" // // Tests xv6 system calls. usertests without arguments runs them all // and usertests NAME runs only the test called NAME. The test runner creates for // each test a process and based on the exit status of the process, // the test runner reports "OK" or "FAILED". Some tests result in // kernel printing usertrap messages, which can be ignored if test // prints "OK".
// #define BUFSZ ((MAXOPBLOCKS+2)*BSIZE) char buf[BUFSZ]; // // Section with tests that run fairly quickly. Use -q if you want to // run just those. Without -q usertests also runs the ones that take a // fair amount of time. // // what if you pass ridiculous pointers to system calls // that read user memory with copyin? void copyin(char *s) { uint64 addrs[] = { 0x80000000LL, 0x3fffffe000, 0x3ffffff000, 0x4000000000, 0xffffffffffffffff }; for(int ai = 0; ai < sizeof(addrs)/sizeof(addrs[0]); ai++){ uint64 addr = addrs[ai]; int fd = open("copyin1", O_CREATE|O_WRONLY); if(fd < 0){ printf("open(copyin1) failed\n"); exit(1); } int n = write(fd, (void*)addr, 8192); if(n >= 0){ printf("write(fd, %p, 8192) returned %d, not -1\n", (void*)addr, n); exit(1); } close(fd); unlink("copyin1"); n = write(1, (char*)addr, 8192); if(n > 0){ printf("write(1, %p, 8192) returned %d, not -1 or 0\n", (void*)addr, n); exit(1); } int fds[2]; if(pipe(fds) < 0){ printf("pipe() failed\n"); exit(1); } n = write(fds[1], (char*)addr, 8192); if(n > 0){ printf("write(pipe, %p, 8192) returned %d, not -1 or 0\n", (void*)addr, n); exit(1); } close(fds[0]); close(fds[1]); } } // what if you pass ridiculous pointers to system calls // that write user memory with copyout? 
void copyout(char *s) { uint64 addrs[] = { 0LL, 0x80000000LL, 0x3fffffe000, 0x3ffffff000, 0x4000000000, 0xffffffffffffffff }; for(int ai = 0; ai < sizeof(addrs)/sizeof(addrs[0]); ai++){ uint64 addr = addrs[ai]; int fd = open("README", 0); if(fd < 0){ printf("open(README) failed\n"); exit(1); } int n = read(fd, (void*)addr, 8192); if(n > 0){ printf("read(fd, %p, 8192) returned %d, not -1 or 0\n", (void*)addr, n); exit(1); } close(fd); int fds[2]; if(pipe(fds) < 0){ printf("pipe() failed\n"); exit(1); } n = write(fds[1], "x", 1); if(n != 1){ printf("pipe write failed\n"); exit(1); } n = read(fds[0], (void*)addr, 8192); if(n > 0){ printf("read(pipe, %p, 8192) returned %d, not -1 or 0\n", (void*)addr, n); exit(1); } close(fds[0]); close(fds[1]); } } // what if you pass ridiculous string pointers to system calls? void copyinstr1(char *s) { uint64 addrs[] = { 0x80000000LL, 0x3fffffe000, 0x3ffffff000, 0x4000000000, 0xffffffffffffffff }; for(int ai = 0; ai < sizeof(addrs)/sizeof(addrs[0]); ai++){ uint64 addr = addrs[ai]; int fd = open((char *)addr, O_CREATE|O_WRONLY); if(fd >= 0){ printf("open(%p) returned %d, not -1\n", (void*)addr, fd); exit(1); } } } // what if a string system call argument is exactly the size // of the kernel buffer it is copied into, so that the null // would fall just beyond the end of the kernel buffer? 
void copyinstr2(char *s) { char b[MAXPATH+1]; for(int i = 0; i < MAXPATH; i++) b[i] = 'x'; b[MAXPATH] = '\0'; int ret = unlink(b); if(ret != -1){ printf("unlink(%s) returned %d, not -1\n", b, ret); exit(1); } int fd = open(b, O_CREATE | O_WRONLY); if(fd != -1){ printf("open(%s) returned %d, not -1\n", b, fd); exit(1); } ret = link(b, b); if(ret != -1){ printf("link(%s, %s) returned %d, not -1\n", b, b, ret); exit(1); } char *args[] = { "xx", 0 }; ret = exec(b, args); if(ret != -1){ printf("exec(%s) returned %d, not -1\n", b, fd); exit(1); } int pid = fork(); if(pid < 0){ printf("fork failed\n"); exit(1); } if(pid == 0){ static char big[PGSIZE+1]; for(int i = 0; i < PGSIZE; i++) big[i] = 'x'; big[PGSIZE] = '\0'; char *args2[] = { big, big, big, 0 }; ret = exec("echo", args2); if(ret != -1){ printf("exec(echo, BIG) returned %d, not -1\n", fd); exit(1); } exit(747); // OK } int st = 0; wait(&st); if(st != 747){ printf("exec(echo, BIG) succeeded, should have failed\n"); exit(1); } } // what if a string argument crosses over the end of last user page? void copyinstr3(char *s) { sbrk(8192); uint64 top = (uint64) sbrk(0); if((top % PGSIZE) != 0){ sbrk(PGSIZE - (top % PGSIZE)); } top = (uint64) sbrk(0); if(top % PGSIZE){ printf("oops\n"); exit(1); } char *b = (char *) (top - 1); *b = 'x'; int ret = unlink(b); if(ret != -1){ printf("unlink(%s) returned %d, not -1\n", b, ret); exit(1); } int fd = open(b, O_CREATE | O_WRONLY); if(fd != -1){ printf("open(%s) returned %d, not -1\n", b, fd); exit(1); } ret = link(b, b); if(ret != -1){ printf("link(%s, %s) returned %d, not -1\n", b, b, ret); exit(1); } char *args[] = { "xx", 0 }; ret = exec(b, args); if(ret != -1){ printf("exec(%s) returned %d, not -1\n", b, fd); exit(1); } } // See if the kernel refuses to read/write user memory that the // application doesn't have anymore, because it returned it. 
void rwsbrk(char *s) { int fd, n; uint64 a = (uint64) sbrk(8192); if(a == (uint64) SBRK_ERROR) { printf("sbrk(rwsbrk) failed\n"); exit(1); } if (sbrk(-8192) == SBRK_ERROR) { printf("sbrk(rwsbrk) shrink failed\n"); exit(1); } fd = open("rwsbrk", O_CREATE|O_WRONLY); if(fd < 0){ printf("open(rwsbrk) failed\n"); exit(1); } n = write(fd, (void*)(a+PGSIZE), 1024); if(n >= 0){ printf("write(fd, %p, 1024) returned %d, not -1\n", (void*)a+PGSIZE, n); exit(1); } close(fd); unlink("rwsbrk"); fd = open("README", O_RDONLY); if(fd < 0){ printf("open(README) failed\n"); exit(1); } n = read(fd, (void*)(a+PGSIZE), 10); if(n >= 0){ printf("read(fd, %p, 10) returned %d, not -1\n", (void*)a+PGSIZE, n); exit(1); } close(fd); exit(0); } // test O_TRUNC. void truncate1(char *s) { char buf[32]; unlink("truncfile"); int fd1 = open("truncfile", O_CREATE|O_WRONLY|O_TRUNC); write(fd1, "abcd", 4); close(fd1); int fd2 = open("truncfile", O_RDONLY); int n = read(fd2, buf, sizeof(buf)); if(n != 4){ printf("%s: read %d bytes, wanted 4\n", s, n); exit(1); } fd1 = open("truncfile", O_WRONLY|O_TRUNC); int fd3 = open("truncfile", O_RDONLY); n = read(fd3, buf, sizeof(buf)); if(n != 0){ printf("aaa fd3=%d\n", fd3); printf("%s: read %d bytes, wanted 0\n", s, n); exit(1); } n = read(fd2, buf, sizeof(buf)); if(n != 0){ printf("bbb fd2=%d\n", fd2); printf("%s: read %d bytes, wanted 0\n", s, n); exit(1); } write(fd1, "abcdef", 6); n = read(fd3, buf, sizeof(buf)); if(n != 6){ printf("%s: read %d bytes, wanted 6\n", s, n); exit(1); } n = read(fd2, buf, sizeof(buf)); if(n != 2){ printf("%s: read %d bytes, wanted 2\n", s, n); exit(1); } unlink("truncfile"); close(fd1); close(fd2); close(fd3); } // write to an open FD whose file has just been truncated. // this causes a write at an offset beyond the end of the file. // such writes fail on xv6 (unlike POSIX) but at least // they don't crash. 
void truncate2(char *s) { unlink("truncfile"); int fd1 = open("truncfile", O_CREATE|O_TRUNC|O_WRONLY); write(fd1, "abcd", 4); int fd2 = open("truncfile", O_TRUNC|O_WRONLY); int n = write(fd1, "x", 1); if(n != -1){ printf("%s: write returned %d, expected -1\n", s, n); exit(1); } unlink("truncfile"); close(fd1); close(fd2); } void truncate3(char *s) { int pid, xstatus; close(open("truncfile", O_CREATE|O_TRUNC|O_WRONLY)); pid = fork(); if(pid < 0){ printf("%s: fork failed\n", s); exit(1); } if(pid == 0){ for(int i = 0; i < 100; i++){ char buf[32]; int fd = open("truncfile", O_WRONLY); if(fd < 0){ printf("%s: open failed\n", s); exit(1); } int n = write(fd, "1234567890", 10); if(n != 10){ printf("%s: write got %d, expected 10\n", s, n); exit(1); } close(fd); fd = open("truncfile", O_RDONLY); read(fd, buf, sizeof(buf)); close(fd); } exit(0); } for(int i = 0; i < 150; i++){ int fd = open("truncfile", O_CREATE|O_WRONLY|O_TRUNC); if(fd < 0){ printf("%s: open failed\n", s); exit(1); } int n = write(fd, "xxx", 3); if(n != 3){ printf("%s: write got %d, expected 3\n", s, n); exit(1); } close(fd); } wait(&xstatus); unlink("truncfile"); exit(xstatus); } // does chdir() call iput(p->cwd) in a transaction? void iputtest(char *s) { if(mkdir("iputdir") < 0){ printf("%s: mkdir failed\n", s); exit(1); } if(chdir("iputdir") < 0){ printf("%s: chdir iputdir failed\n", s); exit(1); } if(unlink("../iputdir") < 0){ printf("%s: unlink ../iputdir failed\n", s); exit(1); } if(chdir("/") < 0){ printf("%s: chdir / failed\n", s); exit(1); } } // does exit() call iput(p->cwd) in a transaction? 
void exitiputtest(char *s) { int pid, xstatus; pid = fork(); if(pid < 0){ printf("%s: fork failed\n", s); exit(1); } if(pid == 0){ if(mkdir("iputdir") < 0){ printf("%s: mkdir failed\n", s); exit(1); } if(chdir("iputdir") < 0){ printf("%s: child chdir failed\n", s); exit(1); } if(unlink("../iputdir") < 0){ printf("%s: unlink ../iputdir failed\n", s); exit(1); } exit(0); } wait(&xstatus); exit(xstatus); } // does the error path in open() for attempt to write a // directory call iput() in a transaction? // needs a hacked kernel that pauses just after the namei() // call in sys_open(): // if((ip = namei(path)) == 0) // return -1; // { // int i; // for(i = 0; i < 10000; i++) // yield(); // } void openiputtest(char *s) { int pid, xstatus; if(mkdir("oidir") < 0){ printf("%s: mkdir oidir failed\n", s); exit(1); } pid = fork(); if(pid < 0){ printf("%s: fork failed\n", s); exit(1); } if(pid == 0){ int fd = open("oidir", O_RDWR); if(fd >= 0){ printf("%s: open directory for write succeeded\n", s); exit(1); } exit(0); } pause(1); if(unlink("oidir") != 0){ printf("%s: unlink failed\n", s); exit(1); } wait(&xstatus); exit(xstatus); } // simple file system tests void opentest(char *s) { int fd; fd = open("echo", 0); if(fd < 0){ printf("%s: open echo failed!\n", s); exit(1); } close(fd); fd = open("doesnotexist", 0); if(fd >= 0){ printf("%s: open doesnotexist succeeded!\n", s); exit(1); } } void writetest(char *s) { int fd; int i; enum { N=100, SZ=10 }; fd = open("small", O_CREATE|O_RDWR); if(fd < 0){ printf("%s: error: creat small failed!\n", s); exit(1); } for(i = 0; i < N; i++){ if(write(fd, "aaaaaaaaaa", SZ) != SZ){ printf("%s: error: write aa %d new file failed\n", s, i); exit(1); } if(write(fd, "bbbbbbbbbb", SZ) != SZ){ printf("%s: error: write bb %d new file failed\n", s, i); exit(1); } } close(fd); fd = open("small", O_RDONLY); if(fd < 0){ printf("%s: error: open small failed!\n", s); exit(1); } i = read(fd, buf, N*SZ*2); if(i != N*SZ*2){ printf("%s: read failed\n", s); 
exit(1); } close(fd); if(unlink("small") < 0){ printf("%s: unlink small failed\n", s); exit(1); } } void writebig(char *s) { int i, fd, n; fd = open("big", O_CREATE|O_RDWR); if(fd < 0){ printf("%s: error: creat big failed!\n", s); exit(1); } for(i = 0; i < MAXFILE; i++){ ((int*)buf)[0] = i; if(write(fd, buf, BSIZE) != BSIZE){ printf("%s: error: write big file failed i=%d\n", s, i); exit(1); } } close(fd); fd = open("big", O_RDONLY); if(fd < 0){ printf("%s: error: open big failed!\n", s); exit(1); } n = 0; for(;;){ i = read(fd, buf, BSIZE); if(i == 0){ if(n != MAXFILE){ printf("%s: read only %d blocks from big", s, n); exit(1); } break; } else if(i != BSIZE){ printf("%s: read failed %d\n", s, i); exit(1); } if(((int*)buf)[0] != n){ printf("%s: read content of block %d is %d\n", s, n, ((int*)buf)[0]); exit(1); } n++; } close(fd); if(unlink("big") < 0){ printf("%s: unlink big failed\n", s); exit(1); } } // many creates, followed by unlink test void createtest(char *s) { int i, fd; enum { N=52 }; char name[3]; name[0] = 'a'; name[2] = '\0'; for(i = 0; i < N; i++){ name[1] = '0' + i; fd = open(name, O_CREATE|O_RDWR); close(fd); } name[0] = 'a'; name[2] = '\0'; for(i = 0; i < N; i++){ name[1] = '0' + i; unlink(name); } } void dirtest(char *s) { if(mkdir("dir0") < 0){ printf("%s: mkdir failed\n", s); exit(1); } if(chdir("dir0") < 0){ printf("%s: chdir dir0 failed\n", s); exit(1); } if(chdir("..") < 0){ printf("%s: chdir .. 
failed\n", s); exit(1); } if(unlink("dir0") < 0){ printf("%s: unlink dir0 failed\n", s); exit(1); } } void exectest(char *s) { int fd, xstatus, pid; char *echoargv[] = { "echo", "OK", 0 }; char buf[3]; unlink("echo-ok"); pid = fork(); if(pid < 0) { printf("%s: fork failed\n", s); exit(1); } if(pid == 0) { close(1); fd = open("echo-ok", O_CREATE|O_WRONLY); if(fd < 0) { printf("%s: create failed\n", s); exit(1); } if(fd != 1) { printf("%s: wrong fd\n", s); exit(1); } if(exec("echo", echoargv) < 0){ printf("%s: exec echo failed\n", s); exit(1); } // won't get to here } if (wait(&xstatus) != pid) { printf("%s: wait failed!\n", s); } if(xstatus != 0) exit(xstatus); fd = open("echo-ok", O_RDONLY); if(fd < 0) { printf("%s: open failed\n", s); exit(1); } if (read(fd, buf, 2) != 2) { printf("%s: read failed\n", s); exit(1); } unlink("echo-ok"); if(buf[0] == 'O' && buf[1] == 'K') exit(0); else { printf("%s: wrong output\n", s); exit(1); } } // simple fork and pipe read/write void pipe1(char *s) { int fds[2], pid, xstatus; int seq, i, n, cc, total; enum { N=5, SZ=1033 }; if(pipe(fds) != 0){ printf("%s: pipe() failed\n", s); exit(1); } pid = fork(); seq = 0; if(pid == 0){ close(fds[0]); for(n = 0; n < N; n++){ for(i = 0; i < SZ; i++) buf[i] = seq++; if(write(fds[1], buf, SZ) != SZ){ printf("%s: pipe1 oops 1\n", s); exit(1); } } exit(0); } else if(pid > 0){ close(fds[1]); total = 0; cc = 1; while((n = read(fds[0], buf, cc)) > 0){ for(i = 0; i < n; i++){ if((buf[i] & 0xff) != (seq++ & 0xff)){ printf("%s: pipe1 oops 2\n", s); return; } } total += n; cc = cc * 2; if(cc > sizeof(buf)) cc = sizeof(buf); } if(total != N * SZ){ printf("%s: pipe1 oops 3 total %d\n", s, total); exit(1); } close(fds[0]); wait(&xstatus); exit(xstatus); } else { printf("%s: fork() failed\n", s); exit(1); } } // test if child is killed (status = -1) void killstatus(char *s) { int xst; for(int i = 0; i < 100; i++){ int pid1 = fork(); if(pid1 < 0){ printf("%s: fork failed\n", s); exit(1); } if(pid1 == 0){ 
while(1) { getpid(); } exit(0); } pause(1); kill(pid1); wait(&xst); if(xst != -1) { printf("%s: status should be -1\n", s); exit(1); } } exit(0); } // meant to be run w/ at most two CPUs void preempt(char *s) { int pid1, pid2, pid3; int pfds[2]; pid1 = fork(); if(pid1 < 0) { printf("%s: fork failed", s); exit(1); } if(pid1 == 0) for(;;) ; pid2 = fork(); if(pid2 < 0) { printf("%s: fork failed\n", s); exit(1); } if(pid2 == 0) for(;;) ; pipe(pfds); pid3 = fork(); if(pid3 < 0) { printf("%s: fork failed\n", s); exit(1); } if(pid3 == 0){ close(pfds[0]); if(write(pfds[1], "x", 1) != 1) printf("%s: preempt write error", s); close(pfds[1]); for(;;) ; } close(pfds[1]); if(read(pfds[0], buf, sizeof(buf)) != 1){ printf("%s: preempt read error", s); return; } close(pfds[0]); printf("kill... "); kill(pid1); kill(pid2); kill(pid3); printf("wait... "); wait(0); wait(0); wait(0); } // try to find any races between exit and wait void exitwait(char *s) { int i, pid; for(i = 0; i < 100; i++){ pid = fork(); if(pid < 0){ printf("%s: fork failed\n", s); exit(1); } if(pid){ int xstate; if(wait(&xstate) != pid){ printf("%s: wait wrong pid\n", s); exit(1); } if(i != xstate) { printf("%s: wait wrong exit status\n", s); exit(1); } } else { exit(i); } } } // try to find races in the reparenting // code that handles a parent exiting // when it still has live children. void reparent(char *s) { int master_pid = getpid(); for(int i = 0; i < 200; i++){ int pid = fork(); if(pid < 0){ printf("%s: fork failed\n", s); exit(1); } if(pid){ if(wait(0) != pid){ printf("%s: wait wrong pid\n", s); exit(1); } } else { int pid2 = fork(); if(pid2 < 0){ kill(master_pid); exit(1); } exit(0); } } exit(0); } // what if two children exit() at the same time? 
void twochildren(char *s) { for(int i = 0; i < 1000; i++){ int pid1 = fork(); if(pid1 < 0){ printf("%s: fork failed\n", s); exit(1); } if(pid1 == 0){ exit(0); } else { int pid2 = fork(); if(pid2 < 0){ printf("%s: fork failed\n", s); exit(1); } if(pid2 == 0){ exit(0); } else { wait(0); wait(0); } } } } // concurrent forks to try to expose locking bugs. void forkfork(char *s) { enum { N=2 }; for(int i = 0; i < N; i++){ int pid = fork(); if(pid < 0){ printf("%s: fork failed", s); exit(1); } if(pid == 0){ for(int j = 0; j < 200; j++){ int pid1 = fork(); if(pid1 < 0){ exit(1); } if(pid1 == 0){ exit(0); } wait(0); } exit(0); } } int xstatus; for(int i = 0; i < N; i++){ wait(&xstatus); if(xstatus != 0) { printf("%s: fork in child failed", s); exit(1); } } } void forkforkfork(char *s) { unlink("stopforking"); int pid = fork(); if(pid < 0){ printf("%s: fork failed", s); exit(1); } if(pid == 0){ while(1){ int fd = open("stopforking", 0); if(fd >= 0){ exit(0); } if(fork() < 0){ close(open("stopforking", O_CREATE|O_RDWR)); } } exit(0); } pause(20); // two seconds close(open("stopforking", O_CREATE|O_RDWR)); wait(0); pause(10); // one second } // regression test. does reparent() violate the parent-then-child // locking order when giving away a child to init, so that exit() // deadlocks against init's wait()? also used to trigger a "panic: // release" due to exit() releasing a different p->parent->lock than // it acquired. 
void reparent2(char *s) { for(int i = 0; i < 800; i++){ int pid1 = fork(); if(pid1 < 0){ printf("fork failed\n"); exit(1); } if(pid1 == 0){ fork(); fork(); exit(0); } wait(0); } exit(0); } // allocate all mem, free it, and allocate again void mem(char *s) { void *m1, *m2; int pid; if((pid = fork()) == 0){ m1 = 0; while((m2 = malloc(10001)) != 0){ *(char**)m2 = m1; m1 = m2; } while(m1){ m2 = *(char**)m1; free(m1); m1 = m2; } m1 = malloc(1024*20); if(m1 == 0){ printf("%s: couldn't allocate mem?!!\n", s); exit(1); } free(m1); exit(0); } else { int xstatus; wait(&xstatus); if(xstatus == -1){ // probably page fault, so might be lazy lab, // so OK. exit(0); } exit(xstatus); } } // More file system tests // two processes write to the same file descriptor // is the offset shared? does inode locking work? void sharedfd(char *s) { int fd, pid, i, n, nc, np; enum { N = 1000, SZ=10}; char buf[SZ]; unlink("sharedfd"); fd = open("sharedfd", O_CREATE|O_RDWR); if(fd < 0){ printf("%s: cannot open sharedfd for writing", s); exit(1); } pid = fork(); memset(buf, pid==0?'c':'p', sizeof(buf)); for(i = 0; i < N; i++){ if(write(fd, buf, sizeof(buf)) != sizeof(buf)){ printf("%s: write sharedfd failed\n", s); exit(1); } } if(pid == 0) { exit(0); } else { int xstatus; wait(&xstatus); if(xstatus != 0) exit(xstatus); } close(fd); fd = open("sharedfd", 0); if(fd < 0){ printf("%s: cannot open sharedfd for reading\n", s); exit(1); } nc = np = 0; while((n = read(fd, buf, sizeof(buf))) > 0){ for(i = 0; i < sizeof(buf); i++){ if(buf[i] == 'c') nc++; if(buf[i] == 'p') np++; } } close(fd); unlink("sharedfd"); if(nc == N*SZ && np == N*SZ){ exit(0); } else { printf("%s: nc/np test fails\n", s); exit(1); } } // four processes write different files at the same // time, to test block allocation. 
void fourfiles(char *s) { int fd, pid, i, j, n, total, pi; char *names[] = { "f0", "f1", "f2", "f3" }; char *fname; enum { N=12, NCHILD=4, SZ=500 }; for(pi = 0; pi < NCHILD; pi++){ fname = names[pi]; unlink(fname); pid = fork(); if(pid < 0){ printf("%s: fork failed\n", s); exit(1); } if(pid == 0){ fd = open(fname, O_CREATE | O_RDWR); if(fd < 0){ printf("%s: create failed\n", s); exit(1); } memset(buf, '0'+pi, SZ); for(i = 0; i < N; i++){ if((n = write(fd, buf, SZ)) != SZ){ printf("write failed %d\n", n); exit(1); } } exit(0); } } int xstatus; for(pi = 0; pi < NCHILD; pi++){ wait(&xstatus); if(xstatus != 0) exit(xstatus); } for(i = 0; i < NCHILD; i++){ fname = names[i]; fd = open(fname, 0); total = 0; while((n = read(fd, buf, sizeof(buf))) > 0){ for(j = 0; j < n; j++){ if(buf[j] != '0'+i){ printf("%s: wrong char\n", s); exit(1); } } total += n; } close(fd); if(total != N*SZ){ printf("wrong length %d\n", total); exit(1); } unlink(fname); } } // four processes create and delete different files in same directory void createdelete(char *s) { enum { N = 20, NCHILD=4 }; int pid, i, fd, pi; char name[32]; for(pi = 0; pi < NCHILD; pi++){ pid = fork(); if(pid < 0){ printf("%s: fork failed\n", s); exit(1); } if(pid == 0){ name[0] = 'p' + pi; name[2] = '\0'; for(i = 0; i < N; i++){ name[1] = '0' + i; fd = open(name, O_CREATE | O_RDWR); if(fd < 0){ printf("%s: create failed\n", s); exit(1); } close(fd); if(i > 0 && (i % 2 ) == 0){ name[1] = '0' + (i / 2); if(unlink(name) < 0){ printf("%s: unlink failed\n", s); exit(1); } } } exit(0); } } int xstatus; for(pi = 0; pi < NCHILD; pi++){ wait(&xstatus); if(xstatus != 0) exit(1); } name[0] = name[1] = name[2] = 0; for(i = 0; i < N; i++){ for(pi = 0; pi < NCHILD; pi++){ name[0] = 'p' + pi; name[1] = '0' + i; fd = open(name, 0); if((i == 0 || i >= N/2) && fd < 0){ printf("%s: oops createdelete %s didn't exist\n", s, name); exit(1); } else if((i >= 1 && i < N/2) && fd >= 0){ printf("%s: oops createdelete %s did exist\n", s, name); 
exit(1); } if(fd >= 0) close(fd); } } for(i = 0; i < N; i++){ for(pi = 0; pi < NCHILD; pi++){ name[0] = 'p' + pi; name[1] = '0' + i; unlink(name); } } } // can I unlink a file and still read it? void unlinkread(char *s) { enum { SZ = 5 }; int fd, fd1; fd = open("unlinkread", O_CREATE | O_RDWR); if(fd < 0){ printf("%s: create unlinkread failed\n", s); exit(1); } write(fd, "hello", SZ); close(fd); fd = open("unlinkread", O_RDWR); if(fd < 0){ printf("%s: open unlinkread failed\n", s); exit(1); } if(unlink("unlinkread") != 0){ printf("%s: unlink unlinkread failed\n", s); exit(1); } fd1 = open("unlinkread", O_CREATE | O_RDWR); write(fd1, "yyy", 3); close(fd1); if(read(fd, buf, sizeof(buf)) != SZ){ printf("%s: unlinkread read failed", s); exit(1); } if(buf[0] != 'h'){ printf("%s: unlinkread wrong data\n", s); exit(1); } if(write(fd, buf, 10) != 10){ printf("%s: unlinkread write failed\n", s); exit(1); } close(fd); unlink("unlinkread"); } void linktest(char *s) { enum { SZ = 5 }; int fd; unlink("lf1"); unlink("lf2"); fd = open("lf1", O_CREATE|O_RDWR); if(fd < 0){ printf("%s: create lf1 failed\n", s); exit(1); } if(write(fd, "hello", SZ) != SZ){ printf("%s: write lf1 failed\n", s); exit(1); } close(fd); if(link("lf1", "lf2") < 0){ printf("%s: link lf1 lf2 failed\n", s); exit(1); } unlink("lf1"); if(open("lf1", 0) >= 0){ printf("%s: unlinked lf1 but it is still there!\n", s); exit(1); } fd = open("lf2", 0); if(fd < 0){ printf("%s: open lf2 failed\n", s); exit(1); } if(read(fd, buf, sizeof(buf)) != SZ){ printf("%s: read lf2 failed\n", s); exit(1); } close(fd); if(link("lf2", "lf2") >= 0){ printf("%s: link lf2 lf2 succeeded! oops\n", s); exit(1); } unlink("lf2"); if(link("lf2", "lf1") >= 0){ printf("%s: link non-existent succeeded! oops\n", s); exit(1); } if(link(".", "lf1") >= 0){ printf("%s: link . lf1 succeeded! 
oops\n", s); exit(1); } } // test concurrent create/link/unlink of the same file void concreate(char *s) { enum { N = 40 }; char file[3]; int i, pid, n, fd; char fa[N]; struct { ushort inum; char name[DIRSIZ]; } de; file[0] = 'C'; file[2] = '\0'; for(i = 0; i < N; i++){ file[1] = '0' + i; unlink(file); pid = fork(); if(pid && (i % 3) == 1){ link("C0", file); } else if(pid == 0 && (i % 5) == 1){ link("C0", file); } else { fd = open(file, O_CREATE | O_RDWR); if(fd < 0){ printf("concreate create %s failed\n", file); exit(1); } close(fd); } if(pid == 0) { exit(0); } else { int xstatus; wait(&xstatus); if(xstatus != 0) exit(1); } } memset(fa, 0, sizeof(fa)); fd = open(".", 0); n = 0; while(read(fd, &de, sizeof(de)) > 0){ if(de.inum == 0) continue; if(de.name[0] == 'C' && de.name[2] == '\0'){ i = de.name[1] - '0'; if(i < 0 || i >= sizeof(fa)){ printf("%s: concreate weird file %s\n", s, de.name); exit(1); } if(fa[i]){ printf("%s: concreate duplicate file %s\n", s, de.name); exit(1); } fa[i] = 1; n++; } } close(fd); if(n != N){ printf("%s: concreate not enough files in directory listing\n", s); exit(1); } for(i = 0; i < N; i++){ file[1] = '0' + i; pid = fork(); if(pid < 0){ printf("%s: fork failed\n", s); exit(1); } if(((i % 3) == 0 && pid == 0) || ((i % 3) == 1 && pid != 0)){ close(open(file, 0)); close(open(file, 0)); close(open(file, 0)); close(open(file, 0)); close(open(file, 0)); close(open(file, 0)); } else { unlink(file); unlink(file); unlink(file); unlink(file); unlink(file); unlink(file); } if(pid == 0) exit(0); else wait(0); } } // another concurrent link/unlink/create test, // to look for deadlocks. void linkunlink(char *s) { int pid, i; unlink("x"); pid = fork(); if(pid < 0){ printf("%s: fork failed\n", s); exit(1); } unsigned int x = (pid ? 
1 : 97); for(i = 0; i < 100; i++){ x = x * 1103515245 + 12345; if((x % 3) == 0){ close(open("x", O_RDWR | O_CREATE)); } else if((x % 3) == 1){ link("cat", "x"); } else { unlink("x"); } } if(pid) wait(0); else exit(0); } void subdir(char *s) { int fd, cc; unlink("ff"); if(mkdir("dd") != 0){ printf("%s: mkdir dd failed\n", s); exit(1); } fd = open("dd/ff", O_CREATE | O_RDWR); if(fd < 0){ printf("%s: create dd/ff failed\n", s); exit(1); } write(fd, "ff", 2); close(fd); if(unlink("dd") >= 0){ printf("%s: unlink dd (non-empty dir) succeeded!\n", s); exit(1); } if(mkdir("/dd/dd") != 0){ printf("%s: subdir mkdir dd/dd failed\n", s); exit(1); } fd = open("dd/dd/ff", O_CREATE | O_RDWR); if(fd < 0){ printf("%s: create dd/dd/ff failed\n", s); exit(1); } write(fd, "FF", 2); close(fd); fd = open("dd/dd/../ff", 0); if(fd < 0){ printf("%s: open dd/dd/../ff failed\n", s); exit(1); } cc = read(fd, buf, sizeof(buf)); if(cc != 2 || buf[0] != 'f'){ printf("%s: dd/dd/../ff wrong content\n", s); exit(1); } close(fd); if(link("dd/dd/ff", "dd/dd/ffff") != 0){ printf("%s: link dd/dd/ff dd/dd/ffff failed\n", s); exit(1); } if(unlink("dd/dd/ff") != 0){ printf("%s: unlink dd/dd/ff failed\n", s); exit(1); } if(open("dd/dd/ff", O_RDONLY) >= 0){ printf("%s: open (unlinked) dd/dd/ff succeeded\n", s); exit(1); } if(chdir("dd") != 0){ printf("%s: chdir dd failed\n", s); exit(1); } if(chdir("dd/../../dd") != 0){ printf("%s: chdir dd/../../dd failed\n", s); exit(1); } if(chdir("dd/../../../dd") != 0){ printf("%s: chdir dd/../../../dd failed\n", s); exit(1); } if(chdir("./..") != 0){ printf("%s: chdir ./.. 
failed\n", s); exit(1); } fd = open("dd/dd/ffff", 0); if(fd < 0){ printf("%s: open dd/dd/ffff failed\n", s); exit(1); } if(read(fd, buf, sizeof(buf)) != 2){ printf("%s: read dd/dd/ffff wrong len\n", s); exit(1); } close(fd); if(open("dd/dd/ff", O_RDONLY) >= 0){ printf("%s: open (unlinked) dd/dd/ff succeeded!\n", s); exit(1); } if(open("dd/ff/ff", O_CREATE|O_RDWR) >= 0){ printf("%s: create dd/ff/ff succeeded!\n", s); exit(1); } if(open("dd/xx/ff", O_CREATE|O_RDWR) >= 0){ printf("%s: create dd/xx/ff succeeded!\n", s); exit(1); } if(open("dd", O_CREATE) >= 0){ printf("%s: create dd succeeded!\n", s); exit(1); } if(open("dd", O_RDWR) >= 0){ printf("%s: open dd rdwr succeeded!\n", s); exit(1); } if(open("dd", O_WRONLY) >= 0){ printf("%s: open dd wronly succeeded!\n", s); exit(1); } if(link("dd/ff/ff", "dd/dd/xx") == 0){ printf("%s: link dd/ff/ff dd/dd/xx succeeded!\n", s); exit(1); } if(link("dd/xx/ff", "dd/dd/xx") == 0){ printf("%s: link dd/xx/ff dd/dd/xx succeeded!\n", s); exit(1); } if(link("dd/ff", "dd/dd/ffff") == 0){ printf("%s: link dd/ff dd/dd/ffff succeeded!\n", s); exit(1); } if(mkdir("dd/ff/ff") == 0){ printf("%s: mkdir dd/ff/ff succeeded!\n", s); exit(1); } if(mkdir("dd/xx/ff") == 0){ printf("%s: mkdir dd/xx/ff succeeded!\n", s); exit(1); } if(mkdir("dd/dd/ffff") == 0){ printf("%s: mkdir dd/dd/ffff succeeded!\n", s); exit(1); } if(unlink("dd/xx/ff") == 0){ printf("%s: unlink dd/xx/ff succeeded!\n", s); exit(1); } if(unlink("dd/ff/ff") == 0){ printf("%s: unlink dd/ff/ff succeeded!\n", s); exit(1); } if(chdir("dd/ff") == 0){ printf("%s: chdir dd/ff succeeded!\n", s); exit(1); } if(chdir("dd/xx") == 0){ printf("%s: chdir dd/xx succeeded!\n", s); exit(1); } if(unlink("dd/dd/ffff") != 0){ printf("%s: unlink dd/dd/ff failed\n", s); exit(1); } if(unlink("dd/ff") != 0){ printf("%s: unlink dd/ff failed\n", s); exit(1); } if(unlink("dd") == 0){ printf("%s: unlink non-empty dd succeeded!\n", s); exit(1); } if(unlink("dd/dd") < 0){ printf("%s: unlink dd/dd failed\n", 
s); exit(1); } if(unlink("dd") < 0){ printf("%s: unlink dd failed\n", s); exit(1); } } // test writes that are larger than the log. void bigwrite(char *s) { int fd, sz; unlink("bigwrite"); for(sz = 499; sz < (MAXOPBLOCKS+2)*BSIZE; sz += 471){ fd = open("bigwrite", O_CREATE | O_RDWR); if(fd < 0){ printf("%s: cannot create bigwrite\n", s); exit(1); } int i; for(i = 0; i < 2; i++){ int cc = write(fd, buf, sz); if(cc != sz){ printf("%s: write(%d) ret %d\n", s, sz, cc); exit(1); } } close(fd); unlink("bigwrite"); } } void bigfile(char *s) { enum { N = 20, SZ=600 }; int fd, i, total, cc; unlink("bigfile.dat"); fd = open("bigfile.dat", O_CREATE | O_RDWR); if(fd < 0){ printf("%s: cannot create bigfile", s); exit(1); } for(i = 0; i < N; i++){ memset(buf, i, SZ); if(write(fd, buf, SZ) != SZ){ printf("%s: write bigfile failed\n", s); exit(1); } } close(fd); fd = open("bigfile.dat", 0); if(fd < 0){ printf("%s: cannot open bigfile\n", s); exit(1); } total = 0; for(i = 0; ; i++){ cc = read(fd, buf, SZ/2); if(cc < 0){ printf("%s: read bigfile failed\n", s); exit(1); } if(cc == 0) break; if(cc != SZ/2){ printf("%s: short read bigfile\n", s); exit(1); } if(buf[0] != i/2 || buf[SZ/2-1] != i/2){ printf("%s: read bigfile wrong data\n", s); exit(1); } total += cc; } close(fd); if(total != N*SZ){ printf("%s: read bigfile wrong total\n", s); exit(1); } unlink("bigfile.dat"); } void fourteen(char *s) { int fd; // DIRSIZ is 14. 
if(mkdir("12345678901234") != 0){ printf("%s: mkdir 12345678901234 failed\n", s); exit(1); } if(mkdir("12345678901234/123456789012345") != 0){ printf("%s: mkdir 12345678901234/123456789012345 failed\n", s); exit(1); } fd = open("123456789012345/123456789012345/123456789012345", O_CREATE); if(fd < 0){ printf("%s: create 123456789012345/123456789012345/123456789012345 failed\n", s); exit(1); } close(fd); fd = open("12345678901234/12345678901234/12345678901234", 0); if(fd < 0){ printf("%s: open 12345678901234/12345678901234/12345678901234 failed\n", s); exit(1); } close(fd); if(mkdir("12345678901234/12345678901234") == 0){ printf("%s: mkdir 12345678901234/12345678901234 succeeded!\n", s); exit(1); } if(mkdir("123456789012345/12345678901234") == 0){ printf("%s: mkdir 12345678901234/123456789012345 succeeded!\n", s); exit(1); } // clean up unlink("123456789012345/12345678901234"); unlink("12345678901234/12345678901234"); unlink("12345678901234/12345678901234/12345678901234"); unlink("123456789012345/123456789012345/123456789012345"); unlink("12345678901234/123456789012345"); unlink("12345678901234"); } void rmdot(char *s) { if(mkdir("dots") != 0){ printf("%s: mkdir dots failed\n", s); exit(1); } if(chdir("dots") != 0){ printf("%s: chdir dots failed\n", s); exit(1); } if(unlink(".") == 0){ printf("%s: rm . worked!\n", s); exit(1); } if(unlink("..") == 0){ printf("%s: rm .. worked!\n", s); exit(1); } if(chdir("/") != 0){ printf("%s: chdir / failed\n", s); exit(1); } if(unlink("dots/.") == 0){ printf("%s: unlink dots/. worked!\n", s); exit(1); } if(unlink("dots/..") == 0){ printf("%s: unlink dots/.. 
worked!\n", s); exit(1); } if(unlink("dots") != 0){ printf("%s: unlink dots failed!\n", s); exit(1); } } void dirfile(char *s) { int fd; fd = open("dirfile", O_CREATE); if(fd < 0){ printf("%s: create dirfile failed\n", s); exit(1); } close(fd); if(chdir("dirfile") == 0){ printf("%s: chdir dirfile succeeded!\n", s); exit(1); } fd = open("dirfile/xx", 0); if(fd >= 0){ printf("%s: create dirfile/xx succeeded!\n", s); exit(1); } fd = open("dirfile/xx", O_CREATE); if(fd >= 0){ printf("%s: create dirfile/xx succeeded!\n", s); exit(1); } if(mkdir("dirfile/xx") == 0){ printf("%s: mkdir dirfile/xx succeeded!\n", s); exit(1); } if(unlink("dirfile/xx") == 0){ printf("%s: unlink dirfile/xx succeeded!\n", s); exit(1); } if(link("README", "dirfile/xx") == 0){ printf("%s: link to dirfile/xx succeeded!\n", s); exit(1); } if(unlink("dirfile") != 0){ printf("%s: unlink dirfile failed!\n", s); exit(1); } fd = open(".", O_RDWR); if(fd >= 0){ printf("%s: open . for writing succeeded!\n", s); exit(1); } fd = open(".", 0); if(write(fd, "x", 1) > 0){ printf("%s: write . succeeded!\n", s); exit(1); } close(fd); } // test that iput() is called at the end of _namei(). // also tests empty file names. void iref(char *s) { int i, fd; for(i = 0; i < NINODE + 1; i++){ if(mkdir("irefd") != 0){ printf("%s: mkdir irefd failed\n", s); exit(1); } if(chdir("irefd") != 0){ printf("%s: chdir irefd failed\n", s); exit(1); } mkdir(""); link("README", ""); fd = open("", O_CREATE); if(fd >= 0) close(fd); fd = open("xx", O_CREATE); if(fd >= 0) close(fd); unlink("xx"); } // clean up for(i = 0; i < NINODE + 1; i++){ chdir(".."); unlink("irefd"); } chdir("/"); } // test that fork fails gracefully // the forktest binary also does this, but it runs out of proc entries first. // inside the bigger usertests binary, we run out of memory first. 
void forktest(char *s) { enum{ N = 1000 }; int n, pid; for(n=0; n 0; n--){ if(wait(0) < 0){ printf("%s: wait stopped early\n", s); exit(1); } } if(wait(0) != -1){ printf("%s: wait got too many\n", s); exit(1); } } void sbrkbasic(char *s) { enum { TOOMUCH=1024*1024*1024}; int i, pid, xstatus; char *c, *a, *b; // does sbrk() return the expected failure value? pid = fork(); if(pid < 0){ printf("fork failed in sbrkbasic\n"); exit(1); } if(pid == 0){ a = sbrk(TOOMUCH); if(a == (char*)SBRK_ERROR){ // it's OK if this fails. exit(0); } for(b = a; b < a+TOOMUCH; b += PGSIZE){ *b = 99; } // we should not get here! either sbrk(TOOMUCH) // should have failed, or (with lazy allocation) // a pagefault should have killed this process. exit(1); } wait(&xstatus); if(xstatus == 1){ printf("%s: too much memory allocated!\n", s); exit(1); } // can one sbrk() less than a page? a = sbrk(0); for(i = 0; i < 5000; i++){ b = sbrk(1); if(b != a){ printf("%s: sbrk test failed %d %p %p\n", s, i, a, b); exit(1); } *b = 1; a = b + 1; } pid = fork(); if(pid < 0){ printf("%s: sbrk test fork failed\n", s); exit(1); } c = sbrk(1); c = sbrk(1); if(c != a + 1){ printf("%s: sbrk test failed post-fork\n", s); exit(1); } if(pid == 0) exit(0); wait(&xstatus); exit(xstatus); } void sbrkmuch(char *s) { enum { BIG=100*1024*1024 }; char *c, *oldbrk, *a, *lastaddr, *p; uint64 amt; oldbrk = sbrk(0); // can one grow address space to something big? a = sbrk(0); amt = BIG - (uint64)a; p = sbrk(amt); if (p != a) { printf("%s: sbrk test failed to grow big address space; enough phys mem?\n", s); exit(1); } lastaddr = (char*) (BIG-1); *lastaddr = 99; // can one de-allocate? a = sbrk(0); c = sbrk(-PGSIZE); if(c == (char*)SBRK_ERROR){ printf("%s: sbrk could not deallocate\n", s); exit(1); } c = sbrk(0); if(c != a - PGSIZE){ printf("%s: sbrk deallocation produced wrong address, a %p c %p\n", s, a, c); exit(1); } // can one re-allocate that page? 
a = sbrk(0); c = sbrk(PGSIZE); if(c != a || sbrk(0) != a + PGSIZE){ printf("%s: sbrk re-allocation failed, a %p c %p\n", s, a, c); exit(1); } if(*lastaddr == 99){ // should be zero printf("%s: sbrk de-allocation didn't really deallocate\n", s); exit(1); } a = sbrk(0); c = sbrk(-(sbrk(0) - oldbrk)); if(c != a){ printf("%s: sbrk downsize failed, a %p c %p\n", s, a, c); exit(1); } } // can we read the kernel's memory? void kernmem(char *s) { char *a; int pid; for(a = (char*)(KERNBASE); a < (char*) (KERNBASE+2000000); a += 50000){ pid = fork(); if(pid < 0){ printf("%s: fork failed\n", s); exit(1); } if(pid == 0){ printf("%s: oops could read %p = %x\n", s, a, *a); exit(1); } int xstatus; wait(&xstatus); if(xstatus != -1) // did kernel kill child? exit(1); } } // user code should not be able to write to addresses above MAXVA. void MAXVAplus(char *s) { volatile uint64 a = MAXVA; for( ; a != 0; a <<= 1){ int pid; pid = fork(); if(pid < 0){ printf("%s: fork failed\n", s); exit(1); } if(pid == 0){ *(char*)a = 99; printf("%s: oops wrote %p\n", s, (void*)a); exit(1); } int xstatus; wait(&xstatus); if(xstatus != -1) // did kernel kill child? exit(1); } } // if we run the system out of memory, does it clean up the last // failed allocation? 
void sbrkfail(char *s) { enum { BIG=100*1024*1024 }; int i, xstatus; int fds[2]; char scratch; char *c, *a; int pids[10]; int pid; int failed; failed = 0; if(pipe(fds) != 0){ printf("%s: pipe() failed\n", s); exit(1); } for(i = 0; i < sizeof(pids)/sizeof(pids[0]); i++){ if((pids[i] = fork()) == 0){ // allocate a lot of memory if (sbrk(BIG - (uint64)sbrk(0)) == (char*)SBRK_ERROR) write(fds[1], "0", 1); else write(fds[1], "1", 1); // sit around until killed for(;;) pause(1000); } if(pids[i] != -1) { read(fds[0], &scratch, 1); if(scratch == '0') failed = 1; } } if(!failed) { printf("%s: no allocation failed; allocate more?\n", s); } // if those failed allocations freed up the pages they did allocate, // we'll be able to allocate here c = sbrk(PGSIZE); for(i = 0; i < sizeof(pids)/sizeof(pids[0]); i++){ if(pids[i] == -1) continue; kill(pids[i]); wait(0); } if(c == (char*)SBRK_ERROR){ printf("%s: failed sbrk leaked memory\n", s); exit(1); } // test running fork with the above allocated page pid = fork(); if(pid < 0){ printf("%s: fork failed\n", s); exit(1); } if(pid == 0){ // allocate a lot of memory. 
this should produce an error a = sbrk(10*BIG); if(a == (char*)SBRK_ERROR){ exit(0); } printf("%s: allocate a lot of memory succeeded %d\n", s, 10*BIG); exit(1); } wait(&xstatus); if(xstatus != 0) exit(1); } // test reads/writes from/to allocated memory void sbrkarg(char *s) { char *a; int fd, n; a = sbrk(PGSIZE); fd = open("sbrk", O_CREATE|O_WRONLY); unlink("sbrk"); if(fd < 0) { printf("%s: open sbrk failed\n", s); exit(1); } if ((n = write(fd, a, PGSIZE)) < 0) { printf("%s: write sbrk failed\n", s); exit(1); } close(fd); // test writes to allocated memory a = sbrk(PGSIZE); if(pipe((int *) a) != 0){ printf("%s: pipe() failed\n", s); exit(1); } } void validatetest(char *s) { int hi; uint64 p; hi = 1100*1024; for(p = 0; p <= (uint)hi; p += PGSIZE){ // try to crash the kernel by passing in a bad string pointer if(link("nosuchfile", (char*)p) != -1){ printf("%s: link should not succeed\n", s); exit(1); } } } // does uninitialized data start out zero? char uninit[10000]; void bsstest(char *s) { int i; for(i = 0; i < sizeof(uninit); i++){ if(uninit[i] != '\0'){ printf("%s: bss test failed\n", s); exit(1); } } } // does exec return an error if the arguments // are larger than a page? or does it write // below the stack and wreck the instructions/data? void bigargtest(char *s) { int pid, fd, xstatus; unlink("bigarg-ok"); pid = fork(); if(pid == 0){ static char *args[MAXARG]; int i; char big[400]; memset(big, ' ', sizeof(big)); big[sizeof(big)-1] = '\0'; for(i = 0; i < MAXARG-1; i++) args[i] = big; args[MAXARG-1] = 0; // this exec() should fail (and return) because the // arguments are too large. exec("echo", args); fd = open("bigarg-ok", O_CREATE); close(fd); exit(0); } else if(pid < 0){ printf("%s: bigargtest: fork failed\n", s); exit(1); } wait(&xstatus); if(xstatus != 0) exit(xstatus); fd = open("bigarg-ok", 0); if(fd < 0){ printf("%s: bigarg test failed!\n", s); exit(1); } close(fd); } // what happens when the file system runs out of blocks? 
// answer: balloc panics, so this test is not useful. void fsfull() { int nfiles; int fsblocks = 0; printf("fsfull test\n"); for(nfiles = 0; ; nfiles++){ char name[64]; name[0] = 'f'; name[1] = '0' + nfiles / 1000; name[2] = '0' + (nfiles % 1000) / 100; name[3] = '0' + (nfiles % 100) / 10; name[4] = '0' + (nfiles % 10); name[5] = '\0'; printf("writing %s\n", name); int fd = open(name, O_CREATE|O_RDWR); if(fd < 0){ printf("open %s failed\n", name); break; } int total = 0; while(1){ int cc = write(fd, buf, BSIZE); if(cc < BSIZE) break; total += cc; fsblocks++; } printf("wrote %d bytes\n", total); close(fd); if(total == 0) break; } while(nfiles >= 0){ char name[64]; name[0] = 'f'; name[1] = '0' + nfiles / 1000; name[2] = '0' + (nfiles % 1000) / 100; name[3] = '0' + (nfiles % 100) / 10; name[4] = '0' + (nfiles % 10); name[5] = '\0'; unlink(name); nfiles--; } printf("fsfull test finished\n"); } void argptest(char *s) { int fd; fd = open("init", O_RDONLY); if (fd < 0) { printf("%s: open failed\n", s); exit(1); } read(fd, sbrk(0) - 1, -1); close(fd); } // check that there's an invalid page beneath // the user stack, to catch stack overflow. void stacktest(char *s) { int pid; int xstatus; pid = fork(); if(pid == 0) { char *sp = (char *) r_sp(); sp -= USERSTACK*PGSIZE; // the *sp should cause a trap. printf("%s: stacktest: read below stack %d\n", s, *sp); exit(1); } else if(pid < 0){ printf("%s: fork failed\n", s); exit(1); } wait(&xstatus); if(xstatus == -1) // kernel killed child? exit(0); else exit(xstatus); } // check that writes to a few forbidden addresses // cause a fault, e.g. process's text and TRAMPOLINE. 
void nowrite(char *s) { int pid; int xstatus; uint64 addrs[] = { 0, 0x80000000LL, 0x3fffffe000, 0x3ffffff000, 0x4000000000, 0xffffffffffffffff }; for(int ai = 0; ai < sizeof(addrs)/sizeof(addrs[0]); ai++){ pid = fork(); if(pid == 0) { volatile int *addr = (int *) addrs[ai]; *addr = 10; printf("%s: write to %p did not fail!\n", s, addr); exit(0); } else if(pid < 0){ printf("%s: fork failed\n", s); exit(1); } wait(&xstatus); if(xstatus == 0){ // kernel did not kill child! exit(1); } } exit(0); } // regression test. copyin(), copyout(), and copyinstr() used to cast // the virtual page address to uint, which (with certain wild system // call arguments) resulted in a kernel page faults. void *big = (void*) 0xeaeb0b5b00002f5e; void pgbug(char *s) { char *argv[1]; argv[0] = 0; exec(big, argv); pipe(big); exit(0); } // regression test. does the kernel panic if a process sbrk()s its // size to be less than a page, or zero, or reduces the break by an // amount too small to cause a page to be freed? void sbrkbugs(char *s) { int pid = fork(); if(pid < 0){ printf("fork failed\n"); exit(1); } if(pid == 0){ int sz = (uint64) sbrk(0); // free all user memory; there used to be a bug that // would not adjust p->sz correctly in this case, // causing exit() to panic. sbrk(-sz); // user page fault here. exit(0); } wait(0); pid = fork(); if(pid < 0){ printf("fork failed\n"); exit(1); } if(pid == 0){ int sz = (uint64) sbrk(0); // set the break to somewhere in the very first // page; there used to be a bug that would incorrectly // free the first page. sbrk(-(sz - 3500)); exit(0); } wait(0); pid = fork(); if(pid < 0){ printf("fork failed\n"); exit(1); } if(pid == 0){ // set the break in the middle of a page. sbrk((10*PGSIZE + 2048) - (uint64)sbrk(0)); // reduce the break a bit, but not enough to // cause a page to be freed. this used to cause // a panic. 
sbrk(-10); exit(0); } wait(0); exit(0); } // if process size was somewhat more than a page boundary, and then // shrunk to be somewhat less than that page boundary, can the kernel // still copyin() from addresses in the last page? void sbrklast(char *s) { uint64 top = (uint64) sbrk(0); if((top % PGSIZE) != 0) sbrk(PGSIZE - (top % PGSIZE)); sbrk(PGSIZE); sbrk(10); sbrk(-20); top = (uint64) sbrk(0); char *p = (char *) (top - 64); p[0] = 'x'; p[1] = '\0'; int fd = open(p, O_RDWR|O_CREATE); write(fd, p, 1); close(fd); fd = open(p, O_RDWR); p[0] = '\0'; read(fd, p, 1); if(p[0] != 'x') exit(1); } // does sbrk handle signed int32 wrap-around with // negative arguments? void sbrk8000(char *s) { sbrk(0x80000004); volatile char *top = sbrk(0); *(top-1) = *(top-1) + 1; } // regression test. test whether exec() leaks memory if one of the // arguments is invalid. the test passes if the kernel doesn't panic. void badarg(char *s) { for(int i = 0; i < 50000; i++){ char *argv[2]; argv[0] = (char*)0xffffffff; argv[1] = 0; exec("echo", argv); } exit(0); } #define REGION_SZ (1024 * 1024 * 1024) // Touch a page every 64 pages, which with lazy allocation // causes one page to be allocated. void lazy_alloc(char *s) { char *i, *prev_end, *new_end; prev_end = sbrklazy(REGION_SZ); if (prev_end == (char *) SBRK_ERROR) { printf("sbrklazy() failed\n"); exit(1); } new_end = prev_end + REGION_SZ; for (i = prev_end + PGSIZE; i < new_end; i += 64 * PGSIZE) *(char **)i = i; for (i = prev_end + PGSIZE; i < new_end; i += 64 * PGSIZE) { if (*(char **)i != i) { printf("failed to read value from memory\n"); exit(1); } } exit(0); } // Touch a page every 64 pages in region, which with lazy allocation // causes one page to be allocated. Check that freeing the region // frees the allocated pages. 
void lazy_unmap(char *s) { int pid; char *i, *prev_end, *new_end; prev_end = sbrklazy(REGION_SZ); if (prev_end == (char*)SBRK_ERROR) { printf("sbrklazy() failed\n"); exit(1); } new_end = prev_end + REGION_SZ; for (i = prev_end + PGSIZE; i < new_end; i += PGSIZE * PGSIZE) *(char **)i = i; for (i = prev_end + PGSIZE; i < new_end; i += PGSIZE * PGSIZE) { pid = fork(); if (pid < 0) { printf("error forking\n"); exit(1); } else if (pid == 0) { sbrklazy(-1L * REGION_SZ); *(char **)i = i; exit(0); } else { int status; wait(&status); if (status == 0) { printf("memory not unmapped\n"); exit(1); } } } exit(0); } void lazy_copy(char *s) { // copyinstr on lazy page { char *p = sbrk(0); sbrklazy(4*PGSIZE); open(p + 8192, 0); } { void *xx = sbrk(0); void *ret = sbrk(-(((uint64) xx)+1)); if(ret != xx){ printf("sbrk(sbrk(0)+1) returned %p, not old sz\n", ret); exit(1); } } // read() and write() to these addresses should fail. unsigned long bad[] = { 0x3fffffc000, 0x3fffffd000, 0x3fffffe000, 0x3ffffff000, 0x4000000000, 0x8000000000, }; for(int i = 0; i < sizeof(bad)/sizeof(bad[0]); i++){ int fd = open("README", 0); if(fd < 0) { printf("cannot open README\n"); exit(1); } if(read(fd, (char*)bad[i], 512) >= 0) { printf("read succeeded\n"); exit(1); } close(fd); fd = open("junk", O_CREATE|O_RDWR|O_TRUNC); if(fd < 0) { printf("cannot open junk\n"); exit(1); } if(write(fd, (char*)bad[i], 512) >= 0) { printf("write succeeded\n"); exit(1); } close(fd); } exit(0); } void lazy_sbrk(char *s) { // sbrk() takes just int, so take 2^30-sized steps towards MAXVA char *p = sbrk(0); while ((uint64)p < MAXVA-(1<<30)) { p = sbrklazy(1<<30); if (p < 0) { printf("sbrklazy(%d) returned %p\n", 1<<30, p); exit(1); } p = sbrklazy(0); } int n = TRAPFRAME-PGSIZE-(uint64)p; char *p1 = sbrklazy(n); if (p1 < 0 || p1 != p) { printf("sbrklazy(%d) returned %p, not expected %p\n", n, p1, p); exit(1); } p = sbrk(PGSIZE); if (p < 0 || (uint64)p != TRAPFRAME-PGSIZE) { printf("sbrk(%d) returned %p, not expected 
TRAPFRAME-PGSIZE\n", PGSIZE, p); exit(1); } p[0] = 1; if (p[1] != 0) { printf("sbrk() returned non-zero-filled memory\n"); exit(1); } p = sbrk(1); if ((uint64)p != -1) { printf("sbrk(1) returned %p, expected error\n", p); exit(1); } p = sbrklazy(1); if ((uint64)p != -1) { printf("sbrklazy(1) returned %p, expected error\n", p); exit(1); } exit(0); } struct test { void (*f)(char *); char *s; } quicktests[] = { {copyin, "copyin"}, {copyout, "copyout"}, {copyinstr1, "copyinstr1"}, {copyinstr2, "copyinstr2"}, {copyinstr3, "copyinstr3"}, {rwsbrk, "rwsbrk" }, {truncate1, "truncate1"}, {truncate2, "truncate2"}, {truncate3, "truncate3"}, {openiputtest, "openiput"}, {exitiputtest, "exitiput"}, {iputtest, "iput"}, {opentest, "opentest"}, {writetest, "writetest"}, {writebig, "writebig"}, {createtest, "createtest"}, {dirtest, "dirtest"}, {exectest, "exectest"}, {pipe1, "pipe1"}, {killstatus, "killstatus"}, {preempt, "preempt"}, {exitwait, "exitwait"}, {reparent, "reparent" }, {twochildren, "twochildren"}, {forkfork, "forkfork"}, {forkforkfork, "forkforkfork"}, {reparent2, "reparent2"}, {mem, "mem"}, {sharedfd, "sharedfd"}, {fourfiles, "fourfiles"}, {createdelete, "createdelete"}, {unlinkread, "unlinkread"}, {linktest, "linktest"}, {concreate, "concreate"}, {linkunlink, "linkunlink"}, {subdir, "subdir"}, {bigwrite, "bigwrite"}, {bigfile, "bigfile"}, {fourteen, "fourteen"}, {rmdot, "rmdot"}, {dirfile, "dirfile"}, {iref, "iref"}, {forktest, "forktest"}, {sbrkbasic, "sbrkbasic"}, {sbrkmuch, "sbrkmuch"}, {kernmem, "kernmem"}, {MAXVAplus, "MAXVAplus"}, {sbrkfail, "sbrkfail"}, {sbrkarg, "sbrkarg"}, {validatetest, "validatetest"}, {bsstest, "bsstest"}, {bigargtest, "bigargtest"}, {argptest, "argptest"}, {stacktest, "stacktest"}, {nowrite, "nowrite"}, {pgbug, "pgbug" }, {sbrkbugs, "sbrkbugs" }, {sbrklast, "sbrklast"}, {sbrk8000, "sbrk8000"}, {badarg, "badarg" }, {lazy_alloc, "lazy_alloc"}, {lazy_unmap, "lazy_unmap"}, {lazy_copy, "lazy_copy"}, {lazy_sbrk, "lazy_sbrk"}, { 0, 0}, }; // // 
Section with tests that take a fair bit of time // // directory that uses indirect blocks void bigdir(char *s) { enum { N = 500 }; int i, fd; char name[10]; unlink("bd"); fd = open("bd", O_CREATE); if(fd < 0){ printf("%s: bigdir create failed\n", s); exit(1); } close(fd); for(i = 0; i < N; i++){ name[0] = 'x'; name[1] = '0' + (i / 64); name[2] = '0' + (i % 64); name[3] = '\0'; if(link("bd", name) != 0){ printf("%s: bigdir i=%d link(bd, %s) failed\n", s, i, name); exit(1); } } unlink("bd"); for(i = 0; i < N; i++){ name[0] = 'x'; name[1] = '0' + (i / 64); name[2] = '0' + (i % 64); name[3] = '\0'; if(unlink(name) != 0){ printf("%s: bigdir unlink failed", s); exit(1); } } } // concurrent writes to try to provoke deadlock in the virtio disk // driver. void manywrites(char *s) { int nchildren = 4; int howmany = 30; // increase to look for deadlock for(int ci = 0; ci < nchildren; ci++){ int pid = fork(); if(pid < 0){ printf("fork failed\n"); exit(1); } if(pid == 0){ char name[3]; name[0] = 'b'; name[1] = 'a' + ci; name[2] = '\0'; unlink(name); for(int iters = 0; iters < howmany; iters++){ for(int i = 0; i < ci+1; i++){ int fd = open(name, O_CREATE | O_RDWR); if(fd < 0){ printf("%s: cannot create %s\n", s, name); exit(1); } int sz = sizeof(buf); int cc = write(fd, buf, sz); if(cc != sz){ printf("%s: write(%d) ret %d\n", s, sz, cc); exit(1); } close(fd); } unlink(name); } unlink(name); exit(0); } } for(int ci = 0; ci < nchildren; ci++){ int st = 0; wait(&st); if(st != 0) exit(st); } exit(0); } // regression test. does write() with an invalid buffer pointer cause // a block to be allocated for a file that is then not freed when the // file is deleted? if the kernel has this bug, it will panic: balloc: // out of blocks. assumed_free may need to be raised to be more than // the number of free blocks. this test takes a long time. 
void badwrite(char *s) { int assumed_free = 600; unlink("junk"); for(int i = 0; i < assumed_free; i++){ int fd = open("junk", O_CREATE|O_WRONLY); if(fd < 0){ printf("open junk failed\n"); exit(1); } write(fd, (char*)0xffffffffffL, 1); close(fd); unlink("junk"); } int fd = open("junk", O_CREATE|O_WRONLY); if(fd < 0){ printf("open junk failed\n"); exit(1); } if(write(fd, "x", 1) != 1){ printf("write failed\n"); exit(1); } close(fd); unlink("junk"); exit(0); } // test the exec() code that cleans up if it runs out // of memory. it's really a test that such a condition // doesn't cause a panic. void execout(char *s) { for(int avail = 0; avail < 15; avail++){ int pid = fork(); if(pid < 0){ printf("fork failed\n"); exit(1); } else if(pid == 0){ // allocate all of memory. while(1){ char *a = sbrk(PGSIZE); if(a == SBRK_ERROR) break; *(a + PGSIZE - 1) = 1; } // free a few pages, in order to let exec() make some // progress. for(int i = 0; i < avail; i++) sbrk(-PGSIZE); close(1); char *args[] = { "echo", "x", 0 }; exec("echo", args); exit(0); } else { wait((int*)0); } } exit(0); } // can the kernel tolerate running out of disk space? void diskfull(char *s) { int fi; int done = 0; unlink("diskfulldir"); for(fi = 0; done == 0 && '0' + fi < 0177; fi++){ char name[32]; name[0] = 'b'; name[1] = 'i'; name[2] = 'g'; name[3] = '0' + fi; name[4] = '\0'; unlink(name); int fd = open(name, O_CREATE|O_RDWR|O_TRUNC); if(fd < 0){ // oops, ran out of inodes before running out of blocks. printf("%s: could not create file %s\n", s, name); done = 1; break; } for(int i = 0; i < MAXFILE; i++){ char buf[BSIZE]; if(write(fd, buf, BSIZE) != BSIZE){ done = 1; close(fd); break; } } close(fd); } // now that there are no free blocks, test that dirlink() // merely fails (doesn't panic) if it can't extend // directory content. one of these file creations // is expected to fail. 
int nzz = 128; for(int i = 0; i < nzz; i++){ char name[32]; name[0] = 'z'; name[1] = 'z'; name[2] = '0' + (i / 32); name[3] = '0' + (i % 32); name[4] = '\0'; unlink(name); int fd = open(name, O_CREATE|O_RDWR|O_TRUNC); if(fd < 0) break; close(fd); } // this mkdir() is expected to fail. if(mkdir("diskfulldir") == 0) printf("%s: mkdir(diskfulldir) unexpectedly succeeded!\n", s); unlink("diskfulldir"); for(int i = 0; i < nzz; i++){ char name[32]; name[0] = 'z'; name[1] = 'z'; name[2] = '0' + (i / 32); name[3] = '0' + (i % 32); name[4] = '\0'; unlink(name); } for(int i = 0; '0' + i < 0177; i++){ char name[32]; name[0] = 'b'; name[1] = 'i'; name[2] = 'g'; name[3] = '0' + i; name[4] = '\0'; unlink(name); } } void outofinodes(char *s) { int nzz = 32*32; for(int i = 0; i < nzz; i++){ char name[32]; name[0] = 'z'; name[1] = 'z'; name[2] = '0' + (i / 32); name[3] = '0' + (i % 32); name[4] = '\0'; unlink(name); int fd = open(name, O_CREATE|O_RDWR|O_TRUNC); if(fd < 0){ // failure is eventually expected. break; } close(fd); } for(int i = 0; i < nzz; i++){ char name[32]; name[0] = 'z'; name[1] = 'z'; name[2] = '0' + (i / 32); name[3] = '0' + (i % 32); name[4] = '\0'; unlink(name); } } struct test slowtests[] = { {bigdir, "bigdir"}, {manywrites, "manywrites"}, {badwrite, "badwrite" }, {execout, "execout"}, {diskfull, "diskfull"}, {outofinodes, "outofinodes"}, { 0, 0}, }; // // drive tests // // run each test in its own process. run returns 1 if child's exit() // indicates success. 
int run(void f(char *), char *s) { int pid; int xstatus; printf("test %s: ", s); if((pid = fork()) < 0) { printf("runtest: fork error\n"); exit(1); } if(pid == 0) { f(s); exit(0); } else { wait(&xstatus); if(xstatus != 0) printf("FAILED\n"); else printf("OK\n"); return xstatus == 0; } } int runtests(struct test *tests, char *justone, int continuous) { int ntests = 0; for (struct test *t = tests; t->s != 0; t++) { if((justone == 0) || strcmp(t->s, justone) == 0) { ntests++; if(!run(t->f, t->s)){ if(continuous != 2){ printf("SOME TESTS FAILED\n"); return -1; } } } } return ntests; } // use sbrk() to count how many free physical memory pages there are. int countfree() { int n = 0; uint64 sz0 = (uint64)sbrk(0); while(1){ char *a = sbrk(PGSIZE); if(a == SBRK_ERROR){ break; } n += 1; } sbrk(-((uint64)sbrk(0) - sz0)); return n; } int drivetests(int quick, int continuous, char *justone) { do { printf("usertests starting\n"); int free0 = countfree(); int free1 = 0; int ntests = 0; int n; n = runtests(quicktests, justone, continuous); if (n < 0) { if(continuous != 2) { return 1; } } else { ntests += n; } if(!quick) { if (justone == 0) printf("usertests slow tests starting\n"); n = runtests(slowtests, justone, continuous); if (n < 0) { if(continuous != 2) { return 1; } } else { ntests += n; } } if((free1 = countfree()) < free0) { printf("FAILED -- lost some free pages %d (out of %d)\n", free1, free0); if(continuous != 2) { return 1; } } if (justone != 0 && ntests == 0) { printf("NO TESTS EXECUTED\n"); return 1; } } while(continuous); return 0; } int main(int argc, char *argv[]) { int continuous = 0; int quick = 0; char *justone = 0; if(argc == 2 && strcmp(argv[1], "-q") == 0){ quick = 1; } else if(argc == 2 && strcmp(argv[1], "-c") == 0){ continuous = 1; } else if(argc == 2 && strcmp(argv[1], "-C") == 0){ continuous = 2; } else if(argc == 2 && argv[1][0] != '-'){ justone = argv[1]; } else if(argc > 1){ printf("Usage: usertests [-c] [-C] [-q] [testname]\n"); exit(1); } if 
(drivetests(quick, continuous, justone)) { exit(1); } printf("ALL TESTS PASSED\n"); exit(0); } ================================================ FILE: user/usys.pl ================================================ #!/usr/bin/perl -w # Generate usys.S, the stubs for syscalls. print "# generated by usys.pl - do not edit\n"; print "#include \"kernel/syscall.h\"\n"; sub entry { my $prefix = "sys_"; my $name = shift; if ($name eq "sbrk") { print ".global $prefix$name\n"; print "$prefix$name:\n"; } else { print ".global $name\n"; print "$name:\n"; } print " li a7, SYS_${name}\n"; print " ecall\n"; print " ret\n"; } entry("fork"); entry("exit"); entry("wait"); entry("pipe"); entry("read"); entry("write"); entry("close"); entry("kill"); entry("exec"); entry("open"); entry("mknod"); entry("unlink"); entry("fstat"); entry("link"); entry("mkdir"); entry("chdir"); entry("dup"); entry("getpid"); entry("sbrk"); entry("pause"); entry("uptime"); ================================================ FILE: user/wc.c ================================================ #include "kernel/types.h" #include "kernel/stat.h" #include "kernel/fcntl.h" #include "user/user.h" char buf[512]; void wc(int fd, char *name) { int i, n; int l, w, c, inword; l = w = c = 0; inword = 0; while((n = read(fd, buf, sizeof(buf))) > 0){ for(i=0; i 0) pause(5); // Let child exit before parent. exit(0); }