diff options
-rw-r--r-- | .gdbinit.tmpl | 27 | ||||
-rw-r--r-- | .gdbinit.tmpl-riscv | 5 | ||||
-rw-r--r-- | .gitignore | 3 | ||||
-rw-r--r-- | BUGS | 7 | ||||
-rw-r--r-- | LICENSE | 2 | ||||
-rw-r--r-- | Makefile | 287 | ||||
-rw-r--r-- | Notes | 123 | ||||
-rw-r--r-- | README | 14 | ||||
-rw-r--r-- | TRICKS | 140 | ||||
-rw-r--r-- | asm.h | 18 | ||||
-rw-r--r-- | bootasm.S | 88 | ||||
-rw-r--r-- | bootmain.c | 96 | ||||
-rw-r--r-- | console.c | 299 | ||||
-rwxr-xr-x | cuth | 48 | ||||
-rw-r--r-- | doc/FU540-C000-v1.0.pdf | bin | 0 -> 2240525 bytes | |||
-rw-r--r-- | doc/riscv-calling.pdf | bin | 0 -> 138193 bytes | |||
-rw-r--r-- | doc/riscv-privileged-v1.10.pdf | bin | 0 -> 536816 bytes | |||
-rw-r--r-- | doc/riscv-spec-v2.2.pdf | bin | 0 -> 615016 bytes | |||
-rw-r--r-- | doc/virtio-v1.1-csprd01.pdf | bin | 0 -> 694936 bytes | |||
-rwxr-xr-x | dot-bochsrc | 738 | ||||
-rw-r--r-- | entry.S | 68 | ||||
-rw-r--r-- | entryother.S | 93 | ||||
-rw-r--r-- | exec.c | 114 | ||||
-rw-r--r-- | gdbutil | 291 | ||||
-rw-r--r-- | ide.c | 168 | ||||
-rw-r--r-- | ioapic.c | 75 | ||||
-rw-r--r-- | kalloc.c | 96 | ||||
-rw-r--r-- | kbd.c | 50 | ||||
-rw-r--r-- | kbd.h | 112 | ||||
-rw-r--r-- | kernel.ld | 68 | ||||
-rw-r--r-- | kernel/bio.c (renamed from bio.c) | 41 | ||||
-rw-r--r-- | kernel/buf.h (renamed from buf.h) | 5 | ||||
-rw-r--r-- | kernel/console.c | 199 | ||||
-rw-r--r-- | kernel/date.h (renamed from date.h) | 0 | ||||
-rw-r--r-- | kernel/defs.h (renamed from defs.h) | 152 | ||||
-rw-r--r-- | kernel/elf.h (renamed from elf.h) | 22 | ||||
-rw-r--r-- | kernel/entry.S | 26 | ||||
-rw-r--r-- | kernel/exec.c | 153 | ||||
-rw-r--r-- | kernel/fcntl.h (renamed from fcntl.h) | 0 | ||||
-rw-r--r-- | kernel/file.c (renamed from file.c) | 67 | ||||
-rw-r--r-- | kernel/file.h (renamed from file.h) | 16 | ||||
-rw-r--r-- | kernel/fs.c (renamed from fs.c) | 92 | ||||
-rw-r--r-- | kernel/fs.h (renamed from fs.h) | 13 | ||||
-rw-r--r-- | kernel/kalloc.c | 83 | ||||
-rw-r--r-- | kernel/kernel.ld | 32 | ||||
-rw-r--r-- | kernel/kernelvec.S | 121 | ||||
-rw-r--r-- | kernel/log.c (renamed from log.c) | 17 | ||||
-rw-r--r-- | kernel/main.c | 43 | ||||
-rw-r--r-- | kernel/memlayout.h | 67 | ||||
-rw-r--r-- | kernel/param.h (renamed from param.h) | 3 | ||||
-rw-r--r-- | kernel/pipe.c | 127 | ||||
-rw-r--r-- | kernel/plic.c | 62 | ||||
-rw-r--r-- | kernel/printf.c | 134 | ||||
-rw-r--r-- | kernel/proc.c | 647 | ||||
-rw-r--r-- | kernel/proc.h | 105 | ||||
-rw-r--r-- | kernel/ramdisk.c | 45 | ||||
-rw-r--r-- | kernel/riscv.h | 358 | ||||
-rw-r--r-- | kernel/sleeplock.c (renamed from sleeplock.c) | 5 | ||||
-rw-r--r-- | kernel/sleeplock.h (renamed from sleeplock.h) | 0 | ||||
-rw-r--r-- | kernel/spinlock.c | 108 | ||||
-rw-r--r-- | kernel/spinlock.h (renamed from spinlock.h) | 2 | ||||
-rw-r--r-- | kernel/start.c | 82 | ||||
-rw-r--r-- | kernel/stat.h (renamed from stat.h) | 10 | ||||
-rw-r--r-- | kernel/string.c (renamed from string.c) | 11 | ||||
-rw-r--r-- | kernel/swtch.S | 42 | ||||
-rw-r--r-- | kernel/syscall.c | 147 | ||||
-rw-r--r-- | kernel/syscall.h (renamed from syscall.h) | 0 | ||||
-rw-r--r-- | kernel/sysfile.c (renamed from sysfile.c) | 152 | ||||
-rw-r--r-- | kernel/sysproc.c (renamed from sysproc.c) | 50 | ||||
-rw-r--r-- | kernel/trampoline.S | 141 | ||||
-rw-r--r-- | kernel/trap.c | 213 | ||||
-rw-r--r-- | kernel/types.h | 10 | ||||
-rw-r--r-- | kernel/uart.c | 92 | ||||
-rw-r--r-- | kernel/virtio.h | 72 | ||||
-rw-r--r-- | kernel/virtio_disk.c | 269 | ||||
-rw-r--r-- | kernel/vm.c | 441 | ||||
-rw-r--r-- | labs/cow.html | 109 | ||||
-rw-r--r-- | labs/fs.html | 360 | ||||
-rw-r--r-- | labs/fs1.html | 215 | ||||
-rw-r--r-- | labs/lazy.html | 132 | ||||
-rw-r--r-- | labs/lock.html | 148 | ||||
-rw-r--r-- | labs/mmap.html | 171 | ||||
-rw-r--r-- | labs/syscall.html | 443 | ||||
-rw-r--r-- | labs/xv6.html | 238 | ||||
-rw-r--r-- | lapic.c | 229 | ||||
-rw-r--r-- | main.c | 116 | ||||
-rw-r--r-- | memide.c | 60 | ||||
-rw-r--r-- | memlayout.h | 15 | ||||
-rw-r--r-- | mkfs/mkfs.c (renamed from mkfs.c) | 24 | ||||
-rw-r--r-- | mmu.h | 181 | ||||
-rw-r--r-- | mp.c | 139 | ||||
-rw-r--r-- | mp.h | 56 | ||||
-rw-r--r-- | picirq.c | 19 | ||||
-rw-r--r-- | pipe.c | 121 | ||||
-rwxr-xr-x | pr.pl | 36 | ||||
-rwxr-xr-x | printpcs | 14 | ||||
-rw-r--r-- | proc.c | 534 | ||||
-rw-r--r-- | proc.h | 58 | ||||
-rwxr-xr-x | runoff | 246 | ||||
-rw-r--r-- | runoff.list | 80 | ||||
-rw-r--r-- | runoff.spec | 102 | ||||
-rwxr-xr-x | runoff1 | 108 | ||||
-rwxr-xr-x | show1 | 3 | ||||
-rwxr-xr-x | sign.pl | 19 | ||||
-rw-r--r-- | sleep1.p | 134 | ||||
-rw-r--r-- | spinlock.c | 126 | ||||
-rwxr-xr-x | spinp | 16 | ||||
-rw-r--r-- | swtch.S | 29 | ||||
-rw-r--r-- | syscall.c | 145 | ||||
-rw-r--r-- | toc.ftr | 13 | ||||
-rw-r--r-- | toc.hdr | 6 | ||||
-rw-r--r-- | trap.c | 112 | ||||
-rw-r--r-- | trapasm.S | 32 | ||||
-rw-r--r-- | traps.h | 38 | ||||
-rw-r--r-- | types.h | 4 | ||||
-rw-r--r-- | uart.c | 77 | ||||
-rw-r--r-- | user/alarmtest.c | 88 | ||||
-rw-r--r-- | user/cat.c (renamed from cat.c) | 6 | ||||
-rw-r--r-- | user/cow.c | 196 | ||||
-rw-r--r-- | user/echo.c (renamed from echo.c) | 6 | ||||
-rw-r--r-- | user/forktest.c (renamed from forktest.c) | 6 | ||||
-rw-r--r-- | user/grep.c (renamed from grep.c) | 6 | ||||
-rw-r--r-- | user/init.c (renamed from init.c) | 13 | ||||
-rw-r--r-- | user/initcode.S (renamed from initcode.S) | 18 | ||||
-rw-r--r-- | user/kill.c (renamed from kill.c) | 6 | ||||
-rw-r--r-- | user/ln.c (renamed from ln.c) | 6 | ||||
-rw-r--r-- | user/ls.c (renamed from ls.c) | 10 | ||||
-rw-r--r-- | user/mkdir.c (renamed from mkdir.c) | 6 | ||||
-rw-r--r-- | user/printf.c (renamed from printf.c) | 42 | ||||
-rw-r--r-- | user/rm.c (renamed from rm.c) | 6 | ||||
-rw-r--r-- | user/sh.c (renamed from sh.c) | 6 | ||||
-rw-r--r-- | user/stressfs.c (renamed from stressfs.c) | 10 | ||||
-rw-r--r-- | user/ulib.c (renamed from ulib.c) | 15 | ||||
-rw-r--r-- | user/umalloc.c (renamed from umalloc.c) | 8 | ||||
-rw-r--r-- | user/user.h (renamed from user.h) | 0 | ||||
-rw-r--r-- | user/usertests.c (renamed from usertests.c) | 335 | ||||
-rwxr-xr-x | user/usys.pl | 38 | ||||
-rw-r--r-- | user/wc.c (renamed from wc.c) | 6 | ||||
-rw-r--r-- | user/zombie.c (renamed from zombie.c) | 6 | ||||
-rw-r--r-- | usys.S | 31 | ||||
-rwxr-xr-x | vectors.pl | 47 | ||||
-rw-r--r-- | vm.c | 394 | ||||
-rw-r--r-- | x86.h | 183 |
143 files changed, 6819 insertions, 6890 deletions
diff --git a/.gdbinit.tmpl b/.gdbinit.tmpl deleted file mode 100644 index f71681a..0000000 --- a/.gdbinit.tmpl +++ /dev/null @@ -1,27 +0,0 @@ -set $lastcs = -1 - -define hook-stop - # There doesn't seem to be a good way to detect if we're in 16- or - # 32-bit mode, but in 32-bit mode we always run with CS == 8 in the - # kernel and CS == 35 in user space - if $cs == 8 || $cs == 35 - if $lastcs != 8 && $lastcs != 35 - set architecture i386 - end - x/i $pc - else - if $lastcs == -1 || $lastcs == 8 || $lastcs == 35 - set architecture i8086 - end - # Translate the segment:offset into a physical address - printf "[%4x:%4x] ", $cs, $eip - x/i $cs*16+$eip - end - set $lastcs = $cs -end - -echo + target remote localhost:1234\n -target remote localhost:1234 - -echo + symbol-file kernel\n -symbol-file kernel diff --git a/.gdbinit.tmpl-riscv b/.gdbinit.tmpl-riscv new file mode 100644 index 0000000..452f04e --- /dev/null +++ b/.gdbinit.tmpl-riscv @@ -0,0 +1,5 @@ +set confirm off +set architecture riscv +target remote 127.0.0.1:1234 +symbol-file kernel/kernel +set disassemble-next-line auto @@ -10,7 +10,8 @@ bootblock entryother initcode initcode.out -kernel kernelmemfs mkfs +kernel/kernel +user/usys.S .gdbinit @@ -1,7 +0,0 @@ -formatting: - need to fix PAGEBREAK mechanism - -sh: - can't always runcmd in child -- breaks cd. - maybe should hard-code PATH=/ ? 
- @@ -1,6 +1,6 @@ The xv6 software is: -Copyright (c) 2006-2018 Frans Kaashoek, Robert Morris, Russ Cox, +Copyright (c) 2006-2019 Frans Kaashoek, Robert Morris, Russ Cox, Massachusetts Institute of Technology Permission is hereby granted, free of charge, to any person obtaining @@ -1,86 +1,65 @@ +K=kernel +U=user + OBJS = \ - bio.o\ - console.o\ - exec.o\ - file.o\ - fs.o\ - ide.o\ - ioapic.o\ - kalloc.o\ - kbd.o\ - lapic.o\ - log.o\ - main.o\ - mp.o\ - picirq.o\ - pipe.o\ - proc.o\ - sleeplock.o\ - spinlock.o\ - string.o\ - swtch.o\ - syscall.o\ - sysfile.o\ - sysproc.o\ - trapasm.o\ - trap.o\ - uart.o\ - vectors.o\ - vm.o\ - -# Cross-compiling (e.g., on Mac OS X) -# TOOLPREFIX = i386-jos-elf - -# Using native tools (e.g., on X86 Linux) + $K/entry.o \ + $K/start.o \ + $K/console.o \ + $K/printf.o \ + $K/uart.o \ + $K/kalloc.o \ + $K/spinlock.o \ + $K/string.o \ + $K/main.o \ + $K/vm.o \ + $K/proc.o \ + $K/swtch.o \ + $K/trampoline.o \ + $K/trap.o \ + $K/syscall.o \ + $K/sysproc.o \ + $K/bio.o \ + $K/fs.o \ + $K/log.o \ + $K/sleeplock.o \ + $K/file.o \ + $K/pipe.o \ + $K/exec.o \ + $K/sysfile.o \ + $K/kernelvec.o \ + $K/plic.o \ + $K/virtio_disk.o + +# riscv64-unknown-elf- or riscv64-linux-gnu- +# perhaps in /opt/riscv/bin #TOOLPREFIX = # Try to infer the correct TOOLPREFIX if not set ifndef TOOLPREFIX -TOOLPREFIX := $(shell if i386-jos-elf-objdump -i 2>&1 | grep '^elf32-i386$$' >/dev/null 2>&1; \ - then echo 'i386-jos-elf-'; \ - elif objdump -i 2>&1 | grep 'elf32-i386' >/dev/null 2>&1; \ - then echo ''; \ +TOOLPREFIX := $(shell if riscv64-unknown-elf-objdump -i 2>&1 | grep 'elf64-big' >/dev/null 2>&1; \ + then echo 'riscv64-unknown-elf-'; \ + elif riscv64-linux-gnu-objdump -i 2>&1 | grep 'elf64-big' >/dev/null 2>&1; \ + then echo 'riscv64-linux-gnu-'; \ else echo "***" 1>&2; \ - echo "*** Error: Couldn't find an i386-*-elf version of GCC/binutils." 1>&2; \ - echo "*** Is the directory with i386-jos-elf-gcc in your PATH?" 
1>&2; \ - echo "*** If your i386-*-elf toolchain is installed with a command" 1>&2; \ - echo "*** prefix other than 'i386-jos-elf-', set your TOOLPREFIX" 1>&2; \ - echo "*** environment variable to that prefix and run 'make' again." 1>&2; \ + echo "*** Error: Couldn't find an riscv64 version of GCC/binutils." 1>&2; \ echo "*** To turn off this error, run 'gmake TOOLPREFIX= ...'." 1>&2; \ echo "***" 1>&2; exit 1; fi) endif -# If the makefile can't find QEMU, specify its path here -# QEMU = qemu-system-i386 - -# Try to infer the correct QEMU -ifndef QEMU -QEMU = $(shell if which qemu > /dev/null; \ - then echo qemu; exit; \ - elif which qemu-system-i386 > /dev/null; \ - then echo qemu-system-i386; exit; \ - elif which qemu-system-x86_64 > /dev/null; \ - then echo qemu-system-x86_64; exit; \ - else \ - qemu=/Applications/Q.app/Contents/MacOS/i386-softmmu.app/Contents/MacOS/i386-softmmu; \ - if test -x $$qemu; then echo $$qemu; exit; fi; fi; \ - echo "***" 1>&2; \ - echo "*** Error: Couldn't find a working QEMU executable." 1>&2; \ - echo "*** Is the directory containing the qemu binary in your PATH" 1>&2; \ - echo "*** or have you tried setting the QEMU variable in Makefile?" 1>&2; \ - echo "***" 1>&2; exit 1) -endif +QEMU = qemu-system-riscv64 CC = $(TOOLPREFIX)gcc AS = $(TOOLPREFIX)gas LD = $(TOOLPREFIX)ld OBJCOPY = $(TOOLPREFIX)objcopy OBJDUMP = $(TOOLPREFIX)objdump -CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 -Werror -fno-omit-frame-pointer + +CFLAGS = -Wall -Werror -O -fno-omit-frame-pointer -ggdb +CFLAGS += -MD +CFLAGS += -mcmodel=medany +CFLAGS += -ffreestanding -fno-common -nostdlib -mno-relax +CFLAGS += -I. 
CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector) -ASFLAGS = -m32 -gdwarf-2 -Wa,-divide -# FreeBSD ld wants ``elf_i386_fbsd'' -LDFLAGS += -m $(shell $(LD) -V | grep elf_i386 2>/dev/null | head -n 1) # Disable PIE when possible (for Ubuntu 16.10 toolchain) ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]no-pie'),) @@ -90,74 +69,43 @@ ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]nopie'),) CFLAGS += -fno-pie -nopie endif -xv6.img: bootblock kernel - dd if=/dev/zero of=xv6.img count=10000 - dd if=bootblock of=xv6.img conv=notrunc - dd if=kernel of=xv6.img seek=1 conv=notrunc - -xv6memfs.img: bootblock kernelmemfs - dd if=/dev/zero of=xv6memfs.img count=10000 - dd if=bootblock of=xv6memfs.img conv=notrunc - dd if=kernelmemfs of=xv6memfs.img seek=1 conv=notrunc - -bootblock: bootasm.S bootmain.c - $(CC) $(CFLAGS) -fno-pic -O -nostdinc -I. -c bootmain.c - $(CC) $(CFLAGS) -fno-pic -nostdinc -I. -c bootasm.S - $(LD) $(LDFLAGS) -N -e start -Ttext 0x7C00 -o bootblock.o bootasm.o bootmain.o - $(OBJDUMP) -S bootblock.o > bootblock.asm - $(OBJCOPY) -S -O binary -j .text bootblock.o bootblock - ./sign.pl bootblock - -entryother: entryother.S - $(CC) $(CFLAGS) -fno-pic -nostdinc -I. -c entryother.S - $(LD) $(LDFLAGS) -N -e start -Ttext 0x7000 -o bootblockother.o entryother.o - $(OBJCOPY) -S -O binary -j .text bootblockother.o entryother - $(OBJDUMP) -S bootblockother.o > entryother.asm - -initcode: initcode.S - $(CC) $(CFLAGS) -nostdinc -I. 
-c initcode.S - $(LD) $(LDFLAGS) -N -e start -Ttext 0 -o initcode.out initcode.o - $(OBJCOPY) -S -O binary initcode.out initcode - $(OBJDUMP) -S initcode.o > initcode.asm - -kernel: $(OBJS) entry.o entryother initcode kernel.ld - $(LD) $(LDFLAGS) -T kernel.ld -o kernel entry.o $(OBJS) -b binary initcode entryother - $(OBJDUMP) -S kernel > kernel.asm - $(OBJDUMP) -t kernel | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernel.sym - -# kernelmemfs is a copy of kernel that maintains the -# disk image in memory instead of writing to a disk. -# This is not so useful for testing persistent storage or -# exploring disk buffering implementations, but it is -# great for testing the kernel on real hardware without -# needing a scratch disk. -MEMFSOBJS = $(filter-out ide.o,$(OBJS)) memide.o -kernelmemfs: $(MEMFSOBJS) entry.o entryother initcode kernel.ld fs.img - $(LD) $(LDFLAGS) -T kernel.ld -o kernelmemfs entry.o $(MEMFSOBJS) -b binary initcode entryother fs.img - $(OBJDUMP) -S kernelmemfs > kernelmemfs.asm - $(OBJDUMP) -t kernelmemfs | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernelmemfs.sym - -tags: $(OBJS) entryother.S _init - etags *.S *.c +LDFLAGS = -z max-page-size=4096 -vectors.S: vectors.pl - ./vectors.pl > vectors.S +$K/kernel: $(OBJS) $K/kernel.ld $U/initcode + $(LD) $(LDFLAGS) -T $K/kernel.ld -o $K/kernel $(OBJS) + $(OBJDUMP) -S $K/kernel > $K/kernel.asm + $(OBJDUMP) -t $K/kernel | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > $K/kernel.sym -ULIB = ulib.o usys.o printf.o umalloc.o +$U/initcode: $U/initcode.S + $(CC) $(CFLAGS) -nostdinc -I. 
-Ikernel -c $U/initcode.S -o $U/initcode.o + $(LD) $(LDFLAGS) -N -e start -Ttext 0 -o $U/initcode.out $U/initcode.o + $(OBJCOPY) -S -O binary $U/initcode.out $U/initcode + $(OBJDUMP) -S $U/initcode.o > $U/initcode.asm + +tags: $(OBJS) _init + etags *.S *.c + +ULIB = $U/ulib.o $U/usys.o $U/printf.o $U/umalloc.o _%: %.o $(ULIB) $(LD) $(LDFLAGS) -N -e main -Ttext 0 -o $@ $^ $(OBJDUMP) -S $@ > $*.asm $(OBJDUMP) -t $@ | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > $*.sym -_forktest: forktest.o $(ULIB) +$U/usys.S : $U/usys.pl + perl $U/usys.pl > $U/usys.S + +$U/usys.o : $U/usys.S + $(CC) $(CFLAGS) -c -o $U/usys.o $U/usys.S + +$U/_forktest: $U/forktest.o $(ULIB) # forktest has less library code linked in - needs to be small # in order to be able to max out the proc table. - $(LD) $(LDFLAGS) -N -e main -Ttext 0 -o _forktest forktest.o ulib.o usys.o - $(OBJDUMP) -S _forktest > forktest.asm + $(LD) $(LDFLAGS) -N -e main -Ttext 0 -o $U/_forktest $U/forktest.o $U/ulib.o $U/usys.o + $(OBJDUMP) -S $U/_forktest > $U/forktest.asm -mkfs: mkfs.c fs.h - gcc -Werror -Wall -o mkfs mkfs.c +mkfs/mkfs: mkfs/mkfs.c $K/fs.h + gcc -Werror -Wall -I. -o mkfs/mkfs mkfs/mkfs.c # Prevent deletion of intermediate files, e.g. cat.o, after first build, so # that disk image changes after first build are persistent until clean. 
More @@ -166,50 +114,36 @@ mkfs: mkfs.c fs.h .PRECIOUS: %.o UPROGS=\ - _cat\ - _echo\ - _forktest\ - _grep\ - _init\ - _kill\ - _ln\ - _ls\ - _mkdir\ - _rm\ - _sh\ - _stressfs\ - _usertests\ - _wc\ - _zombie\ - -fs.img: mkfs README $(UPROGS) - ./mkfs fs.img README $(UPROGS) - --include *.d + $U/_cat\ + $U/_echo\ + $U/_forktest\ + $U/_grep\ + $U/_init\ + $U/_kill\ + $U/_ln\ + $U/_ls\ + $U/_mkdir\ + $U/_rm\ + $U/_sh\ + $U/_stressfs\ + $U/_usertests\ + $U/_wc\ + $U/_zombie\ + $U/_cow\ + +fs.img: mkfs/mkfs README $(UPROGS) + mkfs/mkfs fs.img README $(UPROGS) + +-include kernel/*.d user/*.d clean: rm -f *.tex *.dvi *.idx *.aux *.log *.ind *.ilg \ - *.o *.d *.asm *.sym vectors.S bootblock entryother \ - initcode initcode.out kernel xv6.img fs.img kernelmemfs \ - xv6memfs.img mkfs .gdbinit \ + */*.o */*.d */*.asm */*.sym \ + $U/initcode $U/initcode.out $K/kernel fs.img \ + mkfs/mkfs .gdbinit \ + $U/usys.S \ $(UPROGS) -# make a printout -FILES = $(shell grep -v '^\#' runoff.list) -PRINT = runoff.list runoff.spec README toc.hdr toc.ftr $(FILES) - -xv6.pdf: $(PRINT) - ./runoff - ls -l xv6.pdf - -print: xv6.pdf - -# run in emulators - -bochs : fs.img xv6.img - if [ ! 
-e .bochsrc ]; then ln -s dot-bochsrc .bochsrc; fi - bochs -q - # try to generate a unique GDB port GDBPORT = $(shell expr `id -u` % 5000 + 25000) # QEMU's gdb stub command line changed in 0.11 @@ -217,29 +151,20 @@ QEMUGDB = $(shell if $(QEMU) -help | grep -q '^-gdb'; \ then echo "-gdb tcp::$(GDBPORT)"; \ else echo "-s -p $(GDBPORT)"; fi) ifndef CPUS -CPUS := 2 +CPUS := 3 endif -QEMUOPTS = -drive file=fs.img,index=1,media=disk,format=raw -drive file=xv6.img,index=0,media=disk,format=raw -smp $(CPUS) -m 512 $(QEMUEXTRA) +QEMUOPTS = -machine virt -kernel $K/kernel -m 3G -smp $(CPUS) -nographic +QEMUOPTS += -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0 -qemu: fs.img xv6.img - $(QEMU) -serial mon:stdio $(QEMUOPTS) +qemu: $K/kernel fs.img + $(QEMU) $(QEMUOPTS) -qemu-memfs: xv6memfs.img - $(QEMU) -drive file=xv6memfs.img,index=0,media=disk,format=raw -smp $(CPUS) -m 256 - -qemu-nox: fs.img xv6.img - $(QEMU) -nographic $(QEMUOPTS) - -.gdbinit: .gdbinit.tmpl - sed "s/localhost:1234/localhost:$(GDBPORT)/" < $^ > $@ - -qemu-gdb: fs.img xv6.img .gdbinit - @echo "*** Now run 'gdb'." 1>&2 - $(QEMU) -serial mon:stdio $(QEMUOPTS) -S $(QEMUGDB) +.gdbinit: .gdbinit.tmpl-riscv + sed "s/:1234/:$(GDBPORT)/" < $^ > $@ -qemu-nox-gdb: fs.img xv6.img .gdbinit +qemu-gdb: $K/kernel .gdbinit fs.img @echo "*** Now run 'gdb'." 
1>&2 - $(QEMU) -nographic $(QEMUOPTS) -S $(QEMUGDB) + $(QEMU) $(QEMUOPTS) -S $(QEMUGDB) # CUT HERE # prepare dist for students @@ -251,7 +176,7 @@ EXTRA=\ mkfs.c ulib.c user.h cat.c echo.c forktest.c grep.c kill.c\ ln.c ls.c mkdir.c rm.c stressfs.c usertests.c wc.c zombie.c\ printf.c umalloc.c\ - README dot-bochsrc *.pl toc.* runoff runoff1 runoff.list\ + README dot-bochsrc *.pl \ .gdbinit.tmpl gdbutil\ dist: @@ -1,123 +0,0 @@ -bochs 2.2.6: -./configure --enable-smp --enable-disasm --enable-debugger --enable-all-optimizations --enable-4meg-pages --enable-global-pages --enable-pae --disable-reset-on-triple-fault -bochs CVS after 2.2.6: -./configure --enable-smp --enable-disasm --enable-debugger --enable-all-optimizations --enable-4meg-pages --enable-global-pages --enable-pae - -bootmain.c doesn't work right if the ELF sections aren't -sector-aligned. so you can't use ld -N. and the sections may also need -to be non-zero length, only really matters for tiny "kernels". - -kernel loaded at 1 megabyte. stack same place that bootasm.S left it. - -kinit() should find real mem size - and rescue useable memory below 1 meg - -no paging, no use of page table hardware, just segments - -no user area: no magic kernel stack mapping - so no copying of kernel stack during fork - though there is a kernel stack page for each process - -no kernel malloc(), just kalloc() for user core - -user pointers aren't valid in the kernel - -are interrupts turned on in the kernel? yes. - -pass curproc explicitly, or implicit from cpu #? - e.g. argument to newproc()? - hmm, you need a global curproc[cpu] for trap() &c - -no stack expansion - -test running out of memory, process slots - -we can't really use a separate stack segment, since stack addresses -need to work correctly as ordinary pointers. the same may be true of -data vs text. how can we have a gap between data and stack, so that -both can grow, without committing 4GB of physical memory? does this -mean we need paging? 
- -perhaps have fixed-size stack, put it in the data segment? - -oops, if kernel stack is in contiguous user phys mem, then moving -users' memory (e.g. to expand it) will wreck any pointers into the -kernel stack. - -do we need to set fs and gs? so user processes can't abuse them? - -setupsegs() may modify current segment table, is that legal? - -trap() ought to lgdt on return, since currently only done in swtch() - -protect hardware interrupt vectors from user INT instructions? - -test out-of-fd cases for creating pipe. -test pipe reader closes then write -test two readers, two writers. -test children being inherited by grandparent &c - -some sleep()s should be interruptible by kill() - -locks - init_lock - sequences CPU startup - proc_table_lock - also protects next_pid - per-fd lock *just* protects count read-modify-write - also maybe freeness? - memory allocator - printf - -in general, the table locks protect both free-ness and - public variables of table elements - in many cases you can use table elements w/o a lock - e.g. if you are the process, or you are using an fd - -lock order - per-pipe lock - proc_table_lock fd_table_lock kalloc_lock - console_lock - -do you have to be holding the mutex in order to call wakeup()? yes - -device interrupts don't clear FL_IF - so a recursive timer interrupt is possible - -what does inode->busy mean? - might be held across disk reads - no-one is allowed to do anything to the inode - protected by inode_table_lock -inode->count counts in-memory pointers to the struct - prevents inode[] element from being re-used - protected by inode_table_lock - -blocks and inodes have ad-hoc sleep-locks - provide a single mechanism? - -kalloc() can return 0; do callers handle this right? - -test: one process unlinks a file while another links to it -test: one process opens a file while another deletes it -test: deadlock d/.. vs ../d, two processes. -test: dup() shared fd->off -test: does echo foo > x truncate x? 
- -sh: ioredirection incorrect now we have pipes -sh: chain of pipes won't work, also ugly that parent closes fdarray entries too -sh: dynamic memory allocation? -sh: should sh support ; () & -sh: stop stdin on ctrl-d (for cat > y) - -really should have bdwrite() for file content - and make some inode updates async - so soft updates make sense - -disk scheduling -echo foo > bar should truncate bar - so O_CREATE should not truncate - but O_TRUNC should - -make it work on a real machine -release before acquire at end of sleep? -check 2nd disk (i.e. if not in .bochsrc) @@ -1,6 +1,6 @@ xv6 is a re-implementation of Dennis Ritchie's and Ken Thompson's Unix Version 6 (v6). xv6 loosely follows the structure and style of v6, -but is implemented for a modern x86-based multiprocessor using ANSI C. +but is implemented for a modern RISC-V multiprocessor using ANSI C. ACKNOWLEDGMENTS @@ -31,7 +31,7 @@ Toomey, Stephen Tu, Pablo Ventura, Xi Wang, Keiichi Watanabe, Nicolas Wolovick, wxdao, Grant Wu, Jindong Zhang, Icenowy Zheng, and Zou Chang Wei. The code in the files that constitute xv6 is -Copyright 2006-2018 Frans Kaashoek, Robert Morris, and Russ Cox. +Copyright 2006-2019 Frans Kaashoek, Robert Morris, and Russ Cox. ERROR REPORTS @@ -42,9 +42,7 @@ simplifications and clarifications than new features. BUILDING AND RUNNING XV6 -To build xv6 on an x86 ELF machine (like Linux or FreeBSD), run -"make". On non-x86 or non-ELF machines (like OS X, even on x86), you -will need to install a cross-compiler gcc suite capable of producing -x86 ELF binaries (see https://pdos.csail.mit.edu/6.828/). -Then run "make TOOLPREFIX=i386-jos-elf-". Now install the QEMU PC -simulator and run "make qemu". +You will need a RISC-V "newlib" tool chain from +https://github.com/riscv/riscv-gnu-toolchain, and qemu compiled for +riscv64-softmmu. Once they are installed, and in your shell +search path, you can run "make qemu". 
@@ -1,140 +0,0 @@ -This file lists subtle things that might not be commented -as well as they should be in the source code and that -might be worth pointing out in a longer explanation or in class. - ---- - -[2009/07/12: No longer relevant; forkret1 changed -and this is now cleaner.] - -forkret1 in trapasm.S is called with a tf argument. -In order to use it, forkret1 copies the tf pointer into -%esp and then jumps to trapret, which pops the -register state out of the trap frame. If an interrupt -came in between the mov tf, %esp and the iret that -goes back out to user space, the interrupt stack frame -would end up scribbling over the tf and whatever memory -lay under it. - -Why is this safe? Because forkret1 is only called -the first time a process returns to user space, and -at that point, cp->tf is set to point to a trap frame -constructed at the top of cp's kernel stack. So tf -*is* a valid %esp that can hold interrupt state. - -If other tf's were used in forkret1, we could add -a cli before the mov tf, %esp. - ---- - -In pushcli, must cli() no matter what. It is not safe to do - - if(cpus[cpu()].ncli == 0) - cli(); - cpus[cpu()].ncli++; - -because if interrupts are off then we might call cpu(), get -rescheduled to a different cpu, look at cpus[oldcpu].ncli, -and wrongly decide not to disable interrupts on the new cpu. - -Instead do - - cli(); - cpus[cpu()].ncli++; - -always. - ---- - -There is a (harmless) race in pushcli, which does - - eflags = readeflags(); - cli(); - if(c->ncli++ == 0) - c->intena = eflags & FL_IF; - -Consider a bottom-level pushcli. -If interrupts are disabled already, then the right thing -happens: read_eflags finds that FL_IF is not set, -and intena = 0. 
If interrupts are enabled, then -it is less clear that the right thing happens: -the readeflags can execute, then the process -can get preempted and rescheduled on another cpu, -and then once it starts running, perhaps with -interrupts disabled (can happen since the scheduler -only enables interrupts once per scheduling loop, -not every time it schedules a process), it will -incorrectly record that interrupts *were* enabled. -This doesn't matter, because if it was safe to be -running with interrupts enabled before the context -switch, it is still safe (and arguably more correct) -to run with them enabled after the context switch too. - -In fact it would be safe if scheduler always set - c->intena = 1; -before calling swtch, and perhaps it should. - ---- - -The x86's processor-ordering memory model -matches spin locks well, so no explicit memory -synchronization instructions are required in -acquire and release. - -Consider two sequences of code on different CPUs: - -CPU0 -A; -release(lk); - -and - -CPU1 -acquire(lk); -B; - -We want to make sure that: - - all reads in B see the effects of writes in A. - - all reads in A do *not* see the effects of writes in B. - -The x86 guarantees that writes in A will go out -to memory before the write of lk->locked = 0 in -release(lk). It further guarantees that CPU1 -will observe CPU0's write of lk->locked = 0 only -after observing the earlier writes by CPU0. -So any reads in B are guaranteed to observe the -effects of writes in A. - -According to the Intel manual behavior spec, the -second condition requires a serialization instruction -in release, to avoid reads in A happening after giving -up lk. No Intel SMP processor in existence actually -moves reads down after writes, but the language in -the spec allows it. There is no telling whether future -processors will need it. - ---- - -The code in fork needs to read np->pid before -setting np->state to RUNNABLE. 
The following -is not a correct way to do this: - - int - fork(void) - { - ... - np->state = RUNNABLE; - return np->pid; // oops - } - -After setting np->state to RUNNABLE, some other CPU -might run the process, it might exit, and then it might -get reused for a different process (with a new pid), all -before the return statement. So it's not safe to just -"return np->pid". Even saving a copy of np->pid before -setting np->state isn't safe, since the compiler is -allowed to re-order statements. - -The real code saves a copy of np->pid, then acquires a lock -around the write to np->state. The acquire() prevents the -compiler from re-ordering. @@ -1,18 +0,0 @@ -// -// assembler macros to create x86 segments -// - -#define SEG_NULLASM \ - .word 0, 0; \ - .byte 0, 0, 0, 0 - -// The 0xC0 means the limit is in 4096-byte units -// and (for executable segments) 32-bit mode. -#define SEG_ASM(type,base,lim) \ - .word (((lim) >> 12) & 0xffff), ((base) & 0xffff); \ - .byte (((base) >> 16) & 0xff), (0x90 | (type)), \ - (0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff) - -#define STA_X 0x8 // Executable segment -#define STA_W 0x2 // Writeable (non-executable segments) -#define STA_R 0x2 // Readable (executable segments) diff --git a/bootasm.S b/bootasm.S deleted file mode 100644 index 257867c..0000000 --- a/bootasm.S +++ /dev/null @@ -1,88 +0,0 @@ -#include "asm.h" -#include "memlayout.h" -#include "mmu.h" - -# Start the first CPU: switch to 32-bit protected mode, jump into C. -# The BIOS loads this code from the first sector of the hard disk into -# memory at physical address 0x7c00 and starts executing in real mode -# with %cs=0 %ip=7c00. - -.code16 # Assemble for 16-bit mode -.globl start -start: - cli # BIOS enabled interrupts; disable - - # Zero data segment registers DS, ES, and SS. 
- xorw %ax,%ax # Set %ax to zero - movw %ax,%ds # -> Data Segment - movw %ax,%es # -> Extra Segment - movw %ax,%ss # -> Stack Segment - - # Physical address line A20 is tied to zero so that the first PCs - # with 2 MB would run software that assumed 1 MB. Undo that. -seta20.1: - inb $0x64,%al # Wait for not busy - testb $0x2,%al - jnz seta20.1 - - movb $0xd1,%al # 0xd1 -> port 0x64 - outb %al,$0x64 - -seta20.2: - inb $0x64,%al # Wait for not busy - testb $0x2,%al - jnz seta20.2 - - movb $0xdf,%al # 0xdf -> port 0x60 - outb %al,$0x60 - - # Switch from real to protected mode. Use a bootstrap GDT that makes - # virtual addresses map directly to physical addresses so that the - # effective memory map doesn't change during the transition. - lgdt gdtdesc - movl %cr0, %eax - orl $CR0_PE, %eax - movl %eax, %cr0 - -//PAGEBREAK! - # Complete the transition to 32-bit protected mode by using a long jmp - # to reload %cs and %eip. The segment descriptors are set up with no - # translation, so that the mapping is still the identity mapping. - ljmp $(SEG_KCODE<<3), $start32 - -.code32 # Tell assembler to generate 32-bit code now. -start32: - # Set up the protected-mode data segment registers - movw $(SEG_KDATA<<3), %ax # Our data segment selector - movw %ax, %ds # -> DS: Data Segment - movw %ax, %es # -> ES: Extra Segment - movw %ax, %ss # -> SS: Stack Segment - movw $0, %ax # Zero segments not ready for use - movw %ax, %fs # -> FS - movw %ax, %gs # -> GS - - # Set up the stack pointer and call into C. - movl $start, %esp - call bootmain - - # If bootmain returns (it shouldn't), trigger a Bochs - # breakpoint if running under Bochs, then loop. 
- movw $0x8a00, %ax # 0x8a00 -> port 0x8a00 - movw %ax, %dx - outw %ax, %dx - movw $0x8ae0, %ax # 0x8ae0 -> port 0x8a00 - outw %ax, %dx -spin: - jmp spin - -# Bootstrap GDT -.p2align 2 # force 4 byte alignment -gdt: - SEG_NULLASM # null seg - SEG_ASM(STA_X|STA_R, 0x0, 0xffffffff) # code seg - SEG_ASM(STA_W, 0x0, 0xffffffff) # data seg - -gdtdesc: - .word (gdtdesc - gdt - 1) # sizeof(gdt) - 1 - .long gdt # address gdt - diff --git a/bootmain.c b/bootmain.c deleted file mode 100644 index 1f20e5b..0000000 --- a/bootmain.c +++ /dev/null @@ -1,96 +0,0 @@ -// Boot loader. -// -// Part of the boot block, along with bootasm.S, which calls bootmain(). -// bootasm.S has put the processor into protected 32-bit mode. -// bootmain() loads an ELF kernel image from the disk starting at -// sector 1 and then jumps to the kernel entry routine. - -#include "types.h" -#include "elf.h" -#include "x86.h" -#include "memlayout.h" - -#define SECTSIZE 512 - -void readseg(uchar*, uint, uint); - -void -bootmain(void) -{ - struct elfhdr *elf; - struct proghdr *ph, *eph; - void (*entry)(void); - uchar* pa; - - elf = (struct elfhdr*)0x10000; // scratch space - - // Read 1st page off disk - readseg((uchar*)elf, 4096, 0); - - // Is this an ELF executable? - if(elf->magic != ELF_MAGIC) - return; // let bootasm.S handle error - - // Load each program segment (ignores ph flags). - ph = (struct proghdr*)((uchar*)elf + elf->phoff); - eph = ph + elf->phnum; - for(; ph < eph; ph++){ - pa = (uchar*)ph->paddr; - readseg(pa, ph->filesz, ph->off); - if(ph->memsz > ph->filesz) - stosb(pa + ph->filesz, 0, ph->memsz - ph->filesz); - } - - // Call the entry point from the ELF header. - // Does not return! - entry = (void(*)(void))(elf->entry); - entry(); -} - -void -waitdisk(void) -{ - // Wait for disk ready. - while((inb(0x1F7) & 0xC0) != 0x40) - ; -} - -// Read a single sector at offset into dst. -void -readsect(void *dst, uint offset) -{ - // Issue command. 
- waitdisk(); - outb(0x1F2, 1); // count = 1 - outb(0x1F3, offset); - outb(0x1F4, offset >> 8); - outb(0x1F5, offset >> 16); - outb(0x1F6, (offset >> 24) | 0xE0); - outb(0x1F7, 0x20); // cmd 0x20 - read sectors - - // Read data. - waitdisk(); - insl(0x1F0, dst, SECTSIZE/4); -} - -// Read 'count' bytes at 'offset' from kernel into physical address 'pa'. -// Might copy more than asked. -void -readseg(uchar* pa, uint count, uint offset) -{ - uchar* epa; - - epa = pa + count; - - // Round down to sector boundary. - pa -= offset % SECTSIZE; - - // Translate from bytes to sectors; kernel starts at sector 1. - offset = (offset / SECTSIZE) + 1; - - // If this is too slow, we could read lots of sectors at a time. - // We'd write more to memory than asked, but it doesn't matter -- - // we load in increasing order. - for(; pa < epa; pa += SECTSIZE, offset++) - readsect(pa, offset); -} diff --git a/console.c b/console.c deleted file mode 100644 index a280d2b..0000000 --- a/console.c +++ /dev/null @@ -1,299 +0,0 @@ -// Console input and output. -// Input is from the keyboard or serial port. -// Output is written to the screen and serial port. - -#include "types.h" -#include "defs.h" -#include "param.h" -#include "traps.h" -#include "spinlock.h" -#include "sleeplock.h" -#include "fs.h" -#include "file.h" -#include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" - -static void consputc(int); - -static int panicked = 0; - -static struct { - struct spinlock lock; - int locking; -} cons; - -static void -printint(int xx, int base, int sign) -{ - static char digits[] = "0123456789abcdef"; - char buf[16]; - int i; - uint x; - - if(sign && (sign = xx < 0)) - x = -xx; - else - x = xx; - - i = 0; - do{ - buf[i++] = digits[x % base]; - }while((x /= base) != 0); - - if(sign) - buf[i++] = '-'; - - while(--i >= 0) - consputc(buf[i]); -} -//PAGEBREAK: 50 - -// Print to the console. only understands %d, %x, %p, %s. -void -cprintf(char *fmt, ...) 
-{ - int i, c, locking; - uint *argp; - char *s; - - locking = cons.locking; - if(locking) - acquire(&cons.lock); - - if (fmt == 0) - panic("null fmt"); - - argp = (uint*)(void*)(&fmt + 1); - for(i = 0; (c = fmt[i] & 0xff) != 0; i++){ - if(c != '%'){ - consputc(c); - continue; - } - c = fmt[++i] & 0xff; - if(c == 0) - break; - switch(c){ - case 'd': - printint(*argp++, 10, 1); - break; - case 'x': - case 'p': - printint(*argp++, 16, 0); - break; - case 's': - if((s = (char*)*argp++) == 0) - s = "(null)"; - for(; *s; s++) - consputc(*s); - break; - case '%': - consputc('%'); - break; - default: - // Print unknown % sequence to draw attention. - consputc('%'); - consputc(c); - break; - } - } - - if(locking) - release(&cons.lock); -} - -void -panic(char *s) -{ - int i; - uint pcs[10]; - - cli(); - cons.locking = 0; - // use lapiccpunum so that we can call panic from mycpu() - cprintf("lapicid %d: panic: ", lapicid()); - cprintf(s); - cprintf("\n"); - getcallerpcs(&s, pcs); - for(i=0; i<10; i++) - cprintf(" %p", pcs[i]); - panicked = 1; // freeze other CPU - for(;;) - ; -} - -//PAGEBREAK: 50 -#define BACKSPACE 0x100 -#define CRTPORT 0x3d4 -static ushort *crt = (ushort*)P2V(0xb8000); // CGA memory - -static void -cgaputc(int c) -{ - int pos; - - // Cursor position: col + 80*row. - outb(CRTPORT, 14); - pos = inb(CRTPORT+1) << 8; - outb(CRTPORT, 15); - pos |= inb(CRTPORT+1); - - if(c == '\n') - pos += 80 - pos%80; - else if(c == BACKSPACE){ - if(pos > 0) --pos; - } else - crt[pos++] = (c&0xff) | 0x0700; // black on white - - if(pos < 0 || pos > 25*80) - panic("pos under/overflow"); - - if((pos/80) >= 24){ // Scroll up. 
- memmove(crt, crt+80, sizeof(crt[0])*23*80); - pos -= 80; - memset(crt+pos, 0, sizeof(crt[0])*(24*80 - pos)); - } - - outb(CRTPORT, 14); - outb(CRTPORT+1, pos>>8); - outb(CRTPORT, 15); - outb(CRTPORT+1, pos); - crt[pos] = ' ' | 0x0700; -} - -void -consputc(int c) -{ - if(panicked){ - cli(); - for(;;) - ; - } - - if(c == BACKSPACE){ - uartputc('\b'); uartputc(' '); uartputc('\b'); - } else - uartputc(c); - cgaputc(c); -} - -#define INPUT_BUF 128 -struct { - char buf[INPUT_BUF]; - uint r; // Read index - uint w; // Write index - uint e; // Edit index -} input; - -#define C(x) ((x)-'@') // Control-x - -void -consoleintr(int (*getc)(void)) -{ - int c, doprocdump = 0; - - acquire(&cons.lock); - while((c = getc()) >= 0){ - switch(c){ - case C('P'): // Process listing. - // procdump() locks cons.lock indirectly; invoke later - doprocdump = 1; - break; - case C('U'): // Kill line. - while(input.e != input.w && - input.buf[(input.e-1) % INPUT_BUF] != '\n'){ - input.e--; - consputc(BACKSPACE); - } - break; - case C('H'): case '\x7f': // Backspace - if(input.e != input.w){ - input.e--; - consputc(BACKSPACE); - } - break; - default: - if(c != 0 && input.e-input.r < INPUT_BUF){ - c = (c == '\r') ? '\n' : c; - input.buf[input.e++ % INPUT_BUF] = c; - consputc(c); - if(c == '\n' || c == C('D') || input.e == input.r+INPUT_BUF){ - input.w = input.e; - wakeup(&input.r); - } - } - break; - } - } - release(&cons.lock); - if(doprocdump) { - procdump(); // now call procdump() wo. cons.lock held - } -} - -int -consoleread(struct inode *ip, char *dst, int n) -{ - uint target; - int c; - - iunlock(ip); - target = n; - acquire(&cons.lock); - while(n > 0){ - while(input.r == input.w){ - if(myproc()->killed){ - release(&cons.lock); - ilock(ip); - return -1; - } - sleep(&input.r, &cons.lock); - } - c = input.buf[input.r++ % INPUT_BUF]; - if(c == C('D')){ // EOF - if(n < target){ - // Save ^D for next time, to make sure - // caller gets a 0-byte result. 
- input.r--; - } - break; - } - *dst++ = c; - --n; - if(c == '\n') - break; - } - release(&cons.lock); - ilock(ip); - - return target - n; -} - -int -consolewrite(struct inode *ip, char *buf, int n) -{ - int i; - - iunlock(ip); - acquire(&cons.lock); - for(i = 0; i < n; i++) - consputc(buf[i] & 0xff); - release(&cons.lock); - ilock(ip); - - return n; -} - -void -consoleinit(void) -{ - initlock(&cons.lock, "console"); - - devsw[CONSOLE].write = consolewrite; - devsw[CONSOLE].read = consoleread; - cons.locking = 1; - - ioapicenable(IRQ_KBD, 0); -} - @@ -1,48 +0,0 @@ -#!/usr/bin/perl - -$| = 1; - -sub writefile($@){ - my ($file, @lines) = @_; - - sleep(1); - open(F, ">$file") || die "open >$file: $!"; - print F @lines; - close(F); -} - -# Cut out #include lines that don't contribute anything. -for($i=0; $i<@ARGV; $i++){ - $file = $ARGV[$i]; - if(!open(F, $file)){ - print STDERR "open $file: $!\n"; - next; - } - @lines = <F>; - close(F); - - $obj = "$file.o"; - $obj =~ s/\.c\.o$/.o/; - system("touch $file"); - - if(system("make CC='gcc -Werror' $obj >/dev/null 2>\&1") != 0){ - print STDERR "make $obj failed: $rv\n"; - next; - } - - system("cp $file =$file"); - for($j=@lines-1; $j>=0; $j--){ - if($lines[$j] =~ /^#include/){ - $old = $lines[$j]; - $lines[$j] = "/* CUT-H */\n"; - writefile($file, @lines); - if(system("make CC='gcc -Werror' $obj >/dev/null 2>\&1") != 0){ - $lines[$j] = $old; - }else{ - print STDERR "$file $old"; - } - } - } - writefile($file, grep {!/CUT-H/} @lines); - system("rm =$file"); -} diff --git a/doc/FU540-C000-v1.0.pdf b/doc/FU540-C000-v1.0.pdf Binary files differnew file mode 100644 index 0000000..5322d67 --- /dev/null +++ b/doc/FU540-C000-v1.0.pdf diff --git a/doc/riscv-calling.pdf b/doc/riscv-calling.pdf Binary files differnew file mode 100644 index 0000000..a3351b1 --- /dev/null +++ b/doc/riscv-calling.pdf diff --git a/doc/riscv-privileged-v1.10.pdf b/doc/riscv-privileged-v1.10.pdf Binary files differnew file mode 100644 index 
0000000..6942fe7 --- /dev/null +++ b/doc/riscv-privileged-v1.10.pdf diff --git a/doc/riscv-spec-v2.2.pdf b/doc/riscv-spec-v2.2.pdf Binary files differ new file mode 100644 index 0000000..e4a4634 --- /dev/null +++ b/doc/riscv-spec-v2.2.pdf diff --git a/doc/virtio-v1.1-csprd01.pdf b/doc/virtio-v1.1-csprd01.pdf Binary files differ new file mode 100644 index 0000000..c7be62b --- /dev/null +++ b/doc/virtio-v1.1-csprd01.pdf diff --git a/dot-bochsrc b/dot-bochsrc deleted file mode 100755 index ba13db7..0000000 --- a/dot-bochsrc +++ /dev/null @@ -1,738 +0,0 @@ -# You may now use double quotes around pathnames, in case -# your pathname includes spaces. - -#======================================================================= -# CONFIG_INTERFACE -# -# The configuration interface is a series of menus or dialog boxes that -# allows you to change all the settings that control Bochs's behavior. -# There are two choices of configuration interface: a text mode version -# called "textconfig" and a graphical version called "wx". The text -# mode version uses stdin/stdout and is always compiled in. The graphical -# version is only available when you use "--with-wx" on the configure -# command. If you do not write a config_interface line, Bochs will -# choose a default for you. -# -# NOTE: if you use the "wx" configuration interface, you must also use -# the "wx" display library. -#======================================================================= -#config_interface: textconfig -#config_interface: wx - -#======================================================================= -# DISPLAY_LIBRARY -# -# The display library is the code that displays the Bochs VGA screen. Bochs -# has a selection of about 10 different display library implementations for -# different platforms. If you run configure with multiple --with-* options, -# the display_library command lets you choose which one you want to run with. 
-# If you do not write a display_library line, Bochs will choose a default for -# you. -# -# The choices are: -# x use X windows interface, cross platform -# win32 use native win32 libraries -# carbon use Carbon library (for MacOS X) -# beos use native BeOS libraries -# macintosh use MacOS pre-10 -# amigaos use native AmigaOS libraries -# sdl use SDL library, cross platform -# svga use SVGALIB library for Linux, allows graphics without X11 -# term text only, uses curses/ncurses library, cross platform -# rfb provides an interface to AT&T's VNC viewer, cross platform -# wx use wxWidgets library, cross platform -# nogui no display at all -# -# NOTE: if you use the "wx" configuration interface, you must also use -# the "wx" display library. -# -# Specific options: -# Some display libraries now support specific option to control their -# behaviour. See the examples below for currently supported options. -#======================================================================= -#display_library: amigaos -#display_library: beos -#display_library: carbon -#display_library: macintosh -#display_library: nogui -#display_library: rfb, options="timeout=60" # time to wait for client -#display_library: sdl, options="fullscreen" # startup in fullscreen mode -#display_library: term -#display_library: win32, options="legacyF12" # use F12 to toggle mouse -#display_library: wx -#display_library: x - -#======================================================================= -# ROMIMAGE: -# The ROM BIOS controls what the PC does when it first powers on. -# Normally, you can use a precompiled BIOS in the source or binary -# distribution called BIOS-bochs-latest. The ROM BIOS is usually loaded -# starting at address 0xf0000, and it is exactly 64k long. -# You can also use the environment variable $BXSHARE to specify the -# location of the BIOS. 
-# The usage of external large BIOS images (up to 512k) at memory top is -# now supported, but we still recommend to use the BIOS distributed with -# Bochs. Now the start address can be calculated from image size. -#======================================================================= -romimage: file=$BXSHARE/BIOS-bochs-latest -#romimage: file=mybios.bin, address=0xfff80000 # 512k at memory top -#romimage: file=mybios.bin # calculate start address from image size - -#======================================================================= -# CPU: -# This defines cpu-related parameters inside Bochs: -# -# COUNT: -# Set the number of processors when Bochs is compiled for SMP emulation. -# Bochs currently supports up to 8 processors. If Bochs is compiled -# without SMP support, it won't accept values different from 1. -# -# IPS: -# Emulated Instructions Per Second. This is the number of IPS that bochs -# is capable of running on your machine. You can recompile Bochs with -# --enable-show-ips option enabled, to find your workstation's capability. -# Measured IPS value will then be logged into your log file or status bar -# (if supported by the gui). -# -# IPS is used to calibrate many time-dependent events within the bochs -# simulation. For example, changing IPS affects the frequency of VGA -# updates, the duration of time before a key starts to autorepeat, and -# the measurement of BogoMips and other benchmarks. 
-# -# Examples: -# Machine Mips -# ________________________________________________________________ -# 2.1Ghz Athlon XP with Linux 2.6/g++ 3.4 12 to 15 Mips -# 1.6Ghz Intel P4 with Win2000/g++ 3.3 5 to 7 Mips -# 650Mhz Athlon K-7 with Linux 2.4.4/egcs-2.91.66 2 to 2.5 Mips -# 400Mhz Pentium II with Linux 2.0.36/egcs-1.0.3 1 to 1.8 Mips -#======================================================================= -cpu: count=2, ips=10000000 - -#======================================================================= -# MEGS -# Set the number of Megabytes of physical memory you want to emulate. -# The default is 32MB, most OS's won't need more than that. -# The maximum amount of memory supported is 2048Mb. -#======================================================================= -#megs: 256 -#megs: 128 -#megs: 64 -megs: 32 -#megs: 16 -#megs: 8 - -#======================================================================= -# OPTROMIMAGE[1-4]: -# You may now load up to 4 optional ROM images. Be sure to use a -# read-only area, typically between C8000 and EFFFF. These optional -# ROM images should not overwrite the rombios (located at -# F0000-FFFFF) and the videobios (located at C0000-C7FFF). -# Those ROM images will be initialized by the bios if they contain -# the right signature (0x55AA) and a valid checksum. 
-# It can also be a convenient way to upload some arbitrary code/data -# in the simulation, that can be retrieved by the boot loader -#======================================================================= -#optromimage1: file=optionalrom.bin, address=0xd0000 -#optromimage2: file=optionalrom.bin, address=0xd1000 -#optromimage3: file=optionalrom.bin, address=0xd2000 -#optromimage4: file=optionalrom.bin, address=0xd3000 - -#optramimage1: file=/path/file1.img, address=0x0010000 -#optramimage2: file=/path/file2.img, address=0x0020000 -#optramimage3: file=/path/file3.img, address=0x0030000 -#optramimage4: file=/path/file4.img, address=0x0040000 - -#======================================================================= -# VGAROMIMAGE -# You now need to load a VGA ROM BIOS into C0000. -#======================================================================= -#vgaromimage: file=bios/VGABIOS-elpin-2.40 -vgaromimage: file=$BXSHARE/VGABIOS-lgpl-latest -#vgaromimage: file=bios/VGABIOS-lgpl-latest-cirrus - -#======================================================================= -# VGA: -# Here you can specify the display extension to be used. With the value -# 'none' you can use standard VGA with no extension. Other supported -# values are 'vbe' for Bochs VBE and 'cirrus' for Cirrus SVGA support. -#======================================================================= -#vga: extension=cirrus -#vga: extension=vbe -vga: extension=none - -#======================================================================= -# FLOPPYA: -# Point this to pathname of floppy image file or device -# This should be of a bootable floppy(image/device) if you're -# booting from 'a' (or 'floppy'). -# -# You can set the initial status of the media to 'ejected' or 'inserted'. 
-# floppya: 2_88=path, status=ejected (2.88M 3.5" floppy) -# floppya: 1_44=path, status=inserted (1.44M 3.5" floppy) -# floppya: 1_2=path, status=ejected (1.2M 5.25" floppy) -# floppya: 720k=path, status=inserted (720K 3.5" floppy) -# floppya: 360k=path, status=inserted (360K 5.25" floppy) -# floppya: 320k=path, status=inserted (320K 5.25" floppy) -# floppya: 180k=path, status=inserted (180K 5.25" floppy) -# floppya: 160k=path, status=inserted (160K 5.25" floppy) -# floppya: image=path, status=inserted (guess type from image size) -# -# The path should be the name of a disk image file. On Unix, you can use a raw -# device name such as /dev/fd0 on Linux. On win32 platforms, use drive letters -# such as a: or b: as the path. The parameter 'image' works with image files -# only. In that case the size must match one of the supported types. -#======================================================================= -floppya: 1_44=/dev/fd0, status=inserted -#floppya: image=../1.44, status=inserted -#floppya: 1_44=/dev/fd0H1440, status=inserted -#floppya: 1_2=../1_2, status=inserted -#floppya: 1_44=a:, status=inserted -#floppya: 1_44=a.img, status=inserted -#floppya: 1_44=/dev/rfd0a, status=inserted - -#======================================================================= -# FLOPPYB: -# See FLOPPYA above for syntax -#======================================================================= -#floppyb: 1_44=b:, status=inserted -floppyb: 1_44=b.img, status=inserted - -#======================================================================= -# ATA0, ATA1, ATA2, ATA3 -# ATA controller for hard disks and cdroms -# -# ata[0-3]: enabled=[0|1], ioaddr1=addr, ioaddr2=addr, irq=number -# -# These options enables up to 4 ata channels. For each channel -# the two base io addresses and the irq must be specified. 
-# -# ata0 and ata1 are enabled by default with the values shown below -# -# Examples: -# ata0: enabled=1, ioaddr1=0x1f0, ioaddr2=0x3f0, irq=14 -# ata1: enabled=1, ioaddr1=0x170, ioaddr2=0x370, irq=15 -# ata2: enabled=1, ioaddr1=0x1e8, ioaddr2=0x3e0, irq=11 -# ata3: enabled=1, ioaddr1=0x168, ioaddr2=0x360, irq=9 -#======================================================================= -ata0: enabled=1, ioaddr1=0x1f0, ioaddr2=0x3f0, irq=14 -ata1: enabled=1, ioaddr1=0x170, ioaddr2=0x370, irq=15 -ata2: enabled=0, ioaddr1=0x1e8, ioaddr2=0x3e0, irq=11 -ata3: enabled=0, ioaddr1=0x168, ioaddr2=0x360, irq=9 - -#======================================================================= -# ATA[0-3]-MASTER, ATA[0-3]-SLAVE -# -# This defines the type and characteristics of all attached ata devices: -# type= type of attached device [disk|cdrom] -# mode= only valid for disks [flat|concat|external|dll|sparse|vmware3] -# mode= only valid for disks [undoable|growing|volatile] -# path= path of the image -# cylinders= only valid for disks -# heads= only valid for disks -# spt= only valid for disks -# status= only valid for cdroms [inserted|ejected] -# biosdetect= type of biosdetection [none|auto], only for disks on ata0 [cmos] -# translation=type of translation of the bios, only for disks [none|lba|large|rechs|auto] -# model= string returned by identify device command -# journal= optional filename of the redolog for undoable and volatile disks -# -# Point this at a hard disk image file, cdrom iso file, or physical cdrom -# device. To create a hard disk image, try running bximage. It will help you -# choose the size and then suggest a line that works with it. -# -# In UNIX it may be possible to use a raw device as a Bochs hard disk, -# but WE DON'T RECOMMEND IT. In Windows there is no easy way. -# -# In windows, the drive letter + colon notation should be used for cdroms. -# Depending on versions of windows and drivers, you may only be able to -# access the "first" cdrom in the system. 
On MacOSX, use path="drive" -# to access the physical drive. -# -# The path is always mandatory. For flat hard disk images created with -# bximage geometry autodetection can be used (cylinders=0 -> cylinders are -# calculated using heads=16 and spt=63). For other hard disk images and modes -# the cylinders, heads, and spt are mandatory. -# -# Default values are: -# mode=flat, biosdetect=auto, translation=auto, model="Generic 1234" -# -# The biosdetect option has currently no effect on the bios -# -# Examples: -# ata0-master: type=disk, mode=flat, path=10M.sample, cylinders=306, heads=4, spt=17 -# ata0-slave: type=disk, mode=flat, path=20M.sample, cylinders=615, heads=4, spt=17 -# ata1-master: type=disk, mode=flat, path=30M.sample, cylinders=615, heads=6, spt=17 -# ata1-slave: type=disk, mode=flat, path=46M.sample, cylinders=940, heads=6, spt=17 -# ata2-master: type=disk, mode=flat, path=62M.sample, cylinders=940, heads=8, spt=17 -# ata2-slave: type=disk, mode=flat, path=112M.sample, cylinders=900, heads=15, spt=17 -# ata3-master: type=disk, mode=flat, path=483M.sample, cylinders=1024, heads=15, spt=63 -# ata3-slave: type=cdrom, path=iso.sample, status=inserted -#======================================================================= -ata0-master: type=disk, mode=flat, path="xv6.img", cylinders=100, heads=10, spt=10 -ata0-slave: type=disk, mode=flat, path="fs.img", cylinders=1024, heads=1, spt=1 -#ata0-slave: type=cdrom, path=D:, status=inserted -#ata0-slave: type=cdrom, path=/dev/cdrom, status=inserted -#ata0-slave: type=cdrom, path="drive", status=inserted -#ata0-slave: type=cdrom, path=/dev/rcd0d, status=inserted - -#======================================================================= -# BOOT: -# This defines the boot sequence. Now you can specify up to 3 boot drives. 
-# You can either boot from 'floppy', 'disk' or 'cdrom' -# legacy 'a' and 'c' are also supported -# Examples: -# boot: floppy -# boot: disk -# boot: cdrom -# boot: c -# boot: a -# boot: cdrom, floppy, disk -#======================================================================= -#boot: floppy -boot: disk - -#======================================================================= -# CLOCK: -# This defines the parameters of the clock inside Bochs: -# -# SYNC: -# TO BE COMPLETED (see Greg explanation in feature request #536329) -# -# TIME0: -# Specifies the start (boot) time of the virtual machine. Use a time -# value as returned by the time(2) system call. If no time0 value is -# set or if time0 equal to 1 (special case) or if time0 equal 'local', -# the simulation will be started at the current local host time. -# If time0 equal to 2 (special case) or if time0 equal 'utc', -# the simulation will be started at the current utc time. -# -# Syntax: -# clock: sync=[none|slowdown|realtime|both], time0=[timeValue|local|utc] -# -# Example: -# clock: sync=none, time0=local # Now (localtime) -# clock: sync=slowdown, time0=315529200 # Tue Jan 1 00:00:00 1980 -# clock: sync=none, time0=631148400 # Mon Jan 1 00:00:00 1990 -# clock: sync=realtime, time0=938581955 # Wed Sep 29 07:12:35 1999 -# clock: sync=realtime, time0=946681200 # Sat Jan 1 00:00:00 2000 -# clock: sync=none, time0=1 # Now (localtime) -# clock: sync=none, time0=utc # Now (utc/gmt) -# -# Default value are sync=none, time0=local -#======================================================================= -#clock: sync=none, time0=local - - -#======================================================================= -# FLOPPY_BOOTSIG_CHECK: disabled=[0|1] -# Enables or disables the 0xaa55 signature check on boot floppies -# Defaults to disabled=0 -# Examples: -# floppy_bootsig_check: disabled=0 -# floppy_bootsig_check: disabled=1 -#======================================================================= 
-#floppy_bootsig_check: disabled=1 -floppy_bootsig_check: disabled=0 - -#======================================================================= -# LOG: -# Give the path of the log file you'd like Bochs debug and misc. verbiage -# to be written to. If you don't use this option or set the filename to -# '-' the output is written to the console. If you really don't want it, -# make it "/dev/null" (Unix) or "nul" (win32). :^( -# -# Examples: -# log: ./bochs.out -# log: /dev/tty -#======================================================================= -#log: /dev/null -log: bochsout.txt - -#======================================================================= -# LOGPREFIX: -# This handles the format of the string prepended to each log line. -# You may use those special tokens : -# %t : 11 decimal digits timer tick -# %i : 8 hexadecimal digits of cpu current eip (ignored in SMP configuration) -# %e : 1 character event type ('i'nfo, 'd'ebug, 'p'anic, 'e'rror) -# %d : 5 characters string of the device, between brackets -# -# Default : %t%e%d -# Examples: -# logprefix: %t-%e-@%i-%d -# logprefix: %i%e%d -#======================================================================= -#logprefix: %t%e%d - -#======================================================================= -# LOG CONTROLS -# -# Bochs now has four severity levels for event logging. -# panic: cannot proceed. If you choose to continue after a panic, -# don't be surprised if you get strange behavior or crashes. -# error: something went wrong, but it is probably safe to continue the -# simulation. -# info: interesting or useful messages. -# debug: messages useful only when debugging the code. This may -# spit out thousands per second. -# -# For events of each level, you can choose to crash, report, or ignore. -# TODO: allow choice based on the facility: e.g. crash on panics from -# everything except the cdrom, and only report those. 
-# -# If you are experiencing many panics, it can be helpful to change -# the panic action to report instead of fatal. However, be aware -# that anything executed after a panic is uncharted territory and can -# cause bochs to become unstable. The panic is a "graceful exit," so -# if you disable it you may get a spectacular disaster instead. -#======================================================================= -panic: action=ask -error: action=report -info: action=report -debug: action=ignore -#pass: action=fatal - -#======================================================================= -# DEBUGGER_LOG: -# Give the path of the log file you'd like Bochs to log debugger output. -# If you really don't want it, make it /dev/null or '-'. :^( -# -# Examples: -# debugger_log: ./debugger.out -#======================================================================= -#debugger_log: /dev/null -#debugger_log: debugger.out -debugger_log: - - -#======================================================================= -# COM1, COM2, COM3, COM4: -# This defines a serial port (UART type 16550A). In the 'term' you can specify -# a device to use as com1. This can be a real serial line, or a pty. To use -# a pty (under X/Unix), create two windows (xterms, usually). One of them will -# run bochs, and the other will act as com1. Find out the tty the com1 -# window using the `tty' command, and use that as the `dev' parameter. -# Then do `sleep 1000000' in the com1 window to keep the shell from -# messing with things, and run bochs in the other window. Serial I/O to -# com1 (port 0x3f8) will all go to the other window. -# Other serial modes are 'null' (no input/output), 'file' (output to a file -# specified as the 'dev' parameter), 'raw' (use the real serial port - under -# construction for win32), 'mouse' (standard serial mouse - requires -# mouse option setting 'type=serial' or 'type=serial_wheel') and 'socket' -# (connect a networking socket). 
-# -# Examples: -# com1: enabled=1, mode=null -# com1: enabled=1, mode=mouse -# com2: enabled=1, mode=file, dev=serial.out -# com3: enabled=1, mode=raw, dev=com1 -# com3: enabled=1, mode=socket, dev=localhost:8888 -#======================================================================= -#com1: enabled=1, mode=term, dev=/dev/ttyp9 - - -#======================================================================= -# PARPORT1, PARPORT2: -# This defines a parallel (printer) port. When turned on and an output file is -# defined the emulated printer port sends characters printed by the guest OS -# into the output file. On some platforms a device filename can be used to -# send the data to the real parallel port (e.g. "/dev/lp0" on Linux, "lpt1" on -# win32 platforms). -# -# Examples: -# parport1: enabled=1, file="parport.out" -# parport2: enabled=1, file="/dev/lp0" -# parport1: enabled=0 -#======================================================================= -parport1: enabled=1, file="/dev/stdout" - -#======================================================================= -# SB16: -# This defines the SB16 sound emulation. It can have several of the -# following properties. -# All properties are in the format sb16: property=value -# midi: The filename is where the midi data is sent. This can be a -# device or just a file if you want to record the midi data. -# midimode: -# 0=no data -# 1=output to device (system dependent. midi denotes the device driver) -# 2=SMF file output, including headers -# 3=output the midi data stream to the file (no midi headers and no -# delta times, just command and data bytes) -# wave: This is the device/file where wave output is stored -# wavemode: -# 0=no data -# 1=output to device (system dependent. wave denotes the device driver) -# 2=VOC file output, incl. headers -# 3=output the raw wave stream to the file -# log: The file to write the sb16 emulator messages to. 
-# loglevel: -# 0=no log -# 1=resource changes, midi program and bank changes -# 2=severe errors -# 3=all errors -# 4=all errors plus all port accesses -# 5=all errors and port accesses plus a lot of extra info -# dmatimer: -# microseconds per second for a DMA cycle. Make it smaller to fix -# non-continuous sound. 750000 is usually a good value. This needs a -# reasonably correct setting for the IPS parameter of the CPU option. -# -# For an example look at the next line: -#======================================================================= - -#sb16: midimode=1, midi=/dev/midi00, wavemode=1, wave=/dev/dsp, loglevel=2, log=sb16.log, dmatimer=600000 - -#======================================================================= -# VGA_UPDATE_INTERVAL: -# Video memory is scanned for updates and screen updated every so many -# virtual seconds. The default is 40000, about 25Hz. Keep in mind that -# you must tweak the 'cpu: ips=N' directive to be as close to the number -# of emulated instructions-per-second your workstation can do, for this -# to be accurate. -# -# Examples: -# vga_update_interval: 250000 -#======================================================================= -vga_update_interval: 300000 - -# using for Winstone '98 tests -#vga_update_interval: 100000 - -#======================================================================= -# KEYBOARD_SERIAL_DELAY: -# Approximate time in microseconds that it takes one character to -# be transfered from the keyboard to controller over the serial path. -# Examples: -# keyboard_serial_delay: 200 -#======================================================================= -keyboard_serial_delay: 250 - -#======================================================================= -# KEYBOARD_PASTE_DELAY: -# Approximate time in microseconds between attempts to paste -# characters to the keyboard controller. This leaves time for the -# guest os to deal with the flow of characters. 
The ideal setting -# depends on how your operating system processes characters. The -# default of 100000 usec (.1 seconds) was chosen because it works -# consistently in Windows. -# -# If your OS is losing characters during a paste, increase the paste -# delay until it stops losing characters. -# -# Examples: -# keyboard_paste_delay: 100000 -#======================================================================= -keyboard_paste_delay: 100000 - -#======================================================================= -# MOUSE: -# This option prevents Bochs from creating mouse "events" unless a mouse -# is enabled. The hardware emulation itself is not disabled by this. -# You can turn the mouse on by setting enabled to 1, or turn it off by -# setting enabled to 0. Unless you have a particular reason for enabling -# the mouse by default, it is recommended that you leave it off. -# You can also toggle the mouse usage at runtime (control key + middle -# mouse button on X11, SDL, wxWidgets and Win32). -# With the mouse type option you can select the type of mouse to emulate. -# The default value is 'ps2'. The other choices are 'imps2' (wheel mouse -# on PS/2), 'serial', 'serial_wheel' (one com port requires setting -# 'mode=mouse') and 'usb' (3-button mouse - one of the USB ports must be -# connected with the 'mouse' device - requires PCI and USB support). -# -# Examples: -# mouse: enabled=1 -# mouse: enabled=1, type=imps2 -# mouse: enabled=1, type=serial -# mouse: enabled=0 -#======================================================================= -mouse: enabled=0 - -#======================================================================= -# private_colormap: Request that the GUI create and use it's own -# non-shared colormap. This colormap will be used -# when in the bochs window. If not enabled, a -# shared colormap scheme may be used. Not implemented -# on all GUI's. 
-# -# Examples: -# private_colormap: enabled=1 -# private_colormap: enabled=0 -#======================================================================= -private_colormap: enabled=0 - -#======================================================================= -# fullscreen: ONLY IMPLEMENTED ON AMIGA -# Request that Bochs occupy the entire screen instead of a -# window. -# -# Examples: -# fullscreen: enabled=0 -# fullscreen: enabled=1 -#======================================================================= -#fullscreen: enabled=0 -#screenmode: name="sample" - -#======================================================================= -# ne2k: NE2000 compatible ethernet adapter -# -# Examples: -# ne2k: ioaddr=IOADDR, irq=IRQ, mac=MACADDR, ethmod=MODULE, ethdev=DEVICE, script=SCRIPT -# -# ioaddr, irq: You probably won't need to change ioaddr and irq, unless there -# are IRQ conflicts. -# -# mac: The MAC address MUST NOT match the address of any machine on the net. -# Also, the first byte must be an even number (bit 0 set means a multicast -# address), and you cannot use ff:ff:ff:ff:ff:ff because that's the broadcast -# address. For the ethertap module, you must use fe:fd:00:00:00:01. There may -# be other restrictions too. To be safe, just use the b0:c4... address. -# -# ethdev: The ethdev value is the name of the network interface on your host -# platform. On UNIX machines, you can get the name by running ifconfig. On -# Windows machines, you must run niclist to get the name of the ethdev. -# Niclist source code is in misc/niclist.c and it is included in Windows -# binary releases. -# -# script: The script value is optional, and is the name of a script that -# is executed after bochs initialize the network interface. You can use -# this script to configure this network interface, or enable masquerading. -# This is mainly useful for the tun/tap devices that only exist during -# Bochs execution. 
The network interface name is supplied to the script -# as first parameter -# -# If you don't want to make connections to any physical networks, -# you can use the following 'ethmod's to simulate a virtual network. -# null: All packets are discarded, but logged to a few files. -# arpback: ARP is simulated. Disabled by default. -# vde: Virtual Distributed Ethernet -# vnet: ARP, ICMP-echo(ping), DHCP and read/write TFTP are simulated. -# The virtual host uses 192.168.10.1. -# DHCP assigns 192.168.10.2 to the guest. -# TFTP uses the ethdev value for the root directory and doesn't -# overwrite files. -# -#======================================================================= -# ne2k: ioaddr=0x240, irq=9, mac=fe:fd:00:00:00:01, ethmod=fbsd, ethdev=en0 #macosx -# ne2k: ioaddr=0x240, irq=9, mac=b0:c4:20:00:00:00, ethmod=fbsd, ethdev=xl0 -# ne2k: ioaddr=0x240, irq=9, mac=b0:c4:20:00:00:00, ethmod=linux, ethdev=eth0 -# ne2k: ioaddr=0x240, irq=9, mac=b0:c4:20:00:00:01, ethmod=win32, ethdev=MYCARD -# ne2k: ioaddr=0x240, irq=9, mac=fe:fd:00:00:00:01, ethmod=tap, ethdev=tap0 -# ne2k: ioaddr=0x240, irq=9, mac=fe:fd:00:00:00:01, ethmod=tuntap, ethdev=/dev/net/tun0, script=./tunconfig -# ne2k: ioaddr=0x240, irq=9, mac=b0:c4:20:00:00:01, ethmod=null, ethdev=eth0 -# ne2k: ioaddr=0x240, irq=9, mac=b0:c4:20:00:00:01, ethmod=vde, ethdev="/tmp/vde.ctl" -# ne2k: ioaddr=0x240, irq=9, mac=b0:c4:20:00:00:01, ethmod=vnet, ethdev="c:/temp" - -#======================================================================= -# KEYBOARD_MAPPING: -# This enables a remap of a physical localized keyboard to a -# virtualized us keyboard, as the PC architecture expects. -# If enabled, the keymap file must be specified. 
-# -# Examples: -# keyboard_mapping: enabled=1, map=gui/keymaps/x11-pc-de.map -#======================================================================= -keyboard_mapping: enabled=0, map= - -#======================================================================= -# KEYBOARD_TYPE: -# Type of keyboard return by a "identify keyboard" command to the -# keyboard controler. It must be one of "xt", "at" or "mf". -# Defaults to "mf". It should be ok for almost everybody. A known -# exception is french macs, that do have a "at"-like keyboard. -# -# Examples: -# keyboard_type: mf -#======================================================================= -#keyboard_type: mf - -#======================================================================= -# USER_SHORTCUT: -# This defines the keyboard shortcut to be sent when you press the "user" -# button in the headerbar. The shortcut string is a combination of maximum -# 3 key names (listed below) separated with a '-' character. The old-style -# syntax (without the '-') still works for the key combinations supported -# in Bochs 2.2.1. -# Valid key names: -# "alt", "bksl", "bksp", "ctrl", "del", "down", "end", "enter", "esc", -# "f1", ... "f12", "home", "ins", "left", "menu", "minus", "pgdwn", "pgup", -# "plus", "right", "shift", "space", "tab", "up", and "win". -# -# Example: -# user_shortcut: keys=ctrl-alt-del -#======================================================================= -#user_shortcut: keys=ctrl-alt-del - -#======================================================================= -# I440FXSUPPORT: -# This option controls the presence of the i440FX PCI chipset. You can -# also specify the devices connected to PCI slots. Up to 5 slots are -# available now. These devices are currently supported: ne2k, pcivga, -# pcidev and pcipnic. If Bochs is compiled with Cirrus SVGA support -# you'll have the additional choice 'cirrus'. 
-# -# Example: -# i440fxsupport: enabled=1, slot1=pcivga, slot2=ne2k -#======================================================================= -#i440fxsupport: enabled=1 - -#======================================================================= -# USB1: -# This option controls the presence of the USB root hub which is a part -# of the i440FX PCI chipset. With the portX option you can connect devices -# to the hub (currently supported: 'mouse' and 'keypad'). If you connect -# the mouse to one of the ports and use the mouse option 'type=usb' you'll -# have a 3-button USB mouse. -# -# Example: -# usb1: enabled=1, port1=mouse, port2=keypad -#======================================================================= -#usb1: enabled=1 - -#======================================================================= -# CMOSIMAGE: -# This defines image file that can be loaded into the CMOS RAM at startup. -# The rtc_init parameter controls whether initialize the RTC with values stored -# in the image. By default the time0 argument given to the clock option is used. -# With 'rtc_init=image' the image is the source for the initial time. -# -# Example: -# cmosimage: file=cmos.img, rtc_init=image -#======================================================================= -#cmosimage: file=cmos.img, rtc_init=time0 - -#======================================================================= -# other stuff -#======================================================================= -#magic_break: enabled=1 -#load32bitOSImage: os=nullkernel, path=../kernel.img, iolog=../vga_io.log -#load32bitOSImage: os=linux, path=../linux.img, iolog=../vga_io.log, initrd=../initrd.img -#text_snapshot_check: enable - -#------------------------- -# PCI host device mapping -#------------------------- -#pcidev: vendor=0x1234, device=0x5678 - -#======================================================================= -# GDBSTUB: -# Enable GDB stub. See user documentation for details. -# Default value is enabled=0. 
-#======================================================================= -#gdbstub: enabled=0, port=1234, text_base=0, data_base=0, bss_base=0 - -#======================================================================= -# IPS: -# The IPS directive is DEPRECATED. Use the parameter IPS of the CPU -# directive instead. -#======================================================================= -#ips: 10000000 - -#======================================================================= -# for Macintosh, use the style of pathnames in the following -# examples. -# -# vgaromimage: :bios:VGABIOS-elpin-2.40 -# romimage: file=:bios:BIOS-bochs-latest, address=0xf0000 -# floppya: 1_44=[fd:], status=inserted -#======================================================================= diff --git a/entry.S b/entry.S deleted file mode 100644 index bc79bab..0000000 --- a/entry.S +++ /dev/null @@ -1,68 +0,0 @@ -# The xv6 kernel starts executing in this file. This file is linked with -# the kernel C code, so it can refer to kernel symbols such as main(). -# The boot block (bootasm.S and bootmain.c) jumps to entry below. - -# Multiboot header, for multiboot boot loaders like GNU Grub. -# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html -# -# Using GRUB 2, you can boot xv6 from a file stored in a -# Linux file system by copying kernel or kernelmemfs to /boot -# and then adding this menu entry: -# -# menuentry "xv6" { -# insmod ext2 -# set root='(hd0,msdos1)' -# set kernel='/boot/kernel' -# echo "Loading ${kernel}..." -# multiboot ${kernel} ${kernel} -# boot -# } - -#include "asm.h" -#include "memlayout.h" -#include "mmu.h" -#include "param.h" - -# Multiboot header. Data to direct multiboot loader. -.p2align 2 -.text -.globl multiboot_header -multiboot_header: - #define magic 0x1badb002 - #define flags 0 - .long magic - .long flags - .long (-magic-flags) - -# By convention, the _start symbol specifies the ELF entry point. 
-# Since we haven't set up virtual memory yet, our entry point is -# the physical address of 'entry'. -.globl _start -_start = V2P_WO(entry) - -# Entering xv6 on boot processor, with paging off. -.globl entry -entry: - # Turn on page size extension for 4Mbyte pages - movl %cr4, %eax - orl $(CR4_PSE), %eax - movl %eax, %cr4 - # Set page directory - movl $(V2P_WO(entrypgdir)), %eax - movl %eax, %cr3 - # Turn on paging. - movl %cr0, %eax - orl $(CR0_PG|CR0_WP), %eax - movl %eax, %cr0 - - # Set up the stack pointer. - movl $(stack + KSTACKSIZE), %esp - - # Jump to main(), and switch to executing at - # high addresses. The indirect call is needed because - # the assembler produces a PC-relative instruction - # for a direct jump. - mov $main, %eax - jmp *%eax - -.comm stack, KSTACKSIZE diff --git a/entryother.S b/entryother.S deleted file mode 100644 index a3b6dc2..0000000 --- a/entryother.S +++ /dev/null @@ -1,93 +0,0 @@ -#include "asm.h" -#include "memlayout.h" -#include "mmu.h" - -# Each non-boot CPU ("AP") is started up in response to a STARTUP -# IPI from the boot CPU. Section B.4.2 of the Multi-Processor -# Specification says that the AP will start in real mode with CS:IP -# set to XY00:0000, where XY is an 8-bit value sent with the -# STARTUP. Thus this code must start at a 4096-byte boundary. -# -# Because this code sets DS to zero, it must sit -# at an address in the low 2^16 bytes. -# -# Startothers (in main.c) sends the STARTUPs one at a time. -# It copies this code (start) at 0x7000. It puts the address of -# a newly allocated per-core stack in start-4,the address of the -# place to jump to (mpenter) in start-8, and the physical address -# of entrypgdir in start-12. -# -# This code combines elements of bootasm.S and entry.S. - -.code16 -.globl start -start: - cli - - # Zero data segment registers DS, ES, and SS. - xorw %ax,%ax - movw %ax,%ds - movw %ax,%es - movw %ax,%ss - - # Switch from real to protected mode. 
Use a bootstrap GDT that makes - # virtual addresses map directly to physical addresses so that the - # effective memory map doesn't change during the transition. - lgdt gdtdesc - movl %cr0, %eax - orl $CR0_PE, %eax - movl %eax, %cr0 - - # Complete the transition to 32-bit protected mode by using a long jmp - # to reload %cs and %eip. The segment descriptors are set up with no - # translation, so that the mapping is still the identity mapping. - ljmpl $(SEG_KCODE<<3), $(start32) - -//PAGEBREAK! -.code32 # Tell assembler to generate 32-bit code now. -start32: - # Set up the protected-mode data segment registers - movw $(SEG_KDATA<<3), %ax # Our data segment selector - movw %ax, %ds # -> DS: Data Segment - movw %ax, %es # -> ES: Extra Segment - movw %ax, %ss # -> SS: Stack Segment - movw $0, %ax # Zero segments not ready for use - movw %ax, %fs # -> FS - movw %ax, %gs # -> GS - - # Turn on page size extension for 4Mbyte pages - movl %cr4, %eax - orl $(CR4_PSE), %eax - movl %eax, %cr4 - # Use entrypgdir as our initial page table - movl (start-12), %eax - movl %eax, %cr3 - # Turn on paging. 
- movl %cr0, %eax - orl $(CR0_PE|CR0_PG|CR0_WP), %eax - movl %eax, %cr0 - - # Switch to the stack allocated by startothers() - movl (start-4), %esp - # Call mpenter() - call *(start-8) - - movw $0x8a00, %ax - movw %ax, %dx - outw %ax, %dx - movw $0x8ae0, %ax - outw %ax, %dx -spin: - jmp spin - -.p2align 2 -gdt: - SEG_NULLASM - SEG_ASM(STA_X|STA_R, 0, 0xffffffff) - SEG_ASM(STA_W, 0, 0xffffffff) - - -gdtdesc: - .word (gdtdesc - gdt - 1) - .long gdt - @@ -1,114 +0,0 @@ -#include "types.h" -#include "param.h" -#include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "defs.h" -#include "x86.h" -#include "elf.h" - -int -exec(char *path, char **argv) -{ - char *s, *last; - int i, off; - uint argc, sz, sp, ustack[3+MAXARG+1]; - struct elfhdr elf; - struct inode *ip; - struct proghdr ph; - pde_t *pgdir, *oldpgdir; - struct proc *curproc = myproc(); - - begin_op(); - - if((ip = namei(path)) == 0){ - end_op(); - cprintf("exec: fail\n"); - return -1; - } - ilock(ip); - pgdir = 0; - - // Check ELF header - if(readi(ip, (char*)&elf, 0, sizeof(elf)) != sizeof(elf)) - goto bad; - if(elf.magic != ELF_MAGIC) - goto bad; - - if((pgdir = setupkvm()) == 0) - goto bad; - - // Load program into memory. - sz = 0; - for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){ - if(readi(ip, (char*)&ph, off, sizeof(ph)) != sizeof(ph)) - goto bad; - if(ph.type != ELF_PROG_LOAD) - continue; - if(ph.memsz < ph.filesz) - goto bad; - if(ph.vaddr + ph.memsz < ph.vaddr) - goto bad; - if((sz = allocuvm(pgdir, sz, ph.vaddr + ph.memsz)) == 0) - goto bad; - if(ph.vaddr % PGSIZE != 0) - goto bad; - if(loaduvm(pgdir, (char*)ph.vaddr, ip, ph.off, ph.filesz) < 0) - goto bad; - } - iunlockput(ip); - end_op(); - ip = 0; - - // Allocate two pages at the next page boundary. - // Make the first inaccessible. Use the second as the user stack. 
- sz = PGROUNDUP(sz); - if((sz = allocuvm(pgdir, sz, sz + 2*PGSIZE)) == 0) - goto bad; - clearpteu(pgdir, (char*)(sz - 2*PGSIZE)); - sp = sz; - - // Push argument strings, prepare rest of stack in ustack. - for(argc = 0; argv[argc]; argc++) { - if(argc >= MAXARG) - goto bad; - sp = (sp - (strlen(argv[argc]) + 1)) & ~3; - if(copyout(pgdir, sp, argv[argc], strlen(argv[argc]) + 1) < 0) - goto bad; - ustack[3+argc] = sp; - } - ustack[3+argc] = 0; - - ustack[0] = 0xffffffff; // fake return PC - ustack[1] = argc; - ustack[2] = sp - (argc+1)*4; // argv pointer - - sp -= (3+argc+1) * 4; - if(copyout(pgdir, sp, ustack, (3+argc+1)*4) < 0) - goto bad; - - // Save program name for debugging. - for(last=s=path; *s; s++) - if(*s == '/') - last = s+1; - safestrcpy(curproc->name, last, sizeof(curproc->name)); - - // Commit to the user image. - oldpgdir = curproc->pgdir; - curproc->pgdir = pgdir; - curproc->sz = sz; - curproc->tf->eip = elf.entry; // main - curproc->tf->esp = sp; - switchuvm(curproc); - freevm(oldpgdir); - return 0; - - bad: - if(pgdir) - freevm(pgdir); - if(ip){ - iunlockput(ip); - end_op(); - } - return -1; -} diff --git a/gdbutil b/gdbutil deleted file mode 100644 index e0c362f..0000000 --- a/gdbutil +++ /dev/null @@ -1,291 +0,0 @@ -# -*- gdb-script -*- - -# Utility functions to pretty-print x86 segment/interrupt descriptors. -# To load this file, run "source gdbutil" in gdb. -# printdesc and printdescs are the main entry points. 
- -# IA32 2007, Volume 3A, Table 3-2 -set $STS_T16A = 0x1 -set $STS_LDT = 0x2 -set $STS_T16B = 0x3 -set $STS_CG16 = 0x4 -set $STS_TG = 0x5 -set $STS_IG16 = 0x6 -set $STS_TG16 = 0x7 -set $STS_T32A = 0x9 -set $STS_T32B = 0xB -set $STS_CG32 = 0xC -set $STS_IG32 = 0xE -set $STS_TG32 = 0xF - -define outputsts - while 1 - if $arg0 == $STS_T16A - echo STS_T16A - loop_break - end - if $arg0 == $STS_LDT - echo STS_LDT\ - loop_break - end - if $arg0 == $STS_T16B - echo STS_T16B - loop_break - end - if $arg0 == $STS_CG16 - echo STS_CG16 - loop_break - end - if $arg0 == $STS_TG - echo STS_TG\ \ - loop_break - end - if $arg0 == $STS_IG16 - echo STS_IG16 - loop_break - end - if $arg0 == $STS_TG16 - echo STS_TG16 - loop_break - end - if $arg0 == $STS_T32A - echo STS_T32A - loop_break - end - if $arg0 == $STS_T32B - echo STS_T32B - loop_break - end - if $arg0 == $STS_CG32 - echo STS_CG32 - loop_break - end - if $arg0 == $STS_IG32 - echo STS_IG32 - loop_break - end - if $arg0 == $STS_TG32 - echo STS_TG32 - loop_break - end - echo Reserved - loop_break - end -end - -# IA32 2007, Volume 3A, Table 3-1 -set $STA_X = 0x8 -set $STA_E = 0x4 -set $STA_C = 0x4 -set $STA_W = 0x2 -set $STA_R = 0x2 -set $STA_A = 0x1 - -define outputsta - if $arg0 & $STA_X - # Code segment - echo code - if $arg0 & $STA_C - echo |STA_C - end - if $arg0 & $STA_R - echo |STA_R - end - else - # Data segment - echo data - if $arg0 & $STA_E - echo |STA_E - end - if $arg0 & $STA_W - echo |STA_W - end - end - if $arg0 & $STA_A - echo |STA_A - else - printf " " - end -end - -# xv6-specific -set $SEG_KCODE = 1 -set $SEG_KDATA = 2 -set $SEG_KCPU = 3 -set $SEG_UCODE = 4 -set $SEG_UDATA = 5 -set $SEG_TSS = 6 - -define outputcs - if ($arg0 & 4) == 0 - if $arg0 >> 3 == $SEG_KCODE - printf "SEG_KCODE<<3" - end - if $arg0 >> 3 == $SEG_KDATA - printf "SEG_KDATA<<3" - end - if $arg0 >> 3 == $SEG_KCPU - printf "SEG_KCPU<<3" - end - if $arg0 >> 3 == $SEG_UCODE - printf "SEG_UCODE<<3" - end - if $arg0 >> 3 == $SEG_UDATA - printf 
"SEG_UDATA<<3" - end - if $arg0 >> 3 == $SEG_TSS - printf "SEG_TSS<<3" - end - if ($arg0 >> 3 < 1) + ($arg0 >> 3 > 6) - printf "GDT[%d]", $arg0 >> 3 - end - else - printf "LDT[%d]", $arg0 >> 3 - end - if ($arg0 & 3) > 0 - printf "|" - outputdpl ($arg0&3) - end -end - -define outputdpl - if $arg0 == 0 - printf "DPL_KERN" - else - if $arg0 == 3 - printf "DPL_USER" - else - printf "DPL%d", $arg0 - end - end -end - -define printdesc - if $argc != 1 - echo Usage: printdesc expr - else - _printdesc ((uint*)&($arg0))[0] ((uint*)&($arg0))[1] - printf "\n" - end -end - -document printdesc -Print an x86 segment or gate descriptor. -printdesc EXPR -EXPR must evaluate to a descriptor value. It can be of any C type. -end - -define _printdesc - _printdesc1 $arg0 $arg1 ($arg1>>15&1) ($arg1>>13&3) ($arg1>>12&1) ($arg1>>8&15) -end - -define _printdesc1 - # 2:P 3:DPL 4:S 5:Type - if $arg2 == 0 - printf "P = 0 (Not present)" - else - printf "type = " - if $arg4 == 0 - # System segment - outputsts $arg5 - printf " (0x%x) ", $arg5 - _printsysdesc $arg0 $arg1 $arg5 - else - # Code/data segment - outputsta $arg5 - printf " " - _printsegdesc $arg0 $arg1 - end - - printf " DPL = " - outputdpl $arg3 - printf " (%d)", $arg3 - end -end - -define _printsysdesc - # 2:Type - # GDB's || is buggy - if ($arg2 == $STS_TG) + (($arg2&7) == $STS_IG16) + (($arg2&7) == $STS_TG16) - # Gate descriptor - _printgate $arg2 ($arg0>>16) ($arg0&0xFFFF) ($arg1>>16) - else - # System segment descriptor - _printsegdesc $arg0 $arg1 - end -end - -define _printgate - # IA32 2007, Voume 3A, Figure 5-2 - # 0:Type 1:CS 2:Offset 15..0 3:Offset 31..16 - printf "CS = " - outputcs $arg1 - printf " (%d)", $arg1 - - if (($arg0&7) == $STS_IG16) + (($arg0&7) == $STS_TG16) - printf " Offset = " - output/a $arg3 << 16 | $arg2 - end -end - -define _printsegdesc - # IA32 20007, Volume 3A, Figure 3-8 and Figure 4-1 - _printsegdesc1 ($arg0>>16) ($arg1&0xFF) ($arg1>>24) ($arg0&0xFFFF) ($arg1>>16&15) ($arg1>>23&1) - if ($arg1>>12&1) == 
1 - printf " AVL = %d", $arg1>>20&1 - if ($arg1>>11&1) == 0 - # Data segment - if ($arg1>>22&1) == 0 - printf " B = small (0) " - else - printf " B = big (1) " - end - else - # Code segment - printf " D = " - if ($arg1>>22&1) == 0 - printf "16-bit (0)" - else - printf "32-bit (1)" - end - end - end -end - -define _printsegdesc1 - # 0:Base 0..15 1:Base 16..23 2:Base 24..32 3:Limit 0..15 4:Limit 16..19 5:G - printf "base = 0x%08x", $arg0 | ($arg1<<16) | ($arg2<<24) - printf " limit = 0x" - if $arg5 == 0 - printf "%08x", $arg3 | ($arg4<<16) - else - printf "%08x", (($arg3 | ($arg4<<16)) << 12) | 0xFFF - end -end - -define printdescs - if $argc < 1 || $argc > 2 - echo Usage: printdescs expr [count] - else - if $argc == 1 - _printdescs ($arg0) (sizeof($arg0)/sizeof(($arg0)[0])) - else - _printdescs ($arg0) ($arg1) - end - end -end - -document printdescs -Print an array of x86 segment or gate descriptors. -printdescs EXPR [COUNT] -EXPR must evaluate to an array of descriptors. -end - -define _printdescs - set $i = 0 - while $i < $arg1 - printf "[%d] ", $i - printdesc $arg0[$i] - set $i = $i + 1 - end -end @@ -1,168 +0,0 @@ -// Simple PIO-based (non-DMA) IDE driver code. - -#include "types.h" -#include "defs.h" -#include "param.h" -#include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" -#include "traps.h" -#include "spinlock.h" -#include "sleeplock.h" -#include "fs.h" -#include "buf.h" - -#define SECTOR_SIZE 512 -#define IDE_BSY 0x80 -#define IDE_DRDY 0x40 -#define IDE_DF 0x20 -#define IDE_ERR 0x01 - -#define IDE_CMD_READ 0x20 -#define IDE_CMD_WRITE 0x30 -#define IDE_CMD_RDMUL 0xc4 -#define IDE_CMD_WRMUL 0xc5 - -// idequeue points to the buf now being read/written to the disk. -// idequeue->qnext points to the next buf to be processed. -// You must hold idelock while manipulating queue. 
- -static struct spinlock idelock; -static struct buf *idequeue; - -static int havedisk1; -static void idestart(struct buf*); - -// Wait for IDE disk to become ready. -static int -idewait(int checkerr) -{ - int r; - - while(((r = inb(0x1f7)) & (IDE_BSY|IDE_DRDY)) != IDE_DRDY) - ; - if(checkerr && (r & (IDE_DF|IDE_ERR)) != 0) - return -1; - return 0; -} - -void -ideinit(void) -{ - int i; - - initlock(&idelock, "ide"); - ioapicenable(IRQ_IDE, ncpu - 1); - idewait(0); - - // Check if disk 1 is present - outb(0x1f6, 0xe0 | (1<<4)); - for(i=0; i<1000; i++){ - if(inb(0x1f7) != 0){ - havedisk1 = 1; - break; - } - } - - // Switch back to disk 0. - outb(0x1f6, 0xe0 | (0<<4)); -} - -// Start the request for b. Caller must hold idelock. -static void -idestart(struct buf *b) -{ - if(b == 0) - panic("idestart"); - if(b->blockno >= FSSIZE) - panic("incorrect blockno"); - int sector_per_block = BSIZE/SECTOR_SIZE; - int sector = b->blockno * sector_per_block; - int read_cmd = (sector_per_block == 1) ? IDE_CMD_READ : IDE_CMD_RDMUL; - int write_cmd = (sector_per_block == 1) ? IDE_CMD_WRITE : IDE_CMD_WRMUL; - - if (sector_per_block > 7) panic("idestart"); - - idewait(0); - outb(0x3f6, 0); // generate interrupt - outb(0x1f2, sector_per_block); // number of sectors - outb(0x1f3, sector & 0xff); - outb(0x1f4, (sector >> 8) & 0xff); - outb(0x1f5, (sector >> 16) & 0xff); - outb(0x1f6, 0xe0 | ((b->dev&1)<<4) | ((sector>>24)&0x0f)); - if(b->flags & B_DIRTY){ - outb(0x1f7, write_cmd); - outsl(0x1f0, b->data, BSIZE/4); - } else { - outb(0x1f7, read_cmd); - } -} - -// Interrupt handler. -void -ideintr(void) -{ - struct buf *b; - - // First queued buffer is the active request. - acquire(&idelock); - - if((b = idequeue) == 0){ - release(&idelock); - return; - } - idequeue = b->qnext; - - // Read data if needed. - if(!(b->flags & B_DIRTY) && idewait(1) >= 0) - insl(0x1f0, b->data, BSIZE/4); - - // Wake process waiting for this buf. 
- b->flags |= B_VALID; - b->flags &= ~B_DIRTY; - wakeup(b); - - // Start disk on next buf in queue. - if(idequeue != 0) - idestart(idequeue); - - release(&idelock); -} - -//PAGEBREAK! -// Sync buf with disk. -// If B_DIRTY is set, write buf to disk, clear B_DIRTY, set B_VALID. -// Else if B_VALID is not set, read buf from disk, set B_VALID. -void -iderw(struct buf *b) -{ - struct buf **pp; - - if(!holdingsleep(&b->lock)) - panic("iderw: buf not locked"); - if((b->flags & (B_VALID|B_DIRTY)) == B_VALID) - panic("iderw: nothing to do"); - if(b->dev != 0 && !havedisk1) - panic("iderw: ide disk 1 not present"); - - acquire(&idelock); //DOC:acquire-lock - - // Append b to idequeue. - b->qnext = 0; - for(pp=&idequeue; *pp; pp=&(*pp)->qnext) //DOC:insert-queue - ; - *pp = b; - - // Start disk if necessary. - if(idequeue == b) - idestart(b); - - // Wait for request to finish. - while((b->flags & (B_VALID|B_DIRTY)) != B_VALID){ - sleep(b, &idelock); - } - - - release(&idelock); -} diff --git a/ioapic.c b/ioapic.c deleted file mode 100644 index cb0f015..0000000 --- a/ioapic.c +++ /dev/null @@ -1,75 +0,0 @@ -// The I/O APIC manages hardware interrupts for an SMP system. -// http://www.intel.com/design/chipsets/datashts/29056601.pdf -// See also picirq.c. - -#include "types.h" -#include "defs.h" -#include "traps.h" - -#define IOAPIC 0xFEC00000 // Default physical address of IO APIC - -#define REG_ID 0x00 // Register index: ID -#define REG_VER 0x01 // Register index: version -#define REG_TABLE 0x10 // Redirection table base - -// The redirection table starts at REG_TABLE and uses -// two registers to configure each interrupt. -// The first (low) register in a pair contains configuration bits. -// The second (high) register contains a bitmask telling which -// CPUs can serve that interrupt. 
-#define INT_DISABLED 0x00010000 // Interrupt disabled -#define INT_LEVEL 0x00008000 // Level-triggered (vs edge-) -#define INT_ACTIVELOW 0x00002000 // Active low (vs high) -#define INT_LOGICAL 0x00000800 // Destination is CPU id (vs APIC ID) - -volatile struct ioapic *ioapic; - -// IO APIC MMIO structure: write reg, then read or write data. -struct ioapic { - uint reg; - uint pad[3]; - uint data; -}; - -static uint -ioapicread(int reg) -{ - ioapic->reg = reg; - return ioapic->data; -} - -static void -ioapicwrite(int reg, uint data) -{ - ioapic->reg = reg; - ioapic->data = data; -} - -void -ioapicinit(void) -{ - int i, id, maxintr; - - ioapic = (volatile struct ioapic*)IOAPIC; - maxintr = (ioapicread(REG_VER) >> 16) & 0xFF; - id = ioapicread(REG_ID) >> 24; - if(id != ioapicid) - cprintf("ioapicinit: id isn't equal to ioapicid; not a MP\n"); - - // Mark all interrupts edge-triggered, active high, disabled, - // and not routed to any CPUs. - for(i = 0; i <= maxintr; i++){ - ioapicwrite(REG_TABLE+2*i, INT_DISABLED | (T_IRQ0 + i)); - ioapicwrite(REG_TABLE+2*i+1, 0); - } -} - -void -ioapicenable(int irq, int cpunum) -{ - // Mark interrupt edge-triggered, active high, - // enabled, and routed to the given cpunum, - // which happens to be that cpu's APIC ID. - ioapicwrite(REG_TABLE+2*irq, T_IRQ0 + irq); - ioapicwrite(REG_TABLE+2*irq+1, cpunum << 24); -} diff --git a/kalloc.c b/kalloc.c deleted file mode 100644 index 14cd4f4..0000000 --- a/kalloc.c +++ /dev/null @@ -1,96 +0,0 @@ -// Physical memory allocator, intended to allocate -// memory for user processes, kernel stacks, page table pages, -// and pipe buffers. Allocates 4096-byte pages. 
- -#include "types.h" -#include "defs.h" -#include "param.h" -#include "memlayout.h" -#include "mmu.h" -#include "spinlock.h" - -void freerange(void *vstart, void *vend); -extern char end[]; // first address after kernel loaded from ELF file - // defined by the kernel linker script in kernel.ld - -struct run { - struct run *next; -}; - -struct { - struct spinlock lock; - int use_lock; - struct run *freelist; -} kmem; - -// Initialization happens in two phases. -// 1. main() calls kinit1() while still using entrypgdir to place just -// the pages mapped by entrypgdir on free list. -// 2. main() calls kinit2() with the rest of the physical pages -// after installing a full page table that maps them on all cores. -void -kinit1(void *vstart, void *vend) -{ - initlock(&kmem.lock, "kmem"); - kmem.use_lock = 0; - freerange(vstart, vend); -} - -void -kinit2(void *vstart, void *vend) -{ - freerange(vstart, vend); - kmem.use_lock = 1; -} - -void -freerange(void *vstart, void *vend) -{ - char *p; - p = (char*)PGROUNDUP((uint)vstart); - for(; p + PGSIZE <= (char*)vend; p += PGSIZE) - kfree(p); -} -//PAGEBREAK: 21 -// Free the page of physical memory pointed at by v, -// which normally should have been returned by a -// call to kalloc(). (The exception is when -// initializing the allocator; see kinit above.) -void -kfree(char *v) -{ - struct run *r; - - if((uint)v % PGSIZE || v < end || V2P(v) >= PHYSTOP) - panic("kfree"); - - // Fill with junk to catch dangling refs. - memset(v, 1, PGSIZE); - - if(kmem.use_lock) - acquire(&kmem.lock); - r = (struct run*)v; - r->next = kmem.freelist; - kmem.freelist = r; - if(kmem.use_lock) - release(&kmem.lock); -} - -// Allocate one 4096-byte page of physical memory. -// Returns a pointer that the kernel can use. -// Returns 0 if the memory cannot be allocated. 
-char* -kalloc(void) -{ - struct run *r; - - if(kmem.use_lock) - acquire(&kmem.lock); - r = kmem.freelist; - if(r) - kmem.freelist = r->next; - if(kmem.use_lock) - release(&kmem.lock); - return (char*)r; -} - @@ -1,50 +0,0 @@ -#include "types.h" -#include "x86.h" -#include "defs.h" -#include "kbd.h" - -int -kbdgetc(void) -{ - static uint shift; - static uchar *charcode[4] = { - normalmap, shiftmap, ctlmap, ctlmap - }; - uint st, data, c; - - st = inb(KBSTATP); - if((st & KBS_DIB) == 0) - return -1; - data = inb(KBDATAP); - - if(data == 0xE0){ - shift |= E0ESC; - return 0; - } else if(data & 0x80){ - // Key released - data = (shift & E0ESC ? data : data & 0x7F); - shift &= ~(shiftcode[data] | E0ESC); - return 0; - } else if(shift & E0ESC){ - // Last character was an E0 escape; or with 0x80 - data |= 0x80; - shift &= ~E0ESC; - } - - shift |= shiftcode[data]; - shift ^= togglecode[data]; - c = charcode[shift & (CTL | SHIFT)][data]; - if(shift & CAPSLOCK){ - if('a' <= c && c <= 'z') - c += 'A' - 'a'; - else if('A' <= c && c <= 'Z') - c += 'a' - 'A'; - } - return c; -} - -void -kbdintr(void) -{ - consoleintr(kbdgetc); -} @@ -1,112 +0,0 @@ -// PC keyboard interface constants - -#define KBSTATP 0x64 // kbd controller status port(I) -#define KBS_DIB 0x01 // kbd data in buffer -#define KBDATAP 0x60 // kbd data port(I) - -#define NO 0 - -#define SHIFT (1<<0) -#define CTL (1<<1) -#define ALT (1<<2) - -#define CAPSLOCK (1<<3) -#define NUMLOCK (1<<4) -#define SCROLLLOCK (1<<5) - -#define E0ESC (1<<6) - -// Special keycodes -#define KEY_HOME 0xE0 -#define KEY_END 0xE1 -#define KEY_UP 0xE2 -#define KEY_DN 0xE3 -#define KEY_LF 0xE4 -#define KEY_RT 0xE5 -#define KEY_PGUP 0xE6 -#define KEY_PGDN 0xE7 -#define KEY_INS 0xE8 -#define KEY_DEL 0xE9 - -// C('A') == Control-A -#define C(x) (x - '@') - -static uchar shiftcode[256] = -{ - [0x1D] CTL, - [0x2A] SHIFT, - [0x36] SHIFT, - [0x38] ALT, - [0x9D] CTL, - [0xB8] ALT -}; - -static uchar togglecode[256] = -{ - [0x3A] CAPSLOCK, - [0x45] 
NUMLOCK, - [0x46] SCROLLLOCK -}; - -static uchar normalmap[256] = -{ - NO, 0x1B, '1', '2', '3', '4', '5', '6', // 0x00 - '7', '8', '9', '0', '-', '=', '\b', '\t', - 'q', 'w', 'e', 'r', 't', 'y', 'u', 'i', // 0x10 - 'o', 'p', '[', ']', '\n', NO, 'a', 's', - 'd', 'f', 'g', 'h', 'j', 'k', 'l', ';', // 0x20 - '\'', '`', NO, '\\', 'z', 'x', 'c', 'v', - 'b', 'n', 'm', ',', '.', '/', NO, '*', // 0x30 - NO, ' ', NO, NO, NO, NO, NO, NO, - NO, NO, NO, NO, NO, NO, NO, '7', // 0x40 - '8', '9', '-', '4', '5', '6', '+', '1', - '2', '3', '0', '.', NO, NO, NO, NO, // 0x50 - [0x9C] '\n', // KP_Enter - [0xB5] '/', // KP_Div - [0xC8] KEY_UP, [0xD0] KEY_DN, - [0xC9] KEY_PGUP, [0xD1] KEY_PGDN, - [0xCB] KEY_LF, [0xCD] KEY_RT, - [0x97] KEY_HOME, [0xCF] KEY_END, - [0xD2] KEY_INS, [0xD3] KEY_DEL -}; - -static uchar shiftmap[256] = -{ - NO, 033, '!', '@', '#', '$', '%', '^', // 0x00 - '&', '*', '(', ')', '_', '+', '\b', '\t', - 'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I', // 0x10 - 'O', 'P', '{', '}', '\n', NO, 'A', 'S', - 'D', 'F', 'G', 'H', 'J', 'K', 'L', ':', // 0x20 - '"', '~', NO, '|', 'Z', 'X', 'C', 'V', - 'B', 'N', 'M', '<', '>', '?', NO, '*', // 0x30 - NO, ' ', NO, NO, NO, NO, NO, NO, - NO, NO, NO, NO, NO, NO, NO, '7', // 0x40 - '8', '9', '-', '4', '5', '6', '+', '1', - '2', '3', '0', '.', NO, NO, NO, NO, // 0x50 - [0x9C] '\n', // KP_Enter - [0xB5] '/', // KP_Div - [0xC8] KEY_UP, [0xD0] KEY_DN, - [0xC9] KEY_PGUP, [0xD1] KEY_PGDN, - [0xCB] KEY_LF, [0xCD] KEY_RT, - [0x97] KEY_HOME, [0xCF] KEY_END, - [0xD2] KEY_INS, [0xD3] KEY_DEL -}; - -static uchar ctlmap[256] = -{ - NO, NO, NO, NO, NO, NO, NO, NO, - NO, NO, NO, NO, NO, NO, NO, NO, - C('Q'), C('W'), C('E'), C('R'), C('T'), C('Y'), C('U'), C('I'), - C('O'), C('P'), NO, NO, '\r', NO, C('A'), C('S'), - C('D'), C('F'), C('G'), C('H'), C('J'), C('K'), C('L'), NO, - NO, NO, NO, C('\\'), C('Z'), C('X'), C('C'), C('V'), - C('B'), C('N'), C('M'), NO, NO, C('/'), NO, NO, - [0x9C] '\r', // KP_Enter - [0xB5] C('/'), // KP_Div - [0xC8] KEY_UP, [0xD0] 
KEY_DN, - [0xC9] KEY_PGUP, [0xD1] KEY_PGDN, - [0xCB] KEY_LF, [0xCD] KEY_RT, - [0x97] KEY_HOME, [0xCF] KEY_END, - [0xD2] KEY_INS, [0xD3] KEY_DEL -}; - diff --git a/kernel.ld b/kernel.ld deleted file mode 100644 index e24c860..0000000 --- a/kernel.ld +++ /dev/null @@ -1,68 +0,0 @@ -/* Simple linker script for the JOS kernel. - See the GNU ld 'info' manual ("info ld") to learn the syntax. */ - -OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") -OUTPUT_ARCH(i386) -ENTRY(_start) - -SECTIONS -{ - /* Link the kernel at this address: "." means the current address */ - /* Must be equal to KERNLINK */ - . = 0x80100000; - - .text : AT(0x100000) { - *(.text .stub .text.* .gnu.linkonce.t.*) - } - - PROVIDE(etext = .); /* Define the 'etext' symbol to this value */ - - .rodata : { - *(.rodata .rodata.* .gnu.linkonce.r.*) - } - - /* Include debugging information in kernel memory */ - .stab : { - PROVIDE(__STAB_BEGIN__ = .); - *(.stab); - PROVIDE(__STAB_END__ = .); - BYTE(0) /* Force the linker to allocate space - for this section */ - } - - .stabstr : { - PROVIDE(__STABSTR_BEGIN__ = .); - *(.stabstr); - PROVIDE(__STABSTR_END__ = .); - BYTE(0) /* Force the linker to allocate space - for this section */ - } - - /* Adjust the address for the data segment to the next page */ - . = ALIGN(0x1000); - - /* Conventionally, Unix linkers provide pseudo-symbols - * etext, edata, and end, at the end of the text, data, and bss. - * For the kernel mapping, we need the address at the beginning - * of the data section, but that's not one of the conventional - * symbols, because the convention started before there was a - * read-only rodata section between text and data. */ - PROVIDE(data = .); - - /* The data segment */ - .data : { - *(.data) - } - - PROVIDE(edata = .); - - .bss : { - *(.bss) - } - - PROVIDE(end = .); - - /DISCARD/ : { - *(.eh_frame .note.GNU-stack) - } -} @@ -12,17 +12,14 @@ // * Do not use the buffer after calling brelse. 
// * Only one process at a time can use a buffer, // so do not keep them longer than necessary. -// -// The implementation uses two state flags internally: -// * B_VALID: the buffer data has been read from the disk. -// * B_DIRTY: the buffer data has been modified -// and needs to be written to disk. + #include "types.h" -#include "defs.h" #include "param.h" #include "spinlock.h" #include "sleeplock.h" +#include "riscv.h" +#include "defs.h" #include "fs.h" #include "buf.h" @@ -42,7 +39,6 @@ binit(void) initlock(&bcache.lock, "bcache"); -//PAGEBREAK! // Create linked list of buffers bcache.head.prev = &bcache.head; bcache.head.next = &bcache.head; @@ -76,13 +72,11 @@ bget(uint dev, uint blockno) } // Not cached; recycle an unused buffer. - // Even if refcnt==0, B_DIRTY indicates a buffer is in use - // because log.c has modified it but not yet committed it. for(b = bcache.head.prev; b != &bcache.head; b = b->prev){ - if(b->refcnt == 0 && (b->flags & B_DIRTY) == 0) { + if(b->refcnt == 0) { b->dev = dev; b->blockno = blockno; - b->flags = 0; + b->valid = 0; b->refcnt = 1; release(&bcache.lock); acquiresleep(&b->lock); @@ -99,8 +93,9 @@ bread(uint dev, uint blockno) struct buf *b; b = bget(dev, blockno); - if((b->flags & B_VALID) == 0) { - iderw(b); + if(!b->valid) { + virtio_disk_rw(b, 0); + b->valid = 1; } return b; } @@ -111,8 +106,7 @@ bwrite(struct buf *b) { if(!holdingsleep(&b->lock)) panic("bwrite"); - b->flags |= B_DIRTY; - iderw(b); + virtio_disk_rw(b, 1); } // Release a locked buffer. @@ -139,6 +133,19 @@ brelse(struct buf *b) release(&bcache.lock); } -//PAGEBREAK! -// Blank page. + +void +bpin(struct buf *b) { + acquire(&bcache.lock); + b->refcnt++; + release(&bcache.lock); +} + +void +bunpin(struct buf *b) { + acquire(&bcache.lock); + b->refcnt--; + release(&bcache.lock); +} + @@ -1,5 +1,6 @@ struct buf { - int flags; + int valid; // has data been read from disk? + int disk; // does disk "own" buf? 
uint dev; uint blockno; struct sleeplock lock; @@ -9,6 +10,4 @@ struct buf { struct buf *qnext; // disk queue uchar data[BSIZE]; }; -#define B_VALID 0x2 // buffer has been read from disk -#define B_DIRTY 0x4 // buffer needs to be written to disk diff --git a/kernel/console.c b/kernel/console.c new file mode 100644 index 0000000..87a83ff --- /dev/null +++ b/kernel/console.c @@ -0,0 +1,199 @@ +// +// Console input and output, to the uart. +// Reads are line at a time. +// Implements special input characters: +// newline -- end of line +// control-h -- backspace +// control-u -- kill line +// control-d -- end of file +// control-p -- print process list +// + +#include <stdarg.h> + +#include "types.h" +#include "param.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "file.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" +#include "proc.h" + +#define BACKSPACE 0x100 +#define C(x) ((x)-'@') // Control-x + +// +// send one character to the uart. +// +void +consputc(int c) +{ + extern volatile int panicked; // from printf.c + + if(panicked){ + for(;;) + ; + } + + if(c == BACKSPACE){ + // if the user typed backspace, overwrite with a space. + uartputc('\b'); uartputc(' '); uartputc('\b'); + } else { + uartputc(c); + } +} + +struct { + struct spinlock lock; + + // input +#define INPUT_BUF 128 + char buf[INPUT_BUF]; + uint r; // Read index + uint w; // Write index + uint e; // Edit index +} cons; + +// +// user write()s to the console go here. +// +int +consolewrite(int user_src, uint64 src, int n) +{ + int i; + + acquire(&cons.lock); + for(i = 0; i < n; i++){ + char c; + if(either_copyin(&c, user_src, src+i, 1) == -1) + break; + consputc(c); + } + release(&cons.lock); + + return n; +} + +// +// user read()s from the console go here. +// copy (up to) a whole input line to dst. +// user_dist indicates whether dst is a user +// or kernel address. 
+// +int +consoleread(int user_dst, uint64 dst, int n) +{ + uint target; + int c; + char cbuf; + + target = n; + acquire(&cons.lock); + while(n > 0){ + // wait until interrupt handler has put some + // input into cons.buffer. + while(cons.r == cons.w){ + if(myproc()->killed){ + release(&cons.lock); + return -1; + } + sleep(&cons.r, &cons.lock); + } + + c = cons.buf[cons.r++ % INPUT_BUF]; + + if(c == C('D')){ // end-of-file + if(n < target){ + // Save ^D for next time, to make sure + // caller gets a 0-byte result. + cons.r--; + } + break; + } + + // copy the input byte to the user-space buffer. + cbuf = c; + if(either_copyout(user_dst, dst, &cbuf, 1) == -1) + break; + + dst++; + --n; + + if(c == '\n'){ + // a whole line has arrived, return to + // the user-level read(). + break; + } + } + release(&cons.lock); + + return target - n; +} + +// +// the console input interrupt handler. +// uartintr() calls this for input character. +// do erase/kill processing, append to cons.buf, +// wake up consoleread() if a whole line has arrived. +// +void +consoleintr(int c) +{ + acquire(&cons.lock); + + switch(c){ + case C('P'): // Print process list. + procdump(); + break; + case C('U'): // Kill line. + while(cons.e != cons.w && + cons.buf[(cons.e-1) % INPUT_BUF] != '\n'){ + cons.e--; + consputc(BACKSPACE); + } + break; + case C('H'): // Backspace + case '\x7f': + if(cons.e != cons.w){ + cons.e--; + consputc(BACKSPACE); + } + break; + default: + if(c != 0 && cons.e-cons.r < INPUT_BUF){ + c = (c == '\r') ? '\n' : c; + + // echo back to the user. + consputc(c); + + // store for consumption by consoleread(). + cons.buf[cons.e++ % INPUT_BUF] = c; + + if(c == '\n' || c == C('D') || cons.e == cons.r+INPUT_BUF){ + // wake up consoleread() if a whole line (or end-of-file) + // has arrived. 
+ cons.w = cons.e; + wakeup(&cons.r); + } + } + break; + } + + release(&cons.lock); +} + +void +consoleinit(void) +{ + initlock(&cons.lock, "cons"); + + uartinit(); + + // connect read and write system calls + // to consoleread and consolewrite. + devsw[CONSOLE].read = consoleread; + devsw[CONSOLE].write = consolewrite; +} @@ -4,7 +4,6 @@ struct file; struct inode; struct pipe; struct proc; -struct rtcdate; struct spinlock; struct sleeplock; struct stat; @@ -15,12 +14,13 @@ void binit(void); struct buf* bread(uint, uint); void brelse(struct buf*); void bwrite(struct buf*); +void bpin(struct buf*); +void bunpin(struct buf*); // console.c void consoleinit(void); -void cprintf(char*, ...); -void consoleintr(int(*)(void)); -void panic(char*) __attribute__((noreturn)); +void consoleintr(int); +void consputc(int); // exec.c int exec(char*, char**); @@ -30,17 +30,17 @@ struct file* filealloc(void); void fileclose(struct file*); struct file* filedup(struct file*); void fileinit(void); -int fileread(struct file*, char*, int n); -int filestat(struct file*, struct stat*); -int filewrite(struct file*, char*, int n); +int fileread(struct file*, uint64, int n); +int filestat(struct file*, uint64 addr); +int filewrite(struct file*, uint64, int n); // fs.c -void readsb(int dev, struct superblock *sb); +void fsinit(int); int dirlink(struct inode*, char*, uint); struct inode* dirlookup(struct inode*, char*, uint*); struct inode* ialloc(uint, short); struct inode* idup(struct inode*); -void iinit(int dev); +void iinit(); void ilock(struct inode*); void iput(struct inode*); void iunlock(struct inode*); @@ -49,69 +49,49 @@ void iupdate(struct inode*); int namecmp(const char*, const char*); struct inode* namei(char*); struct inode* nameiparent(char*, char*); -int readi(struct inode*, char*, uint, uint); +int readi(struct inode*, int, uint64, uint, uint); void stati(struct inode*, struct stat*); -int writei(struct inode*, char*, uint, uint); - -// ide.c -void ideinit(void); -void 
ideintr(void); -void iderw(struct buf*); +int writei(struct inode*, int, uint64, uint, uint); -// ioapic.c -void ioapicenable(int irq, int cpu); -extern uchar ioapicid; -void ioapicinit(void); +// ramdisk.c +void ramdiskinit(void); +void ramdiskintr(void); +void ramdiskrw(struct buf*); // kalloc.c -char* kalloc(void); -void kfree(char*); -void kinit1(void*, void*); -void kinit2(void*, void*); - -// kbd.c -void kbdintr(void); - -// lapic.c -void cmostime(struct rtcdate *r); -int lapicid(void); -extern volatile uint* lapic; -void lapiceoi(void); -void lapicinit(void); -void lapicstartap(uchar, uint); -void microdelay(int); +void* kalloc(void); +void kfree(void *); +void kinit(); // log.c -void initlog(int dev); +void initlog(int, struct superblock*); void log_write(struct buf*); void begin_op(); void end_op(); -// mp.c -extern int ismp; -void mpinit(void); - -// picirq.c -void picenable(int); -void picinit(void); - // pipe.c int pipealloc(struct file**, struct file**); void pipeclose(struct pipe*, int); -int piperead(struct pipe*, char*, int); -int pipewrite(struct pipe*, char*, int); +int piperead(struct pipe*, uint64, int); +int pipewrite(struct pipe*, uint64, int); + +// printf.c +void printf(char*, ...); +void panic(char*) __attribute__((noreturn)); +void printfinit(void); -//PAGEBREAK: 16 // proc.c int cpuid(void); void exit(void); int fork(void); int growproc(int); +pagetable_t proc_pagetable(struct proc *); +void proc_freepagetable(pagetable_t, uint64); int kill(int); struct cpu* mycpu(void); +struct cpu* getmycpu(void); struct proc* myproc(); -void pinit(void); -void procdump(void); +void procinit(void); void scheduler(void) __attribute__((noreturn)); void sched(void); void setproc(struct proc*); @@ -120,18 +100,20 @@ void userinit(void); int wait(void); void wakeup(void*); void yield(void); +int either_copyout(int user_dst, uint64 dst, void *src, uint64 len); +int either_copyin(void *dst, int user_src, uint64 src, uint64 len); +void procdump(void); // 
swtch.S -void swtch(struct context**, struct context*); +void swtch(struct context*, struct context*); // spinlock.c void acquire(struct spinlock*); -void getcallerpcs(void*, uint*); int holding(struct spinlock*); void initlock(struct spinlock*, char*); void release(struct spinlock*); -void pushcli(void); -void popcli(void); +void push_off(void); +void pop_off(void); // sleeplock.c void acquiresleep(struct sleeplock*); @@ -150,41 +132,55 @@ char* strncpy(char*, const char*, int); // syscall.c int argint(int, int*); -int argptr(int, char**, int); -int argstr(int, char**); -int fetchint(uint, int*); -int fetchstr(uint, char**); -void syscall(void); - -// timer.c -void timerinit(void); +int argstr(int, char*, int); +int argaddr(int, uint64 *); +int fetchstr(uint64, char*, int); +int fetchaddr(uint64, uint64*); +void syscall(); // trap.c -void idtinit(void); extern uint ticks; -void tvinit(void); +void trapinit(void); +void trapinithart(void); extern struct spinlock tickslock; +void usertrapret(void); // uart.c void uartinit(void); void uartintr(void); void uartputc(int); +int uartgetc(void); // vm.c -void seginit(void); -void kvmalloc(void); -pde_t* setupkvm(void); -char* uva2ka(pde_t*, char*); -int allocuvm(pde_t*, uint, uint); -int deallocuvm(pde_t*, uint, uint); -void freevm(pde_t*); -void inituvm(pde_t*, char*, uint); -int loaduvm(pde_t*, char*, struct inode*, uint, uint); -pde_t* copyuvm(pde_t*, uint); -void switchuvm(struct proc*); -void switchkvm(void); -int copyout(pde_t*, uint, void*, uint); -void clearpteu(pde_t *pgdir, char *uva); +void kvminit(void); +void kvminithart(void); +uint64 kvmpa(uint64); +void kvmmap(uint64, uint64, uint64, int); +int mappages(pagetable_t, uint64, uint64, uint64, int); +pagetable_t uvmcreate(void); +void uvminit(pagetable_t, uchar *, uint); +uint64 uvmalloc(pagetable_t, uint64, uint64); +uint64 uvmdealloc(pagetable_t, uint64, uint64); +int uvmcopy(pagetable_t, pagetable_t, uint64); +void uvmfree(pagetable_t, uint64); +void 
uvmunmap(pagetable_t, uint64, uint64, int); +void uvmclear(pagetable_t, uint64); +uint64 walkaddr(pagetable_t, uint64); +int copyout(pagetable_t, uint64, char *, uint64); +int copyin(pagetable_t, char *, uint64, uint64); +int copyinstr(pagetable_t, char *, uint64, uint64); + +// plic.c +void plicinit(void); +void plicinithart(void); +uint64 plic_pending(void); +int plic_claim(void); +void plic_complete(int); + +// virtio_disk.c +void virtio_disk_init(void); +void virtio_disk_rw(struct buf *, int); +void virtio_disk_intr(); // number of elements in fixed-size array #define NELEM(x) (sizeof(x)/sizeof((x)[0])) @@ -9,9 +9,9 @@ struct elfhdr { ushort type; ushort machine; uint version; - uint entry; - uint phoff; - uint shoff; + uint64 entry; + uint64 phoff; + uint64 shoff; uint flags; ushort ehsize; ushort phentsize; @@ -23,14 +23,14 @@ struct elfhdr { // Program section header struct proghdr { - uint type; - uint off; - uint vaddr; - uint paddr; - uint filesz; - uint memsz; - uint flags; - uint align; + uint32 type; + uint32 flags; + uint64 off; + uint64 vaddr; + uint64 paddr; + uint64 filesz; + uint64 memsz; + uint64 align; }; // Values for Proghdr type diff --git a/kernel/entry.S b/kernel/entry.S new file mode 100644 index 0000000..ef5a56a --- /dev/null +++ b/kernel/entry.S @@ -0,0 +1,26 @@ + # qemu -kernel starts at 0x1000. the instructions + # there seem to be provided by qemu, as if it + # were a ROM. the code at 0x1000 jumps to + # 0x8000000, the _start function here, + # in machine mode. each CPU starts here. +.section .data +.globl stack0 +.section .text +.globl start +.section .text +.globl _entry +_entry: + # set up a stack for C. + # stack0 is declared in start.c, + # with a 4096-byte stack per CPU. 
+ # sp = stack0 + (hartid * 4096) + la sp, stack0 + li a0, 1024*4 + csrr a1, mhartid + addi a1, a1, 1 + mul a0, a0, a1 + add sp, sp, a0 + # jump to start() in start.c + call start +junk: + j junk diff --git a/kernel/exec.c b/kernel/exec.c new file mode 100644 index 0000000..74ef654 --- /dev/null +++ b/kernel/exec.c @@ -0,0 +1,153 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "spinlock.h" +#include "proc.h" +#include "defs.h" +#include "elf.h" + +static int loadseg(pde_t *pgdir, uint64 addr, struct inode *ip, uint offset, uint sz); + +int +exec(char *path, char **argv) +{ + char *s, *last; + int i, off; + uint64 argc, sz, sp, ustack[MAXARG+1], stackbase; + struct elfhdr elf; + struct inode *ip; + struct proghdr ph; + pagetable_t pagetable = 0, oldpagetable; + struct proc *p = myproc(); + + begin_op(); + + if((ip = namei(path)) == 0){ + end_op(); + return -1; + } + ilock(ip); + + // Check ELF header + if(readi(ip, 0, (uint64)&elf, 0, sizeof(elf)) != sizeof(elf)) + goto bad; + if(elf.magic != ELF_MAGIC) + goto bad; + + if((pagetable = proc_pagetable(p)) == 0) + goto bad; + + // Load program into memory. + sz = 0; + for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){ + if(readi(ip, 0, (uint64)&ph, off, sizeof(ph)) != sizeof(ph)) + goto bad; + if(ph.type != ELF_PROG_LOAD) + continue; + if(ph.memsz < ph.filesz) + goto bad; + if(ph.vaddr + ph.memsz < ph.vaddr) + goto bad; + if((sz = uvmalloc(pagetable, sz, ph.vaddr + ph.memsz)) == 0) + goto bad; + if(ph.vaddr % PGSIZE != 0) + goto bad; + if(loadseg(pagetable, ph.vaddr, ip, ph.off, ph.filesz) < 0) + goto bad; + } + iunlockput(ip); + end_op(); + ip = 0; + + p = myproc(); + uint64 oldsz = p->sz; + + // Allocate two pages at the next page boundary. + // Use the second as the user stack. 
+ sz = PGROUNDUP(sz); + if((sz = uvmalloc(pagetable, sz, sz + 2*PGSIZE)) == 0) + goto bad; + uvmclear(pagetable, sz-2*PGSIZE); + sp = sz; + stackbase = sp - PGSIZE; + + // Push argument strings, prepare rest of stack in ustack. + for(argc = 0; argv[argc]; argc++) { + if(argc >= MAXARG) + goto bad; + sp -= strlen(argv[argc]) + 1; + sp -= sp % 16; // riscv sp must be 16-byte aligned + if(sp < stackbase) + goto bad; + if(copyout(pagetable, sp, argv[argc], strlen(argv[argc]) + 1) < 0) + goto bad; + ustack[argc] = sp; + } + ustack[argc] = 0; + + // push the array of argv[] pointers. + sp -= (argc+1) * sizeof(uint64); + sp -= sp % 16; + if(sp < stackbase) + goto bad; + if(copyout(pagetable, sp, (char *)ustack, (argc+1)*sizeof(uint64)) < 0) + goto bad; + + // arguments to user main(argc, argv) + // argc is returned via the system call return + // value, which goes in a0. + p->tf->a1 = sp; + + // Save program name for debugging. + for(last=s=path; *s; s++) + if(*s == '/') + last = s+1; + safestrcpy(p->name, last, sizeof(p->name)); + + // Commit to the user image. + oldpagetable = p->pagetable; + p->pagetable = pagetable; + p->sz = sz; + p->tf->epc = elf.entry; // initial program counter = main + p->tf->sp = sp; // initial stack pointer + proc_freepagetable(oldpagetable, oldsz); + return argc; // this ends up in a0, the first argument to main(argc, argv) + + bad: + if(pagetable) + proc_freepagetable(pagetable, sz); + if(ip){ + iunlockput(ip); + end_op(); + } + return -1; +} + +// Load a program segment into pagetable at virtual address va. +// va must be page-aligned +// and the pages from va to va+sz must already be mapped. +// Returns 0 on success, -1 on failure. 
+static int +loadseg(pagetable_t pagetable, uint64 va, struct inode *ip, uint offset, uint sz) +{ + uint i, n; + uint64 pa; + + if((va % PGSIZE) != 0) + panic("loadseg: va must be page aligned"); + + for(i = 0; i < sz; i += PGSIZE){ + pa = walkaddr(pagetable, va + i); + if(pa == 0) + panic("loadseg: address should exist"); + if(sz - i < PGSIZE) + n = sz - i; + else + n = PGSIZE; + if(readi(ip, 0, (uint64)pa, offset+i, n) != n) + return -1; + } + + return 0; +} @@ -1,14 +1,17 @@ // -// File descriptors +// Support functions for system calls that involve file descriptors. // #include "types.h" +#include "riscv.h" #include "defs.h" #include "param.h" #include "fs.h" #include "spinlock.h" #include "sleeplock.h" #include "file.h" +#include "stat.h" +#include "proc.h" struct devsw devsw[NDEV]; struct { @@ -70,9 +73,9 @@ fileclose(struct file *f) f->type = FD_NONE; release(&ftable.lock); - if(ff.type == FD_PIPE) + if(ff.type == FD_PIPE){ pipeclose(ff.pipe, ff.writable); - else if(ff.type == FD_INODE){ + } else if(ff.type == FD_INODE || ff.type == FD_DEVICE){ begin_op(); iput(ff.ip); end_op(); @@ -80,50 +83,65 @@ fileclose(struct file *f) } // Get metadata about file f. +// addr is a user virtual address, pointing to a struct stat. int -filestat(struct file *f, struct stat *st) +filestat(struct file *f, uint64 addr) { - if(f->type == FD_INODE){ + struct proc *p = myproc(); + struct stat st; + + if(f->type == FD_INODE || f->type == FD_DEVICE){ ilock(f->ip); - stati(f->ip, st); + stati(f->ip, &st); iunlock(f->ip); + if(copyout(p->pagetable, addr, (char *)&st, sizeof(st)) < 0) + return -1; return 0; } return -1; } // Read from file f. +// addr is a user virtual address. 
int -fileread(struct file *f, char *addr, int n) +fileread(struct file *f, uint64 addr, int n) { - int r; + int r = 0; if(f->readable == 0) return -1; - if(f->type == FD_PIPE) - return piperead(f->pipe, addr, n); - if(f->type == FD_INODE){ + + if(f->type == FD_PIPE){ + r = piperead(f->pipe, addr, n); + } else if(f->type == FD_DEVICE){ + r = devsw[f->major].read(1, addr, n); + } else if(f->type == FD_INODE){ ilock(f->ip); - if((r = readi(f->ip, addr, f->off, n)) > 0) + if((r = readi(f->ip, 1, addr, f->off, n)) > 0) f->off += r; iunlock(f->ip); - return r; + } else { + panic("fileread"); } - panic("fileread"); + + return r; } -//PAGEBREAK! // Write to file f. +// addr is a user virtual address. int -filewrite(struct file *f, char *addr, int n) +filewrite(struct file *f, uint64 addr, int n) { - int r; + int r, ret = 0; if(f->writable == 0) return -1; - if(f->type == FD_PIPE) - return pipewrite(f->pipe, addr, n); - if(f->type == FD_INODE){ + + if(f->type == FD_PIPE){ + ret = pipewrite(f->pipe, addr, n); + } else if(f->type == FD_DEVICE){ + ret = devsw[f->major].write(1, addr, n); + } else if(f->type == FD_INODE){ // write a few blocks at a time to avoid exceeding // the maximum log transaction size, including // i-node, indirect block, allocation blocks, @@ -139,7 +157,7 @@ filewrite(struct file *f, char *addr, int n) begin_op(); ilock(f->ip); - if ((r = writei(f->ip, addr + i, f->off, n1)) > 0) + if ((r = writei(f->ip, 1, addr + i, f->off, n1)) > 0) f->off += r; iunlock(f->ip); end_op(); @@ -150,8 +168,11 @@ filewrite(struct file *f, char *addr, int n) panic("short filewrite"); i += r; } - return i == n ? n : -1; + ret = (i == n ? 
n : -1); + } else { + panic("filewrite"); } - panic("filewrite"); + + return ret; } @@ -1,11 +1,12 @@ struct file { - enum { FD_NONE, FD_PIPE, FD_INODE } type; + enum { FD_NONE, FD_PIPE, FD_INODE, FD_DEVICE } type; int ref; // reference count char readable; char writable; - struct pipe *pipe; - struct inode *ip; - uint off; + struct pipe *pipe; // FD_PIPE + struct inode *ip; // FD_INODE and FD_DEVICE + uint off; // FD_INODE + short major; // FD_DEVICE }; @@ -25,11 +26,10 @@ struct inode { uint addrs[NDIRECT+1]; }; -// table mapping major device number to -// device functions +// map major device number to device functions. struct devsw { - int (*read)(struct inode*, char*, int); - int (*write)(struct inode*, char*, int); + int (*read)(int, uint64, int); + int (*write)(int, uint64, int); }; extern struct devsw devsw[]; @@ -10,12 +10,12 @@ // are in sysfile.c. #include "types.h" +#include "riscv.h" #include "defs.h" #include "param.h" #include "stat.h" -#include "mmu.h" -#include "proc.h" #include "spinlock.h" +#include "proc.h" #include "sleeplock.h" #include "fs.h" #include "buf.h" @@ -28,7 +28,7 @@ static void itrunc(struct inode*); struct superblock sb; // Read the super block. -void +static void readsb(int dev, struct superblock *sb) { struct buf *bp; @@ -38,6 +38,15 @@ readsb(int dev, struct superblock *sb) brelse(bp); } +// Init fs +void +fsinit(int dev) { + readsb(dev, &sb); + if(sb.magic != FSMAGIC) + panic("invalid file system"); + initlog(dev, &sb); +} + // Zero a block. 
static void bzero(int dev, int bno) @@ -170,7 +179,7 @@ struct { } icache; void -iinit(int dev) +iinit() { int i = 0; @@ -178,17 +187,10 @@ iinit(int dev) for(i = 0; i < NINODE; i++) { initsleeplock(&icache.inode[i].lock, "inode"); } - - readsb(dev, &sb); - cprintf("sb: size %d nblocks %d ninodes %d nlog %d logstart %d\ - inodestart %d bmap start %d\n", sb.size, sb.nblocks, - sb.ninodes, sb.nlog, sb.logstart, sb.inodestart, - sb.bmapstart); } static struct inode* iget(uint dev, uint inum); -//PAGEBREAK! // Allocate an inode on device dev. // Mark it as allocated by giving it type type. // Returns an unlocked but allocated and referenced inode. @@ -332,22 +334,27 @@ iunlock(struct inode *ip) void iput(struct inode *ip) { - acquiresleep(&ip->lock); - if(ip->valid && ip->nlink == 0){ - acquire(&icache.lock); - int r = ip->ref; + acquire(&icache.lock); + + if(ip->ref == 1 && ip->valid && ip->nlink == 0){ + // inode has no links and no other references: truncate and free. + + // ip->ref == 1 means no other process can have ip locked, + // so this acquiresleep() won't block (or deadlock). + acquiresleep(&ip->lock); + release(&icache.lock); - if(r == 1){ - // inode has no links and no other references: truncate and free. - itrunc(ip); - ip->type = 0; - iupdate(ip); - ip->valid = 0; - } + + itrunc(ip); + ip->type = 0; + iupdate(ip); + ip->valid = 0; + + releasesleep(&ip->lock); + + acquire(&icache.lock); } - releasesleep(&ip->lock); - acquire(&icache.lock); ip->ref--; release(&icache.lock); } @@ -360,7 +367,6 @@ iunlockput(struct inode *ip) iput(ip); } -//PAGEBREAK! // Inode content // // The content (data) associated with each inode is stored @@ -447,21 +453,16 @@ stati(struct inode *ip, struct stat *st) st->size = ip->size; } -//PAGEBREAK! // Read data from inode. // Caller must hold ip->lock. +// If user_dst==1, then dst is a user virtual address; +// otherwise, dst is a kernel address. 
+// Returns n on success, or -1 if the range is invalid or a copy
+// to dst fails.
 int
-readi(struct inode *ip, char *dst, uint off, uint n)
+readi(struct inode *ip, int user_dst, uint64 dst, uint off, uint n)
 {
   uint tot, m;
   struct buf *bp;
 
-  if(ip->type == T_DEV){
-    if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
-      return -1;
-    return devsw[ip->major].read(ip, dst, n);
-  }
-
   if(off > ip->size || off + n < off)
     return -1;
   if(off + n > ip->size)
@@ -470,27 +471,23 @@ readi(struct inode *ip, char *dst, uint off, uint n)
   for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
     bp = bread(ip->dev, bmap(ip, off/BSIZE));
     m = min(n - tot, BSIZE - off%BSIZE);
-    memmove(dst, bp->data + off%BSIZE, m);
+    if(either_copyout(user_dst, dst, bp->data + (off % BSIZE), m) == -1) {
+      // the normal path pairs every bread() with brelse(); do the same
+      // before leaving the loop, and report the failure rather than
+      // claiming all n bytes were read.
+      brelse(bp);
+      tot = -1;
+      break;
+    }
     brelse(bp);
   }
-  return n;
+  // tot == n when the loop ran to completion.
+  return tot;
 }
 
-// PAGEBREAK!
 // Write data to inode.
 // Caller must hold ip->lock.
+// If user_src==1, then src is a user virtual address;
+// otherwise, src is a kernel address.
 int
-writei(struct inode *ip, char *src, uint off, uint n)
+writei(struct inode *ip, int user_src, uint64 src, uint off, uint n)
 {
   uint tot, m;
   struct buf *bp;
 
-  if(ip->type == T_DEV){
-    if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write)
-      return -1;
-    return devsw[ip->major].write(ip, src, n);
-  }
-
   if(off > ip->size || off + n < off)
     return -1;
   if(off + n > MAXFILE*BSIZE)
@@ -499,7 +496,8 @@ writei(struct inode *ip, char *src, uint off, uint n)
   for(tot=0; tot<n; tot+=m, off+=m, src+=m){
     bp = bread(ip->dev, bmap(ip, off/BSIZE));
     m = min(n - tot, BSIZE - off%BSIZE);
-    memmove(bp->data + off%BSIZE, src, m);
+    if(either_copyin(bp->data + (off % BSIZE), user_src, src, m) == -1) {
+      // release the buffer obtained from bread() before bailing out;
+      // nothing was modified, so there is nothing to log.
+      brelse(bp);
+      break;
+    }
     log_write(bp);
     brelse(bp);
   }
@@ -511,7 +509,6 @@ writei(struct inode *ip, char *src, uint off, uint n)
   return n;
 }
 
-//PAGEBREAK!
// Directories int @@ -532,7 +529,7 @@ dirlookup(struct inode *dp, char *name, uint *poff) panic("dirlookup not DIR"); for(off = 0; off < dp->size; off += sizeof(de)){ - if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) + if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) panic("dirlookup read"); if(de.inum == 0) continue; @@ -564,7 +561,7 @@ dirlink(struct inode *dp, char *name, uint inum) // Look for an empty dirent. for(off = 0; off < dp->size; off += sizeof(de)){ - if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) + if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) panic("dirlink read"); if(de.inum == 0) break; @@ -572,13 +569,12 @@ dirlink(struct inode *dp, char *name, uint inum) strncpy(de.name, name, DIRSIZ); de.inum = inum; - if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) + if(writei(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) panic("dirlink"); return 0; } -//PAGEBREAK! // Paths // Copy the next path element from path into name. @@ -2,8 +2,8 @@ // Both the kernel and user programs use this header file. -#define ROOTINO 1 // root i-number -#define BSIZE 512 // block size +#define ROOTINO 1 // root i-number +#define BSIZE 1024 // block size // Disk layout: // [ boot block | super block | log | inode blocks | @@ -12,6 +12,7 @@ // mkfs computes the super block and builds an initial file system. The // super block describes the disk layout: struct superblock { + uint magic; // Must be FSMAGIC uint size; // Size of file system image (blocks) uint nblocks; // Number of data blocks uint ninodes; // Number of inodes. 
@@ -21,6 +22,8 @@ struct superblock { uint bmapstart; // Block number of first free map block }; +#define FSMAGIC 0x10203040 + #define NDIRECT 12 #define NINDIRECT (BSIZE / sizeof(uint)) #define MAXFILE (NDIRECT + NINDIRECT) @@ -28,8 +31,8 @@ struct superblock { // On-disk inode structure struct dinode { short type; // File type - short major; // Major device number (T_DEV only) - short minor; // Minor device number (T_DEV only) + short major; // Major device number (T_DEVICE only) + short minor; // Minor device number (T_DEVICE only) short nlink; // Number of links to inode in file system uint size; // Size of file (bytes) uint addrs[NDIRECT+1]; // Data block addresses @@ -45,7 +48,7 @@ struct dinode { #define BPB (BSIZE*8) // Block of free map containing bit for block b -#define BBLOCK(b, sb) (b/BPB + sb.bmapstart) +#define BBLOCK(b, sb) ((b)/BPB + sb.bmapstart) // Directory is a file containing a sequence of dirent structures. #define DIRSIZ 14 diff --git a/kernel/kalloc.c b/kernel/kalloc.c new file mode 100644 index 0000000..ae3863b --- /dev/null +++ b/kernel/kalloc.c @@ -0,0 +1,83 @@ +// Physical memory allocator, for user processes, +// kernel stacks, page-table pages, +// and pipe buffers. Allocates whole 4096-byte pages. + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "spinlock.h" +#include "riscv.h" +#include "defs.h" + +void freerange(void *pa_start, void *pa_end); + +extern char end[]; // first address after kernel. + // defined by kernel.ld. + +struct run { + struct run *next; +}; + +struct { + struct spinlock lock; + struct run *freelist; +} kmem; + +void +kinit() +{ + initlock(&kmem.lock, "kmem"); + freerange(end, (void*)PHYSTOP); +} + +void +freerange(void *pa_start, void *pa_end) +{ + char *p; + p = (char*)PGROUNDUP((uint64)pa_start); + p += 4096; // XXX I can't get kernel.ld to place end beyond the last bss symbol. 
+ for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE) + kfree(p); +} + +// Free the page of physical memory pointed at by v, +// which normally should have been returned by a +// call to kalloc(). (The exception is when +// initializing the allocator; see kinit above.) +void +kfree(void *pa) +{ + struct run *r; + + if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP) + panic("kfree"); + + // Fill with junk to catch dangling refs. + memset(pa, 1, PGSIZE); + + r = (struct run*)pa; + + acquire(&kmem.lock); + r->next = kmem.freelist; + kmem.freelist = r; + release(&kmem.lock); +} + +// Allocate one 4096-byte page of physical memory. +// Returns a pointer that the kernel can use. +// Returns 0 if the memory cannot be allocated. +void * +kalloc(void) +{ + struct run *r; + + acquire(&kmem.lock); + r = kmem.freelist; + if(r) + kmem.freelist = r->next; + release(&kmem.lock); + + if(r) + memset((char*)r, 5, PGSIZE); // fill with junk + return (void*)r; +} diff --git a/kernel/kernel.ld b/kernel/kernel.ld new file mode 100644 index 0000000..0b5e76b --- /dev/null +++ b/kernel/kernel.ld @@ -0,0 +1,32 @@ +OUTPUT_ARCH( "riscv" ) +ENTRY( _entry ) + +SECTIONS +{ + /* + * ensure that entry.S / _entry is at 0x80000000, + * where qemu's -kernel jumps. + */ + . = 0x80000000; + .text : + { + *(.text) + . = ALIGN(0x1000); + *(trampsec) + } + + . = ALIGN(0x1000); + PROVIDE(etext = .); + + /* + * make sure end is after data and bss. + */ + .data : { + *(.data) + } + bss : { + *(.bss) + PROVIDE(end = .); + } + +} diff --git a/kernel/kernelvec.S b/kernel/kernelvec.S new file mode 100644 index 0000000..3e9d3e9 --- /dev/null +++ b/kernel/kernelvec.S @@ -0,0 +1,121 @@ + # + # interrupts and exceptions while in supervisor + # mode come here. + # + # push all registers, call kerneltrap(), restore, return. + # +.globl kerneltrap +.globl kernelvec +.align 4 +kernelvec: + // make room to save registers. + addi sp, sp, -256 + + // save the registers. 
+ sd ra, 0(sp) + sd sp, 8(sp) + sd gp, 16(sp) + sd tp, 24(sp) + sd t0, 32(sp) + sd t1, 40(sp) + sd t2, 48(sp) + sd s0, 56(sp) + sd s1, 64(sp) + sd a0, 72(sp) + sd a1, 80(sp) + sd a2, 88(sp) + sd a3, 96(sp) + sd a4, 104(sp) + sd a5, 112(sp) + sd a6, 120(sp) + sd a7, 128(sp) + sd s2, 136(sp) + sd s3, 144(sp) + sd s4, 152(sp) + sd s5, 160(sp) + sd s6, 168(sp) + sd s7, 176(sp) + sd s8, 184(sp) + sd s9, 192(sp) + sd s10, 200(sp) + sd s11, 208(sp) + sd t3, 216(sp) + sd t4, 224(sp) + sd t5, 232(sp) + sd t6, 240(sp) + + // call the C trap handler in trap.c + call kerneltrap + + // restore registers. + ld ra, 0(sp) + ld sp, 8(sp) + ld gp, 16(sp) + // not this, in case we moved CPUs: ld tp, 24(sp) + ld t0, 32(sp) + ld t1, 40(sp) + ld t2, 48(sp) + ld s0, 56(sp) + ld s1, 64(sp) + ld a0, 72(sp) + ld a1, 80(sp) + ld a2, 88(sp) + ld a3, 96(sp) + ld a4, 104(sp) + ld a5, 112(sp) + ld a6, 120(sp) + ld a7, 128(sp) + ld s2, 136(sp) + ld s3, 144(sp) + ld s4, 152(sp) + ld s5, 160(sp) + ld s6, 168(sp) + ld s7, 176(sp) + ld s8, 184(sp) + ld s9, 192(sp) + ld s10, 200(sp) + ld s11, 208(sp) + ld t3, 216(sp) + ld t4, 224(sp) + ld t5, 232(sp) + ld t6, 240(sp) + + addi sp, sp, 256 + + // return to whatever we were doing in the kernel. + sret + + # + # machine-mode timer interrupt. + # +.globl timervec +.align 4 +timervec: + # start.c has set up the memory that mscratch points to: + # scratch[0,8,16] : register save area. + # scratch[32] : address of CLINT's MTIMECMP register. + # scratch[40] : desired interval between interrupts. + + csrrw a0, mscratch, a0 + sd a1, 0(a0) + sd a2, 8(a0) + sd a3, 16(a0) + + # schedule the next timer interrupt + # by adding interval to mtimecmp. + ld a1, 32(a0) # CLINT_MTIMECMP(hart) + ld a2, 40(a0) # interval + ld a3, 0(a1) + add a3, a3, a2 + sd a3, 0(a1) + + # raise a supervisor software interrupt. 
+ li a1, 2 + csrw sip, a1 + + ld a3, 16(a0) + ld a2, 8(a0) + ld a1, 0(a0) + csrrw a0, mscratch, a0 + + mret @@ -1,4 +1,5 @@ #include "types.h" +#include "riscv.h" #include "defs.h" #include "param.h" #include "spinlock.h" @@ -51,16 +52,14 @@ static void recover_from_log(void); static void commit(); void -initlog(int dev) +initlog(int dev, struct superblock *sb) { if (sizeof(struct logheader) >= BSIZE) panic("initlog: too big logheader"); - struct superblock sb; initlock(&log.lock, "log"); - readsb(dev, &sb); - log.start = sb.logstart; - log.size = sb.nlog; + log.start = sb->logstart; + log.size = sb->nlog; log.dev = dev; recover_from_log(); } @@ -76,6 +75,7 @@ install_trans(void) struct buf *dbuf = bread(log.dev, log.lh.block[tail]); // read dst memmove(dbuf->data, lbuf->data, BSIZE); // copy block to dst bwrite(dbuf); // write dst to disk + bunpin(dbuf); brelse(lbuf); brelse(dbuf); } @@ -202,7 +202,7 @@ commit() } // Caller has modified b->data and is done with the buffer. -// Record the block number and pin in the cache with B_DIRTY. +// Record the block number and pin in the cache by increasing refcnt. // commit()/write_log() will do the disk write. // // log_write() replaces bwrite(); a typical use is: @@ -226,9 +226,10 @@ log_write(struct buf *b) break; } log.lh.block[i] = b->blockno; - if (i == log.lh.n) + if (i == log.lh.n) { // Add new block to log? + bpin(b); log.lh.n++; - b->flags |= B_DIRTY; // prevent eviction + } release(&log.lock); } diff --git a/kernel/main.c b/kernel/main.c new file mode 100644 index 0000000..a936fd3 --- /dev/null +++ b/kernel/main.c @@ -0,0 +1,43 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" + +volatile static int started = 0; + +// start() jumps here in supervisor mode on all CPUs. 
+void +main() +{ + if(cpuid() == 0){ + consoleinit(); + printfinit(); + printf("hart %d starting\n", cpuid()); + kinit(); // physical page allocator + kvminit(); // create kernel page table + kvminithart(); // turn on paging + procinit(); // process table + trapinit(); // trap vectors + trapinithart(); // install kernel trap vector + plicinit(); // set up interrupt controller + plicinithart(); // ask PLIC for device interrupts + binit(); // buffer cache + iinit(); // inode cache + fileinit(); // file table + virtio_disk_init(); // emulated hard disk + userinit(); // first user process + __sync_synchronize(); + started = 1; + } else { + while(started == 0) + ; + __sync_synchronize(); + printf("hart %d starting\n", cpuid()); + kvminithart(); // turn on paging + trapinithart(); // install kernel trap vector + plicinithart(); // ask PLIC for device interrupts + } + + scheduler(); +} diff --git a/kernel/memlayout.h b/kernel/memlayout.h new file mode 100644 index 0000000..8ffd538 --- /dev/null +++ b/kernel/memlayout.h @@ -0,0 +1,67 @@ +// Physical memory layout + +// qemu -machine virt is set up like this, +// based on qemu's hw/riscv/virt.c: +// +// 00001000 -- boot ROM, provided by qemu +// 02000000 -- CLINT +// 0C000000 -- PLIC +// 10000000 -- uart0 +// 10001000 -- virtio disk +// 80000000 -- boot ROM jumps here in machine mode +// -kernel loads the kernel here +// unused RAM after 80000000. + +// the kernel uses physical memory thus: +// 80000000 -- entry.S, then kernel text and data +// end -- start of kernel page allocation area +// PHYSTOP -- end RAM used by the kernel + +// qemu puts UART registers here in physical memory. +#define UART0 0x10000000L +#define UART0_IRQ 10 + +// virtio mmio interface +#define VIRTIO0 0x10001000 +#define VIRTIO0_IRQ 1 + +// local interrupt controller, which contains the timer. +#define CLINT 0x2000000L +#define CLINT_MTIMECMP(hartid) (CLINT + 0x4000 + 8*(hartid)) +#define CLINT_MTIME (CLINT + 0xBFF8) // cycles since boot. 
+ +// qemu puts programmable interrupt controller here. +#define PLIC 0x0c000000L +#define PLIC_PRIORITY (PLIC + 0x0) +#define PLIC_PENDING (PLIC + 0x1000) +#define PLIC_MENABLE(hart) (PLIC + 0x2000 + (hart)*0x100) +#define PLIC_SENABLE(hart) (PLIC + 0x2080 + (hart)*0x100) +#define PLIC_MPRIORITY(hart) (PLIC + 0x200000 + (hart)*0x2000) +#define PLIC_SPRIORITY(hart) (PLIC + 0x201000 + (hart)*0x2000) +#define PLIC_MCLAIM(hart) (PLIC + 0x200004 + (hart)*0x2000) +#define PLIC_SCLAIM(hart) (PLIC + 0x201004 + (hart)*0x2000) + +// the kernel expects there to be RAM +// for use by the kernel and user pages +// from physical address 0x80000000 to PHYSTOP. +#define KERNBASE 0x80000000L +#define PHYSTOP (KERNBASE + 128*1024*1024) + +// map the trampoline page to the highest address, +// in both user and kernel space. +#define TRAMPOLINE (MAXVA - PGSIZE) + +// map kernel stacks beneath the trampoline, +// each surrounded by invalid guard pages. +#define KSTACK(p) (TRAMPOLINE - ((p)+1)* 2*PGSIZE) + +// User memory layout. +// Address zero first: +// text +// original data and bss +// fixed-size stack +// expandable heap +// ... 
+//   TRAPFRAME (p->tf, used by the trampoline)
+//   TRAMPOLINE (the same page as in the kernel)
+#define TRAPFRAME (TRAMPOLINE - PGSIZE)
@@ -1,5 +1,4 @@
 #define NPROC        64  // maximum number of processes
-#define KSTACKSIZE 4096  // size of per-process kernel stack
 #define NCPU          8  // maximum number of CPUs
 #define NOFILE       16  // open files per process
 #define NFILE       100  // open files per system
@@ -11,4 +10,4 @@
 #define LOGSIZE      (MAXOPBLOCKS*3)  // max data blocks in on-disk log
 #define NBUF         (MAXOPBLOCKS*3)  // size of disk block cache
 #define FSSIZE       1000  // size of file system in blocks
-
+#define MAXPATH      128   // maximum file path name
diff --git a/kernel/pipe.c b/kernel/pipe.c
new file mode 100644
index 0000000..c3a8acf
--- /dev/null
+++ b/kernel/pipe.c
@@ -0,0 +1,127 @@
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "param.h"
+#include "spinlock.h"
+#include "proc.h"
+#include "fs.h"
+#include "sleeplock.h"
+#include "file.h"
+
+#define PIPESIZE 512
+
+struct pipe {
+  struct spinlock lock;
+  char data[PIPESIZE];
+  uint nread;     // number of bytes read
+  uint nwrite;    // number of bytes written
+  int readopen;   // read fd is still open
+  int writeopen;  // write fd is still open
+};
+
+// Allocate a pipe and the two files that refer to it:
+// *f0 is the read end, *f1 the write end.
+// Returns 0 on success; on failure releases whatever was
+// allocated and returns -1.
+int
+pipealloc(struct file **f0, struct file **f1)
+{
+  struct pipe *pi;
+
+  pi = 0;
+  *f0 = *f1 = 0;
+  if((*f0 = filealloc()) == 0 || (*f1 = filealloc()) == 0)
+    goto bad;
+  if((pi = (struct pipe*)kalloc()) == 0)
+    goto bad;
+  pi->readopen = 1;
+  pi->writeopen = 1;
+  pi->nwrite = 0;
+  pi->nread = 0;
+  initlock(&pi->lock, "pipe");
+  (*f0)->type = FD_PIPE;
+  (*f0)->readable = 1;
+  (*f0)->writable = 0;
+  (*f0)->pipe = pi;
+  (*f1)->type = FD_PIPE;
+  (*f1)->readable = 0;
+  (*f1)->writable = 1;
+  (*f1)->pipe = pi;
+  return 0;
+
+ bad:
+  if(pi)
+    kfree((char*)pi);
+  if(*f0)
+    fileclose(*f0);
+  if(*f1)
+    fileclose(*f1);
+  return -1;
+}
+
+// Close one end of the pipe (the write end if writable is non-zero),
+// wake sleepers on the other end, and free the pipe once both ends
+// are closed.
+void
+pipeclose(struct pipe *pi, int writable)
+{
+  acquire(&pi->lock);
+  if(writable){
+    pi->writeopen = 0;
+    wakeup(&pi->nread);
+  } else {
+    pi->readopen = 0;
+    wakeup(&pi->nwrite);
+  }
+  if(pi->readopen == 0 && pi->writeopen == 0){
+    release(&pi->lock);
+    kfree((char*)pi);
+  } else
+    release(&pi->lock);
+}
+
+// Copy up to n bytes from user address addr into the pipe, sleeping
+// while the buffer is full.
+// Returns the number of bytes actually written, or -1 if the read end
+// is closed or the process was killed while the pipe was full.
+int
+pipewrite(struct pipe *pi, uint64 addr, int n)
+{
+  int i;
+  char ch;
+  struct proc *pr = myproc();
+
+  acquire(&pi->lock);
+  for(i = 0; i < n; i++){
+    while(pi->nwrite == pi->nread + PIPESIZE){  //DOC: pipewrite-full
+      if(pi->readopen == 0 || myproc()->killed){
+        release(&pi->lock);
+        return -1;
+      }
+      wakeup(&pi->nread);
+      sleep(&pi->nwrite, &pi->lock);  //DOC: pipewrite-sleep
+    }
+    if(copyin(pr->pagetable, &ch, addr + i, 1) == -1)
+      break;
+    pi->data[pi->nwrite++ % PIPESIZE] = ch;
+  }
+  wakeup(&pi->nread);  //DOC: pipewrite-wakeup1
+  release(&pi->lock);
+  // i == n unless copyin() failed part-way; report what was really
+  // queued instead of claiming the whole request succeeded.
+  return i;
+}
+
+// Copy up to n bytes from the pipe to user address addr, sleeping
+// while the pipe is empty and the write end is still open.
+// Returns the number of bytes copied, or -1 if the process was
+// killed while waiting.
+int
+piperead(struct pipe *pi, uint64 addr, int n)
+{
+  int i;
+  struct proc *pr = myproc();
+  char ch;
+
+  acquire(&pi->lock);
+  while(pi->nread == pi->nwrite && pi->writeopen){  //DOC: pipe-empty
+    if(myproc()->killed){
+      release(&pi->lock);
+      return -1;
+    }
+    sleep(&pi->nread, &pi->lock); //DOC: piperead-sleep
+  }
+  for(i = 0; i < n; i++){  //DOC: piperead-copy
+    if(pi->nread == pi->nwrite)
+      break;
+    ch = pi->data[pi->nread++ % PIPESIZE];
+    if(copyout(pr->pagetable, addr + i, &ch, 1) == -1)
+      break;
+  }
+  wakeup(&pi->nwrite);  //DOC: piperead-wakeup
+  release(&pi->lock);
+  return i;
+}
diff --git a/kernel/plic.c b/kernel/plic.c
new file mode 100644
index 0000000..b569492
--- /dev/null
+++ b/kernel/plic.c
@@ -0,0 +1,62 @@
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "defs.h"
+
+//
+// the riscv Platform Level Interrupt Controller (PLIC).
+//
+
+void
+plicinit(void)
+{
+  // set desired IRQ priorities non-zero (otherwise disabled).
+  *(uint32*)(PLIC + UART0_IRQ*4) = 1;
+  *(uint32*)(PLIC + VIRTIO0_IRQ*4) = 1;
+}
+
+void
+plicinithart(void)
+{
+  int hart = cpuid();
+  
+  // set uart's enable bit for this hart's S-mode. 
+  *(uint32*)PLIC_SENABLE(hart)= (1 << UART0_IRQ) | (1 << VIRTIO0_IRQ);
+
+  // set this hart's S-mode priority threshold to 0.
+  *(uint32*)PLIC_SPRIORITY(hart) = 0;
+}
+
+// return a bitmap of which IRQs are waiting
+// to be served.
+uint64
+plic_pending(void)
+{
+  uint64 mask;
+
+  //mask = *(uint32*)(PLIC + 0x1000);
+  //mask |= (uint64)*(uint32*)(PLIC + 0x1004) << 32;
+  mask = *(uint64*)PLIC_PENDING;
+
+  return mask;
+}
+
+// ask the PLIC what interrupt we should serve.
+int
+plic_claim(void)
+{
+  int hart = cpuid();
+  //int irq = *(uint32*)(PLIC + 0x201004);
+  int irq = *(uint32*)PLIC_SCLAIM(hart);
+  return irq;
+}
+
+// tell the PLIC we've served this IRQ.
+void
+plic_complete(int irq)
+{
+  int hart = cpuid();
+  //*(uint32*)(PLIC + 0x201004) = irq;
+  *(uint32*)PLIC_SCLAIM(hart) = irq;
+}
diff --git a/kernel/printf.c b/kernel/printf.c
new file mode 100644
index 0000000..777cc5f
--- /dev/null
+++ b/kernel/printf.c
@@ -0,0 +1,134 @@
+//
+// formatted console output -- printf, panic.
+//
+
+#include <stdarg.h>
+
+#include "types.h"
+#include "param.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "fs.h"
+#include "file.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "defs.h"
+#include "proc.h"
+
+volatile int panicked = 0;
+
+// lock to avoid interleaving concurrent printf's.
+static struct {
+  struct spinlock lock;
+  int locking;
+} pr;
+
+static char digits[] = "0123456789abcdef";
+
+// Print xx in the given base; if sign is non-zero, a negative xx is
+// printed with a leading '-'.
+static void
+printint(int xx, int base, int sign)
+{
+  char buf[16];
+  int i;
+  uint x;
+
+  if(sign && (sign = xx < 0))
+    // negate in unsigned arithmetic: -xx overflows a signed int
+    // when xx is the most negative value.
+    x = -(uint)xx;
+  else
+    x = xx;
+
+  i = 0;
+  do {
+    buf[i++] = digits[x % base];
+  } while((x /= base) != 0);
+
+  if(sign)
+    buf[i++] = '-';
+
+  while(--i >= 0)
+    consputc(buf[i]);
+}
+
+// Print x as "0x" followed by all 16 hex digits, most significant first.
+static void
+printptr(uint64 x)
+{
+  int i;
+  consputc('0');
+  consputc('x');
+  for (i = 0; i < (sizeof(uint64) * 2); i++, x <<= 4)
+    consputc(digits[x >> (sizeof(uint64) * 8 - 4)]);
+}
+
+// Print to the console. only understands %d, %x, %p, %s.
+void
+printf(char *fmt, ...)
+{
+  va_list ap;
+  int i, c, locking;
+  char *s;
+
+  locking = pr.locking;
+  if(locking)
+    acquire(&pr.lock);
+
+  if (fmt == 0)
+    panic("null fmt");
+
+  va_start(ap, fmt);
+  for(i = 0; (c = fmt[i] & 0xff) != 0; i++){
+    if(c != '%'){
+      consputc(c);
+      continue;
+    }
+    c = fmt[++i] & 0xff;
+    if(c == 0)
+      break;
+    switch(c){
+    case 'd':
+      printint(va_arg(ap, int), 10, 1);
+      break;
+    case 'x':
+      printint(va_arg(ap, int), 16, 1);
+      break;
+    case 'p':
+      printptr(va_arg(ap, uint64));
+      break;
+    case 's':
+      if((s = va_arg(ap, char*)) == 0)
+        s = "(null)";
+      for(; *s; s++)
+        consputc(*s);
+      break;
+    case '%':
+      consputc('%');
+      break;
+    default:
+      // Print unknown % sequence to draw attention.
+      consputc('%');
+      consputc(c);
+      break;
+    }
+  }
+
+  if(locking)
+    release(&pr.lock);
+}
+
+// Print "panic: " followed by s, set panicked, and spin forever.
+// Turns off printf locking first so the message is printed even if
+// pr.lock is already held.
+void
+panic(char *s)
+{
+  pr.locking = 0;
+  printf("panic: ");
+  // print s as data, not as the format: a '%' inside the message
+  // would make printf() fetch arguments that were never passed.
+  printf("%s", s);
+  printf("\n");
+  panicked = 1; // freeze other CPUs
+  for(;;)
+    ;
+}
+
+void
+printfinit(void)
+{
+  initlock(&pr.lock, "pr");
+  pr.locking = 1;
+}
diff --git a/kernel/proc.c b/kernel/proc.c
new file mode 100644
index 0000000..3d65b46
--- /dev/null
+++ b/kernel/proc.c
@@ -0,0 +1,647 @@
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "spinlock.h"
+#include "proc.h"
+#include "defs.h"
+
+struct cpu cpus[NCPU];
+
+struct proc proc[NPROC];
+
+struct proc *initproc;
+
+int nextpid = 1;
+struct spinlock pid_lock;
+
+extern void forkret(void);
+static void wakeup1(struct proc *chan);
+
+extern char trampoline[]; // trampoline.S
+
+void
+procinit(void)
+{
+  struct proc *p;
+  
+  initlock(&pid_lock, "nextpid");
+  for(p = proc; p < &proc[NPROC]; p++) {
+      initlock(&p->lock, "proc");
+
+      // Allocate a page for the process's kernel stack.
+      // Map it high in memory, followed by an invalid
+      // guard page.
+      char *pa = kalloc();
+      if(pa == 0)
+        panic("kalloc");
+      uint64 va = KSTACK((int) (p - proc));
+      kvmmap(va, (uint64)pa, PGSIZE, PTE_R | PTE_W);
+      p->kstack = va;
+  }
+  kvminithart();
+}
+
+// Must be called with interrupts disabled,
+// to prevent race with process being moved
+// to a different CPU.
+int
+cpuid()
+{
+  int id = r_tp();
+  return id;
+}
+
+// Return this CPU's cpu struct.
+// Interrupts must be disabled.
+struct cpu*
+mycpu(void) {
+  int id = cpuid();
+  struct cpu *c = &cpus[id];
+  return c;
+}
+
+// Return the current struct proc *, or zero if none.
+struct proc*
+myproc(void) {
+  push_off();
+  struct cpu *c = mycpu();
+  struct proc *p = c->proc;
+  pop_off();
+  return p;
+}
+
+int
+allocpid() {
+  int pid;
+  
+  acquire(&pid_lock);
+  pid = nextpid;
+  nextpid = nextpid + 1;
+  release(&pid_lock);
+
+  return pid;
+}
+
+// Look in the process table for an UNUSED proc.
+// If found, initialize state required to run in the kernel,
+// and return with p->lock held.
+// If there are no free procs, return 0.
+static struct proc*
+allocproc(void)
+{
+  struct proc *p;
+
+  for(p = proc; p < &proc[NPROC]; p++) {
+    acquire(&p->lock);
+    if(p->state == UNUSED) {
+      goto found;
+    } else {
+      release(&p->lock);
+    }
+  }
+  return 0;
+
+found:
+  p->pid = allocpid();
+
+  // Allocate a trapframe page.
+  if((p->tf = (struct trapframe *)kalloc()) == 0){
+    release(&p->lock);
+    return 0;
+  }
+
+  // An empty user page table.
+  p->pagetable = proc_pagetable(p);
+
+  // Set up new context to start executing at forkret,
+  // which returns to user space.
+  memset(&p->context, 0, sizeof p->context);
+  p->context.ra = (uint64)forkret;
+  p->context.sp = p->kstack + PGSIZE;
+
+  return p;
+}
+
+// free a proc structure and the data hanging from it,
+// including user pages.
+// p->lock must be held.
+static void +freeproc(struct proc *p) +{ + if(p->tf) + kfree((void*)p->tf); + p->tf = 0; + if(p->pagetable) + proc_freepagetable(p->pagetable, p->sz); + p->pagetable = 0; + p->sz = 0; + p->pid = 0; + p->parent = 0; + p->name[0] = 0; + p->chan = 0; + p->killed = 0; + p->state = UNUSED; +} + +// Create a page table for a given process, +// with no user pages, but with trampoline pages. +pagetable_t +proc_pagetable(struct proc *p) +{ + pagetable_t pagetable; + + // An empty page table. + pagetable = uvmcreate(); + + // map the trampoline code (for system call return) + // at the highest user virtual address. + // only the supervisor uses it, on the way + // to/from user space, so not PTE_U. + mappages(pagetable, TRAMPOLINE, PGSIZE, + (uint64)trampoline, PTE_R | PTE_X); + + // map the trapframe just below TRAMPOLINE, for trampoline.S. + mappages(pagetable, TRAPFRAME, PGSIZE, + (uint64)(p->tf), PTE_R | PTE_W); + + return pagetable; +} + +// Free a process's page table, and free the +// physical memory it refers to. +void +proc_freepagetable(pagetable_t pagetable, uint64 sz) +{ + uvmunmap(pagetable, TRAMPOLINE, PGSIZE, 0); + uvmunmap(pagetable, TRAPFRAME, PGSIZE, 0); + if(sz > 0) + uvmfree(pagetable, sz); +} + +// a user program that calls exec("/init") +// od -t xC initcode +uchar initcode[] = { + 0x17, 0x05, 0x00, 0x00, 0x13, 0x05, 0x05, 0x02, + 0x97, 0x05, 0x00, 0x00, 0x93, 0x85, 0x05, 0x02, + 0x9d, 0x48, 0x73, 0x00, 0x00, 0x00, 0x89, 0x48, + 0x73, 0x00, 0x00, 0x00, 0xef, 0xf0, 0xbf, 0xff, + 0x2f, 0x69, 0x6e, 0x69, 0x74, 0x00, 0x00, 0x01, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00 +}; + +// Set up first user process. +void +userinit(void) +{ + struct proc *p; + + p = allocproc(); + initproc = p; + + // allocate one user page and copy init's instructions + // and data into it. + uvminit(p->pagetable, initcode, sizeof(initcode)); + p->sz = PGSIZE; + + // prepare for the very first "return" from kernel to user. 
+ p->tf->epc = 0; // user program counter + p->tf->sp = PGSIZE; // user stack pointer + + safestrcpy(p->name, "initcode", sizeof(p->name)); + p->cwd = namei("/"); + + p->state = RUNNABLE; + + release(&p->lock); +} + +// Grow or shrink user memory by n bytes. +// Return 0 on success, -1 on failure. +int +growproc(int n) +{ + uint sz; + struct proc *p = myproc(); + + sz = p->sz; + if(n > 0){ + if((sz = uvmalloc(p->pagetable, sz, sz + n)) == 0) { + return -1; + } + } else if(n < 0){ + if((sz = uvmdealloc(p->pagetable, sz, sz + n)) == 0) { + return -1; + } + } + p->sz = sz; + return 0; +} + +// Create a new process, copying the parent. +// Sets up child kernel stack to return as if from fork() system call. +int +fork(void) +{ + int i, pid; + struct proc *np; + struct proc *p = myproc(); + + // Allocate process. + if((np = allocproc()) == 0){ + return -1; + } + + // Copy user memory from parent to child. + if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0){ + freeproc(np); + release(&np->lock); + return -1; + } + np->sz = p->sz; + + np->parent = p; + + // copy saved user registers. + *(np->tf) = *(p->tf); + + // Cause fork to return 0 in the child. + np->tf->a0 = 0; + + // increment reference counts on open file descriptors. + for(i = 0; i < NOFILE; i++) + if(p->ofile[i]) + np->ofile[i] = filedup(p->ofile[i]); + np->cwd = idup(p->cwd); + + safestrcpy(np->name, p->name, sizeof(p->name)); + + pid = np->pid; + + np->state = RUNNABLE; + + release(&np->lock); + + return pid; +} + +// Pass p's abandoned children to init. +// Caller must hold p->lock and parent->lock. +void +reparent(struct proc *p, struct proc *parent) { + struct proc *pp; + int child_of_init = (p->parent == initproc); + + for(pp = proc; pp < &proc[NPROC]; pp++){ + // this code uses pp->parent without holding pp->lock. + // acquiring the lock first could cause a deadlock + // if pp or a child of pp were also in exit() + // and about to try to lock p. 
+ if(pp->parent == p){ + // pp->parent can't change between the check and the acquire() + // because only the parent changes it, and we're the parent. + acquire(&pp->lock); + pp->parent = initproc; + if(pp->state == ZOMBIE) { + if(!child_of_init) + acquire(&initproc->lock); + wakeup1(initproc); + if(!child_of_init) + release(&initproc->lock); + } + release(&pp->lock); + } + } +} + +// Exit the current process. Does not return. +// An exited process remains in the zombie state +// until its parent calls wait(). +void +exit(void) +{ + struct proc *p = myproc(); + + if(p == initproc) + panic("init exiting"); + + // Close all open files. + for(int fd = 0; fd < NOFILE; fd++){ + if(p->ofile[fd]){ + struct file *f = p->ofile[fd]; + fileclose(f); + p->ofile[fd] = 0; + } + } + + begin_op(); + iput(p->cwd); + end_op(); + p->cwd = 0; + + acquire(&p->parent->lock); + + acquire(&p->lock); + + // Give any children to init. + reparent(p, p->parent); + + // Parent might be sleeping in wait(). + wakeup1(p->parent); + + p->state = ZOMBIE; + + release(&p->parent->lock); + + // Jump into the scheduler, never to return. + sched(); + panic("zombie exit"); +} + +// Wait for a child process to exit and return its pid. +// Return -1 if this process has no children. +int +wait(void) +{ + struct proc *np; + int havekids, pid; + struct proc *p = myproc(); + + // hold p->lock for the whole time to avoid lost + // wakeups from a child's exit(). + acquire(&p->lock); + + for(;;){ + // Scan through table looking for exited children. + havekids = 0; + for(np = proc; np < &proc[NPROC]; np++){ + // this code uses np->parent without holding np->lock. + // acquiring the lock first would cause a deadlock, + // since np might be an ancestor, and we already hold p->lock. + if(np->parent == p){ + // np->parent can't change between the check and the acquire() + // because only the parent changes it, and we're the parent. + acquire(&np->lock); + havekids = 1; + if(np->state == ZOMBIE){ + // Found one. 
+ pid = np->pid; + freeproc(np); + release(&np->lock); + release(&p->lock); + return pid; + } + release(&np->lock); + } + } + + // No point waiting if we don't have any children. + if(!havekids || p->killed){ + release(&p->lock); + return -1; + } + + // Wait for a child to exit. + sleep(p, &p->lock); //DOC: wait-sleep + } +} + +// Per-CPU process scheduler. +// Each CPU calls scheduler() after setting itself up. +// Scheduler never returns. It loops, doing: +// - choose a process to run. +// - swtch to start running that process. +// - eventually that process transfers control +// via swtch back to the scheduler. +void +scheduler(void) +{ + struct proc *p; + struct cpu *c = mycpu(); + + c->proc = 0; + for(;;){ + // Avoid deadlock by ensuring that devices can interrupt. + intr_on(); + + for(p = proc; p < &proc[NPROC]; p++) { + acquire(&p->lock); + if(p->state == RUNNABLE) { + // Switch to chosen process. It is the process's job + // to release its lock and then reacquire it + // before jumping back to us. + p->state = RUNNING; + c->proc = p; + swtch(&c->scheduler, &p->context); + + // Process is done running for now. + // It should have changed its p->state before coming back. + c->proc = 0; + } + release(&p->lock); + } + } +} + +// Switch to scheduler. Must hold only p->lock +// and have changed proc->state. Saves and restores +// intena because intena is a property of this +// kernel thread, not this CPU. It should +// be proc->intena and proc->noff, but that would +// break in the few places where a lock is held but +// there's no process. +void +sched(void) +{ + int intena; + struct proc *p = myproc(); + + if(!holding(&p->lock)) + panic("sched p->lock"); + if(mycpu()->noff != 1) + panic("sched locks"); + if(p->state == RUNNING) + panic("sched running"); + if(intr_get()) + panic("sched interruptible"); + + intena = mycpu()->intena; + swtch(&p->context, &mycpu()->scheduler); + mycpu()->intena = intena; +} + +// Give up the CPU for one scheduling round. 
+void +yield(void) +{ + struct proc *p = myproc(); + acquire(&p->lock); //DOC: yieldlock + p->state = RUNNABLE; + sched(); + release(&p->lock); +} + +// A fork child's very first scheduling by scheduler() +// will swtch to forkret. +void +forkret(void) +{ + static int first = 1; + + // Still holding p->lock from scheduler. + release(&myproc()->lock); + + if (first) { + // File system initialization must be run in the context of a + // regular process (e.g., because it calls sleep), and thus cannot + // be run from main(). + first = 0; + fsinit(ROOTDEV); + } + + usertrapret(); +} + +// Atomically release lock and sleep on chan. +// Reacquires lock when awakened. +void +sleep(void *chan, struct spinlock *lk) +{ + struct proc *p = myproc(); + + // Must acquire p->lock in order to + // change p->state and then call sched. + // Once we hold p->lock, we can be + // guaranteed that we won't miss any wakeup + // (wakeup locks p->lock), + // so it's okay to release lk. + if(lk != &p->lock){ //DOC: sleeplock0 + acquire(&p->lock); //DOC: sleeplock1 + release(lk); + } + + // Go to sleep. + p->chan = chan; + p->state = SLEEPING; + + sched(); + + // Tidy up. + p->chan = 0; + + // Reacquire original lock. + if(lk != &p->lock){ //DOC: sleeplock2 + release(&p->lock); + acquire(lk); + } +} + +// Wake up all processes sleeping on chan. +// Must be called without any p->lock. +void +wakeup(void *chan) +{ + struct proc *p; + + for(p = proc; p < &proc[NPROC]; p++) { + acquire(&p->lock); + if(p->state == SLEEPING && p->chan == chan) { + p->state = RUNNABLE; + } + release(&p->lock); + } +} + +// Wake up p if it is sleeping in wait(); used by exit(). +// Caller must hold p->lock. +static void +wakeup1(struct proc *p) +{ + if(p->chan == p && p->state == SLEEPING) { + p->state = RUNNABLE; + } +} + +// Kill the process with the given pid. +// The victim won't exit until it tries to return +// to user space (see usertrap() in trap.c). 
+int +kill(int pid) +{ + struct proc *p; + + for(p = proc; p < &proc[NPROC]; p++){ + acquire(&p->lock); + if(p->pid == pid){ + p->killed = 1; + if(p->state == SLEEPING){ + // Wake process from sleep(). + p->state = RUNNABLE; + } + release(&p->lock); + return 0; + } + release(&p->lock); + } + return -1; +} + +// Copy to either a user address, or kernel address, +// depending on usr_dst. +// Returns 0 on success, -1 on error. +int +either_copyout(int user_dst, uint64 dst, void *src, uint64 len) +{ + struct proc *p = myproc(); + if(user_dst){ + return copyout(p->pagetable, dst, src, len); + } else { + memmove((char *)dst, src, len); + return 0; + } +} + +// Copy from either a user address, or kernel address, +// depending on usr_src. +// Returns 0 on success, -1 on error. +int +either_copyin(void *dst, int user_src, uint64 src, uint64 len) +{ + struct proc *p = myproc(); + if(user_src){ + return copyin(p->pagetable, dst, src, len); + } else { + memmove(dst, (char*)src, len); + return 0; + } +} + +// Print a process listing to console. For debugging. +// Runs when user types ^P on console. +// No lock to avoid wedging a stuck machine further. +void +procdump(void) +{ + static char *states[] = { + [UNUSED] "unused", + [SLEEPING] "sleep ", + [RUNNABLE] "runble", + [RUNNING] "run ", + [ZOMBIE] "zombie" + }; + struct proc *p; + char *state; + + printf("\n"); + for(p = proc; p < &proc[NPROC]; p++){ + if(p->state == UNUSED) + continue; + if(p->state >= 0 && p->state < NELEM(states) && states[p->state]) + state = states[p->state]; + else + state = "???"; + printf("%d %s %s", p->pid, state, p->name); + printf("\n"); + } +} diff --git a/kernel/proc.h b/kernel/proc.h new file mode 100644 index 0000000..655d79f --- /dev/null +++ b/kernel/proc.h @@ -0,0 +1,105 @@ +// Saved registers for kernel context switches. 
// Saved registers for kernel context switches.
//
// swtch() in swtch.S stores and loads these fields by fixed byte
// offset (ra at 0, sp at 8, s0..s11 at 16..104), so the order and
// size of the fields here must stay in step with that assembly.
struct context {
  uint64 ra;   // return address: where swtch() returns to
  uint64 sp;   // stack pointer

  // callee-saved
  uint64 s0;
  uint64 s1;
  uint64 s2;
  uint64 s3;
  uint64 s4;
  uint64 s5;
  uint64 s6;
  uint64 s7;
  uint64 s8;
  uint64 s9;
  uint64 s10;
  uint64 s11;
};
// Layout of the per-process trap frame.
// The number in each leading comment is the field's byte offset
// from the start of the struct (every field is an 8-byte uint64).
// The first five fields hold kernel-side values and the saved user
// pc; the rest are saved user registers.
// NOTE(review): presumably trampoline.S addresses these fields by
// the same numeric offsets -- confirm before reordering or
// inserting fields.
struct trapframe {
  /*   0 */ uint64 kernel_satp;   // kernel page table
  /*   8 */ uint64 kernel_sp;     // top of process's kernel stack
  /*  16 */ uint64 kernel_trap;   // usertrap()
  /*  24 */ uint64 epc;           // saved user program counter
  /*  32 */ uint64 kernel_hartid; // saved kernel tp
  /*  40 */ uint64 ra;
  /*  48 */ uint64 sp;
  /*  56 */ uint64 gp;
  /*  64 */ uint64 tp;
  /*  72 */ uint64 t0;
  /*  80 */ uint64 t1;
  /*  88 */ uint64 t2;
  /*  96 */ uint64 s0;
  /* 104 */ uint64 s1;
  /* 112 */ uint64 a0;
  /* 120 */ uint64 a1;
  /* 128 */ uint64 a2;
  /* 136 */ uint64 a3;
  /* 144 */ uint64 a4;
  /* 152 */ uint64 a5;
  /* 160 */ uint64 a6;
  /* 168 */ uint64 a7;
  /* 176 */ uint64 s2;
  /* 184 */ uint64 s3;
  /* 192 */ uint64 s4;
  /* 200 */ uint64 s5;
  /* 208 */ uint64 s6;
  /* 216 */ uint64 s7;
  /* 224 */ uint64 s8;
  /* 232 */ uint64 s9;
  /* 240 */ uint64 s10;
  /* 248 */ uint64 s11;
  /* 256 */ uint64 t3;
  /* 264 */ uint64 t4;
  /* 272 */ uint64 t5;
  /* 280 */ uint64 t6;
};
+ uint64 kstack; // Bottom of kernel stack for this process + uint64 sz; // Size of process memory (bytes) + pagetable_t pagetable; // Page table + struct trapframe *tf; // data page for trampoline.S + struct context context; // swtch() here to run process + struct file *ofile[NOFILE]; // Open files + struct inode *cwd; // Current directory + char name[16]; // Process name (debugging) +}; diff --git a/kernel/ramdisk.c b/kernel/ramdisk.c new file mode 100644 index 0000000..9901294 --- /dev/null +++ b/kernel/ramdisk.c @@ -0,0 +1,45 @@ +// +// ramdisk that uses the disk image loaded by qemu -rdinit fs.img +// + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" + +void +ramdiskinit(void) +{ +} + +// If B_DIRTY is set, write buf to disk, clear B_DIRTY, set B_VALID. +// Else if B_VALID is not set, read buf from disk, set B_VALID. +void +ramdiskrw(struct buf *b) +{ + if(!holdingsleep(&b->lock)) + panic("ramdiskrw: buf not locked"); + if((b->flags & (B_VALID|B_DIRTY)) == B_VALID) + panic("ramdiskrw: nothing to do"); + + if(b->blockno >= FSSIZE) + panic("ramdiskrw: blockno too big"); + + uint64 diskaddr = b->blockno * BSIZE; + char *addr = (char *)RAMDISK + diskaddr; + + if(b->flags & B_DIRTY){ + // write + memmove(addr, b->data, BSIZE); + b->flags &= ~B_DIRTY; + } else { + // read + memmove(b->data, addr, BSIZE); + b->flags |= B_VALID; + } +} diff --git a/kernel/riscv.h b/kernel/riscv.h new file mode 100644 index 0000000..0f83db6 --- /dev/null +++ b/kernel/riscv.h @@ -0,0 +1,358 @@ +// which hart (core) is this? +static inline uint64 +r_mhartid() +{ + uint64 x; + asm volatile("csrr %0, mhartid" : "=r" (x) ); + return x; +} + +// Machine Status Register, mstatus + +#define MSTATUS_MPP_MASK (3L << 11) // previous mode. 
// Read the Machine Status Register (mstatus) and return its value.
// The MSTATUS_* masks defined alongside this accessor select the
// MPP (previous mode) and MIE (machine interrupt enable) fields
// of the returned word.
static inline uint64
r_mstatus()
{
  uint64 x;
  // csrr: copy the CSR into the register the compiler picked for x.
  asm volatile("csrr %0, mstatus" : "=r" (x) );
  return x;
}
// supervisor exception program counter, holds the
// instruction address to which a return from
// exception will go.
// (w_mepc() is the machine-mode counterpart and writes mepc;
// this one writes sepc.)
static inline void 
w_sepc(uint64 x)
{
  asm volatile("csrw sepc, %0" : : "r" (x));
}
+static inline void +w_sscratch(uint64 x) +{ + asm volatile("csrw sscratch, %0" : : "r" (x)); +} + +static inline void +w_mscratch(uint64 x) +{ + asm volatile("csrw mscratch, %0" : : "r" (x)); +} + +// Supervisor Trap Cause +static inline uint64 +r_scause() +{ + uint64 x; + asm volatile("csrr %0, scause" : "=r" (x) ); + return x; +} + +// Supervisor Trap Value +static inline uint64 +r_stval() +{ + uint64 x; + asm volatile("csrr %0, stval" : "=r" (x) ); + return x; +} + +// Machine-mode Counter-Enable +static inline void +w_mcounteren(uint64 x) +{ + asm volatile("csrw mcounteren, %0" : : "r" (x)); +} + +static inline uint64 +r_mcounteren() +{ + uint64 x; + asm volatile("csrr %0, mcounteren" : "=r" (x) ); + return x; +} + +// machine-mode cycle counter +static inline uint64 +r_time() +{ + uint64 x; + asm volatile("csrr %0, time" : "=r" (x) ); + return x; +} + +// enable device interrupts +static inline void +intr_on() +{ + w_sie(r_sie() | SIE_SEIE | SIE_STIE | SIE_SSIE); + w_sstatus(r_sstatus() | SSTATUS_SIE); +} + +// disable device interrupts +static inline void +intr_off() +{ + w_sstatus(r_sstatus() & ~SSTATUS_SIE); +} + +// are device interrupts enabled? +static inline int +intr_get() +{ + uint64 x = r_sstatus(); + return (x & SSTATUS_SIE) != 0; +} + +static inline uint64 +r_sp() +{ + uint64 x; + asm volatile("mv %0, sp" : "=r" (x) ); + return x; +} + +// read and write tp, the thread pointer, which holds +// this core's hartid (core number), the index into cpus[]. +static inline uint64 +r_tp() +{ + uint64 x; + asm volatile("mv %0, tp" : "=r" (x) ); + return x; +} + +static inline void +w_tp(uint64 x) +{ + asm volatile("mv tp, %0" : : "r" (x)); +} + +static inline uint64 +r_ra() +{ + uint64 x; + asm volatile("mv %0, ra" : "=r" (x) ); + return x; +} + +// tell the machine to finish any previous writes to +// PTEs, so that a subsequent use of a virtual +// address or load of the SATP will see those writes. +// perhaps this also flushes the TLB. 
#define PGSIZE 4096 // bytes per page
#define PGSHIFT 12  // bits of offset within a page

// round sz up / a down to a multiple of PGSIZE.
#define PGROUNDUP(sz)  (((sz)+PGSIZE-1) & ~(PGSIZE-1))
#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1))

#define PTE_V (1L << 0) // valid
#define PTE_R (1L << 1)
#define PTE_W (1L << 2)
#define PTE_X (1L << 3)
#define PTE_U (1L << 4) // 1 -> user can access

// shift a physical address to the right place for a PTE.
// the argument is parenthesized before the cast so that an
// expression such as PA2PTE(ptr + n) is evaluated as a whole
// and only then converted to uint64.
#define PA2PTE(pa) ((((uint64)(pa)) >> 12) << 10)

// recover the physical address stored in a PTE.
#define PTE2PA(pte) (((pte) >> 10) << 12)

// the low permission/valid bits of a PTE.
#define PTE_FLAGS(pte) ((pte) & (PTE_V|PTE_R|PTE_W|PTE_X|PTE_U))

// extract the three 9-bit page table indices from a virtual address.
#define PXMASK          0x1FF // 9 bits
#define PXSHIFT(level)  (PGSHIFT+(9*(level)))
#define PX(level, va) ((((uint64) (va)) >> PXSHIFT(level)) & PXMASK)

// one beyond the highest possible virtual address.
// MAXVA is actually one bit less than the max allowed by
// Sv39, to avoid having to sign-extend virtual addresses
// that have the high bit set.
#define MAXVA (1L << (9 + 9 + 9 + 12 - 1))
+ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "spinlock.h" +#include "riscv.h" +#include "proc.h" +#include "defs.h" + +void +initlock(struct spinlock *lk, char *name) +{ + lk->name = name; + lk->locked = 0; + lk->cpu = 0; +} + +// Acquire the lock. +// Loops (spins) until the lock is acquired. +void +acquire(struct spinlock *lk) +{ + push_off(); // disable interrupts to avoid deadlock. + if(holding(lk)) + panic("acquire"); + + // On RISC-V, sync_lock_test_and_set turns into an atomic swap: + // a5 = 1 + // s1 = &lk->locked + // amoswap.w.aq a5, a5, (s1) + while(__sync_lock_test_and_set(&lk->locked, 1) != 0) + ; + + // Tell the C compiler and the processor to not move loads or stores + // past this point, to ensure that the critical section's memory + // references happen after the lock is acquired. + __sync_synchronize(); + + // Record info about lock acquisition for holding() and debugging. + lk->cpu = mycpu(); +} + +// Release the lock. +void +release(struct spinlock *lk) +{ + if(!holding(lk)) + panic("release"); + + lk->cpu = 0; + + // Tell the C compiler and the CPU to not move loads or stores + // past this point, to ensure that all the stores in the critical + // section are visible to other CPUs before the lock is released. + // On RISC-V, this turns into a fence instruction. + __sync_synchronize(); + + // Release the lock, equivalent to lk->locked = 0. + // This code doesn't use a C assignment, since the C standard + // implies that an assignment might be implemented with + // multiple store instructions. + // On RISC-V, sync_lock_release turns into an atomic swap: + // s1 = &lk->locked + // amoswap.w zero, zero, (s1) + __sync_lock_release(&lk->locked); + + pop_off(); +} + +// Check whether this cpu is holding the lock. 
// entry.S jumps here in machine mode on stack0.
//
// Prepares the hart for supervisor mode and then "returns" into
// main() with mret: mstatus.MPP selects the mode mret switches to
// and mepc the address it jumps to, so both are set up first.
// The remaining steps (paging off, trap delegation, timer setup,
// hartid in tp) are done while still in machine mode.
void
start()
{
  // set M Previous Privilege mode to Supervisor, for mret.
  unsigned long x = r_mstatus();
  x &= ~MSTATUS_MPP_MASK;
  x |= MSTATUS_MPP_S;
  w_mstatus(x);

  // set M Exception Program Counter to main, for mret.
  // requires gcc -mcmodel=medany
  w_mepc((uint64)main);

  // disable paging for now.
  w_satp(0);

  // delegate all interrupts and exceptions to supervisor mode.
  w_medeleg(0xffff);
  w_mideleg(0xffff);

  // ask for clock interrupts.
  // timerinit() writes machine-mode CSRs (mscratch, mtvec, mie),
  // so it has to run before the mret below.
  timerinit();

  // keep each CPU's hartid in its tp register, for cpuid().
  int id = r_mhartid();
  w_tp(id);

  // switch to supervisor mode and jump to main().
  asm volatile("mret");
}
+ w_mie(r_mie() | MIE_MTIE); +} @@ -1,11 +1,11 @@ -#define T_DIR 1 // Directory -#define T_FILE 2 // File -#define T_DEV 3 // Device +#define T_DIR 1 // Directory +#define T_FILE 2 // File +#define T_DEVICE 3 // Device struct stat { - short type; // Type of file int dev; // File system's disk device uint ino; // Inode number + short type; // Type of file short nlink; // Number of links to file - uint size; // Size of file in bytes + uint64 size; // Size of file in bytes }; diff --git a/string.c b/kernel/string.c index a7cc61f..d99e612 100644 --- a/string.c +++ b/kernel/string.c @@ -1,14 +1,13 @@ #include "types.h" -#include "x86.h" void* memset(void *dst, int c, uint n) { - if ((int)dst%4 == 0 && n%4 == 0){ - c &= 0xFF; - stosl(dst, (c<<24)|(c<<16)|(c<<8)|c, n/4); - } else - stosb(dst, c, n); + char *cdst = (char *) dst; + int i; + for(i = 0; i < n; i++){ + cdst[i] = c; + } return dst; } diff --git a/kernel/swtch.S b/kernel/swtch.S new file mode 100644 index 0000000..17a8663 --- /dev/null +++ b/kernel/swtch.S @@ -0,0 +1,42 @@ +# Context switch +# +# void swtch(struct context *old, struct context *new); +# +# Save current registers in old. Load from new. 
// Fetch the nul-terminated string at addr from the current process.
// Copies the string into buf (at most max bytes) with copyinstr(),
// using the current process's page table.
// Returns length of string, not including nul, or copyinstr()'s
// negative result on error.
int
fetchstr(uint64 addr, char *buf, int max)
{
  struct proc *p = myproc();
  int err = copyinstr(p->pagetable, buf, addr, max);
  if(err < 0)
    return err;
  return strlen(buf);
}
// Fetch the nth word-sized system call argument as a null-terminated string.
// The argument is treated as a user address; the string found there
// is copied into buf, at most max bytes.
// Returns string length if OK (not including nul, since the value
// comes from fetchstr()), or a negative value on error.
int
argstr(int n, char *buf, int max)
{
  uint64 addr;
  if(argaddr(n, &addr) < 0)
    return -1;
  return fetchstr(addr, buf, max);
}
100644 --- a/syscall.h +++ b/kernel/syscall.h diff --git a/sysfile.c b/kernel/sysfile.c index bfe61b7..23a9540 100644 --- a/sysfile.c +++ b/kernel/sysfile.c @@ -5,13 +5,13 @@ // #include "types.h" +#include "riscv.h" #include "defs.h" #include "param.h" #include "stat.h" -#include "mmu.h" +#include "spinlock.h" #include "proc.h" #include "fs.h" -#include "spinlock.h" #include "sleeplock.h" #include "file.h" #include "fcntl.h" @@ -41,18 +41,18 @@ static int fdalloc(struct file *f) { int fd; - struct proc *curproc = myproc(); + struct proc *p = myproc(); for(fd = 0; fd < NOFILE; fd++){ - if(curproc->ofile[fd] == 0){ - curproc->ofile[fd] = f; + if(p->ofile[fd] == 0){ + p->ofile[fd] = f; return fd; } } return -1; } -int +uint64 sys_dup(void) { struct file *f; @@ -66,31 +66,32 @@ sys_dup(void) return fd; } -int +uint64 sys_read(void) { struct file *f; int n; - char *p; + uint64 p; - if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argptr(1, &p, n) < 0) + if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argaddr(1, &p) < 0) return -1; return fileread(f, p, n); } -int +uint64 sys_write(void) { struct file *f; int n; - char *p; + uint64 p; - if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argptr(1, &p, n) < 0) + if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argaddr(1, &p) < 0) return -1; + return filewrite(f, p, n); } -int +uint64 sys_close(void) { int fd; @@ -103,25 +104,25 @@ sys_close(void) return 0; } -int +uint64 sys_fstat(void) { struct file *f; - struct stat *st; + uint64 st; // user pointer to struct stat - if(argfd(0, 0, &f) < 0 || argptr(1, (void*)&st, sizeof(*st)) < 0) + if(argfd(0, 0, &f) < 0 || argaddr(1, &st) < 0) return -1; return filestat(f, st); } // Create the path new as a link to the same inode as old. 
-int +uint64 sys_link(void) { - char name[DIRSIZ], *new, *old; + char name[DIRSIZ], new[MAXPATH], old[MAXPATH]; struct inode *dp, *ip; - if(argstr(0, &old) < 0 || argstr(1, &new) < 0) + if(argstr(0, old, MAXPATH) < 0 || argstr(1, new, MAXPATH) < 0) return -1; begin_op(); @@ -172,7 +173,7 @@ isdirempty(struct inode *dp) struct dirent de; for(off=2*sizeof(de); off<dp->size; off+=sizeof(de)){ - if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) + if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) panic("isdirempty: readi"); if(de.inum != 0) return 0; @@ -180,16 +181,15 @@ isdirempty(struct inode *dp) return 1; } -//PAGEBREAK! -int +uint64 sys_unlink(void) { struct inode *ip, *dp; struct dirent de; - char name[DIRSIZ], *path; + char name[DIRSIZ], path[MAXPATH]; uint off; - if(argstr(0, &path) < 0) + if(argstr(0, path, MAXPATH) < 0) return -1; begin_op(); @@ -216,7 +216,7 @@ sys_unlink(void) } memset(&de, 0, sizeof(de)); - if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) + if(writei(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) panic("unlink: writei"); if(ip->type == T_DIR){ dp->nlink--; @@ -251,7 +251,7 @@ create(char *path, short type, short major, short minor) if((ip = dirlookup(dp, name, 0)) != 0){ iunlockput(dp); ilock(ip); - if(type == T_FILE && ip->type == T_FILE) + if(type == T_FILE && (ip->type == T_FILE || ip->type == T_DEVICE)) return ip; iunlockput(ip); return 0; @@ -282,15 +282,15 @@ create(char *path, short type, short major, short minor) return ip; } -int +uint64 sys_open(void) { - char *path; + char path[MAXPATH]; int fd, omode; struct file *f; struct inode *ip; - if(argstr(0, &path) < 0 || argint(1, &omode) < 0) + if(argstr(0, path, MAXPATH) < 0 || argint(1, &omode) < 0) return -1; begin_op(); @@ -314,6 +314,12 @@ sys_open(void) } } + if(ip->type == T_DEVICE && (ip->major < 0 || ip->major >= NDEV)){ + iunlockput(ip); + end_op(); + return -1; + } + if((f = filealloc()) == 0 || (fd = fdalloc(f)) < 0){ if(f) fileclose(f); @@ 
-321,25 +327,32 @@ sys_open(void) end_op(); return -1; } - iunlock(ip); - end_op(); - f->type = FD_INODE; + if(ip->type == T_DEVICE){ + f->type = FD_DEVICE; + f->major = ip->major; + } else { + f->type = FD_INODE; + f->off = 0; + } f->ip = ip; - f->off = 0; f->readable = !(omode & O_WRONLY); f->writable = (omode & O_WRONLY) || (omode & O_RDWR); + + iunlock(ip); + end_op(); + return fd; } -int +uint64 sys_mkdir(void) { - char *path; + char path[MAXPATH]; struct inode *ip; begin_op(); - if(argstr(0, &path) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0){ + if(argstr(0, path, MAXPATH) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0){ end_op(); return -1; } @@ -348,18 +361,18 @@ sys_mkdir(void) return 0; } -int +uint64 sys_mknod(void) { struct inode *ip; - char *path; + char path[MAXPATH]; int major, minor; begin_op(); - if((argstr(0, &path)) < 0 || + if((argstr(0, path, MAXPATH)) < 0 || argint(1, &major) < 0 || argint(2, &minor) < 0 || - (ip = create(path, T_DEV, major, minor)) == 0){ + (ip = create(path, T_DEVICE, major, minor)) == 0){ end_op(); return -1; } @@ -368,15 +381,15 @@ sys_mknod(void) return 0; } -int +uint64 sys_chdir(void) { - char *path; + char path[MAXPATH]; struct inode *ip; - struct proc *curproc = myproc(); + struct proc *p = myproc(); begin_op(); - if(argstr(0, &path) < 0 || (ip = namei(path)) == 0){ + if(argstr(0, path, MAXPATH) < 0 || (ip = namei(path)) == 0){ end_op(); return -1; } @@ -387,58 +400,77 @@ sys_chdir(void) return -1; } iunlock(ip); - iput(curproc->cwd); + iput(p->cwd); end_op(); - curproc->cwd = ip; + p->cwd = ip; return 0; } -int +uint64 sys_exec(void) { - char *path, *argv[MAXARG]; + char path[MAXPATH], *argv[MAXARG]; int i; - uint uargv, uarg; + uint64 uargv, uarg; - if(argstr(0, &path) < 0 || argint(1, (int*)&uargv) < 0){ + if(argstr(0, path, MAXPATH) < 0 || argaddr(1, &uargv) < 0){ return -1; } memset(argv, 0, sizeof(argv)); for(i=0;; i++){ - if(i >= NELEM(argv)) + if(i >= NELEM(argv)){ return -1; - if(fetchint(uargv+4*i, 
(int*)&uarg) < 0) + } + if(fetchaddr(uargv+sizeof(uint64)*i, (uint64*)&uarg) < 0){ return -1; + } if(uarg == 0){ argv[i] = 0; break; } - if(fetchstr(uarg, &argv[i]) < 0) + argv[i] = kalloc(); + if(argv[i] == 0) + panic("sys_exec kalloc"); + if(fetchstr(uarg, argv[i], PGSIZE) < 0){ return -1; + } } - return exec(path, argv); + + int ret = exec(path, argv); + + for(i = 0; i < NELEM(argv) && argv[i] != 0; i++) + kfree(argv[i]); + + return ret; } -int +uint64 sys_pipe(void) { - int *fd; + uint64 fdarray; // user pointer to array of two integers struct file *rf, *wf; int fd0, fd1; + struct proc *p = myproc(); - if(argptr(0, (void*)&fd, 2*sizeof(fd[0])) < 0) + if(argaddr(0, &fdarray) < 0) return -1; if(pipealloc(&rf, &wf) < 0) return -1; fd0 = -1; if((fd0 = fdalloc(rf)) < 0 || (fd1 = fdalloc(wf)) < 0){ if(fd0 >= 0) - myproc()->ofile[fd0] = 0; + p->ofile[fd0] = 0; + fileclose(rf); + fileclose(wf); + return -1; + } + if(copyout(p->pagetable, fdarray, (char*)&fd0, sizeof(fd0)) < 0 || + copyout(p->pagetable, fdarray+sizeof(fd0), (char *)&fd1, sizeof(fd1)) < 0){ + p->ofile[fd0] = 0; + p->ofile[fd1] = 0; fileclose(rf); fileclose(wf); return -1; } - fd[0] = fd0; - fd[1] = fd1; return 0; } diff --git a/sysproc.c b/kernel/sysproc.c index 0686d29..face81a 100644 --- a/sysproc.c +++ b/kernel/sysproc.c @@ -1,48 +1,38 @@ #include "types.h" -#include "x86.h" +#include "riscv.h" #include "defs.h" #include "date.h" #include "param.h" #include "memlayout.h" -#include "mmu.h" +#include "spinlock.h" #include "proc.h" -int -sys_fork(void) -{ - return fork(); -} - -int +uint64 sys_exit(void) { exit(); return 0; // not reached } -int -sys_wait(void) +uint64 +sys_getpid(void) { - return wait(); + return myproc()->pid; } -int -sys_kill(void) +uint64 +sys_fork(void) { - int pid; - - if(argint(0, &pid) < 0) - return -1; - return kill(pid); + return fork(); } -int -sys_getpid(void) +uint64 +sys_wait(void) { - return myproc()->pid; + return wait(); } -int +uint64 sys_sbrk(void) { int addr; @@ -56,7 
+46,7 @@ sys_sbrk(void) return addr; } -int +uint64 sys_sleep(void) { int n; @@ -77,9 +67,19 @@ sys_sleep(void) return 0; } +uint64 +sys_kill(void) +{ + int pid; + + if(argint(0, &pid) < 0) + return -1; + return kill(pid); +} + // return how many clock tick interrupts have occurred // since start. -int +uint64 sys_uptime(void) { uint xticks; diff --git a/kernel/trampoline.S b/kernel/trampoline.S new file mode 100644 index 0000000..24499d9 --- /dev/null +++ b/kernel/trampoline.S @@ -0,0 +1,141 @@ + # + # code to switch between user and kernel space. + # + # this code is mapped at the same virtual address + # (TRAMPOLINE) in user and kernel space so that + # it continues to work when it switches page tables. + # + # kernel.ld causes this to be aligned + # to a page boundary. + # + .section trampsec +.globl trampoline +trampoline: +.align 4 +.globl uservec +uservec: + # + # trap.c sets stvec to point here, so + # traps from user space start here, + # in supervisor mode, but with a + # user page table. + # + # sscratch points to where the process's p->tf is + # mapped into user space, at TRAPFRAME. 
+ # + + # swap a0 and sscratch + # so that a0 is TRAPFRAME + csrrw a0, sscratch, a0 + + # save the user registers in TRAPFRAME + sd ra, 40(a0) + sd sp, 48(a0) + sd gp, 56(a0) + sd tp, 64(a0) + sd t0, 72(a0) + sd t1, 80(a0) + sd t2, 88(a0) + sd s0, 96(a0) + sd s1, 104(a0) + sd a1, 120(a0) + sd a2, 128(a0) + sd a3, 136(a0) + sd a4, 144(a0) + sd a5, 152(a0) + sd a6, 160(a0) + sd a7, 168(a0) + sd s2, 176(a0) + sd s3, 184(a0) + sd s4, 192(a0) + sd s5, 200(a0) + sd s6, 208(a0) + sd s7, 216(a0) + sd s8, 224(a0) + sd s9, 232(a0) + sd s10, 240(a0) + sd s11, 248(a0) + sd t3, 256(a0) + sd t4, 264(a0) + sd t5, 272(a0) + sd t6, 280(a0) + + # save the user a0 in p->tf->a0 + csrr t0, sscratch + sd t0, 112(a0) + + # restore kernel stack pointer from p->tf->kernel_sp + ld sp, 8(a0) + + # make tp hold the current hartid, from p->tf->kernel_hartid + ld tp, 32(a0) + + # remember the address of usertrap(), p->tf->kernel_trap + ld t0, 16(a0) + + # restore kernel page table from p->tf->kernel_satp + ld t1, 0(a0) + sfence.vma zero, zero + csrw satp, t1 + + # a0 is no longer valid, since the kernel page + # table does not specially map p->tf. + + # jump to usertrap(), which does not return + jr t0 + +.globl userret +userret: + # userret(TRAPFRAME, pagetable) + # switch from kernel to user. + # usertrapret() calls here. + # a0: TRAPFRAME, in user page table + # a1: user page table, for satp + + # switch to the user page table. + sfence.vma zero, zero + csrw satp, a1 + + # put the saved user a0 in sscratch, so we + # can swap it with our a0 (TRAPFRAME) in the last step. 
+ ld t0, 112(a0) + csrw sscratch, t0 + + # restore all but a0 from TRAPFRAME + ld ra, 40(a0) + ld sp, 48(a0) + ld gp, 56(a0) + ld tp, 64(a0) + ld t0, 72(a0) + ld t1, 80(a0) + ld t2, 88(a0) + ld s0, 96(a0) + ld s1, 104(a0) + ld a1, 120(a0) + ld a2, 128(a0) + ld a3, 136(a0) + ld a4, 144(a0) + ld a5, 152(a0) + ld a6, 160(a0) + ld a7, 168(a0) + ld s2, 176(a0) + ld s3, 184(a0) + ld s4, 192(a0) + ld s5, 200(a0) + ld s6, 208(a0) + ld s7, 216(a0) + ld s8, 224(a0) + ld s9, 232(a0) + ld s10, 240(a0) + ld s11, 248(a0) + ld t3, 256(a0) + ld t4, 264(a0) + ld t5, 272(a0) + ld t6, 280(a0) + + # restore user a0, and save TRAPFRAME in sscratch + csrrw a0, sscratch, a0 + + # return to user mode and user pc. + # usertrapret() set up sstatus and sepc. + sret diff --git a/kernel/trap.c b/kernel/trap.c new file mode 100644 index 0000000..ec57bed --- /dev/null +++ b/kernel/trap.c @@ -0,0 +1,213 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "spinlock.h" +#include "proc.h" +#include "defs.h" + +struct spinlock tickslock; +uint ticks; + +extern char trampoline[], uservec[], userret[]; + +// in kernelvec.S, calls kerneltrap(). +void kernelvec(); + +extern int devintr(); + +void +trapinit(void) +{ + initlock(&tickslock, "time"); +} + +// set up to take exceptions and traps while in the kernel. +void +trapinithart(void) +{ + w_stvec((uint64)kernelvec); +} + +// +// handle an interrupt, exception, or system call from user space. +// called from trampoline.S +// +void +usertrap(void) +{ + int which_dev = 0; + + if((r_sstatus() & SSTATUS_SPP) != 0) + panic("usertrap: not from user mode"); + + // send interrupts and exceptions to kerneltrap(), + // since we're now in the kernel. + w_stvec((uint64)kernelvec); + + struct proc *p = myproc(); + + // save user program counter. 
+ p->tf->epc = r_sepc(); + + if(r_scause() == 8){ + // system call + + if(p->killed) + exit(); + + // sepc points to the ecall instruction, + // but we want to return to the next instruction. + p->tf->epc += 4; + + // an interrupt will change sstatus &c registers, + // so don't enable until done with those registers. + intr_on(); + + syscall(); + } else if((which_dev = devintr()) != 0){ + // ok + } else { + printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid); + printf(" sepc=%p stval=%p\n", r_sepc(), r_stval()); + p->killed = 1; + } + + if(p->killed) + exit(); + + // give up the CPU if this is a timer interrupt. + if(which_dev == 2) + yield(); + + usertrapret(); +} + +// +// return to user space +// +void +usertrapret(void) +{ + struct proc *p = myproc(); + + // turn off interrupts, since we're about to switch the + // destination of traps from kerneltrap() to usertrap(). + intr_off(); + + // send interrupts and exceptions to trampoline.S + w_stvec(TRAMPOLINE + (uservec - trampoline)); + + // set up values that uservec will need when + // the process next re-enters the kernel. + p->tf->kernel_satp = r_satp(); // kernel page table + p->tf->kernel_sp = p->kstack + PGSIZE; // process's kernel stack + p->tf->kernel_trap = (uint64)usertrap; + p->tf->kernel_hartid = r_tp(); // hartid for cpuid() + + // set up the registers that trampoline.S's sret will use + // to get to user space. + + // set S Previous Privilege mode to User. + unsigned long x = r_sstatus(); + x &= ~SSTATUS_SPP; // clear SPP to 0 for user mode + x |= SSTATUS_SPIE; // enable interrupts in user mode + w_sstatus(x); + + // set S Exception Program Counter to the saved user pc. + w_sepc(p->tf->epc); + + // tell trampoline.S the user page table to switch to. + uint64 satp = MAKE_SATP(p->pagetable); + + // jump to trampoline.S at the top of memory, which + // switches to the user page table, restores user registers, + // and switches to user mode with sret. 
+ uint64 fn = TRAMPOLINE + (userret - trampoline); + ((void (*)(uint64,uint64))fn)(TRAPFRAME, satp); +} + +// interrupts and exceptions from kernel code go here via kernelvec, +// on whatever the current kernel stack is. +// must be 4-byte aligned to fit in stvec. +void +kerneltrap() +{ + int which_dev = 0; + uint64 sepc = r_sepc(); + uint64 sstatus = r_sstatus(); + uint64 scause = r_scause(); + + if((sstatus & SSTATUS_SPP) == 0) + panic("kerneltrap: not from supervisor mode"); + if(intr_get() != 0) + panic("kerneltrap: interrupts enabled"); + + if((which_dev = devintr()) == 0){ + printf("scause %p\n", scause); + printf("sepc=%p stval=%p\n", r_sepc(), r_stval()); + panic("kerneltrap"); + } + + // give up the CPU if this is a timer interrupt. + if(which_dev == 2 && myproc() != 0 && myproc()->state == RUNNING) + yield(); + + // the yield() may have caused some traps to occur, + // so restore trap registers for use by kernelvec.S's sepc instruction. + w_sepc(sepc); + w_sstatus(sstatus); +} + +void +clockintr() +{ + acquire(&tickslock); + ticks++; + wakeup(&ticks); + release(&tickslock); +} + +// check if it's an external interrupt or software interrupt, +// and handle it. +// returns 2 if timer interrupt, +// 1 if other device, +// 0 if not recognized. +int +devintr() +{ + uint64 scause = r_scause(); + + if((scause & 0x8000000000000000L) && + (scause & 0xff) == 9){ + // this is a supervisor external interrupt, via PLIC. + + // irq indicates which device interrupted. + int irq = plic_claim(); + + if(irq == UART0_IRQ){ + uartintr(); + } else if(irq == VIRTIO0_IRQ){ + virtio_disk_intr(); + } + + plic_complete(irq); + return 1; + } else if(scause == 0x8000000000000001L){ + // software interrupt from a machine-mode timer interrupt, + // forwarded by timervec in kernelvec.S. + + if(cpuid() == 0){ + clockintr(); + } + + // acknowledge the software interrupt by clearing + // the SSIP bit in sip. 
+ w_sip(r_sip() & ~2); + + return 2; + } else { + return 0; + } +} + diff --git a/kernel/types.h b/kernel/types.h new file mode 100644 index 0000000..ee73164 --- /dev/null +++ b/kernel/types.h @@ -0,0 +1,10 @@ +typedef unsigned int uint; +typedef unsigned short ushort; +typedef unsigned char uchar; + +typedef unsigned char uint8; +typedef unsigned short uint16; +typedef unsigned int uint32; +typedef unsigned long uint64; + +typedef uint64 pde_t; diff --git a/kernel/uart.c b/kernel/uart.c new file mode 100644 index 0000000..3a5cdc4 --- /dev/null +++ b/kernel/uart.c @@ -0,0 +1,92 @@ +// +// low-level driver routines for 16550a UART. +// + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "spinlock.h" +#include "proc.h" +#include "defs.h" + +// the UART control registers are memory-mapped +// at address UART0. this macro returns the +// address of one of the registers. +#define Reg(reg) ((volatile unsigned char *)(UART0 + reg)) + +// the UART control registers. +// some have different meanings for +// read vs write. +// http://byterunner.com/16550.html +#define RHR 0 // receive holding register (for input bytes) +#define THR 0 // transmit holding register (for output bytes) +#define IER 1 // interrupt enable register +#define FCR 2 // FIFO control register +#define ISR 2 // interrupt status register +#define LCR 3 // line control register +#define LSR 5 // line status register + +#define ReadReg(reg) (*(Reg(reg))) +#define WriteReg(reg, v) (*(Reg(reg)) = (v)) + +void +uartinit(void) +{ + // disable interrupts. + WriteReg(IER, 0x00); + + // special mode to set baud rate. + WriteReg(LCR, 0x80); + + // LSB for baud rate of 38.4K. + WriteReg(0, 0x03); + + // MSB for baud rate of 38.4K. + WriteReg(1, 0x00); + + // leave set-baud mode, + // and set word length to 8 bits, no parity. + WriteReg(LCR, 0x03); + + // reset and enable FIFOs. + WriteReg(FCR, 0x07); + + // enable receive interrupts. 
+ WriteReg(IER, 0x01); +} + +// write one output character to the UART. +void +uartputc(int c) +{ + // wait for Transmit Holding Empty to be set in LSR. + while((ReadReg(LSR) & (1 << 5)) == 0) + ; + WriteReg(THR, c); +} + +// read one input character from the UART. +// return -1 if none is waiting. +int +uartgetc(void) +{ + if(ReadReg(LSR) & 0x01){ + // input data is ready. + return ReadReg(RHR); + } else { + return -1; + } +} + +// trap.c calls here when the uart interrupts. +void +uartintr(void) +{ + while(1){ + int c = uartgetc(); + if(c == -1) + break; + consoleintr(c); + } +} diff --git a/kernel/virtio.h b/kernel/virtio.h new file mode 100644 index 0000000..03b53a9 --- /dev/null +++ b/kernel/virtio.h @@ -0,0 +1,72 @@ +// +// virtio device definitions. +// for both the mmio interface, and virtio descriptors. +// only tested with qemu. +// this is the "legacy" virtio interface. +// +// the virtio spec: +// https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.pdf +// + +// virtio mmio control registers, mapped starting at 0x10001000. 
+// from qemu virtio_mmio.h +#define VIRTIO_MMIO_MAGIC_VALUE 0x000 // 0x74726976 +#define VIRTIO_MMIO_VERSION 0x004 // version; 1 is legacy +#define VIRTIO_MMIO_DEVICE_ID 0x008 // device type; 1 is net, 2 is disk +#define VIRTIO_MMIO_VENDOR_ID 0x00c // 0x554d4551 +#define VIRTIO_MMIO_DEVICE_FEATURES 0x010 +#define VIRTIO_MMIO_DRIVER_FEATURES 0x020 +#define VIRTIO_MMIO_GUEST_PAGE_SIZE 0x028 // page size for PFN, write-only +#define VIRTIO_MMIO_QUEUE_SEL 0x030 // select queue, write-only +#define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034 // max size of current queue, read-only +#define VIRTIO_MMIO_QUEUE_NUM 0x038 // size of current queue, write-only +#define VIRTIO_MMIO_QUEUE_ALIGN 0x03c // used ring alignment, write-only +#define VIRTIO_MMIO_QUEUE_PFN 0x040 // physical page number for queue, read/write +#define VIRTIO_MMIO_QUEUE_READY 0x044 // ready bit +#define VIRTIO_MMIO_QUEUE_NOTIFY 0x050 // write-only +#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060 // read-only +#define VIRTIO_MMIO_INTERRUPT_ACK 0x064 // write-only +#define VIRTIO_MMIO_STATUS 0x070 // read/write + +// status register bits, from qemu virtio_config.h +#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1 +#define VIRTIO_CONFIG_S_DRIVER 2 +#define VIRTIO_CONFIG_S_DRIVER_OK 4 +#define VIRTIO_CONFIG_S_FEATURES_OK 8 + +// device feature bits +#define VIRTIO_BLK_F_RO 5 /* Disk is read-only */ +#define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */ +#define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */ +#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ +#define VIRTIO_F_ANY_LAYOUT 27 +#define VIRTIO_RING_F_INDIRECT_DESC 28 +#define VIRTIO_RING_F_EVENT_IDX 29 + +// this many virtio descriptors. +// must be a power of two. 
+#define NUM 8 + +struct VRingDesc { + uint64 addr; + uint32 len; + uint16 flags; + uint16 next; +}; +#define VRING_DESC_F_NEXT 1 // chained with another descriptor +#define VRING_DESC_F_WRITE 2 // device writes (vs read) + +struct VRingUsedElem { + uint32 id; // index of start of completed descriptor chain + uint32 len; +}; + +// for disk ops +#define VIRTIO_BLK_T_IN 0 // read the disk +#define VIRTIO_BLK_T_OUT 1 // write the disk + +struct UsedArea { + uint16 flags; + uint16 id; + struct VRingUsedElem elems[NUM]; +}; diff --git a/kernel/virtio_disk.c b/kernel/virtio_disk.c new file mode 100644 index 0000000..3cff024 --- /dev/null +++ b/kernel/virtio_disk.c @@ -0,0 +1,269 @@ +// +// driver for qemu's virtio disk device. +// uses qemu's mmio interface to virtio. +// qemu presents a "legacy" virtio interface. +// +// qemu ... -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0 +// + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" +#include "virtio.h" + +// the address of virtio mmio register r. +#define R(r) ((volatile uint32 *)(VIRTIO0 + (r))) + +static struct disk { + // memory for virtio descriptors &c for queue 0. + // this is a global instead of allocated because it must + // be multiple contiguous pages, which kalloc() + // doesn't support, and page aligned. + char pages[2*PGSIZE]; + struct VRingDesc *desc; + uint16 *avail; + struct UsedArea *used; + + // our own book-keeping. + char free[NUM]; // is a descriptor free? + uint16 used_idx; // we've looked this far in used[2..NUM]. + + // track info about in-flight operations, + // for use when completion interrupt arrives. + // indexed by first descriptor index of chain. 
+ struct { + struct buf *b; + char status; + } info[NUM]; + + struct spinlock vdisk_lock; + +} __attribute__ ((aligned (PGSIZE))) disk; + +void +virtio_disk_init(void) +{ + uint32 status = 0; + + initlock(&disk.vdisk_lock, "virtio_disk"); + + if(*R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 || + *R(VIRTIO_MMIO_VERSION) != 1 || + *R(VIRTIO_MMIO_DEVICE_ID) != 2 || + *R(VIRTIO_MMIO_VENDOR_ID) != 0x554d4551){ + panic("could not find virtio disk"); + } + + status |= VIRTIO_CONFIG_S_ACKNOWLEDGE; + *R(VIRTIO_MMIO_STATUS) = status; + + status |= VIRTIO_CONFIG_S_DRIVER; + *R(VIRTIO_MMIO_STATUS) = status; + + // negotiate features + uint64 features = *R(VIRTIO_MMIO_DEVICE_FEATURES); + features &= ~(1 << VIRTIO_BLK_F_RO); + features &= ~(1 << VIRTIO_BLK_F_SCSI); + features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE); + features &= ~(1 << VIRTIO_BLK_F_MQ); + features &= ~(1 << VIRTIO_F_ANY_LAYOUT); + features &= ~(1 << VIRTIO_RING_F_EVENT_IDX); + features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC); + *R(VIRTIO_MMIO_DRIVER_FEATURES) = features; + + // tell device that feature negotiation is complete. + status |= VIRTIO_CONFIG_S_FEATURES_OK; + *R(VIRTIO_MMIO_STATUS) = status; + + // tell device we're completely ready. + status |= VIRTIO_CONFIG_S_DRIVER_OK; + *R(VIRTIO_MMIO_STATUS) = status; + + *R(VIRTIO_MMIO_GUEST_PAGE_SIZE) = PGSIZE; + + // initialize queue 0. 
+ *R(VIRTIO_MMIO_QUEUE_SEL) = 0; + uint32 max = *R(VIRTIO_MMIO_QUEUE_NUM_MAX); + if(max == 0) + panic("virtio disk has no queue 0"); + if(max < NUM) + panic("virtio disk max queue too short"); + *R(VIRTIO_MMIO_QUEUE_NUM) = NUM; + memset(disk.pages, 0, sizeof(disk.pages)); + *R(VIRTIO_MMIO_QUEUE_PFN) = ((uint64)disk.pages) >> PGSHIFT; + + // desc = pages -- num * VRingDesc + // avail = pages + 0x40 -- 2 * uint16, then num * uint16 + // used = pages + 4096 -- 2 * uint16, then num * vRingUsedElem + + disk.desc = (struct VRingDesc *) disk.pages; + disk.avail = (uint16*)(((char*)disk.desc) + NUM*sizeof(struct VRingDesc)); + disk.used = (struct UsedArea *) (disk.pages + PGSIZE); + + for(int i = 0; i < NUM; i++) + disk.free[i] = 1; + + // plic.c and trap.c arrange for interrupts from VIRTIO0_IRQ. +} + +// find a free descriptor, mark it non-free, return its index. +static int +alloc_desc() +{ + for(int i = 0; i < NUM; i++){ + if(disk.free[i]){ + disk.free[i] = 0; + return i; + } + } + return -1; +} + +// mark a descriptor as free. +static void +free_desc(int i) +{ + if(i >= NUM) + panic("virtio_disk_intr 1"); + if(disk.free[i]) + panic("virtio_disk_intr 2"); + disk.desc[i].addr = 0; + disk.free[i] = 1; + wakeup(&disk.free[0]); +} + +// free a chain of descriptors. +static void +free_chain(int i) +{ + while(1){ + free_desc(i); + if(disk.desc[i].flags & VRING_DESC_F_NEXT) + i = disk.desc[i].next; + else + break; + } +} + +static int +alloc3_desc(int *idx) +{ + for(int i = 0; i < 3; i++){ + idx[i] = alloc_desc(); + if(idx[i] < 0){ + for(int j = 0; j < i; j++) + free_desc(idx[j]); + return -1; + } + } + return 0; +} + +void +virtio_disk_rw(struct buf *b, int write) +{ + uint64 sector = b->blockno * (BSIZE / 512); + + acquire(&disk.vdisk_lock); + + // the spec says that legacy block operations use three + // descriptors: one for type/reserved/sector, one for + // the data, one for a 1-byte status result. + + // allocate the three descriptors. 
+ int idx[3]; + while(1){ + if(alloc3_desc(idx) == 0) { + break; + } + sleep(&disk.free[0], &disk.vdisk_lock); + } + + // format the three descriptors. + // qemu's virtio-blk.c reads them. + + struct virtio_blk_outhdr { + uint32 type; + uint32 reserved; + uint64 sector; + } buf0; + + if(write) + buf0.type = VIRTIO_BLK_T_OUT; // write the disk + else + buf0.type = VIRTIO_BLK_T_IN; // read the disk + buf0.reserved = 0; + buf0.sector = sector; + + // buf0 is on a kernel stack, which is not direct mapped, + // thus the call to kvmpa(). + disk.desc[idx[0]].addr = (uint64) kvmpa((uint64) &buf0); + disk.desc[idx[0]].len = sizeof(buf0); + disk.desc[idx[0]].flags = VRING_DESC_F_NEXT; + disk.desc[idx[0]].next = idx[1]; + + disk.desc[idx[1]].addr = (uint64) b->data; + disk.desc[idx[1]].len = BSIZE; + if(write) + disk.desc[idx[1]].flags = 0; // device reads b->data + else + disk.desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data + disk.desc[idx[1]].flags |= VRING_DESC_F_NEXT; + disk.desc[idx[1]].next = idx[2]; + + disk.info[idx[0]].status = 0; + disk.desc[idx[2]].addr = (uint64) &disk.info[idx[0]].status; + disk.desc[idx[2]].len = 1; + disk.desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status + disk.desc[idx[2]].next = 0; + + // record struct buf for virtio_disk_intr(). + b->disk = 1; + disk.info[idx[0]].b = b; + + // avail[0] is flags + // avail[1] tells the device how far to look in avail[2...]. + // avail[2...] are desc[] indices the device should process. + // we only tell device the first index in our chain of descriptors. + disk.avail[2 + (disk.avail[1] % NUM)] = idx[0]; + __sync_synchronize(); + disk.avail[1] = disk.avail[1] + 1; + + *R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number + + // Wait for virtio_disk_intr() to say request has finished. 
+ while(b->disk == 1) { + sleep(b, &disk.vdisk_lock); + } + + disk.info[idx[0]].b = 0; + free_chain(idx[0]); + + release(&disk.vdisk_lock); +} + +void +virtio_disk_intr() +{ + acquire(&disk.vdisk_lock); + + while((disk.used_idx % NUM) != (disk.used->id % NUM)){ + int id = disk.used->elems[disk.used_idx].id; + + if(disk.info[id].status != 0) + panic("virtio_disk_intr status"); + + disk.info[id].b->disk = 0; // disk is done with buf + wakeup(disk.info[id].b); + + disk.used_idx = (disk.used_idx + 1) % NUM; + } + + release(&disk.vdisk_lock); +} diff --git a/kernel/vm.c b/kernel/vm.c new file mode 100644 index 0000000..3631c9c --- /dev/null +++ b/kernel/vm.c @@ -0,0 +1,441 @@ +#include "param.h" +#include "types.h" +#include "memlayout.h" +#include "elf.h" +#include "riscv.h" +#include "defs.h" +#include "fs.h" + +/* + * the kernel's page table. + */ +pagetable_t kernel_pagetable; + +extern char etext[]; // kernel.ld sets this to end of kernel code. + +extern char trampoline[]; // trampoline.S + +/* + * create a direct-map page table for the kernel and + * turn on paging. called early, in supervisor mode. + * the page allocator is already initialized. + */ +void +kvminit() +{ + kernel_pagetable = (pagetable_t) kalloc(); + memset(kernel_pagetable, 0, PGSIZE); + + // uart registers + kvmmap(UART0, UART0, PGSIZE, PTE_R | PTE_W); + + // virtio mmio disk interface + kvmmap(VIRTIO0, VIRTIO0, PGSIZE, PTE_R | PTE_W); + + // CLINT + kvmmap(CLINT, CLINT, 0x10000, PTE_R | PTE_W); + + // PLIC + kvmmap(PLIC, PLIC, 0x400000, PTE_R | PTE_W); + + // map kernel text executable and read-only. + kvmmap(KERNBASE, KERNBASE, (uint64)etext-KERNBASE, PTE_R | PTE_X); + + // map kernel data and the physical RAM we'll make use of. + kvmmap((uint64)etext, (uint64)etext, PHYSTOP-(uint64)etext, PTE_R | PTE_W); + + // map the trampoline for trap entry/exit to + // the highest virtual address in the kernel. 
+ kvmmap(TRAMPOLINE, (uint64)trampoline, PGSIZE, PTE_R | PTE_X); +} + +// Switch h/w page table register to the kernel's page table, +// and enable paging. +void +kvminithart() +{ + sfence_vma(); + w_satp(MAKE_SATP(kernel_pagetable)); +} + +// Return the address of the PTE in page table pagetable +// that corresponds to virtual address va. If alloc!=0, +// create any required page-table pages. +// +// The risc-v Sv39 scheme has three levels of page-table +// pages. A page-table page contains 512 64-bit PTEs. +// A 64-bit virtual address is split into five fields: +// 39..63 -- must be zero. +// 30..38 -- 9 bits of level-2 index. +// 21..29 -- 9 bits of level-1 index. +// 12..20 -- 9 bits of level-0 index. +// 0..11 -- 12 bits of byte offset within the page. +static pte_t * +walk(pagetable_t pagetable, uint64 va, int alloc) +{ + if(va >= MAXVA) + panic("walk"); + + for(int level = 2; level > 0; level--) { + pte_t *pte = &pagetable[PX(level, va)]; + if(*pte & PTE_V) { + pagetable = (pagetable_t)PTE2PA(*pte); + } else { + if(!alloc || (pagetable = (pde_t*)kalloc()) == 0) + return 0; + memset(pagetable, 0, PGSIZE); + *pte = PA2PTE(pagetable) | PTE_V; + } + } + return &pagetable[PX(0, va)]; +} + +// Look up a virtual address, return the physical address, +// or 0 if not mapped. +// Can only be used to look up user pages. +uint64 +walkaddr(pagetable_t pagetable, uint64 va) +{ + pte_t *pte; + uint64 pa; + + pte = walk(pagetable, va, 0); + if(pte == 0) + return 0; + if((*pte & PTE_V) == 0) + return 0; + if((*pte & PTE_U) == 0) + return 0; + pa = PTE2PA(*pte); + return pa; +} + +// add a mapping to the kernel page table. +// only used when booting. +// does not flush TLB or enable paging. +void +kvmmap(uint64 va, uint64 pa, uint64 sz, int perm) +{ + if(mappages(kernel_pagetable, va, sz, pa, perm) != 0) + panic("kvmmap"); +} + +// translate a kernel virtual address to +// a physical address. only needed for +// addresses on the stack. +// va need not be page aligned; its page offset is preserved. 
+uint64 +kvmpa(uint64 va) +{ + uint64 off = va % PGSIZE; + pte_t *pte; + uint64 pa; + + pte = walk(kernel_pagetable, va, 0); + if(pte == 0) + panic("kvmpa"); + if((*pte & PTE_V) == 0) + panic("kvmpa"); + pa = PTE2PA(*pte); + return pa+off; +} + +// Create PTEs for virtual addresses starting at va that refer to +// physical addresses starting at pa. va and size might not +// be page-aligned. Returns 0 on success, -1 if walk() couldn't +// allocate a needed page-table page. +int +mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm) +{ + uint64 a, last; + pte_t *pte; + + a = PGROUNDDOWN(va); + last = PGROUNDDOWN(va + size - 1); + for(;;){ + if((pte = walk(pagetable, a, 1)) == 0) + return -1; + if(*pte & PTE_V) + panic("remap"); + *pte = PA2PTE(pa) | perm | PTE_V; + if(a == last) + break; + a += PGSIZE; + pa += PGSIZE; + } + return 0; +} + +// Remove mappings from a page table. The mappings in +// the given range must exist. Optionally free the +// physical memory. +void +uvmunmap(pagetable_t pagetable, uint64 va, uint64 size, int do_free) +{ + uint64 a, last; + pte_t *pte; + uint64 pa; + + a = PGROUNDDOWN(va); + last = PGROUNDDOWN(va + size - 1); + for(;;){ + if((pte = walk(pagetable, a, 0)) == 0) + panic("uvmunmap: walk"); + if((*pte & PTE_V) == 0){ + printf("va=%p pte=%p\n", a, *pte); + panic("uvmunmap: not mapped"); + } + if(PTE_FLAGS(*pte) == PTE_V) + panic("uvmunmap: not a leaf"); + if(do_free){ + pa = PTE2PA(*pte); + kfree((void*)pa); + } + *pte = 0; + if(a == last) + break; + a += PGSIZE; + pa += PGSIZE; + } +} + +// create an empty user page table. +pagetable_t +uvmcreate() +{ + pagetable_t pagetable; + pagetable = (pagetable_t) kalloc(); + if(pagetable == 0) + panic("uvmcreate: out of memory"); + memset(pagetable, 0, PGSIZE); + return pagetable; +} + +// Load the user initcode into address 0 of pagetable, +// for the very first process. +// sz must be less than a page. 
+void +uvminit(pagetable_t pagetable, uchar *src, uint sz) +{ + char *mem; + + if(sz >= PGSIZE) + panic("inituvm: more than a page"); + mem = kalloc(); + memset(mem, 0, PGSIZE); + mappages(pagetable, 0, PGSIZE, (uint64)mem, PTE_W|PTE_R|PTE_X|PTE_U); + memmove(mem, src, sz); +} + +// Allocate PTEs and physical memory to grow process from oldsz to +// newsz, which need not be page aligned. Returns new size or 0 on error. +uint64 +uvmalloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz) +{ + char *mem; + uint64 a; + + if(newsz < oldsz) + return oldsz; + + oldsz = PGROUNDUP(oldsz); + a = oldsz; + for(; a < newsz; a += PGSIZE){ + mem = kalloc(); + if(mem == 0){ + uvmdealloc(pagetable, a, oldsz); + return 0; + } + memset(mem, 0, PGSIZE); + if(mappages(pagetable, a, PGSIZE, (uint64)mem, PTE_W|PTE_X|PTE_R|PTE_U) != 0){ + kfree(mem); + uvmdealloc(pagetable, a, oldsz); + return 0; + } + } + return newsz; +} + +// Deallocate user pages to bring the process size from oldsz to +// newsz. oldsz and newsz need not be page-aligned, nor does newsz +// need to be less than oldsz. oldsz can be larger than the actual +// process size. Returns the new process size. +uint64 +uvmdealloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz) +{ + if(newsz >= oldsz) + return oldsz; + uvmunmap(pagetable, newsz, oldsz - newsz, 1); + return newsz; +} + +// Recursively free page-table pages. +// All leaf mappings must already have been removed. +static void +freewalk(pagetable_t pagetable) +{ + // there are 2^9 = 512 PTEs in a page table. + for(int i = 0; i < 512; i++){ + pte_t pte = pagetable[i]; + if((pte & PTE_V) && (pte & (PTE_R|PTE_W|PTE_X)) == 0){ + // this PTE points to a lower-level page table. + uint64 child = PTE2PA(pte); + freewalk((pagetable_t)child); + pagetable[i] = 0; + } else if(pte & PTE_V){ + panic("freewalk: leaf"); + } + } + kfree((void*)pagetable); +} + +// Free user memory pages, +// then free page-table pages. 
+void +uvmfree(pagetable_t pagetable, uint64 sz) +{ + uvmunmap(pagetable, 0, sz, 1); + freewalk(pagetable); +} + +// Given a parent process's page table, copy +// its memory into a child's page table. +// Copies both the page table and the +// physical memory. +// returns 0 on success, -1 on failure. +// frees any allocated pages on failure. +int +uvmcopy(pagetable_t old, pagetable_t new, uint64 sz) +{ + pte_t *pte; + uint64 pa, i; + uint flags; + char *mem; + + for(i = 0; i < sz; i += PGSIZE){ + if((pte = walk(old, i, 0)) == 0) + panic("copyuvm: pte should exist"); + if((*pte & PTE_V) == 0) + panic("copyuvm: page not present"); + pa = PTE2PA(*pte); + flags = PTE_FLAGS(*pte); + if((mem = kalloc()) == 0) + goto err; + memmove(mem, (char*)pa, PGSIZE); + if(mappages(new, i, PGSIZE, (uint64)mem, flags) != 0){ + kfree(mem); + goto err; + } + } + return 0; + + err: + uvmunmap(new, 0, i, 1); + return -1; +} + +// mark a PTE invalid for user access. +// used by exec for the user stack guard page. +void +uvmclear(pagetable_t pagetable, uint64 va) +{ + pte_t *pte; + + pte = walk(pagetable, va, 0); + if(pte == 0) + panic("uvmclear"); + *pte &= ~PTE_U; +} + +// Copy from kernel to user. +// Copy len bytes from src to virtual address dstva in a given page table. +// Return 0 on success, -1 on error. +int +copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len) +{ + uint64 n, va0, pa0; + + while(len > 0){ + va0 = (uint)PGROUNDDOWN(dstva); + pa0 = walkaddr(pagetable, va0); + if(pa0 == 0) + return -1; + n = PGSIZE - (dstva - va0); + if(n > len) + n = len; + memmove((void *)(pa0 + (dstva - va0)), src, n); + + len -= n; + src += n; + dstva = va0 + PGSIZE; + } + return 0; +} + +// Copy from user to kernel. +// Copy len bytes to dst from virtual address srcva in a given page table. +// Return 0 on success, -1 on error. 
+int +copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len) +{ + uint64 n, va0, pa0; + + while(len > 0){ + va0 = (uint)PGROUNDDOWN(srcva); + pa0 = walkaddr(pagetable, va0); + if(pa0 == 0) + return -1; + n = PGSIZE - (srcva - va0); + if(n > len) + n = len; + memmove(dst, (void *)(pa0 + (srcva - va0)), n); + + len -= n; + dst += n; + srcva = va0 + PGSIZE; + } + return 0; +} + +// Copy a null-terminated string from user to kernel. +// Copy bytes to dst from virtual address srcva in a given page table, +// until a '\0', or max. +// Return 0 on success, -1 on error. +int +copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max) +{ + uint64 n, va0, pa0; + int got_null = 0; + + while(got_null == 0 && max > 0){ + va0 = (uint)PGROUNDDOWN(srcva); + pa0 = walkaddr(pagetable, va0); + if(pa0 == 0) + return -1; + n = PGSIZE - (srcva - va0); + if(n > max) + n = max; + + char *p = (char *) (pa0 + (srcva - va0)); + while(n > 0){ + if(*p == '\0'){ + *dst = '\0'; + got_null = 1; + break; + } else { + *dst = *p; + } + --n; + --max; + p++; + dst++; + } + + srcva = va0 + PGSIZE; + } + if(got_null){ + return 0; + } else { + return -1; + } +} diff --git a/labs/cow.html b/labs/cow.html new file mode 100644 index 0000000..2cc18fa --- /dev/null +++ b/labs/cow.html @@ -0,0 +1,109 @@ +<html> +<head> +<title>Lab: Copy-on-Write Fork for xv6</title> +<link rel="stylesheet" href="homework.css" type="text/css" /> +</head> +<body> + +<h1>Lab: Copy-on-Write Fork for xv6</h1> + +<p> +Your task is to implement copy-on-write fork in the xv6 kernel. You are +done if your modified kernel executes both the cow and usertests +programs successfully. + +<h2>The problem</h2> + +The fork() system call in xv6 copies all of the parent process's +user-space memory into the child. If the parent is large, copying can +take a long time. 
In addition, the copies often waste memory; in many +cases neither the parent nor the child modifies a page, so that in +principle they could share the same physical memory. The inefficiency +is particularly clear if the child calls exec(), since then most of +the copied pages are thrown away without ever being used. Of course, +sometimes both child and parent modify memory at the same virtual +address after a fork(), so for some pages the copying is truly needed. + +<h2>The solution</h2> + +The goal of copy-on-write (COW) fork() is to defer allocating and +copying physical memory pages for the child until they are actually +needed, in the hope that they may never be needed. + +<p> +COW fork() creates just a pagetable for the child, with PTEs for user +memory pointing to the parent's physical pages. COW fork() marks all +the user PTEs in both parent and child as read-only. When either +process tries to write one of these COW pages, the CPU will force a +page fault. The kernel page-fault handler detects this case, allocates +a page of physical memory for the faulting process, copies the +original page into the new page, and modifies the relevant PTE in the +faulting process to refer to the new page, this time with the PTE +marked writeable. When the page fault handler returns, the user +process will be able to write its copy of the page. + +<p> +COW fork() makes freeing of the physical pages that implement user +memory a little trickier. A given physical page may be referred to by +multiple processes' page tables, and should be freed when the last +reference disappears. + +<h2>The cow test program</h2> + +To help you test your implementation, we've provided an xv6 program +called cow (source in user/cow.c). cow runs various tests, but +even the first will fail on unmodified xv6. Thus, initially, you +will see: + +<pre> +$ cow +simple: fork() failed +$ +</pre> + +The "simple" test allocates more than half of available physical +memory, and then fork()s. 
The fork fails because there is not enough +free physical memory to give the child a complete copy of the parent. + +<p> +When you are done, your kernel should be able to run both cow and +usertests. That is: + +<pre> +$ cow +simple: ok +simple: ok +three: zombie! +ok +three: zombie! +ok +three: zombie! +ok +file: ok +ALL COW TESTS PASSED +$ usertests +... +ALL TESTS PASSED +$ +</pre> + +<h2>Hints</h2> + +Here's one reasonable plan of attack. Modify uvmcopy() to map the +parent's physical pages into the child, instead of allocating new +pages, and clear PTE_W in the PTEs of both child and parent. +Modify usertrap() to recognize a page fault. When a page fault occurs +on a COW page, allocate a new page with kalloc(), copy the old page to +the new page, and install the new page in the PTE with PTE_W set. +Next, ensure that each physical page is freed when the last PTE +reference to it goes away (but not before!), perhaps by implementing +reference counts in kalloc.c. Finally, modify copyout() to use the +same scheme as page faults when it encounters a COW page. + +<p> +It may be useful to have a way to record, for each PTE, whether it is +a COW mapping. You can use the RSW (reserved for software) bits in +the RISC-V PTE for this. + +</body> +</html> diff --git a/labs/fs.html b/labs/fs.html new file mode 100644 index 0000000..a21e61f --- /dev/null +++ b/labs/fs.html @@ -0,0 +1,360 @@ +<html> +<head> +<title>Lab: file system</title> +<link rel="stylesheet" href="homework.css" type="text/css" /> +</head> +<body> + +<h1>Lab: file system</h1> + +<p>In this lab you will add large files and <tt>mmap</tt> to the xv6 file system. + +<h2>Large files</h2> + +<p>In this assignment you'll increase the maximum size of an xv6 +file. Currently xv6 files are limited to 268 blocks, or 268*BSIZE +bytes (BSIZE is 1024 in xv6). 
This limit comes from the fact that an +xv6 inode contains 12 "direct" block numbers and one "singly-indirect" +block number, which refers to a block that holds up to 256 more block +numbers, for a total of 12+256=268. You'll change the xv6 file system +code to support a "doubly-indirect" block in each inode, containing +256 addresses of singly-indirect blocks, each of which can contain up +to 256 addresses of data blocks. The result will be that a file will +be able to consist of up to 256*256+256+11 blocks (11 instead of 12, +because we will sacrifice one of the direct block numbers for the +double-indirect block). + +<h3>Preliminaries</h3> + +<p>Modify your Makefile's <tt>CPUS</tt> definition so that it reads: +<pre> +CPUS := 1 +</pre> + +<b>XXX doesn't seem to speedup things</b> +<p>Add +<pre> +QEMUEXTRA = -snapshot +</pre> +right before +<tt>QEMUOPTS</tt> +<p> +The above two steps speed up qemu tremendously when xv6 +creates large files. + +<p><tt>mkfs</tt> initializes the file system to have fewer +than 1000 free data blocks, too few to show off the changes +you'll make. Modify <tt>param.h</tt> to +set <tt>FSSIZE</tt> to: +<pre> + #define FSSIZE 20000 // size of file system in blocks +</pre> + +<p>Download <a href="big.c">big.c</a> into your xv6 directory, +add it to the UPROGS list, start up xv6, and run <tt>big</tt>. +It creates as big a file as xv6 will let +it, and reports the resulting size. It should say 268 blocks. + +<h3>What to Look At</h3> + +The format of an on-disk inode is defined by <tt>struct dinode</tt> +in <tt>fs.h</tt>. You're particularly interested in <tt>NDIRECT</tt>, +<tt>NINDIRECT</tt>, <tt>MAXFILE</tt>, and the <tt>addrs[]</tt> element +of <tt>struct dinode</tt>. Look at Figure 7.3 in the xv6 text for a +diagram of the standard xv6 inode. + +<p> +The code that finds a file's data on disk is in <tt>bmap()</tt> +in <tt>fs.c</tt>. Have a look at it and make sure you understand +what it's doing. 
<tt>bmap()</tt> is called both when reading and +writing a file. When writing, <tt>bmap()</tt> allocates new +blocks as needed to hold file content, as well as allocating +an indirect block if needed to hold block addresses. + +<p> +<tt>bmap()</tt> deals with two kinds of block numbers. The <tt>bn</tt> +argument is a "logical block" -- a block number relative to the start +of the file. The block numbers in <tt>ip->addrs[]</tt>, and the +argument to <tt>bread()</tt>, are disk block numbers. +You can view <tt>bmap()</tt> as mapping a file's logical +block numbers into disk block numbers. + +<h3>Your Job</h3> + +Modify <tt>bmap()</tt> so that it implements a doubly-indirect +block, in addition to direct blocks and a singly-indirect block. +You'll have to have only 11 direct blocks, rather than 12, +to make room for your new doubly-indirect block; you're +not allowed to change the size of an on-disk inode. +The first 11 elements of <tt>ip->addrs[]</tt> should be +direct blocks; the 12th should be a singly-indirect block +(just like the current one); the 13th should be your new +doubly-indirect block. + +<p> +You don't have to modify xv6 to handle deletion of files with +doubly-indirect blocks. + +<p> +If all goes well, <tt>big</tt> will now report that it +can write 65803 blocks (256*256+256+11). It will take <tt>big</tt> a while +to finish. + +<b>XXX this runs for a while!</b> + +<h3>Hints</h3> + +<p> +Make sure you understand <tt>bmap()</tt>. Write out a diagram of the +relationships between <tt>ip->addrs[]</tt>, the indirect block, the +doubly-indirect block and the singly-indirect blocks it points to, and +data blocks. Make sure you understand why adding a doubly-indirect +block increases the maximum file size by 256*256 blocks (really -1, +since you have to decrease the number of direct blocks by one). + +<p> +Think about how you'll index the doubly-indirect block, and +the indirect blocks it points to, with the logical block +number. 
+ +<p>If you change the definition of <tt>NDIRECT</tt>, you'll +probably have to change the size of <tt>addrs[]</tt> +in <tt>struct inode</tt> in <tt>file.h</tt>. Make sure that +<tt>struct inode</tt> and <tt>struct dinode</tt> have the +same number of elements in their <tt>addrs[]</tt> arrays. + +<p>If you change the definition of <tt>NDIRECT</tt>, make sure to create a +new <tt>fs.img</tt>, since <tt>mkfs</tt> also uses <tt>NDIRECT</tt> to build the +initial file system. If you delete <tt>fs.img</tt>, <tt>make</tt> on Unix (not +xv6) will build a new one for you. + +<p>If your file system gets into a bad state, perhaps by crashing, +delete <tt>fs.img</tt> (do this from Unix, not xv6). <tt>make</tt> will build a +new clean file system image for you. + +<p>Don't forget to <tt>brelse()</tt> each block that you +<tt>bread()</tt>. + +<p>You should allocate indirect blocks and doubly-indirect + blocks only as needed, like the original <tt>bmap()</tt>. + +<p>Optional challenge: support triple-indirect blocks. + +<h2>Writing with a Log</h2> + +Insert a print statement in bwrite (in bio.c) so that you get a +print every time a block is written to disk: + +<pre> + printf("bwrite block %d\n", b->blockno); +</pre> + +Build and boot a new kernel and run this: +<pre> + $ rm README +</pre> + +<p>You should see a sequence of bwrite prints after the <tt>rm</tt>.</p> + +<div class="question"> +<ol> +<li>Annotate the bwrite lines with the kind of information that is +being written to the disk (e.g., "README's inode", "allocation +bitmap"). If the log is being written, note both that the log is being +written and also what kind of information is being written to the log. +<li>Mark with an arrow the first point at which, if a +crash occurred, README would be missing after a reboot +(after the call to <tt>recover_from_log()</tt>). +</ol> +</p> +</div> + + +<h2>Crash safety</h2> + +<p>This assignment explores the xv6 log in two parts. 
+First, you'll artificially create a crash which illustrates +why logging is needed. Second, you'll remove one +inefficiency in the xv6 logging system. + +<p> +Submit your solution before the beginning of the next lecture +to <a href="https://6828.scripts.mit.edu/2018/handin.py/">the submission +web site</a>. + +<h3>Creating a Problem</h3> + +<p> +The point of the xv6 log is to cause all the disk updates of a +filesystem operation to be atomic with respect to crashes. +For example, file creation involves both adding a new entry +to a directory and marking the new file's inode as in-use. +A crash that happened after one but before the other would +leave the file system in an incorrect state after a reboot, +if there were no log. + +<p> +The following steps will break the logging code in a way that +leaves a file partially created. + +<p> +First, replace <tt>commit()</tt> in <tt>log.c</tt> with +this code: +<pre> +#include "kernel/proc.h" +void +commit(void) +{ + int pid = myproc()->pid; + if (log.lh.n > 0) { + write_log(); + write_head(); + if(pid > 1) // AAA + log.lh.block[0] = 0; // BBB + install_trans(); + if(pid > 1) // AAA + panic("commit mimicking crash"); // CCC + log.lh.n = 0; + write_head(); + } +} +</pre> + +<p> +The BBB line causes the first block in the log to be written to +block zero, rather than wherever it should be written. During file +creation, the first block in the log is the new file's inode updated +to have non-zero <tt>type</tt>. +Line BBB causes the block +with the updated inode to be written to block 0 (whence +it will never be read), leaving the on-disk inode still marked +unallocated. The CCC line forces a crash. +The AAA lines suppress this buggy behavior for <tt>init</tt>, +which creates files before the shell starts. 
+ +<p> +Second, replace <tt>recover_from_log()</tt> in <tt>log.c</tt> +with this code: +<pre> +static void +recover_from_log(void) +{ + read_head(); + printf("recovery: n=%d but ignoring\n", log.lh.n); + // install_trans(); + log.lh.n = 0; + // write_head(); +} +</pre> + +<p> +This modification suppresses log recovery (which would repair +the damage caused by your change to <tt>commit()</tt>). + +<p> +Finally, remove the <tt>-snapshot</tt> option from the definition +of <tt>QEMUEXTRA</tt> in your Makefile so that the disk image will see the +changes. + +<p> +Now remove <tt>fs.img</tt> and run xv6: +<pre> + % rm fs.img ; make qemu +</pre> +<p> +Tell the xv6 shell to create a file: +<pre> + $ echo hi > a +</pre> + +<p> +You should see the panic from <tt>commit()</tt>. So far +it is as if a crash occurred in a non-logging system in the middle +of creating a file. + +<p> +Now re-start xv6, keeping the same <tt>fs.img</tt>: +<pre> + % make qemu +</pre> + +<p> +And look at file <tt>a</tt>: +<pre> + $ cat a +</pre> + +<p> + You should see <tt>panic: ilock: no type</tt>. Make sure you understand what happened. +Which of the file creation's modifications were written to the disk +before the crash, and which were not? + +<h3>Solving the Problem</h3> + +Now fix <tt>recover_from_log()</tt>: +<pre> +static void +recover_from_log(void) +{ + read_head(); + printf("recovery: n=%d\n", log.lh.n); + install_trans(); + log.lh.n = 0; + write_head(); +} +</pre> + +<p> +Run xv6 (keeping the same <tt>fs.img</tt>) and read <tt>a</tt> again: +<pre> + $ cat a +</pre> + +<p> +This time there should be no crash. Make sure you understand why +the file system now works. + +<p> +Why was the file empty, even though you created +it with <tt>echo hi > a</tt>? + +<p> +Now remove your modifications to <tt>commit()</tt> +(the if's and the AAA and BBB lines), so that logging works again, +and remove <tt>fs.img</tt>. 
+ +<h3>Streamlining Commit</h3> + +<p> +Suppose the file system code wants to update an inode in block 33. +The file system code will call <tt>bp=bread(block 33)</tt> and update the +buffer data. <tt>write_log()</tt> in <tt>commit()</tt> +will copy the data to a block in the log on disk, for example block 3. +A bit later in <tt>commit</tt>, <tt>install_trans()</tt> reads +block 3 from the log (containing block 33), copies its contents into the in-memory +buffer for block 33, and then writes that buffer to block 33 on the disk. + +<p> +However, in <tt>install_trans()</tt>, it turns out that the modified +block 33 is guaranteed to be still in the buffer cache, where the +file system code left it. Make sure you understand why it would be a +mistake for the buffer cache to evict block 33 from the buffer cache +before the commit. + +<p> +Since the modified block 33 is guaranteed to already be in the buffer +cache, there's no need for <tt>install_trans()</tt> to read block +33 from the log. Your job: modify <tt>log.c</tt> so that, when +<tt>install_trans()</tt> is called from <tt>commit()</tt>, +<tt>install_trans()</tt> does not perform the needless read from the log. + +<p>To test your changes, create a file in xv6, restart, and make sure +the file is still there. + +<b>XXX Does this speedup bigfile?</b> + +<b>XXX Maybe support lseek and modify shell to append to a file?</b> + + +</body> +</html> diff --git a/labs/fs1.html b/labs/fs1.html new file mode 100644 index 0000000..45d3e0c --- /dev/null +++ b/labs/fs1.html @@ -0,0 +1,215 @@ +<html> +<head> +<title>Lab: mount/umount</title> +<link rel="stylesheet" href="homework.css" type="text/css" /> +</head> +<body> + +<h1>Lab: mount/umount</h1> + +<p>In this lab you will add support for mounting/unmounting of file +systems to xv6. This lab will expose you to many parts of the xv6 +file system, including pathname lookup, inodes, logging/recovery, disk +driver, concurrency, etc. 
+ +<p>Your job is to modify xv6 so that your modified kernel passes the + tests in mounttest. You will have to implement two system + calls: <tt>mount(char *source, char *target)</tt> + and <tt>umount(char *target)</tt>. Mount attaches the device + referenced by <tt>source</tt> (e.g., <tt>/disk1</tt>) at the + location specified by <tt>target</tt>. For + example, <tt>mount("/disk1", "/m")</tt> will attach <tt>disk1</tt> + at the directory <tt>/m</tt>. After this mount call, users can use + pathnames such as <tt>/m/README</tt> to read the + file <tt>README</tt> stored in the root directory + on <tt>disk1</tt>. <tt>Umount</tt> removes the attachment. For + example, <tt>umount("/m")</tt> unmounts disk1 from <tt>/m</tt>. + +<p>There are several major challenges in implementing the mount system +calls: + + <ul> + + <li>Adding the actual system calls so that user programs can call + them. This is similar to previous labs in which you added + system calls to xv6. + + <li>Supporting several disks. You will have to generalize + virtio_disk.c to support at least two disks. + + <li>Logging file system modifications to the right disk. xv6 + assumes there is only one disk, and file system calls typically start + with <tt>begin_op</tt> and end with <tt>end_op</tt>, logging all + modifications between these two calls to the log on the one + disk. With mount, modifications to the file system on the + second disk must be logged to the second disk. + + <li>Modifying pathname lookup (<tt>namex</tt>) so that when a + lookup crosses a mount point, it continues at the root inode of + the attached disk. + + </ul> + +<p>The rest of this assignment provides some hints on how you might go +about the above challenges. + +<h2>Adding system calls</h2> + +<p>Add the stubs for the two system calls to xv6 so that you can +compile mounttest and add two empty functions for the two system calls +to sysfile.c. Run mounttest and it will fail on the first call +to <tt>mount</tt>. 
+ + +<h2>Adding a second disk</h2> + +<p>To be able to mount another disk, you need to extend xv6 to support +at least two disks. Modify virtio_disk.c to support an array of two +disks instead of a single disk. The address of the second disk +is <tt>0x10002000</tt>; modify the macro <tt>R</tt> to take a disk +number (0, 1,..) and read/write to the memory address for that disk. + +<p>All functions in <tt>virtio_disk.c</tt> need to take the disk +number as an argument to update the state of the disk that is +read/written to or to receive an interrupt from the disk. +Modify <tt>virtio_disk_init</tt> to take a disk number as an argument +and update it so that it initializes that disk. Similarly, go through +the other functions; making these changes should be mostly mechanical +(i.e., text substitutions). + +<p>The second disk interrupts at IRQ 2; modify trap.c to receive that +interrupt and call <tt>virtio_disk_intr</tt> with the number of the disk +that generated the interrupt. + +<p>Modify the file Makefile to tell qemu to provide a second +disk. Define the variable <tt>QEMUEXTRA = -drive +file=fs1.img,if=none,format=raw,id=x1 -device +virtio-blk-device,drive=x1,bus=virtio-mmio-bus.1</tt> and +add <tt>$(QEMUEXTRA)</tt> to the end of <tt>QEMUOPTS</tt>. + +<p>Create a second disk image <tt>fs1.img</tt>. The easiest thing to do + is to just copy the file <tt>fs.img</tt>. You might want to add rules + to the Makefile to make this image and remove it on <tt>make + clean</tt>. + +<p>Add to the user program init a call to create a device for the new + disk. For example, add the line <tt>mknod("disk1", DISK, 1);</tt> to + init.c. This will create an inode of type device in the root + directory with major number <tt>DISK</tt> and minor number 1. + +<p>The first argument of the <tt>mount</tt> system call ("disk1") will + refer to the device you created using <tt>mknod</tt> above. 
In your + implementation of the mount system call, + call <tt>virtio_disk_init</tt> with the minor number as the argument + to initialize the second disk. (We reserve minor number 0 for the + first disk.) + +<p>Boot xv6, run mounttest, and make sure <tt>virtio_disk_init</tt> + gets called (e.g., add a print statement). You won't know if your + changes are correct, but your code should compile and invoke the + driver for the second disk. + +<h2>Modify the logging system</h2> + +<p>After calling <tt>virtio_disk_init</tt>, you need to also + call <tt>loginit</tt> to initialize the logging system for the + second disk (and restore the second disk if a power failure happened + while modifying the second disk). Generalize the logging system to + support two logs, one on disk 0 and one on disk 1. These changes + are mostly mechanical (e.g., <tt>log.</tt> changes + to <tt>log[n].</tt>), similar to generalizing the disk driver to + support two disks. + +<p>To make xv6 compile, you need to provide a disk number + to <tt>begin_op</tt> and <tt>end_op</tt>. It will be a challenge to + figure out what the right value is; for now just specify the first + disk (i.e., 0). This isn't correct, since modifications to the + second disk should be logged on the second disk, but we have no way + yet to read/write the second disk. Come back to this later when you + have a better idea how things will fit together, but make sure that + xv6 compiles and still runs. + +<h2>Pathname lookup</h2> + +<p>Modify <tt>namex</tt> to traverse mount points: when <tt>namex</tt> + sees an inode to which a file system is attached, it should traverse + to the root inode of that file system. Hint: modify the in-memory + inode in file.h to keep some additional state, and initialize that + state in the mount system call. Note that the inode already has a + field for disk number (i.e., <tt>dev</tt>), which is initialized and + passed to reads and writes to the driver. 
<tt>dev</tt> corresponds + to the minor number for disk devices. + +<p>Your modified xv6 should be able to pass the first tests in + mounttest (i.e., <tt>stat</tt>). This is likely to be challenging, + however, because now your kernel will be reading from the second + disk for the first time, and you may run into many issues. + +<p>Even though <tt>stat</tt> may return correctly, your code is likely + to be incorrect, because in <tt>namex</tt>, + <tt>iunlockput</tt> may modify the second disk (e.g., if + another process removes the file or directory) and those + modifications must be written to the second disk. Your job is to + fix the calls to <tt>begin_op</tt> and <tt>end_op</tt> to take the + right device. One challenge is that <tt>begin_op</tt> is called at + the beginning of a system call but then you don't know the device + that will be involved; you will have to postpone this call until you + know which inode is involved (which tells you which device is + involved). Another challenge is that you cannot postpone + calling <tt>begin_op</tt> past <tt>ilock</tt> because that + violates lock ordering in xv6; you should not be + calling <tt>begin_op</tt> while holding locks on inodes. (The log + system allows a few system calls to run; if a system call that + holds an inode lock isn't admitted and one of the admitted system + calls needs that inode to complete, then xv6 will deadlock.) + +<p>Once you have implemented a plan for <tt>begin_op</tt> + and <tt>end_op</tt>, see if your kernel can pass <tt>test0</tt>. It + is likely that you will have to modify your implementation of the + mount system call to handle several corner cases. See the tests + in <tt>test0</tt>. + +<p>Run usertests to see if you didn't break anything else. Since you + modified <tt>namex</tt> and <tt>begin/end_op</tt>, which are at the + core of the xv6 file system, you might have introduced bugs, perhaps + including deadlocks. 
Deadlocks manifest themselves as no output + being produced because all processes are sleeping (hit ctrl-p a few + times). Your kernel might also suffer kernel panics, because your + changes violate invariants. You may have to iterate a few times to + get a good design and implementation. + +<h2>umount</h2> + +<p>Once your kernel passes usertests and test0 of mounttest, implement + umount. The main challenge is that umount of a file system should + fail if the file system is still in use; that is, if there is an + inode on the mounted device that has a <tt>ref > 0</tt>. + Furthermore, this test and unmounting should be an atomic + operation. (Hint: lock the inode cache.) Make sure your kernel + passes test1 of mounttest. + +<p>Test2 of mounttest stresses <tt>namex</tt> more; if you have done + everything right above, your kernel should pass it. Test3 tests + concurrent mount/unmounts with file creation. + +<h2>crash safety</h2> + +<p>One of the main goals of the file system is to provide crash + safety: if there is a power failure during a file system operation, + xv6 should recover correctly. It is difficult to introduce power + failure at the critical steps of logging; instead, we added a system + call that causes a kernel panic after committing an operation but + before installing the operation. Test4 with crashtest tests if your + xv6 recovers the mounted disk correctly. + + +</body> +</html> + +<h2>Optional challenges</h2> + +<p>Modify xv6 so that init mounts the first disk on the root inode. + This will allow you to remove some code specific for the first disk + from the kernel. + +<p>Support mounts on top of mounts. 
diff --git a/labs/lazy.html b/labs/lazy.html new file mode 100644 index 0000000..9d97cab --- /dev/null +++ b/labs/lazy.html @@ -0,0 +1,132 @@ +<html> +<head> +<title>Lab: xv6 lazy page allocation</title> +<link rel="stylesheet" href="homework.css" type="text/css" /> +</head> +<body> + +<h1>Lab: xv6 lazy page allocation</h1> + +<p> +One of the many neat tricks an O/S can play with page table hardware +is lazy allocation of heap memory. Xv6 applications ask the kernel for +heap memory using the sbrk() system call. In the kernel we've given +you, sbrk() allocates physical memory and maps it into the process's +virtual address space. There are programs that allocate memory but +never use it, for example to implement large sparse arrays. +Sophisticated kernels delay allocation of each page of memory until +the application tries to use that page -- as signaled by a page fault. +You'll add this lazy allocation feature to xv6 in this lab. + +<h2>Part One: Eliminate allocation from sbrk()</h2> + +Your first task is to delete page allocation from the sbrk(n) system +call implementation, which is the function sys_sbrk() in sysproc.c. The +sbrk(n) system call grows the process's memory size by n bytes, and +then returns the start of the newly allocated region (i.e., the old +size). Your new sbrk(n) should just increment the process's size +(myproc()->sz) by n and return the old size. It should not allocate memory +-- so you should delete the call to growproc() (but you still need to +increase the process's size!). + +<p> +Try to guess what the result of this modification will be: what will +break? + +<p> +Make this modification, boot xv6, and type <tt>echo hi</tt> to the shell. +You should see something like this: + +<pre> +init: starting sh +$ echo hi +usertrap(): unexpected scause 0x000000000000000f pid=3 + sepc=0x00000000000011dc stval=0x0000000000004008 +va=0x0000000000004000 pte=0x0000000000000000 +panic: unmappages: not mapped +</pre> + +The "usertrap(): ..." 
message is from the user trap handler in trap.c; +it has caught an exception that it does not know how to handle. Make +sure you understand why this page fault occurs. The "stval=0x0..04008" +indicates that the virtual address that caused the page fault is +0x4008. + +<h2>Part Two: Lazy allocation</h2> + +Modify the code in trap.c to respond to a page fault from user space +by mapping a newly-allocated page of physical memory at the faulting +address, and then returning back to user space to let the process +continue executing. You should add your code just before +the <tt>printf</tt> call that produced the "usertrap(): ..." +message. + +<p> +Hint: look at the printf arguments to see how to find the virtual +address that caused the page fault. + +<p> +Hint: steal code from allocuvm() in vm.c, which is what sbrk() +calls (via growproc()). + +<p> +Hint: use PGROUNDDOWN(va) to round the faulting virtual address +down to a page boundary. + +<p> +Hint: call <tt>usertrapret()</tt> in order to avoid +the <tt>printf</tt> and the <tt>myproc()->killed = 1</tt>. + +<p> +Hint: you'll need to call mappages(). + +<p>Hint: you can check whether a fault is a page fault by checking whether r_scause() + is 13 or 15 in trap(). + +<p>Hint: modify unmappages() to not free pages that aren't mapped. + +<p>Hint: if the kernel crashes, look up sepc in kernel/kernel.asm + +<p>Hint: if you see the error "incomplete type proc", include "proc.h" + (and "spinlock.h"). + +<p>Hint: the first test in sbrk() allocates something large, this + should succeed now. + +<p> +If all goes well, your lazy allocation code should result in <tt>echo +hi</tt> working. You should get at least one page fault (and thus lazy +allocation) in the shell, and perhaps two. + +<p>If you have the basics working, now turn your implementation into + one that handles the corner cases too: + +<ul> + + <li> Handle negative sbrk() arguments. sbrktest() in usertests will + test this. + + <li> Handle fork correctly. sbrktest() will test this. 
+ + <li> Make sure that kernel use of not-yet-allocated user addresses + works; for example, if a program passes an sbrk()-allocated + address to write(). sbrktest() will test this. + + <li> Handle out of memory correctly. sbrktest() will test this. + + <li> Handle faults on the invalid page below the stack. stacktest() + in usertests will test this. + +</ul> + +<p>Run all tests in usertests() to make sure your solution doesn't +break other tests. + +<p> +<div class="question"> +<p><b>Submit</b>: The code that you added to trap.c in a file named <em>hwN.c</em> where <em>N</em> is the homework number as listed on the schedule. +</div> + + +</body> +</html> diff --git a/labs/lock.html b/labs/lock.html new file mode 100644 index 0000000..707d6c4 --- /dev/null +++ b/labs/lock.html @@ -0,0 +1,148 @@ +<html> +<head> +<title>Lab: locks</title> +<link rel="stylesheet" href="homework.css" type="text/css" /> +</head> +<body> + +<h1>Lab: locks</h1> + +<p>In this lab you will try to avoid lock contention for certain +workloads. + +<h2>lock contention</h2> + +<p>The program user/kalloctest stresses xv6's memory allocator: three + processes grow and shrink their address space, which will result in + many calls to <tt>kalloc</tt> and <tt>kfree</tt>, + respectively. <tt>kalloc</tt> and <tt>kfree</tt> + obtain <tt>kmem.lock</tt>. To see if there is lock contention for + <tt>kmem.lock</tt> replace the call to <tt>acquire</tt> + in <tt>kalloc</tt> with the following code: + + <pre> + while(!tryacquire(&kmem.lock)) { + printf("!"); + } + </pre> + +<p><tt>tryacquire</tt> tries to acquire <tt>kmem.lock</tt>: if the + lock is taken it returns false (0); otherwise, it returns true (1) + with the lock acquired. Your first job is to + implement <tt>tryacquire</tt> in kernel/spinlock.c. + +<p>A few hints: + <ul> + <li>look at <tt>acquire</tt>. + <li>don't forget to restore interrupts when acquisition fails + <li>Add tryacquire's signature to defs.h. 
+ </ul> + +<p>Run usertests to see if you didn't break anything. Note that + usertests never prints "!"; there is never contention + for <tt>kmem.lock</tt>. The caller is always able to immediately + acquire the lock and never has to wait because some other process + has the lock. + +<p>Now run kalloctest. You should see quite a number of "!" on the + console. kalloctest causes many processes to contend on + the <tt>kmem.lock</tt>. This lock contention is a bit artificial, + because qemu is simulating 3 processors, but it is likely that on real + hardware there would be contention too. + +<h2>Removing lock contention</h2> + +<p>The root cause of lock contention in kalloctest is that there is a + single free list, protected by a single lock. To remove lock + contention, you will have to redesign the memory allocator to avoid + a single lock and list. The basic idea is to maintain a free list + per CPU, each list with its own lock. Allocations and frees on each + CPU can run in parallel, because each CPU will operate on a + different list. + +<p> The main challenge will be to deal with the case that one CPU runs + out of memory, but another CPU still has free memory; in that case, + the one CPU must "steal" part of the other CPU's free list. + Stealing may introduce lock contention, but that may be acceptable + because it may happen infrequently. + +<p>Your job is to implement per-CPU freelists and stealing when one + CPU is out of memory. Run kalloctest() to see if your + implementation has removed lock contention. + +<p>Some hints: + <ul> + <li>You can use the constant <tt>NCPU</tt> in kernel/param.h + <li>Let <tt>freerange</tt> give all free memory to the CPU + running <tt>freerange</tt>. + <li>The function <tt>cpuid</tt> returns the current core, but note + that you can only use it when interrupts are turned off and so you will + need to turn on/off interrupts in your solution. + </ul> + +<p>Run usertests to see if you don't break anything. 
+ +<h2>More scalable bcache lookup</h2> + + +<p>Several processes reading different files repeatedly will + bottleneck in the buffer cache, bcache, in bio.c. Replace the + acquire in <tt>bget</tt> with + + <pre> + while(!tryacquire(&bcache.lock)) { + printf("!"); + } + </pre> + + and run test0 from bcachetest and you will see "!"s. + +<p>Modify <tt>bget</tt> so that a lookup for a buffer that is in the + bcache doesn't need to acquire <tt>bcache.lock</tt>. This is more + tricky than the kalloc assignment, because bcache buffers are truly + shared among processes. You must maintain the invariant that a + buffer is in memory only once. + +<p> There are several races that <tt>bcache.lock</tt> protects +against, including: + <ul> + <li>A <tt>brelse</tt> may set <tt>b->ref</tt> to 0, + while a concurrent <tt>bget</tt> is incrementing it. + <li>Two <tt>bget</tt> may see <tt>b->ref = 0</tt> and one may re-use + the buffer, while the other may replace it with another block. + <li>A concurrent <tt>brelse</tt> modifies the list + that <tt>bget</tt> traverses. + </ul> + +<p>A challenge is testing whether your code is still correct. One way + to do so is to artificially delay certain operations + using <tt>sleepticks</tt>. <tt>test1</tt> trashes the buffer cache + and exercises more code paths. + +<p>Here are some hints: + <ul> + <li>Read the description of buffer cache in the xv6 book (Section 7.2). + <li>Use a simple design: i.e., don't design a lock-free implementation. + <li>Use a simple hash table with locks per bucket. + <li>Searching in hash table for a buffer and allocating an entry + for that buffer when the buffer is not found must be atomic. + <li>It is fine to acquire <tt>bcache.lock</tt> in <tt>brelse</tt> + to update the LRU/MRU list. + </ul> + +<p>Check that your implementation has less contention + on <tt>test0</tt>. + +<p>Make sure your implementation passes bcachetest and usertests. 
+ +<p>Optional: + <ul> + <li>make the buffer cache more scalable (e.g., avoid taking + out <tt>bcache.lock</tt> on <tt>brelse</tt>). + <li>make lookup lock-free (Hint: use gcc's <tt>__sync_*</tt> + functions.) How do you convince yourself that your implementation is correct? + </ul> + + +</body> +</html> diff --git a/labs/mmap.html b/labs/mmap.html new file mode 100644 index 0000000..6f779c4 --- /dev/null +++ b/labs/mmap.html @@ -0,0 +1,171 @@ +<html> +<head> +<title>Lab: mmap</title> +<link rel="stylesheet" href="homework.css" type="text/css" /> +</head> +<body> + +<h1>Lab: mmap</h1> + +<p>In this lab you will use <tt>mmap</tt> on Linux to demand-page a +very large table and add memory-mapped files to xv6. + +<h2>Using mmap on Linux</h2> + +<p>This assignment will make you more familiar with how to manage virtual memory +in user programs using the Unix system call interface. You can do this +assignment on any operating system that supports the Unix API (a Linux Athena +machine, your laptop with Linux or MacOS, etc.). + +<p>Download the <a href="mmap.c">mmap homework assignment</a> and look +it over. The program maintains a very large table of square root +values in virtual memory. However, the table is too large to fit in +physical RAM. Instead, the square root values should be computed on +demand in response to page faults that occur in the table's address +range. Your job is to implement the demand faulting mechanism using a +signal handler and UNIX memory mapping system calls. To stay within +the physical RAM limit, we suggest using the simple strategy of +unmapping the last page whenever a new page is faulted in. + +<p>To compile <tt>mmap.c</tt>, you need a C compiler, such as gcc. On Athena, +you can type: +<pre> +$ add gnu +</pre> +Once you have gcc, you can compile mmap.c as follows: +<pre> +$ gcc mmap.c -lm -o mmap +</pre> +This produces a <tt>mmap</tt> file, which you can run: +<pre> +$ ./mmap +page_size is 4096 +Validating square root table contents... 
+oops got SIGSEGV at 0x7f6bf7fd7f18 +</pre> + +<p>When the process accesses the square root table, the mapping does not exist +and the kernel passes control to the signal handler code in +<tt>handle_sigsegv()</tt>. Modify the code in <tt>handle_sigsegv()</tt> to map +in a page at the faulting address, unmap a previous page to stay within the +physical memory limit, and initialize the new page with the correct square root +values. Use the function <tt>calculate_sqrts()</tt> to compute the values. +The program includes test logic that verifies if the contents of the +square root table are correct. When you have completed your task +successfully, the process will print “All tests passed!”. + +<p>You may find that the man pages for mmap() and munmap() are helpful references. +<pre> +$ man mmap +$ man munmap +</pre> + + +<h2>Implement memory-mapped files in xv6</h2> + +<p>In this assignment you will implement memory-mapped files in xv6. + The test program <tt>mmaptest</tt> tells you what should work. + +<p>Here are some hints about how you might go about this assignment: + + <ul> + <li>Start with adding the two systems calls to the kernel, as you + done for other systems calls (e.g., <tt>sigalarm</tt>), but + don't implement them yet; just return an + error. run <tt>mmaptest</tt> to observe the error. + + <li>Keep track for each process what <tt>mmap</tt> has mapped. + You will need to allocate a <tt>struct vma</tt> to record the + address, length, permissions, etc. for each virtual memory area + (VMA) that maps a file. Since the xv6 kernel doesn't have a + memory allocator in the kernel, you can use the same approach has + for <tt>struct file</tt>: have a global array of <tt>struct + vma</tt>s and have for each process a fixed-sized array of VMAs + (like the file descriptor array). + + <li>Implement <tt>mmap</tt>: allocate a VMA, add it to the process's + table of VMAs, fill in the VMA, and find a hole in the process's + address space where you will map the file. 
You can assume that no + file will be bigger than 1GB. The VMA will contain a pointer to + a <tt>struct file</tt> for the file being mapped; you will need to + increase the file's reference count so that the structure doesn't + disappear when the file is closed (hint: + see <tt>filedup</tt>). You don't have to worry about overlapping + VMAs. Run <tt>mmaptest</tt>: the first <tt>mmap</tt> should + succeed, but the first access to the mmap-ed memory will fail, + because you haven't updated the page fault handler. + + <li>Modify the page-fault handler from the lazy-allocation and COW + labs to call a VMA function that handles page faults in VMAs. + This function allocates a page, reads 4KB from the mmap-ed + file into the page, and maps the page into the address space of + the process. To read the page, you can use <tt>readi</tt>, + which allows you to specify an offset from where to read in the + file (but you will have to lock/unlock the inode passed + to <tt>readi</tt>). Don't forget to set the permissions correctly + on the page. Run <tt>mmaptest</tt>; you should get to the + first <tt>munmap</tt>. + + <li>Implement <tt>munmap</tt>: find the <tt>struct vma</tt> for + the address and unmap the specified pages (hint: + use <tt>uvmunmap</tt>). If <tt>munmap</tt> removes all pages + from a VMA, you will have to free the VMA (don't forget to + decrement the reference count of the VMA's <tt>struct + file</tt>); otherwise, you may have to shrink the VMA. You can + assume that <tt>munmap</tt> will not split a VMA into two VMAs; + that is, we don't unmap a few pages in the middle of a VMA. If + an unmapped page has been modified and the file is + mapped <tt>MAP_SHARED</tt>, you will have to write the page back + to the file. RISC-V has a dirty bit (<tt>D</tt>) in a PTE to + record whether a page has ever been written to; add the + declaration to kernel/riscv.h and use it. Modify <tt>exit</tt> + to call <tt>munmap</tt> for the process's open VMAs. 
+ Run <tt>mmaptest</tt>; you should pass <tt>mmaptest</tt>, but + probably not <tt>forktest</tt>. + + <li>Modify <tt>fork</tt> to copy VMAs from parent to child. Don't + forget to increment the reference count for a VMA's <tt>struct + file</tt>. In the page fault handler of the child, it is OK to + allocate a new page instead of sharing the page with the + parent. The latter would be cooler, but it would require more + implementation work. Run <tt>mmaptest</tt>; make sure you pass + both <tt>mmaptest</tt> and <tt>forktest</tt>. + + </ul> + +<p>Run usertests to make sure you didn't break anything. + +<p>Optional challenges: + <ul> + + <li>If two processes have the same file mmap-ed (as + in <tt>forktest</tt>), share their physical pages. You will need + reference counts on physical pages. + + <li>The solution above allocates a new physical page for each page + read from the mmap-ed file, even though the data is also in kernel + memory in the buffer cache. Modify your implementation to mmap + that memory, instead of allocating a new page. This requires that + file blocks be the same size as pages (set <tt>BSIZE</tt> to + 4096). You will need to pin mmap-ed blocks into the buffer cache. + You will need to worry about reference counts. + + <li>Remove redundancy between your implementation for lazy + allocation and your implementation of mmap-ed files. (Hint: + create a VMA for the lazy allocation area.) + + <li>Modify <tt>exec</tt> to use a VMA for different sections of + the binary so that you get on-demand-paged executables. This will + make starting programs faster, because <tt>exec</tt> will not have + to read any data from the file system. + + <li>Implement on-demand paging: don't keep a process in memory, + but let the kernel move some parts of processes to disk when + physical memory is low. Then, page in the paged-out memory when + the process references it. Port your Linux program from the first + assignment to xv6 and run it. 
+ + </ul> + +</body> +</html> diff --git a/labs/syscall.html b/labs/syscall.html new file mode 100644 index 0000000..2281f2e --- /dev/null +++ b/labs/syscall.html @@ -0,0 +1,443 @@ +<html> +<head> +<title>Lab: Alarm and uthread</title> +<link rel="stylesheet" href="homework.css" type="text/css" /> +</head> +<body> + +<h1>Lab: Alarm and uthread</h1> + +This lab will familiarize you with the implementation of system calls +and switching between threads of execution. In particular, you will +implement new system calls (<tt>sigalarm</tt> and <tt>sigreturn</tt>) +and switching between threads in a user-level thread package. + +<h2>Warmup: RISC-V assembly</h2> + +<p>For this lab it will be important to understand a bit of RISC-V assembly. + +<p>Add a file user/call.c with the following content, modify the + Makefile to add the program to the user programs, and compile (make + fs.img). The Makefile also produces a binary and a readable + assembly a version of the program in the file user/call.asm. +<pre> +#include "kernel/param.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" + +int g(int x) { + return x+3; +} + +int f(int x) { + return g(x); +} + +void main(void) { + printf(1, "%d %d\n", f(8)+1, 13); + exit(); +} +</pre> + +<p>Read through user/call.asm and understand it. The instruction manual + for RISC-V is in the doc directory (doc/riscv-spec-v2.2.pdf). Here + are some questions that you should answer for yourself: + + <ul> + <li>Which registers contain arguments to functions? Which + register holds 13 in the call to <tt>printf</tt>? Which register + holds the second argument? Which register holds the third one? Etc. + + <li>Where is the function call to <tt>f</tt> from main? Where + is the call to <tt>g</tt>? + (Hint: the compiler may inline functions.) + + <li>At what address is the function <tt>printf</tt> located? + + <li>What value is in the register <tt>ra</tt> just after the <tt>jalr</tt> + to <tt>printf</tt> in <tt>main</tt>? 
+ </ul> + +<h2>Warmup: system call tracing</h2> + +<p>In this exercise you will modify the xv6 kernel to print out a line +for each system call invocation. It is enough to print the name of the +system call and the return value; you don't need to print the system +call arguments. + +<p> +When you're done, you should see output like this when booting +xv6: + +<pre> +... +fork -> 2 +exec -> 0 +open -> 3 +close -> 0 +$write -> 1 + write -> 1 +</pre> + +<p> +That's init forking and execing sh, sh making sure only two file descriptors are +open, and sh writing the $ prompt. (Note: the output of the shell and the +system call trace are intermixed, because the shell uses the write syscall to +print its output.) + +<p> Hint: modify the syscall() function in kernel/syscall.c. + +<p>Run the xv6 programs you wrote in earlier labs and inspect the system call + trace. Are there many system calls? Which system calls correspond + to code in the applications you wrote? + +<p>Optional: print the system call arguments. + + +<h2>Alarm</h2> + +<p> +In this exercise you'll add a feature to xv6 that periodically alerts +a process as it uses CPU time. This might be useful for compute-bound +processes that want to limit how much CPU time they chew up, or for +processes that want to compute but also want to take some periodic +action. More generally, you'll be implementing a primitive form of +user-level interrupt/fault handlers; you could use something similar +to handle page faults in the application, for example. + +<p> +You should add a new <tt>sigalarm(interval, handler)</tt> system call. +If an application calls <tt>sigalarm(n, fn)</tt>, then after every +<tt>n</tt> "ticks" of CPU time that the program consumes, the kernel +should cause application function +<tt>fn</tt> to be called. When <tt>fn</tt> returns, the application +should resume where it left off. A tick is a fairly arbitrary unit of +time in xv6, determined by how often a hardware timer generates +interrupts. 
+ +<p> +You'll find a file <tt>user/alarmtest.c</tt> in your xv6 +repository. Add it to the Makefile. It won't compile correctly +until you've added <tt>sigalarm</tt> and <tt>sigreturn</tt> +system calls (see below). + +<p> +<tt>alarmtest</tt> calls <tt>sigalarm(2, periodic)</tt> in <tt>test0</tt> to +ask the kernel to force a call to <tt>periodic()</tt> every 2 ticks, +and then spins for a while. +You can see the assembly +code for alarmtest in user/alarmtest.asm, which may be handy +for debugging. +When you've finished the lab, +<tt>alarmtest</tt> should produce output like this: + +<pre> +$ alarmtest +test0 start +......................................alarm! +test0 passed +test1 start +..alarm! +..alarm! +..alarm! +.alarm! +..alarm! +..alarm! +..alarm! +..alarm! +..alarm! +..alarm! +test1 passed +$ +</pre> + +<p>The main challenge will be to arrange that the handler is invoked + when the process's alarm interval expires. You'll need to modify + usertrap() in kernel/trap.c so that when a + process's alarm interval expires, the process executes + the handler. How can you do that? You will need to understand + how system calls work (i.e., the code in kernel/trampoline.S + and kernel/trap.c). Which register contains the address to which + system calls return? + +<p>Your solution will be only a few lines of code, but it may be tricky to + get it right. +We'll test your code with the version of alarmtest.c in the original +repository; if you modify alarmtest.c, make sure your kernel changes +cause the original alarmtest to pass the tests. + +<h3>test0: invoke handler</h3> + +<p>Get started by modifying the kernel to jump to the alarm handler in +user space, which will cause test0 to print "alarm!". Don't worry yet +what happens after the "alarm!" output; it's OK for now if your +program crashes after printing "alarm!". Here are some hints: + +<ul> + +<li>You'll need to modify the Makefile to cause <tt>alarmtest.c</tt> +to be compiled as an xv6 user program. 
+ +<li>The right declarations to put in <tt>user/user.h</tt> are: +<pre> + int sigalarm(int ticks, void (*handler)()); + int sigreturn(void); +</pre> + +<li>Update user/sys.pl (which generates user/usys.S), + kernel/syscall.h, and kernel/syscall.c + to allow <tt>alarmtest</tt> to invoke the sigalarm and + sigreturn system calls. + +<li>For now, your <tt>sys_sigreturn</tt> should just return zero. + +<li>Your <tt>sys_sigalarm()</tt> should store the alarm interval and +the pointer to the handler function in new fields in the <tt>proc</tt> +structure, defined in <tt>kernel/proc.h</tt>. + +<li>You'll need to keep track of how many ticks have passed since the +last call (or are left until the next call) to a process's alarm +handler; you'll need a new field in <tt>struct proc</tt> for this +too. You can initialize <tt>proc</tt> fields in <tt>allocproc()</tt> +in <tt>proc.c</tt>. + +<li>Every tick, the hardware clock forces an interrupt, which is handled +in <tt>usertrap()</tt>; you should add some code here. + +<li>You only want to manipulate a process's alarm ticks if there's a a + timer interrupt; you want something like +<pre> + if(which_dev == 2) ... +</pre> + +<li>Only invoke the alarm function if the process has a + timer outstanding. Note that the address of the user's alarm + function might be 0 (e.g., in alarmtest.asm, <tt>periodic</tt> is at + address 0). + +<li>It will be easier to look at traps with gdb if you tell qemu to +use only one CPU, which you can do by running +<pre> + make CPUS=1 qemu +</pre> + +<li>You've succeeded if alarmtest prints "alarm!". + +</ul> + +<h3>test1(): resume interrupted code</h3> + +Chances are that alarmtest crashes at some point after it prints +"alarm!". Depending on how your solution works, that point may be in +test0, or it may be in test1. Crashes are likely caused +by the alarm handler (<tt>periodic</tt> in alarmtest.c) returning +to the wrong point in the user program. 
+ +<p> +Your job now is to ensure that, when the alarm handler is done, +control returns to +the instruction at which the user program was originally +interrupted by the timer interrupt. You must also ensure that +the register contents are restored to values they held +at the time of the interrupt, so that the user program +can continue undisturbed after the alarm. + +<p>Your solution is likely to require you to save and restore + registers---what registers do you need to save and restore to resume + the interrupted code correctly? (Hint: it will be many). + Several approaches are possible; for this lab you should make + the <tt>sigreturn</tt> system call + restore registers and return to the original + interrupted user instruction. + The user-space alarm handler + calls sigreturn when it is done. + + Some hints: + <ul> + <li>Have <tt>usertrap</tt> save enough state in + <tt>struct proc</tt> when the timer goes off + that <tt>sigreturn</tt> can correctly return to the + interrupted user code. + + <li>Prevent re-entrant calls to the handler----if a handler hasn't + returned yet, the kernel shouldn't call it again. + </ul> + +<p>Once you pass <tt>test0</tt> and <tt>test1</tt>, run usertests to + make sure you didn't break any other parts of the kernel. + +<h2>Uthread: switching between threads</h2> + +<p>Download <a href="uthread.c">uthread.c</a> and <a + href="uthread_switch.S">uthread_switch.S</a> into your xv6 directory. +Make sure <tt>uthread_switch.S</tt> ends with <tt>.S</tt>, not +<tt>.s</tt>. Add the +following rule to the xv6 Makefile after the _forktest rule: + +<pre> +$U/_uthread: $U/uthread.o $U/uthread_switch.o + $(LD) $(LDFLAGS) -N -e main -Ttext 0 -o $U/_uthread $U/uthread.o $U/uthread_switch.o $(ULIB) + $(OBJDUMP) -S $U/_uthread > $U/uthread.asm +</pre> +Make sure that the blank space at the start of each line is a tab, +not spaces. + +<p> +Add <tt>_uthread</tt> in the Makefile to the list of user programs defined by UPROGS. 
+ +<p>Run xv6, then run <tt>uthread</tt> from the xv6 shell. The xv6 kernel will print an error message about <tt>uthread</tt> encountering a page fault. + +<p>Your job is to complete <tt>uthread_switch.S</tt>, so that you see output similar to +this (make sure to run with CPUS=1): +<pre> +~/classes/6828/xv6$ make CPUS=1 qemu +... +$ uthread +my thread running +my thread 0x0000000000002A30 +my thread running +my thread 0x0000000000004A40 +my thread 0x0000000000002A30 +my thread 0x0000000000004A40 +my thread 0x0000000000002A30 +my thread 0x0000000000004A40 +my thread 0x0000000000002A30 +my thread 0x0000000000004A40 +my thread 0x0000000000002A30 +... +my thread 0x0000000000002A88 +my thread 0x0000000000004A98 +my thread: exit +my thread: exit +thread_schedule: no runnable threads +$ +</pre> + +<p><tt>uthread</tt> creates two threads and switches back and forth between +them. Each thread prints "my thread ..." and then yields to give the other +thread a chance to run. + +<p>To observe the above output, you need to complete <tt>uthread_switch.S</tt>, but before +jumping into <tt>uthread_switch.S</tt>, first understand how <tt>uthread.c</tt> +uses <tt>uthread_switch</tt>. <tt>uthread.c</tt> has two global variables +<tt>current_thread</tt> and <tt>next_thread</tt>. Each is a pointer to a +<tt>thread</tt> structure. The thread structure has a stack for a thread and a +saved stack pointer (<tt>sp</tt>, which points into the thread's stack). The +job of <tt>uthread_switch</tt> is to save the current thread state into the +structure pointed to by <tt>current_thread</tt>, restore <tt>next_thread</tt>'s +state, and make <tt>current_thread</tt> point to where <tt>next_thread</tt> was +pointing to, so that when <tt>uthread_switch</tt> returns <tt>next_thread</tt> +is running and is the <tt>current_thread</tt>. + +<p>You should study <tt>thread_create</tt>, which sets up the initial stack for +a new thread. It provides hints about what <tt>uthread_switch</tt> should do. 
+Note that <tt>thread_create</tt> simulates saving all callee-save registers +on a new thread's stack. + +<p>To write the assembly in <tt>uthread_switch</tt>, you need to know how the C +compiler lays out <tt>struct thread</tt> in memory, which is as +follows: + +<pre> + -------------------- + | 4 bytes for state| + -------------------- + | stack size bytes | + | for stack | + -------------------- + | 8 bytes for sp | + -------------------- <--- current_thread + ...... + + ...... + -------------------- + | 4 bytes for state| + -------------------- + | stack size bytes | + | for stack | + -------------------- + | 8 bytes for sp | + -------------------- <--- next_thread +</pre> + +The variables <tt>&next_thread</tt> and <tt>&current_thread</tt> each +contain the address of a pointer to <tt>struct thread</tt>, and are +passed to <tt>uthread_switch</tt>. The following fragment of assembly +will be useful: + +<pre> + ld t0, 0(a0) + sd sp, 0(t0) +</pre> + +This saves <tt>sp</tt> in <tt>current_thread->sp</tt>. This works because +<tt>sp</tt> is at +offset 0 in the struct. +You can study the assembly the compiler generates for +<tt>uthread.c</tt> by looking at <tt>uthread.asm</tt>. + +<p>To test your code it might be helpful to single step through your +<tt>uthread_switch</tt> using <tt>riscv64-linux-gnu-gdb</tt>. You can get started in this way: + +<pre> +(gdb) file user/_uthread +Reading symbols from user/_uthread... +(gdb) b *0x230 + +</pre> +0x230 is the address of uthread_switch (see uthread.asm). When you +compile it may be at a different address, so check uthread.asm. +You may also be able to type "b uthread_switch". <b>XXX This doesn't work + for me; why?</b> + +<p>The breakpoint may (or may not) be triggered before you even run +<tt>uthread</tt>. How could that happen? + +<p>Once your xv6 shell runs, type "uthread", and gdb will break at +<tt>uthread_switch</tt>. 
Now you can type commands like the following to inspect +the state of <tt>uthread</tt>: + +<pre> + (gdb) p/x *next_thread + $1 = {sp = 0x4a28, stack = {0x0 (repeats 8088 times), + 0x68, 0x1, 0x0 <repeats 102 times>}, state = 0x1} +</pre> +What address is <tt>0x168</tt>, which sits on the bottom of the stack +of <tt>next_thread</tt>? + +With "x", you can examine the content of a memory location +<pre> + (gdb) x/x next_thread->sp + 0x4a28 <all_thread+16304>: 0x00000168 +</pre> +Why does that print <tt>0x168</tt>? + +<h3>Optional challenges</h3> + +<p>The user-level thread package interacts badly with the operating system in +several ways. For example, if one user-level thread blocks in a system call, +another user-level thread won't run, because the user-level threads scheduler +doesn't know that one of its threads has been descheduled by the xv6 scheduler. As +another example, two user-level threads will not run concurrently on different +cores, because the xv6 scheduler isn't aware that there are multiple +threads that could run in parallel. Note that if two user-level threads were to +run truly in parallel, this implementation won't work because of several races +(e.g., two threads on different processors could call <tt>thread_schedule</tt> +concurrently, select the same runnable thread, and both run it on different +processors.) + +<p>There are several ways of addressing these problems. One is + using <a href="http://en.wikipedia.org/wiki/Scheduler_activations">scheduler + activations</a> and another is to use one kernel thread per + user-level thread (as Linux kernels do). Implement one of these ways + in xv6. This is not easy to get right; for example, you will need to + implement TLB shootdown when updating a page table for a + multithreaded user process. + +<p>Add locks, condition variables, barriers, +etc. to your thread package. 
+ +</body> +</html> + diff --git a/labs/xv6.html b/labs/xv6.html new file mode 100644 index 0000000..13d581e --- /dev/null +++ b/labs/xv6.html @@ -0,0 +1,238 @@ +<html> +<head> +<title>Lab: xv6</title> +<link rel="stylesheet" href="homework.css" type="text/css" /> +</head> +<body> + +<h1>Lab: xv6</h1> + +This lab makes you familiar with xv6 and its system calls. + +<h2>Boot xv6</h2> + +<p>Login to Athena (e.g., ssh -X athena.dialup.mit.edu) and attach the course +locker: (You must run this command every time you log in; or add it to your +~/.environment file.) + +<pre> +$ add -f 6.828 +</pre> + +<p>Fetch the xv6 source: + +<pre> +$ mkdir 6.828 +$ cd 6.828 +$ git clone git://github.com/mit-pdos/xv6-riscv.git +Cloning into 'xv6-riscv'... +... +$ +</pre> + +<p>XXX pointer to an update tools page + +<p>Build xv6 on Athena: +<pre> +$ cd xv6-public +$ makeriscv64-linux-gnu-gcc -c -o kernel/entry.o kernel/entry.S +riscv64-linux-gnu-gcc -Wall -Werror -O -fno-omit-frame-pointer -ggdb -MD -mcmodel=medany -ffreestanding -fno-common -nostdlib -mno-relax -I. -fno-stack-protector -fno-pie -no-pie -c -o kernel/start.o kernel/start.c +... +$ make qemu +... +mkfs/mkfs fs.img README user/_cat user/_echo user/_forktest user/_grep user/_init user/_kill user/_ln user/_ls user/_mkdir user/_rm user/_sh user/_stressfs user/_usertests user/_wc user/_zombie user/_cow +nmeta 46 (boot, super, log blocks 30 inode blocks 13, bitmap blocks 1) blocks 954 total 1000 +balloc: first 497 blocks have been allocated +balloc: write bitmap block at sector 45 +qemu-system-riscv64 -machine virt -kernel kernel/kernel -m 3G -smp 3 -nographic -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0 +hart 0 starting +hart 2 starting +hart 1 starting +init: starting sh +$ +</pre> + +<p> +If you type <tt>ls</tt> at the prompt, you should output similar to the following: +<pre> +$ ls +. 1 1 1024 +.. 
1 1 1024 +README 2 2 2181 +cat 2 3 21024 +echo 2 4 19776 +forktest 2 5 11456 +grep 2 6 24512 +init 2 7 20656 +kill 2 8 19856 +ln 2 9 19832 +ls 2 10 23280 +mkdir 2 11 19952 +rm 2 12 19936 +sh 2 13 38632 +stressfs 2 14 20912 +usertests 2 15 106264 +wc 2 16 22160 +zombie 2 17 19376 +cow 2 18 27152 +console 3 19 0 +</pre> +These are the programs/files that <tt>mkfs</tt> includes in the +initial file system. You just ran one of them: <tt>ls</tt>. + +<h2>sleep</h2> + +<p>Implement the UNIX program sleep for xv6; your sleep should pause + for a user-specified number of ticks. + +<p>Some hints: + <ul> + <li>Look at some of the other programs in <tt>user/</tt> to see + how you can obtain the command-line arguments passed to a program. If the user + forgets to pass an argument, sleep should print an error message. + + <li>The command-line argument is passed as a string; you can convert it to an + integer using <tt>atoi</tt> (see user/ulib.c). + + <li>Use the system call <tt>sleep</tt> (see user/usys.S and kernel/sysproc.c). + + <li>Make sure <tt>main</tt> calls <tt>exit()</tt> in order to exit + your program. + + <li>Add the program to <tt>UPROGS</tt> in Makefile and compile + user programs by typing <tt>make fs.img</tt>. + + </ul> + + <p>Run the program from the xv6 shell: + <pre> + $ make qemu + ... + init: starting sh + $ sleep 10 + (waits for a little while) + $ + </pre> + + <p>Optional: write an uptime program that prints the uptime in terms + of ticks using the <tt>uptime</tt> system call. + +<h2>pingpong</h2> + +<p> Write a program that uses UNIX system calls to ``ping-pong'' a + byte between two processes over a pair of pipes, one for each + direction. The parent sends by writing a byte to <tt>fd[1]</tt> and + the child receives it by reading from <tt>fd[0]</tt>. After + receiving a byte from parent, the child responds with its own byte + by writing to <tt>fd[1]</tt>, which the parent then reads. + +<p>Some hints: + <ul> + <li>Use <tt>pipe</tt> to create a pipe. 
+ <li>Use <tt>fork</tt> to create a child. + <li>Use <tt>read</tt> to read from the pipe, and <tt>write</tt> to write to the pipe. + </ul> + +<h2>primes</h2> + + <p>Write a concurrent version of prime sieve using pipes. This idea + is due to Doug McIlroy, inventor of Unix pipes. The picture + halfway down <a href="http://swtch.com/~rsc/thread/">the page</a> + and the text surrounding it explain how to do it. + + <p>Your goal is to use <tt>pipe</tt> and <tt>fork</tt> to set up + the pipeline. The first process feeds the numbers 2 through 35 + into the pipeline. For each prime number, you will arrange to + create one process that reads from its left neighbor over a pipe + and writes to its right neighbor over another pipe. Since xv6 has + limited number of file descriptors and processes, the first + process can stop at 35. + +<p>Some hints: + <ul> + <li>Be careful to close file descriptors that a process doesn't + need, because otherwise your program will run xv6 out of resources + before the first process reaches 35. + + <li>Once the first process reach 35, you should arrange that the + pipeline terminates cleanly (Hint: read will return an end-of-file + when the write-side of the pipe is closed). + </ul> + +<h2>find</h2> + +<p>Write a simple version of the UNIX find program: find all the files + in a directory tree whose name matches a string. For example if the + file system contains a file <tt>a/b</tt>, then running find as + follows should produce: + <pre> + $ find . b + ./a/b + $ + </pre> + +<p>Some hints: + <ul> + <li>Look at user/ls.c to see how to read directories. + <li>Use recursion to run find in sub-directories. + <li>Don't recurse into "." and "..". + </ul> + +<p>Optional: support regular expressions in name matching. Grep has some + primitive support for regular expressions. 
+ +<h2>xargs</h2> + +<p>Write a simple version of the UNIX xargs program: read lines from + standard in and run a command for each line, supplying the line as + arguments to the command. The following example illustrates xarg's + behavior: + <pre> + $ xargs echo bye + hello too + bye hello too + <ctrl-d> + $ + </pre> + Note that the command here is "echo bye" and the additional + arguments are "hello too", making the command "echo bye hello too", + which outputs "bye hello too". + +<p>xargs and find combine well: + <pre> + find . b | xargs grep hello + </pre> + will run "grep hello" on each file named b in the directories below ".". + +<p>Some hints: + <ul> + <li>Use <tt>fork</tt> and <tt>exec</tt> system call to invoke the + command on each line of input. Use <tt>wait</tt> in the parent + to wait for the child to complete running the command. + <li>Read from stdin a character at the time until the newline + character ('\n'). + <li>kernel/param.h declares MAXARG, which may be useful if you need + to declare an argv. + </ul> + +<h2>Optional: modify the shell</h2> + +There are endless ways in which the shell could be extended. Here are +some suggestions: + +<ul> + +<li>Modify the shell to support wait. + +<li>Modify the shell to support lists of commands, separated by ";" + +<li>Modify the shell to support sub-shells by implementing "(" and ")" + +<li>Modify the shell to allow users to edit the command line + +</ul> + +</body> +</html> + + diff --git a/lapic.c b/lapic.c deleted file mode 100644 index b22bbd7..0000000 --- a/lapic.c +++ /dev/null @@ -1,229 +0,0 @@ -// The local APIC manages internal (non-I/O) interrupts. -// See Chapter 8 & Appendix C of Intel processor manual volume 3. - -#include "param.h" -#include "types.h" -#include "defs.h" -#include "date.h" -#include "memlayout.h" -#include "traps.h" -#include "mmu.h" -#include "x86.h" - -// Local APIC registers, divided by 4 for use as uint[] indices. 
-#define ID (0x0020/4) // ID -#define VER (0x0030/4) // Version -#define TPR (0x0080/4) // Task Priority -#define EOI (0x00B0/4) // EOI -#define SVR (0x00F0/4) // Spurious Interrupt Vector - #define ENABLE 0x00000100 // Unit Enable -#define ESR (0x0280/4) // Error Status -#define ICRLO (0x0300/4) // Interrupt Command - #define INIT 0x00000500 // INIT/RESET - #define STARTUP 0x00000600 // Startup IPI - #define DELIVS 0x00001000 // Delivery status - #define ASSERT 0x00004000 // Assert interrupt (vs deassert) - #define DEASSERT 0x00000000 - #define LEVEL 0x00008000 // Level triggered - #define BCAST 0x00080000 // Send to all APICs, including self. - #define BUSY 0x00001000 - #define FIXED 0x00000000 -#define ICRHI (0x0310/4) // Interrupt Command [63:32] -#define TIMER (0x0320/4) // Local Vector Table 0 (TIMER) - #define X1 0x0000000B // divide counts by 1 - #define PERIODIC 0x00020000 // Periodic -#define PCINT (0x0340/4) // Performance Counter LVT -#define LINT0 (0x0350/4) // Local Vector Table 1 (LINT0) -#define LINT1 (0x0360/4) // Local Vector Table 2 (LINT1) -#define ERROR (0x0370/4) // Local Vector Table 3 (ERROR) - #define MASKED 0x00010000 // Interrupt masked -#define TICR (0x0380/4) // Timer Initial Count -#define TCCR (0x0390/4) // Timer Current Count -#define TDCR (0x03E0/4) // Timer Divide Configuration - -volatile uint *lapic; // Initialized in mp.c - -//PAGEBREAK! -static void -lapicw(int index, int value) -{ - lapic[index] = value; - lapic[ID]; // wait for write to finish, by reading -} - -void -lapicinit(void) -{ - if(!lapic) - return; - - // Enable local APIC; set spurious interrupt vector. - lapicw(SVR, ENABLE | (T_IRQ0 + IRQ_SPURIOUS)); - - // The timer repeatedly counts down at bus frequency - // from lapic[TICR] and then issues an interrupt. - // If xv6 cared more about precise timekeeping, - // TICR would be calibrated using an external time source. 
- lapicw(TDCR, X1); - lapicw(TIMER, PERIODIC | (T_IRQ0 + IRQ_TIMER)); - lapicw(TICR, 10000000); - - // Disable logical interrupt lines. - lapicw(LINT0, MASKED); - lapicw(LINT1, MASKED); - - // Disable performance counter overflow interrupts - // on machines that provide that interrupt entry. - if(((lapic[VER]>>16) & 0xFF) >= 4) - lapicw(PCINT, MASKED); - - // Map error interrupt to IRQ_ERROR. - lapicw(ERROR, T_IRQ0 + IRQ_ERROR); - - // Clear error status register (requires back-to-back writes). - lapicw(ESR, 0); - lapicw(ESR, 0); - - // Ack any outstanding interrupts. - lapicw(EOI, 0); - - // Send an Init Level De-Assert to synchronise arbitration ID's. - lapicw(ICRHI, 0); - lapicw(ICRLO, BCAST | INIT | LEVEL); - while(lapic[ICRLO] & DELIVS) - ; - - // Enable interrupts on the APIC (but not on the processor). - lapicw(TPR, 0); -} - -int -lapicid(void) -{ - if (!lapic) - return 0; - return lapic[ID] >> 24; -} - -// Acknowledge interrupt. -void -lapiceoi(void) -{ - if(lapic) - lapicw(EOI, 0); -} - -// Spin for a given number of microseconds. -// On real hardware would want to tune this dynamically. -void -microdelay(int us) -{ -} - -#define CMOS_PORT 0x70 -#define CMOS_RETURN 0x71 - -// Start additional processor running entry code at addr. -// See Appendix B of MultiProcessor Specification. -void -lapicstartap(uchar apicid, uint addr) -{ - int i; - ushort *wrv; - - // "The BSP must initialize CMOS shutdown code to 0AH - // and the warm reset vector (DWORD based at 40:67) to point at - // the AP startup code prior to the [universal startup algorithm]." - outb(CMOS_PORT, 0xF); // offset 0xF is shutdown code - outb(CMOS_PORT+1, 0x0A); - wrv = (ushort*)P2V((0x40<<4 | 0x67)); // Warm reset vector - wrv[0] = 0; - wrv[1] = addr >> 4; - - // "Universal startup algorithm." - // Send INIT (level-triggered) interrupt to reset other CPU. 
- lapicw(ICRHI, apicid<<24); - lapicw(ICRLO, INIT | LEVEL | ASSERT); - microdelay(200); - lapicw(ICRLO, INIT | LEVEL); - microdelay(100); // should be 10ms, but too slow in Bochs! - - // Send startup IPI (twice!) to enter code. - // Regular hardware is supposed to only accept a STARTUP - // when it is in the halted state due to an INIT. So the second - // should be ignored, but it is part of the official Intel algorithm. - // Bochs complains about the second one. Too bad for Bochs. - for(i = 0; i < 2; i++){ - lapicw(ICRHI, apicid<<24); - lapicw(ICRLO, STARTUP | (addr>>12)); - microdelay(200); - } -} - -#define CMOS_STATA 0x0a -#define CMOS_STATB 0x0b -#define CMOS_UIP (1 << 7) // RTC update in progress - -#define SECS 0x00 -#define MINS 0x02 -#define HOURS 0x04 -#define DAY 0x07 -#define MONTH 0x08 -#define YEAR 0x09 - -static uint -cmos_read(uint reg) -{ - outb(CMOS_PORT, reg); - microdelay(200); - - return inb(CMOS_RETURN); -} - -static void -fill_rtcdate(struct rtcdate *r) -{ - r->second = cmos_read(SECS); - r->minute = cmos_read(MINS); - r->hour = cmos_read(HOURS); - r->day = cmos_read(DAY); - r->month = cmos_read(MONTH); - r->year = cmos_read(YEAR); -} - -// qemu seems to use 24-hour GWT and the values are BCD encoded -void -cmostime(struct rtcdate *r) -{ - struct rtcdate t1, t2; - int sb, bcd; - - sb = cmos_read(CMOS_STATB); - - bcd = (sb & (1 << 2)) == 0; - - // make sure CMOS doesn't modify time while we read it - for(;;) { - fill_rtcdate(&t1); - if(cmos_read(CMOS_STATA) & CMOS_UIP) - continue; - fill_rtcdate(&t2); - if(memcmp(&t1, &t2, sizeof(t1)) == 0) - break; - } - - // convert - if(bcd) { -#define CONV(x) (t1.x = ((t1.x >> 4) * 10) + (t1.x & 0xf)) - CONV(second); - CONV(minute); - CONV(hour ); - CONV(day ); - CONV(month ); - CONV(year ); -#undef CONV - } - - *r = t1; - r->year += 2000; -} @@ -1,116 +0,0 @@ -#include "types.h" -#include "defs.h" -#include "param.h" -#include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" - -static 
void startothers(void); -static void mpmain(void) __attribute__((noreturn)); -extern pde_t *kpgdir; -extern char end[]; // first address after kernel loaded from ELF file - -// Bootstrap processor starts running C code here. -// Allocate a real stack and switch to it, first -// doing some setup required for memory allocator to work. -int -main(void) -{ - kinit1(end, P2V(4*1024*1024)); // phys page allocator - kvmalloc(); // kernel page table - mpinit(); // detect other processors - lapicinit(); // interrupt controller - seginit(); // segment descriptors - picinit(); // disable pic - ioapicinit(); // another interrupt controller - consoleinit(); // console hardware - uartinit(); // serial port - pinit(); // process table - tvinit(); // trap vectors - binit(); // buffer cache - fileinit(); // file table - ideinit(); // disk - startothers(); // start other processors - kinit2(P2V(4*1024*1024), P2V(PHYSTOP)); // must come after startothers() - userinit(); // first user process - mpmain(); // finish this processor's setup -} - -// Other CPUs jump here from entryother.S. -static void -mpenter(void) -{ - switchkvm(); - seginit(); - lapicinit(); - mpmain(); -} - -// Common CPU setup code. -static void -mpmain(void) -{ - cprintf("cpu%d: starting %d\n", cpuid(), cpuid()); - idtinit(); // load idt register - xchg(&(mycpu()->started), 1); // tell startothers() we're up - scheduler(); // start running processes -} - -pde_t entrypgdir[]; // For entry.S - -// Start the non-boot (AP) processors. -static void -startothers(void) -{ - extern uchar _binary_entryother_start[], _binary_entryother_size[]; - uchar *code; - struct cpu *c; - char *stack; - - // Write entry code to unused memory at 0x7000. - // The linker has placed the image of entryother.S in - // _binary_entryother_start. - code = P2V(0x7000); - memmove(code, _binary_entryother_start, (uint)_binary_entryother_size); - - for(c = cpus; c < cpus+ncpu; c++){ - if(c == mycpu()) // We've started already. 
- continue; - - // Tell entryother.S what stack to use, where to enter, and what - // pgdir to use. We cannot use kpgdir yet, because the AP processor - // is running in low memory, so we use entrypgdir for the APs too. - stack = kalloc(); - *(void**)(code-4) = stack + KSTACKSIZE; - *(void(**)(void))(code-8) = mpenter; - *(int**)(code-12) = (void *) V2P(entrypgdir); - - lapicstartap(c->apicid, V2P(code)); - - // wait for cpu to finish mpmain() - while(c->started == 0) - ; - } -} - -// The boot page table used in entry.S and entryother.S. -// Page directories (and page tables) must start on page boundaries, -// hence the __aligned__ attribute. -// PTE_PS in a page directory entry enables 4Mbyte pages. - -__attribute__((__aligned__(PGSIZE))) -pde_t entrypgdir[NPDENTRIES] = { - // Map VA's [0, 4MB) to PA's [0, 4MB) - [0] = (0) | PTE_P | PTE_W | PTE_PS, - // Map VA's [KERNBASE, KERNBASE+4MB) to PA's [0, 4MB) - [KERNBASE>>PDXSHIFT] = (0) | PTE_P | PTE_W | PTE_PS, -}; - -//PAGEBREAK! -// Blank page. -//PAGEBREAK! -// Blank page. -//PAGEBREAK! -// Blank page. - diff --git a/memide.c b/memide.c deleted file mode 100644 index ba267ac..0000000 --- a/memide.c +++ /dev/null @@ -1,60 +0,0 @@ -// Fake IDE disk; stores blocks in memory. -// Useful for running kernel without scratch disk. - -#include "types.h" -#include "defs.h" -#include "param.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" -#include "traps.h" -#include "spinlock.h" -#include "sleeplock.h" -#include "fs.h" -#include "buf.h" - -extern uchar _binary_fs_img_start[], _binary_fs_img_size[]; - -static int disksize; -static uchar *memdisk; - -void -ideinit(void) -{ - memdisk = _binary_fs_img_start; - disksize = (uint)_binary_fs_img_size/BSIZE; -} - -// Interrupt handler. -void -ideintr(void) -{ - // no-op -} - -// Sync buf with disk. -// If B_DIRTY is set, write buf to disk, clear B_DIRTY, set B_VALID. -// Else if B_VALID is not set, read buf from disk, set B_VALID. 
-void -iderw(struct buf *b) -{ - uchar *p; - - if(!holdingsleep(&b->lock)) - panic("iderw: buf not locked"); - if((b->flags & (B_VALID|B_DIRTY)) == B_VALID) - panic("iderw: nothing to do"); - if(b->dev != 1) - panic("iderw: request not for disk 1"); - if(b->blockno >= disksize) - panic("iderw: block out of range"); - - p = memdisk + b->blockno*BSIZE; - - if(b->flags & B_DIRTY){ - b->flags &= ~B_DIRTY; - memmove(p, b->data, BSIZE); - } else - memmove(b->data, p, BSIZE); - b->flags |= B_VALID; -} diff --git a/memlayout.h b/memlayout.h deleted file mode 100644 index d1615f7..0000000 --- a/memlayout.h +++ /dev/null @@ -1,15 +0,0 @@ -// Memory layout - -#define EXTMEM 0x100000 // Start of extended memory -#define PHYSTOP 0xE000000 // Top physical memory -#define DEVSPACE 0xFE000000 // Other devices are at high addresses - -// Key addresses for address space layout (see kmap in vm.c for layout) -#define KERNBASE 0x80000000 // First kernel virtual address -#define KERNLINK (KERNBASE+EXTMEM) // Address where kernel is linked - -#define V2P(a) (((uint) (a)) - KERNBASE) -#define P2V(a) ((void *)(((char *) (a)) + KERNBASE)) - -#define V2P_WO(x) ((x) - KERNBASE) // same as V2P, but without casts -#define P2V_WO(x) ((x) + KERNBASE) // same as P2V, but without casts @@ -6,10 +6,10 @@ #include <assert.h> #define stat xv6_stat // avoid clash with host struct stat -#include "types.h" -#include "fs.h" -#include "stat.h" -#include "param.h" +#include "kernel/types.h" +#include "kernel/fs.h" +#include "kernel/stat.h" +#include "kernel/param.h" #ifndef static_assert #define static_assert(a, b) do { switch (0) case 0: case (a): ; } while (0) @@ -94,6 +94,7 @@ main(int argc, char *argv[]) nmeta = 2 + nlog + ninodeblocks + nbitmap; nblocks = FSSIZE - nmeta; + sb.magic = FSMAGIC; sb.size = xint(FSSIZE); sb.nblocks = xint(nblocks); sb.ninodes = xint(NINODES); @@ -128,7 +129,14 @@ main(int argc, char *argv[]) iappend(rootino, &de, sizeof(de)); for(i = 2; i < argc; i++){ - 
assert(index(argv[i], '/') == 0); + // get rid of "user/" + char *shortname; + if(strncmp(argv[i], "user/", 5) == 0) + shortname = argv[i] + 5; + else + shortname = argv[i]; + + assert(index(shortname, '/') == 0); if((fd = open(argv[i], 0)) < 0){ perror(argv[i]); @@ -139,14 +147,14 @@ main(int argc, char *argv[]) // The binaries are named _rm, _cat, etc. to keep the // build operating system from trying to execute them // in place of system binaries like rm and cat. - if(argv[i][0] == '_') - ++argv[i]; + if(shortname[0] == '_') + shortname += 1; inum = ialloc(T_FILE); bzero(&de, sizeof(de)); de.inum = xshort(inum); - strncpy(de.name, argv[i], DIRSIZ); + strncpy(de.name, shortname, DIRSIZ); iappend(rootino, &de, sizeof(de)); while((cc = read(fd, buf, sizeof(buf))) > 0) @@ -1,181 +0,0 @@ -// This file contains definitions for the -// x86 memory management unit (MMU). - -// Eflags register -#define FL_IF 0x00000200 // Interrupt Enable - -// Control Register flags -#define CR0_PE 0x00000001 // Protection Enable -#define CR0_WP 0x00010000 // Write Protect -#define CR0_PG 0x80000000 // Paging - -#define CR4_PSE 0x00000010 // Page size extension - -// various segment selectors. -#define SEG_KCODE 1 // kernel code -#define SEG_KDATA 2 // kernel data+stack -#define SEG_UCODE 3 // user code -#define SEG_UDATA 4 // user data+stack -#define SEG_TSS 5 // this process's task state - -// cpu->gdt[NSEGS] holds the above segments. 
-#define NSEGS 6 - -#ifndef __ASSEMBLER__ -// Segment Descriptor -struct segdesc { - uint lim_15_0 : 16; // Low bits of segment limit - uint base_15_0 : 16; // Low bits of segment base address - uint base_23_16 : 8; // Middle bits of segment base address - uint type : 4; // Segment type (see STS_ constants) - uint s : 1; // 0 = system, 1 = application - uint dpl : 2; // Descriptor Privilege Level - uint p : 1; // Present - uint lim_19_16 : 4; // High bits of segment limit - uint avl : 1; // Unused (available for software use) - uint rsv1 : 1; // Reserved - uint db : 1; // 0 = 16-bit segment, 1 = 32-bit segment - uint g : 1; // Granularity: limit scaled by 4K when set - uint base_31_24 : 8; // High bits of segment base address -}; - -// Normal segment -#define SEG(type, base, lim, dpl) (struct segdesc) \ -{ ((lim) >> 12) & 0xffff, (uint)(base) & 0xffff, \ - ((uint)(base) >> 16) & 0xff, type, 1, dpl, 1, \ - (uint)(lim) >> 28, 0, 0, 1, 1, (uint)(base) >> 24 } -#define SEG16(type, base, lim, dpl) (struct segdesc) \ -{ (lim) & 0xffff, (uint)(base) & 0xffff, \ - ((uint)(base) >> 16) & 0xff, type, 1, dpl, 1, \ - (uint)(lim) >> 16, 0, 0, 1, 0, (uint)(base) >> 24 } -#endif - -#define DPL_USER 0x3 // User DPL - -// Application segment type bits -#define STA_X 0x8 // Executable segment -#define STA_W 0x2 // Writeable (non-executable segments) -#define STA_R 0x2 // Readable (executable segments) - -// System segment type bits -#define STS_T32A 0x9 // Available 32-bit TSS -#define STS_IG32 0xE // 32-bit Interrupt Gate -#define STS_TG32 0xF // 32-bit Trap Gate - -// A virtual address 'la' has a three-part structure as follows: -// -// +--------10------+-------10-------+---------12----------+ -// | Page Directory | Page Table | Offset within Page | -// | Index | Index | | -// +----------------+----------------+---------------------+ -// \--- PDX(va) --/ \--- PTX(va) --/ - -// page directory index -#define PDX(va) (((uint)(va) >> PDXSHIFT) & 0x3FF) - -// page table index -#define 
PTX(va) (((uint)(va) >> PTXSHIFT) & 0x3FF) - -// construct virtual address from indexes and offset -#define PGADDR(d, t, o) ((uint)((d) << PDXSHIFT | (t) << PTXSHIFT | (o))) - -// Page directory and page table constants. -#define NPDENTRIES 1024 // # directory entries per page directory -#define NPTENTRIES 1024 // # PTEs per page table -#define PGSIZE 4096 // bytes mapped by a page - -#define PTXSHIFT 12 // offset of PTX in a linear address -#define PDXSHIFT 22 // offset of PDX in a linear address - -#define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1)) -#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1)) - -// Page table/directory entry flags. -#define PTE_P 0x001 // Present -#define PTE_W 0x002 // Writeable -#define PTE_U 0x004 // User -#define PTE_PS 0x080 // Page Size - -// Address in page table or page directory entry -#define PTE_ADDR(pte) ((uint)(pte) & ~0xFFF) -#define PTE_FLAGS(pte) ((uint)(pte) & 0xFFF) - -#ifndef __ASSEMBLER__ -typedef uint pte_t; - -// Task state segment format -struct taskstate { - uint link; // Old ts selector - uint esp0; // Stack pointers and segment selectors - ushort ss0; // after an increase in privilege level - ushort padding1; - uint *esp1; - ushort ss1; - ushort padding2; - uint *esp2; - ushort ss2; - ushort padding3; - void *cr3; // Page directory base - uint *eip; // Saved state from last task switch - uint eflags; - uint eax; // More saved state (registers) - uint ecx; - uint edx; - uint ebx; - uint *esp; - uint *ebp; - uint esi; - uint edi; - ushort es; // Even more saved state (segment selectors) - ushort padding4; - ushort cs; - ushort padding5; - ushort ss; - ushort padding6; - ushort ds; - ushort padding7; - ushort fs; - ushort padding8; - ushort gs; - ushort padding9; - ushort ldt; - ushort padding10; - ushort t; // Trap on task switch - ushort iomb; // I/O map base address -}; - -// Gate descriptors for interrupts and traps -struct gatedesc { - uint off_15_0 : 16; // low 16 bits of offset in segment - uint cs : 16; // code 
segment selector - uint args : 5; // # args, 0 for interrupt/trap gates - uint rsv1 : 3; // reserved(should be zero I guess) - uint type : 4; // type(STS_{IG32,TG32}) - uint s : 1; // must be 0 (system) - uint dpl : 2; // descriptor(meaning new) privilege level - uint p : 1; // Present - uint off_31_16 : 16; // high bits of offset in segment -}; - -// Set up a normal interrupt/trap gate descriptor. -// - istrap: 1 for a trap (= exception) gate, 0 for an interrupt gate. -// interrupt gate clears FL_IF, trap gate leaves FL_IF alone -// - sel: Code segment selector for interrupt/trap handler -// - off: Offset in code segment for interrupt/trap handler -// - dpl: Descriptor Privilege Level - -// the privilege level required for software to invoke -// this interrupt/trap gate explicitly using an int instruction. -#define SETGATE(gate, istrap, sel, off, d) \ -{ \ - (gate).off_15_0 = (uint)(off) & 0xffff; \ - (gate).cs = (sel); \ - (gate).args = 0; \ - (gate).rsv1 = 0; \ - (gate).type = (istrap) ? STS_TG32 : STS_IG32; \ - (gate).s = 0; \ - (gate).dpl = (d); \ - (gate).p = 1; \ - (gate).off_31_16 = (uint)(off) >> 16; \ -} - -#endif @@ -1,139 +0,0 @@ -// Multiprocessor support -// Search memory for MP description structures. -// http://developer.intel.com/design/pentium/datashts/24201606.pdf - -#include "types.h" -#include "defs.h" -#include "param.h" -#include "memlayout.h" -#include "mp.h" -#include "x86.h" -#include "mmu.h" -#include "proc.h" - -struct cpu cpus[NCPU]; -int ncpu; -uchar ioapicid; - -static uchar -sum(uchar *addr, int len) -{ - int i, sum; - - sum = 0; - for(i=0; i<len; i++) - sum += addr[i]; - return sum; -} - -// Look for an MP structure in the len bytes at addr. 
-static struct mp* -mpsearch1(uint a, int len) -{ - uchar *e, *p, *addr; - - addr = P2V(a); - e = addr+len; - for(p = addr; p < e; p += sizeof(struct mp)) - if(memcmp(p, "_MP_", 4) == 0 && sum(p, sizeof(struct mp)) == 0) - return (struct mp*)p; - return 0; -} - -// Search for the MP Floating Pointer Structure, which according to the -// spec is in one of the following three locations: -// 1) in the first KB of the EBDA; -// 2) in the last KB of system base memory; -// 3) in the BIOS ROM between 0xE0000 and 0xFFFFF. -static struct mp* -mpsearch(void) -{ - uchar *bda; - uint p; - struct mp *mp; - - bda = (uchar *) P2V(0x400); - if((p = ((bda[0x0F]<<8)| bda[0x0E]) << 4)){ - if((mp = mpsearch1(p, 1024))) - return mp; - } else { - p = ((bda[0x14]<<8)|bda[0x13])*1024; - if((mp = mpsearch1(p-1024, 1024))) - return mp; - } - return mpsearch1(0xF0000, 0x10000); -} - -// Search for an MP configuration table. For now, -// don't accept the default configurations (physaddr == 0). -// Check for correct signature, calculate the checksum and, -// if correct, check the version. -// To do: check extended table checksum. 
-static struct mpconf* -mpconfig(struct mp **pmp) -{ - struct mpconf *conf; - struct mp *mp; - - if((mp = mpsearch()) == 0 || mp->physaddr == 0) - return 0; - conf = (struct mpconf*) P2V((uint) mp->physaddr); - if(memcmp(conf, "PCMP", 4) != 0) - return 0; - if(conf->version != 1 && conf->version != 4) - return 0; - if(sum((uchar*)conf, conf->length) != 0) - return 0; - *pmp = mp; - return conf; -} - -void -mpinit(void) -{ - uchar *p, *e; - int ismp; - struct mp *mp; - struct mpconf *conf; - struct mpproc *proc; - struct mpioapic *ioapic; - - if((conf = mpconfig(&mp)) == 0) - panic("Expect to run on an SMP"); - ismp = 1; - lapic = (uint*)conf->lapicaddr; - for(p=(uchar*)(conf+1), e=(uchar*)conf+conf->length; p<e; ){ - switch(*p){ - case MPPROC: - proc = (struct mpproc*)p; - if(ncpu < NCPU) { - cpus[ncpu].apicid = proc->apicid; // apicid may differ from ncpu - ncpu++; - } - p += sizeof(struct mpproc); - continue; - case MPIOAPIC: - ioapic = (struct mpioapic*)p; - ioapicid = ioapic->apicno; - p += sizeof(struct mpioapic); - continue; - case MPBUS: - case MPIOINTR: - case MPLINTR: - p += 8; - continue; - default: - ismp = 0; - break; - } - } - if(!ismp) - panic("Didn't find a suitable machine"); - - if(mp->imcrp){ - // Bochs doesn't support IMCR, so this doesn't run on Bochs. - // But it would on real hardware. - outb(0x22, 0x70); // Select IMCR - outb(0x23, inb(0x23) | 1); // Mask external interrupts. 
- } -} @@ -1,56 +0,0 @@ -// See MultiProcessor Specification Version 1.[14] - -struct mp { // floating pointer - uchar signature[4]; // "_MP_" - void *physaddr; // phys addr of MP config table - uchar length; // 1 - uchar specrev; // [14] - uchar checksum; // all bytes must add up to 0 - uchar type; // MP system config type - uchar imcrp; - uchar reserved[3]; -}; - -struct mpconf { // configuration table header - uchar signature[4]; // "PCMP" - ushort length; // total table length - uchar version; // [14] - uchar checksum; // all bytes must add up to 0 - uchar product[20]; // product id - uint *oemtable; // OEM table pointer - ushort oemlength; // OEM table length - ushort entry; // entry count - uint *lapicaddr; // address of local APIC - ushort xlength; // extended table length - uchar xchecksum; // extended table checksum - uchar reserved; -}; - -struct mpproc { // processor table entry - uchar type; // entry type (0) - uchar apicid; // local APIC id - uchar version; // local APIC verison - uchar flags; // CPU flags - #define MPBOOT 0x02 // This proc is the bootstrap processor. - uchar signature[4]; // CPU signature - uint feature; // feature flags from CPUID instruction - uchar reserved[8]; -}; - -struct mpioapic { // I/O APIC table entry - uchar type; // entry type (2) - uchar apicno; // I/O APIC id - uchar version; // I/O APIC version - uchar flags; // I/O APIC flags - uint *addr; // I/O APIC address -}; - -// Table entry types -#define MPPROC 0x00 // One per processor -#define MPBUS 0x01 // One per bus -#define MPIOAPIC 0x02 // One per I/O APIC -#define MPIOINTR 0x03 // One per bus interrupt source -#define MPLINTR 0x04 // One per system interrupt source - -//PAGEBREAK! -// Blank page. 
diff --git a/picirq.c b/picirq.c deleted file mode 100644 index e26957f..0000000 --- a/picirq.c +++ /dev/null @@ -1,19 +0,0 @@ -#include "types.h" -#include "x86.h" -#include "traps.h" - -// I/O Addresses of the two programmable interrupt controllers -#define IO_PIC1 0x20 // Master (IRQs 0-7) -#define IO_PIC2 0xA0 // Slave (IRQs 8-15) - -// Don't use the 8259A interrupt controllers. Xv6 assumes SMP hardware. -void -picinit(void) -{ - // mask all interrupts - outb(IO_PIC1+1, 0xFF); - outb(IO_PIC2+1, 0xFF); -} - -//PAGEBREAK! -// Blank page. @@ -1,121 +0,0 @@ -#include "types.h" -#include "defs.h" -#include "param.h" -#include "mmu.h" -#include "proc.h" -#include "fs.h" -#include "spinlock.h" -#include "sleeplock.h" -#include "file.h" - -#define PIPESIZE 512 - -struct pipe { - struct spinlock lock; - char data[PIPESIZE]; - uint nread; // number of bytes read - uint nwrite; // number of bytes written - int readopen; // read fd is still open - int writeopen; // write fd is still open -}; - -int -pipealloc(struct file **f0, struct file **f1) -{ - struct pipe *p; - - p = 0; - *f0 = *f1 = 0; - if((*f0 = filealloc()) == 0 || (*f1 = filealloc()) == 0) - goto bad; - if((p = (struct pipe*)kalloc()) == 0) - goto bad; - p->readopen = 1; - p->writeopen = 1; - p->nwrite = 0; - p->nread = 0; - initlock(&p->lock, "pipe"); - (*f0)->type = FD_PIPE; - (*f0)->readable = 1; - (*f0)->writable = 0; - (*f0)->pipe = p; - (*f1)->type = FD_PIPE; - (*f1)->readable = 0; - (*f1)->writable = 1; - (*f1)->pipe = p; - return 0; - -//PAGEBREAK: 20 - bad: - if(p) - kfree((char*)p); - if(*f0) - fileclose(*f0); - if(*f1) - fileclose(*f1); - return -1; -} - -void -pipeclose(struct pipe *p, int writable) -{ - acquire(&p->lock); - if(writable){ - p->writeopen = 0; - wakeup(&p->nread); - } else { - p->readopen = 0; - wakeup(&p->nwrite); - } - if(p->readopen == 0 && p->writeopen == 0){ - release(&p->lock); - kfree((char*)p); - } else - release(&p->lock); -} - -//PAGEBREAK: 40 -int -pipewrite(struct pipe *p, 
char *addr, int n) -{ - int i; - - acquire(&p->lock); - for(i = 0; i < n; i++){ - while(p->nwrite == p->nread + PIPESIZE){ //DOC: pipewrite-full - if(p->readopen == 0 || myproc()->killed){ - release(&p->lock); - return -1; - } - wakeup(&p->nread); - sleep(&p->nwrite, &p->lock); //DOC: pipewrite-sleep - } - p->data[p->nwrite++ % PIPESIZE] = addr[i]; - } - wakeup(&p->nread); //DOC: pipewrite-wakeup1 - release(&p->lock); - return n; -} - -int -piperead(struct pipe *p, char *addr, int n) -{ - int i; - - acquire(&p->lock); - while(p->nread == p->nwrite && p->writeopen){ //DOC: pipe-empty - if(myproc()->killed){ - release(&p->lock); - return -1; - } - sleep(&p->nread, &p->lock); //DOC: piperead-sleep - } - for(i = 0; i < n; i++){ //DOC: piperead-copy - if(p->nread == p->nwrite) - break; - addr[i] = p->data[p->nread++ % PIPESIZE]; - } - wakeup(&p->nwrite); //DOC: piperead-wakeup - release(&p->lock); - return i; -} @@ -1,36 +0,0 @@ -#!/usr/bin/perl - -use POSIX qw(strftime); - -if($ARGV[0] eq "-h"){ - shift @ARGV; - $h = $ARGV[0]; - shift @ARGV; -}else{ - $h = $ARGV[0]; -} - -$page = 0; -$now = strftime "%b %e %H:%M %Y", localtime; - -@lines = <>; -for($i=0; $i<@lines; $i+=50){ - print "\n\n"; - ++$page; - print "$now $h Page $page\n"; - print "\n\n"; - for($j=$i; $j<@lines && $j<$i +50; $j++){ - $lines[$j] =~ s!//DOC.*!!; - print $lines[$j]; - } - for(; $j<$i+50; $j++){ - print "\n"; - } - $sheet = ""; - if($lines[$i] =~ /^([0-9][0-9])[0-9][0-9] /){ - $sheet = "Sheet $1"; - } - print "\n\n"; - print "$sheet\n"; - print "\n\n"; -} diff --git a/printpcs b/printpcs deleted file mode 100755 index 81d039b..0000000 --- a/printpcs +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh - -# Decode the symbols from a panic EIP list - -# Find a working addr2line -for p in i386-jos-elf-addr2line addr2line; do - if which $p 2>&1 >/dev/null && \ - $p -h 2>&1 | grep -q '\belf32-i386\b'; then - break - fi -done - -# Enable as much pretty-printing as this addr2line can do -$p $($p -h | grep ' 
-[aipsf] ' | awk '{print $1}') -e kernel "$@" @@ -1,534 +0,0 @@ -#include "types.h" -#include "defs.h" -#include "param.h" -#include "memlayout.h" -#include "mmu.h" -#include "x86.h" -#include "proc.h" -#include "spinlock.h" - -struct { - struct spinlock lock; - struct proc proc[NPROC]; -} ptable; - -static struct proc *initproc; - -int nextpid = 1; -extern void forkret(void); -extern void trapret(void); - -static void wakeup1(void *chan); - -void -pinit(void) -{ - initlock(&ptable.lock, "ptable"); -} - -// Must be called with interrupts disabled -int -cpuid() { - return mycpu()-cpus; -} - -// Must be called with interrupts disabled to avoid the caller being -// rescheduled between reading lapicid and running through the loop. -struct cpu* -mycpu(void) -{ - int apicid, i; - - if(readeflags()&FL_IF) - panic("mycpu called with interrupts enabled\n"); - - apicid = lapicid(); - // APIC IDs are not guaranteed to be contiguous. Maybe we should have - // a reverse map, or reserve a register to store &cpus[i]. - for (i = 0; i < ncpu; ++i) { - if (cpus[i].apicid == apicid) - return &cpus[i]; - } - panic("unknown apicid\n"); -} - -// Disable interrupts so that we are not rescheduled -// while reading proc from the cpu structure -struct proc* -myproc(void) { - struct cpu *c; - struct proc *p; - pushcli(); - c = mycpu(); - p = c->proc; - popcli(); - return p; -} - -//PAGEBREAK: 32 -// Look in the process table for an UNUSED proc. -// If found, change state to EMBRYO and initialize -// state required to run in the kernel. -// Otherwise return 0. -static struct proc* -allocproc(void) -{ - struct proc *p; - char *sp; - - acquire(&ptable.lock); - - for(p = ptable.proc; p < &ptable.proc[NPROC]; p++) - if(p->state == UNUSED) - goto found; - - release(&ptable.lock); - return 0; - -found: - p->state = EMBRYO; - p->pid = nextpid++; - - release(&ptable.lock); - - // Allocate kernel stack. 
- if((p->kstack = kalloc()) == 0){ - p->state = UNUSED; - return 0; - } - sp = p->kstack + KSTACKSIZE; - - // Leave room for trap frame. - sp -= sizeof *p->tf; - p->tf = (struct trapframe*)sp; - - // Set up new context to start executing at forkret, - // which returns to trapret. - sp -= 4; - *(uint*)sp = (uint)trapret; - - sp -= sizeof *p->context; - p->context = (struct context*)sp; - memset(p->context, 0, sizeof *p->context); - p->context->eip = (uint)forkret; - - return p; -} - -//PAGEBREAK: 32 -// Set up first user process. -void -userinit(void) -{ - struct proc *p; - extern char _binary_initcode_start[], _binary_initcode_size[]; - - p = allocproc(); - - initproc = p; - if((p->pgdir = setupkvm()) == 0) - panic("userinit: out of memory?"); - inituvm(p->pgdir, _binary_initcode_start, (int)_binary_initcode_size); - p->sz = PGSIZE; - memset(p->tf, 0, sizeof(*p->tf)); - p->tf->cs = (SEG_UCODE << 3) | DPL_USER; - p->tf->ds = (SEG_UDATA << 3) | DPL_USER; - p->tf->es = p->tf->ds; - p->tf->ss = p->tf->ds; - p->tf->eflags = FL_IF; - p->tf->esp = PGSIZE; - p->tf->eip = 0; // beginning of initcode.S - - safestrcpy(p->name, "initcode", sizeof(p->name)); - p->cwd = namei("/"); - - // this assignment to p->state lets other cores - // run this process. the acquire forces the above - // writes to be visible, and the lock is also needed - // because the assignment might not be atomic. - acquire(&ptable.lock); - - p->state = RUNNABLE; - - release(&ptable.lock); -} - -// Grow current process's memory by n bytes. -// Return 0 on success, -1 on failure. -int -growproc(int n) -{ - uint sz; - struct proc *curproc = myproc(); - - sz = curproc->sz; - if(n > 0){ - if((sz = allocuvm(curproc->pgdir, sz, sz + n)) == 0) - return -1; - } else if(n < 0){ - if((sz = deallocuvm(curproc->pgdir, sz, sz + n)) == 0) - return -1; - } - curproc->sz = sz; - switchuvm(curproc); - return 0; -} - -// Create a new process copying p as the parent. -// Sets up stack to return as if from system call. 
-// Caller must set state of returned proc to RUNNABLE. -int -fork(void) -{ - int i, pid; - struct proc *np; - struct proc *curproc = myproc(); - - // Allocate process. - if((np = allocproc()) == 0){ - return -1; - } - - // Copy process state from proc. - if((np->pgdir = copyuvm(curproc->pgdir, curproc->sz)) == 0){ - kfree(np->kstack); - np->kstack = 0; - np->state = UNUSED; - return -1; - } - np->sz = curproc->sz; - np->parent = curproc; - *np->tf = *curproc->tf; - - // Clear %eax so that fork returns 0 in the child. - np->tf->eax = 0; - - for(i = 0; i < NOFILE; i++) - if(curproc->ofile[i]) - np->ofile[i] = filedup(curproc->ofile[i]); - np->cwd = idup(curproc->cwd); - - safestrcpy(np->name, curproc->name, sizeof(curproc->name)); - - pid = np->pid; - - acquire(&ptable.lock); - - np->state = RUNNABLE; - - release(&ptable.lock); - - return pid; -} - -// Exit the current process. Does not return. -// An exited process remains in the zombie state -// until its parent calls wait() to find out it exited. -void -exit(void) -{ - struct proc *curproc = myproc(); - struct proc *p; - int fd; - - if(curproc == initproc) - panic("init exiting"); - - // Close all open files. - for(fd = 0; fd < NOFILE; fd++){ - if(curproc->ofile[fd]){ - fileclose(curproc->ofile[fd]); - curproc->ofile[fd] = 0; - } - } - - begin_op(); - iput(curproc->cwd); - end_op(); - curproc->cwd = 0; - - acquire(&ptable.lock); - - // Parent might be sleeping in wait(). - wakeup1(curproc->parent); - - // Pass abandoned children to init. - for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ - if(p->parent == curproc){ - p->parent = initproc; - if(p->state == ZOMBIE) - wakeup1(initproc); - } - } - - // Jump into the scheduler, never to return. - curproc->state = ZOMBIE; - sched(); - panic("zombie exit"); -} - -// Wait for a child process to exit and return its pid. -// Return -1 if this process has no children. 
-int -wait(void) -{ - struct proc *p; - int havekids, pid; - struct proc *curproc = myproc(); - - acquire(&ptable.lock); - for(;;){ - // Scan through table looking for exited children. - havekids = 0; - for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ - if(p->parent != curproc) - continue; - havekids = 1; - if(p->state == ZOMBIE){ - // Found one. - pid = p->pid; - kfree(p->kstack); - p->kstack = 0; - freevm(p->pgdir); - p->pid = 0; - p->parent = 0; - p->name[0] = 0; - p->killed = 0; - p->state = UNUSED; - release(&ptable.lock); - return pid; - } - } - - // No point waiting if we don't have any children. - if(!havekids || curproc->killed){ - release(&ptable.lock); - return -1; - } - - // Wait for children to exit. (See wakeup1 call in proc_exit.) - sleep(curproc, &ptable.lock); //DOC: wait-sleep - } -} - -//PAGEBREAK: 42 -// Per-CPU process scheduler. -// Each CPU calls scheduler() after setting itself up. -// Scheduler never returns. It loops, doing: -// - choose a process to run -// - swtch to start running that process -// - eventually that process transfers control -// via swtch back to the scheduler. -void -scheduler(void) -{ - struct proc *p; - struct cpu *c = mycpu(); - c->proc = 0; - - for(;;){ - // Enable interrupts on this processor. - sti(); - - // Loop over process table looking for process to run. - acquire(&ptable.lock); - for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ - if(p->state != RUNNABLE) - continue; - - // Switch to chosen process. It is the process's job - // to release ptable.lock and then reacquire it - // before jumping back to us. - c->proc = p; - switchuvm(p); - p->state = RUNNING; - - swtch(&(c->scheduler), p->context); - switchkvm(); - - // Process is done running for now. - // It should have changed its p->state before coming back. - c->proc = 0; - } - release(&ptable.lock); - - } -} - -// Enter scheduler. Must hold only ptable.lock -// and have changed proc->state. 
Saves and restores -// intena because intena is a property of this -// kernel thread, not this CPU. It should -// be proc->intena and proc->ncli, but that would -// break in the few places where a lock is held but -// there's no process. -void -sched(void) -{ - int intena; - struct proc *p = myproc(); - - if(!holding(&ptable.lock)) - panic("sched ptable.lock"); - if(mycpu()->ncli != 1) - panic("sched locks"); - if(p->state == RUNNING) - panic("sched running"); - if(readeflags()&FL_IF) - panic("sched interruptible"); - intena = mycpu()->intena; - swtch(&p->context, mycpu()->scheduler); - mycpu()->intena = intena; -} - -// Give up the CPU for one scheduling round. -void -yield(void) -{ - acquire(&ptable.lock); //DOC: yieldlock - myproc()->state = RUNNABLE; - sched(); - release(&ptable.lock); -} - -// A fork child's very first scheduling by scheduler() -// will swtch here. "Return" to user space. -void -forkret(void) -{ - static int first = 1; - // Still holding ptable.lock from scheduler. - release(&ptable.lock); - - if (first) { - // Some initialization functions must be run in the context - // of a regular process (e.g., they call sleep), and thus cannot - // be run from main(). - first = 0; - iinit(ROOTDEV); - initlog(ROOTDEV); - } - - // Return to "caller", actually trapret (see allocproc). -} - -// Atomically release lock and sleep on chan. -// Reacquires lock when awakened. -void -sleep(void *chan, struct spinlock *lk) -{ - struct proc *p = myproc(); - - if(p == 0) - panic("sleep"); - - if(lk == 0) - panic("sleep without lk"); - - // Must acquire ptable.lock in order to - // change p->state and then call sched. - // Once we hold ptable.lock, we can be - // guaranteed that we won't miss any wakeup - // (wakeup runs with ptable.lock locked), - // so it's okay to release lk. - if(lk != &ptable.lock){ //DOC: sleeplock0 - acquire(&ptable.lock); //DOC: sleeplock1 - release(lk); - } - // Go to sleep. 
- p->chan = chan; - p->state = SLEEPING; - - sched(); - - // Tidy up. - p->chan = 0; - - // Reacquire original lock. - if(lk != &ptable.lock){ //DOC: sleeplock2 - release(&ptable.lock); - acquire(lk); - } -} - -//PAGEBREAK! -// Wake up all processes sleeping on chan. -// The ptable lock must be held. -static void -wakeup1(void *chan) -{ - struct proc *p; - - for(p = ptable.proc; p < &ptable.proc[NPROC]; p++) - if(p->state == SLEEPING && p->chan == chan) - p->state = RUNNABLE; -} - -// Wake up all processes sleeping on chan. -void -wakeup(void *chan) -{ - acquire(&ptable.lock); - wakeup1(chan); - release(&ptable.lock); -} - -// Kill the process with the given pid. -// Process won't exit until it returns -// to user space (see trap in trap.c). -int -kill(int pid) -{ - struct proc *p; - - acquire(&ptable.lock); - for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ - if(p->pid == pid){ - p->killed = 1; - // Wake process from sleep if necessary. - if(p->state == SLEEPING) - p->state = RUNNABLE; - release(&ptable.lock); - return 0; - } - } - release(&ptable.lock); - return -1; -} - -//PAGEBREAK: 36 -// Print a process listing to console. For debugging. -// Runs when user types ^P on console. -// No lock to avoid wedging a stuck machine further. 
-void -procdump(void) -{ - static char *states[] = { - [UNUSED] "unused", - [EMBRYO] "embryo", - [SLEEPING] "sleep ", - [RUNNABLE] "runble", - [RUNNING] "run ", - [ZOMBIE] "zombie" - }; - int i; - struct proc *p; - char *state; - uint pc[10]; - - for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ - if(p->state == UNUSED) - continue; - if(p->state >= 0 && p->state < NELEM(states) && states[p->state]) - state = states[p->state]; - else - state = "???"; - cprintf("%d %s %s", p->pid, state, p->name); - if(p->state == SLEEPING){ - getcallerpcs((uint*)p->context->ebp+2, pc); - for(i=0; i<10 && pc[i] != 0; i++) - cprintf(" %p", pc[i]); - } - cprintf("\n"); - } -} @@ -1,58 +0,0 @@ -// Per-CPU state -struct cpu { - uchar apicid; // Local APIC ID - struct context *scheduler; // swtch() here to enter scheduler - struct taskstate ts; // Used by x86 to find stack for interrupt - struct segdesc gdt[NSEGS]; // x86 global descriptor table - volatile uint started; // Has the CPU started? - int ncli; // Depth of pushcli nesting. - int intena; // Were interrupts enabled before pushcli? - struct proc *proc; // The process running on this cpu or null -}; - -extern struct cpu cpus[NCPU]; -extern int ncpu; - -//PAGEBREAK: 17 -// Saved registers for kernel context switches. -// Don't need to save all the segment registers (%cs, etc), -// because they are constant across kernel contexts. -// Don't need to save %eax, %ecx, %edx, because the -// x86 convention is that the caller has saved them. -// Contexts are stored at the bottom of the stack they -// describe; the stack pointer is the address of the context. -// The layout of the context matches the layout of the stack in swtch.S -// at the "Switch stacks" comment. Switch doesn't save eip explicitly, -// but it is on the stack and allocproc() manipulates it. 
-struct context { - uint edi; - uint esi; - uint ebx; - uint ebp; - uint eip; -}; - -enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE }; - -// Per-process state -struct proc { - uint sz; // Size of process memory (bytes) - pde_t* pgdir; // Page table - char *kstack; // Bottom of kernel stack for this process - enum procstate state; // Process state - int pid; // Process ID - struct proc *parent; // Parent process - struct trapframe *tf; // Trap frame for current syscall - struct context *context; // swtch() here to run process - void *chan; // If non-zero, sleeping on chan - int killed; // If non-zero, have been killed - struct file *ofile[NOFILE]; // Open files - struct inode *cwd; // Current directory - char name[16]; // Process name (debugging) -}; - -// Process memory is laid out contiguously, low addresses first: -// text -// original data and bss -// fixed-size stack -// expandable heap @@ -1,246 +0,0 @@ -#!/bin/sh - -echo This script takes a minute to run. Be patient. 1>&2 - -LC_CTYPE=C export LC_CTYPE - -# pad stdin to multiple of 120 lines -pad() -{ - awk '{print} END{for(; NR%120!=0; NR++) print ""}' -} - -# create formatted (numbered) files -mkdir -p fmt -rm -f fmt/* -cp README fmt -echo > fmt/blank -files=`grep -v '^#' runoff.list | awk '{print $1}'` -n=99 -for i in $files -do - ./runoff1 -n $n $i >fmt/$i - nn=`tail -1 fmt/$i | sed 's/ .*//; s/^0*//'` - if [ "x$nn" != x ]; then - n=$nn - fi -done - -# create table of contents -cat toc.hdr >fmt/toc -pr -e8 -t runoff.list | awk ' -/^[a-z0-9]/ { - s=$0 - f="fmt/"$1 - getline<f - close(f) - n=$1 - printf("%02d %s\n", n/100, s); - printf("TOC: %04d %s\n", n, s) >"fmt/tocdata" - next -} -{ - print -}' | pr -3 -t >>fmt/toc -cat toc.ftr >>fmt/toc - -# check for bad alignments -perl -e ' - $leftwarn = 0; - while(<>){ - chomp; - s!#.*!!; - s!\s+! !g; - s! 
+$!!; - next if /^$/; - - if(/TOC: (\d+) (.*)/){ - $toc{$2} = $1; - next; - } - - if(/sheet1: (left|right)$/){ - print STDERR "assuming that sheet 1 is a $1 page. double-check!\n"; - $left = $1 eq "left" ? "13579" : "02468"; - $right = $1 eq "left" ? "02468" : "13579"; - next; - } - - if(/even: (.*)/){ - $file = $1; - if(!defined($toc{$file})){ - print STDERR "Have no toc for $file\n"; - next; - } - if($toc{$file} =~ /^\d\d[^0]/){ - print STDERR "$file does not start on a fresh page.\n"; - } - next; - } - - if(/odd: (.*)/){ - $file = $1; - if(!defined($toc{$file})){ - print STDERR "Have no toc for $file\n"; - next; - } - if($toc{$file} !~ /^\d\d5/){ - print STDERR "$file does not start on a second half page.\n"; - } - next; - } - - if(/(left|right): (.*)/){ - $what = $1; - $file = $2; - if(!defined($toc{$file})){ - print STDERR "Have no toc for $file\n"; - next; - } - if($what eq "left" && !($toc{$file} =~ /^\d[$left][05]/)){ - print STDERR "$file does not start on a left page [$toc{$file}]\n"; - } - # why does this not work if I inline $x in the if? - $x = ($toc{$file} =~ /^\d[$right][05]/); - if($what eq "right" && !$x){ - print STDERR "$file does not start on a right page [$toc{$file}] [$x]\n"; - } - next; - } - - print STDERR "Unknown spec: $_\n"; - } -' fmt/tocdata runoff.spec - -# make definition list -cd fmt -perl -e ' - while(<>) { - chomp; - - s!//.*!!; - s!/\*([^*]|[*][^/])*\*/!!g; - s!\s! !g; - s! 
+$!!; - - # look for declarations like char* x; - if (/^[0-9]+ typedef .* u(int|short|long|char);/) { - next; - } - if (/^[0-9]+ extern/) { - next; - } - if (/^[0-9]+ struct [a-zA-Z0-9_]+;/) { - next; - } - if (/^([0-9]+) #define +([A-za-z0-9_]+) +?\(.*/) { - print "$1 $2\n" - } - elsif (/^([0-9]+) #define +([A-Za-z0-9_]+) +([^ ]+)/) { - print "$1 $2 $3\n"; - } - elsif (/^([0-9]+) #define +([A-Za-z0-9_]+)/) { - print "$1 $2\n"; - } - - if(/^^([0-9]+) \.globl ([a-zA-Z0-9_]+)/){ - $isglobl{$2} = 1; - } - if(/^^([0-9]+) ([a-zA-Z0-9_]+):$/ && $isglobl{$2}){ - print "$1 $2\n"; - } - - if (/\(/) { - next; - } - - if (/^([0-9]+) (((static|struct|extern|union|enum) +)*([A-Za-z0-9_]+))( .*)? +([A-Za-z_][A-Za-z0-9_]*)(,|;|=| =)/) { - print "$1 $7\n"; - } - - elsif(/^([0-9]+) (enum|struct|union) +([A-Za-z0-9_]+) +{/){ - print "$1 $3\n"; - } - # TODO: enum members - } -' $files >defs - -(for i in $files -do - case "$i" in - *.S) - cat $i | sed 's;#.*;;; s;//.*;;;' - ;; - *) - cat $i | sed 's;//.*;;; s;"([^"\\]|\\.)*";;;' - esac -done -) >alltext - -perl -n -e 'print if s/^([0-9]+ [a-zA-Z0-9_]+)\(.*$/\1/;' alltext | - egrep -v ' (STUB|usage|main|if|for)$' >>defs -#perl -n -e 'print if s/^([0-9]+) STUB\(([a-zA-Z0-9_]+)\)$/\1 \2/;' alltext \ -# >>defs -( ->s.defs - -# make reference list -for i in `awk '{print $2}' defs | sort -f | uniq` -do - defs=`egrep '^[0-9]+ '$i'( |$)' defs | awk '{print $1}'` - echo $i $defs >>s.defs - uses=`egrep -h '([^a-zA-Z_0-9])'$i'($|[^a-zA-Z_0-9])' alltext | awk '{print $1}'` - if [ "x$defs" != "x$uses" ]; then - echo $i $defs - echo $uses |fmt -29 | sed 's/^/ /' -# else -# echo $i defined but not used >&2 - fi -done -) >refs - -# build defs list -awk ' -{ - printf("%04d %s\n", $2, $1); - for(i=3; i<=NF; i++) - printf("%04d \" \n", $i); -} -' s.defs > t.defs - -# format the whole thing -( - ../pr.pl README - ../pr.pl -h "table of contents" toc - # pr -t -2 t.defs | ../pr.pl -h "definitions" | pad - pr -t -l50 -2 refs | ../pr.pl -h "cross-references" 
| pad - # pr.pl -h "definitions" -2 t.defs | pad - # pr.pl -h "cross-references" -2 refs | pad - ../pr.pl blank # make sheet 1 start on left page - ../pr.pl blank - for i in $files - do - ../pr.pl -h "xv6/$i" $i - done -) | mpage -m50t50b -o -bLetter -T -t -2 -FCourier -L60 >all.ps -grep Pages: all.ps - -# if we have the nice font, use it -nicefont=LucidaSans-Typewriter83 -if [ ! -f ../$nicefont ] -then - if git cat-file blob font:$nicefont > ../$nicefont~; then - mv ../$nicefont~ ../$nicefont - fi -fi -if [ -f ../$nicefont ] -then - echo nicefont - (sed 1q all.ps; cat ../$nicefont; sed "1d; s/Courier/$nicefont/" all.ps) >allf.ps -else - echo ugly font! - cp all.ps allf.ps -fi -ps2pdf allf.ps ../xv6.pdf -# cd .. -# pdftops xv6.pdf xv6.ps diff --git a/runoff.list b/runoff.list deleted file mode 100644 index 2df9b81..0000000 --- a/runoff.list +++ /dev/null @@ -1,80 +0,0 @@ -# basic headers -types.h -param.h -memlayout.h -defs.h -x86.h -asm.h -mmu.h -elf.h -date.h - -# entering xv6 -entry.S -entryother.S -main.c - -# locks -spinlock.h -spinlock.c - -# processes -vm.c -proc.h -proc.c -swtch.S -kalloc.c - -# system calls -traps.h -vectors.pl -trapasm.S -trap.c -syscall.h -syscall.c -sysproc.c - -# file system -buf.h -sleeplock.h -fcntl.h -stat.h -fs.h -file.h -ide.c -bio.c -sleeplock.c -log.c -fs.c -file.c -sysfile.c -exec.c - -# pipes -pipe.c - -# string operations -string.c - -# low-level hardware -mp.h -mp.c -lapic.c -ioapic.c -kbd.h -kbd.c -console.c -uart.c - -# user-level -initcode.S -usys.S -init.c -sh.c - -# bootloader -bootasm.S -bootmain.c - -# link -kernel.ld diff --git a/runoff.spec b/runoff.spec deleted file mode 100644 index 9247948..0000000 --- a/runoff.spec +++ /dev/null @@ -1,102 +0,0 @@ -# Is sheet 01 (after the TOC) a left sheet or a right sheet? -sheet1: left - -# "left" and "right" specify which page of a two-page spread a file -# must start on. "left" means that a file must start on the first of -# the two pages. 
"right" means it must start on the second of the two -# pages. The file may start in either column. -# -# "even" and "odd" specify which column a file must start on. "even" -# means it must start in the left of the two columns (00). "odd" means it -# must start in the right of the two columns (50). -# -# You'd think these would be the other way around. - -# types.h either -# param.h either -# defs.h either -# x86.h either -# asm.h either -# mmu.h either -# elf.h either -# mp.h either - -even: entry.S # mild preference -even: entryother.S # mild preference -even: main.c -# mp.c don't care at all -# even: initcode.S -# odd: init.c - -left: spinlock.h -even: spinlock.h - -# This gets struct proc and allocproc on the same spread -left: proc.h -even: proc.h - -# goal is to have two action-packed 2-page spreads, -# one with -# userinit growproc fork exit wait -# and another with -# scheduler sched yield forkret sleep wakeup1 wakeup -right: proc.c # VERY important -even: proc.c # VERY important - -# A few more action packed spreads -# page table creation and process loading -# walkpgdir mappages setupkvm switch[ku]vm inituvm (loaduvm) -# process memory management -# allocuvm deallocuvm freevm -left: vm.c - -even: kalloc.c # mild preference - -# syscall.h either -# trapasm.S either -# traps.h either -# even: trap.c -# vectors.pl either -# syscall.c either -# sysproc.c either - -# buf.h either -# dev.h either -# fcntl.h either -# stat.h either -# file.h either -# fs.h either -# fsvar.h either -# left: ide.c # mild preference -even: ide.c -# odd: bio.c - -# log.c fits nicely in a spread -even: log.c -left: log.c - -# with fs.c starting on 2nd column of a left page, we get these 2-page spreads: -# ialloc iupdate iget idup ilock iunlock iput iunlockput -# bmap itrunc stati readi writei -# namecmp dirlookup dirlink skipelem namex namei -# fileinit filealloc filedup fileclose filestat fileread filewrite -# starting on 2nd column of a right page is not terrible either -odd: fs.c 
# VERY important -left: fs.c # mild preference -# file.c either -# exec.c either -# sysfile.c either - -# Mild preference, but makes spreads of mp.c, lapic.c, and ioapic.c+picirq.c -even: mp.c -left: mp.c - -# even: pipe.c # mild preference -# string.c either -# left: kbd.h # mild preference -even: kbd.h -even: console.c -odd: sh.c - -even: bootasm.S # mild preference -even: bootmain.c # mild preference diff --git a/runoff1 b/runoff1 deleted file mode 100755 index 532f844..0000000 --- a/runoff1 +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/perl - -$n = 0; -$v = 0; -if($ARGV[0] eq "-v") { - $v = 1; - shift @ARGV; -} -if($ARGV[0] eq "-n") { - $n = $ARGV[1]; - shift @ARGV; - shift @ARGV; -} -$n = int(($n+49)/50)*50 - 1; - -$file = $ARGV[0]; -@lines = <>; -$linenum = 0; -foreach (@lines) { - $linenum++; - chomp; - s/\s+$//; - if(length() >= 75){ - print STDERR "$file:$linenum: line too long\n"; - } -} -@outlines = (); -$nextout = 0; - -for($i=0; $i<@lines; ){ - # Skip leading blank lines. - $i++ while $i<@lines && $lines[$i] =~ /^$/; - last if $i>=@lines; - - # If the rest of the file fits, use the whole thing. - if(@lines <= $i+50 && !grep { /PAGEBREAK/ } @lines){ - $breakbefore = @lines; - }else{ - # Find a good next page break; - # Hope for end of function. - # but settle for a blank line (but not first blank line - # in function, which comes after variable declarations). 
- $breakbefore = $i; - $lastblank = $i; - $sawbrace = 0; - $breaksize = 15; # 15 lines to get to function - for($j=$i; $j<$i+50 && $j < @lines; $j++){ - if($lines[$j] =~ /PAGEBREAK!/){ - $lines[$j] = ""; - $breakbefore = $j; - $breaksize = 100; - last; - } - if($lines[$j] =~ /PAGEBREAK:\s*([0-9]+)/){ - $breaksize = $1; - $breakbefore = $j; - $lines[$j] = ""; - } - if($lines[$j] =~ /^};?$/){ - $breakbefore = $j+1; - $breaksize = 15; - } - if($lines[$j] =~ /^{$/){ - $sawbrace = 1; - } - if($lines[$j] =~ /^$/){ - if($sawbrace){ - $sawbrace = 0; - }else{ - $lastblank = $j; - } - } - } - if($j<@lines && $lines[$j] =~ /^$/){ - $lastblank = $j; - } - - # If we are not putting enough on a page, try a blank line. - if($breakbefore - $i < 50 - $breaksize && $lastblank > $breakbefore && $lastblank >= $i+50 - 5){ - if($v){ - print STDERR "breakbefore $breakbefore i $i breaksize $breaksize\n"; - } - $breakbefore = $lastblank; - $breaksize = 5; # only 5 lines to get to blank line - } - - # If we are not putting enough on a page, force a full page. - if($breakbefore - $i < 50 - $breaksize && $breakbefore != @lines){ - $breakbefore = $i + 50; - $breakbefore = @lines if @lines < $breakbefore; - } - - if($breakbefore < $i+2){ - $breakbefore = $i+2; - } - } - - # Emit the page. 
- $i50 = $i + 50; - for(; $i<$breakbefore; $i++){ - printf "%04d %s\n", ++$n, $lines[$i]; - } - - # Finish page - for($j=$i; $j<$i50; $j++){ - printf "%04d \n", ++$n; - } -} @@ -1,3 +0,0 @@ -#!/bin/sh - -runoff1 "$@" | pr.pl -h "xv6/$@" | mpage -m50t50b -o -bLetter -T -t -2 -FLucidaSans-Typewriter83 -L60 >x.ps; gv --swap x.ps diff --git a/sign.pl b/sign.pl deleted file mode 100755 index d793035..0000000 --- a/sign.pl +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/perl - -open(SIG, $ARGV[0]) || die "open $ARGV[0]: $!"; - -$n = sysread(SIG, $buf, 1000); - -if($n > 510){ - print STDERR "boot block too large: $n bytes (max 510)\n"; - exit 1; -} - -print STDERR "boot block is $n bytes (max 510)\n"; - -$buf .= "\0" x (510-$n); -$buf .= "\x55\xAA"; - -open(SIG, ">$ARGV[0]") || die "open >$ARGV[0]: $!"; -print SIG $buf; -close SIG; diff --git a/sleep1.p b/sleep1.p deleted file mode 100644 index af69772..0000000 --- a/sleep1.p +++ /dev/null @@ -1,134 +0,0 @@ -/* -This file defines a Promela model for xv6's -acquire, release, sleep, and wakeup, along with -a model of a simple producer/consumer queue. - -To run: - spinp sleep1.p - -(You may need to install Spin, available at http://spinroot.com/.) - -After a successful run spin prints something like: - - unreached in proctype consumer - (0 of 37 states) - unreached in proctype producer - (0 of 23 states) - -After an unsuccessful run, the spinp script prints -an execution trace that causes a deadlock. - -The safe body of producer reads: - - acquire(lk); - x = value; value = x + 1; x = 0; - wakeup(0); - release(lk); - i = i + 1; - -If this is changed to: - - x = value; value = x + 1; x = 0; - acquire(lk); - wakeup(0); - release(lk); - i = i + 1; - -then a deadlock can happen, because the non-atomic -increment of value conflicts with the non-atomic -decrement in consumer, causing value to have a bad value. -Try this. 
- -If it is changed to: - - acquire(lk); - x = value; value = x + 1; x = 0; - release(lk); - wakeup(0); - i = i + 1; - -then nothing bad happens: it is okay to wakeup after release -instead of before, although it seems morally wrong. -*/ - -#define ITER 4 -#define N 2 - -bit lk; -byte value; -bit sleeping[N]; - -inline acquire(x) -{ - atomic { x == 0; x = 1 } -} - -inline release(x) -{ - assert x==1; - x = 0 -} - -inline sleep(cond, lk) -{ - assert !sleeping[_pid]; - if - :: cond -> - skip - :: else -> - atomic { release(lk); sleeping[_pid] = 1 }; - sleeping[_pid] == 0; - acquire(lk) - fi -} - -inline wakeup() -{ - w = 0; - do - :: w < N -> - sleeping[w] = 0; - w = w + 1 - :: else -> - break - od -} - -active[N] proctype consumer() -{ - byte i, x; - - i = 0; - do - :: i < ITER -> - acquire(lk); - sleep(value > 0, lk); - x = value; value = x - 1; x = 0; - release(lk); - i = i + 1; - :: else -> - break - od; - i = 0; - skip -} - -active[N] proctype producer() -{ - byte i, x, w; - - i = 0; - do - :: i < ITER -> - acquire(lk); - x = value; value = x + 1; x = 0; - release(lk); - wakeup(); - i = i + 1; - :: else -> - break - od; - i = 0; - skip -} - diff --git a/spinlock.c b/spinlock.c deleted file mode 100644 index 4020186..0000000 --- a/spinlock.c +++ /dev/null @@ -1,126 +0,0 @@ -// Mutual exclusion spin locks. - -#include "types.h" -#include "defs.h" -#include "param.h" -#include "x86.h" -#include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "spinlock.h" - -void -initlock(struct spinlock *lk, char *name) -{ - lk->name = name; - lk->locked = 0; - lk->cpu = 0; -} - -// Acquire the lock. -// Loops (spins) until the lock is acquired. -// Holding a lock for a long time may cause -// other CPUs to waste time spinning to acquire it. -void -acquire(struct spinlock *lk) -{ - pushcli(); // disable interrupts to avoid deadlock. - if(holding(lk)) - panic("acquire"); - - // The xchg is atomic. 
- while(xchg(&lk->locked, 1) != 0) - ; - - // Tell the C compiler and the processor to not move loads or stores - // past this point, to ensure that the critical section's memory - // references happen after the lock is acquired. - __sync_synchronize(); - - // Record info about lock acquisition for debugging. - lk->cpu = mycpu(); - getcallerpcs(&lk, lk->pcs); -} - -// Release the lock. -void -release(struct spinlock *lk) -{ - if(!holding(lk)) - panic("release"); - - lk->pcs[0] = 0; - lk->cpu = 0; - - // Tell the C compiler and the processor to not move loads or stores - // past this point, to ensure that all the stores in the critical - // section are visible to other cores before the lock is released. - // Both the C compiler and the hardware may re-order loads and - // stores; __sync_synchronize() tells them both not to. - __sync_synchronize(); - - // Release the lock, equivalent to lk->locked = 0. - // This code can't use a C assignment, since it might - // not be atomic. A real OS would use C atomics here. - asm volatile("movl $0, %0" : "+m" (lk->locked) : ); - - popcli(); -} - -// Record the current call stack in pcs[] by following the %ebp chain. -void -getcallerpcs(void *v, uint pcs[]) -{ - uint *ebp; - int i; - - ebp = (uint*)v - 2; - for(i = 0; i < 10; i++){ - if(ebp == 0 || ebp < (uint*)KERNBASE || ebp == (uint*)0xffffffff) - break; - pcs[i] = ebp[1]; // saved %eip - ebp = (uint*)ebp[0]; // saved %ebp - } - for(; i < 10; i++) - pcs[i] = 0; -} - -// Check whether this cpu is holding the lock. -int -holding(struct spinlock *lock) -{ - int r; - pushcli(); - r = lock->locked && lock->cpu == mycpu(); - popcli(); - return r; -} - - -// Pushcli/popcli are like cli/sti except that they are matched: -// it takes two popcli to undo two pushcli. Also, if interrupts -// are off, then pushcli, popcli leaves them off. 
- -void -pushcli(void) -{ - int eflags; - - eflags = readeflags(); - cli(); - if(mycpu()->ncli == 0) - mycpu()->intena = eflags & FL_IF; - mycpu()->ncli += 1; -} - -void -popcli(void) -{ - if(readeflags()&FL_IF) - panic("popcli - interruptible"); - if(--mycpu()->ncli < 0) - panic("popcli"); - if(mycpu()->ncli == 0 && mycpu()->intena) - sti(); -} - @@ -1,16 +0,0 @@ -#!/bin/sh - -if [ $# != 1 ] || [ ! -f "$1" ]; then - echo 'usage: spinp file.p' 1>&2 - exit 1 -fi - -rm -f $1.trail -spin -a $1 || exit 1 -cc -DSAFETY -DREACH -DMEMLIM=500 -o pan pan.c -pan -i -rm pan.* pan -if [ -f $1.trail ]; then - spin -t -p $1 -fi - diff --git a/swtch.S b/swtch.S deleted file mode 100644 index 63a7dcc..0000000 --- a/swtch.S +++ /dev/null @@ -1,29 +0,0 @@ -# Context switch -# -# void swtch(struct context **old, struct context *new); -# -# Save the current registers on the stack, creating -# a struct context, and save its address in *old. -# Switch stacks to new and pop previously-saved registers. - -.globl swtch -swtch: - movl 4(%esp), %eax - movl 8(%esp), %edx - - # Save old callee-saved registers - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - - # Switch stacks - movl %esp, (%eax) - movl %edx, %esp - - # Load new callee-saved registers - popl %edi - popl %esi - popl %ebx - popl %ebp - ret diff --git a/syscall.c b/syscall.c deleted file mode 100644 index ee85261..0000000 --- a/syscall.c +++ /dev/null @@ -1,145 +0,0 @@ -#include "types.h" -#include "defs.h" -#include "param.h" -#include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" -#include "syscall.h" - -// User code makes a system call with INT T_SYSCALL. -// System call number in %eax. -// Arguments on the stack, from the user call to the C -// library system call function. The saved user %esp points -// to a saved program counter, and then the first argument. - -// Fetch the int at addr from the current process. 
-int -fetchint(uint addr, int *ip) -{ - struct proc *curproc = myproc(); - - if(addr >= curproc->sz || addr+4 > curproc->sz) - return -1; - *ip = *(int*)(addr); - return 0; -} - -// Fetch the nul-terminated string at addr from the current process. -// Doesn't actually copy the string - just sets *pp to point at it. -// Returns length of string, not including nul. -int -fetchstr(uint addr, char **pp) -{ - char *s, *ep; - struct proc *curproc = myproc(); - - if(addr >= curproc->sz) - return -1; - *pp = (char*)addr; - ep = (char*)curproc->sz; - for(s = *pp; s < ep; s++){ - if(*s == 0) - return s - *pp; - } - return -1; -} - -// Fetch the nth 32-bit system call argument. -int -argint(int n, int *ip) -{ - return fetchint((myproc()->tf->esp) + 4 + 4*n, ip); -} - -// Fetch the nth word-sized system call argument as a pointer -// to a block of memory of size bytes. Check that the pointer -// lies within the process address space. -int -argptr(int n, char **pp, int size) -{ - int i; - struct proc *curproc = myproc(); - - if(argint(n, &i) < 0) - return -1; - if(size < 0 || (uint)i >= curproc->sz || (uint)i+size > curproc->sz) - return -1; - *pp = (char*)i; - return 0; -} - -// Fetch the nth word-sized system call argument as a string pointer. -// Check that the pointer is valid and the string is nul-terminated. -// (There is no shared writable memory, so the string can't change -// between this check and being used by the kernel.) 
-int -argstr(int n, char **pp) -{ - int addr; - if(argint(n, &addr) < 0) - return -1; - return fetchstr(addr, pp); -} - -extern int sys_chdir(void); -extern int sys_close(void); -extern int sys_dup(void); -extern int sys_exec(void); -extern int sys_exit(void); -extern int sys_fork(void); -extern int sys_fstat(void); -extern int sys_getpid(void); -extern int sys_kill(void); -extern int sys_link(void); -extern int sys_mkdir(void); -extern int sys_mknod(void); -extern int sys_open(void); -extern int sys_pipe(void); -extern int sys_read(void); -extern int sys_sbrk(void); -extern int sys_sleep(void); -extern int sys_unlink(void); -extern int sys_wait(void); -extern int sys_write(void); -extern int sys_uptime(void); - -static int (*syscalls[])(void) = { -[SYS_fork] sys_fork, -[SYS_exit] sys_exit, -[SYS_wait] sys_wait, -[SYS_pipe] sys_pipe, -[SYS_read] sys_read, -[SYS_kill] sys_kill, -[SYS_exec] sys_exec, -[SYS_fstat] sys_fstat, -[SYS_chdir] sys_chdir, -[SYS_dup] sys_dup, -[SYS_getpid] sys_getpid, -[SYS_sbrk] sys_sbrk, -[SYS_sleep] sys_sleep, -[SYS_uptime] sys_uptime, -[SYS_open] sys_open, -[SYS_write] sys_write, -[SYS_mknod] sys_mknod, -[SYS_unlink] sys_unlink, -[SYS_link] sys_link, -[SYS_mkdir] sys_mkdir, -[SYS_close] sys_close, -}; - -void -syscall(void) -{ - int num; - struct proc *curproc = myproc(); - - num = curproc->tf->eax; - if(num > 0 && num < NELEM(syscalls) && syscalls[num]) { - curproc->tf->eax = syscalls[num](); - } else { - cprintf("%d %s: unknown sys call %d\n", - curproc->pid, curproc->name, num); - curproc->tf->eax = -1; - } -} diff --git a/toc.ftr b/toc.ftr deleted file mode 100644 index 0061c1d..0000000 --- a/toc.ftr +++ /dev/null @@ -1,13 +0,0 @@ - - -The source listing is preceded by a cross-reference that lists every defined -constant, struct, global variable, and function in xv6. Each entry gives, -on the same line as the name, the line number (or, in a few cases, numbers) -where the name is defined. 
Successive lines in an entry list the line -numbers where the name is used. For example, this entry: - - swtch 2658 - 0374 2428 2466 2657 2658 - -indicates that swtch is defined on line 2658 and is mentioned on five lines -on sheets 03, 24, and 26. diff --git a/toc.hdr b/toc.hdr deleted file mode 100644 index 3698d81..0000000 --- a/toc.hdr +++ /dev/null @@ -1,6 +0,0 @@ -The numbers to the left of the file names in the table are sheet numbers. -The source code has been printed in a double column format with fifty -lines per column, giving one hundred lines per sheet (or page). -Thus there is a convenient relationship between line numbers and sheet numbers. - - @@ -1,112 +0,0 @@ -#include "types.h" -#include "defs.h" -#include "param.h" -#include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" -#include "traps.h" -#include "spinlock.h" - -// Interrupt descriptor table (shared by all CPUs). -struct gatedesc idt[256]; -extern uint vectors[]; // in vectors.S: array of 256 entry pointers -struct spinlock tickslock; -uint ticks; - -void -tvinit(void) -{ - int i; - - for(i = 0; i < 256; i++) - SETGATE(idt[i], 0, SEG_KCODE<<3, vectors[i], 0); - SETGATE(idt[T_SYSCALL], 1, SEG_KCODE<<3, vectors[T_SYSCALL], DPL_USER); - - initlock(&tickslock, "time"); -} - -void -idtinit(void) -{ - lidt(idt, sizeof(idt)); -} - -//PAGEBREAK: 41 -void -trap(struct trapframe *tf) -{ - if(tf->trapno == T_SYSCALL){ - if(myproc()->killed) - exit(); - myproc()->tf = tf; - syscall(); - if(myproc()->killed) - exit(); - return; - } - - switch(tf->trapno){ - case T_IRQ0 + IRQ_TIMER: - if(cpuid() == 0){ - acquire(&tickslock); - ticks++; - wakeup(&ticks); - release(&tickslock); - } - lapiceoi(); - break; - case T_IRQ0 + IRQ_IDE: - ideintr(); - lapiceoi(); - break; - case T_IRQ0 + IRQ_IDE+1: - // Bochs generates spurious IDE1 interrupts. 
- break; - case T_IRQ0 + IRQ_KBD: - kbdintr(); - lapiceoi(); - break; - case T_IRQ0 + IRQ_COM1: - uartintr(); - lapiceoi(); - break; - case T_IRQ0 + 7: - case T_IRQ0 + IRQ_SPURIOUS: - cprintf("cpu%d: spurious interrupt at %x:%x\n", - cpuid(), tf->cs, tf->eip); - lapiceoi(); - break; - - //PAGEBREAK: 13 - default: - if(myproc() == 0 || (tf->cs&3) == 0){ - // In kernel, it must be our mistake. - cprintf("unexpected trap %d from cpu %d eip %x (cr2=0x%x)\n", - tf->trapno, cpuid(), tf->eip, rcr2()); - panic("trap"); - } - // In user space, assume process misbehaved. - cprintf("pid %d %s: trap %d err %d on cpu %d " - "eip 0x%x addr 0x%x--kill proc\n", - myproc()->pid, myproc()->name, tf->trapno, - tf->err, cpuid(), tf->eip, rcr2()); - myproc()->killed = 1; - } - - // Force process exit if it has been killed and is in user space. - // (If it is still executing in the kernel, let it keep running - // until it gets to the regular system call return.) - if(myproc() && myproc()->killed && (tf->cs&3) == DPL_USER) - exit(); - - // Force process to give up CPU on clock tick. - // If interrupts were on while locks held, would need to check nlock. - if(myproc() && myproc()->state == RUNNING && - tf->trapno == T_IRQ0+IRQ_TIMER) - yield(); - - // Check if the process has been killed since we yielded - if(myproc() && myproc()->killed && (tf->cs&3) == DPL_USER) - exit(); -} diff --git a/trapasm.S b/trapasm.S deleted file mode 100644 index da8aefc..0000000 --- a/trapasm.S +++ /dev/null @@ -1,32 +0,0 @@ -#include "mmu.h" - - # vectors.S sends all traps here. -.globl alltraps -alltraps: - # Build trap frame. - pushl %ds - pushl %es - pushl %fs - pushl %gs - pushal - - # Set up data segments. - movw $(SEG_KDATA<<3), %ax - movw %ax, %ds - movw %ax, %es - - # Call trap(tf), where tf=%esp - pushl %esp - call trap - addl $4, %esp - - # Return falls through to trapret... 
-.globl trapret -trapret: - popal - popl %gs - popl %fs - popl %es - popl %ds - addl $0x8, %esp # trapno and errcode - iret diff --git a/traps.h b/traps.h deleted file mode 100644 index 0bd1fd8..0000000 --- a/traps.h +++ /dev/null @@ -1,38 +0,0 @@ -// x86 trap and interrupt constants. - -// Processor-defined: -#define T_DIVIDE 0 // divide error -#define T_DEBUG 1 // debug exception -#define T_NMI 2 // non-maskable interrupt -#define T_BRKPT 3 // breakpoint -#define T_OFLOW 4 // overflow -#define T_BOUND 5 // bounds check -#define T_ILLOP 6 // illegal opcode -#define T_DEVICE 7 // device not available -#define T_DBLFLT 8 // double fault -// #define T_COPROC 9 // reserved (not used since 486) -#define T_TSS 10 // invalid task switch segment -#define T_SEGNP 11 // segment not present -#define T_STACK 12 // stack exception -#define T_GPFLT 13 // general protection fault -#define T_PGFLT 14 // page fault -// #define T_RES 15 // reserved -#define T_FPERR 16 // floating point error -#define T_ALIGN 17 // aligment check -#define T_MCHK 18 // machine check -#define T_SIMDERR 19 // SIMD floating point error - -// These are arbitrarily chosen, but with care not to overlap -// processor defined exceptions or interrupt vectors. -#define T_SYSCALL 64 // system call -#define T_DEFAULT 500 // catchall - -#define T_IRQ0 32 // IRQ 0 corresponds to int T_IRQ - -#define IRQ_TIMER 0 -#define IRQ_KBD 1 -#define IRQ_COM1 4 -#define IRQ_IDE 14 -#define IRQ_ERROR 19 -#define IRQ_SPURIOUS 31 - diff --git a/types.h b/types.h deleted file mode 100644 index e4adf64..0000000 --- a/types.h +++ /dev/null @@ -1,4 +0,0 @@ -typedef unsigned int uint; -typedef unsigned short ushort; -typedef unsigned char uchar; -typedef uint pde_t; @@ -1,77 +0,0 @@ -// Intel 8250 serial port (UART). 
- -#include "types.h" -#include "defs.h" -#include "param.h" -#include "traps.h" -#include "spinlock.h" -#include "sleeplock.h" -#include "fs.h" -#include "file.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" - -#define COM1 0x3f8 - -static int uart; // is there a uart? - -void -uartinit(void) -{ - char *p; - - // Turn off the FIFO - outb(COM1+2, 0); - - // 9600 baud, 8 data bits, 1 stop bit, parity off. - outb(COM1+3, 0x80); // Unlock divisor - outb(COM1+0, 115200/9600); - outb(COM1+1, 0); - outb(COM1+3, 0x03); // Lock divisor, 8 data bits. - outb(COM1+4, 0); - outb(COM1+1, 0x01); // Enable receive interrupts. - - // If status is 0xFF, no serial port. - if(inb(COM1+5) == 0xFF) - return; - uart = 1; - - // Acknowledge pre-existing interrupt conditions; - // enable interrupts. - inb(COM1+2); - inb(COM1+0); - ioapicenable(IRQ_COM1, 0); - - // Announce that we're here. - for(p="xv6...\n"; *p; p++) - uartputc(*p); -} - -void -uartputc(int c) -{ - int i; - - if(!uart) - return; - for(i = 0; i < 128 && !(inb(COM1+5) & 0x20); i++) - microdelay(10); - outb(COM1+0, c); -} - -static int -uartgetc(void) -{ - if(!uart) - return -1; - if(!(inb(COM1+5) & 0x01)) - return -1; - return inb(COM1+0); -} - -void -uartintr(void) -{ - consoleintr(uartgetc); -} diff --git a/user/alarmtest.c b/user/alarmtest.c new file mode 100644 index 0000000..c6da547 --- /dev/null +++ b/user/alarmtest.c @@ -0,0 +1,88 @@ +// +// test program for the alarm lab. +// you can modify this file for testing, +// but please make sure your kernel +// modifications pass the original +// versions of these tests. 
+// + +#include "kernel/param.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "kernel/riscv.h" +#include "user/user.h" + +void test0(); +void test1(); +void periodic(); + +int +main(int argc, char *argv[]) +{ + test0(); + test1(); + exit(); +} + +volatile static int count; + +void +periodic() +{ + count = count + 1; + printf(1, "alarm!\n"); + sigreturn(); +} + +// tests whether the kernel calls +// the alarm handler even a single time. +void +test0() +{ + int i; + printf(1, "test0 start\n"); + count = 0; + sigalarm(2, periodic); + for(i = 0; i < 1000*500000; i++){ + if((i % 250000) == 0) + write(2, ".", 1); + if(count > 0) + break; + } + sigalarm(0, 0); + if(count > 0){ + printf(1, "test0 passed\n"); + } else { + printf(1, "test0 failed\n"); + } +} + +void __attribute__ ((noinline)) foo(int i, int *j) { + if((i % 2500000) == 0) { + write(2, ".", 1); + } + *j += 1; +} + +void +test1() +{ + int i; + int j; + + printf(1, "test1 start\n"); + count = 0; + j = 0; + sigalarm(2, periodic); + for(i = 0; i < 500000000; i++){ + if(count >= 10) + break; + foo(i, &j); + } + if(i != j || count < 10){ + // i should equal j + printf(1, "test1 failed\n"); + } else { + printf(1, "test1 passed\n"); + } +} @@ -1,6 +1,6 @@ -#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" char buf[512]; diff --git a/user/cow.c b/user/cow.c new file mode 100644 index 0000000..0426600 --- /dev/null +++ b/user/cow.c @@ -0,0 +1,196 @@ +// +// tests for copy-on-write fork() assignment. +// + +#include "kernel/types.h" +#include "kernel/memlayout.h" +#include "user/user.h" + +// allocate more than half of physical memory, +// then fork. this will fail in the default +// kernel, which does not support copy-on-write. 
+void +simpletest() +{ + uint64 phys_size = PHYSTOP - KERNBASE; + int sz = (phys_size / 3) * 2; + + printf(1, "simple: "); + + char *p = sbrk(sz); + if(p == (char*)0xffffffffffffffffL){ + printf(1, "sbrk(%d) failed\n", sz); + exit(); + } + + for(char *q = p; q < p + sz; q += 4096){ + *(int*)q = getpid(); + } + + int pid = fork(); + if(pid < 0){ + printf(1, "fork() failed\n"); + exit(); + } + + if(pid == 0) + exit(); + + wait(); + + if(sbrk(-sz) == (char*)0xffffffffffffffffL){ + printf(1, "sbrk(-%d) failed\n", sz); + exit(); + } + + printf(1, "ok\n"); +} + +// three processes all write COW memory. +// this causes more than half of physical memory +// to be allocated, so it also checks whether +// copied pages are freed. +void +threetest() +{ + uint64 phys_size = PHYSTOP - KERNBASE; + int sz = phys_size / 4; + int pid1, pid2; + + printf(1, "three: "); + + char *p = sbrk(sz); + if(p == (char*)0xffffffffffffffffL){ + printf(1, "sbrk(%d) failed\n", sz); + exit(); + } + + pid1 = fork(); + if(pid1 < 0){ + printf(1, "fork failed\n"); + exit(); + } + if(pid1 == 0){ + pid2 = fork(); + if(pid2 < 0){ + printf(1, "fork failed"); + exit(); + } + if(pid2 == 0){ + for(char *q = p; q < p + (sz/5)*4; q += 4096){ + *(int*)q = getpid(); + } + for(char *q = p; q < p + (sz/5)*4; q += 4096){ + if(*(int*)q != getpid()){ + printf(1, "wrong content\n"); + exit(); + } + } + exit(); + } + for(char *q = p; q < p + (sz/2); q += 4096){ + *(int*)q = 9999; + } + exit(); + } + + for(char *q = p; q < p + sz; q += 4096){ + *(int*)q = getpid(); + } + + wait(); + + sleep(1); + + for(char *q = p; q < p + sz; q += 4096){ + if(*(int*)q != getpid()){ + printf(1, "wrong content\n"); + exit(); + } + } + + if(sbrk(-sz) == (char*)0xffffffffffffffffL){ + printf(1, "sbrk(-%d) failed\n", sz); + exit(); + } + + printf(1, "ok\n"); +} + +char junk1[4096]; +int fds[2]; +char junk2[4096]; +char buf[4096]; +char junk3[4096]; + +// test whether copyout() simulates COW faults. 
+void +filetest() +{ + int parent = getpid(); + + printf(1, "file: "); + + buf[0] = 99; + + for(int i = 0; i < 4; i++){ + if(pipe(fds) != 0){ + printf(1, "pipe() failed\n"); + exit(); + } + int pid = fork(); + if(pid < 0){ + printf(1, "fork failed\n"); + exit(); + } + if(pid == 0){ + sleep(1); + if(read(fds[0], buf, sizeof(i)) != sizeof(i)){ + printf(1, "read failed\n"); + kill(parent); + exit(); + } + sleep(1); + int j = *(int*)buf; + if(j != i){ + printf(1, "read the wrong value\n"); + kill(parent); + exit(); + } + exit(); + } + if(write(fds[1], &i, sizeof(i)) != sizeof(i)){ + printf(1, "write failed\n"); + exit(); + } + } + + for(int i = 0; i < 4; i++) + wait(); + + if(buf[0] != 99){ + printf(1, "child overwrote parent\n"); + exit(); + } + + printf(1, "ok\n"); +} + +int +main(int argc, char *argv[]) +{ + simpletest(); + + // check that the first simpletest() freed the physical memory. + simpletest(); + + threetest(); + threetest(); + threetest(); + + filetest(); + + printf(1, "ALL COW TESTS PASSED\n"); + + exit(); +} @@ -1,6 +1,6 @@ -#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" int main(int argc, char *argv[]) diff --git a/forktest.c b/user/forktest.c index 8bc984d..be4915e 100644 --- a/forktest.c +++ b/user/forktest.c @@ -1,9 +1,9 @@ // Test that fork fails gracefully. // Tiny executable so that the limit can be filling the proc table. -#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" #define N 1000 @@ -1,8 +1,8 @@ // Simple grep. Only supports ^ . * $ operators. 
-#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" char buf[1024]; int match(char*, char*); @@ -1,9 +1,9 @@ // init: The initial user-level program -#include "types.h" -#include "stat.h" -#include "user.h" -#include "fcntl.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" +#include "kernel/fcntl.h" char *argv[] = { "sh", 0 }; @@ -31,7 +31,8 @@ main(void) printf(1, "init: exec sh failed\n"); exit(); } - while((wpid=wait()) >= 0 && wpid != pid) - printf(1, "zombie!\n"); + while((wpid=wait()) >= 0 && wpid != pid){ + //printf(1, "zombie!\n"); + } } } diff --git a/initcode.S b/user/initcode.S index 80ac5d8..ca76972 100644 --- a/initcode.S +++ b/user/initcode.S @@ -2,23 +2,20 @@ # This code runs in user space. #include "syscall.h" -#include "traps.h" - # exec(init, argv) .globl start start: - pushl $argv - pushl $init - pushl $0 // where caller pc would be - movl $SYS_exec, %eax - int $T_SYSCALL + la a0, init + la a1, argv + li a7, SYS_exec + ecall # for(;;) exit(); exit: - movl $SYS_exit, %eax - int $T_SYSCALL - jmp exit + li a7, SYS_exit + ecall + jal exit # char init[] = "/init\0"; init: @@ -29,4 +26,3 @@ init: argv: .long init .long 0 - @@ -1,6 +1,6 @@ -#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" int main(int argc, char **argv) @@ -1,6 +1,6 @@ -#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" int main(int argc, char *argv[]) @@ -1,7 +1,7 @@ -#include "types.h" -#include "stat.h" -#include "user.h" -#include "fs.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" +#include "kernel/fs.h" char* fmtname(char *path) @@ -43,7 +43,7 @@ ls(char *path) switch(st.type){ case T_FILE: - printf(1, "%s %d %d %d\n", fmtname(path), st.type, st.ino, st.size); + printf(1, 
"%s %d %d %l\n", fmtname(path), st.type, st.ino, st.size); break; case T_DIR: @@ -1,6 +1,6 @@ -#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" int main(int argc, char *argv[]) @@ -1,6 +1,10 @@ -#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" + +#include <stdarg.h> + +static char digits[] = "0123456789ABCDEF"; static void putc(int fd, char c) @@ -11,7 +15,6 @@ putc(int fd, char c) static void printint(int fd, int xx, int base, int sgn) { - static char digits[] = "0123456789ABCDEF"; char buf[16]; int i, neg; uint x; @@ -35,16 +38,25 @@ printint(int fd, int xx, int base, int sgn) putc(fd, buf[i]); } +static void +printptr(int fd, uint64 x) { + int i; + putc(fd, '0'); + putc(fd, 'x'); + for (i = 0; i < (sizeof(uint64) * 2); i++, x <<= 4) + putc(fd, digits[x >> (sizeof(uint64) * 8 - 4)]); +} + // Print to the given fd. Only understands %d, %x, %p, %s. void printf(int fd, const char *fmt, ...) { + va_list ap; char *s; int c, i, state; - uint *ap; + va_start(ap, fmt); state = 0; - ap = (uint*)(void*)&fmt + 1; for(i = 0; fmt[i]; i++){ c = fmt[i] & 0xff; if(state == 0){ @@ -55,14 +67,15 @@ printf(int fd, const char *fmt, ...) } } else if(state == '%'){ if(c == 'd'){ - printint(fd, *ap, 10, 1); - ap++; - } else if(c == 'x' || c == 'p'){ - printint(fd, *ap, 16, 0); - ap++; + printint(fd, va_arg(ap, int), 10, 1); + } else if(c == 'l') { + printint(fd, va_arg(ap, uint64), 10, 0); + } else if(c == 'x') { + printint(fd, va_arg(ap, int), 16, 0); + } else if(c == 'p') { + printptr(fd, va_arg(ap, uint64)); } else if(c == 's'){ - s = (char*)*ap; - ap++; + s = va_arg(ap, char*); if(s == 0) s = "(null)"; while(*s != 0){ @@ -70,8 +83,7 @@ printf(int fd, const char *fmt, ...) 
s++; } } else if(c == 'c'){ - putc(fd, *ap); - ap++; + putc(fd, va_arg(ap, uint)); } else if(c == '%'){ putc(fd, c); } else { @@ -1,6 +1,6 @@ -#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" int main(int argc, char *argv[]) @@ -1,8 +1,8 @@ // Shell. -#include "types.h" -#include "user.h" -#include "fcntl.h" +#include "kernel/types.h" +#include "user/user.h" +#include "kernel/fcntl.h" // Parsed command representation #define EXEC 1 diff --git a/stressfs.c b/user/stressfs.c index c0a4743..ef8f1cd 100644 --- a/stressfs.c +++ b/user/stressfs.c @@ -7,11 +7,11 @@ // for (i = 0; i < 40000; i++) // asm volatile(""); -#include "types.h" -#include "stat.h" -#include "user.h" -#include "fs.h" -#include "fcntl.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" +#include "kernel/fs.h" +#include "kernel/fcntl.h" int main(int argc, char *argv[]) @@ -1,8 +1,7 @@ -#include "types.h" -#include "stat.h" -#include "fcntl.h" -#include "user.h" -#include "x86.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "kernel/fcntl.h" +#include "user/user.h" char* strcpy(char *s, const char *t) @@ -36,7 +35,11 @@ strlen(const char *s) void* memset(void *dst, int c, uint n) { - stosb(dst, c, n); + char *cdst = (char *) dst; + int i; + for(i = 0; i < n; i++){ + cdst[i] = c; + } return dst; } diff --git a/umalloc.c b/user/umalloc.c index a7e7d2c..2092a32 100644 --- a/umalloc.c +++ b/user/umalloc.c @@ -1,7 +1,7 @@ -#include "types.h" -#include "stat.h" -#include "user.h" -#include "param.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" +#include "kernel/param.h" // Memory allocator by Kernighan and Ritchie, // The C programming Language, 2nd ed. Section 8.7. 
diff --git a/usertests.c b/user/usertests.c index a1e97e7..f74b88c 100644 --- a/usertests.c +++ b/user/usertests.c @@ -1,12 +1,12 @@ -#include "param.h" -#include "types.h" -#include "stat.h" -#include "user.h" -#include "fs.h" -#include "fcntl.h" -#include "syscall.h" -#include "traps.h" -#include "memlayout.h" +#include "kernel/param.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" +#include "kernel/fs.h" +#include "kernel/fcntl.h" +#include "kernel/syscall.h" +#include "kernel/memlayout.h" +#include "kernel/riscv.h" char buf[8192]; char name[3]; @@ -363,17 +363,29 @@ preempt(void) printf(1, "preempt: "); pid1 = fork(); + if(pid1 < 0) { + printf(1, "fork failed"); + exit(); + } if(pid1 == 0) for(;;) ; pid2 = fork(); + if(pid2 < 0) { + printf(1, "fork failed\n"); + exit(); + } if(pid2 == 0) for(;;) ; pipe(pfds); pid3 = fork(); + if(pid3 < 0) { + printf(1, "fork failed\n"); + exit(); + } if(pid3 == 0){ close(pfds[0]); if(write(pfds[1], "x", 1) != 1) @@ -406,16 +418,18 @@ exitwait(void) { int i, pid; + printf(1, "exitwait test\n"); + for(i = 0; i < 100; i++){ pid = fork(); if(pid < 0){ printf(1, "fork failed\n"); - return; + exit(); } if(pid){ if(wait() != pid){ printf(1, "wait wrong pid\n"); - return; + exit(); } } else { exit(); @@ -424,6 +438,147 @@ exitwait(void) printf(1, "exitwait ok\n"); } +// try to find races in the reparenting +// code that handles a parent exiting +// when it still has live children. 
+void +reparent(void) +{ + int master_pid = getpid(); + + printf(1, "reparent test\n"); + + for(int i = 0; i < 200; i++){ + int pid = fork(); + if(pid < 0){ + printf(1, "fork failed\n"); + exit(); + } + if(pid){ + if(wait() != pid){ + printf(1, "wait wrong pid\n"); + exit(); + } + } else { + int pid2 = fork(); + if(pid2 < 0){ + printf(1, "fork failed\n"); + kill(master_pid); + exit(); + } + if(pid2 == 0){ + exit(); + } else { + exit(); + } + } + } + printf(1, "reparent ok\n"); +} + +// what if two children exit() at the same time? +void +twochildren(void) +{ + printf(1, "twochildren test\n"); + + for(int i = 0; i < 1000; i++){ + int pid1 = fork(); + if(pid1 < 0){ + printf(1, "fork failed\n"); + exit(); + } + if(pid1 == 0){ + exit(); + } else { + int pid2 = fork(); + if(pid2 < 0){ + printf(1, "fork failed\n"); + exit(); + } + if(pid2 == 0){ + exit(); + } else { + wait(); + wait(); + } + } + } + printf(1, "twochildren ok\n"); +} + +// concurrent forks to try to expose locking bugs. +void +forkfork(void) +{ + int ppid = getpid(); + + printf(1, "forkfork test\n"); + + for(int i = 0; i < 2; i++){ + int pid = fork(); + if(pid < 0){ + printf(1, "fork failed"); + exit(); + } + if(pid == 0){ + for(int j = 0; j < 200; j++){ + int pid1 = fork(); + if(pid1 < 0){ + printf(1, "fork failed\n"); + kill(ppid); + exit(); + } + if(pid1 == 0){ + exit(); + } + wait(); + } + exit(); + } + } + + for(int i = 0; i < 2; i++){ + wait(); + } + + printf(1, "forkfork ok\n"); +} + +void +forkforkfork(void) +{ + printf(1, "forkforkfork test\n"); + + unlink("stopforking"); + + int pid = fork(); + if(pid < 0){ + printf(1, "fork failed"); + exit(); + } + if(pid == 0){ + while(1){ + int fd = open("stopforking", 0); + if(fd >= 0){ + exit(); + } + if(fork() < 0){ + close(open("stopforking", O_CREATE|O_RDWR)); + } + } + + exit(); + } + + sleep(20); // two seconds + close(open("stopforking", O_CREATE|O_RDWR)); + wait(); + sleep(10); // one second + + printf(1, "forkforkfork ok\n"); +} + void mem(void) { 
@@ -583,13 +738,13 @@ fourfiles(void) void createdelete(void) { - enum { N = 20 }; + enum { N = 20, NCHILD=4 }; int pid, i, fd, pi; char name[32]; printf(1, "createdelete test\n"); - for(pi = 0; pi < 4; pi++){ + for(pi = 0; pi < NCHILD; pi++){ pid = fork(); if(pid < 0){ printf(1, "fork failed\n"); @@ -619,13 +774,13 @@ createdelete(void) } } - for(pi = 0; pi < 4; pi++){ + for(pi = 0; pi < NCHILD; pi++){ wait(); } name[0] = name[1] = name[2] = 0; for(i = 0; i < N; i++){ - for(pi = 0; pi < 4; pi++){ + for(pi = 0; pi < NCHILD; pi++){ name[0] = 'p' + pi; name[1] = '0' + i; fd = open(name, 0); @@ -642,7 +797,7 @@ createdelete(void) } for(i = 0; i < N; i++){ - for(pi = 0; pi < 4; pi++){ + for(pi = 0; pi < NCHILD; pi++){ name[0] = 'p' + i; name[1] = '0' + i; unlink(name); @@ -1391,6 +1546,11 @@ forktest(void) exit(); } + if (n == 0) { + printf(1, "no fork at all!\n"); + exit(); + } + if(n == 1000){ printf(1, "fork claimed to work 1000 times!\n"); exit(); @@ -1414,16 +1574,25 @@ forktest(void) void sbrktest(void) { - int fds[2], pid, pids[10], ppid; - char *a, *b, *c, *lastaddr, *oldbrk, *p, scratch; - uint amt; + int i, fds[2], pids[10], pid, ppid; + char *c, *oldbrk, scratch, *a, *b, *lastaddr, *p; + uint64 amt; + int fd; + int n; + #define BIG (100*1024*1024) printf(stdout, "sbrk test\n"); oldbrk = sbrk(0); + // does sbrk() return the expected failure value? + a = sbrk(1024*1024*1024); + if(a != (char*)0xffffffffffffffffL){ + printf(stdout, "sbrk(<toomuch>) returned %p\n", a); + exit(); + } + // can one sbrk() less than a page? a = sbrk(0); - int i; for(i = 0; i < 5000; i++){ b = sbrk(1); if(b != a){ @@ -1449,9 +1618,8 @@ sbrktest(void) wait(); // can one grow address space to something big? -#define BIG (100*1024*1024) a = sbrk(0); - amt = (BIG) - (uint)a; + amt = BIG - (uint64)a; p = sbrk(amt); if (p != a) { printf(stdout, "sbrk test failed to grow big address space; enough phys mem?\n"); @@ -1463,7 +1631,7 @@ sbrktest(void) // can one de-allocate? 
a = sbrk(0); c = sbrk(-4096); - if(c == (char*)0xffffffff){ + if(c == (char*)0xffffffffffffffffL){ printf(stdout, "sbrk could not deallocate\n"); exit(); } @@ -1508,7 +1676,7 @@ sbrktest(void) } wait(); } - + // if we run the system out of memory, does it clean up the last // failed allocation? if(pipe(fds) != 0){ @@ -1518,7 +1686,7 @@ sbrktest(void) for(i = 0; i < sizeof(pids)/sizeof(pids[0]); i++){ if((pids[i] = fork()) == 0){ // allocate a lot of memory - sbrk(BIG - (uint)sbrk(0)); + sbrk(BIG - (uint64)sbrk(0)); write(fds[1], "x", 1); // sit around until killed for(;;) sleep(1000); @@ -1526,6 +1694,7 @@ sbrktest(void) if(pids[i] != -1) read(fds[0], &scratch, 1); } + // if those failed allocations freed up the pages they did allocate, // we'll be able to allocate here c = sbrk(4096); @@ -1535,11 +1704,55 @@ sbrktest(void) kill(pids[i]); wait(); } - if(c == (char*)0xffffffff){ + if(c == (char*)0xffffffffffffffffL){ printf(stdout, "failed sbrk leaked memory\n"); exit(); } + // test running fork with the above allocated page + ppid = getpid(); + pid = fork(); + if(pid < 0){ + printf(stdout, "fork failed\n"); + exit(); + } + + // test out of memory during sbrk + if(pid == 0){ + // allocate a lot of memory + a = sbrk(0); + sbrk(10*BIG); + int n = 0; + for (i = 0; i < 10*BIG; i += 4096) { + n += *(a+i); + } + printf(stdout, "allocate a lot of memory succeeded %d\n", n); + kill(ppid); + exit(); + } + wait(); + + // test reads from allocated memory + a = sbrk(4096); + fd = open("sbrk", O_CREATE|O_WRONLY); + unlink("sbrk"); + if(fd < 0) { + printf(stdout, "open sbrk failed\n"); + exit(); + } + if ((n = write(fd, a, 10)) < 0) { + printf(stdout, "write sbrk failed\n"); + exit(); + } + close(fd); + + // test writes to allocated memory + a = sbrk(4096); + if(pipe((int *) a) != 0){ + printf(1, "pipe() failed\n"); + exit(); + } + if(sbrk(0) > oldbrk) sbrk(-(sbrk(0) - oldbrk)); @@ -1549,7 +1762,7 @@ sbrktest(void) void validateint(int *p) { - int res; + /* XXX int res; asm("mov 
%%esp, %%ebx\n\t" "mov %3, %%esp\n\t" "int %2\n\t" @@ -1557,13 +1770,14 @@ validateint(int *p) "=a" (res) : "a" (SYS_sleep), "n" (T_SYSCALL), "c" (p) : "ebx"); + */ } void validatetest(void) { int hi, pid; - uint p; + uint64 p; printf(stdout, "validate test\n"); hi = 1100*1024; @@ -1695,35 +1909,6 @@ fsfull() printf(1, "fsfull test finished\n"); } -void -uio() -{ - #define RTC_ADDR 0x70 - #define RTC_DATA 0x71 - - ushort port = 0; - uchar val = 0; - int pid; - - printf(1, "uio test\n"); - pid = fork(); - if(pid == 0){ - port = RTC_ADDR; - val = 0x09; /* year */ - /* http://wiki.osdev.org/Inline_Assembly/Examples */ - asm volatile("outb %0,%1"::"a"(val), "d" (port)); - port = RTC_DATA; - asm volatile("inb %1,%0" : "=a" (val) : "d" (port)); - printf(1, "uio: uio succeeded; test FAILED\n"); - exit(); - } else if(pid < 0){ - printf (1, "fork failed\n"); - exit(); - } - wait(); - printf(1, "uio test done\n"); -} - void argptest() { int fd; @@ -1745,6 +1930,32 @@ rand() return randstate; } +// check that there's an invalid page beneath +// the user stack, to catch stack overflow. +void +stacktest() +{ + int pid; + int ppid = getpid(); + + printf(1, "stack guard test\n"); + pid = fork(); + if(pid == 0) { + char *sp = (char *) r_sp(); + sp -= 4096; + // the *sp should cause a trap. 
+ printf(1, "stacktest: read below stack %p\n", *sp); + printf(1, "stacktest: test FAILED\n"); + kill(ppid); + exit(); + } else if(pid < 0){ + printf (1, "fork failed\n"); + exit(); + } + wait(); + printf(1, "stack guard test ok\n"); +} + int main(int argc, char *argv[]) { @@ -1756,6 +1967,11 @@ main(int argc, char *argv[]) } close(open("usertests.ran", O_CREATE)); + reparent(); + twochildren(); + forkfork(); + forkforkfork(); + argptest(); createdelete(); linkunlink(); @@ -1769,7 +1985,8 @@ main(int argc, char *argv[]) bsstest(); sbrktest(); validatetest(); - + stacktest(); + opentest(); writetest(); writetest1(); @@ -1795,8 +2012,6 @@ main(int argc, char *argv[]) forktest(); bigdir(); // slow - uio(); - exectest(); exit(); diff --git a/user/usys.pl b/user/usys.pl new file mode 100755 index 0000000..01e426e --- /dev/null +++ b/user/usys.pl @@ -0,0 +1,38 @@ +#!/usr/bin/perl -w + +# Generate usys.S, the stubs for syscalls. + +print "# generated by usys.pl - do not edit\n"; + +print "#include \"kernel/syscall.h\"\n"; + +sub entry { + my $name = shift; + print ".global $name\n"; + print "${name}:\n"; + print " li a7, SYS_${name}\n"; + print " ecall\n"; + print " ret\n"; +} + +entry("fork"); +entry("exit"); +entry("wait"); +entry("pipe"); +entry("read"); +entry("write"); +entry("close"); +entry("kill"); +entry("exec"); +entry("open"); +entry("mknod"); +entry("unlink"); +entry("fstat"); +entry("link"); +entry("mkdir"); +entry("chdir"); +entry("dup"); +entry("getpid"); +entry("sbrk"); +entry("sleep"); +entry("uptime"); @@ -1,6 +1,6 @@ -#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" char buf[512]; @@ -1,9 +1,9 @@ // Create a zombie process that // must be reparented at exit. 
-#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" int main(void) @@ -1,31 +0,0 @@ -#include "syscall.h" -#include "traps.h" - -#define SYSCALL(name) \ - .globl name; \ - name: \ - movl $SYS_ ## name, %eax; \ - int $T_SYSCALL; \ - ret - -SYSCALL(fork) -SYSCALL(exit) -SYSCALL(wait) -SYSCALL(pipe) -SYSCALL(read) -SYSCALL(write) -SYSCALL(close) -SYSCALL(kill) -SYSCALL(exec) -SYSCALL(open) -SYSCALL(mknod) -SYSCALL(unlink) -SYSCALL(fstat) -SYSCALL(link) -SYSCALL(mkdir) -SYSCALL(chdir) -SYSCALL(dup) -SYSCALL(getpid) -SYSCALL(sbrk) -SYSCALL(sleep) -SYSCALL(uptime) diff --git a/vectors.pl b/vectors.pl deleted file mode 100755 index 57b49dd..0000000 --- a/vectors.pl +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/perl -w - -# Generate vectors.S, the trap/interrupt entry points. -# There has to be one entry point per interrupt number -# since otherwise there's no way for trap() to discover -# the interrupt number. - -print "# generated by vectors.pl - do not edit\n"; -print "# handlers\n"; -print ".globl alltraps\n"; -for(my $i = 0; $i < 256; $i++){ - print ".globl vector$i\n"; - print "vector$i:\n"; - if(!($i == 8 || ($i >= 10 && $i <= 14) || $i == 17)){ - print " pushl \$0\n"; - } - print " pushl \$$i\n"; - print " jmp alltraps\n"; -} - -print "\n# vector table\n"; -print ".data\n"; -print ".globl vectors\n"; -print "vectors:\n"; -for(my $i = 0; $i < 256; $i++){ - print " .long vector$i\n"; -} - -# sample output: -# # handlers -# .globl alltraps -# .globl vector0 -# vector0: -# pushl $0 -# pushl $0 -# jmp alltraps -# ... -# -# # vector table -# .data -# .globl vectors -# vectors: -# .long vector0 -# .long vector1 -# .long vector2 -# ... 
- @@ -1,394 +0,0 @@ -#include "param.h" -#include "types.h" -#include "defs.h" -#include "x86.h" -#include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "elf.h" - -extern char data[]; // defined by kernel.ld -pde_t *kpgdir; // for use in scheduler() - -// Set up CPU's kernel segment descriptors. -// Run once on entry on each CPU. -void -seginit(void) -{ - struct cpu *c; - - // Map "logical" addresses to virtual addresses using identity map. - // Cannot share a CODE descriptor for both kernel and user - // because it would have to have DPL_USR, but the CPU forbids - // an interrupt from CPL=0 to DPL=3. - c = &cpus[cpuid()]; - c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0); - c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0); - c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, DPL_USER); - c->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER); - lgdt(c->gdt, sizeof(c->gdt)); -} - -// Return the address of the PTE in page table pgdir -// that corresponds to virtual address va. If alloc!=0, -// create any required page table pages. -static pte_t * -walkpgdir(pde_t *pgdir, const void *va, int alloc) -{ - pde_t *pde; - pte_t *pgtab; - - pde = &pgdir[PDX(va)]; - if(*pde & PTE_P){ - pgtab = (pte_t*)P2V(PTE_ADDR(*pde)); - } else { - if(!alloc || (pgtab = (pte_t*)kalloc()) == 0) - return 0; - // Make sure all those PTE_P bits are zero. - memset(pgtab, 0, PGSIZE); - // The permissions here are overly generous, but they can - // be further restricted by the permissions in the page table - // entries, if necessary. - *pde = V2P(pgtab) | PTE_P | PTE_W | PTE_U; - } - return &pgtab[PTX(va)]; -} - -// Create PTEs for virtual addresses starting at va that refer to -// physical addresses starting at pa. va and size might not -// be page-aligned. 
-static int -mappages(pde_t *pgdir, void *va, uint size, uint pa, int perm) -{ - char *a, *last; - pte_t *pte; - - a = (char*)PGROUNDDOWN((uint)va); - last = (char*)PGROUNDDOWN(((uint)va) + size - 1); - for(;;){ - if((pte = walkpgdir(pgdir, a, 1)) == 0) - return -1; - if(*pte & PTE_P) - panic("remap"); - *pte = pa | perm | PTE_P; - if(a == last) - break; - a += PGSIZE; - pa += PGSIZE; - } - return 0; -} - -// There is one page table per process, plus one that's used when -// a CPU is not running any process (kpgdir). The kernel uses the -// current process's page table during system calls and interrupts; -// page protection bits prevent user code from using the kernel's -// mappings. -// -// setupkvm() and exec() set up every page table like this: -// -// 0..KERNBASE: user memory (text+data+stack+heap), mapped to -// phys memory allocated by the kernel -// KERNBASE..KERNBASE+EXTMEM: mapped to 0..EXTMEM (for I/O space) -// KERNBASE+EXTMEM..data: mapped to EXTMEM..V2P(data) -// for the kernel's instructions and r/o data -// data..KERNBASE+PHYSTOP: mapped to V2P(data)..PHYSTOP, -// rw data + free physical memory -// 0xfe000000..0: mapped direct (devices such as ioapic) -// -// The kernel allocates physical memory for its heap and for user memory -// between V2P(end) and the end of physical memory (PHYSTOP) -// (directly addressable from end..P2V(PHYSTOP)). - -// This table defines the kernel's mappings, which are present in -// every process's page table. -static struct kmap { - void *virt; - uint phys_start; - uint phys_end; - int perm; -} kmap[] = { - { (void*)KERNBASE, 0, EXTMEM, PTE_W}, // I/O space - { (void*)KERNLINK, V2P(KERNLINK), V2P(data), 0}, // kern text+rodata - { (void*)data, V2P(data), PHYSTOP, PTE_W}, // kern data+memory - { (void*)DEVSPACE, DEVSPACE, 0, PTE_W}, // more devices -}; - -// Set up kernel part of a page table. 
-pde_t* -setupkvm(void) -{ - pde_t *pgdir; - struct kmap *k; - - if((pgdir = (pde_t*)kalloc()) == 0) - return 0; - memset(pgdir, 0, PGSIZE); - if (P2V(PHYSTOP) > (void*)DEVSPACE) - panic("PHYSTOP too high"); - for(k = kmap; k < &kmap[NELEM(kmap)]; k++) - if(mappages(pgdir, k->virt, k->phys_end - k->phys_start, - (uint)k->phys_start, k->perm) < 0) { - freevm(pgdir); - return 0; - } - return pgdir; -} - -// Allocate one page table for the machine for the kernel address -// space for scheduler processes. -void -kvmalloc(void) -{ - kpgdir = setupkvm(); - switchkvm(); -} - -// Switch h/w page table register to the kernel-only page table, -// for when no process is running. -void -switchkvm(void) -{ - lcr3(V2P(kpgdir)); // switch to the kernel page table -} - -// Switch TSS and h/w page table to correspond to process p. -void -switchuvm(struct proc *p) -{ - if(p == 0) - panic("switchuvm: no process"); - if(p->kstack == 0) - panic("switchuvm: no kstack"); - if(p->pgdir == 0) - panic("switchuvm: no pgdir"); - - pushcli(); - mycpu()->gdt[SEG_TSS] = SEG16(STS_T32A, &mycpu()->ts, - sizeof(mycpu()->ts)-1, 0); - mycpu()->gdt[SEG_TSS].s = 0; - mycpu()->ts.ss0 = SEG_KDATA << 3; - mycpu()->ts.esp0 = (uint)p->kstack + KSTACKSIZE; - // setting IOPL=0 in eflags *and* iomb beyond the tss segment limit - // forbids I/O instructions (e.g., inb and outb) from user space - mycpu()->ts.iomb = (ushort) 0xFFFF; - ltr(SEG_TSS << 3); - lcr3(V2P(p->pgdir)); // switch to process's address space - popcli(); -} - -// Load the initcode into address 0 of pgdir. -// sz must be less than a page. -void -inituvm(pde_t *pgdir, char *init, uint sz) -{ - char *mem; - - if(sz >= PGSIZE) - panic("inituvm: more than a page"); - mem = kalloc(); - memset(mem, 0, PGSIZE); - mappages(pgdir, 0, PGSIZE, V2P(mem), PTE_W|PTE_U); - memmove(mem, init, sz); -} - -// Load a program segment into pgdir. addr must be page-aligned -// and the pages from addr to addr+sz must already be mapped. 
-int -loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz) -{ - uint i, pa, n; - pte_t *pte; - - if((uint) addr % PGSIZE != 0) - panic("loaduvm: addr must be page aligned"); - for(i = 0; i < sz; i += PGSIZE){ - if((pte = walkpgdir(pgdir, addr+i, 0)) == 0) - panic("loaduvm: address should exist"); - pa = PTE_ADDR(*pte); - if(sz - i < PGSIZE) - n = sz - i; - else - n = PGSIZE; - if(readi(ip, P2V(pa), offset+i, n) != n) - return -1; - } - return 0; -} - -// Allocate page tables and physical memory to grow process from oldsz to -// newsz, which need not be page aligned. Returns new size or 0 on error. -int -allocuvm(pde_t *pgdir, uint oldsz, uint newsz) -{ - char *mem; - uint a; - - if(newsz >= KERNBASE) - return 0; - if(newsz < oldsz) - return oldsz; - - a = PGROUNDUP(oldsz); - for(; a < newsz; a += PGSIZE){ - mem = kalloc(); - if(mem == 0){ - cprintf("allocuvm out of memory\n"); - deallocuvm(pgdir, newsz, oldsz); - return 0; - } - memset(mem, 0, PGSIZE); - if(mappages(pgdir, (char*)a, PGSIZE, V2P(mem), PTE_W|PTE_U) < 0){ - cprintf("allocuvm out of memory (2)\n"); - deallocuvm(pgdir, newsz, oldsz); - kfree(mem); - return 0; - } - } - return newsz; -} - -// Deallocate user pages to bring the process size from oldsz to -// newsz. oldsz and newsz need not be page-aligned, nor does newsz -// need to be less than oldsz. oldsz can be larger than the actual -// process size. Returns the new process size. -int -deallocuvm(pde_t *pgdir, uint oldsz, uint newsz) -{ - pte_t *pte; - uint a, pa; - - if(newsz >= oldsz) - return oldsz; - - a = PGROUNDUP(newsz); - for(; a < oldsz; a += PGSIZE){ - pte = walkpgdir(pgdir, (char*)a, 0); - if(!pte) - a = PGADDR(PDX(a) + 1, 0, 0) - PGSIZE; - else if((*pte & PTE_P) != 0){ - pa = PTE_ADDR(*pte); - if(pa == 0) - panic("kfree"); - char *v = P2V(pa); - kfree(v); - *pte = 0; - } - } - return newsz; -} - -// Free a page table and all the physical memory pages -// in the user part. 
-void -freevm(pde_t *pgdir) -{ - uint i; - - if(pgdir == 0) - panic("freevm: no pgdir"); - deallocuvm(pgdir, KERNBASE, 0); - for(i = 0; i < NPDENTRIES; i++){ - if(pgdir[i] & PTE_P){ - char * v = P2V(PTE_ADDR(pgdir[i])); - kfree(v); - } - } - kfree((char*)pgdir); -} - -// Clear PTE_U on a page. Used to create an inaccessible -// page beneath the user stack. -void -clearpteu(pde_t *pgdir, char *uva) -{ - pte_t *pte; - - pte = walkpgdir(pgdir, uva, 0); - if(pte == 0) - panic("clearpteu"); - *pte &= ~PTE_U; -} - -// Given a parent process's page table, create a copy -// of it for a child. -pde_t* -copyuvm(pde_t *pgdir, uint sz) -{ - pde_t *d; - pte_t *pte; - uint pa, i, flags; - char *mem; - - if((d = setupkvm()) == 0) - return 0; - for(i = 0; i < sz; i += PGSIZE){ - if((pte = walkpgdir(pgdir, (void *) i, 0)) == 0) - panic("copyuvm: pte should exist"); - if(!(*pte & PTE_P)) - panic("copyuvm: page not present"); - pa = PTE_ADDR(*pte); - flags = PTE_FLAGS(*pte); - if((mem = kalloc()) == 0) - goto bad; - memmove(mem, (char*)P2V(pa), PGSIZE); - if(mappages(d, (void*)i, PGSIZE, V2P(mem), flags) < 0) { - kfree(mem); - goto bad; - } - } - return d; - -bad: - freevm(d); - return 0; -} - -//PAGEBREAK! -// Map user virtual address to kernel address. -char* -uva2ka(pde_t *pgdir, char *uva) -{ - pte_t *pte; - - pte = walkpgdir(pgdir, uva, 0); - if((*pte & PTE_P) == 0) - return 0; - if((*pte & PTE_U) == 0) - return 0; - return (char*)P2V(PTE_ADDR(*pte)); -} - -// Copy len bytes from p to user address va in page table pgdir. -// Most useful when pgdir is not the current page table. -// uva2ka ensures this only works for PTE_U pages. 
-int -copyout(pde_t *pgdir, uint va, void *p, uint len) -{ - char *buf, *pa0; - uint n, va0; - - buf = (char*)p; - while(len > 0){ - va0 = (uint)PGROUNDDOWN(va); - pa0 = uva2ka(pgdir, (char*)va0); - if(pa0 == 0) - return -1; - n = PGSIZE - (va - va0); - if(n > len) - n = len; - memmove(pa0 + (va - va0), buf, n); - len -= n; - buf += n; - va = va0 + PGSIZE; - } - return 0; -} - -//PAGEBREAK! -// Blank page. -//PAGEBREAK! -// Blank page. -//PAGEBREAK! -// Blank page. - @@ -1,183 +0,0 @@ -// Routines to let C code use special x86 instructions. - -static inline uchar -inb(ushort port) -{ - uchar data; - - asm volatile("in %1,%0" : "=a" (data) : "d" (port)); - return data; -} - -static inline void -insl(int port, void *addr, int cnt) -{ - asm volatile("cld; rep insl" : - "=D" (addr), "=c" (cnt) : - "d" (port), "0" (addr), "1" (cnt) : - "memory", "cc"); -} - -static inline void -outb(ushort port, uchar data) -{ - asm volatile("out %0,%1" : : "a" (data), "d" (port)); -} - -static inline void -outw(ushort port, ushort data) -{ - asm volatile("out %0,%1" : : "a" (data), "d" (port)); -} - -static inline void -outsl(int port, const void *addr, int cnt) -{ - asm volatile("cld; rep outsl" : - "=S" (addr), "=c" (cnt) : - "d" (port), "0" (addr), "1" (cnt) : - "cc"); -} - -static inline void -stosb(void *addr, int data, int cnt) -{ - asm volatile("cld; rep stosb" : - "=D" (addr), "=c" (cnt) : - "0" (addr), "1" (cnt), "a" (data) : - "memory", "cc"); -} - -static inline void -stosl(void *addr, int data, int cnt) -{ - asm volatile("cld; rep stosl" : - "=D" (addr), "=c" (cnt) : - "0" (addr), "1" (cnt), "a" (data) : - "memory", "cc"); -} - -struct segdesc; - -static inline void -lgdt(struct segdesc *p, int size) -{ - volatile ushort pd[3]; - - pd[0] = size-1; - pd[1] = (uint)p; - pd[2] = (uint)p >> 16; - - asm volatile("lgdt (%0)" : : "r" (pd)); -} - -struct gatedesc; - -static inline void -lidt(struct gatedesc *p, int size) -{ - volatile ushort pd[3]; - - pd[0] = size-1; - pd[1] = 
(uint)p; - pd[2] = (uint)p >> 16; - - asm volatile("lidt (%0)" : : "r" (pd)); -} - -static inline void -ltr(ushort sel) -{ - asm volatile("ltr %0" : : "r" (sel)); -} - -static inline uint -readeflags(void) -{ - uint eflags; - asm volatile("pushfl; popl %0" : "=r" (eflags)); - return eflags; -} - -static inline void -loadgs(ushort v) -{ - asm volatile("movw %0, %%gs" : : "r" (v)); -} - -static inline void -cli(void) -{ - asm volatile("cli"); -} - -static inline void -sti(void) -{ - asm volatile("sti"); -} - -static inline uint -xchg(volatile uint *addr, uint newval) -{ - uint result; - - // The + in "+m" denotes a read-modify-write operand. - asm volatile("lock; xchgl %0, %1" : - "+m" (*addr), "=a" (result) : - "1" (newval) : - "cc"); - return result; -} - -static inline uint -rcr2(void) -{ - uint val; - asm volatile("movl %%cr2,%0" : "=r" (val)); - return val; -} - -static inline void -lcr3(uint val) -{ - asm volatile("movl %0,%%cr3" : : "r" (val)); -} - -//PAGEBREAK: 36 -// Layout of the trap frame built on the stack by the -// hardware and by trapasm.S, and passed to trap(). -struct trapframe { - // registers as pushed by pusha - uint edi; - uint esi; - uint ebp; - uint oesp; // useless & ignored - uint ebx; - uint edx; - uint ecx; - uint eax; - - // rest of trap frame - ushort gs; - ushort padding1; - ushort fs; - ushort padding2; - ushort es; - ushort padding3; - ushort ds; - ushort padding4; - uint trapno; - - // below here defined by x86 hardware - uint err; - uint eip; - ushort cs; - ushort padding5; - uint eflags; - - // below here only when crossing rings, such as from user to kernel - uint esp; - ushort ss; - ushort padding6; -}; |