diff options
| -rw-r--r-- | .gdbinit.tmpl | 27 | ||||
| -rw-r--r-- | .gdbinit.tmpl-riscv | 5 | ||||
| -rw-r--r-- | .gitignore | 3 | ||||
| -rw-r--r-- | BUGS | 7 | ||||
| -rw-r--r-- | LICENSE | 2 | ||||
| -rw-r--r-- | Makefile | 287 | ||||
| -rw-r--r-- | Notes | 123 | ||||
| -rw-r--r-- | README | 35 | ||||
| -rw-r--r-- | TRICKS | 140 | ||||
| -rw-r--r-- | asm.h | 18 | ||||
| -rw-r--r-- | bootasm.S | 88 | ||||
| -rw-r--r-- | bootmain.c | 96 | ||||
| -rw-r--r-- | console.c | 299 | ||||
| -rwxr-xr-x | cuth | 48 | ||||
| -rw-r--r-- | doc/FU540-C000-v1.0.pdf | bin | 0 -> 2240525 bytes | |||
| -rw-r--r-- | doc/riscv-calling.pdf | bin | 0 -> 138193 bytes | |||
| -rw-r--r-- | doc/riscv-privileged-v1.10.pdf | bin | 0 -> 536816 bytes | |||
| -rw-r--r-- | doc/riscv-spec-v2.2.pdf | bin | 0 -> 615016 bytes | |||
| -rw-r--r-- | doc/virtio-v1.1-csprd01.pdf | bin | 0 -> 694936 bytes | |||
| -rwxr-xr-x | dot-bochsrc | 738 | ||||
| -rw-r--r-- | entry.S | 68 | ||||
| -rw-r--r-- | entryother.S | 93 | ||||
| -rw-r--r-- | exec.c | 114 | ||||
| -rw-r--r-- | gdbutil | 291 | ||||
| -rw-r--r-- | ide.c | 168 | ||||
| -rw-r--r-- | ioapic.c | 75 | ||||
| -rw-r--r-- | kalloc.c | 96 | ||||
| -rw-r--r-- | kbd.c | 50 | ||||
| -rw-r--r-- | kbd.h | 112 | ||||
| -rw-r--r-- | kernel.ld | 68 | ||||
| -rw-r--r-- | kernel/bio.c (renamed from bio.c) | 41 | ||||
| -rw-r--r-- | kernel/buf.h (renamed from buf.h) | 5 | ||||
| -rw-r--r-- | kernel/console.c | 199 | ||||
| -rw-r--r-- | kernel/date.h (renamed from date.h) | 0 | ||||
| -rw-r--r-- | kernel/defs.h (renamed from defs.h) | 152 | ||||
| -rw-r--r-- | kernel/elf.h (renamed from elf.h) | 22 | ||||
| -rw-r--r-- | kernel/entry.S | 26 | ||||
| -rw-r--r-- | kernel/exec.c | 153 | ||||
| -rw-r--r-- | kernel/fcntl.h (renamed from fcntl.h) | 0 | ||||
| -rw-r--r-- | kernel/file.c (renamed from file.c) | 67 | ||||
| -rw-r--r-- | kernel/file.h (renamed from file.h) | 16 | ||||
| -rw-r--r-- | kernel/fs.c (renamed from fs.c) | 92 | ||||
| -rw-r--r-- | kernel/fs.h (renamed from fs.h) | 13 | ||||
| -rw-r--r-- | kernel/kalloc.c | 83 | ||||
| -rw-r--r-- | kernel/kernel.ld | 32 | ||||
| -rw-r--r-- | kernel/kernelvec.S | 121 | ||||
| -rw-r--r-- | kernel/log.c (renamed from log.c) | 17 | ||||
| -rw-r--r-- | kernel/main.c | 43 | ||||
| -rw-r--r-- | kernel/memlayout.h | 67 | ||||
| -rw-r--r-- | kernel/param.h (renamed from param.h) | 3 | ||||
| -rw-r--r-- | kernel/pipe.c | 127 | ||||
| -rw-r--r-- | kernel/plic.c | 62 | ||||
| -rw-r--r-- | kernel/printf.c | 134 | ||||
| -rw-r--r-- | kernel/proc.c | 647 | ||||
| -rw-r--r-- | kernel/proc.h | 105 | ||||
| -rw-r--r-- | kernel/ramdisk.c | 45 | ||||
| -rw-r--r-- | kernel/riscv.h | 358 | ||||
| -rw-r--r-- | kernel/sleeplock.c (renamed from sleeplock.c) | 5 | ||||
| -rw-r--r-- | kernel/sleeplock.h (renamed from sleeplock.h) | 0 | ||||
| -rw-r--r-- | kernel/spinlock.c | 108 | ||||
| -rw-r--r-- | kernel/spinlock.h (renamed from spinlock.h) | 2 | ||||
| -rw-r--r-- | kernel/start.c | 82 | ||||
| -rw-r--r-- | kernel/stat.h (renamed from stat.h) | 10 | ||||
| -rw-r--r-- | kernel/string.c (renamed from string.c) | 11 | ||||
| -rw-r--r-- | kernel/swtch.S | 42 | ||||
| -rw-r--r-- | kernel/syscall.c | 147 | ||||
| -rw-r--r-- | kernel/syscall.h (renamed from syscall.h) | 0 | ||||
| -rw-r--r-- | kernel/sysfile.c (renamed from sysfile.c) | 152 | ||||
| -rw-r--r-- | kernel/sysproc.c (renamed from sysproc.c) | 50 | ||||
| -rw-r--r-- | kernel/trampoline.S | 141 | ||||
| -rw-r--r-- | kernel/trap.c | 213 | ||||
| -rw-r--r-- | kernel/types.h | 10 | ||||
| -rw-r--r-- | kernel/uart.c | 92 | ||||
| -rw-r--r-- | kernel/virtio.h | 72 | ||||
| -rw-r--r-- | kernel/virtio_disk.c | 269 | ||||
| -rw-r--r-- | kernel/vm.c | 441 | ||||
| -rw-r--r-- | labs/cow.html | 109 | ||||
| -rw-r--r-- | labs/fs.html | 360 | ||||
| -rw-r--r-- | labs/fs1.html | 215 | ||||
| -rw-r--r-- | labs/lazy.html | 132 | ||||
| -rw-r--r-- | labs/lock.html | 148 | ||||
| -rw-r--r-- | labs/mmap.html | 171 | ||||
| -rw-r--r-- | labs/syscall.html | 443 | ||||
| -rw-r--r-- | labs/xv6.html | 238 | ||||
| -rw-r--r-- | lapic.c | 229 | ||||
| -rw-r--r-- | main.c | 116 | ||||
| -rw-r--r-- | memide.c | 60 | ||||
| -rw-r--r-- | memlayout.h | 15 | ||||
| -rw-r--r-- | mkfs/mkfs.c (renamed from mkfs.c) | 24 | ||||
| -rw-r--r-- | mmu.h | 181 | ||||
| -rw-r--r-- | mp.c | 139 | ||||
| -rw-r--r-- | mp.h | 56 | ||||
| -rw-r--r-- | picirq.c | 19 | ||||
| -rw-r--r-- | pipe.c | 121 | ||||
| -rwxr-xr-x | pr.pl | 36 | ||||
| -rwxr-xr-x | printpcs | 14 | ||||
| -rw-r--r-- | proc.c | 534 | ||||
| -rw-r--r-- | proc.h | 58 | ||||
| -rwxr-xr-x | runoff | 246 | ||||
| -rw-r--r-- | runoff.list | 80 | ||||
| -rw-r--r-- | runoff.spec | 102 | ||||
| -rwxr-xr-x | runoff1 | 108 | ||||
| -rwxr-xr-x | show1 | 3 | ||||
| -rwxr-xr-x | sign.pl | 19 | ||||
| -rw-r--r-- | sleep1.p | 134 | ||||
| -rw-r--r-- | spinlock.c | 126 | ||||
| -rwxr-xr-x | spinp | 16 | ||||
| -rw-r--r-- | swtch.S | 29 | ||||
| -rw-r--r-- | syscall.c | 145 | ||||
| -rw-r--r-- | toc.ftr | 13 | ||||
| -rw-r--r-- | toc.hdr | 6 | ||||
| -rw-r--r-- | trap.c | 112 | ||||
| -rw-r--r-- | trapasm.S | 32 | ||||
| -rw-r--r-- | traps.h | 38 | ||||
| -rw-r--r-- | types.h | 4 | ||||
| -rw-r--r-- | uart.c | 77 | ||||
| -rw-r--r-- | user/alarmtest.c | 88 | ||||
| -rw-r--r-- | user/cat.c (renamed from cat.c) | 6 | ||||
| -rw-r--r-- | user/cow.c | 196 | ||||
| -rw-r--r-- | user/echo.c (renamed from echo.c) | 6 | ||||
| -rw-r--r-- | user/forktest.c (renamed from forktest.c) | 6 | ||||
| -rw-r--r-- | user/grep.c (renamed from grep.c) | 6 | ||||
| -rw-r--r-- | user/init.c (renamed from init.c) | 13 | ||||
| -rw-r--r-- | user/initcode.S (renamed from initcode.S) | 18 | ||||
| -rw-r--r-- | user/kill.c (renamed from kill.c) | 6 | ||||
| -rw-r--r-- | user/ln.c (renamed from ln.c) | 6 | ||||
| -rw-r--r-- | user/ls.c (renamed from ls.c) | 10 | ||||
| -rw-r--r-- | user/mkdir.c (renamed from mkdir.c) | 6 | ||||
| -rw-r--r-- | user/printf.c (renamed from printf.c) | 42 | ||||
| -rw-r--r-- | user/rm.c (renamed from rm.c) | 6 | ||||
| -rw-r--r-- | user/sh.c (renamed from sh.c) | 6 | ||||
| -rw-r--r-- | user/stressfs.c (renamed from stressfs.c) | 10 | ||||
| -rw-r--r-- | user/ulib.c (renamed from ulib.c) | 15 | ||||
| -rw-r--r-- | user/umalloc.c (renamed from umalloc.c) | 8 | ||||
| -rw-r--r-- | user/user.h (renamed from user.h) | 0 | ||||
| -rw-r--r-- | user/usertests.c (renamed from usertests.c) | 335 | ||||
| -rwxr-xr-x | user/usys.pl | 38 | ||||
| -rw-r--r-- | user/wc.c (renamed from wc.c) | 6 | ||||
| -rw-r--r-- | user/zombie.c (renamed from zombie.c) | 6 | ||||
| -rw-r--r-- | usys.S | 31 | ||||
| -rwxr-xr-x | vectors.pl | 47 | ||||
| -rw-r--r-- | vm.c | 394 | ||||
| -rw-r--r-- | x86.h | 183 | 
143 files changed, 6830 insertions, 6900 deletions
| diff --git a/.gdbinit.tmpl b/.gdbinit.tmpl deleted file mode 100644 index f71681a..0000000 --- a/.gdbinit.tmpl +++ /dev/null @@ -1,27 +0,0 @@ -set $lastcs = -1 - -define hook-stop -  # There doesn't seem to be a good way to detect if we're in 16- or -  # 32-bit mode, but in 32-bit mode we always run with CS == 8 in the -  # kernel and CS == 35 in user space -  if $cs == 8 || $cs == 35 -    if $lastcs != 8 && $lastcs != 35 -      set architecture i386 -    end -    x/i $pc -  else -    if $lastcs == -1 || $lastcs == 8 || $lastcs == 35 -      set architecture i8086 -    end -    # Translate the segment:offset into a physical address -    printf "[%4x:%4x] ", $cs, $eip -    x/i $cs*16+$eip -  end -  set $lastcs = $cs -end - -echo + target remote localhost:1234\n -target remote localhost:1234 - -echo + symbol-file kernel\n -symbol-file kernel diff --git a/.gdbinit.tmpl-riscv b/.gdbinit.tmpl-riscv new file mode 100644 index 0000000..452f04e --- /dev/null +++ b/.gdbinit.tmpl-riscv @@ -0,0 +1,5 @@ +set confirm off +set architecture riscv +target remote 127.0.0.1:1234 +symbol-file kernel/kernel +set disassemble-next-line auto @@ -10,7 +10,8 @@ bootblock  entryother  initcode  initcode.out -kernel  kernelmemfs  mkfs +kernel/kernel +user/usys.S  .gdbinit @@ -1,7 +0,0 @@ -formatting: -	need to fix PAGEBREAK mechanism - -sh: -	can't always runcmd in child -- breaks cd. -	maybe should hard-code PATH=/ ? 
- @@ -1,6 +1,6 @@  The xv6 software is: -Copyright (c) 2006-2018 Frans Kaashoek, Robert Morris, Russ Cox, +Copyright (c) 2006-2019 Frans Kaashoek, Robert Morris, Russ Cox,                          Massachusetts Institute of Technology  Permission is hereby granted, free of charge, to any person obtaining @@ -1,86 +1,65 @@ +K=kernel +U=user +  OBJS = \ -	bio.o\ -	console.o\ -	exec.o\ -	file.o\ -	fs.o\ -	ide.o\ -	ioapic.o\ -	kalloc.o\ -	kbd.o\ -	lapic.o\ -	log.o\ -	main.o\ -	mp.o\ -	picirq.o\ -	pipe.o\ -	proc.o\ -	sleeplock.o\ -	spinlock.o\ -	string.o\ -	swtch.o\ -	syscall.o\ -	sysfile.o\ -	sysproc.o\ -	trapasm.o\ -	trap.o\ -	uart.o\ -	vectors.o\ -	vm.o\ - -# Cross-compiling (e.g., on Mac OS X) -# TOOLPREFIX = i386-jos-elf - -# Using native tools (e.g., on X86 Linux) +  $K/entry.o \ +  $K/start.o \ +  $K/console.o \ +  $K/printf.o \ +  $K/uart.o \ +  $K/kalloc.o \ +  $K/spinlock.o \ +  $K/string.o \ +  $K/main.o \ +  $K/vm.o \ +  $K/proc.o \ +  $K/swtch.o \ +  $K/trampoline.o \ +  $K/trap.o \ +  $K/syscall.o \ +  $K/sysproc.o \ +  $K/bio.o \ +  $K/fs.o \ +  $K/log.o \ +  $K/sleeplock.o \ +  $K/file.o \ +  $K/pipe.o \ +  $K/exec.o \ +  $K/sysfile.o \ +  $K/kernelvec.o \ +  $K/plic.o \ +  $K/virtio_disk.o + +# riscv64-unknown-elf- or riscv64-linux-gnu- +# perhaps in /opt/riscv/bin  #TOOLPREFIX =   # Try to infer the correct TOOLPREFIX if not set  ifndef TOOLPREFIX -TOOLPREFIX := $(shell if i386-jos-elf-objdump -i 2>&1 | grep '^elf32-i386$$' >/dev/null 2>&1; \ -	then echo 'i386-jos-elf-'; \ -	elif objdump -i 2>&1 | grep 'elf32-i386' >/dev/null 2>&1; \ -	then echo ''; \ +TOOLPREFIX := $(shell if riscv64-unknown-elf-objdump -i 2>&1 | grep 'elf64-big' >/dev/null 2>&1; \ +	then echo 'riscv64-unknown-elf-'; \ +	elif riscv64-linux-gnu-objdump -i 2>&1 | grep 'elf64-big' >/dev/null 2>&1; \ +	then echo 'riscv64-linux-gnu-'; \  	else echo "***" 1>&2; \ -	echo "*** Error: Couldn't find an i386-*-elf version of GCC/binutils." 
1>&2; \ -	echo "*** Is the directory with i386-jos-elf-gcc in your PATH?" 1>&2; \ -	echo "*** If your i386-*-elf toolchain is installed with a command" 1>&2; \ -	echo "*** prefix other than 'i386-jos-elf-', set your TOOLPREFIX" 1>&2; \ -	echo "*** environment variable to that prefix and run 'make' again." 1>&2; \ +	echo "*** Error: Couldn't find an riscv64 version of GCC/binutils." 1>&2; \  	echo "*** To turn off this error, run 'gmake TOOLPREFIX= ...'." 1>&2; \  	echo "***" 1>&2; exit 1; fi)  endif -# If the makefile can't find QEMU, specify its path here -# QEMU = qemu-system-i386 - -# Try to infer the correct QEMU -ifndef QEMU -QEMU = $(shell if which qemu > /dev/null; \ -	then echo qemu; exit; \ -	elif which qemu-system-i386 > /dev/null; \ -	then echo qemu-system-i386; exit; \ -	elif which qemu-system-x86_64 > /dev/null; \ -	then echo qemu-system-x86_64; exit; \ -	else \ -	qemu=/Applications/Q.app/Contents/MacOS/i386-softmmu.app/Contents/MacOS/i386-softmmu; \ -	if test -x $$qemu; then echo $$qemu; exit; fi; fi; \ -	echo "***" 1>&2; \ -	echo "*** Error: Couldn't find a working QEMU executable." 1>&2; \ -	echo "*** Is the directory containing the qemu binary in your PATH" 1>&2; \ -	echo "*** or have you tried setting the QEMU variable in Makefile?" 1>&2; \ -	echo "***" 1>&2; exit 1) -endif +QEMU = qemu-system-riscv64  CC = $(TOOLPREFIX)gcc  AS = $(TOOLPREFIX)gas  LD = $(TOOLPREFIX)ld  OBJCOPY = $(TOOLPREFIX)objcopy  OBJDUMP = $(TOOLPREFIX)objdump -CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 -Werror -fno-omit-frame-pointer + +CFLAGS = -Wall -Werror -O -fno-omit-frame-pointer -ggdb +CFLAGS += -MD +CFLAGS += -mcmodel=medany +CFLAGS += -ffreestanding -fno-common -nostdlib -mno-relax +CFLAGS += -I.  
CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector) -ASFLAGS = -m32 -gdwarf-2 -Wa,-divide -# FreeBSD ld wants ``elf_i386_fbsd'' -LDFLAGS += -m $(shell $(LD) -V | grep elf_i386 2>/dev/null | head -n 1)  # Disable PIE when possible (for Ubuntu 16.10 toolchain)  ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]no-pie'),) @@ -90,74 +69,43 @@ ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]nopie'),)  CFLAGS += -fno-pie -nopie  endif -xv6.img: bootblock kernel -	dd if=/dev/zero of=xv6.img count=10000 -	dd if=bootblock of=xv6.img conv=notrunc -	dd if=kernel of=xv6.img seek=1 conv=notrunc - -xv6memfs.img: bootblock kernelmemfs -	dd if=/dev/zero of=xv6memfs.img count=10000 -	dd if=bootblock of=xv6memfs.img conv=notrunc -	dd if=kernelmemfs of=xv6memfs.img seek=1 conv=notrunc - -bootblock: bootasm.S bootmain.c -	$(CC) $(CFLAGS) -fno-pic -O -nostdinc -I. -c bootmain.c -	$(CC) $(CFLAGS) -fno-pic -nostdinc -I. -c bootasm.S -	$(LD) $(LDFLAGS) -N -e start -Ttext 0x7C00 -o bootblock.o bootasm.o bootmain.o -	$(OBJDUMP) -S bootblock.o > bootblock.asm -	$(OBJCOPY) -S -O binary -j .text bootblock.o bootblock -	./sign.pl bootblock - -entryother: entryother.S -	$(CC) $(CFLAGS) -fno-pic -nostdinc -I. -c entryother.S -	$(LD) $(LDFLAGS) -N -e start -Ttext 0x7000 -o bootblockother.o entryother.o -	$(OBJCOPY) -S -O binary -j .text bootblockother.o entryother -	$(OBJDUMP) -S bootblockother.o > entryother.asm - -initcode: initcode.S -	$(CC) $(CFLAGS) -nostdinc -I. 
-c initcode.S -	$(LD) $(LDFLAGS) -N -e start -Ttext 0 -o initcode.out initcode.o -	$(OBJCOPY) -S -O binary initcode.out initcode -	$(OBJDUMP) -S initcode.o > initcode.asm - -kernel: $(OBJS) entry.o entryother initcode kernel.ld -	$(LD) $(LDFLAGS) -T kernel.ld -o kernel entry.o $(OBJS) -b binary initcode entryother -	$(OBJDUMP) -S kernel > kernel.asm -	$(OBJDUMP) -t kernel | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernel.sym - -# kernelmemfs is a copy of kernel that maintains the -# disk image in memory instead of writing to a disk. -# This is not so useful for testing persistent storage or -# exploring disk buffering implementations, but it is -# great for testing the kernel on real hardware without -# needing a scratch disk. -MEMFSOBJS = $(filter-out ide.o,$(OBJS)) memide.o -kernelmemfs: $(MEMFSOBJS) entry.o entryother initcode kernel.ld fs.img -	$(LD) $(LDFLAGS) -T kernel.ld -o kernelmemfs entry.o  $(MEMFSOBJS) -b binary initcode entryother fs.img -	$(OBJDUMP) -S kernelmemfs > kernelmemfs.asm -	$(OBJDUMP) -t kernelmemfs | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernelmemfs.sym - -tags: $(OBJS) entryother.S _init -	etags *.S *.c +LDFLAGS = -z max-page-size=4096 -vectors.S: vectors.pl -	./vectors.pl > vectors.S +$K/kernel: $(OBJS) $K/kernel.ld $U/initcode +	$(LD) $(LDFLAGS) -T $K/kernel.ld -o $K/kernel $(OBJS)  +	$(OBJDUMP) -S $K/kernel > $K/kernel.asm +	$(OBJDUMP) -t $K/kernel | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > $K/kernel.sym -ULIB = ulib.o usys.o printf.o umalloc.o +$U/initcode: $U/initcode.S +	$(CC) $(CFLAGS) -nostdinc -I. 
-Ikernel -c $U/initcode.S -o $U/initcode.o +	$(LD) $(LDFLAGS) -N -e start -Ttext 0 -o $U/initcode.out $U/initcode.o +	$(OBJCOPY) -S -O binary $U/initcode.out $U/initcode +	$(OBJDUMP) -S $U/initcode.o > $U/initcode.asm + +tags: $(OBJS) _init +	etags *.S *.c + +ULIB = $U/ulib.o $U/usys.o $U/printf.o $U/umalloc.o  _%: %.o $(ULIB)  	$(LD) $(LDFLAGS) -N -e main -Ttext 0 -o $@ $^  	$(OBJDUMP) -S $@ > $*.asm  	$(OBJDUMP) -t $@ | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > $*.sym -_forktest: forktest.o $(ULIB) +$U/usys.S : $U/usys.pl +	perl $U/usys.pl > $U/usys.S + +$U/usys.o : $U/usys.S +	$(CC) $(CFLAGS) -c -o $U/usys.o $U/usys.S + +$U/_forktest: $U/forktest.o $(ULIB)  	# forktest has less library code linked in - needs to be small  	# in order to be able to max out the proc table. -	$(LD) $(LDFLAGS) -N -e main -Ttext 0 -o _forktest forktest.o ulib.o usys.o -	$(OBJDUMP) -S _forktest > forktest.asm +	$(LD) $(LDFLAGS) -N -e main -Ttext 0 -o $U/_forktest $U/forktest.o $U/ulib.o $U/usys.o +	$(OBJDUMP) -S $U/_forktest > $U/forktest.asm -mkfs: mkfs.c fs.h -	gcc -Werror -Wall -o mkfs mkfs.c +mkfs/mkfs: mkfs/mkfs.c $K/fs.h +	gcc -Werror -Wall -I. -o mkfs/mkfs mkfs/mkfs.c  # Prevent deletion of intermediate files, e.g. cat.o, after first build, so  # that disk image changes after first build are persistent until clean.  
More @@ -166,50 +114,36 @@ mkfs: mkfs.c fs.h  .PRECIOUS: %.o  UPROGS=\ -	_cat\ -	_echo\ -	_forktest\ -	_grep\ -	_init\ -	_kill\ -	_ln\ -	_ls\ -	_mkdir\ -	_rm\ -	_sh\ -	_stressfs\ -	_usertests\ -	_wc\ -	_zombie\ - -fs.img: mkfs README $(UPROGS) -	./mkfs fs.img README $(UPROGS) - --include *.d +	$U/_cat\ +	$U/_echo\ +	$U/_forktest\ +	$U/_grep\ +	$U/_init\ +	$U/_kill\ +	$U/_ln\ +	$U/_ls\ +	$U/_mkdir\ +	$U/_rm\ +	$U/_sh\ +	$U/_stressfs\ +	$U/_usertests\ +	$U/_wc\ +	$U/_zombie\ +	$U/_cow\ + +fs.img: mkfs/mkfs README $(UPROGS) +	mkfs/mkfs fs.img README $(UPROGS) + +-include kernel/*.d user/*.d  clean:   	rm -f *.tex *.dvi *.idx *.aux *.log *.ind *.ilg \ -	*.o *.d *.asm *.sym vectors.S bootblock entryother \ -	initcode initcode.out kernel xv6.img fs.img kernelmemfs \ -	xv6memfs.img mkfs .gdbinit \ +	*/*.o */*.d */*.asm */*.sym \ +	$U/initcode $U/initcode.out $K/kernel fs.img \ +	mkfs/mkfs .gdbinit \ +        $U/usys.S \  	$(UPROGS) -# make a printout -FILES = $(shell grep -v '^\#' runoff.list) -PRINT = runoff.list runoff.spec README toc.hdr toc.ftr $(FILES) - -xv6.pdf: $(PRINT) -	./runoff -	ls -l xv6.pdf - -print: xv6.pdf - -# run in emulators - -bochs : fs.img xv6.img -	if [ ! 
-e .bochsrc ]; then ln -s dot-bochsrc .bochsrc; fi -	bochs -q -  # try to generate a unique GDB port  GDBPORT = $(shell expr `id -u` % 5000 + 25000)  # QEMU's gdb stub command line changed in 0.11 @@ -217,29 +151,20 @@ QEMUGDB = $(shell if $(QEMU) -help | grep -q '^-gdb'; \  	then echo "-gdb tcp::$(GDBPORT)"; \  	else echo "-s -p $(GDBPORT)"; fi)  ifndef CPUS -CPUS := 2 +CPUS := 3  endif -QEMUOPTS = -drive file=fs.img,index=1,media=disk,format=raw -drive file=xv6.img,index=0,media=disk,format=raw -smp $(CPUS) -m 512 $(QEMUEXTRA) +QEMUOPTS = -machine virt -kernel $K/kernel -m 3G -smp $(CPUS) -nographic +QEMUOPTS += -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0 -qemu: fs.img xv6.img -	$(QEMU) -serial mon:stdio $(QEMUOPTS) +qemu: $K/kernel fs.img +	$(QEMU) $(QEMUOPTS) -qemu-memfs: xv6memfs.img -	$(QEMU) -drive file=xv6memfs.img,index=0,media=disk,format=raw -smp $(CPUS) -m 256 - -qemu-nox: fs.img xv6.img -	$(QEMU) -nographic $(QEMUOPTS) - -.gdbinit: .gdbinit.tmpl -	sed "s/localhost:1234/localhost:$(GDBPORT)/" < $^ > $@ - -qemu-gdb: fs.img xv6.img .gdbinit -	@echo "*** Now run 'gdb'." 1>&2 -	$(QEMU) -serial mon:stdio $(QEMUOPTS) -S $(QEMUGDB) +.gdbinit: .gdbinit.tmpl-riscv +	sed "s/:1234/:$(GDBPORT)/" < $^ > $@ -qemu-nox-gdb: fs.img xv6.img .gdbinit +qemu-gdb: $K/kernel .gdbinit fs.img  	@echo "*** Now run 'gdb'." 
1>&2 -	$(QEMU) -nographic $(QEMUOPTS) -S $(QEMUGDB) +	$(QEMU) $(QEMUOPTS) -S $(QEMUGDB)  # CUT HERE  # prepare dist for students @@ -251,7 +176,7 @@ EXTRA=\  	mkfs.c ulib.c user.h cat.c echo.c forktest.c grep.c kill.c\  	ln.c ls.c mkdir.c rm.c stressfs.c usertests.c wc.c zombie.c\  	printf.c umalloc.c\ -	README dot-bochsrc *.pl toc.* runoff runoff1 runoff.list\ +	README dot-bochsrc *.pl \  	.gdbinit.tmpl gdbutil\  dist: @@ -1,123 +0,0 @@ -bochs 2.2.6: -./configure --enable-smp --enable-disasm --enable-debugger --enable-all-optimizations --enable-4meg-pages --enable-global-pages --enable-pae --disable-reset-on-triple-fault -bochs CVS after 2.2.6: -./configure --enable-smp --enable-disasm --enable-debugger --enable-all-optimizations --enable-4meg-pages --enable-global-pages --enable-pae  - -bootmain.c doesn't work right if the ELF sections aren't -sector-aligned. so you can't use ld -N. and the sections may also need -to be non-zero length, only really matters for tiny "kernels". - -kernel loaded at 1 megabyte. stack same place that bootasm.S left it. - -kinit() should find real mem size -  and rescue useable memory below 1 meg - -no paging, no use of page table hardware, just segments - -no user area: no magic kernel stack mapping -  so no copying of kernel stack during fork -  though there is a kernel stack page for each process - -no kernel malloc(), just kalloc() for user core - -user pointers aren't valid in the kernel - -are interrupts turned on in the kernel? yes. - -pass curproc explicitly, or implicit from cpu #? -  e.g. argument to newproc()? -  hmm, you need a global curproc[cpu] for trap() &c - -no stack expansion - -test running out of memory, process slots - -we can't really use a separate stack segment, since stack addresses -need to work correctly as ordinary pointers. the same may be true of -data vs text. how can we have a gap between data and stack, so that -both can grow, without committing 4GB of physical memory? does this -mean we need paging? 
- -perhaps have fixed-size stack, put it in the data segment? - -oops, if kernel stack is in contiguous user phys mem, then moving -users' memory (e.g. to expand it) will wreck any pointers into the -kernel stack. - -do we need to set fs and gs? so user processes can't abuse them? - -setupsegs() may modify current segment table, is that legal? - -trap() ought to lgdt on return, since currently only done in swtch() - -protect hardware interrupt vectors from user INT instructions? - -test out-of-fd cases for creating pipe. -test pipe reader closes then write -test two readers, two writers. -test children being inherited by grandparent &c - -some sleep()s should be interruptible by kill() - -locks -  init_lock -    sequences CPU startup -  proc_table_lock -    also protects next_pid -  per-fd lock *just* protects count read-modify-write -    also maybe freeness? -  memory allocator -  printf - -in general, the table locks protect both free-ness and -  public variables of table elements -  in many cases you can use table elements w/o a lock -  e.g. if you are the process, or you are using an fd - -lock order -  per-pipe lock -  proc_table_lock fd_table_lock kalloc_lock -  console_lock - -do you have to be holding the mutex in order to call wakeup()? yes - -device interrupts don't clear FL_IF -  so a recursive timer interrupt is possible - -what does inode->busy mean? -  might be held across disk reads -  no-one is allowed to do anything to the inode -  protected by inode_table_lock -inode->count counts in-memory pointers to the struct -  prevents inode[] element from being re-used -  protected by inode_table_lock - -blocks and inodes have ad-hoc sleep-locks -  provide a single mechanism? - -kalloc() can return 0; do callers handle this right? - -test: one process unlinks a file while another links to it -test: one process opens a file while another deletes it -test: deadlock d/.. vs ../d, two processes. -test: dup() shared fd->off -test: does echo foo > x truncate x? 
- -sh: ioredirection incorrect now we have pipes -sh: chain of pipes won't work, also ugly that parent closes fdarray entries too -sh: dynamic memory allocation? -sh: should sh support ; () & -sh: stop stdin on ctrl-d (for cat > y) - -really should have bdwrite() for file content -  and make some inode updates async -  so soft updates make sense - -disk scheduling -echo foo > bar should truncate bar -  so O_CREATE should not truncate -  but O_TRUNC should - -make it work on a real machine -release before acquire at end of sleep? -check 2nd disk (i.e. if not in .bochsrc) @@ -1,6 +1,6 @@  xv6 is a re-implementation of Dennis Ritchie's and Ken Thompson's Unix  Version 6 (v6).  xv6 loosely follows the structure and style of v6, -but is implemented for a modern x86-based multiprocessor using ANSI C. +but is implemented for a modern RISC-V multiprocessor using ANSI C.  ACKNOWLEDGMENTS @@ -19,19 +19,20 @@ The following people have made contributions: Russ Cox (context switching,  locking), Cliff Frey (MP), Xiao Yu (MP), Nickolai Zeldovich, and Austin  Clements. -We are also grateful for the bug reports and patches contributed by Silas -Boyd-Wickizer, Anton Burtsev, Cody Cutler, Mike CAT, Tej Chajed, eyalz800, -Nelson Elhage, Saar Ettinger, Alice Ferrazzi, Nathaniel Filardo, Peter -Froehlich, Yakir Goaron,Shivam Handa, Bryan Henry, Jim Huang, Alexander -Kapshuk, Anders Kaseorg, kehao95, Wolfgang Keller, Eddie Kohler, Austin -Liew, Imbar Marinescu, Yandong Mao, Matan Shabtay, Hitoshi Mitake, Carmi -Merimovich, Mark Morrissey, mtasm, Joel Nider, Greg Price, Ayan Shafqat, -Eldar Sehayek, Yongming Shen, Cam Tenny, tyfkda, Rafael Ubal, Warren -Toomey, Stephen Tu, Pablo Ventura, Xi Wang, Keiichi Watanabe, Nicolas -Wolovick, wxdao, Grant Wu, Jindong Zhang, Icenowy Zheng, and Zou Chang Wei. 
+We are also grateful for the bug reports and patches contributed by +Silas Boyd-Wickizer, Anton Burtsev, Dan Cross, Cody Cutler, Mike CAT, +Tej Chajed, eyalz800, Nelson Elhage, Saar Ettinger, Alice Ferrazzi, +Nathaniel Filardo, Peter Froehlich, Yakir Goaron,Shivam Handa, Bryan +Henry, Jim Huang, Alexander Kapshuk, Anders Kaseorg, kehao95, Wolfgang +Keller, Eddie Kohler, Austin Liew, Imbar Marinescu, Yandong Mao, Matan +Shabtay, Hitoshi Mitake, Carmi Merimovich, Mark Morrissey, mtasm, Joel +Nider, Greg Price, Ayan Shafqat, Eldar Sehayek, Yongming Shen, Cam +Tenny, tyfkda, Rafael Ubal, Warren Toomey, Stephen Tu, Pablo Ventura, +Xi Wang, Keiichi Watanabe, Nicolas Wolovick, wxdao, Grant Wu, Jindong +Zhang, Icenowy Zheng, and Zou Chang Wei.  The code in the files that constitute xv6 is -Copyright 2006-2018 Frans Kaashoek, Robert Morris, and Russ Cox. +Copyright 2006-2019 Frans Kaashoek, Robert Morris, and Russ Cox.  ERROR REPORTS @@ -42,9 +43,7 @@ simplifications and clarifications than new features.  BUILDING AND RUNNING XV6 -To build xv6 on an x86 ELF machine (like Linux or FreeBSD), run -"make". On non-x86 or non-ELF machines (like OS X, even on x86), you -will need to install a cross-compiler gcc suite capable of producing -x86 ELF binaries (see https://pdos.csail.mit.edu/6.828/). -Then run "make TOOLPREFIX=i386-jos-elf-". Now install the QEMU PC -simulator and run "make qemu". +You will need a RISC-V "newlib" tool chain from +https://github.com/riscv/riscv-gnu-toolchain, and qemu compiled for +riscv64-softmmu. Once they are installed, and in your shell +search path, you can run "make qemu". @@ -1,140 +0,0 @@ -This file lists subtle things that might not be commented  -as well as they should be in the source code and that -might be worth pointing out in a longer explanation or in class. - ---- - -[2009/07/12: No longer relevant; forkret1 changed -and this is now cleaner.] - -forkret1 in trapasm.S is called with a tf argument. 
-In order to use it, forkret1 copies the tf pointer into -%esp and then jumps to trapret, which pops the  -register state out of the trap frame.  If an interrupt -came in between the mov tf, %esp and the iret that -goes back out to user space, the interrupt stack frame -would end up scribbling over the tf and whatever memory -lay under it. - -Why is this safe?  Because forkret1 is only called -the first time a process returns to user space, and -at that point, cp->tf is set to point to a trap frame -constructed at the top of cp's kernel stack.  So tf  -*is* a valid %esp that can hold interrupt state. - -If other tf's were used in forkret1, we could add -a cli before the mov tf, %esp. - ---- - -In pushcli, must cli() no matter what.  It is not safe to do - -  if(cpus[cpu()].ncli == 0) -    cli(); -  cpus[cpu()].ncli++; - -because if interrupts are off then we might call cpu(), get -rescheduled to a different cpu, look at cpus[oldcpu].ncli, -and wrongly decide not to disable interrupts on the new cpu. - -Instead do  - -  cli(); -  cpus[cpu()].ncli++; - -always. - ---- - -There is a (harmless) race in pushcli, which does - -	eflags = readeflags(); -	cli(); -	if(c->ncli++ == 0) -		c->intena = eflags & FL_IF; - -Consider a bottom-level pushcli.   -If interrupts are disabled already, then the right thing -happens: read_eflags finds that FL_IF is not set, -and intena = 0.  If interrupts are enabled, then -it is less clear that the right thing happens: -the readeflags can execute, then the process -can get preempted and rescheduled on another cpu, -and then once it starts running, perhaps with  -interrupts disabled (can happen since the scheduler -only enables interrupts once per scheduling loop, -not every time it schedules a process), it will  -incorrectly record that interrupts *were* enabled. 
-This doesn't matter, because if it was safe to be -running with interrupts enabled before the context -switch, it is still safe (and arguably more correct) -to run with them enabled after the context switch too. - -In fact it would be safe if scheduler always set -	c->intena = 1; -before calling swtch, and perhaps it should. - ---- - -The x86's processor-ordering memory model  -matches spin locks well, so no explicit memory -synchronization instructions are required in -acquire and release.   - -Consider two sequences of code on different CPUs: - -CPU0 -A; -release(lk); - -and - -CPU1 -acquire(lk); -B; - -We want to make sure that: -  - all reads in B see the effects of writes in A. -  - all reads in A do *not* see the effects of writes in B. -  -The x86 guarantees that writes in A will go out -to memory before the write of lk->locked = 0 in  -release(lk).  It further guarantees that CPU1  -will observe CPU0's write of lk->locked = 0 only -after observing the earlier writes by CPU0. -So any reads in B are guaranteed to observe the -effects of writes in A. - -According to the Intel manual behavior spec, the -second condition requires a serialization instruction -in release, to avoid reads in A happening after giving -up lk.  No Intel SMP processor in existence actually -moves reads down after writes, but the language in -the spec allows it.  There is no telling whether future -processors will need it. - ---- - -The code in fork needs to read np->pid before -setting np->state to RUNNABLE.  The following -is not a correct way to do this: - -	int -	fork(void) -	{ -	  ... -	  np->state = RUNNABLE; -	  return np->pid; // oops -	} - -After setting np->state to RUNNABLE, some other CPU -might run the process, it might exit, and then it might -get reused for a different process (with a new pid), all -before the return statement.  So it's not safe to just -"return np->pid". 
Even saving a copy of np->pid before -setting np->state isn't safe, since the compiler is -allowed to re-order statements. - -The real code saves a copy of np->pid, then acquires a lock -around the write to np->state. The acquire() prevents the -compiler from re-ordering. @@ -1,18 +0,0 @@ -// -// assembler macros to create x86 segments -// - -#define SEG_NULLASM                                             \ -        .word 0, 0;                                             \ -        .byte 0, 0, 0, 0 - -// The 0xC0 means the limit is in 4096-byte units -// and (for executable segments) 32-bit mode. -#define SEG_ASM(type,base,lim)                                  \ -        .word (((lim) >> 12) & 0xffff), ((base) & 0xffff);      \ -        .byte (((base) >> 16) & 0xff), (0x90 | (type)),         \ -                (0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff) - -#define STA_X     0x8       // Executable segment -#define STA_W     0x2       // Writeable (non-executable segments) -#define STA_R     0x2       // Readable (executable segments) diff --git a/bootasm.S b/bootasm.S deleted file mode 100644 index 257867c..0000000 --- a/bootasm.S +++ /dev/null @@ -1,88 +0,0 @@ -#include "asm.h" -#include "memlayout.h" -#include "mmu.h" - -# Start the first CPU: switch to 32-bit protected mode, jump into C. -# The BIOS loads this code from the first sector of the hard disk into -# memory at physical address 0x7c00 and starts executing in real mode -# with %cs=0 %ip=7c00. - -.code16                       # Assemble for 16-bit mode -.globl start -start: -  cli                         # BIOS enabled interrupts; disable - -  # Zero data segment registers DS, ES, and SS. 
-  xorw    %ax,%ax             # Set %ax to zero -  movw    %ax,%ds             # -> Data Segment -  movw    %ax,%es             # -> Extra Segment -  movw    %ax,%ss             # -> Stack Segment - -  # Physical address line A20 is tied to zero so that the first PCs  -  # with 2 MB would run software that assumed 1 MB.  Undo that. -seta20.1: -  inb     $0x64,%al               # Wait for not busy -  testb   $0x2,%al -  jnz     seta20.1 - -  movb    $0xd1,%al               # 0xd1 -> port 0x64 -  outb    %al,$0x64 - -seta20.2: -  inb     $0x64,%al               # Wait for not busy -  testb   $0x2,%al -  jnz     seta20.2 - -  movb    $0xdf,%al               # 0xdf -> port 0x60 -  outb    %al,$0x60 - -  # Switch from real to protected mode.  Use a bootstrap GDT that makes -  # virtual addresses map directly to physical addresses so that the -  # effective memory map doesn't change during the transition. -  lgdt    gdtdesc -  movl    %cr0, %eax -  orl     $CR0_PE, %eax -  movl    %eax, %cr0 - -//PAGEBREAK! -  # Complete the transition to 32-bit protected mode by using a long jmp -  # to reload %cs and %eip.  The segment descriptors are set up with no -  # translation, so that the mapping is still the identity mapping. -  ljmp    $(SEG_KCODE<<3), $start32 - -.code32  # Tell assembler to generate 32-bit code now. -start32: -  # Set up the protected-mode data segment registers -  movw    $(SEG_KDATA<<3), %ax    # Our data segment selector -  movw    %ax, %ds                # -> DS: Data Segment -  movw    %ax, %es                # -> ES: Extra Segment -  movw    %ax, %ss                # -> SS: Stack Segment -  movw    $0, %ax                 # Zero segments not ready for use -  movw    %ax, %fs                # -> FS -  movw    %ax, %gs                # -> GS - -  # Set up the stack pointer and call into C. -  movl    $start, %esp -  call    bootmain - -  # If bootmain returns (it shouldn't), trigger a Bochs -  # breakpoint if running under Bochs, then loop. 
-  movw    $0x8a00, %ax            # 0x8a00 -> port 0x8a00 -  movw    %ax, %dx -  outw    %ax, %dx -  movw    $0x8ae0, %ax            # 0x8ae0 -> port 0x8a00 -  outw    %ax, %dx -spin: -  jmp     spin - -# Bootstrap GDT -.p2align 2                                # force 4 byte alignment -gdt: -  SEG_NULLASM                             # null seg -  SEG_ASM(STA_X|STA_R, 0x0, 0xffffffff)   # code seg -  SEG_ASM(STA_W, 0x0, 0xffffffff)         # data seg - -gdtdesc: -  .word   (gdtdesc - gdt - 1)             # sizeof(gdt) - 1 -  .long   gdt                             # address gdt - diff --git a/bootmain.c b/bootmain.c deleted file mode 100644 index 1f20e5b..0000000 --- a/bootmain.c +++ /dev/null @@ -1,96 +0,0 @@ -// Boot loader. -// -// Part of the boot block, along with bootasm.S, which calls bootmain(). -// bootasm.S has put the processor into protected 32-bit mode. -// bootmain() loads an ELF kernel image from the disk starting at -// sector 1 and then jumps to the kernel entry routine. - -#include "types.h" -#include "elf.h" -#include "x86.h" -#include "memlayout.h" - -#define SECTSIZE  512 - -void readseg(uchar*, uint, uint); - -void -bootmain(void) -{ -  struct elfhdr *elf; -  struct proghdr *ph, *eph; -  void (*entry)(void); -  uchar* pa; - -  elf = (struct elfhdr*)0x10000;  // scratch space - -  // Read 1st page off disk -  readseg((uchar*)elf, 4096, 0); - -  // Is this an ELF executable? -  if(elf->magic != ELF_MAGIC) -    return;  // let bootasm.S handle error - -  // Load each program segment (ignores ph flags). -  ph = (struct proghdr*)((uchar*)elf + elf->phoff); -  eph = ph + elf->phnum; -  for(; ph < eph; ph++){ -    pa = (uchar*)ph->paddr; -    readseg(pa, ph->filesz, ph->off); -    if(ph->memsz > ph->filesz) -      stosb(pa + ph->filesz, 0, ph->memsz - ph->filesz); -  } - -  // Call the entry point from the ELF header. -  // Does not return! -  entry = (void(*)(void))(elf->entry); -  entry(); -} - -void -waitdisk(void) -{ -  // Wait for disk ready. 
-  while((inb(0x1F7) & 0xC0) != 0x40) -    ; -} - -// Read a single sector at offset into dst. -void -readsect(void *dst, uint offset) -{ -  // Issue command. -  waitdisk(); -  outb(0x1F2, 1);   // count = 1 -  outb(0x1F3, offset); -  outb(0x1F4, offset >> 8); -  outb(0x1F5, offset >> 16); -  outb(0x1F6, (offset >> 24) | 0xE0); -  outb(0x1F7, 0x20);  // cmd 0x20 - read sectors - -  // Read data. -  waitdisk(); -  insl(0x1F0, dst, SECTSIZE/4); -} - -// Read 'count' bytes at 'offset' from kernel into physical address 'pa'. -// Might copy more than asked. -void -readseg(uchar* pa, uint count, uint offset) -{ -  uchar* epa; - -  epa = pa + count; - -  // Round down to sector boundary. -  pa -= offset % SECTSIZE; - -  // Translate from bytes to sectors; kernel starts at sector 1. -  offset = (offset / SECTSIZE) + 1; - -  // If this is too slow, we could read lots of sectors at a time. -  // We'd write more to memory than asked, but it doesn't matter -- -  // we load in increasing order. -  for(; pa < epa; pa += SECTSIZE, offset++) -    readsect(pa, offset); -} diff --git a/console.c b/console.c deleted file mode 100644 index a280d2b..0000000 --- a/console.c +++ /dev/null @@ -1,299 +0,0 @@ -// Console input and output. -// Input is from the keyboard or serial port. -// Output is written to the screen and serial port. 
- -#include "types.h" -#include "defs.h" -#include "param.h" -#include "traps.h" -#include "spinlock.h" -#include "sleeplock.h" -#include "fs.h" -#include "file.h" -#include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" - -static void consputc(int); - -static int panicked = 0; - -static struct { -  struct spinlock lock; -  int locking; -} cons; - -static void -printint(int xx, int base, int sign) -{ -  static char digits[] = "0123456789abcdef"; -  char buf[16]; -  int i; -  uint x; - -  if(sign && (sign = xx < 0)) -    x = -xx; -  else -    x = xx; - -  i = 0; -  do{ -    buf[i++] = digits[x % base]; -  }while((x /= base) != 0); - -  if(sign) -    buf[i++] = '-'; - -  while(--i >= 0) -    consputc(buf[i]); -} -//PAGEBREAK: 50 - -// Print to the console. only understands %d, %x, %p, %s. -void -cprintf(char *fmt, ...) -{ -  int i, c, locking; -  uint *argp; -  char *s; - -  locking = cons.locking; -  if(locking) -    acquire(&cons.lock); - -  if (fmt == 0) -    panic("null fmt"); - -  argp = (uint*)(void*)(&fmt + 1); -  for(i = 0; (c = fmt[i] & 0xff) != 0; i++){ -    if(c != '%'){ -      consputc(c); -      continue; -    } -    c = fmt[++i] & 0xff; -    if(c == 0) -      break; -    switch(c){ -    case 'd': -      printint(*argp++, 10, 1); -      break; -    case 'x': -    case 'p': -      printint(*argp++, 16, 0); -      break; -    case 's': -      if((s = (char*)*argp++) == 0) -        s = "(null)"; -      for(; *s; s++) -        consputc(*s); -      break; -    case '%': -      consputc('%'); -      break; -    default: -      // Print unknown % sequence to draw attention. 
-      consputc('%'); -      consputc(c); -      break; -    } -  } - -  if(locking) -    release(&cons.lock); -} - -void -panic(char *s) -{ -  int i; -  uint pcs[10]; - -  cli(); -  cons.locking = 0; -  // use lapiccpunum so that we can call panic from mycpu() -  cprintf("lapicid %d: panic: ", lapicid()); -  cprintf(s); -  cprintf("\n"); -  getcallerpcs(&s, pcs); -  for(i=0; i<10; i++) -    cprintf(" %p", pcs[i]); -  panicked = 1; // freeze other CPU -  for(;;) -    ; -} - -//PAGEBREAK: 50 -#define BACKSPACE 0x100 -#define CRTPORT 0x3d4 -static ushort *crt = (ushort*)P2V(0xb8000);  // CGA memory - -static void -cgaputc(int c) -{ -  int pos; - -  // Cursor position: col + 80*row. -  outb(CRTPORT, 14); -  pos = inb(CRTPORT+1) << 8; -  outb(CRTPORT, 15); -  pos |= inb(CRTPORT+1); - -  if(c == '\n') -    pos += 80 - pos%80; -  else if(c == BACKSPACE){ -    if(pos > 0) --pos; -  } else -    crt[pos++] = (c&0xff) | 0x0700;  // black on white - -  if(pos < 0 || pos > 25*80) -    panic("pos under/overflow"); - -  if((pos/80) >= 24){  // Scroll up. -    memmove(crt, crt+80, sizeof(crt[0])*23*80); -    pos -= 80; -    memset(crt+pos, 0, sizeof(crt[0])*(24*80 - pos)); -  } - -  outb(CRTPORT, 14); -  outb(CRTPORT+1, pos>>8); -  outb(CRTPORT, 15); -  outb(CRTPORT+1, pos); -  crt[pos] = ' ' | 0x0700; -} - -void -consputc(int c) -{ -  if(panicked){ -    cli(); -    for(;;) -      ; -  } - -  if(c == BACKSPACE){ -    uartputc('\b'); uartputc(' '); uartputc('\b'); -  } else -    uartputc(c); -  cgaputc(c); -} - -#define INPUT_BUF 128 -struct { -  char buf[INPUT_BUF]; -  uint r;  // Read index -  uint w;  // Write index -  uint e;  // Edit index -} input; - -#define C(x)  ((x)-'@')  // Control-x - -void -consoleintr(int (*getc)(void)) -{ -  int c, doprocdump = 0; - -  acquire(&cons.lock); -  while((c = getc()) >= 0){ -    switch(c){ -    case C('P'):  // Process listing. 
-      // procdump() locks cons.lock indirectly; invoke later -      doprocdump = 1; -      break; -    case C('U'):  // Kill line. -      while(input.e != input.w && -            input.buf[(input.e-1) % INPUT_BUF] != '\n'){ -        input.e--; -        consputc(BACKSPACE); -      } -      break; -    case C('H'): case '\x7f':  // Backspace -      if(input.e != input.w){ -        input.e--; -        consputc(BACKSPACE); -      } -      break; -    default: -      if(c != 0 && input.e-input.r < INPUT_BUF){ -        c = (c == '\r') ? '\n' : c; -        input.buf[input.e++ % INPUT_BUF] = c; -        consputc(c); -        if(c == '\n' || c == C('D') || input.e == input.r+INPUT_BUF){ -          input.w = input.e; -          wakeup(&input.r); -        } -      } -      break; -    } -  } -  release(&cons.lock); -  if(doprocdump) { -    procdump();  // now call procdump() wo. cons.lock held -  } -} - -int -consoleread(struct inode *ip, char *dst, int n) -{ -  uint target; -  int c; - -  iunlock(ip); -  target = n; -  acquire(&cons.lock); -  while(n > 0){ -    while(input.r == input.w){ -      if(myproc()->killed){ -        release(&cons.lock); -        ilock(ip); -        return -1; -      } -      sleep(&input.r, &cons.lock); -    } -    c = input.buf[input.r++ % INPUT_BUF]; -    if(c == C('D')){  // EOF -      if(n < target){ -        // Save ^D for next time, to make sure -        // caller gets a 0-byte result. 
-        input.r--; -      } -      break; -    } -    *dst++ = c; -    --n; -    if(c == '\n') -      break; -  } -  release(&cons.lock); -  ilock(ip); - -  return target - n; -} - -int -consolewrite(struct inode *ip, char *buf, int n) -{ -  int i; - -  iunlock(ip); -  acquire(&cons.lock); -  for(i = 0; i < n; i++) -    consputc(buf[i] & 0xff); -  release(&cons.lock); -  ilock(ip); - -  return n; -} - -void -consoleinit(void) -{ -  initlock(&cons.lock, "console"); - -  devsw[CONSOLE].write = consolewrite; -  devsw[CONSOLE].read = consoleread; -  cons.locking = 1; - -  ioapicenable(IRQ_KBD, 0); -} - @@ -1,48 +0,0 @@ -#!/usr/bin/perl - -$| = 1; - -sub writefile($@){ -	my ($file, @lines) = @_; -	 -	sleep(1); -	open(F, ">$file") || die "open >$file: $!"; -	print F @lines; -	close(F); -} - -# Cut out #include lines that don't contribute anything. -for($i=0; $i<@ARGV; $i++){ -	$file = $ARGV[$i]; -	if(!open(F, $file)){ -		print STDERR "open $file: $!\n"; -		next; -	} -	@lines = <F>; -	close(F); -	 -	$obj = "$file.o"; -	$obj =~ s/\.c\.o$/.o/; -	system("touch $file"); - -	if(system("make CC='gcc -Werror' $obj >/dev/null 2>\&1") != 0){ -		print STDERR "make $obj failed: $rv\n"; -		next; -	} - -	system("cp $file =$file"); -	for($j=@lines-1; $j>=0; $j--){ -		if($lines[$j] =~ /^#include/){ -			$old = $lines[$j]; -			$lines[$j] = "/* CUT-H */\n"; -			writefile($file, @lines); -			if(system("make CC='gcc -Werror' $obj >/dev/null 2>\&1") != 0){ -				$lines[$j] = $old; -			}else{ -				print STDERR "$file $old"; -			} -		} -	} -	writefile($file, grep {!/CUT-H/} @lines); -	system("rm =$file"); -} diff --git a/doc/FU540-C000-v1.0.pdf b/doc/FU540-C000-v1.0.pdfBinary files differ new file mode 100644 index 0000000..5322d67 --- /dev/null +++ b/doc/FU540-C000-v1.0.pdf diff --git a/doc/riscv-calling.pdf b/doc/riscv-calling.pdfBinary files differ new file mode 100644 index 0000000..a3351b1 --- /dev/null +++ b/doc/riscv-calling.pdf diff --git a/doc/riscv-privileged-v1.10.pdf 
b/doc/riscv-privileged-v1.10.pdfBinary files differ new file mode 100644 index 0000000..6942fe7 --- /dev/null +++ b/doc/riscv-privileged-v1.10.pdf diff --git a/doc/riscv-spec-v2.2.pdf b/doc/riscv-spec-v2.2.pdfBinary files differ new file mode 100644 index 0000000..e4a4634 --- /dev/null +++ b/doc/riscv-spec-v2.2.pdf diff --git a/doc/virtio-v1.1-csprd01.pdf b/doc/virtio-v1.1-csprd01.pdfBinary files differ new file mode 100644 index 0000000..c7be62b --- /dev/null +++ b/doc/virtio-v1.1-csprd01.pdf diff --git a/dot-bochsrc b/dot-bochsrc deleted file mode 100755 index ba13db7..0000000 --- a/dot-bochsrc +++ /dev/null @@ -1,738 +0,0 @@ -# You may now use double quotes around pathnames, in case -# your pathname includes spaces. - -#======================================================================= -# CONFIG_INTERFACE -# -# The configuration interface is a series of menus or dialog boxes that -# allows you to change all the settings that control Bochs's behavior. -# There are two choices of configuration interface: a text mode version -# called "textconfig" and a graphical version called "wx".  The text -# mode version uses stdin/stdout and is always compiled in.  The graphical -# version is only available when you use "--with-wx" on the configure  -# command.  If you do not write a config_interface line, Bochs will  -# choose a default for you. -# -# NOTE: if you use the "wx" configuration interface, you must also use -# the "wx" display library. -#======================================================================= -#config_interface: textconfig -#config_interface: wx - -#======================================================================= -# DISPLAY_LIBRARY -# -# The display library is the code that displays the Bochs VGA screen.  Bochs  -# has a selection of about 10 different display library implementations for  -# different platforms.  
If you run configure with multiple --with-* options,  -# the display_library command lets you choose which one you want to run with. -# If you do not write a display_library line, Bochs will choose a default for -# you. -# -# The choices are:  -#   x              use X windows interface, cross platform -#   win32          use native win32 libraries -#   carbon         use Carbon library (for MacOS X) -#   beos           use native BeOS libraries -#   macintosh      use MacOS pre-10 -#   amigaos        use native AmigaOS libraries -#   sdl            use SDL library, cross platform -#   svga           use SVGALIB library for Linux, allows graphics without X11 -#   term           text only, uses curses/ncurses library, cross platform -#   rfb            provides an interface to AT&T's VNC viewer, cross platform -#   wx             use wxWidgets library, cross platform -#   nogui          no display at all -# -# NOTE: if you use the "wx" configuration interface, you must also use -# the "wx" display library. -# -# Specific options: -# Some display libraries now support specific option to control their -# behaviour. See the examples below for currently supported options. -#======================================================================= -#display_library: amigaos -#display_library: beos -#display_library: carbon -#display_library: macintosh -#display_library: nogui -#display_library: rfb, options="timeout=60" # time to wait for client -#display_library: sdl, options="fullscreen" # startup in fullscreen mode -#display_library: term -#display_library: win32, options="legacyF12" # use F12 to toggle mouse -#display_library: wx -#display_library: x - -#======================================================================= -# ROMIMAGE: -# The ROM BIOS controls what the PC does when it first powers on. -# Normally, you can use a precompiled BIOS in the source or binary -# distribution called BIOS-bochs-latest. 
The ROM BIOS is usually loaded -# starting at address 0xf0000, and it is exactly 64k long. -# You can also use the environment variable $BXSHARE to specify the -# location of the BIOS. -# The usage of external large BIOS images (up to 512k) at memory top is -# now supported, but we still recommend to use the BIOS distributed with -# Bochs. Now the start address can be calculated from image size. -#======================================================================= -romimage: file=$BXSHARE/BIOS-bochs-latest -#romimage: file=mybios.bin, address=0xfff80000 # 512k at memory top -#romimage: file=mybios.bin # calculate start address from image size - -#======================================================================= -# CPU: -# This defines cpu-related parameters inside Bochs: -# -#  COUNT: -#  Set the number of processors when Bochs is compiled for SMP emulation. -#  Bochs currently supports up to 8 processors. If Bochs is compiled -#  without SMP support, it won't accept values different from 1. -# -#  IPS: -#  Emulated Instructions Per Second.  This is the number of IPS that bochs -#  is capable of running on your machine. You can recompile Bochs with -#  --enable-show-ips option enabled, to find your workstation's capability. -#  Measured IPS value will then be logged into your log file or status bar -#  (if supported by the gui). -# -#  IPS is used to calibrate many time-dependent events within the bochs  -#  simulation.  For example, changing IPS affects the frequency of VGA -#  updates, the duration of time before a key starts to autorepeat, and -#  the measurement of BogoMips and other benchmarks. 
-# -#  Examples: -#  Machine                                         Mips -# ________________________________________________________________ -#  2.1Ghz Athlon XP with Linux 2.6/g++ 3.4         12 to 15 Mips -#  1.6Ghz Intel P4 with Win2000/g++ 3.3             5 to  7 Mips -#  650Mhz Athlon K-7 with Linux 2.4.4/egcs-2.91.66  2 to  2.5 Mips -#  400Mhz Pentium II with Linux 2.0.36/egcs-1.0.3   1 to  1.8 Mips -#======================================================================= -cpu: count=2, ips=10000000 - -#======================================================================= -# MEGS -# Set the number of Megabytes of physical memory you want to emulate.  -# The default is 32MB, most OS's won't need more than that. -# The maximum amount of memory supported is 2048Mb. -#======================================================================= -#megs: 256 -#megs: 128 -#megs: 64 -megs: 32 -#megs: 16 -#megs: 8 - -#======================================================================= -# OPTROMIMAGE[1-4]: -# You may now load up to 4 optional ROM images. Be sure to use a  -# read-only area, typically between C8000 and EFFFF. These optional -# ROM images should not overwrite the rombios (located at -# F0000-FFFFF) and the videobios (located at C0000-C7FFF). -# Those ROM images will be initialized by the bios if they contain  -# the right signature (0x55AA) and a valid checksum. 
-# It can also be a convenient way to upload some arbitrary code/data -# in the simulation, that can be retrieved by the boot loader -#======================================================================= -#optromimage1: file=optionalrom.bin, address=0xd0000 -#optromimage2: file=optionalrom.bin, address=0xd1000 -#optromimage3: file=optionalrom.bin, address=0xd2000 -#optromimage4: file=optionalrom.bin, address=0xd3000 - -#optramimage1: file=/path/file1.img, address=0x0010000 -#optramimage2: file=/path/file2.img, address=0x0020000 -#optramimage3: file=/path/file3.img, address=0x0030000 -#optramimage4: file=/path/file4.img, address=0x0040000 - -#======================================================================= -# VGAROMIMAGE -# You now need to load a VGA ROM BIOS into C0000. -#======================================================================= -#vgaromimage: file=bios/VGABIOS-elpin-2.40 -vgaromimage: file=$BXSHARE/VGABIOS-lgpl-latest -#vgaromimage: file=bios/VGABIOS-lgpl-latest-cirrus - -#======================================================================= -# VGA: -# Here you can specify the display extension to be used. With the value -# 'none' you can use standard VGA with no extension. Other supported -# values are 'vbe' for Bochs VBE and 'cirrus' for Cirrus SVGA support. -#======================================================================= -#vga: extension=cirrus -#vga: extension=vbe -vga: extension=none - -#======================================================================= -# FLOPPYA: -# Point this to pathname of floppy image file or device -# This should be of a bootable floppy(image/device) if you're -# booting from 'a' (or 'floppy'). -# -# You can set the initial status of the media to 'ejected' or 'inserted'. 
-#   floppya: 2_88=path, status=ejected             (2.88M 3.5" floppy) -#   floppya: 1_44=path, status=inserted            (1.44M 3.5" floppy) -#   floppya: 1_2=path, status=ejected              (1.2M  5.25" floppy) -#   floppya: 720k=path, status=inserted            (720K  3.5" floppy) -#   floppya: 360k=path, status=inserted            (360K  5.25" floppy) -#   floppya: 320k=path, status=inserted            (320K  5.25" floppy) -#   floppya: 180k=path, status=inserted            (180K  5.25" floppy) -#   floppya: 160k=path, status=inserted            (160K  5.25" floppy) -#   floppya: image=path, status=inserted           (guess type from image size) -# -# The path should be the name of a disk image file.  On Unix, you can use a raw -# device name such as /dev/fd0 on Linux.  On win32 platforms, use drive letters -# such as a: or b: as the path.  The parameter 'image' works with image files -# only. In that case the size must match one of the supported types. -#======================================================================= -floppya: 1_44=/dev/fd0, status=inserted -#floppya: image=../1.44, status=inserted -#floppya: 1_44=/dev/fd0H1440, status=inserted -#floppya: 1_2=../1_2, status=inserted -#floppya: 1_44=a:, status=inserted -#floppya: 1_44=a.img, status=inserted -#floppya: 1_44=/dev/rfd0a, status=inserted - -#======================================================================= -# FLOPPYB: -# See FLOPPYA above for syntax -#======================================================================= -#floppyb: 1_44=b:, status=inserted -floppyb: 1_44=b.img, status=inserted - -#======================================================================= -# ATA0, ATA1, ATA2, ATA3 -# ATA controller for hard disks and cdroms -# -# ata[0-3]: enabled=[0|1], ioaddr1=addr, ioaddr2=addr, irq=number -#  -# These options enables up to 4 ata channels. For each channel -# the two base io addresses and the irq must be specified. 
-#  -# ata0 and ata1 are enabled by default with the values shown below -# -# Examples: -#   ata0: enabled=1, ioaddr1=0x1f0, ioaddr2=0x3f0, irq=14 -#   ata1: enabled=1, ioaddr1=0x170, ioaddr2=0x370, irq=15 -#   ata2: enabled=1, ioaddr1=0x1e8, ioaddr2=0x3e0, irq=11 -#   ata3: enabled=1, ioaddr1=0x168, ioaddr2=0x360, irq=9 -#======================================================================= -ata0: enabled=1, ioaddr1=0x1f0, ioaddr2=0x3f0, irq=14 -ata1: enabled=1, ioaddr1=0x170, ioaddr2=0x370, irq=15 -ata2: enabled=0, ioaddr1=0x1e8, ioaddr2=0x3e0, irq=11 -ata3: enabled=0, ioaddr1=0x168, ioaddr2=0x360, irq=9 - -#======================================================================= -# ATA[0-3]-MASTER, ATA[0-3]-SLAVE -# -# This defines the type and characteristics of all attached ata devices: -#   type=       type of attached device [disk|cdrom]  -#   mode=       only valid for disks [flat|concat|external|dll|sparse|vmware3] -#   mode=       only valid for disks [undoable|growing|volatile] -#   path=       path of the image -#   cylinders=  only valid for disks -#   heads=      only valid for disks -#   spt=        only valid for disks -#   status=     only valid for cdroms [inserted|ejected] -#   biosdetect= type of biosdetection [none|auto], only for disks on ata0 [cmos] -#   translation=type of translation of the bios, only for disks [none|lba|large|rechs|auto] -#   model=      string returned by identify device command -#   journal=    optional filename of the redolog for undoable and volatile disks -#    -# Point this at a hard disk image file, cdrom iso file, or physical cdrom -# device.  To create a hard disk image, try running bximage.  It will help you -# choose the size and then suggest a line that works with it. -# -# In UNIX it may be possible to use a raw device as a Bochs hard disk,  -# but WE DON'T RECOMMEND IT.  In Windows there is no easy way. -# -# In windows, the drive letter + colon notation should be used for cdroms. 
-# Depending on versions of windows and drivers, you may only be able to  -# access the "first" cdrom in the system.  On MacOSX, use path="drive" -# to access the physical drive. -# -# The path is always mandatory. For flat hard disk images created with -# bximage geometry autodetection can be used (cylinders=0 -> cylinders are -# calculated using heads=16 and spt=63). For other hard disk images and modes -# the cylinders, heads, and spt are mandatory. -# -# Default values are: -#   mode=flat, biosdetect=auto, translation=auto, model="Generic 1234" -# -# The biosdetect option has currently no effect on the bios -# -# Examples: -#   ata0-master: type=disk, mode=flat, path=10M.sample, cylinders=306, heads=4, spt=17 -#   ata0-slave:  type=disk, mode=flat, path=20M.sample, cylinders=615, heads=4, spt=17 -#   ata1-master: type=disk, mode=flat, path=30M.sample, cylinders=615, heads=6, spt=17 -#   ata1-slave:  type=disk, mode=flat, path=46M.sample, cylinders=940, heads=6, spt=17 -#   ata2-master: type=disk, mode=flat, path=62M.sample, cylinders=940, heads=8, spt=17 -#   ata2-slave:  type=disk, mode=flat, path=112M.sample, cylinders=900, heads=15, spt=17 -#   ata3-master: type=disk, mode=flat, path=483M.sample, cylinders=1024, heads=15, spt=63 -#   ata3-slave:  type=cdrom, path=iso.sample, status=inserted -#======================================================================= -ata0-master: type=disk, mode=flat, path="xv6.img", cylinders=100, heads=10, spt=10 -ata0-slave: type=disk, mode=flat, path="fs.img", cylinders=1024, heads=1, spt=1 -#ata0-slave: type=cdrom, path=D:, status=inserted -#ata0-slave: type=cdrom, path=/dev/cdrom, status=inserted -#ata0-slave: type=cdrom, path="drive", status=inserted -#ata0-slave: type=cdrom, path=/dev/rcd0d, status=inserted  - -#======================================================================= -# BOOT: -# This defines the boot sequence. Now you can specify up to 3 boot drives. 
-# You can either boot from 'floppy', 'disk' or 'cdrom' -# legacy 'a' and 'c' are also supported -# Examples: -#   boot: floppy -#   boot: disk -#   boot: cdrom -#   boot: c -#   boot: a -#   boot: cdrom, floppy, disk -#======================================================================= -#boot: floppy -boot: disk - -#======================================================================= -# CLOCK: -# This defines the parameters of the clock inside Bochs: -# -#  SYNC: -#  TO BE COMPLETED (see Greg explanation in feature request #536329) -# -#  TIME0: -#  Specifies the start (boot) time of the virtual machine. Use a time  -#  value as returned by the time(2) system call. If no time0 value is  -#  set or if time0 equal to 1 (special case) or if time0 equal 'local',  -#  the simulation will be started at the current local host time. -#  If time0 equal to 2 (special case) or if time0 equal 'utc', -#  the simulation will be started at the current utc time. -# -# Syntax: -#  clock: sync=[none|slowdown|realtime|both], time0=[timeValue|local|utc] -# -# Example: -#   clock: sync=none,     time0=local       # Now (localtime) -#   clock: sync=slowdown, time0=315529200   # Tue Jan  1 00:00:00 1980 -#   clock: sync=none,     time0=631148400   # Mon Jan  1 00:00:00 1990 -#   clock: sync=realtime, time0=938581955   # Wed Sep 29 07:12:35 1999 -#   clock: sync=realtime, time0=946681200   # Sat Jan  1 00:00:00 2000 -#   clock: sync=none,     time0=1           # Now (localtime) -#   clock: sync=none,     time0=utc         # Now (utc/gmt) -#  -# Default value are sync=none, time0=local -#======================================================================= -#clock: sync=none, time0=local - - -#======================================================================= -# FLOPPY_BOOTSIG_CHECK: disabled=[0|1] -# Enables or disables the 0xaa55 signature check on boot floppies -# Defaults to disabled=0 -# Examples: -#   floppy_bootsig_check: disabled=0 -#   floppy_bootsig_check: 
disabled=1 -#======================================================================= -#floppy_bootsig_check: disabled=1 -floppy_bootsig_check: disabled=0 - -#======================================================================= -# LOG: -# Give the path of the log file you'd like Bochs debug and misc. verbiage -# to be written to. If you don't use this option or set the filename to -# '-' the output is written to the console. If you really don't want it, -# make it "/dev/null" (Unix) or "nul" (win32). :^( -# -# Examples: -#   log: ./bochs.out -#   log: /dev/tty -#======================================================================= -#log: /dev/null -log: bochsout.txt - -#======================================================================= -# LOGPREFIX: -# This handles the format of the string prepended to each log line. -# You may use those special tokens : -#   %t : 11 decimal digits timer tick -#   %i : 8 hexadecimal digits of cpu current eip (ignored in SMP configuration) -#   %e : 1 character event type ('i'nfo, 'd'ebug, 'p'anic, 'e'rror) -#   %d : 5 characters string of the device, between brackets -#  -# Default : %t%e%d -# Examples: -#   logprefix: %t-%e-@%i-%d -#   logprefix: %i%e%d -#======================================================================= -#logprefix: %t%e%d - -#======================================================================= -# LOG CONTROLS -# -# Bochs now has four severity levels for event logging. -#   panic: cannot proceed.  If you choose to continue after a panic,  -#          don't be surprised if you get strange behavior or crashes. -#   error: something went wrong, but it is probably safe to continue the -#          simulation. -#   info: interesting or useful messages. -#   debug: messages useful only when debugging the code.  This may -#          spit out thousands per second. -# -# For events of each level, you can choose to crash, report, or ignore. -# TODO: allow choice based on the facility: e.g. 
crash on panics from -#       everything except the cdrom, and only report those. -# -# If you are experiencing many panics, it can be helpful to change -# the panic action to report instead of fatal.  However, be aware -# that anything executed after a panic is uncharted territory and can  -# cause bochs to become unstable.  The panic is a "graceful exit," so -# if you disable it you may get a spectacular disaster instead. -#======================================================================= -panic: action=ask -error: action=report -info: action=report -debug: action=ignore -#pass: action=fatal - -#======================================================================= -# DEBUGGER_LOG: -# Give the path of the log file you'd like Bochs to log debugger output. -# If you really don't want it, make it /dev/null or '-'. :^( -# -# Examples: -#   debugger_log: ./debugger.out -#======================================================================= -#debugger_log: /dev/null -#debugger_log: debugger.out -debugger_log: - - -#======================================================================= -# COM1, COM2, COM3, COM4: -# This defines a serial port (UART type 16550A). In the 'term' you can specify -# a device to use as com1. This can be a real serial line, or a pty.  To use -# a pty (under X/Unix), create two windows (xterms, usually).  One of them will -# run bochs, and the other will act as com1. Find out the tty the com1 -# window using the `tty' command, and use that as the `dev' parameter. -# Then do `sleep 1000000' in the com1 window to keep the shell from -# messing with things, and run bochs in the other window.  Serial I/O to -# com1 (port 0x3f8) will all go to the other window. 
-# Other serial modes are 'null' (no input/output), 'file' (output to a file -# specified as the 'dev' parameter), 'raw' (use the real serial port - under -# construction for win32), 'mouse' (standard serial mouse - requires -# mouse option setting 'type=serial' or 'type=serial_wheel') and 'socket' -# (connect a networking socket). -# -# Examples: -#   com1: enabled=1, mode=null -#   com1: enabled=1, mode=mouse -#   com2: enabled=1, mode=file, dev=serial.out -#   com3: enabled=1, mode=raw, dev=com1 -#   com3: enabled=1, mode=socket, dev=localhost:8888 -#======================================================================= -#com1: enabled=1, mode=term, dev=/dev/ttyp9 - - -#======================================================================= -# PARPORT1, PARPORT2: -# This defines a parallel (printer) port. When turned on and an output file is -# defined the emulated printer port sends characters printed by the guest OS -# into the output file. On some platforms a device filename can be used to -# send the data to the real parallel port (e.g. "/dev/lp0" on Linux, "lpt1" on -# win32 platforms). -# -# Examples: -#   parport1: enabled=1, file="parport.out" -#   parport2: enabled=1, file="/dev/lp0" -#   parport1: enabled=0 -#======================================================================= -parport1: enabled=1, file="/dev/stdout" - -#======================================================================= -# SB16: -# This defines the SB16 sound emulation. It can have several of the -# following properties. -# All properties are in the format sb16: property=value -# midi: The filename is where the midi data is sent. This can be a -#       device or just a file if you want to record the midi data. -# midimode: -#      0=no data -#      1=output to device (system dependent. 
midi denotes the device driver) -#      2=SMF file output, including headers -#      3=output the midi data stream to the file (no midi headers and no -#        delta times, just command and data bytes) -# wave: This is the device/file where wave output is stored -# wavemode: -#      0=no data -#      1=output to device (system dependent. wave denotes the device driver) -#      2=VOC file output, incl. headers -#      3=output the raw wave stream to the file -# log:  The file to write the sb16 emulator messages to. -# loglevel: -#      0=no log -#      1=resource changes, midi program and bank changes -#      2=severe errors -#      3=all errors -#      4=all errors plus all port accesses -#      5=all errors and port accesses plus a lot of extra info -# dmatimer: -#      microseconds per second for a DMA cycle.  Make it smaller to fix -#      non-continuous sound.  750000 is usually a good value.  This needs a -#      reasonably correct setting for the IPS parameter of the CPU option. -# -# For an example look at the next line: -#======================================================================= - -#sb16: midimode=1, midi=/dev/midi00, wavemode=1, wave=/dev/dsp, loglevel=2, log=sb16.log, dmatimer=600000 - -#======================================================================= -# VGA_UPDATE_INTERVAL: -# Video memory is scanned for updates and screen updated every so many -# virtual seconds.  The default is 40000, about 25Hz. Keep in mind that -# you must tweak the 'cpu: ips=N' directive to be as close to the number -# of emulated instructions-per-second your workstation can do, for this -# to be accurate. 
-# -# Examples: -#   vga_update_interval: 250000 -#======================================================================= -vga_update_interval: 300000 - -# using for Winstone '98 tests -#vga_update_interval:  100000 - -#======================================================================= -# KEYBOARD_SERIAL_DELAY: -# Approximate time in microseconds that it takes one character to -# be transfered from the keyboard to controller over the serial path. -# Examples: -#   keyboard_serial_delay: 200 -#======================================================================= -keyboard_serial_delay: 250 - -#======================================================================= -# KEYBOARD_PASTE_DELAY: -# Approximate time in microseconds between attempts to paste -# characters to the keyboard controller. This leaves time for the -# guest os to deal with the flow of characters.  The ideal setting -# depends on how your operating system processes characters.  The -# default of 100000 usec (.1 seconds) was chosen because it works  -# consistently in Windows. -# -# If your OS is losing characters during a paste, increase the paste -# delay until it stops losing characters. -# -# Examples: -#   keyboard_paste_delay: 100000 -#======================================================================= -keyboard_paste_delay: 100000 - -#======================================================================= -# MOUSE:  -# This option prevents Bochs from creating mouse "events" unless a mouse -# is  enabled. The hardware emulation itself is not disabled by this. -# You can turn the mouse on by setting enabled to 1, or turn it off by -# setting enabled to 0. Unless you have a particular reason for enabling -# the mouse by default, it is recommended that you leave it off. -# You can also toggle the mouse usage at runtime (control key + middle -# mouse button on X11, SDL, wxWidgets and Win32). -# With the mouse type option you can select the type of mouse to emulate. 
-# The default value is 'ps2'. The other choices are 'imps2' (wheel mouse -# on PS/2), 'serial', 'serial_wheel' (one com port requires setting -# 'mode=mouse') and 'usb' (3-button mouse - one of the USB ports must be -# connected with the 'mouse' device - requires PCI and USB support). -# -# Examples: -#   mouse: enabled=1 -#   mouse: enabled=1, type=imps2 -#   mouse: enabled=1, type=serial -#   mouse: enabled=0 -#======================================================================= -mouse: enabled=0 - -#======================================================================= -# private_colormap: Request that the GUI create and use it's own -#                   non-shared colormap.  This colormap will be used -#                   when in the bochs window.  If not enabled, a -#                   shared colormap scheme may be used.  Not implemented -#                   on all GUI's. -# -# Examples: -#   private_colormap: enabled=1 -#   private_colormap: enabled=0 -#======================================================================= -private_colormap: enabled=0 - -#======================================================================= -# fullscreen: ONLY IMPLEMENTED ON AMIGA -#             Request that Bochs occupy the entire screen instead of a  -#             window. -# -# Examples: -#   fullscreen: enabled=0 -#   fullscreen: enabled=1 -#======================================================================= -#fullscreen: enabled=0 -#screenmode: name="sample" - -#======================================================================= -# ne2k: NE2000 compatible ethernet adapter -# -# Examples: -# ne2k: ioaddr=IOADDR, irq=IRQ, mac=MACADDR, ethmod=MODULE, ethdev=DEVICE, script=SCRIPT -# -# ioaddr, irq: You probably won't need to change ioaddr and irq, unless there -# are IRQ conflicts. -# -# mac: The MAC address MUST NOT match the address of any machine on the net. 
-# Also, the first byte must be an even number (bit 0 set means a multicast -# address), and you cannot use ff:ff:ff:ff:ff:ff because that's the broadcast -# address.  For the ethertap module, you must use fe:fd:00:00:00:01.  There may -# be other restrictions too.  To be safe, just use the b0:c4... address. -# -# ethdev: The ethdev value is the name of the network interface on your host -# platform.  On UNIX machines, you can get the name by running ifconfig.  On -# Windows machines, you must run niclist to get the name of the ethdev. -# Niclist source code is in misc/niclist.c and it is included in Windows  -# binary releases. -# -# script: The script value is optional, and is the name of a script that  -# is executed after bochs initialize the network interface. You can use  -# this script to configure this network interface, or enable masquerading. -# This is mainly useful for the tun/tap devices that only exist during -# Bochs execution. The network interface name is supplied to the script -# as first parameter -# -# If you don't want to make connections to any physical networks, -# you can use the following 'ethmod's to simulate a virtual network. -#   null: All packets are discarded, but logged to a few files. -#   arpback: ARP is simulated. Disabled by default. -#   vde:  Virtual Distributed Ethernet -#   vnet: ARP, ICMP-echo(ping), DHCP and read/write TFTP are simulated. -#         The virtual host uses 192.168.10.1. -#         DHCP assigns 192.168.10.2 to the guest. -#         TFTP uses the ethdev value for the root directory and doesn't -#         overwrite files. 
-# -#======================================================================= -# ne2k: ioaddr=0x240, irq=9, mac=fe:fd:00:00:00:01, ethmod=fbsd, ethdev=en0 #macosx -# ne2k: ioaddr=0x240, irq=9, mac=b0:c4:20:00:00:00, ethmod=fbsd, ethdev=xl0 -# ne2k: ioaddr=0x240, irq=9, mac=b0:c4:20:00:00:00, ethmod=linux, ethdev=eth0 -# ne2k: ioaddr=0x240, irq=9, mac=b0:c4:20:00:00:01, ethmod=win32, ethdev=MYCARD -# ne2k: ioaddr=0x240, irq=9, mac=fe:fd:00:00:00:01, ethmod=tap, ethdev=tap0 -# ne2k: ioaddr=0x240, irq=9, mac=fe:fd:00:00:00:01, ethmod=tuntap, ethdev=/dev/net/tun0, script=./tunconfig -# ne2k: ioaddr=0x240, irq=9, mac=b0:c4:20:00:00:01, ethmod=null, ethdev=eth0 -# ne2k: ioaddr=0x240, irq=9, mac=b0:c4:20:00:00:01, ethmod=vde, ethdev="/tmp/vde.ctl" -# ne2k: ioaddr=0x240, irq=9, mac=b0:c4:20:00:00:01, ethmod=vnet, ethdev="c:/temp" - -#======================================================================= -# KEYBOARD_MAPPING: -# This enables a remap of a physical localized keyboard to a  -# virtualized us keyboard, as the PC architecture expects. -# If enabled, the keymap file must be specified. -#  -# Examples: -#   keyboard_mapping: enabled=1, map=gui/keymaps/x11-pc-de.map -#======================================================================= -keyboard_mapping: enabled=0, map= - -#======================================================================= -# KEYBOARD_TYPE: -# Type of keyboard return by a "identify keyboard" command to the -# keyboard controler. It must be one of "xt", "at" or "mf". -# Defaults to "mf". It should be ok for almost everybody. A known -# exception is french macs, that do have a "at"-like keyboard. -# -# Examples: -#   keyboard_type: mf -#======================================================================= -#keyboard_type: mf - -#======================================================================= -# USER_SHORTCUT: -# This defines the keyboard shortcut to be sent when you press the "user" -# button in the headerbar. 
The shortcut string is a combination of maximum -# 3 key names (listed below) separated with a '-' character. The old-style -# syntax (without the '-') still works for the key combinations supported -# in Bochs 2.2.1. -# Valid key names: -# "alt", "bksl", "bksp", "ctrl", "del", "down", "end", "enter", "esc", -# "f1", ... "f12", "home", "ins", "left", "menu", "minus", "pgdwn", "pgup", -# "plus", "right", "shift", "space", "tab", "up", and "win". -# -# Example: -#   user_shortcut: keys=ctrl-alt-del -#======================================================================= -#user_shortcut: keys=ctrl-alt-del - -#======================================================================= -# I440FXSUPPORT: -# This option controls the presence of the i440FX PCI chipset. You can -# also specify the devices connected to PCI slots. Up to 5 slots are -# available now. These devices are currently supported: ne2k, pcivga, -# pcidev and pcipnic. If Bochs is compiled with Cirrus SVGA support -# you'll have the additional choice 'cirrus'. -# -# Example: -#   i440fxsupport: enabled=1, slot1=pcivga, slot2=ne2k -#======================================================================= -#i440fxsupport: enabled=1 - -#======================================================================= -# USB1: -# This option controls the presence of the USB root hub which is a part -# of the i440FX PCI chipset. With the portX option you can connect devices -# to the hub (currently supported: 'mouse' and 'keypad'). If you connect -# the mouse to one of the ports and use the mouse option 'type=usb' you'll -# have a 3-button USB mouse. -# -# Example: -#   usb1: enabled=1, port1=mouse, port2=keypad -#======================================================================= -#usb1: enabled=1 - -#======================================================================= -# CMOSIMAGE: -# This defines image file that can be loaded into the CMOS RAM at startup. 
-# The rtc_init parameter controls whether initialize the RTC with values stored -# in the image. By default the time0 argument given to the clock option is used. -# With 'rtc_init=image' the image is the source for the initial time. -# -# Example: -#   cmosimage: file=cmos.img, rtc_init=image -#======================================================================= -#cmosimage: file=cmos.img, rtc_init=time0 - -#======================================================================= -# other stuff -#======================================================================= -#magic_break: enabled=1 -#load32bitOSImage: os=nullkernel, path=../kernel.img, iolog=../vga_io.log -#load32bitOSImage: os=linux, path=../linux.img, iolog=../vga_io.log, initrd=../initrd.img -#text_snapshot_check: enable - -#------------------------- -# PCI host device mapping -#------------------------- -#pcidev: vendor=0x1234, device=0x5678 - -#======================================================================= -# GDBSTUB: -# Enable GDB stub. See user documentation for details. -# Default value is enabled=0. -#======================================================================= -#gdbstub: enabled=0, port=1234, text_base=0, data_base=0, bss_base=0 - -#======================================================================= -# IPS: -# The IPS directive is DEPRECATED. Use the parameter IPS of the CPU -# directive instead. -#======================================================================= -#ips: 10000000 - -#======================================================================= -# for Macintosh, use the style of pathnames in the following -# examples. 
-# -# vgaromimage: :bios:VGABIOS-elpin-2.40 -# romimage: file=:bios:BIOS-bochs-latest, address=0xf0000 -# floppya: 1_44=[fd:], status=inserted -#======================================================================= diff --git a/entry.S b/entry.S deleted file mode 100644 index bc79bab..0000000 --- a/entry.S +++ /dev/null @@ -1,68 +0,0 @@ -# The xv6 kernel starts executing in this file. This file is linked with -# the kernel C code, so it can refer to kernel symbols such as main(). -# The boot block (bootasm.S and bootmain.c) jumps to entry below. -         -# Multiboot header, for multiboot boot loaders like GNU Grub. -# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html -# -# Using GRUB 2, you can boot xv6 from a file stored in a -# Linux file system by copying kernel or kernelmemfs to /boot -# and then adding this menu entry: -# -# menuentry "xv6" { -# 	insmod ext2 -# 	set root='(hd0,msdos1)' -# 	set kernel='/boot/kernel' -# 	echo "Loading ${kernel}..." -# 	multiboot ${kernel} ${kernel} -# 	boot -# } - -#include "asm.h" -#include "memlayout.h" -#include "mmu.h" -#include "param.h" - -# Multiboot header.  Data to direct multiboot loader. -.p2align 2 -.text -.globl multiboot_header -multiboot_header: -  #define magic 0x1badb002 -  #define flags 0 -  .long magic -  .long flags -  .long (-magic-flags) - -# By convention, the _start symbol specifies the ELF entry point. -# Since we haven't set up virtual memory yet, our entry point is -# the physical address of 'entry'. -.globl _start -_start = V2P_WO(entry) - -# Entering xv6 on boot processor, with paging off. -.globl entry -entry: -  # Turn on page size extension for 4Mbyte pages -  movl    %cr4, %eax -  orl     $(CR4_PSE), %eax -  movl    %eax, %cr4 -  # Set page directory -  movl    $(V2P_WO(entrypgdir)), %eax -  movl    %eax, %cr3 -  # Turn on paging. -  movl    %cr0, %eax -  orl     $(CR0_PG|CR0_WP), %eax -  movl    %eax, %cr0 - -  # Set up the stack pointer. 
-  movl $(stack + KSTACKSIZE), %esp - -  # Jump to main(), and switch to executing at -  # high addresses. The indirect call is needed because -  # the assembler produces a PC-relative instruction -  # for a direct jump. -  mov $main, %eax -  jmp *%eax - -.comm stack, KSTACKSIZE diff --git a/entryother.S b/entryother.S deleted file mode 100644 index a3b6dc2..0000000 --- a/entryother.S +++ /dev/null @@ -1,93 +0,0 @@ -#include "asm.h" -#include "memlayout.h" -#include "mmu.h" -	 -# Each non-boot CPU ("AP") is started up in response to a STARTUP -# IPI from the boot CPU.  Section B.4.2 of the Multi-Processor -# Specification says that the AP will start in real mode with CS:IP -# set to XY00:0000, where XY is an 8-bit value sent with the -# STARTUP. Thus this code must start at a 4096-byte boundary. -# -# Because this code sets DS to zero, it must sit -# at an address in the low 2^16 bytes. -# -# Startothers (in main.c) sends the STARTUPs one at a time. -# It copies this code (start) at 0x7000.  It puts the address of -# a newly allocated per-core stack in start-4,the address of the -# place to jump to (mpenter) in start-8, and the physical address -# of entrypgdir in start-12. -# -# This code combines elements of bootasm.S and entry.S. - -.code16            -.globl start -start: -  cli             - -  # Zero data segment registers DS, ES, and SS. -  xorw    %ax,%ax -  movw    %ax,%ds -  movw    %ax,%es -  movw    %ax,%ss - -  # Switch from real to protected mode.  Use a bootstrap GDT that makes -  # virtual addresses map directly to physical addresses so that the -  # effective memory map doesn't change during the transition. -  lgdt    gdtdesc -  movl    %cr0, %eax -  orl     $CR0_PE, %eax -  movl    %eax, %cr0 - -  # Complete the transition to 32-bit protected mode by using a long jmp -  # to reload %cs and %eip.  The segment descriptors are set up with no -  # translation, so that the mapping is still the identity mapping. 
-  ljmpl    $(SEG_KCODE<<3), $(start32) - -//PAGEBREAK! -.code32  # Tell assembler to generate 32-bit code now. -start32: -  # Set up the protected-mode data segment registers -  movw    $(SEG_KDATA<<3), %ax    # Our data segment selector -  movw    %ax, %ds                # -> DS: Data Segment -  movw    %ax, %es                # -> ES: Extra Segment -  movw    %ax, %ss                # -> SS: Stack Segment -  movw    $0, %ax                 # Zero segments not ready for use -  movw    %ax, %fs                # -> FS -  movw    %ax, %gs                # -> GS - -  # Turn on page size extension for 4Mbyte pages -  movl    %cr4, %eax -  orl     $(CR4_PSE), %eax -  movl    %eax, %cr4 -  # Use entrypgdir as our initial page table -  movl    (start-12), %eax -  movl    %eax, %cr3 -  # Turn on paging. -  movl    %cr0, %eax -  orl     $(CR0_PE|CR0_PG|CR0_WP), %eax -  movl    %eax, %cr0 - -  # Switch to the stack allocated by startothers() -  movl    (start-4), %esp -  # Call mpenter() -  call	 *(start-8) - -  movw    $0x8a00, %ax -  movw    %ax, %dx -  outw    %ax, %dx -  movw    $0x8ae0, %ax -  outw    %ax, %dx -spin: -  jmp     spin - -.p2align 2 -gdt: -  SEG_NULLASM -  SEG_ASM(STA_X|STA_R, 0, 0xffffffff) -  SEG_ASM(STA_W, 0, 0xffffffff) - - -gdtdesc: -  .word   (gdtdesc - gdt - 1) -  .long   gdt - @@ -1,114 +0,0 @@ -#include "types.h" -#include "param.h" -#include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "defs.h" -#include "x86.h" -#include "elf.h" - -int -exec(char *path, char **argv) -{ -  char *s, *last; -  int i, off; -  uint argc, sz, sp, ustack[3+MAXARG+1]; -  struct elfhdr elf; -  struct inode *ip; -  struct proghdr ph; -  pde_t *pgdir, *oldpgdir; -  struct proc *curproc = myproc(); - -  begin_op(); - -  if((ip = namei(path)) == 0){ -    end_op(); -    cprintf("exec: fail\n"); -    return -1; -  } -  ilock(ip); -  pgdir = 0; - -  // Check ELF header -  if(readi(ip, (char*)&elf, 0, sizeof(elf)) != sizeof(elf)) -    goto bad; -  if(elf.magic 
!= ELF_MAGIC) -    goto bad; - -  if((pgdir = setupkvm()) == 0) -    goto bad; - -  // Load program into memory. -  sz = 0; -  for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){ -    if(readi(ip, (char*)&ph, off, sizeof(ph)) != sizeof(ph)) -      goto bad; -    if(ph.type != ELF_PROG_LOAD) -      continue; -    if(ph.memsz < ph.filesz) -      goto bad; -    if(ph.vaddr + ph.memsz < ph.vaddr) -      goto bad; -    if((sz = allocuvm(pgdir, sz, ph.vaddr + ph.memsz)) == 0) -      goto bad; -    if(ph.vaddr % PGSIZE != 0) -      goto bad; -    if(loaduvm(pgdir, (char*)ph.vaddr, ip, ph.off, ph.filesz) < 0) -      goto bad; -  } -  iunlockput(ip); -  end_op(); -  ip = 0; - -  // Allocate two pages at the next page boundary. -  // Make the first inaccessible.  Use the second as the user stack. -  sz = PGROUNDUP(sz); -  if((sz = allocuvm(pgdir, sz, sz + 2*PGSIZE)) == 0) -    goto bad; -  clearpteu(pgdir, (char*)(sz - 2*PGSIZE)); -  sp = sz; - -  // Push argument strings, prepare rest of stack in ustack. -  for(argc = 0; argv[argc]; argc++) { -    if(argc >= MAXARG) -      goto bad; -    sp = (sp - (strlen(argv[argc]) + 1)) & ~3; -    if(copyout(pgdir, sp, argv[argc], strlen(argv[argc]) + 1) < 0) -      goto bad; -    ustack[3+argc] = sp; -  } -  ustack[3+argc] = 0; - -  ustack[0] = 0xffffffff;  // fake return PC -  ustack[1] = argc; -  ustack[2] = sp - (argc+1)*4;  // argv pointer - -  sp -= (3+argc+1) * 4; -  if(copyout(pgdir, sp, ustack, (3+argc+1)*4) < 0) -    goto bad; - -  // Save program name for debugging. -  for(last=s=path; *s; s++) -    if(*s == '/') -      last = s+1; -  safestrcpy(curproc->name, last, sizeof(curproc->name)); - -  // Commit to the user image. 
-  oldpgdir = curproc->pgdir; -  curproc->pgdir = pgdir; -  curproc->sz = sz; -  curproc->tf->eip = elf.entry;  // main -  curproc->tf->esp = sp; -  switchuvm(curproc); -  freevm(oldpgdir); -  return 0; - - bad: -  if(pgdir) -    freevm(pgdir); -  if(ip){ -    iunlockput(ip); -    end_op(); -  } -  return -1; -} diff --git a/gdbutil b/gdbutil deleted file mode 100644 index e0c362f..0000000 --- a/gdbutil +++ /dev/null @@ -1,291 +0,0 @@ -# -*- gdb-script -*- - -# Utility functions to pretty-print x86 segment/interrupt descriptors. -# To load this file, run "source gdbutil" in gdb. -# printdesc and printdescs are the main entry points. - -# IA32 2007, Volume 3A, Table 3-2 -set $STS_T16A = 0x1 -set $STS_LDT  = 0x2 -set $STS_T16B = 0x3 -set $STS_CG16 = 0x4 -set $STS_TG   = 0x5 -set $STS_IG16 = 0x6 -set $STS_TG16 = 0x7 -set $STS_T32A = 0x9 -set $STS_T32B = 0xB -set $STS_CG32 = 0xC -set $STS_IG32 = 0xE -set $STS_TG32 = 0xF - -define outputsts -  while 1 -    if $arg0 == $STS_T16A -      echo STS_T16A -      loop_break -    end -    if $arg0 == $STS_LDT -      echo STS_LDT\  -      loop_break -    end -    if $arg0 == $STS_T16B -      echo STS_T16B -      loop_break -    end -    if $arg0 == $STS_CG16 -      echo STS_CG16 -      loop_break -    end -    if $arg0 == $STS_TG -      echo STS_TG\ \  -      loop_break -    end -    if $arg0 == $STS_IG16 -      echo STS_IG16 -      loop_break -    end -    if $arg0 == $STS_TG16 -      echo STS_TG16 -      loop_break -    end -    if $arg0 == $STS_T32A -      echo STS_T32A -      loop_break -    end -    if $arg0 == $STS_T32B -      echo STS_T32B -      loop_break -    end -    if $arg0 == $STS_CG32 -      echo STS_CG32 -      loop_break -    end -    if $arg0 == $STS_IG32 -      echo STS_IG32 -      loop_break -    end -    if $arg0 == $STS_TG32 -      echo STS_TG32 -      loop_break -    end -    echo Reserved -    loop_break -  end -end   - -# IA32 2007, Volume 3A, Table 3-1 -set $STA_X = 0x8 -set $STA_E = 0x4 -set $STA_C = 
0x4 -set $STA_W = 0x2 -set $STA_R = 0x2 -set $STA_A = 0x1 - -define outputsta -  if $arg0 & $STA_X -    # Code segment -    echo code -    if $arg0 & $STA_C -      echo |STA_C -    end -    if $arg0 & $STA_R -      echo |STA_R -    end -  else -    # Data segment -    echo data -    if $arg0 & $STA_E -      echo |STA_E -    end -    if $arg0 & $STA_W -      echo |STA_W -    end -  end -  if $arg0 & $STA_A -    echo |STA_A -  else -    printf "      " -  end -end - -# xv6-specific -set $SEG_KCODE = 1 -set $SEG_KDATA = 2 -set $SEG_KCPU  = 3 -set $SEG_UCODE = 4 -set $SEG_UDATA = 5 -set $SEG_TSS   = 6 - -define outputcs -  if ($arg0 & 4) == 0 -    if $arg0 >> 3 == $SEG_KCODE -      printf "SEG_KCODE<<3" -    end -    if $arg0 >> 3 == $SEG_KDATA -      printf "SEG_KDATA<<3" -    end -    if $arg0 >> 3 == $SEG_KCPU -      printf "SEG_KCPU<<3" -    end -    if $arg0 >> 3 == $SEG_UCODE -      printf "SEG_UCODE<<3" -    end -    if $arg0 >> 3 == $SEG_UDATA -      printf "SEG_UDATA<<3" -    end -    if $arg0 >> 3 == $SEG_TSS -      printf "SEG_TSS<<3" -    end -    if ($arg0 >> 3 < 1) + ($arg0 >> 3 > 6) -      printf "GDT[%d]", $arg0 >> 3 -    end -  else -    printf "LDT[%d]", $arg0 >> 3 -  end -  if ($arg0 & 3) > 0 -    printf "|" -    outputdpl ($arg0&3) -  end -end - -define outputdpl -  if $arg0 == 0 -    printf "DPL_KERN" -  else -    if $arg0 == 3 -      printf "DPL_USER" -    else -      printf "DPL%d", $arg0 -    end -  end -end - -define printdesc -  if $argc != 1 -    echo Usage: printdesc expr -  else -    _printdesc ((uint*)&($arg0))[0] ((uint*)&($arg0))[1] -    printf "\n" -  end -end - -document printdesc -Print an x86 segment or gate descriptor. -printdesc EXPR -EXPR must evaluate to a descriptor value.  It can be of any C type. 
-end - -define _printdesc -  _printdesc1 $arg0 $arg1 ($arg1>>15&1) ($arg1>>13&3) ($arg1>>12&1) ($arg1>>8&15) -end - -define _printdesc1 -  # 2:P 3:DPL 4:S 5:Type -  if $arg2 == 0 -    printf "P = 0 (Not present)" -  else -    printf "type = " -    if $arg4 == 0 -      # System segment -      outputsts $arg5 -      printf " (0x%x)    ", $arg5 -      _printsysdesc $arg0 $arg1 $arg5 -    else -      # Code/data segment -      outputsta $arg5 -      printf "  " -      _printsegdesc $arg0 $arg1 -    end - -    printf "  DPL = " -    outputdpl $arg3 -    printf " (%d)", $arg3 -  end -end - -define _printsysdesc -  # 2:Type -  # GDB's || is buggy -  if ($arg2 == $STS_TG) + (($arg2&7) == $STS_IG16) + (($arg2&7) == $STS_TG16) -    # Gate descriptor -    _printgate $arg2 ($arg0>>16) ($arg0&0xFFFF) ($arg1>>16) -  else -    # System segment descriptor -    _printsegdesc $arg0 $arg1 -  end -end - -define _printgate -  # IA32 2007, Voume 3A, Figure 5-2 -  # 0:Type 1:CS 2:Offset 15..0 3:Offset 31..16 -  printf "CS = " -  outputcs $arg1 -  printf " (%d)", $arg1 - -  if (($arg0&7) == $STS_IG16) + (($arg0&7) == $STS_TG16) -    printf "  Offset = " -    output/a $arg3 << 16 | $arg2 -  end -end - -define _printsegdesc -  # IA32 20007, Volume 3A, Figure 3-8 and Figure 4-1 -  _printsegdesc1 ($arg0>>16) ($arg1&0xFF) ($arg1>>24) ($arg0&0xFFFF) ($arg1>>16&15) ($arg1>>23&1) -  if ($arg1>>12&1) == 1 -    printf "  AVL = %d", $arg1>>20&1 -    if ($arg1>>11&1) == 0 -      # Data segment -      if ($arg1>>22&1) == 0 -        printf "  B = small (0) " -      else -        printf "  B = big (1)   " -      end -    else -      # Code segment -      printf "  D = " -      if ($arg1>>22&1) == 0 -        printf "16-bit (0)" -      else -        printf "32-bit (1)" -      end -    end -  end -end - -define _printsegdesc1 -  # 0:Base 0..15  1:Base 16..23  2:Base 24..32  3:Limit 0..15  4:Limit 16..19  5:G -  printf "base = 0x%08x", $arg0 | ($arg1<<16) | ($arg2<<24) -  printf "  limit = 0x" -  if $arg5 
== 0 -    printf "%08x", $arg3 | ($arg4<<16) -  else -    printf "%08x", (($arg3 | ($arg4<<16)) << 12) | 0xFFF -  end -end - -define printdescs -  if $argc < 1 || $argc > 2 -    echo Usage: printdescs expr [count] -  else -    if $argc == 1 -      _printdescs ($arg0) (sizeof($arg0)/sizeof(($arg0)[0])) -    else -      _printdescs ($arg0) ($arg1) -    end -  end -end - -document printdescs -Print an array of x86 segment or gate descriptors. -printdescs EXPR [COUNT] -EXPR must evaluate to an array of descriptors. -end - -define _printdescs -  set $i = 0 -  while $i < $arg1 -    printf "[%d] ", $i -    printdesc $arg0[$i] -    set $i = $i + 1 -  end -end @@ -1,168 +0,0 @@ -// Simple PIO-based (non-DMA) IDE driver code. - -#include "types.h" -#include "defs.h" -#include "param.h" -#include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" -#include "traps.h" -#include "spinlock.h" -#include "sleeplock.h" -#include "fs.h" -#include "buf.h" - -#define SECTOR_SIZE   512 -#define IDE_BSY       0x80 -#define IDE_DRDY      0x40 -#define IDE_DF        0x20 -#define IDE_ERR       0x01 - -#define IDE_CMD_READ  0x20 -#define IDE_CMD_WRITE 0x30 -#define IDE_CMD_RDMUL 0xc4 -#define IDE_CMD_WRMUL 0xc5 - -// idequeue points to the buf now being read/written to the disk. -// idequeue->qnext points to the next buf to be processed. -// You must hold idelock while manipulating queue. - -static struct spinlock idelock; -static struct buf *idequeue; - -static int havedisk1; -static void idestart(struct buf*); - -// Wait for IDE disk to become ready. 
-static int -idewait(int checkerr) -{ -  int r; - -  while(((r = inb(0x1f7)) & (IDE_BSY|IDE_DRDY)) != IDE_DRDY) -    ; -  if(checkerr && (r & (IDE_DF|IDE_ERR)) != 0) -    return -1; -  return 0; -} - -void -ideinit(void) -{ -  int i; - -  initlock(&idelock, "ide"); -  ioapicenable(IRQ_IDE, ncpu - 1); -  idewait(0); - -  // Check if disk 1 is present -  outb(0x1f6, 0xe0 | (1<<4)); -  for(i=0; i<1000; i++){ -    if(inb(0x1f7) != 0){ -      havedisk1 = 1; -      break; -    } -  } - -  // Switch back to disk 0. -  outb(0x1f6, 0xe0 | (0<<4)); -} - -// Start the request for b.  Caller must hold idelock. -static void -idestart(struct buf *b) -{ -  if(b == 0) -    panic("idestart"); -  if(b->blockno >= FSSIZE) -    panic("incorrect blockno"); -  int sector_per_block =  BSIZE/SECTOR_SIZE; -  int sector = b->blockno * sector_per_block; -  int read_cmd = (sector_per_block == 1) ? IDE_CMD_READ :  IDE_CMD_RDMUL; -  int write_cmd = (sector_per_block == 1) ? IDE_CMD_WRITE : IDE_CMD_WRMUL; - -  if (sector_per_block > 7) panic("idestart"); - -  idewait(0); -  outb(0x3f6, 0);  // generate interrupt -  outb(0x1f2, sector_per_block);  // number of sectors -  outb(0x1f3, sector & 0xff); -  outb(0x1f4, (sector >> 8) & 0xff); -  outb(0x1f5, (sector >> 16) & 0xff); -  outb(0x1f6, 0xe0 | ((b->dev&1)<<4) | ((sector>>24)&0x0f)); -  if(b->flags & B_DIRTY){ -    outb(0x1f7, write_cmd); -    outsl(0x1f0, b->data, BSIZE/4); -  } else { -    outb(0x1f7, read_cmd); -  } -} - -// Interrupt handler. -void -ideintr(void) -{ -  struct buf *b; - -  // First queued buffer is the active request. -  acquire(&idelock); - -  if((b = idequeue) == 0){ -    release(&idelock); -    return; -  } -  idequeue = b->qnext; - -  // Read data if needed. -  if(!(b->flags & B_DIRTY) && idewait(1) >= 0) -    insl(0x1f0, b->data, BSIZE/4); - -  // Wake process waiting for this buf. -  b->flags |= B_VALID; -  b->flags &= ~B_DIRTY; -  wakeup(b); - -  // Start disk on next buf in queue. 
-  if(idequeue != 0) -    idestart(idequeue); - -  release(&idelock); -} - -//PAGEBREAK! -// Sync buf with disk. -// If B_DIRTY is set, write buf to disk, clear B_DIRTY, set B_VALID. -// Else if B_VALID is not set, read buf from disk, set B_VALID. -void -iderw(struct buf *b) -{ -  struct buf **pp; - -  if(!holdingsleep(&b->lock)) -    panic("iderw: buf not locked"); -  if((b->flags & (B_VALID|B_DIRTY)) == B_VALID) -    panic("iderw: nothing to do"); -  if(b->dev != 0 && !havedisk1) -    panic("iderw: ide disk 1 not present"); - -  acquire(&idelock);  //DOC:acquire-lock - -  // Append b to idequeue. -  b->qnext = 0; -  for(pp=&idequeue; *pp; pp=&(*pp)->qnext)  //DOC:insert-queue -    ; -  *pp = b; - -  // Start disk if necessary. -  if(idequeue == b) -    idestart(b); - -  // Wait for request to finish. -  while((b->flags & (B_VALID|B_DIRTY)) != B_VALID){ -    sleep(b, &idelock); -  } - - -  release(&idelock); -} diff --git a/ioapic.c b/ioapic.c deleted file mode 100644 index cb0f015..0000000 --- a/ioapic.c +++ /dev/null @@ -1,75 +0,0 @@ -// The I/O APIC manages hardware interrupts for an SMP system. -// http://www.intel.com/design/chipsets/datashts/29056601.pdf -// See also picirq.c. - -#include "types.h" -#include "defs.h" -#include "traps.h" - -#define IOAPIC  0xFEC00000   // Default physical address of IO APIC - -#define REG_ID     0x00  // Register index: ID -#define REG_VER    0x01  // Register index: version -#define REG_TABLE  0x10  // Redirection table base - -// The redirection table starts at REG_TABLE and uses -// two registers to configure each interrupt. -// The first (low) register in a pair contains configuration bits. -// The second (high) register contains a bitmask telling which -// CPUs can serve that interrupt. 
-#define INT_DISABLED   0x00010000  // Interrupt disabled -#define INT_LEVEL      0x00008000  // Level-triggered (vs edge-) -#define INT_ACTIVELOW  0x00002000  // Active low (vs high) -#define INT_LOGICAL    0x00000800  // Destination is CPU id (vs APIC ID) - -volatile struct ioapic *ioapic; - -// IO APIC MMIO structure: write reg, then read or write data. -struct ioapic { -  uint reg; -  uint pad[3]; -  uint data; -}; - -static uint -ioapicread(int reg) -{ -  ioapic->reg = reg; -  return ioapic->data; -} - -static void -ioapicwrite(int reg, uint data) -{ -  ioapic->reg = reg; -  ioapic->data = data; -} - -void -ioapicinit(void) -{ -  int i, id, maxintr; - -  ioapic = (volatile struct ioapic*)IOAPIC; -  maxintr = (ioapicread(REG_VER) >> 16) & 0xFF; -  id = ioapicread(REG_ID) >> 24; -  if(id != ioapicid) -    cprintf("ioapicinit: id isn't equal to ioapicid; not a MP\n"); - -  // Mark all interrupts edge-triggered, active high, disabled, -  // and not routed to any CPUs. -  for(i = 0; i <= maxintr; i++){ -    ioapicwrite(REG_TABLE+2*i, INT_DISABLED | (T_IRQ0 + i)); -    ioapicwrite(REG_TABLE+2*i+1, 0); -  } -} - -void -ioapicenable(int irq, int cpunum) -{ -  // Mark interrupt edge-triggered, active high, -  // enabled, and routed to the given cpunum, -  // which happens to be that cpu's APIC ID. -  ioapicwrite(REG_TABLE+2*irq, T_IRQ0 + irq); -  ioapicwrite(REG_TABLE+2*irq+1, cpunum << 24); -} diff --git a/kalloc.c b/kalloc.c deleted file mode 100644 index 14cd4f4..0000000 --- a/kalloc.c +++ /dev/null @@ -1,96 +0,0 @@ -// Physical memory allocator, intended to allocate -// memory for user processes, kernel stacks, page table pages, -// and pipe buffers. Allocates 4096-byte pages. 
- -#include "types.h" -#include "defs.h" -#include "param.h" -#include "memlayout.h" -#include "mmu.h" -#include "spinlock.h" - -void freerange(void *vstart, void *vend); -extern char end[]; // first address after kernel loaded from ELF file -                   // defined by the kernel linker script in kernel.ld - -struct run { -  struct run *next; -}; - -struct { -  struct spinlock lock; -  int use_lock; -  struct run *freelist; -} kmem; - -// Initialization happens in two phases. -// 1. main() calls kinit1() while still using entrypgdir to place just -// the pages mapped by entrypgdir on free list. -// 2. main() calls kinit2() with the rest of the physical pages -// after installing a full page table that maps them on all cores. -void -kinit1(void *vstart, void *vend) -{ -  initlock(&kmem.lock, "kmem"); -  kmem.use_lock = 0; -  freerange(vstart, vend); -} - -void -kinit2(void *vstart, void *vend) -{ -  freerange(vstart, vend); -  kmem.use_lock = 1; -} - -void -freerange(void *vstart, void *vend) -{ -  char *p; -  p = (char*)PGROUNDUP((uint)vstart); -  for(; p + PGSIZE <= (char*)vend; p += PGSIZE) -    kfree(p); -} -//PAGEBREAK: 21 -// Free the page of physical memory pointed at by v, -// which normally should have been returned by a -// call to kalloc().  (The exception is when -// initializing the allocator; see kinit above.) -void -kfree(char *v) -{ -  struct run *r; - -  if((uint)v % PGSIZE || v < end || V2P(v) >= PHYSTOP) -    panic("kfree"); - -  // Fill with junk to catch dangling refs. -  memset(v, 1, PGSIZE); - -  if(kmem.use_lock) -    acquire(&kmem.lock); -  r = (struct run*)v; -  r->next = kmem.freelist; -  kmem.freelist = r; -  if(kmem.use_lock) -    release(&kmem.lock); -} - -// Allocate one 4096-byte page of physical memory. -// Returns a pointer that the kernel can use. -// Returns 0 if the memory cannot be allocated. 
-char* -kalloc(void) -{ -  struct run *r; - -  if(kmem.use_lock) -    acquire(&kmem.lock); -  r = kmem.freelist; -  if(r) -    kmem.freelist = r->next; -  if(kmem.use_lock) -    release(&kmem.lock); -  return (char*)r; -} - @@ -1,50 +0,0 @@ -#include "types.h" -#include "x86.h" -#include "defs.h" -#include "kbd.h" - -int -kbdgetc(void) -{ -  static uint shift; -  static uchar *charcode[4] = { -    normalmap, shiftmap, ctlmap, ctlmap -  }; -  uint st, data, c; - -  st = inb(KBSTATP); -  if((st & KBS_DIB) == 0) -    return -1; -  data = inb(KBDATAP); - -  if(data == 0xE0){ -    shift |= E0ESC; -    return 0; -  } else if(data & 0x80){ -    // Key released -    data = (shift & E0ESC ? data : data & 0x7F); -    shift &= ~(shiftcode[data] | E0ESC); -    return 0; -  } else if(shift & E0ESC){ -    // Last character was an E0 escape; or with 0x80 -    data |= 0x80; -    shift &= ~E0ESC; -  } - -  shift |= shiftcode[data]; -  shift ^= togglecode[data]; -  c = charcode[shift & (CTL | SHIFT)][data]; -  if(shift & CAPSLOCK){ -    if('a' <= c && c <= 'z') -      c += 'A' - 'a'; -    else if('A' <= c && c <= 'Z') -      c += 'a' - 'A'; -  } -  return c; -} - -void -kbdintr(void) -{ -  consoleintr(kbdgetc); -} @@ -1,112 +0,0 @@ -// PC keyboard interface constants - -#define KBSTATP         0x64    // kbd controller status port(I) -#define KBS_DIB         0x01    // kbd data in buffer -#define KBDATAP         0x60    // kbd data port(I) - -#define NO              0 - -#define SHIFT           (1<<0) -#define CTL             (1<<1) -#define ALT             (1<<2) - -#define CAPSLOCK        (1<<3) -#define NUMLOCK         (1<<4) -#define SCROLLLOCK      (1<<5) - -#define E0ESC           (1<<6) - -// Special keycodes -#define KEY_HOME        0xE0 -#define KEY_END         0xE1 -#define KEY_UP          0xE2 -#define KEY_DN          0xE3 -#define KEY_LF          0xE4 -#define KEY_RT          0xE5 -#define KEY_PGUP        0xE6 -#define KEY_PGDN        0xE7 -#define KEY_INS         0xE8 
-#define KEY_DEL         0xE9 - -// C('A') == Control-A -#define C(x) (x - '@') - -static uchar shiftcode[256] = -{ -  [0x1D] CTL, -  [0x2A] SHIFT, -  [0x36] SHIFT, -  [0x38] ALT, -  [0x9D] CTL, -  [0xB8] ALT -}; - -static uchar togglecode[256] = -{ -  [0x3A] CAPSLOCK, -  [0x45] NUMLOCK, -  [0x46] SCROLLLOCK -}; - -static uchar normalmap[256] = -{ -  NO,   0x1B, '1',  '2',  '3',  '4',  '5',  '6',  // 0x00 -  '7',  '8',  '9',  '0',  '-',  '=',  '\b', '\t', -  'q',  'w',  'e',  'r',  't',  'y',  'u',  'i',  // 0x10 -  'o',  'p',  '[',  ']',  '\n', NO,   'a',  's', -  'd',  'f',  'g',  'h',  'j',  'k',  'l',  ';',  // 0x20 -  '\'', '`',  NO,   '\\', 'z',  'x',  'c',  'v', -  'b',  'n',  'm',  ',',  '.',  '/',  NO,   '*',  // 0x30 -  NO,   ' ',  NO,   NO,   NO,   NO,   NO,   NO, -  NO,   NO,   NO,   NO,   NO,   NO,   NO,   '7',  // 0x40 -  '8',  '9',  '-',  '4',  '5',  '6',  '+',  '1', -  '2',  '3',  '0',  '.',  NO,   NO,   NO,   NO,   // 0x50 -  [0x9C] '\n',      // KP_Enter -  [0xB5] '/',       // KP_Div -  [0xC8] KEY_UP,    [0xD0] KEY_DN, -  [0xC9] KEY_PGUP,  [0xD1] KEY_PGDN, -  [0xCB] KEY_LF,    [0xCD] KEY_RT, -  [0x97] KEY_HOME,  [0xCF] KEY_END, -  [0xD2] KEY_INS,   [0xD3] KEY_DEL -}; - -static uchar shiftmap[256] = -{ -  NO,   033,  '!',  '@',  '#',  '$',  '%',  '^',  // 0x00 -  '&',  '*',  '(',  ')',  '_',  '+',  '\b', '\t', -  'Q',  'W',  'E',  'R',  'T',  'Y',  'U',  'I',  // 0x10 -  'O',  'P',  '{',  '}',  '\n', NO,   'A',  'S', -  'D',  'F',  'G',  'H',  'J',  'K',  'L',  ':',  // 0x20 -  '"',  '~',  NO,   '|',  'Z',  'X',  'C',  'V', -  'B',  'N',  'M',  '<',  '>',  '?',  NO,   '*',  // 0x30 -  NO,   ' ',  NO,   NO,   NO,   NO,   NO,   NO, -  NO,   NO,   NO,   NO,   NO,   NO,   NO,   '7',  // 0x40 -  '8',  '9',  '-',  '4',  '5',  '6',  '+',  '1', -  '2',  '3',  '0',  '.',  NO,   NO,   NO,   NO,   // 0x50 -  [0x9C] '\n',      // KP_Enter -  [0xB5] '/',       // KP_Div -  [0xC8] KEY_UP,    [0xD0] KEY_DN, -  [0xC9] KEY_PGUP,  [0xD1] KEY_PGDN, -  [0xCB] KEY_LF, 
   [0xCD] KEY_RT, -  [0x97] KEY_HOME,  [0xCF] KEY_END, -  [0xD2] KEY_INS,   [0xD3] KEY_DEL -}; - -static uchar ctlmap[256] = -{ -  NO,      NO,      NO,      NO,      NO,      NO,      NO,      NO, -  NO,      NO,      NO,      NO,      NO,      NO,      NO,      NO, -  C('Q'),  C('W'),  C('E'),  C('R'),  C('T'),  C('Y'),  C('U'),  C('I'), -  C('O'),  C('P'),  NO,      NO,      '\r',    NO,      C('A'),  C('S'), -  C('D'),  C('F'),  C('G'),  C('H'),  C('J'),  C('K'),  C('L'),  NO, -  NO,      NO,      NO,      C('\\'), C('Z'),  C('X'),  C('C'),  C('V'), -  C('B'),  C('N'),  C('M'),  NO,      NO,      C('/'),  NO,      NO, -  [0x9C] '\r',      // KP_Enter -  [0xB5] C('/'),    // KP_Div -  [0xC8] KEY_UP,    [0xD0] KEY_DN, -  [0xC9] KEY_PGUP,  [0xD1] KEY_PGDN, -  [0xCB] KEY_LF,    [0xCD] KEY_RT, -  [0x97] KEY_HOME,  [0xCF] KEY_END, -  [0xD2] KEY_INS,   [0xD3] KEY_DEL -}; - diff --git a/kernel.ld b/kernel.ld deleted file mode 100644 index e24c860..0000000 --- a/kernel.ld +++ /dev/null @@ -1,68 +0,0 @@ -/* Simple linker script for the JOS kernel. -   See the GNU ld 'info' manual ("info ld") to learn the syntax. */ - -OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") -OUTPUT_ARCH(i386) -ENTRY(_start) - -SECTIONS -{ -	/* Link the kernel at this address: "." means the current address */ -        /* Must be equal to KERNLINK */ -	. 
= 0x80100000; - -	.text : AT(0x100000) { -		*(.text .stub .text.* .gnu.linkonce.t.*) -	} - -	PROVIDE(etext = .);	/* Define the 'etext' symbol to this value */ - -	.rodata : { -		*(.rodata .rodata.* .gnu.linkonce.r.*) -	} - -	/* Include debugging information in kernel memory */ -	.stab : { -		PROVIDE(__STAB_BEGIN__ = .); -		*(.stab); -		PROVIDE(__STAB_END__ = .); -		BYTE(0)		/* Force the linker to allocate space -				   for this section */ -	} - -	.stabstr : { -		PROVIDE(__STABSTR_BEGIN__ = .); -		*(.stabstr); -		PROVIDE(__STABSTR_END__ = .); -		BYTE(0)		/* Force the linker to allocate space -				   for this section */ -	} - -	/* Adjust the address for the data segment to the next page */ -	. = ALIGN(0x1000); - -	/* Conventionally, Unix linkers provide pseudo-symbols -	 * etext, edata, and end, at the end of the text, data, and bss. -	 * For the kernel mapping, we need the address at the beginning -	 * of the data section, but that's not one of the conventional -	 * symbols, because the convention started before there was a -	 * read-only rodata section between text and data. */ -	PROVIDE(data = .); - -	/* The data segment */ -	.data : { -		*(.data) -	} - -	PROVIDE(edata = .); - -	.bss : { -		*(.bss) -	} - -	PROVIDE(end = .); - -	/DISCARD/ : { -		*(.eh_frame .note.GNU-stack) -	} -} @@ -12,17 +12,14 @@  // * Do not use the buffer after calling brelse.  // * Only one process at a time can use a buffer,  //     so do not keep them longer than necessary. -// -// The implementation uses two state flags internally: -// * B_VALID: the buffer data has been read from the disk. -// * B_DIRTY: the buffer data has been modified -//     and needs to be written to disk. +  #include "types.h" -#include "defs.h"  #include "param.h"  #include "spinlock.h"  #include "sleeplock.h" +#include "riscv.h" +#include "defs.h"  #include "fs.h"  #include "buf.h" @@ -42,7 +39,6 @@ binit(void)    initlock(&bcache.lock, "bcache"); -//PAGEBREAK!    
// Create linked list of buffers    bcache.head.prev = &bcache.head;    bcache.head.next = &bcache.head; @@ -76,13 +72,11 @@ bget(uint dev, uint blockno)    }    // Not cached; recycle an unused buffer. -  // Even if refcnt==0, B_DIRTY indicates a buffer is in use -  // because log.c has modified it but not yet committed it.    for(b = bcache.head.prev; b != &bcache.head; b = b->prev){ -    if(b->refcnt == 0 && (b->flags & B_DIRTY) == 0) { +    if(b->refcnt == 0) {        b->dev = dev;        b->blockno = blockno; -      b->flags = 0; +      b->valid = 0;        b->refcnt = 1;        release(&bcache.lock);        acquiresleep(&b->lock); @@ -99,8 +93,9 @@ bread(uint dev, uint blockno)    struct buf *b;    b = bget(dev, blockno); -  if((b->flags & B_VALID) == 0) { -    iderw(b); +  if(!b->valid) { +    virtio_disk_rw(b, 0); +    b->valid = 1;    }    return b;  } @@ -111,8 +106,7 @@ bwrite(struct buf *b)  {    if(!holdingsleep(&b->lock))      panic("bwrite"); -  b->flags |= B_DIRTY; -  iderw(b); +  virtio_disk_rw(b, 1);  }  // Release a locked buffer. @@ -139,6 +133,19 @@ brelse(struct buf *b)    release(&bcache.lock);  } -//PAGEBREAK! -// Blank page. + +void +bpin(struct buf *b) { +  acquire(&bcache.lock); +  b->refcnt++; +  release(&bcache.lock); +} + +void +bunpin(struct buf *b) { +  acquire(&bcache.lock); +  b->refcnt--; +  release(&bcache.lock); +} + @@ -1,5 +1,6 @@  struct buf { -  int flags; +  int valid;   // has data been read from disk? +  int disk;    // does disk "own" buf?    uint dev;    uint blockno;    struct sleeplock lock; @@ -9,6 +10,4 @@ struct buf {    struct buf *qnext; // disk queue    uchar data[BSIZE];  }; -#define B_VALID 0x2  // buffer has been read from disk -#define B_DIRTY 0x4  // buffer needs to be written to disk diff --git a/kernel/console.c b/kernel/console.c new file mode 100644 index 0000000..87a83ff --- /dev/null +++ b/kernel/console.c @@ -0,0 +1,199 @@ +// +// Console input and output, to the uart. +// Reads are line at a time. 
+// Implements special input characters: +//   newline -- end of line +//   control-h -- backspace +//   control-u -- kill line +//   control-d -- end of file +//   control-p -- print process list +// + +#include <stdarg.h> + +#include "types.h" +#include "param.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "file.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" +#include "proc.h" + +#define BACKSPACE 0x100 +#define C(x)  ((x)-'@')  // Control-x + +// +// send one character to the uart. +// +void +consputc(int c) +{ +  extern volatile int panicked; // from printf.c + +  if(panicked){ +    for(;;) +      ; +  } + +  if(c == BACKSPACE){ +    // if the user typed backspace, overwrite with a space. +    uartputc('\b'); uartputc(' '); uartputc('\b'); +  } else { +    uartputc(c); +  } +} + +struct { +  struct spinlock lock; +   +  // input +#define INPUT_BUF 128 +  char buf[INPUT_BUF]; +  uint r;  // Read index +  uint w;  // Write index +  uint e;  // Edit index +} cons; + +// +// user write()s to the console go here. +// +int +consolewrite(int user_src, uint64 src, int n) +{ +  int i; + +  acquire(&cons.lock); +  for(i = 0; i < n; i++){ +    char c; +    if(either_copyin(&c, user_src, src+i, 1) == -1) +      break; +    consputc(c); +  } +  release(&cons.lock); + +  return n; +} + +// +// user read()s from the console go here. +// copy (up to) a whole input line to dst. +// user_dist indicates whether dst is a user +// or kernel address. +// +int +consoleread(int user_dst, uint64 dst, int n) +{ +  uint target; +  int c; +  char cbuf; + +  target = n; +  acquire(&cons.lock); +  while(n > 0){ +    // wait until interrupt handler has put some +    // input into cons.buffer. 
+    while(cons.r == cons.w){ +      if(myproc()->killed){ +        release(&cons.lock); +        return -1; +      } +      sleep(&cons.r, &cons.lock); +    } + +    c = cons.buf[cons.r++ % INPUT_BUF]; + +    if(c == C('D')){  // end-of-file +      if(n < target){ +        // Save ^D for next time, to make sure +        // caller gets a 0-byte result. +        cons.r--; +      } +      break; +    } + +    // copy the input byte to the user-space buffer. +    cbuf = c; +    if(either_copyout(user_dst, dst, &cbuf, 1) == -1) +      break; + +    dst++; +    --n; + +    if(c == '\n'){ +      // a whole line has arrived, return to +      // the user-level read(). +      break; +    } +  } +  release(&cons.lock); + +  return target - n; +} + +// +// the console input interrupt handler. +// uartintr() calls this for input character. +// do erase/kill processing, append to cons.buf, +// wake up consoleread() if a whole line has arrived. +// +void +consoleintr(int c) +{ +  acquire(&cons.lock); + +  switch(c){ +  case C('P'):  // Print process list. +    procdump(); +    break; +  case C('U'):  // Kill line. +    while(cons.e != cons.w && +          cons.buf[(cons.e-1) % INPUT_BUF] != '\n'){ +      cons.e--; +      consputc(BACKSPACE); +    } +    break; +  case C('H'): // Backspace +  case '\x7f': +    if(cons.e != cons.w){ +      cons.e--; +      consputc(BACKSPACE); +    } +    break; +  default: +    if(c != 0 && cons.e-cons.r < INPUT_BUF){ +      c = (c == '\r') ? '\n' : c; + +      // echo back to the user. +      consputc(c); + +      // store for consumption by consoleread(). +      cons.buf[cons.e++ % INPUT_BUF] = c; + +      if(c == '\n' || c == C('D') || cons.e == cons.r+INPUT_BUF){ +        // wake up consoleread() if a whole line (or end-of-file) +        // has arrived. 
+        cons.w = cons.e; +        wakeup(&cons.r); +      } +    } +    break; +  } +   +  release(&cons.lock); +} + +void +consoleinit(void) +{ +  initlock(&cons.lock, "cons"); + +  uartinit(); + +  // connect read and write system calls +  // to consoleread and consolewrite. +  devsw[CONSOLE].read = consoleread; +  devsw[CONSOLE].write = consolewrite; +} @@ -4,7 +4,6 @@ struct file;  struct inode;  struct pipe;  struct proc; -struct rtcdate;  struct spinlock;  struct sleeplock;  struct stat; @@ -15,12 +14,13 @@ void            binit(void);  struct buf*     bread(uint, uint);  void            brelse(struct buf*);  void            bwrite(struct buf*); +void            bpin(struct buf*); +void            bunpin(struct buf*);  // console.c  void            consoleinit(void); -void            cprintf(char*, ...); -void            consoleintr(int(*)(void)); -void            panic(char*) __attribute__((noreturn)); +void            consoleintr(int); +void            consputc(int);  // exec.c  int             exec(char*, char**); @@ -30,17 +30,17 @@ struct file*    filealloc(void);  void            fileclose(struct file*);  struct file*    filedup(struct file*);  void            fileinit(void); -int             fileread(struct file*, char*, int n); -int             filestat(struct file*, struct stat*); -int             filewrite(struct file*, char*, int n); +int             fileread(struct file*, uint64, int n); +int             filestat(struct file*, uint64 addr); +int             filewrite(struct file*, uint64, int n);  // fs.c -void            readsb(int dev, struct superblock *sb); +void            fsinit(int);  int             dirlink(struct inode*, char*, uint);  struct inode*   dirlookup(struct inode*, char*, uint*);  struct inode*   ialloc(uint, short);  struct inode*   idup(struct inode*); -void            iinit(int dev); +void            iinit();  void            ilock(struct inode*);  void            iput(struct inode*);  void            iunlock(struct 
inode*); @@ -49,69 +49,49 @@ void            iupdate(struct inode*);  int             namecmp(const char*, const char*);  struct inode*   namei(char*);  struct inode*   nameiparent(char*, char*); -int             readi(struct inode*, char*, uint, uint); +int             readi(struct inode*, int, uint64, uint, uint);  void            stati(struct inode*, struct stat*); -int             writei(struct inode*, char*, uint, uint); - -// ide.c -void            ideinit(void); -void            ideintr(void); -void            iderw(struct buf*); +int             writei(struct inode*, int, uint64, uint, uint); -// ioapic.c -void            ioapicenable(int irq, int cpu); -extern uchar    ioapicid; -void            ioapicinit(void); +// ramdisk.c +void            ramdiskinit(void); +void            ramdiskintr(void); +void            ramdiskrw(struct buf*);  // kalloc.c -char*           kalloc(void); -void            kfree(char*); -void            kinit1(void*, void*); -void            kinit2(void*, void*); - -// kbd.c -void            kbdintr(void); - -// lapic.c -void            cmostime(struct rtcdate *r); -int             lapicid(void); -extern volatile uint*    lapic; -void            lapiceoi(void); -void            lapicinit(void); -void            lapicstartap(uchar, uint); -void            microdelay(int); +void*           kalloc(void); +void            kfree(void *); +void            kinit();  // log.c -void            initlog(int dev); +void            initlog(int, struct superblock*);  void            log_write(struct buf*);  void            begin_op();  void            end_op(); -// mp.c -extern int      ismp; -void            mpinit(void); - -// picirq.c -void            picenable(int); -void            picinit(void); -  // pipe.c  int             pipealloc(struct file**, struct file**);  void            pipeclose(struct pipe*, int); -int             piperead(struct pipe*, char*, int); -int             pipewrite(struct pipe*, char*, int); +int             
piperead(struct pipe*, uint64, int); +int             pipewrite(struct pipe*, uint64, int); + +// printf.c +void            printf(char*, ...); +void            panic(char*) __attribute__((noreturn)); +void            printfinit(void); -//PAGEBREAK: 16  // proc.c  int             cpuid(void);  void            exit(void);  int             fork(void);  int             growproc(int); +pagetable_t     proc_pagetable(struct proc *); +void            proc_freepagetable(pagetable_t, uint64);  int             kill(int);  struct cpu*     mycpu(void); +struct cpu*     getmycpu(void);  struct proc*    myproc(); -void            pinit(void); -void            procdump(void); +void            procinit(void);  void            scheduler(void) __attribute__((noreturn));  void            sched(void);  void            setproc(struct proc*); @@ -120,18 +100,20 @@ void            userinit(void);  int             wait(void);  void            wakeup(void*);  void            yield(void); +int             either_copyout(int user_dst, uint64 dst, void *src, uint64 len); +int             either_copyin(void *dst, int user_src, uint64 src, uint64 len); +void            procdump(void);  // swtch.S -void            swtch(struct context**, struct context*); +void            swtch(struct context*, struct context*);  // spinlock.c  void            acquire(struct spinlock*); -void            getcallerpcs(void*, uint*);  int             holding(struct spinlock*);  void            initlock(struct spinlock*, char*);  void            release(struct spinlock*); -void            pushcli(void); -void            popcli(void); +void            push_off(void); +void            pop_off(void);  // sleeplock.c  void            acquiresleep(struct sleeplock*); @@ -150,41 +132,55 @@ char*           strncpy(char*, const char*, int);  // syscall.c  int             argint(int, int*); -int             argptr(int, char**, int); -int             argstr(int, char**); -int             fetchint(uint, int*); -int            
 fetchstr(uint, char**); -void            syscall(void); - -// timer.c -void            timerinit(void); +int             argstr(int, char*, int); +int             argaddr(int, uint64 *); +int             fetchstr(uint64, char*, int); +int             fetchaddr(uint64, uint64*); +void            syscall();  // trap.c -void            idtinit(void);  extern uint     ticks; -void            tvinit(void); +void            trapinit(void); +void            trapinithart(void);  extern struct spinlock tickslock; +void            usertrapret(void);  // uart.c  void            uartinit(void);  void            uartintr(void);  void            uartputc(int); +int             uartgetc(void);  // vm.c -void            seginit(void); -void            kvmalloc(void); -pde_t*          setupkvm(void); -char*           uva2ka(pde_t*, char*); -int             allocuvm(pde_t*, uint, uint); -int             deallocuvm(pde_t*, uint, uint); -void            freevm(pde_t*); -void            inituvm(pde_t*, char*, uint); -int             loaduvm(pde_t*, char*, struct inode*, uint, uint); -pde_t*          copyuvm(pde_t*, uint); -void            switchuvm(struct proc*); -void            switchkvm(void); -int             copyout(pde_t*, uint, void*, uint); -void            clearpteu(pde_t *pgdir, char *uva); +void            kvminit(void); +void            kvminithart(void); +uint64          kvmpa(uint64); +void            kvmmap(uint64, uint64, uint64, int); +int             mappages(pagetable_t, uint64, uint64, uint64, int); +pagetable_t     uvmcreate(void); +void            uvminit(pagetable_t, uchar *, uint); +uint64          uvmalloc(pagetable_t, uint64, uint64); +uint64          uvmdealloc(pagetable_t, uint64, uint64); +int             uvmcopy(pagetable_t, pagetable_t, uint64); +void            uvmfree(pagetable_t, uint64); +void            uvmunmap(pagetable_t, uint64, uint64, int); +void            uvmclear(pagetable_t, uint64); +uint64          walkaddr(pagetable_t, uint64); +int     
        copyout(pagetable_t, uint64, char *, uint64); +int             copyin(pagetable_t, char *, uint64, uint64); +int             copyinstr(pagetable_t, char *, uint64, uint64); + +// plic.c +void            plicinit(void); +void            plicinithart(void); +uint64          plic_pending(void); +int             plic_claim(void); +void            plic_complete(int); + +// virtio_disk.c +void            virtio_disk_init(void); +void            virtio_disk_rw(struct buf *, int); +void            virtio_disk_intr();  // number of elements in fixed-size array  #define NELEM(x) (sizeof(x)/sizeof((x)[0])) @@ -9,9 +9,9 @@ struct elfhdr {    ushort type;    ushort machine;    uint version; -  uint entry; -  uint phoff; -  uint shoff; +  uint64 entry; +  uint64 phoff; +  uint64 shoff;    uint flags;    ushort ehsize;    ushort phentsize; @@ -23,14 +23,14 @@ struct elfhdr {  // Program section header  struct proghdr { -  uint type; -  uint off; -  uint vaddr; -  uint paddr; -  uint filesz; -  uint memsz; -  uint flags; -  uint align; +  uint32 type; +  uint32 flags; +  uint64 off; +  uint64 vaddr; +  uint64 paddr; +  uint64 filesz; +  uint64 memsz; +  uint64 align;  };  // Values for Proghdr type diff --git a/kernel/entry.S b/kernel/entry.S new file mode 100644 index 0000000..ef5a56a --- /dev/null +++ b/kernel/entry.S @@ -0,0 +1,26 @@ +	# qemu -kernel starts at 0x1000. the instructions +        # there seem to be provided by qemu, as if it +        # were a ROM. the code at 0x1000 jumps to +        # 0x8000000, the _start function here, +        # in machine mode. each CPU starts here. +.section .data +.globl stack0 +.section .text +.globl start +.section .text +.globl _entry +_entry: +	# set up a stack for C. +        # stack0 is declared in start.c, +        # with a 4096-byte stack per CPU. 
+        # sp = stack0 + (hartid * 4096) +        la sp, stack0 +        li a0, 1024*4 +	csrr a1, mhartid +        addi a1, a1, 1 +        mul a0, a0, a1 +        add sp, sp, a0 +	# jump to start() in start.c +        call start +junk: +        j junk diff --git a/kernel/exec.c b/kernel/exec.c new file mode 100644 index 0000000..74ef654 --- /dev/null +++ b/kernel/exec.c @@ -0,0 +1,153 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "spinlock.h" +#include "proc.h" +#include "defs.h" +#include "elf.h" + +static int loadseg(pde_t *pgdir, uint64 addr, struct inode *ip, uint offset, uint sz); + +int +exec(char *path, char **argv) +{ +  char *s, *last; +  int i, off; +  uint64 argc, sz, sp, ustack[MAXARG+1], stackbase; +  struct elfhdr elf; +  struct inode *ip; +  struct proghdr ph; +  pagetable_t pagetable = 0, oldpagetable; +  struct proc *p = myproc(); + +  begin_op(); + +  if((ip = namei(path)) == 0){ +    end_op(); +    return -1; +  } +  ilock(ip); + +  // Check ELF header +  if(readi(ip, 0, (uint64)&elf, 0, sizeof(elf)) != sizeof(elf)) +    goto bad; +  if(elf.magic != ELF_MAGIC) +    goto bad; + +  if((pagetable = proc_pagetable(p)) == 0) +    goto bad; + +  // Load program into memory. +  sz = 0; +  for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){ +    if(readi(ip, 0, (uint64)&ph, off, sizeof(ph)) != sizeof(ph)) +      goto bad; +    if(ph.type != ELF_PROG_LOAD) +      continue; +    if(ph.memsz < ph.filesz) +      goto bad; +    if(ph.vaddr + ph.memsz < ph.vaddr) +      goto bad; +    if((sz = uvmalloc(pagetable, sz, ph.vaddr + ph.memsz)) == 0) +      goto bad; +    if(ph.vaddr % PGSIZE != 0) +      goto bad; +    if(loadseg(pagetable, ph.vaddr, ip, ph.off, ph.filesz) < 0) +      goto bad; +  } +  iunlockput(ip); +  end_op(); +  ip = 0; + +  p = myproc(); +  uint64 oldsz = p->sz; + +  // Allocate two pages at the next page boundary. +  // Use the second as the user stack. 
+  sz = PGROUNDUP(sz); +  if((sz = uvmalloc(pagetable, sz, sz + 2*PGSIZE)) == 0) +    goto bad; +  uvmclear(pagetable, sz-2*PGSIZE); +  sp = sz; +  stackbase = sp - PGSIZE; + +  // Push argument strings, prepare rest of stack in ustack. +  for(argc = 0; argv[argc]; argc++) { +    if(argc >= MAXARG) +      goto bad; +    sp -= strlen(argv[argc]) + 1; +    sp -= sp % 16; // riscv sp must be 16-byte aligned +    if(sp < stackbase) +      goto bad; +    if(copyout(pagetable, sp, argv[argc], strlen(argv[argc]) + 1) < 0) +      goto bad; +    ustack[argc] = sp; +  } +  ustack[argc] = 0; + +  // push the array of argv[] pointers. +  sp -= (argc+1) * sizeof(uint64); +  sp -= sp % 16; +  if(sp < stackbase) +    goto bad; +  if(copyout(pagetable, sp, (char *)ustack, (argc+1)*sizeof(uint64)) < 0) +    goto bad; + +  // arguments to user main(argc, argv) +  // argc is returned via the system call return +  // value, which goes in a0. +  p->tf->a1 = sp; + +  // Save program name for debugging. +  for(last=s=path; *s; s++) +    if(*s == '/') +      last = s+1; +  safestrcpy(p->name, last, sizeof(p->name)); +     +  // Commit to the user image. +  oldpagetable = p->pagetable; +  p->pagetable = pagetable; +  p->sz = sz; +  p->tf->epc = elf.entry;  // initial program counter = main +  p->tf->sp = sp; // initial stack pointer +  proc_freepagetable(oldpagetable, oldsz); +  return argc; // this ends up in a0, the first argument to main(argc, argv) + + bad: +  if(pagetable) +    proc_freepagetable(pagetable, sz); +  if(ip){ +    iunlockput(ip); +    end_op(); +  } +  return -1; +} + +// Load a program segment into pagetable at virtual address va. +// va must be page-aligned +// and the pages from va to va+sz must already be mapped. +// Returns 0 on success, -1 on failure. 
+static int +loadseg(pagetable_t pagetable, uint64 va, struct inode *ip, uint offset, uint sz) +{ +  uint i, n; +  uint64 pa; + +  if((va % PGSIZE) != 0) +    panic("loadseg: va must be page aligned"); + +  for(i = 0; i < sz; i += PGSIZE){ +    pa = walkaddr(pagetable, va + i); +    if(pa == 0) +      panic("loadseg: address should exist"); +    if(sz - i < PGSIZE) +      n = sz - i; +    else +      n = PGSIZE; +    if(readi(ip, 0, (uint64)pa, offset+i, n) != n) +      return -1; +  } +   +  return 0; +} @@ -1,14 +1,17 @@  // -// File descriptors +// Support functions for system calls that involve file descriptors.  //  #include "types.h" +#include "riscv.h"  #include "defs.h"  #include "param.h"  #include "fs.h"  #include "spinlock.h"  #include "sleeplock.h"  #include "file.h" +#include "stat.h" +#include "proc.h"  struct devsw devsw[NDEV];  struct { @@ -70,9 +73,9 @@ fileclose(struct file *f)    f->type = FD_NONE;    release(&ftable.lock); -  if(ff.type == FD_PIPE) +  if(ff.type == FD_PIPE){      pipeclose(ff.pipe, ff.writable); -  else if(ff.type == FD_INODE){ +  } else if(ff.type == FD_INODE || ff.type == FD_DEVICE){      begin_op();      iput(ff.ip);      end_op(); @@ -80,50 +83,65 @@ fileclose(struct file *f)  }  // Get metadata about file f. +// addr is a user virtual address, pointing to a struct stat.  int -filestat(struct file *f, struct stat *st) +filestat(struct file *f, uint64 addr)  { -  if(f->type == FD_INODE){ +  struct proc *p = myproc(); +  struct stat st; +   +  if(f->type == FD_INODE || f->type == FD_DEVICE){      ilock(f->ip); -    stati(f->ip, st); +    stati(f->ip, &st);      iunlock(f->ip); +    if(copyout(p->pagetable, addr, (char *)&st, sizeof(st)) < 0) +      return -1;      return 0;    }    return -1;  }  // Read from file f. +// addr is a user virtual address.  
int -fileread(struct file *f, char *addr, int n) +fileread(struct file *f, uint64 addr, int n)  { -  int r; +  int r = 0;    if(f->readable == 0)      return -1; -  if(f->type == FD_PIPE) -    return piperead(f->pipe, addr, n); -  if(f->type == FD_INODE){ + +  if(f->type == FD_PIPE){ +    r = piperead(f->pipe, addr, n); +  } else if(f->type == FD_DEVICE){ +    r = devsw[f->major].read(1, addr, n); +  } else if(f->type == FD_INODE){      ilock(f->ip); -    if((r = readi(f->ip, addr, f->off, n)) > 0) +    if((r = readi(f->ip, 1, addr, f->off, n)) > 0)        f->off += r;      iunlock(f->ip); -    return r; +  } else { +    panic("fileread");    } -  panic("fileread"); + +  return r;  } -//PAGEBREAK!  // Write to file f. +// addr is a user virtual address.  int -filewrite(struct file *f, char *addr, int n) +filewrite(struct file *f, uint64 addr, int n)  { -  int r; +  int r, ret = 0;    if(f->writable == 0)      return -1; -  if(f->type == FD_PIPE) -    return pipewrite(f->pipe, addr, n); -  if(f->type == FD_INODE){ + +  if(f->type == FD_PIPE){ +    ret = pipewrite(f->pipe, addr, n); +  } else if(f->type == FD_DEVICE){ +    ret = devsw[f->major].write(1, addr, n); +  } else if(f->type == FD_INODE){      // write a few blocks at a time to avoid exceeding      // the maximum log transaction size, including      // i-node, indirect block, allocation blocks, @@ -139,7 +157,7 @@ filewrite(struct file *f, char *addr, int n)        begin_op();        ilock(f->ip); -      if ((r = writei(f->ip, addr + i, f->off, n1)) > 0) +      if ((r = writei(f->ip, 1, addr + i, f->off, n1)) > 0)          f->off += r;        iunlock(f->ip);        end_op(); @@ -150,8 +168,11 @@ filewrite(struct file *f, char *addr, int n)          panic("short filewrite");        i += r;      } -    return i == n ? n : -1; +    ret = (i == n ? 
n : -1); +  } else { +    panic("filewrite");    } -  panic("filewrite"); + +  return ret;  } @@ -1,11 +1,12 @@  struct file { -  enum { FD_NONE, FD_PIPE, FD_INODE } type; +  enum { FD_NONE, FD_PIPE, FD_INODE, FD_DEVICE } type;    int ref; // reference count    char readable;    char writable; -  struct pipe *pipe; -  struct inode *ip; -  uint off; +  struct pipe *pipe; // FD_PIPE +  struct inode *ip;  // FD_INODE and FD_DEVICE +  uint off;          // FD_INODE +  short major;       // FD_DEVICE  }; @@ -25,11 +26,10 @@ struct inode {    uint addrs[NDIRECT+1];  }; -// table mapping major device number to -// device functions +// map major device number to device functions.  struct devsw { -  int (*read)(struct inode*, char*, int); -  int (*write)(struct inode*, char*, int); +  int (*read)(int, uint64, int); +  int (*write)(int, uint64, int);  };  extern struct devsw devsw[]; @@ -10,12 +10,12 @@  // are in sysfile.c.  #include "types.h" +#include "riscv.h"  #include "defs.h"  #include "param.h"  #include "stat.h" -#include "mmu.h" -#include "proc.h"  #include "spinlock.h" +#include "proc.h"  #include "sleeplock.h"  #include "fs.h"  #include "buf.h" @@ -28,7 +28,7 @@ static void itrunc(struct inode*);  struct superblock sb;   // Read the super block. -void +static void  readsb(int dev, struct superblock *sb)  {    struct buf *bp; @@ -38,6 +38,15 @@ readsb(int dev, struct superblock *sb)    brelse(bp);  } +// Init fs +void +fsinit(int dev) { +  readsb(dev, &sb); +  if(sb.magic != FSMAGIC) +    panic("invalid file system"); +  initlog(dev, &sb); +} +  // Zero a block.  
static void  bzero(int dev, int bno) @@ -169,7 +178,7 @@ struct {  } icache;  void -iinit(int dev) +iinit()  {    int i = 0; @@ -177,17 +186,10 @@ iinit(int dev)    for(i = 0; i < NINODE; i++) {      initsleeplock(&icache.inode[i].lock, "inode");    } - -  readsb(dev, &sb); -  cprintf("sb: size %d nblocks %d ninodes %d nlog %d logstart %d\ - inodestart %d bmap start %d\n", sb.size, sb.nblocks, -          sb.ninodes, sb.nlog, sb.logstart, sb.inodestart, -          sb.bmapstart);  }  static struct inode* iget(uint dev, uint inum); -//PAGEBREAK!  // Allocate an inode on device dev.  // Mark it as allocated by  giving it type type.  // Returns an unlocked but allocated and referenced inode. @@ -331,22 +333,27 @@ iunlock(struct inode *ip)  void  iput(struct inode *ip)  { -  acquiresleep(&ip->lock); -  if(ip->valid && ip->nlink == 0){ -    acquire(&icache.lock); -    int r = ip->ref; +  acquire(&icache.lock); + +  if(ip->ref == 1 && ip->valid && ip->nlink == 0){ +    // inode has no links and no other references: truncate and free. + +    // ip->ref == 1 means no other process can have ip locked, +    // so this acquiresleep() won't block (or deadlock). +    acquiresleep(&ip->lock); +      release(&icache.lock); -    if(r == 1){ -      // inode has no links and no other references: truncate and free. -      itrunc(ip); -      ip->type = 0; -      iupdate(ip); -      ip->valid = 0; -    } + +    itrunc(ip); +    ip->type = 0; +    iupdate(ip); +    ip->valid = 0; + +    releasesleep(&ip->lock); + +    acquire(&icache.lock);    } -  releasesleep(&ip->lock); -  acquire(&icache.lock);    ip->ref--;    release(&icache.lock);  } @@ -359,7 +366,6 @@ iunlockput(struct inode *ip)    iput(ip);  } -//PAGEBREAK!  // Inode content  //  // The content (data) associated with each inode is stored @@ -446,21 +452,16 @@ stati(struct inode *ip, struct stat *st)    st->size = ip->size;  } -//PAGEBREAK!  // Read data from inode.  // Caller must hold ip->lock. 
+// If user_dst==1, then dst is a user virtual address; +// otherwise, dst is a kernel address.  int -readi(struct inode *ip, char *dst, uint off, uint n) +readi(struct inode *ip, int user_dst, uint64 dst, uint off, uint n)  {    uint tot, m;    struct buf *bp; -  if(ip->type == T_DEV){ -    if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read) -      return -1; -    return devsw[ip->major].read(ip, dst, n); -  } -    if(off > ip->size || off + n < off)      return -1;    if(off + n > ip->size) @@ -469,27 +470,23 @@ readi(struct inode *ip, char *dst, uint off, uint n)    for(tot=0; tot<n; tot+=m, off+=m, dst+=m){      bp = bread(ip->dev, bmap(ip, off/BSIZE));      m = min(n - tot, BSIZE - off%BSIZE); -    memmove(dst, bp->data + off%BSIZE, m); +    if(either_copyout(user_dst, dst, bp->data + (off % BSIZE), m) == -1) +      break;      brelse(bp);    }    return n;  } -// PAGEBREAK!  // Write data to inode.  // Caller must hold ip->lock. +// If user_src==1, then src is a user virtual address; +// otherwise, src is a kernel address.  int -writei(struct inode *ip, char *src, uint off, uint n) +writei(struct inode *ip, int user_src, uint64 src, uint off, uint n)  {    uint tot, m;    struct buf *bp; -  if(ip->type == T_DEV){ -    if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write) -      return -1; -    return devsw[ip->major].write(ip, src, n); -  } -    if(off > ip->size || off + n < off)      return -1;    if(off + n > MAXFILE*BSIZE) @@ -498,7 +495,8 @@ writei(struct inode *ip, char *src, uint off, uint n)    for(tot=0; tot<n; tot+=m, off+=m, src+=m){      bp = bread(ip->dev, bmap(ip, off/BSIZE));      m = min(n - tot, BSIZE - off%BSIZE); -    memmove(bp->data + off%BSIZE, src, m); +    if(either_copyin(bp->data + (off % BSIZE), user_src, src, m) == -1) +      break;      log_write(bp);      brelse(bp);    } @@ -510,7 +508,6 @@ writei(struct inode *ip, char *src, uint off, uint n)    return n;  } -//PAGEBREAK!  
// Directories  int @@ -531,7 +528,7 @@ dirlookup(struct inode *dp, char *name, uint *poff)      panic("dirlookup not DIR");    for(off = 0; off < dp->size; off += sizeof(de)){ -    if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) +    if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))        panic("dirlookup read");      if(de.inum == 0)        continue; @@ -563,7 +560,7 @@ dirlink(struct inode *dp, char *name, uint inum)    // Look for an empty dirent.    for(off = 0; off < dp->size; off += sizeof(de)){ -    if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) +    if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))        panic("dirlink read");      if(de.inum == 0)        break; @@ -571,13 +568,12 @@ dirlink(struct inode *dp, char *name, uint inum)    strncpy(de.name, name, DIRSIZ);    de.inum = inum; -  if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) +  if(writei(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))      panic("dirlink");    return 0;  } -//PAGEBREAK!  // Paths  // Copy the next path element from path into name. @@ -2,8 +2,8 @@  // Both the kernel and user programs use this header file. -#define ROOTINO 1  // root i-number -#define BSIZE 512  // block size +#define ROOTINO  1   // root i-number +#define BSIZE 1024  // block size  // Disk layout:  // [ boot block | super block | log | inode blocks | @@ -12,6 +12,7 @@  // mkfs computes the super block and builds an initial file system. The  // super block describes the disk layout:  struct superblock { +  uint magic;        // Must be FSMAGIC    uint size;         // Size of file system image (blocks)    uint nblocks;      // Number of data blocks    uint ninodes;      // Number of inodes. 
@@ -21,6 +22,8 @@ struct superblock {    uint bmapstart;    // Block number of first free map block  }; +#define FSMAGIC 0x10203040 +  #define NDIRECT 12  #define NINDIRECT (BSIZE / sizeof(uint))  #define MAXFILE (NDIRECT + NINDIRECT) @@ -28,8 +31,8 @@ struct superblock {  // On-disk inode structure  struct dinode {    short type;           // File type -  short major;          // Major device number (T_DEV only) -  short minor;          // Minor device number (T_DEV only) +  short major;          // Major device number (T_DEVICE only) +  short minor;          // Minor device number (T_DEVICE only)    short nlink;          // Number of links to inode in file system    uint size;            // Size of file (bytes)    uint addrs[NDIRECT+1];   // Data block addresses @@ -45,7 +48,7 @@ struct dinode {  #define BPB           (BSIZE*8)  // Block of free map containing bit for block b -#define BBLOCK(b, sb) (b/BPB + sb.bmapstart) +#define BBLOCK(b, sb) ((b)/BPB + sb.bmapstart)  // Directory is a file containing a sequence of dirent structures.  #define DIRSIZ 14 diff --git a/kernel/kalloc.c b/kernel/kalloc.c new file mode 100644 index 0000000..ae3863b --- /dev/null +++ b/kernel/kalloc.c @@ -0,0 +1,83 @@ +// Physical memory allocator, for user processes, +// kernel stacks, page-table pages, +// and pipe buffers. Allocates whole 4096-byte pages. + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "spinlock.h" +#include "riscv.h" +#include "defs.h" + +void freerange(void *pa_start, void *pa_end); + +extern char end[]; // first address after kernel. +                   // defined by kernel.ld. 
+ +struct run { +  struct run *next; +}; + +struct { +  struct spinlock lock; +  struct run *freelist; +} kmem; + +void +kinit() +{ +  initlock(&kmem.lock, "kmem"); +  freerange(end, (void*)PHYSTOP); +} + +void +freerange(void *pa_start, void *pa_end) +{ +  char *p; +  p = (char*)PGROUNDUP((uint64)pa_start); +  p += 4096; // XXX I can't get kernel.ld to place end beyond the last bss symbol. +  for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE) +    kfree(p); +} + +// Free the page of physical memory pointed at by v, +// which normally should have been returned by a +// call to kalloc().  (The exception is when +// initializing the allocator; see kinit above.) +void +kfree(void *pa) +{ +  struct run *r; + +  if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP) +    panic("kfree"); + +  // Fill with junk to catch dangling refs. +  memset(pa, 1, PGSIZE); + +  r = (struct run*)pa; + +  acquire(&kmem.lock); +  r->next = kmem.freelist; +  kmem.freelist = r; +  release(&kmem.lock); +} + +// Allocate one 4096-byte page of physical memory. +// Returns a pointer that the kernel can use. +// Returns 0 if the memory cannot be allocated. +void * +kalloc(void) +{ +  struct run *r; + +  acquire(&kmem.lock); +  r = kmem.freelist; +  if(r) +    kmem.freelist = r->next; +  release(&kmem.lock); + +  if(r) +    memset((char*)r, 5, PGSIZE); // fill with junk +  return (void*)r; +} diff --git a/kernel/kernel.ld b/kernel/kernel.ld new file mode 100644 index 0000000..0b5e76b --- /dev/null +++ b/kernel/kernel.ld @@ -0,0 +1,32 @@ +OUTPUT_ARCH( "riscv" ) +ENTRY( _entry ) + +SECTIONS +{ +  /* +   * ensure that entry.S / _entry is at 0x80000000, +   * where qemu's -kernel jumps. +   */ +  . = 0x80000000; +  .text : +  { +    *(.text) +    . = ALIGN(0x1000); +    *(trampsec) +  } + +  . = ALIGN(0x1000); +  PROVIDE(etext = .); + +  /* +   * make sure end is after data and bss. 
+   */ +  .data : { +    *(.data) +  } +  bss : { +    *(.bss) +    PROVIDE(end = .); +  } + +} diff --git a/kernel/kernelvec.S b/kernel/kernelvec.S new file mode 100644 index 0000000..3e9d3e9 --- /dev/null +++ b/kernel/kernelvec.S @@ -0,0 +1,121 @@ +	# +        # interrupts and exceptions while in supervisor +        # mode come here. +        # +        # push all registers, call kerneltrap(), restore, return. +        # +.globl kerneltrap +.globl kernelvec +.align 4 +kernelvec: +        // make room to save registers. +        addi sp, sp, -256 + +        // save the registers. +        sd ra, 0(sp) +        sd sp, 8(sp) +        sd gp, 16(sp) +        sd tp, 24(sp) +        sd t0, 32(sp) +        sd t1, 40(sp) +        sd t2, 48(sp) +        sd s0, 56(sp) +        sd s1, 64(sp) +        sd a0, 72(sp) +        sd a1, 80(sp) +        sd a2, 88(sp) +        sd a3, 96(sp) +        sd a4, 104(sp) +        sd a5, 112(sp) +        sd a6, 120(sp) +        sd a7, 128(sp) +        sd s2, 136(sp) +        sd s3, 144(sp) +        sd s4, 152(sp) +        sd s5, 160(sp) +        sd s6, 168(sp) +        sd s7, 176(sp) +        sd s8, 184(sp) +        sd s9, 192(sp) +        sd s10, 200(sp) +        sd s11, 208(sp) +        sd t3, 216(sp) +        sd t4, 224(sp) +        sd t5, 232(sp) +        sd t6, 240(sp) + +	// call the C trap handler in trap.c +        call kerneltrap + +        // restore registers. 
+        ld ra, 0(sp) +        ld sp, 8(sp) +        ld gp, 16(sp) +        // not this, in case we moved CPUs: ld tp, 24(sp) +        ld t0, 32(sp) +        ld t1, 40(sp) +        ld t2, 48(sp) +        ld s0, 56(sp) +        ld s1, 64(sp) +        ld a0, 72(sp) +        ld a1, 80(sp) +        ld a2, 88(sp) +        ld a3, 96(sp) +        ld a4, 104(sp) +        ld a5, 112(sp) +        ld a6, 120(sp) +        ld a7, 128(sp) +        ld s2, 136(sp) +        ld s3, 144(sp) +        ld s4, 152(sp) +        ld s5, 160(sp) +        ld s6, 168(sp) +        ld s7, 176(sp) +        ld s8, 184(sp) +        ld s9, 192(sp) +        ld s10, 200(sp) +        ld s11, 208(sp) +        ld t3, 216(sp) +        ld t4, 224(sp) +        ld t5, 232(sp) +        ld t6, 240(sp) + +        addi sp, sp, 256 + +        // return to whatever we were doing in the kernel. +        sret + +        # +        # machine-mode timer interrupt. +        # +.globl timervec +.align 4 +timervec: +        # start.c has set up the memory that mscratch points to: +        # scratch[0,8,16] : register save area. +        # scratch[32] : address of CLINT's MTIMECMP register. +        # scratch[40] : desired interval between interrupts. +         +        csrrw a0, mscratch, a0 +        sd a1, 0(a0) +        sd a2, 8(a0) +        sd a3, 16(a0) + +        # schedule the next timer interrupt +        # by adding interval to mtimecmp. +        ld a1, 32(a0) # CLINT_MTIMECMP(hart) +        ld a2, 40(a0) # interval +        ld a3, 0(a1) +        add a3, a3, a2 +        sd a3, 0(a1) + +        # raise a supervisor software interrupt. 
+	li a1, 2 +        csrw sip, a1 + +        ld a3, 16(a0) +        ld a2, 8(a0) +        ld a1, 0(a0) +        csrrw a0, mscratch, a0 + +        mret @@ -1,4 +1,5 @@  #include "types.h" +#include "riscv.h"  #include "defs.h"  #include "param.h"  #include "spinlock.h" @@ -51,16 +52,14 @@ static void recover_from_log(void);  static void commit();  void -initlog(int dev) +initlog(int dev, struct superblock *sb)  {    if (sizeof(struct logheader) >= BSIZE)      panic("initlog: too big logheader"); -  struct superblock sb;    initlock(&log.lock, "log"); -  readsb(dev, &sb); -  log.start = sb.logstart; -  log.size = sb.nlog; +  log.start = sb->logstart; +  log.size = sb->nlog;    log.dev = dev;    recover_from_log();  } @@ -76,6 +75,7 @@ install_trans(void)      struct buf *dbuf = bread(log.dev, log.lh.block[tail]); // read dst      memmove(dbuf->data, lbuf->data, BSIZE);  // copy block to dst      bwrite(dbuf);  // write dst to disk +    bunpin(dbuf);      brelse(lbuf);      brelse(dbuf);    } @@ -202,7 +202,7 @@ commit()  }  // Caller has modified b->data and is done with the buffer. -// Record the block number and pin in the cache with B_DIRTY. +// Record the block number and pin in the cache by increasing refcnt.  // commit()/write_log() will do the disk write.  //  // log_write() replaces bwrite(); a typical use is: @@ -226,9 +226,10 @@ log_write(struct buf *b)        break;    }    log.lh.block[i] = b->blockno; -  if (i == log.lh.n) +  if (i == log.lh.n) {  // Add new block to log? +    bpin(b);      log.lh.n++; -  b->flags |= B_DIRTY; // prevent eviction +  }    release(&log.lock);  } diff --git a/kernel/main.c b/kernel/main.c new file mode 100644 index 0000000..a936fd3 --- /dev/null +++ b/kernel/main.c @@ -0,0 +1,43 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" + +volatile static int started = 0; + +// start() jumps here in supervisor mode on all CPUs. 
+void +main() +{ +  if(cpuid() == 0){ +    consoleinit(); +    printfinit(); +    printf("hart %d starting\n", cpuid()); +    kinit();         // physical page allocator +    kvminit();       // create kernel page table +    kvminithart();   // turn on paging +    procinit();      // process table +    trapinit();      // trap vectors +    trapinithart();  // install kernel trap vector +    plicinit();      // set up interrupt controller +    plicinithart();  // ask PLIC for device interrupts +    binit();         // buffer cache +    iinit();         // inode cache +    fileinit();      // file table +    virtio_disk_init(); // emulated hard disk +    userinit();      // first user process +    __sync_synchronize(); +    started = 1; +  } else { +    while(started == 0) +      ; +    __sync_synchronize(); +    printf("hart %d starting\n", cpuid()); +    kvminithart();    // turn on paging +    trapinithart();   // install kernel trap vector +    plicinithart();   // ask PLIC for device interrupts +  } + +  scheduler();         +} diff --git a/kernel/memlayout.h b/kernel/memlayout.h new file mode 100644 index 0000000..8ffd538 --- /dev/null +++ b/kernel/memlayout.h @@ -0,0 +1,67 @@ +// Physical memory layout + +// qemu -machine virt is set up like this, +// based on qemu's hw/riscv/virt.c: +// +// 00001000 -- boot ROM, provided by qemu +// 02000000 -- CLINT +// 0C000000 -- PLIC +// 10000000 -- uart0  +// 10001000 -- virtio disk  +// 80000000 -- boot ROM jumps here in machine mode +//             -kernel loads the kernel here +// unused RAM after 80000000. + +// the kernel uses physical memory thus: +// 80000000 -- entry.S, then kernel text and data +// end -- start of kernel page allocation area +// PHYSTOP -- end RAM used by the kernel + +// qemu puts UART registers here in physical memory. 
+#define UART0 0x10000000L +#define UART0_IRQ 10 + +// virtio mmio interface +#define VIRTIO0 0x10001000 +#define VIRTIO0_IRQ 1 + +// local interrupt controller, which contains the timer. +#define CLINT 0x2000000L +#define CLINT_MTIMECMP(hartid) (CLINT + 0x4000 + 8*(hartid)) +#define CLINT_MTIME (CLINT + 0xBFF8) // cycles since boot. + +// qemu puts programmable interrupt controller here. +#define PLIC 0x0c000000L +#define PLIC_PRIORITY (PLIC + 0x0) +#define PLIC_PENDING (PLIC + 0x1000) +#define PLIC_MENABLE(hart) (PLIC + 0x2000 + (hart)*0x100) +#define PLIC_SENABLE(hart) (PLIC + 0x2080 + (hart)*0x100) +#define PLIC_MPRIORITY(hart) (PLIC + 0x200000 + (hart)*0x2000) +#define PLIC_SPRIORITY(hart) (PLIC + 0x201000 + (hart)*0x2000) +#define PLIC_MCLAIM(hart) (PLIC + 0x200004 + (hart)*0x2000) +#define PLIC_SCLAIM(hart) (PLIC + 0x201004 + (hart)*0x2000) + +// the kernel expects there to be RAM +// for use by the kernel and user pages +// from physical address 0x80000000 to PHYSTOP. +#define KERNBASE 0x80000000L +#define PHYSTOP (KERNBASE + 128*1024*1024) + +// map the trampoline page to the highest address, +// in both user and kernel space. +#define TRAMPOLINE (MAXVA - PGSIZE) + +// map kernel stacks beneath the trampoline, +// each surrounded by invalid guard pages. +#define KSTACK(p) (TRAMPOLINE - ((p)+1)* 2*PGSIZE) + +// User memory layout. +// Address zero first: +//   text +//   original data and bss +//   fixed-size stack +//   expandable heap +//   ... 
+//   TRAPFRAME (p->tf, used by the trampoline) +//   TRAMPOLINE (the same page as in the kernel) +#define TRAPFRAME (TRAMPOLINE - PGSIZE) @@ -1,5 +1,4 @@  #define NPROC        64  // maximum number of processes -#define KSTACKSIZE 4096  // size of per-process kernel stack  #define NCPU          8  // maximum number of CPUs  #define NOFILE       16  // open files per process  #define NFILE       100  // open files per system @@ -11,4 +10,4 @@  #define LOGSIZE      (MAXOPBLOCKS*3)  // max data blocks in on-disk log  #define NBUF         (MAXOPBLOCKS*3)  // size of disk block cache  #define FSSIZE       1000  // size of file system in blocks - +#define MAXPATH      128   // maximum file path name diff --git a/kernel/pipe.c b/kernel/pipe.c new file mode 100644 index 0000000..e358283 --- /dev/null +++ b/kernel/pipe.c @@ -0,0 +1,127 @@ +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "spinlock.h" +#include "proc.h" +#include "fs.h" +#include "sleeplock.h" +#include "file.h" + +#define PIPESIZE 512 + +struct pipe { +  struct spinlock lock; +  char data[PIPESIZE]; +  uint nread;     // number of bytes read +  uint nwrite;    // number of bytes written +  int readopen;   // read fd is still open +  int writeopen;  // write fd is still open +}; + +int +pipealloc(struct file **f0, struct file **f1) +{ +  struct pipe *pi; + +  pi = 0; +  *f0 = *f1 = 0; +  if((*f0 = filealloc()) == 0 || (*f1 = filealloc()) == 0) +    goto bad; +  if((pi = (struct pipe*)kalloc()) == 0) +    goto bad; +  pi->readopen = 1; +  pi->writeopen = 1; +  pi->nwrite = 0; +  pi->nread = 0; +  initlock(&pi->lock, "pipe"); +  (*f0)->type = FD_PIPE; +  (*f0)->readable = 1; +  (*f0)->writable = 0; +  (*f0)->pipe = pi; +  (*f1)->type = FD_PIPE; +  (*f1)->readable = 0; +  (*f1)->writable = 1; +  (*f1)->pipe = pi; +  return 0; + + bad: +  if(pi) +    kfree((char*)pi); +  if(*f0) +    fileclose(*f0); +  if(*f1) +    fileclose(*f1); +  return -1; +} + +void +pipeclose(struct 
pipe *pi, int writable) +{ +  acquire(&pi->lock); +  if(writable){ +    pi->writeopen = 0; +    wakeup(&pi->nread); +  } else { +    pi->readopen = 0; +    wakeup(&pi->nwrite); +  } +  if(pi->readopen == 0 && pi->writeopen == 0){ +    release(&pi->lock); +    kfree((char*)pi); +  } else +    release(&pi->lock); +} + +int +pipewrite(struct pipe *pi, uint64 addr, int n) +{ +  int i; +  char ch; +  struct proc *pr = myproc(); + +  acquire(&pi->lock); +  for(i = 0; i < n; i++){ +    while(pi->nwrite == pi->nread + PIPESIZE){  //DOC: pipewrite-full +      if(pi->readopen == 0 || myproc()->killed){ +        release(&pi->lock); +        return -1; +      } +      wakeup(&pi->nread); +      sleep(&pi->nwrite, &pi->lock); +    } +    if(copyin(pr->pagetable, &ch, addr + i, 1) == -1) +      break; +    pi->data[pi->nwrite++ % PIPESIZE] = ch; +  } +  wakeup(&pi->nread); +  release(&pi->lock); +  return n; +} + +int +piperead(struct pipe *pi, uint64 addr, int n) +{ +  int i; +  struct proc *pr = myproc(); +  char ch; + +  acquire(&pi->lock); +  while(pi->nread == pi->nwrite && pi->writeopen){  //DOC: pipe-empty +    if(myproc()->killed){ +      release(&pi->lock); +      return -1; +    } +    sleep(&pi->nread, &pi->lock); //DOC: piperead-sleep +  } +  for(i = 0; i < n; i++){  //DOC: piperead-copy +    if(pi->nread == pi->nwrite) +      break; +    ch = pi->data[pi->nread++ % PIPESIZE]; +    if(copyout(pr->pagetable, addr + i, &ch, 1) == -1) +      break; +  } +  wakeup(&pi->nwrite);  //DOC: piperead-wakeup +  release(&pi->lock); +  return i; +} diff --git a/kernel/plic.c b/kernel/plic.c new file mode 100644 index 0000000..b569492 --- /dev/null +++ b/kernel/plic.c @@ -0,0 +1,62 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" + +// +// the riscv Platform Level Interrupt Controller (PLIC). +// + +void +plicinit(void) +{ +  // set desired IRQ priorities non-zero (otherwise disabled). 
+  *(uint32*)(PLIC + UART0_IRQ*4) = 1; +  *(uint32*)(PLIC + VIRTIO0_IRQ*4) = 1; +} + +void +plicinithart(void) +{ +  int hart = cpuid(); +   +  // set uart's enable bit for this hart's S-mode.  +  *(uint32*)PLIC_SENABLE(hart)= (1 << UART0_IRQ) | (1 << VIRTIO0_IRQ); + +  // set this hart's S-mode priority threshold to 0. +  *(uint32*)PLIC_SPRIORITY(hart) = 0; +} + +// return a bitmap of which IRQs are waiting +// to be served. +uint64 +plic_pending(void) +{ +  uint64 mask; + +  //mask = *(uint32*)(PLIC + 0x1000); +  //mask |= (uint64)*(uint32*)(PLIC + 0x1004) << 32; +  mask = *(uint64*)PLIC_PENDING; + +  return mask; +} + +// ask the PLIC what interrupt we should serve. +int +plic_claim(void) +{ +  int hart = cpuid(); +  //int irq = *(uint32*)(PLIC + 0x201004); +  int irq = *(uint32*)PLIC_SCLAIM(hart); +  return irq; +} + +// tell the PLIC we've served this IRQ. +void +plic_complete(int irq) +{ +  int hart = cpuid(); +  //*(uint32*)(PLIC + 0x201004) = irq; +  *(uint32*)PLIC_SCLAIM(hart) = irq; +} diff --git a/kernel/printf.c b/kernel/printf.c new file mode 100644 index 0000000..777cc5f --- /dev/null +++ b/kernel/printf.c @@ -0,0 +1,134 @@ +// +// formatted console output -- printf, panic. +// + +#include <stdarg.h> + +#include "types.h" +#include "param.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "file.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" +#include "proc.h" + +volatile int panicked = 0; + +// lock to avoid interleaving concurrent printf's. 
+static struct { +  struct spinlock lock; +  int locking; +} pr; + +static char digits[] = "0123456789abcdef"; + +static void +printint(int xx, int base, int sign) +{ +  char buf[16]; +  int i; +  uint x; + +  if(sign && (sign = xx < 0)) +    x = -xx; +  else +    x = xx; + +  i = 0; +  do { +    buf[i++] = digits[x % base]; +  } while((x /= base) != 0); + +  if(sign) +    buf[i++] = '-'; + +  while(--i >= 0) +    consputc(buf[i]); +} + +static void +printptr(uint64 x) +{ +  int i; +  consputc('0'); +  consputc('x'); +  for (i = 0; i < (sizeof(uint64) * 2); i++, x <<= 4) +    consputc(digits[x >> (sizeof(uint64) * 8 - 4)]); +} + +// Print to the console. only understands %d, %x, %p, %s. +void +printf(char *fmt, ...) +{ +  va_list ap; +  int i, c, locking; +  char *s; + +  locking = pr.locking; +  if(locking) +    acquire(&pr.lock); + +  if (fmt == 0) +    panic("null fmt"); + +  va_start(ap, fmt); +  for(i = 0; (c = fmt[i] & 0xff) != 0; i++){ +    if(c != '%'){ +      consputc(c); +      continue; +    } +    c = fmt[++i] & 0xff; +    if(c == 0) +      break; +    switch(c){ +    case 'd': +      printint(va_arg(ap, int), 10, 1); +      break; +    case 'x': +      printint(va_arg(ap, int), 16, 1); +      break; +    case 'p': +      printptr(va_arg(ap, uint64)); +      break; +    case 's': +      if((s = va_arg(ap, char*)) == 0) +        s = "(null)"; +      for(; *s; s++) +        consputc(*s); +      break; +    case '%': +      consputc('%'); +      break; +    default: +      // Print unknown % sequence to draw attention. 
+      consputc('%'); +      consputc(c); +      break; +    } +  } + +  if(locking) +    release(&pr.lock); +} + +void +panic(char *s) +{ +  pr.locking = 0; +  printf("panic: "); +  printf(s); +  printf("\n"); +  panicked = 1; // freeze other CPUs +  for(;;) +    ; +} + +void +printfinit(void) +{ +  initlock(&pr.lock, "pr"); +  pr.locking = 1; +} diff --git a/kernel/proc.c b/kernel/proc.c new file mode 100644 index 0000000..428fdb0 --- /dev/null +++ b/kernel/proc.c @@ -0,0 +1,647 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "spinlock.h" +#include "proc.h" +#include "defs.h" + +struct cpu cpus[NCPU]; + +struct proc proc[NPROC]; + +struct proc *initproc; + +int nextpid = 1; +struct spinlock pid_lock; + +extern void forkret(void); +static void wakeup1(struct proc *chan); + +extern char trampoline[]; // trampoline.S + +void +procinit(void) +{ +  struct proc *p; +   +  initlock(&pid_lock, "nextpid"); +  for(p = proc; p < &proc[NPROC]; p++) { +      initlock(&p->lock, "proc"); + +      // Allocate a page for the process's kernel stack. +      // Map it high in memory, followed by an invalid +      // guard page. +      char *pa = kalloc(); +      if(pa == 0) +        panic("kalloc"); +      uint64 va = KSTACK((int) (p - proc)); +      kvmmap(va, (uint64)pa, PGSIZE, PTE_R | PTE_W); +      p->kstack = va; +  } +  kvminithart(); +} + +// Must be called with interrupts disabled, +// to prevent race with process being moved +// to a different CPU. +int +cpuid() +{ +  int id = r_tp(); +  return id; +} + +// Return this CPU's cpu struct. +// Interrupts must be disabled. +struct cpu* +mycpu(void) { +  int id = cpuid(); +  struct cpu *c = &cpus[id]; +  return c; +} + +// Return the current struct proc *, or zero if none. 
+struct proc* +myproc(void) { +  push_off(); +  struct cpu *c = mycpu(); +  struct proc *p = c->proc; +  pop_off(); +  return p; +} + +int +allocpid() { +  int pid; +   +  acquire(&pid_lock); +  pid = nextpid; +  nextpid = nextpid + 1; +  release(&pid_lock); + +  return pid; +} + +// Look in the process table for an UNUSED proc. +// If found, initialize state required to run in the kernel, +// and return with p->lock held. +// If there are no free procs, return 0. +static struct proc* +allocproc(void) +{ +  struct proc *p; + +  for(p = proc; p < &proc[NPROC]; p++) { +    acquire(&p->lock); +    if(p->state == UNUSED) { +      goto found; +    } else { +      release(&p->lock); +    } +  } +  return 0; + +found: +  p->pid = allocpid(); + +  // Allocate a trapframe page. +  if((p->tf = (struct trapframe *)kalloc()) == 0){ +    release(&p->lock); +    return 0; +  } + +  // An empty user page table. +  p->pagetable = proc_pagetable(p); + +  // Set up new context to start executing at forkret, +  // which returns to user space. +  memset(&p->context, 0, sizeof p->context); +  p->context.ra = (uint64)forkret; +  p->context.sp = p->kstack + PGSIZE; + +  return p; +} + +// free a proc structure and the data hanging from it, +// including user pages. +// p->lock must be held. +static void +freeproc(struct proc *p) +{ +  if(p->tf) +    kfree((void*)p->tf); +  p->tf = 0; +  if(p->pagetable) +    proc_freepagetable(p->pagetable, p->sz); +  p->pagetable = 0; +  p->sz = 0; +  p->pid = 0; +  p->parent = 0; +  p->name[0] = 0; +  p->chan = 0; +  p->killed = 0; +  p->state = UNUSED; +} + +// Create a page table for a given process, +// with no user pages, but with trampoline pages. +pagetable_t +proc_pagetable(struct proc *p) +{ +  pagetable_t pagetable; + +  // An empty page table. +  pagetable = uvmcreate(); + +  // map the trampoline code (for system call return) +  // at the highest user virtual address. 
+  // only the supervisor uses it, on the way +  // to/from user space, so not PTE_U. +  mappages(pagetable, TRAMPOLINE, PGSIZE, +           (uint64)trampoline, PTE_R | PTE_X); + +  // map the trapframe just below TRAMPOLINE, for trampoline.S. +  mappages(pagetable, TRAPFRAME, PGSIZE, +           (uint64)(p->tf), PTE_R | PTE_W); + +  return pagetable; +} + +// Free a process's page table, and free the +// physical memory it refers to. +void +proc_freepagetable(pagetable_t pagetable, uint64 sz) +{ +  uvmunmap(pagetable, TRAMPOLINE, PGSIZE, 0); +  uvmunmap(pagetable, TRAPFRAME, PGSIZE, 0); +  if(sz > 0) +    uvmfree(pagetable, sz); +} + +// a user program that calls exec("/init") +// od -t xC initcode +uchar initcode[] = { +  0x17, 0x05, 0x00, 0x00, 0x13, 0x05, 0x05, 0x02, +  0x97, 0x05, 0x00, 0x00, 0x93, 0x85, 0x05, 0x02, +  0x9d, 0x48, 0x73, 0x00, 0x00, 0x00, 0x89, 0x48, +  0x73, 0x00, 0x00, 0x00, 0xef, 0xf0, 0xbf, 0xff, +  0x2f, 0x69, 0x6e, 0x69, 0x74, 0x00, 0x00, 0x01, +  0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +  0x00, 0x00, 0x00 +}; + +// Set up first user process. +void +userinit(void) +{ +  struct proc *p; + +  p = allocproc(); +  initproc = p; +   +  // allocate one user page and copy init's instructions +  // and data into it. +  uvminit(p->pagetable, initcode, sizeof(initcode)); +  p->sz = PGSIZE; + +  // prepare for the very first "return" from kernel to user. +  p->tf->epc = 0;      // user program counter +  p->tf->sp = PGSIZE;  // user stack pointer + +  safestrcpy(p->name, "initcode", sizeof(p->name)); +  p->cwd = namei("/"); + +  p->state = RUNNABLE; + +  release(&p->lock); +} + +// Grow or shrink user memory by n bytes. +// Return 0 on success, -1 on failure. 
+int +growproc(int n) +{ +  uint sz; +  struct proc *p = myproc(); + +  sz = p->sz; +  if(n > 0){ +    if((sz = uvmalloc(p->pagetable, sz, sz + n)) == 0) { +      return -1; +    } +  } else if(n < 0){ +    if((sz = uvmdealloc(p->pagetable, sz, sz + n)) == 0) { +      return -1; +    } +  } +  p->sz = sz; +  return 0; +} + +// Create a new process, copying the parent. +// Sets up child kernel stack to return as if from fork() system call. +int +fork(void) +{ +  int i, pid; +  struct proc *np; +  struct proc *p = myproc(); + +  // Allocate process. +  if((np = allocproc()) == 0){ +    return -1; +  } + +  // Copy user memory from parent to child. +  if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0){ +    freeproc(np); +    release(&np->lock); +    return -1; +  } +  np->sz = p->sz; + +  np->parent = p; + +  // copy saved user registers. +  *(np->tf) = *(p->tf); + +  // Cause fork to return 0 in the child. +  np->tf->a0 = 0; + +  // increment reference counts on open file descriptors. +  for(i = 0; i < NOFILE; i++) +    if(p->ofile[i]) +      np->ofile[i] = filedup(p->ofile[i]); +  np->cwd = idup(p->cwd); + +  safestrcpy(np->name, p->name, sizeof(p->name)); + +  pid = np->pid; + +  np->state = RUNNABLE; + +  release(&np->lock); + +  return pid; +} + +// Pass p's abandoned children to init. +// Caller must hold p->lock and parent->lock. +void +reparent(struct proc *p, struct proc *parent) { +  struct proc *pp; +  int child_of_init = (p->parent == initproc); + +  for(pp = proc; pp < &proc[NPROC]; pp++){ +    // this code uses pp->parent without holding pp->lock. +    // acquiring the lock first could cause a deadlock +    // if pp or a child of pp were also in exit() +    // and about to try to lock p. +    if(pp->parent == p){ +      // pp->parent can't change between the check and the acquire() +      // because only the parent changes it, and we're the parent. 
+      acquire(&pp->lock); +      pp->parent = initproc; +      if(pp->state == ZOMBIE) { +        if(!child_of_init) +          acquire(&initproc->lock); +        wakeup1(initproc); +        if(!child_of_init) +          release(&initproc->lock); +      } +      release(&pp->lock); +    } +  } +} + +// Exit the current process.  Does not return. +// An exited process remains in the zombie state +// until its parent calls wait(). +void +exit(void) +{ +  struct proc *p = myproc(); + +  if(p == initproc) +    panic("init exiting"); + +  // Close all open files. +  for(int fd = 0; fd < NOFILE; fd++){ +    if(p->ofile[fd]){ +      struct file *f = p->ofile[fd]; +      fileclose(f); +      p->ofile[fd] = 0; +    } +  } + +  begin_op(); +  iput(p->cwd); +  end_op(); +  p->cwd = 0; + +  acquire(&p->parent->lock); +     +  acquire(&p->lock); + +  // Give any children to init. +  reparent(p, p->parent); + +  // Parent might be sleeping in wait(). +  wakeup1(p->parent); + +  p->state = ZOMBIE; + +  release(&p->parent->lock); + +  // Jump into the scheduler, never to return. +  sched(); +  panic("zombie exit"); +} + +// Wait for a child process to exit and return its pid. +// Return -1 if this process has no children. +int +wait(void) +{ +  struct proc *np; +  int havekids, pid; +  struct proc *p = myproc(); + +  // hold p->lock for the whole time to avoid lost +  // wakeups from a child's exit(). +  acquire(&p->lock); + +  for(;;){ +    // Scan through table looking for exited children. +    havekids = 0; +    for(np = proc; np < &proc[NPROC]; np++){ +      // this code uses np->parent without holding np->lock. +      // acquiring the lock first would cause a deadlock, +      // since np might be an ancestor, and we already hold p->lock. +      if(np->parent == p){ +        // np->parent can't change between the check and the acquire() +        // because only the parent changes it, and we're the parent. 
+        acquire(&np->lock); +        havekids = 1; +        if(np->state == ZOMBIE){ +          // Found one. +          pid = np->pid; +          freeproc(np); +          release(&np->lock); +          release(&p->lock); +          return pid; +        } +        release(&np->lock); +      } +    } + +    // No point waiting if we don't have any children. +    if(!havekids || p->killed){ +      release(&p->lock); +      return -1; +    } +     +    // Wait for a child to exit. +    sleep(p, &p->lock);  //DOC: wait-sleep +  } +} + +// Per-CPU process scheduler. +// Each CPU calls scheduler() after setting itself up. +// Scheduler never returns.  It loops, doing: +//  - choose a process to run. +//  - swtch to start running that process. +//  - eventually that process transfers control +//    via swtch back to the scheduler. +void +scheduler(void) +{ +  struct proc *p; +  struct cpu *c = mycpu(); +   +  c->proc = 0; +  for(;;){ +    // Avoid deadlock by ensuring that devices can interrupt. +    intr_on(); + +    for(p = proc; p < &proc[NPROC]; p++) { +      acquire(&p->lock); +      if(p->state == RUNNABLE) { +        // Switch to chosen process.  It is the process's job +        // to release its lock and then reacquire it +        // before jumping back to us. +        p->state = RUNNING; +        c->proc = p; +        swtch(&c->scheduler, &p->context); + +        // Process is done running for now. +        // It should have changed its p->state before coming back. +        c->proc = 0; +      } +      release(&p->lock); +    } +  } +} + +// Switch to scheduler.  Must hold only p->lock +// and have changed proc->state. Saves and restores +// intena because intena is a property of this +// kernel thread, not this CPU. It should +// be proc->intena and proc->noff, but that would +// break in the few places where a lock is held but +// there's no process. 
+void +sched(void) +{ +  int intena; +  struct proc *p = myproc(); + +  if(!holding(&p->lock)) +    panic("sched p->lock"); +  if(mycpu()->noff != 1) +    panic("sched locks"); +  if(p->state == RUNNING) +    panic("sched running"); +  if(intr_get()) +    panic("sched interruptible"); + +  intena = mycpu()->intena; +  swtch(&p->context, &mycpu()->scheduler); +  mycpu()->intena = intena; +} + +// Give up the CPU for one scheduling round. +void +yield(void) +{ +  struct proc *p = myproc(); +  acquire(&p->lock); +  p->state = RUNNABLE; +  sched(); +  release(&p->lock); +} + +// A fork child's very first scheduling by scheduler() +// will swtch to forkret. +void +forkret(void) +{ +  static int first = 1; + +  // Still holding p->lock from scheduler. +  release(&myproc()->lock); + +  if (first) { +    // File system initialization must be run in the context of a +    // regular process (e.g., because it calls sleep), and thus cannot +    // be run from main(). +    first = 0; +    fsinit(ROOTDEV); +  } + +  usertrapret(); +} + +// Atomically release lock and sleep on chan. +// Reacquires lock when awakened. +void +sleep(void *chan, struct spinlock *lk) +{ +  struct proc *p = myproc(); +   +  // Must acquire p->lock in order to +  // change p->state and then call sched. +  // Once we hold p->lock, we can be +  // guaranteed that we won't miss any wakeup +  // (wakeup locks p->lock), +  // so it's okay to release lk. +  if(lk != &p->lock){  //DOC: sleeplock0 +    acquire(&p->lock);  //DOC: sleeplock1 +    release(lk); +  } + +  // Go to sleep. +  p->chan = chan; +  p->state = SLEEPING; + +  sched(); + +  // Tidy up. +  p->chan = 0; + +  // Reacquire original lock. +  if(lk != &p->lock){ +    release(&p->lock); +    acquire(lk); +  } +} + +// Wake up all processes sleeping on chan. +// Must be called without any p->lock. 
+void +wakeup(void *chan) +{ +  struct proc *p; + +  for(p = proc; p < &proc[NPROC]; p++) { +    acquire(&p->lock); +    if(p->state == SLEEPING && p->chan == chan) { +      p->state = RUNNABLE; +    } +    release(&p->lock); +  } +} + +// Wake up p if it is sleeping in wait(); used by exit(). +// Caller must hold p->lock. +static void +wakeup1(struct proc *p) +{ +  if(p->chan == p && p->state == SLEEPING) { +    p->state = RUNNABLE; +  } +} + +// Kill the process with the given pid. +// The victim won't exit until it tries to return +// to user space (see usertrap() in trap.c). +int +kill(int pid) +{ +  struct proc *p; + +  for(p = proc; p < &proc[NPROC]; p++){ +    acquire(&p->lock); +    if(p->pid == pid){ +      p->killed = 1; +      if(p->state == SLEEPING){ +        // Wake process from sleep(). +        p->state = RUNNABLE; +      } +      release(&p->lock); +      return 0; +    } +    release(&p->lock); +  } +  return -1; +} + +// Copy to either a user address, or kernel address, +// depending on usr_dst. +// Returns 0 on success, -1 on error. +int +either_copyout(int user_dst, uint64 dst, void *src, uint64 len) +{ +  struct proc *p = myproc(); +  if(user_dst){ +    return copyout(p->pagetable, dst, src, len); +  } else { +    memmove((char *)dst, src, len); +    return 0; +  } +} + +// Copy from either a user address, or kernel address, +// depending on usr_src. +// Returns 0 on success, -1 on error. +int +either_copyin(void *dst, int user_src, uint64 src, uint64 len) +{ +  struct proc *p = myproc(); +  if(user_src){ +    return copyin(p->pagetable, dst, src, len); +  } else { +    memmove(dst, (char*)src, len); +    return 0; +  } +} + +// Print a process listing to console.  For debugging. +// Runs when user types ^P on console. +// No lock to avoid wedging a stuck machine further. 
+void +procdump(void) +{ +  static char *states[] = { +  [UNUSED]    "unused", +  [SLEEPING]  "sleep ", +  [RUNNABLE]  "runble", +  [RUNNING]   "run   ", +  [ZOMBIE]    "zombie" +  }; +  struct proc *p; +  char *state; + +  printf("\n"); +  for(p = proc; p < &proc[NPROC]; p++){ +    if(p->state == UNUSED) +      continue; +    if(p->state >= 0 && p->state < NELEM(states) && states[p->state]) +      state = states[p->state]; +    else +      state = "???"; +    printf("%d %s %s", p->pid, state, p->name); +    printf("\n"); +  } +} diff --git a/kernel/proc.h b/kernel/proc.h new file mode 100644 index 0000000..655d79f --- /dev/null +++ b/kernel/proc.h @@ -0,0 +1,105 @@ +// Saved registers for kernel context switches. +struct context { +  uint64 ra; +  uint64 sp; + +  // callee-saved +  uint64 s0; +  uint64 s1; +  uint64 s2; +  uint64 s3; +  uint64 s4; +  uint64 s5; +  uint64 s6; +  uint64 s7; +  uint64 s8; +  uint64 s9; +  uint64 s10; +  uint64 s11; +}; + +// Per-CPU state. +struct cpu { +  struct proc *proc;          // The process running on this cpu, or null. +  struct context scheduler;   // swtch() here to enter scheduler(). +  int noff;                   // Depth of push_off() nesting. +  int intena;                 // Were interrupts enabled before push_off()? +}; + +extern struct cpu cpus[NCPU]; + +// per-process data for the trap handling code in trampoline.S. +// sits in a page by itself just under the trampoline page in the +// user page table. not specially mapped in the kernel page table. +// the sscratch register points here. +// uservec in trampoline.S saves user registers in the trapframe, +// then initializes registers from the trapframe's +// kernel_sp, kernel_hartid, kernel_satp, and jumps to kernel_trap. +// usertrapret() and userret in trampoline.S set up +// the trapframe's kernel_*, restore user registers from the +// trapframe, switch to the user page table, and enter user space. 
+// the trapframe includes callee-saved user registers like s0-s11 because the +// return-to-user path via usertrapret() doesn't return through +// the entire kernel call stack. +struct trapframe { +  /*   0 */ uint64 kernel_satp;   // kernel page table +  /*   8 */ uint64 kernel_sp;     // top of process's kernel stack +  /*  16 */ uint64 kernel_trap;   // usertrap() +  /*  24 */ uint64 epc;           // saved user program counter +  /*  32 */ uint64 kernel_hartid; // saved kernel tp +  /*  40 */ uint64 ra; +  /*  48 */ uint64 sp; +  /*  56 */ uint64 gp; +  /*  64 */ uint64 tp; +  /*  72 */ uint64 t0; +  /*  80 */ uint64 t1; +  /*  88 */ uint64 t2; +  /*  96 */ uint64 s0; +  /* 104 */ uint64 s1; +  /* 112 */ uint64 a0; +  /* 120 */ uint64 a1; +  /* 128 */ uint64 a2; +  /* 136 */ uint64 a3; +  /* 144 */ uint64 a4; +  /* 152 */ uint64 a5; +  /* 160 */ uint64 a6; +  /* 168 */ uint64 a7; +  /* 176 */ uint64 s2; +  /* 184 */ uint64 s3; +  /* 192 */ uint64 s4; +  /* 200 */ uint64 s5; +  /* 208 */ uint64 s6; +  /* 216 */ uint64 s7; +  /* 224 */ uint64 s8; +  /* 232 */ uint64 s9; +  /* 240 */ uint64 s10; +  /* 248 */ uint64 s11; +  /* 256 */ uint64 t3; +  /* 264 */ uint64 t4; +  /* 272 */ uint64 t5; +  /* 280 */ uint64 t6; +}; + +enum procstate { UNUSED, SLEEPING, RUNNABLE, RUNNING, ZOMBIE }; + +// Per-process state +struct proc { +  struct spinlock lock; + +  // p->lock must be held when using these: +  enum procstate state;        // Process state +  struct proc *parent;         // Parent process +  void *chan;                  // If non-zero, sleeping on chan +  int killed;                  // If non-zero, have been killed +  int pid;                     // Process ID + +  // these are private to the process, so p->lock need not be held. 
+  uint64 kstack;               // Bottom of kernel stack for this process +  uint64 sz;                   // Size of process memory (bytes) +  pagetable_t pagetable;       // Page table +  struct trapframe *tf;        // data page for trampoline.S +  struct context context;      // swtch() here to run process +  struct file *ofile[NOFILE];  // Open files +  struct inode *cwd;           // Current directory +  char name[16];               // Process name (debugging) +}; diff --git a/kernel/ramdisk.c b/kernel/ramdisk.c new file mode 100644 index 0000000..9901294 --- /dev/null +++ b/kernel/ramdisk.c @@ -0,0 +1,45 @@ +// +// ramdisk that uses the disk image loaded by qemu -rdinit fs.img +// + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" + +void +ramdiskinit(void) +{ +} + +// If B_DIRTY is set, write buf to disk, clear B_DIRTY, set B_VALID. +// Else if B_VALID is not set, read buf from disk, set B_VALID. +void +ramdiskrw(struct buf *b) +{ +  if(!holdingsleep(&b->lock)) +    panic("ramdiskrw: buf not locked"); +  if((b->flags & (B_VALID|B_DIRTY)) == B_VALID) +    panic("ramdiskrw: nothing to do"); + +  if(b->blockno >= FSSIZE) +    panic("ramdiskrw: blockno too big"); + +  uint64 diskaddr = b->blockno * BSIZE; +  char *addr = (char *)RAMDISK + diskaddr; + +  if(b->flags & B_DIRTY){ +    // write +    memmove(addr, b->data, BSIZE); +    b->flags &= ~B_DIRTY; +  } else { +    // read +    memmove(b->data, addr, BSIZE); +    b->flags |= B_VALID; +  } +} diff --git a/kernel/riscv.h b/kernel/riscv.h new file mode 100644 index 0000000..0f83db6 --- /dev/null +++ b/kernel/riscv.h @@ -0,0 +1,358 @@ +// which hart (core) is this? +static inline uint64 +r_mhartid() +{ +  uint64 x; +  asm volatile("csrr %0, mhartid" : "=r" (x) ); +  return x; +} + +// Machine Status Register, mstatus + +#define MSTATUS_MPP_MASK (3L << 11) // previous mode. 
+#define MSTATUS_MPP_M (3L << 11) +#define MSTATUS_MPP_S (1L << 11) +#define MSTATUS_MPP_U (0L << 11) +#define MSTATUS_MIE (1L << 3)    // machine-mode interrupt enable. + +static inline uint64 +r_mstatus() +{ +  uint64 x; +  asm volatile("csrr %0, mstatus" : "=r" (x) ); +  return x; +} + +static inline void  +w_mstatus(uint64 x) +{ +  asm volatile("csrw mstatus, %0" : : "r" (x)); +} + +// machine exception program counter, holds the +// instruction address to which a return from +// exception will go. +static inline void  +w_mepc(uint64 x) +{ +  asm volatile("csrw mepc, %0" : : "r" (x)); +} + +// Supervisor Status Register, sstatus + +#define SSTATUS_SPP (1L << 8)  // Previous mode, 1=Supervisor, 0=User +#define SSTATUS_SPIE (1L << 5) // Supervisor Previous Interrupt Enable +#define SSTATUS_UPIE (1L << 4) // User Previous Interrupt Enable +#define SSTATUS_SIE (1L << 1)  // Supervisor Interrupt Enable +#define SSTATUS_UIE (1L << 0)  // User Interrupt Enable + +static inline uint64 +r_sstatus() +{ +  uint64 x; +  asm volatile("csrr %0, sstatus" : "=r" (x) ); +  return x; +} + +static inline void  +w_sstatus(uint64 x) +{ +  asm volatile("csrw sstatus, %0" : : "r" (x)); +} + +// Supervisor Interrupt Pending +static inline uint64 +r_sip() +{ +  uint64 x; +  asm volatile("csrr %0, sip" : "=r" (x) ); +  return x; +} + +static inline void  +w_sip(uint64 x) +{ +  asm volatile("csrw sip, %0" : : "r" (x)); +} + +// Supervisor Interrupt Enable +#define SIE_SEIE (1L << 9) // external +#define SIE_STIE (1L << 5) // timer +#define SIE_SSIE (1L << 1) // software +static inline uint64 +r_sie() +{ +  uint64 x; +  asm volatile("csrr %0, sie" : "=r" (x) ); +  return x; +} + +static inline void  +w_sie(uint64 x) +{ +  asm volatile("csrw sie, %0" : : "r" (x)); +} + +// Machine-mode Interrupt Enable +#define MIE_MEIE (1L << 11) // external +#define MIE_MTIE (1L << 7)  // timer +#define MIE_MSIE (1L << 3)  // software +static inline uint64 +r_mie() +{ +  uint64 x; +  asm volatile("csrr 
%0, mie" : "=r" (x) ); +  return x; +} + +static inline void  +w_mie(uint64 x) +{ +  asm volatile("csrw mie, %0" : : "r" (x)); +} + +// machine exception program counter, holds the +// instruction address to which a return from +// exception will go. +static inline void  +w_sepc(uint64 x) +{ +  asm volatile("csrw sepc, %0" : : "r" (x)); +} + +static inline uint64 +r_sepc() +{ +  uint64 x; +  asm volatile("csrr %0, sepc" : "=r" (x) ); +  return x; +} + +// Machine Exception Delegation +static inline uint64 +r_medeleg() +{ +  uint64 x; +  asm volatile("csrr %0, medeleg" : "=r" (x) ); +  return x; +} + +static inline void  +w_medeleg(uint64 x) +{ +  asm volatile("csrw medeleg, %0" : : "r" (x)); +} + +// Machine Interrupt Delegation +static inline uint64 +r_mideleg() +{ +  uint64 x; +  asm volatile("csrr %0, mideleg" : "=r" (x) ); +  return x; +} + +static inline void  +w_mideleg(uint64 x) +{ +  asm volatile("csrw mideleg, %0" : : "r" (x)); +} + +// Supervisor Trap-Vector Base Address +// low two bits are mode. +static inline void  +w_stvec(uint64 x) +{ +  asm volatile("csrw stvec, %0" : : "r" (x)); +} + +static inline uint64 +r_stvec() +{ +  uint64 x; +  asm volatile("csrr %0, stvec" : "=r" (x) ); +  return x; +} + +// Machine-mode interrupt vector +static inline void  +w_mtvec(uint64 x) +{ +  asm volatile("csrw mtvec, %0" : : "r" (x)); +} + +// use riscv's sv39 page table scheme. +#define SATP_SV39 (8L << 60) + +#define MAKE_SATP(pagetable) (SATP_SV39 | (((uint64)pagetable) >> 12)) + +// supervisor address translation and protection; +// holds the address of the page table. +static inline void  +w_satp(uint64 x) +{ +  asm volatile("csrw satp, %0" : : "r" (x)); +} + +static inline uint64 +r_satp() +{ +  uint64 x; +  asm volatile("csrr %0, satp" : "=r" (x) ); +  return x; +} + +// Supervisor Scratch register, for early trap handler in trampoline.S. 
+static inline void  +w_sscratch(uint64 x) +{ +  asm volatile("csrw sscratch, %0" : : "r" (x)); +} + +static inline void  +w_mscratch(uint64 x) +{ +  asm volatile("csrw mscratch, %0" : : "r" (x)); +} + +// Supervisor Trap Cause +static inline uint64 +r_scause() +{ +  uint64 x; +  asm volatile("csrr %0, scause" : "=r" (x) ); +  return x; +} + +// Supervisor Trap Value +static inline uint64 +r_stval() +{ +  uint64 x; +  asm volatile("csrr %0, stval" : "=r" (x) ); +  return x; +} + +// Machine-mode Counter-Enable +static inline void  +w_mcounteren(uint64 x) +{ +  asm volatile("csrw mcounteren, %0" : : "r" (x)); +} + +static inline uint64 +r_mcounteren() +{ +  uint64 x; +  asm volatile("csrr %0, mcounteren" : "=r" (x) ); +  return x; +} + +// machine-mode cycle counter +static inline uint64 +r_time() +{ +  uint64 x; +  asm volatile("csrr %0, time" : "=r" (x) ); +  return x; +} + +// enable device interrupts +static inline void +intr_on() +{ +  w_sie(r_sie() | SIE_SEIE | SIE_STIE | SIE_SSIE); +  w_sstatus(r_sstatus() | SSTATUS_SIE); +} + +// disable device interrupts +static inline void +intr_off() +{ +  w_sstatus(r_sstatus() & ~SSTATUS_SIE); +} + +// are device interrupts enabled? +static inline int +intr_get() +{ +  uint64 x = r_sstatus(); +  return (x & SSTATUS_SIE) != 0; +} + +static inline uint64 +r_sp() +{ +  uint64 x; +  asm volatile("mv %0, sp" : "=r" (x) ); +  return x; +} + +// read and write tp, the thread pointer, which holds +// this core's hartid (core number), the index into cpus[]. +static inline uint64 +r_tp() +{ +  uint64 x; +  asm volatile("mv %0, tp" : "=r" (x) ); +  return x; +} + +static inline void  +w_tp(uint64 x) +{ +  asm volatile("mv tp, %0" : : "r" (x)); +} + +static inline uint64 +r_ra() +{ +  uint64 x; +  asm volatile("mv %0, ra" : "=r" (x) ); +  return x; +} + +// tell the machine to finish any previous writes to +// PTEs, so that a subsequent use of a virtual +// address or load of the SATP will see those writes. 
+// perhaps this also flushes the TLB. +static inline void +sfence_vma() +{ +  // the zero, zero means flush all TLB entries. +  asm volatile("sfence.vma zero, zero"); +} + + +#define PGSIZE 4096 // bytes per page +#define PGSHIFT 12  // bits of offset within a page + +#define PGROUNDUP(sz)  (((sz)+PGSIZE-1) & ~(PGSIZE-1)) +#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1)) + +#define PTE_V (1L << 0) // valid +#define PTE_R (1L << 1) +#define PTE_W (1L << 2) +#define PTE_X (1L << 3) +#define PTE_U (1L << 4) // 1 -> user can access + +// shift a physical address to the right place for a PTE. +#define PA2PTE(pa) ((((uint64)pa) >> 12) << 10) + +#define PTE2PA(pte) (((pte) >> 10) << 12) + +#define PTE_FLAGS(pte) ((pte) & (PTE_V|PTE_R|PTE_W|PTE_X|PTE_U)) + +// extract the three 9-bit page table indices from a virtual address. +#define PXMASK          0x1FF // 9 bits +#define PXSHIFT(level)  (PGSHIFT+(9*(level))) +#define PX(level, va) ((((uint64) (va)) >> PXSHIFT(level)) & PXMASK) + +// one beyond the highest possible virtual address. +// MAXVA is actually one bit less than the max allowed by +// Sv39, to avoid having to sign-extend virtual addresses +// that have the high bit set. +#define MAXVA (1L << (9 + 9 + 9 + 12 - 1)) + +typedef uint64 pte_t; +typedef uint64 *pagetable_t; // 512 PTEs diff --git a/sleeplock.c b/kernel/sleeplock.c index e0750ea..81de585 100644 --- a/sleeplock.c +++ b/kernel/sleeplock.c @@ -1,13 +1,12 @@  // Sleeping locks  #include "types.h" +#include "riscv.h"  #include "defs.h"  #include "param.h" -#include "x86.h"  #include "memlayout.h" -#include "mmu.h" -#include "proc.h"  #include "spinlock.h" +#include "proc.h"  #include "sleeplock.h"  void diff --git a/sleeplock.h b/kernel/sleeplock.h index 110e6f3..110e6f3 100644 --- a/sleeplock.h +++ b/kernel/sleeplock.h diff --git a/kernel/spinlock.c b/kernel/spinlock.c new file mode 100644 index 0000000..563532e --- /dev/null +++ b/kernel/spinlock.c @@ -0,0 +1,108 @@ +// Mutual exclusion spin locks. 
+ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "spinlock.h" +#include "riscv.h" +#include "proc.h" +#include "defs.h" + +void +initlock(struct spinlock *lk, char *name) +{ +  lk->name = name; +  lk->locked = 0; +  lk->cpu = 0; +} + +// Acquire the lock. +// Loops (spins) until the lock is acquired. +void +acquire(struct spinlock *lk) +{ +  push_off(); // disable interrupts to avoid deadlock. +  if(holding(lk)) +    panic("acquire"); + +  // On RISC-V, sync_lock_test_and_set turns into an atomic swap: +  //   a5 = 1 +  //   s1 = &lk->locked +  //   amoswap.w.aq a5, a5, (s1) +  while(__sync_lock_test_and_set(&lk->locked, 1) != 0) +    ; + +  // Tell the C compiler and the processor to not move loads or stores +  // past this point, to ensure that the critical section's memory +  // references happen after the lock is acquired. +  __sync_synchronize(); + +  // Record info about lock acquisition for holding() and debugging. +  lk->cpu = mycpu(); +} + +// Release the lock. +void +release(struct spinlock *lk) +{ +  if(!holding(lk)) +    panic("release"); + +  lk->cpu = 0; + +  // Tell the C compiler and the CPU to not move loads or stores +  // past this point, to ensure that all the stores in the critical +  // section are visible to other CPUs before the lock is released. +  // On RISC-V, this turns into a fence instruction. +  __sync_synchronize(); + +  // Release the lock, equivalent to lk->locked = 0. +  // This code doesn't use a C assignment, since the C standard +  // implies that an assignment might be implemented with +  // multiple store instructions. +  // On RISC-V, sync_lock_release turns into an atomic swap: +  //   s1 = &lk->locked +  //   amoswap.w zero, zero, (s1) +  __sync_lock_release(&lk->locked); + +  pop_off(); +} + +// Check whether this cpu is holding the lock. 
+int +holding(struct spinlock *lk) +{ +  int r; +  push_off(); +  r = (lk->locked && lk->cpu == mycpu()); +  pop_off(); +  return r; +} + +// push_off/pop_off are like intr_off()/intr_on() except that they are matched: +// it takes two pop_off()s to undo two push_off()s.  Also, if interrupts +// are initially off, then push_off, pop_off leaves them off. + +void +push_off(void) +{ +  int old = intr_get(); + +  intr_off(); +  if(mycpu()->noff == 0) +    mycpu()->intena = old; +  mycpu()->noff += 1; +} + +void +pop_off(void) +{ +  struct cpu *c = mycpu(); +  if(intr_get()) +    panic("pop_off - interruptible"); +  c->noff -= 1; +  if(c->noff < 0) +    panic("pop_off"); +  if(c->noff == 0 && c->intena) +    intr_on(); +} diff --git a/spinlock.h b/kernel/spinlock.h index 0a9d8e2..4392820 100644 --- a/spinlock.h +++ b/kernel/spinlock.h @@ -5,7 +5,5 @@ struct spinlock {    // For debugging:    char *name;        // Name of lock.    struct cpu *cpu;   // The cpu holding the lock. -  uint pcs[10];      // The call stack (an array of program counters) -                     // that locked the lock.  }; diff --git a/kernel/start.c b/kernel/start.c new file mode 100644 index 0000000..203c5e6 --- /dev/null +++ b/kernel/start.c @@ -0,0 +1,82 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" + +void main(); +void timerinit(); + +// entry.S needs one stack per CPU. +__attribute__ ((aligned (16))) char stack0[4096 * NCPU]; + +// scratch area for timer interrupt, one per CPU. +uint64 mscratch0[NCPU * 32]; + +// assembly code in kernelvec.S for machine-mode timer interrupt. +extern void timervec(); + +// entry.S jumps here in machine mode on stack0. +void +start() +{ +  // set M Previous Privilege mode to Supervisor, for mret. +  unsigned long x = r_mstatus(); +  x &= ~MSTATUS_MPP_MASK; +  x |= MSTATUS_MPP_S; +  w_mstatus(x); + +  // set M Exception Program Counter to main, for mret. 
+  // requires gcc -mcmodel=medany +  w_mepc((uint64)main); + +  // disable paging for now. +  w_satp(0); + +  // delegate all interrupts and exceptions to supervisor mode. +  w_medeleg(0xffff); +  w_mideleg(0xffff); + +  // ask for clock interrupts. +  timerinit(); + +  // keep each CPU's hartid in its tp register, for cpuid(). +  int id = r_mhartid(); +  w_tp(id); + +  // switch to supervisor mode and jump to main(). +  asm volatile("mret"); +} + +// set up to receive timer interrupts in machine mode, +// which arrive at timervec in kernelvec.S, +// which turns them into software interrupts for +// devintr() in trap.c. +void +timerinit() +{ +  // each CPU has a separate source of timer interrupts. +  int id = r_mhartid(); + +  // ask the CLINT for a timer interrupt. +  int interval = 1000000; // cycles; about 1/10th second in qemu. +  *(uint64*)CLINT_MTIMECMP(id) = *(uint64*)CLINT_MTIME + interval; + +  // prepare information in scratch[] for timervec. +  // scratch[0..3] : space for timervec to save registers. +  // scratch[4] : address of CLINT MTIMECMP register. +  // scratch[5] : desired interval (in cycles) between timer interrupts. +  uint64 *scratch = &mscratch0[32 * id]; +  scratch[4] = CLINT_MTIMECMP(id); +  scratch[5] = interval; +  w_mscratch((uint64)scratch); + +  // set the machine-mode trap handler. +  w_mtvec((uint64)timervec); + +  // enable machine-mode interrupts. +  w_mstatus(r_mstatus() | MSTATUS_MIE); + +  // enable machine-mode timer interrupts. 
+  w_mie(r_mie() | MIE_MTIE); +} @@ -1,11 +1,11 @@ -#define T_DIR  1   // Directory -#define T_FILE 2   // File -#define T_DEV  3   // Device +#define T_DIR     1   // Directory +#define T_FILE    2   // File +#define T_DEVICE  3   // Device  struct stat { -  short type;  // Type of file    int dev;     // File system's disk device    uint ino;    // Inode number +  short type;  // Type of file    short nlink; // Number of links to file -  uint size;   // Size of file in bytes +  uint64 size; // Size of file in bytes  }; diff --git a/string.c b/kernel/string.c index a7cc61f..d99e612 100644 --- a/string.c +++ b/kernel/string.c @@ -1,14 +1,13 @@  #include "types.h" -#include "x86.h"  void*  memset(void *dst, int c, uint n)  { -  if ((int)dst%4 == 0 && n%4 == 0){ -    c &= 0xFF; -    stosl(dst, (c<<24)|(c<<16)|(c<<8)|c, n/4); -  } else -    stosb(dst, c, n); +  char *cdst = (char *) dst; +  int i; +  for(i = 0; i < n; i++){ +    cdst[i] = c; +  }    return dst;  } diff --git a/kernel/swtch.S b/kernel/swtch.S new file mode 100644 index 0000000..17a8663 --- /dev/null +++ b/kernel/swtch.S @@ -0,0 +1,42 @@ +# Context switch +# +#   void swtch(struct context *old, struct context *new); +#  +# Save current registers in old. Load from new.	 
+ + +.globl swtch +swtch: +        sd ra, 0(a0) +        sd sp, 8(a0) +        sd s0, 16(a0) +        sd s1, 24(a0) +        sd s2, 32(a0) +        sd s3, 40(a0) +        sd s4, 48(a0) +        sd s5, 56(a0) +        sd s6, 64(a0) +        sd s7, 72(a0) +        sd s8, 80(a0) +        sd s9, 88(a0) +        sd s10, 96(a0) +        sd s11, 104(a0) + +        ld ra, 0(a1) +        ld sp, 8(a1) +        ld s0, 16(a1) +        ld s1, 24(a1) +        ld s2, 32(a1) +        ld s3, 40(a1) +        ld s4, 48(a1) +        ld s5, 56(a1) +        ld s6, 64(a1) +        ld s7, 72(a1) +        ld s8, 80(a1) +        ld s9, 88(a1) +        ld s10, 96(a1) +        ld s11, 104(a1) +         +        ret + +	 diff --git a/kernel/syscall.c b/kernel/syscall.c new file mode 100644 index 0000000..97974d6 --- /dev/null +++ b/kernel/syscall.c @@ -0,0 +1,147 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "spinlock.h" +#include "proc.h" +#include "syscall.h" +#include "defs.h" + +// Fetch the uint64 at addr from the current process. +int +fetchaddr(uint64 addr, uint64 *ip) +{ +  struct proc *p = myproc(); +  if(addr >= p->sz || addr+sizeof(uint64) > p->sz) +    return -1; +  if(copyin(p->pagetable, (char *)ip, addr, sizeof(*ip)) != 0) +    return -1; +  return 0; +} + +// Fetch the nul-terminated string at addr from the current process. +// Doesn't actually copy the string - just sets *pp to point at it. +// Returns length of string, not including nul, or -1 for error. 
+int +fetchstr(uint64 addr, char *buf, int max) +{ +  struct proc *p = myproc(); +  int err = copyinstr(p->pagetable, buf, addr, max); +  if(err < 0) +    return err; +  return strlen(buf); +} + +static uint64 +argraw(int n) +{ +  struct proc *p = myproc(); +  switch (n) { +  case 0: +    return p->tf->a0; +  case 1: +    return p->tf->a1; +  case 2: +    return p->tf->a2; +  case 3: +    return p->tf->a3; +  case 4: +    return p->tf->a4; +  case 5: +    return p->tf->a5; +  } +  panic("argraw"); +  return -1; +} + +// Fetch the nth 32-bit system call argument. +int +argint(int n, int *ip) +{ +  *ip = argraw(n); +  return 0; +} + +// Retrieve an argument as a pointer. +// Doesn't check for legality, since +// copyin/copyout will do that. +int +argaddr(int n, uint64 *ip) +{ +  *ip = argraw(n); +  return 0; +} + +// Fetch the nth word-sized system call argument as a null-terminated string. +// Copies into buf, at most max. +// Returns string length if OK (including nul), -1 if error. +int +argstr(int n, char *buf, int max) +{ +  uint64 addr; +  if(argaddr(n, &addr) < 0) +    return -1; +  return fetchstr(addr, buf, max); +} + +extern uint64 sys_chdir(void); +extern uint64 sys_close(void); +extern uint64 sys_dup(void); +extern uint64 sys_exec(void); +extern uint64 sys_exit(void); +extern uint64 sys_fork(void); +extern uint64 sys_fstat(void); +extern uint64 sys_getpid(void); +extern uint64 sys_kill(void); +extern uint64 sys_link(void); +extern uint64 sys_mkdir(void); +extern uint64 sys_mknod(void); +extern uint64 sys_open(void); +extern uint64 sys_pipe(void); +extern uint64 sys_read(void); +extern uint64 sys_sbrk(void); +extern uint64 sys_sleep(void); +extern uint64 sys_unlink(void); +extern uint64 sys_wait(void); +extern uint64 sys_write(void); +extern uint64 sys_uptime(void); + +static uint64 (*syscalls[])(void) = { +[SYS_fork]    sys_fork, +[SYS_exit]    sys_exit, +[SYS_wait]    sys_wait, +[SYS_pipe]    sys_pipe, +[SYS_read]    sys_read, +[SYS_kill]    sys_kill, 
+[SYS_exec]    sys_exec, +[SYS_fstat]   sys_fstat, +[SYS_chdir]   sys_chdir, +[SYS_dup]     sys_dup, +[SYS_getpid]  sys_getpid, +[SYS_sbrk]    sys_sbrk, +[SYS_sleep]   sys_sleep, +[SYS_uptime]  sys_uptime, +[SYS_open]    sys_open, +[SYS_write]   sys_write, +[SYS_mknod]   sys_mknod, +[SYS_unlink]  sys_unlink, +[SYS_link]    sys_link, +[SYS_mkdir]   sys_mkdir, +[SYS_close]   sys_close, +}; + +void +syscall(void) +{ +  int num; +  struct proc *p = myproc(); + +  num = p->tf->a7; +  if(num > 0 && num < NELEM(syscalls) && syscalls[num]) { +    p->tf->a0 = syscalls[num](); +  } else { +    printf("%d %s: unknown sys call %d\n", +            p->pid, p->name, num); +    p->tf->a0 = -1; +  } +} diff --git a/syscall.h b/kernel/syscall.h index bc5f356..bc5f356 100644 --- a/syscall.h +++ b/kernel/syscall.h diff --git a/sysfile.c b/kernel/sysfile.c index bfe61b7..23a9540 100644 --- a/sysfile.c +++ b/kernel/sysfile.c @@ -5,13 +5,13 @@  //  #include "types.h" +#include "riscv.h"  #include "defs.h"  #include "param.h"  #include "stat.h" -#include "mmu.h" +#include "spinlock.h"  #include "proc.h"  #include "fs.h" -#include "spinlock.h"  #include "sleeplock.h"  #include "file.h"  #include "fcntl.h" @@ -41,18 +41,18 @@ static int  fdalloc(struct file *f)  {    int fd; -  struct proc *curproc = myproc(); +  struct proc *p = myproc();    for(fd = 0; fd < NOFILE; fd++){ -    if(curproc->ofile[fd] == 0){ -      curproc->ofile[fd] = f; +    if(p->ofile[fd] == 0){ +      p->ofile[fd] = f;        return fd;      }    }    return -1;  } -int +uint64  sys_dup(void)  {    struct file *f; @@ -66,31 +66,32 @@ sys_dup(void)    return fd;  } -int +uint64  sys_read(void)  {    struct file *f;    int n; -  char *p; +  uint64 p; -  if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argptr(1, &p, n) < 0) +  if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argaddr(1, &p) < 0)      return -1;    return fileread(f, p, n);  } -int +uint64  sys_write(void)  {    struct file *f;    int n; -  char *p; +  uint64 p; 
-  if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argptr(1, &p, n) < 0) +  if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argaddr(1, &p) < 0)      return -1; +    return filewrite(f, p, n);  } -int +uint64  sys_close(void)  {    int fd; @@ -103,25 +104,25 @@ sys_close(void)    return 0;  } -int +uint64  sys_fstat(void)  {    struct file *f; -  struct stat *st; +  uint64 st; // user pointer to struct stat -  if(argfd(0, 0, &f) < 0 || argptr(1, (void*)&st, sizeof(*st)) < 0) +  if(argfd(0, 0, &f) < 0 || argaddr(1, &st) < 0)      return -1;    return filestat(f, st);  }  // Create the path new as a link to the same inode as old. -int +uint64  sys_link(void)  { -  char name[DIRSIZ], *new, *old; +  char name[DIRSIZ], new[MAXPATH], old[MAXPATH];    struct inode *dp, *ip; -  if(argstr(0, &old) < 0 || argstr(1, &new) < 0) +  if(argstr(0, old, MAXPATH) < 0 || argstr(1, new, MAXPATH) < 0)      return -1;    begin_op(); @@ -172,7 +173,7 @@ isdirempty(struct inode *dp)    struct dirent de;    for(off=2*sizeof(de); off<dp->size; off+=sizeof(de)){ -    if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) +    if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))        panic("isdirempty: readi");      if(de.inum != 0)        return 0; @@ -180,16 +181,15 @@ isdirempty(struct inode *dp)    return 1;  } -//PAGEBREAK! 
-int +uint64  sys_unlink(void)  {    struct inode *ip, *dp;    struct dirent de; -  char name[DIRSIZ], *path; +  char name[DIRSIZ], path[MAXPATH];    uint off; -  if(argstr(0, &path) < 0) +  if(argstr(0, path, MAXPATH) < 0)      return -1;    begin_op(); @@ -216,7 +216,7 @@ sys_unlink(void)    }    memset(&de, 0, sizeof(de)); -  if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) +  if(writei(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))      panic("unlink: writei");    if(ip->type == T_DIR){      dp->nlink--; @@ -251,7 +251,7 @@ create(char *path, short type, short major, short minor)    if((ip = dirlookup(dp, name, 0)) != 0){      iunlockput(dp);      ilock(ip); -    if(type == T_FILE && ip->type == T_FILE) +    if(type == T_FILE && (ip->type == T_FILE || ip->type == T_DEVICE))        return ip;      iunlockput(ip);      return 0; @@ -282,15 +282,15 @@ create(char *path, short type, short major, short minor)    return ip;  } -int +uint64  sys_open(void)  { -  char *path; +  char path[MAXPATH];    int fd, omode;    struct file *f;    struct inode *ip; -  if(argstr(0, &path) < 0 || argint(1, &omode) < 0) +  if(argstr(0, path, MAXPATH) < 0 || argint(1, &omode) < 0)      return -1;    begin_op(); @@ -314,6 +314,12 @@ sys_open(void)      }    } +  if(ip->type == T_DEVICE && (ip->major < 0 || ip->major >= NDEV)){ +    iunlockput(ip); +    end_op(); +    return -1; +  } +    if((f = filealloc()) == 0 || (fd = fdalloc(f)) < 0){      if(f)        fileclose(f); @@ -321,25 +327,32 @@ sys_open(void)      end_op();      return -1;    } -  iunlock(ip); -  end_op(); -  f->type = FD_INODE; +  if(ip->type == T_DEVICE){ +    f->type = FD_DEVICE; +    f->major = ip->major; +  } else { +    f->type = FD_INODE; +    f->off = 0; +  }    f->ip = ip; -  f->off = 0;    f->readable = !(omode & O_WRONLY);    f->writable = (omode & O_WRONLY) || (omode & O_RDWR); + +  iunlock(ip); +  end_op(); +    return fd;  } -int +uint64  sys_mkdir(void)  { -  char *path; +  char 
path[MAXPATH];    struct inode *ip;    begin_op(); -  if(argstr(0, &path) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0){ +  if(argstr(0, path, MAXPATH) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0){      end_op();      return -1;    } @@ -348,18 +361,18 @@ sys_mkdir(void)    return 0;  } -int +uint64  sys_mknod(void)  {    struct inode *ip; -  char *path; +  char path[MAXPATH];    int major, minor;    begin_op(); -  if((argstr(0, &path)) < 0 || +  if((argstr(0, path, MAXPATH)) < 0 ||       argint(1, &major) < 0 ||       argint(2, &minor) < 0 || -     (ip = create(path, T_DEV, major, minor)) == 0){ +     (ip = create(path, T_DEVICE, major, minor)) == 0){      end_op();      return -1;    } @@ -368,15 +381,15 @@ sys_mknod(void)    return 0;  } -int +uint64  sys_chdir(void)  { -  char *path; +  char path[MAXPATH];    struct inode *ip; -  struct proc *curproc = myproc(); +  struct proc *p = myproc();    begin_op(); -  if(argstr(0, &path) < 0 || (ip = namei(path)) == 0){ +  if(argstr(0, path, MAXPATH) < 0 || (ip = namei(path)) == 0){      end_op();      return -1;    } @@ -387,58 +400,77 @@ sys_chdir(void)      return -1;    }    iunlock(ip); -  iput(curproc->cwd); +  iput(p->cwd);    end_op(); -  curproc->cwd = ip; +  p->cwd = ip;    return 0;  } -int +uint64  sys_exec(void)  { -  char *path, *argv[MAXARG]; +  char path[MAXPATH], *argv[MAXARG];    int i; -  uint uargv, uarg; +  uint64 uargv, uarg; -  if(argstr(0, &path) < 0 || argint(1, (int*)&uargv) < 0){ +  if(argstr(0, path, MAXPATH) < 0 || argaddr(1, &uargv) < 0){      return -1;    }    memset(argv, 0, sizeof(argv));    for(i=0;; i++){ -    if(i >= NELEM(argv)) +    if(i >= NELEM(argv)){        return -1; -    if(fetchint(uargv+4*i, (int*)&uarg) < 0) +    } +    if(fetchaddr(uargv+sizeof(uint64)*i, (uint64*)&uarg) < 0){        return -1; +    }      if(uarg == 0){        argv[i] = 0;        break;      } -    if(fetchstr(uarg, &argv[i]) < 0) +    argv[i] = kalloc(); +    if(argv[i] == 0) +      panic("sys_exec 
kalloc"); +    if(fetchstr(uarg, argv[i], PGSIZE) < 0){        return -1; +    }    } -  return exec(path, argv); + +  int ret = exec(path, argv); + +  for(i = 0; i < NELEM(argv) && argv[i] != 0; i++) +    kfree(argv[i]); + +  return ret;  } -int +uint64  sys_pipe(void)  { -  int *fd; +  uint64 fdarray; // user pointer to array of two integers    struct file *rf, *wf;    int fd0, fd1; +  struct proc *p = myproc(); -  if(argptr(0, (void*)&fd, 2*sizeof(fd[0])) < 0) +  if(argaddr(0, &fdarray) < 0)      return -1;    if(pipealloc(&rf, &wf) < 0)      return -1;    fd0 = -1;    if((fd0 = fdalloc(rf)) < 0 || (fd1 = fdalloc(wf)) < 0){      if(fd0 >= 0) -      myproc()->ofile[fd0] = 0; +      p->ofile[fd0] = 0; +    fileclose(rf); +    fileclose(wf); +    return -1; +  } +  if(copyout(p->pagetable, fdarray, (char*)&fd0, sizeof(fd0)) < 0 || +     copyout(p->pagetable, fdarray+sizeof(fd0), (char *)&fd1, sizeof(fd1)) < 0){ +    p->ofile[fd0] = 0; +    p->ofile[fd1] = 0;      fileclose(rf);      fileclose(wf);      return -1;    } -  fd[0] = fd0; -  fd[1] = fd1;    return 0;  } diff --git a/sysproc.c b/kernel/sysproc.c index 0686d29..face81a 100644 --- a/sysproc.c +++ b/kernel/sysproc.c @@ -1,48 +1,38 @@  #include "types.h" -#include "x86.h" +#include "riscv.h"  #include "defs.h"  #include "date.h"  #include "param.h"  #include "memlayout.h" -#include "mmu.h" +#include "spinlock.h"  #include "proc.h" -int -sys_fork(void) -{ -  return fork(); -} - -int +uint64  sys_exit(void)  {    exit();    return 0;  // not reached  } -int -sys_wait(void) +uint64 +sys_getpid(void)  { -  return wait(); +  return myproc()->pid;  } -int -sys_kill(void) +uint64 +sys_fork(void)  { -  int pid; - -  if(argint(0, &pid) < 0) -    return -1; -  return kill(pid); +  return fork();  } -int -sys_getpid(void) +uint64 +sys_wait(void)  { -  return myproc()->pid; +  return wait();  } -int +uint64  sys_sbrk(void)  {    int addr; @@ -56,7 +46,7 @@ sys_sbrk(void)    return addr;  } -int +uint64  sys_sleep(void)  
{    int n; @@ -77,9 +67,19 @@ sys_sleep(void)    return 0;  } +uint64 +sys_kill(void) +{ +  int pid; + +  if(argint(0, &pid) < 0) +    return -1; +  return kill(pid); +} +  // return how many clock tick interrupts have occurred  // since start. -int +uint64  sys_uptime(void)  {    uint xticks; diff --git a/kernel/trampoline.S b/kernel/trampoline.S new file mode 100644 index 0000000..24499d9 --- /dev/null +++ b/kernel/trampoline.S @@ -0,0 +1,141 @@ +	# +        # code to switch between user and kernel space. +        # +        # this code is mapped at the same virtual address +        # (TRAMPOLINE) in user and kernel space so that +        # it continues to work when it switches page tables. +	# +	# kernel.ld causes this to be aligned +        # to a page boundary. +        # +	.section trampsec +.globl trampoline +trampoline: +.align 4 +.globl uservec +uservec:     +	# +        # trap.c sets stvec to point here, so +        # traps from user space start here, +        # in supervisor mode, but with a +        # user page table. +        # +        # sscratch points to where the process's p->tf is +        # mapped into user space, at TRAPFRAME. 
+        # +         +	# swap a0 and sscratch +        # so that a0 is TRAPFRAME +        csrrw a0, sscratch, a0 + +        # save the user registers in TRAPFRAME +        sd ra, 40(a0) +        sd sp, 48(a0) +        sd gp, 56(a0) +        sd tp, 64(a0) +        sd t0, 72(a0) +        sd t1, 80(a0) +        sd t2, 88(a0) +        sd s0, 96(a0) +        sd s1, 104(a0) +        sd a1, 120(a0) +        sd a2, 128(a0) +        sd a3, 136(a0) +        sd a4, 144(a0) +        sd a5, 152(a0) +        sd a6, 160(a0) +        sd a7, 168(a0) +        sd s2, 176(a0) +        sd s3, 184(a0) +        sd s4, 192(a0) +        sd s5, 200(a0) +        sd s6, 208(a0) +        sd s7, 216(a0) +        sd s8, 224(a0) +        sd s9, 232(a0) +        sd s10, 240(a0) +        sd s11, 248(a0) +        sd t3, 256(a0) +        sd t4, 264(a0) +        sd t5, 272(a0) +        sd t6, 280(a0) + +	# save the user a0 in p->tf->a0 +        csrr t0, sscratch +        sd t0, 112(a0) + +        # restore kernel stack pointer from p->tf->kernel_sp +        ld sp, 8(a0) + +        # make tp hold the current hartid, from p->tf->kernel_hartid +        ld tp, 32(a0) + +        # remember the address of usertrap(), p->tf->kernel_trap +        ld t0, 16(a0) + +        # restore kernel page table from p->tf->kernel_satp +        ld t1, 0(a0) +	sfence.vma zero, zero +        csrw satp, t1 + +        # a0 is no longer valid, since the kernel page +        # table does not specially map p->tf. + +        # jump to usertrap(), which does not return +        jr t0 + +.globl userret +userret: +        # userret(TRAPFRAME, pagetable) +        # switch from kernel to user. +        # usertrapret() calls here. +	# a0: TRAPFRAME, in user page table +        # a1: user page table, for satp + +	# switch to the user page table. +	sfence.vma zero, zero +        csrw satp, a1 + +        # put the saved user a0 in sscratch, so we +        # can swap it with our a0 (TRAPFRAME) in the last step. 
+        ld t0, 112(a0) +        csrw sscratch, t0 + +        # restore all but a0 from TRAPFRAME +        ld ra, 40(a0) +        ld sp, 48(a0) +        ld gp, 56(a0) +        ld tp, 64(a0) +        ld t0, 72(a0) +        ld t1, 80(a0) +        ld t2, 88(a0) +        ld s0, 96(a0) +        ld s1, 104(a0) +        ld a1, 120(a0) +        ld a2, 128(a0) +        ld a3, 136(a0) +        ld a4, 144(a0) +        ld a5, 152(a0) +        ld a6, 160(a0) +        ld a7, 168(a0) +        ld s2, 176(a0) +        ld s3, 184(a0) +        ld s4, 192(a0) +        ld s5, 200(a0) +        ld s6, 208(a0) +        ld s7, 216(a0) +        ld s8, 224(a0) +        ld s9, 232(a0) +        ld s10, 240(a0) +        ld s11, 248(a0) +        ld t3, 256(a0) +        ld t4, 264(a0) +        ld t5, 272(a0) +        ld t6, 280(a0) + +	# restore user a0, and save TRAPFRAME in sscratch +        csrrw a0, sscratch, a0 +         +        # return to user mode and user pc. +        # usertrapret() set up sstatus and sepc. +        sret diff --git a/kernel/trap.c b/kernel/trap.c new file mode 100644 index 0000000..ec57bed --- /dev/null +++ b/kernel/trap.c @@ -0,0 +1,213 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "spinlock.h" +#include "proc.h" +#include "defs.h" + +struct spinlock tickslock; +uint ticks; + +extern char trampoline[], uservec[], userret[]; + +// in kernelvec.S, calls kerneltrap(). +void kernelvec(); + +extern int devintr(); + +void +trapinit(void) +{ +  initlock(&tickslock, "time"); +} + +// set up to take exceptions and traps while in the kernel. +void +trapinithart(void) +{ +  w_stvec((uint64)kernelvec); +} + +// +// handle an interrupt, exception, or system call from user space. 
+// called from trampoline.S +// +void +usertrap(void) +{ +  int which_dev = 0; + +  if((r_sstatus() & SSTATUS_SPP) != 0) +    panic("usertrap: not from user mode"); + +  // send interrupts and exceptions to kerneltrap(), +  // since we're now in the kernel. +  w_stvec((uint64)kernelvec); + +  struct proc *p = myproc(); +   +  // save user program counter. +  p->tf->epc = r_sepc(); +   +  if(r_scause() == 8){ +    // system call + +    if(p->killed) +      exit(); + +    // sepc points to the ecall instruction, +    // but we want to return to the next instruction. +    p->tf->epc += 4; + +    // an interrupt will change sstatus &c registers, +    // so don't enable until done with those registers. +    intr_on(); + +    syscall(); +  } else if((which_dev = devintr()) != 0){ +    // ok +  } else { +    printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid); +    printf("            sepc=%p stval=%p\n", r_sepc(), r_stval()); +    p->killed = 1; +  } + +  if(p->killed) +    exit(); + +  // give up the CPU if this is a timer interrupt. +  if(which_dev == 2) +    yield(); + +  usertrapret(); +} + +// +// return to user space +// +void +usertrapret(void) +{ +  struct proc *p = myproc(); + +  // turn off interrupts, since we're switching +  // now from kerneltrap() to usertrap(). +  intr_off(); + +  // send interrupts and exceptions to trampoline.S +  w_stvec(TRAMPOLINE + (uservec - trampoline)); + +  // set up values that uservec will need when +  // the process next re-enters the kernel. +  p->tf->kernel_satp = r_satp();         // kernel page table +  p->tf->kernel_sp = p->kstack + PGSIZE; // process's kernel stack +  p->tf->kernel_trap = (uint64)usertrap; +  p->tf->kernel_hartid = r_tp();         // hartid for cpuid() + +  // set up the registers that trampoline.S's sret will use +  // to get to user space. +   +  // set S Previous Privilege mode to User. 
+  unsigned long x = r_sstatus(); +  x &= ~SSTATUS_SPP; // clear SPP to 0 for user mode +  x |= SSTATUS_SPIE; // enable interrupts in user mode +  w_sstatus(x); + +  // set S Exception Program Counter to the saved user pc. +  w_sepc(p->tf->epc); + +  // tell trampoline.S the user page table to switch to. +  uint64 satp = MAKE_SATP(p->pagetable); + +  // jump to trampoline.S at the top of memory, which  +  // switches to the user page table, restores user registers, +  // and switches to user mode with sret. +  uint64 fn = TRAMPOLINE + (userret - trampoline); +  ((void (*)(uint64,uint64))fn)(TRAPFRAME, satp); +} + +// interrupts and exceptions from kernel code go here via kernelvec, +// on whatever the current kernel stack is. +// must be 4-byte aligned to fit in stvec. +void  +kerneltrap() +{ +  int which_dev = 0; +  uint64 sepc = r_sepc(); +  uint64 sstatus = r_sstatus(); +  uint64 scause = r_scause(); +   +  if((sstatus & SSTATUS_SPP) == 0) +    panic("kerneltrap: not from supervisor mode"); +  if(intr_get() != 0) +    panic("kerneltrap: interrupts enabled"); + +  if((which_dev = devintr()) == 0){ +    printf("scause %p\n", scause); +    printf("sepc=%p stval=%p\n", r_sepc(), r_stval()); +    panic("kerneltrap"); +  } + +  // give up the CPU if this is a timer interrupt. +  if(which_dev == 2 && myproc() != 0 && myproc()->state == RUNNING) +    yield(); + +  // the yield() may have caused some traps to occur, +  // so restore the sepc and sstatus values saved on entry, +  // for use by kernelvec.S when it returns from the trap. +  w_sepc(sepc); +  w_sstatus(sstatus); +} + +void +clockintr() +{ +  acquire(&tickslock); +  ticks++; +  wakeup(&ticks); +  release(&tickslock); +} + +// check if it's an external interrupt or software interrupt, +// and handle it. +// returns 2 if timer interrupt, +// 1 if other device, +// 0 if not recognized. 
+int +devintr() +{ +  uint64 scause = r_scause(); + +  if((scause & 0x8000000000000000L) && +     (scause & 0xff) == 9){ +    // this is a supervisor external interrupt, via PLIC. + +    // irq indicates which device interrupted. +    int irq = plic_claim(); + +    if(irq == UART0_IRQ){ +      uartintr(); +    } else if(irq == VIRTIO0_IRQ){ +      virtio_disk_intr(); +    } + +    plic_complete(irq); +    return 1; +  } else if(scause == 0x8000000000000001L){ +    // software interrupt from a machine-mode timer interrupt, +    // forwarded by timervec in kernelvec.S. + +    if(cpuid() == 0){ +      clockintr(); +    } +     +    // acknowledge the software interrupt by clearing +    // the SSIP bit in sip. +    w_sip(r_sip() & ~2); + +    return 2; +  } else { +    return 0; +  } +} + diff --git a/kernel/types.h b/kernel/types.h new file mode 100644 index 0000000..ee73164 --- /dev/null +++ b/kernel/types.h @@ -0,0 +1,10 @@ +typedef unsigned int   uint; +typedef unsigned short ushort; +typedef unsigned char  uchar; + +typedef unsigned char uint8; +typedef unsigned short uint16; +typedef unsigned int  uint32; +typedef unsigned long uint64; + +typedef uint64 pde_t; diff --git a/kernel/uart.c b/kernel/uart.c new file mode 100644 index 0000000..3a5cdc4 --- /dev/null +++ b/kernel/uart.c @@ -0,0 +1,92 @@ +// +// low-level driver routines for 16550a UART. +// + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "spinlock.h" +#include "proc.h" +#include "defs.h" + +// the UART control registers are memory-mapped +// at address UART0. this macro returns the +// address of one of the registers. +#define Reg(reg) ((volatile unsigned char *)(UART0 + reg)) + +// the UART control registers. +// some have different meanings for +// read vs write. 
+// http://byterunner.com/16550.html +#define RHR 0 // receive holding register (for input bytes) +#define THR 0 // transmit holding register (for output bytes) +#define IER 1 // interrupt enable register +#define FCR 2 // FIFO control register +#define ISR 2 // interrupt status register +#define LCR 3 // line control register +#define LSR 5 // line status register + +#define ReadReg(reg) (*(Reg(reg))) +#define WriteReg(reg, v) (*(Reg(reg)) = (v)) + +void +uartinit(void) +{ +  // disable interrupts. +  WriteReg(IER, 0x00); + +  // special mode to set baud rate. +  WriteReg(LCR, 0x80); + +  // LSB for baud rate of 38.4K. +  WriteReg(0, 0x03); + +  // MSB for baud rate of 38.4K. +  WriteReg(1, 0x00); + +  // leave set-baud mode, +  // and set word length to 8 bits, no parity. +  WriteReg(LCR, 0x03); + +  // reset and enable FIFOs. +  WriteReg(FCR, 0x07); + +  // enable receive interrupts. +  WriteReg(IER, 0x01); +} + +// write one output character to the UART. +void +uartputc(int c) +{ +  // wait for Transmit Holding Empty to be set in LSR. +  while((ReadReg(LSR) & (1 << 5)) == 0) +    ; +  WriteReg(THR, c); +} + +// read one input character from the UART. +// return -1 if none is waiting. +int +uartgetc(void) +{ +  if(ReadReg(LSR) & 0x01){ +    // input data is ready. +    return ReadReg(RHR); +  } else { +    return -1; +  } +} + +// trap.c calls here when the uart interrupts. +void +uartintr(void) +{ +  while(1){ +    int c = uartgetc(); +    if(c == -1) +      break; +    consoleintr(c); +  } +} diff --git a/kernel/virtio.h b/kernel/virtio.h new file mode 100644 index 0000000..03b53a9 --- /dev/null +++ b/kernel/virtio.h @@ -0,0 +1,72 @@ +// +// virtio device definitions. +// for both the mmio interface, and virtio descriptors. +// only tested with qemu. +// this is the "legacy" virtio interface. +// +// the virtio spec: +// https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.pdf +// + +// virtio mmio control registers, mapped starting at 0x10001000. 
+// from qemu virtio_mmio.h +#define VIRTIO_MMIO_MAGIC_VALUE		0x000 // 0x74726976 +#define VIRTIO_MMIO_VERSION		0x004 // version; 1 is legacy +#define VIRTIO_MMIO_DEVICE_ID		0x008 // device type; 1 is net, 2 is disk +#define VIRTIO_MMIO_VENDOR_ID		0x00c // 0x554d4551 +#define VIRTIO_MMIO_DEVICE_FEATURES	0x010 +#define VIRTIO_MMIO_DRIVER_FEATURES	0x020 +#define VIRTIO_MMIO_GUEST_PAGE_SIZE	0x028 // page size for PFN, write-only +#define VIRTIO_MMIO_QUEUE_SEL		0x030 // select queue, write-only +#define VIRTIO_MMIO_QUEUE_NUM_MAX	0x034 // max size of current queue, read-only +#define VIRTIO_MMIO_QUEUE_NUM		0x038 // size of current queue, write-only +#define VIRTIO_MMIO_QUEUE_ALIGN		0x03c // used ring alignment, write-only +#define VIRTIO_MMIO_QUEUE_PFN		0x040 // physical page number for queue, read/write +#define VIRTIO_MMIO_QUEUE_READY		0x044 // ready bit +#define VIRTIO_MMIO_QUEUE_NOTIFY	0x050 // write-only +#define VIRTIO_MMIO_INTERRUPT_STATUS	0x060 // read-only +#define VIRTIO_MMIO_INTERRUPT_ACK	0x064 // write-only +#define VIRTIO_MMIO_STATUS		0x070 // read/write + +// status register bits, from qemu virtio_config.h +#define VIRTIO_CONFIG_S_ACKNOWLEDGE	1 +#define VIRTIO_CONFIG_S_DRIVER		2 +#define VIRTIO_CONFIG_S_DRIVER_OK	4 +#define VIRTIO_CONFIG_S_FEATURES_OK	8 + +// device feature bits +#define VIRTIO_BLK_F_RO              5	/* Disk is read-only */ +#define VIRTIO_BLK_F_SCSI            7	/* Supports scsi command passthru */ +#define VIRTIO_BLK_F_CONFIG_WCE     11	/* Writeback mode available in config */ +#define VIRTIO_BLK_F_MQ             12	/* support more than one vq */ +#define VIRTIO_F_ANY_LAYOUT         27 +#define VIRTIO_RING_F_INDIRECT_DESC 28 +#define VIRTIO_RING_F_EVENT_IDX     29 + +// this many virtio descriptors. +// must be a power of two. 
+#define NUM 8 + +struct VRingDesc { +  uint64 addr; +  uint32 len; +  uint16 flags; +  uint16 next; +}; +#define VRING_DESC_F_NEXT  1 // chained with another descriptor +#define VRING_DESC_F_WRITE 2 // device writes (vs read) + +struct VRingUsedElem { +  uint32 id;   // index of start of completed descriptor chain +  uint32 len; +}; + +// for disk ops +#define VIRTIO_BLK_T_IN  0 // read the disk +#define VIRTIO_BLK_T_OUT 1 // write the disk + +struct UsedArea { +  uint16 flags; +  uint16 id; +  struct VRingUsedElem elems[NUM]; +}; diff --git a/kernel/virtio_disk.c b/kernel/virtio_disk.c new file mode 100644 index 0000000..3cff024 --- /dev/null +++ b/kernel/virtio_disk.c @@ -0,0 +1,269 @@ +// +// driver for qemu's virtio disk device. +// uses qemu's mmio interface to virtio. +// qemu presents a "legacy" virtio interface. +// +// qemu ... -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0 +// + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" +#include "virtio.h" + +// the address of virtio mmio register r. +#define R(r) ((volatile uint32 *)(VIRTIO0 + (r))) + +static struct disk { + // memory for virtio descriptors &c for queue 0. + // this is a global instead of allocated because it must + // be multiple contiguous pages, which kalloc() + // doesn't support, and page aligned. +  char pages[2*PGSIZE]; +  struct VRingDesc *desc; +  uint16 *avail; +  struct UsedArea *used; + +  // our own book-keeping. +  char free[NUM];  // is a descriptor free? +  uint16 used_idx; // we've looked this far in used[2..NUM]. + +  // track info about in-flight operations, +  // for use when completion interrupt arrives. +  // indexed by first descriptor index of chain. 
+  struct { +    struct buf *b; +    char status; +  } info[NUM]; +   +  struct spinlock vdisk_lock; +   +} __attribute__ ((aligned (PGSIZE))) disk; + +void +virtio_disk_init(void) +{ +  uint32 status = 0; + +  initlock(&disk.vdisk_lock, "virtio_disk"); + +  if(*R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 || +     *R(VIRTIO_MMIO_VERSION) != 1 || +     *R(VIRTIO_MMIO_DEVICE_ID) != 2 || +     *R(VIRTIO_MMIO_VENDOR_ID) != 0x554d4551){ +    panic("could not find virtio disk"); +  } +   +  status |= VIRTIO_CONFIG_S_ACKNOWLEDGE; +  *R(VIRTIO_MMIO_STATUS) = status; + +  status |= VIRTIO_CONFIG_S_DRIVER; +  *R(VIRTIO_MMIO_STATUS) = status; + +  // negotiate features +  uint64 features = *R(VIRTIO_MMIO_DEVICE_FEATURES); +  features &= ~(1 << VIRTIO_BLK_F_RO); +  features &= ~(1 << VIRTIO_BLK_F_SCSI); +  features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE); +  features &= ~(1 << VIRTIO_BLK_F_MQ); +  features &= ~(1 << VIRTIO_F_ANY_LAYOUT); +  features &= ~(1 << VIRTIO_RING_F_EVENT_IDX); +  features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC); +  *R(VIRTIO_MMIO_DRIVER_FEATURES) = features; + +  // tell device that feature negotiation is complete. +  status |= VIRTIO_CONFIG_S_FEATURES_OK; +  *R(VIRTIO_MMIO_STATUS) = status; + +  // tell device we're completely ready. +  status |= VIRTIO_CONFIG_S_DRIVER_OK; +  *R(VIRTIO_MMIO_STATUS) = status; + +  *R(VIRTIO_MMIO_GUEST_PAGE_SIZE) = PGSIZE; + +  // initialize queue 0. 
+  *R(VIRTIO_MMIO_QUEUE_SEL) = 0; +  uint32 max = *R(VIRTIO_MMIO_QUEUE_NUM_MAX); +  if(max == 0) +    panic("virtio disk has no queue 0"); +  if(max < NUM) +    panic("virtio disk max queue too short"); +  *R(VIRTIO_MMIO_QUEUE_NUM) = NUM; +  memset(disk.pages, 0, sizeof(disk.pages)); +  *R(VIRTIO_MMIO_QUEUE_PFN) = ((uint64)disk.pages) >> PGSHIFT; + +  // desc = pages -- num * VRingDesc +  // avail = pages + NUM*sizeof(VRingDesc) (0x80 for NUM=8) -- 2 * uint16, then num * uint16 +  // used = pages + PGSIZE -- 2 * uint16, then num * VRingUsedElem + +  disk.desc = (struct VRingDesc *) disk.pages; +  disk.avail = (uint16*)(((char*)disk.desc) + NUM*sizeof(struct VRingDesc)); +  disk.used = (struct UsedArea *) (disk.pages + PGSIZE); + +  for(int i = 0; i < NUM; i++) +    disk.free[i] = 1; + +  // plic.c and trap.c arrange for interrupts from VIRTIO0_IRQ. +} + +// find a free descriptor, mark it non-free, return its index. +// returns -1 if no descriptor is free. +static int +alloc_desc() +{ +  for(int i = 0; i < NUM; i++){ +    if(disk.free[i]){ +      disk.free[i] = 0; +      return i; +    } +  } +  return -1; +} + +// mark a descriptor as free, and wake up anyone sleeping +// on &disk.free[0] waiting for a descriptor. +static void +free_desc(int i) +{ +  if(i >= NUM) +    panic("virtio_disk_intr 1"); +  if(disk.free[i]) +    panic("virtio_disk_intr 2"); +  disk.desc[i].addr = 0; +  disk.free[i] = 1; +  wakeup(&disk.free[0]); +} + +// free a chain of descriptors. 
+static void +free_chain(int i) +{ +  while(1){ +    free_desc(i); +    if(disk.desc[i].flags & VRING_DESC_F_NEXT) +      i = disk.desc[i].next; +    else +      break; +  } +} + +static int +alloc3_desc(int *idx) +{ +  for(int i = 0; i < 3; i++){ +    idx[i] = alloc_desc(); +    if(idx[i] < 0){ +      for(int j = 0; j < i; j++) +        free_desc(idx[j]); +      return -1; +    } +  } +  return 0; +} + +void +virtio_disk_rw(struct buf *b, int write) +{ +  uint64 sector = b->blockno * (BSIZE / 512); + +  acquire(&disk.vdisk_lock); + +  // the spec says that legacy block operations use three +  // descriptors: one for type/reserved/sector, one for +  // the data, one for a 1-byte status result. + +  // allocate the three descriptors. +  int idx[3]; +  while(1){ +    if(alloc3_desc(idx) == 0) { +      break; +    } +    sleep(&disk.free[0], &disk.vdisk_lock); +  } +   +  // format the three descriptors. +  // qemu's virtio-blk.c reads them. + +  struct virtio_blk_outhdr { +    uint32 type; +    uint32 reserved; +    uint64 sector; +  } buf0; + +  if(write) +    buf0.type = VIRTIO_BLK_T_OUT; // write the disk +  else +    buf0.type = VIRTIO_BLK_T_IN; // read the disk +  buf0.reserved = 0; +  buf0.sector = sector; + +  // buf0 is on a kernel stack, which is not direct mapped, +  // thus the call to kvmpa(). 
+  disk.desc[idx[0]].addr = (uint64) kvmpa((uint64) &buf0); +  disk.desc[idx[0]].len = sizeof(buf0); +  disk.desc[idx[0]].flags = VRING_DESC_F_NEXT; +  disk.desc[idx[0]].next = idx[1]; + +  disk.desc[idx[1]].addr = (uint64) b->data; +  disk.desc[idx[1]].len = BSIZE; +  if(write) +    disk.desc[idx[1]].flags = 0; // device reads b->data +  else +    disk.desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data +  disk.desc[idx[1]].flags |= VRING_DESC_F_NEXT; +  disk.desc[idx[1]].next = idx[2]; + +  disk.info[idx[0]].status = 0; +  disk.desc[idx[2]].addr = (uint64) &disk.info[idx[0]].status; +  disk.desc[idx[2]].len = 1; +  disk.desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status +  disk.desc[idx[2]].next = 0; + +  // record struct buf for virtio_disk_intr(). +  b->disk = 1; +  disk.info[idx[0]].b = b; + +  // avail[0] is flags +  // avail[1] tells the device how far to look in avail[2...]. +  // avail[2...] are desc[] indices the device should process. +  // we only tell device the first index in our chain of descriptors. +  disk.avail[2 + (disk.avail[1] % NUM)] = idx[0]; +  __sync_synchronize(); +  disk.avail[1] = disk.avail[1] + 1; + +  *R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number + +  // Wait for virtio_disk_intr() to say request has finished. 
+  while(b->disk == 1) { +    sleep(b, &disk.vdisk_lock); +  } + +  disk.info[idx[0]].b = 0; +  free_chain(idx[0]); + +  release(&disk.vdisk_lock); +} + +void +virtio_disk_intr() +{ +  acquire(&disk.vdisk_lock); + +  while((disk.used_idx % NUM) != (disk.used->id % NUM)){ +    int id = disk.used->elems[disk.used_idx].id; + +    if(disk.info[id].status != 0) +      panic("virtio_disk_intr status"); +     +    disk.info[id].b->disk = 0;   // disk is done with buf +    wakeup(disk.info[id].b); + +    disk.used_idx = (disk.used_idx + 1) % NUM; +  } + +  release(&disk.vdisk_lock); +} diff --git a/kernel/vm.c b/kernel/vm.c new file mode 100644 index 0000000..3631c9c --- /dev/null +++ b/kernel/vm.c @@ -0,0 +1,441 @@ +#include "param.h" +#include "types.h" +#include "memlayout.h" +#include "elf.h" +#include "riscv.h" +#include "defs.h" +#include "fs.h" + +/* + * the kernel's page table. + */ +pagetable_t kernel_pagetable; + +extern char etext[];  // kernel.ld sets this to end of kernel code. + +extern char trampoline[]; // trampoline.S + +/* + * create a direct-map page table for the kernel and + * turn on paging. called early, in supervisor mode. + * the page allocator is already initialized. + */ +void +kvminit() +{ +  kernel_pagetable = (pagetable_t) kalloc(); +  memset(kernel_pagetable, 0, PGSIZE); + +  // uart registers +  kvmmap(UART0, UART0, PGSIZE, PTE_R | PTE_W); + +  // virtio mmio disk interface +  kvmmap(VIRTIO0, VIRTIO0, PGSIZE, PTE_R | PTE_W); + +  // CLINT +  kvmmap(CLINT, CLINT, 0x10000, PTE_R | PTE_W); + +  // PLIC +  kvmmap(PLIC, PLIC, 0x400000, PTE_R | PTE_W); + +  // map kernel text executable and read-only. +  kvmmap(KERNBASE, KERNBASE, (uint64)etext-KERNBASE, PTE_R | PTE_X); + +  // map kernel data and the physical RAM we'll make use of. +  kvmmap((uint64)etext, (uint64)etext, PHYSTOP-(uint64)etext, PTE_R | PTE_W); + +  // map the trampoline for trap entry/exit to +  // the highest virtual address in the kernel. 
+  kvmmap(TRAMPOLINE, (uint64)trampoline, PGSIZE, PTE_R | PTE_X); +} + +// Switch h/w page table register to the kernel's page table, +// and enable paging. +void +kvminithart() +{ +  sfence_vma(); +  w_satp(MAKE_SATP(kernel_pagetable)); +} + +// Return the address of the PTE in page table pagetable +// that corresponds to virtual address va.  If alloc!=0, +// create any required page-table pages. +// +// The risc-v Sv39 scheme has three levels of page-table +// pages. A page-table page contains 512 64-bit PTEs. +// A 64-bit virtual address is split into five fields: +//   39..63 -- must be zero. +//   30..38 -- 9 bits of level-2 index. +//   21..29 -- 9 bits of level-1 index. +//   12..20 -- 9 bits of level-0 index. +//    0..11 -- 12 bits of byte offset within the page. +static pte_t * +walk(pagetable_t pagetable, uint64 va, int alloc) +{ +  if(va >= MAXVA) +    panic("walk"); + +  for(int level = 2; level > 0; level--) { +    pte_t *pte = &pagetable[PX(level, va)]; +    if(*pte & PTE_V) { +      pagetable = (pagetable_t)PTE2PA(*pte); +    } else { +      if(!alloc || (pagetable = (pde_t*)kalloc()) == 0) +        return 0; +      memset(pagetable, 0, PGSIZE); +      *pte = PA2PTE(pagetable) | PTE_V; +    } +  } +  return &pagetable[PX(0, va)]; +} + +// Look up a virtual address, return the physical address, +// or 0 if not mapped. +// Can only be used to look up user pages. +uint64 +walkaddr(pagetable_t pagetable, uint64 va) +{ +  pte_t *pte; +  uint64 pa; + +  pte = walk(pagetable, va, 0); +  if(pte == 0) +    return 0; +  if((*pte & PTE_V) == 0) +    return 0; +  if((*pte & PTE_U) == 0) +    return 0; +  pa = PTE2PA(*pte); +  return pa; +} + +// add a mapping to the kernel page table. +// only used when booting. +// does not flush TLB or enable paging. +void +kvmmap(uint64 va, uint64 pa, uint64 sz, int perm) +{ +  if(mappages(kernel_pagetable, va, sz, pa, perm) != 0) +    panic("kvmmap"); +} + +// translate a kernel virtual address to +// a physical address. 
only needed for +// addresses on the stack. +// va need not be page aligned; its offset within the page is preserved. +uint64 +kvmpa(uint64 va) +{ +  uint64 off = va % PGSIZE; +  pte_t *pte; +  uint64 pa; +   +  pte = walk(kernel_pagetable, va, 0); +  if(pte == 0) +    panic("kvmpa"); +  if((*pte & PTE_V) == 0) +    panic("kvmpa"); +  pa = PTE2PA(*pte); +  return pa+off; +} + +// Create PTEs for virtual addresses starting at va that refer to +// physical addresses starting at pa. va and size might not +// be page-aligned. Returns 0 on success, -1 if walk() couldn't +// allocate a needed page-table page. +int +mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm) +{ +  uint64 a, last; +  pte_t *pte; + +  a = PGROUNDDOWN(va); +  last = PGROUNDDOWN(va + size - 1); +  for(;;){ +    if((pte = walk(pagetable, a, 1)) == 0) +      return -1; +    if(*pte & PTE_V) +      panic("remap"); +    *pte = PA2PTE(pa) | perm | PTE_V; +    if(a == last) +      break; +    a += PGSIZE; +    pa += PGSIZE; +  } +  return 0; +} + +// Remove mappings from a page table. The mappings in +// the given range must exist. Optionally free the +// physical memory. +void +uvmunmap(pagetable_t pagetable, uint64 va, uint64 size, int do_free) +{ +  uint64 a, last; +  pte_t *pte; +  uint64 pa; + +  a = PGROUNDDOWN(va); +  last = PGROUNDDOWN(va + size - 1); +  for(;;){ +    if((pte = walk(pagetable, a, 0)) == 0) +      panic("uvmunmap: walk"); +    if((*pte & PTE_V) == 0){ +      printf("va=%p pte=%p\n", a, *pte); +      panic("uvmunmap: not mapped"); +    } +    if(PTE_FLAGS(*pte) == PTE_V) +      panic("uvmunmap: not a leaf"); +    if(do_free){ +      pa = PTE2PA(*pte); +      kfree((void*)pa); +    } +    *pte = 0; +    if(a == last) +      break; +    a += PGSIZE; +    pa += PGSIZE; +  } +} + +// create an empty user page table. 
+pagetable_t +uvmcreate() +{ +  pagetable_t pagetable; +  pagetable = (pagetable_t) kalloc(); +  if(pagetable == 0) +    panic("uvmcreate: out of memory"); +  memset(pagetable, 0, PGSIZE); +  return pagetable; +} + +// Load the user initcode into address 0 of pagetable, +// for the very first process. +// sz must be less than a page. +void +uvminit(pagetable_t pagetable, uchar *src, uint sz) +{ +  char *mem; + +  if(sz >= PGSIZE) +    panic("inituvm: more than a page"); +  mem = kalloc(); +  memset(mem, 0, PGSIZE); +  mappages(pagetable, 0, PGSIZE, (uint64)mem, PTE_W|PTE_R|PTE_X|PTE_U); +  memmove(mem, src, sz); +} + +// Allocate PTEs and physical memory to grow process from oldsz to +// newsz, which need not be page aligned.  Returns new size or 0 on error. +uint64 +uvmalloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz) +{ +  char *mem; +  uint64 a; + +  if(newsz < oldsz) +    return oldsz; + +  oldsz = PGROUNDUP(oldsz); +  a = oldsz; +  for(; a < newsz; a += PGSIZE){ +    mem = kalloc(); +    if(mem == 0){ +      uvmdealloc(pagetable, a, oldsz); +      return 0; +    } +    memset(mem, 0, PGSIZE); +    if(mappages(pagetable, a, PGSIZE, (uint64)mem, PTE_W|PTE_X|PTE_R|PTE_U) != 0){ +      kfree(mem); +      uvmdealloc(pagetable, a, oldsz); +      return 0; +    } +  } +  return newsz; +} + +// Deallocate user pages to bring the process size from oldsz to +// newsz.  oldsz and newsz need not be page-aligned, nor does newsz +// need to be less than oldsz.  oldsz can be larger than the actual +// process size.  Returns the new process size. +uint64 +uvmdealloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz) +{ +  if(newsz >= oldsz) +    return oldsz; +  uvmunmap(pagetable, newsz, oldsz - newsz, 1); +  return newsz; +} + +// Recursively free page-table pages. +// All leaf mappings must already have been removed. +static void +freewalk(pagetable_t pagetable) +{ +  // there are 2^9 = 512 PTEs in a page table. 
+  for(int i = 0; i < 512; i++){ +    pte_t pte = pagetable[i]; +    if((pte & PTE_V) && (pte & (PTE_R|PTE_W|PTE_X)) == 0){ +      // this PTE points to a lower-level page table. +      uint64 child = PTE2PA(pte); +      freewalk((pagetable_t)child); +      pagetable[i] = 0; +    } else if(pte & PTE_V){ +      panic("freewalk: leaf"); +    } +  } +  kfree((void*)pagetable); +} + +// Free user memory pages, +// then free page-table pages. +void +uvmfree(pagetable_t pagetable, uint64 sz) +{ +  uvmunmap(pagetable, 0, sz, 1); +  freewalk(pagetable); +} + +// Given a parent process's page table, copy +// its memory into a child's page table. +// Copies both the page table and the +// physical memory. +// returns 0 on success, -1 on failure. +// frees any allocated pages on failure. +int +uvmcopy(pagetable_t old, pagetable_t new, uint64 sz) +{ +  pte_t *pte; +  uint64 pa, i; +  uint flags; +  char *mem; + +  for(i = 0; i < sz; i += PGSIZE){ +    if((pte = walk(old, i, 0)) == 0) +      panic("copyuvm: pte should exist"); +    if((*pte & PTE_V) == 0) +      panic("copyuvm: page not present"); +    pa = PTE2PA(*pte); +    flags = PTE_FLAGS(*pte); +    if((mem = kalloc()) == 0) +      goto err; +    memmove(mem, (char*)pa, PGSIZE); +    if(mappages(new, i, PGSIZE, (uint64)mem, flags) != 0){ +      kfree(mem); +      goto err; +    } +  } +  return 0; + + err: +  uvmunmap(new, 0, i, 1); +  return -1; +} + +// mark a PTE invalid for user access. +// used by exec for the user stack guard page. +void +uvmclear(pagetable_t pagetable, uint64 va) +{ +  pte_t *pte; +   +  pte = walk(pagetable, va, 0); +  if(pte == 0) +    panic("uvmclear"); +  *pte &= ~PTE_U; +} + +// Copy from kernel to user. +// Copy len bytes from src to virtual address dstva in a given page table. +// Return 0 on success, -1 on error. 
+int +copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len) +{ +  uint64 n, va0, pa0; + +  while(len > 0){ +    va0 = (uint)PGROUNDDOWN(dstva); +    pa0 = walkaddr(pagetable, va0); +    if(pa0 == 0) +      return -1; +    n = PGSIZE - (dstva - va0); +    if(n > len) +      n = len; +    memmove((void *)(pa0 + (dstva - va0)), src, n); + +    len -= n; +    src += n; +    dstva = va0 + PGSIZE; +  } +  return 0; +} + +// Copy from user to kernel. +// Copy len bytes to dst from virtual address srcva in a given page table. +// Return 0 on success, -1 on error. +int +copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len) +{ +  uint64 n, va0, pa0; + +  while(len > 0){ +    va0 = (uint)PGROUNDDOWN(srcva); +    pa0 = walkaddr(pagetable, va0); +    if(pa0 == 0) +      return -1; +    n = PGSIZE - (srcva - va0); +    if(n > len) +      n = len; +    memmove(dst, (void *)(pa0 + (srcva - va0)), n); + +    len -= n; +    dst += n; +    srcva = va0 + PGSIZE; +  } +  return 0; +} + +// Copy a null-terminated string from user to kernel. +// Copy bytes to dst from virtual address srcva in a given page table, +// until a '\0', or max. +// Return 0 on success, -1 on error. 
+int +copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max) +{ +  uint64 n, va0, pa0; +  int got_null = 0; + +  while(got_null == 0 && max > 0){ +    va0 = (uint)PGROUNDDOWN(srcva); +    pa0 = walkaddr(pagetable, va0); +    if(pa0 == 0) +      return -1; +    n = PGSIZE - (srcva - va0); +    if(n > max) +      n = max; + +    char *p = (char *) (pa0 + (srcva - va0)); +    while(n > 0){ +      if(*p == '\0'){ +        *dst = '\0'; +        got_null = 1; +        break; +      } else { +        *dst = *p; +      } +      --n; +      --max; +      p++; +      dst++; +    } + +    srcva = va0 + PGSIZE; +  } +  if(got_null){ +    return 0; +  } else { +    return -1; +  } +} diff --git a/labs/cow.html b/labs/cow.html new file mode 100644 index 0000000..2cc18fa --- /dev/null +++ b/labs/cow.html @@ -0,0 +1,109 @@ +<html> +<head> +<title>Lab: Copy-on-Write Fork for xv6</title> +<link rel="stylesheet" href="homework.css" type="text/css" /> +</head> +<body> + +<h1>Lab: Copy-on-Write Fork for xv6</h1> + +<p> +Your task is to implement copy-on-write fork in the xv6 kernel. You are +done if your modified kernel executes both the cow and usertests +programs successfully. + +<h2>The problem</h2> + +The fork() system call in xv6 copies all of the parent process's +user-space memory into the child. If the parent is large, copying can +take a long time. In addition, the copies often waste memory; in many +cases neither the parent nor the child modifies a page, so that in +principle they could share the same physical memory. The inefficiency +is particularly clear if the child calls exec(), since then most of +the copied pages are thrown away without ever being used. Of course, +sometimes both child and parent modify memory at the same virtual +address after a fork(), so for some pages the copying is truly needed. 
+ +<h2>The solution</h2> + +The goal of copy-on-write (COW) fork() is to defer allocating and +copying physical memory pages for the child until they are actually +needed, in the hope that they may never be needed. + +<p> +COW fork() creates just a pagetable for the child, with PTEs for user +memory pointing to the parent's physical pages. COW fork() marks all +the user PTEs in both parent and child as read-only. When either +process tries to write one of these COW pages, the CPU will force a +page fault. The kernel page-fault handler detects this case, allocates +a page of physical memory for the faulting process, copies the +original page into the new page, and modifies the relevant PTE in the +faulting process to refer to the new page, this time with the PTE +marked writeable. When the page fault handler returns, the user +process will be able to write its copy of the page. + +<p> +COW fork() makes freeing of the physical pages that implement user +memory a little trickier. A given physical page may be referred to by +multiple processes' page tables, and should be freed when the last +reference disappears. + +<h2>The cow test program</h2> + +To help you test your implementation, we've provided an xv6 program +called cow (source in user/cow.c). cow runs various tests, but +even the first will fail on unmodified xv6. Thus, initially, you +will see: + +<pre> +$ cow +simple: fork() failed +$  +</pre> + +The "simple" test allocates more than half of available physical +memory, and then fork()s. The fork fails because there is not enough +free physical memory to give the child a complete copy of the parent. + +<p> +When you are done, your kernel should be able to run both cow and +usertests. That is: + +<pre> +$ cow +simple: ok +simple: ok +three: zombie! +ok +three: zombie! +ok +three: zombie! +ok +file: ok +ALL COW TESTS PASSED +$ usertests +... +ALL TESTS PASSED +$ +</pre> + +<h2>Hints</h2> + +Here's one reasonable plan of attack. 
Modify uvmcopy() to map the +parent's physical pages into the child, instead of allocating new +pages, and clear PTE_W in the PTEs of both child and parent. +Modify usertrap() to recognize a page fault. When a page fault occurs +on a COW page, allocate a new page with kalloc(), copy the old page to +the new page, and install the new page in the PTE with PTE_W set. +Next, ensure that each physical page is freed when the last PTE +reference to it goes away (but not before!), perhaps by implementing +reference counts in kalloc.c. Finally, modify copyout() to use the +same scheme as page faults when it encounters a COW page. + +<p> +It may be useful to have a way to record, for each PTE, whether it is +a COW mapping. You can use the RSW (reserved for software) bits in +the RISC-V PTE for this. + +</body> +</html> diff --git a/labs/fs.html b/labs/fs.html new file mode 100644 index 0000000..a21e61f --- /dev/null +++ b/labs/fs.html @@ -0,0 +1,360 @@ +<html> +<head> +<title>Lab: file system</title> +<link rel="stylesheet" href="homework.css" type="text/css" /> +</head> +<body> + +<h1>Lab: file system</h1> + +<p>In this lab you will add large files and <tt>mmap</tt> to the xv6 file system. + +<h2>Large files</h2> + +<p>In this assignment you'll increase the maximum size of an xv6 +file. Currently xv6 files are limited to 268 blocks, or 268*BSIZE +bytes (BSIZE is 1024 in xv6). This limit comes from the fact that an +xv6 inode contains 12 "direct" block numbers and one "singly-indirect" +block number, which refers to a block that holds up to 256 more block +numbers, for a total of 12+256=268. You'll change the xv6 file system +code to support a "doubly-indirect" block in each inode, containing +256 addresses of singly-indirect blocks, each of which can contain up +to 256 addresses of data blocks. 
The result will be that a file will +be able to consist of up to 256*256+256+11 blocks (11 instead of 12, +because we will sacrifice one of the direct block numbers for the +double-indirect block). + +<h3>Preliminaries</h3> +   +<p>Modify your Makefile's <tt>CPUS</tt> definition so that it reads: +<pre> +CPUS := 1 +</pre> + +<b>XXX doesn't seem to speedup things</b> +<p>Add +<pre> +QEMUEXTRA = -snapshot +</pre> +right before +<tt>QEMUOPTS</tt> +<p> +The above two steps speed up qemu tremendously when xv6 +creates large files. + +<p><tt>mkfs</tt> initializes the file system to have fewer +than 1000 free data blocks, too few to show off the changes +you'll make. Modify <tt>param.h</tt> to  +set <tt>FSSIZE</tt> to: +<pre> +    #define FSSIZE       20000  // size of file system in blocks +</pre> + +<p>Download <a href="big.c">big.c</a> into your xv6 directory, +add it to the UPROGS list, start up xv6, and run <tt>big</tt>. +It creates as big a file as xv6 will let +it, and reports the resulting size. It should say 268 sectors. + +<h3>What to Look At</h3> + +The format of an on-disk inode is defined by <tt>struct dinode</tt> +in <tt>fs.h</tt>. You're particularly interested in <tt>NDIRECT</tt>, +<tt>NINDIRECT</tt>, <tt>MAXFILE</tt>, and the <tt>addrs[]</tt> element +of <tt>struct dinode</tt>. Look at Figure 7.3 in the xv6 text for a +diagram of the standard xv6 inode. + +<p> +The code that finds a file's data on disk is in <tt>bmap()</tt> +in <tt>fs.c</tt>. Have a look at it and make sure you understand +what it's doing. <tt>bmap()</tt> is called both when reading and +writing a file. When writing, <tt>bmap()</tt> allocates new +blocks as needed to hold file content, as well as allocating +an indirect block if needed to hold block addresses. + +<p> +<tt>bmap()</tt> deals with two kinds of block numbers. The <tt>bn</tt> +argument is a "logical block" -- a block number relative to the start +of the file. 
The block numbers in <tt>ip->addrs[]</tt>, and the +argument to <tt>bread()</tt>, are disk block numbers. +You can view <tt>bmap()</tt> as mapping a file's logical +block numbers into disk block numbers. + +<h3>Your Job</h3> + +Modify <tt>bmap()</tt> so that it implements a doubly-indirect +block, in addition to direct blocks and a singly-indirect block. +You'll have to have only 11 direct blocks, rather than 12, +to make room for your new doubly-indirect block; you're +not allowed to change the size of an on-disk inode. +The first 11 elements of <tt>ip->addrs[]</tt> should be +direct blocks; the 12th should be a singly-indirect block +(just like the current one); the 13th should be your new +doubly-indirect block. + +<p> +You don't have to modify xv6 to handle deletion of files with +doubly-indirect blocks. + +<p> +If all goes well, <tt>big</tt> will now report that it +can write 65803 sectors. It will take <tt>big</tt> minutes +to finish. + +<b>XXX this runs for a while!</b> + +<h3>Hints</h3> + +<p> +Make sure you understand <tt>bmap()</tt>. Write out a diagram of the +relationships between <tt>ip->addrs[]</tt>, the indirect block, the +doubly-indirect block and the singly-indirect blocks it points to, and +data blocks. Make sure you understand why adding a doubly-indirect +block increases the maximum file size by 256*256 blocks (really -1, +since you have to decrease the number of direct blocks by one). + +<p> +Think about how you'll index the doubly-indirect block, and +the indirect blocks it points to, with the logical block +number. + +<p>If you change the definition of <tt>NDIRECT</tt>, you'll +probably have to change the size of <tt>addrs[]</tt> +in <tt>struct inode</tt> in <tt>file.h</tt>. Make sure that +<tt>struct inode</tt> and <tt>struct dinode</tt> have the +same number of elements in their <tt>addrs[]</tt> arrays. 
+ +<p>If you change the definition of <tt>NDIRECT</tt>, make sure to create a +new <tt>fs.img</tt>, since <tt>mkfs</tt> uses <tt>NDIRECT</tt> too to build the +initial file systems.  If you delete <tt>fs.img</tt>, <tt>make</tt> on Unix (not +xv6) will build a new one for you. + +<p>If your file system gets into a bad state, perhaps by crashing, +delete <tt>fs.img</tt> (do this from Unix, not xv6).  <tt>make</tt> will build a +new clean file system image for you. + +<p>Don't forget to <tt>brelse()</tt> each block that you +<tt>bread()</tt>. + +<p>You should allocate indirect blocks and doubly-indirect +  blocks only as needed, like the original <tt>bmap()</tt>. + +<p>Optional challenge: support triple-indirect blocks. + +<h2>Writing with a Log</h2> + +Insert a print statement in bwrite (in bio.c) so that you get a +print every time a block is written to disk: + +<pre> +  printf("bwrite block %d\n", b->blockno); +</pre> + +Build and boot a new kernel and run this: +<pre> +  $ rm README +</pre> + +<p>You should see a sequence of bwrite prints after the <tt>rm</tt>.</p> + +<div class="question"> +<ol> +<li>Annotate the bwrite lines with the kind of information that is +being written to the disk (e.g., "README's inode", "allocation +bitmap"). If the log is being written, note both that the log is being +written and also what kind of information is being written to the log. +<li>Mark with an arrow the first point at which, if a +crash occurred, README would be missing after a reboot +(after the call to <tt>recover_from_log()</tt>). +</ol> +</p> +</div> + + +<h2>Crash safety</h2> + +<p>This assignment explores the xv6 log in two parts. +First, you'll artificially create a crash which illustrates +why logging is needed. Second, you'll remove one +inefficiency in the xv6 logging system. + +<p> +Submit your solution before the beginning of the next lecture +to <a href="https://6828.scripts.mit.edu/2018/handin.py/">the submission +web site</a>. 
+ +<h3>Creating a Problem</h3> + +<p> +The point of the xv6 log is to cause all the disk updates of a +filesystem operation to be atomic with respect to crashes. +For example, file creation involves both adding a new entry +to a directory and marking the new file's inode as in-use. +A crash that happened after one but before the other would +leave the file system in an incorrect state after a reboot, +if there were no log. + +<p> +The following steps will break the logging code in a way that +leaves a file partially created. + +<p> +First, replace <tt>commit()</tt> in <tt>log.c</tt> with +this code: +<pre> +#include "kernel/proc.h" +void +commit(void) +{ +  int pid = myproc()->pid; +  if (log.lh.n > 0) { +    write_log(); +    write_head(); +    if(pid > 1)            // AAA +      log.lh.block[0] = 0; // BBB +    install_trans(); +    if(pid > 1)            // AAA +      panic("commit mimicking crash"); // CCC +    log.lh.n = 0;  +    write_head(); +  } +} +</pre> + +<p> +The BBB line causes the first block in the log to be written to +block zero, rather than wherever it should be written. During file +creation, the first block in the log is the new file's inode updated +to have non-zero <tt>type</tt>. +Line BBB causes the block +with the updated inode to be written to block 0 (whence +it will never be read), leaving the on-disk inode still marked +unallocated. The CCC line forces a crash. +The AAA lines suppress this buggy behavior for <tt>init</tt>, +which creates files before the shell starts. + +<p> +Second, replace <tt>recover_from_log()</tt> in <tt>log.c</tt> +with this code: +<pre> +static void +recover_from_log(void) +{ +  read_head();       +  printf("recovery: n=%d but ignoring\n", log.lh.n); +  // install_trans(); +  log.lh.n = 0; +  // write_head(); +} +</pre> + +<p> +This modification suppresses log recovery (which would repair +the damage caused by your change to <tt>commit()</tt>). 
+ +<p> +Finally, remove the <tt>-snapshot</tt> option from the definition +of <tt>QEMUEXTRA</tt> in your Makefile so that the disk image will see the +changes. + +<p> +Now remove <tt>fs.img</tt> and run xv6: +<pre> +  % rm fs.img ; make qemu +</pre> +<p> +Tell the xv6 shell to create a file: +<pre> +  $ echo hi > a +</pre> + +<p> +You should see the panic from <tt>commit()</tt>. So far +it is as if a crash occurred in a non-logging system in the middle +of creating a file. + +<p> +Now re-start xv6, keeping the same <tt>fs.img</tt>: +<pre> +  % make qemu +</pre> + +<p> +And look at file <tt>a</tt>: +<pre> +  $ cat a +</pre> + +<p> +  You should see <tt>panic: ilock: no type</tt>. Make sure you understand what happened. +Which of the file creation's modifications were written to the disk +before the crash, and which were not? + +<h3>Solving the Problem</h3> + +Now fix <tt>recover_from_log()</tt>: +<pre> +static void +recover_from_log(void) +{ +  read_head(); +  cprintf("recovery: n=%d\n", log.lh.n); +  install_trans(); +  log.lh.n = 0; +  write_head(); +} +</pre> + +<p> +Run xv6 (keeping the same <tt>fs.img</tt>) and read <tt>a</tt> again: +<pre> +  $ cat a +</pre> + +<p> +This time there should be no crash. Make sure you understand why +the file system now works. + +<p> +Why was the file empty, even though you created +it with <tt>echo hi > a</tt>? + +<p> +Now remove your modifications to <tt>commit()</tt> +(the if's and the AAA and BBB lines), so that logging works again, +and remove <tt>fs.img</tt>. + +<h3>Streamlining Commit</h3> + +<p> +Suppose the file system code wants to update an inode in block 33. +The file system code will call <tt>bp=bread(block 33)</tt> and update the +buffer data. <tt>write_log()</tt> in <tt>commit()</tt> +will copy the data to a block in the log on disk, for example block 3. 
+A bit later in <tt>commit</tt>, <tt>install_trans()</tt> reads +block 3 from the log (containing block 33), copies its contents into the in-memory +buffer for block 33, and then writes that buffer to block 33 on the disk. + +<p> +However, in <tt>install_trans()</tt>, it turns out that the modified +block 33 is guaranteed to be still in the buffer cache, where the +file system code left it. Make sure you understand why it would be a +mistake for the buffer cache to evict block 33 from the buffer cache +before the commit. + +<p> +Since the modified block 33 is guaranteed to already be in the buffer +cache, there's no need for <tt>install_trans()</tt> to read block +33 from the log. Your job: modify <tt>log.c</tt> so that, when +<tt>install_trans()</tt> is called from <tt>commit()</tt>, +<tt>install_trans()</tt> does not perform the needless read from the log. + +<p>To test your changes, create a file in xv6, restart, and make sure +the file is still there. + +<b>XXX Does this speedup bigfile?</b> + +<b>XXX Maybe support lseek and modify shell to append to a file?</b> +   +   +</body> +</html> diff --git a/labs/fs1.html b/labs/fs1.html new file mode 100644 index 0000000..45d3e0c --- /dev/null +++ b/labs/fs1.html @@ -0,0 +1,215 @@ +<html> +<head> +<title>Lab: mount/umount</title> +<link rel="stylesheet" href="homework.css" type="text/css" /> +</head> +<body> + +<h1>Lab: mount/umount</h1> + +<p>In this lab you will add support for mounting/unmounting of file +systems to xv6.  This lab will expose you to many parts of the xv6 +file system, including pathname lookup, inodes, logging/recovery, disk +driver, concurrency, etc. + +<p>Your job is modify xv6 so that your modified kernel passes the +  tests in mounttest. You will have to implement two system +  calls: <tt>mount(char *source, char *target)</tt> +  and <tt>umount(char *target)</tt>. Mount attaches the device +  referenced by <tt>source</tt> (e.g., <tt>/disk1</tt>) at the +  location specified by <tt>target</tt>.  
For +  example, <tt>mount("/disk1", "/m")</tt> will attach <tt>disk1</tt> +  at the directory <tt>/m</tt>. After this mount call, users can use +  pathnames such as <tt>/m/README</tt> to read the +  file <tt>README</tt> stored in the root directory +  on <tt>disk1</tt>.  <tt>Umount</tt> removes the attachment.  For +  example, <tt>umount("/m")</tt> unmounts disk1 from <tt>/m</tt>. + +<p>There are several major challenges in implementing the mount system +calls: + +  <ul> +     +    <li>Adding the actual system calls so that user programs can call +      them.  This is similar to previous labs in which you added +      system calls to xv6. + +    <li>Supporting several disks.  You will have to generalize +      virtio_disk.c to support at least two disks. + +    <li>Logging file system modifications to the right disk.  xv6 +      assumes there is only one disk, and file system calls typically start +      with <tt>begin_op</tt> and end with <tt>end_op</tt>, logging all +      modifications between these two calls to the log on the one +      disk.  With mount, modifications to the file system on the +      second disk must be logged to the second disk. + +    <li>Modifying pathname lookup (<tt>namex</tt>) so that when a +      lookup crosses a mount point, it continues at the root inode of +      the attached disk. + +  </ul> + +<p>The rest of this assignment provides some hints how you might go +about the above challenges. + +<h2>Adding system calls</h2> + +<p>Add the stubs for the two system calls to xv6 so that you can +compile mounttest and add two empty functions for the two system calls +to sysfile.c. Run mounttest and it will fail on the first call +to <tt>mount</tt>. + + +<h2>Adding a second disk</h2>       + +<p>To be able to mount another disk, you need to extend xv6 to support +at least two disks.  Modify virtio_disk.c to support an array of two +disks instead of a single disk.  
The address of the second disk +is <tt>0x10002000</tt>; modify the macro <tt>R</tt> to take a disk +number (0, 1,..) and read/write to the memory address for that disk. + +<p>All functions in <tt>virtio_disk.c</tt> need to take the disk +number as an argument to update the state of the disk that is +read/written to or to receive an interrupt from the disk. +Modify <tt>virtio_disk_init</tt> to take a disk number as an argument +and update it so that it initializes that disk.  Similarly, go through +the other functions; making these changes should be mostly mechanical +(i.e., text substitutions). + +<p>The second disk interrupts at IRQ 2; modify trap.c to receive that +interrupt and call <tt>virtio_disk_intr</tt> with the number of the disk +that generated the interrupt. +      +<p>Modify the file Makefile to tell qemu to provide a second +disk. Define the variable <tt>QEMUEXTRA = -drive +file=fs1.img,if=none,format=raw,id=x1 -device +virtio-blk-device,drive=x1,bus=virtio-mmio-bus.1</tt> and +add <tt>$(QEMUEXTRA)</tt> to the end of <tt>QEMUOPTS</tt>. + +<p>Create a second disk image <tt>fs1.img</tt>.  Easiest thing to do +  is just copy the file <tt>fs.img</tt>.  You might want to add rules +  to the Makefile to make this image and remove it on <tt>make +  clean</tt>. + +<p>Add to the user program init a call to create a device for the new +  disk. For example, add the line <tt>mknod("disk1", DISK, 1);</tt> to +  init.c. This will create an inode of type device in the root +  directory with major number <tt>DISK</tt> and minor number 1. + +<p>The first argument of the <tt>mount</tt> system call ("disk1") will +  refer to the device you created using <tt>mknod</tt> above.  In your +  implementation of the mount system call, +  call <tt>virtio_disk_init</tt> with the minor number as the argument +  to initialize the second disk.  (We reserve minor number 0 for the +  first disk.) 
+ +<p>Boot xv6, run mounttest, and make sure <tt>virtio_disk_init</tt> +  gets called (e.g., add print statement).  You won't know if your +  changes are correct, but your code should compile and invoke the +  driver for the second disk. + +<h2>Modify the logging system</h2> + +<p>After calling <tt>virtio_disk_init</tt>, you need to also +  call <tt>loginit</tt> to initialize the logging system for the +  second disk (and restore the second disk if a power failure happened +  while modifying the second disk).  Generalize the logging system to +  support to two logs, one on disk 0 and one disk 1.  These changes +  are mostly mechanical (e.g., <tt>log.</tt> changes +  to <tt>log[n].</tt>), similar to generalizing the disk driver to +  support two disks. + +<p>To make xv6 compile, you need to provide a disk number +  to <tt>begin_op</tt> and <tt>end_op</tt>.  It will be a challenge to +  figure out what the right value is; for now just specify the first +  disk (i.e., 0).  This isn't correct, since modifications to the +  second disk should be logged on the second disk, but we have no way +  yet to read/write the second disk.  Come back to this later when you +  have a better idea how things will fit together, but make sure that +  xv6 compiles and still runs. + +<h2>Pathname lookup</h2> + +<p>Modify <tt>namex</tt> to traverse mount points: when <tt>namex</tt> +  sees an inode to which a file system is attached, it should traverse +  to the root inode of that file system.  Hint: modify the in-memory +  inode in file.h to keep some additional state, and initialize that +  state in the mount system call.  Note that the inode already has a +  field for disk number (i.e., <tt>dev</tt>), which is initialized and +  passed to reads and writes to the driver.  <tt>dev</tt> corresponds +  to the minor number for disk devices. + +<p>Your modified xv6 should be able to pass the first tests in +  mounttest (i.e., <tt>stat</tt>).  
This is likely to be challenging, +  however, because now your kernel will be reading from the second +  disk for the first time, and you may run into many issues. + +<p>Even though <tt>stat</tt> may return correctly, your code is likely +  to be incorrect, because in <tt>namex</tt> +  <tt>iunlockput</tt> may modify the second disk (e.g., if +  another process removes the file or directory) and those +  modifications must be written to the second disk.  Your job is to +  fix the calls to <tt>begin_op</tt> and <tt>end_op</tt> to take the +  right device.  One challenge is that <tt>begin_op</tt> is called at +  the beginning of a system call but then you don't know the device +  that will be involved; you will have to postpone this call until you +  know which inode is involved (which tells you which device is +  involved).  Another challenge is that you cannot postpone +  calling <tt>begin_op</tt> past <tt>ilock</tt> because that +  violates lock ordering in xv6; you should not be +  calling <tt>begin_op</tt> while holding locks on inodes. (The log +  system allows a few system calls to run; if a system call that +  holds an inode lock isn't admitted and one of the admitted system +  calls needs that inode to complete, then xv6 will deadlock.) + +<p>Once you have implemented a plan for <tt>begin_op</tt> +  and <tt>end_op</tt>, see if your kernel can pass <tt>test0</tt>.  It +  is likely that you will have to modify your implementation of the +  mount system call to handle several corner cases.  See the tests +  in <tt>test0</tt>. + +<p>Run usertests to see if you didn't break anything else.  Since you +  modified <tt>namex</tt> and <tt>begin/end_op</tt>, which are at the +  core of the xv6 file system, you might have introduced bugs, perhaps +  including deadlocks.  Deadlocks manifest themselves as no output +  being produced because all processes are sleeping (hit ctrl-p a few +  times).  
Your kernel might also suffer kernel panics, because your +  changes violate invariants.  You may have to iterate a few times to +  get a good design and implementation. + +<h2>umount</h2> + +<p>Once your kernel passes usertests and test0 of mounttest, implement +  umount.  The main challenge is that umount of a file system should +  fail if the file system is still in use; that is, if there is an +  inode on the mounted device that has a <tt>ref > 0</tt>. +  Furthermore, this test and unmounting should be an atomic +  operation. (Hint: lock the inode cache.)  Make sure your kernel +  passes test1 of mounttest. + +<p>Test2 of mounttest stresses <tt>namex</tt> more; if you have done +    everything right above, your kernel should pass it.  Test3 tests +    concurrent mount/unmounts with file creation. + +<h2>crash safety</h2> + +<p>One of the main goals of the file system is to provide crash +  safety: if there is a power failure during a file system operation, +  xv6 should recover correctly.  It is difficult to introduce power +  failure at the critical steps of logging; instead, we added a system +  call that causes a kernel panic after committing an operation but +  before installing the operation.  Test4 with crashtest tests if your +  xv6 recovers the mounted disk correctly. +    +        +</body> +</html> + +<h2>Optional challenges</h2> + +<p>Modify xv6 so that init mounts the first disk on the root inode. +  This will allow you to remove some code specific for the first disk +  from the kernel. + +<p>Support mounts on top of mounts. 
diff --git a/labs/lazy.html b/labs/lazy.html new file mode 100644 index 0000000..9d97cab --- /dev/null +++ b/labs/lazy.html @@ -0,0 +1,132 @@ +<html> +<head> +<title>Lab: xv6 lazy page allocation</title> +<link rel="stylesheet" href="homework.css" type="text/css" /> +</head> +<body> + +<h1>Lab: xv6 lazy page allocation</h1> + +<p> +One of the many neat tricks an O/S can play with page table hardware +is lazy allocation of heap memory. Xv6 applications ask the kernel for +heap memory using the sbrk() system call. In the kernel we've given +you, sbrk() allocates physical memory and maps it into the process's +virtual address space. There are programs that allocate memory but +never use it, for example to implement large sparse arrays. +Sophisticated kernels delay allocation of each page of memory until +the application tries to use that page -- as signaled by a page fault. +You'll add this lazy allocation feature to xv6 in this lab. + +<h2>Part One: Eliminate allocation from sbrk()</h2> + +Your first task is to delete page allocation from the sbrk(n) system +call implementation, which is the function sys_sbrk() in sysproc.c. The +sbrk(n) system call grows the process's memory size by n bytes, and +then returns the start of the newly allocated region (i.e., the old +size). Your new sbrk(n) should just increment the process's size +(myproc()->sz) by n and return the old size. It should not allocate memory +-- so you should delete the call to growproc() (but you still need to +increase the process's size!). + +<p> +Try to guess what the result of this modification will be: what will +break? + +<p> +Make this modification, boot xv6, and type <tt>echo hi</tt> to the shell. +You should see something like this: + +<pre> +init: starting sh +$ echo hi +usertrap(): unexpected scause 0x000000000000000f pid=3 +            sepc=0x00000000000011dc stval=0x0000000000004008 +va=0x0000000000004000 pte=0x0000000000000000 +panic: unmappages: not mapped +</pre> + +The "usertrap(): ..." 
message is from the user trap handler in trap.c; +it has caught an exception that it does not know how to handle. Make +sure you understand why this page fault occurs. The "stval=0x0..04008" +indicates that the virtual address that caused the page fault is +0x4008. + +<h2>Part Two: Lazy allocation</h2> + +Modify the code in trap.c to respond to a page fault from user space +by mapping a newly-allocated page of physical memory at the faulting +address, and then returning back to user space to let the process +continue executing. You should add your code just before +the <tt>printf</tt> call that produced the "usertrap(): ..." +message. + +<p> +Hint: look at the printf arguments to see how to find the virtual +address that caused the page fault. + +<p> +Hint: steal code from allocuvm() in vm.c, which is what sbrk() +calls (via growproc()). + +<p> +Hint: use PGROUNDDOWN(va) to round the faulting virtual address +down to a page boundary. + +<p> +Hint: call <tt>usertrapret()</tt> in order to avoid +the <tt>printf</tt> and the <tt>myproc()->killed = 1</tt>. + +<p> +Hint: you'll need to call mappages(). +   +<p>Hint: you can check whether a fault is a page fault by testing whether r_scause() +  is 13 or 15 in trap(). + +<p>Hint: modify unmappages() to not free pages that aren't mapped. + +<p>Hint: if the kernel crashes, look up sepc in kernel/kernel.asm + +<p>Hint: if you see the error "incomplete type proc", include "proc.h" +  (and "spinlock.h"). + +<p>Hint: the first test in sbrk() allocates something large, this +  should succeed now. + +<p> +If all goes well, your lazy allocation code should result in <tt>echo +hi</tt> working. You should get at least one page fault (and thus lazy +allocation) in the shell, and perhaps two. + +<p>If you have the basics working, now turn your implementation into +  one that handles the corner cases too: + +<ul> + +  <li> Handle negative sbrk() arguments.  sbrktest() in usertests will +  test this. + +  <li> Handle fork correctly. 
sbrktest() will test this. + +  <li> Make sure that kernel use of not-yet-allocated user addresses +     works; for example, if a program passes an sbrk()-allocated +    address to write().  sbrktest() will test this. + +  <li> Handle out of memory correctly.  sbrktest() will test this. + +  <li> Handle faults on the invalid page below the stack.  stacktest() +  in usertests will test this. + +</ul> +   +<p>Run all tests in usertests() to make sure your solution doesn't +break other tests. + +<p> +<div class="question"> +<p><b>Submit</b>: The code that you added to trap.c in a file named <em>hwN.c</em> where <em>N</em> is the homework number as listed on the schedule. +</div> + + +</body> +</html> diff --git a/labs/lock.html b/labs/lock.html new file mode 100644 index 0000000..707d6c4 --- /dev/null +++ b/labs/lock.html @@ -0,0 +1,148 @@ +<html> +<head> +<title>Lab: locks</title> +<link rel="stylesheet" href="homework.css" type="text/css" /> +</head> +<body> + +<h1>Lab: locks</h1> + +<p>In this lab you will try to avoid lock contention for certain +workloads. + +<h2>lock contention</h2> + +<p>The program user/kalloctest stresses xv6's memory allocator: three +  processes grow and shrink their address space, which will result in +  many calls to <tt>kalloc</tt> and <tt>kfree</tt>, +  respectively.  <tt>kalloc</tt> and <tt>kfree</tt> +  obtain <tt>kmem.lock</tt>.  To see if there is lock contention for +  <tt>kmem.lock</tt> replace the call to <tt>acquire</tt> +  in <tt>kalloc</tt> with the following code: + +  <pre> +    while(!tryacquire(&kmem.lock)) { +      printf("!"); +    } +  </pre> + +<p><tt>tryacquire</tt> tries to acquire <tt>kmem.lock</tt>: if the +  lock is taken, it returns false (0); otherwise, it returns true (1) +  with the lock acquired.  Your first job is to +  implement <tt>tryacquire</tt> in kernel/spinlock.c. + +<p>A few hints: +  <ul> +    <li>look at <tt>acquire</tt>. 
+    <li>don't forget to restore interrupts when acquisition fails +    <li>Add tryacquire's signature to defs.h. +  </ul> + +<p>Run usertests to see if you didn't break anything.  Note that +  usertests never prints "!"; there is never contention +  for <tt>kmem.lock</tt>.  The caller is always able to immediately +  acquire the lock and never has to wait because some other process +  has the lock. + +<p>Now run kalloctest.  You should see quite a number of "!" on the +  console.  kalloctest causes many processes to contend on +  the <tt>kmem.lock</tt>.  This lock contention is a bit artificial, +  because qemu is simulating 3 processors, but it is likely that on real +  hardware there would be contention too. +   +<h2>Removing lock contention</h2> + +<p>The root cause of lock contention in kalloctest is that there is a +  single free list, protected by a single lock.  To remove lock +  contention, you will have to redesign the memory allocator to avoid +  a single lock and list.  The basic idea is to maintain a free list +  per CPU, each list with its own lock. Allocations and frees on each +  CPU can run in parallel, because each CPU will operate on a +  different list. +   +<p> The main challenge will be to deal with the case that one CPU runs +  out of memory, but another CPU still has free memory; in that case, +  the one CPU must "steal" part of the other CPU's free list. +  Stealing may introduce lock contention, but that may be acceptable +  because it may happen infrequently. + +<p>Your job is to implement per-CPU freelists and stealing when one +  CPU is out of memory.  Run kalloctest() to see if your +  implementation has removed lock contention. + +<p>Some hints: +  <ul> +    <li>You can use the constant <tt>NCPU</tt> in kernel/param.h +    <li>Let <tt>freerange</tt> give all free memory to the CPU +      running <tt>freerange</tt>. 
+    <li>The function <tt>cpuid</tt> returns the current core, but note +    that you can only use it when interrupts are turned off and so you will +    need to turn on/off interrupts in your solution. +  </ul> + +<p>Run usertests to see if you don't break anything. + +<h2>More scalable bcache lookup</h2> + + +<p>Several processes reading different files repeatedly will +  bottleneck in the buffer cache, bcache, in bio.c.  Replace the +  acquire in <tt>bget</tt> with +   +  <pre> +    while(!tryacquire(&bcache.lock)) { +      printf("!"); +    } +  </pre> + +  and run test0 from bcachetest and you will see "!"s. + +<p>Modify <tt>bget</tt> so that a lookup for a buffer that is in the +  bcache doesn't need to acquire <tt>bcache.lock</tt>.  This is more +  tricky than the kalloc assignment, because bcache buffers are truly +  shared among processes. You must maintain the invariant that a +  buffer is only once in memory. + +<p> There are several races that <tt>bcache.lock</tt> protects +against, including: +  <ul> +    <li>A <tt>brelse</tt> may set <tt>b->ref</tt> to 0, +      while a concurrent <tt>bget</tt> is incrementing it. +    <li>Two <tt>bget</tt> may see <tt>b->ref = 0</tt> and one may re-use +    the buffer, while the other may replace it with another block. +    <li>A concurrent <tt>brelse</tt> modifies the list +      that <tt>bget</tt> traverses. +  </ul> + +<p>A challenge is testing whether your code is still correct.  One way +  to do so is to artificially delay certain operations +  using <tt>sleepticks</tt>.  <tt>test1</tt> trashes the buffer cache +  and exercises more code paths. + +<p>Here are some hints: +  <ul> +    <li>Read the description of buffer cache in the xv6 book (Section 7.2). +    <li>Use a simple design: i.e., don't design a lock-free implementation. +    <li>Use a simple hash table with locks per bucket. +    <li>Searching in hash table for a buffer and allocating an entry +      for that buffer when the buffer is not found must be atomic. 
+    <li>It is fine to acquire <tt>bcache.lock</tt> in <tt>brelse</tt> +      to update the LRU/MRU list. +  </ul> + +<p>Check that your implementation has less contention +  on <tt>test0</tt>. + +<p>Make sure your implementation passes bcachetest and usertests. + +<p>Optional: +  <ul> +  <li>make the buffer cache more scalable (e.g., avoid taking +  out <tt>bcache.lock</tt> on <tt>brelse</tt>). +  <li>make lookup lock-free (Hint: use gcc's <tt>__sync_*</tt> +    functions.) How do you convince yourself that your implementation is correct? +  </ul> +   +   +</body> +</html> diff --git a/labs/mmap.html b/labs/mmap.html new file mode 100644 index 0000000..6f779c4 --- /dev/null +++ b/labs/mmap.html @@ -0,0 +1,171 @@ +<html> +<head> +<title>Lab: mmap</title> +<link rel="stylesheet" href="homework.css" type="text/css" /> +</head> +<body> + +<h1>Lab: mmap</h1> + +<p>In this lab you will use <tt>mmap</tt> on Linux to demand-page a +very large table and add memory-mapped files to xv6. + +<h2>Using mmap on Linux</h2> + +<p>This assignment will make you more familiar with how to manage virtual memory +in user programs using the Unix system call interface. You can do this +assignment on any operating system that supports the Unix API (a Linux Athena +machine, your laptop with Linux or MacOS, etc.). + +<p>Download the <a href="mmap.c">mmap homework assignment</a> and look +it over.  The program maintains a very large table of square root +values in virtual memory. However, the table is too large to fit in +physical RAM. Instead, the square root values should be computed on +demand in response to page faults that occur in the table's address +range.  Your job is to implement the demand faulting mechanism using a +signal handler and UNIX memory mapping system calls. To stay within +the physical RAM limit, we suggest using the simple strategy of +unmapping the last page whenever a new page is faulted in. + +<p>To compile <tt>mmap.c</tt>, you need a C compiler, such as gcc. 
On Athena, +you can type: +<pre> +$ add gnu +</pre> +Once you have gcc, you can compile mmap.c as follows: +<pre> +$ gcc mmap.c -lm -o mmap +</pre> +Which produces a <tt>mmap</tt> file, which you can run: +<pre> +$ ./mmap +page_size is 4096 +Validating square root table contents... +oops got SIGSEGV at 0x7f6bf7fd7f18 +</pre> + +<p>When the process accesses the square root table, the mapping does not exist +and the kernel passes control to the signal handler code in +<tt>handle_sigsegv()</tt>. Modify the code in <tt>handle_sigsegv()</tt> to map +in a page at the faulting address, unmap a previous page to stay within the +physical memory limit, and initialize the new page with the correct square root +values. Use the function <tt>calculate_sqrts()</tt> to compute the values. +The program includes test logic that verifies if the contents of the +square root table are correct. When you have completed your task +successfully, the process will print “All tests passed!”. + +<p>You may find that the man pages for mmap() and munmap() are helpful references. +<pre> +$ man mmap +$ man munmap +</pre> + + +<h2>Implement memory-mapped files in xv6</h2> + +<p>In this assignment you will implement memory-mapped files in xv6. +  The test program <tt>mmaptest</tt> tells you what should work. + +<p>Here are some hints about how you might go about this assignment: + +  <ul> +    <li>Start with adding the two system calls to the kernel, as you +      have done for other system calls (e.g., <tt>sigalarm</tt>), but +      don't implement them yet; just return an +      error. Run <tt>mmaptest</tt> to observe the error. +       +    <li>Keep track, for each process, of what <tt>mmap</tt> has mapped. +      You will need to allocate a <tt>struct vma</tt> to record the +      address, length, permissions, etc. for each virtual memory area +      (VMA) that maps a file.  
Since the xv6 kernel doesn't have a +      memory allocator in the kernel, you can use the same approach as +      for <tt>struct file</tt>: have a global array of <tt>struct +	vma</tt>s and have for each process a fixed-sized array of VMAs +      (like the file descriptor array). + +    <li>Implement <tt>mmap</tt>: allocate a VMA, add it to the process's +      table of VMAs, fill in the VMA, and find a hole in the process's +      address space where you will map the file.  You can assume that no +      file will be bigger than 1GB.  The VMA will contain a pointer to +      a <tt>struct file</tt> for the file being mapped; you will need to +      increase the file's reference count so that the structure doesn't +      disappear when the file is closed (hint: +      see <tt>filedup</tt>). You don't have to worry about overlapping +      VMAs.  Run <tt>mmaptest</tt>: the first <tt>mmap</tt> should +      succeed, but the first access to the mmap-ed memory will fail, +      because you haven't updated the page fault handler. + +    <li>Modify the page-fault handler from the lazy-allocation and COW +      labs to call a VMA function that handles page faults in VMAs. +      This function allocates a page, reads 4KB from the mmap-ed +      file into the page, and maps the page into the address space of +      the process.  To read the page, you can use <tt>readi</tt>, +      which allows you to specify an offset from where to read in the +      file (but you will have to lock/unlock the inode passed +      to <tt>readi</tt>).  Don't forget to set the permissions correctly +      on the page.  Run <tt>mmaptest</tt>; you should get to the +      first <tt>munmap</tt>. +       +    <li>Implement <tt>munmap</tt>: find the <tt>struct vma</tt> for +      the address and unmap the specified pages (hint: +      use <tt>uvmunmap</tt>). 
If <tt>munmap</tt> removes all pages +      from a VMA, you will have to free the VMA (don't forget to +      decrement the reference count of the VMA's <tt>struct +      file</tt>); otherwise, you may have to shrink the VMA.  You can +      assume that <tt>munmap</tt> will not split a VMA into two VMAs; +      that is, we don't unmap a few pages in the middle of a VMA.  If +      an unmapped page has been modified and the file is +      mapped <tt>MAP_SHARED</tt>, you will have to write the page back +      to the file. RISC-V has a dirty bit (<tt>D</tt>) in a PTE to +      record whether a page has ever been written to; add the +      declaration to kernel/riscv.h and use it.  Modify <tt>exit</tt> +      to call <tt>munmap</tt> for the process's open VMAs. +      Run <tt>mmaptest</tt>; you should pass <tt>mmaptest</tt>, but +      probably not <tt>forktest</tt>. + +    <li>Modify <tt>fork</tt> to copy VMAs from parent to child.  Don't +    forget to increment the reference count for a VMA's <tt>struct +    file</tt>.  In the page fault handler of the child, it is OK to +    allocate a new page instead of sharing the page with the +    parent. The latter would be cooler, but it would require more +    implementation work.  Run <tt>mmaptest</tt>; make sure you pass +    both <tt>mmaptest</tt> and <tt>forktest</tt>. +           +  </ul> +   +<p>Run usertests to make sure you didn't break anything. + +<p>Optional challenges: +  <ul> +     +    <li>If two processes have the same file mmap-ed (as +      in <tt>forktest</tt>), share their physical pages. You will need +      reference counts on physical pages. + +    <li>The solution above allocates a new physical page for each page +    read from the mmap-ed file, even though the data is also in kernel +    memory in the buffer cache.  Modify your implementation to mmap +    that memory, instead of allocating a new page.  This requires that +    file blocks be the same size as pages (set <tt>BSIZE</tt> to +    4096).  
You will need to pin mmap-ed blocks into the buffer cache. +    You will need to worry about reference counts. + +    <li>Remove redundancy between your implementation for lazy +    allocation and your implementation of mmap-ed files.  (Hint: +    create a VMA for the lazy allocation area.) + +    <li>Modify <tt>exec</tt> to use a VMA for different sections of +    the binary so that you get on-demand-paged executables. This will +    make starting programs faster, because <tt>exec</tt> will not have +      to read any data from the file system. + +    <li>Implement on-demand paging: don't keep a process in memory, +    but let the kernel move some parts of processes to disk when +    physical memory is low.  Then, page in the paged-out memory when +    the process references it.  Port your Linux program from the first +    assignment to xv6 and run it. +       +  </ul> +   +</body> +</html> diff --git a/labs/syscall.html b/labs/syscall.html new file mode 100644 index 0000000..2281f2e --- /dev/null +++ b/labs/syscall.html @@ -0,0 +1,443 @@ +<html> +<head> +<title>Lab: Alarm and uthread</title> +<link rel="stylesheet" href="homework.css" type="text/css" /> +</head> +<body> + +<h1>Lab: Alarm and uthread</h1> + +This lab will familiarize you with the implementation of system calls +and switching between threads of execution.  In particular, you will +implement new system calls (<tt>sigalarm</tt> and <tt>sigreturn</tt>) +and switching between threads in a user-level thread package. + +<h2>Warmup: RISC-V assembly</h2> + +<p>For this lab it will be important to understand a bit of RISC-V assembly. + +<p>Add a file user/call.c with the following content, modify the +  Makefile to add the program to the user programs, and compile (make +  fs.img).  The Makefile also produces a binary and a readable +  assembly version of the program in the file user/call.asm. 
+<pre> +#include "kernel/param.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" + +int g(int x) { +  return x+3; +} + +int f(int x) { +  return g(x); +} + +void main(void) { +  printf(1, "%d %d\n", f(8)+1, 13); +  exit(); +} +</pre> + +<p>Read through user/call.asm and understand it.  The instruction manual +  for RISC-V is in the doc directory (doc/riscv-spec-v2.2.pdf).  Here +  are some questions that you should answer for yourself: + +  <ul> +    <li>Which registers contain arguments to functions?  Which +    register holds 13 in the call to <tt>printf</tt>?  Which register +    holds the second argument? Which register holds the third one?  Etc. + +    <li>Where is the function call to <tt>f</tt> from main? Where +        is the call to <tt>g</tt>? +        (Hint: the compiler may inline functions.) + +    <li>At what address is the function <tt>printf</tt> located? + +    <li>What value is in the register <tt>ra</tt> just after the <tt>jalr</tt> +    to <tt>printf</tt> in <tt>main</tt>? +  </ul> + +<h2>Warmup: system call tracing</h2> + +<p>In this exercise you will modify the xv6 kernel to print out a line +for each system call invocation. It is enough to print the name of the +system call and the return value; you don't need to print the system +call arguments. + +<p> +When you're done, you should see output like this when booting +xv6: + +<pre> +... +fork -> 2 +exec -> 0 +open -> 3 +close -> 0 +$write -> 1 + write -> 1 +</pre> + +<p> +That's init forking and execing sh, sh making sure only two file descriptors are +open, and sh writing the $ prompt.  (Note: the output of the shell and the +system call trace are intermixed, because the shell uses the write syscall to +print its output.) + +<p> Hint: modify the syscall() function in kernel/syscall.c. + +<p>Run the xv6 programs you wrote in earlier labs and inspect the system call +  trace.  Are there many system calls?  
Which system calls correspond +  to code in the applications you wrote? +     +<p>Optional: print the system call arguments. + +   +<h2>Alarm</h2> + +<p> +In this exercise you'll add a feature to xv6 that periodically alerts +a process as it uses CPU time. This might be useful for compute-bound +processes that want to limit how much CPU time they chew up, or for +processes that want to compute but also want to take some periodic +action. More generally, you'll be implementing a primitive form of +user-level interrupt/fault handlers; you could use something similar +to handle page faults in the application, for example. + +<p> +You should add a new <tt>sigalarm(interval, handler)</tt> system call. +If an application calls <tt>sigalarm(n, fn)</tt>, then after every +<tt>n</tt> "ticks" of CPU time that the program consumes, the kernel +should cause application function +<tt>fn</tt> to be called. When <tt>fn</tt> returns, the application +should resume where it left off. A tick is a fairly arbitrary unit of +time in xv6, determined by how often a hardware timer generates +interrupts. + +<p> +You'll find a file <tt>user/alarmtest.c</tt> in your xv6 +repository. Add it to the Makefile. It won't compile correctly +until you've added <tt>sigalarm</tt> and <tt>sigreturn</tt> +system calls (see below). + +<p> +<tt>alarmtest</tt> calls <tt>sigalarm(2, periodic)</tt> in <tt>test0</tt> to +ask the kernel to force a call to <tt>periodic()</tt> every 2 ticks, +and then spins for a while. +You can see the assembly +code for alarmtest in user/alarmtest.asm, which may be handy +for debugging. +When you've finished the lab, +<tt>alarmtest</tt> should produce output like this: + +<pre> +$ alarmtest +test0 start +......................................alarm! +test0 passed +test1 start +..alarm! +..alarm! +..alarm! +.alarm! +..alarm! +..alarm! +..alarm! +..alarm! +..alarm! +..alarm! 
+test1 passed +$ +</pre> + +<p>The main challenge will be to arrange that the handler is invoked +  when the process's alarm interval expires.  You'll need to modify +  usertrap() in kernel/trap.c so that when a +  process's alarm interval expires, the process executes +  the handler. How can you do that?  You will need to understand  +  how system calls work (i.e., the code in kernel/trampoline.S +  and kernel/trap.c). Which register contains the address to which +  system calls return? + +<p>Your solution will be only a few lines of code, but it may be tricky to +  get it right. +We'll test your code with the version of alarmtest.c in the original +repository; if you modify alarmtest.c, make sure your kernel changes +cause the original alarmtest to pass the tests. + +<h3>test0: invoke handler</h3> + +<p>Get started by modifying the kernel to jump to the alarm handler in +user space, which will cause test0 to print "alarm!". Don't worry yet +what happens after the "alarm!" output; it's OK for now if your +program crashes after printing "alarm!". Here are some hints: + +<ul> + +<li>You'll need to modify the Makefile to cause <tt>alarmtest.c</tt> +to be compiled as an xv6 user program. + +<li>The right declarations to put in <tt>user/user.h</tt> are: +<pre> +    int sigalarm(int ticks, void (*handler)()); +    int sigreturn(void); +</pre> + +<li>Update user/sys.pl (which generates user/usys.S), +    kernel/syscall.h, and kernel/syscall.c  +   to allow <tt>alarmtest</tt> to invoke the sigalarm and +   sigreturn system calls. + +<li>For now, your <tt>sys_sigreturn</tt> should just return zero. + +<li>Your <tt>sys_sigalarm()</tt> should store the alarm interval and +the pointer to the handler function in new fields in the <tt>proc</tt> +structure, defined in <tt>kernel/proc.h</tt>. 
+ +<li>You'll need to keep track of how many ticks have passed since the +last call (or are left until the next call) to a process's alarm +handler; you'll need a new field in <tt>struct proc</tt> for this +too.  You can initialize <tt>proc</tt> fields in <tt>allocproc()</tt> +in <tt>proc.c</tt>. + +<li>Every tick, the hardware clock forces an interrupt, which is handled +in <tt>usertrap()</tt>; you should add some code here. + +<li>You only want to manipulate a process's alarm ticks if there's a +  timer interrupt; you want something like +<pre> +    if(which_dev == 2) ... +</pre> + +<li>Only invoke the alarm function if the process has a +  timer outstanding.  Note that the address of the user's alarm +  function might be 0 (e.g., in alarmtest.asm, <tt>periodic</tt> is at +  address 0). + +<li>It will be easier to look at traps with gdb if you tell qemu to +use only one CPU, which you can do by running +<pre> +    make CPUS=1 qemu +</pre> + +<li>You've succeeded if alarmtest prints "alarm!". + +</ul> + +<h3>test1(): resume interrupted code</h3> + +Chances are that alarmtest crashes at some point after it prints +"alarm!". Depending on how your solution works, that point may be in +test0, or it may be in test1. Crashes are likely caused +by the alarm handler (<tt>periodic</tt> in alarmtest.c) returning +to the wrong point in the user program. + +<p> +Your job now is to ensure that, when the alarm handler is done, +control returns to +the instruction at which the user program was originally +interrupted by the timer interrupt. You must also ensure that +the register contents are restored to values they held +at the time of the interrupt, so that the user program +can continue undisturbed after the alarm. + +<p>Your solution is likely to require you to save and restore +  registers---what registers do you need to save and restore to resume +  the interrupted code correctly? (Hint: it will be many). 
+  Several approaches are possible; for this lab you should make +  the <tt>sigreturn</tt> system call +  restore registers and return to the original +  interrupted user instruction. +  The user-space alarm handler +  calls sigreturn when it is done. + +  Some hints: +  <ul> +    <li>Have <tt>usertrap</tt> save enough state in +      <tt>struct proc</tt> when the timer goes off +      that <tt>sigreturn</tt> can correctly return to the +      interrupted user code. + +    <li>Prevent re-entrant calls to the handler----if a handler hasn't +      returned yet, the kernel shouldn't call it again. +  </ul> +   +<p>Once you pass <tt>test0</tt> and <tt>test1</tt>, run usertests to +  make sure you didn't break any other parts of the kernel. + +<h2>Uthread: switching between threads</h2> +   +<p>Download <a href="uthread.c">uthread.c</a> and <a + href="uthread_switch.S">uthread_switch.S</a> into your xv6 directory. +Make sure <tt>uthread_switch.S</tt> ends with <tt>.S</tt>, not +<tt>.s</tt>.  Add the +following rule to the xv6 Makefile after the _forktest rule: + +<pre> +$U/_uthread: $U/uthread.o $U/uthread_switch.o +	$(LD) $(LDFLAGS) -N -e main -Ttext 0 -o $U/_uthread $U/uthread.o $U/uthread_switch.o $(ULIB) +	$(OBJDUMP) -S $U/_uthread > $U/uthread.asm +</pre> +Make sure that the blank space at the start of each line is a tab, +not spaces. + +<p> +Add <tt>_uthread</tt> in the Makefile to the list of user programs defined by UPROGS. + +<p>Run xv6, then run <tt>uthread</tt> from the xv6 shell. The xv6 kernel will print an error message about <tt>uthread</tt> encountering a page fault. + +<p>Your job is to complete <tt>uthread_switch.S</tt>, so that you see output similar to +this (make sure to run with CPUS=1): +<pre> +~/classes/6828/xv6$ make CPUS=1 qemu +... 
+$ uthread +my thread running +my thread 0x0000000000002A30 +my thread running +my thread 0x0000000000004A40 +my thread 0x0000000000002A30 +my thread 0x0000000000004A40 +my thread 0x0000000000002A30 +my thread 0x0000000000004A40 +my thread 0x0000000000002A30 +my thread 0x0000000000004A40 +my thread 0x0000000000002A30 +... +my thread 0x0000000000002A88 +my thread 0x0000000000004A98 +my thread: exit +my thread: exit +thread_schedule: no runnable threads +$ +</pre> + +<p><tt>uthread</tt> creates two threads and switches back and forth between +them. Each thread prints "my thread ..." and then yields to give the other +thread a chance to run.  + +<p>To observe the above output, you need to complete <tt>uthread_switch.S</tt>, but before +jumping into <tt>uthread_switch.S</tt>, first understand how <tt>uthread.c</tt> +uses <tt>uthread_switch</tt>.  <tt>uthread.c</tt> has two global variables +<tt>current_thread</tt> and <tt>next_thread</tt>.  Each is a pointer to a +<tt>thread</tt> structure.  The thread structure has a stack for a thread and a +saved stack pointer (<tt>sp</tt>, which points into the thread's stack).  The +job of <tt>uthread_switch</tt> is to save the current thread state into the +structure pointed to by <tt>current_thread</tt>, restore <tt>next_thread</tt>'s +state, and make <tt>current_thread</tt> point to where <tt>next_thread</tt> was +pointing to, so that when <tt>uthread_switch</tt> returns <tt>next_thread</tt> +is running and is the <tt>current_thread</tt>. + +<p>You should study <tt>thread_create</tt>, which sets up the initial stack for +a new thread. It provides hints about what <tt>uthread_switch</tt> should do. +Note that <tt>thread_create</tt> simulates saving all callee-save registers +on a new thread's stack. 
+ +<p>To write the assembly in <tt>thread_switch</tt>, you need to know how the C +compiler lays out <tt>struct thread</tt> in memory, which is as +follows: + +<pre> +    -------------------- +    | 4 bytes for state| +    -------------------- +    | stack size bytes | +    | for stack        | +    -------------------- +    | 8 bytes for sp   | +    --------------------  <--- current_thread +         ...... + +         ...... +    -------------------- +    | 4 bytes for state| +    -------------------- +    | stack size bytes | +    | for stack        | +    -------------------- +    | 8 bytes for sp   | +    --------------------  <--- next_thread +</pre> + +The variables <tt>&next_thread</tt> and <tt>¤t_thread</tt> each +contain the address of a pointer to <tt>struct thread</tt>, and are +passed to <tt>thread_switch</tt>.  The following fragment of assembly +will be useful: + +<pre> +   ld t0, 0(a0) +   sd sp, 0(t0) +</pre> + +This saves <tt>sp</tt> in <tt>current_thread->sp</tt>.  This works because +<tt>sp</tt> is at +offset 0 in the struct. +You can study the assembly the compiler generates for +<tt>uthread.c</tt> by looking at <tt>uthread.asm</tt>. + +<p>To test your code it might be helpful to single step through your +<tt>uthread_switch</tt> using <tt>riscv64-linux-gnu-gdb</tt>.  You can get started in this way: + +<pre> +(gdb) file user/_uthread +Reading symbols from user/_uthread... +(gdb) b *0x230 + +</pre> +0x230 is the address of uthread_switch (see uthread.asm). When you +compile it may be at a different address, so check uthread_asm. +You may also be able to type "b uthread_switch".  <b>XXX This doesn't work +  for me; why?</b> + +<p>The breakpoint may (or may not) be triggered before you even run +<tt>uthread</tt>. How could that happen? + +<p>Once your xv6 shell runs, type "uthread", and gdb will break at +<tt>thread_switch</tt>.  
Now you can type commands like the following to inspect +the state of <tt>uthread</tt>: + +<pre> +  (gdb) p/x *next_thread +  $1 = {sp = 0x4a28, stack = {0x0 (repeats 8088 times), +      0x68, 0x1, 0x0 <repeats 102 times>}, state = 0x1} +</pre> +What address is <tt>0x168</tt>, which sits on the bottom of the stack +of <tt>next_thread</tt>? + +With "x", you can examine the content of a memory location +<pre> +  (gdb) x/x next_thread->sp +  0x4a28 <all_thread+16304>:      0x00000168 +</pre> +Why does that print <tt>0x168</tt>? + +<h3>Optional challenges</h3> + +<p>The user-level thread package interacts badly with the operating system in +several ways.  For example, if one user-level thread blocks in a system call, +another user-level thread won't run, because the user-level threads scheduler +doesn't know that one of its threads has been descheduled by the xv6 scheduler.  As +another example, two user-level threads will not run concurrently on different +cores, because the xv6 scheduler isn't aware that there are multiple +threads that could run in parallel.  Note that if two user-level threads were to +run truly in parallel, this implementation won't work because of several races +(e.g., two threads on different processors could call <tt>thread_schedule</tt> +concurrently, select the same runnable thread, and both run it on different +processors.) + +<p>There are several ways of addressing these problems.  One is + using <a href="http://en.wikipedia.org/wiki/Scheduler_activations">scheduler + activations</a> and another is to use one kernel thread per + user-level thread (as Linux kernels do).  Implement one of these ways + in xv6.  This is not easy to get right; for example, you will need to + implement TLB shootdown when updating a page table for a + multithreaded user process. + +<p>Add locks, condition variables, barriers, +etc. to your thread package. 
+     +</body> +</html> + diff --git a/labs/xv6.html b/labs/xv6.html new file mode 100644 index 0000000..13d581e --- /dev/null +++ b/labs/xv6.html @@ -0,0 +1,238 @@ +<html> +<head> +<title>Lab: xv6</title> +<link rel="stylesheet" href="homework.css" type="text/css" /> +</head> +<body> + +<h1>Lab: xv6</h1> + +This lab makes you familiar with xv6 and its system calls. + +<h2>Boot xv6</h2> + +<p>Login to Athena (e.g., ssh -X athena.dialup.mit.edu) and attach the course +locker: (You must run this command every time you log in; or add it to your +~/.environment file.) + +<pre> +$ add -f 6.828 +</pre> + +<p>Fetch the xv6 source: + +<pre> +$ mkdir 6.828 +$ cd 6.828 +$ git clone git://github.com/mit-pdos/xv6-riscv.git +Cloning into 'xv6-riscv'... +... +$ +</pre> + +<p>XXX pointer to an update tools page + +<p>Build xv6 on Athena: +<pre> +$ cd xv6-public +$ makeriscv64-linux-gnu-gcc    -c -o kernel/entry.o kernel/entry.S +riscv64-linux-gnu-gcc -Wall -Werror -O -fno-omit-frame-pointer -ggdb -MD -mcmodel=medany -ffreestanding -fno-common -nostdlib -mno-relax -I. -fno-stack-protector -fno-pie -no-pie   -c -o kernel/start.o kernel/start.c +... +$ make qemu +... +mkfs/mkfs fs.img README user/_cat user/_echo user/_forktest user/_grep user/_init user/_kill user/_ln user/_ls user/_mkdir user/_rm user/_sh user/_stressfs user/_usertests user/_wc user/_zombie user/_cow  +nmeta 46 (boot, super, log blocks 30 inode blocks 13, bitmap blocks 1) blocks 954 total 1000 +balloc: first 497 blocks have been allocated +balloc: write bitmap block at sector 45 +qemu-system-riscv64 -machine virt -kernel kernel/kernel -m 3G -smp 3 -nographic -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0 +hart 0 starting +hart 2 starting +hart 1 starting +init: starting sh +$ +</pre> + +<p> +If you type <tt>ls</tt> at the prompt, you should output similar to the following: +<pre> +$ ls +.              1 1 1024 +..             
1 1 1024 +README         2 2 2181 +cat            2 3 21024 +echo           2 4 19776 +forktest       2 5 11456 +grep           2 6 24512 +init           2 7 20656 +kill           2 8 19856 +ln             2 9 19832 +ls             2 10 23280 +mkdir          2 11 19952 +rm             2 12 19936 +sh             2 13 38632 +stressfs       2 14 20912 +usertests      2 15 106264 +wc             2 16 22160 +zombie         2 17 19376 +cow            2 18 27152 +console        3 19 0 +</pre> +These are the programs/files that <tt>mkfs</tt> includes in the +initial file system.  You just ran one of them: <tt>ls</tt>. + +<h2>sleep</h2> + +<p>Implement the UNIX program sleep for xv6; your sleep should pause +  for a user-specified number of ticks. + +<p>Some hints: +  <ul> +    <li>Look at some of the other programs in <tt>user/</tt> to see +    how you can obtain the command-line arguments passed to a program.  If the user +    forgets to pass an argument, sleep should print an error message. + +    <li>The command-line argument is passed as a string; you can convert it to an +      integer using <tt>atoi</tt> (see user/ulib.c). + +    <li>Use the system call <tt>sleep</tt> (see user/usys.S and kernel/sysproc.c). + +    <li>Make sure <tt>main</tt> calls <tt>exit()</tt> in order to exit +    your program. + +    <li>Add the program to <tt>UPROGS</tt> in Makefile and compile +      user programs by typing <tt>make fs.img</tt>. + +  </ul> + +  <p>Run the program from the xv6 shell: +    <pre> +      $ make qemu +      ... +      init: starting sh +      $ sleep 10 +      (waits for a little while) +      $ +    </pre> + +  <p>Optional: write an uptime program that prints the uptime in terms +    of ticks using the <tt>uptime</tt> system call. + +<h2>pingpong</h2> +     +<p> Write a program that uses UNIX system calls to ``ping-pong'' a +  byte between two processes over a pair of pipes, one for each +  direction. 
The parent sends by writing a byte to <tt>fd[1]</tt> and +  the child receives it by reading from <tt>fd[0]</tt>. After +  receiving a byte from parent, the child responds with its own byte +  by writing to <tt>fd[1]</tt>, which the parent then reads. + +<p>Some hints: +  <ul> +    <li>Use <tt>pipe</tt> to create a pipe. +    <li>Use <tt>fork</tt> to create a child. +    <li>Use <tt>read</tt> to read from the pipe, and <tt>write</tt> to write to the pipe. +  </ul> +     +<h2>primes</h2> + +  <p>Write a concurrent version of prime sieve using pipes.  This idea +    is due to Doug McIlroy, inventor of Unix pipes.  The picture +    halfway down <a href="http://swtch.com/~rsc/thread/">the page</a> +    and the text surrounding it explain how to do it. + +    <p>Your goal is to use <tt>pipe</tt> and <tt>fork</tt> to set up +    the pipeline. The first process feeds the numbers 2 through 35 +    into the pipeline.  For each prime number, you will arrange to +    create one process that reads from its left neighbor over a pipe +    and writes to its right neighbor over another pipe. Since xv6 has +    limited number of file descriptors and processes, the first +    process can stop at 35. +     +<p>Some hints: +  <ul> +    <li>Be careful to close file descriptors that a process doesn't +    need, because otherwise your program will run xv6 out of resources +    before the first process reaches 35. +       +    <li>Once the first process reach 35, you should arrange that the +    pipeline terminates cleanly (Hint: read will return an end-of-file +      when the write-side of the pipe is closed). +  </ul> + +<h2>find</h2> + +<p>Write a simple version of the UNIX find program: find all the files +  in a directory tree whose name matches a string.  For example if the +  file system contains a file <tt>a/b</tt>, then running find as +  follows should produce: +  <pre> +    $ find . 
b +    ./a/b +    $ +  </pre> +   +<p>Some hints: +  <ul> +    <li>Look at user/ls.c to see how to read directories. +    <li>Use recursion to run find in sub-directories. +    <li>Don't recurse into "." and "..". +  </ul> + +<p>Optional: support regular expressions in name matching.  Grep has some +  primitive support for regular expressions. +   +<h2>xargs</h2> + +<p>Write a simple version of the UNIX xargs program: read lines from +  standard in and run a command for each line, supplying the line as +  arguments to the command.  The following example illustrates xarg's +  behavior: +  <pre> +    $ xargs echo bye +    hello too +    bye hello too +    <ctrl-d> +    $ +  </pre> +  Note that the command here is "echo bye" and the additional +  arguments are "hello too", making the command "echo bye hello too", +  which outputs "bye hello too". +   +<p>xargs and find combine well: +  <pre> +    find . b | xargs grep hello +  </pre> +  will run "grep hello" on each file named b in the directories below ".". + +<p>Some hints: +  <ul> +    <li>Use <tt>fork</tt> and <tt>exec</tt> system call to invoke the +      command on each line of input.  Use <tt>wait</tt> in the parent +      to wait for the child to complete running the command. +    <li>Read from stdin a character at the time until the newline +      character ('\n'). +    <li>kernel/param.h declares MAXARG, which may be useful if you need +    to declare an argv. +  </ul> + +<h2>Optional: modify the shell</h2> + +There are endless ways in which the shell could be extended. Here are +some suggestions: + +<ul> +   +<li>Modify the shell to support wait. 
+   +<li>Modify the shell to support lists of commands, separated by ";" + +<li>Modify the shell to support sub-shells by implementing "(" and ")" + +<li>Modify the shell to allow users to edit the command line + +</ul> + +</body> +</html> + + diff --git a/lapic.c b/lapic.c deleted file mode 100644 index b22bbd7..0000000 --- a/lapic.c +++ /dev/null @@ -1,229 +0,0 @@ -// The local APIC manages internal (non-I/O) interrupts. -// See Chapter 8 & Appendix C of Intel processor manual volume 3. - -#include "param.h" -#include "types.h" -#include "defs.h" -#include "date.h" -#include "memlayout.h" -#include "traps.h" -#include "mmu.h" -#include "x86.h" - -// Local APIC registers, divided by 4 for use as uint[] indices. -#define ID      (0x0020/4)   // ID -#define VER     (0x0030/4)   // Version -#define TPR     (0x0080/4)   // Task Priority -#define EOI     (0x00B0/4)   // EOI -#define SVR     (0x00F0/4)   // Spurious Interrupt Vector -  #define ENABLE     0x00000100   // Unit Enable -#define ESR     (0x0280/4)   // Error Status -#define ICRLO   (0x0300/4)   // Interrupt Command -  #define INIT       0x00000500   // INIT/RESET -  #define STARTUP    0x00000600   // Startup IPI -  #define DELIVS     0x00001000   // Delivery status -  #define ASSERT     0x00004000   // Assert interrupt (vs deassert) -  #define DEASSERT   0x00000000 -  #define LEVEL      0x00008000   // Level triggered -  #define BCAST      0x00080000   // Send to all APICs, including self. 
-  #define BUSY       0x00001000 -  #define FIXED      0x00000000 -#define ICRHI   (0x0310/4)   // Interrupt Command [63:32] -#define TIMER   (0x0320/4)   // Local Vector Table 0 (TIMER) -  #define X1         0x0000000B   // divide counts by 1 -  #define PERIODIC   0x00020000   // Periodic -#define PCINT   (0x0340/4)   // Performance Counter LVT -#define LINT0   (0x0350/4)   // Local Vector Table 1 (LINT0) -#define LINT1   (0x0360/4)   // Local Vector Table 2 (LINT1) -#define ERROR   (0x0370/4)   // Local Vector Table 3 (ERROR) -  #define MASKED     0x00010000   // Interrupt masked -#define TICR    (0x0380/4)   // Timer Initial Count -#define TCCR    (0x0390/4)   // Timer Current Count -#define TDCR    (0x03E0/4)   // Timer Divide Configuration - -volatile uint *lapic;  // Initialized in mp.c - -//PAGEBREAK! -static void -lapicw(int index, int value) -{ -  lapic[index] = value; -  lapic[ID];  // wait for write to finish, by reading -} - -void -lapicinit(void) -{ -  if(!lapic) -    return; - -  // Enable local APIC; set spurious interrupt vector. -  lapicw(SVR, ENABLE | (T_IRQ0 + IRQ_SPURIOUS)); - -  // The timer repeatedly counts down at bus frequency -  // from lapic[TICR] and then issues an interrupt. -  // If xv6 cared more about precise timekeeping, -  // TICR would be calibrated using an external time source. -  lapicw(TDCR, X1); -  lapicw(TIMER, PERIODIC | (T_IRQ0 + IRQ_TIMER)); -  lapicw(TICR, 10000000); - -  // Disable logical interrupt lines. -  lapicw(LINT0, MASKED); -  lapicw(LINT1, MASKED); - -  // Disable performance counter overflow interrupts -  // on machines that provide that interrupt entry. -  if(((lapic[VER]>>16) & 0xFF) >= 4) -    lapicw(PCINT, MASKED); - -  // Map error interrupt to IRQ_ERROR. -  lapicw(ERROR, T_IRQ0 + IRQ_ERROR); - -  // Clear error status register (requires back-to-back writes). -  lapicw(ESR, 0); -  lapicw(ESR, 0); - -  // Ack any outstanding interrupts. 
-  lapicw(EOI, 0); - -  // Send an Init Level De-Assert to synchronise arbitration ID's. -  lapicw(ICRHI, 0); -  lapicw(ICRLO, BCAST | INIT | LEVEL); -  while(lapic[ICRLO] & DELIVS) -    ; - -  // Enable interrupts on the APIC (but not on the processor). -  lapicw(TPR, 0); -} - -int -lapicid(void) -{ -  if (!lapic) -    return 0; -  return lapic[ID] >> 24; -} - -// Acknowledge interrupt. -void -lapiceoi(void) -{ -  if(lapic) -    lapicw(EOI, 0); -} - -// Spin for a given number of microseconds. -// On real hardware would want to tune this dynamically. -void -microdelay(int us) -{ -} - -#define CMOS_PORT    0x70 -#define CMOS_RETURN  0x71 - -// Start additional processor running entry code at addr. -// See Appendix B of MultiProcessor Specification. -void -lapicstartap(uchar apicid, uint addr) -{ -  int i; -  ushort *wrv; - -  // "The BSP must initialize CMOS shutdown code to 0AH -  // and the warm reset vector (DWORD based at 40:67) to point at -  // the AP startup code prior to the [universal startup algorithm]." -  outb(CMOS_PORT, 0xF);  // offset 0xF is shutdown code -  outb(CMOS_PORT+1, 0x0A); -  wrv = (ushort*)P2V((0x40<<4 | 0x67));  // Warm reset vector -  wrv[0] = 0; -  wrv[1] = addr >> 4; - -  // "Universal startup algorithm." -  // Send INIT (level-triggered) interrupt to reset other CPU. -  lapicw(ICRHI, apicid<<24); -  lapicw(ICRLO, INIT | LEVEL | ASSERT); -  microdelay(200); -  lapicw(ICRLO, INIT | LEVEL); -  microdelay(100);    // should be 10ms, but too slow in Bochs! - -  // Send startup IPI (twice!) to enter code. -  // Regular hardware is supposed to only accept a STARTUP -  // when it is in the halted state due to an INIT.  So the second -  // should be ignored, but it is part of the official Intel algorithm. -  // Bochs complains about the second one.  Too bad for Bochs. 
-  for(i = 0; i < 2; i++){ -    lapicw(ICRHI, apicid<<24); -    lapicw(ICRLO, STARTUP | (addr>>12)); -    microdelay(200); -  } -} - -#define CMOS_STATA   0x0a -#define CMOS_STATB   0x0b -#define CMOS_UIP    (1 << 7)        // RTC update in progress - -#define SECS    0x00 -#define MINS    0x02 -#define HOURS   0x04 -#define DAY     0x07 -#define MONTH   0x08 -#define YEAR    0x09 - -static uint -cmos_read(uint reg) -{ -  outb(CMOS_PORT,  reg); -  microdelay(200); - -  return inb(CMOS_RETURN); -} - -static void -fill_rtcdate(struct rtcdate *r) -{ -  r->second = cmos_read(SECS); -  r->minute = cmos_read(MINS); -  r->hour   = cmos_read(HOURS); -  r->day    = cmos_read(DAY); -  r->month  = cmos_read(MONTH); -  r->year   = cmos_read(YEAR); -} - -// qemu seems to use 24-hour GWT and the values are BCD encoded -void -cmostime(struct rtcdate *r) -{ -  struct rtcdate t1, t2; -  int sb, bcd; - -  sb = cmos_read(CMOS_STATB); - -  bcd = (sb & (1 << 2)) == 0; - -  // make sure CMOS doesn't modify time while we read it -  for(;;) { -    fill_rtcdate(&t1); -    if(cmos_read(CMOS_STATA) & CMOS_UIP) -        continue; -    fill_rtcdate(&t2); -    if(memcmp(&t1, &t2, sizeof(t1)) == 0) -      break; -  } - -  // convert -  if(bcd) { -#define    CONV(x)     (t1.x = ((t1.x >> 4) * 10) + (t1.x & 0xf)) -    CONV(second); -    CONV(minute); -    CONV(hour  ); -    CONV(day   ); -    CONV(month ); -    CONV(year  ); -#undef     CONV -  } - -  *r = t1; -  r->year += 2000; -} @@ -1,116 +0,0 @@ -#include "types.h" -#include "defs.h" -#include "param.h" -#include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" - -static void startothers(void); -static void mpmain(void)  __attribute__((noreturn)); -extern pde_t *kpgdir; -extern char end[]; // first address after kernel loaded from ELF file - -// Bootstrap processor starts running C code here. -// Allocate a real stack and switch to it, first -// doing some setup required for memory allocator to work. 
-int -main(void) -{ -  kinit1(end, P2V(4*1024*1024)); // phys page allocator -  kvmalloc();      // kernel page table -  mpinit();        // detect other processors -  lapicinit();     // interrupt controller -  seginit();       // segment descriptors -  picinit();       // disable pic -  ioapicinit();    // another interrupt controller -  consoleinit();   // console hardware -  uartinit();      // serial port -  pinit();         // process table -  tvinit();        // trap vectors -  binit();         // buffer cache -  fileinit();      // file table -  ideinit();       // disk  -  startothers();   // start other processors -  kinit2(P2V(4*1024*1024), P2V(PHYSTOP)); // must come after startothers() -  userinit();      // first user process -  mpmain();        // finish this processor's setup -} - -// Other CPUs jump here from entryother.S. -static void -mpenter(void) -{ -  switchkvm(); -  seginit(); -  lapicinit(); -  mpmain(); -} - -// Common CPU setup code. -static void -mpmain(void) -{ -  cprintf("cpu%d: starting %d\n", cpuid(), cpuid()); -  idtinit();       // load idt register -  xchg(&(mycpu()->started), 1); // tell startothers() we're up -  scheduler();     // start running processes -} - -pde_t entrypgdir[];  // For entry.S - -// Start the non-boot (AP) processors. -static void -startothers(void) -{ -  extern uchar _binary_entryother_start[], _binary_entryother_size[]; -  uchar *code; -  struct cpu *c; -  char *stack; - -  // Write entry code to unused memory at 0x7000. -  // The linker has placed the image of entryother.S in -  // _binary_entryother_start. -  code = P2V(0x7000); -  memmove(code, _binary_entryother_start, (uint)_binary_entryother_size); - -  for(c = cpus; c < cpus+ncpu; c++){ -    if(c == mycpu())  // We've started already. -      continue; - -    // Tell entryother.S what stack to use, where to enter, and what -    // pgdir to use. 
We cannot use kpgdir yet, because the AP processor -    // is running in low  memory, so we use entrypgdir for the APs too. -    stack = kalloc(); -    *(void**)(code-4) = stack + KSTACKSIZE; -    *(void(**)(void))(code-8) = mpenter; -    *(int**)(code-12) = (void *) V2P(entrypgdir); - -    lapicstartap(c->apicid, V2P(code)); - -    // wait for cpu to finish mpmain() -    while(c->started == 0) -      ; -  } -} - -// The boot page table used in entry.S and entryother.S. -// Page directories (and page tables) must start on page boundaries, -// hence the __aligned__ attribute. -// PTE_PS in a page directory entry enables 4Mbyte pages. - -__attribute__((__aligned__(PGSIZE))) -pde_t entrypgdir[NPDENTRIES] = { -  // Map VA's [0, 4MB) to PA's [0, 4MB) -  [0] = (0) | PTE_P | PTE_W | PTE_PS, -  // Map VA's [KERNBASE, KERNBASE+4MB) to PA's [0, 4MB) -  [KERNBASE>>PDXSHIFT] = (0) | PTE_P | PTE_W | PTE_PS, -}; - -//PAGEBREAK! -// Blank page. -//PAGEBREAK! -// Blank page. -//PAGEBREAK! -// Blank page. - diff --git a/memide.c b/memide.c deleted file mode 100644 index ba267ac..0000000 --- a/memide.c +++ /dev/null @@ -1,60 +0,0 @@ -// Fake IDE disk; stores blocks in memory. -// Useful for running kernel without scratch disk. - -#include "types.h" -#include "defs.h" -#include "param.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" -#include "traps.h" -#include "spinlock.h" -#include "sleeplock.h" -#include "fs.h" -#include "buf.h" - -extern uchar _binary_fs_img_start[], _binary_fs_img_size[]; - -static int disksize; -static uchar *memdisk; - -void -ideinit(void) -{ -  memdisk = _binary_fs_img_start; -  disksize = (uint)_binary_fs_img_size/BSIZE; -} - -// Interrupt handler. -void -ideintr(void) -{ -  // no-op -} - -// Sync buf with disk. -// If B_DIRTY is set, write buf to disk, clear B_DIRTY, set B_VALID. -// Else if B_VALID is not set, read buf from disk, set B_VALID. 
-void -iderw(struct buf *b) -{ -  uchar *p; - -  if(!holdingsleep(&b->lock)) -    panic("iderw: buf not locked"); -  if((b->flags & (B_VALID|B_DIRTY)) == B_VALID) -    panic("iderw: nothing to do"); -  if(b->dev != 1) -    panic("iderw: request not for disk 1"); -  if(b->blockno >= disksize) -    panic("iderw: block out of range"); - -  p = memdisk + b->blockno*BSIZE; - -  if(b->flags & B_DIRTY){ -    b->flags &= ~B_DIRTY; -    memmove(p, b->data, BSIZE); -  } else -    memmove(b->data, p, BSIZE); -  b->flags |= B_VALID; -} diff --git a/memlayout.h b/memlayout.h deleted file mode 100644 index d1615f7..0000000 --- a/memlayout.h +++ /dev/null @@ -1,15 +0,0 @@ -// Memory layout - -#define EXTMEM  0x100000            // Start of extended memory -#define PHYSTOP 0xE000000           // Top physical memory -#define DEVSPACE 0xFE000000         // Other devices are at high addresses - -// Key addresses for address space layout (see kmap in vm.c for layout) -#define KERNBASE 0x80000000         // First kernel virtual address -#define KERNLINK (KERNBASE+EXTMEM)  // Address where kernel is linked - -#define V2P(a) (((uint) (a)) - KERNBASE) -#define P2V(a) ((void *)(((char *) (a)) + KERNBASE)) - -#define V2P_WO(x) ((x) - KERNBASE)    // same as V2P, but without casts -#define P2V_WO(x) ((x) + KERNBASE)    // same as P2V, but without casts @@ -6,10 +6,10 @@  #include <assert.h>  #define stat xv6_stat  // avoid clash with host struct stat -#include "types.h" -#include "fs.h" -#include "stat.h" -#include "param.h" +#include "kernel/types.h" +#include "kernel/fs.h" +#include "kernel/stat.h" +#include "kernel/param.h"  #ifndef static_assert  #define static_assert(a, b) do { switch (0) case 0: case (a): ; } while (0) @@ -94,6 +94,7 @@ main(int argc, char *argv[])    nmeta = 2 + nlog + ninodeblocks + nbitmap;    nblocks = FSSIZE - nmeta; +  sb.magic = FSMAGIC;    sb.size = xint(FSSIZE);    sb.nblocks = xint(nblocks);    sb.ninodes = xint(NINODES); @@ -128,7 +129,14 @@ main(int argc, 
char *argv[])    iappend(rootino, &de, sizeof(de));    for(i = 2; i < argc; i++){ -    assert(index(argv[i], '/') == 0); +    // get rid of "user/" +    char *shortname; +    if(strncmp(argv[i], "user/", 5) == 0) +      shortname = argv[i] + 5; +    else +      shortname = argv[i]; +     +    assert(index(shortname, '/') == 0);      if((fd = open(argv[i], 0)) < 0){        perror(argv[i]); @@ -139,14 +147,14 @@ main(int argc, char *argv[])      // The binaries are named _rm, _cat, etc. to keep the      // build operating system from trying to execute them      // in place of system binaries like rm and cat. -    if(argv[i][0] == '_') -      ++argv[i]; +    if(shortname[0] == '_') +      shortname += 1;      inum = ialloc(T_FILE);      bzero(&de, sizeof(de));      de.inum = xshort(inum); -    strncpy(de.name, argv[i], DIRSIZ); +    strncpy(de.name, shortname, DIRSIZ);      iappend(rootino, &de, sizeof(de));      while((cc = read(fd, buf, sizeof(buf))) > 0) @@ -1,181 +0,0 @@ -// This file contains definitions for the -// x86 memory management unit (MMU). - -// Eflags register -#define FL_IF           0x00000200      // Interrupt Enable - -// Control Register flags -#define CR0_PE          0x00000001      // Protection Enable -#define CR0_WP          0x00010000      // Write Protect -#define CR0_PG          0x80000000      // Paging - -#define CR4_PSE         0x00000010      // Page size extension - -// various segment selectors. -#define SEG_KCODE 1  // kernel code -#define SEG_KDATA 2  // kernel data+stack -#define SEG_UCODE 3  // user code -#define SEG_UDATA 4  // user data+stack -#define SEG_TSS   5  // this process's task state - -// cpu->gdt[NSEGS] holds the above segments. 
-#define NSEGS     6 - -#ifndef __ASSEMBLER__ -// Segment Descriptor -struct segdesc { -  uint lim_15_0 : 16;  // Low bits of segment limit -  uint base_15_0 : 16; // Low bits of segment base address -  uint base_23_16 : 8; // Middle bits of segment base address -  uint type : 4;       // Segment type (see STS_ constants) -  uint s : 1;          // 0 = system, 1 = application -  uint dpl : 2;        // Descriptor Privilege Level -  uint p : 1;          // Present -  uint lim_19_16 : 4;  // High bits of segment limit -  uint avl : 1;        // Unused (available for software use) -  uint rsv1 : 1;       // Reserved -  uint db : 1;         // 0 = 16-bit segment, 1 = 32-bit segment -  uint g : 1;          // Granularity: limit scaled by 4K when set -  uint base_31_24 : 8; // High bits of segment base address -}; - -// Normal segment -#define SEG(type, base, lim, dpl) (struct segdesc)    \ -{ ((lim) >> 12) & 0xffff, (uint)(base) & 0xffff,      \ -  ((uint)(base) >> 16) & 0xff, type, 1, dpl, 1,       \ -  (uint)(lim) >> 28, 0, 0, 1, 1, (uint)(base) >> 24 } -#define SEG16(type, base, lim, dpl) (struct segdesc)  \ -{ (lim) & 0xffff, (uint)(base) & 0xffff,              \ -  ((uint)(base) >> 16) & 0xff, type, 1, dpl, 1,       \ -  (uint)(lim) >> 16, 0, 0, 1, 0, (uint)(base) >> 24 } -#endif - -#define DPL_USER    0x3     // User DPL - -// Application segment type bits -#define STA_X       0x8     // Executable segment -#define STA_W       0x2     // Writeable (non-executable segments) -#define STA_R       0x2     // Readable (executable segments) - -// System segment type bits -#define STS_T32A    0x9     // Available 32-bit TSS -#define STS_IG32    0xE     // 32-bit Interrupt Gate -#define STS_TG32    0xF     // 32-bit Trap Gate - -// A virtual address 'la' has a three-part structure as follows: -// -// +--------10------+-------10-------+---------12----------+ -// | Page Directory |   Page Table   | Offset within Page  | -// |      Index     |      Index     |                
     | -// +----------------+----------------+---------------------+ -//  \--- PDX(va) --/ \--- PTX(va) --/ - -// page directory index -#define PDX(va)         (((uint)(va) >> PDXSHIFT) & 0x3FF) - -// page table index -#define PTX(va)         (((uint)(va) >> PTXSHIFT) & 0x3FF) - -// construct virtual address from indexes and offset -#define PGADDR(d, t, o) ((uint)((d) << PDXSHIFT | (t) << PTXSHIFT | (o))) - -// Page directory and page table constants. -#define NPDENTRIES      1024    // # directory entries per page directory -#define NPTENTRIES      1024    // # PTEs per page table -#define PGSIZE          4096    // bytes mapped by a page - -#define PTXSHIFT        12      // offset of PTX in a linear address -#define PDXSHIFT        22      // offset of PDX in a linear address - -#define PGROUNDUP(sz)  (((sz)+PGSIZE-1) & ~(PGSIZE-1)) -#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1)) - -// Page table/directory entry flags. -#define PTE_P           0x001   // Present -#define PTE_W           0x002   // Writeable -#define PTE_U           0x004   // User -#define PTE_PS          0x080   // Page Size - -// Address in page table or page directory entry -#define PTE_ADDR(pte)   ((uint)(pte) & ~0xFFF) -#define PTE_FLAGS(pte)  ((uint)(pte) &  0xFFF) - -#ifndef __ASSEMBLER__ -typedef uint pte_t; - -// Task state segment format -struct taskstate { -  uint link;         // Old ts selector -  uint esp0;         // Stack pointers and segment selectors -  ushort ss0;        //   after an increase in privilege level -  ushort padding1; -  uint *esp1; -  ushort ss1; -  ushort padding2; -  uint *esp2; -  ushort ss2; -  ushort padding3; -  void *cr3;         // Page directory base -  uint *eip;         // Saved state from last task switch -  uint eflags; -  uint eax;          // More saved state (registers) -  uint ecx; -  uint edx; -  uint ebx; -  uint *esp; -  uint *ebp; -  uint esi; -  uint edi; -  ushort es;         // Even more saved state (segment selectors) -  ushort padding4; - 
 ushort cs; -  ushort padding5; -  ushort ss; -  ushort padding6; -  ushort ds; -  ushort padding7; -  ushort fs; -  ushort padding8; -  ushort gs; -  ushort padding9; -  ushort ldt; -  ushort padding10; -  ushort t;          // Trap on task switch -  ushort iomb;       // I/O map base address -}; - -// Gate descriptors for interrupts and traps -struct gatedesc { -  uint off_15_0 : 16;   // low 16 bits of offset in segment -  uint cs : 16;         // code segment selector -  uint args : 5;        // # args, 0 for interrupt/trap gates -  uint rsv1 : 3;        // reserved(should be zero I guess) -  uint type : 4;        // type(STS_{IG32,TG32}) -  uint s : 1;           // must be 0 (system) -  uint dpl : 2;         // descriptor(meaning new) privilege level -  uint p : 1;           // Present -  uint off_31_16 : 16;  // high bits of offset in segment -}; - -// Set up a normal interrupt/trap gate descriptor. -// - istrap: 1 for a trap (= exception) gate, 0 for an interrupt gate. -//   interrupt gate clears FL_IF, trap gate leaves FL_IF alone -// - sel: Code segment selector for interrupt/trap handler -// - off: Offset in code segment for interrupt/trap handler -// - dpl: Descriptor Privilege Level - -//        the privilege level required for software to invoke -//        this interrupt/trap gate explicitly using an int instruction. -#define SETGATE(gate, istrap, sel, off, d)                \ -{                                                         \ -  (gate).off_15_0 = (uint)(off) & 0xffff;                \ -  (gate).cs = (sel);                                      \ -  (gate).args = 0;                                        \ -  (gate).rsv1 = 0;                                        \ -  (gate).type = (istrap) ? 
STS_TG32 : STS_IG32;           \ -  (gate).s = 0;                                           \ -  (gate).dpl = (d);                                       \ -  (gate).p = 1;                                           \ -  (gate).off_31_16 = (uint)(off) >> 16;                  \ -} - -#endif @@ -1,139 +0,0 @@ -// Multiprocessor support -// Search memory for MP description structures. -// http://developer.intel.com/design/pentium/datashts/24201606.pdf - -#include "types.h" -#include "defs.h" -#include "param.h" -#include "memlayout.h" -#include "mp.h" -#include "x86.h" -#include "mmu.h" -#include "proc.h" - -struct cpu cpus[NCPU]; -int ncpu; -uchar ioapicid; - -static uchar -sum(uchar *addr, int len) -{ -  int i, sum; - -  sum = 0; -  for(i=0; i<len; i++) -    sum += addr[i]; -  return sum; -} - -// Look for an MP structure in the len bytes at addr. -static struct mp* -mpsearch1(uint a, int len) -{ -  uchar *e, *p, *addr; - -  addr = P2V(a); -  e = addr+len; -  for(p = addr; p < e; p += sizeof(struct mp)) -    if(memcmp(p, "_MP_", 4) == 0 && sum(p, sizeof(struct mp)) == 0) -      return (struct mp*)p; -  return 0; -} - -// Search for the MP Floating Pointer Structure, which according to the -// spec is in one of the following three locations: -// 1) in the first KB of the EBDA; -// 2) in the last KB of system base memory; -// 3) in the BIOS ROM between 0xE0000 and 0xFFFFF. -static struct mp* -mpsearch(void) -{ -  uchar *bda; -  uint p; -  struct mp *mp; - -  bda = (uchar *) P2V(0x400); -  if((p = ((bda[0x0F]<<8)| bda[0x0E]) << 4)){ -    if((mp = mpsearch1(p, 1024))) -      return mp; -  } else { -    p = ((bda[0x14]<<8)|bda[0x13])*1024; -    if((mp = mpsearch1(p-1024, 1024))) -      return mp; -  } -  return mpsearch1(0xF0000, 0x10000); -} - -// Search for an MP configuration table.  For now, -// don't accept the default configurations (physaddr == 0). -// Check for correct signature, calculate the checksum and, -// if correct, check the version. 
-// To do: check extended table checksum. -static struct mpconf* -mpconfig(struct mp **pmp) -{ -  struct mpconf *conf; -  struct mp *mp; - -  if((mp = mpsearch()) == 0 || mp->physaddr == 0) -    return 0; -  conf = (struct mpconf*) P2V((uint) mp->physaddr); -  if(memcmp(conf, "PCMP", 4) != 0) -    return 0; -  if(conf->version != 1 && conf->version != 4) -    return 0; -  if(sum((uchar*)conf, conf->length) != 0) -    return 0; -  *pmp = mp; -  return conf; -} - -void -mpinit(void) -{ -  uchar *p, *e; -  int ismp; -  struct mp *mp; -  struct mpconf *conf; -  struct mpproc *proc; -  struct mpioapic *ioapic; - -  if((conf = mpconfig(&mp)) == 0) -    panic("Expect to run on an SMP"); -  ismp = 1; -  lapic = (uint*)conf->lapicaddr; -  for(p=(uchar*)(conf+1), e=(uchar*)conf+conf->length; p<e; ){ -    switch(*p){ -    case MPPROC: -      proc = (struct mpproc*)p; -      if(ncpu < NCPU) { -        cpus[ncpu].apicid = proc->apicid;  // apicid may differ from ncpu -        ncpu++; -      } -      p += sizeof(struct mpproc); -      continue; -    case MPIOAPIC: -      ioapic = (struct mpioapic*)p; -      ioapicid = ioapic->apicno; -      p += sizeof(struct mpioapic); -      continue; -    case MPBUS: -    case MPIOINTR: -    case MPLINTR: -      p += 8; -      continue; -    default: -      ismp = 0; -      break; -    } -  } -  if(!ismp) -    panic("Didn't find a suitable machine"); - -  if(mp->imcrp){ -    // Bochs doesn't support IMCR, so this doesn't run on Bochs. -    // But it would on real hardware. -    outb(0x22, 0x70);   // Select IMCR -    outb(0x23, inb(0x23) | 1);  // Mask external interrupts. 
-  } -} @@ -1,56 +0,0 @@ -// See MultiProcessor Specification Version 1.[14] - -struct mp {             // floating pointer -  uchar signature[4];           // "_MP_" -  void *physaddr;               // phys addr of MP config table -  uchar length;                 // 1 -  uchar specrev;                // [14] -  uchar checksum;               // all bytes must add up to 0 -  uchar type;                   // MP system config type -  uchar imcrp; -  uchar reserved[3]; -}; - -struct mpconf {         // configuration table header -  uchar signature[4];           // "PCMP" -  ushort length;                // total table length -  uchar version;                // [14] -  uchar checksum;               // all bytes must add up to 0 -  uchar product[20];            // product id -  uint *oemtable;               // OEM table pointer -  ushort oemlength;             // OEM table length -  ushort entry;                 // entry count -  uint *lapicaddr;              // address of local APIC -  ushort xlength;               // extended table length -  uchar xchecksum;              // extended table checksum -  uchar reserved; -}; - -struct mpproc {         // processor table entry -  uchar type;                   // entry type (0) -  uchar apicid;                 // local APIC id -  uchar version;                // local APIC verison -  uchar flags;                  // CPU flags -    #define MPBOOT 0x02           // This proc is the bootstrap processor. 
-  uchar signature[4];           // CPU signature -  uint feature;                 // feature flags from CPUID instruction -  uchar reserved[8]; -}; - -struct mpioapic {       // I/O APIC table entry -  uchar type;                   // entry type (2) -  uchar apicno;                 // I/O APIC id -  uchar version;                // I/O APIC version -  uchar flags;                  // I/O APIC flags -  uint *addr;                  // I/O APIC address -}; - -// Table entry types -#define MPPROC    0x00  // One per processor -#define MPBUS     0x01  // One per bus -#define MPIOAPIC  0x02  // One per I/O APIC -#define MPIOINTR  0x03  // One per bus interrupt source -#define MPLINTR   0x04  // One per system interrupt source - -//PAGEBREAK! -// Blank page. diff --git a/picirq.c b/picirq.c deleted file mode 100644 index e26957f..0000000 --- a/picirq.c +++ /dev/null @@ -1,19 +0,0 @@ -#include "types.h" -#include "x86.h" -#include "traps.h" - -// I/O Addresses of the two programmable interrupt controllers -#define IO_PIC1         0x20    // Master (IRQs 0-7) -#define IO_PIC2         0xA0    // Slave (IRQs 8-15) - -// Don't use the 8259A interrupt controllers.  Xv6 assumes SMP hardware. -void -picinit(void) -{ -  // mask all interrupts -  outb(IO_PIC1+1, 0xFF); -  outb(IO_PIC2+1, 0xFF); -} - -//PAGEBREAK! -// Blank page. 
@@ -1,121 +0,0 @@ -#include "types.h" -#include "defs.h" -#include "param.h" -#include "mmu.h" -#include "proc.h" -#include "fs.h" -#include "spinlock.h" -#include "sleeplock.h" -#include "file.h" - -#define PIPESIZE 512 - -struct pipe { -  struct spinlock lock; -  char data[PIPESIZE]; -  uint nread;     // number of bytes read -  uint nwrite;    // number of bytes written -  int readopen;   // read fd is still open -  int writeopen;  // write fd is still open -}; - -int -pipealloc(struct file **f0, struct file **f1) -{ -  struct pipe *p; - -  p = 0; -  *f0 = *f1 = 0; -  if((*f0 = filealloc()) == 0 || (*f1 = filealloc()) == 0) -    goto bad; -  if((p = (struct pipe*)kalloc()) == 0) -    goto bad; -  p->readopen = 1; -  p->writeopen = 1; -  p->nwrite = 0; -  p->nread = 0; -  initlock(&p->lock, "pipe"); -  (*f0)->type = FD_PIPE; -  (*f0)->readable = 1; -  (*f0)->writable = 0; -  (*f0)->pipe = p; -  (*f1)->type = FD_PIPE; -  (*f1)->readable = 0; -  (*f1)->writable = 1; -  (*f1)->pipe = p; -  return 0; - -//PAGEBREAK: 20 - bad: -  if(p) -    kfree((char*)p); -  if(*f0) -    fileclose(*f0); -  if(*f1) -    fileclose(*f1); -  return -1; -} - -void -pipeclose(struct pipe *p, int writable) -{ -  acquire(&p->lock); -  if(writable){ -    p->writeopen = 0; -    wakeup(&p->nread); -  } else { -    p->readopen = 0; -    wakeup(&p->nwrite); -  } -  if(p->readopen == 0 && p->writeopen == 0){ -    release(&p->lock); -    kfree((char*)p); -  } else -    release(&p->lock); -} - -//PAGEBREAK: 40 -int -pipewrite(struct pipe *p, char *addr, int n) -{ -  int i; - -  acquire(&p->lock); -  for(i = 0; i < n; i++){ -    while(p->nwrite == p->nread + PIPESIZE){  //DOC: pipewrite-full -      if(p->readopen == 0 || myproc()->killed){ -        release(&p->lock); -        return -1; -      } -      wakeup(&p->nread); -      sleep(&p->nwrite, &p->lock);  //DOC: pipewrite-sleep -    } -    p->data[p->nwrite++ % PIPESIZE] = addr[i]; -  } -  wakeup(&p->nread);  //DOC: pipewrite-wakeup1 -  
release(&p->lock); -  return n; -} - -int -piperead(struct pipe *p, char *addr, int n) -{ -  int i; - -  acquire(&p->lock); -  while(p->nread == p->nwrite && p->writeopen){  //DOC: pipe-empty -    if(myproc()->killed){ -      release(&p->lock); -      return -1; -    } -    sleep(&p->nread, &p->lock); //DOC: piperead-sleep -  } -  for(i = 0; i < n; i++){  //DOC: piperead-copy -    if(p->nread == p->nwrite) -      break; -    addr[i] = p->data[p->nread++ % PIPESIZE]; -  } -  wakeup(&p->nwrite);  //DOC: piperead-wakeup -  release(&p->lock); -  return i; -} @@ -1,36 +0,0 @@ -#!/usr/bin/perl - -use POSIX qw(strftime); - -if($ARGV[0] eq "-h"){ -	shift @ARGV; -	$h = $ARGV[0]; -	shift @ARGV; -}else{ -	$h = $ARGV[0]; -} - -$page = 0; -$now = strftime "%b %e %H:%M %Y", localtime; - -@lines = <>; -for($i=0; $i<@lines; $i+=50){ -	print "\n\n"; -	++$page; -	print "$now  $h  Page $page\n"; -	print "\n\n"; -	for($j=$i; $j<@lines && $j<$i +50; $j++){ -		$lines[$j] =~ s!//DOC.*!!; -		print $lines[$j]; -	} -	for(; $j<$i+50; $j++){ -		print "\n"; -	} -	$sheet = ""; -	if($lines[$i] =~ /^([0-9][0-9])[0-9][0-9] /){ -		$sheet = "Sheet $1"; -	} -	print "\n\n"; -	print "$sheet\n"; -	print "\n\n"; -} diff --git a/printpcs b/printpcs deleted file mode 100755 index 81d039b..0000000 --- a/printpcs +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh - -# Decode the symbols from a panic EIP list - -# Find a working addr2line -for p in i386-jos-elf-addr2line addr2line; do -    if which $p 2>&1 >/dev/null && \ -       $p -h 2>&1 | grep -q '\belf32-i386\b'; then -        break -    fi -done - -# Enable as much pretty-printing as this addr2line can do -$p $($p -h | grep ' -[aipsf] ' | awk '{print $1}') -e kernel "$@" @@ -1,534 +0,0 @@ -#include "types.h" -#include "defs.h" -#include "param.h" -#include "memlayout.h" -#include "mmu.h" -#include "x86.h" -#include "proc.h" -#include "spinlock.h" - -struct { -  struct spinlock lock; -  struct proc proc[NPROC]; -} ptable; - -static struct proc *initproc; - -int 
nextpid = 1; -extern void forkret(void); -extern void trapret(void); - -static void wakeup1(void *chan); - -void -pinit(void) -{ -  initlock(&ptable.lock, "ptable"); -} - -// Must be called with interrupts disabled -int -cpuid() { -  return mycpu()-cpus; -} - -// Must be called with interrupts disabled to avoid the caller being -// rescheduled between reading lapicid and running through the loop. -struct cpu* -mycpu(void) -{ -  int apicid, i; -   -  if(readeflags()&FL_IF) -    panic("mycpu called with interrupts enabled\n"); -   -  apicid = lapicid(); -  // APIC IDs are not guaranteed to be contiguous. Maybe we should have -  // a reverse map, or reserve a register to store &cpus[i]. -  for (i = 0; i < ncpu; ++i) { -    if (cpus[i].apicid == apicid) -      return &cpus[i]; -  } -  panic("unknown apicid\n"); -} - -// Disable interrupts so that we are not rescheduled -// while reading proc from the cpu structure -struct proc* -myproc(void) { -  struct cpu *c; -  struct proc *p; -  pushcli(); -  c = mycpu(); -  p = c->proc; -  popcli(); -  return p; -} - -//PAGEBREAK: 32 -// Look in the process table for an UNUSED proc. -// If found, change state to EMBRYO and initialize -// state required to run in the kernel. -// Otherwise return 0. -static struct proc* -allocproc(void) -{ -  struct proc *p; -  char *sp; - -  acquire(&ptable.lock); - -  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++) -    if(p->state == UNUSED) -      goto found; - -  release(&ptable.lock); -  return 0; - -found: -  p->state = EMBRYO; -  p->pid = nextpid++; - -  release(&ptable.lock); - -  // Allocate kernel stack. -  if((p->kstack = kalloc()) == 0){ -    p->state = UNUSED; -    return 0; -  } -  sp = p->kstack + KSTACKSIZE; - -  // Leave room for trap frame. -  sp -= sizeof *p->tf; -  p->tf = (struct trapframe*)sp; - -  // Set up new context to start executing at forkret, -  // which returns to trapret. 
-  sp -= 4; -  *(uint*)sp = (uint)trapret; - -  sp -= sizeof *p->context; -  p->context = (struct context*)sp; -  memset(p->context, 0, sizeof *p->context); -  p->context->eip = (uint)forkret; - -  return p; -} - -//PAGEBREAK: 32 -// Set up first user process. -void -userinit(void) -{ -  struct proc *p; -  extern char _binary_initcode_start[], _binary_initcode_size[]; - -  p = allocproc(); -   -  initproc = p; -  if((p->pgdir = setupkvm()) == 0) -    panic("userinit: out of memory?"); -  inituvm(p->pgdir, _binary_initcode_start, (int)_binary_initcode_size); -  p->sz = PGSIZE; -  memset(p->tf, 0, sizeof(*p->tf)); -  p->tf->cs = (SEG_UCODE << 3) | DPL_USER; -  p->tf->ds = (SEG_UDATA << 3) | DPL_USER; -  p->tf->es = p->tf->ds; -  p->tf->ss = p->tf->ds; -  p->tf->eflags = FL_IF; -  p->tf->esp = PGSIZE; -  p->tf->eip = 0;  // beginning of initcode.S - -  safestrcpy(p->name, "initcode", sizeof(p->name)); -  p->cwd = namei("/"); - -  // this assignment to p->state lets other cores -  // run this process. the acquire forces the above -  // writes to be visible, and the lock is also needed -  // because the assignment might not be atomic. -  acquire(&ptable.lock); - -  p->state = RUNNABLE; - -  release(&ptable.lock); -} - -// Grow current process's memory by n bytes. -// Return 0 on success, -1 on failure. -int -growproc(int n) -{ -  uint sz; -  struct proc *curproc = myproc(); - -  sz = curproc->sz; -  if(n > 0){ -    if((sz = allocuvm(curproc->pgdir, sz, sz + n)) == 0) -      return -1; -  } else if(n < 0){ -    if((sz = deallocuvm(curproc->pgdir, sz, sz + n)) == 0) -      return -1; -  } -  curproc->sz = sz; -  switchuvm(curproc); -  return 0; -} - -// Create a new process copying p as the parent. -// Sets up stack to return as if from system call. -// Caller must set state of returned proc to RUNNABLE. -int -fork(void) -{ -  int i, pid; -  struct proc *np; -  struct proc *curproc = myproc(); - -  // Allocate process. 
-  if((np = allocproc()) == 0){ -    return -1; -  } - -  // Copy process state from proc. -  if((np->pgdir = copyuvm(curproc->pgdir, curproc->sz)) == 0){ -    kfree(np->kstack); -    np->kstack = 0; -    np->state = UNUSED; -    return -1; -  } -  np->sz = curproc->sz; -  np->parent = curproc; -  *np->tf = *curproc->tf; - -  // Clear %eax so that fork returns 0 in the child. -  np->tf->eax = 0; - -  for(i = 0; i < NOFILE; i++) -    if(curproc->ofile[i]) -      np->ofile[i] = filedup(curproc->ofile[i]); -  np->cwd = idup(curproc->cwd); - -  safestrcpy(np->name, curproc->name, sizeof(curproc->name)); - -  pid = np->pid; - -  acquire(&ptable.lock); - -  np->state = RUNNABLE; - -  release(&ptable.lock); - -  return pid; -} - -// Exit the current process.  Does not return. -// An exited process remains in the zombie state -// until its parent calls wait() to find out it exited. -void -exit(void) -{ -  struct proc *curproc = myproc(); -  struct proc *p; -  int fd; - -  if(curproc == initproc) -    panic("init exiting"); - -  // Close all open files. -  for(fd = 0; fd < NOFILE; fd++){ -    if(curproc->ofile[fd]){ -      fileclose(curproc->ofile[fd]); -      curproc->ofile[fd] = 0; -    } -  } - -  begin_op(); -  iput(curproc->cwd); -  end_op(); -  curproc->cwd = 0; - -  acquire(&ptable.lock); - -  // Parent might be sleeping in wait(). -  wakeup1(curproc->parent); - -  // Pass abandoned children to init. -  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ -    if(p->parent == curproc){ -      p->parent = initproc; -      if(p->state == ZOMBIE) -        wakeup1(initproc); -    } -  } - -  // Jump into the scheduler, never to return. -  curproc->state = ZOMBIE; -  sched(); -  panic("zombie exit"); -} - -// Wait for a child process to exit and return its pid. -// Return -1 if this process has no children. 
-int -wait(void) -{ -  struct proc *p; -  int havekids, pid; -  struct proc *curproc = myproc(); -   -  acquire(&ptable.lock); -  for(;;){ -    // Scan through table looking for exited children. -    havekids = 0; -    for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ -      if(p->parent != curproc) -        continue; -      havekids = 1; -      if(p->state == ZOMBIE){ -        // Found one. -        pid = p->pid; -        kfree(p->kstack); -        p->kstack = 0; -        freevm(p->pgdir); -        p->pid = 0; -        p->parent = 0; -        p->name[0] = 0; -        p->killed = 0; -        p->state = UNUSED; -        release(&ptable.lock); -        return pid; -      } -    } - -    // No point waiting if we don't have any children. -    if(!havekids || curproc->killed){ -      release(&ptable.lock); -      return -1; -    } - -    // Wait for children to exit.  (See wakeup1 call in proc_exit.) -    sleep(curproc, &ptable.lock);  //DOC: wait-sleep -  } -} - -//PAGEBREAK: 42 -// Per-CPU process scheduler. -// Each CPU calls scheduler() after setting itself up. -// Scheduler never returns.  It loops, doing: -//  - choose a process to run -//  - swtch to start running that process -//  - eventually that process transfers control -//      via swtch back to the scheduler. -void -scheduler(void) -{ -  struct proc *p; -  struct cpu *c = mycpu(); -  c->proc = 0; -   -  for(;;){ -    // Enable interrupts on this processor. -    sti(); - -    // Loop over process table looking for process to run. -    acquire(&ptable.lock); -    for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ -      if(p->state != RUNNABLE) -        continue; - -      // Switch to chosen process.  It is the process's job -      // to release ptable.lock and then reacquire it -      // before jumping back to us. -      c->proc = p; -      switchuvm(p); -      p->state = RUNNING; - -      swtch(&(c->scheduler), p->context); -      switchkvm(); - -      // Process is done running for now. 
-      // It should have changed its p->state before coming back. -      c->proc = 0; -    } -    release(&ptable.lock); - -  } -} - -// Enter scheduler.  Must hold only ptable.lock -// and have changed proc->state. Saves and restores -// intena because intena is a property of this -// kernel thread, not this CPU. It should -// be proc->intena and proc->ncli, but that would -// break in the few places where a lock is held but -// there's no process. -void -sched(void) -{ -  int intena; -  struct proc *p = myproc(); - -  if(!holding(&ptable.lock)) -    panic("sched ptable.lock"); -  if(mycpu()->ncli != 1) -    panic("sched locks"); -  if(p->state == RUNNING) -    panic("sched running"); -  if(readeflags()&FL_IF) -    panic("sched interruptible"); -  intena = mycpu()->intena; -  swtch(&p->context, mycpu()->scheduler); -  mycpu()->intena = intena; -} - -// Give up the CPU for one scheduling round. -void -yield(void) -{ -  acquire(&ptable.lock);  //DOC: yieldlock -  myproc()->state = RUNNABLE; -  sched(); -  release(&ptable.lock); -} - -// A fork child's very first scheduling by scheduler() -// will swtch here.  "Return" to user space. -void -forkret(void) -{ -  static int first = 1; -  // Still holding ptable.lock from scheduler. -  release(&ptable.lock); - -  if (first) { -    // Some initialization functions must be run in the context -    // of a regular process (e.g., they call sleep), and thus cannot -    // be run from main(). -    first = 0; -    iinit(ROOTDEV); -    initlog(ROOTDEV); -  } - -  // Return to "caller", actually trapret (see allocproc). -} - -// Atomically release lock and sleep on chan. -// Reacquires lock when awakened. -void -sleep(void *chan, struct spinlock *lk) -{ -  struct proc *p = myproc(); -   -  if(p == 0) -    panic("sleep"); - -  if(lk == 0) -    panic("sleep without lk"); - -  // Must acquire ptable.lock in order to -  // change p->state and then call sched. 
-  // Once we hold ptable.lock, we can be -  // guaranteed that we won't miss any wakeup -  // (wakeup runs with ptable.lock locked), -  // so it's okay to release lk. -  if(lk != &ptable.lock){  //DOC: sleeplock0 -    acquire(&ptable.lock);  //DOC: sleeplock1 -    release(lk); -  } -  // Go to sleep. -  p->chan = chan; -  p->state = SLEEPING; - -  sched(); - -  // Tidy up. -  p->chan = 0; - -  // Reacquire original lock. -  if(lk != &ptable.lock){  //DOC: sleeplock2 -    release(&ptable.lock); -    acquire(lk); -  } -} - -//PAGEBREAK! -// Wake up all processes sleeping on chan. -// The ptable lock must be held. -static void -wakeup1(void *chan) -{ -  struct proc *p; - -  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++) -    if(p->state == SLEEPING && p->chan == chan) -      p->state = RUNNABLE; -} - -// Wake up all processes sleeping on chan. -void -wakeup(void *chan) -{ -  acquire(&ptable.lock); -  wakeup1(chan); -  release(&ptable.lock); -} - -// Kill the process with the given pid. -// Process won't exit until it returns -// to user space (see trap in trap.c). -int -kill(int pid) -{ -  struct proc *p; - -  acquire(&ptable.lock); -  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ -    if(p->pid == pid){ -      p->killed = 1; -      // Wake process from sleep if necessary. -      if(p->state == SLEEPING) -        p->state = RUNNABLE; -      release(&ptable.lock); -      return 0; -    } -  } -  release(&ptable.lock); -  return -1; -} - -//PAGEBREAK: 36 -// Print a process listing to console.  For debugging. -// Runs when user types ^P on console. -// No lock to avoid wedging a stuck machine further. 
-void -procdump(void) -{ -  static char *states[] = { -  [UNUSED]    "unused", -  [EMBRYO]    "embryo", -  [SLEEPING]  "sleep ", -  [RUNNABLE]  "runble", -  [RUNNING]   "run   ", -  [ZOMBIE]    "zombie" -  }; -  int i; -  struct proc *p; -  char *state; -  uint pc[10]; - -  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ -    if(p->state == UNUSED) -      continue; -    if(p->state >= 0 && p->state < NELEM(states) && states[p->state]) -      state = states[p->state]; -    else -      state = "???"; -    cprintf("%d %s %s", p->pid, state, p->name); -    if(p->state == SLEEPING){ -      getcallerpcs((uint*)p->context->ebp+2, pc); -      for(i=0; i<10 && pc[i] != 0; i++) -        cprintf(" %p", pc[i]); -    } -    cprintf("\n"); -  } -} @@ -1,58 +0,0 @@ -// Per-CPU state -struct cpu { -  uchar apicid;                // Local APIC ID -  struct context *scheduler;   // swtch() here to enter scheduler -  struct taskstate ts;         // Used by x86 to find stack for interrupt -  struct segdesc gdt[NSEGS];   // x86 global descriptor table -  volatile uint started;       // Has the CPU started? -  int ncli;                    // Depth of pushcli nesting. -  int intena;                  // Were interrupts enabled before pushcli? -  struct proc *proc;           // The process running on this cpu or null -}; - -extern struct cpu cpus[NCPU]; -extern int ncpu; - -//PAGEBREAK: 17 -// Saved registers for kernel context switches. -// Don't need to save all the segment registers (%cs, etc), -// because they are constant across kernel contexts. -// Don't need to save %eax, %ecx, %edx, because the -// x86 convention is that the caller has saved them. -// Contexts are stored at the bottom of the stack they -// describe; the stack pointer is the address of the context. -// The layout of the context matches the layout of the stack in swtch.S -// at the "Switch stacks" comment. Switch doesn't save eip explicitly, -// but it is on the stack and allocproc() manipulates it. 
-struct context { -  uint edi; -  uint esi; -  uint ebx; -  uint ebp; -  uint eip; -}; - -enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE }; - -// Per-process state -struct proc { -  uint sz;                     // Size of process memory (bytes) -  pde_t* pgdir;                // Page table -  char *kstack;                // Bottom of kernel stack for this process -  enum procstate state;        // Process state -  int pid;                     // Process ID -  struct proc *parent;         // Parent process -  struct trapframe *tf;        // Trap frame for current syscall -  struct context *context;     // swtch() here to run process -  void *chan;                  // If non-zero, sleeping on chan -  int killed;                  // If non-zero, have been killed -  struct file *ofile[NOFILE];  // Open files -  struct inode *cwd;           // Current directory -  char name[16];               // Process name (debugging) -}; - -// Process memory is laid out contiguously, low addresses first: -//   text -//   original data and bss -//   fixed-size stack -//   expandable heap @@ -1,246 +0,0 @@ -#!/bin/sh - -echo This script takes a minute to run.  Be patient. 
1>&2 - -LC_CTYPE=C export LC_CTYPE - -# pad stdin to multiple of 120 lines -pad() -{ -	awk '{print} END{for(; NR%120!=0; NR++) print ""}' -} - -# create formatted (numbered) files -mkdir -p fmt -rm -f fmt/* -cp README fmt -echo > fmt/blank -files=`grep -v '^#' runoff.list | awk '{print $1}'` -n=99 -for i in $files -do -	./runoff1 -n $n $i >fmt/$i -	nn=`tail -1 fmt/$i | sed 's/ .*//; s/^0*//'` -	if [ "x$nn" != x ]; then -		n=$nn -	fi -done - -# create table of contents -cat toc.hdr >fmt/toc -pr -e8 -t runoff.list | awk ' -/^[a-z0-9]/ { -	s=$0 -	f="fmt/"$1 -	getline<f -	close(f) -	n=$1 -	printf("%02d %s\n", n/100, s); -	printf("TOC: %04d %s\n", n, s) >"fmt/tocdata" -	next -} -{ -	print -}' | pr -3 -t >>fmt/toc -cat toc.ftr >>fmt/toc - -# check for bad alignments -perl -e ' -	$leftwarn = 0; -	while(<>){ -		chomp; -		s!#.*!!; -		s!\s+! !g; -		s! +$!!; -		next if /^$/; -		 -		if(/TOC: (\d+) (.*)/){ -			$toc{$2} = $1; -			next; -		} -		 -		if(/sheet1: (left|right)$/){ -			print STDERR "assuming that sheet 1 is a $1 page.  double-check!\n"; -			$left = $1 eq "left" ? "13579" : "02468"; -			$right = $1 eq "left" ? 
"02468" : "13579"; -			next; -		} -		 -		if(/even: (.*)/){ -			$file = $1; -			if(!defined($toc{$file})){ -				print STDERR "Have no toc for $file\n"; -				next; -			} -			if($toc{$file} =~ /^\d\d[^0]/){ -				print STDERR "$file does not start on a fresh page.\n"; -			} -			next; -		} -		 -		if(/odd: (.*)/){ -			$file = $1; -			if(!defined($toc{$file})){ -				print STDERR "Have no toc for $file\n"; -				next; -			} -			if($toc{$file} !~ /^\d\d5/){ -				print STDERR "$file does not start on a second half page.\n"; -			} -			next; -		} -		 -		if(/(left|right): (.*)/){ -			$what = $1; -			$file = $2; -			if(!defined($toc{$file})){ -				print STDERR "Have no toc for $file\n"; -				next; -			} -			if($what eq "left" && !($toc{$file} =~ /^\d[$left][05]/)){ -				print STDERR "$file does not start on a left page [$toc{$file}]\n"; -			} -			# why does this not work if I inline $x in the if? -			$x = ($toc{$file} =~ /^\d[$right][05]/); -			if($what eq "right" && !$x){ -				print STDERR "$file does not start on a right page [$toc{$file}] [$x]\n"; -			} -			next; -		} -		 -		print STDERR "Unknown spec: $_\n"; -	} -' fmt/tocdata runoff.spec - -# make definition list -cd fmt -perl -e ' -	while(<>) { -		chomp; - -		s!//.*!!; -		s!/\*([^*]|[*][^/])*\*/!!g; -		s!\s! !g; -		s! 
+$!!; - -		# look for declarations like char* x; -		if (/^[0-9]+ typedef .* u(int|short|long|char);/) { -			next; -		} -		if (/^[0-9]+ extern/) { -			next; -		} -		if (/^[0-9]+ struct [a-zA-Z0-9_]+;/) { -			next; -		} -		if (/^([0-9]+) #define +([A-za-z0-9_]+) +?\(.*/) { -			print "$1 $2\n" -		} -		elsif (/^([0-9]+) #define +([A-Za-z0-9_]+) +([^ ]+)/) { -			print "$1 $2 $3\n"; -		} -		elsif (/^([0-9]+) #define +([A-Za-z0-9_]+)/) { -			print "$1 $2\n"; -		} -		 -		if(/^^([0-9]+) \.globl ([a-zA-Z0-9_]+)/){ -			$isglobl{$2} = 1; -		} -		if(/^^([0-9]+) ([a-zA-Z0-9_]+):$/ && $isglobl{$2}){ -			print "$1 $2\n"; -		} -		 -		if (/\(/) { -			next; -		} - -		if (/^([0-9]+) (((static|struct|extern|union|enum) +)*([A-Za-z0-9_]+))( .*)? +([A-Za-z_][A-Za-z0-9_]*)(,|;|=| =)/) { -			print "$1 $7\n"; -		} -		 -		elsif(/^([0-9]+) (enum|struct|union) +([A-Za-z0-9_]+) +{/){  -			print "$1 $3\n"; -		} -		# TODO: enum members -	} -' $files >defs - -(for i in $files -do -	case "$i" in -	*.S) -		cat $i | sed 's;#.*;;; s;//.*;;;' -		;; -	*) -		cat $i | sed 's;//.*;;; s;"([^"\\]|\\.)*";;;' -	esac -done -) >alltext - -perl -n -e 'print if s/^([0-9]+ [a-zA-Z0-9_]+)\(.*$/\1/;' alltext | -	egrep -v ' (STUB|usage|main|if|for)$' >>defs -#perl -n -e 'print if s/^([0-9]+) STUB\(([a-zA-Z0-9_]+)\)$/\1 \2/;' alltext \ -#	>>defs -( ->s.defs - -# make reference list -for i in `awk '{print $2}' defs | sort -f | uniq` -do -	defs=`egrep '^[0-9]+ '$i'( |$)' defs | awk '{print $1}'` -	echo $i $defs >>s.defs -	uses=`egrep -h '([^a-zA-Z_0-9])'$i'($|[^a-zA-Z_0-9])' alltext | awk '{print $1}'` -	if [ "x$defs" != "x$uses" ]; then -		echo $i $defs -		echo $uses |fmt -29 | sed 's/^/    /' -#	else -#		echo $i defined but not used >&2 -	fi -done -) >refs - -# build defs list -awk ' -{ -	printf("%04d %s\n", $2, $1); -	for(i=3; i<=NF; i++) -		printf("%04d    \" \n", $i); -} -' s.defs > t.defs - -# format the whole thing -( -	../pr.pl README -	../pr.pl -h "table of contents" toc -	# pr -t -2 t.defs | ../pr.pl -h 
"definitions" | pad -	pr -t -l50 -2 refs | ../pr.pl -h "cross-references" | pad -	# pr.pl -h "definitions" -2 t.defs | pad -	# pr.pl -h "cross-references" -2 refs | pad -	../pr.pl blank  # make sheet 1 start on left page -	../pr.pl blank -	for i in $files -	do -		../pr.pl -h "xv6/$i" $i -	done -) | mpage -m50t50b -o -bLetter -T -t -2 -FCourier -L60 >all.ps -grep Pages: all.ps - -# if we have the nice font, use it -nicefont=LucidaSans-Typewriter83 -if [ ! -f ../$nicefont ] -then -	if git cat-file blob font:$nicefont > ../$nicefont~; then -		mv ../$nicefont~ ../$nicefont -	fi -fi -if [ -f ../$nicefont ] -then -	echo nicefont -	(sed 1q all.ps; cat ../$nicefont; sed "1d; s/Courier/$nicefont/" all.ps) >allf.ps -else -	echo ugly font! -	cp all.ps allf.ps -fi -ps2pdf allf.ps ../xv6.pdf -# cd .. -# pdftops xv6.pdf xv6.ps diff --git a/runoff.list b/runoff.list deleted file mode 100644 index 2df9b81..0000000 --- a/runoff.list +++ /dev/null @@ -1,80 +0,0 @@ -# basic headers -types.h -param.h -memlayout.h -defs.h -x86.h -asm.h -mmu.h -elf.h -date.h - -# entering xv6 -entry.S -entryother.S -main.c - -# locks -spinlock.h -spinlock.c - -# processes -vm.c -proc.h -proc.c -swtch.S -kalloc.c - -# system calls -traps.h -vectors.pl -trapasm.S -trap.c -syscall.h -syscall.c -sysproc.c - -# file system -buf.h -sleeplock.h -fcntl.h -stat.h -fs.h -file.h -ide.c -bio.c -sleeplock.c -log.c -fs.c -file.c -sysfile.c -exec.c - -# pipes -pipe.c - -# string operations -string.c - -# low-level hardware -mp.h -mp.c -lapic.c -ioapic.c -kbd.h -kbd.c -console.c -uart.c - -# user-level -initcode.S -usys.S -init.c -sh.c - -# bootloader -bootasm.S -bootmain.c - -# link -kernel.ld diff --git a/runoff.spec b/runoff.spec deleted file mode 100644 index 9247948..0000000 --- a/runoff.spec +++ /dev/null @@ -1,102 +0,0 @@ -# Is sheet 01 (after the TOC) a left sheet or a right sheet? -sheet1: left - -# "left" and "right" specify which page of a two-page spread a file -# must start on.  
"left" means that a file must start on the first of -# the two pages.  "right" means it must start on the second of the two -# pages.  The file may start in either column. -# -# "even" and "odd" specify which column a file must start on.  "even" -# means it must start in the left of the two columns (00).  "odd" means it -# must start in the right of the two columns (50). -# -# You'd think these would be the other way around. - -# types.h either -# param.h either -# defs.h either -# x86.h either -# asm.h either -# mmu.h either -# elf.h either -# mp.h either - -even: entry.S  # mild preference -even: entryother.S  # mild preference -even: main.c -# mp.c don't care at all -# even: initcode.S -# odd: init.c - -left: spinlock.h -even: spinlock.h - -# This gets struct proc and allocproc on the same spread -left: proc.h -even: proc.h - -# goal is to have two action-packed 2-page spreads, -# one with -#     userinit growproc fork exit wait -# and another with -#     scheduler sched yield forkret sleep wakeup1 wakeup -right: proc.c   # VERY important -even: proc.c   # VERY important - -# A few more action packed spreads -# page table creation and process loading -#     walkpgdir mappages setupkvm switch[ku]vm inituvm (loaduvm) -# process memory management -#     allocuvm deallocuvm freevm -left: vm.c - -even: kalloc.c  # mild preference - -# syscall.h either -# trapasm.S either -# traps.h either -# even: trap.c -# vectors.pl either -# syscall.c either -# sysproc.c either - -# buf.h either -# dev.h either -# fcntl.h either -# stat.h either -# file.h either -# fs.h either -# fsvar.h either -# left: ide.c # mild preference -even: ide.c -# odd: bio.c - -# log.c fits nicely in a spread -even: log.c -left: log.c - -# with fs.c starting on 2nd column of a left page, we get these 2-page spreads: -#	ialloc iupdate iget idup ilock iunlock iput iunlockput -#	bmap itrunc stati readi writei -#	namecmp dirlookup dirlink skipelem namex namei -#	fileinit filealloc filedup fileclose 
filestat fileread filewrite -# starting on 2nd column of a right page is not terrible either -odd: fs.c   # VERY important -left: fs.c  # mild preference -# file.c either -# exec.c either -# sysfile.c either - -# Mild preference, but makes spreads of mp.c, lapic.c, and ioapic.c+picirq.c -even: mp.c -left: mp.c - -# even: pipe.c  # mild preference -# string.c either -# left: kbd.h  # mild preference -even: kbd.h -even: console.c -odd: sh.c - -even: bootasm.S   # mild preference -even: bootmain.c  # mild preference diff --git a/runoff1 b/runoff1 deleted file mode 100755 index 532f844..0000000 --- a/runoff1 +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/perl - -$n = 0; -$v = 0; -if($ARGV[0] eq "-v") { -	$v = 1; -	shift @ARGV; -} -if($ARGV[0] eq "-n") { -	$n = $ARGV[1]; -	shift @ARGV; -	shift @ARGV; -} -$n = int(($n+49)/50)*50 - 1; - -$file = $ARGV[0]; -@lines = <>; -$linenum = 0; -foreach (@lines) { -	$linenum++; -	chomp; -	s/\s+$//; -	if(length() >= 75){ -		print STDERR "$file:$linenum: line too long\n"; -	} -} -@outlines = (); -$nextout = 0; - -for($i=0; $i<@lines; ){ -	# Skip leading blank lines. -	$i++ while $i<@lines && $lines[$i] =~ /^$/; -	last if $i>=@lines; - -	# If the rest of the file fits, use the whole thing. -	if(@lines <= $i+50 && !grep { /PAGEBREAK/ } @lines){ -		$breakbefore = @lines; -	}else{ -		# Find a good next page break; -		# Hope for end of function. -		# but settle for a blank line (but not first blank line -		# in function, which comes after variable declarations). 
-		$breakbefore = $i; -		$lastblank = $i; -		$sawbrace = 0; -		$breaksize = 15;  # 15 lines to get to function -		for($j=$i; $j<$i+50 && $j < @lines; $j++){ -			if($lines[$j] =~ /PAGEBREAK!/){ -				$lines[$j] = ""; -				$breakbefore = $j; -				$breaksize = 100; -				last; -			} -			if($lines[$j] =~ /PAGEBREAK:\s*([0-9]+)/){ -				$breaksize = $1; -				$breakbefore = $j; -				$lines[$j] = ""; -			} -			if($lines[$j] =~ /^};?$/){ -				$breakbefore = $j+1; -				$breaksize = 15; -			} -			if($lines[$j] =~ /^{$/){ -				$sawbrace = 1; -			} -			if($lines[$j] =~ /^$/){ -				if($sawbrace){ -					$sawbrace = 0; -				}else{ -					$lastblank = $j; -				} -			} -		} -		if($j<@lines && $lines[$j] =~ /^$/){ -			$lastblank = $j; -		} - -		# If we are not putting enough on a page, try a blank line. -		if($breakbefore - $i < 50 - $breaksize && $lastblank > $breakbefore && $lastblank >= $i+50 - 5){ -			if($v){ -				print STDERR "breakbefore $breakbefore i $i breaksize $breaksize\n"; -			} -			$breakbefore = $lastblank; -			$breaksize = 5;  # only 5 lines to get to blank line -		} - -		# If we are not putting enough on a page, force a full page. -		if($breakbefore - $i < 50 - $breaksize && $breakbefore != @lines){ -			$breakbefore = $i + 50; -			$breakbefore = @lines if @lines < $breakbefore; -		} - -		if($breakbefore < $i+2){ -			$breakbefore = $i+2; -		} -	} - -	# Emit the page. 
-	$i50 = $i + 50; -	for(; $i<$breakbefore; $i++){ -		printf "%04d %s\n", ++$n, $lines[$i]; -	} - -	# Finish page -	for($j=$i; $j<$i50; $j++){ -		printf "%04d \n", ++$n; -	} -} @@ -1,3 +0,0 @@ -#!/bin/sh - -runoff1 "$@" | pr.pl -h "xv6/$@" | mpage -m50t50b -o -bLetter -T -t -2 -FLucidaSans-Typewriter83 -L60 >x.ps; gv --swap x.ps diff --git a/sign.pl b/sign.pl deleted file mode 100755 index d793035..0000000 --- a/sign.pl +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/perl - -open(SIG, $ARGV[0]) || die "open $ARGV[0]: $!"; - -$n = sysread(SIG, $buf, 1000); - -if($n > 510){ -  print STDERR "boot block too large: $n bytes (max 510)\n"; -  exit 1; -} - -print STDERR "boot block is $n bytes (max 510)\n"; - -$buf .= "\0" x (510-$n); -$buf .= "\x55\xAA"; - -open(SIG, ">$ARGV[0]") || die "open >$ARGV[0]: $!"; -print SIG $buf; -close SIG; diff --git a/sleep1.p b/sleep1.p deleted file mode 100644 index af69772..0000000 --- a/sleep1.p +++ /dev/null @@ -1,134 +0,0 @@ -/* -This file defines a Promela model for xv6's -acquire, release, sleep, and wakeup, along with -a model of a simple producer/consumer queue. - -To run: -	spinp sleep1.p - -(You may need to install Spin, available at http://spinroot.com/.) - -After a successful run spin prints something like: - -	unreached in proctype consumer -		(0 of 37 states) -	unreached in proctype producer -		(0 of 23 states) - -After an unsuccessful run, the spinp script prints -an execution trace that causes a deadlock. - -The safe body of producer reads: - -		acquire(lk); -		x = value; value = x + 1; x = 0; -		wakeup(0); -		release(lk); -		i = i + 1; - -If this is changed to: - -		x = value; value = x + 1; x = 0; -		acquire(lk); -		wakeup(0); -		release(lk); -		i = i + 1; - -then a deadlock can happen, because the non-atomic -increment of value conflicts with the non-atomic  -decrement in consumer, causing value to have a bad value. -Try this. 
- -If it is changed to: - -		acquire(lk); -		x = value; value = x + 1; x = 0; -		release(lk); -		wakeup(0); -		i = i + 1; - -then nothing bad happens: it is okay to wakeup after release -instead of before, although it seems morally wrong. -*/ - -#define ITER 4 -#define N 2 - -bit lk; -byte value; -bit sleeping[N]; - -inline acquire(x) -{ -	atomic { x == 0; x = 1 } -} - -inline release(x) -{ -	assert x==1; -	x = 0 -} - -inline sleep(cond, lk) -{ -	assert !sleeping[_pid]; -	if -	:: cond -> -		skip -	:: else -> -		atomic { release(lk); sleeping[_pid] = 1 }; -		sleeping[_pid] == 0; -		acquire(lk) -	fi -} - -inline wakeup() -{ -	w = 0; -	do -	:: w < N -> -		sleeping[w] = 0; -		w = w + 1 -	:: else -> -		break -	od -} - -active[N] proctype consumer() -{ -	byte i, x; -	 -	i = 0; -	do -	:: i < ITER -> -		acquire(lk); -		sleep(value > 0, lk); -		x = value; value = x - 1; x = 0; -		release(lk); -		i = i + 1; -	:: else -> -		break -	od; -	i = 0; -	skip -} - -active[N] proctype producer() -{ -	byte i, x, w; -	 -	i = 0; -	do -	:: i < ITER -> -		acquire(lk); -		x = value; value = x + 1; x = 0; -		release(lk); -		wakeup(); -		i = i + 1; -	:: else -> -		break -	od; -	i = 0; -	skip	 -} - diff --git a/spinlock.c b/spinlock.c deleted file mode 100644 index 4020186..0000000 --- a/spinlock.c +++ /dev/null @@ -1,126 +0,0 @@ -// Mutual exclusion spin locks. - -#include "types.h" -#include "defs.h" -#include "param.h" -#include "x86.h" -#include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "spinlock.h" - -void -initlock(struct spinlock *lk, char *name) -{ -  lk->name = name; -  lk->locked = 0; -  lk->cpu = 0; -} - -// Acquire the lock. -// Loops (spins) until the lock is acquired. -// Holding a lock for a long time may cause -// other CPUs to waste time spinning to acquire it. -void -acquire(struct spinlock *lk) -{ -  pushcli(); // disable interrupts to avoid deadlock. -  if(holding(lk)) -    panic("acquire"); - -  // The xchg is atomic. 
-  while(xchg(&lk->locked, 1) != 0) -    ; - -  // Tell the C compiler and the processor to not move loads or stores -  // past this point, to ensure that the critical section's memory -  // references happen after the lock is acquired. -  __sync_synchronize(); - -  // Record info about lock acquisition for debugging. -  lk->cpu = mycpu(); -  getcallerpcs(&lk, lk->pcs); -} - -// Release the lock. -void -release(struct spinlock *lk) -{ -  if(!holding(lk)) -    panic("release"); - -  lk->pcs[0] = 0; -  lk->cpu = 0; - -  // Tell the C compiler and the processor to not move loads or stores -  // past this point, to ensure that all the stores in the critical -  // section are visible to other cores before the lock is released. -  // Both the C compiler and the hardware may re-order loads and -  // stores; __sync_synchronize() tells them both not to. -  __sync_synchronize(); - -  // Release the lock, equivalent to lk->locked = 0. -  // This code can't use a C assignment, since it might -  // not be atomic. A real OS would use C atomics here. -  asm volatile("movl $0, %0" : "+m" (lk->locked) : ); - -  popcli(); -} - -// Record the current call stack in pcs[] by following the %ebp chain. -void -getcallerpcs(void *v, uint pcs[]) -{ -  uint *ebp; -  int i; - -  ebp = (uint*)v - 2; -  for(i = 0; i < 10; i++){ -    if(ebp == 0 || ebp < (uint*)KERNBASE || ebp == (uint*)0xffffffff) -      break; -    pcs[i] = ebp[1];     // saved %eip -    ebp = (uint*)ebp[0]; // saved %ebp -  } -  for(; i < 10; i++) -    pcs[i] = 0; -} - -// Check whether this cpu is holding the lock. -int -holding(struct spinlock *lock) -{ -  int r; -  pushcli(); -  r = lock->locked && lock->cpu == mycpu(); -  popcli(); -  return r; -} - - -// Pushcli/popcli are like cli/sti except that they are matched: -// it takes two popcli to undo two pushcli.  Also, if interrupts -// are off, then pushcli, popcli leaves them off. 
- -void -pushcli(void) -{ -  int eflags; - -  eflags = readeflags(); -  cli(); -  if(mycpu()->ncli == 0) -    mycpu()->intena = eflags & FL_IF; -  mycpu()->ncli += 1; -} - -void -popcli(void) -{ -  if(readeflags()&FL_IF) -    panic("popcli - interruptible"); -  if(--mycpu()->ncli < 0) -    panic("popcli"); -  if(mycpu()->ncli == 0 && mycpu()->intena) -    sti(); -} - @@ -1,16 +0,0 @@ -#!/bin/sh - -if [ $# != 1 ] || [ ! -f "$1" ]; then -	echo 'usage: spinp file.p' 1>&2 -	exit 1 -fi - -rm -f $1.trail -spin -a $1 || exit 1 -cc -DSAFETY -DREACH -DMEMLIM=500 -o pan pan.c -pan -i -rm pan.* pan -if [ -f $1.trail ]; then -	spin -t -p $1 -fi - diff --git a/swtch.S b/swtch.S deleted file mode 100644 index 63a7dcc..0000000 --- a/swtch.S +++ /dev/null @@ -1,29 +0,0 @@ -# Context switch -# -#   void swtch(struct context **old, struct context *new); -#  -# Save the current registers on the stack, creating -# a struct context, and save its address in *old. -# Switch stacks to new and pop previously-saved registers. - -.globl swtch -swtch: -  movl 4(%esp), %eax -  movl 8(%esp), %edx - -  # Save old callee-saved registers -  pushl %ebp -  pushl %ebx -  pushl %esi -  pushl %edi - -  # Switch stacks -  movl %esp, (%eax) -  movl %edx, %esp - -  # Load new callee-saved registers -  popl %edi -  popl %esi -  popl %ebx -  popl %ebp -  ret diff --git a/syscall.c b/syscall.c deleted file mode 100644 index ee85261..0000000 --- a/syscall.c +++ /dev/null @@ -1,145 +0,0 @@ -#include "types.h" -#include "defs.h" -#include "param.h" -#include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" -#include "syscall.h" - -// User code makes a system call with INT T_SYSCALL. -// System call number in %eax. -// Arguments on the stack, from the user call to the C -// library system call function. The saved user %esp points -// to a saved program counter, and then the first argument. - -// Fetch the int at addr from the current process. 
-int -fetchint(uint addr, int *ip) -{ -  struct proc *curproc = myproc(); - -  if(addr >= curproc->sz || addr+4 > curproc->sz) -    return -1; -  *ip = *(int*)(addr); -  return 0; -} - -// Fetch the nul-terminated string at addr from the current process. -// Doesn't actually copy the string - just sets *pp to point at it. -// Returns length of string, not including nul. -int -fetchstr(uint addr, char **pp) -{ -  char *s, *ep; -  struct proc *curproc = myproc(); - -  if(addr >= curproc->sz) -    return -1; -  *pp = (char*)addr; -  ep = (char*)curproc->sz; -  for(s = *pp; s < ep; s++){ -    if(*s == 0) -      return s - *pp; -  } -  return -1; -} - -// Fetch the nth 32-bit system call argument. -int -argint(int n, int *ip) -{ -  return fetchint((myproc()->tf->esp) + 4 + 4*n, ip); -} - -// Fetch the nth word-sized system call argument as a pointer -// to a block of memory of size bytes.  Check that the pointer -// lies within the process address space. -int -argptr(int n, char **pp, int size) -{ -  int i; -  struct proc *curproc = myproc(); -  -  if(argint(n, &i) < 0) -    return -1; -  if(size < 0 || (uint)i >= curproc->sz || (uint)i+size > curproc->sz) -    return -1; -  *pp = (char*)i; -  return 0; -} - -// Fetch the nth word-sized system call argument as a string pointer. -// Check that the pointer is valid and the string is nul-terminated. -// (There is no shared writable memory, so the string can't change -// between this check and being used by the kernel.) 
-int -argstr(int n, char **pp) -{ -  int addr; -  if(argint(n, &addr) < 0) -    return -1; -  return fetchstr(addr, pp); -} - -extern int sys_chdir(void); -extern int sys_close(void); -extern int sys_dup(void); -extern int sys_exec(void); -extern int sys_exit(void); -extern int sys_fork(void); -extern int sys_fstat(void); -extern int sys_getpid(void); -extern int sys_kill(void); -extern int sys_link(void); -extern int sys_mkdir(void); -extern int sys_mknod(void); -extern int sys_open(void); -extern int sys_pipe(void); -extern int sys_read(void); -extern int sys_sbrk(void); -extern int sys_sleep(void); -extern int sys_unlink(void); -extern int sys_wait(void); -extern int sys_write(void); -extern int sys_uptime(void); - -static int (*syscalls[])(void) = { -[SYS_fork]    sys_fork, -[SYS_exit]    sys_exit, -[SYS_wait]    sys_wait, -[SYS_pipe]    sys_pipe, -[SYS_read]    sys_read, -[SYS_kill]    sys_kill, -[SYS_exec]    sys_exec, -[SYS_fstat]   sys_fstat, -[SYS_chdir]   sys_chdir, -[SYS_dup]     sys_dup, -[SYS_getpid]  sys_getpid, -[SYS_sbrk]    sys_sbrk, -[SYS_sleep]   sys_sleep, -[SYS_uptime]  sys_uptime, -[SYS_open]    sys_open, -[SYS_write]   sys_write, -[SYS_mknod]   sys_mknod, -[SYS_unlink]  sys_unlink, -[SYS_link]    sys_link, -[SYS_mkdir]   sys_mkdir, -[SYS_close]   sys_close, -}; - -void -syscall(void) -{ -  int num; -  struct proc *curproc = myproc(); - -  num = curproc->tf->eax; -  if(num > 0 && num < NELEM(syscalls) && syscalls[num]) { -    curproc->tf->eax = syscalls[num](); -  } else { -    cprintf("%d %s: unknown sys call %d\n", -            curproc->pid, curproc->name, num); -    curproc->tf->eax = -1; -  } -} diff --git a/toc.ftr b/toc.ftr deleted file mode 100644 index 0061c1d..0000000 --- a/toc.ftr +++ /dev/null @@ -1,13 +0,0 @@ - - -The source listing is preceded by a cross-reference that lists every defined  -constant, struct, global variable, and function in xv6.  
Each entry gives, -on the same line as the name, the line number (or, in a few cases, numbers) -where the name is defined.  Successive lines in an entry list the line -numbers where the name is used.  For example, this entry: - -    swtch 2658 -        0374 2428 2466 2657 2658 - -indicates that swtch is defined on line 2658 and is mentioned on five lines -on sheets 03, 24, and 26. diff --git a/toc.hdr b/toc.hdr deleted file mode 100644 index 3698d81..0000000 --- a/toc.hdr +++ /dev/null @@ -1,6 +0,0 @@ -The numbers to the left of the file names in the table are sheet numbers. -The source code has been printed in a double column format with fifty -lines per column, giving one hundred lines per sheet (or page). -Thus there is a convenient relationship between line numbers and sheet numbers. - - @@ -1,112 +0,0 @@ -#include "types.h" -#include "defs.h" -#include "param.h" -#include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" -#include "traps.h" -#include "spinlock.h" - -// Interrupt descriptor table (shared by all CPUs). 
-struct gatedesc idt[256]; -extern uint vectors[];  // in vectors.S: array of 256 entry pointers -struct spinlock tickslock; -uint ticks; - -void -tvinit(void) -{ -  int i; - -  for(i = 0; i < 256; i++) -    SETGATE(idt[i], 0, SEG_KCODE<<3, vectors[i], 0); -  SETGATE(idt[T_SYSCALL], 1, SEG_KCODE<<3, vectors[T_SYSCALL], DPL_USER); - -  initlock(&tickslock, "time"); -} - -void -idtinit(void) -{ -  lidt(idt, sizeof(idt)); -} - -//PAGEBREAK: 41 -void -trap(struct trapframe *tf) -{ -  if(tf->trapno == T_SYSCALL){ -    if(myproc()->killed) -      exit(); -    myproc()->tf = tf; -    syscall(); -    if(myproc()->killed) -      exit(); -    return; -  } - -  switch(tf->trapno){ -  case T_IRQ0 + IRQ_TIMER: -    if(cpuid() == 0){ -      acquire(&tickslock); -      ticks++; -      wakeup(&ticks); -      release(&tickslock); -    } -    lapiceoi(); -    break; -  case T_IRQ0 + IRQ_IDE: -    ideintr(); -    lapiceoi(); -    break; -  case T_IRQ0 + IRQ_IDE+1: -    // Bochs generates spurious IDE1 interrupts. -    break; -  case T_IRQ0 + IRQ_KBD: -    kbdintr(); -    lapiceoi(); -    break; -  case T_IRQ0 + IRQ_COM1: -    uartintr(); -    lapiceoi(); -    break; -  case T_IRQ0 + 7: -  case T_IRQ0 + IRQ_SPURIOUS: -    cprintf("cpu%d: spurious interrupt at %x:%x\n", -            cpuid(), tf->cs, tf->eip); -    lapiceoi(); -    break; - -  //PAGEBREAK: 13 -  default: -    if(myproc() == 0 || (tf->cs&3) == 0){ -      // In kernel, it must be our mistake. -      cprintf("unexpected trap %d from cpu %d eip %x (cr2=0x%x)\n", -              tf->trapno, cpuid(), tf->eip, rcr2()); -      panic("trap"); -    } -    // In user space, assume process misbehaved. -    cprintf("pid %d %s: trap %d err %d on cpu %d " -            "eip 0x%x addr 0x%x--kill proc\n", -            myproc()->pid, myproc()->name, tf->trapno, -            tf->err, cpuid(), tf->eip, rcr2()); -    myproc()->killed = 1; -  } - -  // Force process exit if it has been killed and is in user space. 
-  // (If it is still executing in the kernel, let it keep running -  // until it gets to the regular system call return.) -  if(myproc() && myproc()->killed && (tf->cs&3) == DPL_USER) -    exit(); - -  // Force process to give up CPU on clock tick. -  // If interrupts were on while locks held, would need to check nlock. -  if(myproc() && myproc()->state == RUNNING && -     tf->trapno == T_IRQ0+IRQ_TIMER) -    yield(); - -  // Check if the process has been killed since we yielded -  if(myproc() && myproc()->killed && (tf->cs&3) == DPL_USER) -    exit(); -} diff --git a/trapasm.S b/trapasm.S deleted file mode 100644 index da8aefc..0000000 --- a/trapasm.S +++ /dev/null @@ -1,32 +0,0 @@ -#include "mmu.h" - -  # vectors.S sends all traps here. -.globl alltraps -alltraps: -  # Build trap frame. -  pushl %ds -  pushl %es -  pushl %fs -  pushl %gs -  pushal -   -  # Set up data segments. -  movw $(SEG_KDATA<<3), %ax -  movw %ax, %ds -  movw %ax, %es - -  # Call trap(tf), where tf=%esp -  pushl %esp -  call trap -  addl $4, %esp - -  # Return falls through to trapret... -.globl trapret -trapret: -  popal -  popl %gs -  popl %fs -  popl %es -  popl %ds -  addl $0x8, %esp  # trapno and errcode -  iret diff --git a/traps.h b/traps.h deleted file mode 100644 index 0bd1fd8..0000000 --- a/traps.h +++ /dev/null @@ -1,38 +0,0 @@ -// x86 trap and interrupt constants. 
- -// Processor-defined: -#define T_DIVIDE         0      // divide error -#define T_DEBUG          1      // debug exception -#define T_NMI            2      // non-maskable interrupt -#define T_BRKPT          3      // breakpoint -#define T_OFLOW          4      // overflow -#define T_BOUND          5      // bounds check -#define T_ILLOP          6      // illegal opcode -#define T_DEVICE         7      // device not available -#define T_DBLFLT         8      // double fault -// #define T_COPROC      9      // reserved (not used since 486) -#define T_TSS           10      // invalid task switch segment -#define T_SEGNP         11      // segment not present -#define T_STACK         12      // stack exception -#define T_GPFLT         13      // general protection fault -#define T_PGFLT         14      // page fault -// #define T_RES        15      // reserved -#define T_FPERR         16      // floating point error -#define T_ALIGN         17      // aligment check -#define T_MCHK          18      // machine check -#define T_SIMDERR       19      // SIMD floating point error - -// These are arbitrarily chosen, but with care not to overlap -// processor defined exceptions or interrupt vectors. -#define T_SYSCALL       64      // system call -#define T_DEFAULT      500      // catchall - -#define T_IRQ0          32      // IRQ 0 corresponds to int T_IRQ - -#define IRQ_TIMER        0 -#define IRQ_KBD          1 -#define IRQ_COM1         4 -#define IRQ_IDE         14 -#define IRQ_ERROR       19 -#define IRQ_SPURIOUS    31 - diff --git a/types.h b/types.h deleted file mode 100644 index e4adf64..0000000 --- a/types.h +++ /dev/null @@ -1,4 +0,0 @@ -typedef unsigned int   uint; -typedef unsigned short ushort; -typedef unsigned char  uchar; -typedef uint pde_t; @@ -1,77 +0,0 @@ -// Intel 8250 serial port (UART). 
- -#include "types.h" -#include "defs.h" -#include "param.h" -#include "traps.h" -#include "spinlock.h" -#include "sleeplock.h" -#include "fs.h" -#include "file.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" - -#define COM1    0x3f8 - -static int uart;    // is there a uart? - -void -uartinit(void) -{ -  char *p; - -  // Turn off the FIFO -  outb(COM1+2, 0); - -  // 9600 baud, 8 data bits, 1 stop bit, parity off. -  outb(COM1+3, 0x80);    // Unlock divisor -  outb(COM1+0, 115200/9600); -  outb(COM1+1, 0); -  outb(COM1+3, 0x03);    // Lock divisor, 8 data bits. -  outb(COM1+4, 0); -  outb(COM1+1, 0x01);    // Enable receive interrupts. - -  // If status is 0xFF, no serial port. -  if(inb(COM1+5) == 0xFF) -    return; -  uart = 1; - -  // Acknowledge pre-existing interrupt conditions; -  // enable interrupts. -  inb(COM1+2); -  inb(COM1+0); -  ioapicenable(IRQ_COM1, 0); - -  // Announce that we're here. -  for(p="xv6...\n"; *p; p++) -    uartputc(*p); -} - -void -uartputc(int c) -{ -  int i; - -  if(!uart) -    return; -  for(i = 0; i < 128 && !(inb(COM1+5) & 0x20); i++) -    microdelay(10); -  outb(COM1+0, c); -} - -static int -uartgetc(void) -{ -  if(!uart) -    return -1; -  if(!(inb(COM1+5) & 0x01)) -    return -1; -  return inb(COM1+0); -} - -void -uartintr(void) -{ -  consoleintr(uartgetc); -} diff --git a/user/alarmtest.c b/user/alarmtest.c new file mode 100644 index 0000000..c6da547 --- /dev/null +++ b/user/alarmtest.c @@ -0,0 +1,88 @@ +// +// test program for the alarm lab. +// you can modify this file for testing, +// but please make sure your kernel +// modifications pass the original +// versions of these tests. 
+// + +#include "kernel/param.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "kernel/riscv.h" +#include "user/user.h" + +void test0(); +void test1(); +void periodic(); + +int +main(int argc, char *argv[]) +{ +  test0(); +  test1(); +  exit(); +} + +volatile static int count; + +void +periodic() +{ +  count = count + 1; +  printf(1, "alarm!\n"); +  sigreturn(); +} + +// tests whether the kernel calls +// the alarm handler even a single time. +void +test0() +{ +  int i; +  printf(1, "test0 start\n"); +  count = 0; +  sigalarm(2, periodic); +  for(i = 0; i < 1000*500000; i++){ +    if((i % 250000) == 0) +      write(2, ".", 1); +    if(count > 0) +      break; +  } +  sigalarm(0, 0); +  if(count > 0){ +    printf(1, "test0 passed\n"); +  } else { +    printf(1, "test0 failed\n"); +  } +} + +void __attribute__ ((noinline)) foo(int i, int *j) { +  if((i % 2500000) == 0) { +    write(2, ".", 1); +  } +  *j += 1; +} + +void +test1() +{ +  int i; +  int j; + +  printf(1, "test1 start\n"); +  count = 0; +  j = 0; +  sigalarm(2, periodic); +  for(i = 0; i < 500000000; i++){ +    if(count >= 10) +      break; +    foo(i, &j); +  } +  if(i != j || count < 10){ +    // i should equal j +    printf(1, "test1 failed\n"); +  } else { +    printf(1, "test1 passed\n"); +  } +} @@ -1,6 +1,6 @@ -#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h"  char buf[512]; diff --git a/user/cow.c b/user/cow.c new file mode 100644 index 0000000..0426600 --- /dev/null +++ b/user/cow.c @@ -0,0 +1,196 @@ +// +// tests for copy-on-write fork() assignment. +// + +#include "kernel/types.h" +#include "kernel/memlayout.h" +#include "user/user.h" + +// allocate more than half of physical memory, +// then fork. this will fail in the default +// kernel, which does not support copy-on-write. 
+void +simpletest() +{ +  uint64 phys_size = PHYSTOP - KERNBASE; +  int sz = (phys_size / 3) * 2; + +  printf(1, "simple: "); +   +  char *p = sbrk(sz); +  if(p == (char*)0xffffffffffffffffL){ +    printf(1, "sbrk(%d) failed\n", sz); +    exit(); +  } + +  for(char *q = p; q < p + sz; q += 4096){ +    *(int*)q = getpid(); +  } + +  int pid = fork(); +  if(pid < 0){ +    printf(1, "fork() failed\n"); +    exit(); +  } + +  if(pid == 0) +    exit(); + +  wait(); + +  if(sbrk(-sz) == (char*)0xffffffffffffffffL){ +    printf(1, "sbrk(-%d) failed\n", sz); +    exit(); +  } + +  printf(1, "ok\n"); +} + +// three processes all write COW memory. +// this causes more than half of physical memory +// to be allocated, so it also checks whether +// copied pages are freed. +void +threetest() +{ +  uint64 phys_size = PHYSTOP - KERNBASE; +  int sz = phys_size / 4; +  int pid1, pid2; + +  printf(1, "three: "); +   +  char *p = sbrk(sz); +  if(p == (char*)0xffffffffffffffffL){ +    printf(1, "sbrk(%d) failed\n", sz); +    exit(); +  } + +  pid1 = fork(); +  if(pid1 < 0){ +    printf(1, "fork failed\n"); +    exit(); +  } +  if(pid1 == 0){ +    pid2 = fork(); +    if(pid2 < 0){ +      printf(1, "fork failed"); +      exit(); +    } +    if(pid2 == 0){ +      for(char *q = p; q < p + (sz/5)*4; q += 4096){ +        *(int*)q = getpid(); +      } +      for(char *q = p; q < p + (sz/5)*4; q += 4096){ +        if(*(int*)q != getpid()){ +          printf(1, "wrong content\n"); +          exit(); +        } +      } +      exit(); +    } +    for(char *q = p; q < p + (sz/2); q += 4096){ +      *(int*)q = 9999; +    } +    exit(); +  } + +  for(char *q = p; q < p + sz; q += 4096){ +    *(int*)q = getpid(); +  } + +  wait(); + +  sleep(1); + +  for(char *q = p; q < p + sz; q += 4096){ +    if(*(int*)q != getpid()){ +      printf(1, "wrong content\n"); +      exit(); +    } +  } + +  if(sbrk(-sz) == (char*)0xffffffffffffffffL){ +    printf(1, "sbrk(-%d) failed\n", sz); +    exit(); +  } + +  
printf(1, "ok\n"); +} + +char junk1[4096]; +int fds[2]; +char junk2[4096]; +char buf[4096]; +char junk3[4096]; + +// test whether copyout() simulates COW faults. +void +filetest() +{ +  int parent = getpid(); +   +  printf(1, "file: "); +   +  buf[0] = 99; + +  for(int i = 0; i < 4; i++){ +    if(pipe(fds) != 0){ +      printf(1, "pipe() failed\n"); +      exit(); +    } +    int pid = fork(); +    if(pid < 0){ +      printf(1, "fork failed\n"); +      exit(); +    } +    if(pid == 0){ +      sleep(1); +      if(read(fds[0], buf, sizeof(i)) != sizeof(i)){ +        printf(1, "read failed\n"); +        kill(parent); +        exit(); +      } +      sleep(1); +      int j = *(int*)buf; +      if(j != i){ +        printf(1, "read the wrong value\n"); +        kill(parent); +        exit(); +      } +      exit(); +    } +    if(write(fds[1], &i, sizeof(i)) != sizeof(i)){ +      printf(1, "write failed\n"); +      exit(); +    } +  } + +  for(int i = 0; i < 4; i++) +    wait(); + +  if(buf[0] != 99){ +    printf(1, "child overwrote parent\n"); +    exit(); +  } + +  printf(1, "ok\n"); +} + +int +main(int argc, char *argv[]) +{ +  simpletest(); + +  // check that the first simpletest() freed the physical memory. +  simpletest(); + +  threetest(); +  threetest(); +  threetest(); + +  filetest(); + +  printf(1, "ALL COW TESTS PASSED\n"); + +  exit(); +} @@ -1,6 +1,6 @@ -#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h"  int  main(int argc, char *argv[]) diff --git a/forktest.c b/user/forktest.c index 8bc984d..be4915e 100644 --- a/forktest.c +++ b/user/forktest.c @@ -1,9 +1,9 @@  // Test that fork fails gracefully.  // Tiny executable so that the limit can be filling the proc table. -#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h"  #define N  1000 @@ -1,8 +1,8 @@  // Simple grep.  Only supports ^ . 
* $ operators. -#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h"  char buf[1024];  int match(char*, char*); @@ -1,9 +1,9 @@  // init: The initial user-level program -#include "types.h" -#include "stat.h" -#include "user.h" -#include "fcntl.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" +#include "kernel/fcntl.h"  char *argv[] = { "sh", 0 }; @@ -31,7 +31,8 @@ main(void)        printf(1, "init: exec sh failed\n");        exit();      } -    while((wpid=wait()) >= 0 && wpid != pid) -      printf(1, "zombie!\n"); +    while((wpid=wait()) >= 0 && wpid != pid){ +      //printf(1, "zombie!\n"); +    }    }  } diff --git a/initcode.S b/user/initcode.S index 80ac5d8..ca76972 100644 --- a/initcode.S +++ b/user/initcode.S @@ -2,23 +2,20 @@  # This code runs in user space.  #include "syscall.h" -#include "traps.h" -  # exec(init, argv)  .globl start  start: -  pushl $argv -  pushl $init -  pushl $0  // where caller pc would be -  movl $SYS_exec, %eax -  int $T_SYSCALL +        la a0, init +        la a1, argv +        li a7, SYS_exec +        ecall  # for(;;) exit();  exit: -  movl $SYS_exit, %eax -  int $T_SYSCALL -  jmp exit +        li a7, SYS_exit +        ecall +        jal exit  # char init[] = "/init\0";  init: @@ -29,4 +26,3 @@ init:  argv:    .long init    .long 0 - @@ -1,6 +1,6 @@ -#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h"  int  main(int argc, char **argv) @@ -1,6 +1,6 @@ -#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h"  int  main(int argc, char *argv[]) @@ -1,7 +1,7 @@ -#include "types.h" -#include "stat.h" -#include "user.h" -#include "fs.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" +#include "kernel/fs.h"  char*  fmtname(char *path) @@ 
-43,7 +43,7 @@ ls(char *path)    switch(st.type){    case T_FILE: -    printf(1, "%s %d %d %d\n", fmtname(path), st.type, st.ino, st.size); +    printf(1, "%s %d %d %l\n", fmtname(path), st.type, st.ino, st.size);      break;    case T_DIR: @@ -1,6 +1,6 @@ -#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h"  int  main(int argc, char *argv[]) @@ -1,6 +1,10 @@ -#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" + +#include <stdarg.h> + +static char digits[] = "0123456789ABCDEF";  static void  putc(int fd, char c) @@ -11,7 +15,6 @@ putc(int fd, char c)  static void  printint(int fd, int xx, int base, int sgn)  { -  static char digits[] = "0123456789ABCDEF";    char buf[16];    int i, neg;    uint x; @@ -35,16 +38,25 @@ printint(int fd, int xx, int base, int sgn)      putc(fd, buf[i]);  } +static void +printptr(int fd, uint64 x) { +  int i; +  putc(fd, '0'); +  putc(fd, 'x'); +  for (i = 0; i < (sizeof(uint64) * 2); i++, x <<= 4) +    putc(fd, digits[x >> (sizeof(uint64) * 8 - 4)]); +} +  // Print to the given fd. Only understands %d, %x, %p, %s.  void  printf(int fd, const char *fmt, ...)  { +  va_list ap;    char *s;    int c, i, state; -  uint *ap; +  va_start(ap, fmt);    state = 0; -  ap = (uint*)(void*)&fmt + 1;    for(i = 0; fmt[i]; i++){      c = fmt[i] & 0xff;      if(state == 0){ @@ -55,14 +67,15 @@ printf(int fd, const char *fmt, ...)        
}      } else if(state == '%'){        if(c == 'd'){ -        printint(fd, *ap, 10, 1); -        ap++; -      } else if(c == 'x' || c == 'p'){ -        printint(fd, *ap, 16, 0); -        ap++; +        printint(fd, va_arg(ap, int), 10, 1); +      } else if(c == 'l') { +        printint(fd, va_arg(ap, uint64), 10, 0); +      } else if(c == 'x') { +        printint(fd, va_arg(ap, int), 16, 0); +      } else if(c == 'p') { +        printptr(fd, va_arg(ap, uint64));        } else if(c == 's'){ -        s = (char*)*ap; -        ap++; +        s = va_arg(ap, char*);          if(s == 0)            s = "(null)";          while(*s != 0){ @@ -70,8 +83,7 @@ printf(int fd, const char *fmt, ...)            s++;          }        } else if(c == 'c'){ -        putc(fd, *ap); -        ap++; +        putc(fd, va_arg(ap, uint));        } else if(c == '%'){          putc(fd, c);        } else { @@ -1,6 +1,6 @@ -#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h"  int  main(int argc, char *argv[]) @@ -1,8 +1,8 @@  // Shell. 
-#include "types.h" -#include "user.h" -#include "fcntl.h" +#include "kernel/types.h" +#include "user/user.h" +#include "kernel/fcntl.h"  // Parsed command representation  #define EXEC  1 diff --git a/stressfs.c b/user/stressfs.c index c0a4743..ef8f1cd 100644 --- a/stressfs.c +++ b/user/stressfs.c @@ -7,11 +7,11 @@  //    for (i = 0; i < 40000; i++)  //      asm volatile(""); -#include "types.h" -#include "stat.h" -#include "user.h" -#include "fs.h" -#include "fcntl.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" +#include "kernel/fs.h" +#include "kernel/fcntl.h"  int  main(int argc, char *argv[]) @@ -1,8 +1,7 @@ -#include "types.h" -#include "stat.h" -#include "fcntl.h" -#include "user.h" -#include "x86.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "kernel/fcntl.h" +#include "user/user.h"  char*  strcpy(char *s, const char *t) @@ -36,7 +35,11 @@ strlen(const char *s)  void*  memset(void *dst, int c, uint n)  { -  stosb(dst, c, n); +  char *cdst = (char *) dst; +  int i; +  for(i = 0; i < n; i++){ +    cdst[i] = c; +  }    return dst;  } diff --git a/umalloc.c b/user/umalloc.c index a7e7d2c..2092a32 100644 --- a/umalloc.c +++ b/user/umalloc.c @@ -1,7 +1,7 @@ -#include "types.h" -#include "stat.h" -#include "user.h" -#include "param.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" +#include "kernel/param.h"  // Memory allocator by Kernighan and Ritchie,  // The C programming Language, 2nd ed.  Section 8.7. 
diff --git a/usertests.c b/user/usertests.c index a1e97e7..f74b88c 100644 --- a/usertests.c +++ b/user/usertests.c @@ -1,12 +1,12 @@ -#include "param.h" -#include "types.h" -#include "stat.h" -#include "user.h" -#include "fs.h" -#include "fcntl.h" -#include "syscall.h" -#include "traps.h" -#include "memlayout.h" +#include "kernel/param.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h" +#include "kernel/fs.h" +#include "kernel/fcntl.h" +#include "kernel/syscall.h" +#include "kernel/memlayout.h" +#include "kernel/riscv.h"  char buf[8192];  char name[3]; @@ -363,17 +363,29 @@ preempt(void)    printf(1, "preempt: ");    pid1 = fork(); +  if(pid1 < 0) { +    printf(1, "fork failed"); +    exit(); +  }    if(pid1 == 0)      for(;;)        ;    pid2 = fork(); +  if(pid2 < 0) { +    printf(1, "fork failed\n"); +    exit(); +  }    if(pid2 == 0)      for(;;)        ;    pipe(pfds);    pid3 = fork(); +  if(pid3 < 0) { +     printf(1, "fork failed\n"); +     exit(); +  }    if(pid3 == 0){      close(pfds[0]);      if(write(pfds[1], "x", 1) != 1) @@ -406,16 +418,18 @@ exitwait(void)  {    int i, pid; +  printf(1, "exitwait test\n"); +    for(i = 0; i < 100; i++){      pid = fork();      if(pid < 0){        printf(1, "fork failed\n"); -      return; +      exit();      }      if(pid){        if(wait() != pid){          printf(1, "wait wrong pid\n"); -        return; +        exit();        }      } else {        exit(); @@ -424,6 +438,147 @@ exitwait(void)    printf(1, "exitwait ok\n");  } +// try to find races in the reparenting +// code that handles a parent exiting +// when it still has live children. 
+void +reparent(void) +{ +  int master_pid = getpid(); +   +  printf(1, "reparent test\n"); + +  for(int i = 0; i < 200; i++){ +    int pid = fork(); +    if(pid < 0){ +      printf(1, "fork failed\n"); +      exit(); +    } +    if(pid){ +      if(wait() != pid){ +        printf(1, "wait wrong pid\n"); +        exit(); +      } +    } else { +      int pid2 = fork(); +      if(pid2 < 0){ +        printf(1, "fork failed\n"); +        kill(master_pid); +        exit(); +      } +      if(pid2 == 0){ +        exit(); +      } else { +        exit(); +      } +    } +  } +  printf(1, "reparent ok\n"); +} + +// what if two children exit() at the same time? +void +twochildren(void) +{ +  printf(1, "twochildren test\n"); + +  for(int i = 0; i < 1000; i++){ +    int pid1 = fork(); +    if(pid1 < 0){ +      printf(1, "fork failed\n"); +      exit(); +    } +    if(pid1 == 0){ +      exit(); +    } else { +      int pid2 = fork(); +      if(pid2 < 0){ +        printf(1, "fork failed\n"); +        exit(); +      } +      if(pid2 == 0){ +        exit(); +      } else { +        wait(); +        wait(); +      } +    } +  } +  printf(1, "twochildren ok\n"); +} + +// concurrent forks to try to expose locking bugs. 
+void +forkfork(void) +{ +  int ppid = getpid(); +   +  printf(1, "forkfork test\n"); + +  for(int i = 0; i < 2; i++){ +    int pid = fork(); +    if(pid < 0){ +      printf(1, "fork failed"); +      exit(); +    } +    if(pid == 0){ +      for(int j = 0; j < 200; j++){ +        int pid1 = fork(); +        if(pid1 < 0){ +          printf(1, "fork failed\n"); +          kill(ppid); +          exit(); +        } +        if(pid1 == 0){ +          exit(); +        } +        wait(); +      } +      exit(); +    } +  } + +  for(int i = 0; i < 2; i++){ +    wait(); +  } + +  printf(1, "forkfork ok\n"); +} + +void +forkforkfork(void) +{ +  printf(1, "forkforkfork test\n"); + +  unlink("stopforking"); + +  int pid = fork(); +  if(pid < 0){ +    printf(1, "fork failed"); +    exit(); +  } +  if(pid == 0){ +    while(1){ +      int fd = open("stopforking", 0); +      if(fd >= 0){ +        exit(); +      } +      if(fork() < 0){ +        close(open("stopforking", O_CREATE|O_RDWR)); +      } +    } + +    exit(); +  } + +  sleep(20); // two seconds +  close(open("stopforking", O_CREATE|O_RDWR)); +  wait(); +  sleep(10); // one second + +  printf(1, "forkforkfork ok\n"); +} +  void  mem(void)  { @@ -583,13 +738,13 @@ fourfiles(void)  void  createdelete(void)  { -  enum { N = 20 }; +  enum { N = 20, NCHILD=4 };    int pid, i, fd, pi;    char name[32];    printf(1, "createdelete test\n"); -  for(pi = 0; pi < 4; pi++){ +  for(pi = 0; pi < NCHILD; pi++){      pid = fork();      if(pid < 0){        printf(1, "fork failed\n"); @@ -619,13 +774,13 @@ createdelete(void)      }    } -  for(pi = 0; pi < 4; pi++){ +  for(pi = 0; pi < NCHILD; pi++){      wait();    }    name[0] = name[1] = name[2] = 0;    for(i = 0; i < N; i++){ -    for(pi = 0; pi < 4; pi++){ +    for(pi = 0; pi < NCHILD; pi++){        name[0] = 'p' + pi;        name[1] = '0' + i;        fd = open(name, 0); @@ -642,7 +797,7 @@ createdelete(void)    }    for(i = 0; i < N; i++){ -    for(pi = 0; pi < 4; pi++){ +    for(pi = 
0; pi < NCHILD; pi++){        name[0] = 'p' + i;        name[1] = '0' + i;        unlink(name); @@ -1391,6 +1546,11 @@ forktest(void)        exit();    } +  if (n == 0) { +    printf(1, "no fork at all!\n"); +    exit(); +  } +    if(n == 1000){      printf(1, "fork claimed to work 1000 times!\n");      exit(); @@ -1414,16 +1574,25 @@ forktest(void)  void  sbrktest(void)  { -  int fds[2], pid, pids[10], ppid; -  char *a, *b, *c, *lastaddr, *oldbrk, *p, scratch; -  uint amt; +  int i, fds[2], pids[10], pid, ppid; +  char *c, *oldbrk, scratch, *a, *b, *lastaddr, *p; +  uint64 amt; +  int fd; +  int n; +  #define BIG (100*1024*1024)    printf(stdout, "sbrk test\n");    oldbrk = sbrk(0); +  // does sbrk() return the expected failure value? +  a = sbrk(1024*1024*1024); +  if(a != (char*)0xffffffffffffffffL){ +    printf(stdout, "sbrk(<toomuch>) returned %p\n", a); +    exit(); +  } +    // can one sbrk() less than a page?    a = sbrk(0); -  int i;    for(i = 0; i < 5000; i++){      b = sbrk(1);      if(b != a){ @@ -1449,9 +1618,8 @@ sbrktest(void)    wait();    // can one grow address space to something big? -#define BIG (100*1024*1024)    a = sbrk(0); -  amt = (BIG) - (uint)a; +  amt = BIG - (uint64)a;    p = sbrk(amt);    if (p != a) {      printf(stdout, "sbrk test failed to grow big address space; enough phys mem?\n"); @@ -1463,7 +1631,7 @@ sbrktest(void)    // can one de-allocate?    a = sbrk(0);    c = sbrk(-4096); -  if(c == (char*)0xffffffff){ +  if(c == (char*)0xffffffffffffffffL){      printf(stdout, "sbrk could not deallocate\n");      exit();    } @@ -1508,7 +1676,7 @@ sbrktest(void)      }      wait();    } - +        // if we run the system out of memory, does it clean up the last    // failed allocation?    
if(pipe(fds) != 0){ @@ -1518,7 +1686,7 @@ sbrktest(void)    for(i = 0; i < sizeof(pids)/sizeof(pids[0]); i++){      if((pids[i] = fork()) == 0){        // allocate a lot of memory -      sbrk(BIG - (uint)sbrk(0)); +      sbrk(BIG - (uint64)sbrk(0));        write(fds[1], "x", 1);        // sit around until killed        for(;;) sleep(1000); @@ -1526,6 +1694,7 @@ sbrktest(void)      if(pids[i] != -1)        read(fds[0], &scratch, 1);    } +    // if those failed allocations freed up the pages they did allocate,    // we'll be able to allocate here    c = sbrk(4096); @@ -1535,11 +1704,55 @@ sbrktest(void)      kill(pids[i]);      wait();    } -  if(c == (char*)0xffffffff){ +  if(c == (char*)0xffffffffffffffffL){      printf(stdout, "failed sbrk leaked memory\n");      exit();    } +  // test running fork with the above allocated page  +  ppid = getpid(); +  pid = fork(); +  if(pid < 0){ +    printf(stdout, "fork failed\n"); +    exit(); +  } + +  // test out of memory during sbrk +  if(pid == 0){ +    // allocate a lot of memory +    a = sbrk(0); +    sbrk(10*BIG); +    int n = 0; +    for (i = 0; i < 10*BIG; i += 4096) { +      n += *(a+i); +    } +    printf(stdout, "allocate a lot of memory succeeded %d\n", n); +    kill(ppid); +    exit(); +  } +  wait(); + +  // test reads from allocated memory +  a = sbrk(4096); +  fd = open("sbrk", O_CREATE|O_WRONLY); +  unlink("sbrk"); +  if(fd < 0)  { +    printf(stdout, "open sbrk failed\n"); +    exit(); +  } +  if ((n = write(fd, a, 10)) < 0) { +    printf(stdout, "write sbrk failed\n"); +    exit(); +  } +  close(fd); + +  // test writes to allocated memory +  a = sbrk(4096); +  if(pipe((int *) a) != 0){ +    printf(1, "pipe() failed\n"); +    exit(); +  }  +    if(sbrk(0) > oldbrk)      sbrk(-(sbrk(0) - oldbrk)); @@ -1549,7 +1762,7 @@ sbrktest(void)  void  validateint(int *p)  { -  int res; +  /* XXX int res;    asm("mov %%esp, %%ebx\n\t"        "mov %3, %%esp\n\t"        "int %2\n\t" @@ -1557,13 +1770,14 @@ 
validateint(int *p)        "=a" (res) :        "a" (SYS_sleep), "n" (T_SYSCALL), "c" (p) :        "ebx"); +  */  }  void  validatetest(void)  {    int hi, pid; -  uint p; +  uint64 p;    printf(stdout, "validate test\n");    hi = 1100*1024; @@ -1695,35 +1909,6 @@ fsfull()    printf(1, "fsfull test finished\n");  } -void -uio() -{ -  #define RTC_ADDR 0x70 -  #define RTC_DATA 0x71 - -  ushort port = 0; -  uchar val = 0; -  int pid; - -  printf(1, "uio test\n"); -  pid = fork(); -  if(pid == 0){ -    port = RTC_ADDR; -    val = 0x09;  /* year */ -    /* http://wiki.osdev.org/Inline_Assembly/Examples */ -    asm volatile("outb %0,%1"::"a"(val), "d" (port)); -    port = RTC_DATA; -    asm volatile("inb %1,%0" : "=a" (val) : "d" (port)); -    printf(1, "uio: uio succeeded; test FAILED\n"); -    exit(); -  } else if(pid < 0){ -    printf (1, "fork failed\n"); -    exit(); -  } -  wait(); -  printf(1, "uio test done\n"); -} -  void argptest()  {    int fd; @@ -1745,6 +1930,32 @@ rand()    return randstate;  } +// check that there's an invalid page beneath +// the user stack, to catch stack overflow. +void +stacktest() +{ +  int pid; +  int ppid = getpid(); +   +  printf(1, "stack guard test\n"); +  pid = fork(); +  if(pid == 0) { +    char *sp = (char *) r_sp(); +    sp -= 4096; +    // the *sp should cause a trap. 
+    printf(1, "stacktest: read below stack %p\n", *sp); +    printf(1, "stacktest: test FAILED\n"); +    kill(ppid); +    exit(); +  } else if(pid < 0){ +    printf (1, "fork failed\n"); +    exit(); +  } +  wait(); +  printf(1, "stack guard test ok\n"); +} +  int  main(int argc, char *argv[])  { @@ -1756,6 +1967,11 @@ main(int argc, char *argv[])    }    close(open("usertests.ran", O_CREATE)); +  reparent(); +  twochildren(); +  forkfork(); +  forkforkfork(); +      argptest();    createdelete();    linkunlink(); @@ -1769,7 +1985,8 @@ main(int argc, char *argv[])    bsstest();    sbrktest();    validatetest(); - +  stacktest(); +      opentest();    writetest();    writetest1(); @@ -1795,8 +2012,6 @@ main(int argc, char *argv[])    forktest();    bigdir(); // slow -  uio(); -    exectest();    exit(); diff --git a/user/usys.pl b/user/usys.pl new file mode 100755 index 0000000..01e426e --- /dev/null +++ b/user/usys.pl @@ -0,0 +1,38 @@ +#!/usr/bin/perl -w + +# Generate usys.S, the stubs for syscalls. + +print "# generated by usys.pl - do not edit\n"; + +print "#include \"kernel/syscall.h\"\n"; + +sub entry { +    my $name = shift; +    print ".global $name\n"; +    print "${name}:\n"; +    print " li a7, SYS_${name}\n"; +    print " ecall\n"; +    print " ret\n"; +} +	 +entry("fork"); +entry("exit"); +entry("wait"); +entry("pipe"); +entry("read"); +entry("write"); +entry("close"); +entry("kill"); +entry("exec"); +entry("open"); +entry("mknod"); +entry("unlink"); +entry("fstat"); +entry("link"); +entry("mkdir"); +entry("chdir"); +entry("dup"); +entry("getpid"); +entry("sbrk"); +entry("sleep"); +entry("uptime"); @@ -1,6 +1,6 @@ -#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h"  char buf[512]; @@ -1,9 +1,9 @@  // Create a zombie process that  // must be reparented at exit. 
-#include "types.h" -#include "stat.h" -#include "user.h" +#include "kernel/types.h" +#include "kernel/stat.h" +#include "user/user.h"  int  main(void) @@ -1,31 +0,0 @@ -#include "syscall.h" -#include "traps.h" - -#define SYSCALL(name) \ -  .globl name; \ -  name: \ -    movl $SYS_ ## name, %eax; \ -    int $T_SYSCALL; \ -    ret - -SYSCALL(fork) -SYSCALL(exit) -SYSCALL(wait) -SYSCALL(pipe) -SYSCALL(read) -SYSCALL(write) -SYSCALL(close) -SYSCALL(kill) -SYSCALL(exec) -SYSCALL(open) -SYSCALL(mknod) -SYSCALL(unlink) -SYSCALL(fstat) -SYSCALL(link) -SYSCALL(mkdir) -SYSCALL(chdir) -SYSCALL(dup) -SYSCALL(getpid) -SYSCALL(sbrk) -SYSCALL(sleep) -SYSCALL(uptime) diff --git a/vectors.pl b/vectors.pl deleted file mode 100755 index 57b49dd..0000000 --- a/vectors.pl +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/perl -w - -# Generate vectors.S, the trap/interrupt entry points. -# There has to be one entry point per interrupt number -# since otherwise there's no way for trap() to discover -# the interrupt number. - -print "# generated by vectors.pl - do not edit\n"; -print "# handlers\n"; -print ".globl alltraps\n"; -for(my $i = 0; $i < 256; $i++){ -    print ".globl vector$i\n"; -    print "vector$i:\n"; -    if(!($i == 8 || ($i >= 10 && $i <= 14) || $i == 17)){ -        print "  pushl \$0\n"; -    } -    print "  pushl \$$i\n"; -    print "  jmp alltraps\n"; -} - -print "\n# vector table\n"; -print ".data\n"; -print ".globl vectors\n"; -print "vectors:\n"; -for(my $i = 0; $i < 256; $i++){ -    print "  .long vector$i\n"; -} - -# sample output: -#   # handlers -#   .globl alltraps -#   .globl vector0 -#   vector0: -#     pushl $0 -#     pushl $0 -#     jmp alltraps -#   ... -#    -#   # vector table -#   .data -#   .globl vectors -#   vectors: -#     .long vector0 -#     .long vector1 -#     .long vector2 -#   ... 
- @@ -1,394 +0,0 @@ -#include "param.h" -#include "types.h" -#include "defs.h" -#include "x86.h" -#include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "elf.h" - -extern char data[];  // defined by kernel.ld -pde_t *kpgdir;  // for use in scheduler() - -// Set up CPU's kernel segment descriptors. -// Run once on entry on each CPU. -void -seginit(void) -{ -  struct cpu *c; - -  // Map "logical" addresses to virtual addresses using identity map. -  // Cannot share a CODE descriptor for both kernel and user -  // because it would have to have DPL_USR, but the CPU forbids -  // an interrupt from CPL=0 to DPL=3. -  c = &cpus[cpuid()]; -  c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0); -  c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0); -  c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, DPL_USER); -  c->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER); -  lgdt(c->gdt, sizeof(c->gdt)); -} - -// Return the address of the PTE in page table pgdir -// that corresponds to virtual address va.  If alloc!=0, -// create any required page table pages. -static pte_t * -walkpgdir(pde_t *pgdir, const void *va, int alloc) -{ -  pde_t *pde; -  pte_t *pgtab; - -  pde = &pgdir[PDX(va)]; -  if(*pde & PTE_P){ -    pgtab = (pte_t*)P2V(PTE_ADDR(*pde)); -  } else { -    if(!alloc || (pgtab = (pte_t*)kalloc()) == 0) -      return 0; -    // Make sure all those PTE_P bits are zero. -    memset(pgtab, 0, PGSIZE); -    // The permissions here are overly generous, but they can -    // be further restricted by the permissions in the page table -    // entries, if necessary. -    *pde = V2P(pgtab) | PTE_P | PTE_W | PTE_U; -  } -  return &pgtab[PTX(va)]; -} - -// Create PTEs for virtual addresses starting at va that refer to -// physical addresses starting at pa. va and size might not -// be page-aligned. 
-static int -mappages(pde_t *pgdir, void *va, uint size, uint pa, int perm) -{ -  char *a, *last; -  pte_t *pte; - -  a = (char*)PGROUNDDOWN((uint)va); -  last = (char*)PGROUNDDOWN(((uint)va) + size - 1); -  for(;;){ -    if((pte = walkpgdir(pgdir, a, 1)) == 0) -      return -1; -    if(*pte & PTE_P) -      panic("remap"); -    *pte = pa | perm | PTE_P; -    if(a == last) -      break; -    a += PGSIZE; -    pa += PGSIZE; -  } -  return 0; -} - -// There is one page table per process, plus one that's used when -// a CPU is not running any process (kpgdir). The kernel uses the -// current process's page table during system calls and interrupts; -// page protection bits prevent user code from using the kernel's -// mappings. -// -// setupkvm() and exec() set up every page table like this: -// -//   0..KERNBASE: user memory (text+data+stack+heap), mapped to -//                phys memory allocated by the kernel -//   KERNBASE..KERNBASE+EXTMEM: mapped to 0..EXTMEM (for I/O space) -//   KERNBASE+EXTMEM..data: mapped to EXTMEM..V2P(data) -//                for the kernel's instructions and r/o data -//   data..KERNBASE+PHYSTOP: mapped to V2P(data)..PHYSTOP, -//                                  rw data + free physical memory -//   0xfe000000..0: mapped direct (devices such as ioapic) -// -// The kernel allocates physical memory for its heap and for user memory -// between V2P(end) and the end of physical memory (PHYSTOP) -// (directly addressable from end..P2V(PHYSTOP)). - -// This table defines the kernel's mappings, which are present in -// every process's page table. 
-static struct kmap { -  void *virt; -  uint phys_start; -  uint phys_end; -  int perm; -} kmap[] = { - { (void*)KERNBASE, 0,             EXTMEM,    PTE_W}, // I/O space - { (void*)KERNLINK, V2P(KERNLINK), V2P(data), 0},     // kern text+rodata - { (void*)data,     V2P(data),     PHYSTOP,   PTE_W}, // kern data+memory - { (void*)DEVSPACE, DEVSPACE,      0,         PTE_W}, // more devices -}; - -// Set up kernel part of a page table. -pde_t* -setupkvm(void) -{ -  pde_t *pgdir; -  struct kmap *k; - -  if((pgdir = (pde_t*)kalloc()) == 0) -    return 0; -  memset(pgdir, 0, PGSIZE); -  if (P2V(PHYSTOP) > (void*)DEVSPACE) -    panic("PHYSTOP too high"); -  for(k = kmap; k < &kmap[NELEM(kmap)]; k++) -    if(mappages(pgdir, k->virt, k->phys_end - k->phys_start, -                (uint)k->phys_start, k->perm) < 0) { -      freevm(pgdir); -      return 0; -    } -  return pgdir; -} - -// Allocate one page table for the machine for the kernel address -// space for scheduler processes. -void -kvmalloc(void) -{ -  kpgdir = setupkvm(); -  switchkvm(); -} - -// Switch h/w page table register to the kernel-only page table, -// for when no process is running. -void -switchkvm(void) -{ -  lcr3(V2P(kpgdir));   // switch to the kernel page table -} - -// Switch TSS and h/w page table to correspond to process p. 
-void -switchuvm(struct proc *p) -{ -  if(p == 0) -    panic("switchuvm: no process"); -  if(p->kstack == 0) -    panic("switchuvm: no kstack"); -  if(p->pgdir == 0) -    panic("switchuvm: no pgdir"); - -  pushcli(); -  mycpu()->gdt[SEG_TSS] = SEG16(STS_T32A, &mycpu()->ts, -                                sizeof(mycpu()->ts)-1, 0); -  mycpu()->gdt[SEG_TSS].s = 0; -  mycpu()->ts.ss0 = SEG_KDATA << 3; -  mycpu()->ts.esp0 = (uint)p->kstack + KSTACKSIZE; -  // setting IOPL=0 in eflags *and* iomb beyond the tss segment limit -  // forbids I/O instructions (e.g., inb and outb) from user space -  mycpu()->ts.iomb = (ushort) 0xFFFF; -  ltr(SEG_TSS << 3); -  lcr3(V2P(p->pgdir));  // switch to process's address space -  popcli(); -} - -// Load the initcode into address 0 of pgdir. -// sz must be less than a page. -void -inituvm(pde_t *pgdir, char *init, uint sz) -{ -  char *mem; - -  if(sz >= PGSIZE) -    panic("inituvm: more than a page"); -  mem = kalloc(); -  memset(mem, 0, PGSIZE); -  mappages(pgdir, 0, PGSIZE, V2P(mem), PTE_W|PTE_U); -  memmove(mem, init, sz); -} - -// Load a program segment into pgdir.  addr must be page-aligned -// and the pages from addr to addr+sz must already be mapped. -int -loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz) -{ -  uint i, pa, n; -  pte_t *pte; - -  if((uint) addr % PGSIZE != 0) -    panic("loaduvm: addr must be page aligned"); -  for(i = 0; i < sz; i += PGSIZE){ -    if((pte = walkpgdir(pgdir, addr+i, 0)) == 0) -      panic("loaduvm: address should exist"); -    pa = PTE_ADDR(*pte); -    if(sz - i < PGSIZE) -      n = sz - i; -    else -      n = PGSIZE; -    if(readi(ip, P2V(pa), offset+i, n) != n) -      return -1; -  } -  return 0; -} - -// Allocate page tables and physical memory to grow process from oldsz to -// newsz, which need not be page aligned.  Returns new size or 0 on error. 
-int -allocuvm(pde_t *pgdir, uint oldsz, uint newsz) -{ -  char *mem; -  uint a; - -  if(newsz >= KERNBASE) -    return 0; -  if(newsz < oldsz) -    return oldsz; - -  a = PGROUNDUP(oldsz); -  for(; a < newsz; a += PGSIZE){ -    mem = kalloc(); -    if(mem == 0){ -      cprintf("allocuvm out of memory\n"); -      deallocuvm(pgdir, newsz, oldsz); -      return 0; -    } -    memset(mem, 0, PGSIZE); -    if(mappages(pgdir, (char*)a, PGSIZE, V2P(mem), PTE_W|PTE_U) < 0){ -      cprintf("allocuvm out of memory (2)\n"); -      deallocuvm(pgdir, newsz, oldsz); -      kfree(mem); -      return 0; -    } -  } -  return newsz; -} - -// Deallocate user pages to bring the process size from oldsz to -// newsz.  oldsz and newsz need not be page-aligned, nor does newsz -// need to be less than oldsz.  oldsz can be larger than the actual -// process size.  Returns the new process size. -int -deallocuvm(pde_t *pgdir, uint oldsz, uint newsz) -{ -  pte_t *pte; -  uint a, pa; - -  if(newsz >= oldsz) -    return oldsz; - -  a = PGROUNDUP(newsz); -  for(; a  < oldsz; a += PGSIZE){ -    pte = walkpgdir(pgdir, (char*)a, 0); -    if(!pte) -      a = PGADDR(PDX(a) + 1, 0, 0) - PGSIZE; -    else if((*pte & PTE_P) != 0){ -      pa = PTE_ADDR(*pte); -      if(pa == 0) -        panic("kfree"); -      char *v = P2V(pa); -      kfree(v); -      *pte = 0; -    } -  } -  return newsz; -} - -// Free a page table and all the physical memory pages -// in the user part. -void -freevm(pde_t *pgdir) -{ -  uint i; - -  if(pgdir == 0) -    panic("freevm: no pgdir"); -  deallocuvm(pgdir, KERNBASE, 0); -  for(i = 0; i < NPDENTRIES; i++){ -    if(pgdir[i] & PTE_P){ -      char * v = P2V(PTE_ADDR(pgdir[i])); -      kfree(v); -    } -  } -  kfree((char*)pgdir); -} - -// Clear PTE_U on a page. Used to create an inaccessible -// page beneath the user stack. 
-void -clearpteu(pde_t *pgdir, char *uva) -{ -  pte_t *pte; - -  pte = walkpgdir(pgdir, uva, 0); -  if(pte == 0) -    panic("clearpteu"); -  *pte &= ~PTE_U; -} - -// Given a parent process's page table, create a copy -// of it for a child. -pde_t* -copyuvm(pde_t *pgdir, uint sz) -{ -  pde_t *d; -  pte_t *pte; -  uint pa, i, flags; -  char *mem; - -  if((d = setupkvm()) == 0) -    return 0; -  for(i = 0; i < sz; i += PGSIZE){ -    if((pte = walkpgdir(pgdir, (void *) i, 0)) == 0) -      panic("copyuvm: pte should exist"); -    if(!(*pte & PTE_P)) -      panic("copyuvm: page not present"); -    pa = PTE_ADDR(*pte); -    flags = PTE_FLAGS(*pte); -    if((mem = kalloc()) == 0) -      goto bad; -    memmove(mem, (char*)P2V(pa), PGSIZE); -    if(mappages(d, (void*)i, PGSIZE, V2P(mem), flags) < 0) { -      kfree(mem); -      goto bad; -    } -  } -  return d; - -bad: -  freevm(d); -  return 0; -} - -//PAGEBREAK! -// Map user virtual address to kernel address. -char* -uva2ka(pde_t *pgdir, char *uva) -{ -  pte_t *pte; - -  pte = walkpgdir(pgdir, uva, 0); -  if((*pte & PTE_P) == 0) -    return 0; -  if((*pte & PTE_U) == 0) -    return 0; -  return (char*)P2V(PTE_ADDR(*pte)); -} - -// Copy len bytes from p to user address va in page table pgdir. -// Most useful when pgdir is not the current page table. -// uva2ka ensures this only works for PTE_U pages. -int -copyout(pde_t *pgdir, uint va, void *p, uint len) -{ -  char *buf, *pa0; -  uint n, va0; - -  buf = (char*)p; -  while(len > 0){ -    va0 = (uint)PGROUNDDOWN(va); -    pa0 = uva2ka(pgdir, (char*)va0); -    if(pa0 == 0) -      return -1; -    n = PGSIZE - (va - va0); -    if(n > len) -      n = len; -    memmove(pa0 + (va - va0), buf, n); -    len -= n; -    buf += n; -    va = va0 + PGSIZE; -  } -  return 0; -} - -//PAGEBREAK! -// Blank page. -//PAGEBREAK! -// Blank page. -//PAGEBREAK! -// Blank page. - @@ -1,183 +0,0 @@ -// Routines to let C code use special x86 instructions. 
- -static inline uchar -inb(ushort port) -{ -  uchar data; - -  asm volatile("in %1,%0" : "=a" (data) : "d" (port)); -  return data; -} - -static inline void -insl(int port, void *addr, int cnt) -{ -  asm volatile("cld; rep insl" : -               "=D" (addr), "=c" (cnt) : -               "d" (port), "0" (addr), "1" (cnt) : -               "memory", "cc"); -} - -static inline void -outb(ushort port, uchar data) -{ -  asm volatile("out %0,%1" : : "a" (data), "d" (port)); -} - -static inline void -outw(ushort port, ushort data) -{ -  asm volatile("out %0,%1" : : "a" (data), "d" (port)); -} - -static inline void -outsl(int port, const void *addr, int cnt) -{ -  asm volatile("cld; rep outsl" : -               "=S" (addr), "=c" (cnt) : -               "d" (port), "0" (addr), "1" (cnt) : -               "cc"); -} - -static inline void -stosb(void *addr, int data, int cnt) -{ -  asm volatile("cld; rep stosb" : -               "=D" (addr), "=c" (cnt) : -               "0" (addr), "1" (cnt), "a" (data) : -               "memory", "cc"); -} - -static inline void -stosl(void *addr, int data, int cnt) -{ -  asm volatile("cld; rep stosl" : -               "=D" (addr), "=c" (cnt) : -               "0" (addr), "1" (cnt), "a" (data) : -               "memory", "cc"); -} - -struct segdesc; - -static inline void -lgdt(struct segdesc *p, int size) -{ -  volatile ushort pd[3]; - -  pd[0] = size-1; -  pd[1] = (uint)p; -  pd[2] = (uint)p >> 16; - -  asm volatile("lgdt (%0)" : : "r" (pd)); -} - -struct gatedesc; - -static inline void -lidt(struct gatedesc *p, int size) -{ -  volatile ushort pd[3]; - -  pd[0] = size-1; -  pd[1] = (uint)p; -  pd[2] = (uint)p >> 16; - -  asm volatile("lidt (%0)" : : "r" (pd)); -} - -static inline void -ltr(ushort sel) -{ -  asm volatile("ltr %0" : : "r" (sel)); -} - -static inline uint -readeflags(void) -{ -  uint eflags; -  asm volatile("pushfl; popl %0" : "=r" (eflags)); -  return eflags; -} - -static inline void -loadgs(ushort v) -{ -  asm volatile("movw 
%0, %%gs" : : "r" (v)); -} - -static inline void -cli(void) -{ -  asm volatile("cli"); -} - -static inline void -sti(void) -{ -  asm volatile("sti"); -} - -static inline uint -xchg(volatile uint *addr, uint newval) -{ -  uint result; - -  // The + in "+m" denotes a read-modify-write operand. -  asm volatile("lock; xchgl %0, %1" : -               "+m" (*addr), "=a" (result) : -               "1" (newval) : -               "cc"); -  return result; -} - -static inline uint -rcr2(void) -{ -  uint val; -  asm volatile("movl %%cr2,%0" : "=r" (val)); -  return val; -} - -static inline void -lcr3(uint val) -{ -  asm volatile("movl %0,%%cr3" : : "r" (val)); -} - -//PAGEBREAK: 36 -// Layout of the trap frame built on the stack by the -// hardware and by trapasm.S, and passed to trap(). -struct trapframe { -  // registers as pushed by pusha -  uint edi; -  uint esi; -  uint ebp; -  uint oesp;      // useless & ignored -  uint ebx; -  uint edx; -  uint ecx; -  uint eax; - -  // rest of trap frame -  ushort gs; -  ushort padding1; -  ushort fs; -  ushort padding2; -  ushort es; -  ushort padding3; -  ushort ds; -  ushort padding4; -  uint trapno; - -  // below here defined by x86 hardware -  uint err; -  uint eip; -  ushort cs; -  ushort padding5; -  uint eflags; - -  // below here only when crossing rings, such as from user to kernel -  uint esp; -  ushort ss; -  ushort padding6; -}; | 
