diff options
| -rw-r--r-- | .gdbinit.tmpl-riscv | 3 | ||||
| -rw-r--r-- | Makefile | 101 | ||||
| -rw-r--r-- | console.c | 189 | ||||
| -rw-r--r-- | defs.h | 42 | ||||
| -rw-r--r-- | entry.S | 245 | ||||
| -rw-r--r-- | exec.c | 22 | ||||
| -rw-r--r-- | initcode.S | 17 | ||||
| -rw-r--r-- | kalloc.c | 58 | ||||
| -rw-r--r-- | kernel.ld | 69 | ||||
| -rw-r--r-- | main.c | 103 | ||||
| -rw-r--r-- | memlayout.h | 33 | ||||
| -rw-r--r-- | mmu.h | 160 | ||||
| -rw-r--r-- | msr.h | 25 | ||||
| -rw-r--r-- | param.h | 1 | ||||
| -rw-r--r-- | proc.c | 285 | ||||
| -rw-r--r-- | proc.h | 84 | ||||
| -rw-r--r-- | riscv.h | 172 | ||||
| -rw-r--r-- | spinlock.c | 35 | ||||
| -rw-r--r-- | start.c | 34 | ||||
| -rw-r--r-- | string.c | 11 | ||||
| -rw-r--r-- | swtch.S | 59 | ||||
| -rw-r--r-- | syscall.c | 86 | ||||
| -rw-r--r-- | sysfile.c | 12 | ||||
| -rw-r--r-- | sysproc.c | 27 | ||||
| -rw-r--r-- | trampoline.S | 108 | ||||
| -rw-r--r-- | trap.c | 166 | ||||
| -rw-r--r-- | traps.h | 36 | ||||
| -rw-r--r-- | uart.c | 74 | ||||
| -rw-r--r-- | vm.c | 498 | ||||
| -rw-r--r-- | x86.h | 198 | 
30 files changed, 1094 insertions, 1859 deletions
| diff --git a/.gdbinit.tmpl-riscv b/.gdbinit.tmpl-riscv new file mode 100644 index 0000000..6ea36e1 --- /dev/null +++ b/.gdbinit.tmpl-riscv @@ -0,0 +1,3 @@ +set architecture riscv +target remote 127.0.0.1:1234 +symbol-file kernel @@ -1,4 +1,20 @@  OBJS = \ +  start.o \ +  console.o \ +  uart.o \ +  kalloc.o \ +  spinlock.o \ +  string.o \ +  main.o \ +  vm.o \ +  proc.o \ +  swtch.o \ +  trampoline.o \ +  trap.o \ +  syscall.o \ +  sysproc.o + +XXXOBJS = \  	bio.o\  	console.o\  	exec.o\ @@ -28,48 +44,23 @@ OBJS = \  	vectors.o\  	vm.o\ -# Cross-compiling (e.g., on Mac OS X) -# TOOLPREFIX = i386-jos-elf - -# Using native tools (e.g., on X86 Linux) +# riscv64-unknown-elf- or riscv64-linux-gnu- +# perhaps in /opt/riscv/bin  #TOOLPREFIX =   # Try to infer the correct TOOLPREFIX if not set  ifndef TOOLPREFIX -TOOLPREFIX := $(shell if i386-jos-elf-objdump -i 2>&1 | grep '^elf32-i386$$' >/dev/null 2>&1; \ -	then echo 'i386-jos-elf-'; \ -	elif objdump -i 2>&1 | grep 'elf32-i386' >/dev/null 2>&1; \ -	then echo ''; \ +TOOLPREFIX := $(shell if riscv64-unknown-elf-objdump -i 2>&1 | grep 'elf64-big' >/dev/null 2>&1; \ +	then echo 'riscv64-unknown-elf-'; \ +	elif riscv64-linux-gnu-objdump -i 2>&1 | grep 'elf64-big' >/dev/null 2>&1; \ +	then echo 'riscv64-linux-gnu-'; \  	else echo "***" 1>&2; \ -	echo "*** Error: Couldn't find an i386-*-elf version of GCC/binutils." 1>&2; \ -	echo "*** Is the directory with i386-jos-elf-gcc in your PATH?" 1>&2; \ -	echo "*** If your i386-*-elf toolchain is installed with a command" 1>&2; \ -	echo "*** prefix other than 'i386-jos-elf-', set your TOOLPREFIX" 1>&2; \ -	echo "*** environment variable to that prefix and run 'make' again." 1>&2; \ +	echo "*** Error: Couldn't find an riscv64 version of GCC/binutils." 1>&2; \  	echo "*** To turn off this error, run 'gmake TOOLPREFIX= ...'." 
1>&2; \  	echo "***" 1>&2; exit 1; fi)  endif -# If the makefile can't find QEMU, specify its path here -QEMU = qemu-system-x86_64 - -# Try to infer the correct QEMU -ifndef QEMU -QEMU = $(shell if which qemu > /dev/null; \ -	then echo qemu; exit; \ -	elif which qemu-system-i386 > /dev/null; \ -	then echo qemu-system-i386; exit; \ -	elif which qemu-system-x86_64 > /dev/null; \ -	then echo qemu-system-x86_64; exit; \ -	else \ -	qemu=/Applications/Q.app/Contents/MacOS/i386-softmmu.app/Contents/MacOS/i386-softmmu; \ -	if test -x $$qemu; then echo $$qemu; exit; fi; fi; \ -	echo "***" 1>&2; \ -	echo "*** Error: Couldn't find a working QEMU executable." 1>&2; \ -	echo "*** Is the directory containing the qemu binary in your PATH" 1>&2; \ -	echo "*** or have you tried setting the QEMU variable in Makefile?" 1>&2; \ -	echo "***" 1>&2; exit 1) -endif +QEMU = qemu-system-riscv64  CC = $(TOOLPREFIX)gcc  AS = $(TOOLPREFIX)gas @@ -77,15 +68,10 @@ LD = $(TOOLPREFIX)ld  OBJCOPY = $(TOOLPREFIX)objcopy  OBJDUMP = $(TOOLPREFIX)objdump -XFLAGS = -m64 -mcmodel=large -ggdb -# CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -Werror -fno-omit-frame-pointer -CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -Wall -MD -ggdb -Werror -fno-omit-frame-pointer -CFLAGS += -ffreestanding -fno-common -nostdlib $(XFLAGS) +CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -Wall -MD -ggdb -Werror -fno-omit-frame-pointer -O +CFLAGS = -mcmodel=medany +CFLAGS += -ffreestanding -fno-common -nostdlib -mno-relax  CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector) -ASFLAGS = -gdwarf-2 -Wa,-divide $(XFLAGS) -# FreeBSD ld wants ``elf_i386_fbsd'' -LDFLAGS += -m $(shell $(LD) -V | grep elf_x86_64 2>/dev/null | head -n 1) -LDFLAGS += -z max-page-size=4096  # Disable PIE when possible (for Ubuntu 16.10 toolchain)  ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]no-pie'),) @@ -95,21 +81,17 @@ 
ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]nopie'),)  CFLAGS += -fno-pie -nopie  endif -kernel: $(OBJS) entry.o entryother initcode kernel.ld -	$(LD) $(LDFLAGS) -T kernel.ld -o kernel entry.o $(OBJS) -b binary initcode entryother +LDFLAGS = -z max-page-size=4096 + +kernel: $(OBJS) entry.o kernel.ld  +	$(LD) $(LDFLAGS) -T kernel.ld -o kernel entry.o $(OBJS)   	$(OBJDUMP) -S kernel > kernel.asm  	$(OBJDUMP) -t kernel | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernel.sym -entryother: entryother.S -	$(CC) $(CFLAGS) -fno-pic -nostdinc -I. -c entryother.S -	$(LD) $(LDFLAGS) -N -e start -Ttext 0x7000 -o bootblockother.o entryother.o -	$(OBJCOPY) -S -O binary -j .text bootblockother.o entryother -	$(OBJDUMP) -S bootblockother.o > entryother.asm -  initcode: initcode.S  	$(CC) $(CFLAGS) -nostdinc -I. -c initcode.S -	$(LD) $(LDFLAGS) -N -e start -Ttext 0 -o initcode.out initcode.o -	$(OBJCOPY) -S -O binary initcode.out initcode +	#$(LD) $(LDFLAGS) -N -e start -Ttext 0 -o initcode.out initcode.o +	#$(OBJCOPY) -S -O binary initcode.out initcode  	$(OBJDUMP) -S initcode.o > initcode.asm  tags: $(OBJS) entryother.S _init @@ -186,19 +168,18 @@ QEMUGDB = $(shell if $(QEMU) -help | grep -q '^-gdb'; \  	then echo "-gdb tcp::$(GDBPORT)"; \  	else echo "-s -p $(GDBPORT)"; fi)  ifndef CPUS -CPUS := 2 +CPUS := 1  endif -QEMUOPTS = -kernel kernel -drive file=fs.img,index=1,media=disk,format=raw -smp $(CPUS) -m 512 $(QEMUEXTRA) -qemu: fs.img -	$(QEMU) -serial mon:stdio $(QEMUOPTS) +QEMUOPTS = -machine virt -kernel kernel -m 3G -smp $(CPUS) -nographic +#QEMUOPTS += -initrd fs.img -qemu-nox: fs.img kernel -	$(QEMU) -nographic $(QEMUOPTS) +qemu: kernel +	$(QEMU) $(QEMUOPTS) -.gdbinit: .gdbinit.tmpl-x64 -	sed "s/localhost:1234/localhost:$(GDBPORT)/" < $^ > $@ +.gdbinit: .gdbinit.tmpl-riscv +	sed "s/:1234/:$(GDBPORT)/" < $^ > $@ -qemu-gdb: fs.img kernel .gdbinit +qemu-gdb: kernel .gdbinit  	@echo "*** Now run 'gdb'." 
1>&2  	$(QEMU) $(QEMUOPTS) -S $(QEMUGDB) @@ -5,17 +5,14 @@  #include <stdarg.h>  #include "types.h" -#include "defs.h"  #include "param.h" -#include "traps.h"  #include "spinlock.h"  #include "sleeplock.h"  #include "fs.h"  #include "file.h"  #include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" +#include "riscv.h" +#include "defs.h"  static void consputc(int); @@ -28,6 +25,12 @@ static struct {  static char digits[] = "0123456789abcdef"; +void +consoleinit(void) +{ +  initlock(&cons.lock, "console"); +} +  static void  printint(int xx, int base, int sign)  { @@ -66,7 +69,7 @@ printptr(uint64 x) {  // Print to the console. only understands %d, %x, %p, %s.  void -cprintf(char *fmt, ...) +printf(char *fmt, ...)  {    va_list ap;    int i, c, locking; @@ -122,67 +125,20 @@ cprintf(char *fmt, ...)  void  panic(char *s)  { -  int i; -  uint64 pcs[10]; - -  cli(); -  cons.locking = 0; -  // use lapiccpunum so that we can call panic from mycpu() -  cprintf("lapicid %d: panic: ", lapicid()); -  cprintf(s); -  cprintf("\n"); -  getcallerpcs(&s, pcs); -  for(i=0; i<10; i++) -    cprintf(" %p", pcs[i]); +  printf("panic: "); +  printf(s); +  printf("\n");    panicked = 1; // freeze other CPU    for(;;)      ;  } -//PAGEBREAK: 50  #define BACKSPACE 0x100 -#define CRTPORT 0x3d4 -static ushort *crt = (ushort*)P2V(0xb8000);  // CGA memory - -static void -cgaputc(int c) -{ -  int pos; - -  // Cursor position: col + 80*row. -  outb(CRTPORT, 14); -  pos = inb(CRTPORT+1) << 8; -  outb(CRTPORT, 15); -  pos |= inb(CRTPORT+1); - -  if(c == '\n') -    pos += 80 - pos%80; -  else if(c == BACKSPACE){ -    if(pos > 0) --pos; -  } else -    crt[pos++] = (c&0xff) | 0x0700;  // black on white - -  if(pos < 0 || pos > 25*80) -    panic("pos under/overflow"); - -  if((pos/80) >= 24){  // Scroll up. 
-    memmove(crt, crt+80, sizeof(crt[0])*23*80); -    pos -= 80; -    memset(crt+pos, 0, sizeof(crt[0])*(24*80 - pos)); -  } - -  outb(CRTPORT, 14); -  outb(CRTPORT+1, pos>>8); -  outb(CRTPORT, 15); -  outb(CRTPORT+1, pos); -  crt[pos] = ' ' | 0x0700; -}  void  consputc(int c)  {    if(panicked){ -    cli();      for(;;)        ;    } @@ -191,125 +147,4 @@ consputc(int c)      uartputc('\b'); uartputc(' '); uartputc('\b');    } else      uartputc(c); -  cgaputc(c);  } - -#define INPUT_BUF 128 -struct { -  char buf[INPUT_BUF]; -  uint r;  // Read index -  uint w;  // Write index -  uint e;  // Edit index -} input; - -#define C(x)  ((x)-'@')  // Control-x - -void -consoleintr(int (*getc)(void)) -{ -  int c, doprocdump = 0; - -  acquire(&cons.lock); -  while((c = getc()) >= 0){ -    switch(c){ -    case C('P'):  // Process listing. -      // procdump() locks cons.lock indirectly; invoke later -      doprocdump = 1; -      break; -    case C('U'):  // Kill line. -      while(input.e != input.w && -            input.buf[(input.e-1) % INPUT_BUF] != '\n'){ -        input.e--; -        consputc(BACKSPACE); -      } -      break; -    case C('H'): case '\x7f':  // Backspace -      if(input.e != input.w){ -        input.e--; -        consputc(BACKSPACE); -      } -      break; -    default: -      if(c != 0 && input.e-input.r < INPUT_BUF){ -        c = (c == '\r') ? '\n' : c; -        input.buf[input.e++ % INPUT_BUF] = c; -        consputc(c); -        if(c == '\n' || c == C('D') || input.e == input.r+INPUT_BUF){ -          input.w = input.e; -          wakeup(&input.r); -        } -      } -      break; -    } -  } -  release(&cons.lock); -  if(doprocdump) { -    procdump();  // now call procdump() wo. 
cons.lock held -  } -} - -int -consoleread(struct inode *ip, char *dst, int n) -{ -  uint target; -  int c; - -  iunlock(ip); -  target = n; -  acquire(&cons.lock); -  while(n > 0){ -    while(input.r == input.w){ -      if(myproc()->killed){ -        release(&cons.lock); -        ilock(ip); -        return -1; -      } -      sleep(&input.r, &cons.lock); -    } -    c = input.buf[input.r++ % INPUT_BUF]; -    if(c == C('D')){  // EOF -      if(n < target){ -        // Save ^D for next time, to make sure -        // caller gets a 0-byte result. -        input.r--; -      } -      break; -    } -    *dst++ = c; -    --n; -    if(c == '\n') -      break; -  } -  release(&cons.lock); -  ilock(ip); - -  return target - n; -} - -int -consolewrite(struct inode *ip, char *buf, int n) -{ -  int i; - -  iunlock(ip); -  acquire(&cons.lock); -  for(i = 0; i < n; i++) -    consputc(buf[i] & 0xff); -  release(&cons.lock); -  ilock(ip); - -  return n; -} - -void -consoleinit(void) -{ -  initlock(&cons.lock, "console"); - -  devsw[CONSOLE].write = consolewrite; -  devsw[CONSOLE].read = consoleread; -  cons.locking = 1; - -  ioapicenable(IRQ_KBD, 0); -} - @@ -19,7 +19,7 @@ void            bwrite(struct buf*);  // console.c  void            consoleinit(void); -void            cprintf(char*, ...); +void            printf(char*, ...);  void            consoleintr(int(*)(void));  void            panic(char*) __attribute__((noreturn)); @@ -65,10 +65,9 @@ extern uchar    ioapicid;  void            ioapicinit(void);  // kalloc.c -char*           kalloc(void); -void            kfree(char*); -void            kinit1(void*, void*); -void            kinit2(void*, void*); +void*           kalloc(void); +void            kfree(void *); +void            kinit();  // kbd.c  void            kbdintr(void); @@ -112,7 +111,7 @@ int             kill(int);  struct cpu*     mycpu(void);  struct cpu*     getmycpu(void);  struct proc*    myproc(); -void            pinit(void); +void            
procinit(void);  void            procdump(void);  void            scheduler(void) __attribute__((noreturn));  void            sched(void); @@ -124,7 +123,7 @@ void            wakeup(void*);  void            yield(void);  // swtch.S -void            swtch(struct context**, struct context*); +void            swtch(struct context*, struct context*);  // spinlock.c  void            acquire(struct spinlock*); @@ -158,16 +157,16 @@ int             argaddr(int, uint64 *);  int             fetchint(uint64, int*);  int             fetchstr(uint64, char**);  int             fetchaddr(uint64, uint64*); -void            syscall(struct sysframe*); +void            syscall();  // timer.c  void            timerinit(void);  // trap.c -void            idtinit(void);  extern uint     ticks; -void            tvinit(void); +void            trapinit(void);  extern struct spinlock tickslock; +void            usertrapret(void);  // uart.c  void            uartinit(void); @@ -175,20 +174,15 @@ void            uartintr(void);  void            uartputc(int);  // vm.c -void            seginit(void); -void            kvmalloc(void); -pde_t*          setupkvm(void); -char*           uva2ka(pde_t*, char*); -int             allocuvm(pde_t*, uint, uint); -int             deallocuvm(pde_t*, uint64, uint64); -void            freevm(pde_t*, uint64); -void            inituvm(pde_t*, char*, uint); -int             loaduvm(pde_t*, char*, struct inode*, uint, uint); -pde_t*          copyuvm(pde_t*, uint); -void            switchuvm(struct proc*); -void            switchkvm(void); -int             copyout(pde_t*, uint, void*, uint); -void            clearpteu(pde_t *pgdir, char *uva); +void            kvminit(void); +void            kvmswitch(void); +pagetable_t     uvmcreate(void); +void            uvminit(pagetable_t, char *, uint); +int             uvmdealloc(pagetable_t, uint64, uint64); +void            uvmcopy(pagetable_t, pagetable_t, uint64); +void            uvmfree(pagetable_t, uint64); +void   
         mappages(pagetable_t, uint64, uint64, uint64, int); +void            unmappages(pagetable_t, uint64, uint64, int);  // number of elements in fixed-size array  #define NELEM(x) (sizeof(x)/sizeof((x)[0])) @@ -1,223 +1,22 @@ -# x86-64 bootstrap, assuming load by MultiBoot-compliant loader. -# The MutliBoot specification is at: -# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html -# GRUB is a MultiBoot loader, as is qemu's -kernel option. - -#include "mmu.h" -#include "memlayout.h"   - -# STACK is the size of the bootstrap stack. -#define STACK 8192 - -# MultiBoot header. -# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html#Header-layout -.align 4 -.text -.globl multiboot_header -multiboot_header: -  #define magic 0x1badb002 -  #define flags (1<<16 | 1<<0) -  .long magic -  .long flags -  .long (- magic - flags)  # checksum -  .long V2P_WO(multiboot_header)  # header address -  .long V2P_WO(multiboot_header)  # load address -  .long V2P_WO(edata)       # load end address -  .long V2P_WO(end)         # bss end address -  .long V2P_WO(start)       # entry address - -# Entry point jumped to by boot loader.  Running in 32-bit mode. -# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html#Machine-state -# -#       EAX = 0x2badb002 -#       EBX = address of multiboot information structure -#       CS = 32-bit read/execute code segment with identity map -#       DS, ES, FS, GS, SS = 32-bit read/write data segment with identity map -#       A20 gate = enabled -#       CR0 = PE set, PG clear -#       EFLAGS = VM clear, IF clear -# -.code32 -.globl start -start: -  # Tell BIOS to do "warm reboot" when we shut down. -  movw $0x1234, 0x472 - -  # Set up multiboot arguments for main. -  movl %eax, %edi -  movl %ebx, %esi - -  # Initialize stack. -  movl $V2P_WO(stack+STACK), %esp -   -  # Zero bss.  QEMU's MultiBoot seems not to. -  # It's possible that the header above is not right, but it looks right. 
-  # %edi is holding multiboot argument, so save in another register. -  # (The stack is in the bss.) -  movl %edi, %edx -  movl $V2P_WO(edata), %edi -  movl $V2P_WO(end), %ecx -  subl $V2P_WO(edata), %ecx -  movl $0, %eax -  cld -  rep stosb -  movl %edx, %edi - -  call loadgdt -   -  # Enter new 32-bit code segment (already in 32-bit mode). -  ljmp $SEG_KCODE32, $V2P_WO(start32)  // code32 segment selector -   -start32: -  # Initialize page table. -  call initpagetables -  call init32e -   -  movl $V2P_WO(start64), %eax -  # Enter 64-bit mode. -  ljmp $SEG_KCODE, $V2P_WO(tramp64)  // code64 segment selector - -.code64 -start64: -  # Load VA of stack -  movabsq $(stack+STACK), %rsp -  # Clear frame pointer for stack walks -  movl $0, %ebp -  # Call into C code. -  call main -  # should not return from main -  jmp . - -.code32 -.global apstart -apstart: -  call loadgdt -  ljmp $SEG_KCODE32, $V2P_WO(apstart32)  // code32 segment selector -   -apstart32: -  call init32e -  movl $V2P_WO(apstart64), %eax -  ljmp $SEG_KCODE, $V2P_WO(tramp64)  // code64 segment selector - -.code64        -apstart64: -  # Remember (from bootothers), that our kernel stack pointer is -  # at the top of our temporary stack. -  popq %rax -  movq %rax, %rsp -  movq $0, %rbp -  call apmain -  jmp . -   -.code64 -tramp64: -  # The linker thinks we are running at tramp64, but we're actually -  # running at PADDR(tramp64), so use an explicit calculation to -  # load and jump to the correct address.  %rax should hold the -  # physical address of the jmp target. -  movq $KERNBASE, %r11 -  addq %r11, %rax -  jmp *%rax - -# Initial stack -.comm stack, STACK - -# Page tables.  See section 4.5 of 253668.pdf. -# We map the first GB of physical memory at 0 and at 1 TB (not GB) before -# the end of virtual memory.  At boot time we are using the mapping at 0 -# but during ordinary execution we use the high mapping. 
-# The intent is that after bootstrap the kernel can expand this mapping -# to cover all the available physical memory. -# This would be easier if we could use the PS bit to create GB-sized entries -# and skip the pdt table, but not all chips support it, and QEMU doesn't. -.align 4096 -pml4: -  .quad V2P_WO(pdpt) + PTE_P + PTE_W   // present, read/write -  .quad 0 -  .space 4096 - 2*16 -  .quad V2P_WO(pdpt) + PTE_P + PTE_W -  .quad 0 - -.align 4096 -pdpt: -  .quad V2P_WO(pdt) + PTE_P + PTE_W -  .space 4096 - 8 - -.align 4096 -pdt: -  // Filled in below. -  .space 4096 - -.code32 -initpagetables: -  pushl %edi -  pushl %ecx -  pushl %eax - -  // Set up 64-bit entry in %edx:%eax. -  // Base address 0, present, read/write, large page. -  movl $(0 | PTE_P | PTE_W | PTE_PS), %eax -  movl $0, %edx - -  // Fill in 512 entries at pdt. -  movl $V2P_WO(pdt), %edi -  movl $512, %ecx -1: -  // Write this 64-bit entry. -  movl %eax, 0(%edi) -  movl %edx, 4(%edi) -  addl $8, %edi -  // 64-bit add to prepare address for next entry. -  // Because this is a large page entry, it covers 512 4k pages (2 MB). -  add $(512*4096), %eax -  adc $0, %edx -  loop 1b - -  popl %eax -  popl %ecx -  popl %edi -  ret - -# Initialize IA-32e mode.  See section 9.8.5 of 253668.pdf. -init32e: -  # Set CR4.PAE and CR4.PSE = 1. -  movl %cr4, %eax -  orl $0x30, %eax -  movl %eax, %cr4 - -  # Load CR3 with physical base address of level 4 page table. -  movl $V2P_WO(pml4), %eax -  movl %eax, %cr3 -   -  # Enable IA-32e mode by setting IA32_EFER.LME = 1. -  # Also turn on IA32_EFER.SCE (syscall enable). -  movl $0xc0000080, %ecx -  rdmsr -  orl $0x101, %eax -  wrmsr - -  # Enable paging by setting CR0.PG = 1. 
-  movl %cr0, %eax -  orl $0x80000000, %eax    -  movl %eax, %cr0 -  nop -  nop - -  ret - -loadgdt: -  subl $8, %esp -  movl $V2P_WO(bootgdt), 4(%esp) -  movw $(8*NSEGS-1), 2(%esp) -  lgdt 2(%esp) -  addl $8, %esp - -  movl $SEG_KDATA, %eax  // data segment selector -  movw %ax, %ds -  movw %ax, %es -  movw %ax, %ss -  movl $0, %eax  // null segment selector -  movw %ax, %fs -  movw %ax, %gs - -  ret +	# qemu -kernel starts at 0x1000. the instructions +        # there seem to be provided by qemu, as if it +        # were a ROM. the code at 0x1000 jumps to +        # 0x8000000, the _start function here, +        # in machine mode. +.section .data +.globl stack0 +.section .text +.globl mstart +.section .text +.globl _entry +_entry: +	# set up a stack for C; stack0 is declared in start. +        la sp, stack0 +        addi sp, sp, 1024 +        addi sp, sp, 1024 +        addi sp, sp, 1024 +        addi sp, sp, 1024 +	# jump to mstart() in start.c +        call mstart +junk: +        j junk @@ -19,8 +19,8 @@ exec(char *path, char **argv)    struct inode *ip;    struct proghdr ph;    pde_t *pgdir, *oldpgdir; -  struct proc *curproc = myproc(); -  uint64 oldsz = curproc->sz; +  struct proc *p = myproc(); +  uint64 oldsz = p->sz;    begin_op(); @@ -85,8 +85,8 @@ exec(char *path, char **argv)    ustack[1] = argc;    ustack[2] = sp - (argc+1)*sizeof(uint64);  // argv pointer -  curproc->sf->rdi = argc; -  curproc->sf->rsi = sp - (argc+1)*sizeof(uint64); +  p->sf->rdi = argc; +  p->sf->rsi = sp - (argc+1)*sizeof(uint64);    sp -= (3+argc+1) * sizeof(uint64);    if(copyout(pgdir, sp, ustack, (3+argc+1)*sizeof(uint64)) < 0) @@ -96,15 +96,15 @@ exec(char *path, char **argv)    for(last=s=path; *s; s++)      if(*s == '/')        last = s+1; -  safestrcpy(curproc->name, last, sizeof(curproc->name)); +  safestrcpy(p->name, last, sizeof(p->name));    // Commit to the user image. 
-  oldpgdir = curproc->pgdir; -  curproc->pgdir = pgdir; -  curproc->sz = sz; -  curproc->sf->rcx = elf.entry;  // main -  curproc->sf->rsp = sp; -  switchuvm(curproc); +  oldpgdir = p->pgdir; +  p->pgdir = pgdir; +  p->sz = sz; +  p->sf->rcx = elf.entry;  // main +  p->sf->rsp = sp; +  switchuvm(p);    freevm(oldpgdir, oldsz);    return 0; @@ -2,22 +2,20 @@  # This code runs in user space.  #include "syscall.h" -#include "traps.h" -  # exec(init, argv)  .globl start  start: -  mov $init, %rdi -  mov $argv, %rsi -  mov $SYS_exec, %rax   -  syscall +        la a0, init +        la a1, argv +        li a7, SYS_exec +        ecall  # for(;;) exit();  exit: -  mov $SYS_exit, %rax -  syscall -  jmp exit +        li a7, SYS_exit +        ecall +        jal exit  # char init[] = "/init\0";  init: @@ -28,4 +26,3 @@ init:  argv:    .long init    .long 0 - @@ -3,13 +3,14 @@  // and pipe buffers. Allocates 4096-byte pages.  #include "types.h" -#include "defs.h"  #include "param.h"  #include "memlayout.h" -#include "mmu.h"  #include "spinlock.h" +#include "riscv.h" +#include "defs.h" + +void freerange(void *pa_start, void *pa_end); -void freerange(void *vstart, void *vend);  extern char end[]; // first address after kernel loaded from ELF file                     // defined by the kernel linker script in kernel.ld @@ -19,36 +20,22 @@ struct run {  struct {    struct spinlock lock; -  int use_lock;    struct run *freelist;  } kmem; -// Initialization happens in two phases. -// 1. main() calls kinit1() while still using entrypgdir to place just -// the pages mapped by entrypgdir on free list. -// 2. main() calls kinit2() with the rest of the physical pages -// after installing a full page table that maps them on all cores.  
void -kinit1(void *vstart, void *vend) +kinit()  {    initlock(&kmem.lock, "kmem"); -  kmem.use_lock = 0; -  freerange(vstart, vend); -} - -void -kinit2(void *vstart, void *vend) -{ -  freerange(vstart, vend); -  kmem.use_lock = 1; +  freerange(end, (void*)PHYSTOP);  }  void -freerange(void *vstart, void *vend) +freerange(void *pa_start, void *pa_end)  {    char *p; -  p = (char*)PGROUNDUP((uint64)vstart); -  for(; p + PGSIZE <= (char*)vend; p += PGSIZE) +  p = (char*)PGROUNDUP((uint64)pa_start); +  for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE)      kfree(p);  }  //PAGEBREAK: 21 @@ -57,42 +44,37 @@ freerange(void *vstart, void *vend)  // call to kalloc().  (The exception is when  // initializing the allocator; see kinit above.)  void -kfree(char *v) +kfree(void *pa)  {    struct run *r; -  if((uint64)v % PGSIZE || v < end || V2P(v) >= PHYSTOP) +  if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)      panic("kfree");    // Fill with junk to catch dangling refs. -  memset(v, 1, PGSIZE); +  memset(pa, 1, PGSIZE); -  if(kmem.use_lock) -    acquire(&kmem.lock); -  r = (struct run*)v; +  acquire(&kmem.lock); +  r = (struct run*)pa;    r->next = kmem.freelist;    kmem.freelist = r; -  if(kmem.use_lock) -    release(&kmem.lock); +  release(&kmem.lock);  }  // Allocate one 4096-byte page of physical memory.  // Returns a pointer that the kernel can use.  // Returns 0 if the memory cannot be allocated. 
-char* +void *  kalloc(void)  {    struct run *r; -  if(kmem.use_lock) -    acquire(&kmem.lock); +  acquire(&kmem.lock);    r = kmem.freelist;    if(r)      kmem.freelist = r->next; -  if(kmem.use_lock) -    release(&kmem.lock); -  if(r != 0 && (uint64) r < KERNBASE) -    panic("kalloc"); -  return (char*)r; +  release(&kmem.lock); +  memset((char*)r, 5, PGSIZE); // fill with junk +  return (void*)r;  } @@ -1,50 +1,33 @@ -OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") -OUTPUT_ARCH(i386:x86-64) +OUTPUT_ARCH( "riscv" ) +ENTRY( _entry )  SECTIONS  { -	. = 0xFFFFFF0000100000; -	PROVIDE(text = .); -	.text : AT(0x100000) { -		*(.text .stub .text.* .gnu.linkonce.t.*) -	} -	.rodata : { -		*(.rodata .rodata.* .gnu.linkonce.r.*) -	} +  /* +   * ensure that entry.S / _entry is at 0x80000000, +   * where qemu's -kernel jumps. +   */ +  . = 0x80000000; +  .text : +  { +    *(.text) +    . = ALIGN(0x1000); +    *(trampoline) +  } -	/* Include debugging information in kernel memory */ -	.stab : { -		PROVIDE(__STAB_BEGIN__ = .); -		*(.stab); -		PROVIDE(__STAB_END__ = .); -		BYTE(0)		/* Force the linker to allocate space -				   for this section */ -	} +  . = ALIGN(0x1000); +  PROVIDE(etext = .); -	.stabstr : { -		PROVIDE(__STABSTR_BEGIN__ = .); -		*(.stabstr); -		PROVIDE(__STABSTR_END__ = .); -		BYTE(0)		/* Force the linker to allocate space -				   for this section */ -	} +  /* +   * make sure end is after data and bss. +   */ +  .data : { +    *(.data) +  } +  bss : { +    *(.bss) +  } -	. = ALIGN(0x1000); - -        /* Conventionally, Unix linkers provide pseudo-symbols -         * etext, edata, and end, at the end of the text, data, and bss. -         * For the kernel mapping, we need the address at the beginning -         * of the data section, but that's not one of the conventional -         * symbols, because the convention started before there was a -         * read-only rodata section between text and data. 
*/ -        PROVIDE(data = .); -	.data : { -		*(.data) -	} -	bss : { -		PROVIDE(edata = .); -		*(.bss) -		*(COMMON) -		PROVIDE(end = .); -	} +  . = ALIGN(0x1000); +  PROVIDE(end = .);  } @@ -1,105 +1,28 @@  #include "types.h" -#include "defs.h"  #include "param.h"  #include "memlayout.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" - -extern pde_t *kpgdir; -extern char end[]; // first address after kernel loaded from ELF file - -static void mpmain(void)  __attribute__((noreturn)); -static void startothers(void); - +#include "riscv.h" +#include "defs.h"  // Bootstrap processor starts running C code here.  // Allocate a real stack and switch to it, first  // doing some setup required for memory allocator to work. -int -main(uint64 mbmagic, uint64 mbaddr) +void +main()  { -  if(mbmagic != 0x2badb002) -       panic("multiboot header not found"); - -  kinit1(end, P2V(4*1024*1024)); // phys page allocator -  kvmalloc();      // kernel page table -  mpinit();        // detect other processors -  lapicinit();     // interrupt controller -  seginit();       // segment descriptors -  picinit();       // disable pic -  ioapicinit();    // another interrupt controller -  consoleinit();   // console hardware    uartinit();      // serial port -  pinit();         // process table -  tvinit();        // trap vectors +  consoleinit(); +  printf("entering main()\n"); +  kinit();         // physical page allocator +  kvminit();       // kernel page table +  procinit();      // process table +  trapinit();      // trap vectors +#if 0    binit();         // buffer cache    fileinit();      // file table    ideinit();       // disk -   -  startothers();   // start other processors -  -  kinit2(P2V(4*1024*1024), P2V(PHYSTOP)); // must come after startothers() +#endif    userinit();      // first user process -  mpmain();         -  return 0; -} - -extern struct cpu* getmycpu(); - -// Common CPU setup code. 
-static void -mpmain(void) -{ -  cprintf("cpu%d: starting %d\n", cpuid(), cpuid()); -  idtinit();       // load idt register -  xchg(&(mycpu()->started), 1); // tell startothers() we're up -  scheduler();     // start running processes -} -// AP processors jump here from entryother.S. -void -apmain(void) -{ -  switchkvm(); -  seginit(); -  lapicinit(); -  mpmain(); -} - -void apstart(void); - -// Start the non-boot (AP) processors. -static void -startothers(void) -{ -  extern uchar _binary_entryother_start[], _binary_entryother_size[]; -  uchar *code; -  struct cpu *c; -  char *stack; - -  // Write entry code to unused memory at 0x7000. -  // The linker has placed the image of entryother.S in -  // _binary_entryother_start. -  code = P2V(0x7000); -  memmove(code, _binary_entryother_start, (uint64)_binary_entryother_size); - -  for(c = cpus; c < cpus+ncpu; c++){ -    if(c == mycpu())  // We've started already. -      continue; - -    // Tell entryother.S what stack to use, where to enter, and what -    // pgdir to use. We cannot use kpgdir yet, because the AP processor -    // is running in low  memory, so we use entrypgdir for the APs too. 
-    stack = kalloc(); -    *(uint32*)(code-4) = V2P(apstart); -    *(uint64*)(code-12) = (uint64) (stack+KSTACKSIZE); - -    lapicstartap(c->apicid, V2P(code)); - -    // wait for cpu to finish mpmain() -    while(c->started == 0) -      ; -  } +  scheduler();          } - diff --git a/memlayout.h b/memlayout.h index 87818d3..798621e 100644 --- a/memlayout.h +++ b/memlayout.h @@ -1,16 +1,25 @@ -// Memory layout +// Physical memory layout -#define EXTMEM  0x100000            // Start of extended memory -#define PHYSTOP 0xE000000           // Top physical memory -#define DEVSPACE 0xFE000000         // Other devices are top of 32-bit address space -#define DEVSPACETOP 0x100000000 +// qemu -machine virt is set up like this: +// 00001000 -- boot ROM, provided by qemu +// 10000000 -- uart0 registers +// 80000000 -- boot ROM jumps here in machine mode +// unused RAM after 80000000. -// Key addresses for address space layout (see kmap in vm.c for layout) -#define KERNBASE  0xFFFFFF0000000000  // First kernel virtual address -#define KERNLINK (KERNBASE+EXTMEM)  // Address where kernel is linked +// the kernel uses physical memory thus: +// 80000000 -- entry.S, then kernel text and data +// end -- start of kernel page allocation area +// PHYSTOP -- end RAM used by the kernel -#define V2P(a) (((uint64) (a)) - KERNBASE) -#define P2V(a) ((void *)(((char *) (a)) + KERNBASE)) +// registers start here in physical memory. +#define UART0 0x10000000L -#define V2P_WO(x) ((x) - KERNBASE)    // same as V2P, but without casts -#define P2V_WO(x) ((x) + KERNBASE)    // same as P2V, but without casts +// the kernel expects there to be RAM +// for use by the kernel and user pages +// from physical address 0x80000000 to PHYSTOP. +#define KERNBASE 0x80000000L +#define PHYSTOP (KERNBASE + 64*1024*1024) + +// map the trampoline page to the highest address, +// in both user and kernel space. 
+#define TRAMPOLINE (MAXVA - PGSIZE) @@ -1,160 +0,0 @@ -// This file contains definitions for the -// x86 memory management unit (MMU). - -// Eflags register -#define FL_TF           0x00000100      // Trap Flag -#define FL_IF           0x00000200      // Interrupt Enable - - -// Control Register flags -#define CR0_PE          0x00000001      // Protection Enable -#define CR0_WP          0x00010000      // Write Protect -#define CR0_PG          0x80000000      // Paging - -#define CR4_PSE         0x00000010      // Page size extension - -// Segment selectors (indexes) in our GDTs. -// Defined by our convention, not the architecture. -#define SEG_KCODE32 (1<<3)  // kernel 32-bit code segment -#define SEG_KCODE   (2<<3)  // kernel code segment -#define SEG_KDATA   (3<<3)  // kernel data segment -#define SEG_TSS     (4<<3)  // tss segment - takes two slots -#define SEG_UDATA   (6<<3)  // user data segment -#define SEG_UCODE   (7<<3)  // user code segment - -#define NSEGS 8 - -#ifndef __ASSEMBLER__ -struct segdesc { -	uint16 limit0; -	uint16 base0; -	uint8 base1; -	uint8 bits; -	uint8 bitslimit1; -	uint8 base2; -}; - -// SEGDESC constructs a segment descriptor literal -// with the given, base, limit, and type bits. -#define SEGDESC(base, limit, bits) (struct segdesc){ \ -	(limit)&0xffff, (base)&0xffff, \ -	((base)>>16)&0xff, \ -	(bits)&0xff, \ -	(((bits)>>4)&0xf0) | ((limit>>16)&0xf), \ -	((base)>>24)&0xff, \ -} - -// SEGDESCHI constructs an extension segment descriptor -// literal that records the high bits of base. 
-#define SEGDESCHI(base) (struct segdesc) {                        \ -  (((base)>>32)&0xffff), (((base)>>48)&0xffff), \ -} - -#endif - -#define DPL_USER    0x3     // User DPL - -#define SEG_A      (1<<0)      // segment accessed bit  -#define SEG_R      (1<<1)      // readable (code)  -#define SEG_W      (1<<1)      // writable (data)  -#define SEG_C      (1<<2)      // conforming segment (code)  -#define SEG_E      (1<<2)      // expand-down bit (data)  -#define SEG_CODE   (1<<3)      // code segment (instead of data)  - -// User and system segment bits. -#define SEG_S      (1<<4)      // if 0, system descriptor  -#define SEG_DPL(x) ((x)<<5)    // descriptor privilege level (2 bits)  -#define SEG_P      (1<<7)      // segment present  -#define SEG_AVL    (1<<8)      // available for operating system use  -#define SEG_L      (1<<9)      // long mode  -#define SEG_D      (1<<10)     // default operation size 32-bit  -#define SEG_G      (1<<11)     // granularity  - -// Application segment type bits -#define STA_X       0x8     // Executable segment -#define STA_W       0x2     // Writeable (non-executable segments) -#define STA_R       0x2     // Readable (executable segments) - -// System segment type bits -#define SEG_LDT    (2<<0)      // local descriptor table  -#define SEG_TSS64A (9<<0)      // available 64-bit TSS  -#define SEG_TSS64B (11<<0)     // busy 64-bit TSS  -#define SEG_CALL64 (12<<0)     // 64-bit call gate  -#define SEG_INTR64 (14<<0)     // 64-bit interrupt gate  -#define SEG_TRAP64 (15<<0)     // 64-bit trap gate  - -// A virtual address 'la' has a six-part structure as follows: -// -// +--16--+---9---+------9-------+-----9----+----9-------+----12-------+ -// | Sign | PML4  |Page Directory| Page Dir |Page Table  | Offset Page | -// |Extend| Index | Pointer Index|  Index   |  Index     | in Page     | -// +------+-------+--------------+----------+------------+-------------+ -//         L3 pgtab   L2 pgtab      L1 pgtab   L0 pgtab - -// Page 
directory and page table constants. -#define NPDENTRIES      512     // # directory entries per page directory -#define PGSIZE          4096    // bytes mapped by a page -#define PGSHIFT         12      // offset of PTX in a linear address - -#define PXMASK          0x1FF -#define PXSHIFT(n)	(PGSHIFT+(9*(n)))  // shift for index into level n page table -#define PX(n, va)	((((uint64) (va)) >> PXSHIFT(n)) & PXMASK) -#define L_PML4           3 -     -#define PGROUNDUP(sz)  (((sz)+PGSIZE-1) & ~(PGSIZE-1)) -#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1)) - -// Page table/directory entry flags. -#define PTE_P           0x001   // Present -#define PTE_W           0x002   // Writeable -#define PTE_U           0x004   // User -#define PTE_PS          0x080   // Page Size -#define PTE_PWT         0x008   // Write-Through -#define PTE_PCD         0x010   // Cache-Disable - -// Address in page table or page directory entry -#define PTE_ADDR(pte)   ((uint64)(pte) & ~0xFFF) -#define PTE_FLAGS(pte)  ((uint64)(pte) &  0xFFF) - -#ifndef __ASSEMBLER__ - -typedef uint64 pte_t; - -struct taskstate { -  uint8 reserved0[4]; -  uint64 rsp[3]; -  uint64 ist[8]; -  uint8 reserved1[10]; -  uint16 iomba; -  uint8 iopb[0]; -} __attribute__ ((packed)); - -#define INT_P      (1<<7)      // interrupt descriptor present - -struct intgate -{ -	uint16 rip0; -	uint16 cs; -	uint8 reserved0; -	uint8 bits; -	uint16 rip1; -	uint32 rip2; -	uint32 reserved1; -}; - -// INTDESC constructs an interrupt descriptor literal -// that records the given code segment, instruction pointer, -// and type bits. -#define INTDESC(cs, rip, bits) (struct intgate){ \ -	(rip)&0xffff, (cs), 0, bits, ((rip)>>16)&0xffff, \ -	(uint64)(rip)>>32, 0, \ -} - -// See section 4.6 of amd64 vol2 -struct desctr -{ -  uint16 limit; -  uint64 base; -} __attribute__((packed, aligned(16)));   // important! 
- -#endif @@ -1,25 +0,0 @@ -// SYSCALL and SYSRET registers -#define MSR_STAR        0xc0000081 -#define MSR_LSTAR       0xc0000082 -#define MSR_CSTAR       0xc0000083 -#define MSR_SFMASK      0xc0000084 - -// GS -#define MSR_GS_BASE     0xc0000101 -#define MSR_GS_KERNBASE 0xc0000102 - -static inline uint64 -readmsr(uint32 msr) -{ -  uint32 hi, lo; -  __asm volatile("rdmsr" : "=d" (hi), "=a" (lo) : "c" (msr)); -  return ((uint64) lo) | (((uint64) hi) << 32); -} - -static inline void -writemsr(uint64 msr, uint64 val) -{ -  uint32 lo = val & 0xffffffff; -  uint32 hi = val >> 32; -  __asm volatile("wrmsr" : : "c" (msr), "a" (lo), "d" (hi) : "memory"); -} @@ -1,5 +1,4 @@  #define NPROC        64  // maximum number of processes -#define KSTACKSIZE 4096  // size of per-process kernel stack  #define NCPU          8  // maximum number of CPUs  #define NOFILE       16  // open files per process  #define NFILE       100  // open files per system @@ -1,18 +1,20 @@  #include "types.h" -#include "defs.h"  #include "param.h"  #include "memlayout.h" -#include "mmu.h" -#include "x86.h" +#include "riscv.h"  #include "proc.h"  #include "spinlock.h" +#include "defs.h"  struct {    struct spinlock lock;    struct proc proc[NPROC];  } ptable; -static struct proc *initproc; +// XXX riscv move somewhere else +struct cpu cpus[NCPU]; + +struct proc *initproc;  int nextpid = 1;  extern void forkret(void); @@ -22,57 +24,36 @@ extern void sysexit(void);  static void wakeup1(void *chan); +extern char trampstart[]; // trampoline.S +  void -pinit(void) +procinit(void)  {    initlock(&ptable.lock, "ptable");  } -// Must be called with interrupts disabled +// Must be called with interrupts disabled. +// XXX riscv  int  cpuid() { -  return mycpu()-cpus; -} - -// Must be called with interrupts disabled to avoid the caller being -// rescheduled between reading lapicid and running through the loop. 
-struct cpu* -getmycpu(void) -{ -  int apicid, i; -   -  if(readeflags()&FL_IF) -    panic("getmycpu called with interrupts enabled\n"); -   -  apicid = lapicid(); -  // APIC IDs are not guaranteed to be contiguous. -  for (i = 0; i < ncpu; ++i) { -    if (cpus[i].apicid == apicid) -      return &cpus[i]; -  } -  panic("unknown apicid\n"); +  return 0;  } -// Return this core's cpu struct using %gs. %gs points this core's struct -// cpu. Offet 24 in struct cpu is cpu. +// Return this core's cpu struct. +// XXX riscv  struct cpu*  mycpu(void) {    struct cpu *c; -  asm volatile("mov %%gs:24, %0" : "=r" (c)); +  c = &cpus[0];    return c;  }  // Disable interrupts so that we are not rescheduled  // while reading proc from the cpu structure +// XXX riscv  struct proc*  myproc(void) { -  struct cpu *c; -  struct proc *p; -  pushcli(); -  c = mycpu(); -  p = c->proc; -  popcli(); -  return p; +  return cpus[0].proc;  }  //PAGEBREAK: 32 @@ -84,7 +65,6 @@ static struct proc*  allocproc(void)  {    struct proc *p; -  char *sp;    acquire(&ptable.lock); @@ -101,56 +81,73 @@ found:    release(&ptable.lock); -  // Allocate kernel stack. +  // Allocate a page for the kernel stack.    if((p->kstack = kalloc()) == 0){      p->state = UNUSED;      return 0;    } -  sp = p->kstack + KSTACKSIZE; -  // Leave room for syscall frame. -  sp -= sizeof *p->sf; +  // Allocate a trapframe page. +  if((p->tf = (struct trapframe *)kalloc()) == 0){ +    p->state = UNUSED; +    return 0; +  } -  if ((uint64) sp % 16) -    panic("misaligned sp"); +  // An empty user page table. +  p->pagetable = uvmcreate(); -  p->sf = (struct sysframe*)sp; +  // map the trampoline code (for system call return) +  // at the highest user virtual address. +  // only the supervisor uses it, on the way +  // to/from user space, so not PTE_U. 
+  mappages(p->pagetable, TRAMPOLINE, PGSIZE, +           (uint64)trampstart, PTE_R | PTE_X); -  // Set up new context to start executing at forkret, -  // which returns to sysexit. -  sp -= sizeof(uint64); -  *(uint64*)sp = (uint64)sysexit; +  // map the trapframe, for trampoline.S. +  mappages(p->pagetable, (TRAMPOLINE - PGSIZE), PGSIZE, +           (uint64)(p->tf), PTE_R | PTE_W); -  sp -= sizeof *p->context; -  p->context = (struct context*)sp; -  memset(p->context, 0, sizeof *p->context); -  p->context->rip = (uint64)forkret; +  // Set up new context to start executing at forkret, +  // which returns to user space. +  memset(&p->context, 0, sizeof p->context); +  p->context.ra = (uint64)forkret; +  p->context.sp = (uint64)p->kstack + PGSIZE;    return p;  } +// XXX hack because I don't know how to incorporate initcode +// into the kernel binary. just the exec system call, no arguments. +// manually copied from initcode.asm. +unsigned char initcode[] = { +  0x85, 0x48,             // li a7, 1 -- SYS_fork +  0x73, 0x00, 0x00, 0x00, // ecall +  0x8d, 0x48,             // li a7, 3 -- SYS_wait +  0x73, 0x00, 0x00, 0x00, // ecall +  0x89, 0x48,             // li a7, 2 -- SYS_exit +  0x73, 0x00, 0x00, 0x00, // ecall +}; +  //PAGEBREAK: 32  // Set up first user process.  void  userinit(void)  {    struct proc *p; -  extern char _binary_initcode_start[], _binary_initcode_size[];    p = allocproc(); -      initproc = p; -  if((p->pgdir = setupkvm()) == 0) -    panic("userinit: out of memory?"); -  inituvm(p->pgdir, _binary_initcode_start, (uint64)_binary_initcode_size); +   +  uvminit(p->pagetable, initcode, sizeof(initcode));    p->sz = PGSIZE; -  memset(p->sf, 0, sizeof(*p->sf)); -  p->sf->r11 = FL_IF; -  p->sf->rsp = PGSIZE; -  p->sf->rcx = 0;  // beginning of initcode.S + +  // prepare for the very first kernel->user. 
+  p->tf->epc = 0; +  p->tf->sp = PGSIZE;    safestrcpy(p->name, "initcode", sizeof(p->name)); -  p->cwd = namei("/"); +  // XXX riscv +  //p->cwd = namei("/");    // this assignment to p->state lets other cores    // run this process. the acquire forces the above @@ -163,62 +160,65 @@ userinit(void)    release(&ptable.lock);  } +#if 0 +  // Grow current process's memory by n bytes.  // Return 0 on success, -1 on failure.  int  growproc(int n)  {    uint sz; -  struct proc *curproc = myproc(); +  struct proc *p = myproc(); -  sz = curproc->sz; +  sz = p->sz;    if(n > 0){ -    if((sz = allocuvm(curproc->pgdir, sz, sz + n)) == 0) +    if((sz = allocuvm(p->pagetable, sz, sz + n)) == 0)        return -1;    } else if(n < 0){ -    if((sz = deallocuvm(curproc->pgdir, sz, sz + n)) == 0) +    if((sz = uvmdealloc(p->pagetable, sz, sz + n)) == 0)        return -1;    } -  curproc->sz = sz; -  switchuvm(curproc); +  p->sz = sz; +  switchuvm(p);    return 0;  } +#endif -// Create a new process copying p as the parent. -// Sets up stack to return as if from system call. -// Caller must set state of returned proc to RUNNABLE. +// Create a new process, copying p as the parent. +// Sets up child kernel stack to return as if from system call.  int  fork(void)  {    int i, pid;    struct proc *np; -  struct proc *curproc = myproc(); +  struct proc *p = myproc();    // Allocate process.    if((np = allocproc()) == 0){      return -1;    } -  // Copy process state from proc. -  if((np->pgdir = copyuvm(curproc->pgdir, curproc->sz)) == 0){ -    kfree(np->kstack); -    np->kstack = 0; -    np->state = UNUSED; -    return -1; -  } -  np->sz = curproc->sz; -  np->parent = curproc; -  *np->sf = *curproc->sf; +  // Copy user memory from parent to child. +  uvmcopy(p->pagetable, np->pagetable, p->sz); +  np->sz = p->sz; -  // Clear %eax so that fork returns 0 in the child. -  np->sf->rax = 0; +  np->parent = p; +  // copy saved user registers. 
+  *(np->tf) = *(p->tf); + +  // Cause fork to return 0 in the child. +  np->tf->a0 = 0; + +#if 0 // XXX riscv +  // increment reference counts on open file descriptors.    for(i = 0; i < NOFILE; i++) -    if(curproc->ofile[i]) -      np->ofile[i] = filedup(curproc->ofile[i]); -  np->cwd = idup(curproc->cwd); +    if(p->ofile[i]) +      np->ofile[i] = filedup(p->ofile[i]); +  np->cwd = idup(p->cwd); +#endif -  safestrcpy(np->name, curproc->name, sizeof(curproc->name)); +  safestrcpy(np->name, p->name, sizeof(p->name));    pid = np->pid; @@ -233,46 +233,48 @@ fork(void)  // Exit the current process.  Does not return.  // An exited process remains in the zombie state -// until its parent calls wait() to find out it exited. +// until its parent calls wait().  void  exit(void)  { -  struct proc *curproc = myproc(); -  struct proc *p; +  struct proc *p = myproc(); +  struct proc *pp;    int fd; -  if(curproc == initproc) +  if(p == initproc)      panic("init exiting"); +#if 0 // XXX riscv    // Close all open files.    for(fd = 0; fd < NOFILE; fd++){ -    if(curproc->ofile[fd]){ -      fileclose(curproc->ofile[fd]); -      curproc->ofile[fd] = 0; +    if(p->ofile[fd]){ +      fileclose(p->ofile[fd]); +      p->ofile[fd] = 0;      }    }    begin_op(); -  iput(curproc->cwd); +  iput(p->cwd);    end_op(); -  curproc->cwd = 0; +#endif +  p->cwd = 0;    acquire(&ptable.lock);    // Parent might be sleeping in wait(). -  wakeup1(curproc->parent); +  wakeup1(p->parent);    // Pass abandoned children to init. -  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ -    if(p->parent == curproc){ -      p->parent = initproc; -      if(p->state == ZOMBIE) +  for(pp = ptable.proc; pp < &ptable.proc[NPROC]; pp++){ +    if(pp->parent == p){ +      pp->parent = initproc; +      if(pp->state == ZOMBIE)          wakeup1(initproc);      }    }    // Jump into the scheduler, never to return. 
-  curproc->state = ZOMBIE; +  p->state = ZOMBIE;    sched();    panic("zombie exit");  } @@ -282,42 +284,47 @@ exit(void)  int  wait(void)  { -  struct proc *p; +  struct proc *np;    int havekids, pid; -  struct proc *curproc = myproc(); +  struct proc *p = myproc();    acquire(&ptable.lock);    for(;;){      // Scan through table looking for exited children.      havekids = 0; -    for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ -      if(p->parent != curproc) +    for(np = ptable.proc; np < &ptable.proc[NPROC]; np++){ +      if(np->parent != p)          continue;        havekids = 1; -      if(p->state == ZOMBIE){ +      if(np->state == ZOMBIE){          // Found one. -        pid = p->pid; -        kfree(p->kstack); -        p->kstack = 0; -        freevm(p->pgdir, p->sz); -         p->pid = 0; -        p->parent = 0; -        p->name[0] = 0; -        p->killed = 0; -        p->state = UNUSED; +        pid = np->pid; +        kfree(np->kstack); +        np->kstack = 0; +        kfree((void*)np->tf); +        np->tf = 0; +        unmappages(np->pagetable, TRAMPOLINE, PGSIZE, 0); +        unmappages(np->pagetable, TRAMPOLINE-PGSIZE, PGSIZE, 0); +        uvmfree(np->pagetable, np->sz); +        np->pagetable = 0; +        np->pid = 0; +        np->parent = 0; +        np->name[0] = 0; +        np->killed = 0; +        np->state = UNUSED;          release(&ptable.lock);          return pid;        }      }      // No point waiting if we don't have any children. -    if(!havekids || curproc->killed){ +    if(!havekids || p->killed){        release(&ptable.lock);        return -1;      }      // Wait for children to exit.  (See wakeup1 call in proc_exit.) -    sleep(curproc, &ptable.lock);  //DOC: wait-sleep +    sleep(p, &ptable.lock);  //DOC: wait-sleep    }  } @@ -338,7 +345,8 @@ scheduler(void)    c->proc = 0;    for(;;){      // Enable interrupts on this processor. 
-    sti(); +    // XXX riscv +    //sti();      // Loop over process table looking for process to run.      acquire(&ptable.lock); @@ -350,11 +358,11 @@ scheduler(void)        // to release ptable.lock and then reacquire it        // before jumping back to us.        c->proc = p; -      switchuvm(p);        p->state = RUNNING; -      swtch(&(c->scheduler), p->context); -      switchkvm(); +      printf("switch...\n"); +      swtch(&c->scheduler, &p->context); +      printf("switch returned\n");        // Process is done running for now.        // It should have changed its p->state before coming back. @@ -380,14 +388,10 @@ sched(void)    if(!holding(&ptable.lock))      panic("sched ptable.lock"); -  if(mycpu()->ncli != 1) -    panic("sched locks");    if(p->state == RUNNING)      panic("sched running"); -  if(readeflags()&FL_IF) -    panic("sched interruptible");    intena = mycpu()->intena; -  swtch(&p->context, mycpu()->scheduler); +  swtch(&p->context, &mycpu()->scheduler);    mycpu()->intena = intena;  } @@ -402,24 +406,29 @@ yield(void)  }  // A fork child's very first scheduling by scheduler() -// will swtch here.  "Return" to user space. +// will swtch to forkret.  void  forkret(void)  { +  struct proc *p = myproc(); +    static int first = 1;    // Still holding ptable.lock from scheduler.    release(&ptable.lock); +  printf("entering forkret\n"); +    if (first) {      // Some initialization functions must be run in the context      // of a regular process (e.g., they call sleep), and thus cannot      // be run from main().      first = 0; -    iinit(ROOTDEV); -    initlog(ROOTDEV); +    // XXX riscv +    //iinit(ROOTDEV); +    //initlog(ROOTDEV);    } -   -  // Return to "caller", actually trapret (see allocproc). + +  usertrapret();  }  // Atomically release lock and sleep on chan. @@ -483,6 +492,8 @@ wakeup(void *chan)    release(&ptable.lock);  } +#if 0 +  // Kill the process with the given pid.  
// Process won't exit until it returns  // to user space (see trap in trap.c). @@ -533,12 +544,14 @@ procdump(void)        state = states[p->state];      else        state = "???"; -    cprintf("%d %s %s", p->pid, state, p->name); +    printf("%d %s %s", p->pid, state, p->name);      if(p->state == SLEEPING){        getcallerpcs((uint64*)p->context->rbp+2, pc);        for(i=0; i<10 && pc[i] != 0; i++) -        cprintf(" %p", pc[i]); +        printf(" %p", pc[i]);      } -    cprintf("\n"); +    printf("\n");    }  } + +#endif @@ -1,13 +1,30 @@ +// Saved registers for kernel context switches. +struct context { +  uint64 ra; +  uint64 sp; + +  // callee-saved +  uint64 s0; +  uint64 s1; +  uint64 s2; +  uint64 s3; +  uint64 s4; +  uint64 s5; +  uint64 s6; +  uint64 s7; +  uint64 s8; +  uint64 s9; +  uint64 s10; +  uint64 s11; +}; +  // Per-CPU state  struct cpu {    uint64 syscallno;            // Temporary used by sysentry    uint64 usp;                  // Temporary used by sysentry    struct proc *proc;           // The process running on this cpu or null    struct cpu *cpu;             // XXX -  uchar apicid;                // Local APIC ID -  struct context *scheduler;   // swtch() here to enter scheduler -  struct taskstate ts;         // Used by x86 to find stack for interrupt -  struct segdesc gdt[NSEGS];   // x86 global descriptor table +  struct context scheduler;   // swtch() here to enter scheduler    volatile uint started;       // Has the CPU started?    int ncli;                    // Depth of pushcli nesting.    int intena;                  // Were interrupts enabled before pushcli? @@ -17,39 +34,52 @@ extern struct cpu cpus[NCPU];  extern int ncpu;  //PAGEBREAK: 17 -// Saved registers for kernel context switches. -// Don't need to save all the segment registers (%cs, etc), -// because they are constant across kernel contexts. -// Don't need to save %eax, %ecx, %edx, because the -// x86 convention is that the caller has saved them. 
-// Contexts are stored at the bottom of the stack they -// describe; the stack pointer is the address of the context. -// The layout of the context matches the layout of the stack in swtch.S -// at the "Switch stacks" comment. Switch doesn't save eip explicitly, -// but it is on the stack and allocproc() manipulates it. -struct context { -  uint64 r15; -  uint64 r14; -  uint64 r13; -  uint64 r12; -  uint64 r11; -  uint64 rbx; -  uint64 rbp; -  uint64 rip; + +// per-process data for the early trap handling code in trampoline.S. +// sits in a page by itself just under the trampoline page in the +// user page table. not specially mapped in the kernel page table. +// the sscratch register points here. +// trampoline.S saves user registers, then restores kernel_sp and +// kernel_satp. +// no need to save s0-s11 (callee-saved) since C code and swtch() save them. +struct trapframe { +  /*   0 */ uint64 kernel_satp; +  /*   8 */ uint64 kernel_sp; +  /*  16 */ uint64 kernel_trap; // address of trap() +  /*  24 */ uint64 epc; // saved user program counter +  /*  32 */ uint64 ra; +  /*  40 */ uint64 sp; +  /*  48 */ uint64 gp; +  /*  56 */ uint64 tp; +  /*  64 */ uint64 t0; +  /*  72 */ uint64 t1; +  /*  80 */ uint64 t2; +  /*  88 */ uint64 a0; +  /*  96 */ uint64 a1; +  /* 104 */ uint64 a2; +  /* 112 */ uint64 a3; +  /* 120 */ uint64 a4; +  /* 128 */ uint64 a5; +  /* 136 */ uint64 a6; +  /* 144 */ uint64 a7; +  /* 152 */ uint64 t3; +  /* 160 */ uint64 t4; +  /* 168 */ uint64 t5; +  /* 176 */ uint64 t6;  };  enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE };  // Per-process state  struct proc { -  char *kstack;                // Bottom of kernel stack for this process, must be first entry +  char *kstack;                // Bottom of kernel stack for this process    uint64 sz;                   // Size of process memory (bytes) -  pde_t* pgdir;                // Page table +  pagetable_t pagetable;       // Page table    enum procstate state;        // 
Process state    int pid;                     // Process ID    struct proc *parent;         // Parent process -  struct sysframe *sf;         // Syscall frame for current syscall -  struct context *context;     // swtch() here to run process +  struct trapframe *tf;        // data page for trampoline.S +  struct context context;      // swtch() here to run process    void *chan;                  // If non-zero, sleeping on chan    int killed;                  // If non-zero, have been killed    struct file *ofile[NOFILE];  // Open files @@ -0,0 +1,172 @@ +// Machine Status Register, mstatus + +#define MSTATUS_MPP_MASK (3L << 11) +#define MSTATUS_MPP_M (3L << 11) +#define MSTATUS_MPP_S (1L << 11) +#define MSTATUS_MPP_U (0L << 11) + +static inline uint64 +r_mstatus() +{ +  uint64 x; +  asm("csrr %0, mstatus" : "=r" (x) ); +  return x; +} + +static inline void  +w_mstatus(uint64 x) +{ +  asm("csrw mstatus, %0" : : "r" (x)); +} + +// machine exception program counter, holds the +// instruction address to which a return from +// exception will go. +static inline void  +w_mepc(uint64 x) +{ +  asm("csrw mepc, %0" : : "r" (x)); +} + +// Supervisor Status Register, sstatus + +#define SSTATUS_SPP (1L << 8) // 1=Supervisor, 0=User + +static inline uint64 +r_sstatus() +{ +  uint64 x; +  asm("csrr %0, sstatus" : "=r" (x) ); +  return x; +} + +static inline void  +w_sstatus(uint64 x) +{ +  asm("csrw sstatus, %0" : : "r" (x)); +} + +// machine exception program counter, holds the +// instruction address to which a return from +// exception will go. 
+static inline void  +w_sepc(uint64 x) +{ +  asm("csrw sepc, %0" : : "r" (x)); +} + +static inline uint64 +r_sepc() +{ +  uint64 x; +  asm("csrr %0, sepc" : "=r" (x) ); +  return x; +} + +// Machine Exception Delegation +static inline uint64 +r_medeleg() +{ +  uint64 x; +  asm("csrr %0, medeleg" : "=r" (x) ); +  return x; +} + +static inline void  +w_medeleg(uint64 x) +{ +  asm("csrw medeleg, %0" : : "r" (x)); +} + +// Machine Interrupt Delegation +static inline uint64 +r_mideleg() +{ +  uint64 x; +  asm("csrr %0, mideleg" : "=r" (x) ); +  return x; +} + +static inline void  +w_mideleg(uint64 x) +{ +  asm("csrw mideleg, %0" : : "r" (x)); +} + +// Supervisor Trap-Vector Base Address +// low two bits are mode. +static inline void  +w_stvec(uint64 x) +{ +  asm("csrw stvec, %0" : : "r" (x)); +} + +// use riscv's sv39 page table scheme. +#define SATP_SV39 (8L << 60) + +#define MAKE_SATP(pagetable) (SATP_SV39 | (((uint64)pagetable) >> 12)) + +// supervisor address translation and protection; +// holds the address of the page table. +static inline void  +w_satp(uint64 x) +{ +  asm("csrw satp, %0" : : "r" (x)); +} + +static inline uint64 +r_satp() +{ +  uint64 x; +  asm("csrr %0, satp" : "=r" (x) ); +  return x; +} + +// Supervisor Scratch register, for early trap handler in trampoline.S. +static inline void  +w_sscratch(uint64 x) +{ +  asm("csrw sscratch, %0" : : "r" (x)); +} + +// Supervisor trap cause +static inline uint64 +r_scause() +{ +  uint64 x; +  asm("csrr %0, scause" : "=r" (x) ); +  return x; +} + +#define PGSIZE 4096 // bytes per page +#define PGSHIFT 12  // bits of offset within a page + +#define PGROUNDUP(sz)  (((sz)+PGSIZE-1) & ~(PGSIZE-1)) +#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1)) + +#define PTE_V (1L << 0) // valid +#define PTE_R (1L << 1) +#define PTE_W (1L << 2) +#define PTE_X (1L << 3) +#define PTE_U (1L << 4) // 1 -> user can access + +// shift a physical address to the right place for a PTE. 
+#define PA2PTE(pa) ((((uint64)pa) >> 12) << 10) + +#define PTE2PA(pte) (((pte) >> 10) << 12) + +#define PTE_FLAGS(pte) ((pte) & (PTE_V|PTE_R|PTE_W|PTE_X|PTE_U)) + +// extract the three 9-bit page table indices from a virtual address. +#define PXMASK          0x1FF // 9 bits +#define PXSHIFT(level)  (PGSHIFT+(9*(level))) +#define PX(level, va) ((((uint64) (va)) >> PXSHIFT(level)) & PXMASK) + +// one beyond the highest possible virtual address. +// MAXVA is actually one bit less than the max allowed by +// Sv39, to avoid having to sign-extend virtual addresses +// that have the high bit set. +#define MAXVA (1L << (9 + 9 + 9 + 12 - 1)) + +typedef uint64 pte_t; +typedef uint64 *pagetable_t; // 512 PTEs @@ -1,13 +1,11 @@  // Mutual exclusion spin locks.  #include "types.h" -#include "defs.h"  #include "param.h" -#include "x86.h"  #include "memlayout.h" -#include "mmu.h" -#include "proc.h"  #include "spinlock.h" +#include "riscv.h" +#include "defs.h"  void  initlock(struct spinlock *lk, char *name) @@ -17,6 +15,27 @@ initlock(struct spinlock *lk, char *name)    lk->cpu = 0;  } +void +acquire(struct spinlock *lk) +{ +  lk->locked = 1; +  lk->cpu = mycpu(); +} + +void +release(struct spinlock *lk) +{ +  lk->locked = 0; +  lk->cpu = 0; +} + +int +holding(struct spinlock *lk) +{ +  return lk->locked && lk->cpu == mycpu(); +} + +#if 0  // Acquire the lock.  // Loops (spins) until the lock is acquired.  // Holding a lock for a long time may cause @@ -37,7 +56,7 @@ acquire(struct spinlock *lk)    // references happen after the lock is acquired.    __sync_synchronize(); -  // Record info about lock acquisition for debugging. +  // Record info about lock acquisition for holding() and debugging.    lk->cpu = mycpu();    getcallerpcs(&lk, lk->pcs);  } @@ -87,11 +106,11 @@ getcallerpcs(void *v, uint64 pcs[])  // Check whether this cpu is holding the lock.  
int -holding(struct spinlock *lock) +holding(struct spinlock *lk)  {    int r;    pushcli(); -  r = lock->locked && lock->cpu == mycpu(); +  r = lk->locked && lk->cpu == mycpu();    popcli();    return r;  } @@ -123,4 +142,4 @@ popcli(void)    if(mycpu()->ncli == 0 && mycpu()->intena)      sti();  } - +#endif @@ -0,0 +1,34 @@ +#include "types.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" + +void main(); + +// entry.S uses this as the initial stack. +char stack0[4096]; + +// entry.S jumps here in machine mode on stack0. +void +mstart() +{ +  // set M Previous Privilege mode to Supervisor, for mret. +  unsigned long x = r_mstatus(); +  x &= ~MSTATUS_MPP_MASK; +  x |= MSTATUS_MPP_S; +  w_mstatus(x); + +  // set M Exception Program Counter to main, for mret. +  // requires gcc -mcmodel=medany +  w_mepc((uint64)main); + +  // disable paging for now. +  w_satp(0); + +  // delegate all interrupts and exceptions to supervisor mode. +  w_medeleg(0xffff); +  w_mideleg(0xffff); +   +  // jump to main in supervisor mode. +  asm("mret"); +} @@ -1,14 +1,13 @@  #include "types.h" -#include "x86.h"  void*  memset(void *dst, int c, uint n)  { -  if ((uint64)dst%4 == 0 && n%4 == 0){ -    c &= 0xFF; -    stosl(dst, (c<<24)|(c<<16)|(c<<8)|c, n/4); -  } else -    stosb(dst, c, n); +  char *cdst = (char *) dst; +  int i; +  for(i = 0; i < n; i++){ +    cdst[i] = c; +  }    return dst;  } @@ -1,35 +1,42 @@  # Context switch  # -#   void swtch(struct context **old, struct context *new); +#   void swtch(struct context *old, struct context *new);  #  -# Save the current registers on the stack, creating -# a struct context, and save its address in *old. -# Switch stacks to new and pop previously-saved registers. +# Save current registers in old. Load from new.	 
+  .globl swtch  swtch: -  # Save old callee-saved registers -  push %rbp -  push %rbx -  push %r11 -  push %r12 -  push %r13 -  push %r14 -  push %r15 - -  # Switch stacks -  mov %rsp, (%rdi)   # first arg of swtch is in rdi -  mov %rsi, %rsp     # second arg of swtch is in rsi - -  # Load new callee-saved registers -  pop %r15 -  pop %r14 -  pop %r13 -  pop %r12 -  pop %r11 -  pop %rbx -  pop %rbp +        sd ra, 0(a0) +        sd sp, 8(a0) +        sd s0, 16(a0) +        sd s1, 24(a0) +        sd s2, 32(a0) +        sd s3, 40(a0) +        sd s4, 48(a0) +        sd s5, 56(a0) +        sd s6, 64(a0) +        sd s7, 72(a0) +        sd s8, 80(a0) +        sd s9, 88(a0) +        sd s10, 96(a0) +        sd s11, 104(a0) -  ret +        ld ra, 0(a1) +        ld sp, 8(a1) +        ld s0, 16(a1) +        ld s1, 24(a1) +        ld s2, 32(a1) +        ld s3, 40(a1) +        ld s4, 48(a1) +        ld s5, 56(a1) +        ld s6, 64(a1) +        ld s7, 72(a1) +        ld s8, 80(a1) +        ld s9, 88(a1) +        ld s10, 96(a1) +        ld s11, 104(a1) +         +        ret @@ -1,11 +1,10 @@  #include "types.h" -#include "defs.h"  #include "param.h"  #include "memlayout.h" -#include "mmu.h" +#include "riscv.h"  #include "proc.h" -#include "x86.h"  #include "syscall.h" +#include "defs.h"  // User code makes a system call with INT T_SYSCALL.  // System call number in %eax. 
@@ -17,9 +16,9 @@  int  fetchint(uint64 addr, int *ip)  { -  struct proc *curproc = myproc(); +  struct proc *p = myproc(); -  if(addr >= curproc->sz || addr+4 > curproc->sz) +  if(addr >= p->sz || addr+4 > p->sz)      return -1;    *ip = *(uint64*)(addr);    return 0; @@ -29,8 +28,8 @@ fetchint(uint64 addr, int *ip)  int  fetchaddr(uint64 addr, uint64 *ip)  { -  struct proc *curproc = myproc(); -  if(addr >= curproc->sz || addr+sizeof(uint64) > curproc->sz) +  struct proc *p = myproc(); +  if(addr >= p->sz || addr+sizeof(uint64) > p->sz)      return -1;    *ip = *(uint64*)(addr);    return 0; @@ -43,12 +42,12 @@ int  fetchstr(uint64 addr, char **pp)  {    char *s, *ep; -  struct proc *curproc = myproc(); +  struct proc *p = myproc(); -  if(addr >= curproc->sz) +  if(addr >= p->sz)      return -1;    *pp = (char*)addr; -  ep = (char*)curproc->sz; +  ep = (char*)p->sz;    for(s = *pp; s < ep; s++){      if(*s == 0)        return s - *pp; @@ -59,20 +58,20 @@ fetchstr(uint64 addr, char **pp)  static uint64  fetcharg(int n)  { -  struct proc *curproc = myproc(); +  struct proc *p = myproc();    switch (n) {    case 0: -    return curproc->sf->rdi; +    return p->tf->a0;    case 1: -    return curproc->sf->rsi; +    return p->tf->a1;    case 2: -    return curproc->sf->rdx; +    return p->tf->a2;    case 3: -    return curproc->sf->r10; +    return p->tf->a3;    case 4: -    return curproc->sf->r8; +    return p->tf->a4;    case 5: -    return curproc->sf->r9; +    return p->tf->a5;    }    panic("fetcharg");    return -1; @@ -100,11 +99,11 @@ int  argptr(int n, char **pp, int size)  {    uint64 i; -  struct proc *curproc = myproc(); +  struct proc *p = myproc();    if(argaddr(n, &i) < 0)      return -1; -  if(size < 0 || (uint)i >= curproc->sz || (uint)i+size > curproc->sz) +  if(size < 0 || (uint)i >= p->sz || (uint)i+size > p->sz)      return -1;    *pp = (char*)i;    return 0; @@ -149,48 +148,47 @@ static int (*syscalls[])(void) = {  [SYS_fork]    sys_fork,  
[SYS_exit]    sys_exit,  [SYS_wait]    sys_wait, -[SYS_pipe]    sys_pipe, -[SYS_read]    sys_read, -[SYS_kill]    sys_kill, -[SYS_exec]    sys_exec, -[SYS_fstat]   sys_fstat, -[SYS_chdir]   sys_chdir, -[SYS_dup]     sys_dup, +//[SYS_pipe]    sys_pipe, +//[SYS_read]    sys_read, +//[SYS_kill]    sys_kill, +//[SYS_exec]    sys_exec, +//[SYS_fstat]   sys_fstat, +//[SYS_chdir]   sys_chdir, +//[SYS_dup]     sys_dup,  [SYS_getpid]  sys_getpid, -[SYS_sbrk]    sys_sbrk, -[SYS_sleep]   sys_sleep, -[SYS_uptime]  sys_uptime, -[SYS_open]    sys_open, -[SYS_write]   sys_write, -[SYS_mknod]   sys_mknod, -[SYS_unlink]  sys_unlink, -[SYS_link]    sys_link, -[SYS_mkdir]   sys_mkdir, -[SYS_close]   sys_close, +//[SYS_sbrk]    sys_sbrk, +//[SYS_sleep]   sys_sleep, +//[SYS_uptime]  sys_uptime, +//[SYS_open]    sys_open, +//[SYS_write]   sys_write, +//[SYS_mknod]   sys_mknod, +//[SYS_unlink]  sys_unlink, +//[SYS_link]    sys_link, +//[SYS_mkdir]   sys_mkdir, +//[SYS_close]   sys_close,  };  static void  dosyscall(void)  {    int num; -  struct proc *curproc = myproc(); +  struct proc *p = myproc(); -  num = curproc->sf->rax; +  num = p->tf->a7;    if(num > 0 && num < NELEM(syscalls) && syscalls[num]) { -    curproc->sf->rax = syscalls[num](); +    p->tf->a0 = syscalls[num]();    } else { -    cprintf("%d %s: unknown sys call %d\n", -            curproc->pid, curproc->name, num); -    curproc->sf->rax = -1; +    printf("%d %s: unknown sys call %d\n", +            p->pid, p->name, num); +    p->tf->a0 = -1;    }  }  void -syscall(struct sysframe *sf) +syscall()  {      if(myproc()->killed)        exit(); -    myproc()->sf = sf;      dosyscall();      if(myproc()->killed)        exit(); @@ -41,11 +41,11 @@ static int  fdalloc(struct file *f)  {    int fd; -  struct proc *curproc = myproc(); +  struct proc *p = myproc();    for(fd = 0; fd < NOFILE; fd++){ -    if(curproc->ofile[fd] == 0){ -      curproc->ofile[fd] = f; +    if(p->ofile[fd] == 0){ +      p->ofile[fd] = f;        return fd;  
    }    } @@ -374,7 +374,7 @@ sys_chdir(void)  {    char *path;    struct inode *ip; -  struct proc *curproc = myproc(); +  struct proc *p = myproc();    begin_op();    if(argstr(0, &path) < 0 || (ip = namei(path)) == 0){ @@ -388,9 +388,9 @@ sys_chdir(void)      return -1;    }    iunlock(ip); -  iput(curproc->cwd); +  iput(p->cwd);    end_op(); -  curproc->cwd = ip; +  p->cwd = ip;    return 0;  } @@ -1,23 +1,28 @@  #include "types.h" -#include "x86.h" +#include "riscv.h"  #include "defs.h"  #include "date.h"  #include "param.h"  #include "memlayout.h" -#include "mmu.h"  #include "proc.h"  int -sys_fork(void) +sys_exit(void)  { -  return fork(); +  exit(); +  return 0;  // not reached  }  int -sys_exit(void) +sys_getpid(void)  { -  exit(); -  return 0;  // not reached +  return myproc()->pid; +} + +int +sys_fork(void) +{ +  return fork();  }  int @@ -26,6 +31,7 @@ sys_wait(void)    return wait();  } +#if 0  int  sys_kill(void)  { @@ -37,12 +43,6 @@ sys_kill(void)  }  int -sys_getpid(void) -{ -  return myproc()->pid; -} - -int  sys_sbrk(void)  {    int addr; @@ -89,3 +89,4 @@ sys_uptime(void)    release(&tickslock);    return xticks;  } +#endif diff --git a/trampoline.S b/trampoline.S new file mode 100644 index 0000000..109dd93 --- /dev/null +++ b/trampoline.S @@ -0,0 +1,108 @@ +	# +        # code to switch between user and kernel space. +        # +        # this code is mapped at the same virtual address +        # in user and kernel space so that it can switch +        # page tables. +	# +	# kernel.ld causes trampstart to be aligned +        # to a page boundary. +        # +.globl usertrap +	.section trampoline +.globl trampstart +trampstart: +        # switch from kernel to user. +	# a0: p->tf in user page table +        # a1: new value for satp, for user page table + +        # switch to user page table +        csrw satp, a1 + +        # put the saved user a0 in sscratch, so we +        # can swap it with our a0 (p->tf) in the last step. 
+        ld t0, 80(a0) +        csrw sscratch, t0 + +        # restore all but a0 from p->tf +        ld ra, 32(a0) +        ld sp, 40(a0) +        ld gp, 48(a0) +        ld tp, 56(a0) +        ld t0, 64(a0) +        ld t1, 72(a0) +        ld t2, 80(a0) +        ld a1, 96(a0) +        ld a2, 104(a0) +        ld a3, 112(a0) +        ld a4, 120(a0) +        ld a5, 128(a0) +        ld a6, 136(a0) +        ld a7, 144(a0) +        ld t3, 152(a0) +        ld t4, 160(a0) +        ld t5, 168(a0) +        ld t6, 176(a0) + +	# restore user a0, and save p->tf +        csrrw a0, sscratch, a0 +         +        # return to user mode and user pc. +        # caller has set up sstatus and sepc. +        sret + +	# +        # trap.c set stvec to point here, so +        # interrupts and exceptions start here, +        # in supervisor mode, but with a +        # user page table. +        # +        # sscratch points to where the process's p->tf is +        # mapped into user space (TRAMPOLINE - 4096). +        # +.align 4 +.globl trampvec +trampvec:     +	# swap a0 and sscratch +        # so that a0 is p->tf +        csrrw a0, sscratch, a0 + +        # save the user registers in p->tf +        sd ra, 32(a0) +        sd sp, 40(a0) +        sd gp, 48(a0) +        sd tp, 56(a0) +        sd t0, 64(a0) +        sd t1, 72(a0) +        sd t2, 80(a0) +        sd a1, 96(a0) +        sd a2, 104(a0) +        sd a3, 112(a0) +        sd a4, 120(a0) +        sd a5, 128(a0) +        sd a6, 136(a0) +        sd a7, 144(a0) +        sd t3, 152(a0) +        sd t4, 160(a0) +        sd t5, 168(a0) +        sd t6, 176(a0) + +	# save the user a0 in p->tf->a0 +        csrr t0, sscratch +        sd t0, 80(a0) + +        # restore kernel stack pointer from p->tf->kernel_sp +        ld sp, 8(a0) + +        # remember the address of usertrap(), p->tf->kernel_trap +        ld t0, 16(a0) + +        # restore kernel page table from p->tf->kernel_satp +        ld t1, 0(a0) +        csrw satp, t1 + +        # a0 is 
no longer valid, since the kernel page +        # table does not specially map p->tf. + +        # jump to usertrap(), which does not return +        jr t0 @@ -1,109 +1,113 @@  #include "types.h" -#include "defs.h"  #include "param.h"  #include "memlayout.h" -#include "mmu.h" +#include "riscv.h"  #include "proc.h" -#include "x86.h" -#include "traps.h"  #include "spinlock.h" +#include "defs.h" -// Interrupt descriptor table (shared by all CPUs). -struct intgate idt[256]; -extern uint64 vectors[];  // in vectors.S: array of 256 entry pointers  struct spinlock tickslock;  uint ticks; +extern char trampstart[], trampvec[]; + +void kerneltrap(); +  void -tvinit(void) +trapinit(void)  {    int i; -  for(i=0; i<256; i++) { -    idt[i] = INTDESC(SEG_KCODE, vectors[i], INT_P | SEG_INTR64); -  } -  idtinit(); -     +  // send interrupts and exceptions to kerneltrap(). +  w_stvec((uint64)kerneltrap); +    initlock(&tickslock, "time");  } +// +// handle an interrupt, exception, or system call from user space. +// called from trampoline.S +//  void -idtinit(void) +usertrap(void)  { -  struct desctr dtr; +  if((r_sstatus() & SSTATUS_SPP) != 0) +    panic("usertrap: not from user mode"); + +  // send interrupts and exceptions to kerneltrap(), +  // since we're now in the kernel. +  w_stvec((uint64)kerneltrap); + +  struct proc *p = myproc(); +   +  // save user program counter. +  p->tf->epc = r_sepc(); +   +  if(r_scause() == 8){ +    // system call +    printf("usertrap(): system call pid=%d syscall=%d\n", p->pid, p->tf->a7); + +    // sepc points to the ecall instruction, +    // but we want to return to the next instruction. 
+    p->tf->epc += 4; -  dtr.limit = sizeof(idt) - 1; -  dtr.base = (uint64)idt; -  lidt((void *)&dtr.limit); +    syscall(); +  } else { +    printf("usertrap(): unexpected scause 0x%x pid=%d\n", r_scause(), p->pid); +    panic("usertrap"); +  } + +  usertrapret();  } -//PAGEBREAK: 41 +// +// return to user space +//  void -trap(struct trapframe *tf) +usertrapret(void)  { -  switch(tf->trapno){ -  case T_IRQ0 + IRQ_TIMER: -    if(cpuid() == 0){ -      acquire(&tickslock); -      ticks++; -      wakeup(&ticks); -      release(&tickslock); -    } -    lapiceoi(); -    break; -  case T_IRQ0 + IRQ_IDE: -    ideintr(); -    lapiceoi(); -    break; -  case T_IRQ0 + IRQ_IDE+1: -    // Bochs generates spurious IDE1 interrupts. -    break; -  case T_IRQ0 + IRQ_KBD: -    kbdintr(); -    lapiceoi(); -    break; -  case T_IRQ0 + IRQ_COM1: -    uartintr(); -    lapiceoi(); -    break; -  case T_IRQ0 + 7: -  case T_IRQ0 + IRQ_SPURIOUS: -    cprintf("cpu%d: spurious interrupt at %x:%x\n", -            cpuid(), tf->cs, tf->rip); -    lapiceoi(); -    break; - -  //PAGEBREAK: 13 -  default: -    if(myproc() == 0 || (tf->cs&3) == 0){ -      // In kernel, it must be our mistake. -      cprintf("unexpected trap %d from cpu %d rip %x (cr2=0x%x)\n", -              tf->trapno, cpuid(), tf->rip, rcr2()); -      panic("trap"); -    } -    // In user space, assume process misbehaved. -    cprintf("pid %d %s: trap %d err %d on cpu %d " -            "rip 0x%x addr 0x%x--kill proc\n", -            myproc()->pid, myproc()->name, tf->trapno, -            tf->err, cpuid(), tf->rip, rcr2()); -    myproc()->killed = 1; -  } +  struct proc *p = myproc(); + +  // XXX turn off interrupts, since we're switching +  // now from kerneltrap() to usertrap(). + +  // send interrupts and exceptions to trampoline.S +  w_stvec(TRAMPOLINE + (trampvec - trampstart)); + +  // set up values that trampoline.S will need when +  // the process next re-enters the kernel. 
+  p->tf->kernel_satp = r_satp(); +  p->tf->kernel_sp = (uint64)p->kstack + PGSIZE; +  p->tf->kernel_trap = (uint64)usertrap; -  // Force process exit if it has been killed and is in user space. -  // (If it is still executing in the kernel, let it keep running -  // until it gets to the regular system call return.) -  if(myproc() && myproc()->killed && (tf->cs&3) == DPL_USER) -    exit(); - -  // Force process to give up CPU on clock tick. -  // If interrupts were on while locks held, would need to check nlock. -  if(myproc() && myproc()->state == RUNNING && -     tf->trapno == T_IRQ0+IRQ_TIMER) -    yield(); +  // set up the registers that trampoline.S's sret will use +  // to get to user space. -  // Check if the process has been killed since we yielded -  if(myproc() && myproc()->killed && (tf->cs&3) == DPL_USER) -    exit(); +  // set S Previous Privilege mode to User. +  unsigned long x = r_sstatus(); +  x &= ~SSTATUS_SPP; // clear SPP to 0 for user mode +  w_sstatus(x); + +  // set S Exception Program Counter to the saved user pc. +  w_sepc(p->tf->epc); + +  // tell trampoline.S the user page table to switch to. +  uint64 satp = MAKE_SATP(p->pagetable); + +  // jump to trampoline.S at the top of memory, which  +  // switches to the user page table, restores user registers, +  // and switches to user mode with sret. +  ((void (*)(uint64,uint64))TRAMPOLINE)(TRAMPOLINE - PGSIZE, satp);  } +// interrupts and exceptions from kernel code go here, +// on whatever the current kernel stack is. +// must be 4-byte aligned to fit in stvec. +void __attribute__ ((aligned (4))) +kerneltrap() +{ +  if((r_sstatus() & SSTATUS_SPP) == 0) +    panic("kerneltrap: not from supervisor mode"); +  panic("kerneltrap"); +} diff --git a/traps.h b/traps.h deleted file mode 100644 index 6e8a444..0000000 --- a/traps.h +++ /dev/null @@ -1,36 +0,0 @@ -// x86 trap and interrupt constants. 
- -// Processor-defined: -#define T_DIVIDE         0      // divide error -#define T_DEBUG          1      // debug exception -#define T_NMI            2      // non-maskable interrupt -#define T_BRKPT          3      // breakpoint -#define T_OFLOW          4      // overflow -#define T_BOUND          5      // bounds check -#define T_ILLOP          6      // illegal opcode -#define T_DEVICE         7      // device not available -#define T_DBLFLT         8      // double fault -// #define T_COPROC      9      // reserved (not used since 486) -#define T_TSS           10      // invalid task switch segment -#define T_SEGNP         11      // segment not present -#define T_STACK         12      // stack exception -#define T_GPFLT         13      // general protection fault -#define T_PGFLT         14      // page fault -// #define T_RES        15      // reserved -#define T_FPERR         16      // floating point error -#define T_ALIGN         17      // aligment check -#define T_MCHK          18      // machine check -#define T_SIMDERR       19      // SIMD floating point error - -#define T_DEFAULT      500      // catchall - -#define T_IRQ0          32      // IRQ 0 corresponds to int T_IRQ - -#define IRQ_TIMER        0 -#define IRQ_KBD          1 -#define IRQ_COM1         4 -#define IRQ_IDE         14 -#define IRQ_ERROR       19 -#define IRQ_SPURIOUS    31 - - @@ -1,77 +1,51 @@ -// Intel 8250 serial port (UART). +#include "memlayout.h" -#include "types.h" -#include "defs.h" -#include "param.h" -#include "traps.h" -#include "spinlock.h" -#include "sleeplock.h" -#include "fs.h" -#include "file.h" -#include "mmu.h" -#include "proc.h" -#include "x86.h" +// +// qemu -machine virt has a 16550a UART +// qemu/hw/riscv/virt.c +// http://byterunner.com/16550.html +// +// caller should lock. +// -#define COM1    0x3f8 - -static int uart;    // is there a uart? 
+// address of one of the registers +#define R(reg) ((unsigned int*)(UART0 + 4*(reg)))  void  uartinit(void)  { -  char *p; +  // disable interrupts +  *R(1) = 0x00; -  // Turn off the FIFO -  outb(COM1+2, 0); +  // special mode to set baud rate +  *R(3) = 0x80; -  // 9600 baud, 8 data bits, 1 stop bit, parity off. -  outb(COM1+3, 0x80);    // Unlock divisor -  outb(COM1+0, 115200/9600); -  outb(COM1+1, 0); -  outb(COM1+3, 0x03);    // Lock divisor, 8 data bits. -  outb(COM1+4, 0); -  outb(COM1+1, 0x01);    // Enable receive interrupts. +  // LSB for baud rate of 38.4K +  *R(0) = 0x03; -  // If status is 0xFF, no serial port. -  if(inb(COM1+5) == 0xFF) -    return; -  uart = 1; +  // MSB for baud rate of 38.4K +  *R(1) = 0x00; -  // Acknowledge pre-existing interrupt conditions; -  // enable interrupts. -  inb(COM1+2); -  inb(COM1+0); -  ioapicenable(IRQ_COM1, 0); +  // leave set-baud mode, +  // and set word length to 8 bits, no parity. +  *R(3) = 0x03; -  // Announce that we're here. -  for(p="xv6...\n"; *p; p++) -    uartputc(*p); +  // reset and enable FIFOs. +  *R(2) = 0x07;  }  void  uartputc(int c)  { -  int i; - -  if(!uart) -    return; -  for(i = 0; i < 128 && !(inb(COM1+5) & 0x20); i++) -    microdelay(10); -  outb(COM1+0, c); +  *R(0) = c;  }  static int  uartgetc(void)  { -  if(!uart) -    return -1; -  if(!(inb(COM1+5) & 0x01)) -    return -1; -  return inb(COM1+0);  }  void  uartintr(void)  { -  consoleintr(uartgetc);  } @@ -1,230 +1,162 @@  #include "param.h"  #include "types.h" -#include "defs.h" -#include "x86.h" -#include "msr.h"  #include "memlayout.h" -#include "mmu.h" -#include "proc.h"  #include "elf.h" -#include "traps.h" - -extern char data[];  // defined by kernel.ld -void sysentry(void); +#include "riscv.h" +#include "defs.h" -static pde_t *kpml4; // kernel address space, used by scheduler and bootup +/* + * the kernel's page table. + */ +pagetable_t kernel_pagetable; -// Bootstrap GDT.  
Used by boot.S but defined in C -// Map "logical" addresses to virtual addresses using identity map. -// Cannot share a CODE descriptor for both kernel and user -// because it would have to have DPL_USR, but the CPU forbids -// an interrupt from CPL=0 to DPL=3. -struct segdesc bootgdt[NSEGS] = { -  [0] = SEGDESC(0, 0, 0),  // null -  [1] = SEGDESC(0, 0xfffff, SEG_R|SEG_CODE|SEG_S|SEG_DPL(0)|SEG_P|SEG_D|SEG_G),  // 32-bit kernel code -  [2] = SEGDESC(0, 0, SEG_R|SEG_CODE|SEG_S|SEG_DPL(0)|SEG_P|SEG_L|SEG_G),  // 64-bit kernel code -  [3] = SEGDESC(0, 0xfffff, SEG_W|SEG_S|SEG_DPL(0)|SEG_P|SEG_D|SEG_G),       // kernel data -  // The order of the user data and user code segments is -  // important for syscall instructions.  See initseg. -  [6] = SEGDESC(0, 0xfffff, SEG_W|SEG_S|SEG_DPL(3)|SEG_P|SEG_D|SEG_G),   // 64-bit user data -  [7] = SEGDESC(0, 0, SEG_R|SEG_CODE|SEG_S|SEG_DPL(3)|SEG_P|SEG_L|SEG_G),    // 64-bit user code -}; +extern char etext[];  // kernel.ld sets this to end of kernel code. +extern char trampstart[]; // trampoline.S -// Set up CPU's kernel segment descriptors. -// Run once on entry on each CPU. +/* + * create a direct-map page table for the kernel and + * turn on paging. called early, in supervisor mode. + * the page allocator is already initialized. + */  void -seginit(void) +kvminit()  { -  struct cpu *c; -  struct desctr dtr; +  kernel_pagetable = (pagetable_t) kalloc(); +  memset(kernel_pagetable, 0, PGSIZE); -  c = getmycpu(); +  // uart registers +  mappages(kernel_pagetable, UART0, PGSIZE, +           UART0, PTE_R | PTE_W); + +  // map kernel text executable and read-only. +  mappages(kernel_pagetable, KERNBASE, (uint64)etext-KERNBASE, +           KERNBASE, PTE_R | PTE_X); -  memmove(c->gdt, bootgdt, sizeof bootgdt); -  dtr.limit = sizeof(c->gdt)-1; -  dtr.base = (uint64) c->gdt; -  lgdt((void *)&dtr.limit); +  // map kernel data and the physical RAM we'll make use of. 
+  mappages(kernel_pagetable, (uint64)etext, PHYSTOP-(uint64)etext, +           (uint64)etext, PTE_R | PTE_W); + +  // map the trampoline for trap entry/exit to +  // the highest virtual address in the kernel. +  mappages(kernel_pagetable, TRAMPOLINE, PGSIZE, +           (uint64)trampstart, PTE_R | PTE_X); -  // When executing a syscall instruction the CPU sets the SS selector -  // to (star >> 32) + 8 and the CS selector to (star >> 32). -  // When executing a sysret instruction the CPU sets the SS selector -  // to (star >> 48) + 8 and the CS selector to (star >> 48) + 16. -  uint64 star = ((((uint64)SEG_UCODE|0x3)- 16)<<48)|((uint64)(SEG_KCODE)<<32); -  writemsr(MSR_STAR, star); -  writemsr(MSR_LSTAR, (uint64)&sysentry); -  writemsr(MSR_SFMASK, FL_TF | FL_IF); +  kvmswitch(); +} -  // Initialize cpu-local storage so that each core can easily -  // find its struct cpu using %gs. -  writegs(SEG_KDATA); -  writemsr(MSR_GS_BASE, (uint64)c); -  writemsr(MSR_GS_KERNBASE, (uint64)c); -  c->cpu = c; +// Switch h/w page table register to the kernel's page table, +// and enable paging. +void +kvmswitch(void) +{ +  w_satp(MAKE_SATP(kernel_pagetable));  } -// Return the address of the PTE in page table pgdir +// Return the address of the PTE in page table pagetable  // that corresponds to virtual address va.  If alloc!=0,  // create any required page table pages. +// +// The risc-v Sv39 scheme has three levels of page table +// pages. A page table page contains 512 64-bit PTEs. +// A 64-bit virtual address is split into five fields: +//   39..63 -- must be zero. +//   30..38 -- 9 bits of level-2 index. +//   21..29 -- 9 bits of level-1 index. +//   12..20 -- 9 bits of level-0 index. +//    0..11 -- 12 bits of byte offset within the page.  
static pte_t * -walkpgdir(pde_t *pml4, const void *va, int alloc) +walk(pagetable_t pagetable, const void *va, int alloc)  { -  pde_t *pgdir = pml4; -  pde_t *pde; -  int level; -   -  for (level = L_PML4; level > 0; level--) { -    pde = &pgdir[PX(level, va)]; -    if(*pde & PTE_P) -      pgdir = (pte_t*)P2V(PTE_ADDR(*pde)); -    else { -      if(!alloc || (pgdir = (pde_t*)kalloc()) == 0) +  if((uint64)va >= MAXVA) +    panic("walk"); + +  for(int level = 2; level > 0; level--) { +    pte_t *pte = &pagetable[PX(level, va)]; +    if(*pte & PTE_V) { +      pagetable = (pagetable_t)PTE2PA(*pte); +    } else { +      if(!alloc || (pagetable = (pde_t*)kalloc()) == 0)          return 0; -      memset(pgdir, 0, PGSIZE); -      *pde = V2P(pgdir) | PTE_P | PTE_W | PTE_U; +      memset(pagetable, 0, PGSIZE); +      *pte = PA2PTE(pagetable) | PTE_V;      }    } -  return &pgdir[PX(level, va)]; +  return &pagetable[PX(0, va)];  }  // Create PTEs for virtual addresses starting at va that refer to  // physical addresses starting at pa. va and size might not  // be page-aligned. -static int -mappages(pde_t *pgdir, void *va, uint64 size, uint64 pa, int perm) +void +mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm)  {    char *a, *last;    pte_t *pte; -  a = (char*)PGROUNDDOWN((uint64)va); -  last = (char*)PGROUNDDOWN(((uint64)va) + size - 1); +  a = (char*)PGROUNDDOWN(va); +  last = (char*)PGROUNDDOWN(va + size - 1);    for(;;){ -    if((pte = walkpgdir(pgdir, a, 1)) == 0) -      return -1; -    if(*pte & PTE_P) +    if((pte = walk(pagetable, a, 1)) == 0) +      panic("mappages: walk"); +    if(*pte & PTE_V)        panic("remap"); -    *pte = pa | perm | PTE_P; +    *pte = PA2PTE(pa) | perm | PTE_V;      if(a == last)        break;      a += PGSIZE;      pa += PGSIZE;    } -  return 0;  } -// There is one page table per process, plus one that's used when -// a CPU is not running any process (kpml4). 
The kernel uses the -// current process's page table during system calls and interrupts; -// page protection bits prevent user code from using the kernel's -// mappings. -// -// setupkvm() and exec() set up every page table like this: -// -//   0..KERNBASE: user memory (text+data+stack+heap), mapped to -//                phys memory allocated by the kernel -//   KERNBASE..KERNBASE+EXTMEM: mapped to 0..EXTMEM (for I/O space) -//   KERNBASE+EXTMEM..data: mapped to EXTMEM..V2P(data) -//                for the kernel's instructions and r/o data -//   data..KERNBASE+PHYSTOP: mapped to V2P(data)..PHYSTOP, -//                                  rw data + free physical memory -//   0xfe000000..0: mapped direct (devices such as ioapic) -// -// The kernel allocates physical memory for its heap and for user memory -// between V2P(end) and the end of physical memory (PHYSTOP) -// (directly addressable from end..P2V(PHYSTOP)). - -// This table defines the kernel's mappings, which are present in -// every process's page table. -static struct kmap { -  void *virt; -  uint64 phys_start; -  uint64 phys_end; -  int perm; -} kmap[] = { - { (void*)KERNBASE, 0,             EXTMEM,    PTE_W}, // I/O space - { (void*)KERNLINK, V2P(KERNLINK), V2P(data), 0},     // kern text+rodata - { (void*)data,     V2P(data),     PHYSTOP,   PTE_W}, // kern data+memory - { (void*)P2V(DEVSPACE), DEVSPACE, DEVSPACETOP, PTE_W}, // more devices -}; - -// Set up kernel part of a page table. -pde_t* -setupkvm(void) +// Remove mappings from a page table. The mappings in +// the given range must exist. Optionally free the +// physical memory. 
+void +unmappages(pagetable_t pagetable, uint64 va, uint64 size, int do_free)  { -  pde_t *pml4; -  struct kmap *k; +  char *a, *last; +  pte_t *pte; +  uint64 pa; -  if((pml4 = (pde_t*)kalloc()) == 0) -    return 0; -  memset(pml4, 0, PGSIZE); -  if (PHYSTOP > DEVSPACE) -    panic("PHYSTOP too high"); -  for(k = kmap; k < &kmap[NELEM(kmap)]; k++) { -    if(mappages(pml4, k->virt, k->phys_end - k->phys_start, -                (uint)k->phys_start, k->perm) < 0) { -      freevm(pml4, 0); -      return 0; +  a = (char*)PGROUNDDOWN(va); +  last = (char*)PGROUNDDOWN(va + size - 1); +  for(;;){ +    if((pte = walk(pagetable, a, 0)) == 0) +      panic("unmappages: walk"); +    if((*pte & PTE_V) == 0) +      panic("unmappages: not mapped"); +    if(PTE_FLAGS(*pte) == PTE_V) +      panic("unmappages: not a leaf"); +    if(do_free){ +      pa = PTE2PA(*pte); +      kfree((void*)pa);      } +    *pte = 0; +    if(a == last) +      break; +    a += PGSIZE; +    pa += PGSIZE;    } -  return pml4; -} - -// Allocate one page table for the machine for the kernel address -// space for scheduler processes. -void -kvmalloc(void) -{ -  kpml4 = setupkvm(); -  switchkvm();  } -// Switch h/w page table register to the kernel-only page table, -// for when no process is running. -void -switchkvm(void) +// create an empty user page table. +pagetable_t +uvmcreate()  { -  lcr3(V2P(kpml4));   // switch to the kernel page table +  pagetable_t pagetable; +  pagetable = (pagetable_t) kalloc(); +  if(pagetable == 0) +    panic("uvmcreate: out of memory"); +  memset(pagetable, 0, PGSIZE); +  return pagetable;  } - -// Switch TSS and h/w page table to correspond to process p. 
-void -switchuvm(struct proc *p) -{ -  struct desctr dtr; -  struct cpu *c; -   -  if(p == 0) -    panic("switchuvm: no process"); -  if(p->kstack == 0) -    panic("switchuvm: no kstack"); -  if(p->pgdir == 0) -    panic("switchuvm: no pgdir"); - -  pushcli(); - -  c = mycpu(); -  uint64 base = (uint64) &(c->ts); -  c->gdt[SEG_TSS>>3] =  SEGDESC(base, (sizeof(c->ts)-1), SEG_P|SEG_TSS64A); -  c->gdt[(SEG_TSS>>3)+1] = SEGDESCHI(base); -  c->ts.rsp[0] = (uint64) p->kstack + KSTACKSIZE; -  c->ts.iomba = (ushort) 0xFFFF; - -  dtr.limit = sizeof(c->gdt) - 1; -  dtr.base = (uint64)c->gdt; -  lgdt((void *)&dtr.limit); - -  ltr(SEG_TSS); - -  lcr3(V2P(p->pgdir));  // switch to process's address space - -  popcli(); -} - -// Load the initcode into address 0 of pgdir. +// Load the user initcode into address 0 of pagetable, +// for the very first process.  // sz must be less than a page.  void -inituvm(pde_t *pgdir, char *init, uint sz) +uvminit(pagetable_t pagetable, char *src, uint sz)  {    char *mem; @@ -232,63 +164,8 @@ inituvm(pde_t *pgdir, char *init, uint sz)      panic("inituvm: more than a page");    mem = kalloc();    memset(mem, 0, PGSIZE); -  mappages(pgdir, 0, PGSIZE, V2P(mem), PTE_W|PTE_U); -  memmove(mem, init, sz); -} - -// Load a program segment into pgdir.  addr must be page-aligned -// and the pages from addr to addr+sz must already be mapped. 
-int -loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz) -{ -  uint i, n; -  uint64 pa; -  pte_t *pte; - -  if((uint64) addr % PGSIZE != 0) -    panic("loaduvm: addr must be page aligned"); -  for(i = 0; i < sz; i += PGSIZE){ -    if((pte = walkpgdir(pgdir, addr+i, 0)) == 0) -      panic("loaduvm: address should exist"); -    pa = PTE_ADDR(*pte); -    if(sz - i < PGSIZE) -      n = sz - i; -    else -      n = PGSIZE; -    if(readi(ip, P2V(pa), offset+i, n) != n) -      return -1; -  } -  return 0; -} - -// Allocate page tables and physical memory to grow process from oldsz to -// newsz, which need not be page aligned.  Returns new size or 0 on error. -int -allocuvm(pde_t *pgdir, uint oldsz, uint newsz) -{ -  char *mem; -  uint64 a; - -  if(newsz >= KERNBASE) -    return 0; -  if(newsz < oldsz) -    return oldsz; - -  a = PGROUNDUP(oldsz); -  for(; a < newsz; a += PGSIZE){ -    mem = kalloc(); -    if(mem == 0){ -      deallocuvm(pgdir, newsz, oldsz); -      return 0; -    } -    memset(mem, 0, PGSIZE); -    if(mappages(pgdir, (char*)a, PGSIZE, V2P(mem), PTE_W|PTE_U) < 0){ -      deallocuvm(pgdir, newsz, oldsz); -      kfree(mem); -      return 0; -    } -  } -  return newsz; +  mappages(pagetable, 0, PGSIZE, (uint64)mem, PTE_W|PTE_R|PTE_X|PTE_U); +  memmove(mem, src, sz);  }  // Deallocate user pages to bring the process size from oldsz to @@ -296,153 +173,66 @@ allocuvm(pde_t *pgdir, uint oldsz, uint newsz)  // need to be less than oldsz.  oldsz can be larger than the actual  // process size.  Returns the new process size.  
int -deallocuvm(pde_t *pml4, uint64 oldsz, uint64 newsz) +uvmdealloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)  { -  pte_t *pte; -  uint64 a, pa; -    if(newsz >= oldsz)      return oldsz; - -  a = PGROUNDUP(newsz); -  for(; a  < oldsz; a += PGSIZE){ -    pte = walkpgdir(pml4, (char*)a, 0); -    if(!pte) -      continue; -    else if((*pte & PTE_P) != 0){ -      pa = PTE_ADDR(*pte); -      if(pa == 0) -        panic("kfree"); -      char *v = P2V(pa); -      kfree(v); -      *pte = 0; -    } -  } +  unmappages(pagetable, newsz, oldsz - newsz, 1);    return newsz;  } -// Recursively free a page table -void -freelevel(pde_t *pgtab, int level) { -  int i; -  pde_t *pd; -   -  if (level > 0) { -    for(i = 0; i < NPDENTRIES; i++) { -      if(pgtab[i] & PTE_P){ -        pd = (pde_t*)P2V(PTE_ADDR(pgtab[i])); -        freelevel(pd, level-1); -      } +// Recursively free page table pages. +// All leaf mappings must already have been removed. +static void +freewalk(pagetable_t pagetable) +{ +  // there are 2^9 = 512 PTEs in a page table. +  for(int i = 0; i < 512; i++){ +    pte_t pte = pagetable[i]; +    if((pte & PTE_V) && (pte & (PTE_R|PTE_W|PTE_X)) == 0){ +      // this PTE points to a lower-level page table. +      uint64 child = PTE2PA(pte); +      freewalk((pagetable_t)child); +      pagetable[i] = 0; +    } else if(pte & PTE_V){ +      // XXX trampoline pages... +      panic("freewalk: leaf");      }    } -  kfree((char*)pgtab); +  kfree((void*)pagetable);  } -// Free all the physical memory pages -// in the user part and page table +// Free user memory pages, +// then free page table pages.  void -freevm(pde_t *pml4, uint64 sz) +uvmfree(pagetable_t pagetable, uint64 sz)  { -  if(pml4 == 0) -    panic("freevm: no pgdir"); - -  deallocuvm(pml4, sz, 0); -  freelevel(pml4, L_PML4); +  unmappages(pagetable, 0, sz, 1); +  freewalk(pagetable);  } -// Clear PTE_U on a page. Used to create an inaccessible -// page beneath the user stack. 
+// Given a parent process's page table, copy +// its memory into a child's page table. +// Copies both the page table and the +// physical memory.  void -clearpteu(pde_t *pgdir, char *uva) -{ -  pte_t *pte; - -  pte = walkpgdir(pgdir, uva, 0); -  if(pte == 0) -    panic("clearpteu"); -  *pte &= ~PTE_U; -} - -// Given a parent process's page table, create a copy -// of it for a child. -pde_t* -copyuvm(pde_t *pgdir, uint sz) +uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)  { -  pde_t *d;    pte_t *pte;    uint64 pa, i;    uint flags;    char *mem; -  if((d = setupkvm()) == 0) -    return 0;    for(i = 0; i < sz; i += PGSIZE){ -    if((pte = walkpgdir(pgdir, (void *) i, 0)) == 0) +    if((pte = walk(old, (void *) i, 0)) == 0)        panic("copyuvm: pte should exist"); -    if(!(*pte & PTE_P)) +    if((*pte & PTE_V) == 0)        panic("copyuvm: page not present"); -    pa = PTE_ADDR(*pte); +    pa = PTE2PA(*pte);      flags = PTE_FLAGS(*pte);      if((mem = kalloc()) == 0) -      goto bad; -    memmove(mem, (char*)P2V(pa), PGSIZE); -    if(mappages(d, (void*)i, PGSIZE, V2P(mem), flags) < 0) { -      kfree(mem); -      goto bad; -    } +      panic("uvmcopy: kalloc failed"); +    memmove(mem, (char*)pa, PGSIZE); +    mappages(new, i, PGSIZE, (uint64)mem, flags);    } -  return d; - -bad: -  freevm(d, sz); -  return 0;  } - -//PAGEBREAK! -// Map user virtual address to kernel address. -char* -uva2ka(pde_t *pgdir, char *uva) -{ -  pte_t *pte; - -  pte = walkpgdir(pgdir, uva, 0); -  if((*pte & PTE_P) == 0) -    return 0; -  if((*pte & PTE_U) == 0) -    return 0; -  return (char*)P2V(PTE_ADDR(*pte)); -} - -// Copy len bytes from p to user address va in page table pgdir. -// Most useful when pgdir is not the current page table. -// uva2ka ensures this only works for PTE_U pages. 
-int -copyout(pde_t *pgdir, uint va, void *p, uint len) -{ -  char *buf, *pa0; -  uint64 n, va0; - -  buf = (char*)p; -  while(len > 0){ -    va0 = (uint)PGROUNDDOWN(va); -    pa0 = uva2ka(pgdir, (char*)va0); -    if(pa0 == 0) -      return -1; -    n = PGSIZE - (va - va0); -    if(n > len) -      n = len; -    memmove(pa0 + (va - va0), buf, n); -    len -= n; -    buf += n; -    va = va0 + PGSIZE; -  } -  return 0; -} - -//PAGEBREAK! -// Blank page. -//PAGEBREAK! -// Blank page. -//PAGEBREAK! -// Blank page. - @@ -1,198 +0,0 @@ -// Routines to let C code use special x86 instructions. - -#ifndef __ASSEMBLER__ - -static inline uchar -inb(ushort port) -{ -  uchar data; - -  asm volatile("in %1,%0" : "=a" (data) : "d" (port)); -  return data; -} - -static inline void -insl(int port, void *addr, int cnt) -{ -  asm volatile("cld; rep insl" : -               "=D" (addr), "=c" (cnt) : -               "d" (port), "0" (addr), "1" (cnt) : -               "memory", "cc"); -} - -static inline void -outb(ushort port, uchar data) -{ -  asm volatile("out %0,%1" : : "a" (data), "d" (port)); -} - -static inline void -outw(ushort port, ushort data) -{ -  asm volatile("out %0,%1" : : "a" (data), "d" (port)); -} - -static inline void -outsl(int port, const void *addr, int cnt) -{ -  asm volatile("cld; rep outsl" : -               "=S" (addr), "=c" (cnt) : -               "d" (port), "0" (addr), "1" (cnt) : -               "cc"); -} - -static inline void -stosb(void *addr, int data, int cnt) -{ -  asm volatile("cld; rep stosb" : -               "=D" (addr), "=c" (cnt) : -               "0" (addr), "1" (cnt), "a" (data) : -               "memory", "cc"); -} - -static inline void -stosl(void *addr, int data, int cnt) -{ -  asm volatile("cld; rep stosl" : -               "=D" (addr), "=c" (cnt) : -               "0" (addr), "1" (cnt), "a" (data) : -               "memory", "cc"); -} - -static inline void -lgdt(void *p) -{ -  asm volatile("lgdt (%0)" : : "r" (p) : "memory"); -} - -static 
inline void -lidt(void *p) -{ -  asm volatile("lidt (%0)" : : "r" (p) : "memory"); -} - -static inline void -ltr(ushort sel) -{ -  asm volatile("ltr %0" : : "r" (sel)); -} - -static inline uint64 -readeflags(void) -{ -  uint64 eflags; -  asm volatile("pushf; pop %0" : "=r" (eflags)); -  return eflags; -} - -static inline void -loadgs(ushort v) -{ -  asm volatile("movw %0, %%gs" : : "r" (v)); -} - -static inline void -cli(void) -{ -  asm volatile("cli"); -} - -static inline void -sti(void) -{ -  asm volatile("sti"); -} - -static inline uint -xchg(volatile uint *addr, uint newval) -{ -  uint result; - -  // The + in "+m" denotes a read-modify-write operand. -  asm volatile("lock; xchgl %0, %1" : -               "+m" (*addr), "=a" (result) : -               "1" (newval) : -               "cc"); -  return result; -} - -static inline uint -rcr2(void) -{ -  uint64 val; -  asm volatile("mov %%cr2,%0" : "=r" (val)); -  return val; -} - -static inline void -lcr3(uint64 val) -{ -  asm volatile("mov %0,%%cr3" : : "r" (val)); -} - -static inline void -writegs(uint16 v) -{ -  __asm volatile("movw %0, %%gs" : : "r" (v)); -} - - -//PAGEBREAK: 36 -// Layout of the trap frame built on the stack by the -// hardware and by trapasm.S, and passed to trap(). 
-struct trapframe { -   uint64 rax;       -   uint64 rbx; -   uint64 rcx; -   uint64 rdx; -   uint64 rbp; -   uint64 rsi; -   uint64 rdi; -   uint64 r8; -   uint64 r9; -   uint64 r10; -   uint64 r11; -   uint64 r12; -   uint64 r13; -   uint64 r14; -   uint64 r15; -   uint64 trapno; -   uint64 err; -   uint64 rip;      -   uint16 cs; -   uint16 padding[3]; -   uint64 rflags;   -   uint64 rsp;      -   uint64 ss;       -}__attribute__((packed)); - -struct sysframe { -  // arguments -  uint64 rdi; -  uint64 rsi; -  uint64 rdx; -  uint64 r10; -  uint64 r8; -  uint64 r9; -   -  // callee-saved registers -  uint64 r15; -  uint64 r14; -  uint64 r13; -  uint64 r12; -  uint64 rbx; -  uint64 rbp; - -  // return value -  uint64 rax; - -  // syscall registers -  uint64 r11;   // eflags -  uint64 rcx;   // rip -  uint64 rsp; -   -}__attribute__((packed)); - -#endif - -#define TF_CS 144 // offset in trapframe for saved cs | 
