diff options
| author | Frans Kaashoek <kaashoek@Frans-Kaashoeks-MacBook-Pro.local> | 2010-08-26 08:03:18 -0400 | 
|---|---|---|
| committer | Frans Kaashoek <kaashoek@Frans-Kaashoeks-MacBook-Pro.local> | 2010-08-26 08:03:18 -0400 | 
| commit | d55b2fac074ac23e30c337014f40ae2156b31b60 (patch) | |
| tree | bb9885972bf8929e7957f9ea512a8befaf06b11c | |
| parent | d87f51c5a1c5e7a2a3ba111b1052a297f4f96fb0 (diff) | |
| parent | 789b508d538e6faf635e49f268a4f1f9e9b65f05 (diff) | |
| download | xv6-labs-d55b2fac074ac23e30c337014f40ae2156b31b60.tar.gz xv6-labs-d55b2fac074ac23e30c337014f40ae2156b31b60.tar.bz2 xv6-labs-d55b2fac074ac23e30c337014f40ae2156b31b60.zip | |
Merge commit 'origin/page' into page
| -rw-r--r-- | Makefile | 2 | ||||
| -rw-r--r-- | asm.h | 2 | ||||
| -rw-r--r-- | bootasm.S | 6 | ||||
| -rw-r--r-- | bootother.S | 6 | ||||
| -rw-r--r-- | defs.h | 12 | ||||
| -rw-r--r-- | exec.c | 7 | ||||
| -rw-r--r-- | kalloc.c | 9 | ||||
| -rw-r--r-- | main.c | 42 | ||||
| -rw-r--r-- | mmu.h | 33 | ||||
| -rw-r--r-- | proc.c | 19 | ||||
| -rw-r--r-- | proc.h | 9 | ||||
| -rw-r--r-- | runoff.list | 1 | ||||
| -rw-r--r-- | sh.c | 1 | ||||
| -rw-r--r-- | swtch.S | 8 | ||||
| -rw-r--r-- | syscall.c | 2 | ||||
| -rw-r--r-- | syscall.h | 1 | ||||
| -rw-r--r-- | sysproc.c | 16 | ||||
| -rw-r--r-- | trap.c | 2 | ||||
| -rw-r--r-- | usertests.c | 137 | ||||
| -rw-r--r-- | usys.S | 1 | ||||
| -rw-r--r-- | vm.c | 202 | 
21 files changed, 351 insertions, 167 deletions
| @@ -38,7 +38,7 @@ AS = $(TOOLPREFIX)gas  LD = $(TOOLPREFIX)ld  OBJCOPY = $(TOOLPREFIX)objcopy  OBJDUMP = $(TOOLPREFIX)objdump -CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 +CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 -Werror  CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector)  ASFLAGS = -m32 -gdwarf-2  # FreeBSD ld wants ``elf_i386_fbsd'' @@ -6,6 +6,8 @@          .word 0, 0;                                             \          .byte 0, 0, 0, 0 +// The 0xC0 means the limit is in 4096-byte units +// and (for executable segments) 32-bit mode.  #define SEG_ASM(type,base,lim)                                  \          .word (((lim) >> 12) & 0xffff), ((base) & 0xffff);      \          .byte (((base) >> 16) & 0xff), (0x90 | (type)),         \ @@ -51,8 +51,10 @@ seta20.2:    orl     $CR0_PE, %eax    movl    %eax, %cr0 -  # Jump to next instruction, but in 32-bit code segment. -  # Switches processor into 32-bit mode. +  # This ljmp is how you load the CS (Code Segment) register. +  # SEG_ASM produces segment descriptors with the 32-bit mode +  # flag set (the D flag), so addresses and word operands will +  # default to 32 bits after this jump.    ljmp    $(SEG_KCODE<<3), $start32  .code32                       # Assemble for 32-bit mode diff --git a/bootother.S b/bootother.S index 11d32f1..899669a 100644 --- a/bootother.S +++ b/bootother.S @@ -45,8 +45,10 @@ start:    orl     $CR0_PE, %eax    movl    %eax, %cr0 -  # Jump to next instruction, but in 32-bit code segment. -  # Switches processor into 32-bit mode. +  # This ljmp is how you load the CS (Code Segment) register. +  # SEG_ASM produces segment descriptors with the 32-bit mode +  # flag set (the D flag), so addresses and word operands will +  # default to 32 bits after this jump.    ljmp    $(SEG_KCODE<<3), $start32  .code32                       # Assemble for 32-bit mode @@ -110,7 +110,6 @@ void            yield(void);  // swtch.S  void            swtch(struct context**, struct context*); -void            jstack(uint);  // spinlock.c  void            acquire(struct spinlock*); @@ -143,7 +142,7 @@ void            timerinit(void);  // trap.c  void            idtinit(void); -extern int      ticks; +extern uint     ticks;  void            tvinit(void);  extern struct spinlock tickslock; @@ -153,23 +152,20 @@ void            uartintr(void);  void            uartputc(int);  // vm.c -#define PGROUNDUP(sz)  ((sz+PGSIZE-1) & ~(PGSIZE-1)) -extern pde_t    *kpgdir;  void            pminit(void);  void            ksegment(void);  void            kvmalloc(void);  void            vminit(void); -void            jkstack(); -void            printstack(void); -void            printpgdir(pde_t *);  pde_t*          setupkvm(void);  char*           uva2ka(pde_t*, char*);  int             allocuvm(pde_t*, char*, uint); +int             deallocuvm(pde_t *pgdir, char *addr, uint sz);  void            freevm(pde_t*);  void            inituvm(pde_t*, char*, char*, uint);  int             loaduvm(pde_t*, char*, struct inode *ip, uint, uint);  pde_t*          copyuvm(pde_t*,uint); -void            loadvm(struct proc*); +void            switchuvm(struct proc*); +void            switchkvm();  // number of elements in fixed-size array  #define NELEM(x) (sizeof(x)/sizeof((x)[0])) @@ -43,13 +43,16 @@ exec(char *path, char **argv)        goto bad;      if (!allocuvm(pgdir, (char *)ph.va, ph.memsz))        goto bad; -    sz += PGROUNDUP(ph.memsz); +    if(ph.va + ph.memsz > sz) +      sz = ph.va + ph.memsz;      if (!loaduvm(pgdir, (char *)ph.va, ip, ph.offset, ph.filesz))        goto bad;    }    iunlockput(ip);    // Allocate and initialize stack at sz +  sz = PGROUNDUP(sz); +  sz += PGSIZE; // leave an invalid page    if (!allocuvm(pgdir, (char *)sz, PGSIZE))      goto bad;    mem = uva2ka(pgdir, (char *)sz); @@ -95,7 +98,7 @@ exec(char *path, char **argv)    proc->tf->eip = elf.entry;  // main    proc->tf->esp = sp; -  loadvm(proc);  +  switchuvm(proc);     freevm(oldpgdir); @@ -1,9 +1,8 @@  // Physical memory allocator, intended to allocate -// memory for user processes. Allocates in 4096-byte "pages". +// memory for user processes. Allocates in 4096-byte pages.  // Free list is kept sorted and combines adjacent pages into  // long runs, to make it easier to allocate big segments. -// One reason the page size is 4k is that the x86 segment size -// granularity is 4k. +// This combining is not useful now that xv6 uses paging.  #include "types.h"  #include "defs.h" @@ -24,14 +23,10 @@ struct {  int nfreemem;  // Initialize free list of physical pages. -// This code cheats by just considering one megabyte of -// pages after end.  Real systems would determine the -// amount of memory available in the system and use it all.  void  kinit(char *p, uint len)  {    initlock(&kmem.lock, "kmem"); -  cprintf("end 0x%x free = %d(0x%x)\n", p, len);    nfreemem = 0;    kfree(p, len);  } @@ -7,7 +7,8 @@  static void bootothers(void);  static void mpmain(void); -void jkstack(void) __attribute__((noreturn)); +void jkstack(void)  __attribute__((noreturn)); +void mainc(void);  // Bootstrap processor starts running C code here.  int @@ -15,21 +16,32 @@ main(void)  {    mpinit();        // collect info about this machine    lapicinit(mpbcpu()); -  ksegment(); +  ksegment();      // set up segments    picinit();       // interrupt controller    ioapicinit();    // another interrupt controller    consoleinit();   // I/O devices & their interrupts    uartinit();      // serial port -  pminit();        // physical memory for kernel -  jkstack();       // Jump to mainc on a proper-allocated kernel stack  +  pminit();        // discover how much memory there is +  jkstack();       // call mainc() on a properly-allocated stack  +} + +void +jkstack(void) +{ +  char *kstack = kalloc(PGSIZE); +  if (!kstack) +    panic("jkstack\n"); +  char *top = kstack + PGSIZE; +  asm volatile("movl %0,%%esp" : : "r" (top)); +  asm volatile("call mainc"); +  panic("jkstack");  }  void  mainc(void)  { -  cprintf("cpus %p cpu %p\n", cpus, cpu);    cprintf("\ncpu%d: starting xv6\n\n", cpu->id); -  kvmalloc();      // allocate the kernel page table +  kvmalloc();      // initialze the kernel page table    pinit();         // process table    tvinit();        // trap vectors    binit();         // buffer cache @@ -45,22 +57,21 @@ mainc(void)    mpmain();  } -// Bootstrap processor gets here after setting up the hardware. -// Additional processors start here. +// Common CPU setup code. +// Bootstrap CPU comes here from mainc(). +// Other CPUs jump here from bootother.S.  static void  mpmain(void)  {    if(cpunum() != mpbcpu()) {      ksegment(); -    cprintf("other cpu\n");      lapicinit(cpunum());    } -  vminit();        // Run with paging on each processor -  cprintf("cpu%d: mpmain\n", cpu->id); -  idtinit(); +  vminit();        // turn on paging +  cprintf("cpu%d: starting\n", cpu->id); +  idtinit();       // load idt register    xchg(&cpu->booted, 1); -  cprintf("cpu%d: scheduling\n", cpu->id); -  scheduler(); +  scheduler();     // start running processes  }  static void @@ -75,6 +86,7 @@ bootothers(void)    // placed the start of bootother.S there.    code = (uchar *) 0x7000;    memmove(code, _binary_bootother_start, (uint)_binary_bootother_size); +    for(c = cpus; c < cpus+ncpu; c++){      if(c == cpus+cpunum())  // We've started already.        continue; @@ -85,7 +97,7 @@ bootothers(void)      *(void**)(code-8) = mpmain;      lapicstartap(c->id, (uint)code); -    // Wait for cpu to get through bootstrap. +    // Wait for cpu to finish mpmain()      while(c->booted == 0)        ;    } @@ -85,32 +85,20 @@ struct segdesc {  // | Page Directory |   Page Table   | Offset within Page  |  // |      Index     |      Index     |                     |  // +----------------+----------------+---------------------+ -//  \--- PDX(la) --/ \--- PTX(la) --/ \---- PGOFF(la) ----/ -//  \----------- PPN(la) -----------/ -// -// The PDX, PTX, PGOFF, and PPN macros decompose linear addresses as shown. -// To construct a linear address la from PDX(la), PTX(la), and PGOFF(la), -// use PGADDR(PDX(la), PTX(la), PGOFF(la)). - -// page number field of address -#define PPN(la)		(((uint) (la)) >> PTXSHIFT) -#define VPN(la)		PPN(la)		// used to index into vpt[] +//  \--- PDX(la) --/ \--- PTX(la) --/   // page directory index  #define PDX(la)		((((uint) (la)) >> PDXSHIFT) & 0x3FF) -#define VPD(la)		PDX(la)		// used to index into vpd[]  // page table index  #define PTX(la)		((((uint) (la)) >> PTXSHIFT) & 0x3FF) -// offset in page -#define PGOFF(la)	(((uint) (la)) & 0xFFF) -  // construct linear address from indexes and offset  #define PGADDR(d, t, o)	((uint) ((d) << PDXSHIFT | (t) << PTXSHIFT | (o))) -// mapping from physical addresses to virtual addresses is the identity one -// (really linear addresses, but we map linear to physical also directly) +// turn a kernel linear address into a physical address. +// all of the kernel data structures have linear and +// physical addresses that are equal.  #define PADDR(a)       ((uint) a)  // Page directory and page table constants. @@ -120,12 +108,12 @@ struct segdesc {  #define PGSIZE		4096		// bytes mapped by a page  #define PGSHIFT		12		// log2(PGSIZE) -#define PTSIZE		(PGSIZE*NPTENTRIES) // bytes mapped by a page directory entry -#define PTSHIFT		22		// log2(PTSIZE) -  #define PTXSHIFT	12		// offset of PTX in a linear address  #define PDXSHIFT	22		// offset of PDX in a linear address +#define PGROUNDUP(sz)  (((sz)+PGSIZE-1) & ~(PGSIZE-1)) +#define PGROUNDDOWN(a) ((char*)((((unsigned int)a) & ~(PGSIZE-1)))) +  // Page table/directory entry flags.  #define PTE_P		0x001	// Present  #define PTE_W		0x002	// Writeable @@ -137,13 +125,6 @@ struct segdesc {  #define PTE_PS		0x080	// Page Size  #define PTE_MBZ		0x180	// Bits must be zero -// The PTE_AVAIL bits aren't used by the kernel or interpreted by the -// hardware, so user processes are allowed to set them arbitrarily. -#define PTE_AVAIL	0xE00	// Available for software use - -// Only flags in PTE_USER may be used in system calls. -#define PTE_USER	(PTE_AVAIL | PTE_P | PTE_W | PTE_U) -  // Address in page table or page directory entry  #define PTE_ADDR(pte)	((uint) (pte) & ~0xFFF) @@ -142,10 +142,15 @@ userinit(void)  int  growproc(int n)  { -  if (!allocuvm(proc->pgdir, (char *)proc->sz, n)) -    return -1; +  if(n > 0){ +    if (!allocuvm(proc->pgdir, (char *)proc->sz, n)) +      return -1; +  } else if(n < 0){ +    if (!deallocuvm(proc->pgdir, (char *)(proc->sz + n), 0 - n)) +      return -1; +  }    proc->sz += n; -  loadvm(proc); +  switchuvm(proc);    return 0;  } @@ -214,9 +219,10 @@ scheduler(void)        // to release ptable.lock and then reacquire it        // before jumping back to us.        proc = p; -      loadvm(p); +      switchuvm(p);        p->state = RUNNING;        swtch(&cpu->scheduler, proc->context); +      switchkvm();        // Process is done running for now.        // It should have changed its p->state before coming back. @@ -242,7 +248,6 @@ sched(void)      panic("sched running");    if(readeflags()&FL_IF)      panic("sched interruptible"); -  lcr3(PADDR(kpgdir));   // Switch to the kernel page table    intena = cpu->intena;    swtch(&proc->context, cpu->scheduler);    cpu->intena = intena; @@ -414,9 +419,9 @@ wait(void)          // Found one.          pid = p->pid;          kfree(p->kstack, KSTACKSIZE); -	freevm(p->pgdir); +        p->kstack = 0; +        freevm(p->pgdir);          p->state = UNUSED; -	p->kstack = 0;          p->pid = 0;          p->parent = 0;          p->name[0] = 0; @@ -3,8 +3,8 @@  #define SEG_KCODE 1  // kernel code  #define SEG_KDATA 2  // kernel data+stack  #define SEG_KCPU  3  // kernel per-cpu data -#define SEG_UCODE 4 -#define SEG_UDATA 5 +#define SEG_UCODE 4  // user code +#define SEG_UDATA 5  // user data+stack  #define SEG_TSS   6  // this process's task state  #define NSEGS     7 @@ -16,7 +16,7 @@  // Contexts are stored at the bottom of the stack they  // describe; the stack pointer is the address of the context.  // The layout of the context matches the layout of the stack in swtch.S -// at "Switch stacks" comment. Switch itself doesn't save eip explicitly, +// at the "Switch stacks" comment. Switch doesn't save eip explicitly,  // but it is on the stack and allocproc() manipulates it.  struct context {    uint edi; @@ -31,7 +31,7 @@ enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE };  // Per-process state  struct proc {    uint sz;                     // Size of process memory (bytes) -  pde_t* pgdir;                // linear address of proc's pgdir +  pde_t* pgdir;                // Linear address of proc's pgdir    char *kstack;                // Bottom of kernel stack for this process    enum procstate state;        // Process state    volatile int pid;            // Process ID @@ -48,6 +48,7 @@ struct proc {  // Process memory is laid out contiguously, low addresses first:  //   text  //   original data and bss +//   invalid page  //   fixed-size stack  //   expandable heap diff --git a/runoff.list b/runoff.list index c469aae..f39ce18 100644 --- a/runoff.list +++ b/runoff.list @@ -23,6 +23,7 @@ proc.c  swtch.S  vm.c  kalloc.c +vm.c  # system calls  traps.h @@ -420,7 +420,6 @@ parseexec(char **ps, char *es)    int tok, argc;    struct execcmd *cmd;    struct cmd *ret; -  int *x = (int *) peek;    if(peek(ps, es, "("))      return parseblock(ps, es); @@ -26,11 +26,3 @@ swtch:    popl %ebx    popl %ebp    ret - -# Jump on a new stack, fake C calling conventions -.globl jstack -jstack:	 -  movl 4(%esp), %esp -  subl $16, %esp	# space for arguments -  movl $0, %ebp	        # terminate functions that follow ebp's -  call mainc	        # continue at mainc @@ -100,6 +100,7 @@ extern int sys_sleep(void);  extern int sys_unlink(void);  extern int sys_wait(void);  extern int sys_write(void); +extern int sys_uptime(void);  static int (*syscalls[])(void) = {  [SYS_chdir]   sys_chdir, @@ -122,6 +123,7 @@ static int (*syscalls[])(void) = {  [SYS_unlink]  sys_unlink,  [SYS_wait]    sys_wait,  [SYS_write]   sys_write, +[SYS_uptime]  sys_uptime,  };  void @@ -19,3 +19,4 @@  #define SYS_getpid 18  #define SYS_sbrk   19  #define SYS_sleep  20 +#define SYS_uptime 21 @@ -57,7 +57,8 @@ sys_sbrk(void)  int  sys_sleep(void)  { -  int n, ticks0; +  int n; +  uint ticks0;    if(argint(0, &n) < 0)      return -1; @@ -73,3 +74,16 @@ sys_sleep(void)    release(&tickslock);    return 0;  } + +// return how many clock tick interrupts have occurred +// since boot. +int +sys_uptime(void) +{ +  uint xticks; +   +  acquire(&tickslock); +  xticks = ticks; +  release(&tickslock); +  return xticks; +} @@ -11,7 +11,7 @@  struct gatedesc idt[256];  extern uint vectors[];  // in vectors.S: array of 256 entry pointers  struct spinlock tickslock; -int ticks; +uint ticks;  void  tvinit(void) diff --git a/usertests.c b/usertests.c index cc2601c..670a4a8 100644 --- a/usertests.c +++ b/usertests.c @@ -322,8 +322,9 @@ void  mem(void)  {    void *m1, *m2; -  int pid; +  int pid, ppid; +  ppid = getpid();    if((pid = fork()) == 0){      m1 = 0;      while((m2 = malloc(10001)) != 0) { @@ -338,6 +339,7 @@ mem(void)      m1 = malloc(1024*20);      if(m1 == 0) {        printf(1, "couldn't allocate mem?!!\n"); +      kill(ppid);        exit();      }      free(m1); @@ -1229,6 +1231,136 @@ forktest(void)    printf(1, "fork test OK\n");  } +void +sbrktest(void) +{ +  int pid; +  char *oldbrk = sbrk(0); + +  printf(stdout, "sbrk test\n"); + +  // can one sbrk() less than a page? +  char *a = sbrk(0); +  int i; +  for(i = 0; i < 5000; i++){ +    char *b = sbrk(1); +    if(b != a){ +      printf(stdout, "sbrk test failed %d %x %x\n", i, a, b); +      exit(); +    } +    *b = 1; +    a = b + 1; +  } +  pid = fork(); +  if(pid < 0){ +    printf(stdout, "sbrk test fork failed\n"); +    exit(); +  } +  char *c = sbrk(1); +  c = sbrk(1); +  if(c != a + 1){ +    printf(stdout, "sbrk test failed post-fork\n"); +    exit(); +  } +  if(pid == 0) +    exit(); +  wait(); + +  // can one allocate the full 640K? +  a = sbrk(0); +  uint amt = (640 * 1024) - (uint) a; +  char *p = sbrk(amt); +  if(p != a){ +    printf(stdout, "sbrk test failed 640K test, p %x a %x\n", p, a); +    exit(); +  } +  char *lastaddr = (char *)(640 * 1024 - 1); +  *lastaddr = 99; + +  // is one forbidden from allocating more than 640K? +  c = sbrk(4096); +  if(c != (char *) 0xffffffff){ +    printf(stdout, "sbrk allocated more than 640K, c %x\n", c); +    exit(); +  } + +  // can one de-allocate? +  a = sbrk(0); +  c = sbrk(-4096); +  if(c == (char *) 0xffffffff){ +    printf(stdout, "sbrk could not deallocate\n"); +    exit(); +  } +  c = sbrk(0); +  if(c != a - 4096){ +    printf(stdout, "sbrk deallocation produced wrong address, a %x c %x\n", a, c); +    exit(); +  } + +  // can one re-allocate that page? +  a = sbrk(0); +  c = sbrk(4096); +  if(c != a || sbrk(0) != a + 4096){ +    printf(stdout, "sbrk re-allocation failed, a %x c %x\n", a, c); +    exit(); +  } +  if(*lastaddr == 99){ +    // should be zero +    printf(stdout, "sbrk de-allocation didn't really deallocate\n"); +    exit(); +  } + +  c = sbrk(4096); +  if(c != (char *) 0xffffffff){ +    printf(stdout, "sbrk was able to re-allocate beyond 640K, c %x\n", c); +    exit(); +  } + +  // can we read the kernel's memory? +  for(a = (char*)(640*1024); a < (char *)2000000; a += 50000){ +    int ppid = getpid(); +    int pid = fork(); +    if(pid < 0){ +      printf(stdout, "fork failed\n"); +      exit(); +    } +    if(pid == 0){ +      printf(stdout, "oops could read %x = %x\n", a, *a); +      kill(ppid); +      exit(); +    } +    wait(); +  } + +  if(sbrk(0) > oldbrk) +    sbrk(-(sbrk(0) - oldbrk)); + +  printf(stdout, "sbrk test OK\n"); +} + +void +stacktest(void) +{ +  printf(stdout, "stack test\n"); +  char dummy = 1; +  char *p = &dummy; +  int ppid = getpid(); +  int pid = fork(); +  if(pid < 0){ +    printf(stdout, "fork failed\n"); +    exit(); +  } +  if(pid == 0){ +    // should cause a trap: +    p[-4096] = 'z'; +    kill(ppid); +    printf(stdout, "stack test failed: page before stack was writeable\n"); +    exit(); +  } +  wait(); +  printf(stdout, "stack test OK\n"); +} +  int  main(int argc, char *argv[])  { @@ -1240,6 +1372,9 @@ main(int argc, char *argv[])    }    close(open("usertests.ran", O_CREATE)); +  stacktest(); +  sbrktest(); +    opentest();    writetest();    writetest1(); @@ -28,3 +28,4 @@ SYSCALL(dup)  SYSCALL(getpid)  SYSCALL(sbrk)  SYSCALL(sleep) +SYSCALL(uptime) @@ -8,13 +8,20 @@  // The mappings from logical to linear are one to one (i.e.,  // segmentation doesn't do anything). -// The mapping from linear to physical are one to one for the kernel. -// The mappings for the kernel include all of physical memory (until -// PHYSTOP), including the I/O hole, and the top of physical address -// space, where additional devices are located. -// The kernel itself is linked to be at 1MB, and its physical memory -// is also at 1MB. -// Physical memory for user programs is allocated from physical memory +// There is one page table per process, plus one that's used +// when a CPU is not running any process (kpgdir). +// A user process uses the same page table as the kernel; the +// page protection bits prevent it from using anything other +// than its memory. +//  +// setupkvm() and exec() set up every page table like this: +//   0..640K          : user memory (text, data, stack, heap) +//   640K..1M         : mapped direct (for IO space) +//   1M..kernend      : mapped direct (for the kernel's text and data) +//   kernend..PHYSTOP : mapped direct (kernel heap and user pages) +//   0xfe000000..0    : mapped direct (devices such as ioapic) +// +// The kernel allocates memory for its heap and for user memory  // between kernend and the end of physical memory (PHYSTOP).  // The virtual address space of each user program includes the kernel  // (which is inaccessible in user mode).  The user program addresses @@ -22,7 +29,7 @@  // (both in physical memory and in the kernel's virtual address  // space). -#define PHYSTOP  0x300000 +#define PHYSTOP  0x1000000  #define USERTOP  0xA0000  static uint kerntext;  // Linker starts kernel at 1MB @@ -31,29 +38,11 @@ static uint kerndata;  static uint kerndsz;  static uint kernend;  static uint freesz; -pde_t *kpgdir;         // One kernel page table for scheduler procs - -void -printpgdir(pde_t *pgdir) -{ -  uint i; -  uint j; - -  cprintf("printpgdir 0x%x\n", pgdir); -  for (i = 0; i < NPDENTRIES; i++) { -    if (pgdir[i] != 0 && i < 100) { -      cprintf("pgdir %d, v=0x%x\n", i, pgdir[i]); -      pte_t *pgtab = (pte_t*) PTE_ADDR(pgdir[i]); -      for (j = 0; j < NPTENTRIES; j++) { -	if (pgtab[j] != 0) -	  cprintf("pgtab %d, v=0x%x, addr=0x%x\n", j, PGADDR(i, j, 0),  -		PTE_ADDR(pgtab[j])); -      } -    } -  } -  cprintf("printpgdir done\n", pgdir); -} +static pde_t *kpgdir;  // for use in scheduler() +// return the address of the PTE in page table pgdir +// that corresponds to linear address va.  if create!=0, +// create any required page table pages.  static pte_t *  walkpgdir(pde_t *pgdir, const void *va, int create)  { @@ -80,16 +69,26 @@ walkpgdir(pde_t *pgdir, const void *va, int create)    return &pgtab[PTX(va)];  } +// create PTEs for linear addresses starting at la that refer to +// physical addresses starting at pa. la and size might not +// be page-aligned.  static int  mappages(pde_t *pgdir, void *la, uint size, uint pa, int perm)  { -  uint i; -  pte_t *pte; - -  for (i = 0; i < size; i += PGSIZE) { -    if (!(pte = walkpgdir(pgdir, (void*)(la + i), 1))) +  char *first = PGROUNDDOWN(la); +  char *last = PGROUNDDOWN(la + size - 1); +  char *a = first; +  while(1){ +    pte_t *pte = walkpgdir(pgdir, a, 1); +    if(pte == 0)        return 0; -    *pte = (pa + i) | perm | PTE_P; +    if(*pte & PTE_P) +      panic("remap"); +    *pte = pa | perm | PTE_P; +    if(a == last) +      break; +    a += PGSIZE; +    pa += PGSIZE;    }    return 1;  } @@ -101,12 +100,15 @@ ksegment(void)  {    struct cpu *c; -  // Map once virtual addresses to linear addresses using identity map +  // Map virtual addresses to linear addresses using identity map. +  // Cannot share a CODE descriptor for both kernel and user +  // because it would have to have DPL_USR, but the CPU forbids +  // an interrupt from CPL=0 to DPL=3.    c = &cpus[cpunum()];    c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0);    c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0); -  c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0x0, 0xffffffff, DPL_USER); -  c->gdt[SEG_UDATA] = SEG(STA_W, 0x0, 0xffffffff, DPL_USER); +  c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, DPL_USER); +  c->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER);    // map cpu, and curproc    c->gdt[SEG_KCPU] = SEG(STA_W, &c->cpu, 8, 0); @@ -119,9 +121,9 @@ ksegment(void)    proc = 0;  } -// Setup address space and current process task state. +// Switch h/w page table and TSS registers to point to process p.  void -loadvm(struct proc *p) +switchuvm(struct proc *p)  {    pushcli(); @@ -133,14 +135,21 @@ loadvm(struct proc *p)    ltr(SEG_TSS << 3);    if (p->pgdir == 0) -    panic("loadvm: no pgdir\n"); +    panic("switchuvm: no pgdir\n");    lcr3(PADDR(p->pgdir));  // switch to new address space    popcli();  } -// Setup kernel part of a page table. Linear adresses map one-to-one -// on physical addresses. +// Switch h/w page table register to the kernel-only page table, for when +// no process is running. +void +switchkvm() +{ +  lcr3(PADDR(kpgdir));   // Switch to the kernel page table +} + +// Set up kernel part of a page table.  pde_t*  setupkvm(void)  { @@ -153,10 +162,10 @@ setupkvm(void)    // Map IO space from 640K to 1Mbyte    if (!mappages(pgdir, (void *)USERTOP, 0x60000, USERTOP, PTE_W))      return 0; -  // Map kernel text from kern text addr read-only +  // Map kernel text read-only    if (!mappages(pgdir, (void *) kerntext, kerntsz, kerntext, 0))      return 0; -  // Map kernel data form kern data addr R/W +  // Map kernel data read/write    if (!mappages(pgdir, (void *) kerndata, kerndsz, kerndata, PTE_W))      return 0;    // Map dynamically-allocated memory read/write (kernel stacks, user mem) @@ -168,6 +177,10 @@ setupkvm(void)    return pgdir;  } +// return the physical address that a given user address +// maps to. the result is also a kernel logical address, +// since the kernel maps the physical memory allocated to user +// processes directly.  char*  uva2ka(pde_t *pgdir, char *uva)  {     @@ -177,25 +190,60 @@ uva2ka(pde_t *pgdir, char *uva)    return (char *)pa;  } +// allocate sz bytes more memory for a process starting at the +// given user address; allocates physical memory and page +// table entries. addr and sz need not be page-aligned. +// it is a no-op for any parts of the requested memory +// that are already allocated.  int  allocuvm(pde_t *pgdir, char *addr, uint sz)  { -  uint i, n; -  char *mem; +  if (addr + sz > (char*)USERTOP) +    return 0; +  char *first = PGROUNDDOWN(addr); +  char *last = PGROUNDDOWN(addr + sz - 1); +  char *a; +  for(a = first; a <= last; a += PGSIZE){ +    pte_t *pte = walkpgdir(pgdir, a, 0); +    if(pte == 0 || (*pte & PTE_P) == 0){ +      char *mem = kalloc(PGSIZE); +      if(mem == 0){ +        // XXX clean up? +        return 0; +      } +      memset(mem, 0, PGSIZE); +      mappages(pgdir, a, PGSIZE, PADDR(mem), PTE_W|PTE_U); +    } +  } +  return 1; +} -  n = PGROUNDUP(sz); -  if (addr + n >= USERTOP) +// deallocate some of the user pages, in response to sbrk() +// with a negative argument. if addr is not page-aligned, +// then only deallocates starting at the next page boundary. +int +deallocuvm(pde_t *pgdir, char *addr, uint sz) +{ +  if (addr + sz > (char*)USERTOP)      return 0; -  for (i = 0; i < n; i += PGSIZE) { -    if (!(mem = kalloc(PGSIZE))) {   // XXX cleanup what we did? -      return 0; +  char *first = (char*) PGROUNDUP((uint)addr); +  char *last = PGROUNDDOWN(addr + sz - 1); +  char *a; +  for(a = first; a <= last; a += PGSIZE){ +    pte_t *pte = walkpgdir(pgdir, a, 0); +    if(pte && (*pte & PTE_P) != 0){ +      uint pa = PTE_ADDR(*pte); +      if(pa == 0) +        panic("deallocuvm"); +      kfree((void *) pa, PGSIZE); +      *pte = 0;      } -    memset(mem, 0, PGSIZE); -    mappages(pgdir, addr + i, PGSIZE, PADDR(mem), PTE_W|PTE_U);    }    return 1;  } +// free a page table and all the physical memory pages +// in the user part.  void  freevm(pde_t *pgdir)  { @@ -211,9 +259,8 @@ freevm(pde_t *pgdir)  	if (pgtab[j] != 0) {  	  uint pa = PTE_ADDR(pgtab[j]);  	  uint va = PGADDR(i, j, 0); -	  if (va >= USERTOP)   // done with user part? -	    break; -	  kfree((void *) pa, PGSIZE); +	  if (va < USERTOP)   // user memory +            kfree((void *) pa, PGSIZE);  	  pgtab[j] = 0;  	}        } @@ -261,6 +308,8 @@ inituvm(pde_t *pgdir, char *addr, char *init, uint sz)    }  } +// given a parent process's page table, create a copy +// of it for a child.  pde_t*  copyuvm(pde_t *pgdir, uint sz)  { @@ -273,17 +322,22 @@ copyuvm(pde_t *pgdir, uint sz)    for (i = 0; i < sz; i += PGSIZE) {      if (!(pte = walkpgdir(pgdir, (void *)i, 0)))        panic("copyuvm: pte should exist\n"); -    pa = PTE_ADDR(*pte); -    if (!(mem = kalloc(PGSIZE))) -      return 0; -    memmove(mem, (char *)pa, PGSIZE); -    if (!mappages(d, (void *)i, PGSIZE, PADDR(mem), PTE_W|PTE_U)) -      return 0; +    if(*pte & PTE_P){ +      pa = PTE_ADDR(*pte); +      if (!(mem = kalloc(PGSIZE))) +        return 0; +      memmove(mem, (char *)pa, PGSIZE); +      if (!mappages(d, (void *)i, PGSIZE, PADDR(mem), PTE_W|PTE_U)) +        return 0; +    }    }    return d;  } -// Gather about physical memory layout.  Called once during boot. +// Gather information about physical memory layout. +// Called once during boot. +// Really should find out how much physical memory +// there is rather than assuming PHYSTOP.  void  pminit(void)  { @@ -298,27 +352,13 @@ pminit(void)    kernend = ((uint)end + PGSIZE) & ~(PGSIZE-1);    kerntext = ph[0].va;    kerndata = ph[1].va; -  kerntsz = kerndata - kerntext; -  kerndsz = kernend - kerndata; +  kerntsz = ph[0].memsz; +  kerndsz = ph[1].memsz;    freesz = PHYSTOP - kernend; -  cprintf("kerntext@0x%x(sz=0x%x), kerndata@0x%x(sz=0x%x), kernend 0x%x freesz = 0x%x\n",  -	  kerntext, kerntsz, kerndata, kerndsz, kernend, freesz); -    kinit((char *)kernend, freesz);  } -// Jump to mainc on a properly-allocated kernel stack -void -jkstack(void) -{ -  char *kstack = kalloc(PGSIZE); -  if (!kstack) -    panic("jkstack\n"); -  char *top = kstack + PGSIZE; -  jstack((uint) top); -} -  // Allocate one page table for the machine for the kernel address  // space for scheduler processes.  void | 
