diff options
| -rw-r--r-- | Makefile | 7 | ||||
| -rw-r--r-- | asm.h | 2 | ||||
| -rw-r--r-- | bio.c | 1 | ||||
| -rw-r--r-- | bootasm.S | 8 | ||||
| -rw-r--r-- | bootother.S | 6 | ||||
| -rw-r--r-- | defs.h | 24 | ||||
| -rw-r--r-- | exec.c | 89 | ||||
| -rw-r--r-- | file.c | 1 | ||||
| -rw-r--r-- | forktest.c | 8 | ||||
| -rw-r--r-- | ide.c | 3 | ||||
| -rw-r--r-- | kalloc.c | 35 | ||||
| -rw-r--r-- | lapic.c | 5 | ||||
| -rw-r--r-- | main.c | 57 | ||||
| -rw-r--r-- | mmu.h | 68 | ||||
| -rw-r--r-- | mp.c | 1 | ||||
| -rw-r--r-- | param.h | 2 | ||||
| -rw-r--r-- | proc.c | 80 | ||||
| -rw-r--r-- | proc.h | 9 | ||||
| -rw-r--r-- | runoff.list | 2 | ||||
| -rw-r--r-- | runoff.spec | 1 | ||||
| -rw-r--r-- | sh.c | 4 | ||||
| -rw-r--r-- | spinlock.c | 2 | ||||
| -rw-r--r-- | syscall.c | 16 | ||||
| -rw-r--r-- | syscall.h | 1 | ||||
| -rw-r--r-- | sysfile.c | 5 | ||||
| -rw-r--r-- | sysproc.c | 16 | ||||
| -rw-r--r-- | trap.c | 11 | ||||
| -rw-r--r-- | traps.h | 2 | ||||
| -rw-r--r-- | types.h | 1 | ||||
| -rw-r--r-- | usertests.c | 137 | ||||
| -rw-r--r-- | usys.S | 1 | ||||
| -rw-r--r-- | vm.c | 382 | ||||
| -rw-r--r-- | x86.h | 60 | 
33 files changed, 858 insertions, 189 deletions
@@ -25,19 +25,20 @@ OBJS = \  	trap.o\  	uart.o\  	vectors.o\ +	vm.o\  # Cross-compiling (e.g., on Mac OS X) -#TOOLPREFIX = i386-jos-elf- +TOOLPREFIX = i386-jos-elf-  # Using native tools (e.g., on X86 Linux) -TOOLPREFIX =  +#TOOLPREFIX =   CC = $(TOOLPREFIX)gcc  AS = $(TOOLPREFIX)gas  LD = $(TOOLPREFIX)ld  OBJCOPY = $(TOOLPREFIX)objcopy  OBJDUMP = $(TOOLPREFIX)objdump -CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 +CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 -Werror  CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector)  ASFLAGS = -m32 -gdwarf-2  # FreeBSD ld wants ``elf_i386_fbsd'' @@ -6,6 +6,8 @@          .word 0, 0;                                             \          .byte 0, 0, 0, 0 +// The 0xC0 means the limit is in 4096-byte units +// and (for executable segments) 32-bit mode.  #define SEG_ASM(type,base,lim)                                  \          .word (((lim) >> 12) & 0xffff), ((base) & 0xffff);      \          .byte (((base) >> 16) & 0xff), (0x90 | (type)),         \ @@ -109,7 +109,6 @@ bread(uint dev, uint sector)  void  bwrite(struct buf *b)  { -  cprintf("bwrite sector %d\n", b->sector);    if((b->flags & B_BUSY) == 0)      panic("bwrite");    b->flags |= B_DIRTY; @@ -51,8 +51,10 @@ seta20.2:    orl     $CR0_PE, %eax    movl    %eax, %cr0 -  # Jump to next instruction, but in 32-bit code segment. -  # Switches processor into 32-bit mode. +  # This ljmp is how you load the CS (Code Segment) register. +  # SEG_ASM produces segment descriptors with the 32-bit mode +  # flag set (the D flag), so addresses and word operands will +  # default to 32 bits after this jump.    ljmp    $(SEG_KCODE<<3), $start32  .code32                       # Assemble for 32-bit mode @@ -88,5 +90,5 @@ gdt:    SEG_ASM(STA_W, 0x0, 0xffffffff)         # data seg  gdtdesc: -  .word   (gdtdesc - gdt - 1)                            # sizeof(gdt) - 1 +  .word   (gdtdesc - gdt - 1)             # sizeof(gdt) - 1    .long   gdt                             # address gdt diff --git a/bootother.S b/bootother.S index 11d32f1..899669a 100644 --- a/bootother.S +++ b/bootother.S @@ -45,8 +45,10 @@ start:    orl     $CR0_PE, %eax    movl    %eax, %cr0 -  # Jump to next instruction, but in 32-bit code segment. -  # Switches processor into 32-bit mode. +  # This ljmp is how you load the CS (Code Segment) register. +  # SEG_ASM produces segment descriptors with the 32-bit mode +  # flag set (the D flag), so addresses and word operands will +  # default to 32 bits after this jump.    ljmp    $(SEG_KCODE<<3), $start32  .code32                       # Assemble for 32-bit mode @@ -60,9 +60,10 @@ extern uchar    ioapicid;  void            ioapicinit(void);  // kalloc.c +extern int      nfreemem;  char*           kalloc(int);  void            kfree(char*, int); -void            kinit(void); +void            kinit(char*,uint);  // kbd.c  void            kbdintr(void); @@ -101,8 +102,6 @@ int             kill(int);  void            pinit(void);  void            procdump(void);  void            scheduler(void) __attribute__((noreturn)); -void            ksegment(void); -void            usegment(void);  void            sleep(void*, struct spinlock*);  void            userinit(void);  int             wait(void); @@ -143,7 +142,7 @@ void            timerinit(void);  // trap.c  void            idtinit(void); -extern int      ticks; +extern uint     ticks;  void            tvinit(void);  extern struct spinlock tickslock; @@ -152,6 +151,21 @@ void            uartinit(void);  void            uartintr(void);  void            uartputc(int); +// vm.c +void            pminit(void); +void            ksegment(void); +void            kvmalloc(void); +void            vminit(void); +pde_t*          setupkvm(void); +char*           uva2ka(pde_t*, char*); +int             allocuvm(pde_t*, char*, uint); +int             deallocuvm(pde_t *pgdir, char *addr, uint sz); +void            freevm(pde_t*); +void            inituvm(pde_t*, char*, char*, uint); +int             loaduvm(pde_t*, char*, struct inode *ip, uint, uint); +pde_t*          copyuvm(pde_t*,uint); +void            switchuvm(struct proc*); +void            switchkvm(); +  // number of elements in fixed-size array  #define NELEM(x) (sizeof(x)/sizeof((x)[0])) - @@ -11,12 +11,13 @@ exec(char *path, char **argv)  {    char *mem, *s, *last;    int i, argc, arglen, len, off; -  uint sz, sp, argp; +  uint sz, sp, spoffset, argp;    struct elfhdr elf;    struct inode *ip;    struct proghdr ph; +  pde_t *pgdir, *oldpgdir; -  mem = 0; +  pgdir = 0;    sz = 0;    if((ip = namei(path)) == 0) @@ -29,37 +30,8 @@ exec(char *path, char **argv)    if(elf.magic != ELF_MAGIC)      goto bad; -  // Compute memory size of new process. -  // Program segments. -  for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){ -    if(readi(ip, (char*)&ph, off, sizeof(ph)) != sizeof(ph)) -      goto bad; -    if(ph.type != ELF_PROG_LOAD) -      continue; -    if(ph.memsz < ph.filesz) -      goto bad; -    sz += ph.memsz; -  } -   -  // Arguments. -  arglen = 0; -  for(argc=0; argv[argc]; argc++) -    arglen += strlen(argv[argc]) + 1; -  arglen = (arglen+3) & ~3; -  sz += arglen; -  sz += 4*(argc+1);  // argv data -  sz += 4;  // argv -  sz += 4;  // argc - -  // Stack. -  sz += PAGE; -   -  // Allocate program memory. -  sz = (sz+PAGE-1) & ~(PAGE-1); -  mem = kalloc(sz); -  if(mem == 0) +  if (!(pgdir = setupkvm()))      goto bad; -  memset(mem, 0, sz);    // Load program into memory.    for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){ @@ -67,37 +39,51 @@ exec(char *path, char **argv)        goto bad;      if(ph.type != ELF_PROG_LOAD)        continue; -    if(ph.va + ph.memsz < ph.va || ph.va + ph.memsz > sz) -      goto bad;      if(ph.memsz < ph.filesz)        goto bad; -    if(readi(ip, mem + ph.va, ph.offset, ph.filesz) != ph.filesz) +    if (!allocuvm(pgdir, (char *)ph.va, ph.memsz)) +      goto bad; +    if(ph.va + ph.memsz > sz) +      sz = ph.va + ph.memsz; +    if (!loaduvm(pgdir, (char *)ph.va, ip, ph.offset, ph.filesz))        goto bad; -    memset(mem + ph.va + ph.filesz, 0, ph.memsz - ph.filesz);    }    iunlockput(ip); -   -  // Initialize stack. + +  // Allocate and initialize stack at sz +  sz = PGROUNDUP(sz); +  sz += PGSIZE; // leave an invalid page +  if (!allocuvm(pgdir, (char *)sz, PGSIZE)) +    goto bad; +  mem = uva2ka(pgdir, (char *)sz); +  spoffset = sz; +  sz += PGSIZE; + +  arglen = 0; +  for(argc=0; argv[argc]; argc++) +    arglen += strlen(argv[argc]) + 1; +  arglen = (arglen+3) & ~3; +    sp = sz;    argp = sz - arglen - 4*(argc+1);    // Copy argv strings and pointers to stack. -  *(uint*)(mem+argp + 4*argc) = 0;  // argv[argc] +  *(uint*)(mem+argp-spoffset + 4*argc) = 0;  // argv[argc]    for(i=argc-1; i>=0; i--){      len = strlen(argv[i]) + 1;      sp -= len; -    memmove(mem+sp, argv[i], len); -    *(uint*)(mem+argp + 4*i) = sp;  // argv[i] +    memmove(mem+sp-spoffset, argv[i], len); +    *(uint*)(mem+argp-spoffset + 4*i) = sp;  // argv[i]    }    // Stack frame for main(argc, argv), below arguments.    sp = argp;    sp -= 4; -  *(uint*)(mem+sp) = argp; +  *(uint*)(mem+sp-spoffset) = argp;    sp -= 4; -  *(uint*)(mem+sp) = argc; +  *(uint*)(mem+sp-spoffset) = argc;    sp -= 4; -  *(uint*)(mem+sp) = 0xffffffff;   // fake return pc +  *(uint*)(mem+sp-spoffset) = 0xffffffff;   // fake return pc    // Save program name for debugging.    for(last=s=path; *s; s++) @@ -105,18 +91,21 @@ exec(char *path, char **argv)        last = s+1;    safestrcpy(proc->name, last, sizeof(proc->name)); -  // Commit to the new image. -  kfree(proc->mem, proc->sz); -  proc->mem = mem; +  // Commit to the user image. +  oldpgdir = proc->pgdir; +  proc->pgdir = pgdir;    proc->sz = sz;    proc->tf->eip = elf.entry;  // main    proc->tf->esp = sp; -  usegment(); + +  switchuvm(proc);  + +  freevm(oldpgdir); +    return 0;   bad: -  if(mem) -    kfree(mem, sz); +  if (pgdir) freevm(pgdir);    iunlockput(ip);    return -1;  } @@ -116,7 +116,6 @@ filewrite(struct file *f, char *addr, int n)      return pipewrite(f->pipe, addr, n);    if(f->type == FD_INODE){      ilock(f->ip); -    cprintf("filewrite: %d\n", n);      if((r = writei(f->ip, addr, f->off, n)) > 0)        f->off += r;      iunlock(f->ip); @@ -5,6 +5,8 @@  #include "stat.h"  #include "user.h" +#define N  1000 +  void  printf(int fd, char *s, ...)  { @@ -18,7 +20,7 @@ forktest(void)    printf(1, "fork test\n"); -  for(n=0; n<1000; n++){ +  for(n=0; n<N; n++){      pid = fork();      if(pid < 0)        break; @@ -26,8 +28,8 @@ forktest(void)        exit();    } -  if(n == 1000){ -    printf(1, "fork claimed to work 1000 times!\n"); +  if(n == N){ +    printf(1, "fork claimed to work N times!\n", N);      exit();    } @@ -147,8 +147,9 @@ iderw(struct buf *b)    // Wait for request to finish.    // Assuming will not sleep too long: ignore proc->killed. -  while((b->flags & (B_VALID|B_DIRTY)) != B_VALID) +  while((b->flags & (B_VALID|B_DIRTY)) != B_VALID) {      sleep(b, &idelock); +  }    release(&idelock);  } @@ -1,13 +1,13 @@  // Physical memory allocator, intended to allocate -// memory for user processes. Allocates in 4096-byte "pages". +// memory for user processes. Allocates in 4096-byte pages.  // Free list is kept sorted and combines adjacent pages into  // long runs, to make it easier to allocate big segments. -// One reason the page size is 4k is that the x86 segment size -// granularity is 4k. +// This combining is not useful now that xv6 uses paging.  #include "types.h"  #include "defs.h"  #include "param.h" +#include "mmu.h"  #include "spinlock.h"  struct run { @@ -20,21 +20,14 @@ struct {    struct run *freelist;  } kmem; +int nfreemem; +  // Initialize free list of physical pages. -// This code cheats by just considering one megabyte of -// pages after end.  Real systems would determine the -// amount of memory available in the system and use it all.  void -kinit(void) +kinit(char *p, uint len)  { -  extern char end[]; -  uint len; -  char *p; -    initlock(&kmem.lock, "kmem"); -  p = (char*)(((uint)end + PAGE) & ~(PAGE-1)); -  len = 256*PAGE; // assume computer has 256 pages of RAM, 1 MB -  cprintf("mem = %d\n", len); +  nfreemem = 0;    kfree(p, len);  } @@ -47,19 +40,23 @@ kfree(char *v, int len)  {    struct run *r, *rend, **rp, *p, *pend; -  if(len <= 0 || len % PAGE) +  if(len <= 0 || len % PGSIZE)      panic("kfree");    // Fill with junk to catch dangling refs.    memset(v, 1, len);    acquire(&kmem.lock); +  nfreemem += len;    p = (struct run*)v;    pend = (struct run*)(v + len);    for(rp=&kmem.freelist; (r=*rp) != 0 && r <= pend; rp=&r->next){      rend = (struct run*)((char*)r + r->len); -    if(r <= p && p < rend) +    if(r <= p && p < rend) { +      cprintf("freeing a free page: r = 0x%x p = 0x%x rend = 0x%x\n",  +	      r, p, rend);        panic("freeing free page"); +    }      if(rend == p){  // r before p: expand r to include p        r->len += len;        if(r->next && r->next == pend){  // r now next to r->next? @@ -93,7 +90,7 @@ kalloc(int n)    char *p;    struct run *r, **rp; -  if(n % PAGE || n <= 0) +  if(n % PGSIZE || n <= 0)      panic("kalloc");    acquire(&kmem.lock); @@ -103,12 +100,12 @@ kalloc(int n)        p = (char*)r + r->len;        if(r->len == 0)          *rp = r->next; +      nfreemem -= n;        release(&kmem.lock);        return p;      }    }    release(&kmem.lock); - -  cprintf("kalloc: out of memory\n");    return 0;  } + @@ -20,8 +20,11 @@    #define STARTUP    0x00000600   // Startup IPI    #define DELIVS     0x00001000   // Delivery status    #define ASSERT     0x00004000   // Assert interrupt (vs deassert) +  #define DEASSERT   0x00000000    #define LEVEL      0x00008000   // Level triggered    #define BCAST      0x00080000   // Send to all APICs, including self. +  #define BUSY       0x00001000 +  #define FIXED      0x00000000  #define ICRHI   (0x0310/4)   // Interrupt Command [63:32]  #define TIMER   (0x0320/4)   // Local Vector Table 0 (TIMER)    #define X1         0x0000000B   // divide counts by 1 @@ -48,6 +51,7 @@ lapicw(int index, int value)  void  lapicinit(int c)  { +  cprintf("lapicinit: %d 0x%x\n", c, lapic);    if(!lapic)       return; @@ -126,7 +130,6 @@ microdelay(int us)  {  } -  #define IO_RTC  0x70  // Start additional processor running bootstrap code at addr. @@ -6,23 +6,42 @@  #include "x86.h"  static void bootothers(void); -static void mpmain(void) __attribute__((noreturn)); +static void mpmain(void); +void jkstack(void)  __attribute__((noreturn)); +void mainc(void);  // Bootstrap processor starts running C code here.  int  main(void)  { -  mpinit(); // collect info about this machine +  mpinit();        // collect info about this machine    lapicinit(mpbcpu()); -  ksegment(); +  ksegment();      // set up segments    picinit();       // interrupt controller    ioapicinit();    // another interrupt controller    consoleinit();   // I/O devices & their interrupts    uartinit();      // serial port -cprintf("cpus %p cpu %p\n", cpus, cpu); -  cprintf("\ncpu%d: starting xv6\n\n", cpu->id); +  pminit();        // discover how much memory there is +  jkstack();       // call mainc() on a properly-allocated stack  +} + +void +jkstack(void) +{ +  char *kstack = kalloc(PGSIZE); +  if (!kstack) +    panic("jkstack\n"); +  char *top = kstack + PGSIZE; +  asm volatile("movl %0,%%esp" : : "r" (top)); +  asm volatile("call mainc"); +  panic("jkstack"); +} -  kinit();         // physical memory allocator +void +mainc(void) +{ +  cprintf("\ncpu%d: starting xv6\n\n", cpu->id); +  kvmalloc();      // initialze the kernel page table    pinit();         // process table    tvinit();        // trap vectors    binit();         // buffer cache @@ -38,20 +57,21 @@ cprintf("cpus %p cpu %p\n", cpus, cpu);    mpmain();  } -// Bootstrap processor gets here after setting up the hardware. -// Additional processors start here. +// Common CPU setup code. +// Bootstrap CPU comes here from mainc(). +// Other CPUs jump here from bootother.S.  static void  mpmain(void)  { -  if(cpunum() != mpbcpu()) +  if(cpunum() != mpbcpu()) { +    ksegment();      lapicinit(cpunum()); -  ksegment(); -  cprintf("cpu%d: mpmain\n", cpu->id); -  idtinit(); +  } +  vminit();        // turn on paging +  cprintf("cpu%d: starting\n", cpu->id); +  idtinit();       // load idt register    xchg(&cpu->booted, 1); - -  cprintf("cpu%d: scheduling\n", cpu->id); -  scheduler(); +  scheduler();     // start running processes  }  static void @@ -62,8 +82,9 @@ bootothers(void)    struct cpu *c;    char *stack; -  // Write bootstrap code to unused memory at 0x7000. -  code = (uchar*)0x7000; +  // Write bootstrap code to unused memory at 0x7000.  The linker has +  // placed the start of bootother.S there. +  code = (uchar *) 0x7000;    memmove(code, _binary_bootother_start, (uint)_binary_bootother_size);    for(c = cpus; c < cpus+ncpu; c++){ @@ -76,7 +97,7 @@ bootothers(void)      *(void**)(code-8) = mpmain;      lapicstartap(c->id, (uint)code); -    // Wait for cpu to get through bootstrap. +    // Wait for cpu to finish mpmain()      while(c->booted == 0)        ;    } @@ -62,6 +62,8 @@ struct segdesc {  #define STA_R       0x2     // Readable (executable segments)  #define STA_A       0x1     // Accessed +//  +  // System segment type bits  #define STS_T16A    0x1     // Available 16-bit TSS  #define STS_LDT     0x2     // Local Descriptor Table @@ -76,6 +78,72 @@ struct segdesc {  #define STS_IG32    0xE     // 32-bit Interrupt Gate  #define STS_TG32    0xF     // 32-bit Trap Gate + +// A linear address 'la' has a three-part structure as follows: +// +// +--------10------+-------10-------+---------12----------+ +// | Page Directory |   Page Table   | Offset within Page  | +// |      Index     |      Index     |                     | +// +----------------+----------------+---------------------+ +//  \--- PDX(la) --/ \--- PTX(la) --/  + +// page directory index +#define PDX(la)		((((uint) (la)) >> PDXSHIFT) & 0x3FF) + +// page table index +#define PTX(la)		((((uint) (la)) >> PTXSHIFT) & 0x3FF) + +// construct linear address from indexes and offset +#define PGADDR(d, t, o)	((uint) ((d) << PDXSHIFT | (t) << PTXSHIFT | (o))) + +// turn a kernel linear address into a physical address. +// all of the kernel data structures have linear and +// physical addresses that are equal. +#define PADDR(a)       ((uint) a) + +// Page directory and page table constants. +#define NPDENTRIES	1024		// page directory entries per page directory +#define NPTENTRIES	1024		// page table entries per page table + +#define PGSIZE		4096		// bytes mapped by a page +#define PGSHIFT		12		// log2(PGSIZE) + +#define PTXSHIFT	12		// offset of PTX in a linear address +#define PDXSHIFT	22		// offset of PDX in a linear address + +#define PGROUNDUP(sz)  (((sz)+PGSIZE-1) & ~(PGSIZE-1)) +#define PGROUNDDOWN(a) ((char*)((((unsigned int)a) & ~(PGSIZE-1)))) + +// Page table/directory entry flags. +#define PTE_P		0x001	// Present +#define PTE_W		0x002	// Writeable +#define PTE_U		0x004	// User +#define PTE_PWT		0x008	// Write-Through +#define PTE_PCD		0x010	// Cache-Disable +#define PTE_A		0x020	// Accessed +#define PTE_D		0x040	// Dirty +#define PTE_PS		0x080	// Page Size +#define PTE_MBZ		0x180	// Bits must be zero + +// Address in page table or page directory entry +#define PTE_ADDR(pte)	((uint) (pte) & ~0xFFF) + +typedef uint pte_t; + +// Control Register flags +#define CR0_PE		0x00000001	// Protection Enable +#define CR0_MP		0x00000002	// Monitor coProcessor +#define CR0_EM		0x00000004	// Emulation +#define CR0_TS		0x00000008	// Task Switched +#define CR0_ET		0x00000010	// Extension Type +#define CR0_NE		0x00000020	// Numeric Errror +#define CR0_WP		0x00010000	// Write Protect +#define CR0_AM		0x00040000	// Alignment Mask +#define CR0_NW		0x20000000	// Not Writethrough +#define CR0_CD		0x40000000	// Cache Disable +#define CR0_PG		0x80000000	// Paging + +  // PAGEBREAK: 40  // Task state segment format  struct taskstate { @@ -39,6 +39,7 @@ mpsearch1(uchar *addr, int len)  {    uchar *e, *p; +  cprintf("mpsearch1 0x%x %d\n", addr, len);    e = addr+len;    for(p = addr; p < e; p += sizeof(struct mp))      if(memcmp(p, "_MP_", 4) == 0 && sum(p, sizeof(struct mp)) == 0) @@ -1,5 +1,5 @@  #define NPROC        64  // maximum number of processes -#define PAGE       4096  // granularity of user-space memory allocation +#define PAGE       4096  // conveniently chosen to be equal to PGSIZE  #define KSTACKSIZE PAGE  // size of per-process kernel stack  #define NCPU          8  // maximum number of CPUs  #define NOFILE       16  // open files per process @@ -60,39 +60,6 @@ procdump(void)    }  } -// Set up CPU's kernel segment descriptors. -// Run once at boot time on each CPU. -void -ksegment(void) -{ -  struct cpu *c; - -  c = &cpus[cpunum()]; -  c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0x100000 + 64*1024-1, 0); -  c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0); -  c->gdt[SEG_KCPU] = SEG(STA_W, &c->cpu, 8, 0); -  lgdt(c->gdt, sizeof(c->gdt)); -  loadgs(SEG_KCPU << 3); -   -  // Initialize cpu-local storage. -  cpu = c; -  proc = 0; -} - -// Set up CPU's segment descriptors and current process task state. -void -usegment(void) -{ -  pushcli(); -  cpu->gdt[SEG_UCODE] = SEG(STA_X|STA_R, proc->mem, proc->sz-1, DPL_USER); -  cpu->gdt[SEG_UDATA] = SEG(STA_W, proc->mem, proc->sz-1, DPL_USER); -  cpu->gdt[SEG_TSS] = SEG16(STS_T32A, &cpu->ts, sizeof(cpu->ts)-1, 0); -  cpu->gdt[SEG_TSS].s = 0; -  cpu->ts.ss0 = SEG_KDATA << 3; -  cpu->ts.esp0 = (uint)proc->kstack + KSTACKSIZE; -  ltr(SEG_TSS << 3); -  popcli(); -}  //PAGEBREAK: 32  // Look in the process table for an UNUSED proc. @@ -149,20 +116,19 @@ userinit(void)    p = allocproc();    initproc = p; - -  // Initialize memory from initcode.S -  p->sz = PAGE; -  p->mem = kalloc(p->sz); -  memset(p->mem, 0, p->sz); -  memmove(p->mem, _binary_initcode_start, (int)_binary_initcode_size); - +  if (!(p->pgdir = setupkvm())) +    panic("userinit: out of memory?"); +  if (!allocuvm(p->pgdir, 0x0, (int)_binary_initcode_size)) +    panic("userinit: out of memory?"); +  inituvm(p->pgdir, 0x0, _binary_initcode_start, (int)_binary_initcode_size); +  p->sz = PGROUNDUP((int)_binary_initcode_size);    memset(p->tf, 0, sizeof(*p->tf));    p->tf->cs = (SEG_UCODE << 3) | DPL_USER;    p->tf->ds = (SEG_UDATA << 3) | DPL_USER;    p->tf->es = p->tf->ds;    p->tf->ss = p->tf->ds;    p->tf->eflags = FL_IF; -  p->tf->esp = p->sz; +  p->tf->esp = PGSIZE;    p->tf->eip = 0;  // beginning of initcode.S    safestrcpy(p->name, "initcode", sizeof(p->name)); @@ -176,17 +142,15 @@ userinit(void)  int  growproc(int n)  { -  char *newmem; - -  newmem = kalloc(proc->sz + n); -  if(newmem == 0) -    return -1; -  memmove(newmem, proc->mem, proc->sz); -  memset(newmem + proc->sz, 0, n); -  kfree(proc->mem, proc->sz); -  proc->mem = newmem; +  if(n > 0){ +    if (!allocuvm(proc->pgdir, (char *)proc->sz, n)) +      return -1; +  } else if(n < 0){ +    if (!deallocuvm(proc->pgdir, (char *)(proc->sz + n), 0 - n)) +      return -1; +  }    proc->sz += n; -  usegment(); +  switchuvm(proc);    return 0;  } @@ -204,14 +168,13 @@ fork(void)      return -1;    // Copy process state from p. -  np->sz = proc->sz; -  if((np->mem = kalloc(np->sz)) == 0){ +  if (!(np->pgdir = copyuvm(proc->pgdir, proc->sz))) {      kfree(np->kstack, KSTACKSIZE);      np->kstack = 0;      np->state = UNUSED;      return -1;    } -  memmove(np->mem, proc->mem, np->sz); +  np->sz = proc->sz;    np->parent = proc;    *np->tf = *proc->tf; @@ -225,7 +188,7 @@ fork(void)    pid = np->pid;    np->state = RUNNABLE; - +  safestrcpy(np->name, proc->name, sizeof(proc->name));    return pid;  } @@ -256,9 +219,10 @@ scheduler(void)        // to release ptable.lock and then reacquire it        // before jumping back to us.        proc = p; -      usegment(); +      switchuvm(p);        p->state = RUNNING;        swtch(&cpu->scheduler, proc->context); +      switchkvm();        // Process is done running for now.        // It should have changed its p->state before coming back. @@ -284,7 +248,6 @@ sched(void)      panic("sched running");    if(readeflags()&FL_IF)      panic("sched interruptible"); -    intena = cpu->intena;    swtch(&proc->context, cpu->scheduler);    cpu->intena = intena; @@ -455,8 +418,9 @@ wait(void)        if(p->state == ZOMBIE){          // Found one.          pid = p->pid; -        kfree(p->mem, p->sz);          kfree(p->kstack, KSTACKSIZE); +        p->kstack = 0; +        freevm(p->pgdir);          p->state = UNUSED;          p->pid = 0;          p->parent = 0; @@ -3,8 +3,8 @@  #define SEG_KCODE 1  // kernel code  #define SEG_KDATA 2  // kernel data+stack  #define SEG_KCPU  3  // kernel per-cpu data -#define SEG_UCODE 4 -#define SEG_UDATA 5 +#define SEG_UCODE 4  // user code +#define SEG_UDATA 5  // user data+stack  #define SEG_TSS   6  // this process's task state  #define NSEGS     7 @@ -16,7 +16,7 @@  // Contexts are stored at the bottom of the stack they  // describe; the stack pointer is the address of the context.  // The layout of the context matches the layout of the stack in swtch.S -// at "Switch stacks" comment. Switch itself doesn't save eip explicitly, +// at the "Switch stacks" comment. Switch doesn't save eip explicitly,  // but it is on the stack and allocproc() manipulates it.  struct context {    uint edi; @@ -30,8 +30,8 @@ enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE };  // Per-process state  struct proc { -  char *mem;                   // Start of process memory (kernel address)    uint sz;                     // Size of process memory (bytes) +  pde_t* pgdir;                // Linear address of proc's pgdir    char *kstack;                // Bottom of kernel stack for this process    enum procstate state;        // Process state    volatile int pid;            // Process ID @@ -48,6 +48,7 @@ struct proc {  // Process memory is laid out contiguously, low addresses first:  //   text  //   original data and bss +//   invalid page  //   fixed-size stack  //   expandable heap diff --git a/runoff.list b/runoff.list index 6bbd386..f39ce18 100644 --- a/runoff.list +++ b/runoff.list @@ -21,7 +21,9 @@ spinlock.c  proc.h  proc.c  swtch.S +vm.c  kalloc.c +vm.c  # system calls  traps.h diff --git a/runoff.spec b/runoff.spec index dbd6d5c..e4cfd42 100644 --- a/runoff.spec +++ b/runoff.spec @@ -27,6 +27,7 @@ even: proc.h  # mild preference  right: proc.c   # VERY important  # setjmp.S either +# vm.c either  # kalloc.c either  # syscall.h either @@ -330,7 +330,7 @@ parsecmd(char *s)  {    char *es;    struct cmd *cmd; -   +    es = s + strlen(s);    cmd = parseline(&s, es);    peek(&s, es, ""); @@ -363,7 +363,7 @@ struct cmd*  parsepipe(char **ps, char *es)  {    struct cmd *cmd; -   +    cmd = parseexec(ps, es);    if(peek(ps, es, "|")){      gettoken(ps, es, 0, 0); @@ -71,7 +71,7 @@ getcallerpcs(void *v, uint pcs[])    ebp = (uint*)v - 2;    for(i = 0; i < 10; i++){ -    if(ebp == 0 || ebp == (uint*)0xffffffff) +    if(ebp == 0 || ebp < (uint *) 0x100000 || ebp == (uint*)0xffffffff)        break;      pcs[i] = ebp[1];     // saved %eip      ebp = (uint*)ebp[0]; // saved %ebp @@ -18,10 +18,12 @@ fetchint(struct proc *p, uint addr, int *ip)  {    if(addr >= p->sz || addr+4 > p->sz)      return -1; -  *ip = *(int*)(p->mem + addr); +  *ip = *(int*)(addr);    return 0;  } +// XXX should we copy the string? +  // Fetch the nul-terminated string at addr from process p.  // Doesn't actually copy the string - just sets *pp to point at it.  // Returns length of string, not including nul. @@ -32,8 +34,8 @@ fetchstr(struct proc *p, uint addr, char **pp)    if(addr >= p->sz)      return -1; -  *pp = p->mem + addr; -  ep = p->mem + p->sz; +  *pp = (char *) addr; +  ep = (char *) p->sz;    for(s = *pp; s < ep; s++)      if(*s == 0)        return s - *pp; @@ -44,7 +46,8 @@ fetchstr(struct proc *p, uint addr, char **pp)  int  argint(int n, int *ip)  { -  return fetchint(proc, proc->tf->esp + 4 + 4*n, ip); +  int x = fetchint(proc, proc->tf->esp + 4 + 4*n, ip); +  return x;  }  // Fetch the nth word-sized system call argument as a pointer @@ -59,7 +62,8 @@ argptr(int n, char **pp, int size)      return -1;    if((uint)i >= proc->sz || (uint)i+size >= proc->sz)      return -1; -  *pp = proc->mem + i; +  // *pp = proc->mem + i;   // XXXXX +  *pp = (char *) i;   // XXXXX    return 0;  } @@ -96,6 +100,7 @@ extern int sys_sleep(void);  extern int sys_unlink(void);  extern int sys_wait(void);  extern int sys_write(void); +extern int sys_uptime(void);  static int (*syscalls[])(void) = {  [SYS_chdir]   sys_chdir, @@ -118,6 +123,7 @@ static int (*syscalls[])(void) = {  [SYS_unlink]  sys_unlink,  [SYS_wait]    sys_wait,  [SYS_write]   sys_write, +[SYS_uptime]  sys_uptime,  };  void @@ -19,3 +19,4 @@  #define SYS_getpid 18  #define SYS_sbrk   19  #define SYS_sleep  20 +#define SYS_uptime 21 @@ -264,7 +264,6 @@ sys_open(void)    if(argstr(0, &path) < 0 || argint(1, &omode) < 0)      return -1; -    if(omode & O_CREATE){      if((ip = create(path, T_FILE, 0, 0)) == 0)        return -1; @@ -291,7 +290,6 @@ sys_open(void)    f->off = 0;    f->readable = !(omode & O_WRONLY);    f->writable = (omode & O_WRONLY) || (omode & O_RDWR); -    return fd;  } @@ -350,8 +348,9 @@ sys_exec(void)    int i;    uint uargv, uarg; -  if(argstr(0, &path) < 0 || argint(1, (int*)&uargv) < 0) +  if(argstr(0, &path) < 0 || argint(1, (int*)&uargv) < 0) {      return -1; +  }    memset(argv, 0, sizeof(argv));    for(i=0;; i++){      if(i >= NELEM(argv)) @@ -57,7 +57,8 @@ sys_sbrk(void)  int  sys_sleep(void)  { -  int n, ticks0; +  int n; +  uint ticks0;    if(argint(0, &n) < 0)      return -1; @@ -73,3 +74,16 @@ sys_sleep(void)    release(&tickslock);    return 0;  } + +// return how many clock tick interrupts have occurred +// since boot. +int +sys_uptime(void) +{ +  uint xticks; +   +  acquire(&tickslock); +  xticks = ticks; +  release(&tickslock); +  return xticks; +} @@ -11,7 +11,7 @@  struct gatedesc idt[256];  extern uint vectors[];  // in vectors.S: array of 256 entry pointers  struct spinlock tickslock; -int ticks; +uint ticks;  void  tvinit(void) @@ -78,13 +78,14 @@ trap(struct trapframe *tf)    default:      if(proc == 0 || (tf->cs&3) == 0){        // In kernel, it must be our mistake. -      cprintf("unexpected trap %d from cpu %d eip %x\n", -              tf->trapno, cpu->id, tf->eip); +      cprintf("unexpected trap %d from cpu %d eip %x (cr2=0x%x)\n", +              tf->trapno, cpu->id, tf->eip, rcr2());        panic("trap");      }      // In user space, assume process misbehaved. -    cprintf("pid %d %s: trap %d err %d on cpu %d eip %x -- kill proc\n", -            proc->pid, proc->name, tf->trapno, tf->err, cpu->id, tf->eip); +    cprintf("pid %d %s: trap %d err %d on cpu %d eip 0x%x addr 0x%x--kill proc\n", +            proc->pid, proc->name, tf->trapno, tf->err, cpu->id, tf->eip,  +	    rcr2());      proc->killed = 1;    } @@ -24,7 +24,7 @@  // These are arbitrarily chosen, but with care not to overlap  // processor defined exceptions or interrupt vectors. -#define T_SYSCALL       64     // system call +#define T_SYSCALL       64      // system call  #define T_DEFAULT      500      // catchall  #define T_IRQ0          32      // IRQ 0 corresponds to int T_IRQ @@ -1,3 +1,4 @@  typedef unsigned int   uint;  typedef unsigned short ushort;  typedef unsigned char  uchar; +typedef uint pde_t; diff --git a/usertests.c b/usertests.c index cc2601c..670a4a8 100644 --- a/usertests.c +++ b/usertests.c @@ -322,8 +322,9 @@ void  mem(void)  {    void *m1, *m2; -  int pid; +  int pid, ppid; +  ppid = getpid();    if((pid = fork()) == 0){      m1 = 0;      while((m2 = malloc(10001)) != 0) { @@ -338,6 +339,7 @@ mem(void)      m1 = malloc(1024*20);      if(m1 == 0) {        printf(1, "couldn't allocate mem?!!\n"); +      kill(ppid);        exit();      }      free(m1); @@ -1229,6 +1231,136 @@ forktest(void)    printf(1, "fork test OK\n");  } +void +sbrktest(void) +{ +  int pid; +  char *oldbrk = sbrk(0); + +  printf(stdout, "sbrk test\n"); + +  // can one sbrk() less than a page? +  char *a = sbrk(0); +  int i; +  for(i = 0; i < 5000; i++){ +    char *b = sbrk(1); +    if(b != a){ +      printf(stdout, "sbrk test failed %d %x %x\n", i, a, b); +      exit(); +    } +    *b = 1; +    a = b + 1; +  } +  pid = fork(); +  if(pid < 0){ +    printf(stdout, "sbrk test fork failed\n"); +    exit(); +  } +  char *c = sbrk(1); +  c = sbrk(1); +  if(c != a + 1){ +    printf(stdout, "sbrk test failed post-fork\n"); +    exit(); +  } +  if(pid == 0) +    exit(); +  wait(); + +  // can one allocate the full 640K? +  a = sbrk(0); +  uint amt = (640 * 1024) - (uint) a; +  char *p = sbrk(amt); +  if(p != a){ +    printf(stdout, "sbrk test failed 640K test, p %x a %x\n", p, a); +    exit(); +  } +  char *lastaddr = (char *)(640 * 1024 - 1); +  *lastaddr = 99; + +  // is one forbidden from allocating more than 640K? +  c = sbrk(4096); +  if(c != (char *) 0xffffffff){ +    printf(stdout, "sbrk allocated more than 640K, c %x\n", c); +    exit(); +  } + +  // can one de-allocate? +  a = sbrk(0); +  c = sbrk(-4096); +  if(c == (char *) 0xffffffff){ +    printf(stdout, "sbrk could not deallocate\n"); +    exit(); +  } +  c = sbrk(0); +  if(c != a - 4096){ +    printf(stdout, "sbrk deallocation produced wrong address, a %x c %x\n", a, c); +    exit(); +  } + +  // can one re-allocate that page? +  a = sbrk(0); +  c = sbrk(4096); +  if(c != a || sbrk(0) != a + 4096){ +    printf(stdout, "sbrk re-allocation failed, a %x c %x\n", a, c); +    exit(); +  } +  if(*lastaddr == 99){ +    // should be zero +    printf(stdout, "sbrk de-allocation didn't really deallocate\n"); +    exit(); +  } + +  c = sbrk(4096); +  if(c != (char *) 0xffffffff){ +    printf(stdout, "sbrk was able to re-allocate beyond 640K, c %x\n", c); +    exit(); +  } + +  // can we read the kernel's memory? +  for(a = (char*)(640*1024); a < (char *)2000000; a += 50000){ +    int ppid = getpid(); +    int pid = fork(); +    if(pid < 0){ +      printf(stdout, "fork failed\n"); +      exit(); +    } +    if(pid == 0){ +      printf(stdout, "oops could read %x = %x\n", a, *a); +      kill(ppid); +      exit(); +    } +    wait(); +  } + +  if(sbrk(0) > oldbrk) +    sbrk(-(sbrk(0) - oldbrk)); + +  printf(stdout, "sbrk test OK\n"); +} + +void +stacktest(void) +{ +  printf(stdout, "stack test\n"); +  char dummy = 1; +  char *p = &dummy; +  int ppid = getpid(); +  int pid = fork(); +  if(pid < 0){ +    printf(stdout, "fork failed\n"); +    exit(); +  } +  if(pid == 0){ +    // should cause a trap: +    p[-4096] = 'z'; +    kill(ppid); +    printf(stdout, "stack test failed: page before stack was writeable\n"); +    exit(); +  } +  wait(); +  printf(stdout, "stack test OK\n"); +} +  int  main(int argc, char *argv[])  { @@ -1240,6 +1372,9 @@ main(int argc, char *argv[])    }    close(open("usertests.ran", O_CREATE)); +  stacktest(); +  sbrktest(); +    opentest();    writetest();    writetest1(); @@ -28,3 +28,4 @@ SYSCALL(dup)  SYSCALL(getpid)  SYSCALL(sbrk)  SYSCALL(sleep) +SYSCALL(uptime) @@ -0,0 +1,382 @@ +#include "param.h" +#include "types.h" +#include "defs.h" +#include "x86.h" +#include "mmu.h" +#include "proc.h" +#include "elf.h" + +// The mappings from logical to linear are one to one (i.e., +// segmentation doesn't do anything). +// There is one page table per process, plus one that's used +// when a CPU is not running any process (kpgdir). +// A user process uses the same page table as the kernel; the +// page protection bits prevent it from using anything other +// than its memory. +//  +// setupkvm() and exec() set up every page table like this: +//   0..640K          : user memory (text, data, stack, heap) +//   640K..1M         : mapped direct (for IO space) +//   1M..kernend      : mapped direct (for the kernel's text and data) +//   kernend..PHYSTOP : mapped direct (kernel heap and user pages) +//   0xfe000000..0    : mapped direct (devices such as ioapic) +// +// The kernel allocates memory for its heap and for user memory +// between kernend and the end of physical memory (PHYSTOP). +// The virtual address space of each user program includes the kernel +// (which is inaccessible in user mode).  The user program addresses +// range from 0 till 640KB (USERTOP), which where the I/O hole starts +// (both in physical memory and in the kernel's virtual address +// space). + +#define PHYSTOP  0x1000000 +#define USERTOP  0xA0000 + +static uint kerntext;  // Linker starts kernel at 1MB +static uint kerntsz;    +static uint kerndata; +static uint kerndsz; +static uint kernend; +static uint freesz; +static pde_t *kpgdir;  // for use in scheduler() + +// return the address of the PTE in page table pgdir +// that corresponds to linear address va.  if create!=0, +// create any required page table pages. +static pte_t * +walkpgdir(pde_t *pgdir, const void *va, int create) +{ +  uint r; +  pde_t *pde; +  pte_t *pgtab; + +  pde = &pgdir[PDX(va)]; +  if (*pde & PTE_P) { +    pgtab = (pte_t*) PTE_ADDR(*pde); +  } else if (!create || !(r = (uint) kalloc(PGSIZE))) +    return 0; +  else { +    pgtab = (pte_t*) r; + +    // Make sure all those PTE_P bits are zero. +    memset(pgtab, 0, PGSIZE); + +    // The permissions here are overly generous, but they can +    // be further restricted by the permissions in the page table  +    // entries, if necessary. +    *pde = PADDR(r) | PTE_P | PTE_W | PTE_U; +  } +  return &pgtab[PTX(va)]; +} + +// create PTEs for linear addresses starting at la that refer to +// physical addresses starting at pa. la and size might not +// be page-aligned. +static int +mappages(pde_t *pgdir, void *la, uint size, uint pa, int perm) +{ +  char *first = PGROUNDDOWN(la); +  char *last = PGROUNDDOWN(la + size - 1); +  char *a = first; +  while(1){ +    pte_t *pte = walkpgdir(pgdir, a, 1); +    if(pte == 0) +      return 0; +    if(*pte & PTE_P) +      panic("remap"); +    *pte = pa | perm | PTE_P; +    if(a == last) +      break; +    a += PGSIZE; +    pa += PGSIZE; +  } +  return 1; +} + +// Set up CPU's kernel segment descriptors. +// Run once at boot time on each CPU. +void +ksegment(void) +{ +  struct cpu *c; + +  // Map virtual addresses to linear addresses using identity map. +  // Cannot share a CODE descriptor for both kernel and user +  // because it would have to have DPL_USR, but the CPU forbids +  // an interrupt from CPL=0 to DPL=3. +  c = &cpus[cpunum()]; +  c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0); +  c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0); +  c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, DPL_USER); +  c->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER); + +  // map cpu, and curproc +  c->gdt[SEG_KCPU] = SEG(STA_W, &c->cpu, 8, 0); + +  lgdt(c->gdt, sizeof(c->gdt)); +  loadgs(SEG_KCPU << 3); +   +  // Initialize cpu-local storage. +  cpu = c; +  proc = 0; +} + +// Switch h/w page table and TSS registers to point to process p. +void +switchuvm(struct proc *p) +{ +  pushcli(); + +  // Setup TSS +  cpu->gdt[SEG_TSS] = SEG16(STS_T32A, &cpu->ts, sizeof(cpu->ts)-1, 0); +  cpu->gdt[SEG_TSS].s = 0; +  cpu->ts.ss0 = SEG_KDATA << 3; +  cpu->ts.esp0 = (uint)proc->kstack + KSTACKSIZE; +  ltr(SEG_TSS << 3); + +  if (p->pgdir == 0) +    panic("switchuvm: no pgdir\n"); + +  lcr3(PADDR(p->pgdir));  // switch to new address space +  popcli(); +} + +// Switch h/w page table register to the kernel-only page table, for when +// no process is running. +void +switchkvm() +{ +  lcr3(PADDR(kpgdir));   // Switch to the kernel page table +} + +// Set up kernel part of a page table. +pde_t* +setupkvm(void) +{ +  pde_t *pgdir; + +  // Allocate page directory +  if (!(pgdir = (pde_t *) kalloc(PGSIZE))) +    return 0; +  memset(pgdir, 0, PGSIZE); +  // Map IO space from 640K to 1Mbyte +  if (!mappages(pgdir, (void *)USERTOP, 0x60000, USERTOP, PTE_W)) +    return 0; +  // Map kernel text read-only +  if (!mappages(pgdir, (void *) kerntext, kerntsz, kerntext, 0)) +    return 0; +  // Map kernel data read/write +  if (!mappages(pgdir, (void *) kerndata, kerndsz, kerndata, PTE_W)) +    return 0; +  // Map dynamically-allocated memory read/write (kernel stacks, user mem) +  if (!mappages(pgdir, (void *) kernend, freesz, PADDR(kernend), PTE_W)) +    return 0; +  // Map devices such as ioapic, lapic, ... +  if (!mappages(pgdir, (void *)0xFE000000, 0x2000000, 0xFE000000, PTE_W)) +    return 0; +  return pgdir; +} + +// return the physical address that a given user address +// maps to. the result is also a kernel logical address, +// since the kernel maps the physical memory allocated to user +// processes directly. +char* +uva2ka(pde_t *pgdir, char *uva) +{     +  pte_t *pte = walkpgdir(pgdir, uva, 0); +  if (pte == 0) return 0; +  uint pa = PTE_ADDR(*pte); +  return (char *)pa; +} + +// allocate sz bytes more memory for a process starting at the +// given user address; allocates physical memory and page +// table entries. addr and sz need not be page-aligned. +// it is a no-op for any parts of the requested memory +// that are already allocated. +int +allocuvm(pde_t *pgdir, char *addr, uint sz) +{ +  if (addr + sz > (char*)USERTOP) +    return 0; +  char *first = PGROUNDDOWN(addr); +  char *last = PGROUNDDOWN(addr + sz - 1); +  char *a; +  for(a = first; a <= last; a += PGSIZE){ +    pte_t *pte = walkpgdir(pgdir, a, 0); +    if(pte == 0 || (*pte & PTE_P) == 0){ +      char *mem = kalloc(PGSIZE); +      if(mem == 0){ +        // XXX clean up? +        return 0; +      } +      memset(mem, 0, PGSIZE); +      mappages(pgdir, a, PGSIZE, PADDR(mem), PTE_W|PTE_U); +    } +  } +  return 1; +} + +// deallocate some of the user pages, in response to sbrk() +// with a negative argument. if addr is not page-aligned, +// then only deallocates starting at the next page boundary. +int +deallocuvm(pde_t *pgdir, char *addr, uint sz) +{ +  if (addr + sz > (char*)USERTOP) +    return 0; +  char *first = (char*) PGROUNDUP((uint)addr); +  char *last = PGROUNDDOWN(addr + sz - 1); +  char *a; +  for(a = first; a <= last; a += PGSIZE){ +    pte_t *pte = walkpgdir(pgdir, a, 0); +    if(pte && (*pte & PTE_P) != 0){ +      uint pa = PTE_ADDR(*pte); +      if(pa == 0) +        panic("deallocuvm"); +      kfree((void *) pa, PGSIZE); +      *pte = 0; +    } +  } +  return 1; +} + +// free a page table and all the physical memory pages +// in the user part. +void +freevm(pde_t *pgdir) +{ +  uint i, j, da; + +  if (!pgdir) +    panic("freevm: no pgdir\n"); +  for (i = 0; i < NPDENTRIES; i++) { +    da = PTE_ADDR(pgdir[i]); +    if (da != 0) { +      pte_t *pgtab = (pte_t*) da; +      for (j = 0; j < NPTENTRIES; j++) { +	if (pgtab[j] != 0) { +	  uint pa = PTE_ADDR(pgtab[j]); +	  uint va = PGADDR(i, j, 0); +	  if (va < USERTOP)   // user memory +            kfree((void *) pa, PGSIZE); +	  pgtab[j] = 0; +	} +      } +      kfree((void *) da, PGSIZE); +      pgdir[i] = 0; +    } +  } +  kfree((void *) pgdir, PGSIZE); +} + +int +loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz) +{ +  uint i, pa, n; +  pte_t *pte; + +  if ((uint)addr % PGSIZE != 0) +    panic("loaduvm: addr must be page aligned\n"); +  for (i = 0; i < sz; i += PGSIZE) { +    if (!(pte = walkpgdir(pgdir, addr+i, 0))) +      panic("loaduvm: address should exist\n"); +    pa = PTE_ADDR(*pte); +    if (sz - i < PGSIZE) n = sz - i; +    else n = PGSIZE; +    if(readi(ip, (char *)pa, offset+i, n) != n) +      return 0; +  } +  return 1; +} + +void +inituvm(pde_t *pgdir, char *addr, char *init, uint sz) +{ +  uint i, pa, n, off; +  pte_t *pte; + +  for (i = 0; i < sz; i += PGSIZE) { +    if (!(pte = walkpgdir(pgdir, (void *)(i+addr), 0))) +	panic("inituvm: pte should exist\n"); +    off = (i+(uint)addr) % PGSIZE; +    pa = PTE_ADDR(*pte); +    if (sz - i < PGSIZE) n = sz - i; +    else n = PGSIZE; +    memmove((char *)pa+off, init+i, n); +  } +} + +// given a parent process's page table, create a copy +// of it for a child. +pde_t* +copyuvm(pde_t *pgdir, uint sz) +{ +  pde_t *d = setupkvm(); +  pte_t *pte; +  uint pa, i; +  char *mem; + +  if (!d) return 0; +  for (i = 0; i < sz; i += PGSIZE) { +    if (!(pte = walkpgdir(pgdir, (void *)i, 0))) +      panic("copyuvm: pte should exist\n"); +    if(*pte & PTE_P){ +      pa = PTE_ADDR(*pte); +      if (!(mem = kalloc(PGSIZE))) +        return 0; +      memmove(mem, (char *)pa, PGSIZE); +      if (!mappages(d, (void *)i, PGSIZE, PADDR(mem), PTE_W|PTE_U)) +        return 0; +    } +  } +  return d; +} + +// Gather information about physical memory layout. +// Called once during boot. +// Really should find out how much physical memory +// there is rather than assuming PHYSTOP. +void +pminit(void) +{ +  extern char end[]; +  struct proghdr *ph; +  struct elfhdr *elf = (struct elfhdr*)0x10000;  // scratch space + +  if (elf->magic != ELF_MAGIC || elf->phnum != 2) +    panic("pminit: need a text and data segment\n"); + +  ph = (struct proghdr*)((uchar*)elf + elf->phoff); +  kernend = ((uint)end + PGSIZE) & ~(PGSIZE-1); +  kerntext = ph[0].va; +  kerndata = ph[1].va; +  kerntsz = ph[0].memsz; +  kerndsz = ph[1].memsz; +  freesz = PHYSTOP - kernend; + +  kinit((char *)kernend, freesz); +} + +// Allocate one page table for the machine for the kernel address +// space for scheduler processes. +void +kvmalloc(void) +{ +  kpgdir = setupkvm(); +} + +// Turn on paging. +void +vminit(void) +{ +  uint cr0; + +  lcr3(PADDR(kpgdir)); +  cr0 = rcr0(); +  cr0 |= CR0_PE|CR0_PG|CR0_AM|CR0_WP|CR0_NE|CR0_TS|CR0_EM|CR0_MP; +  cr0 &= ~(CR0_TS|CR0_EM); +  lcr0(cr0); +} + @@ -121,6 +121,66 @@ sti(void)    asm volatile("sti");  } +static inline void lcr0(uint val) +{ +  asm volatile("movl %0,%%cr0" : : "r" (val)); +} + +static inline uint rcr0(void) +{ +  uint val; +  asm volatile("movl %%cr0,%0" : "=r" (val)); +  return val; +} + +static inline uint rcr2(void) +{ +  uint val; +  asm volatile("movl %%cr2,%0" : "=r" (val)); +  return val; +} + +static inline void lcr3(uint val)  +{ +  asm volatile("movl %0,%%cr3" : : "r" (val)); +} + +static inline uint rcr3(void) +{ +  uint val; +  asm volatile("movl %%cr3,%0" : "=r" (val)); +  return val; +} + +static inline void lebp(uint val) +{ +  asm volatile("movl %0,%%ebp" : : "r" (val)); +} + +static inline uint rebp(void) +{ +  uint val; +  asm volatile("movl %%ebp,%0" : "=r" (val)); +  return val; +} + +static inline void lesp(uint val) +{ +  asm volatile("movl %0,%%esp" : : "r" (val)); +} + +static inline uint resp(void) +{ +  uint val; +  asm volatile("movl %%esp,%0" : "=r" (val)); +  return val; +} + +static inline void nop_pause(void) +{ +  asm volatile("pause" : :); +} +  //PAGEBREAK: 36  // Layout of the trap frame built on the stack by the  // hardware and by trapasm.S, and passed to trap().  | 
