-rw-r--r-- | Makefile    |   7
-rw-r--r-- | asm.h       |   2
-rw-r--r-- | bio.c       |   1
-rw-r--r-- | bootasm.S   |   8
-rw-r--r-- | bootother.S |   6
-rw-r--r-- | defs.h      |  24
-rw-r--r-- | exec.c      |  89
-rw-r--r-- | file.c      |   1
-rw-r--r-- | forktest.c  |   8
-rw-r--r-- | ide.c       |   3
-rw-r--r-- | kalloc.c    |  35
-rw-r--r-- | lapic.c     |   5
-rw-r--r-- | main.c      |  57
-rw-r--r-- | mmu.h       |  68
-rw-r--r-- | mp.c        |   1
-rw-r--r-- | param.h     |   2
-rw-r--r-- | proc.c      |  80
-rw-r--r-- | proc.h      |   9
-rw-r--r-- | runoff.list |   2
-rw-r--r-- | runoff.spec |   1
-rw-r--r-- | sh.c        |   4
-rw-r--r-- | spinlock.c  |   2
-rw-r--r-- | syscall.c   |  16
-rw-r--r-- | syscall.h   |   1
-rw-r--r-- | sysfile.c   |   5
-rw-r--r-- | sysproc.c   |  16
-rw-r--r-- | trap.c      |  11
-rw-r--r-- | traps.h     |   2
-rw-r--r-- | types.h     |   1
-rw-r--r-- | usertests.c | 137
-rw-r--r-- | usys.S      |   1
-rw-r--r-- | vm.c        | 382
-rw-r--r-- | x86.h       |  60
33 files changed, 858 insertions, 189 deletions
@@ -25,19 +25,20 @@ OBJS = \ trap.o\ uart.o\ vectors.o\ + vm.o\ # Cross-compiling (e.g., on Mac OS X) -#TOOLPREFIX = i386-jos-elf- +TOOLPREFIX = i386-jos-elf- # Using native tools (e.g., on X86 Linux) -TOOLPREFIX = +#TOOLPREFIX = CC = $(TOOLPREFIX)gcc AS = $(TOOLPREFIX)gas LD = $(TOOLPREFIX)ld OBJCOPY = $(TOOLPREFIX)objcopy OBJDUMP = $(TOOLPREFIX)objdump -CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 +CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 -Werror CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector) ASFLAGS = -m32 -gdwarf-2 # FreeBSD ld wants ``elf_i386_fbsd'' @@ -6,6 +6,8 @@ .word 0, 0; \ .byte 0, 0, 0, 0 +// The 0xC0 means the limit is in 4096-byte units +// and (for executable segments) 32-bit mode. #define SEG_ASM(type,base,lim) \ .word (((lim) >> 12) & 0xffff), ((base) & 0xffff); \ .byte (((base) >> 16) & 0xff), (0x90 | (type)), \ @@ -109,7 +109,6 @@ bread(uint dev, uint sector) void bwrite(struct buf *b) { - cprintf("bwrite sector %d\n", b->sector); if((b->flags & B_BUSY) == 0) panic("bwrite"); b->flags |= B_DIRTY; @@ -51,8 +51,10 @@ seta20.2: orl $CR0_PE, %eax movl %eax, %cr0 - # Jump to next instruction, but in 32-bit code segment. - # Switches processor into 32-bit mode. + # This ljmp is how you load the CS (Code Segment) register. + # SEG_ASM produces segment descriptors with the 32-bit mode + # flag set (the D flag), so addresses and word operands will + # default to 32 bits after this jump. ljmp $(SEG_KCODE<<3), $start32 .code32 # Assemble for 32-bit mode @@ -88,5 +90,5 @@ gdt: SEG_ASM(STA_W, 0x0, 0xffffffff) # data seg gdtdesc: - .word (gdtdesc - gdt - 1) # sizeof(gdt) - 1 + .word (gdtdesc - gdt - 1) # sizeof(gdt) - 1 .long gdt # address gdt diff --git a/bootother.S b/bootother.S index 11d32f1..899669a 100644 --- a/bootother.S +++ b/bootother.S @@ -45,8 +45,10 @@ start: orl $CR0_PE, %eax movl %eax, %cr0 - # Jump to next instruction, but in 32-bit code segment. - # Switches processor into 32-bit mode. + # This ljmp is how you load the CS (Code Segment) register. + # SEG_ASM produces segment descriptors with the 32-bit mode + # flag set (the D flag), so addresses and word operands will + # default to 32 bits after this jump. 
ljmp $(SEG_KCODE<<3), $start32 .code32 # Assemble for 32-bit mode @@ -60,9 +60,10 @@ extern uchar ioapicid; void ioapicinit(void); // kalloc.c +extern int nfreemem; char* kalloc(int); void kfree(char*, int); -void kinit(void); +void kinit(char*,uint); // kbd.c void kbdintr(void); @@ -101,8 +102,6 @@ int kill(int); void pinit(void); void procdump(void); void scheduler(void) __attribute__((noreturn)); -void ksegment(void); -void usegment(void); void sleep(void*, struct spinlock*); void userinit(void); int wait(void); @@ -143,7 +142,7 @@ void timerinit(void); // trap.c void idtinit(void); -extern int ticks; +extern uint ticks; void tvinit(void); extern struct spinlock tickslock; @@ -152,6 +151,21 @@ void uartinit(void); void uartintr(void); void uartputc(int); +// vm.c +void pminit(void); +void ksegment(void); +void kvmalloc(void); +void vminit(void); +pde_t* setupkvm(void); +char* uva2ka(pde_t*, char*); +int allocuvm(pde_t*, char*, uint); +int deallocuvm(pde_t *pgdir, char *addr, uint sz); +void freevm(pde_t*); +void inituvm(pde_t*, char*, char*, uint); +int loaduvm(pde_t*, char*, struct inode *ip, uint, uint); +pde_t* copyuvm(pde_t*,uint); +void switchuvm(struct proc*); +void switchkvm(); + // number of elements in fixed-size array #define NELEM(x) (sizeof(x)/sizeof((x)[0])) - @@ -11,12 +11,13 @@ exec(char *path, char **argv) { char *mem, *s, *last; int i, argc, arglen, len, off; - uint sz, sp, argp; + uint sz, sp, spoffset, argp; struct elfhdr elf; struct inode *ip; struct proghdr ph; + pde_t *pgdir, *oldpgdir; - mem = 0; + pgdir = 0; sz = 0; if((ip = namei(path)) == 0) @@ -29,37 +30,8 @@ exec(char *path, char **argv) if(elf.magic != ELF_MAGIC) goto bad; - // Compute memory size of new process. - // Program segments. - for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){ - if(readi(ip, (char*)&ph, off, sizeof(ph)) != sizeof(ph)) - goto bad; - if(ph.type != ELF_PROG_LOAD) - continue; - if(ph.memsz < ph.filesz) - goto bad; - sz += ph.memsz; - } - - // Arguments. - arglen = 0; - for(argc=0; argv[argc]; argc++) - arglen += strlen(argv[argc]) + 1; - arglen = (arglen+3) & ~3; - sz += arglen; - sz += 4*(argc+1); // argv data - sz += 4; // argv - sz += 4; // argc - - // Stack. - sz += PAGE; - - // Allocate program memory. - sz = (sz+PAGE-1) & ~(PAGE-1); - mem = kalloc(sz); - if(mem == 0) + if (!(pgdir = setupkvm())) goto bad; - memset(mem, 0, sz); // Load program into memory. for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){ @@ -67,37 +39,51 @@ exec(char *path, char **argv) goto bad; if(ph.type != ELF_PROG_LOAD) continue; - if(ph.va + ph.memsz < ph.va || ph.va + ph.memsz > sz) - goto bad; if(ph.memsz < ph.filesz) goto bad; - if(readi(ip, mem + ph.va, ph.offset, ph.filesz) != ph.filesz) + if (!allocuvm(pgdir, (char *)ph.va, ph.memsz)) + goto bad; + if(ph.va + ph.memsz > sz) + sz = ph.va + ph.memsz; + if (!loaduvm(pgdir, (char *)ph.va, ip, ph.offset, ph.filesz)) goto bad; - memset(mem + ph.va + ph.filesz, 0, ph.memsz - ph.filesz); } iunlockput(ip); - - // Initialize stack. + + // Allocate and initialize stack at sz + sz = PGROUNDUP(sz); + sz += PGSIZE; // leave an invalid page + if (!allocuvm(pgdir, (char *)sz, PGSIZE)) + goto bad; + mem = uva2ka(pgdir, (char *)sz); + spoffset = sz; + sz += PGSIZE; + + arglen = 0; + for(argc=0; argv[argc]; argc++) + arglen += strlen(argv[argc]) + 1; + arglen = (arglen+3) & ~3; + sp = sz; argp = sz - arglen - 4*(argc+1); // Copy argv strings and pointers to stack. 
- *(uint*)(mem+argp + 4*argc) = 0; // argv[argc] + *(uint*)(mem+argp-spoffset + 4*argc) = 0; // argv[argc] for(i=argc-1; i>=0; i--){ len = strlen(argv[i]) + 1; sp -= len; - memmove(mem+sp, argv[i], len); - *(uint*)(mem+argp + 4*i) = sp; // argv[i] + memmove(mem+sp-spoffset, argv[i], len); + *(uint*)(mem+argp-spoffset + 4*i) = sp; // argv[i] } // Stack frame for main(argc, argv), below arguments. sp = argp; sp -= 4; - *(uint*)(mem+sp) = argp; + *(uint*)(mem+sp-spoffset) = argp; sp -= 4; - *(uint*)(mem+sp) = argc; + *(uint*)(mem+sp-spoffset) = argc; sp -= 4; - *(uint*)(mem+sp) = 0xffffffff; // fake return pc + *(uint*)(mem+sp-spoffset) = 0xffffffff; // fake return pc // Save program name for debugging. for(last=s=path; *s; s++) @@ -105,18 +91,21 @@ exec(char *path, char **argv) last = s+1; safestrcpy(proc->name, last, sizeof(proc->name)); - // Commit to the new image. - kfree(proc->mem, proc->sz); - proc->mem = mem; + // Commit to the user image. + oldpgdir = proc->pgdir; + proc->pgdir = pgdir; proc->sz = sz; proc->tf->eip = elf.entry; // main proc->tf->esp = sp; - usegment(); + + switchuvm(proc); + + freevm(oldpgdir); + return 0; bad: - if(mem) - kfree(mem, sz); + if (pgdir) freevm(pgdir); iunlockput(ip); return -1; } @@ -116,7 +116,6 @@ filewrite(struct file *f, char *addr, int n) return pipewrite(f->pipe, addr, n); if(f->type == FD_INODE){ ilock(f->ip); - cprintf("filewrite: %d\n", n); if((r = writei(f->ip, addr, f->off, n)) > 0) f->off += r; iunlock(f->ip); @@ -5,6 +5,8 @@ #include "stat.h" #include "user.h" +#define N 1000 + void printf(int fd, char *s, ...) { @@ -18,7 +20,7 @@ forktest(void) printf(1, "fork test\n"); - for(n=0; n<1000; n++){ + for(n=0; n<N; n++){ pid = fork(); if(pid < 0) break; @@ -26,8 +28,8 @@ forktest(void) exit(); } - if(n == 1000){ - printf(1, "fork claimed to work 1000 times!\n"); + if(n == N){ + printf(1, "fork claimed to work N times!\n", N); exit(); } @@ -147,8 +147,9 @@ iderw(struct buf *b) // Wait for request to finish. // Assuming will not sleep too long: ignore proc->killed. - while((b->flags & (B_VALID|B_DIRTY)) != B_VALID) + while((b->flags & (B_VALID|B_DIRTY)) != B_VALID) { sleep(b, &idelock); + } release(&idelock); } @@ -1,13 +1,13 @@ // Physical memory allocator, intended to allocate -// memory for user processes. Allocates in 4096-byte "pages". +// memory for user processes. Allocates in 4096-byte pages. // Free list is kept sorted and combines adjacent pages into // long runs, to make it easier to allocate big segments. -// One reason the page size is 4k is that the x86 segment size -// granularity is 4k. +// This combining is not useful now that xv6 uses paging. #include "types.h" #include "defs.h" #include "param.h" +#include "mmu.h" #include "spinlock.h" struct run { @@ -20,21 +20,14 @@ struct { struct run *freelist; } kmem; +int nfreemem; + // Initialize free list of physical pages. -// This code cheats by just considering one megabyte of -// pages after end. Real systems would determine the -// amount of memory available in the system and use it all. void -kinit(void) +kinit(char *p, uint len) { - extern char end[]; - uint len; - char *p; - initlock(&kmem.lock, "kmem"); - p = (char*)(((uint)end + PAGE) & ~(PAGE-1)); - len = 256*PAGE; // assume computer has 256 pages of RAM, 1 MB - cprintf("mem = %d\n", len); + nfreemem = 0; kfree(p, len); } @@ -47,19 +40,23 @@ kfree(char *v, int len) { struct run *r, *rend, **rp, *p, *pend; - if(len <= 0 || len % PAGE) + if(len <= 0 || len % PGSIZE) panic("kfree"); // Fill with junk to catch dangling refs. 
memset(v, 1, len); acquire(&kmem.lock); + nfreemem += len; p = (struct run*)v; pend = (struct run*)(v + len); for(rp=&kmem.freelist; (r=*rp) != 0 && r <= pend; rp=&r->next){ rend = (struct run*)((char*)r + r->len); - if(r <= p && p < rend) + if(r <= p && p < rend) { + cprintf("freeing a free page: r = 0x%x p = 0x%x rend = 0x%x\n", + r, p, rend); panic("freeing free page"); + } if(rend == p){ // r before p: expand r to include p r->len += len; if(r->next && r->next == pend){ // r now next to r->next? @@ -93,7 +90,7 @@ kalloc(int n) char *p; struct run *r, **rp; - if(n % PAGE || n <= 0) + if(n % PGSIZE || n <= 0) panic("kalloc"); acquire(&kmem.lock); @@ -103,12 +100,12 @@ kalloc(int n) p = (char*)r + r->len; if(r->len == 0) *rp = r->next; + nfreemem -= n; release(&kmem.lock); return p; } } release(&kmem.lock); - - cprintf("kalloc: out of memory\n"); return 0; } + @@ -20,8 +20,11 @@ #define STARTUP 0x00000600 // Startup IPI #define DELIVS 0x00001000 // Delivery status #define ASSERT 0x00004000 // Assert interrupt (vs deassert) + #define DEASSERT 0x00000000 #define LEVEL 0x00008000 // Level triggered #define BCAST 0x00080000 // Send to all APICs, including self. + #define BUSY 0x00001000 + #define FIXED 0x00000000 #define ICRHI (0x0310/4) // Interrupt Command [63:32] #define TIMER (0x0320/4) // Local Vector Table 0 (TIMER) #define X1 0x0000000B // divide counts by 1 @@ -48,6 +51,7 @@ lapicw(int index, int value) void lapicinit(int c) { + cprintf("lapicinit: %d 0x%x\n", c, lapic); if(!lapic) return; @@ -126,7 +130,6 @@ microdelay(int us) { } - #define IO_RTC 0x70 // Start additional processor running bootstrap code at addr. @@ -6,23 +6,42 @@ #include "x86.h" static void bootothers(void); -static void mpmain(void) __attribute__((noreturn)); +static void mpmain(void); +void jkstack(void) __attribute__((noreturn)); +void mainc(void); // Bootstrap processor starts running C code here. int main(void) { - mpinit(); // collect info about this machine + mpinit(); // collect info about this machine lapicinit(mpbcpu()); - ksegment(); + ksegment(); // set up segments picinit(); // interrupt controller ioapicinit(); // another interrupt controller consoleinit(); // I/O devices & their interrupts uartinit(); // serial port -cprintf("cpus %p cpu %p\n", cpus, cpu); - cprintf("\ncpu%d: starting xv6\n\n", cpu->id); + pminit(); // discover how much memory there is + jkstack(); // call mainc() on a properly-allocated stack +} + +void +jkstack(void) +{ + char *kstack = kalloc(PGSIZE); + if (!kstack) + panic("jkstack\n"); + char *top = kstack + PGSIZE; + asm volatile("movl %0,%%esp" : : "r" (top)); + asm volatile("call mainc"); + panic("jkstack"); +} - kinit(); // physical memory allocator +void +mainc(void) +{ + cprintf("\ncpu%d: starting xv6\n\n", cpu->id); + kvmalloc(); // initialze the kernel page table pinit(); // process table tvinit(); // trap vectors binit(); // buffer cache @@ -38,20 +57,21 @@ cprintf("cpus %p cpu %p\n", cpus, cpu); mpmain(); } -// Bootstrap processor gets here after setting up the hardware. -// Additional processors start here. +// Common CPU setup code. +// Bootstrap CPU comes here from mainc(). +// Other CPUs jump here from bootother.S. 
static void mpmain(void) { - if(cpunum() != mpbcpu()) + if(cpunum() != mpbcpu()) { + ksegment(); lapicinit(cpunum()); - ksegment(); - cprintf("cpu%d: mpmain\n", cpu->id); - idtinit(); + } + vminit(); // turn on paging + cprintf("cpu%d: starting\n", cpu->id); + idtinit(); // load idt register xchg(&cpu->booted, 1); - - cprintf("cpu%d: scheduling\n", cpu->id); - scheduler(); + scheduler(); // start running processes } static void @@ -62,8 +82,9 @@ bootothers(void) struct cpu *c; char *stack; - // Write bootstrap code to unused memory at 0x7000. - code = (uchar*)0x7000; + // Write bootstrap code to unused memory at 0x7000. The linker has + // placed the start of bootother.S there. + code = (uchar *) 0x7000; memmove(code, _binary_bootother_start, (uint)_binary_bootother_size); for(c = cpus; c < cpus+ncpu; c++){ @@ -76,7 +97,7 @@ bootothers(void) *(void**)(code-8) = mpmain; lapicstartap(c->id, (uint)code); - // Wait for cpu to get through bootstrap. + // Wait for cpu to finish mpmain() while(c->booted == 0) ; } @@ -62,6 +62,8 @@ struct segdesc { #define STA_R 0x2 // Readable (executable segments) #define STA_A 0x1 // Accessed +// + // System segment type bits #define STS_T16A 0x1 // Available 16-bit TSS #define STS_LDT 0x2 // Local Descriptor Table @@ -76,6 +78,72 @@ struct segdesc { #define STS_IG32 0xE // 32-bit Interrupt Gate #define STS_TG32 0xF // 32-bit Trap Gate + +// A linear address 'la' has a three-part structure as follows: +// +// +--------10------+-------10-------+---------12----------+ +// | Page Directory | Page Table | Offset within Page | +// | Index | Index | | +// +----------------+----------------+---------------------+ +// \--- PDX(la) --/ \--- PTX(la) --/ + +// page directory index +#define PDX(la) ((((uint) (la)) >> PDXSHIFT) & 0x3FF) + +// page table index +#define PTX(la) ((((uint) (la)) >> PTXSHIFT) & 0x3FF) + +// construct linear address from indexes and offset +#define PGADDR(d, t, o) ((uint) ((d) << PDXSHIFT | (t) << PTXSHIFT | (o))) + +// turn a kernel linear address into a physical address. +// all of the kernel data structures have linear and +// physical addresses that are equal. +#define PADDR(a) ((uint) a) + +// Page directory and page table constants. +#define NPDENTRIES 1024 // page directory entries per page directory +#define NPTENTRIES 1024 // page table entries per page table + +#define PGSIZE 4096 // bytes mapped by a page +#define PGSHIFT 12 // log2(PGSIZE) + +#define PTXSHIFT 12 // offset of PTX in a linear address +#define PDXSHIFT 22 // offset of PDX in a linear address + +#define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1)) +#define PGROUNDDOWN(a) ((char*)((((unsigned int)a) & ~(PGSIZE-1)))) + +// Page table/directory entry flags. 
+#define PTE_P 0x001 // Present +#define PTE_W 0x002 // Writeable +#define PTE_U 0x004 // User +#define PTE_PWT 0x008 // Write-Through +#define PTE_PCD 0x010 // Cache-Disable +#define PTE_A 0x020 // Accessed +#define PTE_D 0x040 // Dirty +#define PTE_PS 0x080 // Page Size +#define PTE_MBZ 0x180 // Bits must be zero + +// Address in page table or page directory entry +#define PTE_ADDR(pte) ((uint) (pte) & ~0xFFF) + +typedef uint pte_t; + +// Control Register flags +#define CR0_PE 0x00000001 // Protection Enable +#define CR0_MP 0x00000002 // Monitor coProcessor +#define CR0_EM 0x00000004 // Emulation +#define CR0_TS 0x00000008 // Task Switched +#define CR0_ET 0x00000010 // Extension Type +#define CR0_NE 0x00000020 // Numeric Errror +#define CR0_WP 0x00010000 // Write Protect +#define CR0_AM 0x00040000 // Alignment Mask +#define CR0_NW 0x20000000 // Not Writethrough +#define CR0_CD 0x40000000 // Cache Disable +#define CR0_PG 0x80000000 // Paging + + // PAGEBREAK: 40 // Task state segment format struct taskstate { @@ -39,6 +39,7 @@ mpsearch1(uchar *addr, int len) { uchar *e, *p; + cprintf("mpsearch1 0x%x %d\n", addr, len); e = addr+len; for(p = addr; p < e; p += sizeof(struct mp)) if(memcmp(p, "_MP_", 4) == 0 && sum(p, sizeof(struct mp)) == 0) @@ -1,5 +1,5 @@ #define NPROC 64 // maximum number of processes -#define PAGE 4096 // granularity of user-space memory allocation +#define PAGE 4096 // conveniently chosen to be equal to PGSIZE #define KSTACKSIZE PAGE // size of per-process kernel stack #define NCPU 8 // maximum number of CPUs #define NOFILE 16 // open files per process @@ -60,39 +60,6 @@ procdump(void) } } -// Set up CPU's kernel segment descriptors. -// Run once at boot time on each CPU. -void -ksegment(void) -{ - struct cpu *c; - - c = &cpus[cpunum()]; - c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0x100000 + 64*1024-1, 0); - c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0); - c->gdt[SEG_KCPU] = SEG(STA_W, &c->cpu, 8, 0); - lgdt(c->gdt, sizeof(c->gdt)); - loadgs(SEG_KCPU << 3); - - // Initialize cpu-local storage. - cpu = c; - proc = 0; -} - -// Set up CPU's segment descriptors and current process task state. -void -usegment(void) -{ - pushcli(); - cpu->gdt[SEG_UCODE] = SEG(STA_X|STA_R, proc->mem, proc->sz-1, DPL_USER); - cpu->gdt[SEG_UDATA] = SEG(STA_W, proc->mem, proc->sz-1, DPL_USER); - cpu->gdt[SEG_TSS] = SEG16(STS_T32A, &cpu->ts, sizeof(cpu->ts)-1, 0); - cpu->gdt[SEG_TSS].s = 0; - cpu->ts.ss0 = SEG_KDATA << 3; - cpu->ts.esp0 = (uint)proc->kstack + KSTACKSIZE; - ltr(SEG_TSS << 3); - popcli(); -} //PAGEBREAK: 32 // Look in the process table for an UNUSED proc. 
@@ -149,20 +116,19 @@ userinit(void) p = allocproc(); initproc = p; - - // Initialize memory from initcode.S - p->sz = PAGE; - p->mem = kalloc(p->sz); - memset(p->mem, 0, p->sz); - memmove(p->mem, _binary_initcode_start, (int)_binary_initcode_size); - + if (!(p->pgdir = setupkvm())) + panic("userinit: out of memory?"); + if (!allocuvm(p->pgdir, 0x0, (int)_binary_initcode_size)) + panic("userinit: out of memory?"); + inituvm(p->pgdir, 0x0, _binary_initcode_start, (int)_binary_initcode_size); + p->sz = PGROUNDUP((int)_binary_initcode_size); memset(p->tf, 0, sizeof(*p->tf)); p->tf->cs = (SEG_UCODE << 3) | DPL_USER; p->tf->ds = (SEG_UDATA << 3) | DPL_USER; p->tf->es = p->tf->ds; p->tf->ss = p->tf->ds; p->tf->eflags = FL_IF; - p->tf->esp = p->sz; + p->tf->esp = PGSIZE; p->tf->eip = 0; // beginning of initcode.S safestrcpy(p->name, "initcode", sizeof(p->name)); @@ -176,17 +142,15 @@ userinit(void) int growproc(int n) { - char *newmem; - - newmem = kalloc(proc->sz + n); - if(newmem == 0) - return -1; - memmove(newmem, proc->mem, proc->sz); - memset(newmem + proc->sz, 0, n); - kfree(proc->mem, proc->sz); - proc->mem = newmem; + if(n > 0){ + if (!allocuvm(proc->pgdir, (char *)proc->sz, n)) + return -1; + } else if(n < 0){ + if (!deallocuvm(proc->pgdir, (char *)(proc->sz + n), 0 - n)) + return -1; + } proc->sz += n; - usegment(); + switchuvm(proc); return 0; } @@ -204,14 +168,13 @@ fork(void) return -1; // Copy process state from p. - np->sz = proc->sz; - if((np->mem = kalloc(np->sz)) == 0){ + if (!(np->pgdir = copyuvm(proc->pgdir, proc->sz))) { kfree(np->kstack, KSTACKSIZE); np->kstack = 0; np->state = UNUSED; return -1; } - memmove(np->mem, proc->mem, np->sz); + np->sz = proc->sz; np->parent = proc; *np->tf = *proc->tf; @@ -225,7 +188,7 @@ fork(void) pid = np->pid; np->state = RUNNABLE; - + safestrcpy(np->name, proc->name, sizeof(proc->name)); return pid; } @@ -256,9 +219,10 @@ scheduler(void) // to release ptable.lock and then reacquire it // before jumping back to us. proc = p; - usegment(); + switchuvm(p); p->state = RUNNING; swtch(&cpu->scheduler, proc->context); + switchkvm(); // Process is done running for now. // It should have changed its p->state before coming back. @@ -284,7 +248,6 @@ sched(void) panic("sched running"); if(readeflags()&FL_IF) panic("sched interruptible"); - intena = cpu->intena; swtch(&proc->context, cpu->scheduler); cpu->intena = intena; @@ -455,8 +418,9 @@ wait(void) if(p->state == ZOMBIE){ // Found one. pid = p->pid; - kfree(p->mem, p->sz); kfree(p->kstack, KSTACKSIZE); + p->kstack = 0; + freevm(p->pgdir); p->state = UNUSED; p->pid = 0; p->parent = 0; @@ -3,8 +3,8 @@ #define SEG_KCODE 1 // kernel code #define SEG_KDATA 2 // kernel data+stack #define SEG_KCPU 3 // kernel per-cpu data -#define SEG_UCODE 4 -#define SEG_UDATA 5 +#define SEG_UCODE 4 // user code +#define SEG_UDATA 5 // user data+stack #define SEG_TSS 6 // this process's task state #define NSEGS 7 @@ -16,7 +16,7 @@ // Contexts are stored at the bottom of the stack they // describe; the stack pointer is the address of the context. // The layout of the context matches the layout of the stack in swtch.S -// at "Switch stacks" comment. Switch itself doesn't save eip explicitly, +// at the "Switch stacks" comment. Switch doesn't save eip explicitly, // but it is on the stack and allocproc() manipulates it. 
struct context { uint edi; @@ -30,8 +30,8 @@ enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE }; // Per-process state struct proc { - char *mem; // Start of process memory (kernel address) uint sz; // Size of process memory (bytes) + pde_t* pgdir; // Linear address of proc's pgdir char *kstack; // Bottom of kernel stack for this process enum procstate state; // Process state volatile int pid; // Process ID @@ -48,6 +48,7 @@ struct proc { // Process memory is laid out contiguously, low addresses first: // text // original data and bss +// invalid page // fixed-size stack // expandable heap diff --git a/runoff.list b/runoff.list index 6bbd386..f39ce18 100644 --- a/runoff.list +++ b/runoff.list @@ -21,7 +21,9 @@ spinlock.c proc.h proc.c swtch.S +vm.c kalloc.c +vm.c # system calls traps.h diff --git a/runoff.spec b/runoff.spec index dbd6d5c..e4cfd42 100644 --- a/runoff.spec +++ b/runoff.spec @@ -27,6 +27,7 @@ even: proc.h # mild preference right: proc.c # VERY important # setjmp.S either +# vm.c either # kalloc.c either # syscall.h either @@ -330,7 +330,7 @@ parsecmd(char *s) { char *es; struct cmd *cmd; - + es = s + strlen(s); cmd = parseline(&s, es); peek(&s, es, ""); @@ -363,7 +363,7 @@ struct cmd* parsepipe(char **ps, char *es) { struct cmd *cmd; - + cmd = parseexec(ps, es); if(peek(ps, es, "|")){ gettoken(ps, es, 0, 0); @@ -71,7 +71,7 @@ getcallerpcs(void *v, uint pcs[]) ebp = (uint*)v - 2; for(i = 0; i < 10; i++){ - if(ebp == 0 || ebp == (uint*)0xffffffff) + if(ebp == 0 || ebp < (uint *) 0x100000 || ebp == (uint*)0xffffffff) break; pcs[i] = ebp[1]; // saved %eip ebp = (uint*)ebp[0]; // saved %ebp @@ -18,10 +18,12 @@ fetchint(struct proc *p, uint addr, int *ip) { if(addr >= p->sz || addr+4 > p->sz) return -1; - *ip = *(int*)(p->mem + addr); + *ip = *(int*)(addr); return 0; } +// XXX should we copy the string? + // Fetch the nul-terminated string at addr from process p. // Doesn't actually copy the string - just sets *pp to point at it. // Returns length of string, not including nul. 
@@ -32,8 +34,8 @@ fetchstr(struct proc *p, uint addr, char **pp) if(addr >= p->sz) return -1; - *pp = p->mem + addr; - ep = p->mem + p->sz; + *pp = (char *) addr; + ep = (char *) p->sz; for(s = *pp; s < ep; s++) if(*s == 0) return s - *pp; @@ -44,7 +46,8 @@ fetchstr(struct proc *p, uint addr, char **pp) int argint(int n, int *ip) { - return fetchint(proc, proc->tf->esp + 4 + 4*n, ip); + int x = fetchint(proc, proc->tf->esp + 4 + 4*n, ip); + return x; } // Fetch the nth word-sized system call argument as a pointer @@ -59,7 +62,8 @@ argptr(int n, char **pp, int size) return -1; if((uint)i >= proc->sz || (uint)i+size >= proc->sz) return -1; - *pp = proc->mem + i; + // *pp = proc->mem + i; // XXXXX + *pp = (char *) i; // XXXXX return 0; } @@ -96,6 +100,7 @@ extern int sys_sleep(void); extern int sys_unlink(void); extern int sys_wait(void); extern int sys_write(void); +extern int sys_uptime(void); static int (*syscalls[])(void) = { [SYS_chdir] sys_chdir, @@ -118,6 +123,7 @@ static int (*syscalls[])(void) = { [SYS_unlink] sys_unlink, [SYS_wait] sys_wait, [SYS_write] sys_write, +[SYS_uptime] sys_uptime, }; void @@ -19,3 +19,4 @@ #define SYS_getpid 18 #define SYS_sbrk 19 #define SYS_sleep 20 +#define SYS_uptime 21 @@ -264,7 +264,6 @@ sys_open(void) if(argstr(0, &path) < 0 || argint(1, &omode) < 0) return -1; - if(omode & O_CREATE){ if((ip = create(path, T_FILE, 0, 0)) == 0) return -1; @@ -291,7 +290,6 @@ sys_open(void) f->off = 0; f->readable = !(omode & O_WRONLY); f->writable = (omode & O_WRONLY) || (omode & O_RDWR); - return fd; } @@ -350,8 +348,9 @@ sys_exec(void) int i; uint uargv, uarg; - if(argstr(0, &path) < 0 || argint(1, (int*)&uargv) < 0) + if(argstr(0, &path) < 0 || argint(1, (int*)&uargv) < 0) { return -1; + } memset(argv, 0, sizeof(argv)); for(i=0;; i++){ if(i >= NELEM(argv)) @@ -57,7 +57,8 @@ sys_sbrk(void) int sys_sleep(void) { - int n, ticks0; + int n; + uint ticks0; if(argint(0, &n) < 0) return -1; @@ -73,3 +74,16 @@ sys_sleep(void) release(&tickslock); return 0; } + +// return how many clock tick interrupts have occurred +// since boot. +int +sys_uptime(void) +{ + uint xticks; + + acquire(&tickslock); + xticks = ticks; + release(&tickslock); + return xticks; +} @@ -11,7 +11,7 @@ struct gatedesc idt[256]; extern uint vectors[]; // in vectors.S: array of 256 entry pointers struct spinlock tickslock; -int ticks; +uint ticks; void tvinit(void) @@ -78,13 +78,14 @@ trap(struct trapframe *tf) default: if(proc == 0 || (tf->cs&3) == 0){ // In kernel, it must be our mistake. - cprintf("unexpected trap %d from cpu %d eip %x\n", - tf->trapno, cpu->id, tf->eip); + cprintf("unexpected trap %d from cpu %d eip %x (cr2=0x%x)\n", + tf->trapno, cpu->id, tf->eip, rcr2()); panic("trap"); } // In user space, assume process misbehaved. - cprintf("pid %d %s: trap %d err %d on cpu %d eip %x -- kill proc\n", - proc->pid, proc->name, tf->trapno, tf->err, cpu->id, tf->eip); + cprintf("pid %d %s: trap %d err %d on cpu %d eip 0x%x addr 0x%x--kill proc\n", + proc->pid, proc->name, tf->trapno, tf->err, cpu->id, tf->eip, + rcr2()); proc->killed = 1; } @@ -24,7 +24,7 @@ // These are arbitrarily chosen, but with care not to overlap // processor defined exceptions or interrupt vectors. 
-#define T_SYSCALL 64 // system call +#define T_SYSCALL 64 // system call #define T_DEFAULT 500 // catchall #define T_IRQ0 32 // IRQ 0 corresponds to int T_IRQ @@ -1,3 +1,4 @@ typedef unsigned int uint; typedef unsigned short ushort; typedef unsigned char uchar; +typedef uint pde_t; diff --git a/usertests.c b/usertests.c index cc2601c..670a4a8 100644 --- a/usertests.c +++ b/usertests.c @@ -322,8 +322,9 @@ void mem(void) { void *m1, *m2; - int pid; + int pid, ppid; + ppid = getpid(); if((pid = fork()) == 0){ m1 = 0; while((m2 = malloc(10001)) != 0) { @@ -338,6 +339,7 @@ mem(void) m1 = malloc(1024*20); if(m1 == 0) { printf(1, "couldn't allocate mem?!!\n"); + kill(ppid); exit(); } free(m1); @@ -1229,6 +1231,136 @@ forktest(void) printf(1, "fork test OK\n"); } +void +sbrktest(void) +{ + int pid; + char *oldbrk = sbrk(0); + + printf(stdout, "sbrk test\n"); + + // can one sbrk() less than a page? + char *a = sbrk(0); + int i; + for(i = 0; i < 5000; i++){ + char *b = sbrk(1); + if(b != a){ + printf(stdout, "sbrk test failed %d %x %x\n", i, a, b); + exit(); + } + *b = 1; + a = b + 1; + } + pid = fork(); + if(pid < 0){ + printf(stdout, "sbrk test fork failed\n"); + exit(); + } + char *c = sbrk(1); + c = sbrk(1); + if(c != a + 1){ + printf(stdout, "sbrk test failed post-fork\n"); + exit(); + } + if(pid == 0) + exit(); + wait(); + + // can one allocate the full 640K? + a = sbrk(0); + uint amt = (640 * 1024) - (uint) a; + char *p = sbrk(amt); + if(p != a){ + printf(stdout, "sbrk test failed 640K test, p %x a %x\n", p, a); + exit(); + } + char *lastaddr = (char *)(640 * 1024 - 1); + *lastaddr = 99; + + // is one forbidden from allocating more than 640K? + c = sbrk(4096); + if(c != (char *) 0xffffffff){ + printf(stdout, "sbrk allocated more than 640K, c %x\n", c); + exit(); + } + + // can one de-allocate? + a = sbrk(0); + c = sbrk(-4096); + if(c == (char *) 0xffffffff){ + printf(stdout, "sbrk could not deallocate\n"); + exit(); + } + c = sbrk(0); + if(c != a - 4096){ + printf(stdout, "sbrk deallocation produced wrong address, a %x c %x\n", a, c); + exit(); + } + + // can one re-allocate that page? + a = sbrk(0); + c = sbrk(4096); + if(c != a || sbrk(0) != a + 4096){ + printf(stdout, "sbrk re-allocation failed, a %x c %x\n", a, c); + exit(); + } + if(*lastaddr == 99){ + // should be zero + printf(stdout, "sbrk de-allocation didn't really deallocate\n"); + exit(); + } + + c = sbrk(4096); + if(c != (char *) 0xffffffff){ + printf(stdout, "sbrk was able to re-allocate beyond 640K, c %x\n", c); + exit(); + } + + // can we read the kernel's memory? 
+ for(a = (char*)(640*1024); a < (char *)2000000; a += 50000){ + int ppid = getpid(); + int pid = fork(); + if(pid < 0){ + printf(stdout, "fork failed\n"); + exit(); + } + if(pid == 0){ + printf(stdout, "oops could read %x = %x\n", a, *a); + kill(ppid); + exit(); + } + wait(); + } + + if(sbrk(0) > oldbrk) + sbrk(-(sbrk(0) - oldbrk)); + + printf(stdout, "sbrk test OK\n"); +} + +void +stacktest(void) +{ + printf(stdout, "stack test\n"); + char dummy = 1; + char *p = &dummy; + int ppid = getpid(); + int pid = fork(); + if(pid < 0){ + printf(stdout, "fork failed\n"); + exit(); + } + if(pid == 0){ + // should cause a trap: + p[-4096] = 'z'; + kill(ppid); + printf(stdout, "stack test failed: page before stack was writeable\n"); + exit(); + } + wait(); + printf(stdout, "stack test OK\n"); +} + int main(int argc, char *argv[]) { @@ -1240,6 +1372,9 @@ main(int argc, char *argv[]) } close(open("usertests.ran", O_CREATE)); + stacktest(); + sbrktest(); + opentest(); writetest(); writetest1(); @@ -28,3 +28,4 @@ SYSCALL(dup) SYSCALL(getpid) SYSCALL(sbrk) SYSCALL(sleep) +SYSCALL(uptime) @@ -0,0 +1,382 @@ +#include "param.h" +#include "types.h" +#include "defs.h" +#include "x86.h" +#include "mmu.h" +#include "proc.h" +#include "elf.h" + +// The mappings from logical to linear are one to one (i.e., +// segmentation doesn't do anything). +// There is one page table per process, plus one that's used +// when a CPU is not running any process (kpgdir). +// A user process uses the same page table as the kernel; the +// page protection bits prevent it from using anything other +// than its memory. +// +// setupkvm() and exec() set up every page table like this: +// 0..640K : user memory (text, data, stack, heap) +// 640K..1M : mapped direct (for IO space) +// 1M..kernend : mapped direct (for the kernel's text and data) +// kernend..PHYSTOP : mapped direct (kernel heap and user pages) +// 0xfe000000..0 : mapped direct (devices such as ioapic) +// +// The kernel allocates memory for its heap and for user memory +// between kernend and the end of physical memory (PHYSTOP). +// The virtual address space of each user program includes the kernel +// (which is inaccessible in user mode). The user program addresses +// range from 0 till 640KB (USERTOP), which where the I/O hole starts +// (both in physical memory and in the kernel's virtual address +// space). + +#define PHYSTOP 0x1000000 +#define USERTOP 0xA0000 + +static uint kerntext; // Linker starts kernel at 1MB +static uint kerntsz; +static uint kerndata; +static uint kerndsz; +static uint kernend; +static uint freesz; +static pde_t *kpgdir; // for use in scheduler() + +// return the address of the PTE in page table pgdir +// that corresponds to linear address va. if create!=0, +// create any required page table pages. +static pte_t * +walkpgdir(pde_t *pgdir, const void *va, int create) +{ + uint r; + pde_t *pde; + pte_t *pgtab; + + pde = &pgdir[PDX(va)]; + if (*pde & PTE_P) { + pgtab = (pte_t*) PTE_ADDR(*pde); + } else if (!create || !(r = (uint) kalloc(PGSIZE))) + return 0; + else { + pgtab = (pte_t*) r; + + // Make sure all those PTE_P bits are zero. + memset(pgtab, 0, PGSIZE); + + // The permissions here are overly generous, but they can + // be further restricted by the permissions in the page table + // entries, if necessary. + *pde = PADDR(r) | PTE_P | PTE_W | PTE_U; + } + return &pgtab[PTX(va)]; +} + +// create PTEs for linear addresses starting at la that refer to +// physical addresses starting at pa. la and size might not +// be page-aligned. 
+static int +mappages(pde_t *pgdir, void *la, uint size, uint pa, int perm) +{ + char *first = PGROUNDDOWN(la); + char *last = PGROUNDDOWN(la + size - 1); + char *a = first; + while(1){ + pte_t *pte = walkpgdir(pgdir, a, 1); + if(pte == 0) + return 0; + if(*pte & PTE_P) + panic("remap"); + *pte = pa | perm | PTE_P; + if(a == last) + break; + a += PGSIZE; + pa += PGSIZE; + } + return 1; +} + +// Set up CPU's kernel segment descriptors. +// Run once at boot time on each CPU. +void +ksegment(void) +{ + struct cpu *c; + + // Map virtual addresses to linear addresses using identity map. + // Cannot share a CODE descriptor for both kernel and user + // because it would have to have DPL_USR, but the CPU forbids + // an interrupt from CPL=0 to DPL=3. + c = &cpus[cpunum()]; + c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0); + c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0); + c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, DPL_USER); + c->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER); + + // map cpu, and curproc + c->gdt[SEG_KCPU] = SEG(STA_W, &c->cpu, 8, 0); + + lgdt(c->gdt, sizeof(c->gdt)); + loadgs(SEG_KCPU << 3); + + // Initialize cpu-local storage. + cpu = c; + proc = 0; +} + +// Switch h/w page table and TSS registers to point to process p. +void +switchuvm(struct proc *p) +{ + pushcli(); + + // Setup TSS + cpu->gdt[SEG_TSS] = SEG16(STS_T32A, &cpu->ts, sizeof(cpu->ts)-1, 0); + cpu->gdt[SEG_TSS].s = 0; + cpu->ts.ss0 = SEG_KDATA << 3; + cpu->ts.esp0 = (uint)proc->kstack + KSTACKSIZE; + ltr(SEG_TSS << 3); + + if (p->pgdir == 0) + panic("switchuvm: no pgdir\n"); + + lcr3(PADDR(p->pgdir)); // switch to new address space + popcli(); +} + +// Switch h/w page table register to the kernel-only page table, for when +// no process is running. +void +switchkvm() +{ + lcr3(PADDR(kpgdir)); // Switch to the kernel page table +} + +// Set up kernel part of a page table. +pde_t* +setupkvm(void) +{ + pde_t *pgdir; + + // Allocate page directory + if (!(pgdir = (pde_t *) kalloc(PGSIZE))) + return 0; + memset(pgdir, 0, PGSIZE); + // Map IO space from 640K to 1Mbyte + if (!mappages(pgdir, (void *)USERTOP, 0x60000, USERTOP, PTE_W)) + return 0; + // Map kernel text read-only + if (!mappages(pgdir, (void *) kerntext, kerntsz, kerntext, 0)) + return 0; + // Map kernel data read/write + if (!mappages(pgdir, (void *) kerndata, kerndsz, kerndata, PTE_W)) + return 0; + // Map dynamically-allocated memory read/write (kernel stacks, user mem) + if (!mappages(pgdir, (void *) kernend, freesz, PADDR(kernend), PTE_W)) + return 0; + // Map devices such as ioapic, lapic, ... + if (!mappages(pgdir, (void *)0xFE000000, 0x2000000, 0xFE000000, PTE_W)) + return 0; + return pgdir; +} + +// return the physical address that a given user address +// maps to. the result is also a kernel logical address, +// since the kernel maps the physical memory allocated to user +// processes directly. +char* +uva2ka(pde_t *pgdir, char *uva) +{ + pte_t *pte = walkpgdir(pgdir, uva, 0); + if (pte == 0) return 0; + uint pa = PTE_ADDR(*pte); + return (char *)pa; +} + +// allocate sz bytes more memory for a process starting at the +// given user address; allocates physical memory and page +// table entries. addr and sz need not be page-aligned. +// it is a no-op for any parts of the requested memory +// that are already allocated. 
+int +allocuvm(pde_t *pgdir, char *addr, uint sz) +{ + if (addr + sz > (char*)USERTOP) + return 0; + char *first = PGROUNDDOWN(addr); + char *last = PGROUNDDOWN(addr + sz - 1); + char *a; + for(a = first; a <= last; a += PGSIZE){ + pte_t *pte = walkpgdir(pgdir, a, 0); + if(pte == 0 || (*pte & PTE_P) == 0){ + char *mem = kalloc(PGSIZE); + if(mem == 0){ + // XXX clean up? + return 0; + } + memset(mem, 0, PGSIZE); + mappages(pgdir, a, PGSIZE, PADDR(mem), PTE_W|PTE_U); + } + } + return 1; +} + +// deallocate some of the user pages, in response to sbrk() +// with a negative argument. if addr is not page-aligned, +// then only deallocates starting at the next page boundary. +int +deallocuvm(pde_t *pgdir, char *addr, uint sz) +{ + if (addr + sz > (char*)USERTOP) + return 0; + char *first = (char*) PGROUNDUP((uint)addr); + char *last = PGROUNDDOWN(addr + sz - 1); + char *a; + for(a = first; a <= last; a += PGSIZE){ + pte_t *pte = walkpgdir(pgdir, a, 0); + if(pte && (*pte & PTE_P) != 0){ + uint pa = PTE_ADDR(*pte); + if(pa == 0) + panic("deallocuvm"); + kfree((void *) pa, PGSIZE); + *pte = 0; + } + } + return 1; +} + +// free a page table and all the physical memory pages +// in the user part. +void +freevm(pde_t *pgdir) +{ + uint i, j, da; + + if (!pgdir) + panic("freevm: no pgdir\n"); + for (i = 0; i < NPDENTRIES; i++) { + da = PTE_ADDR(pgdir[i]); + if (da != 0) { + pte_t *pgtab = (pte_t*) da; + for (j = 0; j < NPTENTRIES; j++) { + if (pgtab[j] != 0) { + uint pa = PTE_ADDR(pgtab[j]); + uint va = PGADDR(i, j, 0); + if (va < USERTOP) // user memory + kfree((void *) pa, PGSIZE); + pgtab[j] = 0; + } + } + kfree((void *) da, PGSIZE); + pgdir[i] = 0; + } + } + kfree((void *) pgdir, PGSIZE); +} + +int +loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz) +{ + uint i, pa, n; + pte_t *pte; + + if ((uint)addr % PGSIZE != 0) + panic("loaduvm: addr must be page aligned\n"); + for (i = 0; i < sz; i += PGSIZE) { + if (!(pte = walkpgdir(pgdir, addr+i, 0))) + panic("loaduvm: address should exist\n"); + pa = PTE_ADDR(*pte); + if (sz - i < PGSIZE) n = sz - i; + else n = PGSIZE; + if(readi(ip, (char *)pa, offset+i, n) != n) + return 0; + } + return 1; +} + +void +inituvm(pde_t *pgdir, char *addr, char *init, uint sz) +{ + uint i, pa, n, off; + pte_t *pte; + + for (i = 0; i < sz; i += PGSIZE) { + if (!(pte = walkpgdir(pgdir, (void *)(i+addr), 0))) + panic("inituvm: pte should exist\n"); + off = (i+(uint)addr) % PGSIZE; + pa = PTE_ADDR(*pte); + if (sz - i < PGSIZE) n = sz - i; + else n = PGSIZE; + memmove((char *)pa+off, init+i, n); + } +} + +// given a parent process's page table, create a copy +// of it for a child. +pde_t* +copyuvm(pde_t *pgdir, uint sz) +{ + pde_t *d = setupkvm(); + pte_t *pte; + uint pa, i; + char *mem; + + if (!d) return 0; + for (i = 0; i < sz; i += PGSIZE) { + if (!(pte = walkpgdir(pgdir, (void *)i, 0))) + panic("copyuvm: pte should exist\n"); + if(*pte & PTE_P){ + pa = PTE_ADDR(*pte); + if (!(mem = kalloc(PGSIZE))) + return 0; + memmove(mem, (char *)pa, PGSIZE); + if (!mappages(d, (void *)i, PGSIZE, PADDR(mem), PTE_W|PTE_U)) + return 0; + } + } + return d; +} + +// Gather information about physical memory layout. +// Called once during boot. +// Really should find out how much physical memory +// there is rather than assuming PHYSTOP. 
+void +pminit(void) +{ + extern char end[]; + struct proghdr *ph; + struct elfhdr *elf = (struct elfhdr*)0x10000; // scratch space + + if (elf->magic != ELF_MAGIC || elf->phnum != 2) + panic("pminit: need a text and data segment\n"); + + ph = (struct proghdr*)((uchar*)elf + elf->phoff); + kernend = ((uint)end + PGSIZE) & ~(PGSIZE-1); + kerntext = ph[0].va; + kerndata = ph[1].va; + kerntsz = ph[0].memsz; + kerndsz = ph[1].memsz; + freesz = PHYSTOP - kernend; + + kinit((char *)kernend, freesz); +} + +// Allocate one page table for the machine for the kernel address +// space for scheduler processes. +void +kvmalloc(void) +{ + kpgdir = setupkvm(); +} + +// Turn on paging. +void +vminit(void) +{ + uint cr0; + + lcr3(PADDR(kpgdir)); + cr0 = rcr0(); + cr0 |= CR0_PE|CR0_PG|CR0_AM|CR0_WP|CR0_NE|CR0_TS|CR0_EM|CR0_MP; + cr0 &= ~(CR0_TS|CR0_EM); + lcr0(cr0); +} + @@ -121,6 +121,66 @@ sti(void) asm volatile("sti"); } +static inline void lcr0(uint val) +{ + asm volatile("movl %0,%%cr0" : : "r" (val)); +} + +static inline uint rcr0(void) +{ + uint val; + asm volatile("movl %%cr0,%0" : "=r" (val)); + return val; +} + +static inline uint rcr2(void) +{ + uint val; + asm volatile("movl %%cr2,%0" : "=r" (val)); + return val; +} + +static inline void lcr3(uint val) +{ + asm volatile("movl %0,%%cr3" : : "r" (val)); +} + +static inline uint rcr3(void) +{ + uint val; + asm volatile("movl %%cr3,%0" : "=r" (val)); + return val; +} + +static inline void lebp(uint val) +{ + asm volatile("movl %0,%%ebp" : : "r" (val)); +} + +static inline uint rebp(void) +{ + uint val; + asm volatile("movl %%ebp,%0" : "=r" (val)); + return val; +} + +static inline void lesp(uint val) +{ + asm volatile("movl %0,%%esp" : : "r" (val)); +} + +static inline uint resp(void) +{ + uint val; + asm volatile("movl %%esp,%0" : "=r" (val)); + return val; +} + +static inline void nop_pause(void) +{ + asm volatile("pause" : :); +} + //PAGEBREAK: 36 // Layout of the trap frame built on the stack by the // hardware and by trapasm.S, and passed to trap(). |
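
The PDX(), PTX(), PGADDR(), and PGROUNDUP() macros added to mmu.h above implement the 10/10/12 linear-address split drawn in the comment beside them. Below is a standalone sketch (hosted C, macros copied from the diff, example address arbitrary) showing the three fields recombining into the original value.

// pdxdemo.c -- hosted sketch of mmu.h's 10/10/12 linear-address split.
// Macros are copied verbatim from the diff so this compiles with an
// ordinary C compiler outside xv6; the example address is arbitrary.
#include <stdio.h>

typedef unsigned int uint;

#define PGSIZE   4096
#define PTXSHIFT 12     // offset of PTX in a linear address
#define PDXSHIFT 22     // offset of PDX in a linear address
#define PDX(la)  ((((uint)(la)) >> PDXSHIFT) & 0x3FF)
#define PTX(la)  ((((uint)(la)) >> PTXSHIFT) & 0x3FF)
#define PGADDR(d, t, o) ((uint)((d) << PDXSHIFT | (t) << PTXSHIFT | (o)))
#define PGROUNDUP(sz)   (((sz)+PGSIZE-1) & ~(PGSIZE-1))

int
main(void)
{
  uint la = 0x00801234;
  uint d = PDX(la), t = PTX(la), o = la & 0xFFF;

  printf("la 0x%x -> pdx %u, ptx %u, offset 0x%x\n", la, d, t, o);
  printf("PGADDR(pdx, ptx, offset) = 0x%x\n", PGADDR(d, t, o));
  printf("PGROUNDUP(0x%x) = 0x%x\n", la, PGROUNDUP(la));
  return 0;
}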
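
walkpgdir() and mappages() in vm.c do the two-level work: the page-directory entry chosen by PDX(va) locates (or, when create != 0, allocates) a page-table page, and the entry chosen by PTX(va) inside it holds the physical page plus permission bits. The sketch below imitates that structure with a toy "physical memory" array in place of kalloc() and real hardware tables; it illustrates the indexing only and is not the kernel code.

// walksketch.c -- hosted sketch of the two-level translation that
// walkpgdir() implements.  "Physical addresses" are byte offsets into
// a static array, which keeps the sketch portable to 64-bit hosts;
// flag values and macros are copied from mmu.h.
#include <stdio.h>
#include <string.h>

typedef unsigned int uint;
typedef uint pte_t;
typedef uint pde_t;

#define PGSIZE   4096
#define PTXSHIFT 12
#define PDXSHIFT 22
#define PTE_P    0x001
#define PTE_W    0x002
#define PTE_U    0x004
#define PDX(la)        ((((uint)(la)) >> PDXSHIFT) & 0x3FF)
#define PTX(la)        ((((uint)(la)) >> PTXSHIFT) & 0x3FF)
#define PTE_ADDR(pte)  ((uint)(pte) & ~0xFFF)

static char phys[16*PGSIZE] __attribute__((aligned(PGSIZE)));
static uint nextfree = PGSIZE;      // toy page 0 holds the page directory

static uint
toyalloc(void)                      // stands in for kalloc(PGSIZE)
{
  uint pa = nextfree;
  nextfree += PGSIZE;
  memset(phys + pa, 0, PGSIZE);
  return pa;
}

// Same shape as walkpgdir(): PDX picks the directory entry, which
// locates (or, if create, allocates) a page table; PTX picks the PTE.
static pte_t*
walk(pde_t *pgdir, uint va, int create)
{
  pde_t *pde = &pgdir[PDX(va)];
  if(!(*pde & PTE_P)){
    if(!create)
      return 0;
    *pde = toyalloc() | PTE_P | PTE_W | PTE_U;
  }
  pte_t *pgtab = (pte_t*)(phys + PTE_ADDR(*pde));
  return &pgtab[PTX(va)];
}

int
main(void)
{
  pde_t *pgdir = (pde_t*)phys;      // directory occupies toy page 0
  uint va = 0x00401000;

  pte_t *pte = walk(pgdir, va, 1);            // as mappages() would
  *pte = toyalloc() | PTE_P | PTE_W | PTE_U;
  printf("va 0x%x maps to toy pa 0x%x\n", va, PTE_ADDR(*walk(pgdir, va, 0)));
  return 0;
}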
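
The rewritten exec() builds the user stack inside one freshly allocated page that it reaches through uva2ka(); that is why every store subtracts spoffset — sp and argp are user virtual addresses, and mem + X - spoffset turns them into offsets within that page. The hosted sketch below replays the same packing into a local buffer, where user addresses equal offsets so no spoffset is needed, and prints the resulting layout (fake return pc, argc, argv, the argv[] array, then the strings). The file name and sample argv are illustrative only.

// stacklayout.c -- hosted sketch of the user-stack image exec()
// prepares for main(argc, argv).  The local page stands in for the
// page exec() writes through uva2ka().
#include <stdio.h>
#include <string.h>

typedef unsigned int uint;
#define PGSIZE 4096

int
main(void)
{
  char *argv[] = { "echo", "hello", 0 };
  static char mem[PGSIZE] __attribute__((aligned(4)));
  uint sz = PGSIZE, sp = PGSIZE, argp, arglen = 0;
  int i, argc, len;

  for(argc = 0; argv[argc]; argc++)
    arglen += strlen(argv[argc]) + 1;
  arglen = (arglen + 3) & ~3;
  argp = sz - arglen - 4*(argc+1);

  // Copy argv strings and pointers, exactly as exec() does.
  *(uint*)(mem + argp + 4*argc) = 0;         // argv[argc]
  for(i = argc-1; i >= 0; i--){
    len = strlen(argv[i]) + 1;
    sp -= len;
    memmove(mem + sp, argv[i], len);
    *(uint*)(mem + argp + 4*i) = sp;         // argv[i]
  }

  // Stack frame for main(argc, argv), below the arguments.
  sp = argp;
  sp -= 4; *(uint*)(mem + sp) = argp;        // argv
  sp -= 4; *(uint*)(mem + sp) = argc;        // argc
  sp -= 4; *(uint*)(mem + sp) = 0xffffffff;  // fake return pc

  printf("esp would start at 0x%x; argc=%d argv=0x%x\n", sp, argc, argp);
  for(i = 0; i < argc; i++)
    printf("  argv[%d] -> 0x%x \"%s\"\n", i,
           *(uint*)(mem + argp + 4*i), mem + *(uint*)(mem + argp + 4*i));
  return 0;
}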
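
growproc() now calls allocuvm() for positive requests and deallocuvm() for negative ones, so sbrk() can shrink a process as well as grow it; that is what the new sbrktest() and stacktest() in usertests.c probe. A much smaller user-level sketch of the same idea follows (file name and messages made up; it assumes the usual xv6 user library declarations for sbrk, printf, and exit).

// sbrkdemo.c -- condensed user-level sketch of what sbrktest() checks:
// sbrk(n) grows the address space, the new page is usable, and a
// negative sbrk() shrinks it again.
#include "types.h"
#include "stat.h"
#include "user.h"

int
main(void)
{
  char *old = sbrk(0);        // current break
  char *p = sbrk(4096);       // grow by one page
  if(p == (char*)-1){
    printf(1, "sbrk failed\n");
    exit();
  }
  p[0] = 1;                   // page must be mapped and writable
  p[4095] = 2;
  sbrk(-4096);                // shrink back to the old break
  if(sbrk(0) != old)
    printf(1, "unexpected break after shrink\n");
  else
    printf(1, "sbrk grow/shrink OK\n");
  exit();
}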
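
The commit also adds an uptime system call (syscall.h, syscall.c, sysproc.c) with a user stub via SYSCALL(uptime) in usys.S, returning the ticks counter. A minimal user program exercising it might look like the sketch below; it assumes user.h has gained a matching int uptime(void); declaration, which this diff does not show.

// uptimetest.c -- minimal sketch of a user program calling the new
// uptime() system call.  Assumes user.h declares "int uptime(void);"
// to match the SYSCALL(uptime) stub in usys.S.
#include "types.h"
#include "stat.h"
#include "user.h"

int
main(void)
{
  printf(1, "clock ticks since boot: %d\n", uptime());
  exit();
}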