summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 7
-rw-r--r-- asm.h 2
-rw-r--r-- bio.c 1
-rw-r--r-- bootasm.S 8
-rw-r--r-- bootother.S 6
-rw-r--r-- defs.h 24
-rw-r--r-- exec.c 89
-rw-r--r-- file.c 1
-rw-r--r-- forktest.c 8
-rw-r--r-- ide.c 3
-rw-r--r-- kalloc.c 35
-rw-r--r-- lapic.c 5
-rw-r--r-- main.c 57
-rw-r--r-- mmu.h 68
-rw-r--r-- mp.c 1
-rw-r--r-- param.h 2
-rw-r--r-- proc.c 80
-rw-r--r-- proc.h 9
-rw-r--r-- runoff.list 2
-rw-r--r-- runoff.spec 1
-rw-r--r-- sh.c 4
-rw-r--r-- spinlock.c 2
-rw-r--r-- syscall.c 16
-rw-r--r-- syscall.h 1
-rw-r--r-- sysfile.c 5
-rw-r--r-- sysproc.c 16
-rw-r--r-- trap.c 11
-rw-r--r-- traps.h 2
-rw-r--r-- types.h 1
-rw-r--r-- usertests.c 137
-rw-r--r-- usys.S 1
-rw-r--r-- vm.c 382
-rw-r--r-- x86.h 60
33 files changed, 858 insertions, 189 deletions
diff --git a/Makefile b/Makefile
index add3245..3ddc1c1 100644
--- a/Makefile
+++ b/Makefile
@@ -25,19 +25,20 @@ OBJS = \
trap.o\
uart.o\
vectors.o\
+ vm.o\
# Cross-compiling (e.g., on Mac OS X)
-#TOOLPREFIX = i386-jos-elf-
+TOOLPREFIX = i386-jos-elf-
# Using native tools (e.g., on X86 Linux)
-TOOLPREFIX =
+#TOOLPREFIX =
CC = $(TOOLPREFIX)gcc
AS = $(TOOLPREFIX)gas
LD = $(TOOLPREFIX)ld
OBJCOPY = $(TOOLPREFIX)objcopy
OBJDUMP = $(TOOLPREFIX)objdump
-CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32
+CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 -Werror
CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector)
ASFLAGS = -m32 -gdwarf-2
# FreeBSD ld wants ``elf_i386_fbsd''
diff --git a/asm.h b/asm.h
index 0c052db..68210d7 100644
--- a/asm.h
+++ b/asm.h
@@ -6,6 +6,8 @@
.word 0, 0; \
.byte 0, 0, 0, 0
+// The 0xC0 means the limit is in 4096-byte units
+// and (for executable segments) 32-bit mode.
#define SEG_ASM(type,base,lim) \
.word (((lim) >> 12) & 0xffff), ((base) & 0xffff); \
.byte (((base) >> 16) & 0xff), (0x90 | (type)), \
diff --git a/bio.c b/bio.c
index c746478..6a3968b 100644
--- a/bio.c
+++ b/bio.c
@@ -109,7 +109,6 @@ bread(uint dev, uint sector)
void
bwrite(struct buf *b)
{
- cprintf("bwrite sector %d\n", b->sector);
if((b->flags & B_BUSY) == 0)
panic("bwrite");
b->flags |= B_DIRTY;
diff --git a/bootasm.S b/bootasm.S
index 059cc1b..56175ce 100644
--- a/bootasm.S
+++ b/bootasm.S
@@ -51,8 +51,10 @@ seta20.2:
orl $CR0_PE, %eax
movl %eax, %cr0
- # Jump to next instruction, but in 32-bit code segment.
- # Switches processor into 32-bit mode.
+ # This ljmp is how you load the CS (Code Segment) register.
+ # SEG_ASM produces segment descriptors with the 32-bit mode
+ # flag set (the D flag), so addresses and word operands will
+ # default to 32 bits after this jump.
ljmp $(SEG_KCODE<<3), $start32
.code32 # Assemble for 32-bit mode
@@ -88,5 +90,5 @@ gdt:
SEG_ASM(STA_W, 0x0, 0xffffffff) # data seg
gdtdesc:
- .word (gdtdesc - gdt - 1) # sizeof(gdt) - 1
+ .word (gdtdesc - gdt - 1) # sizeof(gdt) - 1
.long gdt # address gdt
diff --git a/bootother.S b/bootother.S
index 11d32f1..899669a 100644
--- a/bootother.S
+++ b/bootother.S
@@ -45,8 +45,10 @@ start:
orl $CR0_PE, %eax
movl %eax, %cr0
- # Jump to next instruction, but in 32-bit code segment.
- # Switches processor into 32-bit mode.
+ # This ljmp is how you load the CS (Code Segment) register.
+ # SEG_ASM produces segment descriptors with the 32-bit mode
+ # flag set (the D flag), so addresses and word operands will
+ # default to 32 bits after this jump.
ljmp $(SEG_KCODE<<3), $start32
.code32 # Assemble for 32-bit mode
diff --git a/defs.h b/defs.h
index bcfab55..0197e70 100644
--- a/defs.h
+++ b/defs.h
@@ -60,9 +60,10 @@ extern uchar ioapicid;
void ioapicinit(void);
// kalloc.c
+extern int nfreemem;
char* kalloc(int);
void kfree(char*, int);
-void kinit(void);
+void kinit(char*,uint);
// kbd.c
void kbdintr(void);
@@ -101,8 +102,6 @@ int kill(int);
void pinit(void);
void procdump(void);
void scheduler(void) __attribute__((noreturn));
-void ksegment(void);
-void usegment(void);
void sleep(void*, struct spinlock*);
void userinit(void);
int wait(void);
@@ -143,7 +142,7 @@ void timerinit(void);
// trap.c
void idtinit(void);
-extern int ticks;
+extern uint ticks;
void tvinit(void);
extern struct spinlock tickslock;
@@ -152,6 +151,21 @@ void uartinit(void);
void uartintr(void);
void uartputc(int);
+// vm.c
+void pminit(void);
+void ksegment(void);
+void kvmalloc(void);
+void vminit(void);
+pde_t* setupkvm(void);
+char* uva2ka(pde_t*, char*);
+int allocuvm(pde_t*, char*, uint);
+int deallocuvm(pde_t *pgdir, char *addr, uint sz);
+void freevm(pde_t*);
+void inituvm(pde_t*, char*, char*, uint);
+int loaduvm(pde_t*, char*, struct inode *ip, uint, uint);
+pde_t* copyuvm(pde_t*,uint);
+void switchuvm(struct proc*);
+void switchkvm();
+
// number of elements in fixed-size array
#define NELEM(x) (sizeof(x)/sizeof((x)[0]))
-
diff --git a/exec.c b/exec.c
index 17fab01..4f11695 100644
--- a/exec.c
+++ b/exec.c
@@ -11,12 +11,13 @@ exec(char *path, char **argv)
{
char *mem, *s, *last;
int i, argc, arglen, len, off;
- uint sz, sp, argp;
+ uint sz, sp, spoffset, argp;
struct elfhdr elf;
struct inode *ip;
struct proghdr ph;
+ pde_t *pgdir, *oldpgdir;
- mem = 0;
+ pgdir = 0;
sz = 0;
if((ip = namei(path)) == 0)
@@ -29,37 +30,8 @@ exec(char *path, char **argv)
if(elf.magic != ELF_MAGIC)
goto bad;
- // Compute memory size of new process.
- // Program segments.
- for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){
- if(readi(ip, (char*)&ph, off, sizeof(ph)) != sizeof(ph))
- goto bad;
- if(ph.type != ELF_PROG_LOAD)
- continue;
- if(ph.memsz < ph.filesz)
- goto bad;
- sz += ph.memsz;
- }
-
- // Arguments.
- arglen = 0;
- for(argc=0; argv[argc]; argc++)
- arglen += strlen(argv[argc]) + 1;
- arglen = (arglen+3) & ~3;
- sz += arglen;
- sz += 4*(argc+1); // argv data
- sz += 4; // argv
- sz += 4; // argc
-
- // Stack.
- sz += PAGE;
-
- // Allocate program memory.
- sz = (sz+PAGE-1) & ~(PAGE-1);
- mem = kalloc(sz);
- if(mem == 0)
+ if (!(pgdir = setupkvm()))
goto bad;
- memset(mem, 0, sz);
// Load program into memory.
for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){
@@ -67,37 +39,51 @@ exec(char *path, char **argv)
goto bad;
if(ph.type != ELF_PROG_LOAD)
continue;
- if(ph.va + ph.memsz < ph.va || ph.va + ph.memsz > sz)
- goto bad;
if(ph.memsz < ph.filesz)
goto bad;
- if(readi(ip, mem + ph.va, ph.offset, ph.filesz) != ph.filesz)
+ if (!allocuvm(pgdir, (char *)ph.va, ph.memsz))
+ goto bad;
+ if(ph.va + ph.memsz > sz)
+ sz = ph.va + ph.memsz;
+ if (!loaduvm(pgdir, (char *)ph.va, ip, ph.offset, ph.filesz))
goto bad;
- memset(mem + ph.va + ph.filesz, 0, ph.memsz - ph.filesz);
}
iunlockput(ip);
-
- // Initialize stack.
+
+ // Allocate and initialize stack at sz
+ sz = PGROUNDUP(sz);
+ sz += PGSIZE; // leave an invalid page
+ if (!allocuvm(pgdir, (char *)sz, PGSIZE))
+ goto bad;
+ mem = uva2ka(pgdir, (char *)sz);
+ spoffset = sz;
+ sz += PGSIZE;
+
+ arglen = 0;
+ for(argc=0; argv[argc]; argc++)
+ arglen += strlen(argv[argc]) + 1;
+ arglen = (arglen+3) & ~3;
+
sp = sz;
argp = sz - arglen - 4*(argc+1);
// Copy argv strings and pointers to stack.
- *(uint*)(mem+argp + 4*argc) = 0; // argv[argc]
+ *(uint*)(mem+argp-spoffset + 4*argc) = 0; // argv[argc]
for(i=argc-1; i>=0; i--){
len = strlen(argv[i]) + 1;
sp -= len;
- memmove(mem+sp, argv[i], len);
- *(uint*)(mem+argp + 4*i) = sp; // argv[i]
+ memmove(mem+sp-spoffset, argv[i], len);
+ *(uint*)(mem+argp-spoffset + 4*i) = sp; // argv[i]
}
// Stack frame for main(argc, argv), below arguments.
sp = argp;
sp -= 4;
- *(uint*)(mem+sp) = argp;
+ *(uint*)(mem+sp-spoffset) = argp;
sp -= 4;
- *(uint*)(mem+sp) = argc;
+ *(uint*)(mem+sp-spoffset) = argc;
sp -= 4;
- *(uint*)(mem+sp) = 0xffffffff; // fake return pc
+ *(uint*)(mem+sp-spoffset) = 0xffffffff; // fake return pc
// Save program name for debugging.
for(last=s=path; *s; s++)
@@ -105,18 +91,21 @@ exec(char *path, char **argv)
last = s+1;
safestrcpy(proc->name, last, sizeof(proc->name));
- // Commit to the new image.
- kfree(proc->mem, proc->sz);
- proc->mem = mem;
+ // Commit to the user image.
+ oldpgdir = proc->pgdir;
+ proc->pgdir = pgdir;
proc->sz = sz;
proc->tf->eip = elf.entry; // main
proc->tf->esp = sp;
- usegment();
+
+ switchuvm(proc);
+
+ freevm(oldpgdir);
+
return 0;
bad:
- if(mem)
- kfree(mem, sz);
+ if (pgdir) freevm(pgdir);
iunlockput(ip);
return -1;
}
diff --git a/file.c b/file.c
index 9b29d08..e10b824 100644
--- a/file.c
+++ b/file.c
@@ -116,7 +116,6 @@ filewrite(struct file *f, char *addr, int n)
return pipewrite(f->pipe, addr, n);
if(f->type == FD_INODE){
ilock(f->ip);
- cprintf("filewrite: %d\n", n);
if((r = writei(f->ip, addr, f->off, n)) > 0)
f->off += r;
iunlock(f->ip);
diff --git a/forktest.c b/forktest.c
index 90cc38c..bb286e6 100644
--- a/forktest.c
+++ b/forktest.c
@@ -5,6 +5,8 @@
#include "stat.h"
#include "user.h"
+#define N 1000
+
void
printf(int fd, char *s, ...)
{
@@ -18,7 +20,7 @@ forktest(void)
printf(1, "fork test\n");
- for(n=0; n<1000; n++){
+ for(n=0; n<N; n++){
pid = fork();
if(pid < 0)
break;
@@ -26,8 +28,8 @@ forktest(void)
exit();
}
- if(n == 1000){
- printf(1, "fork claimed to work 1000 times!\n");
+ if(n == N){
+ printf(1, "fork claimed to work N times!\n", N);
exit();
}
diff --git a/ide.c b/ide.c
index c0eb80a..7b12aa0 100644
--- a/ide.c
+++ b/ide.c
@@ -147,8 +147,9 @@ iderw(struct buf *b)
// Wait for request to finish.
// Assuming will not sleep too long: ignore proc->killed.
- while((b->flags & (B_VALID|B_DIRTY)) != B_VALID)
+ while((b->flags & (B_VALID|B_DIRTY)) != B_VALID) {
sleep(b, &idelock);
+ }
release(&idelock);
}
diff --git a/kalloc.c b/kalloc.c
index 2730d57..ca87018 100644
--- a/kalloc.c
+++ b/kalloc.c
@@ -1,13 +1,13 @@
// Physical memory allocator, intended to allocate
-// memory for user processes. Allocates in 4096-byte "pages".
+// memory for user processes. Allocates in 4096-byte pages.
// Free list is kept sorted and combines adjacent pages into
// long runs, to make it easier to allocate big segments.
-// One reason the page size is 4k is that the x86 segment size
-// granularity is 4k.
+// This combining is not useful now that xv6 uses paging.
#include "types.h"
#include "defs.h"
#include "param.h"
+#include "mmu.h"
#include "spinlock.h"
struct run {
@@ -20,21 +20,14 @@ struct {
struct run *freelist;
} kmem;
+int nfreemem;
+
// Initialize free list of physical pages.
-// This code cheats by just considering one megabyte of
-// pages after end. Real systems would determine the
-// amount of memory available in the system and use it all.
void
-kinit(void)
+kinit(char *p, uint len)
{
- extern char end[];
- uint len;
- char *p;
-
initlock(&kmem.lock, "kmem");
- p = (char*)(((uint)end + PAGE) & ~(PAGE-1));
- len = 256*PAGE; // assume computer has 256 pages of RAM, 1 MB
- cprintf("mem = %d\n", len);
+ nfreemem = 0;
kfree(p, len);
}
@@ -47,19 +40,23 @@ kfree(char *v, int len)
{
struct run *r, *rend, **rp, *p, *pend;
- if(len <= 0 || len % PAGE)
+ if(len <= 0 || len % PGSIZE)
panic("kfree");
// Fill with junk to catch dangling refs.
memset(v, 1, len);
acquire(&kmem.lock);
+ nfreemem += len;
p = (struct run*)v;
pend = (struct run*)(v + len);
for(rp=&kmem.freelist; (r=*rp) != 0 && r <= pend; rp=&r->next){
rend = (struct run*)((char*)r + r->len);
- if(r <= p && p < rend)
+ if(r <= p && p < rend) {
+ cprintf("freeing a free page: r = 0x%x p = 0x%x rend = 0x%x\n",
+ r, p, rend);
panic("freeing free page");
+ }
if(rend == p){ // r before p: expand r to include p
r->len += len;
if(r->next && r->next == pend){ // r now next to r->next?
@@ -93,7 +90,7 @@ kalloc(int n)
char *p;
struct run *r, **rp;
- if(n % PAGE || n <= 0)
+ if(n % PGSIZE || n <= 0)
panic("kalloc");
acquire(&kmem.lock);
@@ -103,12 +100,12 @@ kalloc(int n)
p = (char*)r + r->len;
if(r->len == 0)
*rp = r->next;
+ nfreemem -= n;
release(&kmem.lock);
return p;
}
}
release(&kmem.lock);
-
- cprintf("kalloc: out of memory\n");
return 0;
}
+
diff --git a/lapic.c b/lapic.c
index d2407b1..e232abc 100644
--- a/lapic.c
+++ b/lapic.c
@@ -20,8 +20,11 @@
#define STARTUP 0x00000600 // Startup IPI
#define DELIVS 0x00001000 // Delivery status
#define ASSERT 0x00004000 // Assert interrupt (vs deassert)
+ #define DEASSERT 0x00000000
#define LEVEL 0x00008000 // Level triggered
#define BCAST 0x00080000 // Send to all APICs, including self.
+ #define BUSY 0x00001000
+ #define FIXED 0x00000000
#define ICRHI (0x0310/4) // Interrupt Command [63:32]
#define TIMER (0x0320/4) // Local Vector Table 0 (TIMER)
#define X1 0x0000000B // divide counts by 1
@@ -48,6 +51,7 @@ lapicw(int index, int value)
void
lapicinit(int c)
{
+ cprintf("lapicinit: %d 0x%x\n", c, lapic);
if(!lapic)
return;
@@ -126,7 +130,6 @@ microdelay(int us)
{
}
-
#define IO_RTC 0x70
// Start additional processor running bootstrap code at addr.
diff --git a/main.c b/main.c
index 60cd1b3..a6088cb 100644
--- a/main.c
+++ b/main.c
@@ -6,23 +6,42 @@
#include "x86.h"
static void bootothers(void);
-static void mpmain(void) __attribute__((noreturn));
+static void mpmain(void);
+void jkstack(void) __attribute__((noreturn));
+void mainc(void);
// Bootstrap processor starts running C code here.
int
main(void)
{
- mpinit(); // collect info about this machine
+ mpinit(); // collect info about this machine
lapicinit(mpbcpu());
- ksegment();
+ ksegment(); // set up segments
picinit(); // interrupt controller
ioapicinit(); // another interrupt controller
consoleinit(); // I/O devices & their interrupts
uartinit(); // serial port
-cprintf("cpus %p cpu %p\n", cpus, cpu);
- cprintf("\ncpu%d: starting xv6\n\n", cpu->id);
+ pminit(); // discover how much memory there is
+ jkstack(); // call mainc() on a properly-allocated stack
+}
+
+void
+jkstack(void)
+{
+ char *kstack = kalloc(PGSIZE);
+ if (!kstack)
+ panic("jkstack\n");
+ char *top = kstack + PGSIZE;
+ asm volatile("movl %0,%%esp" : : "r" (top));
+ asm volatile("call mainc");
+ panic("jkstack");
+}
- kinit(); // physical memory allocator
+void
+mainc(void)
+{
+ cprintf("\ncpu%d: starting xv6\n\n", cpu->id);
+ kvmalloc(); // initialize the kernel page table
pinit(); // process table
tvinit(); // trap vectors
binit(); // buffer cache
@@ -38,20 +57,21 @@ cprintf("cpus %p cpu %p\n", cpus, cpu);
mpmain();
}
-// Bootstrap processor gets here after setting up the hardware.
-// Additional processors start here.
+// Common CPU setup code.
+// Bootstrap CPU comes here from mainc().
+// Other CPUs jump here from bootother.S.
static void
mpmain(void)
{
- if(cpunum() != mpbcpu())
+ if(cpunum() != mpbcpu()) {
+ ksegment();
lapicinit(cpunum());
- ksegment();
- cprintf("cpu%d: mpmain\n", cpu->id);
- idtinit();
+ }
+ vminit(); // turn on paging
+ cprintf("cpu%d: starting\n", cpu->id);
+ idtinit(); // load idt register
xchg(&cpu->booted, 1);
-
- cprintf("cpu%d: scheduling\n", cpu->id);
- scheduler();
+ scheduler(); // start running processes
}
static void
@@ -62,8 +82,9 @@ bootothers(void)
struct cpu *c;
char *stack;
- // Write bootstrap code to unused memory at 0x7000.
- code = (uchar*)0x7000;
+ // Write bootstrap code to unused memory at 0x7000. The linker has
+ // placed the start of bootother.S there.
+ code = (uchar *) 0x7000;
memmove(code, _binary_bootother_start, (uint)_binary_bootother_size);
for(c = cpus; c < cpus+ncpu; c++){
@@ -76,7 +97,7 @@ bootothers(void)
*(void**)(code-8) = mpmain;
lapicstartap(c->id, (uint)code);
- // Wait for cpu to get through bootstrap.
+ // Wait for cpu to finish mpmain()
while(c->booted == 0)
;
}
diff --git a/mmu.h b/mmu.h
index 364d05b..f4fc732 100644
--- a/mmu.h
+++ b/mmu.h
@@ -62,6 +62,8 @@ struct segdesc {
#define STA_R 0x2 // Readable (executable segments)
#define STA_A 0x1 // Accessed
+//
+
// System segment type bits
#define STS_T16A 0x1 // Available 16-bit TSS
#define STS_LDT 0x2 // Local Descriptor Table
@@ -76,6 +78,72 @@ struct segdesc {
#define STS_IG32 0xE // 32-bit Interrupt Gate
#define STS_TG32 0xF // 32-bit Trap Gate
+
+// A linear address 'la' has a three-part structure as follows:
+//
+// +--------10------+-------10-------+---------12----------+
+// | Page Directory | Page Table | Offset within Page |
+// | Index | Index | |
+// +----------------+----------------+---------------------+
+// \--- PDX(la) --/ \--- PTX(la) --/
+
+// page directory index
+#define PDX(la) ((((uint) (la)) >> PDXSHIFT) & 0x3FF)
+
+// page table index
+#define PTX(la) ((((uint) (la)) >> PTXSHIFT) & 0x3FF)
+
+// construct linear address from indexes and offset
+#define PGADDR(d, t, o) ((uint) ((d) << PDXSHIFT | (t) << PTXSHIFT | (o)))
+
+// Turn a kernel linear address into a physical address.
+// All of the kernel data structures have linear and
+// physical addresses that are equal.
+#define PADDR(a) ((uint) a)
+
+// Page directory and page table constants.
+#define NPDENTRIES 1024 // page directory entries per page directory
+#define NPTENTRIES 1024 // page table entries per page table
+
+#define PGSIZE 4096 // bytes mapped by a page
+#define PGSHIFT 12 // log2(PGSIZE)
+
+#define PTXSHIFT 12 // offset of PTX in a linear address
+#define PDXSHIFT 22 // offset of PDX in a linear address
+
+#define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1))
+#define PGROUNDDOWN(a) ((char*)((((unsigned int)a) & ~(PGSIZE-1))))
+
+// Page table/directory entry flags.
+#define PTE_P 0x001 // Present
+#define PTE_W 0x002 // Writeable
+#define PTE_U 0x004 // User
+#define PTE_PWT 0x008 // Write-Through
+#define PTE_PCD 0x010 // Cache-Disable
+#define PTE_A 0x020 // Accessed
+#define PTE_D 0x040 // Dirty
+#define PTE_PS 0x080 // Page Size
+#define PTE_MBZ 0x180 // Bits must be zero
+
+// Address in page table or page directory entry
+#define PTE_ADDR(pte) ((uint) (pte) & ~0xFFF)
+
+typedef uint pte_t;
+
+// Control Register flags
+#define CR0_PE 0x00000001 // Protection Enable
+#define CR0_MP 0x00000002 // Monitor coProcessor
+#define CR0_EM 0x00000004 // Emulation
+#define CR0_TS 0x00000008 // Task Switched
+#define CR0_ET 0x00000010 // Extension Type
+#define CR0_NE 0x00000020 // Numeric Error
+#define CR0_WP 0x00010000 // Write Protect
+#define CR0_AM 0x00040000 // Alignment Mask
+#define CR0_NW 0x20000000 // Not Writethrough
+#define CR0_CD 0x40000000 // Cache Disable
+#define CR0_PG 0x80000000 // Paging
+
+
// PAGEBREAK: 40
// Task state segment format
struct taskstate {
diff --git a/mp.c b/mp.c
index e1edf24..d2f828a 100644
--- a/mp.c
+++ b/mp.c
@@ -39,6 +39,7 @@ mpsearch1(uchar *addr, int len)
{
uchar *e, *p;
+ cprintf("mpsearch1 0x%x %d\n", addr, len);
e = addr+len;
for(p = addr; p < e; p += sizeof(struct mp))
if(memcmp(p, "_MP_", 4) == 0 && sum(p, sizeof(struct mp)) == 0)
diff --git a/param.h b/param.h
index 34edf95..c1959d1 100644
--- a/param.h
+++ b/param.h
@@ -1,5 +1,5 @@
#define NPROC 64 // maximum number of processes
-#define PAGE 4096 // granularity of user-space memory allocation
+#define PAGE 4096 // conveniently chosen to be equal to PGSIZE
#define KSTACKSIZE PAGE // size of per-process kernel stack
#define NCPU 8 // maximum number of CPUs
#define NOFILE 16 // open files per process
diff --git a/proc.c b/proc.c
index 669331e..e69bacf 100644
--- a/proc.c
+++ b/proc.c
@@ -60,39 +60,6 @@ procdump(void)
}
}
-// Set up CPU's kernel segment descriptors.
-// Run once at boot time on each CPU.
-void
-ksegment(void)
-{
- struct cpu *c;
-
- c = &cpus[cpunum()];
- c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0x100000 + 64*1024-1, 0);
- c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0);
- c->gdt[SEG_KCPU] = SEG(STA_W, &c->cpu, 8, 0);
- lgdt(c->gdt, sizeof(c->gdt));
- loadgs(SEG_KCPU << 3);
-
- // Initialize cpu-local storage.
- cpu = c;
- proc = 0;
-}
-
-// Set up CPU's segment descriptors and current process task state.
-void
-usegment(void)
-{
- pushcli();
- cpu->gdt[SEG_UCODE] = SEG(STA_X|STA_R, proc->mem, proc->sz-1, DPL_USER);
- cpu->gdt[SEG_UDATA] = SEG(STA_W, proc->mem, proc->sz-1, DPL_USER);
- cpu->gdt[SEG_TSS] = SEG16(STS_T32A, &cpu->ts, sizeof(cpu->ts)-1, 0);
- cpu->gdt[SEG_TSS].s = 0;
- cpu->ts.ss0 = SEG_KDATA << 3;
- cpu->ts.esp0 = (uint)proc->kstack + KSTACKSIZE;
- ltr(SEG_TSS << 3);
- popcli();
-}
//PAGEBREAK: 32
// Look in the process table for an UNUSED proc.
@@ -149,20 +116,19 @@ userinit(void)
p = allocproc();
initproc = p;
-
- // Initialize memory from initcode.S
- p->sz = PAGE;
- p->mem = kalloc(p->sz);
- memset(p->mem, 0, p->sz);
- memmove(p->mem, _binary_initcode_start, (int)_binary_initcode_size);
-
+ if (!(p->pgdir = setupkvm()))
+ panic("userinit: out of memory?");
+ if (!allocuvm(p->pgdir, 0x0, (int)_binary_initcode_size))
+ panic("userinit: out of memory?");
+ inituvm(p->pgdir, 0x0, _binary_initcode_start, (int)_binary_initcode_size);
+ p->sz = PGROUNDUP((int)_binary_initcode_size);
memset(p->tf, 0, sizeof(*p->tf));
p->tf->cs = (SEG_UCODE << 3) | DPL_USER;
p->tf->ds = (SEG_UDATA << 3) | DPL_USER;
p->tf->es = p->tf->ds;
p->tf->ss = p->tf->ds;
p->tf->eflags = FL_IF;
- p->tf->esp = p->sz;
+ p->tf->esp = PGSIZE;
p->tf->eip = 0; // beginning of initcode.S
safestrcpy(p->name, "initcode", sizeof(p->name));
@@ -176,17 +142,15 @@ userinit(void)
int
growproc(int n)
{
- char *newmem;
-
- newmem = kalloc(proc->sz + n);
- if(newmem == 0)
- return -1;
- memmove(newmem, proc->mem, proc->sz);
- memset(newmem + proc->sz, 0, n);
- kfree(proc->mem, proc->sz);
- proc->mem = newmem;
+ if(n > 0){
+ if (!allocuvm(proc->pgdir, (char *)proc->sz, n))
+ return -1;
+ } else if(n < 0){
+ if (!deallocuvm(proc->pgdir, (char *)(proc->sz + n), 0 - n))
+ return -1;
+ }
proc->sz += n;
- usegment();
+ switchuvm(proc);
return 0;
}
@@ -204,14 +168,13 @@ fork(void)
return -1;
// Copy process state from p.
- np->sz = proc->sz;
- if((np->mem = kalloc(np->sz)) == 0){
+ if (!(np->pgdir = copyuvm(proc->pgdir, proc->sz))) {
kfree(np->kstack, KSTACKSIZE);
np->kstack = 0;
np->state = UNUSED;
return -1;
}
- memmove(np->mem, proc->mem, np->sz);
+ np->sz = proc->sz;
np->parent = proc;
*np->tf = *proc->tf;
@@ -225,7 +188,7 @@ fork(void)
pid = np->pid;
np->state = RUNNABLE;
-
+ safestrcpy(np->name, proc->name, sizeof(proc->name));
return pid;
}
@@ -256,9 +219,10 @@ scheduler(void)
// to release ptable.lock and then reacquire it
// before jumping back to us.
proc = p;
- usegment();
+ switchuvm(p);
p->state = RUNNING;
swtch(&cpu->scheduler, proc->context);
+ switchkvm();
// Process is done running for now.
// It should have changed its p->state before coming back.
@@ -284,7 +248,6 @@ sched(void)
panic("sched running");
if(readeflags()&FL_IF)
panic("sched interruptible");
-
intena = cpu->intena;
swtch(&proc->context, cpu->scheduler);
cpu->intena = intena;
@@ -455,8 +418,9 @@ wait(void)
if(p->state == ZOMBIE){
// Found one.
pid = p->pid;
- kfree(p->mem, p->sz);
kfree(p->kstack, KSTACKSIZE);
+ p->kstack = 0;
+ freevm(p->pgdir);
p->state = UNUSED;
p->pid = 0;
p->parent = 0;
diff --git a/proc.h b/proc.h
index ebf4f2d..7d97dfa 100644
--- a/proc.h
+++ b/proc.h
@@ -3,8 +3,8 @@
#define SEG_KCODE 1 // kernel code
#define SEG_KDATA 2 // kernel data+stack
#define SEG_KCPU 3 // kernel per-cpu data
-#define SEG_UCODE 4
-#define SEG_UDATA 5
+#define SEG_UCODE 4 // user code
+#define SEG_UDATA 5 // user data+stack
#define SEG_TSS 6 // this process's task state
#define NSEGS 7
@@ -16,7 +16,7 @@
// Contexts are stored at the bottom of the stack they
// describe; the stack pointer is the address of the context.
// The layout of the context matches the layout of the stack in swtch.S
-// at "Switch stacks" comment. Switch itself doesn't save eip explicitly,
+// at the "Switch stacks" comment. Switch doesn't save eip explicitly,
// but it is on the stack and allocproc() manipulates it.
struct context {
uint edi;
@@ -30,8 +30,8 @@ enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE };
// Per-process state
struct proc {
- char *mem; // Start of process memory (kernel address)
uint sz; // Size of process memory (bytes)
+ pde_t* pgdir; // Linear address of proc's pgdir
char *kstack; // Bottom of kernel stack for this process
enum procstate state; // Process state
volatile int pid; // Process ID
@@ -48,6 +48,7 @@ struct proc {
// Process memory is laid out contiguously, low addresses first:
// text
// original data and bss
+// invalid page
// fixed-size stack
// expandable heap
diff --git a/runoff.list b/runoff.list
index 6bbd386..f39ce18 100644
--- a/runoff.list
+++ b/runoff.list
@@ -21,7 +21,9 @@ spinlock.c
proc.h
proc.c
swtch.S
+vm.c
kalloc.c
+vm.c
# system calls
traps.h
diff --git a/runoff.spec b/runoff.spec
index dbd6d5c..e4cfd42 100644
--- a/runoff.spec
+++ b/runoff.spec
@@ -27,6 +27,7 @@ even: proc.h # mild preference
right: proc.c # VERY important
# setjmp.S either
+# vm.c either
# kalloc.c either
# syscall.h either
diff --git a/sh.c b/sh.c
index 100bbdc..16e325b 100644
--- a/sh.c
+++ b/sh.c
@@ -330,7 +330,7 @@ parsecmd(char *s)
{
char *es;
struct cmd *cmd;
-
+
es = s + strlen(s);
cmd = parseline(&s, es);
peek(&s, es, "");
@@ -363,7 +363,7 @@ struct cmd*
parsepipe(char **ps, char *es)
{
struct cmd *cmd;
-
+
cmd = parseexec(ps, es);
if(peek(ps, es, "|")){
gettoken(ps, es, 0, 0);
diff --git a/spinlock.c b/spinlock.c
index c3ea730..68cfbe9 100644
--- a/spinlock.c
+++ b/spinlock.c
@@ -71,7 +71,7 @@ getcallerpcs(void *v, uint pcs[])
ebp = (uint*)v - 2;
for(i = 0; i < 10; i++){
- if(ebp == 0 || ebp == (uint*)0xffffffff)
+ if(ebp == 0 || ebp < (uint *) 0x100000 || ebp == (uint*)0xffffffff)
break;
pcs[i] = ebp[1]; // saved %eip
ebp = (uint*)ebp[0]; // saved %ebp
diff --git a/syscall.c b/syscall.c
index 110a872..9296cff 100644
--- a/syscall.c
+++ b/syscall.c
@@ -18,10 +18,12 @@ fetchint(struct proc *p, uint addr, int *ip)
{
if(addr >= p->sz || addr+4 > p->sz)
return -1;
- *ip = *(int*)(p->mem + addr);
+ *ip = *(int*)(addr);
return 0;
}
+// XXX should we copy the string?
+
// Fetch the nul-terminated string at addr from process p.
// Doesn't actually copy the string - just sets *pp to point at it.
// Returns length of string, not including nul.
@@ -32,8 +34,8 @@ fetchstr(struct proc *p, uint addr, char **pp)
if(addr >= p->sz)
return -1;
- *pp = p->mem + addr;
- ep = p->mem + p->sz;
+ *pp = (char *) addr;
+ ep = (char *) p->sz;
for(s = *pp; s < ep; s++)
if(*s == 0)
return s - *pp;
@@ -44,7 +46,8 @@ fetchstr(struct proc *p, uint addr, char **pp)
int
argint(int n, int *ip)
{
- return fetchint(proc, proc->tf->esp + 4 + 4*n, ip);
+ int x = fetchint(proc, proc->tf->esp + 4 + 4*n, ip);
+ return x;
}
// Fetch the nth word-sized system call argument as a pointer
@@ -59,7 +62,8 @@ argptr(int n, char **pp, int size)
return -1;
if((uint)i >= proc->sz || (uint)i+size >= proc->sz)
return -1;
- *pp = proc->mem + i;
+ // *pp = proc->mem + i; // XXXXX
+ *pp = (char *) i; // XXXXX
return 0;
}
@@ -96,6 +100,7 @@ extern int sys_sleep(void);
extern int sys_unlink(void);
extern int sys_wait(void);
extern int sys_write(void);
+extern int sys_uptime(void);
static int (*syscalls[])(void) = {
[SYS_chdir] sys_chdir,
@@ -118,6 +123,7 @@ static int (*syscalls[])(void) = {
[SYS_unlink] sys_unlink,
[SYS_wait] sys_wait,
[SYS_write] sys_write,
+[SYS_uptime] sys_uptime,
};
void
diff --git a/syscall.h b/syscall.h
index f4b7807..3a0fbca 100644
--- a/syscall.h
+++ b/syscall.h
@@ -19,3 +19,4 @@
#define SYS_getpid 18
#define SYS_sbrk 19
#define SYS_sleep 20
+#define SYS_uptime 21
diff --git a/sysfile.c b/sysfile.c
index 3eec766..6b8eef4 100644
--- a/sysfile.c
+++ b/sysfile.c
@@ -264,7 +264,6 @@ sys_open(void)
if(argstr(0, &path) < 0 || argint(1, &omode) < 0)
return -1;
-
if(omode & O_CREATE){
if((ip = create(path, T_FILE, 0, 0)) == 0)
return -1;
@@ -291,7 +290,6 @@ sys_open(void)
f->off = 0;
f->readable = !(omode & O_WRONLY);
f->writable = (omode & O_WRONLY) || (omode & O_RDWR);
-
return fd;
}
@@ -350,8 +348,9 @@ sys_exec(void)
int i;
uint uargv, uarg;
- if(argstr(0, &path) < 0 || argint(1, (int*)&uargv) < 0)
+ if(argstr(0, &path) < 0 || argint(1, (int*)&uargv) < 0) {
return -1;
+ }
memset(argv, 0, sizeof(argv));
for(i=0;; i++){
if(i >= NELEM(argv))
diff --git a/sysproc.c b/sysproc.c
index 11770ff..efaa372 100644
--- a/sysproc.c
+++ b/sysproc.c
@@ -57,7 +57,8 @@ sys_sbrk(void)
int
sys_sleep(void)
{
- int n, ticks0;
+ int n;
+ uint ticks0;
if(argint(0, &n) < 0)
return -1;
@@ -73,3 +74,16 @@ sys_sleep(void)
release(&tickslock);
return 0;
}
+
+// return how many clock tick interrupts have occurred
+// since boot.
+int
+sys_uptime(void)
+{
+ uint xticks;
+
+ acquire(&tickslock);
+ xticks = ticks;
+ release(&tickslock);
+ return xticks;
+}
diff --git a/trap.c b/trap.c
index 86ce052..daee22f 100644
--- a/trap.c
+++ b/trap.c
@@ -11,7 +11,7 @@
struct gatedesc idt[256];
extern uint vectors[]; // in vectors.S: array of 256 entry pointers
struct spinlock tickslock;
-int ticks;
+uint ticks;
void
tvinit(void)
@@ -78,13 +78,14 @@ trap(struct trapframe *tf)
default:
if(proc == 0 || (tf->cs&3) == 0){
// In kernel, it must be our mistake.
- cprintf("unexpected trap %d from cpu %d eip %x\n",
- tf->trapno, cpu->id, tf->eip);
+ cprintf("unexpected trap %d from cpu %d eip %x (cr2=0x%x)\n",
+ tf->trapno, cpu->id, tf->eip, rcr2());
panic("trap");
}
// In user space, assume process misbehaved.
- cprintf("pid %d %s: trap %d err %d on cpu %d eip %x -- kill proc\n",
- proc->pid, proc->name, tf->trapno, tf->err, cpu->id, tf->eip);
+ cprintf("pid %d %s: trap %d err %d on cpu %d eip 0x%x addr 0x%x--kill proc\n",
+ proc->pid, proc->name, tf->trapno, tf->err, cpu->id, tf->eip,
+ rcr2());
proc->killed = 1;
}
diff --git a/traps.h b/traps.h
index f450c2d..0bd1fd8 100644
--- a/traps.h
+++ b/traps.h
@@ -24,7 +24,7 @@
// These are arbitrarily chosen, but with care not to overlap
// processor defined exceptions or interrupt vectors.
-#define T_SYSCALL 64 // system call
+#define T_SYSCALL 64 // system call
#define T_DEFAULT 500 // catchall
#define T_IRQ0 32 // IRQ 0 corresponds to int T_IRQ
diff --git a/types.h b/types.h
index 1c19789..e4adf64 100644
--- a/types.h
+++ b/types.h
@@ -1,3 +1,4 @@
typedef unsigned int uint;
typedef unsigned short ushort;
typedef unsigned char uchar;
+typedef uint pde_t;
diff --git a/usertests.c b/usertests.c
index cc2601c..670a4a8 100644
--- a/usertests.c
+++ b/usertests.c
@@ -322,8 +322,9 @@ void
mem(void)
{
void *m1, *m2;
- int pid;
+ int pid, ppid;
+ ppid = getpid();
if((pid = fork()) == 0){
m1 = 0;
while((m2 = malloc(10001)) != 0) {
@@ -338,6 +339,7 @@ mem(void)
m1 = malloc(1024*20);
if(m1 == 0) {
printf(1, "couldn't allocate mem?!!\n");
+ kill(ppid);
exit();
}
free(m1);
@@ -1229,6 +1231,136 @@ forktest(void)
printf(1, "fork test OK\n");
}
+void
+sbrktest(void)
+{
+ int pid;
+ char *oldbrk = sbrk(0);
+
+ printf(stdout, "sbrk test\n");
+
+ // can one sbrk() less than a page?
+ char *a = sbrk(0);
+ int i;
+ for(i = 0; i < 5000; i++){
+ char *b = sbrk(1);
+ if(b != a){
+ printf(stdout, "sbrk test failed %d %x %x\n", i, a, b);
+ exit();
+ }
+ *b = 1;
+ a = b + 1;
+ }
+ pid = fork();
+ if(pid < 0){
+ printf(stdout, "sbrk test fork failed\n");
+ exit();
+ }
+ char *c = sbrk(1);
+ c = sbrk(1);
+ if(c != a + 1){
+ printf(stdout, "sbrk test failed post-fork\n");
+ exit();
+ }
+ if(pid == 0)
+ exit();
+ wait();
+
+ // can one allocate the full 640K?
+ a = sbrk(0);
+ uint amt = (640 * 1024) - (uint) a;
+ char *p = sbrk(amt);
+ if(p != a){
+ printf(stdout, "sbrk test failed 640K test, p %x a %x\n", p, a);
+ exit();
+ }
+ char *lastaddr = (char *)(640 * 1024 - 1);
+ *lastaddr = 99;
+
+ // is one forbidden from allocating more than 640K?
+ c = sbrk(4096);
+ if(c != (char *) 0xffffffff){
+ printf(stdout, "sbrk allocated more than 640K, c %x\n", c);
+ exit();
+ }
+
+ // can one de-allocate?
+ a = sbrk(0);
+ c = sbrk(-4096);
+ if(c == (char *) 0xffffffff){
+ printf(stdout, "sbrk could not deallocate\n");
+ exit();
+ }
+ c = sbrk(0);
+ if(c != a - 4096){
+ printf(stdout, "sbrk deallocation produced wrong address, a %x c %x\n", a, c);
+ exit();
+ }
+
+ // can one re-allocate that page?
+ a = sbrk(0);
+ c = sbrk(4096);
+ if(c != a || sbrk(0) != a + 4096){
+ printf(stdout, "sbrk re-allocation failed, a %x c %x\n", a, c);
+ exit();
+ }
+ if(*lastaddr == 99){
+ // should be zero
+ printf(stdout, "sbrk de-allocation didn't really deallocate\n");
+ exit();
+ }
+
+ c = sbrk(4096);
+ if(c != (char *) 0xffffffff){
+ printf(stdout, "sbrk was able to re-allocate beyond 640K, c %x\n", c);
+ exit();
+ }
+
+ // can we read the kernel's memory?
+ for(a = (char*)(640*1024); a < (char *)2000000; a += 50000){
+ int ppid = getpid();
+ int pid = fork();
+ if(pid < 0){
+ printf(stdout, "fork failed\n");
+ exit();
+ }
+ if(pid == 0){
+ printf(stdout, "oops could read %x = %x\n", a, *a);
+ kill(ppid);
+ exit();
+ }
+ wait();
+ }
+
+ if(sbrk(0) > oldbrk)
+ sbrk(-(sbrk(0) - oldbrk));
+
+ printf(stdout, "sbrk test OK\n");
+}
+
+void
+stacktest(void)
+{
+ printf(stdout, "stack test\n");
+ char dummy = 1;
+ char *p = &dummy;
+ int ppid = getpid();
+ int pid = fork();
+ if(pid < 0){
+ printf(stdout, "fork failed\n");
+ exit();
+ }
+ if(pid == 0){
+ // should cause a trap:
+ p[-4096] = 'z';
+ kill(ppid);
+ printf(stdout, "stack test failed: page before stack was writeable\n");
+ exit();
+ }
+ wait();
+ printf(stdout, "stack test OK\n");
+}
+
int
main(int argc, char *argv[])
{
@@ -1240,6 +1372,9 @@ main(int argc, char *argv[])
}
close(open("usertests.ran", O_CREATE));
+ stacktest();
+ sbrktest();
+
opentest();
writetest();
writetest1();
diff --git a/usys.S b/usys.S
index 2291b02..8bfd8a1 100644
--- a/usys.S
+++ b/usys.S
@@ -28,3 +28,4 @@ SYSCALL(dup)
SYSCALL(getpid)
SYSCALL(sbrk)
SYSCALL(sleep)
+SYSCALL(uptime)
diff --git a/vm.c b/vm.c
new file mode 100644
index 0000000..98ac108
--- /dev/null
+++ b/vm.c
@@ -0,0 +1,382 @@
+#include "param.h"
+#include "types.h"
+#include "defs.h"
+#include "x86.h"
+#include "mmu.h"
+#include "proc.h"
+#include "elf.h"
+
+// The mappings from logical to linear are one to one (i.e.,
+// segmentation doesn't do anything).
+// There is one page table per process, plus one that's used
+// when a CPU is not running any process (kpgdir).
+// A user process uses the same page table as the kernel; the
+// page protection bits prevent it from using anything other
+// than its memory.
+//
+// setupkvm() and exec() set up every page table like this:
+// 0..640K : user memory (text, data, stack, heap)
+// 640K..1M : mapped direct (for IO space)
+// 1M..kernend : mapped direct (for the kernel's text and data)
+// kernend..PHYSTOP : mapped direct (kernel heap and user pages)
+// 0xfe000000..0 : mapped direct (devices such as ioapic)
+//
+// The kernel allocates memory for its heap and for user memory
+// between kernend and the end of physical memory (PHYSTOP).
+// The virtual address space of each user program includes the kernel
+// (which is inaccessible in user mode). The user program addresses
+// range from 0 up to 640KB (USERTOP), which is where the I/O hole starts
+// (both in physical memory and in the kernel's virtual address
+// space).
+
+// Assumed top of physical memory (16MB); pminit() hands everything
+// between kernend and this address to the allocator.
+#define PHYSTOP 0x1000000
+// First address past user memory (640KB); allocuvm()/deallocuvm()
+// refuse ranges that extend beyond it.
+#define USERTOP 0xA0000
+
+// Kernel layout, filled in once by pminit() and read by setupkvm().
+static uint kerntext; // Linker starts kernel at 1MB
+static uint kerntsz; // memsz of the first ELF program header
+static uint kerndata; // va of the second ELF program header
+static uint kerndsz; // memsz of the second ELF program header
+static uint kernend; // page boundary following the linker symbol `end`
+static uint freesz; // PHYSTOP - kernend
+static pde_t *kpgdir; // for use in scheduler()
+
+// Look up the PTE for linear address va in page directory pgdir
+// and return a pointer to it.  When the page-table page covering
+// va is missing, it is allocated and zeroed if create is non-zero;
+// otherwise (or if that allocation fails) the result is 0.
+static pte_t *
+walkpgdir(pde_t *pgdir, const void *va, int create)
+{
+  pde_t *pde = &pgdir[PDX(va)];
+  pte_t *pgtab;
+  uint page;
+
+  // Fast path: the page-table page already exists.
+  if(*pde & PTE_P){
+    pgtab = (pte_t*) PTE_ADDR(*pde);
+    return &pgtab[PTX(va)];
+  }
+
+  if(!create)
+    return 0;
+  page = (uint) kalloc(PGSIZE);
+  if(!page)
+    return 0;
+  pgtab = (pte_t*) page;
+
+  // A fresh table must start with every PTE_P bit clear.
+  memset(pgtab, 0, PGSIZE);
+
+  // The directory entry is deliberately permissive; the individual
+  // page table entries can still narrow the permissions.
+  *pde = PADDR(page) | PTE_P | PTE_W | PTE_U;
+  return &pgtab[PTX(va)];
+}
+
+// Install PTEs in pgdir so that the linear range [la, la+size)
+// refers to physical memory starting at pa, with permission bits
+// perm (PTE_P is added).  la and size need not be page-aligned.
+// Returns 1 on success, 0 if a page-table page could not be
+// allocated; panics if any page in the range is already mapped.
+static int
+mappages(pde_t *pgdir, void *la, uint size, uint pa, int perm)
+{
+  char *a = PGROUNDDOWN(la);
+  char *last = PGROUNDDOWN(la + size - 1);
+  pte_t *pte;
+
+  // The loop ends on equality rather than an ordering test, so a
+  // range that finishes at the very top of the address space (where
+  // a + PGSIZE would wrap to 0) still terminates.
+  for(;; a += PGSIZE, pa += PGSIZE){
+    pte = walkpgdir(pgdir, a, 1);
+    if(pte == 0)
+      return 0;
+    if(*pte & PTE_P)
+      panic("remap");
+    *pte = pa | perm | PTE_P;
+    if(a == last)
+      return 1;
+  }
+}
+
+// Set up CPU's kernel segment descriptors.
+// Run once at boot time on each CPU.
+// Fills in the calling CPU's GDT, loads it, points %gs-based
+// storage at this CPU's struct, and initialises cpu and proc.
+void
+ksegment(void)
+{
+ struct cpu *c;
+
+ // Map virtual addresses to linear addresses using identity map.
+ // Cannot share a CODE descriptor for both kernel and user
+ // because it would have to have DPL_USR, but the CPU forbids
+ // an interrupt from CPL=0 to DPL=3.
+ c = &cpus[cpunum()];
+ // All four code/data segments have base 0 and limit 0xffffffff;
+ // kernel and user versions differ only in the privilege level.
+ c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0);
+ c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0);
+ c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, DPL_USER);
+ c->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER);
+
+ // map cpu, and curproc
+ // (a writable segment based at &c->cpu with a limit argument of 8)
+ c->gdt[SEG_KCPU] = SEG(STA_W, &c->cpu, 8, 0);
+
+ lgdt(c->gdt, sizeof(c->gdt));
+ // Selector for the SEG_KCPU descriptor (index shifted past the
+ // low three selector bits).
+ loadgs(SEG_KCPU << 3);
+
+ // Initialize cpu-local storage.
+ // NOTE(review): cpu and proc are presumably reached through the
+ // SEG_KCPU segment loaded above, so these stores must come after
+ // loadgs() -- confirm against their declarations.
+ cpu = c;
+ proc = 0;
+}
+
+// Switch h/w page table and TSS registers to point to process p.
+// The TSS kernel stack pointer is taken from p itself (not from the
+// global proc), so the function is correct for any process passed in.
+// Panics if p has no page directory; the check runs before any
+// hardware state is modified.
+void
+switchuvm(struct proc *p)
+{
+  if(p->pgdir == 0)
+    panic("switchuvm: no pgdir\n");
+
+  // The descriptor, task register and CR3 are updated as a unit
+  // between pushcli() and popcli().
+  pushcli();
+
+  // Setup TSS
+  cpu->gdt[SEG_TSS] = SEG16(STS_T32A, &cpu->ts, sizeof(cpu->ts)-1, 0);
+  cpu->gdt[SEG_TSS].s = 0;
+  cpu->ts.ss0 = SEG_KDATA << 3;
+  // Kernel stack used on entry from user mode: top of p's kstack.
+  cpu->ts.esp0 = (uint)p->kstack + KSTACKSIZE;
+  ltr(SEG_TSS << 3);
+
+  lcr3(PADDR(p->pgdir));  // switch to new address space
+  popcli();
+}
+
+// Switch h/w page table register to the kernel-only page table, for when
+// no process is running.  Loads CR3 with the address of kpgdir, which
+// kvmalloc() must have set up beforehand.
+void
+switchkvm(void)
+{
+  lcr3(PADDR(kpgdir));  // Switch to the kernel page table
+}
+
+// Defined further down in this file; declared here so the failure
+// path below can release a partially built page table.
+void freevm(pde_t *pgdir);
+
+// Set up kernel part of a page table.
+// Allocates a page directory and installs the kernel mappings:
+// the I/O hole, kernel text (read-only), kernel data, the
+// dynamically allocated region, and the device area at 0xFE000000.
+// Returns the new page directory, or 0 if memory ran out; on
+// failure everything allocated so far is released again.
+pde_t*
+setupkvm(void)
+{
+  pde_t *pgdir;
+
+  // Allocate page directory
+  if(!(pgdir = (pde_t *) kalloc(PGSIZE)))
+    return 0;
+  memset(pgdir, 0, PGSIZE);
+
+  // The || chain stops at the first mapping that fails, keeping the
+  // same order as separate calls would.
+  if(// Map IO space from 640K to 1Mbyte
+     !mappages(pgdir, (void *)USERTOP, 0x60000, USERTOP, PTE_W) ||
+     // Map kernel text read-only
+     !mappages(pgdir, (void *) kerntext, kerntsz, kerntext, 0) ||
+     // Map kernel data read/write
+     !mappages(pgdir, (void *) kerndata, kerndsz, kerndata, PTE_W) ||
+     // Map dynamically-allocated memory read/write (kernel stacks, user mem)
+     !mappages(pgdir, (void *) kernend, freesz, PADDR(kernend), PTE_W) ||
+     // Map devices such as ioapic, lapic, ...
+     !mappages(pgdir, (void *)0xFE000000, 0x2000000, 0xFE000000, PTE_W)){
+    // Nothing is mapped below USERTOP yet, so freevm() releases only
+    // the page-table pages and the directory itself.
+    freevm(pgdir);
+    return 0;
+  }
+  return pgdir;
+}
+
+// return the physical address that a given user address
+// maps to. the result is also a kernel logical address,
+// since the kernel maps the physical memory allocated to user
+// processes directly.
+// The value returned is the base of the page (the offset of uva
+// within the page is not added).  Returns 0 if uva has no page
+// table entry or the entry is not present.
+char*
+uva2ka(pde_t *pgdir, char *uva)
+{
+  pte_t *pte = walkpgdir(pgdir, uva, 0);
+
+  // The address bits of a non-present entry are meaningless.
+  if(pte == 0 || (*pte & PTE_P) == 0)
+    return 0;
+  return (char *) PTE_ADDR(*pte);
+}
+
+// allocate sz bytes more memory for a process starting at the
+// given user address; allocates physical memory and page
+// table entries. addr and sz need not be page-aligned.
+// it is a no-op for any parts of the requested memory
+// that are already allocated.
+// Returns 1 on success and 0 if the range would extend past
+// USERTOP (including when addr + sz wraps around) or memory
+// runs out.  A zero-length request succeeds without touching
+// the page table.
+int
+allocuvm(pde_t *pgdir, char *addr, uint sz)
+{
+  char *a, *last, *mem;
+  pte_t *pte;
+
+  // Written so that the comparison itself cannot overflow.
+  if((uint)addr > USERTOP || sz > USERTOP - (uint)addr)
+    return 0;
+  // Without this, addr == 0 would make addr + sz - 1 wrap and the
+  // loop below would sweep the whole address space.
+  if(sz == 0)
+    return 1;
+
+  last = PGROUNDDOWN(addr + sz - 1);
+  for(a = PGROUNDDOWN(addr); a <= last; a += PGSIZE){
+    pte = walkpgdir(pgdir, a, 0);
+    if(pte != 0 && (*pte & PTE_P) != 0)
+      continue;  // already backed by a page
+    mem = kalloc(PGSIZE);
+    if(mem == 0){
+      // XXX pages mapped earlier in this call are left in place.
+      return 0;
+    }
+    memset(mem, 0, PGSIZE);
+    if(!mappages(pgdir, a, PGSIZE, PADDR(mem), PTE_W|PTE_U)){
+      // No page-table page could be allocated: give the data page
+      // back instead of leaking it and reporting success.
+      kfree(mem, PGSIZE);
+      return 0;
+    }
+  }
+  return 1;
+}
+
+// deallocate some of the user pages, in response to sbrk()
+// with a negative argument. if addr is not page-aligned,
+// then only deallocates starting at the next page boundary.
+// Returns 1 on success and 0 if the range would extend past
+// USERTOP (including when addr + sz wraps around).  A
+// zero-length request succeeds without freeing anything.
+int
+deallocuvm(pde_t *pgdir, char *addr, uint sz)
+{
+  char *a, *last;
+  pte_t *pte;
+  uint pa;
+
+  // Written so that the comparison itself cannot overflow.
+  if((uint)addr > USERTOP || sz > USERTOP - (uint)addr)
+    return 0;
+  // Without this, addr == 0 would make addr + sz - 1 wrap and the
+  // loop below would free every mapped page it finds.
+  if(sz == 0)
+    return 1;
+
+  last = PGROUNDDOWN(addr + sz - 1);
+  for(a = (char*) PGROUNDUP((uint)addr); a <= last; a += PGSIZE){
+    pte = walkpgdir(pgdir, a, 0);
+    if(pte == 0 || (*pte & PTE_P) == 0)
+      continue;  // nothing mapped here
+    pa = PTE_ADDR(*pte);
+    if(pa == 0)
+      panic("deallocuvm");
+    kfree((void *) pa, PGSIZE);
+    *pte = 0;
+  }
+  return 1;
+}
+
+// Release an entire page table: every physical page mapped below
+// USERTOP, every page-table page, and finally the page directory
+// itself.  Pages mapped at or above USERTOP are unmapped but not
+// freed.  Panics when handed a null pgdir.
+void
+freevm(pde_t *pgdir)
+{
+  uint pdx, ptx;
+
+  if (!pgdir)
+    panic("freevm: no pgdir\n");
+
+  for (pdx = 0; pdx < NPDENTRIES; pdx++) {
+    uint tabaddr = PTE_ADDR(pgdir[pdx]);
+    pte_t *pgtab;
+
+    if (tabaddr == 0)
+      continue;  // no page-table page for this slot
+    pgtab = (pte_t*) tabaddr;
+
+    for (ptx = 0; ptx < NPTENTRIES; ptx++) {
+      uint pa, va;
+
+      if (pgtab[ptx] == 0)
+        continue;
+      pa = PTE_ADDR(pgtab[ptx]);
+      va = PGADDR(pdx, ptx, 0);
+      if (va < USERTOP)  // user memory
+        kfree((void *) pa, PGSIZE);
+      pgtab[ptx] = 0;
+    }
+
+    kfree((void *) tabaddr, PGSIZE);
+    pgdir[pdx] = 0;
+  }
+  kfree((void *) pgdir, PGSIZE);
+}
+
+// Read sz bytes from inode ip, starting at file offset `offset`,
+// into the pages that pgdir maps at addr.  addr must be
+// page-aligned and every page in the range must already have a
+// page table entry (otherwise this panics).  Returns 1 on success,
+// 0 if readi() delivers fewer bytes than requested.
+int
+loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz)
+{
+  uint done, chunk;
+  pte_t *pte;
+
+  if ((uint)addr % PGSIZE != 0)
+    panic("loaduvm: addr must be page aligned\n");
+
+  for (done = 0; done < sz; done += PGSIZE) {
+    pte = walkpgdir(pgdir, addr+done, 0);
+    if (pte == 0)
+      panic("loaduvm: address should exist\n");
+    // Whole pages, except possibly a short final one.
+    chunk = (sz - done < PGSIZE) ? sz - done : PGSIZE;
+    // Read through the page's physical address taken from the PTE.
+    if (readi(ip, (char *) PTE_ADDR(*pte), offset+done, chunk) != chunk)
+      return 0;
+  }
+  return 1;
+}
+
+// Copy sz bytes from init into the pages that pgdir maps starting
+// at user address addr.  Every page in the range must already have
+// a page table entry (otherwise this panics).  addr need not be
+// page-aligned: each step copies at most up to the end of the
+// current page, so a copy never spills past a page boundary.
+void
+inituvm(pde_t *pgdir, char *addr, char *init, uint sz)
+{
+  uint i, n, off;
+  pte_t *pte;
+  char *va;
+
+  for (i = 0; i < sz; i += n) {
+    va = addr + i;
+    if (!(pte = walkpgdir(pgdir, (void *)va, 0)))
+      panic("inituvm: pte should exist\n");
+    off = (uint)va % PGSIZE;
+    // Bytes left in this page, capped by the bytes left to copy.
+    n = PGSIZE - off;
+    if (sz - i < n)
+      n = sz - i;
+    memmove((char *)PTE_ADDR(*pte) + off, init + i, n);
+  }
+}
+
+// given a parent process's page table, create a copy
+// of it for a child.
+// Builds a fresh kernel page table with setupkvm() and, for each
+// present page in the first sz bytes of pgdir, allocates a new
+// page, copies the contents and maps it user-writable at the same
+// address.  Returns the new page directory, or 0 if memory runs
+// out; on failure everything allocated for the copy is released.
+// Panics if a page in the range has no page table entry at all.
+pde_t*
+copyuvm(pde_t *pgdir, uint sz)
+{
+  pde_t *d;
+  pte_t *pte;
+  uint pa, i;
+  char *mem;
+
+  if (!(d = setupkvm()))
+    return 0;
+  for (i = 0; i < sz; i += PGSIZE) {
+    if (!(pte = walkpgdir(pgdir, (void *)i, 0)))
+      panic("copyuvm: pte should exist\n");
+    if (!(*pte & PTE_P))
+      continue;
+    pa = PTE_ADDR(*pte);
+    if (!(mem = kalloc(PGSIZE)))
+      goto bad;
+    memmove(mem, (char *)pa, PGSIZE);
+    if (!mappages(d, (void *)i, PGSIZE, PADDR(mem), PTE_W|PTE_U)) {
+      // mem never made it into d, so freevm() below would miss it.
+      kfree(mem, PGSIZE);
+      goto bad;
+    }
+  }
+  return d;
+
+bad:
+  // Frees the pages copied so far, the page-table pages and d itself.
+  freevm(d);
+  return 0;
+}
+
+// Record the kernel's memory layout and seed the page allocator.
+// Called once during boot.
+// The text and data extents come from the two program headers of
+// the ELF header found at 0x10000; the free region runs from the
+// page boundary after `end` up to PHYSTOP, which is assumed rather
+// than probed.  Panics unless the header has exactly two program
+// headers and the right magic number.
+void
+pminit(void)
+{
+  extern char end[];
+  struct elfhdr *elf;
+  struct proghdr *text, *data;
+
+  elf = (struct elfhdr*)0x10000;  // scratch space
+  if (elf->magic != ELF_MAGIC || elf->phnum != 2)
+    panic("pminit: need a text and data segment\n");
+
+  text = (struct proghdr*)((uchar*)elf + elf->phoff);
+  data = text + 1;
+
+  kerntext = text->va;
+  kerntsz = text->memsz;
+  kerndata = data->va;
+  kerndsz = data->memsz;
+
+  // Advance to the next page boundary; if end is already aligned
+  // this moves a full page further.
+  kernend = ((uint)end + PGSIZE) & ~(PGSIZE-1);
+  freesz = PHYSTOP - kernend;
+
+  kinit((char *)kernend, freesz);
+}
+
+// Allocate one page table for the machine for the kernel address
+// space for scheduler processes.
+// The result is stored in kpgdir.  Panics if the table cannot be
+// built, because vminit() and switchkvm() load kpgdir into CR3
+// without any further check.
+void
+kvmalloc(void)
+{
+  kpgdir = setupkvm();
+  if (kpgdir == 0)
+    panic("kvmalloc: out of memory\n");
+}
+
+// Enable paging on the calling CPU: load kpgdir into CR3, then
+// rewrite CR0 with the PE, PG, AM, WP, NE and MP flags set and the
+// TS and EM flags cleared.
+void
+vminit(void)
+{
+  uint flags;
+
+  // CR3 must hold the page directory before CR0_PG takes effect.
+  lcr3(PADDR(kpgdir));
+
+  flags = rcr0();
+  flags |= CR0_PE|CR0_PG|CR0_AM|CR0_WP|CR0_NE|CR0_MP;
+  flags &= ~(CR0_TS|CR0_EM);
+  lcr0(flags);
+}
+
diff --git a/x86.h b/x86.h
index fcd3062..b9fa8b8 100644
--- a/x86.h
+++ b/x86.h
@@ -121,6 +121,66 @@ sti(void)
asm volatile("sti");
}
+// Load val into control register CR0.
+static inline void lcr0(uint val)
+{
+ asm volatile("movl %0,%%cr0" : : "r" (val));
+}
+
+// Return the current contents of control register CR0.
+static inline uint rcr0(void)
+{
+ uint val;
+ asm volatile("movl %%cr0,%0" : "=r" (val));
+ return val;
+}
+
+// Return the current contents of control register CR2
+// (on x86 this holds the faulting linear address after a page fault).
+static inline uint rcr2(void)
+{
+ uint val;
+ asm volatile("movl %%cr2,%0" : "=r" (val));
+ return val;
+}
+
+// Load val into control register CR3 (the page-directory base register).
+static inline void lcr3(uint val)
+{
+ asm volatile("movl %0,%%cr3" : : "r" (val));
+}
+
+// Return the current contents of control register CR3.
+static inline uint rcr3(void)
+{
+ uint val;
+ asm volatile("movl %%cr3,%0" : "=r" (val));
+ return val;
+}
+
+// Copy val into the %ebp register.
+// NOTE(review): the asm declares no clobbers, so the compiler is not
+// told that %ebp changed -- callers presumably only use this where
+// the surrounding code does not rely on the frame pointer; confirm.
+static inline void lebp(uint val)
+{
+ asm volatile("movl %0,%%ebp" : : "r" (val));
+}
+
+// Return the current value of the %ebp register.
+static inline uint rebp(void)
+{
+ uint val;
+ asm volatile("movl %%ebp,%0" : "=r" (val));
+ return val;
+}
+
+// Copy val into the %esp register.
+// NOTE(review): the asm declares no clobbers, so the compiler is not
+// told that the stack pointer changed -- callers presumably use this
+// only at carefully chosen points; confirm.
+static inline void lesp(uint val)
+{
+ asm volatile("movl %0,%%esp" : : "r" (val));
+}
+
+// Return the current value of the %esp register.
+static inline uint resp(void)
+{
+ uint val;
+ asm volatile("movl %%esp,%0" : "=r" (val));
+ return val;
+}
+
+// Execute a single x86 `pause` instruction (the spin-wait hint).
+static inline void nop_pause(void)
+{
+ asm volatile("pause" : :);
+}
+
//PAGEBREAK: 36
// Layout of the trap frame built on the stack by the
// hardware and by trapasm.S, and passed to trap().