+OBJS = main.o console.o string.o kalloc.o proc.o trapasm.o
+CC = i386-jos-elf-gcc
+LD = i386-jos-elf-ld
+OBJCOPY = i386-jos-elf-objcopy
+OBJDUMP = i386-jos-elf-objdump
+xv6.img : bootblock kernel
+ dd if=/dev/zero of=xv6.img count=10000
+ dd if=bootblock of=xv6.img conv=notrunc
+ dd if=kernel of=xv6.img seek=1 conv=notrunc
+bootblock : bootasm.S bootmain.c
+ $(CC) -O -nostdinc -I. -c bootmain.c
+ $(CC) -nostdinc -I. -c bootasm.S
+ $(LD) -N -e start -Ttext 0x7C00 -o bootblock.o bootasm.o bootmain.o
+ $(OBJDUMP) -S bootblock.o > bootblock.asm
+ $(OBJCOPY) -S -O binary bootblock.o bootblock
+ ./ bootblock
+kernel : $(OBJS)
+ $(LD) -Ttext 0x100000 -e main -o kernel $(OBJS)
+ $(OBJDUMP) -S kernel > kernel.asm
+%.o: %.c
+ $(CC) -nostdinc -I. -O -c -o $@ $<
+# Remove every generated artifact: the boot-block intermediates, all of
+# the kernel objects listed in $(OBJS), the disassembly listings, and
+# the disk image.
+clean :
+ rm -f bootmain.o bootasm.o bootblock.o bootblock bootblock.asm
+ rm -f kernel $(OBJS) kernel.asm xv6.img
diff --git a/Notes b/Notes
new file mode 100644
index 0000000..e5e2c5f
--- /dev/null
+++ b/Notes
@@ -0,0 +1,67 @@
+bootmain.c doesn't work right if the ELF sections aren't
+sector-aligned. so you can't use ld -N. and the sections may also need
+to be non-zero length, only really matters for tiny "kernels".
+kernel loaded at 1 megabyte. stack same place that bootasm.S left it.
+kinit() should find real mem size
+ and rescue useable memory below 1 meg
+no paging, no use of page table hardware, just segments
+no user area: no magic kernel stack mapping
+ so no copying of kernel stack during fork
+ though there is a kernel stack page for each process
+no kernel malloc(), just kalloc() for user core
+user pointers aren't valid in the kernel
+setting up first process
+ we do want a process zero, as template
+ but not runnable
+ just set up return-from-trap frame on new kernel stack
+ fake user program that calls exec
+map text read-only?
+shared text?
+what's on the stack during a trap or sys call?
+ PUSHA before scheduler switch? for callee-saved registers.
+ segment contents?
+ what does iret need to get out of the kernel?
+ how does INT know what kernel stack to use?
+are interrupts turned on in the kernel? probably.
+per-cpu curproc
+one tss per process, or one per cpu?
+one segment array per cpu, or per process?
+pass curproc explicitly, or implicit from cpu #?
+ e.g. argument to newproc()?
+test stack expansion
+test running out of memory, process slots
+we can't really use a separate stack segment, since stack addresses
+need to work correctly as ordinary pointers. the same may be true of
+data vs text. how can we have a gap between data and stack, so that
+both can grow, without committing 4GB of physical memory? does this
+mean we need paging?
+what's the simplest way to add the paging we need?
+ one page table, re-write it each time we leave the kernel?
+ page table per process?
+ probably need to use 0-0xffffffff segments, so that
+ both data and stack pointers always work
+ so is it now worth it to make a process's phys mem contiguous?
+ or could use segment limits and 4 meg pages?
+ but limits would prevent using stack pointers as data pointers
+ how to write-protect text? not important?
+perhaps have fixed-size stack, put it in the data segment?
+oops, if kernel stack is in contiguous user phys mem, then moving
+users' memory (e.g. to expand it) will wreck any pointers into the
+kernel stack.
diff --git a/bootasm.S b/bootasm.S
new file mode 100644
index 0000000..00cbdc9
--- /dev/null
+++ b/bootasm.S
@@ -0,0 +1,109 @@
+#define SEG_NULL \
+ .word 0, 0; \
+ .byte 0, 0, 0, 0
+#define SEG(type,base,lim) \
+ .word (((lim) >> 12) & 0xffff), ((base) & 0xffff); \
+ .byte (((base) >> 16) & 0xff), (0x90 | (type)), \
+ (0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff)
+#define STA_X 0x8 // Executable segment
+#define STA_E 0x4 // Expand down (non-executable segments)
+#define STA_C 0x4 // Conforming code segment (executable only)
+#define STA_W 0x2 // Writeable (non-executable segments)
+#define STA_R 0x2 // Readable (executable segments)
+#define STA_A 0x1 // Accessed
+.set PROT_MODE_CSEG,0x8 # code segment selector
+.set PROT_MODE_DSEG,0x10 # data segment selector
+.set CR0_PE_ON,0x1 # protected mode enable flag
+# This code should be stored in the first sector of the hard disk.
+# After the BIOS initializes the hardware on startup or system reset,
+# it loads this code at physical address 0x7c00 - 0x7dff (512 bytes).
+# Then the BIOS jumps to the beginning of it, address 0x7c00,
+# while running in 16-bit real-mode (8086 compatibility mode).
+# The Code Segment register (CS) is initially zero on entry.
+# This code switches into 32-bit protected mode so that all of
+# memory can be accessed, then calls into C.
+.globl start # Entry point
+start: .code16 # This runs in real mode
+ cli # Disable interrupts
+ cld # String operations increment
+ # Set up the important data segment registers (DS, ES, SS).
+ xorw %ax,%ax # Segment number zero
+ movw %ax,%ds # -> Data Segment
+ movw %ax,%es # -> Extra Segment
+ movw %ax,%ss # -> Stack Segment
+ # Set up the stack pointer, growing downward from 0x7c00.
+ movw $start,%sp # Stack Pointer
+#### Enable A20:
+#### For fascinating historical reasons (related to the fact that
+#### the earliest 8086-based PCs could only address 1MB of physical memory
+#### and subsequent 80286-based PCs wanted to retain maximum compatibility),
+#### physical address line 20 is tied to low when the machine boots.
+#### Obviously this is a bit of a drag for us, especially when trying to
+#### address memory above 1MB. This code undoes this.
+seta20.1: inb $0x64,%al # Get status
+ testb $0x2,%al # Busy?
+ jnz seta20.1 # Yes
+ movb $0xd1,%al # Command: Write
+ outb %al,$0x64 # output port
+seta20.2: inb $0x64,%al # Get status
+ testb $0x2,%al # Busy?
+ jnz seta20.2 # Yes
+ movb $0xdf,%al # Enable
+ outb %al,$0x60 # A20
+#### Switch from real to protected mode
+#### The descriptors in our GDT allow all physical memory to be accessed.
+#### Furthermore, the descriptors have base addresses of 0, so that the
+#### segment translation is a NOP, ie. virtual addresses are identical to
+#### their physical addresses. With this setup, immediately after
+#### enabling protected mode it will still appear to this code
+#### that it is running directly on physical memory with no translation.
+#### This initial NOP-translation setup is required by the processor
+#### to ensure that the transition to protected mode occurs smoothly.
+real_to_prot: cli # Mandatory since we dont set up an IDT
+ lgdt gdtdesc # load GDT -- mandatory in protected mode
+ movl %cr0, %eax # turn on protected mode
+ orl $CR0_PE_ON, %eax #
+ movl %eax, %cr0 #
+ ### CPU magic: jump to relocation, flush prefetch queue, and reload %cs
+ ### Has the effect of just jmp to the next instruction, but simultaneously
+ ### loads CS with $PROT_MODE_CSEG.
+ ljmp $PROT_MODE_CSEG, $protcseg
+#### we are in 32-bit protected mode (hence the .code32)
+ # Set up the protected-mode data segment registers
+ movw $PROT_MODE_DSEG, %ax # Our data segment selector
+ movw %ax, %ds # -> DS: Data Segment
+ movw %ax, %es # -> ES: Extra Segment
+ movw %ax, %fs # -> FS
+ movw %ax, %gs # -> GS
+ movw %ax, %ss # -> SS: Stack Segment
+ call cmain # finish the boot load from C.
+ # cmain() should not return
+spin: jmp spin # ..but in case it does, spin
+.p2align 2 # force 4 byte alignment
+ SEG_NULL # null seg
+ SEG(STA_X|STA_R, 0x0, 0xffffffff) # code seg
+ SEG(STA_W, 0x0, 0xffffffff) # data seg
+ .word 0x17 # sizeof(gdt) - 1
+ .long gdt # address gdt
diff --git a/bootmain.c b/bootmain.c
new file mode 100644
index 0000000..79d769c
--- /dev/null
+++ b/bootmain.c
@@ -0,0 +1,121 @@
+#include <types.h>
+#include <elf.h>
+#include <x86.h>
+ * This is a dirt simple boot loader, whose sole job is to boot
+ * an elf kernel image from the first IDE hard disk.
+ *
+ * * This program(bootasm.S and bootmain.c) is the bootloader. It should
+ * be stored in the first sector of the disk.
+ *
+ * * The 2nd sector onward holds the kernel image.
+ *
+ * * The kernel image must be in ELF format.
+ *
+ * * when the CPU boots it loads the BIOS into memory and executes it
+ *
+ * * the BIOS initializes devices, sets up the interrupt routines, and
+ * reads the first sector of the boot device(e.g., hard-drive)
+ * into memory and jumps to it.
+ *
+ * * Assuming this boot loader is stored in the first sector of the
+ * hard-drive, this code takes over...
+ *
+ * * control starts in bootasm.S -- which sets up protected mode,
+ * and a stack so C code can then run, then calls cmain()
+ *
+ * * cmain() in this file takes over, reads in the kernel and jumps to it.
+ **********************************************************************/
+#define SECTSIZE 512
+#define ELFHDR ((struct Elf *) 0x10000) // scratch space
+void readsect(void*, uint32_t);
+void readseg(uint32_t, uint32_t, uint32_t);
+ struct Proghdr *ph, *eph;
+ // read 1st page off disk
+ readseg((uint32_t) ELFHDR, SECTSIZE*8, 0);
+ // is this a valid ELF?
+ if (ELFHDR->e_magic != ELF_MAGIC)
+ goto bad;
+ // load each program segment (ignores ph flags)
+ ph = (struct Proghdr *) ((uint8_t *) ELFHDR + ELFHDR->e_phoff);
+ eph = ph + ELFHDR->e_phnum;
+ for (; ph < eph; ph++)
+ readseg(ph->p_va, ph->p_memsz, ph->p_offset);
+ // call the entry point from the ELF header
+ // note: does not return!
+ ((void (*)(void)) (ELFHDR->e_entry & 0xFFFFFF))();
+ outw(0x8A00, 0x8A00);
+ outw(0x8A00, 0x8E00);
+ while (1)
+ /* do nothing */;
+// Read 'count' bytes at 'offset' from kernel into virtual address 'va'.
+// Might copy more than asked
+//
+// Data is transferred one whole sector at a time with readsect(). 'va' is
+// rounded down to a sector boundary and 'offset' is converted to a sector
+// number by integer division, so the bytes only land where requested when
+// 'va' and 'offset' have the same position within a sector.
+readseg(uint32_t va, uint32_t count, uint32_t offset)
+ uint32_t end_va;
+ // keep only the low 24 bits of the destination address
+ va &= 0xFFFFFF;
+ // end_va is computed before va is rounded down, so the loop below
+ // covers every sector that overlaps [va, va + count)
+ end_va = va + count;
+ // round down to sector boundary
+ va &= ~(SECTSIZE - 1);
+ // translate from bytes to sectors, and kernel starts at sector 1
+ offset = (offset / SECTSIZE) + 1;
+ // If this is too slow, we could read lots of sectors at a time.
+ // We'd write more to memory than asked, but it doesn't matter --
+ // we load in increasing order.
+ while (va < end_va) {
+ readsect((uint8_t*) va, offset);
+ va += SECTSIZE;
+ offset++;
+ }
+ // wait for disk ready
+ while ((inb(0x1F7) & 0xC0) != 0x40)
+ /* do nothing */;
+// Read the single sector numbered 'offset' into the SECTSIZE bytes at
+// 'dst', by programming I/O ports 0x1F2-0x1F7 and then pulling the data
+// from port 0x1F0.
+readsect(void *dst, uint32_t offset)
+ // wait for disk to be ready
+ waitdisk();
+ outb(0x1F2, 1); // count = 1
+ // the sector number is written one byte per port, low byte first;
+ // the top byte is combined with 0xE0 before being written
+ outb(0x1F3, offset);
+ outb(0x1F4, offset >> 8);
+ outb(0x1F5, offset >> 16);
+ outb(0x1F6, (offset >> 24) | 0xE0);
+ outb(0x1F7, 0x20); // cmd 0x20 - read sectors
+ // wait for disk to be ready
+ waitdisk();
+ // read a sector
+ // (SECTSIZE/4 because insl transfers 32-bit words)
+ insl(0x1F0, dst, SECTSIZE/4);
diff --git a/console.c b/console.c
new file mode 100644
index 0000000..2035611
--- /dev/null
+++ b/console.c
@@ -0,0 +1,108 @@
+#include <types.h>
+#include <x86.h>
+#include "defs.h"
+// Write the character 'c' (only its low 8 bits are used) to the CGA text
+// display at the hardware cursor position, then advance the cursor.
+// '\n' moves the cursor to the start of the next 80-column row instead of
+// drawing anything. When the cursor reaches row 24 the screen contents
+// are moved up by one row.
+cons_putc(int c)
+ int crtport = 0x3d4; // io port of CGA
+ unsigned short *crt = (unsigned short *) 0xB8000; // base of CGA memory
+ int ind;
+ // cursor position, 16 bits, col + 80*row
+ // (register 14 supplies the high byte, register 15 the low byte)
+ outb(crtport, 14);
+ ind = inb(crtport + 1) << 8;
+ outb(crtport, 15);
+ ind |= inb(crtport + 1);
+ c &= 0xff;
+ if(c == '\n'){
+ // go to column 0 of the following row
+ ind -= (ind % 80);
+ ind += 80;
+ } else {
+ c |= 0x0700; // attribute 0x07: light grey on black
+ crt[ind] = c;
+ ind += 1;
+ }
+ if((ind / 80) >= 24){
+ // scroll up
+ // copy rows 1..23 over rows 0..22, then clear from the new cursor
+ // position to the end of row 23
+ memcpy(crt, crt + 80, sizeof(crt[0]) * (23 * 80));
+ ind -= 80;
+ memset(crt + ind, 0, sizeof(crt[0]) * ((24 * 80) - ind));
+ }
+ // write the updated cursor position back, high byte then low byte
+ outb(crtport, 14);
+ outb(crtport + 1, ind >> 8);
+ outb(crtport, 15);
+ outb(crtport + 1, ind);
+// Print the integer 'xx' on the console in the given 'base'.
+// xx: value to print
+// base: radix; must be between 2 and 16 (digits[] has 16 entries).
+// buf[] has room for the 10 and 16 used by cprintf(); a much
+// smaller base could need more than 16 characters.
+// sgn: when non-zero, xx is treated as signed and a negative value is
+// printed with a leading '-'; otherwise xx is printed as unsigned.
+printint(int xx, int base, int sgn)
+ char buf[16];
+ char digits[] = "0123456789ABCDEF";
+ int i = 0, neg = 0;
+ unsigned int x;
+ if(sgn && xx < 0){
+ neg = 1;
+ // negate in unsigned arithmetic: "0 - xx" as an int overflows
+ // when xx is the most negative value
+ x = 0 - (unsigned int) xx;
+ } else {
+ x = xx;
+ }
+ // generate digits least-significant first
+ do {
+ buf[i++] = digits[x % base];
+ } while((x /= base) != 0);
+ if(neg)
+ buf[i++] = '-';
+ // emit the buffer in reverse so the output reads most-significant first
+ while(i > 0){
+ i -= 1;
+ cons_putc(buf[i]);
+ }
+ * print to the console. only understands %d and %x.
+ */
+// Format and print to the console. Ordinary characters are printed as-is.
+// "%d" prints the next argument as a signed decimal, "%x" prints it as
+// unsigned hexadecimal, and "%%" prints a single '%'. Any other character
+// following '%' is dropped without consuming an argument.
+cprintf(char *fmt, ...)
+ int i, state = 0, c;
+ // arguments are fetched from the memory directly after 'fmt'
+ // NOTE(review): assumes the variadic arguments are stored contiguously
+ // after the first parameter -- confirm for the target calling convention
+ unsigned int *ap = (unsigned int *) &fmt + 1;
+ for(i = 0; fmt[i]; i++){
+ c = fmt[i] & 0xff;
+ if(state == 0){
+ // not inside a conversion: '%' starts one, anything else is printed
+ if(c == '%'){
+ state = '%';
+ } else {
+ cons_putc(c);
+ }
+ } else if(state == '%'){
+ // previous character was '%': interpret the conversion letter
+ if(c == 'd'){
+ printint(*ap, 10, 1);
+ ap++;
+ } else if(c == 'x'){
+ printint(*ap, 16, 0);
+ ap++;
+ } else if(c == '%'){
+ cons_putc(c);
+ }
+ state = 0;
+ }
+ }
+// Print the message 's' and a newline on the console, then spin forever.
+// 's' is passed to cprintf() as the format string, so any '%' sequences
+// in it are interpreted (with 0 as the argument).
+panic(char *s)
+ cprintf(s, 0);
+ cprintf("\n", 0);
+ // never returns
+ while(1)
+ ;
diff --git a/defs.h b/defs.h
new file mode 100644
index 0000000..ec41bfe
--- /dev/null
+++ b/defs.h
@@ -0,0 +1,12 @@
+// kalloc.c
+char *kalloc(int n);
+void kfree(char *cp, int len);
+// console.c
+void cprintf(char *fmt, ...);
+void panic(char *s);
+// proc.c
+struct proc;
+void setupsegs(struct proc *p);
+struct proc * newproc(struct proc *op);
diff --git a/elf.h b/elf.h
new file mode 100644
index 0000000..ea9f964
--- /dev/null
+++ b/elf.h
@@ -0,0 +1,43 @@
+#ifndef JOS_INC_ELF_H
+#define JOS_INC_ELF_H
+#define ELF_MAGIC 0x464C457FU /* "\x7FELF" in little endian */
+struct Elf {
+ uint32_t e_magic; // must equal ELF_MAGIC
+ uint8_t e_elf[12];
+ uint16_t e_type;
+ uint16_t e_machine;
+ uint32_t e_version;
+ uint32_t e_entry;
+ uint32_t e_phoff;
+ uint32_t e_shoff;
+ uint32_t e_flags;
+ uint16_t e_ehsize;
+ uint16_t e_phentsize;
+ uint16_t e_phnum;
+ uint16_t e_shentsize;
+ uint16_t e_shnum;
+ uint16_t e_shstrndx;
+struct Proghdr {
+ uint32_t p_type;
+ uint32_t p_offset;
+ uint32_t p_va;
+ uint32_t p_pa;
+ uint32_t p_filesz;
+ uint32_t p_memsz;
+ uint32_t p_flags;
+ uint32_t p_align;
+// Values for Proghdr::p_type
+#define ELF_PROG_LOAD 1
+// Flag bits for Proghdr::p_flags
+#endif /* !JOS_INC_ELF_H */
diff --git a/kalloc.c b/kalloc.c
new file mode 100644
index 0000000..5ea38fd
--- /dev/null
+++ b/kalloc.c
@@ -0,0 +1,158 @@
+ * physical memory allocator, intended to be used to allocate
+ * memory for user processes. allocates in 4096-byte "pages".
+ * free list is sorted and combines adjacent pages into
+ * long runs, to make it easier to allocate big segments.
+ * one reason the page size is 4k is that the x86 segment size
+ * granularity is 4k.
+ */
+#include "param.h"
+#include "types.h"
+#include "defs.h"
+struct run {
+ struct run *next;
+ int len; // bytes
+struct run *freelist;
+void ktest();
+ * initialize free list of physical pages. this code
+ * cheats by just considering the one megabyte of pages
+ * after _end.
+ */
+ extern int end;
+ unsigned mem;
+ char *start;
+ // the free region begins at the address of the 'end' symbol
+ start = (char *) &end;
+ // move forward to a page boundary; an address that is already
+ // page-aligned is advanced by one whole page
+ start = (char *) (((unsigned)start + PAGE) & ~(PAGE-1));
+ // hard-coded number of pages handed to the allocator
+ mem = 256; // XXX
+ cprintf("mem = %d\n", mem * PAGE);
+ // seed the free list with one run covering all of the pages
+ kfree(start, mem * PAGE);
+ // run the allocator self-test
+ ktest();
+// Return the 'len' bytes starting at 'cp' to the free list.
+// 'len' must be a multiple of PAGE, otherwise panics with "kfree".
+// The list is walked from the head. The freed range is merged with a
+// run that begins exactly at its end or ends exactly at its start,
+// inserted in front of the first run that lies beyond it, or appended at
+// the tail if no such run exists. Panics with "freeing free page" if
+// 'cp' lies inside an existing free run.
+kfree(char *cp, int len)
+ struct run **rr;
+ struct run *p = (struct run *) cp;
+ struct run *pend = (struct run *) (cp + len);
+ if(len % PAGE)
+ panic("kfree");
+ // rr points at the link that refers to the run being examined, so the
+ // list can be modified in place
+ rr = &freelist;
+ while(*rr){
+ struct run *rend = (struct run *) ((char *)(*rr) + (*rr)->len);
+ if(p >= *rr && p < rend)
+ panic("freeing free page");
+ if(pend == *rr){
+ // freed range ends where this run starts: absorb the run into p
+ p->len = len + (*rr)->len;
+ p->next = (*rr)->next;
+ *rr = p;
+ return;
+ }
+ if(pend < *rr){
+ // freed range lies entirely before this run: insert p ahead of it
+ p->len = len;
+ p->next = *rr;
+ *rr = p;
+ return;
+ }
+ if(p == rend){
+ // freed range starts where this run ends: grow the run, and also
+ // absorb the following run if the two now touch
+ (*rr)->len += len;
+ if((*rr)->next && (*rr)->next == pend){
+ (*rr)->len += (*rr)->next->len;
+ (*rr)->next = (*rr)->next->next;
+ }
+ return;
+ }
+ rr = &((*rr)->next);
+ }
+ // reached the end of the list: p becomes the last run
+ p->len = len;
+ p->next = 0;
+ *rr = p;
+ * allocate n bytes of physical memory.
+ * returns a kernel-segment pointer.
+ * returns 0 if there's no run that's big enough.
+ */
+// First-fit allocation of 'n' bytes from the free list.
+// 'n' must be a multiple of PAGE, otherwise panics with "kalloc".
+// Returns 0 when no single run is at least 'n' bytes long.
+char *
+kalloc(int n)
+ struct run **rr;
+ if(n % PAGE)
+ panic("kalloc");
+ rr = &freelist;
+ while(*rr){
+ struct run *r = *rr;
+ if(r->len == n){
+ // exact fit: unlink the whole run and hand it out
+ *rr = r->next;
+ return (char *) r;
+ }
+ if(r->len > n){
+ // larger run: shrink it and hand out its last n bytes, so the
+ // run header at the start stays in place
+ char *p = (char *)r + (r->len - n);
+ r->len -= n;
+ return p;
+ }
+ rr = &(*rr)->next;
+ }
+ return 0;
+ // Self-test of kalloc()/kfree(); panics on a detected failure and
+ // prints "ktest ok" when every check passes.
+ char *p1, *p2, *p3;
+ // test coalescing
+ // free a 4-page allocation one page at a time, out of order, and check
+ // that a 4-page request then returns the same address
+ p1 = kalloc(4 * PAGE);
+ kfree(p1 + 3*PAGE, PAGE);
+ kfree(p1 + 2*PAGE, PAGE);
+ kfree(p1, PAGE);
+ kfree(p1 + PAGE, PAGE);
+ p2 = kalloc(4 * PAGE);
+ if(p2 != p1)
+ panic("ktest");
+ kfree(p2, 4 * PAGE);
+ // test finding first run that fits
+ p1 = kalloc(1 * PAGE);
+ p2 = kalloc(1 * PAGE);
+ kfree(p1, PAGE);
+ p3 = kalloc(2 * PAGE);
+ kfree(p2, PAGE);
+ kfree(p3, 2 * PAGE);
+ // test running out of memory
+ // allocate single pages until kalloc() fails, chaining them together
+ // through a pointer stored at the start of each page
+ p1 = 0;
+ while(1){
+ p2 = kalloc(PAGE);
+ if(p2 == 0)
+ break;
+ *(char **)p2 = p1;
+ p1 = p2;
+ }
+ // walk the chain and free every page again
+ while(p1){
+ p2 = *(char **)p1;
+ kfree(p1, PAGE);
+ p1 = p2;
+ }
+ // after freeing everything a 20-page run must be available again
+ p1 = kalloc(PAGE * 20);
+ if(p1 == 0)
+ panic("ktest2");
+ kfree(p1, PAGE * 20);
+ cprintf("ktest ok\n");
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..a1c08c9
--- /dev/null
+++ b/main.c
@@ -0,0 +1,40 @@
+#include "types.h"
+#include "param.h"
+#include "mmu.h"
+#include "proc.h"
+#include "defs.h"
+#include "x86.h"
+char junk1[20000];
+char junk2[20000] = { 1 };
+ struct proc *p;
+ cprintf("\nxV6\n\n");
+ // initialize physical memory allocator
+ kinit();
+ // create fake process zero
+ // it is given one page of zeroed memory and its own kernel stack, and
+ // is left in the WAITING state
+ p = &proc[0];
+ p->state = WAITING;
+ p->sz = PAGE;
+ p->mem = kalloc(p->sz);
+ memset(p->mem, 0, p->sz);
+ p->kstack = kalloc(KSTACKSIZE);
+ // the trap frame occupies the top of the kernel stack
+ p->tf = (struct Trapframe *) (p->kstack + KSTACKSIZE - sizeof(struct Trapframe));
+ memset(p->tf, 0, sizeof(struct Trapframe));
+ // user data/code selectors with the low two bits set to 3
+ p->tf->tf_es = p->tf->tf_ds = p->tf->tf_ss = (SEG_UDATA << 3) | 3;
+ p->tf->tf_cs = (SEG_UCODE << 3) | 3;
+ p->tf->tf_eflags = FL_IF;
+ setupsegs(p);
+ // make a new process from process zero
+ // NOTE(review): newproc() can return 0; the result is used unchecked
+ p = newproc(&proc[0]);
+ // xxx copy instructions to p->mem
+ // trap frame: instruction pointer 0, stack pointer at the end of the
+ // process's memory
+ p->tf->tf_eip = 0;
+ p->tf->tf_esp = p->sz;
+ swtch(&proc[0]);
diff --git a/mmu.h b/mmu.h
new file mode 100644
index 0000000..776db23
--- /dev/null
+++ b/mmu.h
@@ -0,0 +1,308 @@
+ * This file contains definitions for the x86 memory management unit (MMU),
+ * including paging- and segmentation-related data structures and constants,
+ * the %cr0, %cr4, and %eflags registers, and traps.
+ */
+ *
+ * Part 1. Paging data structures and constants.
+ *
+ */
+// A linear address 'la' has a three-part structure as follows:
+// +--------10------+-------10-------+---------12----------+
+// | Page Directory | Page Table | Offset within Page |
+// | Index | Index | |
+// +----------------+----------------+---------------------+
+// \--- PDX(la) --/ \--- PTX(la) --/ \---- PGOFF(la) ----/
+// \----------- PPN(la) -----------/
+// The PDX, PTX, PGOFF, and PPN macros decompose linear addresses as shown.
+// To construct a linear address la from PDX(la), PTX(la), and PGOFF(la),
+// use PGADDR(PDX(la), PTX(la), PGOFF(la)).
+// page number field of address
+#define PPN(la) (((uintptr_t) (la)) >> PTXSHIFT)
+#define VPN(la) PPN(la) // used to index into vpt[]
+// page directory index
+#define PDX(la) ((((uintptr_t) (la)) >> PDXSHIFT) & 0x3FF)
+#define VPD(la) PDX(la) // used to index into vpd[]
+// page table index
+#define PTX(la) ((((uintptr_t) (la)) >> PTXSHIFT) & 0x3FF)
+// offset in page
+#define PGOFF(la) (((uintptr_t) (la)) & 0xFFF)
+// construct linear address from indexes and offset
+#define PGADDR(d, t, o) ((void*) ((d) << PDXSHIFT | (t) << PTXSHIFT | (o)))
+// Page directory and page table constants.
+#define NPDENTRIES 1024 // page directory entries per page directory
+#define NPTENTRIES 1024 // page table entries per page table
+#define PGSIZE 4096 // bytes mapped by a page
+#define PGSHIFT 12 // log2(PGSIZE)
+#define PTSIZE (PGSIZE*NPTENTRIES) // bytes mapped by a page directory entry
+#define PTSHIFT 22 // log2(PTSIZE)
+#define PTXSHIFT 12 // offset of PTX in a linear address
+#define PDXSHIFT 22 // offset of PDX in a linear address
+// Page table/directory entry flags.
+#define PTE_P 0x001 // Present
+#define PTE_W 0x002 // Writeable
+#define PTE_U 0x004 // User
+#define PTE_PWT 0x008 // Write-Through
+#define PTE_PCD 0x010 // Cache-Disable
+#define PTE_A 0x020 // Accessed
+#define PTE_D 0x040 // Dirty
+#define PTE_PS 0x080 // Page Size
+#define PTE_MBZ 0x180 // Bits must be zero
+// The PTE_AVAIL bits aren't used by the kernel or interpreted by the
+// hardware, so user processes are allowed to set them arbitrarily.
+#define PTE_AVAIL 0xE00 // Available for software use
+// Only flags in PTE_USER may be used in system calls.
+// address in page table entry
+#define PTE_ADDR(pte) ((physaddr_t) (pte) & ~0xFFF)
+// Control Register flags
+#define CR0_PE 0x00000001 // Protection Enable
+#define CR0_MP 0x00000002 // Monitor coProcessor
+#define CR0_EM 0x00000004 // Emulation
+#define CR0_TS 0x00000008 // Task Switched
+#define CR0_ET 0x00000010 // Extension Type
+#define CR0_NE 0x00000020 // Numeric Error
+#define CR0_WP 0x00010000 // Write Protect
+#define CR0_AM 0x00040000 // Alignment Mask
+#define CR0_NW 0x20000000 // Not Writethrough
+#define CR0_CD 0x40000000 // Cache Disable
+#define CR0_PG 0x80000000 // Paging
+#define CR4_PCE 0x00000100 // Performance counter enable
+#define CR4_MCE 0x00000040 // Machine Check Enable
+#define CR4_PSE 0x00000010 // Page Size Extensions
+#define CR4_DE 0x00000008 // Debugging Extensions
+#define CR4_TSD 0x00000004 // Time Stamp Disable
+#define CR4_PVI 0x00000002 // Protected-Mode Virtual Interrupts
+#define CR4_VME 0x00000001 // V86 Mode Extensions
+// Eflags register
+#define FL_CF 0x00000001 // Carry Flag
+#define FL_PF 0x00000004 // Parity Flag
+#define FL_AF 0x00000010 // Auxiliary carry Flag
+#define FL_ZF 0x00000040 // Zero Flag
+#define FL_SF 0x00000080 // Sign Flag
+#define FL_TF 0x00000100 // Trap Flag
+#define FL_IF 0x00000200 // Interrupt Flag
+#define FL_DF 0x00000400 // Direction Flag
+#define FL_OF 0x00000800 // Overflow Flag
+#define FL_IOPL_MASK 0x00003000 // I/O Privilege Level bitmask
+#define FL_IOPL_0 0x00000000 // IOPL == 0
+#define FL_IOPL_1 0x00001000 // IOPL == 1
+#define FL_IOPL_2 0x00002000 // IOPL == 2
+#define FL_IOPL_3 0x00003000 // IOPL == 3
+#define FL_NT 0x00004000 // Nested Task
+#define FL_RF 0x00010000 // Resume Flag
+#define FL_VM 0x00020000 // Virtual 8086 mode
+#define FL_AC 0x00040000 // Alignment Check
+#define FL_VIF 0x00080000 // Virtual Interrupt Flag
+#define FL_VIP 0x00100000 // Virtual Interrupt Pending
+#define FL_ID 0x00200000 // ID flag
+// Page fault error codes
+#define FEC_PR 0x1 // Page fault caused by protection violation
+#define FEC_WR 0x2 // Page fault caused by a write
+#define FEC_U 0x4 // Page fault occurred while in user mode
+ *
+ * Part 2. Segmentation data structures and constants.
+ *
+ */
+#ifdef __ASSEMBLER__
+ * Macros to build GDT entries in assembly.
+ */
+#define SEG_NULL \
+ .word 0, 0; \
+ .byte 0, 0, 0, 0
+#define SEG(type,base,lim) \
+ .word (((lim) >> 12) & 0xffff), ((base) & 0xffff); \
+ .byte (((base) >> 16) & 0xff), (0x90 | (type)), \
+ (0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff)
+#else // not __ASSEMBLER__
+// Segment Descriptors
+struct Segdesc {
+ unsigned sd_lim_15_0 : 16; // Low bits of segment limit
+ unsigned sd_base_15_0 : 16; // Low bits of segment base address
+ unsigned sd_base_23_16 : 8; // Middle bits of segment base address
+ unsigned sd_type : 4; // Segment type (see STS_ constants)
+ unsigned sd_s : 1; // 0 = system, 1 = application
+ unsigned sd_dpl : 2; // Descriptor Privilege Level
+ unsigned sd_p : 1; // Present
+ unsigned sd_lim_19_16 : 4; // High bits of segment limit
+ unsigned sd_avl : 1; // Unused (available for software use)
+ unsigned sd_rsv1 : 1; // Reserved
+ unsigned sd_db : 1; // 0 = 16-bit segment, 1 = 32-bit segment
+ unsigned sd_g : 1; // Granularity: limit scaled by 4K when set
+ unsigned sd_base_31_24 : 8; // High bits of segment base address
+// Null segment
+#define SEG_NULL (struct Segdesc){ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+// Segment that is loadable but faults when used
+#define SEG_FAULT (struct Segdesc){ 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0 }
+// Normal segment
+#define SEG(type, base, lim, dpl) (struct Segdesc) \
+{ ((lim) >> 12) & 0xffff, (base) & 0xffff, ((base) >> 16) & 0xff, \
+ type, 1, dpl, 1, (unsigned) (lim) >> 28, 0, 0, 1, 1, \
+ (unsigned) (base) >> 24 }
+#define SEG16(type, base, lim, dpl) (struct Segdesc) \
+{ (lim) & 0xffff, (base) & 0xffff, ((base) >> 16) & 0xff, \
+ type, 1, dpl, 1, (unsigned) (lim) >> 16, 0, 0, 1, 0, \
+ (unsigned) (base) >> 24 }
+#endif /* !__ASSEMBLER__ */
+// Application segment type bits
+#define STA_X 0x8 // Executable segment
+#define STA_E 0x4 // Expand down (non-executable segments)
+#define STA_C 0x4 // Conforming code segment (executable only)
+#define STA_W 0x2 // Writeable (non-executable segments)
+#define STA_R 0x2 // Readable (executable segments)
+#define STA_A 0x1 // Accessed
+// System segment type bits
+#define STS_T16A 0x1 // Available 16-bit TSS
+#define STS_LDT 0x2 // Local Descriptor Table
+#define STS_T16B 0x3 // Busy 16-bit TSS
+#define STS_CG16 0x4 // 16-bit Call Gate
+#define STS_TG 0x5 // Task Gate
+#define STS_IG16 0x6 // 16-bit Interrupt Gate
+#define STS_TG16 0x7 // 16-bit Trap Gate
+#define STS_T32A 0x9 // Available 32-bit TSS
+#define STS_T32B 0xB // Busy 32-bit TSS
+#define STS_CG32 0xC // 32-bit Call Gate
+#define STS_IG32 0xE // 32-bit Interrupt Gate
+#define STS_TG32 0xF // 32-bit Trap Gate
+ *
+ * Part 3. Traps.
+ *
+ */
+#ifndef __ASSEMBLER__
+// Task state segment format (as described by the Pentium architecture book)
+struct Taskstate {
+ uint32_t ts_link; // Old ts selector
+ uintptr_t ts_esp0; // Stack pointers and segment selectors
+ uint16_t ts_ss0; // after an increase in privilege level
+ uint16_t ts_padding1;
+ uintptr_t ts_esp1;
+ uint16_t ts_ss1;
+ uint16_t ts_padding2;
+ uintptr_t ts_esp2;
+ uint16_t ts_ss2;
+ uint16_t ts_padding3;
+ physaddr_t ts_cr3; // Page directory base
+ uintptr_t ts_eip; // Saved state from last task switch
+ uint32_t ts_eflags;
+ uint32_t ts_eax; // More saved state (registers)
+ uint32_t ts_ecx;
+ uint32_t ts_edx;
+ uint32_t ts_ebx;
+ uintptr_t ts_esp;
+ uintptr_t ts_ebp;
+ uint32_t ts_esi;
+ uint32_t ts_edi;
+ uint16_t ts_es; // Even more saved state (segment selectors)
+ uint16_t ts_padding4;
+ uint16_t ts_cs;
+ uint16_t ts_padding5;
+ uint16_t ts_ss;
+ uint16_t ts_padding6;
+ uint16_t ts_ds;
+ uint16_t ts_padding7;
+ uint16_t ts_fs;
+ uint16_t ts_padding8;
+ uint16_t ts_gs;
+ uint16_t ts_padding9;
+ uint16_t ts_ldt;
+ uint16_t ts_padding10;
+ uint16_t ts_t; // Trap on task switch
+ uint16_t ts_iomb; // I/O map base address
+// Gate descriptors for interrupts and traps
+struct Gatedesc {
+ unsigned gd_off_15_0 : 16; // low 16 bits of offset in segment
+ unsigned gd_ss : 16; // segment selector
+ unsigned gd_args : 5; // # args, 0 for interrupt/trap gates
+ unsigned gd_rsv1 : 3; // reserved(should be zero I guess)
+ unsigned gd_type : 4; // type(STS_{TG,IG32,TG32})
+ unsigned gd_s : 1; // must be 0 (system)
+ unsigned gd_dpl : 2; // descriptor(meaning new) privilege level
+ unsigned gd_p : 1; // Present
+ unsigned gd_off_31_16 : 16; // high bits of offset in segment
+// Set up a normal interrupt/trap gate descriptor.
+// - istrap: 1 for a trap (= exception) gate, 0 for an interrupt gate.
+// - sel: Code segment selector for interrupt/trap handler
+// - off: Offset in code segment for interrupt/trap handler
+// - dpl: Descriptor Privilege Level -
+// the privilege level required for software to invoke
+// this interrupt/trap gate explicitly using an int instruction.
+#define SETGATE(gate, istrap, sel, off, dpl) \
+{ \
+ (gate).gd_off_15_0 = (uint32_t) (off) & 0xffff; \
+ (gate).gd_ss = (sel); \
+ (gate).gd_args = 0; \
+ (gate).gd_rsv1 = 0; \
+ (gate).gd_type = (istrap) ? STS_TG32 : STS_IG32; \
+ (gate).gd_s = 0; \
+ (gate).gd_dpl = (dpl); \
+ (gate).gd_p = 1; \
+ (gate).gd_off_31_16 = (uint32_t) (off) >> 16; \
+// Set up a call gate descriptor.
+#define SETCALLGATE(gate, ss, off, dpl) \
+{ \
+ (gate).gd_off_15_0 = (uint32_t) (off) & 0xffff; \
+ (gate).gd_ss = (ss); \
+ (gate).gd_args = 0; \
+ (gate).gd_rsv1 = 0; \
+ (gate).gd_type = STS_CG32; \
+ (gate).gd_s = 0; \
+ (gate).gd_dpl = (dpl); \
+ (gate).gd_p = 1; \
+ (gate).gd_off_31_16 = (uint32_t) (off) >> 16; \
+// Pseudo-descriptors used for LGDT, LLDT and LIDT instructions.
+struct Pseudodesc {
+ uint16_t pd__garbage; // LGDT supposed to be from address 4N+2
+ uint16_t pd_lim; // Limit
+ uint32_t pd_base __attribute__ ((packed)); // Base address
+#define PD_ADDR(desc) (&(desc).pd_lim)
+#endif /* !__ASSEMBLER__ */
diff --git a/param.h b/param.h
new file mode 100644
index 0000000..798dc5b
--- /dev/null
+++ b/param.h
@@ -0,0 +1,3 @@
+#define NPROC 64
+#define PAGE 4096
diff --git a/proc.c b/proc.c
new file mode 100644
index 0000000..fda834e
--- /dev/null
+++ b/proc.c
@@ -0,0 +1,112 @@
+#include "types.h"
+#include "mmu.h"
+#include "x86.h"
+#include "proc.h"
+#include "param.h"
+#include "defs.h"
+struct proc proc[NPROC];
+ * set up a process's task state and segment descriptors
+ * correctly, given its current size and address in memory.
+ * this should be called whenever the latter change.
+ * doesn't change the cpu's current segmentation setup.
+ */
+// Fill in p->ts, p->gdt and p->gdt_pd from p->kstack, p->mem and p->sz.
+// The kernel code/data segments span the whole address space; the user
+// code/data segments start at p->mem and cover exactly p->sz bytes
+// (rounded to the 4K segment granularity). p->sz must be non-zero.
+setupsegs(struct proc *p)
+ // Segment limits are inclusive (the offset of the last valid byte), so
+ // the limit passed below is size - 1. Refuse a zero size, which would
+ // wrap around to a limit of 0xffffffff.
+ if(p->sz == 0)
+ panic("setupsegs");
+ memset(&p->ts, 0, sizeof(struct Taskstate));
+ // stack segment and stack pointer to use on entry to the kernel:
+ // the top of this process's kernel stack
+ p->ts.ts_ss0 = SEG_KDATA << 3;
+ p->ts.ts_esp0 = (unsigned)(p->kstack + KSTACKSIZE);
+ memset(&p->gdt, 0, sizeof(p->gdt));
+ p->gdt[0] = SEG_NULL;
+ p->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0);
+ p->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0);
+ // byte-granular descriptor for the task state; SEG16 sets the
+ // application bit, so clear sd_s afterwards to mark it a system segment
+ p->gdt[SEG_TSS] = SEG16(STS_T32A, (unsigned) &p->ts, sizeof(p->ts) - 1, 0);
+ p->gdt[SEG_TSS].sd_s = 0;
+ // user segments: passing p->sz itself as the limit would expose one
+ // extra 4K page beyond the process's memory
+ p->gdt[SEG_UCODE] = SEG(STA_X|STA_R, (unsigned)p->mem, p->sz - 1, 3);
+ p->gdt[SEG_UDATA] = SEG(STA_W, (unsigned)p->mem, p->sz - 1, 3);
+ // pseudo-descriptor that swtch() hands to lgdt
+ p->gdt_pd.pd__garbage = 0;
+ p->gdt_pd.pd_lim = sizeof(p->gdt) - 1;
+ p->gdt_pd.pd_base = (unsigned) p->gdt;
+extern void trapret();
+ * internal fork(). does not copy kernel stack; instead,
+ * sets up the stack to return as if from system call.
+ */
+// Create a new process as a copy of 'op'.
+// Returns the new proc entry (marked RUNNABLE), or 0 if there is no
+// UNUSED slot in proc[1..NPROC-1] or memory could not be allocated.
+struct proc *
+newproc(struct proc *op)
+ struct proc *np;
+ unsigned *sp;
+ // look for a free slot; proc[0] is never considered
+ for(np = &proc[1]; np < &proc[NPROC]; np++)
+ if(np->state == UNUSED)
+ break;
+ if(np >= &proc[NPROC])
+ return 0;
+ // copy the parent's memory image
+ np->sz = op->sz;
+ np->mem = kalloc(op->sz);
+ if(np->mem == 0)
+ return 0;
+ memcpy(np->mem, op->mem, np->sz);
+ np->kstack = kalloc(KSTACKSIZE);
+ if(np->kstack == 0){
+ // undo the memory allocation before giving up
+ kfree(np->mem, op->sz);
+ return 0;
+ }
+ // the trap frame sits at the top of the new kernel stack
+ np->tf = (struct Trapframe *) (np->kstack + KSTACKSIZE - sizeof(struct Trapframe));
+ setupsegs(np);
+ np->state = RUNNABLE;
+ // set up kernel stack to return to user space
+ *(np->tf) = *(op->tf);
+ // push two words just below the trap frame
+ sp = (unsigned *) np->tf;
+ *(--sp) = (unsigned) &trapret; // for return from swtch()
+ *(--sp) = 0; // previous bp for leave in swtch()
+ // both saved registers point at the lower of the two pushed words
+ np->esp = (unsigned) sp;
+ np->ebp = (unsigned) sp;
+ cprintf("esp %x ebp %x mem %x\n", np->esp, np->ebp, np->mem);
+ return np;
+ * find a runnable process and switch to it.
+ */
+// Save the kernel stack and frame pointers of 'op', pick a RUNNABLE
+// process, and load that process's GDT, stack pointer and frame pointer.
+// Loops until a RUNNABLE process is found.
+swtch(struct proc *op)
+ struct proc *np;
+ while(1){
+ // scan the table starting just after op, wrapping from the end of
+ // proc[] back to proc[0]
+ for(np = op + 1; np != op; np++){
+ if(np == &proc[NPROC])
+ np = &proc[0];
+ if(np->state == RUNNABLE)
+ break;
+ }
+ if(np->state == RUNNABLE)
+ break;
+ // idle...
+ }
+ // remember where op's kernel stack currently is
+ op->ebp = read_ebp();
+ op->esp = read_esp();
+ // XXX callee-saved registers?
+ // this happens to work, but probably isn't safe:
+ // it's not clear that np->ebp will evaluate
+ // correctly after changing the stack pointer.
+ // lgdt is given the address of pd_lim, i.e. the limit/base pair that
+ // follows the padding field of the pseudo-descriptor
+ asm volatile("lgdt %0" : : "g" (np->gdt_pd.pd_lim));
+ asm volatile("movl %0, %%esp" : : "g" (np->esp));
+ asm volatile("movl %0, %%ebp" : : "g" (np->ebp));
diff --git a/proc.h b/proc.h
new file mode 100644
index 0000000..e5c230c
--- /dev/null
+++ b/proc.h
@@ -0,0 +1,34 @@
+ * p->mem:
+ * text
+ * original data and bss
+ * fixed-size stack
+ * expandable heap
+ */
+ * segments in proc->gdt
+ */
+#define SEG_KCODE 1 // kernel code
+#define SEG_KDATA 2 // kernel data+stack
+#define SEG_UCODE 3
+#define SEG_UDATA 4
+#define SEG_TSS 5 // this process's task state
+#define NSEGS 6
+// Per-process state; one entry of the proc[] table.
+struct proc{
+ char *mem; // start of process's physical memory
+ unsigned sz; // total size of mem in bytes (the kernel stack is allocated separately)
+ char *kstack; // kernel stack, separate from mem so it doesn't move
+ enum { UNUSED, RUNNABLE, WAITING } state;
+ struct Taskstate ts; // only to give cpu address of kernel stack
+ struct Segdesc gdt[NSEGS]; // this process's descriptor table, indexed by the SEG_* constants
+ struct Pseudodesc gdt_pd; // limit and base of gdt, as loaded by lgdt
+ unsigned esp; // kernel stack pointer
+ unsigned ebp; // kernel frame pointer
+ struct Trapframe *tf; // points into kstack, used to find user regs
+extern struct proc proc[];
diff --git a/ b/
new file mode 100755
index 0000000..d84bdc6
--- /dev/null
+++ b/
@@ -0,0 +1,19 @@
+# Pad the file named by $ARGV[0] to 510 bytes and append 0x55 0xAA,
+# rewriting the file in place; refuse if it holds more than 510 bytes.
+# 3-arg open: the file name is never interpreted as a mode or a pipe.
+open(SIG, "<", $ARGV[0]) || die "open $ARGV[0]: $!";
+# 1000 is simply "more than 510": enough to detect an oversized block
+$n = sysread(SIG, $buf, 1000);
+# sysread returns undef on error; unchecked, that would count as 0 bytes
+# and the file would be overwritten with an empty, signed block
+defined($n) || die "read $ARGV[0]: $!";
+if($n > 510){
+ print STDERR "boot block too large: $n bytes (max 510)\n";
+ exit 1;
+print STDERR "boot block is $n bytes (max 510)\n";
+$buf .= "\0" x (510-$n);
+$buf .= "\x55\xAA";
+open(SIG, ">", $ARGV[0]) || die "open >$ARGV[0]: $!";
+print SIG $buf;
+close SIG;
diff --git a/string.c b/string.c
new file mode 100644
index 0000000..aef4242
--- /dev/null
+++ b/string.c
@@ -0,0 +1,22 @@
+void *
+memcpy(void *dst, void *src, unsigned n) // copy n bytes from src to dst, lowest address first; returns dst
+ char *d = (char *) dst;
+ char *s = (char *) src;
+ while(n-- > 0) // body runs exactly n times; n == 0 copies nothing
+ *d++ = *s++;
+ return dst;
+void *
+memset(void *dst, int c, unsigned n) // store c (converted to char) into each of the n bytes at dst; returns dst
+ char *d = (char *) dst;
+ while(n-- > 0) // body runs exactly n times; n == 0 writes nothing
+ *d++ = c;
+ return dst;
diff --git a/trapasm.S b/trapasm.S
new file mode 100644
index 0000000..69649ff
--- /dev/null
+++ b/trapasm.S
@@ -0,0 +1,12 @@
+ .text
+ .globl trapret
+ /*
+ * a forked process RETs here
+ * expects ESP to point to a Trapframe
+ */
+ popal /* reload the eight general registers from tf_regs */
+ popl %es /* tf_es plus its 16-bit padding */
+ popl %ds /* tf_ds plus its 16-bit padding */
+ addl $0x8, %esp /* trapno and errcode */
+ iret /* consumes tf_eip, tf_cs, tf_eflags (and tf_esp, tf_ss on a ring change) */
diff --git a/types.h b/types.h
new file mode 100644
index 0000000..01989d6
--- /dev/null
+++ b/types.h
@@ -0,0 +1,6 @@
+typedef unsigned long long uint64_t; // fixed-width names; the widths assume the 32-bit x86 compiler -- TODO confirm before reusing elsewhere
+typedef unsigned int uint32_t;
+typedef unsigned short uint16_t;
+typedef unsigned char uint8_t;
+typedef uint32_t uintptr_t; // integer type used where a pointer-sized value is stored (e.g. tf_eip, tf_esp)
+typedef uint32_t physaddr_t; // presumably a physical address -- confirm against its users
diff --git a/x86.h b/x86.h
new file mode 100644
index 0000000..134c6d2
--- /dev/null
+++ b/x86.h
@@ -0,0 +1,301 @@
+static __inline void breakpoint(void) __attribute__((always_inline)); /* prototypes for this header's inline wrappers around single x86 instructions */
+static __inline uint8_t inb(int port) __attribute__((always_inline));
+static __inline void insb(int port, void *addr, int cnt) __attribute__((always_inline));
+static __inline uint16_t inw(int port) __attribute__((always_inline));
+static __inline void insw(int port, void *addr, int cnt) __attribute__((always_inline));
+static __inline uint32_t inl(int port) __attribute__((always_inline));
+static __inline void insl(int port, void *addr, int cnt) __attribute__((always_inline));
+static __inline void outb(int port, uint8_t data) __attribute__((always_inline));
+static __inline void outsb(int port, const void *addr, int cnt) __attribute__((always_inline));
+static __inline void outw(int port, uint16_t data) __attribute__((always_inline));
+static __inline void outsw(int port, const void *addr, int cnt) __attribute__((always_inline));
+static __inline void outsl(int port, const void *addr, int cnt) __attribute__((always_inline));
+static __inline void outl(int port, uint32_t data) __attribute__((always_inline));
+static __inline void invlpg(void *addr) __attribute__((always_inline));
+static __inline void lidt(void *p) __attribute__((always_inline));
+static __inline void lldt(uint16_t sel) __attribute__((always_inline));
+static __inline void ltr(uint16_t sel) __attribute__((always_inline));
+static __inline void lcr0(uint32_t val) __attribute__((always_inline));
+static __inline uint32_t rcr0(void) __attribute__((always_inline));
+static __inline uint32_t rcr2(void) __attribute__((always_inline));
+static __inline void lcr3(uint32_t val) __attribute__((always_inline));
+static __inline uint32_t rcr3(void) __attribute__((always_inline));
+static __inline void lcr4(uint32_t val) __attribute__((always_inline));
+static __inline uint32_t rcr4(void) __attribute__((always_inline));
+static __inline void tlbflush(void) __attribute__((always_inline));
+static __inline uint32_t read_eflags(void) __attribute__((always_inline));
+static __inline void write_eflags(uint32_t eflags) __attribute__((always_inline));
+static __inline uint32_t read_ebp(void) __attribute__((always_inline));
+static __inline uint32_t read_esp(void) __attribute__((always_inline));
+static __inline void cpuid(uint32_t info, uint32_t *eaxp, uint32_t *ebxp, uint32_t *ecxp, uint32_t *edxp); /* NOTE(review): the only prototype here without always_inline -- confirm that is intended */
+static __inline uint64_t read_tsc(void) __attribute__((always_inline));
+static __inline void /* executes int3, the one-byte breakpoint trap instruction */
+ __asm __volatile("int3");
+static __inline uint8_t
+inb(int port) /* read one byte from I/O port `port` and return it */
+ uint8_t data;
+ __asm __volatile("inb %w1,%0" : "=a" (data) : "d" (port)); /* port in dx (%w1 = its 16-bit name), result in al */
+ return data;
+static __inline void
+insb(int port, void *addr, int cnt) /* read cnt bytes from I/O port `port` into the buffer at addr */
+ /* cld makes the string op advance upward through the buffer.
+  * "rep" is the repeat prefix the instruction-set manuals define for INS;
+  * "repne" is only defined for the compare/scan string instructions.
+  * addr (edi) and cnt (ecx) are both inputs and outputs because the
+  * instruction updates them; "memory" covers the stores into the buffer. */
+ __asm __volatile("cld\n\trep\n\tinsb" :
+ "=D" (addr), "=c" (cnt) :
+ "d" (port), "0" (addr), "1" (cnt) :
+ "memory", "cc");
+static __inline uint16_t
+inw(int port) /* read one 16-bit word from I/O port `port` and return it */
+ uint16_t data;
+ __asm __volatile("inw %w1,%0" : "=a" (data) : "d" (port)); /* port in dx, result in ax */
+ return data;
+static __inline void
+insw(int port, void *addr, int cnt) /* read cnt 16-bit words from I/O port `port` into the buffer at addr */
+ /* "rep" is the repeat prefix defined for INS ("repne" is only defined
+  * for the compare/scan string instructions); cld selects ascending
+  * addresses; edi/ecx are updated by the instruction, hence in+out. */
+ __asm __volatile("cld\n\trep\n\tinsw" :
+ "=D" (addr), "=c" (cnt) :
+ "d" (port), "0" (addr), "1" (cnt) :
+ "memory", "cc");
+static __inline uint32_t
+inl(int port) /* read one 32-bit value from I/O port `port` and return it */
+ uint32_t data;
+ __asm __volatile("inl %w1,%0" : "=a" (data) : "d" (port)); /* port in dx, result in eax */
+ return data;
+static __inline void
+insl(int port, void *addr, int cnt) /* read cnt 32-bit values from I/O port `port` into the buffer at addr */
+ /* "rep" is the repeat prefix defined for INS ("repne" is only defined
+  * for the compare/scan string instructions); cld selects ascending
+  * addresses; edi/ecx are updated by the instruction, hence in+out. */
+ __asm __volatile("cld\n\trep\n\tinsl" :
+ "=D" (addr), "=c" (cnt) :
+ "d" (port), "0" (addr), "1" (cnt) :
+ "memory", "cc");
+static __inline void
+outb(int port, uint8_t data) /* write the byte `data` to I/O port `port` */
+ __asm __volatile("outb %0,%w1" : : "a" (data), "d" (port)); /* data in al, port in dx */
+static __inline void
+outsb(int port, const void *addr, int cnt) /* write cnt bytes from the buffer at addr to I/O port `port` */
+ /* "rep" is the repeat prefix defined for OUTS ("repne" is only defined
+  * for the compare/scan string instructions). The asm reads the buffer
+  * through esi, which is not a memory operand, so the "memory" clobber
+  * makes the compiler finish pending stores to it first. */
+ __asm __volatile("cld\n\trep\n\toutsb" :
+ "=S" (addr), "=c" (cnt) :
+ "d" (port), "0" (addr), "1" (cnt) :
+ "memory", "cc");
+static __inline void
+outw(int port, uint16_t data) /* write the 16-bit word `data` to I/O port `port` */
+ __asm __volatile("outw %0,%w1" : : "a" (data), "d" (port)); /* data in ax, port in dx */
+static __inline void
+outsw(int port, const void *addr, int cnt) /* write cnt 16-bit words from the buffer at addr to I/O port `port` */
+ /* "rep" is the repeat prefix defined for OUTS ("repne" is only defined
+  * for the compare/scan string instructions). The asm reads the buffer
+  * through esi, which is not a memory operand, so the "memory" clobber
+  * makes the compiler finish pending stores to it first. */
+ __asm __volatile("cld\n\trep\n\toutsw" :
+ "=S" (addr), "=c" (cnt) :
+ "d" (port), "0" (addr), "1" (cnt) :
+ "memory", "cc");
+static __inline void
+outsl(int port, const void *addr, int cnt) /* write cnt 32-bit values from the buffer at addr to I/O port `port` */
+ /* "rep" is the repeat prefix defined for OUTS ("repne" is only defined
+  * for the compare/scan string instructions). The asm reads the buffer
+  * through esi, which is not a memory operand, so the "memory" clobber
+  * makes the compiler finish pending stores to it first. */
+ __asm __volatile("cld\n\trep\n\toutsl" :
+ "=S" (addr), "=c" (cnt) :
+ "d" (port), "0" (addr), "1" (cnt) :
+ "memory", "cc");
+static __inline void
+outl(int port, uint32_t data) /* write the 32-bit value `data` to I/O port `port` */
+ __asm __volatile("outl %0,%w1" : : "a" (data), "d" (port)); /* data in eax, port in dx */
+static __inline void
+invlpg(void *addr) /* run invlpg on addr: invalidates the TLB entry for the page containing it */
+ __asm __volatile("invlpg (%0)" : : "r" (addr) : "memory");
+static __inline void
+lidt(void *p) /* load the IDT register from the descriptor stored at p */
+ __asm __volatile("lidt (%0)" : : "r" (p)); /* p is passed in a register and dereferenced by the instruction */
+static __inline void
+lldt(uint16_t sel) /* load the LDT register with segment selector sel */
+ __asm __volatile("lldt %0" : : "r" (sel));
+static __inline void
+ltr(uint16_t sel) /* load the task register with segment selector sel */
+ __asm __volatile("ltr %0" : : "r" (sel));
+static __inline void
+lcr0(uint32_t val) /* write val into control register %cr0 */
+ __asm __volatile("movl %0,%%cr0" : : "r" (val));
+static __inline uint32_t /* returns the current value of control register %cr0 */
+ uint32_t val;
+ __asm __volatile("movl %%cr0,%0" : "=r" (val));
+ return val;
+static __inline uint32_t /* returns the current value of control register %cr2 */
+ uint32_t val;
+ __asm __volatile("movl %%cr2,%0" : "=r" (val));
+ return val;
+static __inline void
+lcr3(uint32_t val) /* write val into control register %cr3 */
+ __asm __volatile("movl %0,%%cr3" : : "r" (val));
+static __inline uint32_t /* returns the current value of control register %cr3 */
+ uint32_t val;
+ __asm __volatile("movl %%cr3,%0" : "=r" (val));
+ return val;
+static __inline void
+lcr4(uint32_t val) /* write val into control register %cr4 */
+ __asm __volatile("movl %0,%%cr4" : : "r" (val));
+static __inline uint32_t /* returns the current value of control register %cr4 */
+ uint32_t cr4;
+ __asm __volatile("movl %%cr4,%0" : "=r" (cr4));
+ return cr4;
+static __inline void /* reads %cr3 and writes the same value straight back */
+ uint32_t cr3;
+ __asm __volatile("movl %%cr3,%0" : "=r" (cr3));
+ __asm __volatile("movl %0,%%cr3" : : "r" (cr3)); /* the reload of %cr3 is what invalidates cached translations */
+static __inline uint32_t /* returns the current EFLAGS value */
+ uint32_t eflags;
+ __asm __volatile("pushfl; popl %0" : "=r" (eflags)); /* EFLAGS cannot be moved directly, so it goes via the stack */
+ return eflags;
+static __inline void
+write_eflags(uint32_t eflags) /* load EFLAGS from the given value */
+ __asm __volatile("pushl %0; popfl" : : "r" (eflags)); /* push the value, then pop it into EFLAGS */
+static __inline uint32_t /* returns the current value of %ebp */
+ uint32_t ebp;
+ __asm __volatile("movl %%ebp,%0" : "=r" (ebp));
+ return ebp;
+static __inline uint32_t /* returns the current value of %esp */
+ uint32_t esp;
+ __asm __volatile("movl %%esp,%0" : "=r" (esp));
+ return esp;
+static __inline void
+cpuid(uint32_t info, uint32_t *eaxp, uint32_t *ebxp, uint32_t *ecxp, uint32_t *edxp) /* run cpuid with eax = info; store each result register through its pointer unless that pointer is null */
+ uint32_t eax, ebx, ecx, edx;
+ asm volatile("cpuid"
+ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+ : "a" (info));
+ if (eaxp) /* each output is optional: a null pointer means the caller does not want it */
+ *eaxp = eax;
+ if (ebxp)
+ *ebxp = ebx;
+ if (ecxp)
+ *ecxp = ecx;
+ if (edxp)
+ *edxp = edx;
+static __inline uint64_t /* returns the 64-bit counter value produced by rdtsc */
+ uint64_t tsc;
+ __asm __volatile("rdtsc" : "=A" (tsc)); /* "=A" binds the 64-bit result to the edx:eax pair on 32-bit x86 */
+ return tsc;
+struct PushRegs {
+ /* registers as pushed by pusha */
+ uint32_t reg_edi; /* lowest address: the first word popal reloads */
+ uint32_t reg_esi;
+ uint32_t reg_ebp;
+ uint32_t reg_oesp; /* Useless: the esp value pusha stored; popa skips this slot */
+ uint32_t reg_ebx;
+ uint32_t reg_edx;
+ uint32_t reg_ecx;
+ uint32_t reg_eax; /* highest address: the last word popal reloads */
+struct Trapframe {
+ struct PushRegs tf_regs;
+ uint16_t tf_es;
+ uint16_t tf_padding1;
+ uint16_t tf_ds;
+ uint16_t tf_padding2;
+ uint32_t tf_trapno;
+ /* below here defined by x86 hardware */
+ uint32_t tf_err;
+ uintptr_t tf_eip;
+ uint16_t tf_cs;
+ uint16_t tf_padding3;
+ uint32_t tf_eflags;
+ /* below here only when crossing rings, such as from user to kernel */
+ uintptr_t tf_esp;
+ uint16_t tf_ss;
+ uint16_t tf_padding4;