diff options
| author | rtm <rtm> | 2006-06-12 15:22:12 +0000 | 
|---|---|---|
| committer | rtm <rtm> | 2006-06-12 15:22:12 +0000 | 
| commit | 55e95b16db458b7f9abeca96e541acbdf8d7f85b (patch) | |
| tree | 92a1fcb6f1cdede7ab83b37acabf76e1bc1b10f4 | |
| download | xv6-labs-55e95b16db458b7f9abeca96e541acbdf8d7f85b.tar.gz xv6-labs-55e95b16db458b7f9abeca96e541acbdf8d7f85b.tar.bz2 xv6-labs-55e95b16db458b7f9abeca96e541acbdf8d7f85b.zip | |
import
| -rw-r--r-- | Makefile | 30 | ||||
| -rw-r--r-- | Notes | 67 | ||||
| -rw-r--r-- | bootasm.S | 109 | ||||
| -rw-r--r-- | bootmain.c | 121 | ||||
| -rw-r--r-- | console.c | 108 | ||||
| -rw-r--r-- | defs.h | 12 | ||||
| -rw-r--r-- | elf.h | 43 | ||||
| -rw-r--r-- | kalloc.c | 158 | ||||
| -rw-r--r-- | main.c | 40 | ||||
| -rw-r--r-- | mmu.h | 308 | ||||
| -rw-r--r-- | param.h | 3 | ||||
| -rw-r--r-- | proc.c | 112 | ||||
| -rw-r--r-- | proc.h | 34 | ||||
| -rwxr-xr-x | sign.pl | 19 | ||||
| -rw-r--r-- | string.c | 22 | ||||
| -rw-r--r-- | trapasm.S | 12 | ||||
| -rw-r--r-- | types.h | 6 | ||||
| -rw-r--r-- | x86.h | 301 | 
18 files changed, 1505 insertions, 0 deletions
| diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e63c77c --- /dev/null +++ b/Makefile @@ -0,0 +1,30 @@ +OBJS = main.o console.o string.o kalloc.o proc.o trapasm.o + +CC = i386-jos-elf-gcc +LD = i386-jos-elf-ld +OBJCOPY = i386-jos-elf-objcopy +OBJDUMP = i386-jos-elf-objdump + +xv6.img : bootblock kernel +	dd if=/dev/zero of=xv6.img count=10000 +	dd if=bootblock of=xv6.img conv=notrunc +	dd if=kernel of=xv6.img seek=1 conv=notrunc + +bootblock : bootasm.S bootmain.c +	$(CC) -O -nostdinc -I. -c bootmain.c +	$(CC) -nostdinc -I. -c bootasm.S +	$(LD) -N -e start -Ttext 0x7C00 -o bootblock.o bootasm.o bootmain.o +	$(OBJDUMP) -S bootblock.o > bootblock.asm +	$(OBJCOPY) -S -O binary bootblock.o bootblock +	./sign.pl bootblock + +kernel : $(OBJS) +	$(LD) -Ttext 0x100000 -e main -o kernel $(OBJS) +	$(OBJDUMP) -S kernel > kernel.asm + +%.o: %.c +	$(CC) -nostdinc -I. -O -c -o $@ $< + +clean :  +	rm -f bootmain.o bootasm.o bootblock.o bootblock +	rm -f kernel main.o kernel.asm xv6.img @@ -0,0 +1,67 @@ +bootmain.c doesn't work right if the ELF sections aren't +sector-aligned. so you can't use ld -N. and the sections may also need +to be non-zero length, only really matters for tiny "kernels". + +kernel loaded at 1 megabyte. stack same place that bootasm.S left it. + +kinit() should find real mem size +  and rescue useable memory below 1 meg + +no paging, no use of page table hardware, just segments + +no user area: no magic kernel stack mapping +  so no copying of kernel stack during fork +  though there is a kernel stack page for each process + +no kernel malloc(), just kalloc() for user core + +user pointers aren't valid in the kernel + +setting up first process +  we do want a process zero, as template +    but not runnable +  just set up return-from-trap frame on new kernel stack +  fake user program that calls exec + +map text read-only? +shared text? + +what's on the stack during a trap or sys call? +  PUSHA before scheduler switch? 
for callee-saved registers. +  segment contents? +  what does iret need to get out of the kernel? +  how does INT know what kernel stack to use? +  +are interrupts turned on in the kernel? probably. + +per-cpu curproc +one tss per process, or one per cpu? +one segment array per cpu, or per process? + +pass curproc explicitly, or implicit from cpu #? +  e.g. argument to newproc()? + +test stack expansion +test running out of memory, process slots + +we can't really use a separate stack segment, since stack addresses +need to work correctly as ordinary pointers. the same may be true of +data vs text. how can we have a gap between data and stack, so that +both can grow, without committing 4GB of physical memory? does this +mean we need paging? + +what's the simplest way to add the paging we need? +  one page table, re-write it each time we leave the kernel? +  page table per process? +  probably need to use 0-0xffffffff segments, so that +    both data and stack pointers always work +  so is it now worth it to make a process's phys mem contiguous? +  or could use segment limits and 4 meg pages? +    but limits would prevent using stack pointers as data pointers +  how to write-protect text? not important? + +perhaps have fixed-size stack, put it in the data segment? + +oops, if kernel stack is in contiguous user phys mem, then moving +users' memory (e.g. to expand it) will wreck any pointers into the +kernel stack. 
diff --git a/bootasm.S b/bootasm.S new file mode 100644 index 0000000..00cbdc9 --- /dev/null +++ b/bootasm.S @@ -0,0 +1,109 @@ +#define SEG_NULL						\ +	.word 0, 0;						\ +	.byte 0, 0, 0, 0 +#define SEG(type,base,lim)					\ +	.word (((lim) >> 12) & 0xffff), ((base) & 0xffff);	\ +	.byte (((base) >> 16) & 0xff), (0x90 | (type)),		\ +		(0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff) + +#define STA_X		0x8	    // Executable segment +#define STA_E		0x4	    // Expand down (non-executable segments) +#define STA_C		0x4	    // Conforming code segment (executable only) +#define STA_W		0x2	    // Writeable (non-executable segments) +#define STA_R		0x2	    // Readable (executable segments) +#define STA_A		0x1	    // Accessed +	 +.set PROT_MODE_CSEG,0x8		# code segment selector +.set PROT_MODE_DSEG,0x10        # data segment selector +.set CR0_PE_ON,0x1		# protected mode enable flag +	 +################################################################################### +# ENTRY POINT	 +#   This code should be stored in the first sector of the hard disk. +#   After the BIOS initializes the hardware on startup or system reset, +#   it loads this code at physical address 0x7c00 - 0x7d00 (512 bytes). +#   Then the BIOS jumps to the beginning of it, address 0x7c00, +#   while running in 16-bit real-mode (8086 compatibility mode). +#   The Code Segment register (CS) is initially zero on entry. +#	 +# This code switches into 32-bit protected mode so that all of +# memory can accessed, then calls into C. +################################################################################### +	 +.globl start					# Entry point	 +start:		.code16				# This runs in real mode +		cli				# Disable interrupts +		cld				# String operations increment + +		# Set up the important data segment registers (DS, ES, SS). 
+		xorw	%ax,%ax			# Segment number zero +		movw	%ax,%ds			# -> Data Segment +		movw	%ax,%es			# -> Extra Segment +		movw	%ax,%ss			# -> Stack Segment + +		# Set up the stack pointer, growing downward from 0x7c00. +		movw	$start,%sp         	# Stack Pointer +	 +#### Enable A20: +####   For fascinating historical reasons (related to the fact that +####   the earliest 8086-based PCs could only address 1MB of physical memory +####   and subsequent 80286-based PCs wanted to retain maximum compatibility), +####   physical address line 20 is tied to low when the machine boots. +####   Obviously this is a bit of a drag for us, especially when trying to +####   address memory above 1MB.  This code undoes this. +	 +seta20.1:	inb	$0x64,%al		# Get status +		testb	$0x2,%al		# Busy? +		jnz	seta20.1		# Yes +		movb	$0xd1,%al		# Command: Write +		outb	%al,$0x64		#  output port +seta20.2:	inb	$0x64,%al		# Get status +		testb	$0x2,%al		# Busy? +		jnz	seta20.2		# Yes +		movb	$0xdf,%al		# Enable +		outb	%al,$0x60		#  A20 + +#### Switch from real to protected mode	 +####     The descriptors in our GDT allow all physical memory to be accessed. +####     Furthermore, the descriptors have base addresses of 0, so that the +####     segment translation is a NOP, i.e. virtual addresses are identical to +####     their physical addresses.  With this setup, immediately after +####	 enabling protected mode it will still appear to this code +####	 that it is running directly on physical memory with no translation. +####	 This initial NOP-translation setup is required by the processor +####	 to ensure that the transition to protected mode occurs smoothly. 
+	 +real_to_prot:	cli				# Mandatory since we dont set up an IDT +		lgdt	gdtdesc			# load GDT -- mandatory in protected mode +		movl	%cr0, %eax		# turn on protected mode +		orl	$CR0_PE_ON, %eax	#  +		movl	%eax, %cr0		#  +	        ### CPU magic: jump to relocation, flush prefetch queue, and reload %cs +		### Has the effect of just jmp to the next instruction, but simultaneous +		### loads CS with $PROT_MODE_CSEG. +		ljmp	$PROT_MODE_CSEG, $protcseg +	 +#### we are in 32-bit protected mode (hence the .code32) +.code32 +protcseg:	 +		# Set up the protected-mode data segment registers +		movw	$PROT_MODE_DSEG, %ax	# Our data segment selector +		movw	%ax, %ds		# -> DS: Data Segment +		movw	%ax, %es		# -> ES: Extra Segment +		movw	%ax, %fs		# -> FS +		movw	%ax, %gs		# -> GS +		movw	%ax, %ss		# -> SS: Stack Segment +	 +		call cmain			# finish the boot load from C. +						# cmain() should not return +spin:		jmp spin			# ..but in case it does, spin +	 +.p2align 2					# force 4 byte alignment +gdt: +	SEG_NULL				# null seg +	SEG(STA_X|STA_R, 0x0, 0xffffffff)	# code seg +	SEG(STA_W, 0x0, 0xffffffff)	        # data seg +	 +gdtdesc: +	.word	0x17			# sizeof(gdt) - 1 +	.long	gdt			# address gdt diff --git a/bootmain.c b/bootmain.c new file mode 100644 index 0000000..79d769c --- /dev/null +++ b/bootmain.c @@ -0,0 +1,121 @@ +#include <types.h> +#include <elf.h> +#include <x86.h> + +/********************************************************************** + * This a dirt simple boot loader, whose sole job is to boot + * an elf kernel image from the first IDE hard disk. + * + * DISK LAYOUT + *  * This program(boot.S and main.c) is the bootloader.  It should + *    be stored in the first sector of the disk. + *  + *  * The 2nd sector onward holds the kernel image. + *	 + *  * The kernel image must be in ELF format. 
+ * + * BOOT UP STEPS	 + *  * when the CPU boots it loads the BIOS into memory and executes it + * + *  * the BIOS initializes devices, sets up the interrupt routines, and + *    reads the first sector of the boot device(e.g., hard-drive)  + *    into memory and jumps to it. + * + *  * Assuming this boot loader is stored in the first sector of the + *    hard-drive, this code takes over... + * + *  * control starts in bootasm.S -- which sets up protected mode, + *    and a stack so C code can then run, then calls cmain() + * + *  * cmain() in this file takes over, reads in the kernel and jumps to it. + **********************************************************************/ + +#define SECTSIZE	512 +#define ELFHDR		((struct Elf *) 0x10000) // scratch space + +void readsect(void*, uint32_t); +void readseg(uint32_t, uint32_t, uint32_t); + +void +cmain(void) +{ +	struct Proghdr *ph, *eph; + +	// read 1st page off disk +	readseg((uint32_t) ELFHDR, SECTSIZE*8, 0); + +	// is this a valid ELF? +	if (ELFHDR->e_magic != ELF_MAGIC) +		goto bad; + +	// load each program segment (ignores ph flags) +	ph = (struct Proghdr *) ((uint8_t *) ELFHDR + ELFHDR->e_phoff); +	eph = ph + ELFHDR->e_phnum; +	for (; ph < eph; ph++) +		readseg(ph->p_va, ph->p_memsz, ph->p_offset); + +	// call the entry point from the ELF header +	// note: does not return! +	((void (*)(void)) (ELFHDR->e_entry & 0xFFFFFF))(); + +bad: +	outw(0x8A00, 0x8A00); +	outw(0x8A00, 0x8E00); +	while (1) +		/* do nothing */; +} + +// Read 'count' bytes at 'offset' from kernel into virtual address 'va'. +// Might copy more than asked +void +readseg(uint32_t va, uint32_t count, uint32_t offset) +{ +	uint32_t end_va; + +	va &= 0xFFFFFF; +	end_va = va + count; +	 +	// round down to sector boundary +	va &= ~(SECTSIZE - 1); + +	// translate from bytes to sectors, and kernel starts at sector 1 +	offset = (offset / SECTSIZE) + 1; + +	// If this is too slow, we could read lots of sectors at a time. 
+	// We'd write more to memory than asked, but it doesn't matter -- +	// we load in increasing order. +	while (va < end_va) { +		readsect((uint8_t*) va, offset); +		va += SECTSIZE; +		offset++; +	} +} + +void +waitdisk(void) +{ +	// wait for disk ready +	while ((inb(0x1F7) & 0xC0) != 0x40) +		/* do nothing */; +} + +void +readsect(void *dst, uint32_t offset) +{ +	// wait for disk to be ready +	waitdisk(); + +	outb(0x1F2, 1);		// count = 1 +	outb(0x1F3, offset); +	outb(0x1F4, offset >> 8); +	outb(0x1F5, offset >> 16); +	outb(0x1F6, (offset >> 24) | 0xE0); +	outb(0x1F7, 0x20);	// cmd 0x20 - read sectors + +	// wait for disk to be ready +	waitdisk(); + +	// read a sector +	insl(0x1F0, dst, SECTSIZE/4); +} + diff --git a/console.c b/console.c new file mode 100644 index 0000000..2035611 --- /dev/null +++ b/console.c @@ -0,0 +1,108 @@ +#include <types.h> +#include <x86.h> +#include "defs.h" + +void +cons_putc(int c) +{ +  int crtport = 0x3d4; // io port of CGA +  unsigned short *crt = (unsigned short *) 0xB8000; // base of CGA memory +  int ind; + +  // cursor position, 16 bits, col + 80*row +  outb(crtport, 14); +  ind = inb(crtport + 1) << 8; +  outb(crtport, 15); +  ind |= inb(crtport + 1); + +  c &= 0xff; + +  if(c == '\n'){ +    ind -= (ind % 80); +    ind += 80; +  } else { +    c |= 0x0700; // attribute 0x07: light grey on black +    crt[ind] = c; +    ind += 1; +  } + +  if((ind / 80) >= 24){ +    // scroll up +    memcpy(crt, crt + 80, sizeof(crt[0]) * (23 * 80)); // NOTE(review): src and dst overlap; relies on memcpy copying low-to-high -- confirm in string.c +    ind -= 80; +    memset(crt + ind, 0, sizeof(crt[0]) * ((24 * 80) - ind)); +  } + +  outb(crtport, 14); +  outb(crtport + 1, ind >> 8); +  outb(crtport, 15); +  outb(crtport + 1, ind); +} + +void +printint(int xx, int base, int sgn) +{ +  char buf[16]; +  char digits[] = "0123456789ABCDEF"; +  int i = 0, neg = 0; +  unsigned int x; +   +  if(sgn && xx < 0){ +    neg = 1; +    x = 0 - xx; +  } else { +    x = xx; +  } + +  do { +    buf[i++] = digits[x % base]; +  } while((x /= base) != 0); +  if(neg) +    buf[i++] = 
'-'; + +  while(i > 0){ +    i -= 1; +    cons_putc(buf[i]); +  } +} + +/* + * print to the console. only understands %d and %x. + */ +void +cprintf(char *fmt, ...) +{ +  int i, state = 0, c; +  unsigned int *ap = (unsigned int *) &fmt + 1; + +  for(i = 0; fmt[i]; i++){ +    c = fmt[i] & 0xff; +    if(state == 0){ +      if(c == '%'){ +        state = '%'; +      } else { +        cons_putc(c); +      } +    } else if(state == '%'){ +      if(c == 'd'){ +        printint(*ap, 10, 1); +        ap++; +      } else if(c == 'x'){ +        printint(*ap, 16, 0); +        ap++; +      } else if(c == '%'){ +        cons_putc(c); +      } +      state = 0; +    } +  } +} + +void +panic(char *s) +{ +  cprintf(s, 0); +  cprintf("\n", 0); +  while(1) +    ; +} @@ -0,0 +1,12 @@ +// kalloc.c +char *kalloc(int n); +void kfree(char *cp, int len); + +// console.c +void cprintf(char *fmt, ...); +void panic(char *s); + +// proc.c +struct proc; +void setupsegs(struct proc *p); +struct proc * newproc(struct proc *op); @@ -0,0 +1,43 @@ +#ifndef JOS_INC_ELF_H +#define JOS_INC_ELF_H + +#define ELF_MAGIC 0x464C457FU	/* "\x7FELF" in little endian */ + +struct Elf { +	uint32_t e_magic;	// must equal ELF_MAGIC +	uint8_t e_elf[12]; +	uint16_t e_type; +	uint16_t e_machine; +	uint32_t e_version; +	uint32_t e_entry; +	uint32_t e_phoff; +	uint32_t e_shoff; +	uint32_t e_flags; +	uint16_t e_ehsize; +	uint16_t e_phentsize; +	uint16_t e_phnum; +	uint16_t e_shentsize; +	uint16_t e_shnum; +	uint16_t e_shstrndx; +}; + +struct Proghdr { +	uint32_t p_type; +	uint32_t p_offset; +	uint32_t p_va; +	uint32_t p_pa; +	uint32_t p_filesz; +	uint32_t p_memsz; +	uint32_t p_flags; +	uint32_t p_align; +}; + +// Values for Proghdr::p_type +#define ELF_PROG_LOAD		1 + +// Flag bits for Proghdr::p_flags +#define ELF_PROG_FLAG_EXEC	1 +#define ELF_PROG_FLAG_WRITE	2 +#define ELF_PROG_FLAG_READ	4 + +#endif /* !JOS_INC_ELF_H */ diff --git a/kalloc.c b/kalloc.c new file mode 100644 index 0000000..5ea38fd --- /dev/null +++ 
b/kalloc.c @@ -0,0 +1,158 @@ +/* + * physical memory allocator, intended to be used to allocate + * memory for user processes. allocates in 4096-byte "pages". + * free list is sorted and combines adjacent pages into + * long runs, to make it easier to allocate big segments. + * one reason the page size is 4k is that the x86 segment size + * granularity is 4k. + */ + +#include "param.h" +#include "types.h" +#include "defs.h" + +struct run { +  struct run *next; +  int len; // bytes +}; +struct run *freelist; + +void ktest(); + +/* + * initialize free list of physical pages. this code + * cheats by just considering the one megabyte of pages + * after _end. + */ +void +kinit() +{ +  extern int end; +  unsigned mem; +  char *start; +   +  start = (char *) &end; +  start = (char *) (((unsigned)start + PAGE) & ~(PAGE-1)); +  mem = 256; // XXX +  cprintf("mem = %d\n", mem * PAGE); +  kfree(start, mem * PAGE); +  ktest(); +} + +void +kfree(char *cp, int len) +{ +  struct run **rr; +  struct run *p = (struct run *) cp; +  struct run *pend = (struct run *) (cp + len); + +  if(len % PAGE) +    panic("kfree"); + +  rr = &freelist; +  while(*rr){ +    struct run *rend = (struct run *) ((char *)(*rr) + (*rr)->len); +    if(p >= *rr && p < rend) +      panic("freeing free page"); +    if(pend == *rr){ +      p->len = len + (*rr)->len; +      p->next = (*rr)->next; +      *rr = p; +      return; +    } +    if(pend < *rr){ +      p->len = len; +      p->next = *rr; +      *rr = p; +      return; +    } +    if(p == rend){ +      (*rr)->len += len; +      if((*rr)->next && (*rr)->next == pend){ +        (*rr)->len += (*rr)->next->len; +        (*rr)->next = (*rr)->next->next; +      } +      return; +    } +    rr = &((*rr)->next); +  } +  p->len = len; +  p->next = 0; +  *rr = p; +} + +/* + * allocate n bytes of physical memory. + * returns a kernel-segment pointer. + * returns 0 if there's no run that's big enough. 
+ */ +char * +kalloc(int n) +{ +  struct run **rr; + +  if(n % PAGE) +    panic("kalloc"); + +  rr = &freelist; +  while(*rr){ +    struct run *r = *rr; +    if(r->len == n){ +      *rr = r->next; +      return (char *) r; +    } +    if(r->len > n){ +      char *p = (char *)r + (r->len - n); +      r->len -= n; +      return p; +    } +    rr = &(*rr)->next; +  } +  return 0; +} + +void +ktest() +{ +  char *p1, *p2, *p3; + +  // test coalescing +  p1 = kalloc(4 * PAGE); +  kfree(p1 + 3*PAGE, PAGE); +  kfree(p1 + 2*PAGE, PAGE); +  kfree(p1, PAGE); +  kfree(p1 + PAGE, PAGE); +  p2 = kalloc(4 * PAGE); +  if(p2 != p1) +    panic("ktest"); +  kfree(p2, 4 * PAGE); + +  // test finding first run that fits +  p1 = kalloc(1 * PAGE); +  p2 = kalloc(1 * PAGE); +  kfree(p1, PAGE); +  p3 = kalloc(2 * PAGE); +  kfree(p2, PAGE); +  kfree(p3, 2 * PAGE); + +  // test running out of memory +  p1 = 0; +  while(1){ +    p2 = kalloc(PAGE); +    if(p2 == 0) +      break; +    *(char **)p2 = p1; +    p1 = p2; +  } +  while(p1){ +    p2 = *(char **)p1; +    kfree(p1, PAGE); +    p1 = p2; +  } +  p1 = kalloc(PAGE * 20); +  if(p1 == 0) +    panic("ktest2"); +  kfree(p1, PAGE * 20); + +  cprintf("ktest ok\n"); +} @@ -0,0 +1,40 @@ +#include "types.h" +#include "param.h" +#include "mmu.h" +#include "proc.h" +#include "defs.h" +#include "x86.h" + +char junk1[20000]; +char junk2[20000] = { 1 }; + +main() +{ +  struct proc *p; + +  cprintf("\nxV6\n\n"); + +  // initialize physical memory allocator +  kinit(); + +  // create fake process zero +  p = &proc[0]; +  p->state = WAITING; +  p->sz = PAGE; +  p->mem = kalloc(p->sz); +  memset(p->mem, 0, p->sz); +  p->kstack = kalloc(KSTACKSIZE); +  p->tf = (struct Trapframe *) (p->kstack + KSTACKSIZE - sizeof(struct Trapframe)); +  memset(p->tf, 0, sizeof(struct Trapframe)); +  p->tf->tf_es = p->tf->tf_ds = p->tf->tf_ss = (SEG_UDATA << 3) | 3; +  p->tf->tf_cs = (SEG_UCODE << 3) | 3; +  p->tf->tf_eflags = FL_IF; +  setupsegs(p); + +  p = 
newproc(&proc[0]); +  // xxx copy instructions to p->mem +  p->tf->tf_eip = 0; +  p->tf->tf_esp = p->sz; + +  swtch(&proc[0]); +} @@ -0,0 +1,308 @@ +/* + * This file contains definitions for the x86 memory management unit (MMU), + * including paging- and segmentation-related data structures and constants, + * the %cr0, %cr4, and %eflags registers, and traps. + */ + +/* + * + *	Part 1.  Paging data structures and constants. + * + */ + +// A linear address 'la' has a three-part structure as follows: +// +// +--------10------+-------10-------+---------12----------+ +// | Page Directory |   Page Table   | Offset within Page  | +// |      Index     |      Index     |                     | +// +----------------+----------------+---------------------+ +//  \--- PDX(la) --/ \--- PTX(la) --/ \---- PGOFF(la) ----/ +//  \----------- PPN(la) -----------/ +// +// The PDX, PTX, PGOFF, and PPN macros decompose linear addresses as shown. +// To construct a linear address la from PDX(la), PTX(la), and PGOFF(la), +// use PGADDR(PDX(la), PTX(la), PGOFF(la)). + +// page number field of address +#define PPN(la)		(((uintptr_t) (la)) >> PTXSHIFT) +#define VPN(la)		PPN(la)		// used to index into vpt[] + +// page directory index +#define PDX(la)		((((uintptr_t) (la)) >> PDXSHIFT) & 0x3FF) +#define VPD(la)		PDX(la)		// used to index into vpd[] + +// page table index +#define PTX(la)		((((uintptr_t) (la)) >> PTXSHIFT) & 0x3FF) + +// offset in page +#define PGOFF(la)	(((uintptr_t) (la)) & 0xFFF) + +// construct linear address from indexes and offset +#define PGADDR(d, t, o)	((void*) ((d) << PDXSHIFT | (t) << PTXSHIFT | (o))) + +// Page directory and page table constants. 
+#define NPDENTRIES	1024		// page directory entries per page directory +#define NPTENTRIES	1024		// page table entries per page table + +#define PGSIZE		4096		// bytes mapped by a page +#define PGSHIFT		12		// log2(PGSIZE) + +#define PTSIZE		(PGSIZE*NPTENTRIES) // bytes mapped by a page directory entry +#define PTSHIFT		22		// log2(PTSIZE) + +#define PTXSHIFT	12		// offset of PTX in a linear address +#define PDXSHIFT	22		// offset of PDX in a linear address + +// Page table/directory entry flags. +#define PTE_P		0x001	// Present +#define PTE_W		0x002	// Writeable +#define PTE_U		0x004	// User +#define PTE_PWT		0x008	// Write-Through +#define PTE_PCD		0x010	// Cache-Disable +#define PTE_A		0x020	// Accessed +#define PTE_D		0x040	// Dirty +#define PTE_PS		0x080	// Page Size +#define PTE_MBZ		0x180	// Bits must be zero + +// The PTE_AVAIL bits aren't used by the kernel or interpreted by the +// hardware, so user processes are allowed to set them arbitrarily. +#define PTE_AVAIL	0xE00	// Available for software use + +// Only flags in PTE_USER may be used in system calls. 
+#define PTE_USER	(PTE_AVAIL | PTE_P | PTE_W | PTE_U) + +// address in page table entry +#define PTE_ADDR(pte)	((physaddr_t) (pte) & ~0xFFF) + +// Control Register flags +#define CR0_PE		0x00000001	// Protection Enable +#define CR0_MP		0x00000002	// Monitor coProcessor +#define CR0_EM		0x00000004	// Emulation +#define CR0_TS		0x00000008	// Task Switched +#define CR0_ET		0x00000010	// Extension Type +#define CR0_NE		0x00000020	// Numeric Errror +#define CR0_WP		0x00010000	// Write Protect +#define CR0_AM		0x00040000	// Alignment Mask +#define CR0_NW		0x20000000	// Not Writethrough +#define CR0_CD		0x40000000	// Cache Disable +#define CR0_PG		0x80000000	// Paging + +#define CR4_PCE		0x00000100	// Performance counter enable +#define CR4_MCE		0x00000040	// Machine Check Enable +#define CR4_PSE		0x00000010	// Page Size Extensions +#define CR4_DE		0x00000008	// Debugging Extensions +#define CR4_TSD		0x00000004	// Time Stamp Disable +#define CR4_PVI		0x00000002	// Protected-Mode Virtual Interrupts +#define CR4_VME		0x00000001	// V86 Mode Extensions + +// Eflags register +#define FL_CF		0x00000001	// Carry Flag +#define FL_PF		0x00000004	// Parity Flag +#define FL_AF		0x00000010	// Auxiliary carry Flag +#define FL_ZF		0x00000040	// Zero Flag +#define FL_SF		0x00000080	// Sign Flag +#define FL_TF		0x00000100	// Trap Flag +#define FL_IF		0x00000200	// Interrupt Flag +#define FL_DF		0x00000400	// Direction Flag +#define FL_OF		0x00000800	// Overflow Flag +#define FL_IOPL_MASK	0x00003000	// I/O Privilege Level bitmask +#define FL_IOPL_0	0x00000000	//   IOPL == 0 +#define FL_IOPL_1	0x00001000	//   IOPL == 1 +#define FL_IOPL_2	0x00002000	//   IOPL == 2 +#define FL_IOPL_3	0x00003000	//   IOPL == 3 +#define FL_NT		0x00004000	// Nested Task +#define FL_RF		0x00010000	// Resume Flag +#define FL_VM		0x00020000	// Virtual 8086 mode +#define FL_AC		0x00040000	// Alignment Check +#define FL_VIF		0x00080000	// Virtual Interrupt Flag +#define FL_VIP		0x00100000	// Virtual Interrupt 
Pending +#define FL_ID		0x00200000	// ID flag + +// Page fault error codes +#define FEC_PR		0x1	// Page fault caused by protection violation +#define FEC_WR		0x2	// Page fault caused by a write +#define FEC_U		0x4	// Page fault occured while in user mode + + +/* + * + *	Part 2.  Segmentation data structures and constants. + * + */ + +#ifdef __ASSEMBLER__ + +/* + * Macros to build GDT entries in assembly. + */ +#define SEG_NULL						\ +	.word 0, 0;						\ +	.byte 0, 0, 0, 0 +#define SEG(type,base,lim)					\ +	.word (((lim) >> 12) & 0xffff), ((base) & 0xffff);	\ +	.byte (((base) >> 16) & 0xff), (0x90 | (type)),		\ +		(0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff) + +#else	// not __ASSEMBLER__ + +// Segment Descriptors +struct Segdesc { +	unsigned sd_lim_15_0 : 16;  // Low bits of segment limit +	unsigned sd_base_15_0 : 16; // Low bits of segment base address +	unsigned sd_base_23_16 : 8; // Middle bits of segment base address +	unsigned sd_type : 4;       // Segment type (see STS_ constants) +	unsigned sd_s : 1;          // 0 = system, 1 = application +	unsigned sd_dpl : 2;        // Descriptor Privilege Level +	unsigned sd_p : 1;          // Present +	unsigned sd_lim_19_16 : 4;  // High bits of segment limit +	unsigned sd_avl : 1;        // Unused (available for software use) +	unsigned sd_rsv1 : 1;       // Reserved +	unsigned sd_db : 1;         // 0 = 16-bit segment, 1 = 32-bit segment +	unsigned sd_g : 1;          // Granularity: limit scaled by 4K when set +	unsigned sd_base_31_24 : 8; // High bits of segment base address +}; +// Null segment +#define SEG_NULL	(struct Segdesc){ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +// Segment that is loadable but faults when used +#define SEG_FAULT	(struct Segdesc){ 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0 } +// Normal segment +#define SEG(type, base, lim, dpl) (struct Segdesc)			\ +{ ((lim) >> 12) & 0xffff, (base) & 0xffff, ((base) >> 16) & 0xff,	\ +    type, 1, dpl, 1, (unsigned) (lim) >> 28, 0, 0, 1, 1,		\ +    
(unsigned) (base) >> 24 } +#define SEG16(type, base, lim, dpl) (struct Segdesc)			\ +{ (lim) & 0xffff, (base) & 0xffff, ((base) >> 16) & 0xff,		\ +    type, 1, dpl, 1, (unsigned) (lim) >> 16, 0, 0, 1, 0,		\ +    (unsigned) (base) >> 24 } + +#endif /* !__ASSEMBLER__ */ + +// Application segment type bits +#define STA_X		0x8	    // Executable segment +#define STA_E		0x4	    // Expand down (non-executable segments) +#define STA_C		0x4	    // Conforming code segment (executable only) +#define STA_W		0x2	    // Writeable (non-executable segments) +#define STA_R		0x2	    // Readable (executable segments) +#define STA_A		0x1	    // Accessed + +// System segment type bits +#define STS_T16A	0x1	    // Available 16-bit TSS +#define STS_LDT		0x2	    // Local Descriptor Table +#define STS_T16B	0x3	    // Busy 16-bit TSS +#define STS_CG16	0x4	    // 16-bit Call Gate +#define STS_TG		0x5	    // Task Gate / Coum Transmitions +#define STS_IG16	0x6	    // 16-bit Interrupt Gate +#define STS_TG16	0x7	    // 16-bit Trap Gate +#define STS_T32A	0x9	    // Available 32-bit TSS +#define STS_T32B	0xB	    // Busy 32-bit TSS +#define STS_CG32	0xC	    // 32-bit Call Gate +#define STS_IG32	0xE	    // 32-bit Interrupt Gate +#define STS_TG32	0xF	    // 32-bit Trap Gate + + +/* + * + *	Part 3.  Traps. 
+ * + */ + +#ifndef __ASSEMBLER__ + +// Task state segment format (as described by the Pentium architecture book) +struct Taskstate { +	uint32_t ts_link;	// Old ts selector +	uintptr_t ts_esp0;	// Stack pointers and segment selectors +	uint16_t ts_ss0;	//   after an increase in privilege level +	uint16_t ts_padding1; +	uintptr_t ts_esp1; +	uint16_t ts_ss1; +	uint16_t ts_padding2; +	uintptr_t ts_esp2; +	uint16_t ts_ss2; +	uint16_t ts_padding3; +	physaddr_t ts_cr3;	// Page directory base +	uintptr_t ts_eip;	// Saved state from last task switch +	uint32_t ts_eflags; +	uint32_t ts_eax;	// More saved state (registers) +	uint32_t ts_ecx; +	uint32_t ts_edx; +	uint32_t ts_ebx; +	uintptr_t ts_esp; +	uintptr_t ts_ebp; +	uint32_t ts_esi; +	uint32_t ts_edi; +	uint16_t ts_es;		// Even more saved state (segment selectors) +	uint16_t ts_padding4; +	uint16_t ts_cs; +	uint16_t ts_padding5; +	uint16_t ts_ss; +	uint16_t ts_padding6; +	uint16_t ts_ds; +	uint16_t ts_padding7; +	uint16_t ts_fs; +	uint16_t ts_padding8; +	uint16_t ts_gs; +	uint16_t ts_padding9; +	uint16_t ts_ldt; +	uint16_t ts_padding10; +	uint16_t ts_t;		// Trap on task switch +	uint16_t ts_iomb;	// I/O map base address +}; + +// Gate descriptors for interrupts and traps +struct Gatedesc { +	unsigned gd_off_15_0 : 16;   // low 16 bits of offset in segment +	unsigned gd_ss : 16;         // segment selector +	unsigned gd_args : 5;        // # args, 0 for interrupt/trap gates +	unsigned gd_rsv1 : 3;        // reserved(should be zero I guess) +	unsigned gd_type : 4;        // type(STS_{TG,IG32,TG32}) +	unsigned gd_s : 1;           // must be 0 (system) +	unsigned gd_dpl : 2;         // descriptor(meaning new) privilege level +	unsigned gd_p : 1;           // Present +	unsigned gd_off_31_16 : 16;  // high bits of offset in segment +}; + +// Set up a normal interrupt/trap gate descriptor. +// - istrap: 1 for a trap (= exception) gate, 0 for an interrupt gate. 
+// - sel: Code segment selector for interrupt/trap handler +// - off: Offset in code segment for interrupt/trap handler +// - dpl: Descriptor Privilege Level - +//	  the privilege level required for software to invoke +//	  this interrupt/trap gate explicitly using an int instruction. +#define SETGATE(gate, istrap, sel, off, dpl)			\ +{								\ +	(gate).gd_off_15_0 = (uint32_t) (off) & 0xffff;		\ +	(gate).gd_ss = (sel);					\ +	(gate).gd_args = 0;					\ +	(gate).gd_rsv1 = 0;					\ +	(gate).gd_type = (istrap) ? STS_TG32 : STS_IG32;	\ +	(gate).gd_s = 0;					\ +	(gate).gd_dpl = (dpl);					\ +	(gate).gd_p = 1;					\ +	(gate).gd_off_31_16 = (uint32_t) (off) >> 16;		\ +} + +// Set up a call gate descriptor. +#define SETCALLGATE(gate, ss, off, dpl)           	        \ +{								\ +	(gate).gd_off_15_0 = (uint32_t) (off) & 0xffff;		\ +	(gate).gd_ss = (ss);					\ +	(gate).gd_args = 0;					\ +	(gate).gd_rsv1 = 0;					\ +	(gate).gd_type = STS_CG32;				\ +	(gate).gd_s = 0;					\ +	(gate).gd_dpl = (dpl);					\ +	(gate).gd_p = 1;					\ +	(gate).gd_off_31_16 = (uint32_t) (off) >> 16;		\ +} + +// Pseudo-descriptors used for LGDT, LLDT and LIDT instructions. +struct Pseudodesc { +	uint16_t pd__garbage;         // LGDT supposed to be from address 4N+2 +	uint16_t pd_lim;              // Limit +	uint32_t pd_base __attribute__ ((packed));       // Base address +}; +#define PD_ADDR(desc)	(&(desc).pd_lim) + +#endif /* !__ASSEMBLER__ */ + @@ -0,0 +1,3 @@ +#define NPROC 64 +#define PAGE 4096 +#define KSTACKSIZE PAGE @@ -0,0 +1,112 @@ +#include "types.h" +#include "mmu.h" +#include "x86.h" +#include "proc.h" +#include "param.h" +#include "defs.h" + +struct proc proc[NPROC]; + +/* + * set up a process's task state and segment descriptors + * correctly, given its current size and address in memory. + * this should be called whenever the latter change. + * doesn't change the cpu's current segmentation setup. 
+ */ +void +setupsegs(struct proc *p) +{ +  memset(&p->ts, 0, sizeof(struct Taskstate)); +  p->ts.ts_ss0 = SEG_KDATA << 3; +  p->ts.ts_esp0 = (unsigned)(p->kstack + KSTACKSIZE); + +  memset(&p->gdt, 0, sizeof(p->gdt)); +  p->gdt[0] = SEG_NULL; +  p->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0); +  p->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0); +  p->gdt[SEG_TSS] = SEG16(STS_T32A, (unsigned) &p->ts, sizeof(p->ts) - 1, 0); // limit = last valid byte offset, not the size +  p->gdt[SEG_TSS].sd_s = 0; +  p->gdt[SEG_UCODE] = SEG(STA_X|STA_R, (unsigned)p->mem, p->sz - 1, 3); // limit = sz-1; passing sz would expose one extra 4K page +  p->gdt[SEG_UDATA] = SEG(STA_W, (unsigned)p->mem, p->sz - 1, 3); // same limit as the code segment +  p->gdt_pd.pd__garbage = 0; +  p->gdt_pd.pd_lim = sizeof(p->gdt) - 1; +  p->gdt_pd.pd_base = (unsigned) p->gdt; +} + +extern void trapret(); + +/* + * internal fork(). does not copy kernel stack; instead, + * sets up the stack to return as if from system call. + */ +struct proc * +newproc(struct proc *op) +{ +  struct proc *np; +  unsigned *sp; + +  for(np = &proc[1]; np < &proc[NPROC]; np++) +    if(np->state == UNUSED) +      break; +  if(np >= &proc[NPROC]) +    return 0; + +  np->sz = op->sz; +  np->mem = kalloc(op->sz); +  if(np->mem == 0) +    return 0; +  memcpy(np->mem, op->mem, np->sz); +  np->kstack = kalloc(KSTACKSIZE); +  if(np->kstack == 0){ +    kfree(np->mem, op->sz); +    return 0; +  } +  np->tf = (struct Trapframe *) (np->kstack + KSTACKSIZE - sizeof(struct Trapframe)); +  setupsegs(np); +  np->state = RUNNABLE; +   +  // set up kernel stack to return to user space +  *(np->tf) = *(op->tf); +  sp = (unsigned *) np->tf; +  *(--sp) = (unsigned) &trapret;  // for return from swtch() +  *(--sp) = 0;  // previous bp for leave in swtch() +  np->esp = (unsigned) sp; +  np->ebp = (unsigned) sp; + +  cprintf("esp %x ebp %x mem %x\n", np->esp, np->ebp, np->mem); + +  return np; +} + +/* + * find a runnable process and switch to it. 
+ */ +void +swtch(struct proc *op) +{ +  struct proc *np; +   +  while(1){ +    for(np = op + 1; np != op; np++){ +      if(np == &proc[NPROC]) +        np = &proc[0]; +      if(np->state == RUNNABLE) +        break; +    } +    if(np->state == RUNNABLE) +      break; +    // idle... +  } +   +  op->ebp = read_ebp(); +  op->esp = read_esp(); + +  // XXX callee-saved registers? + +  // this happens to work, but probably isn't safe: +  // it's not clear that np->ebp will evaluate +  // correctly after changing the stack pointer. +  asm volatile("lgdt %0" : : "g" (np->gdt_pd.pd_lim)); +  asm volatile("movl %0, %%esp" : : "g" (np->esp)); +  asm volatile("movl %0, %%ebp" : : "g" (np->ebp)); +} @@ -0,0 +1,34 @@ +/* + * p->mem: + *   text + *   original data and bss + *   fixed-size stack + *   expandable heap + */ + +/* + * segments in proc->gdt + */ +#define SEG_KCODE 1 // kernel code +#define SEG_KDATA 2 // kernel data+stack +#define SEG_UCODE 3 +#define SEG_UDATA 4 +#define SEG_TSS 5   // this process's task state +#define NSEGS 6 + +struct proc{ +  char *mem; // start of process's physical memory +  unsigned sz; // total size of mem, including kernel stack +  char *kstack; // kernel stack, separate from mem so it doesn't move +  enum { UNUSED, RUNNABLE, WAITING } state; + +  struct Taskstate ts;  // only to give cpu address of kernel stack +  struct Segdesc gdt[NSEGS]; +  struct Pseudodesc gdt_pd; +  unsigned esp; // kernel stack pointer +  unsigned ebp; // kernel frame pointer + +  struct Trapframe *tf; // points into kstack, used to find user regs +}; + +extern struct proc proc[]; @@ -0,0 +1,19 @@ +#!/usr/bin/perl + +open(SIG, $ARGV[0]) || die "open $ARGV[0]: $!"; + +$n = sysread(SIG, $buf, 1000); + +if($n > 510){ +	print STDERR "boot block too large: $n bytes (max 510)\n"; +	exit 1; +} + +print STDERR "boot block is $n bytes (max 510)\n"; + +$buf .= "\0" x (510-$n); +$buf .= "\x55\xAA"; + +open(SIG, ">$ARGV[0]") || die "open >$ARGV[0]: $!"; +print SIG $buf; +close 
SIG; diff --git a/string.c b/string.c new file mode 100644 index 0000000..aef4242 --- /dev/null +++ b/string.c @@ -0,0 +1,22 @@ +void * +memcpy(void *dst, void *src, unsigned n) +{ +  char *d = (char *) dst; +  char *s = (char *) src; + +  while(n-- > 0) +    *d++ = *s++; + +  return dst; +} + +void * +memset(void *dst, int c, unsigned n) +{ +  char *d = (char *) dst; + +  while(n-- > 0) +    *d++ = c; + +  return dst; +} diff --git a/trapasm.S b/trapasm.S new file mode 100644 index 0000000..69649ff --- /dev/null +++ b/trapasm.S @@ -0,0 +1,12 @@ +	.text +        .globl trapret +	/* +         * a forked process RETs here +         * expects ESP to point to a Trapframe +         */ +trapret: +        popal +        popl %es +        popl %ds +        addl $0x8, %esp /* trapno and errcode */ +        iret @@ -0,0 +1,6 @@ +typedef unsigned long long uint64_t; +typedef unsigned int uint32_t; +typedef unsigned short uint16_t; +typedef unsigned char uint8_t; +typedef uint32_t uintptr_t; +typedef uint32_t physaddr_t; @@ -0,0 +1,301 @@ +static __inline void breakpoint(void) __attribute__((always_inline)); +static __inline uint8_t inb(int port) __attribute__((always_inline)); +static __inline void insb(int port, void *addr, int cnt) __attribute__((always_inline)); +static __inline uint16_t inw(int port) __attribute__((always_inline)); +static __inline void insw(int port, void *addr, int cnt) __attribute__((always_inline)); +static __inline uint32_t inl(int port) __attribute__((always_inline)); +static __inline void insl(int port, void *addr, int cnt) __attribute__((always_inline)); +static __inline void outb(int port, uint8_t data) __attribute__((always_inline)); +static __inline void outsb(int port, const void *addr, int cnt) __attribute__((always_inline)); +static __inline void outw(int port, uint16_t data) __attribute__((always_inline)); +static __inline void outsw(int port, const void *addr, int cnt) __attribute__((always_inline)); +static __inline void outsl(int port, 
const void *addr, int cnt) __attribute__((always_inline)); +static __inline void outl(int port, uint32_t data) __attribute__((always_inline)); +static __inline void invlpg(void *addr) __attribute__((always_inline)); +static __inline void lidt(void *p) __attribute__((always_inline)); +static __inline void lldt(uint16_t sel) __attribute__((always_inline)); +static __inline void ltr(uint16_t sel) __attribute__((always_inline)); +static __inline void lcr0(uint32_t val) __attribute__((always_inline)); +static __inline uint32_t rcr0(void) __attribute__((always_inline)); +static __inline uint32_t rcr2(void) __attribute__((always_inline)); +static __inline void lcr3(uint32_t val) __attribute__((always_inline)); +static __inline uint32_t rcr3(void) __attribute__((always_inline)); +static __inline void lcr4(uint32_t val) __attribute__((always_inline)); +static __inline uint32_t rcr4(void) __attribute__((always_inline)); +static __inline void tlbflush(void) __attribute__((always_inline)); +static __inline uint32_t read_eflags(void) __attribute__((always_inline)); +static __inline void write_eflags(uint32_t eflags) __attribute__((always_inline)); +static __inline uint32_t read_ebp(void) __attribute__((always_inline)); +static __inline uint32_t read_esp(void) __attribute__((always_inline)); +static __inline void cpuid(uint32_t info, uint32_t *eaxp, uint32_t *ebxp, uint32_t *ecxp, uint32_t *edxp); +static __inline uint64_t read_tsc(void) __attribute__((always_inline)); + +static __inline void +breakpoint(void) +{ +	__asm __volatile("int3"); +} + +static __inline uint8_t +inb(int port) +{ +	uint8_t data; +	__asm __volatile("inb %w1,%0" : "=a" (data) : "d" (port)); +	return data; +} + +static __inline void +insb(int port, void *addr, int cnt) +{ +	__asm __volatile("cld\n\trepne\n\tinsb"			: +			 "=D" (addr), "=c" (cnt)		: +			 "d" (port), "0" (addr), "1" (cnt)	: +			 "memory", "cc"); +} + +static __inline uint16_t +inw(int port) +{ +	uint16_t data; +	__asm __volatile("inw %w1,%0" 
: "=a" (data) : "d" (port)); +	return data; +} + +static __inline void +insw(int port, void *addr, int cnt) +{ +	__asm __volatile("cld\n\trepne\n\tinsw"			: +			 "=D" (addr), "=c" (cnt)		: +			 "d" (port), "0" (addr), "1" (cnt)	: +			 "memory", "cc"); +} + +static __inline uint32_t +inl(int port) +{ +	uint32_t data; +	__asm __volatile("inl %w1,%0" : "=a" (data) : "d" (port)); +	return data; +} + +static __inline void +insl(int port, void *addr, int cnt) +{ +	__asm __volatile("cld\n\trepne\n\tinsl"			: +			 "=D" (addr), "=c" (cnt)		: +			 "d" (port), "0" (addr), "1" (cnt)	: +			 "memory", "cc"); +} + +static __inline void +outb(int port, uint8_t data) +{ +	__asm __volatile("outb %0,%w1" : : "a" (data), "d" (port)); +} + +static __inline void +outsb(int port, const void *addr, int cnt) +{ +	__asm __volatile("cld\n\trepne\n\toutsb"		: +			 "=S" (addr), "=c" (cnt)		: +			 "d" (port), "0" (addr), "1" (cnt)	: +			 "cc"); +} + +static __inline void +outw(int port, uint16_t data) +{ +	__asm __volatile("outw %0,%w1" : : "a" (data), "d" (port)); +} + +static __inline void +outsw(int port, const void *addr, int cnt) +{ +	__asm __volatile("cld\n\trepne\n\toutsw"		: +			 "=S" (addr), "=c" (cnt)		: +			 "d" (port), "0" (addr), "1" (cnt)	: +			 "cc"); +} + +static __inline void +outsl(int port, const void *addr, int cnt) +{ +	__asm __volatile("cld\n\trepne\n\toutsl"		: +			 "=S" (addr), "=c" (cnt)		: +			 "d" (port), "0" (addr), "1" (cnt)	: +			 "cc"); +} + +static __inline void +outl(int port, uint32_t data) +{ +	__asm __volatile("outl %0,%w1" : : "a" (data), "d" (port)); +} + +static __inline void  +invlpg(void *addr) +{  +	__asm __volatile("invlpg (%0)" : : "r" (addr) : "memory"); +}   + +static __inline void +lidt(void *p) +{ +	__asm __volatile("lidt (%0)" : : "r" (p)); +} + +static __inline void +lldt(uint16_t sel) +{ +	__asm __volatile("lldt %0" : : "r" (sel)); +} + +static __inline void +ltr(uint16_t sel) +{ +	__asm __volatile("ltr %0" : : "r" (sel)); +} + +static __inline 
void +lcr0(uint32_t val) +{ +	__asm __volatile("movl %0,%%cr0" : : "r" (val)); +} + +static __inline uint32_t +rcr0(void) +{ +	uint32_t val; +	__asm __volatile("movl %%cr0,%0" : "=r" (val)); +	return val; +} + +static __inline uint32_t +rcr2(void) +{ +	uint32_t val; +	__asm __volatile("movl %%cr2,%0" : "=r" (val)); +	return val; +} + +static __inline void +lcr3(uint32_t val) +{ +	__asm __volatile("movl %0,%%cr3" : : "r" (val)); +} + +static __inline uint32_t +rcr3(void) +{ +	uint32_t val; +	__asm __volatile("movl %%cr3,%0" : "=r" (val)); +	return val; +} + +static __inline void +lcr4(uint32_t val) +{ +	__asm __volatile("movl %0,%%cr4" : : "r" (val)); +} + +static __inline uint32_t +rcr4(void) +{ +	uint32_t cr4; +	__asm __volatile("movl %%cr4,%0" : "=r" (cr4)); +	return cr4; +} + +static __inline void +tlbflush(void) +{ +	uint32_t cr3; +	__asm __volatile("movl %%cr3,%0" : "=r" (cr3)); +	__asm __volatile("movl %0,%%cr3" : : "r" (cr3)); +} + +static __inline uint32_t +read_eflags(void) +{ +        uint32_t eflags; +        __asm __volatile("pushfl; popl %0" : "=r" (eflags)); +        return eflags; +} + +static __inline void +write_eflags(uint32_t eflags) +{ +        __asm __volatile("pushl %0; popfl" : : "r" (eflags)); +} + +static __inline uint32_t +read_ebp(void) +{ +        uint32_t ebp; +        __asm __volatile("movl %%ebp,%0" : "=r" (ebp)); +        return ebp; +} + +static __inline uint32_t +read_esp(void) +{ +        uint32_t esp; +        __asm __volatile("movl %%esp,%0" : "=r" (esp)); +        return esp; +} + +static __inline void +cpuid(uint32_t info, uint32_t *eaxp, uint32_t *ebxp, uint32_t *ecxp, uint32_t *edxp) +{ +	uint32_t eax, ebx, ecx, edx; +	asm volatile("cpuid"  +		: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) +		: "a" (info)); +	if (eaxp) +		*eaxp = eax; +	if (ebxp) +		*ebxp = ebx; +	if (ecxp) +		*ecxp = ecx; +	if (edxp) +		*edxp = edx; +} + +static __inline uint64_t +read_tsc(void) +{ +        uint64_t tsc; +        __asm __volatile("rdtsc" 
: "=A" (tsc)); +        return tsc; +} + +struct PushRegs { +    /* registers as pushed by pusha */ +    uint32_t reg_edi; +    uint32_t reg_esi; +    uint32_t reg_ebp; +    uint32_t reg_oesp;      /* Useless */ +    uint32_t reg_ebx; +    uint32_t reg_edx; +    uint32_t reg_ecx; +    uint32_t reg_eax; +}; + +struct Trapframe { +    struct PushRegs tf_regs; +    uint16_t tf_es; +    uint16_t tf_padding1; +    uint16_t tf_ds; +    uint16_t tf_padding2; +    uint32_t tf_trapno; +    /* below here defined by x86 hardware */ +    uint32_t tf_err; +    uintptr_t tf_eip; +    uint16_t tf_cs; +    uint16_t tf_padding3; +    uint32_t tf_eflags; +    /* below here only when crossing rings, such as from user to kernel */ +    uintptr_t tf_esp; +    uint16_t tf_ss; +    uint16_t tf_padding4; +}; | 
