| field | value | date |
|---|---|---|
| author | Robert Morris <rtm@csail.mit.edu> | 2019-06-11 09:57:14 -0400 |
| committer | Robert Morris <rtm@csail.mit.edu> | 2019-06-11 09:57:14 -0400 |
| commit | 5753553213df8f9de851adb68377db43faecb91f (patch) | |
| tree | 3b629ff54897fca414146677532cb459a2ed11ba /kernel | |
| parent | 91ba81110acd3163f7de3580b677eece0c57f5e7 (diff) | |
separate source into kernel/ user/ mkfs/
Diffstat (limited to 'kernel')
43 files changed, 5609 insertions, 0 deletions
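Before the file-by-file diff, a quick orientation: the new kernel/ tree keeps the classic xv6 layering, and bio.c and log.c below spell out the idiom for updating a disk block inside a file-system transaction. The sketch below restates that idiom for reference; it is illustrative only and is not part of the commit, and `update_block()` and its parameters are invented names.

```c
// Illustrative sketch (not part of this commit): the block-update idiom that
// bio.c and log.c describe — bread(), modify, log_write(), brelse(), all
// bracketed by begin_op()/end_op(). update_block() is a made-up name.
#include "types.h"
#include "param.h"
#include "spinlock.h"
#include "sleeplock.h"
#include "riscv.h"
#include "defs.h"
#include "fs.h"
#include "buf.h"

void
update_block(uint dev, uint blockno, uchar byte)
{
  struct buf *bp;

  begin_op();                // enter an FS transaction (log.c)
  bp = bread(dev, blockno);  // locked buffer holding the block's contents
  bp->data[0] = byte;        // modify the cached copy
  log_write(bp);             // record the block in the log instead of bwrite()
  brelse(bp);                // release the buffer; do not use bp afterwards
  end_op();                  // commit once the last outstanding op finishes
}
```

As log.c notes, log_write() pins the buffer with B_DIRTY so the cache cannot recycle it before commit(), which is why it replaces a direct bwrite() in this idiom.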
| diff --git a/kernel/bio.c b/kernel/bio.c new file mode 100644 index 0000000..90f9af9 --- /dev/null +++ b/kernel/bio.c @@ -0,0 +1,145 @@ +// Buffer cache. +// +// The buffer cache is a linked list of buf structures holding +// cached copies of disk block contents.  Caching disk blocks +// in memory reduces the number of disk reads and also provides +// a synchronization point for disk blocks used by multiple processes. +// +// Interface: +// * To get a buffer for a particular disk block, call bread. +// * After changing buffer data, call bwrite to write it to disk. +// * When done with the buffer, call brelse. +// * Do not use the buffer after calling brelse. +// * Only one process at a time can use a buffer, +//     so do not keep them longer than necessary. +// +// The implementation uses two state flags internally: +// * B_VALID: the buffer data has been read from the disk. +// * B_DIRTY: the buffer data has been modified +//     and needs to be written to disk. + +#include "types.h" +#include "param.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "riscv.h" +#include "defs.h" +#include "fs.h" +#include "buf.h" + +struct { +  struct spinlock lock; +  struct buf buf[NBUF]; + +  // Linked list of all buffers, through prev/next. +  // head.next is most recently used. +  struct buf head; +} bcache; + +void +binit(void) +{ +  struct buf *b; + +  initlock(&bcache.lock, "bcache"); + +//PAGEBREAK! +  // Create linked list of buffers +  bcache.head.prev = &bcache.head; +  bcache.head.next = &bcache.head; +  for(b = bcache.buf; b < bcache.buf+NBUF; b++){ +    b->next = bcache.head.next; +    b->prev = &bcache.head; +    initsleeplock(&b->lock, "buffer"); +    bcache.head.next->prev = b; +    bcache.head.next = b; +  } +} + +// Look through buffer cache for block on device dev. +// If not found, allocate a buffer. +// In either case, return locked buffer. +static struct buf* +bget(uint dev, uint blockno) +{ +  struct buf *b; + +  acquire(&bcache.lock); + +  // Is the block already cached? +  for(b = bcache.head.next; b != &bcache.head; b = b->next){ +    if(b->dev == dev && b->blockno == blockno){ +      b->refcnt++; +      release(&bcache.lock); +      acquiresleep(&b->lock); +      return b; +    } +  } + +  // Not cached; recycle an unused buffer. +  // Even if refcnt==0, B_DIRTY indicates a buffer is in use +  // because log.c has modified it but not yet committed it. +  for(b = bcache.head.prev; b != &bcache.head; b = b->prev){ +    if(b->refcnt == 0 && (b->flags & B_DIRTY) == 0) { +      b->dev = dev; +      b->blockno = blockno; +      b->flags = 0; +      b->refcnt = 1; +      release(&bcache.lock); +      acquiresleep(&b->lock); +      return b; +    } +  } +  panic("bget: no buffers"); +} + +// Return a locked buf with the contents of the indicated block. +struct buf* +bread(uint dev, uint blockno) +{ +  struct buf *b; + +  b = bget(dev, blockno); +  if((b->flags & B_VALID) == 0) { +    ramdiskrw(b); +  } +  return b; +} + +// Write b's contents to disk.  Must be locked. +void +bwrite(struct buf *b) +{ +  if(!holdingsleep(&b->lock)) +    panic("bwrite"); +  b->flags |= B_DIRTY; +  ramdiskrw(b); +} + +// Release a locked buffer. +// Move to the head of the MRU list. +void +brelse(struct buf *b) +{ +  if(!holdingsleep(&b->lock)) +    panic("brelse"); + +  releasesleep(&b->lock); + +  acquire(&bcache.lock); +  b->refcnt--; +  if (b->refcnt == 0) { +    // no one is waiting for it. 
+    b->next->prev = b->prev; +    b->prev->next = b->next; +    b->next = bcache.head.next; +    b->prev = &bcache.head; +    bcache.head.next->prev = b; +    bcache.head.next = b; +  } +   +  release(&bcache.lock); +} +//PAGEBREAK! +// Blank page. + diff --git a/kernel/buf.h b/kernel/buf.h new file mode 100644 index 0000000..3266495 --- /dev/null +++ b/kernel/buf.h @@ -0,0 +1,14 @@ +struct buf { +  int flags; +  uint dev; +  uint blockno; +  struct sleeplock lock; +  uint refcnt; +  struct buf *prev; // LRU cache list +  struct buf *next; +  struct buf *qnext; // disk queue +  uchar data[BSIZE]; +}; +#define B_VALID 0x2  // buffer has been read from disk +#define B_DIRTY 0x4  // buffer needs to be written to disk + diff --git a/kernel/console.c b/kernel/console.c new file mode 100644 index 0000000..b20d4a9 --- /dev/null +++ b/kernel/console.c @@ -0,0 +1,271 @@ +// Console input and output. +// Input is from the keyboard or serial port. +// Output is written to the screen and serial port. + +#include <stdarg.h> + +#include "types.h" +#include "param.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "file.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" +#include "proc.h" + +static void consputc(int); + +static volatile int panicked = 0; + +static struct { +  struct spinlock lock; +  int locking; +} cons; + +static char digits[] = "0123456789abcdef"; + +static void +printint(int xx, int base, int sign) +{ +  char buf[16]; +  int i; +  uint x; + +  if(sign && (sign = xx < 0)) +    x = -xx; +  else +    x = xx; + +  i = 0; +  do{ +    buf[i++] = digits[x % base]; +  }while((x /= base) != 0); + +  if(sign) +    buf[i++] = '-'; + +  while(--i >= 0) +    consputc(buf[i]); +} + +static void +printptr(uint64 x) { +  int i; +  consputc('0'); +  consputc('x'); +  for (i = 0; i < (sizeof(uint64) * 2); i++, x <<= 4) +    consputc(digits[x >> (sizeof(uint64) * 8 - 4)]); +} + + +//PAGEBREAK: 50 + +// Print to the console. only understands %d, %x, %p, %s. +void +printf(char *fmt, ...) +{ +  va_list ap; +  int i, c, locking; +  char *s; + +  locking = cons.locking; +  if(locking) +    acquire(&cons.lock); + +  if (fmt == 0) +    panic("null fmt"); + +  va_start(ap, fmt); +  for(i = 0; (c = fmt[i] & 0xff) != 0; i++){ +    if(c != '%'){ +      consputc(c); +      continue; +    } +    c = fmt[++i] & 0xff; +    if(c == 0) +      break; +    switch(c){ +    case 'd': +      printint(va_arg(ap, int), 10, 1); +      break; +    case 'x': +      printint(va_arg(ap, int), 16, 1); +      break; +    case 'p': +      printptr(va_arg(ap, uint64)); +      break; +    case 's': +      if((s = va_arg(ap, char*)) == 0) +        s = "(null)"; +      for(; *s; s++) +        consputc(*s); +      break; +    case '%': +      consputc('%'); +      break; +    default: +      // Print unknown % sequence to draw attention. 
+      consputc('%'); +      consputc(c); +      break; +    } +  } + +  if(locking) +    release(&cons.lock); +} + +void +panic(char *s) +{ +  cons.locking = 0; +  printf("panic: "); +  printf(s); +  printf("\n"); +  panicked = 1; // freeze other CPU +  for(;;) +    ; +} + +#define BACKSPACE 0x100 + +void +consputc(int c) +{ +  if(panicked){ +    for(;;) +      ; +  } + +  if(c == BACKSPACE){ +    uartputc('\b'); uartputc(' '); uartputc('\b'); +  } else +    uartputc(c); +} + +#define INPUT_BUF 128 +struct { +  char buf[INPUT_BUF]; +  uint r;  // Read index +  uint w;  // Write index +  uint e;  // Edit index +} input; + +#define C(x)  ((x)-'@')  // Contro + +int +consoleread(struct inode *ip, int user_dst, uint64 dst, int n) +{ +  uint target; +  int c; +  char buf[1]; + +  iunlock(ip); +  target = n; +  acquire(&cons.lock); +  while(n > 0){ +    while(input.r == input.w){ +      if(myproc()->killed){ +        release(&cons.lock); +        ilock(ip); +        return -1; +      } +      sleep(&input.r, &cons.lock); +    } +    c = input.buf[input.r++ % INPUT_BUF]; +    if(c == C('D')){  // EOF +      if(n < target){ +        // Save ^D for next time, to make sure +        // caller gets a 0-byte result. +        input.r--; +      } +      break; +    } +    buf[0] = c; +    if(either_copyout(user_dst, dst, &buf[0], 1) == -1) +      break; +    dst++; +    --n; +    if(c == '\n') +      break; +  } +  release(&cons.lock); +  ilock(ip); + +  return target - n; +} + +int +consolewrite(struct inode *ip, int user_src, uint64 src, int n) +{ +  int i; + +  iunlock(ip); +  acquire(&cons.lock); +  for(i = 0; i < n; i++){ +    char c; +    if(either_copyin(&c, user_src, src+i, 1) == -1) +      break; +    consputc(c); +  } +  release(&cons.lock); +  ilock(ip); + +  return n; +} + +void +consoleintr(int c) +{ +  int doprocdump = 0; +   +  acquire(&cons.lock); + +  switch(c){ +  case C('P'):  // Process list. +    // procdump() locks cons.lock indirectly; invoke later +    doprocdump = 1; +    break; +  case C('U'):  // Kill line. +    while(input.e != input.w && +          input.buf[(input.e-1) % INPUT_BUF] != '\n'){ +      input.e--; +      consputc(BACKSPACE); +    } +    break; +  case C('H'): case '\x7f':  // Backspace +    if(input.e != input.w){ +      input.e--; +      consputc(BACKSPACE); +    } +    break; +  default: +    if(c != 0 && input.e-input.r < INPUT_BUF){ +      c = (c == '\r') ? 
'\n' : c; +      input.buf[input.e++ % INPUT_BUF] = c; +      consputc(c); +      if(c == '\n' || c == C('D') || input.e == input.r+INPUT_BUF){ +        input.w = input.e; +        wakeup(&input.r); +      } +    } +    break; +  } +   +  release(&cons.lock); + +  if(doprocdump) +    procdump(); +} + +void +consoleinit(void) +{ +  initlock(&cons.lock, "console"); + +  devsw[CONSOLE].write = consolewrite; +  devsw[CONSOLE].read = consoleread; +  cons.locking = 1; +} diff --git a/kernel/date.h b/kernel/date.h new file mode 100644 index 0000000..94aec4b --- /dev/null +++ b/kernel/date.h @@ -0,0 +1,8 @@ +struct rtcdate { +  uint second; +  uint minute; +  uint hour; +  uint day; +  uint month; +  uint year; +}; diff --git a/kernel/defs.h b/kernel/defs.h new file mode 100644 index 0000000..597e5b6 --- /dev/null +++ b/kernel/defs.h @@ -0,0 +1,205 @@ +struct buf; +struct context; +struct file; +struct inode; +struct pipe; +struct proc; +struct rtcdate; +struct spinlock; +struct sleeplock; +struct stat; +struct superblock; +struct sysframe; + +// bio.c +void            binit(void); +struct buf*     bread(uint, uint); +void            brelse(struct buf*); +void            bwrite(struct buf*); + +// console.c +void            consoleinit(void); +void            printf(char*, ...); +void            consoleintr(int); +void            panic(char*) __attribute__((noreturn)); + +// exec.c +int             exec(char*, char**); + +// file.c +struct file*    filealloc(void); +void            fileclose(struct file*); +struct file*    filedup(struct file*); +void            fileinit(void); +int             fileread(struct file*, uint64, int n); +int             filestat(struct file*, uint64 addr); +int             filewrite(struct file*, uint64, int n); + +// fs.c +void            readsb(int dev, struct superblock *sb); +int             dirlink(struct inode*, char*, uint); +struct inode*   dirlookup(struct inode*, char*, uint*); +struct inode*   ialloc(uint, short); +struct inode*   idup(struct inode*); +void            iinit(int dev); +void            ilock(struct inode*); +void            iput(struct inode*); +void            iunlock(struct inode*); +void            iunlockput(struct inode*); +void            iupdate(struct inode*); +int             namecmp(const char*, const char*); +struct inode*   namei(char*); +struct inode*   nameiparent(char*, char*); +int             readi(struct inode*, int, uint64, uint, uint); +void            stati(struct inode*, struct stat*); +int             writei(struct inode*, int, uint64, uint, uint); + +// ramdisk.c +void            ramdiskinit(void); +void            ramdiskintr(void); +void            ramdiskrw(struct buf*); + +// ioapic.c +void            ioapicenable(int irq, int cpu); +extern uchar    ioapicid; +void            ioapicinit(void); + +// kalloc.c +void*           kalloc(void); +void            kfree(void *); +void            kinit(); + +// kbd.c +void            kbdintr(void); + +// lapic.c +void            cmostime(struct rtcdate *r); +int             lapicid(void); +extern volatile uint*    lapic; +void            lapiceoi(void); +void            lapicinit(void); +void            lapicstartap(uchar, uint); +void            microdelay(int); + +// log.c +void            initlog(int dev); +void            log_write(struct buf*); +void            begin_op(); +void            end_op(); + +// mp.c +extern int      ismp; +void            mpinit(void); + +// picirq.c +void            picenable(int); +void            picinit(void); + +// pipe.c +int            
 pipealloc(struct file**, struct file**); +void            pipeclose(struct pipe*, int); +int             piperead(struct pipe*, uint64, int); +int             pipewrite(struct pipe*, uint64, int); + +//PAGEBREAK: 16 +// proc.c +int             cpuid(void); +void            exit(void); +int             fork(void); +int             growproc(int); +pagetable_t     proc_pagetable(struct proc *); +void            proc_freepagetable(pagetable_t, uint64); +int             kill(int); +struct cpu*     mycpu(void); +struct cpu*     getmycpu(void); +struct proc*    myproc(); +void            procinit(void); +void            scheduler(void) __attribute__((noreturn)); +void            sched(void); +void            setproc(struct proc*); +void            sleep(void*, struct spinlock*); +void            userinit(void); +int             wait(void); +void            wakeup(void*); +void            yield(void); +int             either_copyout(int user_dst, uint64 dst, void *src, uint64 len); +int             either_copyin(void *dst, int user_src, uint64 src, uint64 len); +void            procdump(void); + +// swtch.S +void            swtch(struct context*, struct context*); + +// spinlock.c +void            acquire(struct spinlock*); +int             holding(struct spinlock*); +void            initlock(struct spinlock*, char*); +void            release(struct spinlock*); +void            push_off(void); +void            pop_off(void); + +// sleeplock.c +void            acquiresleep(struct sleeplock*); +void            releasesleep(struct sleeplock*); +int             holdingsleep(struct sleeplock*); +void            initsleeplock(struct sleeplock*, char*); + +// string.c +int             memcmp(const void*, const void*, uint); +void*           memmove(void*, const void*, uint); +void*           memset(void*, int, uint); +char*           safestrcpy(char*, const char*, int); +int             strlen(const char*); +int             strncmp(const char*, const char*, uint); +char*           strncpy(char*, const char*, int); + +// syscall.c +int             argint(int, int*); +int             argptr(int, uint64*, int); +int             argstr(int, char*, int); +int             argaddr(int, uint64 *); +int             fetchint(uint64, int*); +int             fetchstr(uint64, char*, int); +int             fetchaddr(uint64, uint64*); +void            syscall(); + +// timer.c +void            timerinit(void); + +// trap.c +extern uint     ticks; +void            trapinit(void); +void            trapinithart(void); +extern struct spinlock tickslock; +void            usertrapret(void); + +// uart.c +void            uartinit(void); +void            uartintr(void); +void            uartputc(int); +int             uartgetc(void); + +// vm.c +void            kvminit(void); +void            kvminithart(void); +pagetable_t     uvmcreate(void); +void            uvminit(pagetable_t, uchar *, uint); +uint64          uvmalloc(pagetable_t, uint64, uint64); +uint64          uvmdealloc(pagetable_t, uint64, uint64); +void            uvmcopy(pagetable_t, pagetable_t, uint64); +void            uvmfree(pagetable_t, uint64); +void            mappages(pagetable_t, uint64, uint64, uint64, int); +void            unmappages(pagetable_t, uint64, uint64, int); +uint64          walkaddr(pagetable_t, uint64); +int             copyout(pagetable_t, uint64, char *, uint64); +int             copyin(pagetable_t, char *, uint64, uint64); +int             copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max); + +// plic.c +void          
  plicinit(void); +void            plicinithart(void); +uint64          plic_pending(void); +int             plic_claim(void); +void            plic_complete(int); + +// number of elements in fixed-size array +#define NELEM(x) (sizeof(x)/sizeof((x)[0])) diff --git a/kernel/elf.h b/kernel/elf.h new file mode 100644 index 0000000..84555fa --- /dev/null +++ b/kernel/elf.h @@ -0,0 +1,42 @@ +// Format of an ELF executable file + +#define ELF_MAGIC 0x464C457FU  // "\x7FELF" in little endian + +// File header +struct elfhdr { +  uint magic;  // must equal ELF_MAGIC +  uchar elf[12]; +  ushort type; +  ushort machine; +  uint version; +  uint64 entry; +  uint64 phoff; +  uint64 shoff; +  uint flags; +  ushort ehsize; +  ushort phentsize; +  ushort phnum; +  ushort shentsize; +  ushort shnum; +  ushort shstrndx; +}; + +// Program section header +struct proghdr { +  uint32 type; +  uint32 flags; +  uint64 off; +  uint64 vaddr; +  uint64 paddr; +  uint64 filesz; +  uint64 memsz; +  uint64 align; +}; + +// Values for Proghdr type +#define ELF_PROG_LOAD           1 + +// Flag bits for Proghdr flags +#define ELF_PROG_FLAG_EXEC      1 +#define ELF_PROG_FLAG_WRITE     2 +#define ELF_PROG_FLAG_READ      4 diff --git a/kernel/entry.S b/kernel/entry.S new file mode 100644 index 0000000..b3d2c55 --- /dev/null +++ b/kernel/entry.S @@ -0,0 +1,25 @@ +	# qemu -kernel starts at 0x1000. the instructions +        # there seem to be provided by qemu, as if it +        # were a ROM. the code at 0x1000 jumps to +        # 0x8000000, the _start function here, +        # in machine mode. +.section .data +.globl stack0 +.section .text +.globl mstart +.section .text +.globl _entry +_entry: +	# set up a stack for C. +        # stack0 is declared in start, +        # with 4096 bytes per CPU. +        la sp, stack0 +        li a0, 1024*4 +	csrr a1, mhartid +        addi a1, a1, 1 +        mul a0, a0, a1 +        add sp, sp, a0 +	# jump to mstart() in start.c +        call mstart +junk: +        j junk diff --git a/kernel/exec.c b/kernel/exec.c new file mode 100644 index 0000000..c9af395 --- /dev/null +++ b/kernel/exec.c @@ -0,0 +1,149 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "proc.h" +#include "defs.h" +#include "elf.h" + +static int loadseg(pde_t *pgdir, uint64 addr, struct inode *ip, uint offset, uint sz); + +int +exec(char *path, char **argv) +{ +  char *s, *last; +  int i, off; +  uint64 argc, sz, sp, ustack[MAXARG+1], stackbase; +  struct elfhdr elf; +  struct inode *ip; +  struct proghdr ph; +  pagetable_t pagetable = 0, oldpagetable; +  struct proc *p = myproc(); +  uint64 oldsz = p->sz; + +  begin_op(); + +  if((ip = namei(path)) == 0){ +    end_op(); +    return -1; +  } +  ilock(ip); + +  // Check ELF header +  if(readi(ip, 0, (uint64)&elf, 0, sizeof(elf)) != sizeof(elf)) +    goto bad; +  if(elf.magic != ELF_MAGIC) +    goto bad; + +  if((pagetable = proc_pagetable(p)) == 0) +    goto bad; + +  // Load program into memory. 
+  sz = 0; +  for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){ +    if(readi(ip, 0, (uint64)&ph, off, sizeof(ph)) != sizeof(ph)) +      goto bad; +    if(ph.type != ELF_PROG_LOAD) +      continue; +    if(ph.memsz < ph.filesz) +      goto bad; +    if(ph.vaddr + ph.memsz < ph.vaddr) +      goto bad; +    if((sz = uvmalloc(pagetable, sz, ph.vaddr + ph.memsz)) == 0) +      goto bad; +    if(ph.vaddr % PGSIZE != 0) +      goto bad; +    if(loadseg(pagetable, ph.vaddr, ip, ph.off, ph.filesz) < 0) +      goto bad; +  } +  iunlockput(ip); +  end_op(); +  ip = 0; + +  // Allocate two pages at the next page boundary. +  // Use the second as the user stack. +  sz = PGROUNDUP(sz); +  if((sz = uvmalloc(pagetable, sz, sz + 2*PGSIZE)) == 0) +    goto bad; +  sp = sz; +  stackbase = sp - PGSIZE; + +  // Push argument strings, prepare rest of stack in ustack. +  for(argc = 0; argv[argc]; argc++) { +    if(argc >= MAXARG) +      goto bad; +    sp -= strlen(argv[argc]) + 1; +    sp -= sp % 16; // riscv sp must be 16-byte aligned +    if(sp < stackbase) +      goto bad; +    if(copyout(pagetable, sp, argv[argc], strlen(argv[argc]) + 1) < 0) +      goto bad; +    ustack[argc] = sp; +  } +  ustack[argc] = 0; + +  // push the array of argv[] pointers. +  sp -= (argc+1) * sizeof(uint64); +  sp -= sp % 16; +  if(sp < stackbase) +    goto bad; +  if(copyout(pagetable, sp, (char *)ustack, (argc+1)*sizeof(uint64)) < 0) +    goto bad; + +  // arguments to user main(argc, argv) +  // argc is returned via the system call return +  // value, which goes in a0. +  p->tf->a1 = sp; + +  // Save program name for debugging. +  for(last=s=path; *s; s++) +    if(*s == '/') +      last = s+1; +  safestrcpy(p->name, last, sizeof(p->name)); +     +  // Commit to the user image. +  oldpagetable = p->pagetable; +  p->pagetable = pagetable; +  p->sz = sz; +  p->tf->epc = elf.entry;  // initial program counter = main +  p->tf->sp = sp; // initial stack pointer +  proc_freepagetable(oldpagetable, oldsz); +  return argc; // this ends up in a0, the first argument to main(argc, argv) + + bad: +  if(pagetable) +    proc_freepagetable(pagetable, sz); +  if(ip){ +    iunlockput(ip); +    end_op(); +  } +  return -1; +} + +// Load a program segment into pagetable at virtual address va. +// va must be page-aligned +// and the pages from va to va+sz must already be mapped. +// Returns 0 on success, -1 on failure. +static int +loadseg(pagetable_t pagetable, uint64 va, struct inode *ip, uint offset, uint sz) +{ +  uint i, n; +  uint64 pa; + +  if((va % PGSIZE) != 0) +    panic("loadseg: va must be page aligned"); + +  for(i = 0; i < sz; i += PGSIZE){ +    pa = walkaddr(pagetable, va + i); +    if(pa == 0) +      panic("loadseg: address should exist"); +    if(sz - i < PGSIZE) +      n = sz - i; +    else +      n = PGSIZE; +    if(readi(ip, 0, (uint64)pa, offset+i, n) != n) +      return -1; +  } +   +  return 0; +} diff --git a/kernel/fcntl.h b/kernel/fcntl.h new file mode 100644 index 0000000..d565483 --- /dev/null +++ b/kernel/fcntl.h @@ -0,0 +1,4 @@ +#define O_RDONLY  0x000 +#define O_WRONLY  0x001 +#define O_RDWR    0x002 +#define O_CREATE  0x200 diff --git a/kernel/file.c b/kernel/file.c new file mode 100644 index 0000000..6f27f22 --- /dev/null +++ b/kernel/file.c @@ -0,0 +1,175 @@ +// +// Support functions for system calls that involve file descriptors. 
+// + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "fs.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "file.h" +#include "stat.h" +#include "proc.h" + +struct devsw devsw[NDEV]; +struct { +  struct spinlock lock; +  struct file file[NFILE]; +} ftable; + +void +fileinit(void) +{ +  initlock(&ftable.lock, "ftable"); +} + +// Allocate a file structure. +struct file* +filealloc(void) +{ +  struct file *f; + +  acquire(&ftable.lock); +  for(f = ftable.file; f < ftable.file + NFILE; f++){ +    if(f->ref == 0){ +      f->ref = 1; +      release(&ftable.lock); +      return f; +    } +  } +  release(&ftable.lock); +  return 0; +} + +// Increment ref count for file f. +struct file* +filedup(struct file *f) +{ +  acquire(&ftable.lock); +  if(f->ref < 1) +    panic("filedup"); +  f->ref++; +  release(&ftable.lock); +  return f; +} + +// Close file f.  (Decrement ref count, close when reaches 0.) +void +fileclose(struct file *f) +{ +  struct file ff; + +  acquire(&ftable.lock); +  if(f->ref < 1) +    panic("fileclose"); +  if(--f->ref > 0){ +    release(&ftable.lock); +    return; +  } +  ff = *f; +  f->ref = 0; +  f->type = FD_NONE; +  release(&ftable.lock); + +  if(ff.type == FD_PIPE) +    pipeclose(ff.pipe, ff.writable); +  else if(ff.type == FD_INODE){ +    begin_op(); +    iput(ff.ip); +    end_op(); +  } +} + +// Get metadata about file f. +// addr is a user virtual address, pointing to a struct stat. +int +filestat(struct file *f, uint64 addr) +{ +  struct proc *p = myproc(); +  struct stat st; +   +  if(f->type == FD_INODE){ +    ilock(f->ip); +    stati(f->ip, &st); +    iunlock(f->ip); +    if(copyout(p->pagetable, addr, (char *)&st, sizeof(st)) < 0) +      return -1; +    return 0; +  } +  return -1; +} + +// Read from file f. +// addr is a user virtual address. +int +fileread(struct file *f, uint64 addr, int n) +{ +  int r = 0; + +  if(f->readable == 0) +    return -1; + +  if(f->type == FD_PIPE){ +    r = piperead(f->pipe, addr, n); +  } else if(f->type == FD_INODE){ +    ilock(f->ip); +    if((r = readi(f->ip, 1, addr, f->off, n)) > 0) +      f->off += r; +    iunlock(f->ip); +  } else { +    panic("fileread"); +  } + +  return r; +} + +//PAGEBREAK! +// Write to file f. +// addr is a user virtual address. +int +filewrite(struct file *f, uint64 addr, int n) +{ +  int r, ret = 0; + +  if(f->writable == 0) +    return -1; + +  if(f->type == FD_PIPE){ +    ret = pipewrite(f->pipe, addr, n); +  } else if(f->type == FD_INODE){ +    // write a few blocks at a time to avoid exceeding +    // the maximum log transaction size, including +    // i-node, indirect block, allocation blocks, +    // and 2 blocks of slop for non-aligned writes. +    // this really belongs lower down, since writei() +    // might be writing a device like the console. +    int max = ((MAXOPBLOCKS-1-1-2) / 2) * 512; +    int i = 0; +    while(i < n){ +      int n1 = n - i; +      if(n1 > max) +        n1 = max; + +      begin_op(); +      ilock(f->ip); +      if ((r = writei(f->ip, 1, addr + i, f->off, n1)) > 0) +        f->off += r; +      iunlock(f->ip); +      end_op(); + +      if(r < 0) +        break; +      if(r != n1) +        panic("short filewrite"); +      i += r; +    } +    ret = (i == n ? 
n : -1); +  } else { +    panic("filewrite"); +  } + +  return ret; +} + diff --git a/kernel/file.h b/kernel/file.h new file mode 100644 index 0000000..f28018f --- /dev/null +++ b/kernel/file.h @@ -0,0 +1,37 @@ +struct file { +  enum { FD_NONE, FD_PIPE, FD_INODE } type; +  int ref; // reference count +  char readable; +  char writable; +  struct pipe *pipe; +  struct inode *ip; +  uint off; +}; + + +// in-memory copy of an inode +struct inode { +  uint dev;           // Device number +  uint inum;          // Inode number +  int ref;            // Reference count +  struct sleeplock lock; // protects everything below here +  int valid;          // inode has been read from disk? + +  short type;         // copy of disk inode +  short major; +  short minor; +  short nlink; +  uint size; +  uint addrs[NDIRECT+1]; +}; + +// table mapping major device number to +// device functions +struct devsw { +  int (*read)(struct inode*, int, uint64, int); +  int (*write)(struct inode*, int, uint64, int); +}; + +extern struct devsw devsw[]; + +#define CONSOLE 1 diff --git a/kernel/fs.c b/kernel/fs.c new file mode 100644 index 0000000..ebe377a --- /dev/null +++ b/kernel/fs.c @@ -0,0 +1,681 @@ +// File system implementation.  Five layers: +//   + Blocks: allocator for raw disk blocks. +//   + Log: crash recovery for multi-step updates. +//   + Files: inode allocator, reading, writing, metadata. +//   + Directories: inode with special contents (list of other inodes!) +//   + Names: paths like /usr/rtm/xv6/fs.c for convenient naming. +// +// This file contains the low-level file system manipulation +// routines.  The (higher-level) system call implementations +// are in sysfile.c. + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "stat.h" +#include "proc.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" +#include "file.h" + +#define min(a, b) ((a) < (b) ? (a) : (b)) +static void itrunc(struct inode*); +// there should be one superblock per disk device, but we run with +// only one device +struct superblock sb;  + +// Read the super block. +void +readsb(int dev, struct superblock *sb) +{ +  struct buf *bp; + +  bp = bread(dev, 1); +  memmove(sb, bp->data, sizeof(*sb)); +  brelse(bp); +} + +// Zero a block. +static void +bzero(int dev, int bno) +{ +  struct buf *bp; + +  bp = bread(dev, bno); +  memset(bp->data, 0, BSIZE); +  log_write(bp); +  brelse(bp); +} + +// Blocks. + +// Allocate a zeroed disk block. +static uint +balloc(uint dev) +{ +  int b, bi, m; +  struct buf *bp; + +  bp = 0; +  for(b = 0; b < sb.size; b += BPB){ +    bp = bread(dev, BBLOCK(b, sb)); +    for(bi = 0; bi < BPB && b + bi < sb.size; bi++){ +      m = 1 << (bi % 8); +      if((bp->data[bi/8] & m) == 0){  // Is block free? +        bp->data[bi/8] |= m;  // Mark block in use. +        log_write(bp); +        brelse(bp); +        bzero(dev, b + bi); +        return b + bi; +      } +    } +    brelse(bp); +  } +  panic("balloc: out of blocks"); +} + +// Free a disk block. +static void +bfree(int dev, uint b) +{ +  struct buf *bp; +  int bi, m; + +  readsb(dev, &sb); +  bp = bread(dev, BBLOCK(b, sb)); +  bi = b % BPB; +  m = 1 << (bi % 8); +  if((bp->data[bi/8] & m) == 0) +    panic("freeing free block"); +  bp->data[bi/8] &= ~m; +  log_write(bp); +  brelse(bp); +} + +// Inodes. +// +// An inode describes a single unnamed file. 
+// The inode disk structure holds metadata: the file's type, +// its size, the number of links referring to it, and the +// list of blocks holding the file's content. +// +// The inodes are laid out sequentially on disk at +// sb.startinode. Each inode has a number, indicating its +// position on the disk. +// +// The kernel keeps a cache of in-use inodes in memory +// to provide a place for synchronizing access +// to inodes used by multiple processes. The cached +// inodes include book-keeping information that is +// not stored on disk: ip->ref and ip->valid. +// +// An inode and its in-memory representation go through a +// sequence of states before they can be used by the +// rest of the file system code. +// +// * Allocation: an inode is allocated if its type (on disk) +//   is non-zero. ialloc() allocates, and iput() frees if +//   the reference and link counts have fallen to zero. +// +// * Referencing in cache: an entry in the inode cache +//   is free if ip->ref is zero. Otherwise ip->ref tracks +//   the number of in-memory pointers to the entry (open +//   files and current directories). iget() finds or +//   creates a cache entry and increments its ref; iput() +//   decrements ref. +// +// * Valid: the information (type, size, &c) in an inode +//   cache entry is only correct when ip->valid is 1. +//   ilock() reads the inode from +//   the disk and sets ip->valid, while iput() clears +//   ip->valid if ip->ref has fallen to zero. +// +// * Locked: file system code may only examine and modify +//   the information in an inode and its content if it +//   has first locked the inode. +// +// Thus a typical sequence is: +//   ip = iget(dev, inum) +//   ilock(ip) +//   ... examine and modify ip->xxx ... +//   iunlock(ip) +//   iput(ip) +// +// ilock() is separate from iget() so that system calls can +// get a long-term reference to an inode (as for an open file) +// and only lock it for short periods (e.g., in read()). +// The separation also helps avoid deadlock and races during +// pathname lookup. iget() increments ip->ref so that the inode +// stays cached and pointers to it remain valid. +// +// Many internal file system functions expect the caller to +// have locked the inodes involved; this lets callers create +// multi-step atomic operations. +// +// The icache.lock spin-lock protects the allocation of icache +// entries. Since ip->ref indicates whether an entry is free, +// and ip->dev and ip->inum indicate which i-node an entry +// holds, one must hold icache.lock while using any of those fields. +// +// An ip->lock sleep-lock protects all ip-> fields other than ref, +// dev, and inum.  One must hold ip->lock in order to +// read or write that inode's ip->valid, ip->size, ip->type, &c. + +struct { +  struct spinlock lock; +  struct inode inode[NINODE]; +} icache; + +void +iinit(int dev) +{ +  int i = 0; +   +  initlock(&icache.lock, "icache"); +  for(i = 0; i < NINODE; i++) { +    initsleeplock(&icache.inode[i].lock, "inode"); +  } + +  readsb(dev, &sb); +  if(sb.magic != FSMAGIC) +    panic("invalid file system"); +} + +static struct inode* iget(uint dev, uint inum); + +//PAGEBREAK! +// Allocate an inode on device dev. +// Mark it as allocated by  giving it type type. +// Returns an unlocked but allocated and referenced inode. 
+struct inode* +ialloc(uint dev, short type) +{ +  int inum; +  struct buf *bp; +  struct dinode *dip; + +  for(inum = 1; inum < sb.ninodes; inum++){ +    bp = bread(dev, IBLOCK(inum, sb)); +    dip = (struct dinode*)bp->data + inum%IPB; +    if(dip->type == 0){  // a free inode +      memset(dip, 0, sizeof(*dip)); +      dip->type = type; +      log_write(bp);   // mark it allocated on the disk +      brelse(bp); +      return iget(dev, inum); +    } +    brelse(bp); +  } +  panic("ialloc: no inodes"); +} + +// Copy a modified in-memory inode to disk. +// Must be called after every change to an ip->xxx field +// that lives on disk, since i-node cache is write-through. +// Caller must hold ip->lock. +void +iupdate(struct inode *ip) +{ +  struct buf *bp; +  struct dinode *dip; + +  bp = bread(ip->dev, IBLOCK(ip->inum, sb)); +  dip = (struct dinode*)bp->data + ip->inum%IPB; +  dip->type = ip->type; +  dip->major = ip->major; +  dip->minor = ip->minor; +  dip->nlink = ip->nlink; +  dip->size = ip->size; +  memmove(dip->addrs, ip->addrs, sizeof(ip->addrs)); +  log_write(bp); +  brelse(bp); +} + +// Find the inode with number inum on device dev +// and return the in-memory copy. Does not lock +// the inode and does not read it from disk. +static struct inode* +iget(uint dev, uint inum) +{ +  struct inode *ip, *empty; + +  acquire(&icache.lock); + +  // Is the inode already cached? +  empty = 0; +  for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){ +    if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){ +      ip->ref++; +      release(&icache.lock); +      return ip; +    } +    if(empty == 0 && ip->ref == 0)    // Remember empty slot. +      empty = ip; +  } + +  // Recycle an inode cache entry. +  if(empty == 0) +    panic("iget: no inodes"); + +  ip = empty; +  ip->dev = dev; +  ip->inum = inum; +  ip->ref = 1; +  ip->valid = 0; +  release(&icache.lock); + +  return ip; +} + +// Increment reference count for ip. +// Returns ip to enable ip = idup(ip1) idiom. +struct inode* +idup(struct inode *ip) +{ +  acquire(&icache.lock); +  ip->ref++; +  release(&icache.lock); +  return ip; +} + +// Lock the given inode. +// Reads the inode from disk if necessary. +void +ilock(struct inode *ip) +{ +  struct buf *bp; +  struct dinode *dip; + +  if(ip == 0 || ip->ref < 1) +    panic("ilock"); + +  acquiresleep(&ip->lock); + +  if(ip->valid == 0){ +    bp = bread(ip->dev, IBLOCK(ip->inum, sb)); +    dip = (struct dinode*)bp->data + ip->inum%IPB; +    ip->type = dip->type; +    ip->major = dip->major; +    ip->minor = dip->minor; +    ip->nlink = dip->nlink; +    ip->size = dip->size; +    memmove(ip->addrs, dip->addrs, sizeof(ip->addrs)); +    brelse(bp); +    ip->valid = 1; +    if(ip->type == 0) +      panic("ilock: no type"); +  } +} + +// Unlock the given inode. +void +iunlock(struct inode *ip) +{ +  if(ip == 0 || !holdingsleep(&ip->lock) || ip->ref < 1) +    panic("iunlock"); + +  releasesleep(&ip->lock); +} + +// Drop a reference to an in-memory inode. +// If that was the last reference, the inode cache entry can +// be recycled. +// If that was the last reference and the inode has no links +// to it, free the inode (and its content) on disk. +// All calls to iput() must be inside a transaction in +// case it has to free the inode. +void +iput(struct inode *ip) +{ +  acquire(&icache.lock); + +  if(ip->ref == 1 && ip->valid && ip->nlink == 0){ +    // inode has no links and no other references: truncate and free. 
+ +    // ip->ref == 1 means no other process can have ip locked, +    // so this acquiresleep() won't block (or deadlock). +    acquiresleep(&ip->lock); + +    release(&icache.lock); + +    itrunc(ip); +    ip->type = 0; +    iupdate(ip); +    ip->valid = 0; + +    releasesleep(&ip->lock); + +    acquire(&icache.lock); +  } + +  ip->ref--; +  release(&icache.lock); +} + +// Common idiom: unlock, then put. +void +iunlockput(struct inode *ip) +{ +  iunlock(ip); +  iput(ip); +} + +//PAGEBREAK! +// Inode content +// +// The content (data) associated with each inode is stored +// in blocks on the disk. The first NDIRECT block numbers +// are listed in ip->addrs[].  The next NINDIRECT blocks are +// listed in block ip->addrs[NDIRECT]. + +// Return the disk block address of the nth block in inode ip. +// If there is no such block, bmap allocates one. +static uint +bmap(struct inode *ip, uint bn) +{ +  uint addr, *a; +  struct buf *bp; + +  if(bn < NDIRECT){ +    if((addr = ip->addrs[bn]) == 0) +      ip->addrs[bn] = addr = balloc(ip->dev); +    return addr; +  } +  bn -= NDIRECT; + +  if(bn < NINDIRECT){ +    // Load indirect block, allocating if necessary. +    if((addr = ip->addrs[NDIRECT]) == 0) +      ip->addrs[NDIRECT] = addr = balloc(ip->dev); +    bp = bread(ip->dev, addr); +    a = (uint*)bp->data; +    if((addr = a[bn]) == 0){ +      a[bn] = addr = balloc(ip->dev); +      log_write(bp); +    } +    brelse(bp); +    return addr; +  } + +  panic("bmap: out of range"); +} + +// Truncate inode (discard contents). +// Only called when the inode has no links +// to it (no directory entries referring to it) +// and has no in-memory reference to it (is +// not an open file or current directory). +static void +itrunc(struct inode *ip) +{ +  int i, j; +  struct buf *bp; +  uint *a; + +  for(i = 0; i < NDIRECT; i++){ +    if(ip->addrs[i]){ +      bfree(ip->dev, ip->addrs[i]); +      ip->addrs[i] = 0; +    } +  } + +  if(ip->addrs[NDIRECT]){ +    bp = bread(ip->dev, ip->addrs[NDIRECT]); +    a = (uint*)bp->data; +    for(j = 0; j < NINDIRECT; j++){ +      if(a[j]) +        bfree(ip->dev, a[j]); +    } +    brelse(bp); +    bfree(ip->dev, ip->addrs[NDIRECT]); +    ip->addrs[NDIRECT] = 0; +  } + +  ip->size = 0; +  iupdate(ip); +} + +// Copy stat information from inode. +// Caller must hold ip->lock. +void +stati(struct inode *ip, struct stat *st) +{ +  st->dev = ip->dev; +  st->ino = ip->inum; +  st->type = ip->type; +  st->nlink = ip->nlink; +  st->size = ip->size; +} + +//PAGEBREAK! +// Read data from inode. +// Caller must hold ip->lock. +// If user_dst==1, then dst is a user virtual address; +// otherwise, dst is a kernel address. +int +readi(struct inode *ip, int user_dst, uint64 dst, uint off, uint n) +{ +  uint tot, m; +  struct buf *bp; + +  if(ip->type == T_DEV){ +    if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read) +      return -1; +    return devsw[ip->major].read(ip, user_dst, dst, n); +  } + +  if(off > ip->size || off + n < off) +    return -1; +  if(off + n > ip->size) +    n = ip->size - off; + +  for(tot=0; tot<n; tot+=m, off+=m, dst+=m){ +    bp = bread(ip->dev, bmap(ip, off/BSIZE)); +    m = min(n - tot, BSIZE - off%BSIZE); +    if(either_copyout(user_dst, dst, bp->data + (off % BSIZE), m) == -1) +      break; +    brelse(bp); +  } +  return n; +} + +// PAGEBREAK! +// Write data to inode. +// Caller must hold ip->lock. +// If user_src==1, then src is a user virtual address; +// otherwise, src is a kernel address. 
+int +writei(struct inode *ip, int user_src, uint64 src, uint off, uint n) +{ +  uint tot, m; +  struct buf *bp; + +  if(ip->type == T_DEV){ +    if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write){ +      return -1; +    } +    return devsw[ip->major].write(ip, user_src, src, n); +  } + +  if(off > ip->size || off + n < off) +    return -1; +  if(off + n > MAXFILE*BSIZE) +    return -1; + +  for(tot=0; tot<n; tot+=m, off+=m, src+=m){ +    bp = bread(ip->dev, bmap(ip, off/BSIZE)); +    m = min(n - tot, BSIZE - off%BSIZE); +    if(either_copyin(bp->data + (off % BSIZE), user_src, src, m) == -1) +      break; +    log_write(bp); +    brelse(bp); +  } + +  if(n > 0 && off > ip->size){ +    ip->size = off; +    iupdate(ip); +  } +  return n; +} + +//PAGEBREAK! +// Directories + +int +namecmp(const char *s, const char *t) +{ +  return strncmp(s, t, DIRSIZ); +} + +// Look for a directory entry in a directory. +// If found, set *poff to byte offset of entry. +struct inode* +dirlookup(struct inode *dp, char *name, uint *poff) +{ +  uint off, inum; +  struct dirent de; + +  if(dp->type != T_DIR) +    panic("dirlookup not DIR"); + +  for(off = 0; off < dp->size; off += sizeof(de)){ +    if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) +      panic("dirlookup read"); +    if(de.inum == 0) +      continue; +    if(namecmp(name, de.name) == 0){ +      // entry matches path element +      if(poff) +        *poff = off; +      inum = de.inum; +      return iget(dp->dev, inum); +    } +  } + +  return 0; +} + +// Write a new directory entry (name, inum) into the directory dp. +int +dirlink(struct inode *dp, char *name, uint inum) +{ +  int off; +  struct dirent de; +  struct inode *ip; + +  // Check that name is not present. +  if((ip = dirlookup(dp, name, 0)) != 0){ +    iput(ip); +    return -1; +  } + +  // Look for an empty dirent. +  for(off = 0; off < dp->size; off += sizeof(de)){ +    if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) +      panic("dirlink read"); +    if(de.inum == 0) +      break; +  } + +  strncpy(de.name, name, DIRSIZ); +  de.inum = inum; +  if(writei(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) +    panic("dirlink"); + +  return 0; +} + +//PAGEBREAK! +// Paths + +// Copy the next path element from path into name. +// Return a pointer to the element following the copied one. +// The returned path has no leading slashes, +// so the caller can check *path=='\0' to see if the name is the last one. +// If no name to remove, return 0. +// +// Examples: +//   skipelem("a/bb/c", name) = "bb/c", setting name = "a" +//   skipelem("///a//bb", name) = "bb", setting name = "a" +//   skipelem("a", name) = "", setting name = "a" +//   skipelem("", name) = skipelem("////", name) = 0 +// +static char* +skipelem(char *path, char *name) +{ +  char *s; +  int len; + +  while(*path == '/') +    path++; +  if(*path == 0) +    return 0; +  s = path; +  while(*path != '/' && *path != 0) +    path++; +  len = path - s; +  if(len >= DIRSIZ) +    memmove(name, s, DIRSIZ); +  else { +    memmove(name, s, len); +    name[len] = 0; +  } +  while(*path == '/') +    path++; +  return path; +} + +// Look up and return the inode for a path name. +// If parent != 0, return the inode for the parent and copy the final +// path element into name, which must have room for DIRSIZ bytes. +// Must be called inside a transaction since it calls iput(). 
+static struct inode* +namex(char *path, int nameiparent, char *name) +{ +  struct inode *ip, *next; + +  if(*path == '/') +    ip = iget(ROOTDEV, ROOTINO); +  else +    ip = idup(myproc()->cwd); + +  while((path = skipelem(path, name)) != 0){ +    ilock(ip); +    if(ip->type != T_DIR){ +      iunlockput(ip); +      return 0; +    } +    if(nameiparent && *path == '\0'){ +      // Stop one level early. +      iunlock(ip); +      return ip; +    } +    if((next = dirlookup(ip, name, 0)) == 0){ +      iunlockput(ip); +      return 0; +    } +    iunlockput(ip); +    ip = next; +  } +  if(nameiparent){ +    iput(ip); +    return 0; +  } +  return ip; +} + +struct inode* +namei(char *path) +{ +  char name[DIRSIZ]; +  return namex(path, 0, name); +} + +struct inode* +nameiparent(char *path, char *name) +{ +  return namex(path, 1, name); +} diff --git a/kernel/fs.h b/kernel/fs.h new file mode 100644 index 0000000..bc0805f --- /dev/null +++ b/kernel/fs.h @@ -0,0 +1,60 @@ +// On-disk file system format. +// Both the kernel and user programs use this header file. + + +#define ROOTINO  1   // root i-number +#define BSIZE 1024  // block size + +// Disk layout: +// [ boot block | super block | log | inode blocks | +//                                          free bit map | data blocks] +// +// mkfs computes the super block and builds an initial file system. The +// super block describes the disk layout: +struct superblock { +  uint magic;        // Must be FSMAGIC +  uint size;         // Size of file system image (blocks) +  uint nblocks;      // Number of data blocks +  uint ninodes;      // Number of inodes. +  uint nlog;         // Number of log blocks +  uint logstart;     // Block number of first log block +  uint inodestart;   // Block number of first inode block +  uint bmapstart;    // Block number of first free map block +}; + +#define FSMAGIC 0x10203040 + +#define NDIRECT 12 +#define NINDIRECT (BSIZE / sizeof(uint)) +#define MAXFILE (NDIRECT + NINDIRECT) + +// On-disk inode structure +struct dinode { +  short type;           // File type +  short major;          // Major device number (T_DEV only) +  short minor;          // Minor device number (T_DEV only) +  short nlink;          // Number of links to inode in file system +  uint size;            // Size of file (bytes) +  uint addrs[NDIRECT+1];   // Data block addresses +}; + +// Inodes per block. +#define IPB           (BSIZE / sizeof(struct dinode)) + +// Block containing inode i +#define IBLOCK(i, sb)     ((i) / IPB + sb.inodestart) + +// Bitmap bits per block +#define BPB           (BSIZE*8) + +// Block of free map containing bit for block b +#define BBLOCK(b, sb) ((b)/BPB + sb.bmapstart) + +// Directory is a file containing a sequence of dirent structures. +#define DIRSIZ 14 + +struct dirent { +  ushort inum; +  char name[DIRSIZ]; +}; + diff --git a/kernel/kalloc.c b/kernel/kalloc.c new file mode 100644 index 0000000..1ed1c49 --- /dev/null +++ b/kernel/kalloc.c @@ -0,0 +1,80 @@ +// Physical memory allocator, intended to allocate +// memory for user processes, kernel stacks, page table pages, +// and pipe buffers. Allocates 4096-byte pages. 
+ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "spinlock.h" +#include "riscv.h" +#include "defs.h" + +void freerange(void *pa_start, void *pa_end); + +extern char end[]; // first address after kernel loaded from ELF file +                   // defined by the kernel linker script in kernel.ld + +struct run { +  struct run *next; +}; + +struct { +  struct spinlock lock; +  struct run *freelist; +} kmem; + +void +kinit() +{ +  initlock(&kmem.lock, "kmem"); +  freerange(end, (void*)PHYSTOP); +} + +void +freerange(void *pa_start, void *pa_end) +{ +  char *p; +  p = (char*)PGROUNDUP((uint64)pa_start); +  for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE) +    kfree(p); +} +//PAGEBREAK: 21 +// Free the page of physical memory pointed at by v, +// which normally should have been returned by a +// call to kalloc().  (The exception is when +// initializing the allocator; see kinit above.) +void +kfree(void *pa) +{ +  struct run *r; + +  if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP) +    panic("kfree"); + +  // Fill with junk to catch dangling refs. +  memset(pa, 1, PGSIZE); + +  acquire(&kmem.lock); +  r = (struct run*)pa; +  r->next = kmem.freelist; +  kmem.freelist = r; +  release(&kmem.lock); +} + +// Allocate one 4096-byte page of physical memory. +// Returns a pointer that the kernel can use. +// Returns 0 if the memory cannot be allocated. +void * +kalloc(void) +{ +  struct run *r; + +  acquire(&kmem.lock); +  r = kmem.freelist; +  if(r) +    kmem.freelist = r->next; +  release(&kmem.lock); +  if(r) +    memset((char*)r, 5, PGSIZE); // fill with junk +  return (void*)r; +} diff --git a/kernel/kernel.ld b/kernel/kernel.ld new file mode 100644 index 0000000..53c9b90 --- /dev/null +++ b/kernel/kernel.ld @@ -0,0 +1,31 @@ +OUTPUT_ARCH( "riscv" ) +ENTRY( _entry ) + +SECTIONS +{ +  /* +   * ensure that entry.S / _entry is at 0x80000000, +   * where qemu's -kernel jumps. +   */ +  . = 0x80000000; +  .text : +  { +    *(.text) +    . = ALIGN(0x1000); +    *(trampoline) +  } + +  . = ALIGN(0x1000); +  PROVIDE(etext = .); + +  /* +   * make sure end is after data and bss. +   */ +  .data : { +    *(.data) +  } +  bss : { +    *(.bss) +    PROVIDE(end = .); +  } +} diff --git a/kernel/kernelvec.S b/kernel/kernelvec.S new file mode 100644 index 0000000..4f52688 --- /dev/null +++ b/kernel/kernelvec.S @@ -0,0 +1,112 @@ +	# +        # interrupts and exceptions while in supervisor +        # mode come here. +        # +        # push all registers, call kerneltrap(), restore, return. 
+        # +.globl kerneltrap +.globl kernelvec +.align 4 +kernelvec: +        addi sp, sp, -256 + +        sd ra, 0(sp) +        sd sp, 8(sp) +        sd gp, 16(sp) +        sd tp, 24(sp) +        sd t0, 32(sp) +        sd t1, 40(sp) +        sd t2, 48(sp) +        sd s0, 56(sp) +        sd s1, 64(sp) +        sd a0, 72(sp) +        sd a1, 80(sp) +        sd a2, 88(sp) +        sd a3, 96(sp) +        sd a4, 104(sp) +        sd a5, 112(sp) +        sd a6, 120(sp) +        sd a7, 128(sp) +        sd s2, 136(sp) +        sd s3, 144(sp) +        sd s4, 152(sp) +        sd s5, 160(sp) +        sd s6, 168(sp) +        sd s7, 176(sp) +        sd s8, 184(sp) +        sd s9, 192(sp) +        sd s10, 200(sp) +        sd s11, 208(sp) +        sd t3, 216(sp) +        sd t4, 224(sp) +        sd t5, 232(sp) +        sd t6, 240(sp) + +        call kerneltrap + +        ld ra, 0(sp) +        ld sp, 8(sp) +        ld gp, 16(sp) +        ld tp, 24(sp) +        ld t0, 32(sp) +        ld t1, 40(sp) +        ld t2, 48(sp) +        ld s0, 56(sp) +        ld s1, 64(sp) +        ld a0, 72(sp) +        ld a1, 80(sp) +        ld a2, 88(sp) +        ld a3, 96(sp) +        ld a4, 104(sp) +        ld a5, 112(sp) +        ld a6, 120(sp) +        ld a7, 128(sp) +        ld s2, 136(sp) +        ld s3, 144(sp) +        ld s4, 152(sp) +        ld s5, 160(sp) +        ld s6, 168(sp) +        ld s7, 176(sp) +        ld s8, 184(sp) +        ld s9, 192(sp) +        ld s10, 200(sp) +        ld s11, 208(sp) +        ld t3, 216(sp) +        ld t4, 224(sp) +        ld t5, 232(sp) +        ld t6, 240(sp) + +        addi sp, sp, 256 + +        sret + +        # +        # machine-mode timer interrupt. +        # +.globl machinevec +.align 4 +machinevec: +        csrrw a0, mscratch, a0 +        sd a1, 0(a0) +        sd a2, 8(a0) +        sd a3, 16(a0) +        sd a4, 24(a0) + +        # add another second to mtimecmp0. +        ld a1, 32(a0) # CLINT_MTIMECMP(hart) +        ld a2, 40(a0) # ticks per second +        ld a3, 0(a1) +        add a3, a3, a2 +        sd a3, 0(a1) + +        # raise a supervisor software interrupt. +	li a1, 2 +        csrw sip, a1 + +        ld a4, 24(a0) +        ld a3, 16(a0) +        ld a2, 8(a0) +        ld a1, 0(a0) +        csrrw a0, mscratch, a0 + +        mret diff --git a/kernel/log.c b/kernel/log.c new file mode 100644 index 0000000..c8f7e62 --- /dev/null +++ b/kernel/log.c @@ -0,0 +1,235 @@ +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" + +// Simple logging that allows concurrent FS system calls. +// +// A log transaction contains the updates of multiple FS system +// calls. The logging system only commits when there are +// no FS system calls active. Thus there is never +// any reasoning required about whether a commit might +// write an uncommitted system call's updates to disk. +// +// A system call should call begin_op()/end_op() to mark +// its start and end. Usually begin_op() just increments +// the count of in-progress FS system calls and returns. +// But if it thinks the log is close to running out, it +// sleeps until the last outstanding end_op() commits. +// +// The log is a physical re-do log containing disk blocks. +// The on-disk log format: +//   header block, containing block #s for block A, B, C, ... +//   block A +//   block B +//   block C +//   ... +// Log appends are synchronous. 
+ +// Contents of the header block, used for both the on-disk header block +// and to keep track in memory of logged block# before commit. +struct logheader { +  int n; +  int block[LOGSIZE]; +}; + +struct log { +  struct spinlock lock; +  int start; +  int size; +  int outstanding; // how many FS sys calls are executing. +  int committing;  // in commit(), please wait. +  int dev; +  struct logheader lh; +}; +struct log log; + +static void recover_from_log(void); +static void commit(); + +void +initlog(int dev) +{ +  if (sizeof(struct logheader) >= BSIZE) +    panic("initlog: too big logheader"); + +  struct superblock sb; +  initlock(&log.lock, "log"); +  readsb(dev, &sb); +  log.start = sb.logstart; +  log.size = sb.nlog; +  log.dev = dev; +  recover_from_log(); +} + +// Copy committed blocks from log to their home location +static void +install_trans(void) +{ +  int tail; + +  for (tail = 0; tail < log.lh.n; tail++) { +    struct buf *lbuf = bread(log.dev, log.start+tail+1); // read log block +    struct buf *dbuf = bread(log.dev, log.lh.block[tail]); // read dst +    memmove(dbuf->data, lbuf->data, BSIZE);  // copy block to dst +    bwrite(dbuf);  // write dst to disk +    brelse(lbuf); +    brelse(dbuf); +  } +} + +// Read the log header from disk into the in-memory log header +static void +read_head(void) +{ +  struct buf *buf = bread(log.dev, log.start); +  struct logheader *lh = (struct logheader *) (buf->data); +  int i; +  log.lh.n = lh->n; +  for (i = 0; i < log.lh.n; i++) { +    log.lh.block[i] = lh->block[i]; +  } +  brelse(buf); +} + +// Write in-memory log header to disk. +// This is the true point at which the +// current transaction commits. +static void +write_head(void) +{ +  struct buf *buf = bread(log.dev, log.start); +  struct logheader *hb = (struct logheader *) (buf->data); +  int i; +  hb->n = log.lh.n; +  for (i = 0; i < log.lh.n; i++) { +    hb->block[i] = log.lh.block[i]; +  } +  bwrite(buf); +  brelse(buf); +} + +static void +recover_from_log(void) +{ +  read_head(); +  install_trans(); // if committed, copy from log to disk +  log.lh.n = 0; +  write_head(); // clear the log +} + +// called at the start of each FS system call. +void +begin_op(void) +{ +  acquire(&log.lock); +  while(1){ +    if(log.committing){ +      sleep(&log, &log.lock); +    } else if(log.lh.n + (log.outstanding+1)*MAXOPBLOCKS > LOGSIZE){ +      // this op might exhaust log space; wait for commit. +      sleep(&log, &log.lock); +    } else { +      log.outstanding += 1; +      release(&log.lock); +      break; +    } +  } +} + +// called at the end of each FS system call. +// commits if this was the last outstanding operation. +void +end_op(void) +{ +  int do_commit = 0; + +  acquire(&log.lock); +  log.outstanding -= 1; +  if(log.committing) +    panic("log.committing"); +  if(log.outstanding == 0){ +    do_commit = 1; +    log.committing = 1; +  } else { +    // begin_op() may be waiting for log space, +    // and decrementing log.outstanding has decreased +    // the amount of reserved space. +    wakeup(&log); +  } +  release(&log.lock); + +  if(do_commit){ +    // call commit w/o holding locks, since not allowed +    // to sleep with locks. +    commit(); +    acquire(&log.lock); +    log.committing = 0; +    wakeup(&log); +    release(&log.lock); +  } +} + +// Copy modified blocks from cache to log. 
+static void +write_log(void) +{ +  int tail; + +  for (tail = 0; tail < log.lh.n; tail++) { +    struct buf *to = bread(log.dev, log.start+tail+1); // log block +    struct buf *from = bread(log.dev, log.lh.block[tail]); // cache block +    memmove(to->data, from->data, BSIZE); +    bwrite(to);  // write the log +    brelse(from); +    brelse(to); +  } +} + +static void +commit() +{ +  if (log.lh.n > 0) { +    write_log();     // Write modified blocks from cache to log +    write_head();    // Write header to disk -- the real commit +    install_trans(); // Now install writes to home locations +    log.lh.n = 0; +    write_head();    // Erase the transaction from the log +  } +} + +// Caller has modified b->data and is done with the buffer. +// Record the block number and pin in the cache with B_DIRTY. +// commit()/write_log() will do the disk write. +// +// log_write() replaces bwrite(); a typical use is: +//   bp = bread(...) +//   modify bp->data[] +//   log_write(bp) +//   brelse(bp) +void +log_write(struct buf *b) +{ +  int i; + +  if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1) +    panic("too big a transaction"); +  if (log.outstanding < 1) +    panic("log_write outside of trans"); + +  acquire(&log.lock); +  for (i = 0; i < log.lh.n; i++) { +    if (log.lh.block[i] == b->blockno)   // log absorbtion +      break; +  } +  log.lh.block[i] = b->blockno; +  if (i == log.lh.n) +    log.lh.n++; +  b->flags |= B_DIRTY; // prevent eviction +  release(&log.lock); +} + diff --git a/kernel/main.c b/kernel/main.c new file mode 100644 index 0000000..2168b9f --- /dev/null +++ b/kernel/main.c @@ -0,0 +1,42 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" + +volatile static int started = 0; + +// Bootstrap processor starts running C code here. +// Allocate a real stack and switch to it, first +// doing some setup required for memory allocator to work. +void +main() +{ +  if(cpuid() == 0){ +    uartinit();      // serial port +    consoleinit(); +    printf("hart %d starting\n", cpuid()); +    kinit();         // physical page allocator +    kvminit();       // create kernel page table +    kvminithart();   // turn on paging +    procinit();      // process table +    trapinit();      // trap vectors +    trapinithart();  // install kernel trap vector +    plicinit();      // set up interrupt controller +    plicinithart();  // ask PLIC for device interrupts +    binit();         // buffer cache +    fileinit();      // file table +    ramdiskinit();   // disk +    userinit();      // first user process +    started = 1; +  } else { +    while(started == 0) +      ; +    printf("hart %d starting\n", cpuid()); +    kvminithart();    // turn on paging +    trapinithart();   // install kernel trap vector +    plicinithart();   // ask PLIC for device interrupts +  } + +  scheduler();         +} diff --git a/kernel/memlayout.h b/kernel/memlayout.h new file mode 100644 index 0000000..462986c --- /dev/null +++ b/kernel/memlayout.h @@ -0,0 +1,50 @@ +// Physical memory layout + +// qemu -machine virt is set up like this, +// based on qemu's hw/riscv/virt.c: +// +// 00001000 -- boot ROM, provided by qemu +// 02000000 -- CLINT +// 0C000000 -- PLIC +// 10000000 -- uart0 registers +// 80000000 -- boot ROM jumps here in machine mode +//             -kernel loads the kernel here +// 88000000 -- -initrd fs.img ramdisk image. +// unused RAM after 80000000. 
+ +// the kernel uses physical memory thus: +// 80000000 -- entry.S, then kernel text and data +// end -- start of kernel page allocation area +// PHYSTOP -- end RAM used by the kernel + +// qemu puts UART registers here in physical memory. +#define UART0 0x10000000L +#define UART0_IRQ 10 + +// local interrupt controller, which contains the timer. +#define CLINT 0x2000000L +#define CLINT_MTIMECMP(hartid) (CLINT + 0x4000 + 8*(hartid)) +#define CLINT_MTIME (CLINT + 0xBFF8) + +// qemu puts programmable interrupt controller here. +#define PLIC 0x0c000000L +#define PLIC_PRIORITY (PLIC + 0x0) +#define PLIC_PENDING (PLIC + 0x1000) +#define PLIC_MENABLE(hart) (PLIC + 0x2000 + (hart)*0x100) +#define PLIC_SENABLE(hart) (PLIC + 0x2080 + (hart)*0x100) +#define PLIC_MPRIORITY(hart) (PLIC + 0x200000 + (hart)*0x2000) +#define PLIC_SPRIORITY(hart) (PLIC + 0x201000 + (hart)*0x2000) +#define PLIC_MCLAIM(hart) (PLIC + 0x200004 + (hart)*0x2000) +#define PLIC_SCLAIM(hart) (PLIC + 0x201004 + (hart)*0x2000) + +#define RAMDISK 0x88000000L + +// the kernel expects there to be RAM +// for use by the kernel and user pages +// from physical address 0x80000000 to PHYSTOP. +#define KERNBASE 0x80000000L +#define PHYSTOP (KERNBASE + 128*1024*1024) + +// map the trampoline page to the highest address, +// in both user and kernel space. +#define TRAMPOLINE (MAXVA - PGSIZE) diff --git a/kernel/param.h b/kernel/param.h new file mode 100644 index 0000000..b5fdcb2 --- /dev/null +++ b/kernel/param.h @@ -0,0 +1,13 @@ +#define NPROC        64  // maximum number of processes +#define NCPU          8  // maximum number of CPUs +#define NOFILE       16  // open files per process +#define NFILE       100  // open files per system +#define NINODE       50  // maximum number of active i-nodes +#define NDEV         10  // maximum major device number +#define ROOTDEV       1  // device number of file system root disk +#define MAXARG       32  // max exec arguments +#define MAXOPBLOCKS  10  // max # of blocks any FS op writes +#define LOGSIZE      (MAXOPBLOCKS*3)  // max data blocks in on-disk log +#define NBUF         (MAXOPBLOCKS*3)  // size of disk block cache +#define FSSIZE       1000  // size of file system in blocks +#define MAXPATH      128   // maximum file path name diff --git a/kernel/pipe.c b/kernel/pipe.c new file mode 100644 index 0000000..31bf0cc --- /dev/null +++ b/kernel/pipe.c @@ -0,0 +1,129 @@ +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "proc.h" +#include "fs.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "file.h" + +#define PIPESIZE 512 + +struct pipe { +  struct spinlock lock; +  char data[PIPESIZE]; +  uint nread;     // number of bytes read +  uint nwrite;    // number of bytes written +  int readopen;   // read fd is still open +  int writeopen;  // write fd is still open +}; + +int +pipealloc(struct file **f0, struct file **f1) +{ +  struct pipe *p; + +  p = 0; +  *f0 = *f1 = 0; +  if((*f0 = filealloc()) == 0 || (*f1 = filealloc()) == 0) +    goto bad; +  if((p = (struct pipe*)kalloc()) == 0) +    goto bad; +  p->readopen = 1; +  p->writeopen = 1; +  p->nwrite = 0; +  p->nread = 0; +  initlock(&p->lock, "pipe"); +  (*f0)->type = FD_PIPE; +  (*f0)->readable = 1; +  (*f0)->writable = 0; +  (*f0)->pipe = p; +  (*f1)->type = FD_PIPE; +  (*f1)->readable = 0; +  (*f1)->writable = 1; +  (*f1)->pipe = p; +  return 0; + +//PAGEBREAK: 20 + bad: +  if(p) +    kfree((char*)p); +  if(*f0) +    fileclose(*f0); +  if(*f1) +    fileclose(*f1); +  return -1; +} + +void 
+pipeclose(struct pipe *p, int writable) +{ +  acquire(&p->lock); +  if(writable){ +    p->writeopen = 0; +    wakeup(&p->nread); +  } else { +    p->readopen = 0; +    wakeup(&p->nwrite); +  } +  if(p->readopen == 0 && p->writeopen == 0){ +    release(&p->lock); +    kfree((char*)p); +  } else +    release(&p->lock); +} + +//PAGEBREAK: 40 +int +pipewrite(struct pipe *p, uint64 addr, int n) +{ +  int i; +  char ch; +  struct proc *pr = myproc(); + +  acquire(&p->lock); +  for(i = 0; i < n; i++){ +    while(p->nwrite == p->nread + PIPESIZE){  //DOC: pipewrite-full +      if(p->readopen == 0 || myproc()->killed){ +        release(&p->lock); +        return -1; +      } +      wakeup(&p->nread); +      sleep(&p->nwrite, &p->lock);  //DOC: pipewrite-sleep +    } +    if(copyin(pr->pagetable, &ch, addr + i, 1) == -1) +      break; +    p->data[p->nwrite++ % PIPESIZE] = ch; +  } +  wakeup(&p->nread);  //DOC: pipewrite-wakeup1 +  release(&p->lock); +  return n; +} + +int +piperead(struct pipe *p, uint64 addr, int n) +{ +  int i; +  struct proc *pr = myproc(); +  char ch; + +  acquire(&p->lock); +  while(p->nread == p->nwrite && p->writeopen){  //DOC: pipe-empty +    if(myproc()->killed){ +      release(&p->lock); +      return -1; +    } +    sleep(&p->nread, &p->lock); //DOC: piperead-sleep +  } +  for(i = 0; i < n; i++){  //DOC: piperead-copy +    if(p->nread == p->nwrite) +      break; +    ch = p->data[p->nread++ % PIPESIZE]; +    if(copyout(pr->pagetable, addr + i, &ch, 1) == -1) +      break; +  } +  wakeup(&p->nwrite);  //DOC: piperead-wakeup +  release(&p->lock); +  return i; +} diff --git a/kernel/plic.c b/kernel/plic.c new file mode 100644 index 0000000..0f19ab0 --- /dev/null +++ b/kernel/plic.c @@ -0,0 +1,63 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" + +// +// the riscv Platform Level Interrupt Controller (PLIC). +// + +void +plicinit(void) +{ +  // set uart's priority to be non-zero (otherwise disabled). +  *(uint32*)(PLIC + UART0_IRQ*4) = 1; +} + +void +plicinithart(void) +{ +  int hart = cpuid(); +   +  // set uart's enable bit for this hart's S-mode.  +  //*(uint32*)(PLIC + 0x2080)= (1 << UART0_IRQ); +  *(uint32*)PLIC_SENABLE(hart)= (1 << UART0_IRQ); + +  // set this hart's S-mode priority threshold to 0. +  //*(uint32*)(PLIC + 0x201000) = 0; +  *(uint32*)PLIC_SPRIORITY(hart) = 0; +} + +// return a bitmap of which IRQs are waiting +// to be served. +uint64 +plic_pending(void) +{ +  uint64 mask; + +  //mask = *(uint32*)(PLIC + 0x1000); +  //mask |= (uint64)*(uint32*)(PLIC + 0x1004) << 32; +  mask = *(uint64*)PLIC_PENDING; + +  return mask; +} + +// ask the PLIC what interrupt we should serve. +int +plic_claim(void) +{ +  int hart = cpuid(); +  //int irq = *(uint32*)(PLIC + 0x201004); +  int irq = *(uint32*)PLIC_SCLAIM(hart); +  return irq; +} + +// tell the PLIC we've served this IRQ. 
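+// Writing the IRQ number back to the claim/complete register lets the
+// PLIC raise that interrupt again; for hart 0 this register sits at
+// PLIC + 0x201004, as in the commented-out line below.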
+void +plic_complete(int irq) +{ +  int hart = cpuid(); +  //*(uint32*)(PLIC + 0x201004) = irq; +  *(uint32*)PLIC_SCLAIM(hart) = irq; +} diff --git a/kernel/proc.c b/kernel/proc.c new file mode 100644 index 0000000..4ae34c8 --- /dev/null +++ b/kernel/proc.c @@ -0,0 +1,591 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "proc.h" +#include "spinlock.h" +#include "defs.h" + +struct { +  struct spinlock lock; +  struct proc proc[NPROC]; +} ptable; + +struct cpu cpus[NCPU]; + +struct proc *initproc; + +int nextpid = 1; +extern void forkret(void); + +// for returning  out of the kernel +extern void sysexit(void); + +static void wakeup1(void *chan); + +extern char trampout[]; // trampoline.S + +void +procinit(void) +{ +  initlock(&ptable.lock, "ptable"); +} + +// Must be called with interrupts disabled, +// to prevent race with process being moved +// to a different CPU. +int +cpuid() +{ +  int id = r_tp(); +  return id; +} + +// Return this core's cpu struct. +// Interrupts must be disabled. +struct cpu* +mycpu(void) { +  int id = cpuid(); +  struct cpu *c = &cpus[id]; +  return c; +} + +// Return the current struct proc *. +struct proc* +myproc(void) { +  push_off(); +  struct cpu *c = mycpu(); +  struct proc *p = c->proc; +  pop_off(); +  return p; +} + +//PAGEBREAK: 32 +// Look in the process table for an UNUSED proc. +// If found, change state to EMBRYO and initialize +// state required to run in the kernel. +// Otherwise return 0. +static struct proc* +allocproc(void) +{ +  struct proc *p; + +  acquire(&ptable.lock); + +  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++) +    if(p->state == UNUSED) +      goto found; + +  release(&ptable.lock); +  return 0; + +found: +  p->state = EMBRYO; +  p->pid = nextpid++; + +  release(&ptable.lock); + +  // Allocate a page for the kernel stack. +  if((p->kstack = kalloc()) == 0){ +    p->state = UNUSED; +    return 0; +  } + +  // Allocate a trapframe page. +  if((p->tf = (struct trapframe *)kalloc()) == 0){ +    p->state = UNUSED; +    return 0; +  } + +  // An empty user page table. +  p->pagetable = proc_pagetable(p); + +  // Set up new context to start executing at forkret, +  // which returns to user space. +  memset(&p->context, 0, sizeof p->context); +  p->context.ra = (uint64)forkret; +  p->context.sp = (uint64)p->kstack + PGSIZE; + +  return p; +} + +// Create a page table for a given process, +// with no users pages, but with trampoline pages. +// Called both when creating a process, and +// by exec() when building tentative new memory image, +// which might fail. +pagetable_t +proc_pagetable(struct proc *p) +{ +  pagetable_t pagetable; + +  // An empty user page table. +  pagetable = uvmcreate(); + +  // map the trampoline code (for system call return) +  // at the highest user virtual address. +  // only the supervisor uses it, on the way +  // to/from user space, so not PTE_U. +  mappages(pagetable, TRAMPOLINE, PGSIZE, +           (uint64)trampout, PTE_R | PTE_X); + +  // map the trapframe, for trampoline.S. +  mappages(pagetable, (TRAMPOLINE - PGSIZE), PGSIZE, +           (uint64)(p->tf), PTE_R | PTE_W); + +  return pagetable; +} + +// Free a process's page table, and free the +// physical memory the page table refers to. +// Called both when a process exits and from +// exec() if it fails. 
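+// The trampoline and trapframe pages themselves are not freed here:
+// the trampoline is shared by every process, and the trapframe page
+// is freed separately (see wait()).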
+void +proc_freepagetable(pagetable_t pagetable, uint64 sz) +{ +  unmappages(pagetable, TRAMPOLINE, PGSIZE, 0); +  unmappages(pagetable, TRAMPOLINE-PGSIZE, PGSIZE, 0); +  uvmfree(pagetable, sz); +} + +// a user program that calls exec("/init") +// od -t xC initcode +uchar initcode[] = { +  0x17, 0x05, 0x00, 0x00, 0x13, 0x05, 0x05, 0x02, 0x97, 0x05, 0x00, 0x00, 0x93, 0x85, 0x05, 0x02, +  0x9d, 0x48, 0x73, 0x00, 0x00, 0x00, 0x89, 0x48, 0x73, 0x00, 0x00, 0x00, 0xef, 0xf0, 0xbf, 0xff, +  0x2f, 0x69, 0x6e, 0x69, 0x74, 0x00, 0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +  0x00, 0x00, 0x00 +}; + +//PAGEBREAK: 32 +// Set up first user process. +void +userinit(void) +{ +  struct proc *p; + +  p = allocproc(); +  initproc = p; +   +  uvminit(p->pagetable, initcode, sizeof(initcode)); +  p->sz = PGSIZE; + +  // prepare for the very first kernel->user. +  p->tf->epc = 0; +  p->tf->sp = PGSIZE; + +  safestrcpy(p->name, "initcode", sizeof(p->name)); +  p->cwd = namei("/"); + +  // this assignment to p->state lets other cores +  // run this process. the acquire forces the above +  // writes to be visible, and the lock is also needed +  // because the assignment might not be atomic. +  acquire(&ptable.lock); + +  p->state = RUNNABLE; + +  release(&ptable.lock); +} + +// Grow current process's memory by n bytes. +// Return 0 on success, -1 on failure. +int +growproc(int n) +{ +  uint sz; +  struct proc *p = myproc(); + +  sz = p->sz; +  if(n > 0){ +    if((sz = uvmalloc(p->pagetable, sz, sz + n)) == 0) +      return -1; +  } else if(n < 0){ +    if((sz = uvmdealloc(p->pagetable, sz, sz + n)) == 0) +      return -1; +  } +  p->sz = sz; +  return 0; +} + +// Create a new process, copying p as the parent. +// Sets up child kernel stack to return as if from system call. +int +fork(void) +{ +  int i, pid; +  struct proc *np; +  struct proc *p = myproc(); + +  // Allocate process. +  if((np = allocproc()) == 0){ +    return -1; +  } + +  // Copy user memory from parent to child. +  uvmcopy(p->pagetable, np->pagetable, p->sz); +  np->sz = p->sz; + +  np->parent = p; + +  // copy saved user registers. +  *(np->tf) = *(p->tf); + +  // Cause fork to return 0 in the child. +  np->tf->a0 = 0; + +  // increment reference counts on open file descriptors. +  for(i = 0; i < NOFILE; i++) +    if(p->ofile[i]) +      np->ofile[i] = filedup(p->ofile[i]); +  np->cwd = idup(p->cwd); + +  safestrcpy(np->name, p->name, sizeof(p->name)); + +  pid = np->pid; + +  acquire(&ptable.lock); + +  np->state = RUNNABLE; + +  release(&ptable.lock); + +  return pid; +} + +// Exit the current process.  Does not return. +// An exited process remains in the zombie state +// until its parent calls wait(). +void +exit(void) +{ +  struct proc *p = myproc(); +  struct proc *pp; +  int fd; + +  if(p == initproc) +    panic("init exiting"); + +  // Close all open files. +  for(fd = 0; fd < NOFILE; fd++){ +    if(p->ofile[fd]){ +      fileclose(p->ofile[fd]); +      p->ofile[fd] = 0; +    } +  } + +  begin_op(); +  iput(p->cwd); +  end_op(); +  p->cwd = 0; + +  acquire(&ptable.lock); + +  // Parent might be sleeping in wait(). +  wakeup1(p->parent); + +  // Pass abandoned children to init. +  for(pp = ptable.proc; pp < &ptable.proc[NPROC]; pp++){ +    if(pp->parent == p){ +      pp->parent = initproc; +      if(pp->state == ZOMBIE) +        wakeup1(initproc); +    } +  } + +  // Jump into the scheduler, never to return. 
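+  // The parent's wait() frees this process's kernel stack, trapframe
+  // and page table once it observes the ZOMBIE state.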
+  p->state = ZOMBIE; +  sched(); +  panic("zombie exit"); +} + +// Wait for a child process to exit and return its pid. +// Return -1 if this process has no children. +int +wait(void) +{ +  struct proc *np; +  int havekids, pid; +  struct proc *p = myproc(); +   +  acquire(&ptable.lock); +  for(;;){ +    // Scan through table looking for exited children. +    havekids = 0; +    for(np = ptable.proc; np < &ptable.proc[NPROC]; np++){ +      if(np->parent != p) +        continue; +      havekids = 1; +      if(np->state == ZOMBIE){ +        // Found one. +        pid = np->pid; +        kfree(np->kstack); +        np->kstack = 0; +        kfree((void*)np->tf); +        np->tf = 0; +        proc_freepagetable(np->pagetable, np->sz); +        np->pagetable = 0; +        np->pid = 0; +        np->parent = 0; +        np->name[0] = 0; +        np->killed = 0; +        np->state = UNUSED; +        release(&ptable.lock); +        return pid; +      } +    } + +    // No point waiting if we don't have any children. +    if(!havekids || p->killed){ +      release(&ptable.lock); +      return -1; +    } + +    // Wait for children to exit.  (See wakeup1 call in proc_exit.) +    sleep(p, &ptable.lock);  //DOC: wait-sleep +  } +} + +//PAGEBREAK: 42 +// Per-CPU process scheduler. +// Each CPU calls scheduler() after setting itself up. +// Scheduler never returns.  It loops, doing: +//  - choose a process to run +//  - swtch to start running that process +//  - eventually that process transfers control +//      via swtch back to the scheduler. +void +scheduler(void) +{ +  struct proc *p; +  struct cpu *c = mycpu(); + +  c->proc = 0; +  for(;;){ +    // Enable interrupts on this processor. +    intr_on(); + +    // Loop over process table looking for process to run. +    acquire(&ptable.lock); +    for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ +      if(p->state != RUNNABLE) +        continue; + +      // Switch to chosen process.  It is the process's job +      // to release ptable.lock and then reacquire it +      // before jumping back to us. +      c->proc = p; +      p->state = RUNNING; + +      swtch(&c->scheduler, &p->context); + +      // Process is done running for now. +      // It should have changed its p->state before coming back. +      c->proc = 0; +    } +    release(&ptable.lock); +  } +} + +// Enter scheduler.  Must hold only ptable.lock +// and have changed proc->state. Saves and restores +// intena because intena is a property of this +// kernel thread, not this CPU. It should +// be proc->intena and proc->noff, but that would +// break in the few places where a lock is held but +// there's no process. +void +sched(void) +{ +  int intena; +  struct proc *p = myproc(); + +  if(!holding(&ptable.lock)) +    panic("sched ptable.lock"); +  if(mycpu()->noff != 1) +    panic("sched locks"); +  if(p->state == RUNNING) +    panic("sched running"); +  if(intr_get()) +    panic("sched interruptible"); + +  intena = mycpu()->intena; +  swtch(&p->context, &mycpu()->scheduler); +  mycpu()->intena = intena; +} + +// Give up the CPU for one scheduling round. +void +yield(void) +{ +  acquire(&ptable.lock);  //DOC: yieldlock +  myproc()->state = RUNNABLE; +  sched(); +  release(&ptable.lock); +} + +// A fork child's very first scheduling by scheduler() +// will swtch to forkret. +void +forkret(void) +{ +  static int first = 1; + +  // Still holding ptable.lock from scheduler. 
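+  // scheduler() acquired it before swtch()ing to this process's
+  // context, so release it here.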
+  release(&ptable.lock); + +  if (first) { +    // Some initialization functions must be run in the context +    // of a regular process (e.g., they call sleep), and thus cannot +    // be run from main(). +    first = 0; +    iinit(ROOTDEV); +    initlog(ROOTDEV); +  } + +  usertrapret(); +} + +// Atomically release lock and sleep on chan. +// Reacquires lock when awakened. +void +sleep(void *chan, struct spinlock *lk) +{ +  struct proc *p = myproc(); +   +  if(p == 0) +    panic("sleep"); + +  if(lk == 0) +    panic("sleep without lk"); + +  // Must acquire ptable.lock in order to +  // change p->state and then call sched. +  // Once we hold ptable.lock, we can be +  // guaranteed that we won't miss any wakeup +  // (wakeup runs with ptable.lock locked), +  // so it's okay to release lk. +  if(lk != &ptable.lock){  //DOC: sleeplock0 +    acquire(&ptable.lock);  //DOC: sleeplock1 +    release(lk); +  } +  // Go to sleep. +  p->chan = chan; +  p->state = SLEEPING; + +  sched(); + +  // Tidy up. +  p->chan = 0; + +  // Reacquire original lock. +  if(lk != &ptable.lock){  //DOC: sleeplock2 +    release(&ptable.lock); +    acquire(lk); +  } +} + +//PAGEBREAK! +// Wake up all processes sleeping on chan. +// The ptable lock must be held. +static void +wakeup1(void *chan) +{ +  struct proc *p; + +  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++) +    if(p->state == SLEEPING && p->chan == chan) +      p->state = RUNNABLE; +} + +// Wake up all processes sleeping on chan. +void +wakeup(void *chan) +{ +  acquire(&ptable.lock); +  wakeup1(chan); +  release(&ptable.lock); +} + +// Kill the process with the given pid. +// Process won't exit until it returns +// to user space (see trap in trap.c). +int +kill(int pid) +{ +  struct proc *p; + +  acquire(&ptable.lock); +  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ +    if(p->pid == pid){ +      p->killed = 1; +      // Wake process from sleep if necessary. +      if(p->state == SLEEPING) +        p->state = RUNNABLE; +      release(&ptable.lock); +      return 0; +    } +  } +  release(&ptable.lock); +  return -1; +} + +// Copy to either a user address, or kernel address, +// depending on usr_dst. +// Returns 0 on success, -1 on error. +int +either_copyout(int user_dst, uint64 dst, void *src, uint64 len) +{ +  struct proc *p = myproc(); +  if(user_dst){ +    return copyout(p->pagetable, dst, src, len); +  } else { +    memmove((char *)dst, src, len); +    return 0; +  } +} + +// Copy from either a user address, or kernel address, +// depending on usr_src. +// Returns 0 on success, -1 on error. +int +either_copyin(void *dst, int user_src, uint64 src, uint64 len) +{ +  struct proc *p = myproc(); +  if(user_src){ +    return copyin(p->pagetable, dst, src, len); +  } else { +    memmove(dst, (char*)src, len); +    return 0; +  } +} + +// Print a process listing to console.  For debugging. +// Runs when user types ^P on console. +// No lock to avoid wedging a stuck machine further. 
+void +procdump(void) +{ +  static char *states[] = { +  [UNUSED]    "unused", +  [EMBRYO]    "embryo", +  [SLEEPING]  "sleep ", +  [RUNNABLE]  "runble", +  [RUNNING]   "run   ", +  [ZOMBIE]    "zombie" +  }; +  struct proc *p; +  char *state; + +  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ +    if(p->state == UNUSED) +      continue; +    if(p->state >= 0 && p->state < NELEM(states) && states[p->state]) +      state = states[p->state]; +    else +      state = "???"; +    printf("%d %s %s", p->pid, state, p->name); +    printf("\n"); +  } +} + diff --git a/kernel/proc.h b/kernel/proc.h new file mode 100644 index 0000000..278e4cd --- /dev/null +++ b/kernel/proc.h @@ -0,0 +1,104 @@ +// Saved registers for kernel context switches. +struct context { +  uint64 ra; +  uint64 sp; + +  // callee-saved +  uint64 s0; +  uint64 s1; +  uint64 s2; +  uint64 s3; +  uint64 s4; +  uint64 s5; +  uint64 s6; +  uint64 s7; +  uint64 s8; +  uint64 s9; +  uint64 s10; +  uint64 s11; +}; + +// Per-CPU state +struct cpu { +  struct proc *proc;           // The process running on this cpu or null +  struct context scheduler;   // swtch() here to enter scheduler +  int noff;                    // Depth of push_off() nesting. +  int intena;                  // Were interrupts enabled before push_off()? +}; + +extern struct cpu cpus[NCPU]; + +//PAGEBREAK: 17 + +// per-process data for the early trap handling code in trampoline.S. +// sits in a page by itself just under the trampoline page in the +// user page table. not specially mapped in the kernel page table. +// the sscratch register points here. +// trampoline.S saves user registers, then restores kernel_sp and +// kernel_satp. +// includes callee-saved registers like s0-s11 because the +// return-to-user path via usertrapret() doesn't return through +// the entire kernel call stack. 
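+// The byte offsets noted below match the ld/sd offsets hard-coded in
+// trampoline.S; for example "ld t0, 112(a0)" there fetches the saved
+// user a0.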
+struct trapframe { +  /*   0 */ uint64 kernel_satp; +  /*   8 */ uint64 kernel_sp; +  /*  16 */ uint64 kernel_trap; // usertrap() +  /*  24 */ uint64 epc; // saved user program counter +  /*  32 */ uint64 hartid; +  /*  40 */ uint64 ra; +  /*  48 */ uint64 sp; +  /*  56 */ uint64 gp; +  /*  64 */ uint64 tp; +  /*  72 */ uint64 t0; +  /*  80 */ uint64 t1; +  /*  88 */ uint64 t2; +  /*  96 */ uint64 s0; +  /* 104 */ uint64 s1; +  /* 112 */ uint64 a0; +  /* 120 */ uint64 a1; +  /* 128 */ uint64 a2; +  /* 136 */ uint64 a3; +  /* 144 */ uint64 a4; +  /* 152 */ uint64 a5; +  /* 160 */ uint64 a6; +  /* 168 */ uint64 a7; +  /* 176 */ uint64 s2; +  /* 184 */ uint64 s3; +  /* 192 */ uint64 s4; +  /* 200 */ uint64 s5; +  /* 208 */ uint64 s6; +  /* 216 */ uint64 s7; +  /* 224 */ uint64 s8; +  /* 232 */ uint64 s9; +  /* 240 */ uint64 s10; +  /* 248 */ uint64 s11; +  /* 256 */ uint64 t3; +  /* 264 */ uint64 t4; +  /* 272 */ uint64 t5; +  /* 280 */ uint64 t6; +}; + +enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE }; + +// Per-process state +struct proc { +  char *kstack;                // Bottom of kernel stack for this process +  uint64 sz;                   // Size of process memory (bytes) +  pagetable_t pagetable;       // Page table +  enum procstate state;        // Process state +  int pid;                     // Process ID +  struct proc *parent;         // Parent process +  struct trapframe *tf;        // data page for trampoline.S +  struct context context;      // swtch() here to run process +  void *chan;                  // If non-zero, sleeping on chan +  int killed;                  // If non-zero, have been killed +  struct file *ofile[NOFILE];  // Open files +  struct inode *cwd;           // Current directory +  char name[16];               // Process name (debugging) +}; + +// Process memory is laid out contiguously, low addresses first: +//   text +//   original data and bss +//   fixed-size stack +//   expandable heap diff --git a/kernel/ramdisk.c b/kernel/ramdisk.c new file mode 100644 index 0000000..9901294 --- /dev/null +++ b/kernel/ramdisk.c @@ -0,0 +1,45 @@ +// +// ramdisk that uses the disk image loaded by qemu -rdinit fs.img +// + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" + +void +ramdiskinit(void) +{ +} + +// If B_DIRTY is set, write buf to disk, clear B_DIRTY, set B_VALID. +// Else if B_VALID is not set, read buf from disk, set B_VALID. +void +ramdiskrw(struct buf *b) +{ +  if(!holdingsleep(&b->lock)) +    panic("ramdiskrw: buf not locked"); +  if((b->flags & (B_VALID|B_DIRTY)) == B_VALID) +    panic("ramdiskrw: nothing to do"); + +  if(b->blockno >= FSSIZE) +    panic("ramdiskrw: blockno too big"); + +  uint64 diskaddr = b->blockno * BSIZE; +  char *addr = (char *)RAMDISK + diskaddr; + +  if(b->flags & B_DIRTY){ +    // write +    memmove(addr, b->data, BSIZE); +    b->flags &= ~B_DIRTY; +  } else { +    // read +    memmove(b->data, addr, BSIZE); +    b->flags |= B_VALID; +  } +} diff --git a/kernel/riscv.h b/kernel/riscv.h new file mode 100644 index 0000000..c3371a4 --- /dev/null +++ b/kernel/riscv.h @@ -0,0 +1,338 @@ +// which hart (core) is this? 
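+// mhartid is a machine-mode CSR; start.c reads it and stashes the
+// value in tp so cpuid() can recover it later in supervisor mode.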
+static inline uint64 +r_mhartid() +{ +  uint64 x; +  asm volatile("csrr %0, mhartid" : "=r" (x) ); +  return x; +} + +// Machine Status Register, mstatus + +#define MSTATUS_MPP_MASK (3L << 11) +#define MSTATUS_MPP_M (3L << 11) +#define MSTATUS_MPP_S (1L << 11) +#define MSTATUS_MPP_U (0L << 11) +#define MSTATUS_MIE (1L << 3) + +static inline uint64 +r_mstatus() +{ +  uint64 x; +  asm volatile("csrr %0, mstatus" : "=r" (x) ); +  return x; +} + +static inline void  +w_mstatus(uint64 x) +{ +  asm volatile("csrw mstatus, %0" : : "r" (x)); +} + +// machine exception program counter, holds the +// instruction address to which a return from +// exception will go. +static inline void  +w_mepc(uint64 x) +{ +  asm volatile("csrw mepc, %0" : : "r" (x)); +} + +// Supervisor Status Register, sstatus + +#define SSTATUS_SPP (1L << 8)  // Previous mode, 1=Supervisor, 0=User +#define SSTATUS_SPIE (1L << 5) // Supervisor Previous Interrupt Enable +#define SSTATUS_UPIE (1L << 4) // User Previous Interrupt Enable +#define SSTATUS_SIE (1L << 1)  // Supervisor Interrupt Enable +#define SSTATUS_UIE (1L << 0)  // User Interrupt Enable + +static inline uint64 +r_sstatus() +{ +  uint64 x; +  asm volatile("csrr %0, sstatus" : "=r" (x) ); +  return x; +} + +static inline void  +w_sstatus(uint64 x) +{ +  asm volatile("csrw sstatus, %0" : : "r" (x)); +} + +// Supervisor Interrupt Pending +static inline uint64 +r_sip() +{ +  uint64 x; +  asm volatile("csrr %0, sip" : "=r" (x) ); +  return x; +} + +static inline void  +w_sip(uint64 x) +{ +  asm volatile("csrw sip, %0" : : "r" (x)); +} + +// Supervisor Interrupt Enable +#define SIE_SEIE (1L << 9) // external +#define SIE_STIE (1L << 5) // timer +#define SIE_SSIE (1L << 1) // software +static inline uint64 +r_sie() +{ +  uint64 x; +  asm volatile("csrr %0, sie" : "=r" (x) ); +  return x; +} + +static inline void  +w_sie(uint64 x) +{ +  asm volatile("csrw sie, %0" : : "r" (x)); +} + +// Machine-mode Interrupt Enable +#define MIE_MEIE (1L << 11) // external +#define MIE_MTIE (1L << 7) // timer +#define MIE_MSIE (1L << 3) // software +static inline uint64 +r_mie() +{ +  uint64 x; +  asm volatile("csrr %0, mie" : "=r" (x) ); +  return x; +} + +static inline void  +w_mie(uint64 x) +{ +  asm volatile("csrw mie, %0" : : "r" (x)); +} + +// machine exception program counter, holds the +// instruction address to which a return from +// exception will go. +static inline void  +w_sepc(uint64 x) +{ +  asm volatile("csrw sepc, %0" : : "r" (x)); +} + +static inline uint64 +r_sepc() +{ +  uint64 x; +  asm volatile("csrr %0, sepc" : "=r" (x) ); +  return x; +} + +// Machine Exception Delegation +static inline uint64 +r_medeleg() +{ +  uint64 x; +  asm volatile("csrr %0, medeleg" : "=r" (x) ); +  return x; +} + +static inline void  +w_medeleg(uint64 x) +{ +  asm volatile("csrw medeleg, %0" : : "r" (x)); +} + +// Machine Interrupt Delegation +static inline uint64 +r_mideleg() +{ +  uint64 x; +  asm volatile("csrr %0, mideleg" : "=r" (x) ); +  return x; +} + +static inline void  +w_mideleg(uint64 x) +{ +  asm volatile("csrw mideleg, %0" : : "r" (x)); +} + +// Supervisor Trap-Vector Base Address +// low two bits are mode. +static inline void  +w_stvec(uint64 x) +{ +  asm volatile("csrw stvec, %0" : : "r" (x)); +} + +static inline uint64 +r_stvec() +{ +  uint64 x; +  asm volatile("csrr %0, stvec" : "=r" (x) ); +  return x; +} + +// Machine-mode interrupt vector +static inline void  +w_mtvec(uint64 x) +{ +  asm volatile("csrw mtvec, %0" : : "r" (x)); +} + +// use riscv's sv39 page table scheme. 
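+// satp holds the paging mode in its high bits and the physical page
+// number of the root page-table page in its low bits, hence the
+// shift by 12 in MAKE_SATP below.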
+#define SATP_SV39 (8L << 60) + +#define MAKE_SATP(pagetable) (SATP_SV39 | (((uint64)pagetable) >> 12)) + +// supervisor address translation and protection; +// holds the address of the page table. +static inline void  +w_satp(uint64 x) +{ +  asm volatile("csrw satp, %0" : : "r" (x)); +} + +static inline uint64 +r_satp() +{ +  uint64 x; +  asm volatile("csrr %0, satp" : "=r" (x) ); +  return x; +} + +// Supervisor Scratch register, for early trap handler in trampoline.S. +static inline void  +w_sscratch(uint64 x) +{ +  asm volatile("csrw sscratch, %0" : : "r" (x)); +} + +static inline void  +w_mscratch(uint64 x) +{ +  asm volatile("csrw mscratch, %0" : : "r" (x)); +} + +// Supervisor Trap Cause +static inline uint64 +r_scause() +{ +  uint64 x; +  asm volatile("csrr %0, scause" : "=r" (x) ); +  return x; +} + +// Supervisor Trap Value +static inline uint64 +r_stval() +{ +  uint64 x; +  asm volatile("csrr %0, stval" : "=r" (x) ); +  return x; +} + +// Machine-mode Counter-Enable +static inline void  +w_mcounteren(uint64 x) +{ +  asm volatile("csrw mcounteren, %0" : : "r" (x)); +} + +static inline uint64 +r_mcounteren() +{ +  uint64 x; +  asm volatile("csrr %0, mcounteren" : "=r" (x) ); +  return x; +} + +// machine-mode cycle counter +static inline uint64 +r_time() +{ +  uint64 x; +  asm volatile("csrr %0, time" : "=r" (x) ); +  return x; +} + +// enable interrupts +static inline void +intr_on() +{ +  w_sie(r_sie() | SIE_SEIE | SIE_STIE | SIE_SSIE); +  w_sstatus(r_sstatus() | SSTATUS_SIE); +} + +// disable interrupts +static inline void +intr_off() +{ +  w_sstatus(r_sstatus() & ~SSTATUS_SIE); +} + +// are interrupts enabled? +static inline int +intr_get() +{ +  uint64 x = r_sstatus(); +  return (x & SSTATUS_SIE) != 0; +} + +static inline uint64 +r_sp() +{ +  uint64 x; +  asm volatile("mv %0, sp" : "=r" (x) ); +  return x; +} + +// read and write tp, the thread pointer, which holds +// this core's hartid (core number), the index into cpus[]. +static inline uint64 +r_tp() +{ +  uint64 x; +  asm volatile("mv %0, tp" : "=r" (x) ); +  return x; +} + +static inline void  +w_tp(uint64 x) +{ +  asm volatile("mv tp, %0" : : "r" (x)); +} + +#define PGSIZE 4096 // bytes per page +#define PGSHIFT 12  // bits of offset within a page + +#define PGROUNDUP(sz)  (((sz)+PGSIZE-1) & ~(PGSIZE-1)) +#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1)) + +#define PTE_V (1L << 0) // valid +#define PTE_R (1L << 1) +#define PTE_W (1L << 2) +#define PTE_X (1L << 3) +#define PTE_U (1L << 4) // 1 -> user can access + +// shift a physical address to the right place for a PTE. +#define PA2PTE(pa) ((((uint64)pa) >> 12) << 10) + +#define PTE2PA(pte) (((pte) >> 10) << 12) + +#define PTE_FLAGS(pte) ((pte) & (PTE_V|PTE_R|PTE_W|PTE_X|PTE_U)) + +// extract the three 9-bit page table indices from a virtual address. +#define PXMASK          0x1FF // 9 bits +#define PXSHIFT(level)  (PGSHIFT+(9*(level))) +#define PX(level, va) ((((uint64) (va)) >> PXSHIFT(level)) & PXMASK) + +// one beyond the highest possible virtual address. +// MAXVA is actually one bit less than the max allowed by +// Sv39, to avoid having to sign-extend virtual addresses +// that have the high bit set. 
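+// (three 9-bit index levels plus a 12-bit page offset, minus the
+// avoided sign bit: MAXVA = 1L << 38 = 0x4000000000.)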
+#define MAXVA (1L << (9 + 9 + 9 + 12 - 1)) + +typedef uint64 pte_t; +typedef uint64 *pagetable_t; // 512 PTEs diff --git a/kernel/sleeplock.c b/kernel/sleeplock.c new file mode 100644 index 0000000..b490370 --- /dev/null +++ b/kernel/sleeplock.c @@ -0,0 +1,55 @@ +// Sleeping locks + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "proc.h" +#include "spinlock.h" +#include "sleeplock.h" + +void +initsleeplock(struct sleeplock *lk, char *name) +{ +  initlock(&lk->lk, "sleep lock"); +  lk->name = name; +  lk->locked = 0; +  lk->pid = 0; +} + +void +acquiresleep(struct sleeplock *lk) +{ +  acquire(&lk->lk); +  while (lk->locked) { +    sleep(lk, &lk->lk); +  } +  lk->locked = 1; +  lk->pid = myproc()->pid; +  release(&lk->lk); +} + +void +releasesleep(struct sleeplock *lk) +{ +  acquire(&lk->lk); +  lk->locked = 0; +  lk->pid = 0; +  wakeup(lk); +  release(&lk->lk); +} + +int +holdingsleep(struct sleeplock *lk) +{ +  int r; +   +  acquire(&lk->lk); +  r = lk->locked && (lk->pid == myproc()->pid); +  release(&lk->lk); +  return r; +} + + + diff --git a/kernel/sleeplock.h b/kernel/sleeplock.h new file mode 100644 index 0000000..110e6f3 --- /dev/null +++ b/kernel/sleeplock.h @@ -0,0 +1,10 @@ +// Long-term locks for processes +struct sleeplock { +  uint locked;       // Is the lock held? +  struct spinlock lk; // spinlock protecting this sleep lock +   +  // For debugging: +  char *name;        // Name of lock. +  int pid;           // Process holding lock +}; + diff --git a/kernel/spinlock.c b/kernel/spinlock.c new file mode 100644 index 0000000..bbb7cb5 --- /dev/null +++ b/kernel/spinlock.c @@ -0,0 +1,110 @@ +// Mutual exclusion spin locks. + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "spinlock.h" +#include "riscv.h" +#include "proc.h" +#include "defs.h" + +void +initlock(struct spinlock *lk, char *name) +{ +  lk->name = name; +  lk->locked = 0; +  lk->cpu = 0; +} + +// Acquire the lock. +// Loops (spins) until the lock is acquired. +// Holding a lock for a long time may cause +// other CPUs to waste time spinning to acquire it. +void +acquire(struct spinlock *lk) +{ +  push_off(); // disable interrupts to avoid deadlock. +  if(holding(lk)) +    panic("acquire"); + +  // The xchg is atomic. +  //while(xchg(&lk->locked, 1) != 0) +  //  ; +  while(__sync_lock_test_and_set(&lk->locked, 1) != 0) +    ; + +  // Tell the C compiler and the processor to not move loads or stores +  // past this point, to ensure that the critical section's memory +  // references happen after the lock is acquired. +  __sync_synchronize(); + +  // Record info about lock acquisition for holding() and debugging. +  lk->cpu = mycpu(); +} + +// Release the lock. +void +release(struct spinlock *lk) +{ +  if(!holding(lk)) +    panic("release"); + +  lk->cpu = 0; + +  // Tell the C compiler and the processor to not move loads or stores +  // past this point, to ensure that all the stores in the critical +  // section are visible to other cores before the lock is released. +  // Both the C compiler and the hardware may re-order loads and +  // stores; __sync_synchronize() tells them both not to. +  // On RISC-V, this turns into a fence instruction. +  __sync_synchronize(); + +  // Release the lock, equivalent to lk->locked = 0. +  // This code can't use a C assignment, since it might +  // not be atomic. A real OS would use C atomics here. +  // On RISC-V, use an amoswap instruction. 
+  //asm volatile("movl $0, %0" : "+m" (lk->locked) : ); +  __sync_lock_release(&lk->locked); + +  pop_off(); +} + +// Check whether this cpu is holding the lock. +int +holding(struct spinlock *lk) +{ +  int r; +  push_off(); +  r = (lk->locked && lk->cpu == mycpu()); +  pop_off(); +  return r; +} + +// push_off/pop_off are like intr_off()/intr_on() except that they are matched: +// it takes two pop_off to undo two push_off.  Also, if interrupts +// are initially off, then push_off, pop_off leaves them off. + +void +push_off(void) +{ +  struct cpu *c = mycpu(); +  int old = intr_get(); + +  intr_off(); +  if(c->noff == 0) +    c->intena = old; +  c->noff += 1; +} + +void +pop_off(void) +{ +  struct cpu *c = mycpu(); +  if(intr_get()) +    panic("pop_off - interruptible"); +  c->noff -= 1; +  if(c->noff < 0) +    panic("pop_off"); +  if(c->noff == 0 && c->intena) +    intr_on(); +} diff --git a/kernel/spinlock.h b/kernel/spinlock.h new file mode 100644 index 0000000..4392820 --- /dev/null +++ b/kernel/spinlock.h @@ -0,0 +1,9 @@ +// Mutual exclusion lock. +struct spinlock { +  uint locked;       // Is the lock held? + +  // For debugging: +  char *name;        // Name of lock. +  struct cpu *cpu;   // The cpu holding the lock. +}; + diff --git a/kernel/start.c b/kernel/start.c new file mode 100644 index 0000000..c4689dc --- /dev/null +++ b/kernel/start.c @@ -0,0 +1,56 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" + +void main(); + +// entry.S needs one stack per CPU. +__attribute__ ((aligned (16))) char stack0[4096 * NCPU]; + +// scratch area for timer interrupt, one per CPU. +uint64 mscratch0[NCPU * 32]; + +// assembly code in kernelvec for machine-mode timer interrupt. +extern void machinevec(); + +// entry.S jumps here in machine mode on stack0. +void +mstart() +{ +  // set M Previous Privilege mode to Supervisor, for mret. +  unsigned long x = r_mstatus(); +  x &= ~MSTATUS_MPP_MASK; +  x |= MSTATUS_MPP_S; +  w_mstatus(x); + +  // set M Exception Program Counter to main, for mret. +  // requires gcc -mcmodel=medany +  w_mepc((uint64)main); + +  // disable paging for now. +  w_satp(0); + +  // delegate all interrupts and exceptions to supervisor mode. +  w_medeleg(0xffff); +  w_mideleg(0xffff); + +  // set up to receive timer interrupts in machine mode, +  // for pre-emptive switching and (on hart 0) to drive time. +  int id = r_mhartid(); +  uint64 *scratch = &mscratch0[32 * id]; +  *(uint64*)CLINT_MTIMECMP(id) = *(uint64*)CLINT_MTIME + 10000; +  scratch[4] = CLINT_MTIMECMP(id); +  scratch[5] = 10000000; +  w_mscratch((uint64)scratch); +  w_mtvec((uint64)machinevec); +  w_mstatus(r_mstatus() | MSTATUS_MIE); +  w_mie(r_mie() |  MIE_MTIE); + +  // keep each CPU's hartid in its tp register, for cpuid(). +  w_tp(id); + +  // call main() in supervisor mode. 
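+  // mret switches to the privilege mode saved in mstatus.MPP (set to
+  // supervisor above) and jumps to the address in mepc, i.e. main().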
+  asm volatile("mret"); +} diff --git a/kernel/stat.h b/kernel/stat.h new file mode 100644 index 0000000..8a80933 --- /dev/null +++ b/kernel/stat.h @@ -0,0 +1,11 @@ +#define T_DIR  1   // Directory +#define T_FILE 2   // File +#define T_DEV  3   // Device + +struct stat { +  short type;  // Type of file +  int dev;     // File system's disk device +  uint ino;    // Inode number +  short nlink; // Number of links to file +  uint size;   // Size of file in bytes +}; diff --git a/kernel/string.c b/kernel/string.c new file mode 100644 index 0000000..d99e612 --- /dev/null +++ b/kernel/string.c @@ -0,0 +1,104 @@ +#include "types.h" + +void* +memset(void *dst, int c, uint n) +{ +  char *cdst = (char *) dst; +  int i; +  for(i = 0; i < n; i++){ +    cdst[i] = c; +  } +  return dst; +} + +int +memcmp(const void *v1, const void *v2, uint n) +{ +  const uchar *s1, *s2; + +  s1 = v1; +  s2 = v2; +  while(n-- > 0){ +    if(*s1 != *s2) +      return *s1 - *s2; +    s1++, s2++; +  } + +  return 0; +} + +void* +memmove(void *dst, const void *src, uint n) +{ +  const char *s; +  char *d; + +  s = src; +  d = dst; +  if(s < d && s + n > d){ +    s += n; +    d += n; +    while(n-- > 0) +      *--d = *--s; +  } else +    while(n-- > 0) +      *d++ = *s++; + +  return dst; +} + +// memcpy exists to placate GCC.  Use memmove. +void* +memcpy(void *dst, const void *src, uint n) +{ +  return memmove(dst, src, n); +} + +int +strncmp(const char *p, const char *q, uint n) +{ +  while(n > 0 && *p && *p == *q) +    n--, p++, q++; +  if(n == 0) +    return 0; +  return (uchar)*p - (uchar)*q; +} + +char* +strncpy(char *s, const char *t, int n) +{ +  char *os; + +  os = s; +  while(n-- > 0 && (*s++ = *t++) != 0) +    ; +  while(n-- > 0) +    *s++ = 0; +  return os; +} + +// Like strncpy but guaranteed to NUL-terminate. +char* +safestrcpy(char *s, const char *t, int n) +{ +  char *os; + +  os = s; +  if(n <= 0) +    return os; +  while(--n > 0 && (*s++ = *t++) != 0) +    ; +  *s = 0; +  return os; +} + +int +strlen(const char *s) +{ +  int n; + +  for(n = 0; s[n]; n++) +    ; +  return n; +} + diff --git a/kernel/swtch.S b/kernel/swtch.S new file mode 100644 index 0000000..17a8663 --- /dev/null +++ b/kernel/swtch.S @@ -0,0 +1,42 @@ +# Context switch +# +#   void swtch(struct context *old, struct context *new); +#  +# Save current registers in old. Load from new.	 + + +.globl swtch +swtch: +        sd ra, 0(a0) +        sd sp, 8(a0) +        sd s0, 16(a0) +        sd s1, 24(a0) +        sd s2, 32(a0) +        sd s3, 40(a0) +        sd s4, 48(a0) +        sd s5, 56(a0) +        sd s6, 64(a0) +        sd s7, 72(a0) +        sd s8, 80(a0) +        sd s9, 88(a0) +        sd s10, 96(a0) +        sd s11, 104(a0) + +        ld ra, 0(a1) +        ld sp, 8(a1) +        ld s0, 16(a1) +        ld s1, 24(a1) +        ld s2, 32(a1) +        ld s3, 40(a1) +        ld s4, 48(a1) +        ld s5, 56(a1) +        ld s6, 64(a1) +        ld s7, 72(a1) +        ld s8, 80(a1) +        ld s9, 88(a1) +        ld s10, 96(a1) +        ld s11, 104(a1) +         +        ret + +	 diff --git a/kernel/syscall.c b/kernel/syscall.c new file mode 100644 index 0000000..ca34f2c --- /dev/null +++ b/kernel/syscall.c @@ -0,0 +1,191 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "proc.h" +#include "syscall.h" +#include "defs.h" + +// User code makes a system call with INT T_SYSCALL. +// System call number in %eax. +// Arguments on the stack, from the user call to the C +// library system call function. 
The saved user %esp points +// to a saved program counter, and then the first argument. + +// Fetch the int at addr from the current process. +int +fetchint(uint64 addr, int *ip) +{ +  struct proc *p = myproc(); + +  if(addr >= p->sz || addr+4 > p->sz) +    return -1; +  if(copyin(p->pagetable, (char *)ip, addr, sizeof(*ip)) != 0) +    return -1; +  return 0; +} + +// Fetch the uint64 at addr from the current process. +int +fetchaddr(uint64 addr, uint64 *ip) +{ +  struct proc *p = myproc(); +  if(addr >= p->sz || addr+sizeof(uint64) > p->sz) +    return -1; +  if(copyin(p->pagetable, (char *)ip, addr, sizeof(*ip)) != 0) +    return -1; +  return 0; +} + +// Fetch the nul-terminated string at addr from the current process. +// Doesn't actually copy the string - just sets *pp to point at it. +// Returns length of string, not including nul, or -1 for error. +int +fetchstr(uint64 addr, char *buf, int max) +{ +  struct proc *p = myproc(); +  int err = copyinstr(p->pagetable, buf, addr, max); +  if(err < 0) +    return err; +  return strlen(buf); +} + +static uint64 +fetcharg(int n) +{ +  struct proc *p = myproc(); +  switch (n) { +  case 0: +    return p->tf->a0; +  case 1: +    return p->tf->a1; +  case 2: +    return p->tf->a2; +  case 3: +    return p->tf->a3; +  case 4: +    return p->tf->a4; +  case 5: +    return p->tf->a5; +  } +  panic("fetcharg"); +  return -1; +} + +// Fetch the nth 32-bit system call argument. +int +argint(int n, int *ip) +{ +  *ip = fetcharg(n); +  return 0; +} + +int +argaddr(int n, uint64 *ip) +{ +  *ip = fetcharg(n); +  return 0; +} + +// Fetch the nth word-sized system call argument as a pointer +// to a block of memory of size bytes.  Check that the pointer +// lies within the process address space. +int +argptr(int n, uint64 *pp, int size) +{ +  uint64 i; +  struct proc *p = myproc(); +  +  if(argaddr(n, &i) < 0) +    return -1; +  if(size < 0 || (uint)i >= p->sz || (uint)i+size > p->sz) +    return -1; +  *pp = i; +  return 0; +} + +// Fetch the nth word-sized system call argument as a null-terminated string. +// Copies into buf, at most max. +// Returns string length if OK (including nul), -1 if error. 
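+// For example, sys_open() in sysfile.c fetches its path argument with
+// argstr(0, path, MAXPATH).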
+int +argstr(int n, char *buf, int max) +{ +  uint64 addr; +  if(argaddr(n, &addr) < 0) +    return -1; +  return fetchstr(addr, buf, max); +} + +extern int sys_chdir(void); +extern int sys_close(void); +extern int sys_dup(void); +extern int sys_exec(void); +extern int sys_exit(void); +extern int sys_fork(void); +extern int sys_fstat(void); +extern int sys_getpid(void); +extern int sys_kill(void); +extern int sys_link(void); +extern int sys_mkdir(void); +extern int sys_mknod(void); +extern int sys_open(void); +extern int sys_pipe(void); +extern int sys_read(void); +extern int sys_sbrk(void); +extern int sys_sleep(void); +extern int sys_unlink(void); +extern int sys_wait(void); +extern int sys_write(void); +extern int sys_uptime(void); + +static int (*syscalls[])(void) = { +[SYS_fork]    sys_fork, +[SYS_exit]    sys_exit, +[SYS_wait]    sys_wait, +[SYS_pipe]    sys_pipe, +[SYS_read]    sys_read, +[SYS_kill]    sys_kill, +[SYS_exec]    sys_exec, +[SYS_fstat]   sys_fstat, +[SYS_chdir]   sys_chdir, +[SYS_dup]     sys_dup, +[SYS_getpid]  sys_getpid, +[SYS_sbrk]    sys_sbrk, +[SYS_sleep]   sys_sleep, +[SYS_uptime]  sys_uptime, +[SYS_open]    sys_open, +[SYS_write]   sys_write, +[SYS_mknod]   sys_mknod, +[SYS_unlink]  sys_unlink, +[SYS_link]    sys_link, +[SYS_mkdir]   sys_mkdir, +[SYS_close]   sys_close, +}; + +static void +dosyscall(void) +{ +  int num; +  struct proc *p = myproc(); + +  num = p->tf->a7; +  if(num > 0 && num < NELEM(syscalls) && syscalls[num]) { +    p->tf->a0 = syscalls[num](); +  } else { +    printf("%d %s: unknown sys call %d\n", +            p->pid, p->name, num); +    p->tf->a0 = -1; +  } +} + +void +syscall() +{ +    if(myproc()->killed) +      exit(); +    dosyscall(); +    if(myproc()->killed) +      exit(); +    return; +} + diff --git a/kernel/syscall.h b/kernel/syscall.h new file mode 100644 index 0000000..bc5f356 --- /dev/null +++ b/kernel/syscall.h @@ -0,0 +1,22 @@ +// System call numbers +#define SYS_fork    1 +#define SYS_exit    2 +#define SYS_wait    3 +#define SYS_pipe    4 +#define SYS_read    5 +#define SYS_kill    6 +#define SYS_exec    7 +#define SYS_fstat   8 +#define SYS_chdir   9 +#define SYS_dup    10 +#define SYS_getpid 11 +#define SYS_sbrk   12 +#define SYS_sleep  13 +#define SYS_uptime 14 +#define SYS_open   15 +#define SYS_write  16 +#define SYS_mknod  17 +#define SYS_unlink 18 +#define SYS_link   19 +#define SYS_mkdir  20 +#define SYS_close  21 diff --git a/kernel/sysfile.c b/kernel/sysfile.c new file mode 100644 index 0000000..83bb1ed --- /dev/null +++ b/kernel/sysfile.c @@ -0,0 +1,465 @@ +// +// File-system system calls. +// Mostly argument checking, since we don't trust +// user code, and calls into file.c and fs.c. +// + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "stat.h" +#include "proc.h" +#include "fs.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "file.h" +#include "fcntl.h" + +// Fetch the nth word-sized system call argument as a file descriptor +// and return both the descriptor and the corresponding struct file. +static int +argfd(int n, int *pfd, struct file **pf) +{ +  int fd; +  struct file *f; + +  if(argint(n, &fd) < 0) +    return -1; +  if(fd < 0 || fd >= NOFILE || (f=myproc()->ofile[fd]) == 0) +    return -1; +  if(pfd) +    *pfd = fd; +  if(pf) +    *pf = f; +  return 0; +} + +// Allocate a file descriptor for the given file. +// Takes over file reference from caller on success. 
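+// On failure the caller still owns the file reference and must
+// fileclose() it itself, as sys_open() does.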
+static int +fdalloc(struct file *f) +{ +  int fd; +  struct proc *p = myproc(); + +  for(fd = 0; fd < NOFILE; fd++){ +    if(p->ofile[fd] == 0){ +      p->ofile[fd] = f; +      return fd; +    } +  } +  return -1; +} + +int +sys_dup(void) +{ +  struct file *f; +  int fd; + +  if(argfd(0, 0, &f) < 0) +    return -1; +  if((fd=fdalloc(f)) < 0) +    return -1; +  filedup(f); +  return fd; +} + +int +sys_read(void) +{ +  struct file *f; +  int n; +  uint64 p; + +  if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argptr(1, &p, n) < 0) +    return -1; +  return fileread(f, p, n); +} + +int +sys_write(void) +{ +  struct file *f; +  int n; +  uint64 p; + +  if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argptr(1, &p, n) < 0) +    return -1; + +  return filewrite(f, p, n); +} + +int +sys_close(void) +{ +  int fd; +  struct file *f; + +  if(argfd(0, &fd, &f) < 0) +    return -1; +  myproc()->ofile[fd] = 0; +  fileclose(f); +  return 0; +} + +int +sys_fstat(void) +{ +  struct file *f; +  uint64 st; // user pointer to struct stat + +  if(argfd(0, 0, &f) < 0 || argptr(1, &st, sizeof(struct stat)) < 0) +    return -1; +  return filestat(f, st); +} + +// Create the path new as a link to the same inode as old. +int +sys_link(void) +{ +  char name[DIRSIZ], new[MAXPATH], old[MAXPATH]; +  struct inode *dp, *ip; + +  if(argstr(0, old, MAXPATH) < 0 || argstr(1, new, MAXPATH) < 0) +    return -1; + +  begin_op(); +  if((ip = namei(old)) == 0){ +    end_op(); +    return -1; +  } + +  ilock(ip); +  if(ip->type == T_DIR){ +    iunlockput(ip); +    end_op(); +    return -1; +  } + +  ip->nlink++; +  iupdate(ip); +  iunlock(ip); + +  if((dp = nameiparent(new, name)) == 0) +    goto bad; +  ilock(dp); +  if(dp->dev != ip->dev || dirlink(dp, name, ip->inum) < 0){ +    iunlockput(dp); +    goto bad; +  } +  iunlockput(dp); +  iput(ip); + +  end_op(); + +  return 0; + +bad: +  ilock(ip); +  ip->nlink--; +  iupdate(ip); +  iunlockput(ip); +  end_op(); +  return -1; +} + +// Is the directory dp empty except for "." and ".." ? +static int +isdirempty(struct inode *dp) +{ +  int off; +  struct dirent de; + +  for(off=2*sizeof(de); off<dp->size; off+=sizeof(de)){ +    if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) +      panic("isdirempty: readi"); +    if(de.inum != 0) +      return 0; +  } +  return 1; +} + +//PAGEBREAK! +int +sys_unlink(void) +{ +  struct inode *ip, *dp; +  struct dirent de; +  char name[DIRSIZ], path[MAXPATH]; +  uint off; + +  if(argstr(0, path, MAXPATH) < 0) +    return -1; + +  begin_op(); +  if((dp = nameiparent(path, name)) == 0){ +    end_op(); +    return -1; +  } + +  ilock(dp); + +  // Cannot unlink "." or "..". 
+  if(namecmp(name, ".") == 0 || namecmp(name, "..") == 0) +    goto bad; + +  if((ip = dirlookup(dp, name, &off)) == 0) +    goto bad; +  ilock(ip); + +  if(ip->nlink < 1) +    panic("unlink: nlink < 1"); +  if(ip->type == T_DIR && !isdirempty(ip)){ +    iunlockput(ip); +    goto bad; +  } + +  memset(&de, 0, sizeof(de)); +  if(writei(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) +    panic("unlink: writei"); +  if(ip->type == T_DIR){ +    dp->nlink--; +    iupdate(dp); +  } +  iunlockput(dp); + +  ip->nlink--; +  iupdate(ip); +  iunlockput(ip); + +  end_op(); + +  return 0; + +bad: +  iunlockput(dp); +  end_op(); +  return -1; +} + +static struct inode* +create(char *path, short type, short major, short minor) +{ +  uint off; +  struct inode *ip, *dp; +  char name[DIRSIZ]; + +  if((dp = nameiparent(path, name)) == 0) +    return 0; +  ilock(dp); + +  if((ip = dirlookup(dp, name, &off)) != 0){ +    iunlockput(dp); +    ilock(ip); +    if(type == T_FILE && ip->type == T_FILE) +      return ip; +    iunlockput(ip); +    return 0; +  } + +  if((ip = ialloc(dp->dev, type)) == 0) +    panic("create: ialloc"); + +  ilock(ip); +  ip->major = major; +  ip->minor = minor; +  ip->nlink = 1; +  iupdate(ip); + +  if(type == T_DIR){  // Create . and .. entries. +    dp->nlink++;  // for ".." +    iupdate(dp); +    // No ip->nlink++ for ".": avoid cyclic ref count. +    if(dirlink(ip, ".", ip->inum) < 0 || dirlink(ip, "..", dp->inum) < 0) +      panic("create dots"); +  } + +  if(dirlink(dp, name, ip->inum) < 0) +    panic("create: dirlink"); + +  iunlockput(dp); + +  return ip; +} + +int +sys_open(void) +{ +  char path[MAXPATH]; +  int fd, omode; +  struct file *f; +  struct inode *ip; + +  if(argstr(0, path, MAXPATH) < 0 || argint(1, &omode) < 0) +    return -1; + +  begin_op(); + +  if(omode & O_CREATE){ +    ip = create(path, T_FILE, 0, 0); +    if(ip == 0){ +      end_op(); +      return -1; +    } +  } else { +    if((ip = namei(path)) == 0){ +      end_op(); +      return -1; +    } +    ilock(ip); +    if(ip->type == T_DIR && omode != O_RDONLY){ +      iunlockput(ip); +      end_op(); +      return -1; +    } +  } + +  if((f = filealloc()) == 0 || (fd = fdalloc(f)) < 0){ +    if(f) +      fileclose(f); +    iunlockput(ip); +    end_op(); +    return -1; +  } +  iunlock(ip); +  end_op(); + +  f->type = FD_INODE; +  f->ip = ip; +  f->off = 0; +  f->readable = !(omode & O_WRONLY); +  f->writable = (omode & O_WRONLY) || (omode & O_RDWR); +  return fd; +} + +int +sys_mkdir(void) +{ +  char path[MAXPATH]; +  struct inode *ip; + +  begin_op(); +  if(argstr(0, path, MAXPATH) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0){ +    end_op(); +    return -1; +  } +  iunlockput(ip); +  end_op(); +  return 0; +} + +int +sys_mknod(void) +{ +  struct inode *ip; +  char path[MAXPATH]; +  int major, minor; + +  begin_op(); +  if((argstr(0, path, MAXPATH)) < 0 || +     argint(1, &major) < 0 || +     argint(2, &minor) < 0 || +     (ip = create(path, T_DEV, major, minor)) == 0){ +    end_op(); +    return -1; +  } +  iunlockput(ip); +  end_op(); +  return 0; +} + +int +sys_chdir(void) +{ +  char path[MAXPATH]; +  struct inode *ip; +  struct proc *p = myproc(); +   +  begin_op(); +  if(argstr(0, path, MAXPATH) < 0 || (ip = namei(path)) == 0){ +    end_op(); +    return -1; +  } +  ilock(ip); +  if(ip->type != T_DIR){ +    iunlockput(ip); +    end_op(); +    return -1; +  } +  iunlock(ip); +  iput(p->cwd); +  end_op(); +  p->cwd = ip; +  return 0; +} + +int +sys_exec(void) +{ +  char path[MAXPATH], *argv[MAXARG]; 
+  int i; +  uint64 uargv, uarg; + +  if(argstr(0, path, MAXPATH) < 0 || argaddr(1, &uargv) < 0){ +    return -1; +  } +  memset(argv, 0, sizeof(argv)); +  for(i=0;; i++){ +    if(i >= NELEM(argv)){ +      return -1; +    } +    if(fetchaddr(uargv+sizeof(uint64)*i, (uint64*)&uarg) < 0){ +      return -1; +    } +    if(uarg == 0){ +      argv[i] = 0; +      break; +    } +    argv[i] = kalloc(); +    if(argv[i] == 0) +      panic("sys_exec kalloc"); +    if(fetchstr(uarg, argv[i], PGSIZE) < 0){ +      return -1; +    } +  } + +  int ret = exec(path, argv); + +  for(i = 0; i < NELEM(argv) && argv[i] != 0; i++) +    kfree(argv[i]); + +  return ret; +} + +int +sys_pipe(void) +{ +  uint64 fdarray; // user pointer to array of two integers +  struct file *rf, *wf; +  int fd0, fd1; +  struct proc *p = myproc(); + +  if(argptr(0, &fdarray, 2*sizeof(int)) < 0) +    return -1; +  if(pipealloc(&rf, &wf) < 0) +    return -1; +  fd0 = -1; +  if((fd0 = fdalloc(rf)) < 0 || (fd1 = fdalloc(wf)) < 0){ +    if(fd0 >= 0) +      p->ofile[fd0] = 0; +    fileclose(rf); +    fileclose(wf); +    return -1; +  } +  if(copyout(p->pagetable, fdarray, (char*)&fd0, sizeof(fd0)) < 0 || +     copyout(p->pagetable, fdarray+sizeof(fd0), (char *)&fd1, sizeof(fd1)) < 0){ +    p->ofile[fd0] = 0; +    p->ofile[fd1] = 0; +    fileclose(rf); +    fileclose(wf); +    return -1; +  } +  return 0; +} diff --git a/kernel/sysproc.c b/kernel/sysproc.c new file mode 100644 index 0000000..e57e045 --- /dev/null +++ b/kernel/sysproc.c @@ -0,0 +1,90 @@ +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "date.h" +#include "param.h" +#include "memlayout.h" +#include "proc.h" + +int +sys_exit(void) +{ +  exit(); +  return 0;  // not reached +} + +int +sys_getpid(void) +{ +  return myproc()->pid; +} + +int +sys_fork(void) +{ +  return fork(); +} + +int +sys_wait(void) +{ +  return wait(); +} + +int +sys_sbrk(void) +{ +  int addr; +  int n; + +  if(argint(0, &n) < 0) +    return -1; +  addr = myproc()->sz; +  if(growproc(n) < 0) +    return -1; +  return addr; +} + +int +sys_sleep(void) +{ +  int n; +  uint ticks0; + +  if(argint(0, &n) < 0) +    return -1; +  acquire(&tickslock); +  ticks0 = ticks; +  while(ticks - ticks0 < n){ +    if(myproc()->killed){ +      release(&tickslock); +      return -1; +    } +    sleep(&ticks, &tickslock); +  } +  release(&tickslock); +  return 0; +} + +int +sys_kill(void) +{ +  int pid; + +  if(argint(0, &pid) < 0) +    return -1; +  return kill(pid); +} + +// return how many clock tick interrupts have occurred +// since start. +int +sys_uptime(void) +{ +  uint xticks; + +  acquire(&tickslock); +  xticks = ticks; +  release(&tickslock); +  return xticks; +} diff --git a/kernel/trampoline.S b/kernel/trampoline.S new file mode 100644 index 0000000..dd4eb02 --- /dev/null +++ b/kernel/trampoline.S @@ -0,0 +1,137 @@ +	# +        # code to switch between user and kernel space. +        # +        # this code is mapped at the same virtual address +        # in user and kernel space so that it can switch +        # page tables. +	# +	# kernel.ld causes trampout to be aligned +        # to a page boundary. +        # +.globl usertrap +	.section trampoline +.globl trampout +trampout: +        # switch from kernel to user. +        # usertrapret() calls here. 
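+        # usertrapret() reaches this code through a function-pointer
+        # call to TRAMPOLINE; the page is mapped at the same virtual
+        # address in every page table, so the satp switch below is safe.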
+	# a0: p->tf in user page table +        # a1: new value for satp, for user page table + +        # switch to user page table +        csrw satp, a1 + +        # put the saved user a0 in sscratch, so we +        # can swap it with our a0 (p->tf) in the last step. +        ld t0, 112(a0) +        csrw sscratch, t0 + +        # restore all but a0 from p->tf +        ld ra, 40(a0) +        ld sp, 48(a0) +        ld gp, 56(a0) +        ld tp, 64(a0) +        ld t0, 72(a0) +        ld t1, 80(a0) +        ld t2, 88(a0) +        ld s0, 96(a0) +        ld s1, 104(a0) +        ld a1, 120(a0) +        ld a2, 128(a0) +        ld a3, 136(a0) +        ld a4, 144(a0) +        ld a5, 152(a0) +        ld a6, 160(a0) +        ld a7, 168(a0) +        ld s2, 176(a0) +        ld s3, 184(a0) +        ld s4, 192(a0) +        ld s5, 200(a0) +        ld s6, 208(a0) +        ld s7, 216(a0) +        ld s8, 224(a0) +        ld s9, 232(a0) +        ld s10, 240(a0) +        ld s11, 248(a0) +        ld t3, 256(a0) +        ld t4, 264(a0) +        ld t5, 272(a0) +        ld t6, 280(a0) + +	# restore user a0, and save p->tf +        csrrw a0, sscratch, a0 +         +        # return to user mode and user pc. +        # caller has set up sstatus and sepc. +        sret + +.align 4 +.globl trampin +trampin:     +	# +        # trap.c set stvec to point here, so +        # user interrupts and exceptions start here, +        # in supervisor mode, but with a +        # user page table. +        # +        # sscratch points to where the process's p->tf is +        # mapped into user space (TRAMPOLINE - 4096). +        # +         +	# swap a0 and sscratch +        # so that a0 is p->tf +        csrrw a0, sscratch, a0 + +        # save the user registers in p->tf +        sd ra, 40(a0) +        sd sp, 48(a0) +        sd gp, 56(a0) +        sd tp, 64(a0) +        sd t0, 72(a0) +        sd t1, 80(a0) +        sd t2, 88(a0) +        sd s0, 96(a0) +        sd s1, 104(a0) +        sd a1, 120(a0) +        sd a2, 128(a0) +        sd a3, 136(a0) +        sd a4, 144(a0) +        sd a5, 152(a0) +        sd a6, 160(a0) +        sd a7, 168(a0) +        sd s2, 176(a0) +        sd s3, 184(a0) +        sd s4, 192(a0) +        sd s5, 200(a0) +        sd s6, 208(a0) +        sd s7, 216(a0) +        sd s8, 224(a0) +        sd s9, 232(a0) +        sd s10, 240(a0) +        sd s11, 248(a0) +        sd t3, 256(a0) +        sd t4, 264(a0) +        sd t5, 272(a0) +        sd t6, 280(a0) + +	# save the user a0 in p->tf->a0 +        csrr t0, sscratch +        sd t0, 112(a0) + +        # restore kernel stack pointer from p->tf->kernel_sp +        ld sp, 8(a0) + +        # make tp hold the current hartid, from p->tf->hartid +        ld tp, 32(a0) + +        # remember the address of usertrap(), p->tf->kernel_trap +        ld t0, 16(a0) + +        # restore kernel page table from p->tf->kernel_satp +        ld t1, 0(a0) +        csrw satp, t1 + +        # a0 is no longer valid, since the kernel page +        # table does not specially map p->td. + +        # jump to usertrap(), which does not return +        jr t0 diff --git a/kernel/trap.c b/kernel/trap.c new file mode 100644 index 0000000..050a94d --- /dev/null +++ b/kernel/trap.c @@ -0,0 +1,184 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "proc.h" +#include "spinlock.h" +#include "defs.h" + +struct spinlock tickslock; +uint ticks; + +extern char trampout[], trampin[]; + +// in kernelvec.S, calls kerneltrap(). 
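+// Traps from user space enter at trampin in trampoline.S instead and
+// end up in usertrap() below.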
+void kernelvec(); + +extern int devintr(); + +void +trapinit(void) +{ +  initlock(&tickslock, "time"); +} + +// set up to take exceptions and traps while in the kernel. +void +trapinithart(void) +{ +  w_stvec((uint64)kernelvec); +} + +// +// handle an interrupt, exception, or system call from user space. +// called from trampoline.S +// +void +usertrap(void) +{ +  int which_dev = 0; +   +  if((r_sstatus() & SSTATUS_SPP) != 0) +    panic("usertrap: not from user mode"); + +  // send interrupts and exceptions to kerneltrap(), +  // since we're now in the kernel. +  w_stvec((uint64)kernelvec); + +  struct proc *p = myproc(); +   +  // save user program counter. +  p->tf->epc = r_sepc(); + +  intr_on(); +   +  if(r_scause() == 8){ +    // system call + +    // sepc points to the ecall instruction, +    // but we want to return to the next instruction. +    p->tf->epc += 4; + +    syscall(); +  } else if((which_dev = devintr()) != 0){ +    // ok +  } else { +    printf("usertrap(): unexpected scause 0x%p pid=%d\n", r_scause(), p->pid); +    printf("            sepc=%p stval=%p\n", r_sepc(), r_stval()); +    p->killed = 1; +  } + +  if(p->killed) +    exit(); + +  // give up the CPU if this is a timer interrupt. +  if(which_dev == 2) +    yield(); + +  usertrapret(); +} + +// +// return to user space +// +void +usertrapret(void) +{ +  struct proc *p = myproc(); + +  // turn off interrupts, since we're switching +  // now from kerneltrap() to usertrap(). +  intr_off(); + +  // send interrupts and exceptions to trampoline.S +  w_stvec(TRAMPOLINE + (trampin - trampout)); + +  // set up values that trampoline.S will need when +  // the process next re-enters the kernel. +  p->tf->kernel_satp = r_satp(); +  p->tf->kernel_sp = (uint64)p->kstack + PGSIZE; +  p->tf->kernel_trap = (uint64)usertrap; +  p->tf->hartid = r_tp(); + +  // set up the registers that trampoline.S's sret will use +  // to get to user space. +   +  // set S Previous Privilege mode to User. +  unsigned long x = r_sstatus(); +  x &= ~SSTATUS_SPP; // clear SPP to 0 for user mode +  x |= SSTATUS_SPIE; // enable interrupts in user mode +  w_sstatus(x); + +  // set S Exception Program Counter to the saved user pc. +  w_sepc(p->tf->epc); + +  // tell trampline.S the user page table to switch to. +  uint64 satp = MAKE_SATP(p->pagetable); + +  // jump to trampoline.S at the top of memory, which  +  // switches to the user page table, restores user registers, +  // and switches to user mode with sret. +  ((void (*)(uint64,uint64))TRAMPOLINE)(TRAMPOLINE - PGSIZE, satp); +} + +// interrupts and exceptions from kernel code go here, +// on whatever the current kernel stack is. +// must be 4-byte aligned to fit in stvec. +void  +kerneltrap() +{ +  uint64 sstatus = r_sstatus(); +  uint64 scause = r_scause(); +   +  if((sstatus & SSTATUS_SPP) == 0) +    panic("kerneltrap: not from supervisor mode"); +  if(intr_get() != 0) +    panic("kerneltrap: interrupts enabled"); + +  if(devintr() == 0){ +    printf("scause 0x%p\n", scause); +    printf("sepc=%p stval=%p\n", r_sepc(), r_stval()); +    panic("kerneltrap"); +  } +} + +// check if it's an external interrupt or software interrupt, +// and handle it. +// returns 2 if timer interrupt, +// 1 if other device, +// 0 if not recognized. +int +devintr() +{ +  uint64 scause = r_scause(); + +  if((scause & 0x8000000000000000L) && +     (scause & 0xff) == 9){ +    // supervisor external interrupt, via PLIC. 
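+    // plic_claim() returns the IRQ number of the interrupting device.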
+    int irq = plic_claim(); + +    if(irq == UART0_IRQ){ +      uartintr(); +    } + +    plic_complete(irq); +    return 1; +  } else if(scause == 0x8000000000000001){ +    // software interrupt from a machine-mode timer interrupt. + +    if(cpuid() == 0){ +      acquire(&tickslock); +      ticks++; +      wakeup(&ticks); +      release(&tickslock); +    } +     +    // acknowledge. +    w_sip(r_sip() & ~2); + +    return 2; +  } else { +    return 0; +  } +} + diff --git a/kernel/types.h b/kernel/types.h new file mode 100644 index 0000000..ee73164 --- /dev/null +++ b/kernel/types.h @@ -0,0 +1,10 @@ +typedef unsigned int   uint; +typedef unsigned short ushort; +typedef unsigned char  uchar; + +typedef unsigned char uint8; +typedef unsigned short uint16; +typedef unsigned int  uint32; +typedef unsigned long uint64; + +typedef uint64 pde_t; diff --git a/kernel/uart.c b/kernel/uart.c new file mode 100644 index 0000000..35fac1b --- /dev/null +++ b/kernel/uart.c @@ -0,0 +1,75 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "proc.h" +#include "spinlock.h" +#include "defs.h" + +// +// qemu -machine virt has a 16550a UART +// qemu/hw/riscv/virt.c +// http://byterunner.com/16550.html +// +// caller should lock. +// + +// address of one of the registers +#define R(reg) ((volatile unsigned char *)(UART0 + reg)) + +void +uartinit(void) +{ +  // disable interrupts -- IER +  *R(1) = 0x00; + +  // special mode to set baud rate +  *R(3) = 0x80; + +  // LSB for baud rate of 38.4K +  *R(0) = 0x03; + +  // MSB for baud rate of 38.4K +  *R(1) = 0x00; + +  // leave set-baud mode, +  // and set word length to 8 bits, no parity. +  *R(3) = 0x03; + +  // reset and enable FIFOs -- FCR. +  *R(2) = 0x07; + +  // enable receive interrupts -- IER. +  *R(1) = 0x01; +} + +void +uartputc(int c) +{ +  // wait for Transmit Holding Empty to be set in LSR. +  while((*R(5) & (1 << 5)) == 0) +    ; +  *R(0) = c; +} + +int +uartgetc(void) +{ +  if(*R(5) & 0x01){ +    // input data is ready. +    return *R(0); +  } else { +    return -1; +  } +} + +void +uartintr(void) +{ +  while(1){ +    int c = uartgetc(); +    if(c == -1) +      break; +    consoleintr(c); +  } +} diff --git a/kernel/vm.c b/kernel/vm.c new file mode 100644 index 0000000..0ea6bca --- /dev/null +++ b/kernel/vm.c @@ -0,0 +1,389 @@ +#include "param.h" +#include "types.h" +#include "memlayout.h" +#include "elf.h" +#include "riscv.h" +#include "defs.h" +#include "fs.h" + +/* + * the kernel's page table. + */ +pagetable_t kernel_pagetable; + +extern char etext[];  // kernel.ld sets this to end of kernel code. + +extern char trampout[]; // trampoline.S + +/* + * create a direct-map page table for the kernel and + * turn on paging. called early, in supervisor mode. + * the page allocator is already initialized. + */ +void +kvminit() +{ +  kernel_pagetable = (pagetable_t) kalloc(); +  memset(kernel_pagetable, 0, PGSIZE); + +  // uart registers +  mappages(kernel_pagetable, UART0, PGSIZE, +           UART0, PTE_R | PTE_W); + +  // CLINT +  mappages(kernel_pagetable, CLINT, 0x10000, +           CLINT, PTE_R | PTE_W); + +  // PLIC +  mappages(kernel_pagetable, PLIC, 0x4000000, +           PLIC, PTE_R | PTE_W); + +  // map kernel text executable and read-only. +  mappages(kernel_pagetable, KERNBASE, (uint64)etext-KERNBASE, +           KERNBASE, PTE_R | PTE_X); + +  // map kernel data and the physical RAM we'll make use of. 
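+  // this covers everything from etext up to PHYSTOP,
+  // including the pages that kalloc() hands out.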
+  mappages(kernel_pagetable, (uint64)etext, PHYSTOP-(uint64)etext, +           (uint64)etext, PTE_R | PTE_W); + +  // map the qemu -initrd fs.img ramdisk +  mappages(kernel_pagetable, RAMDISK, FSSIZE * BSIZE, +           RAMDISK, PTE_R | PTE_W); + +  // map the trampoline for trap entry/exit to +  // the highest virtual address in the kernel. +  mappages(kernel_pagetable, TRAMPOLINE, PGSIZE, +           (uint64)trampout, PTE_R | PTE_X); +} + +// Switch h/w page table register to the kernel's page table, +// and enable paging. +void +kvminithart() +{ +  w_satp(MAKE_SATP(kernel_pagetable)); +} + +// Return the address of the PTE in page table pagetable +// that corresponds to virtual address va.  If alloc!=0, +// create any required page table pages. +// +// The risc-v Sv39 scheme has three levels of page table +// pages. A page table page contains 512 64-bit PTEs. +// A 64-bit virtual address is split into five fields: +//   39..63 -- must be zero. +//   30..38 -- 9 bits of level-2 index. +//   21..39 -- 9 bits of level-1 index. +//   12..20 -- 9 bits of level-0 index. +//    0..12 -- 12 bits of byte offset within the page. +static pte_t * +walk(pagetable_t pagetable, uint64 va, int alloc) +{ +  if(va >= MAXVA) +    panic("walk"); + +  for(int level = 2; level > 0; level--) { +    pte_t *pte = &pagetable[PX(level, va)]; +    if(*pte & PTE_V) { +      pagetable = (pagetable_t)PTE2PA(*pte); +    } else { +      if(!alloc || (pagetable = (pde_t*)kalloc()) == 0) +        return 0; +      memset(pagetable, 0, PGSIZE); +      *pte = PA2PTE(pagetable) | PTE_V; +    } +  } +  return &pagetable[PX(0, va)]; +} + +// Look up a virtual address, return the physical address, +// Can only be used to look up user pages. +// or 0 if not mapped. +uint64 +walkaddr(pagetable_t pagetable, uint64 va) +{ +  pte_t *pte; +  uint64 pa; + +  pte = walk(pagetable, va, 0); +  if(pte == 0) +    return 0; +  if((*pte & PTE_V) == 0) +    return 0; +  if((*pte & PTE_U) == 0) +    return 0; +  pa = PTE2PA(*pte); +  return pa; +} + + +// Create PTEs for virtual addresses starting at va that refer to +// physical addresses starting at pa. va and size might not +// be page-aligned. +void +mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm) +{ +  uint64 a, last; +  pte_t *pte; + +  a = PGROUNDDOWN(va); +  last = PGROUNDDOWN(va + size - 1); +  for(;;){ +    if((pte = walk(pagetable, a, 1)) == 0) +      panic("mappages: walk"); +    if(*pte & PTE_V) +      panic("remap"); +    *pte = PA2PTE(pa) | perm | PTE_V; +    if(a == last) +      break; +    a += PGSIZE; +    pa += PGSIZE; +  } +} + +// Remove mappings from a page table. The mappings in +// the given range must exist. Optionally free the +// physical memory. +void +unmappages(pagetable_t pagetable, uint64 va, uint64 size, int do_free) +{ +  uint64 a, last; +  pte_t *pte; +  uint64 pa; + +  a = PGROUNDDOWN(va); +  last = PGROUNDDOWN(va + size - 1); +  for(;;){ +    if((pte = walk(pagetable, a, 0)) == 0) +      panic("unmappages: walk"); +    if((*pte & PTE_V) == 0){ +      printf("va=%p pte=%p\n", a, *pte); +      panic("unmappages: not mapped"); +    } +    if(PTE_FLAGS(*pte) == PTE_V) +      panic("unmappages: not a leaf"); +    if(do_free){ +      pa = PTE2PA(*pte); +      kfree((void*)pa); +    } +    *pte = 0; +    if(a == last) +      break; +    a += PGSIZE; +    pa += PGSIZE; +  } +} + +// create an empty user page table. 
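+// panics if kalloc() fails.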
+pagetable_t +uvmcreate() +{ +  pagetable_t pagetable; +  pagetable = (pagetable_t) kalloc(); +  if(pagetable == 0) +    panic("uvmcreate: out of memory"); +  memset(pagetable, 0, PGSIZE); +  return pagetable; +} + +// Load the user initcode into address 0 of pagetable, +// for the very first process. +// sz must be less than a page. +void +uvminit(pagetable_t pagetable, uchar *src, uint sz) +{ +  char *mem; + +  if(sz >= PGSIZE) +    panic("inituvm: more than a page"); +  mem = kalloc(); +  memset(mem, 0, PGSIZE); +  mappages(pagetable, 0, PGSIZE, (uint64)mem, PTE_W|PTE_R|PTE_X|PTE_U); +  memmove(mem, src, sz); +} + +// Allocate PTEs and physical memory to grow process from oldsz to +// newsz, which need not be page aligned.  Returns new size or 0 on error. +uint64 +uvmalloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz) +{ +  char *mem; +  uint64 a; + +  if(newsz < oldsz) +    return oldsz; + +  oldsz = PGROUNDUP(oldsz); +  a = oldsz; +  for(; a < newsz; a += PGSIZE){ +    mem = kalloc(); +    if(mem == 0){ +      uvmdealloc(pagetable, a, oldsz); +      return 0; +    } +    memset(mem, 0, PGSIZE); +    mappages(pagetable, a, PGSIZE, (uint64)mem, PTE_W|PTE_X|PTE_R|PTE_U); +  } +  return newsz; +} + +// Deallocate user pages to bring the process size from oldsz to +// newsz.  oldsz and newsz need not be page-aligned, nor does newsz +// need to be less than oldsz.  oldsz can be larger than the actual +// process size.  Returns the new process size. +uint64 +uvmdealloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz) +{ +  if(newsz >= oldsz) +    return oldsz; +  unmappages(pagetable, newsz, oldsz - newsz, 1); +  return newsz; +} + +// Recursively free page table pages. +// All leaf mappings must already have been removed. +static void +freewalk(pagetable_t pagetable) +{ +  // there are 2^9 = 512 PTEs in a page table. +  for(int i = 0; i < 512; i++){ +    pte_t pte = pagetable[i]; +    if((pte & PTE_V) && (pte & (PTE_R|PTE_W|PTE_X)) == 0){ +      // this PTE points to a lower-level page table. +      uint64 child = PTE2PA(pte); +      freewalk((pagetable_t)child); +      pagetable[i] = 0; +    } else if(pte & PTE_V){ +      panic("freewalk: leaf"); +    } +  } +  kfree((void*)pagetable); +} + +// Free user memory pages, +// then free page table pages. +void +uvmfree(pagetable_t pagetable, uint64 sz) +{ +  unmappages(pagetable, 0, sz, 1); +  freewalk(pagetable); +} + +// Given a parent process's page table, copy +// its memory into a child's page table. +// Copies both the page table and the +// physical memory. +void +uvmcopy(pagetable_t old, pagetable_t new, uint64 sz) +{ +  pte_t *pte; +  uint64 pa, i; +  uint flags; +  char *mem; + +  for(i = 0; i < sz; i += PGSIZE){ +    if((pte = walk(old, i, 0)) == 0) +      panic("copyuvm: pte should exist"); +    if((*pte & PTE_V) == 0) +      panic("copyuvm: page not present"); +    pa = PTE2PA(*pte); +    flags = PTE_FLAGS(*pte); +    if((mem = kalloc()) == 0) +      panic("uvmcopy: kalloc failed"); +    memmove(mem, (char*)pa, PGSIZE); +    mappages(new, i, PGSIZE, (uint64)mem, flags); +  } +} + +// Copy from kernel to user. +// Copy len bytes from src to virtual address dstva in a given page table. +// Return 0 on success, -1 on error. 
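+// Copies a page at a time, translating each user page with walkaddr().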
+int
+copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
+{
+  uint64 n, va0, pa0;
+
+  while(len > 0){
+    va0 = PGROUNDDOWN(dstva);
+    pa0 = walkaddr(pagetable, va0);
+    if(pa0 == 0)
+      return -1;
+    n = PGSIZE - (dstva - va0);
+    if(n > len)
+      n = len;
+    memmove((void *)(pa0 + (dstva - va0)), src, n);
+
+    len -= n;
+    src += n;
+    dstva = va0 + PGSIZE;
+  }
+  return 0;
+}
+
+// Copy from user to kernel.
+// Copy len bytes to dst from virtual address srcva in a given page table.
+// Return 0 on success, -1 on error.
+int
+copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len)
+{
+  uint64 n, va0, pa0;
+
+  while(len > 0){
+    va0 = PGROUNDDOWN(srcva);
+    pa0 = walkaddr(pagetable, va0);
+    if(pa0 == 0)
+      return -1;
+    n = PGSIZE - (srcva - va0);
+    if(n > len)
+      n = len;
+    memmove(dst, (void *)(pa0 + (srcva - va0)), n);
+
+    len -= n;
+    dst += n;
+    srcva = va0 + PGSIZE;
+  }
+  return 0;
+}
+
+// Copy a null-terminated string from user to kernel.
+// Copy bytes to dst from virtual address srcva in a given page table,
+// until a '\0', or max.
+// Return 0 on success, -1 on error.
+int
+copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max)
+{
+  uint64 n, va0, pa0;
+  int got_null = 0;
+
+  while(got_null == 0 && max > 0){
+    va0 = PGROUNDDOWN(srcva);
+    pa0 = walkaddr(pagetable, va0);
+    if(pa0 == 0)
+      return -1;
+    n = PGSIZE - (srcva - va0);
+    if(n > max)
+      n = max;
+
+    char *p = (char *) (pa0 + (srcva - va0));
+    while(n > 0){
+      if(*p == '\0'){
+        *dst = '\0';
+        got_null = 1;
+        break;
+      } else {
+        *dst = *p;
+      }
+      --n;
+      --max;
+      p++;
+      dst++;
+    }
+
+    srcva = va0 + PGSIZE;
+  }
+  if(got_null){
+    return 0;
+  } else {
+    return -1;
+  }
+}
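The three copy routines above share one chunking pattern: round the user address down to a page boundary, translate that single page with walkaddr(), copy no further than the end of the page, then move on to the next page. As a sketch of that arithmetic only (not part of this commit; the address, the length, and the host-side PGROUNDDOWN macro below are made-up illustration values), a standalone C program can trace the chunk sizes:

// Standalone illustration of copyout()/copyin() chunking.
// PGSIZE, dstva, and len are assumed example values.
#include <stdio.h>
#include <stdint.h>

#define PGSIZE 4096
#define PGROUNDDOWN(a) ((a) & ~(uint64_t)(PGSIZE - 1))

int main(void)
{
  uint64_t dstva = 0x3ff0;   // example user address, not page-aligned
  uint64_t len = 10000;      // example length spanning several pages

  while(len > 0){
    uint64_t va0 = PGROUNDDOWN(dstva);
    // bytes available in this page, starting at dstva
    uint64_t n = PGSIZE - (dstva - va0);
    if(n > len)
      n = len;
    printf("copy %lu bytes at va %#lx (page %#lx)\n",
           (unsigned long)n, (unsigned long)dstva, (unsigned long)va0);
    len -= n;
    dstva = va0 + PGSIZE;    // next chunk starts at the following page
  }
  return 0;
}

With these example values it prints a short 16-byte chunk up to the end of the first page, then whole 4096-byte pages, then a final partial chunk, which is the same way copyout() advances dstva through the user address space.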
