summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bio.c151
-rw-r--r--kernel/buf.h13
-rw-r--r--kernel/console.c199
-rw-r--r--kernel/date.h8
-rw-r--r--kernel/defs.h186
-rw-r--r--kernel/elf.h42
-rw-r--r--kernel/entry.S26
-rw-r--r--kernel/exec.c153
-rw-r--r--kernel/fcntl.h4
-rw-r--r--kernel/file.c178
-rw-r--r--kernel/file.h37
-rw-r--r--kernel/fs.c667
-rw-r--r--kernel/fs.h60
-rw-r--r--kernel/kalloc.c83
-rw-r--r--kernel/kernel.ld32
-rw-r--r--kernel/kernelvec.S121
-rw-r--r--kernel/log.c235
-rw-r--r--kernel/main.c43
-rw-r--r--kernel/memlayout.h67
-rw-r--r--kernel/param.h13
-rw-r--r--kernel/pipe.c127
-rw-r--r--kernel/plic.c62
-rw-r--r--kernel/printf.c134
-rw-r--r--kernel/proc.c647
-rw-r--r--kernel/proc.h105
-rw-r--r--kernel/ramdisk.c45
-rw-r--r--kernel/riscv.h358
-rw-r--r--kernel/sleeplock.c55
-rw-r--r--kernel/sleeplock.h10
-rw-r--r--kernel/spinlock.c108
-rw-r--r--kernel/spinlock.h9
-rw-r--r--kernel/start.c82
-rw-r--r--kernel/stat.h11
-rw-r--r--kernel/string.c104
-rw-r--r--kernel/swtch.S42
-rw-r--r--kernel/syscall.c147
-rw-r--r--kernel/syscall.h22
-rw-r--r--kernel/sysfile.c476
-rw-r--r--kernel/sysproc.c91
-rw-r--r--kernel/trampoline.S141
-rw-r--r--kernel/trap.c213
-rw-r--r--kernel/types.h10
-rw-r--r--kernel/uart.c92
-rw-r--r--kernel/virtio.h72
-rw-r--r--kernel/virtio_disk.c269
-rw-r--r--kernel/vm.c441
46 files changed, 6191 insertions, 0 deletions
diff --git a/kernel/bio.c b/kernel/bio.c
new file mode 100644
index 0000000..a1074f2
--- /dev/null
+++ b/kernel/bio.c
@@ -0,0 +1,151 @@
+// Buffer cache.
+//
+// The buffer cache is a linked list of buf structures holding
+// cached copies of disk block contents. Caching disk blocks
+// in memory reduces the number of disk reads and also provides
+// a synchronization point for disk blocks used by multiple processes.
+//
+// Interface:
+// * To get a buffer for a particular disk block, call bread.
+// * After changing buffer data, call bwrite to write it to disk.
+// * When done with the buffer, call brelse.
+// * Do not use the buffer after calling brelse.
+// * Only one process at a time can use a buffer,
+// so do not keep them longer than necessary.
+
+
+#include "types.h"
+#include "param.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "riscv.h"
+#include "defs.h"
+#include "fs.h"
+#include "buf.h"
+
+struct {
+ struct spinlock lock;
+ struct buf buf[NBUF];
+
+ // Linked list of all buffers, through prev/next.
+ // head.next is most recently used.
+ struct buf head;
+} bcache;
+
+void
+binit(void)
+{
+ struct buf *b;
+
+ initlock(&bcache.lock, "bcache");
+
+ // Create linked list of buffers
+ bcache.head.prev = &bcache.head;
+ bcache.head.next = &bcache.head;
+ for(b = bcache.buf; b < bcache.buf+NBUF; b++){
+ b->next = bcache.head.next;
+ b->prev = &bcache.head;
+ initsleeplock(&b->lock, "buffer");
+ bcache.head.next->prev = b;
+ bcache.head.next = b;
+ }
+}
+
+// Look through buffer cache for block on device dev.
+// If not found, allocate a buffer.
+// In either case, return locked buffer.
+static struct buf*
+bget(uint dev, uint blockno)
+{
+ struct buf *b;
+
+ acquire(&bcache.lock);
+
+ // Is the block already cached?
+ for(b = bcache.head.next; b != &bcache.head; b = b->next){
+ if(b->dev == dev && b->blockno == blockno){
+ b->refcnt++;
+ release(&bcache.lock);
+ acquiresleep(&b->lock);
+ return b;
+ }
+ }
+
+ // Not cached; recycle an unused buffer.
+ for(b = bcache.head.prev; b != &bcache.head; b = b->prev){
+ if(b->refcnt == 0) {
+ b->dev = dev;
+ b->blockno = blockno;
+ b->valid = 0;
+ b->refcnt = 1;
+ release(&bcache.lock);
+ acquiresleep(&b->lock);
+ return b;
+ }
+ }
+ panic("bget: no buffers");
+}
+
+// Return a locked buf with the contents of the indicated block.
+struct buf*
+bread(uint dev, uint blockno)
+{
+ struct buf *b;
+
+ b = bget(dev, blockno);
+ if(!b->valid) {
+ virtio_disk_rw(b, 0);
+ b->valid = 1;
+ }
+ return b;
+}
+
+// Write b's contents to disk. Must be locked.
+void
+bwrite(struct buf *b)
+{
+ if(!holdingsleep(&b->lock))
+ panic("bwrite");
+ virtio_disk_rw(b, 1);
+}
+
+// Release a locked buffer.
+// Move to the head of the MRU list.
+void
+brelse(struct buf *b)
+{
+ if(!holdingsleep(&b->lock))
+ panic("brelse");
+
+ releasesleep(&b->lock);
+
+ acquire(&bcache.lock);
+ b->refcnt--;
+ if (b->refcnt == 0) {
+ // no one is waiting for it.
+ b->next->prev = b->prev;
+ b->prev->next = b->next;
+ b->next = bcache.head.next;
+ b->prev = &bcache.head;
+ bcache.head.next->prev = b;
+ bcache.head.next = b;
+ }
+
+ release(&bcache.lock);
+}
+
+void
+bpin(struct buf *b) {
+ acquire(&bcache.lock);
+ b->refcnt++;
+ release(&bcache.lock);
+}
+
+void
+bunpin(struct buf *b) {
+ acquire(&bcache.lock);
+ b->refcnt--;
+ release(&bcache.lock);
+}
+
+
diff --git a/kernel/buf.h b/kernel/buf.h
new file mode 100644
index 0000000..4a3a39d
--- /dev/null
+++ b/kernel/buf.h
@@ -0,0 +1,13 @@
+struct buf {
+ int valid; // has data been read from disk?
+ int disk; // does disk "own" buf?
+ uint dev;
+ uint blockno;
+ struct sleeplock lock;
+ uint refcnt;
+ struct buf *prev; // LRU cache list
+ struct buf *next;
+ struct buf *qnext; // disk queue
+ uchar data[BSIZE];
+};
+
diff --git a/kernel/console.c b/kernel/console.c
new file mode 100644
index 0000000..87a83ff
--- /dev/null
+++ b/kernel/console.c
@@ -0,0 +1,199 @@
+//
+// Console input and output, to the uart.
+// Reads are one line at a time.
+// Implements special input characters:
+// newline -- end of line
+// control-h -- backspace
+// control-u -- kill line
+// control-d -- end of file
+// control-p -- print process list
+//
+
+#include <stdarg.h>
+
+#include "types.h"
+#include "param.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "fs.h"
+#include "file.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "defs.h"
+#include "proc.h"
+
+#define BACKSPACE 0x100
+#define C(x) ((x)-'@') // Control-x
+
+//
+// send one character to the uart.
+//
+void
+consputc(int c)
+{
+ extern volatile int panicked; // from printf.c
+
+ if(panicked){
+ for(;;)
+ ;
+ }
+
+ if(c == BACKSPACE){
+ // if the user typed backspace, overwrite with a space.
+ uartputc('\b'); uartputc(' '); uartputc('\b');
+ } else {
+ uartputc(c);
+ }
+}
+
+struct {
+ struct spinlock lock;
+
+ // input
+#define INPUT_BUF 128
+ char buf[INPUT_BUF];
+ uint r; // Read index
+ uint w; // Write index
+ uint e; // Edit index
+} cons;
+
+//
+// user write()s to the console go here.
+//
+int
+consolewrite(int user_src, uint64 src, int n)
+{
+ int i;
+
+ acquire(&cons.lock);
+ for(i = 0; i < n; i++){
+ char c;
+ if(either_copyin(&c, user_src, src+i, 1) == -1)
+ break;
+ consputc(c);
+ }
+ release(&cons.lock);
+
+ return n;
+}
+
+//
+// user read()s from the console go here.
+// copy (up to) a whole input line to dst.
+// user_dst indicates whether dst is a user
+// or kernel address.
+//
+int
+consoleread(int user_dst, uint64 dst, int n)
+{
+ uint target;
+ int c;
+ char cbuf;
+
+ target = n;
+ acquire(&cons.lock);
+ while(n > 0){
+ // wait until interrupt handler has put some
+ // input into cons.buffer.
+ while(cons.r == cons.w){
+ if(myproc()->killed){
+ release(&cons.lock);
+ return -1;
+ }
+ sleep(&cons.r, &cons.lock);
+ }
+
+ c = cons.buf[cons.r++ % INPUT_BUF];
+
+ if(c == C('D')){ // end-of-file
+ if(n < target){
+ // Save ^D for next time, to make sure
+ // caller gets a 0-byte result.
+ cons.r--;
+ }
+ break;
+ }
+
+ // copy the input byte to the user-space buffer.
+ cbuf = c;
+ if(either_copyout(user_dst, dst, &cbuf, 1) == -1)
+ break;
+
+ dst++;
+ --n;
+
+ if(c == '\n'){
+ // a whole line has arrived, return to
+ // the user-level read().
+ break;
+ }
+ }
+ release(&cons.lock);
+
+ return target - n;
+}
+
+//
+// the console input interrupt handler.
+// uartintr() calls this for input character.
+// do erase/kill processing, append to cons.buf,
+// wake up consoleread() if a whole line has arrived.
+//
+void
+consoleintr(int c)
+{
+ acquire(&cons.lock);
+
+ switch(c){
+ case C('P'): // Print process list.
+ procdump();
+ break;
+ case C('U'): // Kill line.
+ while(cons.e != cons.w &&
+ cons.buf[(cons.e-1) % INPUT_BUF] != '\n'){
+ cons.e--;
+ consputc(BACKSPACE);
+ }
+ break;
+ case C('H'): // Backspace
+ case '\x7f':
+ if(cons.e != cons.w){
+ cons.e--;
+ consputc(BACKSPACE);
+ }
+ break;
+ default:
+ if(c != 0 && cons.e-cons.r < INPUT_BUF){
+ c = (c == '\r') ? '\n' : c;
+
+ // echo back to the user.
+ consputc(c);
+
+ // store for consumption by consoleread().
+ cons.buf[cons.e++ % INPUT_BUF] = c;
+
+ if(c == '\n' || c == C('D') || cons.e == cons.r+INPUT_BUF){
+ // wake up consoleread() if a whole line (or end-of-file)
+ // has arrived.
+ cons.w = cons.e;
+ wakeup(&cons.r);
+ }
+ }
+ break;
+ }
+
+ release(&cons.lock);
+}
+
+void
+consoleinit(void)
+{
+ initlock(&cons.lock, "cons");
+
+ uartinit();
+
+ // connect read and write system calls
+ // to consoleread and consolewrite.
+ devsw[CONSOLE].read = consoleread;
+ devsw[CONSOLE].write = consolewrite;
+}
diff --git a/kernel/date.h b/kernel/date.h
new file mode 100644
index 0000000..94aec4b
--- /dev/null
+++ b/kernel/date.h
@@ -0,0 +1,8 @@
+struct rtcdate {
+ uint second;
+ uint minute;
+ uint hour;
+ uint day;
+ uint month;
+ uint year;
+};
diff --git a/kernel/defs.h b/kernel/defs.h
new file mode 100644
index 0000000..23dcd41
--- /dev/null
+++ b/kernel/defs.h
@@ -0,0 +1,186 @@
+struct buf;
+struct context;
+struct file;
+struct inode;
+struct pipe;
+struct proc;
+struct spinlock;
+struct sleeplock;
+struct stat;
+struct superblock;
+
+// bio.c
+void binit(void);
+struct buf* bread(uint, uint);
+void brelse(struct buf*);
+void bwrite(struct buf*);
+void bpin(struct buf*);
+void bunpin(struct buf*);
+
+// console.c
+void consoleinit(void);
+void consoleintr(int);
+void consputc(int);
+
+// exec.c
+int exec(char*, char**);
+
+// file.c
+struct file* filealloc(void);
+void fileclose(struct file*);
+struct file* filedup(struct file*);
+void fileinit(void);
+int fileread(struct file*, uint64, int n);
+int filestat(struct file*, uint64 addr);
+int filewrite(struct file*, uint64, int n);
+
+// fs.c
+void fsinit(int);
+int dirlink(struct inode*, char*, uint);
+struct inode* dirlookup(struct inode*, char*, uint*);
+struct inode* ialloc(uint, short);
+struct inode* idup(struct inode*);
+void iinit();
+void ilock(struct inode*);
+void iput(struct inode*);
+void iunlock(struct inode*);
+void iunlockput(struct inode*);
+void iupdate(struct inode*);
+int namecmp(const char*, const char*);
+struct inode* namei(char*);
+struct inode* nameiparent(char*, char*);
+int readi(struct inode*, int, uint64, uint, uint);
+void stati(struct inode*, struct stat*);
+int writei(struct inode*, int, uint64, uint, uint);
+
+// ramdisk.c
+void ramdiskinit(void);
+void ramdiskintr(void);
+void ramdiskrw(struct buf*);
+
+// kalloc.c
+void* kalloc(void);
+void kfree(void *);
+void kinit();
+
+// log.c
+void initlog(int, struct superblock*);
+void log_write(struct buf*);
+void begin_op();
+void end_op();
+
+// pipe.c
+int pipealloc(struct file**, struct file**);
+void pipeclose(struct pipe*, int);
+int piperead(struct pipe*, uint64, int);
+int pipewrite(struct pipe*, uint64, int);
+
+// printf.c
+void printf(char*, ...);
+void panic(char*) __attribute__((noreturn));
+void printfinit(void);
+
+// proc.c
+int cpuid(void);
+void exit(void);
+int fork(void);
+int growproc(int);
+pagetable_t proc_pagetable(struct proc *);
+void proc_freepagetable(pagetable_t, uint64);
+int kill(int);
+struct cpu* mycpu(void);
+struct cpu* getmycpu(void);
+struct proc* myproc();
+void procinit(void);
+void scheduler(void) __attribute__((noreturn));
+void sched(void);
+void setproc(struct proc*);
+void sleep(void*, struct spinlock*);
+void userinit(void);
+int wait(void);
+void wakeup(void*);
+void yield(void);
+int either_copyout(int user_dst, uint64 dst, void *src, uint64 len);
+int either_copyin(void *dst, int user_src, uint64 src, uint64 len);
+void procdump(void);
+
+// swtch.S
+void swtch(struct context*, struct context*);
+
+// spinlock.c
+void acquire(struct spinlock*);
+int holding(struct spinlock*);
+void initlock(struct spinlock*, char*);
+void release(struct spinlock*);
+void push_off(void);
+void pop_off(void);
+
+// sleeplock.c
+void acquiresleep(struct sleeplock*);
+void releasesleep(struct sleeplock*);
+int holdingsleep(struct sleeplock*);
+void initsleeplock(struct sleeplock*, char*);
+
+// string.c
+int memcmp(const void*, const void*, uint);
+void* memmove(void*, const void*, uint);
+void* memset(void*, int, uint);
+char* safestrcpy(char*, const char*, int);
+int strlen(const char*);
+int strncmp(const char*, const char*, uint);
+char* strncpy(char*, const char*, int);
+
+// syscall.c
+int argint(int, int*);
+int argstr(int, char*, int);
+int argaddr(int, uint64 *);
+int fetchstr(uint64, char*, int);
+int fetchaddr(uint64, uint64*);
+void syscall();
+
+// trap.c
+extern uint ticks;
+void trapinit(void);
+void trapinithart(void);
+extern struct spinlock tickslock;
+void usertrapret(void);
+
+// uart.c
+void uartinit(void);
+void uartintr(void);
+void uartputc(int);
+int uartgetc(void);
+
+// vm.c
+void kvminit(void);
+void kvminithart(void);
+uint64 kvmpa(uint64);
+void kvmmap(uint64, uint64, uint64, int);
+int mappages(pagetable_t, uint64, uint64, uint64, int);
+pagetable_t uvmcreate(void);
+void uvminit(pagetable_t, uchar *, uint);
+uint64 uvmalloc(pagetable_t, uint64, uint64);
+uint64 uvmdealloc(pagetable_t, uint64, uint64);
+int uvmcopy(pagetable_t, pagetable_t, uint64);
+void uvmfree(pagetable_t, uint64);
+void uvmunmap(pagetable_t, uint64, uint64, int);
+void uvmclear(pagetable_t, uint64);
+uint64 walkaddr(pagetable_t, uint64);
+int copyout(pagetable_t, uint64, char *, uint64);
+int copyin(pagetable_t, char *, uint64, uint64);
+int copyinstr(pagetable_t, char *, uint64, uint64);
+
+// plic.c
+void plicinit(void);
+void plicinithart(void);
+uint64 plic_pending(void);
+int plic_claim(void);
+void plic_complete(int);
+
+// virtio_disk.c
+void virtio_disk_init(void);
+void virtio_disk_rw(struct buf *, int);
+void virtio_disk_intr();
+
+// number of elements in fixed-size array
+#define NELEM(x) (sizeof(x)/sizeof((x)[0]))
diff --git a/kernel/elf.h b/kernel/elf.h
new file mode 100644
index 0000000..84555fa
--- /dev/null
+++ b/kernel/elf.h
@@ -0,0 +1,42 @@
+// Format of an ELF executable file
+
+#define ELF_MAGIC 0x464C457FU // "\x7FELF" in little endian
+
+// File header
+struct elfhdr {
+ uint magic; // must equal ELF_MAGIC
+ uchar elf[12];
+ ushort type;
+ ushort machine;
+ uint version;
+ uint64 entry;
+ uint64 phoff;
+ uint64 shoff;
+ uint flags;
+ ushort ehsize;
+ ushort phentsize;
+ ushort phnum;
+ ushort shentsize;
+ ushort shnum;
+ ushort shstrndx;
+};
+
+// Program section header
+struct proghdr {
+ uint32 type;
+ uint32 flags;
+ uint64 off;
+ uint64 vaddr;
+ uint64 paddr;
+ uint64 filesz;
+ uint64 memsz;
+ uint64 align;
+};
+
+// Values for Proghdr type
+#define ELF_PROG_LOAD 1
+
+// Flag bits for Proghdr flags
+#define ELF_PROG_FLAG_EXEC 1
+#define ELF_PROG_FLAG_WRITE 2
+#define ELF_PROG_FLAG_READ 4
diff --git a/kernel/entry.S b/kernel/entry.S
new file mode 100644
index 0000000..ef5a56a
--- /dev/null
+++ b/kernel/entry.S
@@ -0,0 +1,26 @@
+ # qemu -kernel starts at 0x1000. the instructions
+ # there seem to be provided by qemu, as if it
+ # were a ROM. the code at 0x1000 jumps to
+ # 0x8000000, the _start function here,
+ # in machine mode. each CPU starts here.
+.section .data
+.globl stack0
+.section .text
+.globl start
+.section .text
+.globl _entry
+_entry:
+ # set up a stack for C.
+ # stack0 is declared in start.c,
+ # with a 4096-byte stack per CPU.
+# sp = stack0 + ((hartid+1) * 4096), the top of this hart's stack
+ la sp, stack0
+ li a0, 1024*4
+ csrr a1, mhartid
+ addi a1, a1, 1
+ mul a0, a0, a1
+ add sp, sp, a0
+ # jump to start() in start.c
+ call start
+junk:
+ j junk
diff --git a/kernel/exec.c b/kernel/exec.c
new file mode 100644
index 0000000..74ef654
--- /dev/null
+++ b/kernel/exec.c
@@ -0,0 +1,153 @@
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "spinlock.h"
+#include "proc.h"
+#include "defs.h"
+#include "elf.h"
+
+static int loadseg(pde_t *pgdir, uint64 addr, struct inode *ip, uint offset, uint sz);
+
+int
+exec(char *path, char **argv)
+{
+ char *s, *last;
+ int i, off;
+ uint64 argc, sz, sp, ustack[MAXARG+1], stackbase;
+ struct elfhdr elf;
+ struct inode *ip;
+ struct proghdr ph;
+ pagetable_t pagetable = 0, oldpagetable;
+ struct proc *p = myproc();
+
+ begin_op();
+
+ if((ip = namei(path)) == 0){
+ end_op();
+ return -1;
+ }
+ ilock(ip);
+
+ // Check ELF header
+ if(readi(ip, 0, (uint64)&elf, 0, sizeof(elf)) != sizeof(elf))
+ goto bad;
+ if(elf.magic != ELF_MAGIC)
+ goto bad;
+
+ if((pagetable = proc_pagetable(p)) == 0)
+ goto bad;
+
+ // Load program into memory.
+ sz = 0;
+ for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){
+ if(readi(ip, 0, (uint64)&ph, off, sizeof(ph)) != sizeof(ph))
+ goto bad;
+ if(ph.type != ELF_PROG_LOAD)
+ continue;
+ if(ph.memsz < ph.filesz)
+ goto bad;
+ if(ph.vaddr + ph.memsz < ph.vaddr)
+ goto bad;
+ if((sz = uvmalloc(pagetable, sz, ph.vaddr + ph.memsz)) == 0)
+ goto bad;
+ if(ph.vaddr % PGSIZE != 0)
+ goto bad;
+ if(loadseg(pagetable, ph.vaddr, ip, ph.off, ph.filesz) < 0)
+ goto bad;
+ }
+ iunlockput(ip);
+ end_op();
+ ip = 0;
+
+ p = myproc();
+ uint64 oldsz = p->sz;
+
+ // Allocate two pages at the next page boundary.
+ // Use the second as the user stack.
+ sz = PGROUNDUP(sz);
+ if((sz = uvmalloc(pagetable, sz, sz + 2*PGSIZE)) == 0)
+ goto bad;
+ uvmclear(pagetable, sz-2*PGSIZE);
+ sp = sz;
+ stackbase = sp - PGSIZE;
+
+ // Push argument strings, prepare rest of stack in ustack.
+ for(argc = 0; argv[argc]; argc++) {
+ if(argc >= MAXARG)
+ goto bad;
+ sp -= strlen(argv[argc]) + 1;
+ sp -= sp % 16; // riscv sp must be 16-byte aligned
+ if(sp < stackbase)
+ goto bad;
+ if(copyout(pagetable, sp, argv[argc], strlen(argv[argc]) + 1) < 0)
+ goto bad;
+ ustack[argc] = sp;
+ }
+ ustack[argc] = 0;
+
+ // push the array of argv[] pointers.
+ sp -= (argc+1) * sizeof(uint64);
+ sp -= sp % 16;
+ if(sp < stackbase)
+ goto bad;
+ if(copyout(pagetable, sp, (char *)ustack, (argc+1)*sizeof(uint64)) < 0)
+ goto bad;
+
+ // arguments to user main(argc, argv)
+ // argc is returned via the system call return
+ // value, which goes in a0.
+ p->tf->a1 = sp;
+
+ // Save program name for debugging.
+ for(last=s=path; *s; s++)
+ if(*s == '/')
+ last = s+1;
+ safestrcpy(p->name, last, sizeof(p->name));
+
+ // Commit to the user image.
+ oldpagetable = p->pagetable;
+ p->pagetable = pagetable;
+ p->sz = sz;
+ p->tf->epc = elf.entry; // initial program counter = main
+ p->tf->sp = sp; // initial stack pointer
+ proc_freepagetable(oldpagetable, oldsz);
+ return argc; // this ends up in a0, the first argument to main(argc, argv)
+
+ bad:
+ if(pagetable)
+ proc_freepagetable(pagetable, sz);
+ if(ip){
+ iunlockput(ip);
+ end_op();
+ }
+ return -1;
+}
+
+// Load a program segment into pagetable at virtual address va.
+// va must be page-aligned
+// and the pages from va to va+sz must already be mapped.
+// Returns 0 on success, -1 on failure.
+static int
+loadseg(pagetable_t pagetable, uint64 va, struct inode *ip, uint offset, uint sz)
+{
+ uint i, n;
+ uint64 pa;
+
+ if((va % PGSIZE) != 0)
+ panic("loadseg: va must be page aligned");
+
+ for(i = 0; i < sz; i += PGSIZE){
+ pa = walkaddr(pagetable, va + i);
+ if(pa == 0)
+ panic("loadseg: address should exist");
+ if(sz - i < PGSIZE)
+ n = sz - i;
+ else
+ n = PGSIZE;
+ if(readi(ip, 0, (uint64)pa, offset+i, n) != n)
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/kernel/fcntl.h b/kernel/fcntl.h
new file mode 100644
index 0000000..d565483
--- /dev/null
+++ b/kernel/fcntl.h
@@ -0,0 +1,4 @@
+#define O_RDONLY 0x000
+#define O_WRONLY 0x001
+#define O_RDWR 0x002
+#define O_CREATE 0x200
diff --git a/kernel/file.c b/kernel/file.c
new file mode 100644
index 0000000..7663476
--- /dev/null
+++ b/kernel/file.c
@@ -0,0 +1,178 @@
+//
+// Support functions for system calls that involve file descriptors.
+//
+
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "param.h"
+#include "fs.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "file.h"
+#include "stat.h"
+#include "proc.h"
+
+struct devsw devsw[NDEV];
+struct {
+ struct spinlock lock;
+ struct file file[NFILE];
+} ftable;
+
+void
+fileinit(void)
+{
+ initlock(&ftable.lock, "ftable");
+}
+
+// Allocate a file structure.
+struct file*
+filealloc(void)
+{
+ struct file *f;
+
+ acquire(&ftable.lock);
+ for(f = ftable.file; f < ftable.file + NFILE; f++){
+ if(f->ref == 0){
+ f->ref = 1;
+ release(&ftable.lock);
+ return f;
+ }
+ }
+ release(&ftable.lock);
+ return 0;
+}
+
+// Increment ref count for file f.
+struct file*
+filedup(struct file *f)
+{
+ acquire(&ftable.lock);
+ if(f->ref < 1)
+ panic("filedup");
+ f->ref++;
+ release(&ftable.lock);
+ return f;
+}
+
+// Close file f. (Decrement ref count, close when reaches 0.)
+void
+fileclose(struct file *f)
+{
+ struct file ff;
+
+ acquire(&ftable.lock);
+ if(f->ref < 1)
+ panic("fileclose");
+ if(--f->ref > 0){
+ release(&ftable.lock);
+ return;
+ }
+ ff = *f;
+ f->ref = 0;
+ f->type = FD_NONE;
+ release(&ftable.lock);
+
+ if(ff.type == FD_PIPE){
+ pipeclose(ff.pipe, ff.writable);
+ } else if(ff.type == FD_INODE || ff.type == FD_DEVICE){
+ begin_op();
+ iput(ff.ip);
+ end_op();
+ }
+}
+
+// Get metadata about file f.
+// addr is a user virtual address, pointing to a struct stat.
+int
+filestat(struct file *f, uint64 addr)
+{
+ struct proc *p = myproc();
+ struct stat st;
+
+ if(f->type == FD_INODE || f->type == FD_DEVICE){
+ ilock(f->ip);
+ stati(f->ip, &st);
+ iunlock(f->ip);
+ if(copyout(p->pagetable, addr, (char *)&st, sizeof(st)) < 0)
+ return -1;
+ return 0;
+ }
+ return -1;
+}
+
+// Read from file f.
+// addr is a user virtual address.
+int
+fileread(struct file *f, uint64 addr, int n)
+{
+ int r = 0;
+
+ if(f->readable == 0)
+ return -1;
+
+ if(f->type == FD_PIPE){
+ r = piperead(f->pipe, addr, n);
+ } else if(f->type == FD_DEVICE){
+ r = devsw[f->major].read(1, addr, n);
+ } else if(f->type == FD_INODE){
+ ilock(f->ip);
+ if((r = readi(f->ip, 1, addr, f->off, n)) > 0)
+ f->off += r;
+ iunlock(f->ip);
+ } else {
+ panic("fileread");
+ }
+
+ return r;
+}
+
+// Write to file f.
+// addr is a user virtual address.
+int
+filewrite(struct file *f, uint64 addr, int n)
+{
+ int r, ret = 0;
+
+ if(f->writable == 0)
+ return -1;
+
+ if(f->type == FD_PIPE){
+ ret = pipewrite(f->pipe, addr, n);
+ } else if(f->type == FD_DEVICE){
+ ret = devsw[f->major].write(1, addr, n);
+ } else if(f->type == FD_INODE){
+ // write a few blocks at a time to avoid exceeding
+ // the maximum log transaction size, including
+ // i-node, indirect block, allocation blocks,
+ // and 2 blocks of slop for non-aligned writes.
+ // this really belongs lower down, since writei()
+ // might be writing a device like the console.
+ int max = ((MAXOPBLOCKS-1-1-2) / 2) * 512;
+ int i = 0;
+ while(i < n){
+ int n1 = n - i;
+ if(n1 > max)
+ n1 = max;
+
+ begin_op();
+ ilock(f->ip);
+ if ((r = writei(f->ip, 1, addr + i, f->off, n1)) > 0)
+ f->off += r;
+ iunlock(f->ip);
+ end_op();
+
+ if(r < 0)
+ break;
+ if(r != n1)
+ panic("short filewrite");
+ i += r;
+ }
+ ret = (i == n ? n : -1);
+ } else {
+ panic("filewrite");
+ }
+
+ return ret;
+}
+
diff --git a/kernel/file.h b/kernel/file.h
new file mode 100644
index 0000000..5cf15a2
--- /dev/null
+++ b/kernel/file.h
@@ -0,0 +1,37 @@
+struct file {
+ enum { FD_NONE, FD_PIPE, FD_INODE, FD_DEVICE } type;
+ int ref; // reference count
+ char readable;
+ char writable;
+ struct pipe *pipe; // FD_PIPE
+ struct inode *ip; // FD_INODE and FD_DEVICE
+ uint off; // FD_INODE
+ short major; // FD_DEVICE
+};
+
+
+// in-memory copy of an inode
+struct inode {
+ uint dev; // Device number
+ uint inum; // Inode number
+ int ref; // Reference count
+ struct sleeplock lock; // protects everything below here
+ int valid; // inode has been read from disk?
+
+ short type; // copy of disk inode
+ short major;
+ short minor;
+ short nlink;
+ uint size;
+ uint addrs[NDIRECT+1];
+};
+
+// map major device number to device functions.
+struct devsw {
+ int (*read)(int, uint64, int);
+ int (*write)(int, uint64, int);
+};
+
+extern struct devsw devsw[];
+
+#define CONSOLE 1
diff --git a/kernel/fs.c b/kernel/fs.c
new file mode 100644
index 0000000..3a74c1f
--- /dev/null
+++ b/kernel/fs.c
@@ -0,0 +1,667 @@
+// File system implementation. Five layers:
+// + Blocks: allocator for raw disk blocks.
+// + Log: crash recovery for multi-step updates.
+// + Files: inode allocator, reading, writing, metadata.
+// + Directories: inode with special contents (list of other inodes!)
+// + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
+//
+// This file contains the low-level file system manipulation
+// routines. The (higher-level) system call implementations
+// are in sysfile.c.
+
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "param.h"
+#include "stat.h"
+#include "spinlock.h"
+#include "proc.h"
+#include "sleeplock.h"
+#include "fs.h"
+#include "buf.h"
+#include "file.h"
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+static void itrunc(struct inode*);
+// there should be one superblock per disk device, but we run with
+// only one device
+struct superblock sb;
+
+// Read the super block.
+static void
+readsb(int dev, struct superblock *sb)
+{
+ struct buf *bp;
+
+ bp = bread(dev, 1);
+ memmove(sb, bp->data, sizeof(*sb));
+ brelse(bp);
+}
+
+// Init fs
+void
+fsinit(int dev) {
+ readsb(dev, &sb);
+ if(sb.magic != FSMAGIC)
+ panic("invalid file system");
+ initlog(dev, &sb);
+}
+
+// Zero a block.
+static void
+bzero(int dev, int bno)
+{
+ struct buf *bp;
+
+ bp = bread(dev, bno);
+ memset(bp->data, 0, BSIZE);
+ log_write(bp);
+ brelse(bp);
+}
+
+// Blocks.
+
+// Allocate a zeroed disk block.
+static uint
+balloc(uint dev)
+{
+ int b, bi, m;
+ struct buf *bp;
+
+ bp = 0;
+ for(b = 0; b < sb.size; b += BPB){
+ bp = bread(dev, BBLOCK(b, sb));
+ for(bi = 0; bi < BPB && b + bi < sb.size; bi++){
+ m = 1 << (bi % 8);
+ if((bp->data[bi/8] & m) == 0){ // Is block free?
+ bp->data[bi/8] |= m; // Mark block in use.
+ log_write(bp);
+ brelse(bp);
+ bzero(dev, b + bi);
+ return b + bi;
+ }
+ }
+ brelse(bp);
+ }
+ panic("balloc: out of blocks");
+}
+
+// Free a disk block.
+static void
+bfree(int dev, uint b)
+{
+ struct buf *bp;
+ int bi, m;
+
+ readsb(dev, &sb);
+ bp = bread(dev, BBLOCK(b, sb));
+ bi = b % BPB;
+ m = 1 << (bi % 8);
+ if((bp->data[bi/8] & m) == 0)
+ panic("freeing free block");
+ bp->data[bi/8] &= ~m;
+ log_write(bp);
+ brelse(bp);
+}
+
+// Inodes.
+//
+// An inode describes a single unnamed file.
+// The inode disk structure holds metadata: the file's type,
+// its size, the number of links referring to it, and the
+// list of blocks holding the file's content.
+//
+// The inodes are laid out sequentially on disk at
+// sb.startinode. Each inode has a number, indicating its
+// position on the disk.
+//
+// The kernel keeps a cache of in-use inodes in memory
+// to provide a place for synchronizing access
+// to inodes used by multiple processes. The cached
+// inodes include book-keeping information that is
+// not stored on disk: ip->ref and ip->valid.
+//
+// An inode and its in-memory representation go through a
+// sequence of states before they can be used by the
+// rest of the file system code.
+//
+// * Allocation: an inode is allocated if its type (on disk)
+// is non-zero. ialloc() allocates, and iput() frees if
+// the reference and link counts have fallen to zero.
+//
+// * Referencing in cache: an entry in the inode cache
+// is free if ip->ref is zero. Otherwise ip->ref tracks
+// the number of in-memory pointers to the entry (open
+// files and current directories). iget() finds or
+// creates a cache entry and increments its ref; iput()
+// decrements ref.
+//
+// * Valid: the information (type, size, &c) in an inode
+// cache entry is only correct when ip->valid is 1.
+// ilock() reads the inode from
+// the disk and sets ip->valid, while iput() clears
+// ip->valid if ip->ref has fallen to zero.
+//
+// * Locked: file system code may only examine and modify
+// the information in an inode and its content if it
+// has first locked the inode.
+//
+// Thus a typical sequence is:
+// ip = iget(dev, inum)
+// ilock(ip)
+// ... examine and modify ip->xxx ...
+// iunlock(ip)
+// iput(ip)
+//
+// ilock() is separate from iget() so that system calls can
+// get a long-term reference to an inode (as for an open file)
+// and only lock it for short periods (e.g., in read()).
+// The separation also helps avoid deadlock and races during
+// pathname lookup. iget() increments ip->ref so that the inode
+// stays cached and pointers to it remain valid.
+//
+// Many internal file system functions expect the caller to
+// have locked the inodes involved; this lets callers create
+// multi-step atomic operations.
+//
+// The icache.lock spin-lock protects the allocation of icache
+// entries. Since ip->ref indicates whether an entry is free,
+// and ip->dev and ip->inum indicate which i-node an entry
+// holds, one must hold icache.lock while using any of those fields.
+//
+// An ip->lock sleep-lock protects all ip-> fields other than ref,
+// dev, and inum. One must hold ip->lock in order to
+// read or write that inode's ip->valid, ip->size, ip->type, &c.
+
// In-memory inode cache. icache.lock guards entry allocation:
// ip->ref, ip->dev, ip->inum (see the comment block above).
struct {
  struct spinlock lock;
  struct inode inode[NINODE];
} icache;
+
+void
+iinit()
+{
+ int i = 0;
+
+ initlock(&icache.lock, "icache");
+ for(i = 0; i < NINODE; i++) {
+ initsleeplock(&icache.inode[i].lock, "inode");
+ }
+}
+
+static struct inode* iget(uint dev, uint inum);
+
+// Allocate an inode on device dev.
+// Mark it as allocated by giving it type type.
+// Returns an unlocked but allocated and referenced inode.
+struct inode*
+ialloc(uint dev, short type)
+{
+ int inum;
+ struct buf *bp;
+ struct dinode *dip;
+
+ for(inum = 1; inum < sb.ninodes; inum++){
+ bp = bread(dev, IBLOCK(inum, sb));
+ dip = (struct dinode*)bp->data + inum%IPB;
+ if(dip->type == 0){ // a free inode
+ memset(dip, 0, sizeof(*dip));
+ dip->type = type;
+ log_write(bp); // mark it allocated on the disk
+ brelse(bp);
+ return iget(dev, inum);
+ }
+ brelse(bp);
+ }
+ panic("ialloc: no inodes");
+}
+
+// Copy a modified in-memory inode to disk.
+// Must be called after every change to an ip->xxx field
+// that lives on disk, since i-node cache is write-through.
+// Caller must hold ip->lock.
+void
+iupdate(struct inode *ip)
+{
+ struct buf *bp;
+ struct dinode *dip;
+
+ bp = bread(ip->dev, IBLOCK(ip->inum, sb));
+ dip = (struct dinode*)bp->data + ip->inum%IPB;
+ dip->type = ip->type;
+ dip->major = ip->major;
+ dip->minor = ip->minor;
+ dip->nlink = ip->nlink;
+ dip->size = ip->size;
+ memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
+ log_write(bp);
+ brelse(bp);
+}
+
// Find the inode with number inum on device dev
// and return the in-memory copy. Does not lock
// the inode and does not read it from disk.
// Panics if the cache is full of referenced entries.
static struct inode*
iget(uint dev, uint inum)
{
  struct inode *ip, *empty;

  acquire(&icache.lock);

  // Is the inode already cached?
  empty = 0;
  for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
    if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
      ip->ref++;
      release(&icache.lock);
      return ip;
    }
    if(empty == 0 && ip->ref == 0) // Remember empty slot.
      empty = ip;
  }

  // Recycle an inode cache entry.
  if(empty == 0)
    panic("iget: no inodes");

  ip = empty;
  ip->dev = dev;
  ip->inum = inum;
  ip->ref = 1;
  ip->valid = 0; // contents will be read from disk by ilock()
  release(&icache.lock);

  return ip;
}
+
// Increment reference count for ip.
// Returns ip to enable ip = idup(ip1) idiom.
struct inode*
idup(struct inode *ip)
{
  acquire(&icache.lock);  // ref is protected by icache.lock
  ip->ref++;
  release(&icache.lock);
  return ip;
}
+
// Lock the given inode.
// Reads the inode from disk if necessary.
// Panics on a null/unreferenced inode, or if the on-disk inode
// turns out to be unallocated (type 0).
void
ilock(struct inode *ip)
{
  struct buf *bp;
  struct dinode *dip;

  if(ip == 0 || ip->ref < 1)
    panic("ilock");

  acquiresleep(&ip->lock);

  // First lock since the cache entry was (re)filled: pull the
  // on-disk copy into memory.
  if(ip->valid == 0){
    bp = bread(ip->dev, IBLOCK(ip->inum, sb));
    dip = (struct dinode*)bp->data + ip->inum%IPB;
    ip->type = dip->type;
    ip->major = dip->major;
    ip->minor = dip->minor;
    ip->nlink = dip->nlink;
    ip->size = dip->size;
    memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
    brelse(bp);
    ip->valid = 1;
    if(ip->type == 0)  // type 0 means the on-disk inode is free
      panic("ilock: no type");
  }
}
+
// Unlock the given inode.
// Caller must hold ip->lock; panics otherwise.
void
iunlock(struct inode *ip)
{
  if(ip == 0 || !holdingsleep(&ip->lock) || ip->ref < 1)
    panic("iunlock");

  releasesleep(&ip->lock);
}
+
// Drop a reference to an in-memory inode.
// If that was the last reference, the inode cache entry can
// be recycled.
// If that was the last reference and the inode has no links
// to it, free the inode (and its content) on disk.
// All calls to iput() must be inside a transaction in
// case it has to free the inode.
void
iput(struct inode *ip)
{
  acquire(&icache.lock);

  if(ip->ref == 1 && ip->valid && ip->nlink == 0){
    // inode has no links and no other references: truncate and free.

    // ip->ref == 1 means no other process can have ip locked,
    // so this acquiresleep() won't block (or deadlock).
    acquiresleep(&ip->lock);

    // Drop the spinlock for the disk I/O below; ip->lock keeps
    // the entry stable.
    release(&icache.lock);

    itrunc(ip);
    ip->type = 0;   // mark the on-disk inode free
    iupdate(ip);
    ip->valid = 0;  // cache entry no longer mirrors the disk

    releasesleep(&ip->lock);

    acquire(&icache.lock);
  }

  ip->ref--;
  release(&icache.lock);
}
+
// Common idiom: unlock, then put.
// Subject to iput()'s rule: must be called inside a transaction.
void
iunlockput(struct inode *ip)
{
  iunlock(ip);
  iput(ip);
}
+
+// Inode content
+//
+// The content (data) associated with each inode is stored
+// in blocks on the disk. The first NDIRECT block numbers
+// are listed in ip->addrs[]. The next NINDIRECT blocks are
+// listed in block ip->addrs[NDIRECT].
+
// Return the disk block address of the nth block in inode ip.
// If there is no such block, bmap allocates one (so it must run
// inside a transaction, since balloc/log_write are called).
// Panics if bn is beyond the maximum file size.
static uint
bmap(struct inode *ip, uint bn)
{
  uint addr, *a;
  struct buf *bp;

  // Direct blocks: address stored right in the inode.
  if(bn < NDIRECT){
    if((addr = ip->addrs[bn]) == 0)
      ip->addrs[bn] = addr = balloc(ip->dev);
    return addr;
  }
  bn -= NDIRECT;

  if(bn < NINDIRECT){
    // Load indirect block, allocating if necessary.
    if((addr = ip->addrs[NDIRECT]) == 0)
      ip->addrs[NDIRECT] = addr = balloc(ip->dev);
    bp = bread(ip->dev, addr);
    a = (uint*)bp->data;
    if((addr = a[bn]) == 0){
      a[bn] = addr = balloc(ip->dev);
      log_write(bp);  // record the new mapping in the indirect block
    }
    brelse(bp);
    return addr;
  }

  panic("bmap: out of range");
}
+
// Truncate inode (discard contents).
// Only called when the inode has no links
// to it (no directory entries referring to it)
// and has no in-memory reference to it (is
// not an open file or current directory).
// Frees all direct blocks, all blocks listed in the indirect
// block, and the indirect block itself, then zeroes the size.
static void
itrunc(struct inode *ip)
{
  int i, j;
  struct buf *bp;
  uint *a;

  // Free the direct blocks.
  for(i = 0; i < NDIRECT; i++){
    if(ip->addrs[i]){
      bfree(ip->dev, ip->addrs[i]);
      ip->addrs[i] = 0;
    }
  }

  // Free every block reachable through the indirect block,
  // then the indirect block itself.
  if(ip->addrs[NDIRECT]){
    bp = bread(ip->dev, ip->addrs[NDIRECT]);
    a = (uint*)bp->data;
    for(j = 0; j < NINDIRECT; j++){
      if(a[j])
        bfree(ip->dev, a[j]);
    }
    brelse(bp);
    bfree(ip->dev, ip->addrs[NDIRECT]);
    ip->addrs[NDIRECT] = 0;
  }

  ip->size = 0;
  iupdate(ip);  // write-through: persist the truncation
}
+
// Copy stat information from inode into *st.
// Caller must hold ip->lock.
void
stati(struct inode *ip, struct stat *st)
{
  st->dev = ip->dev;
  st->ino = ip->inum;
  st->type = ip->type;
  st->nlink = ip->nlink;
  st->size = ip->size;
}
+
+// Read data from inode.
+// Caller must hold ip->lock.
+// If user_dst==1, then dst is a user virtual address;
+// otherwise, dst is a kernel address.
+int
+readi(struct inode *ip, int user_dst, uint64 dst, uint off, uint n)
+{
+ uint tot, m;
+ struct buf *bp;
+
+ if(off > ip->size || off + n < off)
+ return -1;
+ if(off + n > ip->size)
+ n = ip->size - off;
+
+ for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
+ bp = bread(ip->dev, bmap(ip, off/BSIZE));
+ m = min(n - tot, BSIZE - off%BSIZE);
+ if(either_copyout(user_dst, dst, bp->data + (off % BSIZE), m) == -1)
+ break;
+ brelse(bp);
+ }
+ return n;
+}
+
+// Write data to inode.
+// Caller must hold ip->lock.
+// If user_src==1, then src is a user virtual address;
+// otherwise, src is a kernel address.
+int
+writei(struct inode *ip, int user_src, uint64 src, uint off, uint n)
+{
+ uint tot, m;
+ struct buf *bp;
+
+ if(off > ip->size || off + n < off)
+ return -1;
+ if(off + n > MAXFILE*BSIZE)
+ return -1;
+
+ for(tot=0; tot<n; tot+=m, off+=m, src+=m){
+ bp = bread(ip->dev, bmap(ip, off/BSIZE));
+ m = min(n - tot, BSIZE - off%BSIZE);
+ if(either_copyin(bp->data + (off % BSIZE), user_src, src, m) == -1)
+ break;
+ log_write(bp);
+ brelse(bp);
+ }
+
+ if(n > 0 && off > ip->size){
+ ip->size = off;
+ iupdate(ip);
+ }
+ return n;
+}
+
+// Directories
+
// Compare two directory-entry names, looking at most DIRSIZ
// characters (on-disk names are fixed-width and may lack a NUL).
int
namecmp(const char *s, const char *t)
{
  return strncmp(s, t, DIRSIZ);
}
+
// Look for a directory entry in a directory.
// If found, set *poff to byte offset of entry and return a
// referenced (unlocked) inode; returns 0 if not found.
// Caller must hold dp->lock.
struct inode*
dirlookup(struct inode *dp, char *name, uint *poff)
{
  uint off, inum;
  struct dirent de;

  if(dp->type != T_DIR)
    panic("dirlookup not DIR");

  for(off = 0; off < dp->size; off += sizeof(de)){
    if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
      panic("dirlookup read");
    if(de.inum == 0)   // inum 0 marks an unused slot
      continue;
    if(namecmp(name, de.name) == 0){
      // entry matches path element
      if(poff)
        *poff = off;
      inum = de.inum;
      return iget(dp->dev, inum);
    }
  }

  return 0;
}
+
// Write a new directory entry (name, inum) into the directory dp.
// Returns 0 on success, -1 if name already exists.
// Caller must hold dp->lock and be inside a transaction.
int
dirlink(struct inode *dp, char *name, uint inum)
{
  int off;
  struct dirent de;
  struct inode *ip;

  // Check that name is not present.
  if((ip = dirlookup(dp, name, 0)) != 0){
    iput(ip);
    return -1;
  }

  // Look for an empty dirent; if none, off ends at dp->size and
  // writei below appends a new entry.
  for(off = 0; off < dp->size; off += sizeof(de)){
    if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
      panic("dirlink read");
    if(de.inum == 0)
      break;
  }

  strncpy(de.name, name, DIRSIZ);  // fixed-width name, NUL-padded
  de.inum = inum;
  if(writei(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
    panic("dirlink");

  return 0;
}
+
+// Paths
+
+// Copy the next path element from path into name.
+// Return a pointer to the element following the copied one.
+// The returned path has no leading slashes,
+// so the caller can check *path=='\0' to see if the name is the last one.
+// If no name to remove, return 0.
+//
+// Examples:
+// skipelem("a/bb/c", name) = "bb/c", setting name = "a"
+// skipelem("///a//bb", name) = "bb", setting name = "a"
+// skipelem("a", name) = "", setting name = "a"
+// skipelem("", name) = skipelem("////", name) = 0
+//
+static char*
+skipelem(char *path, char *name)
+{
+ char *s;
+ int len;
+
+ while(*path == '/')
+ path++;
+ if(*path == 0)
+ return 0;
+ s = path;
+ while(*path != '/' && *path != 0)
+ path++;
+ len = path - s;
+ if(len >= DIRSIZ)
+ memmove(name, s, DIRSIZ);
+ else {
+ memmove(name, s, len);
+ name[len] = 0;
+ }
+ while(*path == '/')
+ path++;
+ return path;
+}
+
// Look up and return the inode for a path name.
// If nameiparent != 0, return the inode for the parent and copy
// the final path element into name, which must have room for
// DIRSIZ bytes.
// Must be called inside a transaction since it calls iput().
// Returns 0 on any lookup failure.
static struct inode*
namex(char *path, int nameiparent, char *name)
{
  struct inode *ip, *next;

  // Absolute paths start at the root; relative ones at the
  // caller's current directory.
  if(*path == '/')
    ip = iget(ROOTDEV, ROOTINO);
  else
    ip = idup(myproc()->cwd);

  while((path = skipelem(path, name)) != 0){
    ilock(ip);
    if(ip->type != T_DIR){
      iunlockput(ip);
      return 0;
    }
    if(nameiparent && *path == '\0'){
      // Stop one level early.
      iunlock(ip);
      return ip;
    }
    if((next = dirlookup(ip, name, 0)) == 0){
      iunlockput(ip);
      return 0;
    }
    // Release ip before descending; dirlookup already gave us a
    // reference on next, so it cannot be recycled.
    iunlockput(ip);
    ip = next;
  }
  if(nameiparent){
    // Path had no final element (e.g. "/"): no parent to return.
    iput(ip);
    return 0;
  }
  return ip;
}
+
+struct inode*
+namei(char *path)
+{
+ char name[DIRSIZ];
+ return namex(path, 0, name);
+}
+
// Like namei, but return the parent directory's inode and copy
// the final path element into name (room for DIRSIZ bytes).
struct inode*
nameiparent(char *path, char *name)
{
  return namex(path, 1, name);
}
diff --git a/kernel/fs.h b/kernel/fs.h
new file mode 100644
index 0000000..139dcc9
--- /dev/null
+++ b/kernel/fs.h
@@ -0,0 +1,60 @@
+// On-disk file system format.
+// Both the kernel and user programs use this header file.
+
+
+#define ROOTINO 1 // root i-number
+#define BSIZE 1024 // block size
+
+// Disk layout:
+// [ boot block | super block | log | inode blocks |
+// free bit map | data blocks]
+//
+// mkfs computes the super block and builds an initial file system. The
+// super block describes the disk layout:
// Super block: describes the disk layout (see comment above).
// The *start fields and size are all block numbers/counts.
struct superblock {
  uint magic;        // Must be FSMAGIC
  uint size;         // Size of file system image (blocks)
  uint nblocks;      // Number of data blocks
  uint ninodes;      // Number of inodes.
  uint nlog;         // Number of log blocks
  uint logstart;     // Block number of first log block
  uint inodestart;   // Block number of first inode block
  uint bmapstart;    // Block number of first free map block
};
+
+#define FSMAGIC 0x10203040
+
+#define NDIRECT 12
+#define NINDIRECT (BSIZE / sizeof(uint))
+#define MAXFILE (NDIRECT + NINDIRECT)
+
// On-disk inode structure
struct dinode {
  short type;           // File type (0 means this inode is free)
  short major;          // Major device number (T_DEVICE only)
  short minor;          // Minor device number (T_DEVICE only)
  short nlink;          // Number of links to inode in file system
  uint size;            // Size of file (bytes)
  uint addrs[NDIRECT+1];   // NDIRECT direct block numbers + 1 indirect
};
+
+// Inodes per block.
+#define IPB (BSIZE / sizeof(struct dinode))
+
+// Block containing inode i
+#define IBLOCK(i, sb) ((i) / IPB + sb.inodestart)
+
+// Bitmap bits per block
+#define BPB (BSIZE*8)
+
+// Block of free map containing bit for block b
+#define BBLOCK(b, sb) ((b)/BPB + sb.bmapstart)
+
+// Directory is a file containing a sequence of dirent structures.
+#define DIRSIZ 14
+
// On-disk directory entry; inum == 0 marks a free slot.
// name is fixed-width and not necessarily NUL-terminated.
struct dirent {
  ushort inum;
  char name[DIRSIZ];
};
+
diff --git a/kernel/kalloc.c b/kernel/kalloc.c
new file mode 100644
index 0000000..ae3863b
--- /dev/null
+++ b/kernel/kalloc.c
@@ -0,0 +1,83 @@
+// Physical memory allocator, for user processes,
+// kernel stacks, page-table pages,
+// and pipe buffers. Allocates whole 4096-byte pages.
+
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "spinlock.h"
+#include "riscv.h"
+#include "defs.h"
+
+void freerange(void *pa_start, void *pa_end);
+
+extern char end[]; // first address after kernel.
+ // defined by kernel.ld.
+
// A free physical page; the list link is stored in the free
// page itself.
struct run {
  struct run *next;
};

// The free-page list, protected by a spinlock.
struct {
  struct spinlock lock;
  struct run *freelist;
} kmem;
+
// Initialize the page allocator: free every page between the end
// of the kernel image and PHYSTOP.
void
kinit()
{
  initlock(&kmem.lock, "kmem");
  freerange(end, (void*)PHYSTOP);
}
+
// Hand every whole page in [pa_start, pa_end) to kfree().
void
freerange(void *pa_start, void *pa_end)
{
  char *p;
  p = (char*)PGROUNDUP((uint64)pa_start);
  p += 4096; // XXX I can't get kernel.ld to place end beyond the last bss symbol.
  for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE)
    kfree(p);
}
+
+// Free the page of physical memory pointed at by v,
+// which normally should have been returned by a
+// call to kalloc(). (The exception is when
+// initializing the allocator; see kinit above.)
+void
+kfree(void *pa)
+{
+ struct run *r;
+
+ if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
+ panic("kfree");
+
+ // Fill with junk to catch dangling refs.
+ memset(pa, 1, PGSIZE);
+
+ r = (struct run*)pa;
+
+ acquire(&kmem.lock);
+ r->next = kmem.freelist;
+ kmem.freelist = r;
+ release(&kmem.lock);
+}
+
+// Allocate one 4096-byte page of physical memory.
+// Returns a pointer that the kernel can use.
+// Returns 0 if the memory cannot be allocated.
+void *
+kalloc(void)
+{
+ struct run *r;
+
+ acquire(&kmem.lock);
+ r = kmem.freelist;
+ if(r)
+ kmem.freelist = r->next;
+ release(&kmem.lock);
+
+ if(r)
+ memset((char*)r, 5, PGSIZE); // fill with junk
+ return (void*)r;
+}
diff --git a/kernel/kernel.ld b/kernel/kernel.ld
new file mode 100644
index 0000000..0b5e76b
--- /dev/null
+++ b/kernel/kernel.ld
@@ -0,0 +1,32 @@
+OUTPUT_ARCH( "riscv" )
+ENTRY( _entry )
+
+SECTIONS
+{
+ /*
+ * ensure that entry.S / _entry is at 0x80000000,
+ * where qemu's -kernel jumps.
+ */
+ . = 0x80000000;
+ .text :
+ {
+ *(.text)
+ . = ALIGN(0x1000);
+ *(trampsec)
+ }
+
+ . = ALIGN(0x1000);
+ PROVIDE(etext = .);
+
+ /*
+ * make sure end is after data and bss.
+ */
+ .data : {
+ *(.data)
+ }
+ bss : {
+ *(.bss)
+ PROVIDE(end = .);
+ }
+
+}
diff --git a/kernel/kernelvec.S b/kernel/kernelvec.S
new file mode 100644
index 0000000..3e9d3e9
--- /dev/null
+++ b/kernel/kernelvec.S
@@ -0,0 +1,121 @@
+ #
+ # interrupts and exceptions while in supervisor
+ # mode come here.
+ #
+ # push all registers, call kerneltrap(), restore, return.
+ #
+.globl kerneltrap
+.globl kernelvec
+.align 4
+kernelvec:
+ // make room to save registers.
+ addi sp, sp, -256
+
+ // save the registers.
+ sd ra, 0(sp)
+ sd sp, 8(sp)
+ sd gp, 16(sp)
+ sd tp, 24(sp)
+ sd t0, 32(sp)
+ sd t1, 40(sp)
+ sd t2, 48(sp)
+ sd s0, 56(sp)
+ sd s1, 64(sp)
+ sd a0, 72(sp)
+ sd a1, 80(sp)
+ sd a2, 88(sp)
+ sd a3, 96(sp)
+ sd a4, 104(sp)
+ sd a5, 112(sp)
+ sd a6, 120(sp)
+ sd a7, 128(sp)
+ sd s2, 136(sp)
+ sd s3, 144(sp)
+ sd s4, 152(sp)
+ sd s5, 160(sp)
+ sd s6, 168(sp)
+ sd s7, 176(sp)
+ sd s8, 184(sp)
+ sd s9, 192(sp)
+ sd s10, 200(sp)
+ sd s11, 208(sp)
+ sd t3, 216(sp)
+ sd t4, 224(sp)
+ sd t5, 232(sp)
+ sd t6, 240(sp)
+
+ // call the C trap handler in trap.c
+ call kerneltrap
+
+ // restore registers.
+ ld ra, 0(sp)
+ ld sp, 8(sp)
+ ld gp, 16(sp)
+ // not this, in case we moved CPUs: ld tp, 24(sp)
+ ld t0, 32(sp)
+ ld t1, 40(sp)
+ ld t2, 48(sp)
+ ld s0, 56(sp)
+ ld s1, 64(sp)
+ ld a0, 72(sp)
+ ld a1, 80(sp)
+ ld a2, 88(sp)
+ ld a3, 96(sp)
+ ld a4, 104(sp)
+ ld a5, 112(sp)
+ ld a6, 120(sp)
+ ld a7, 128(sp)
+ ld s2, 136(sp)
+ ld s3, 144(sp)
+ ld s4, 152(sp)
+ ld s5, 160(sp)
+ ld s6, 168(sp)
+ ld s7, 176(sp)
+ ld s8, 184(sp)
+ ld s9, 192(sp)
+ ld s10, 200(sp)
+ ld s11, 208(sp)
+ ld t3, 216(sp)
+ ld t4, 224(sp)
+ ld t5, 232(sp)
+ ld t6, 240(sp)
+
+ addi sp, sp, 256
+
+ // return to whatever we were doing in the kernel.
+ sret
+
+ #
+ # machine-mode timer interrupt.
+ #
+.globl timervec
+.align 4
+timervec:
+ # start.c has set up the memory that mscratch points to:
+ # scratch[0,8,16] : register save area.
+ # scratch[32] : address of CLINT's MTIMECMP register.
+ # scratch[40] : desired interval between interrupts.
+
+ csrrw a0, mscratch, a0
+ sd a1, 0(a0)
+ sd a2, 8(a0)
+ sd a3, 16(a0)
+
+ # schedule the next timer interrupt
+ # by adding interval to mtimecmp.
+ ld a1, 32(a0) # CLINT_MTIMECMP(hart)
+ ld a2, 40(a0) # interval
+ ld a3, 0(a1)
+ add a3, a3, a2
+ sd a3, 0(a1)
+
+ # raise a supervisor software interrupt.
+ li a1, 2
+ csrw sip, a1
+
+ ld a3, 16(a0)
+ ld a2, 8(a0)
+ ld a1, 0(a0)
+ csrrw a0, mscratch, a0
+
+ mret
diff --git a/kernel/log.c b/kernel/log.c
new file mode 100644
index 0000000..5e884bb
--- /dev/null
+++ b/kernel/log.c
@@ -0,0 +1,235 @@
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "param.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "fs.h"
+#include "buf.h"
+
+// Simple logging that allows concurrent FS system calls.
+//
+// A log transaction contains the updates of multiple FS system
+// calls. The logging system only commits when there are
+// no FS system calls active. Thus there is never
+// any reasoning required about whether a commit might
+// write an uncommitted system call's updates to disk.
+//
+// A system call should call begin_op()/end_op() to mark
+// its start and end. Usually begin_op() just increments
+// the count of in-progress FS system calls and returns.
+// But if it thinks the log is close to running out, it
+// sleeps until the last outstanding end_op() commits.
+//
+// The log is a physical re-do log containing disk blocks.
+// The on-disk log format:
+// header block, containing block #s for block A, B, C, ...
+// block A
+// block B
+// block C
+// ...
+// Log appends are synchronous.
+
// Contents of the header block, used for both the on-disk header block
// and to keep track in memory of logged block# before commit.
struct logheader {
  int n;                // number of blocks currently logged
  int block[LOGSIZE];   // home block number of each logged block
};
+
// In-memory state of the log, protected by log.lock.
struct log {
  struct spinlock lock;
  int start;        // first log block on disk (the header block)
  int size;         // number of log blocks
  int outstanding;  // how many FS sys calls are executing.
  int committing;   // in commit(), please wait.
  int dev;
  struct logheader lh;
};
struct log log;
+
+static void recover_from_log(void);
+static void commit();
+
// Initialize the log from the super block and recover any
// committed-but-uninstalled transaction. Called once at boot.
void
initlog(int dev, struct superblock *sb)
{
  // The whole header must fit in one disk block.
  if (sizeof(struct logheader) >= BSIZE)
    panic("initlog: too big logheader");

  initlock(&log.lock, "log");
  log.start = sb->logstart;
  log.size = sb->nlog;
  log.dev = dev;
  recover_from_log();
}
+
// Copy committed blocks from log to their home location
static void
install_trans(void)
{
  int tail;

  for (tail = 0; tail < log.lh.n; tail++) {
    struct buf *lbuf = bread(log.dev, log.start+tail+1); // read log block
    struct buf *dbuf = bread(log.dev, log.lh.block[tail]); // read dst
    memmove(dbuf->data, lbuf->data, BSIZE);  // copy block to dst
    bwrite(dbuf);  // write dst to disk
    bunpin(dbuf);  // release the pin taken by log_write()
    brelse(lbuf);
    brelse(dbuf);
  }
}
+
+// Read the log header from disk into the in-memory log header
+static void
+read_head(void)
+{
+ struct buf *buf = bread(log.dev, log.start);
+ struct logheader *lh = (struct logheader *) (buf->data);
+ int i;
+ log.lh.n = lh->n;
+ for (i = 0; i < log.lh.n; i++) {
+ log.lh.block[i] = lh->block[i];
+ }
+ brelse(buf);
+}
+
+// Write in-memory log header to disk.
+// This is the true point at which the
+// current transaction commits.
+static void
+write_head(void)
+{
+ struct buf *buf = bread(log.dev, log.start);
+ struct logheader *hb = (struct logheader *) (buf->data);
+ int i;
+ hb->n = log.lh.n;
+ for (i = 0; i < log.lh.n; i++) {
+ hb->block[i] = log.lh.block[i];
+ }
+ bwrite(buf);
+ brelse(buf);
+}
+
// Boot-time recovery: replay whatever the header says was
// committed, then clear the log.
static void
recover_from_log(void)
{
  read_head();
  install_trans(); // if committed, copy from log to disk
  log.lh.n = 0;
  write_head(); // clear the log
}
+
// called at the start of each FS system call.
// Blocks while a commit is in progress or while the log might
// not have room for this operation's worst-case MAXOPBLOCKS.
void
begin_op(void)
{
  acquire(&log.lock);
  while(1){
    if(log.committing){
      sleep(&log, &log.lock);
    } else if(log.lh.n + (log.outstanding+1)*MAXOPBLOCKS > LOGSIZE){
      // this op might exhaust log space; wait for commit.
      sleep(&log, &log.lock);
    } else {
      // Reserve our slot and go.
      log.outstanding += 1;
      release(&log.lock);
      break;
    }
  }
}
+
// called at the end of each FS system call.
// commits if this was the last outstanding operation.
void
end_op(void)
{
  int do_commit = 0;

  acquire(&log.lock);
  log.outstanding -= 1;
  if(log.committing)
    panic("log.committing");
  if(log.outstanding == 0){
    do_commit = 1;
    log.committing = 1;  // block new begin_op()s during commit
  } else {
    // begin_op() may be waiting for log space,
    // and decrementing log.outstanding has decreased
    // the amount of reserved space.
    wakeup(&log);
  }
  release(&log.lock);

  if(do_commit){
    // call commit w/o holding locks, since not allowed
    // to sleep with locks.
    commit();
    acquire(&log.lock);
    log.committing = 0;
    wakeup(&log);  // let waiting begin_op()s proceed
    release(&log.lock);
  }
}
+
+// Copy modified blocks from cache to log.
+static void
+write_log(void)
+{
+ int tail;
+
+ for (tail = 0; tail < log.lh.n; tail++) {
+ struct buf *to = bread(log.dev, log.start+tail+1); // log block
+ struct buf *from = bread(log.dev, log.lh.block[tail]); // cache block
+ memmove(to->data, from->data, BSIZE);
+ bwrite(to); // write the log
+ brelse(from);
+ brelse(to);
+ }
+}
+
// Commit the current transaction. The order of the four steps
// is what makes the log crash-safe: data reaches the log before
// the header names it, and the header is cleared only after the
// data is installed at its home locations.
static void
commit()
{
  if (log.lh.n > 0) {
    write_log();     // Write modified blocks from cache to log
    write_head();    // Write header to disk -- the real commit
    install_trans(); // Now install writes to home locations
    log.lh.n = 0;
    write_head();    // Erase the transaction from the log
  }
}
+
+// Caller has modified b->data and is done with the buffer.
+// Record the block number and pin in the cache by increasing refcnt.
+// commit()/write_log() will do the disk write.
+//
+// log_write() replaces bwrite(); a typical use is:
+// bp = bread(...)
+// modify bp->data[]
+// log_write(bp)
+// brelse(bp)
+void
+log_write(struct buf *b)
+{
+ int i;
+
+ if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1)
+ panic("too big a transaction");
+ if (log.outstanding < 1)
+ panic("log_write outside of trans");
+
+ acquire(&log.lock);
+ for (i = 0; i < log.lh.n; i++) {
+ if (log.lh.block[i] == b->blockno) // log absorbtion
+ break;
+ }
+ log.lh.block[i] = b->blockno;
+ if (i == log.lh.n) { // Add new block to log?
+ bpin(b);
+ log.lh.n++;
+ }
+ release(&log.lock);
+}
+
diff --git a/kernel/main.c b/kernel/main.c
new file mode 100644
index 0000000..a936fd3
--- /dev/null
+++ b/kernel/main.c
@@ -0,0 +1,43 @@
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "defs.h"
+
+volatile static int started = 0;
+
// start() jumps here in supervisor mode on all CPUs.
// Hart 0 performs one-time kernel initialization while the other
// harts spin on `started`; then every hart enters the scheduler.
void
main()
{
  if(cpuid() == 0){
    consoleinit();
    printfinit();
    printf("hart %d starting\n", cpuid());
    kinit();         // physical page allocator
    kvminit();       // create kernel page table
    kvminithart();   // turn on paging
    procinit();      // process table
    trapinit();      // trap vectors
    trapinithart();  // install kernel trap vector
    plicinit();      // set up interrupt controller
    plicinithart();  // ask PLIC for device interrupts
    binit();         // buffer cache
    iinit();         // inode cache
    fileinit();      // file table
    virtio_disk_init(); // emulated hard disk
    userinit();      // first user process
    // Barrier: make all the initialization above visible to the
    // other harts before they see started != 0.
    __sync_synchronize();
    started = 1;
  } else {
    while(started == 0)
      ;
    __sync_synchronize();
    printf("hart %d starting\n", cpuid());
    kvminithart();    // turn on paging
    trapinithart();   // install kernel trap vector
    plicinithart();   // ask PLIC for device interrupts
  }

  scheduler();
}
diff --git a/kernel/memlayout.h b/kernel/memlayout.h
new file mode 100644
index 0000000..8ffd538
--- /dev/null
+++ b/kernel/memlayout.h
@@ -0,0 +1,67 @@
+// Physical memory layout
+
+// qemu -machine virt is set up like this,
+// based on qemu's hw/riscv/virt.c:
+//
+// 00001000 -- boot ROM, provided by qemu
+// 02000000 -- CLINT
+// 0C000000 -- PLIC
+// 10000000 -- uart0
+// 10001000 -- virtio disk
+// 80000000 -- boot ROM jumps here in machine mode
+// -kernel loads the kernel here
+// unused RAM after 80000000.
+
+// the kernel uses physical memory thus:
+// 80000000 -- entry.S, then kernel text and data
+// end -- start of kernel page allocation area
+// PHYSTOP -- end RAM used by the kernel
+
+// qemu puts UART registers here in physical memory.
+#define UART0 0x10000000L
+#define UART0_IRQ 10
+
+// virtio mmio interface
+#define VIRTIO0 0x10001000
+#define VIRTIO0_IRQ 1
+
+// local interrupt controller, which contains the timer.
+#define CLINT 0x2000000L
+#define CLINT_MTIMECMP(hartid) (CLINT + 0x4000 + 8*(hartid))
+#define CLINT_MTIME (CLINT + 0xBFF8) // cycles since boot.
+
+// qemu puts programmable interrupt controller here.
+#define PLIC 0x0c000000L
+#define PLIC_PRIORITY (PLIC + 0x0)
+#define PLIC_PENDING (PLIC + 0x1000)
+#define PLIC_MENABLE(hart) (PLIC + 0x2000 + (hart)*0x100)
+#define PLIC_SENABLE(hart) (PLIC + 0x2080 + (hart)*0x100)
+#define PLIC_MPRIORITY(hart) (PLIC + 0x200000 + (hart)*0x2000)
+#define PLIC_SPRIORITY(hart) (PLIC + 0x201000 + (hart)*0x2000)
+#define PLIC_MCLAIM(hart) (PLIC + 0x200004 + (hart)*0x2000)
+#define PLIC_SCLAIM(hart) (PLIC + 0x201004 + (hart)*0x2000)
+
+// the kernel expects there to be RAM
+// for use by the kernel and user pages
+// from physical address 0x80000000 to PHYSTOP.
+#define KERNBASE 0x80000000L
+#define PHYSTOP (KERNBASE + 128*1024*1024)
+
+// map the trampoline page to the highest address,
+// in both user and kernel space.
+#define TRAMPOLINE (MAXVA - PGSIZE)
+
+// map kernel stacks beneath the trampoline,
+// each surrounded by invalid guard pages.
+#define KSTACK(p) (TRAMPOLINE - ((p)+1)* 2*PGSIZE)
+
+// User memory layout.
+// Address zero first:
+// text
+// original data and bss
+// fixed-size stack
+// expandable heap
+// ...
+// TRAPFRAME (p->tf, used by the trampoline)
+// TRAMPOLINE (the same page as in the kernel)
+#define TRAPFRAME (TRAMPOLINE - PGSIZE)
diff --git a/kernel/param.h b/kernel/param.h
new file mode 100644
index 0000000..b5fdcb2
--- /dev/null
+++ b/kernel/param.h
@@ -0,0 +1,13 @@
+// system-wide compile-time configuration constants.
+#define NPROC        64  // maximum number of processes
+#define NCPU          8  // maximum number of CPUs
+#define NOFILE       16  // open files per process
+#define NFILE       100  // open files per system
+#define NINODE       50  // maximum number of active i-nodes
+#define NDEV         10  // maximum major device number
+#define ROOTDEV       1  // device number of file system root disk
+#define MAXARG       32  // max exec arguments
+#define MAXOPBLOCKS  10  // max # of blocks any FS op writes
+#define LOGSIZE      (MAXOPBLOCKS*3)  // max data blocks in on-disk log
+#define NBUF         (MAXOPBLOCKS*3)  // size of disk block cache
+#define FSSIZE       1000  // size of file system in blocks
+#define MAXPATH      128   // maximum file path name
diff --git a/kernel/pipe.c b/kernel/pipe.c
new file mode 100644
index 0000000..c3a8acf
--- /dev/null
+++ b/kernel/pipe.c
@@ -0,0 +1,127 @@
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "param.h"
+#include "spinlock.h"
+#include "proc.h"
+#include "fs.h"
+#include "sleeplock.h"
+#include "file.h"
+
+#define PIPESIZE 512
+
+struct pipe {
+  struct spinlock lock;      // protects every field below
+  char data[PIPESIZE];       // circular buffer; indexed modulo PIPESIZE
+  uint nread;     // number of bytes read (monotonic counter, not an index)
+  uint nwrite;    // number of bytes written; nwrite - nread = bytes buffered
+  int readopen;   // read fd is still open
+  int writeopen;  // write fd is still open
+};
+
+// Allocate a pipe plus two file structures wrapping its read
+// and write ends. On success *f0 is the read end, *f1 the write
+// end, and 0 is returned; on allocation failure everything
+// acquired so far is released and -1 is returned.
+int
+pipealloc(struct file **f0, struct file **f1)
+{
+  struct pipe *pi = 0;
+
+  *f0 = *f1 = 0;
+  // short-circuit: don't allocate the second file or the pipe
+  // page if an earlier allocation already failed.
+  if((*f0 = filealloc()) != 0 && (*f1 = filealloc()) != 0)
+    pi = (struct pipe*)kalloc();
+  if(pi == 0){
+    // undo whatever succeeded; pi itself was never allocated here.
+    if(*f0)
+      fileclose(*f0);
+    if(*f1)
+      fileclose(*f1);
+    return -1;
+  }
+
+  initlock(&pi->lock, "pipe");
+  pi->readopen = 1;
+  pi->writeopen = 1;
+  pi->nread = 0;
+  pi->nwrite = 0;
+
+  (*f0)->type = FD_PIPE;
+  (*f0)->readable = 1;
+  (*f0)->writable = 0;
+  (*f0)->pipe = pi;
+
+  (*f1)->type = FD_PIPE;
+  (*f1)->readable = 0;
+  (*f1)->writable = 1;
+  (*f1)->pipe = pi;
+  return 0;
+}
+
+// Close one end of a pipe. writable selects which end.
+// Wakes up the opposite end (it may be blocked waiting on this one),
+// and frees the pipe once both ends are closed.
+void
+pipeclose(struct pipe *pi, int writable)
+{
+  acquire(&pi->lock);
+  if(writable){
+    pi->writeopen = 0;
+    wakeup(&pi->nread);   // readers must notice EOF
+  } else {
+    pi->readopen = 0;
+    wakeup(&pi->nwrite);  // writers must notice broken pipe
+  }
+  if(pi->readopen == 0 && pi->writeopen == 0){
+    // last reference: release the lock before freeing the page
+    // that contains it.
+    release(&pi->lock);
+    kfree((char*)pi);
+  } else
+    release(&pi->lock);
+}
+
+// Write n bytes from user address addr into the pipe.
+// Blocks while the pipe is full, as long as the read end is
+// still open and this process hasn't been killed.
+// Returns the number of bytes actually written, or -1 if the
+// read end is closed / the process is killed while waiting.
+int
+pipewrite(struct pipe *pi, uint64 addr, int n)
+{
+  int i;
+  char ch;
+  struct proc *pr = myproc();
+
+  acquire(&pi->lock);
+  for(i = 0; i < n; i++){
+    while(pi->nwrite == pi->nread + PIPESIZE){ //DOC: pipewrite-full
+      if(pi->readopen == 0 || myproc()->killed){
+        release(&pi->lock);
+        return -1;
+      }
+      wakeup(&pi->nread);
+      sleep(&pi->nwrite, &pi->lock); //DOC: pipewrite-sleep
+    }
+    if(copyin(pr->pagetable, &ch, addr + i, 1) == -1)
+      break;
+    pi->data[pi->nwrite++ % PIPESIZE] = ch;
+  }
+  wakeup(&pi->nread); //DOC: pipewrite-wakeup1
+  release(&pi->lock);
+  // report bytes actually copied into the pipe; the previous
+  // "return n" overstated the count when copyin failed partway
+  // through a bad user address.
+  return i;
+}
+
+// Read up to n bytes from the pipe into user address addr.
+// Blocks while the pipe is empty and the write end is still open.
+// Returns the number of bytes read (0 at EOF), or -1 if the
+// process is killed while waiting.
+int
+piperead(struct pipe *pi, uint64 addr, int n)
+{
+  int i;
+  struct proc *pr = myproc();
+  char ch;
+
+  acquire(&pi->lock);
+  while(pi->nread == pi->nwrite && pi->writeopen){  //DOC: pipe-empty
+    if(myproc()->killed){
+      release(&pi->lock);
+      return -1;
+    }
+    sleep(&pi->nread, &pi->lock); //DOC: piperead-sleep
+  }
+  for(i = 0; i < n; i++){  //DOC: piperead-copy
+    if(pi->nread == pi->nwrite)
+      break;
+    ch = pi->data[pi->nread++ % PIPESIZE];
+    // stop early if the destination address is bad; bytes
+    // consumed so far are still counted in the return value.
+    if(copyout(pr->pagetable, addr + i, &ch, 1) == -1)
+      break;
+  }
+  wakeup(&pi->nwrite);  //DOC: piperead-wakeup
+  release(&pi->lock);
+  return i;
+}
diff --git a/kernel/plic.c b/kernel/plic.c
new file mode 100644
index 0000000..b569492
--- /dev/null
+++ b/kernel/plic.c
@@ -0,0 +1,62 @@
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "defs.h"
+
+//
+// the riscv Platform Level Interrupt Controller (PLIC).
+//
+
+// Give each device IRQ we care about a non-zero priority;
+// the PLIC ignores interrupt sources whose priority is zero.
+void
+plicinit(void)
+{
+  uint32 *prio = (uint32*)PLIC;
+
+  // the priority array starts at PLIC base, one 32-bit word per IRQ.
+  prio[UART0_IRQ] = 1;
+  prio[VIRTIO0_IRQ] = 1;
+}
+
+// Per-hart PLIC setup: enable the IRQs this kernel serves and
+// accept interrupts of any priority in S-mode on this hart.
+void
+plicinithart(void)
+{
+  int hart = cpuid();
+  
+  // set uart's enable bit for this hart's S-mode.
+  *(uint32*)PLIC_SENABLE(hart)= (1 << UART0_IRQ) | (1 << VIRTIO0_IRQ);
+
+  // set this hart's S-mode priority threshold to 0.
+  *(uint32*)PLIC_SPRIORITY(hart) = 0;
+}
+
+// return a bitmap of which IRQs are waiting
+// to be served: bit i is set when IRQ i is pending.
+uint64
+plic_pending(void)
+{
+  // the pending bits for IRQs 0..63 occupy one 64-bit word
+  // at PLIC_PENDING.
+  return *(uint64*)PLIC_PENDING;
+}
+
+// ask the PLIC what interrupt we should serve.
+// reading this hart's S-mode claim register atomically claims
+// the highest-priority pending IRQ (0 means none).
+int
+plic_claim(void)
+{
+  return *(uint32*)PLIC_SCLAIM(cpuid());
+}
+
+// tell the PLIC we've served this IRQ, by writing the IRQ
+// number back to this hart's claim/complete register.
+void
+plic_complete(int irq)
+{
+  *(uint32*)PLIC_SCLAIM(cpuid()) = irq;
+}
diff --git a/kernel/printf.c b/kernel/printf.c
new file mode 100644
index 0000000..777cc5f
--- /dev/null
+++ b/kernel/printf.c
@@ -0,0 +1,134 @@
+//
+// formatted console output -- printf, panic.
+//
+
+#include <stdarg.h>
+
+#include "types.h"
+#include "param.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "fs.h"
+#include "file.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "defs.h"
+#include "proc.h"
+
+// set by panic(); other CPUs poll this to freeze themselves.
+volatile int panicked = 0;
+
+// lock to avoid interleaving concurrent printf's.
+static struct {
+  struct spinlock lock;
+  int locking;   // cleared by panic() so the message always gets out
+} pr;
+
+// digit characters for bases up to 16.
+static char digits[] = "0123456789abcdef";
+
+// Print an integer to the console in the given base.
+// If sign is non-zero, treat xx as signed and emit a leading
+// '-' for negative values.
+static void
+printint(int xx, int base, int sign)
+{
+  char buf[16];
+  int i;
+  uint x;
+
+  if(sign && (sign = xx < 0))
+    x = -(uint)xx;  // negate as unsigned: -xx is signed-overflow UB when xx == INT_MIN
+  else
+    x = xx;
+
+  // generate digits least-significant first.
+  i = 0;
+  do {
+    buf[i++] = digits[x % base];
+  } while((x /= base) != 0);
+
+  if(sign)
+    buf[i++] = '-';
+
+  // emit in reverse to get most-significant first.
+  while(--i >= 0)
+    consputc(buf[i]);
+}
+
+// Print a 64-bit value as a fixed-width hexadecimal pointer,
+// e.g. 0x0000000080001234: all 16 nibbles, most significant first.
+static void
+printptr(uint64 x)
+{
+  int shift;
+
+  consputc('0');
+  consputc('x');
+  for(shift = sizeof(uint64) * 8 - 4; shift >= 0; shift -= 4)
+    consputc(digits[(x >> shift) & 0xf]);
+}
+
+// Print to the console. only understands %d, %x, %p, %s.
+// Takes pr.lock unless panic() has cleared pr.locking, so that a
+// panicking CPU can always print even if the lock is wedged.
+void
+printf(char *fmt, ...)
+{
+  va_list ap;
+  int i, c, locking;
+  char *s;
+
+  locking = pr.locking;
+  if(locking)
+    acquire(&pr.lock);
+
+  if (fmt == 0)
+    panic("null fmt");
+
+  va_start(ap, fmt);
+  // walk the format string one byte at a time.
+  for(i = 0; (c = fmt[i] & 0xff) != 0; i++){
+    if(c != '%'){
+      consputc(c);
+      continue;
+    }
+    // a '%' at the very end of fmt is silently dropped.
+    c = fmt[++i] & 0xff;
+    if(c == 0)
+      break;
+    switch(c){
+    case 'd':
+      printint(va_arg(ap, int), 10, 1);
+      break;
+    case 'x':
+      printint(va_arg(ap, int), 16, 1);
+      break;
+    case 'p':
+      printptr(va_arg(ap, uint64));
+      break;
+    case 's':
+      if((s = va_arg(ap, char*)) == 0)
+        s = "(null)";
+      for(; *s; s++)
+        consputc(*s);
+      break;
+    case '%':
+      consputc('%');
+      break;
+    default:
+      // Print unknown % sequence to draw attention.
+      consputc('%');
+      consputc(c);
+      break;
+    }
+  }
+
+  if(locking)
+    release(&pr.lock);
+}
+
+void
+panic(char *s)
+{
+ pr.locking = 0;
+ printf("panic: ");
+ printf(s);
+ printf("\n");
+ panicked = 1; // freeze other CPUs
+ for(;;)
+ ;
+}
+
+// One-time printf setup: initialize the lock and enable locking.
+void
+printfinit(void)
+{
+  initlock(&pr.lock, "pr");
+  pr.locking = 1;
+}
diff --git a/kernel/proc.c b/kernel/proc.c
new file mode 100644
index 0000000..3d65b46
--- /dev/null
+++ b/kernel/proc.c
@@ -0,0 +1,647 @@
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "spinlock.h"
+#include "proc.h"
+#include "defs.h"
+
+// per-CPU state, indexed by hartid (see cpuid()).
+struct cpu cpus[NCPU];
+
+// the process table.
+struct proc proc[NPROC];
+
+// the first user process; adoptive parent for orphans.
+struct proc *initproc;
+
+int nextpid = 1;
+struct spinlock pid_lock;  // protects nextpid
+
+extern void forkret(void);
+static void wakeup1(struct proc *chan);
+
+extern char trampoline[]; // trampoline.S
+
+// Initialize the process table at boot time: one lock per slot,
+// plus a kernel stack for each slot mapped high in the kernel
+// page table with an unmapped guard page below it.
+void
+procinit(void)
+{
+  struct proc *p;
+  
+  initlock(&pid_lock, "nextpid");
+  for(p = proc; p < &proc[NPROC]; p++) {
+      initlock(&p->lock, "proc");
+
+      // Allocate a page for the process's kernel stack.
+      // Map it high in memory, followed by an invalid
+      // guard page.
+      char *pa = kalloc();
+      if(pa == 0)
+        panic("kalloc");
+      uint64 va = KSTACK((int) (p - proc));
+      kvmmap(va, (uint64)pa, PGSIZE, PTE_R | PTE_W);
+      p->kstack = va;
+  }
+  // reload satp so this hart sees the new kernel-stack mappings.
+  kvminithart();
+}
+
+// Which core is this? The kernel keeps each hart's id in its
+// tp register. Must be called with interrupts disabled, to
+// prevent a race with the process being moved to a different CPU.
+int
+cpuid()
+{
+  return r_tp();
+}
+
+// Return this CPU's cpu struct.
+// Interrupts must be disabled so the core can't change between
+// reading cpuid() and using the returned pointer.
+struct cpu*
+mycpu(void) {
+  return &cpus[cpuid()];
+}
+
+// Return the current struct proc *, or zero if none.
+struct proc*
+myproc(void) {
+  // disable interrupts so we aren't rescheduled to another CPU
+  // between reading mycpu() and c->proc.
+  push_off();
+  struct cpu *c = mycpu();
+  struct proc *p = c->proc;
+  pop_off();
+  return p;
+}
+
+// Hand out the next process id, serialized by pid_lock so two
+// CPUs never allocate the same pid.
+int
+allocpid() {
+  int pid;
+
+  acquire(&pid_lock);
+  pid = nextpid++;
+  release(&pid_lock);
+  return pid;
+}
+
+// Look in the process table for an UNUSED proc.
+// If found, initialize state required to run in the kernel,
+// and return with p->lock held.
+// If there are no free procs, return 0.
+static struct proc*
+allocproc(void)
+{
+  struct proc *p;
+
+  for(p = proc; p < &proc[NPROC]; p++) {
+    acquire(&p->lock);
+    if(p->state == UNUSED) {
+      goto found;
+    } else {
+      release(&p->lock);
+    }
+  }
+  return 0;
+
+found:
+  p->pid = allocpid();
+
+  // Allocate a trapframe page.
+  if((p->tf = (struct trapframe *)kalloc()) == 0){
+    release(&p->lock);
+    return 0;
+  }
+
+  // An empty user page table.
+  // NOTE(review): proc_pagetable's result is not checked for
+  // failure here — verify it cannot return 0 in this tree.
+  p->pagetable = proc_pagetable(p);
+
+  // Set up new context to start executing at forkret,
+  // which returns to user space.
+  memset(&p->context, 0, sizeof p->context);
+  p->context.ra = (uint64)forkret;
+  p->context.sp = p->kstack + PGSIZE;
+
+  return p;
+}
+
+// free a proc structure and the data hanging from it,
+// including user pages.
+// p->lock must be held.
+static void
+freeproc(struct proc *p)
+{
+  if(p->tf)
+    kfree((void*)p->tf);
+  p->tf = 0;
+  if(p->pagetable)
+    proc_freepagetable(p->pagetable, p->sz);
+  p->pagetable = 0;
+  // reset every field so the slot is a clean UNUSED entry.
+  p->sz = 0;
+  p->pid = 0;
+  p->parent = 0;
+  p->name[0] = 0;
+  p->chan = 0;
+  p->killed = 0;
+  p->state = UNUSED;
+}
+
+// Create a page table for a given process,
+// with no user pages, but with trampoline pages.
+pagetable_t
+proc_pagetable(struct proc *p)
+{
+  pagetable_t pagetable;
+
+  // An empty page table.
+  pagetable = uvmcreate();
+
+  // map the trampoline code (for system call return)
+  // at the highest user virtual address.
+  // only the supervisor uses it, on the way
+  // to/from user space, so not PTE_U.
+  // NOTE(review): mappages' return values are ignored here —
+  // confirm it cannot fail for these fixed mappings in this tree.
+  mappages(pagetable, TRAMPOLINE, PGSIZE,
+           (uint64)trampoline, PTE_R | PTE_X);
+
+  // map the trapframe just below TRAMPOLINE, for trampoline.S.
+  mappages(pagetable, TRAPFRAME, PGSIZE,
+           (uint64)(p->tf), PTE_R | PTE_W);
+
+  return pagetable;
+}
+
+// Free a process's page table, and free the
+// physical memory it refers to.
+void
+proc_freepagetable(pagetable_t pagetable, uint64 sz)
+{
+  // unmap (but don't free) the shared trampoline page and the
+  // trapframe page; the trapframe's physical page is freed
+  // separately by freeproc().
+  uvmunmap(pagetable, TRAMPOLINE, PGSIZE, 0);
+  uvmunmap(pagetable, TRAPFRAME, PGSIZE, 0);
+  if(sz > 0)
+    uvmfree(pagetable, sz);
+}
+
+// a user program that calls exec("/init")
+// od -t xC initcode
+// (machine code for user/initcode.S; update by re-running od
+// after changing that file)
+uchar initcode[] = {
+  0x17, 0x05, 0x00, 0x00, 0x13, 0x05, 0x05, 0x02,
+  0x97, 0x05, 0x00, 0x00, 0x93, 0x85, 0x05, 0x02,
+  0x9d, 0x48, 0x73, 0x00, 0x00, 0x00, 0x89, 0x48,
+  0x73, 0x00, 0x00, 0x00, 0xef, 0xf0, 0xbf, 0xff,
+  0x2f, 0x69, 0x6e, 0x69, 0x74, 0x00, 0x00, 0x01,
+  0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00
+};
+
+// Set up first user process.
+void
+userinit(void)
+{
+  struct proc *p;
+
+  // allocproc() returns with p->lock held.
+  p = allocproc();
+  initproc = p;
+  
+  // allocate one user page and copy init's instructions
+  // and data into it.
+  uvminit(p->pagetable, initcode, sizeof(initcode));
+  p->sz = PGSIZE;
+
+  // prepare for the very first "return" from kernel to user.
+  p->tf->epc = 0;      // user program counter
+  p->tf->sp = PGSIZE;  // user stack pointer
+
+  safestrcpy(p->name, "initcode", sizeof(p->name));
+  p->cwd = namei("/");
+
+  p->state = RUNNABLE;
+
+  release(&p->lock);
+}
+
+// Grow (n > 0) or shrink (n < 0) the current process's memory
+// by n bytes. Return 0 on success, -1 on failure.
+int
+growproc(int n)
+{
+  struct proc *p = myproc();
+  uint newsz = p->sz;
+
+  if(n > 0){
+    newsz = uvmalloc(p->pagetable, newsz, newsz + n);
+    if(newsz == 0)
+      return -1;
+  } else if(n < 0){
+    newsz = uvmdealloc(p->pagetable, newsz, newsz + n);
+    if(newsz == 0)
+      return -1;
+  }
+  p->sz = newsz;
+  return 0;
+}
+
+// Create a new process, copying the parent.
+// Sets up child kernel stack to return as if from fork() system call.
+// Returns the child's pid in the parent, 0 in the child, -1 on failure.
+int
+fork(void)
+{
+  int i, pid;
+  struct proc *np;
+  struct proc *p = myproc();
+
+  // Allocate process. np->lock is held on success.
+  if((np = allocproc()) == 0){
+    return -1;
+  }
+
+  // Copy user memory from parent to child.
+  if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0){
+    freeproc(np);
+    release(&np->lock);
+    return -1;
+  }
+  np->sz = p->sz;
+
+  np->parent = p;
+
+  // copy saved user registers.
+  *(np->tf) = *(p->tf);
+
+  // Cause fork to return 0 in the child.
+  np->tf->a0 = 0;
+
+  // increment reference counts on open file descriptors.
+  for(i = 0; i < NOFILE; i++)
+    if(p->ofile[i])
+      np->ofile[i] = filedup(p->ofile[i]);
+  np->cwd = idup(p->cwd);
+
+  safestrcpy(np->name, p->name, sizeof(p->name));
+
+  pid = np->pid;
+
+  np->state = RUNNABLE;
+
+  release(&np->lock);
+
+  return pid;
+}
+
+// Pass p's abandoned children to init.
+// Caller must hold p->lock and parent->lock.
+void
+reparent(struct proc *p, struct proc *parent) {
+  struct proc *pp;
+  // if p is itself a child of init, the caller already holds
+  // initproc->lock (it is parent->lock); don't re-acquire it below.
+  int child_of_init = (p->parent == initproc);
+
+  for(pp = proc; pp < &proc[NPROC]; pp++){
+    // this code uses pp->parent without holding pp->lock.
+    // acquiring the lock first could cause a deadlock
+    // if pp or a child of pp were also in exit()
+    // and about to try to lock p.
+    if(pp->parent == p){
+      // pp->parent can't change between the check and the acquire()
+      // because only the parent changes it, and we're the parent.
+      acquire(&pp->lock);
+      pp->parent = initproc;
+      if(pp->state == ZOMBIE) {
+        // init must reap this zombie; wake it if it's in wait().
+        if(!child_of_init)
+          acquire(&initproc->lock);
+        wakeup1(initproc);
+        if(!child_of_init)
+          release(&initproc->lock);
+      }
+      release(&pp->lock);
+    }
+  }
+}
+
+// Exit the current process.  Does not return.
+// An exited process remains in the zombie state
+// until its parent calls wait().
+void
+exit(void)
+{
+  struct proc *p = myproc();
+
+  if(p == initproc)
+    panic("init exiting");
+
+  // Close all open files.
+  for(int fd = 0; fd < NOFILE; fd++){
+    if(p->ofile[fd]){
+      struct file *f = p->ofile[fd];
+      fileclose(f);
+      p->ofile[fd] = 0;
+    }
+  }
+
+  // drop the reference to the current directory inside a
+  // filesystem transaction.
+  begin_op();
+  iput(p->cwd);
+  end_op();
+  p->cwd = 0;
+
+  // lock order: parent before child, matching wait().
+  acquire(&p->parent->lock);
+
+  acquire(&p->lock);
+
+  // Give any children to init.
+  reparent(p, p->parent);
+
+  // Parent might be sleeping in wait().
+  wakeup1(p->parent);
+
+  p->state = ZOMBIE;
+
+  release(&p->parent->lock);
+
+  // Jump into the scheduler, never to return.
+  // sched() requires that only p->lock be held.
+  sched();
+  panic("zombie exit");
+}
+
+// Wait for a child process to exit and return its pid.
+// Return -1 if this process has no children.
+int
+wait(void)
+{
+  struct proc *np;
+  int havekids, pid;
+  struct proc *p = myproc();
+
+  // hold p->lock for the whole time to avoid lost
+  // wakeups from a child's exit().
+  acquire(&p->lock);
+
+  for(;;){
+    // Scan through table looking for exited children.
+    havekids = 0;
+    for(np = proc; np < &proc[NPROC]; np++){
+      // this code uses np->parent without holding np->lock.
+      // acquiring the lock first would cause a deadlock,
+      // since np might be an ancestor, and we already hold p->lock.
+      if(np->parent == p){
+        // np->parent can't change between the check and the acquire()
+        // because only the parent changes it, and we're the parent.
+        acquire(&np->lock);
+        havekids = 1;
+        if(np->state == ZOMBIE){
+          // Found one. Reclaim its resources and return its pid.
+          pid = np->pid;
+          freeproc(np);
+          release(&np->lock);
+          release(&p->lock);
+          return pid;
+        }
+        release(&np->lock);
+      }
+    }
+
+    // No point waiting if we don't have any children.
+    if(!havekids || p->killed){
+      release(&p->lock);
+      return -1;
+    }
+    
+    // Wait for a child to exit; exit() wakes us via wakeup1(p).
+    sleep(p, &p->lock);  //DOC: wait-sleep
+  }
+}
+
+// Per-CPU process scheduler.
+// Each CPU calls scheduler() after setting itself up.
+// Scheduler never returns.  It loops, doing:
+//  - choose a process to run.
+//  - swtch to start running that process.
+//  - eventually that process transfers control
+//    via swtch back to the scheduler.
+void
+scheduler(void)
+{
+  struct proc *p;
+  struct cpu *c = mycpu();
+  
+  c->proc = 0;
+  for(;;){
+    // Avoid deadlock by ensuring that devices can interrupt.
+    intr_on();
+    
+    for(p = proc; p < &proc[NPROC]; p++) {
+      acquire(&p->lock);
+      if(p->state == RUNNABLE) {
+        // Switch to chosen process.  It is the process's job
+        // to release its lock and then reacquire it
+        // before jumping back to us.
+        // p->lock is passed across the swtch: the process's
+        // kernel thread releases it in sched()/forkret().
+        p->state = RUNNING;
+        c->proc = p;
+        swtch(&c->scheduler, &p->context);
+
+        // Process is done running for now.
+        // It should have changed its p->state before coming back.
+        c->proc = 0;
+      }
+      release(&p->lock);
+    }
+  }
+}
+
+// Switch to scheduler.  Must hold only p->lock
+// and have changed proc->state. Saves and restores
+// intena because intena is a property of this
+// kernel thread, not this CPU. It should
+// be proc->intena and proc->noff, but that would
+// break in the few places where a lock is held but
+// there's no process.
+void
+sched(void)
+{
+  int intena;
+  struct proc *p = myproc();
+
+  // sanity-check the caller's locking and interrupt state.
+  if(!holding(&p->lock))
+    panic("sched p->lock");
+  if(mycpu()->noff != 1)
+    panic("sched locks");
+  if(p->state == RUNNING)
+    panic("sched running");
+  if(intr_get())
+    panic("sched interruptible");
+
+  intena = mycpu()->intena;
+  swtch(&p->context, &mycpu()->scheduler);
+  mycpu()->intena = intena;
+}
+
+// Give up the CPU for one scheduling round.
+void
+yield(void)
+{
+  struct proc *p = myproc();
+  // hold p->lock across sched(); the scheduler releases it.
+  acquire(&p->lock);  //DOC: yieldlock
+  p->state = RUNNABLE;
+  sched();
+  release(&p->lock);
+}
+
+// A fork child's very first scheduling by scheduler()
+// will swtch to forkret.
+void
+forkret(void)
+{
+  static int first = 1;
+
+  // Still holding p->lock from scheduler.
+  release(&myproc()->lock);
+
+  if (first) {
+    // File system initialization must be run in the context of a
+    // regular process (e.g., because it calls sleep), and thus cannot
+    // be run from main().
+    first = 0;
+    fsinit(ROOTDEV);
+  }
+
+  // return to user space as if coming back from a trap.
+  usertrapret();
+}
+
+// Atomically release lock and sleep on chan.
+// Reacquires lock when awakened.
+void
+sleep(void *chan, struct spinlock *lk)
+{
+  struct proc *p = myproc();
+  
+  // Must acquire p->lock in order to
+  // change p->state and then call sched.
+  // Once we hold p->lock, we can be
+  // guaranteed that we won't miss any wakeup
+  // (wakeup locks p->lock),
+  // so it's okay to release lk.
+  if(lk != &p->lock){  //DOC: sleeplock0
+    acquire(&p->lock);  //DOC: sleeplock1
+    release(lk);
+  }
+
+  // Go to sleep.
+  p->chan = chan;
+  p->state = SLEEPING;
+
+  // sched() returns once some CPU has made us RUNNABLE and
+  // scheduled us again.
+  sched();
+
+  // Tidy up.
+  p->chan = 0;
+
+  // Reacquire original lock.
+  if(lk != &p->lock){  //DOC: sleeplock2
+    release(&p->lock);
+    acquire(lk);
+  }
+}
+
+// Wake up all processes sleeping on chan.
+// Must be called without any p->lock.
+void
+wakeup(void *chan)
+{
+  struct proc *p;
+
+  for(p = proc; p < &proc[NPROC]; p++) {
+    // p->lock makes the state+chan check atomic with sleep().
+    acquire(&p->lock);
+    if(p->state == SLEEPING && p->chan == chan) {
+      p->state = RUNNABLE;
+    }
+    release(&p->lock);
+  }
+}
+
+// Wake up p if it is sleeping in wait(); used by exit().
+// Caller must hold p->lock.
+// (wait() sleeps on the proc's own address, hence chan == p.)
+static void
+wakeup1(struct proc *p)
+{
+  if(p->chan == p && p->state == SLEEPING) {
+    p->state = RUNNABLE;
+  }
+}
+
+// Kill the process with the given pid.
+// The victim won't exit until it tries to return
+// to user space (see usertrap() in trap.c).
+// Returns 0 on success, -1 if no process has that pid.
+int
+kill(int pid)
+{
+  struct proc *p;
+
+  for(p = proc; p < &proc[NPROC]; p++){
+    acquire(&p->lock);
+    if(p->pid == pid){
+      p->killed = 1;
+      if(p->state == SLEEPING){
+        // Wake process from sleep().
+        p->state = RUNNABLE;
+      }
+      release(&p->lock);
+      return 0;
+    }
+    release(&p->lock);
+  }
+  return -1;
+}
+
+// Copy len bytes from kernel buffer src to dst, where dst is a
+// user virtual address if user_dst is set, else a kernel address.
+// Returns 0 on success, -1 on error.
+int
+either_copyout(int user_dst, uint64 dst, void *src, uint64 len)
+{
+  struct proc *p = myproc();
+
+  if(!user_dst){
+    // kernel-to-kernel copy can't fail.
+    memmove((char *)dst, src, len);
+    return 0;
+  }
+  return copyout(p->pagetable, dst, src, len);
+}
+
+// Copy len bytes into kernel buffer dst from src, where src is a
+// user virtual address if user_src is set, else a kernel address.
+// Returns 0 on success, -1 on error.
+int
+either_copyin(void *dst, int user_src, uint64 src, uint64 len)
+{
+  struct proc *p = myproc();
+
+  if(!user_src){
+    // kernel-to-kernel copy can't fail.
+    memmove(dst, (char*)src, len);
+    return 0;
+  }
+  return copyin(p->pagetable, dst, src, len);
+}
+
+// Print a process listing to console.  For debugging.
+// Runs when user types ^P on console.
+// No lock to avoid wedging a stuck machine further.
+void
+procdump(void)
+{
+  static char *states[] = {
+  [UNUSED]    "unused",
+  [SLEEPING]  "sleep ",
+  [RUNNABLE]  "runble",
+  [RUNNING]   "run   ",
+  [ZOMBIE]    "zombie"
+  };
+  struct proc *p;
+  char *state;
+
+  printf("\n");
+  for(p = proc; p < &proc[NPROC]; p++){
+    if(p->state == UNUSED)
+      continue;
+    // fall back to "???" if state is somehow out of range.
+    if(p->state >= 0 && p->state < NELEM(states) && states[p->state])
+      state = states[p->state];
+    else
+      state = "???";
+    printf("%d %s %s", p->pid, state, p->name);
+    printf("\n");
+  }
+}
diff --git a/kernel/proc.h b/kernel/proc.h
new file mode 100644
index 0000000..655d79f
--- /dev/null
+++ b/kernel/proc.h
@@ -0,0 +1,105 @@
+// Saved registers for kernel context switches.
+// Only callee-saved registers (plus ra/sp) are needed: swtch is
+// an ordinary function call, so the compiler already preserves
+// caller-saved registers around it.
+struct context {
+  uint64 ra;
+  uint64 sp;
+
+  // callee-saved
+  uint64 s0;
+  uint64 s1;
+  uint64 s2;
+  uint64 s3;
+  uint64 s4;
+  uint64 s5;
+  uint64 s6;
+  uint64 s7;
+  uint64 s8;
+  uint64 s9;
+  uint64 s10;
+  uint64 s11;
+};
+
+// Per-CPU state.
+struct cpu {
+  struct proc *proc;          // The process running on this cpu, or null.
+  struct context scheduler;   // swtch() here to enter scheduler().
+  int noff;                   // Depth of push_off() nesting.
+  int intena;                 // Were interrupts enabled before push_off()?
+};
+
+// one entry per hart, indexed by cpuid() (== tp register).
+extern struct cpu cpus[NCPU];
+
+// per-process data for the trap handling code in trampoline.S.
+// sits in a page by itself just under the trampoline page in the
+// user page table. not specially mapped in the kernel page table.
+// the sscratch register points here.
+// uservec in trampoline.S saves user registers in the trapframe,
+// then initializes registers from the trapframe's
+// kernel_sp, kernel_hartid, kernel_satp, and jumps to kernel_trap.
+// usertrapret() and userret in trampoline.S set up
+// the trapframe's kernel_*, restore user registers from the
+// trapframe, switch to the user page table, and enter user space.
+// the trapframe includes callee-saved user registers like s0-s11 because the
+// return-to-user path via usertrapret() doesn't return through
+// the entire kernel call stack.
+// the /* offset */ annotations must match the load/store offsets
+// hard-coded in trampoline.S; keep them in sync.
+struct trapframe {
+  /*   0 */ uint64 kernel_satp;   // kernel page table
+  /*   8 */ uint64 kernel_sp;     // top of process's kernel stack
+  /*  16 */ uint64 kernel_trap;   // usertrap()
+  /*  24 */ uint64 epc;           // saved user program counter
+  /*  32 */ uint64 kernel_hartid; // saved kernel tp
+  /*  40 */ uint64 ra;
+  /*  48 */ uint64 sp;
+  /*  56 */ uint64 gp;
+  /*  64 */ uint64 tp;
+  /*  72 */ uint64 t0;
+  /*  80 */ uint64 t1;
+  /*  88 */ uint64 t2;
+  /*  96 */ uint64 s0;
+  /* 104 */ uint64 s1;
+  /* 112 */ uint64 a0;
+  /* 120 */ uint64 a1;
+  /* 128 */ uint64 a2;
+  /* 136 */ uint64 a3;
+  /* 144 */ uint64 a4;
+  /* 152 */ uint64 a5;
+  /* 160 */ uint64 a6;
+  /* 168 */ uint64 a7;
+  /* 176 */ uint64 s2;
+  /* 184 */ uint64 s3;
+  /* 192 */ uint64 s4;
+  /* 200 */ uint64 s5;
+  /* 208 */ uint64 s6;
+  /* 216 */ uint64 s7;
+  /* 224 */ uint64 s8;
+  /* 232 */ uint64 s9;
+  /* 240 */ uint64 s10;
+  /* 248 */ uint64 s11;
+  /* 256 */ uint64 t3;
+  /* 264 */ uint64 t4;
+  /* 272 */ uint64 t5;
+  /* 280 */ uint64 t6;
+};
+
+// process life cycle: UNUSED -> (allocproc) -> RUNNABLE <-> RUNNING
+// -> SLEEPING (sleep/wakeup) -> ZOMBIE (exit) -> UNUSED (wait).
+enum procstate { UNUSED, SLEEPING, RUNNABLE, RUNNING, ZOMBIE };
+
+// Per-process state
+struct proc {
+  struct spinlock lock;
+
+  // p->lock must be held when using these:
+  enum procstate state;        // Process state
+  struct proc *parent;         // Parent process
+  void *chan;                  // If non-zero, sleeping on chan
+  int killed;                  // If non-zero, have been killed
+  int pid;                     // Process ID
+
+  // these are private to the process, so p->lock need not be held.
+  uint64 kstack;               // Bottom of kernel stack for this process
+  uint64 sz;                   // Size of process memory (bytes)
+  pagetable_t pagetable;       // Page table
+  struct trapframe *tf;        // data page for trampoline.S
+  struct context context;      // swtch() here to run process
+  struct file *ofile[NOFILE];  // Open files
+  struct inode *cwd;           // Current directory
+  char name[16];               // Process name (debugging)
+};
diff --git a/kernel/ramdisk.c b/kernel/ramdisk.c
new file mode 100644
index 0000000..9901294
--- /dev/null
+++ b/kernel/ramdisk.c
@@ -0,0 +1,45 @@
+//
+// ramdisk that uses the disk image loaded by qemu -rdinit fs.img
+//
+
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "param.h"
+#include "memlayout.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "fs.h"
+#include "buf.h"
+
+// nothing to set up: the disk image is already in RAM,
+// loaded by qemu's -rdinit option.
+void
+ramdiskinit(void)
+{
+}
+
+// If B_DIRTY is set, write buf to disk, clear B_DIRTY, set B_VALID.
+// Else if B_VALID is not set, read buf from disk, set B_VALID.
+// NOTE(review): this uses b->flags / B_VALID / B_DIRTY — verify
+// this tree's buf.h actually declares them (xv6-riscv's buf
+// normally uses separate valid/disk fields instead).
+void
+ramdiskrw(struct buf *b)
+{
+  if(!holdingsleep(&b->lock))
+    panic("ramdiskrw: buf not locked");
+  if((b->flags & (B_VALID|B_DIRTY)) == B_VALID)
+    panic("ramdiskrw: nothing to do");
+
+  if(b->blockno >= FSSIZE)
+    panic("ramdiskrw: blockno too big");
+
+  // the image sits at physical address RAMDISK, one BSIZE
+  // block per blockno.
+  uint64 diskaddr = b->blockno * BSIZE;
+  char *addr = (char *)RAMDISK + diskaddr;
+
+  if(b->flags & B_DIRTY){
+    // write
+    memmove(addr, b->data, BSIZE);
+    b->flags &= ~B_DIRTY;
+  } else {
+    // read
+    memmove(b->data, addr, BSIZE);
+    b->flags |= B_VALID;
+  }
+}
diff --git a/kernel/riscv.h b/kernel/riscv.h
new file mode 100644
index 0000000..0f83db6
--- /dev/null
+++ b/kernel/riscv.h
@@ -0,0 +1,358 @@
+// which hart (core) is this? (M-mode only; read early in start.c)
+static inline uint64
+r_mhartid()
+{
+  uint64 x;
+  asm volatile("csrr %0, mhartid" : "=r" (x) );
+  return x;
+}
+
+// Machine Status Register, mstatus
+
+#define MSTATUS_MPP_MASK (3L << 11) // previous mode.
+#define MSTATUS_MPP_M (3L << 11)
+#define MSTATUS_MPP_S (1L << 11)
+#define MSTATUS_MPP_U (0L << 11)
+#define MSTATUS_MIE (1L << 3)    // machine-mode interrupt enable.
+
+static inline uint64
+r_mstatus()
+{
+  uint64 x;
+  asm volatile("csrr %0, mstatus" : "=r" (x) );
+  return x;
+}
+
+static inline void
+w_mstatus(uint64 x)
+{
+  asm volatile("csrw mstatus, %0" : : "r" (x));
+}
+
+// machine exception program counter, holds the
+// instruction address to which a return from
+// exception will go.
+static inline void
+w_mepc(uint64 x)
+{
+  asm volatile("csrw mepc, %0" : : "r" (x));
+}
+
+// Supervisor Status Register, sstatus
+
+#define SSTATUS_SPP (1L << 8)  // Previous mode, 1=Supervisor, 0=User
+#define SSTATUS_SPIE (1L << 5) // Supervisor Previous Interrupt Enable
+#define SSTATUS_UPIE (1L << 4) // User Previous Interrupt Enable
+#define SSTATUS_SIE (1L << 1)  // Supervisor Interrupt Enable
+#define SSTATUS_UIE (1L << 0)  // User Interrupt Enable
+
+static inline uint64
+r_sstatus()
+{
+  uint64 x;
+  asm volatile("csrr %0, sstatus" : "=r" (x) );
+  return x;
+}
+
+static inline void
+w_sstatus(uint64 x)
+{
+  asm volatile("csrw sstatus, %0" : : "r" (x));
+}
+
+// Supervisor Interrupt Pending
+static inline uint64
+r_sip()
+{
+  uint64 x;
+  asm volatile("csrr %0, sip" : "=r" (x) );
+  return x;
+}
+
+static inline void
+w_sip(uint64 x)
+{
+  asm volatile("csrw sip, %0" : : "r" (x));
+}
+
+// Supervisor Interrupt Enable
+#define SIE_SEIE (1L << 9) // external
+#define SIE_STIE (1L << 5) // timer
+#define SIE_SSIE (1L << 1) // software
+static inline uint64
+r_sie()
+{
+  uint64 x;
+  asm volatile("csrr %0, sie" : "=r" (x) );
+  return x;
+}
+
+static inline void
+w_sie(uint64 x)
+{
+  asm volatile("csrw sie, %0" : : "r" (x));
+}
+
+// Machine-mode Interrupt Enable
+#define MIE_MEIE (1L << 11) // external
+#define MIE_MTIE (1L << 7)  // timer
+#define MIE_MSIE (1L << 3)  // software
+static inline uint64
+r_mie()
+{
+  uint64 x;
+  asm volatile("csrr %0, mie" : "=r" (x) );
+  return x;
+}
+
+static inline void
+w_mie(uint64 x)
+{
+  asm volatile("csrw mie, %0" : : "r" (x));
+}
+
+// supervisor exception program counter, holds the
+// instruction address to which a return from
+// exception will go.
+static inline void
+w_sepc(uint64 x)
+{
+  asm volatile("csrw sepc, %0" : : "r" (x));
+}
+
+static inline uint64
+r_sepc()
+{
+  uint64 x;
+  asm volatile("csrr %0, sepc" : "=r" (x) );
+  return x;
+}
+
+// Machine Exception Delegation: which exceptions are
+// handled in S-mode instead of M-mode.
+static inline uint64
+r_medeleg()
+{
+  uint64 x;
+  asm volatile("csrr %0, medeleg" : "=r" (x) );
+  return x;
+}
+
+static inline void
+w_medeleg(uint64 x)
+{
+  asm volatile("csrw medeleg, %0" : : "r" (x));
+}
+
+// Machine Interrupt Delegation: which interrupts are
+// handled in S-mode instead of M-mode.
+static inline uint64
+r_mideleg()
+{
+  uint64 x;
+  asm volatile("csrr %0, mideleg" : "=r" (x) );
+  return x;
+}
+
+static inline void
+w_mideleg(uint64 x)
+{
+  asm volatile("csrw mideleg, %0" : : "r" (x));
+}
+
+// Supervisor Trap-Vector Base Address
+// low two bits are mode.
+static inline void
+w_stvec(uint64 x)
+{
+  asm volatile("csrw stvec, %0" : : "r" (x));
+}
+
+static inline uint64
+r_stvec()
+{
+  uint64 x;
+  asm volatile("csrr %0, stvec" : "=r" (x) );
+  return x;
+}
+
+// Machine-mode interrupt vector
+static inline void
+w_mtvec(uint64 x)
+{
+  asm volatile("csrw mtvec, %0" : : "r" (x));
+}
+
+// use riscv's sv39 page table scheme.
+#define SATP_SV39 (8L << 60)
+
+// satp holds the mode bits plus the page table's physical
+// page number (physical address >> 12).
+#define MAKE_SATP(pagetable) (SATP_SV39 | (((uint64)pagetable) >> 12))
+
+// supervisor address translation and protection;
+// holds the address of the page table.
+static inline void
+w_satp(uint64 x)
+{
+  asm volatile("csrw satp, %0" : : "r" (x));
+}
+
+static inline uint64
+r_satp()
+{
+  uint64 x;
+  asm volatile("csrr %0, satp" : "=r" (x) );
+  return x;
+}
+
+// Supervisor Scratch register, for early trap handler in trampoline.S.
+static inline void
+w_sscratch(uint64 x)
+{
+  asm volatile("csrw sscratch, %0" : : "r" (x));
+}
+
+// Machine Scratch register, used by the M-mode timer handler.
+static inline void
+w_mscratch(uint64 x)
+{
+  asm volatile("csrw mscratch, %0" : : "r" (x));
+}
+
+// Supervisor Trap Cause: why the most recent trap happened.
+static inline uint64
+r_scause()
+{
+  uint64 x;
+  asm volatile("csrr %0, scause" : "=r" (x) );
+  return x;
+}
+
+// Supervisor Trap Value: e.g. the faulting address.
+static inline uint64
+r_stval()
+{
+  uint64 x;
+  asm volatile("csrr %0, stval" : "=r" (x) );
+  return x;
+}
+
+// Machine-mode Counter-Enable: lets lower privilege
+// levels read the counter CSRs.
+static inline void
+w_mcounteren(uint64 x)
+{
+  asm volatile("csrw mcounteren, %0" : : "r" (x));
+}
+
+static inline uint64
+r_mcounteren()
+{
+  uint64 x;
+  asm volatile("csrr %0, mcounteren" : "=r" (x) );
+  return x;
+}
+
+// the time CSR: real-time counter (mirrors CLINT's mtime),
+// not the per-hart cycle counter.
+static inline uint64
+r_time()
+{
+  uint64 x;
+  asm volatile("csrr %0, time" : "=r" (x) );
+  return x;
+}
+
+// enable device interrupts
+static inline void
+intr_on()
+{
+  w_sie(r_sie() | SIE_SEIE | SIE_STIE | SIE_SSIE);
+  w_sstatus(r_sstatus() | SSTATUS_SIE);
+}
+
+// disable device interrupts
+static inline void
+intr_off()
+{
+  w_sstatus(r_sstatus() & ~SSTATUS_SIE);
+}
+
+// are device interrupts enabled?
+static inline int
+intr_get()
+{
+  uint64 x = r_sstatus();
+  return (x & SSTATUS_SIE) != 0;
+}
+
+static inline uint64
+r_sp()
+{
+  uint64 x;
+  asm volatile("mv %0, sp" : "=r" (x) );
+  return x;
+}
+
+// read and write tp, the thread pointer, which holds
+// this core's hartid (core number), the index into cpus[].
+static inline uint64
+r_tp()
+{
+  uint64 x;
+  asm volatile("mv %0, tp" : "=r" (x) );
+  return x;
+}
+
+static inline void
+w_tp(uint64 x)
+{
+  asm volatile("mv tp, %0" : : "r" (x));
+}
+
+static inline uint64
+r_ra()
+{
+  uint64 x;
+  asm volatile("mv %0, ra" : "=r" (x) );
+  return x;
+}
+
+// tell the machine to finish any previous writes to
+// PTEs, so that a subsequent use of a virtual
+// address or load of the SATP will see those writes.
+// perhaps this also flushes the TLB.
+static inline void
+sfence_vma()
+{
+  // the zero, zero means flush all TLB entries.
+  asm volatile("sfence.vma zero, zero");
+}
+
+
+#define PGSIZE 4096 // bytes per page
+#define PGSHIFT 12 // bits of offset within a page
+
+#define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1))
+#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1))
+
+#define PTE_V (1L << 0) // valid
+#define PTE_R (1L << 1)
+#define PTE_W (1L << 2)
+#define PTE_X (1L << 3)
+#define PTE_U (1L << 4) // 1 -> user can access
+
+// shift a physical address to the right place for a PTE.
+#define PA2PTE(pa) ((((uint64)pa) >> 12) << 10)
+
+#define PTE2PA(pte) (((pte) >> 10) << 12)
+
+#define PTE_FLAGS(pte) ((pte) & (PTE_V|PTE_R|PTE_W|PTE_X|PTE_U))
+
+// extract the three 9-bit page table indices from a virtual address.
+#define PXMASK 0x1FF // 9 bits
+#define PXSHIFT(level) (PGSHIFT+(9*(level)))
+#define PX(level, va) ((((uint64) (va)) >> PXSHIFT(level)) & PXMASK)
+
+// one beyond the highest possible virtual address.
+// MAXVA is actually one bit less than the max allowed by
+// Sv39, to avoid having to sign-extend virtual addresses
+// that have the high bit set.
+#define MAXVA (1L << (9 + 9 + 9 + 12 - 1))
+
+typedef uint64 pte_t;
+typedef uint64 *pagetable_t; // 512 PTEs
diff --git a/kernel/sleeplock.c b/kernel/sleeplock.c
new file mode 100644
index 0000000..81de585
--- /dev/null
+++ b/kernel/sleeplock.c
@@ -0,0 +1,55 @@
+// Sleeping locks
+
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "param.h"
+#include "memlayout.h"
+#include "spinlock.h"
+#include "proc.h"
+#include "sleeplock.h"
+
+// initialize a sleep-lock. the embedded spinlock protects the
+// locked/pid fields; note it is named "sleep lock" for every
+// instance — only lk->name carries the caller-supplied name.
+void
+initsleeplock(struct sleeplock *lk, char *name)
+{
+ initlock(&lk->lk, "sleep lock");
+ lk->name = name;
+ lk->locked = 0;
+ lk->pid = 0;
+}
+
+// acquire the sleep-lock, yielding the CPU via sleep() while
+// another process holds it. uses sleep()/myproc(), so callers
+// must be in process context, and may block for a long time.
+void
+acquiresleep(struct sleeplock *lk)
+{
+ acquire(&lk->lk);
+ while (lk->locked) {
+ sleep(lk, &lk->lk);
+ }
+ lk->locked = 1;
+ lk->pid = myproc()->pid;
+ release(&lk->lk);
+}
+
+// release the sleep-lock and wake every process sleeping on it.
+void
+releasesleep(struct sleeplock *lk)
+{
+ acquire(&lk->lk);
+ lk->locked = 0;
+ lk->pid = 0;
+ wakeup(lk);
+ release(&lk->lk);
+}
+
+// true if the calling process currently holds lk.
+int
+holdingsleep(struct sleeplock *lk)
+{
+ int r;
+
+ acquire(&lk->lk);
+ r = lk->locked && (lk->pid == myproc()->pid);
+ release(&lk->lk);
+ return r;
+}
+
+
+
diff --git a/kernel/sleeplock.h b/kernel/sleeplock.h
new file mode 100644
index 0000000..110e6f3
--- /dev/null
+++ b/kernel/sleeplock.h
@@ -0,0 +1,10 @@
+// Long-term locks for processes: may be held across blocking
+// operations, unlike spinlocks. See sleeplock.c.
+struct sleeplock {
+ uint locked; // Is the lock held?
+ struct spinlock lk; // spinlock protecting this sleep lock
+
+ // For debugging:
+ char *name; // Name of lock.
+ int pid; // Process holding lock
+};
+
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
new file mode 100644
index 0000000..563532e
--- /dev/null
+++ b/kernel/spinlock.c
@@ -0,0 +1,108 @@
+// Mutual exclusion spin locks.
+
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "spinlock.h"
+#include "riscv.h"
+#include "proc.h"
+#include "defs.h"
+
+// initialize lk as unlocked with the given debug name.
+void
+initlock(struct spinlock *lk, char *name)
+{
+ lk->name = name;
+ lk->locked = 0;
+ lk->cpu = 0;
+}
+
+// Acquire the lock.
+// Loops (spins) until the lock is acquired.
+// Interrupts stay disabled until the matching release().
+void
+acquire(struct spinlock *lk)
+{
+ push_off(); // disable interrupts to avoid deadlock.
+ if(holding(lk))
+ panic("acquire");
+
+ // On RISC-V, sync_lock_test_and_set turns into an atomic swap:
+ // a5 = 1
+ // s1 = &lk->locked
+ // amoswap.w.aq a5, a5, (s1)
+ while(__sync_lock_test_and_set(&lk->locked, 1) != 0)
+ ;
+
+ // Tell the C compiler and the processor to not move loads or stores
+ // past this point, to ensure that the critical section's memory
+ // references happen after the lock is acquired.
+ __sync_synchronize();
+
+ // Record info about lock acquisition for holding() and debugging.
+ lk->cpu = mycpu();
+}
+
+// Release the lock.
+// Must be called by the same CPU that acquired it.
+void
+release(struct spinlock *lk)
+{
+ if(!holding(lk))
+ panic("release");
+
+ lk->cpu = 0;
+
+ // Tell the C compiler and the CPU to not move loads or stores
+ // past this point, to ensure that all the stores in the critical
+ // section are visible to other CPUs before the lock is released.
+ // On RISC-V, this turns into a fence instruction.
+ __sync_synchronize();
+
+ // Release the lock, equivalent to lk->locked = 0.
+ // This code doesn't use a C assignment, since the C standard
+ // implies that an assignment might be implemented with
+ // multiple store instructions.
+ // On RISC-V, sync_lock_release turns into an atomic swap:
+ // s1 = &lk->locked
+ // amoswap.w zero, zero, (s1)
+ __sync_lock_release(&lk->locked);
+
+ pop_off();
+}
+
+// Check whether this cpu is holding the lock.
+// push_off/pop_off keep interrupts off so the locked/cpu pair
+// is read consistently with respect to this core.
+int
+holding(struct spinlock *lk)
+{
+ int r;
+ push_off();
+ r = (lk->locked && lk->cpu == mycpu());
+ pop_off();
+ return r;
+}
+
+// push_off/pop_off are like intr_off()/intr_on() except that they are matched:
+// it takes two pop_off()s to undo two push_off()s. Also, if interrupts
+// are initially off, then push_off, pop_off leaves them off.
+
+// disable interrupts, remembering (at the first nesting level)
+// whether they were enabled beforehand.
+void
+push_off(void)
+{
+ int old = intr_get();
+
+ intr_off();
+ if(mycpu()->noff == 0)
+ mycpu()->intena = old;
+ mycpu()->noff += 1;
+}
+
+// undo one push_off(); re-enable interrupts only when the
+// nesting count reaches zero and they were on originally.
+void
+pop_off(void)
+{
+ struct cpu *c = mycpu();
+ if(intr_get())
+ panic("pop_off - interruptible");
+ c->noff -= 1;
+ if(c->noff < 0)
+ panic("pop_off");
+ if(c->noff == 0 && c->intena)
+ intr_on();
+}
diff --git a/kernel/spinlock.h b/kernel/spinlock.h
new file mode 100644
index 0000000..4392820
--- /dev/null
+++ b/kernel/spinlock.h
@@ -0,0 +1,9 @@
+// Mutual exclusion lock. Held only for short, non-blocking
+// critical sections; acquire() disables interrupts while held.
+struct spinlock {
+ uint locked; // Is the lock held?
+
+ // For debugging:
+ char *name; // Name of lock.
+ struct cpu *cpu; // The cpu holding the lock.
+};
+
diff --git a/kernel/start.c b/kernel/start.c
new file mode 100644
index 0000000..203c5e6
--- /dev/null
+++ b/kernel/start.c
@@ -0,0 +1,82 @@
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "defs.h"
+
+void main();
+void timerinit();
+
+// entry.S needs one stack per CPU.
+// 16-byte aligned — presumably to satisfy the RISC-V ABI's
+// stack-alignment rule; confirm against entry.S usage.
+__attribute__ ((aligned (16))) char stack0[4096 * NCPU];
+
+// scratch area for timer interrupt, one per CPU.
+// 32 uint64 slots per hart; timerinit() fills slots 4 and 5.
+uint64 mscratch0[NCPU * 32];
+
+// assembly code in kernelvec.S for machine-mode timer interrupt.
+extern void timervec();
+
+// entry.S jumps here in machine mode on stack0.
+// sets up machine-mode state, then "returns" into supervisor
+// mode at main() via mret (MPP=S, mepc=main).
+void
+start()
+{
+ // set M Previous Privilege mode to Supervisor, for mret.
+ unsigned long x = r_mstatus();
+ x &= ~MSTATUS_MPP_MASK;
+ x |= MSTATUS_MPP_S;
+ w_mstatus(x);
+
+ // set M Exception Program Counter to main, for mret.
+ // requires gcc -mcmodel=medany
+ w_mepc((uint64)main);
+
+ // disable paging for now.
+ w_satp(0);
+
+ // delegate all interrupts and exceptions to supervisor mode.
+ w_medeleg(0xffff);
+ w_mideleg(0xffff);
+
+ // ask for clock interrupts.
+ timerinit();
+
+ // keep each CPU's hartid in its tp register, for cpuid().
+ int id = r_mhartid();
+ w_tp(id);
+
+ // switch to supervisor mode and jump to main().
+ asm volatile("mret");
+}
+
+// set up to receive timer interrupts in machine mode,
+// which arrive at timervec in kernelvec.S,
+// which turns them into software interrupts for
+// devintr() in trap.c.
+void
+timerinit()
+{
+ // each CPU has a separate source of timer interrupts.
+ int id = r_mhartid();
+
+ // ask the CLINT for a timer interrupt.
+ int interval = 1000000; // cycles; about 1/10th second in qemu.
+ *(uint64*)CLINT_MTIMECMP(id) = *(uint64*)CLINT_MTIME + interval;
+
+ // prepare information in scratch[] for timervec.
+ // scratch[0..3] : space for timervec to save registers.
+ // scratch[4] : address of CLINT MTIMECMP register.
+ // scratch[5] : desired interval (in cycles) between timer interrupts.
+ // this per-hart block comes from mscratch0[] above.
+ uint64 *scratch = &mscratch0[32 * id];
+ scratch[4] = CLINT_MTIMECMP(id);
+ scratch[5] = interval;
+ w_mscratch((uint64)scratch);
+
+ // set the machine-mode trap handler.
+ w_mtvec((uint64)timervec);
+
+ // enable machine-mode interrupts.
+ w_mstatus(r_mstatus() | MSTATUS_MIE);
+
+ // enable machine-mode timer interrupts.
+ w_mie(r_mie() | MIE_MTIE);
+}
diff --git a/kernel/stat.h b/kernel/stat.h
new file mode 100644
index 0000000..19543af
--- /dev/null
+++ b/kernel/stat.h
@@ -0,0 +1,11 @@
+// inode/file types (values of stat.type and inode.type),
+// and the structure copied out to user space by fstat().
+#define T_DIR 1 // Directory
+#define T_FILE 2 // File
+#define T_DEVICE 3 // Device
+
+struct stat {
+ int dev; // File system's disk device
+ uint ino; // Inode number
+ short type; // Type of file
+ short nlink; // Number of links to file
+ uint64 size; // Size of file in bytes
+};
diff --git a/kernel/string.c b/kernel/string.c
new file mode 100644
index 0000000..d99e612
--- /dev/null
+++ b/kernel/string.c
@@ -0,0 +1,104 @@
+#include "types.h"
+
+// Fill the n bytes at dst with the byte value c.
+// Returns dst, like the standard memset.
+void*
+memset(void *dst, int c, uint n)
+{
+ uchar *p = (uchar *) dst;
+ uint i;
+
+ for(i = 0; i < n; i++)
+ p[i] = c;
+ return dst;
+}
+
+// Compare the first n bytes of v1 and v2 as unsigned chars.
+// Returns 0 if equal, otherwise the difference of the first
+// mismatching pair.
+int
+memcmp(const void *v1, const void *v2, uint n)
+{
+ const uchar *a = v1;
+ const uchar *b = v2;
+ uint i;
+
+ for(i = 0; i < n; i++){
+ if(a[i] != b[i])
+ return a[i] - b[i];
+ }
+ return 0;
+}
+
+// Copy n bytes from src to dst, handling overlap correctly.
+// Returns dst.
+void*
+memmove(void *dst, const void *src, uint n)
+{
+ char *d = dst;
+ const char *s = src;
+
+ if(d > s && s + n > d){
+ // dst overlaps the tail of src: copy backwards so the
+ // source bytes aren't clobbered before they are read.
+ while(n-- > 0)
+ d[n] = s[n];
+ } else {
+ uint i;
+ for(i = 0; i < n; i++)
+ d[i] = s[i];
+ }
+ return dst;
+}
+
+// memcpy exists to placate GCC. Use memmove.
+void*
+memcpy(void *dst, const void *src, uint n)
+{
+ // delegate: memmove is safe for the non-overlapping case too.
+ void *r = memmove(dst, src, n);
+ return r;
+}
+
+// Compare at most n bytes of p and q.
+// Returns 0 if they match through n bytes (or a shared NUL),
+// otherwise the unsigned difference of the first mismatch.
+int
+strncmp(const char *p, const char *q, uint n)
+{
+ for(; n > 0; n--, p++, q++){
+ if(*p == 0 || *p != *q)
+ break;
+ }
+ if(n == 0)
+ return 0;
+ return (uchar)*p - (uchar)*q;
+}
+
+// Copy at most n bytes of t into s, zero-padding any remainder.
+// WARNING: like the standard strncpy, the result is NOT
+// NUL-terminated when t is n or more bytes long; use safestrcpy
+// when a terminator must be guaranteed.
+char*
+strncpy(char *s, const char *t, int n)
+{
+ char *os;
+
+ os = s;
+ while(n-- > 0 && (*s++ = *t++) != 0)
+ ;
+ // note: n was decremented once more on loop exit, so at most
+ // n-1 pad bytes are written here — total writes never exceed n.
+ while(n-- > 0)
+ *s++ = 0;
+ return os;
+}
+
+// Like strncpy but guaranteed to NUL-terminate (when n > 0).
+// Copies at most n-1 bytes of t; if t ends early, its NUL is
+// copied and a second terminator is written after it, matching
+// the original byte-for-byte.
+char*
+safestrcpy(char *s, const char *t, int n)
+{
+ char *os = s;
+
+ if(n <= 0)
+ return os;
+ for(;;){
+ if(--n <= 0)
+ break;
+ if((*s++ = *t++) == 0)
+ break;
+ }
+ *s = 0;
+ return os;
+}
+
+// Return the number of bytes in s before the terminating NUL.
+int
+strlen(const char *s)
+{
+ const char *p = s;
+
+ while(*p)
+ p++;
+ return p - s;
+}
+
diff --git a/kernel/swtch.S b/kernel/swtch.S
new file mode 100644
index 0000000..17a8663
--- /dev/null
+++ b/kernel/swtch.S
@@ -0,0 +1,42 @@
+# Context switch
+#
+# void swtch(struct context *old, struct context *new);
+#
+# Save current registers in old. Load from new.
+#
+# Only ra, sp, and the callee-saved registers s0-s11 are
+# saved/restored: swtch is called as an ordinary C function,
+# so the compiler has already preserved caller-saved registers
+# at the call site. Loading the new ra means the final ret
+# "returns" into the new context.
+
+.globl swtch
+swtch:
+ sd ra, 0(a0)
+ sd sp, 8(a0)
+ sd s0, 16(a0)
+ sd s1, 24(a0)
+ sd s2, 32(a0)
+ sd s3, 40(a0)
+ sd s4, 48(a0)
+ sd s5, 56(a0)
+ sd s6, 64(a0)
+ sd s7, 72(a0)
+ sd s8, 80(a0)
+ sd s9, 88(a0)
+ sd s10, 96(a0)
+ sd s11, 104(a0)
+
+ ld ra, 0(a1)
+ ld sp, 8(a1)
+ ld s0, 16(a1)
+ ld s1, 24(a1)
+ ld s2, 32(a1)
+ ld s3, 40(a1)
+ ld s4, 48(a1)
+ ld s5, 56(a1)
+ ld s6, 64(a1)
+ ld s7, 72(a1)
+ ld s8, 80(a1)
+ ld s9, 88(a1)
+ ld s10, 96(a1)
+ ld s11, 104(a1)
+
+ ret
+
+
diff --git a/kernel/syscall.c b/kernel/syscall.c
new file mode 100644
index 0000000..97974d6
--- /dev/null
+++ b/kernel/syscall.c
@@ -0,0 +1,147 @@
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "spinlock.h"
+#include "proc.h"
+#include "syscall.h"
+#include "defs.h"
+
+// Fetch the uint64 at addr from the current process.
+// Returns 0 on success, -1 on a bad address.
+int
+fetchaddr(uint64 addr, uint64 *ip)
+{
+ struct proc *p = myproc();
+ // both comparisons are needed: addr+sizeof(uint64) can wrap
+ // around, in which case only the first test catches it.
+ if(addr >= p->sz || addr+sizeof(uint64) > p->sz)
+ return -1;
+ if(copyin(p->pagetable, (char *)ip, addr, sizeof(*ip)) != 0)
+ return -1;
+ return 0;
+}
+
+// Fetch the nul-terminated string at addr from the current process.
+// Copies it into buf via copyinstr(), at most max bytes.
+// Returns length of string, not including nul, or -1 for error.
+int
+fetchstr(uint64 addr, char *buf, int max)
+{
+ struct proc *p = myproc();
+ int err = copyinstr(p->pagetable, buf, addr, max);
+ if(err < 0)
+ return err;
+ return strlen(buf);
+}
+
+// Return the n'th system call argument: the user a0..a5
+// register values saved in the process's trapframe.
+static uint64
+argraw(int n)
+{
+ struct proc *p = myproc();
+ switch (n) {
+ case 0:
+ return p->tf->a0;
+ case 1:
+ return p->tf->a1;
+ case 2:
+ return p->tf->a2;
+ case 3:
+ return p->tf->a3;
+ case 4:
+ return p->tf->a4;
+ case 5:
+ return p->tf->a5;
+ }
+ panic("argraw");
+ return -1;
+}
+
+// Fetch the nth 32-bit system call argument.
+// (the 64-bit register value is truncated to int.)
+int
+argint(int n, int *ip)
+{
+ *ip = argraw(n);
+ return 0;
+}
+
+// Retrieve an argument as a pointer.
+// Doesn't check for legality, since
+// copyin/copyout will do that.
+int
+argaddr(int n, uint64 *ip)
+{
+ *ip = argraw(n);
+ return 0;
+}
+
+// Fetch the nth word-sized system call argument as a null-terminated string.
+// Copies into buf, at most max.
+// Returns string length if OK (not including the nul), -1 if error.
+int
+argstr(int n, char *buf, int max)
+{
+ uint64 addr;
+ if(argaddr(n, &addr) < 0)
+ return -1;
+ return fetchstr(addr, buf, max);
+}
+
+// Prototypes for the system call implementations in
+// sysproc.c and sysfile.c.
+extern uint64 sys_chdir(void);
+extern uint64 sys_close(void);
+extern uint64 sys_dup(void);
+extern uint64 sys_exec(void);
+extern uint64 sys_exit(void);
+extern uint64 sys_fork(void);
+extern uint64 sys_fstat(void);
+extern uint64 sys_getpid(void);
+extern uint64 sys_kill(void);
+extern uint64 sys_link(void);
+extern uint64 sys_mkdir(void);
+extern uint64 sys_mknod(void);
+extern uint64 sys_open(void);
+extern uint64 sys_pipe(void);
+extern uint64 sys_read(void);
+extern uint64 sys_sbrk(void);
+extern uint64 sys_sleep(void);
+extern uint64 sys_unlink(void);
+extern uint64 sys_wait(void);
+extern uint64 sys_write(void);
+extern uint64 sys_uptime(void);
+
+// Dispatch table indexed by the SYS_* numbers from syscall.h;
+// the designated initializers keep entry order independent of
+// the numbering. Unassigned slots are null pointers.
+static uint64 (*syscalls[])(void) = {
+[SYS_fork] sys_fork,
+[SYS_exit] sys_exit,
+[SYS_wait] sys_wait,
+[SYS_pipe] sys_pipe,
+[SYS_read] sys_read,
+[SYS_kill] sys_kill,
+[SYS_exec] sys_exec,
+[SYS_fstat] sys_fstat,
+[SYS_chdir] sys_chdir,
+[SYS_dup] sys_dup,
+[SYS_getpid] sys_getpid,
+[SYS_sbrk] sys_sbrk,
+[SYS_sleep] sys_sleep,
+[SYS_uptime] sys_uptime,
+[SYS_open] sys_open,
+[SYS_write] sys_write,
+[SYS_mknod] sys_mknod,
+[SYS_unlink] sys_unlink,
+[SYS_link] sys_link,
+[SYS_mkdir] sys_mkdir,
+[SYS_close] sys_close,
+};
+
+// Dispatch the system call whose number is in the saved user a7,
+// leaving the return value in the saved user a0 (or -1, with a
+// diagnostic, for an unknown number).
+void
+syscall(void)
+{
+ int num;
+ struct proc *p = myproc();
+
+ num = p->tf->a7;
+ if(num > 0 && num < NELEM(syscalls) && syscalls[num]) {
+ p->tf->a0 = syscalls[num]();
+ } else {
+ printf("%d %s: unknown sys call %d\n",
+ p->pid, p->name, num);
+ p->tf->a0 = -1;
+ }
+}
diff --git a/kernel/syscall.h b/kernel/syscall.h
new file mode 100644
index 0000000..bc5f356
--- /dev/null
+++ b/kernel/syscall.h
@@ -0,0 +1,22 @@
+// System call numbers
+// These index the syscalls[] dispatch table in syscall.c, and
+// presumably must also match the user-level stubs — verify
+// against the user library when changing them.
+#define SYS_fork 1
+#define SYS_exit 2
+#define SYS_wait 3
+#define SYS_pipe 4
+#define SYS_read 5
+#define SYS_kill 6
+#define SYS_exec 7
+#define SYS_fstat 8
+#define SYS_chdir 9
+#define SYS_dup 10
+#define SYS_getpid 11
+#define SYS_sbrk 12
+#define SYS_sleep 13
+#define SYS_uptime 14
+#define SYS_open 15
+#define SYS_write 16
+#define SYS_mknod 17
+#define SYS_unlink 18
+#define SYS_link 19
+#define SYS_mkdir 20
+#define SYS_close 21
diff --git a/kernel/sysfile.c b/kernel/sysfile.c
new file mode 100644
index 0000000..23a9540
--- /dev/null
+++ b/kernel/sysfile.c
@@ -0,0 +1,476 @@
+//
+// File-system system calls.
+// Mostly argument checking, since we don't trust
+// user code, and calls into file.c and fs.c.
+//
+
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "param.h"
+#include "stat.h"
+#include "spinlock.h"
+#include "proc.h"
+#include "fs.h"
+#include "sleeplock.h"
+#include "file.h"
+#include "fcntl.h"
+
+// Fetch the nth word-sized system call argument as a file descriptor
+// and return both the descriptor and the corresponding struct file.
+// Fails (-1) if the fd is out of range or not open.
+static int
+argfd(int n, int *pfd, struct file **pf)
+{
+ int fd;
+ struct file *f;
+
+ if(argint(n, &fd) < 0)
+ return -1;
+ if(fd < 0 || fd >= NOFILE || (f=myproc()->ofile[fd]) == 0)
+ return -1;
+ if(pfd)
+ *pfd = fd;
+ if(pf)
+ *pf = f;
+ return 0;
+}
+
+// Allocate a file descriptor for the given file.
+// Takes over file reference from caller on success.
+// (no filedup here — the caller's reference is installed as-is.)
+static int
+fdalloc(struct file *f)
+{
+ int fd;
+ struct proc *p = myproc();
+
+ for(fd = 0; fd < NOFILE; fd++){
+ if(p->ofile[fd] == 0){
+ p->ofile[fd] = f;
+ return fd;
+ }
+ }
+ return -1;
+}
+
+// dup(fd): allocate a new descriptor referring to the same file;
+// filedup bumps the reference count for the extra slot.
+uint64
+sys_dup(void)
+{
+ struct file *f;
+ int fd;
+
+ if(argfd(0, 0, &f) < 0)
+ return -1;
+ if((fd=fdalloc(f)) < 0)
+ return -1;
+ filedup(f);
+ return fd;
+}
+
+// read(fd, buf, n): p is the user buffer address; bounds are
+// checked later by copyout inside fileread.
+uint64
+sys_read(void)
+{
+ struct file *f;
+ int n;
+ uint64 p;
+
+ if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argaddr(1, &p) < 0)
+ return -1;
+ return fileread(f, p, n);
+}
+
+// write(fd, buf, n): mirror of sys_read.
+uint64
+sys_write(void)
+{
+ struct file *f;
+ int n;
+ uint64 p;
+
+ if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argaddr(1, &p) < 0)
+ return -1;
+
+ return filewrite(f, p, n);
+}
+
+// close(fd): clear the descriptor slot, then drop the reference.
+uint64
+sys_close(void)
+{
+ int fd;
+ struct file *f;
+
+ if(argfd(0, &fd, &f) < 0)
+ return -1;
+ myproc()->ofile[fd] = 0;
+ fileclose(f);
+ return 0;
+}
+
+// fstat(fd, st): fill the user's struct stat for the open file.
+uint64
+sys_fstat(void)
+{
+ struct file *f;
+ uint64 st; // user pointer to struct stat
+
+ if(argfd(0, 0, &f) < 0 || argaddr(1, &st) < 0)
+ return -1;
+ return filestat(f, st);
+}
+
+// Create the path new as a link to the same inode as old.
+// The nlink increment is committed before the directory entry is
+// created; the bad: path rolls it back if the second step fails.
+// Directories cannot be hard-linked (would create cycles).
+uint64
+sys_link(void)
+{
+ char name[DIRSIZ], new[MAXPATH], old[MAXPATH];
+ struct inode *dp, *ip;
+
+ if(argstr(0, old, MAXPATH) < 0 || argstr(1, new, MAXPATH) < 0)
+ return -1;
+
+ begin_op();
+ if((ip = namei(old)) == 0){
+ end_op();
+ return -1;
+ }
+
+ ilock(ip);
+ if(ip->type == T_DIR){
+ iunlockput(ip);
+ end_op();
+ return -1;
+ }
+
+ ip->nlink++;
+ iupdate(ip);
+ // unlock ip before locking dp: two inode locks are never held
+ // at once here, avoiding deadlock with other path operations.
+ iunlock(ip);
+
+ if((dp = nameiparent(new, name)) == 0)
+ goto bad;
+ ilock(dp);
+ if(dp->dev != ip->dev || dirlink(dp, name, ip->inum) < 0){
+ iunlockput(dp);
+ goto bad;
+ }
+ iunlockput(dp);
+ iput(ip);
+
+ end_op();
+
+ return 0;
+
+bad:
+ ilock(ip);
+ ip->nlink--;
+ iupdate(ip);
+ iunlockput(ip);
+ end_op();
+ return -1;
+}
+
+// Is the directory dp empty except for "." and ".." ?
+// starts at offset 2*sizeof(de), skipping the first two
+// entries ("." and ".."); any in-use entry after that means
+// the directory is not empty.
+static int
+isdirempty(struct inode *dp)
+{
+ int off;
+ struct dirent de;
+
+ for(off=2*sizeof(de); off<dp->size; off+=sizeof(de)){
+ if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
+ panic("isdirempty: readi");
+ if(de.inum != 0)
+ return 0;
+ }
+ return 1;
+}
+
+// unlink(path): remove the directory entry for path and
+// decrement the inode's link count; the inode itself is freed
+// by iput/iunlockput once the count and references drop to zero.
+uint64
+sys_unlink(void)
+{
+ struct inode *ip, *dp;
+ struct dirent de;
+ char name[DIRSIZ], path[MAXPATH];
+ uint off;
+
+ if(argstr(0, path, MAXPATH) < 0)
+ return -1;
+
+ begin_op();
+ if((dp = nameiparent(path, name)) == 0){
+ end_op();
+ return -1;
+ }
+
+ ilock(dp);
+
+ // Cannot unlink "." or "..".
+ if(namecmp(name, ".") == 0 || namecmp(name, "..") == 0)
+ goto bad;
+
+ if((ip = dirlookup(dp, name, &off)) == 0)
+ goto bad;
+ ilock(ip);
+
+ if(ip->nlink < 1)
+ panic("unlink: nlink < 1");
+ // a directory may only be removed when empty.
+ if(ip->type == T_DIR && !isdirempty(ip)){
+ iunlockput(ip);
+ goto bad;
+ }
+
+ // erase the directory entry by writing a zeroed dirent.
+ memset(&de, 0, sizeof(de));
+ if(writei(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
+ panic("unlink: writei");
+ if(ip->type == T_DIR){
+ // the removed directory's ".." no longer references dp.
+ dp->nlink--;
+ iupdate(dp);
+ }
+ iunlockput(dp);
+
+ ip->nlink--;
+ iupdate(ip);
+ iunlockput(ip);
+
+ end_op();
+
+ return 0;
+
+bad:
+ iunlockput(dp);
+ end_op();
+ return -1;
+}
+
+// Create a new inode of the given type at path, or — for T_FILE
+// only — return the existing file/device inode if path exists.
+// On success the returned inode is LOCKED and referenced; the
+// caller must iunlock(put) it. Returns 0 on failure.
+static struct inode*
+create(char *path, short type, short major, short minor)
+{
+ struct inode *ip, *dp;
+ char name[DIRSIZ];
+
+ if((dp = nameiparent(path, name)) == 0)
+ return 0;
+ ilock(dp);
+
+ if((ip = dirlookup(dp, name, 0)) != 0){
+ iunlockput(dp);
+ ilock(ip);
+ if(type == T_FILE && (ip->type == T_FILE || ip->type == T_DEVICE))
+ return ip;
+ iunlockput(ip);
+ return 0;
+ }
+
+ if((ip = ialloc(dp->dev, type)) == 0)
+ panic("create: ialloc");
+
+ ilock(ip);
+ ip->major = major;
+ ip->minor = minor;
+ ip->nlink = 1;
+ iupdate(ip);
+
+ if(type == T_DIR){ // Create . and .. entries.
+ dp->nlink++; // for ".."
+ iupdate(dp);
+ // No ip->nlink++ for ".": avoid cyclic ref count.
+ if(dirlink(ip, ".", ip->inum) < 0 || dirlink(ip, "..", dp->inum) < 0)
+ panic("create dots");
+ }
+
+ if(dirlink(dp, name, ip->inum) < 0)
+ panic("create: dirlink");
+
+ iunlockput(dp);
+
+ return ip;
+}
+
+// open(path, omode): open or (with O_CREATE) create a file and
+// return a new file descriptor, or -1 on failure.
+uint64
+sys_open(void)
+{
+ char path[MAXPATH];
+ int fd, omode;
+ struct file *f;
+ struct inode *ip;
+
+ if(argstr(0, path, MAXPATH) < 0 || argint(1, &omode) < 0)
+ return -1;
+
+ begin_op();
+
+ if(omode & O_CREATE){
+ // create() returns the inode locked.
+ ip = create(path, T_FILE, 0, 0);
+ if(ip == 0){
+ end_op();
+ return -1;
+ }
+ } else {
+ if((ip = namei(path)) == 0){
+ end_op();
+ return -1;
+ }
+ ilock(ip);
+ // directories may only be opened read-only.
+ if(ip->type == T_DIR && omode != O_RDONLY){
+ iunlockput(ip);
+ end_op();
+ return -1;
+ }
+ }
+
+ // reject device nodes whose major number has no driver slot.
+ if(ip->type == T_DEVICE && (ip->major < 0 || ip->major >= NDEV)){
+ iunlockput(ip);
+ end_op();
+ return -1;
+ }
+
+ if((f = filealloc()) == 0 || (fd = fdalloc(f)) < 0){
+ if(f)
+ fileclose(f);
+ iunlockput(ip);
+ end_op();
+ return -1;
+ }
+
+ if(ip->type == T_DEVICE){
+ f->type = FD_DEVICE;
+ f->major = ip->major;
+ } else {
+ f->type = FD_INODE;
+ f->off = 0;
+ }
+ f->ip = ip;
+ f->readable = !(omode & O_WRONLY);
+ f->writable = (omode & O_WRONLY) || (omode & O_RDWR);
+
+ // keep the inode reference (held by f->ip) but drop the lock.
+ iunlock(ip);
+ end_op();
+
+ return fd;
+}
+
+// mkdir(path): create a directory (with . and .. entries).
+uint64
+sys_mkdir(void)
+{
+ char path[MAXPATH];
+ struct inode *ip;
+
+ begin_op();
+ if(argstr(0, path, MAXPATH) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0){
+ end_op();
+ return -1;
+ }
+ // create() returned ip locked; release lock and reference.
+ iunlockput(ip);
+ end_op();
+ return 0;
+}
+
+// mknod(path, major, minor): create a device node.
+uint64
+sys_mknod(void)
+{
+ struct inode *ip;
+ char path[MAXPATH];
+ int major, minor;
+
+ begin_op();
+ if((argstr(0, path, MAXPATH)) < 0 ||
+ argint(1, &major) < 0 ||
+ argint(2, &minor) < 0 ||
+ (ip = create(path, T_DEVICE, major, minor)) == 0){
+ end_op();
+ return -1;
+ }
+ iunlockput(ip);
+ end_op();
+ return 0;
+}
+
+// chdir(path): change the current working directory.
+// the old cwd's reference is dropped inside the transaction;
+// the new (already-referenced) inode is installed afterwards.
+uint64
+sys_chdir(void)
+{
+ char path[MAXPATH];
+ struct inode *ip;
+ struct proc *p = myproc();
+
+ begin_op();
+ if(argstr(0, path, MAXPATH) < 0 || (ip = namei(path)) == 0){
+ end_op();
+ return -1;
+ }
+ ilock(ip);
+ if(ip->type != T_DIR){
+ iunlockput(ip);
+ end_op();
+ return -1;
+ }
+ iunlock(ip);
+ iput(p->cwd);
+ end_op();
+ p->cwd = ip;
+ return 0;
+}
+
+// exec(path, argv): replace the current process image.
+// Copies the argument strings out of user space into freshly
+// allocated kernel pages, then calls exec(). Returns exec()'s
+// result on success, -1 on failure.
+//
+// Fix over the original: every error path now frees the kernel
+// pages already allocated into argv[], and a failed kalloc() is
+// an ordinary error return instead of a panic — previously a
+// user program could leak kernel pages (or crash the kernel)
+// by passing a bad argv.
+uint64
+sys_exec(void)
+{
+ char path[MAXPATH], *argv[MAXARG];
+ int i;
+ uint64 uargv, uarg;
+
+ if(argstr(0, path, MAXPATH) < 0 || argaddr(1, &uargv) < 0){
+ return -1;
+ }
+ memset(argv, 0, sizeof(argv));
+ for(i=0;; i++){
+ if(i >= NELEM(argv)){
+ goto bad;
+ }
+ if(fetchaddr(uargv+sizeof(uint64)*i, (uint64*)&uarg) < 0){
+ goto bad;
+ }
+ if(uarg == 0){
+ argv[i] = 0;
+ break;
+ }
+ argv[i] = kalloc();
+ if(argv[i] == 0)
+ goto bad;
+ if(fetchstr(uarg, argv[i], PGSIZE) < 0){
+ goto bad;
+ }
+ }
+
+ int ret = exec(path, argv);
+
+ // exec() does not take ownership of the argument pages;
+ // free them whether it succeeded or failed.
+ for(i = 0; i < NELEM(argv) && argv[i] != 0; i++)
+ kfree(argv[i]);
+
+ return ret;
+
+bad:
+ for(i = 0; i < NELEM(argv) && argv[i] != 0; i++)
+ kfree(argv[i]);
+ return -1;
+}
+
+// pipe(fdarray): create a pipe and store the read/write
+// descriptors into the user's two-int array. On any failure the
+// descriptor slots are rolled back and both ends are closed
+// (pipealloc gave each end exactly one reference).
+uint64
+sys_pipe(void)
+{
+ uint64 fdarray; // user pointer to array of two integers
+ struct file *rf, *wf;
+ int fd0, fd1;
+ struct proc *p = myproc();
+
+ if(argaddr(0, &fdarray) < 0)
+ return -1;
+ if(pipealloc(&rf, &wf) < 0)
+ return -1;
+ fd0 = -1;
+ if((fd0 = fdalloc(rf)) < 0 || (fd1 = fdalloc(wf)) < 0){
+ if(fd0 >= 0)
+ p->ofile[fd0] = 0;
+ fileclose(rf);
+ fileclose(wf);
+ return -1;
+ }
+ if(copyout(p->pagetable, fdarray, (char*)&fd0, sizeof(fd0)) < 0 ||
+ copyout(p->pagetable, fdarray+sizeof(fd0), (char *)&fd1, sizeof(fd1)) < 0){
+ p->ofile[fd0] = 0;
+ p->ofile[fd1] = 0;
+ fileclose(rf);
+ fileclose(wf);
+ return -1;
+ }
+ return 0;
+}
diff --git a/kernel/sysproc.c b/kernel/sysproc.c
new file mode 100644
index 0000000..face81a
--- /dev/null
+++ b/kernel/sysproc.c
@@ -0,0 +1,91 @@
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "date.h"
+#include "param.h"
+#include "memlayout.h"
+#include "spinlock.h"
+#include "proc.h"
+
+// exit(): terminate the calling process; never returns.
+uint64
+sys_exit(void)
+{
+ exit();
+ return 0; // not reached
+}
+
+// getpid(): the calling process's pid.
+uint64
+sys_getpid(void)
+{
+ return myproc()->pid;
+}
+
+// fork(): child's pid to the parent, 0 to the child, -1 on failure.
+uint64
+sys_fork(void)
+{
+ return fork();
+}
+
+// wait(): block until a child exits; returns its pid, or -1.
+uint64
+sys_wait(void)
+{
+ return wait();
+}
+
+// sbrk(n): grow (or shrink, for negative n) the process's
+// memory by n bytes. Returns the previous program break, or -1
+// if growproc() fails.
+uint64
+sys_sbrk(void)
+{
+ uint64 addr;
+ int n;
+
+ if(argint(0, &n) < 0)
+ return -1;
+ // addr is uint64 (the original used int): the old break is
+ // returned intact even once the process grows past 2GB.
+ addr = myproc()->sz;
+ if(growproc(n) < 0)
+ return -1;
+ return addr;
+}
+
+// sleep(n): suspend the calling process for at least n clock
+// ticks. Returns 0, or -1 if the process was killed meanwhile.
+uint64
+sys_sleep(void)
+{
+ int n;
+ uint ticks0;
+
+ if(argint(0, &n) < 0)
+ return -1;
+ // Clamp negative requests to 0: n is compared against the
+ // unsigned tick delta below, so a negative n would otherwise
+ // be promoted to a huge unsigned value and sleep ~2^32 ticks.
+ if(n < 0)
+ n = 0;
+ acquire(&tickslock);
+ ticks0 = ticks;
+ while(ticks - ticks0 < n){
+ if(myproc()->killed){
+ release(&tickslock);
+ return -1;
+ }
+ // sleep on &ticks; clockintr()'s wakeup re-checks the loop.
+ sleep(&ticks, &tickslock);
+ }
+ release(&tickslock);
+ return 0;
+}
+
+// kill(pid): mark the process with the given pid as killed.
+uint64
+sys_kill(void)
+{
+ int pid;
+
+ if(argint(0, &pid) < 0)
+ return -1;
+ return kill(pid);
+}
+
+// return how many clock tick interrupts have occurred
+// since start.
+uint64
+sys_uptime(void)
+{
+ uint xticks;
+
+ // tickslock makes the read consistent with clockintr().
+ acquire(&tickslock);
+ xticks = ticks;
+ release(&tickslock);
+ return xticks;
+}
diff --git a/kernel/trampoline.S b/kernel/trampoline.S
new file mode 100644
index 0000000..24499d9
--- /dev/null
+++ b/kernel/trampoline.S
@@ -0,0 +1,141 @@
+ #
+ # code to switch between user and kernel space.
+ #
+ # this code is mapped at the same virtual address
+ # (TRAMPOLINE) in user and kernel space so that
+ # it continues to work when it switches page tables.
+ #
+ # kernel.ld causes this to be aligned
+ # to a page boundary.
+ #
+ .section trampsec
+.globl trampoline
+trampoline:
+.align 4
+.globl uservec
+uservec:
+ #
+ # trap.c sets stvec to point here, so
+ # traps from user space start here,
+ # in supervisor mode, but with a
+ # user page table.
+ #
+ # sscratch points to where the process's p->tf is
+ # mapped into user space, at TRAPFRAME.
+ #
+
+ # swap a0 and sscratch
+ # so that a0 is TRAPFRAME
+ csrrw a0, sscratch, a0
+
+ # save the user registers in TRAPFRAME
+ # (offsets match the trapframe layout; a0 itself is
+ # still in sscratch and is saved below.)
+ sd ra, 40(a0)
+ sd sp, 48(a0)
+ sd gp, 56(a0)
+ sd tp, 64(a0)
+ sd t0, 72(a0)
+ sd t1, 80(a0)
+ sd t2, 88(a0)
+ sd s0, 96(a0)
+ sd s1, 104(a0)
+ sd a1, 120(a0)
+ sd a2, 128(a0)
+ sd a3, 136(a0)
+ sd a4, 144(a0)
+ sd a5, 152(a0)
+ sd a6, 160(a0)
+ sd a7, 168(a0)
+ sd s2, 176(a0)
+ sd s3, 184(a0)
+ sd s4, 192(a0)
+ sd s5, 200(a0)
+ sd s6, 208(a0)
+ sd s7, 216(a0)
+ sd s8, 224(a0)
+ sd s9, 232(a0)
+ sd s10, 240(a0)
+ sd s11, 248(a0)
+ sd t3, 256(a0)
+ sd t4, 264(a0)
+ sd t5, 272(a0)
+ sd t6, 280(a0)
+
+ # save the user a0 in p->tf->a0
+ csrr t0, sscratch
+ sd t0, 112(a0)
+
+ # restore kernel stack pointer from p->tf->kernel_sp
+ ld sp, 8(a0)
+
+ # make tp hold the current hartid, from p->tf->kernel_hartid
+ ld tp, 32(a0)
+
+ # remember the address of usertrap(), p->tf->kernel_trap
+ ld t0, 16(a0)
+
+ # restore kernel page table from p->tf->kernel_satp
+ ld t1, 0(a0)
+ sfence.vma zero, zero
+ csrw satp, t1
+
+ # a0 is no longer valid, since the kernel page
+ # table does not specially map p->tf.
+
+ # jump to usertrap(), which does not return
+ jr t0
+
+.globl userret
+userret:
+ # userret(TRAPFRAME, pagetable)
+ # switch from kernel to user.
+ # usertrapret() calls here.
+ # a0: TRAPFRAME, in user page table
+ # a1: user page table, for satp
+
+ # switch to the user page table.
+ sfence.vma zero, zero
+ csrw satp, a1
+
+ # put the saved user a0 in sscratch, so we
+ # can swap it with our a0 (TRAPFRAME) in the last step.
+ ld t0, 112(a0)
+ csrw sscratch, t0
+
+ # restore all but a0 from TRAPFRAME
+ ld ra, 40(a0)
+ ld sp, 48(a0)
+ ld gp, 56(a0)
+ ld tp, 64(a0)
+ ld t0, 72(a0)
+ ld t1, 80(a0)
+ ld t2, 88(a0)
+ ld s0, 96(a0)
+ ld s1, 104(a0)
+ ld a1, 120(a0)
+ ld a2, 128(a0)
+ ld a3, 136(a0)
+ ld a4, 144(a0)
+ ld a5, 152(a0)
+ ld a6, 160(a0)
+ ld a7, 168(a0)
+ ld s2, 176(a0)
+ ld s3, 184(a0)
+ ld s4, 192(a0)
+ ld s5, 200(a0)
+ ld s6, 208(a0)
+ ld s7, 216(a0)
+ ld s8, 224(a0)
+ ld s9, 232(a0)
+ ld s10, 240(a0)
+ ld s11, 248(a0)
+ ld t3, 256(a0)
+ ld t4, 264(a0)
+ ld t5, 272(a0)
+ ld t6, 280(a0)
+
+ # restore user a0, and save TRAPFRAME in sscratch
+ csrrw a0, sscratch, a0
+
+ # return to user mode and user pc.
+ # usertrapret() set up sstatus and sepc.
+ sret
diff --git a/kernel/trap.c b/kernel/trap.c
new file mode 100644
index 0000000..ec57bed
--- /dev/null
+++ b/kernel/trap.c
@@ -0,0 +1,213 @@
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "spinlock.h"
+#include "proc.h"
+#include "defs.h"
+
+// tickslock protects ticks, the global clock-tick counter
+// incremented by clockintr().
+struct spinlock tickslock;
+uint ticks;
+
+// symbols from trampoline.S, used to compute TRAMPOLINE offsets.
+extern char trampoline[], uservec[], userret[];
+
+// in kernelvec.S, calls kerneltrap().
+void kernelvec();
+
+extern int devintr();
+
+// one-time trap setup: just the tick lock.
+void
+trapinit(void)
+{
+ initlock(&tickslock, "time");
+}
+
+// set up to take exceptions and traps while in the kernel.
+// called per-hart: points stvec at kernelvec.
+void
+trapinithart(void)
+{
+ w_stvec((uint64)kernelvec);
+}
+
+//
+// handle an interrupt, exception, or system call from user space.
+// called from trampoline.S
+//
+void
+usertrap(void)
+{
+ int which_dev = 0;
+
+ // SPP set would mean the trap came from supervisor mode.
+ if((r_sstatus() & SSTATUS_SPP) != 0)
+ panic("usertrap: not from user mode");
+
+ // send interrupts and exceptions to kerneltrap(),
+ // since we're now in the kernel.
+ w_stvec((uint64)kernelvec);
+
+ struct proc *p = myproc();
+
+ // save user program counter.
+ p->tf->epc = r_sepc();
+
+ if(r_scause() == 8){
+ // system call
+
+ if(p->killed)
+ exit();
+
+ // sepc points to the ecall instruction,
+ // but we want to return to the next instruction.
+ p->tf->epc += 4;
+
+ // an interrupt will change sstatus &c registers,
+ // so don't enable until done with those registers.
+ intr_on();
+
+ syscall();
+ } else if((which_dev = devintr()) != 0){
+ // ok
+ } else {
+ // unexpected trap: kill the offending process.
+ printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid);
+ printf(" sepc=%p stval=%p\n", r_sepc(), r_stval());
+ p->killed = 1;
+ }
+
+ if(p->killed)
+ exit();
+
+ // give up the CPU if this is a timer interrupt.
+ if(which_dev == 2)
+ yield();
+
+ usertrapret();
+}
+
+//
+// return to user space
+// (also the first path a new process takes into user mode.)
+//
+void
+usertrapret(void)
+{
+ struct proc *p = myproc();
+
+ // turn off interrupts, since we're switching
+ // now from kerneltrap() to usertrap().
+ intr_off();
+
+ // send interrupts and exceptions to trampoline.S
+ w_stvec(TRAMPOLINE + (uservec - trampoline));
+
+ // set up values that uservec will need when
+ // the process next re-enters the kernel.
+ p->tf->kernel_satp = r_satp(); // kernel page table
+ p->tf->kernel_sp = p->kstack + PGSIZE; // process's kernel stack
+ p->tf->kernel_trap = (uint64)usertrap;
+ p->tf->kernel_hartid = r_tp(); // hartid for cpuid()
+
+ // set up the registers that trampoline.S's sret will use
+ // to get to user space.
+
+ // set S Previous Privilege mode to User.
+ unsigned long x = r_sstatus();
+ x &= ~SSTATUS_SPP; // clear SPP to 0 for user mode
+ x |= SSTATUS_SPIE; // enable interrupts in user mode
+ w_sstatus(x);
+
+ // set S Exception Program Counter to the saved user pc.
+ w_sepc(p->tf->epc);
+
+ // tell trampoline.S the user page table to switch to.
+ uint64 satp = MAKE_SATP(p->pagetable);
+
+ // jump to trampoline.S at the top of memory, which
+ // switches to the user page table, restores user registers,
+ // and switches to user mode with sret.
+ uint64 fn = TRAMPOLINE + (userret - trampoline);
+ ((void (*)(uint64,uint64))fn)(TRAPFRAME, satp);
+}
+
+// interrupts and exceptions from kernel code go here via kernelvec,
+// on whatever the current kernel stack is.
+// must be 4-byte aligned to fit in stvec.
+void
+kerneltrap()
+{
+ int which_dev = 0;
+ // save trap CSRs: a yield() below may trap again and clobber them.
+ uint64 sepc = r_sepc();
+ uint64 sstatus = r_sstatus();
+ uint64 scause = r_scause();
+
+ if((sstatus & SSTATUS_SPP) == 0)
+ panic("kerneltrap: not from supervisor mode");
+ if(intr_get() != 0)
+ panic("kerneltrap: interrupts enabled");
+
+ // only device interrupts are expected here; anything else
+ // is a kernel bug.
+ if((which_dev = devintr()) == 0){
+ printf("scause %p\n", scause);
+ printf("sepc=%p stval=%p\n", r_sepc(), r_stval());
+ panic("kerneltrap");
+ }
+
+ // give up the CPU if this is a timer interrupt.
+ if(which_dev == 2 && myproc() != 0 && myproc()->state == RUNNING)
+ yield();
+
+ // the yield() may have caused some traps to occur,
+ // so restore trap registers for use by kernelvec.S's sepc instruction.
+ w_sepc(sepc);
+ w_sstatus(sstatus);
+}
+
+// advance the tick counter and wake sleepers in sys_sleep().
+// called from devintr() on cpu 0 only.
+void
+clockintr()
+{
+ acquire(&tickslock);
+ ticks++;
+ wakeup(&ticks);
+ release(&tickslock);
+}
+
+// check if it's an external interrupt or software interrupt,
+// and handle it.
+// returns 2 if timer interrupt,
+// 1 if other device,
+// 0 if not recognized.
+int
+devintr()
+{
+ uint64 scause = r_scause();
+
+ // high bit of scause = interrupt (vs exception);
+ // low bits 9 = supervisor external interrupt.
+ if((scause & 0x8000000000000000L) &&
+ (scause & 0xff) == 9){
+ // this is a supervisor external interrupt, via PLIC.
+
+ // irq indicates which device interrupted.
+ int irq = plic_claim();
+
+ if(irq == UART0_IRQ){
+ uartintr();
+ } else if(irq == VIRTIO0_IRQ){
+ virtio_disk_intr();
+ }
+
+ plic_complete(irq);
+ return 1;
+ } else if(scause == 0x8000000000000001L){
+ // software interrupt from a machine-mode timer interrupt,
+ // forwarded by timervec in kernelvec.S.
+
+ // only cpu 0 advances the global tick count.
+ if(cpuid() == 0){
+ clockintr();
+ }
+
+ // acknowledge the software interrupt by clearing
+ // the SSIP bit in sip.
+ w_sip(r_sip() & ~2);
+
+ return 2;
+ } else {
+ return 0;
+ }
+}
+
diff --git a/kernel/types.h b/kernel/types.h
new file mode 100644
index 0000000..ee73164
--- /dev/null
+++ b/kernel/types.h
@@ -0,0 +1,10 @@
+// short names for common unsigned types.
+typedef unsigned int uint;
+typedef unsigned short ushort;
+typedef unsigned char uchar;
+
+// fixed-width types; uint64 assumes `long` is 64 bits here
+// (true for the RV64 target this kernel is built for).
+typedef unsigned char uint8;
+typedef unsigned short uint16;
+typedef unsigned int uint32;
+typedef unsigned long uint64;
+
+typedef uint64 pde_t;
diff --git a/kernel/uart.c b/kernel/uart.c
new file mode 100644
index 0000000..3a5cdc4
--- /dev/null
+++ b/kernel/uart.c
@@ -0,0 +1,92 @@
+//
+// low-level driver routines for 16550a UART.
+//
+
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "spinlock.h"
+#include "proc.h"
+#include "defs.h"
+
+// the UART control registers are memory-mapped
+// at address UART0. this macro returns the
+// address of one of the registers.
+#define Reg(reg) ((volatile unsigned char *)(UART0 + reg))
+
+// the UART control registers.
+// some have different meanings for
+// read vs write.
+// http://byterunner.com/16550.html
+#define RHR 0 // receive holding register (for input bytes)
+#define THR 0 // transmit holding register (for output bytes)
+#define IER 1 // interrupt enable register
+#define FCR 2 // FIFO control register
+#define ISR 2 // interrupt status register
+#define LCR 3 // line control register
+#define LSR 5 // line status register
+
+#define ReadReg(reg) (*(Reg(reg)))
+#define WriteReg(reg, v) (*(Reg(reg)) = (v))
+
+// initialize the 16550a: program the baud rate, select
+// 8-bit words with no parity, enable the FIFOs, and
+// enable receive interrupts.
+void
+uartinit(void)
+{
+  // disable interrupts.
+  WriteReg(IER, 0x00);
+
+  // special mode to set baud rate.
+  // (while LCR bit 7 is set, registers 0 and 1 are the
+  // baud-rate divisor latch.)
+  WriteReg(LCR, 0x80);
+
+  // LSB for baud rate of 38.4K.
+  WriteReg(0, 0x03);
+
+  // MSB for baud rate of 38.4K.
+  WriteReg(1, 0x00);
+
+  // leave set-baud mode,
+  // and set word length to 8 bits, no parity.
+  WriteReg(LCR, 0x03);
+
+  // reset and enable FIFOs.
+  WriteReg(FCR, 0x07);
+
+  // enable receive interrupts.
+  WriteReg(IER, 0x01);
+}
+
+// write one output character to the UART.
+// spins until the transmit holding register is empty.
+void
+uartputc(int c)
+{
+  // LSR bit 5 == transmit holding register empty.
+  for(;;){
+    if(ReadReg(LSR) & (1 << 5))
+      break;
+  }
+  WriteReg(THR, c);
+}
+
+// read one input character from the UART.
+// return -1 if none is waiting.
+int
+uartgetc(void)
+{
+  // LSR bit 0 == receive data ready.
+  if((ReadReg(LSR) & 0x01) == 0)
+    return -1;
+  return ReadReg(RHR);
+}
+
+// trap.c calls here when the uart interrupts.
+// drain all pending input characters and hand
+// each one to the console driver.
+void
+uartintr(void)
+{
+  int c;
+
+  while((c = uartgetc()) != -1)
+    consoleintr(c);
+}
diff --git a/kernel/virtio.h b/kernel/virtio.h
new file mode 100644
index 0000000..03b53a9
--- /dev/null
+++ b/kernel/virtio.h
@@ -0,0 +1,72 @@
+//
+// virtio device definitions.
+// for both the mmio interface, and virtio descriptors.
+// only tested with qemu.
+// this is the "legacy" virtio interface.
+//
+// the virtio spec:
+// https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.pdf
+//
+
+// virtio mmio control registers, mapped starting at 0x10001000.
+// from qemu virtio_mmio.h
+#define VIRTIO_MMIO_MAGIC_VALUE 0x000 // 0x74726976
+#define VIRTIO_MMIO_VERSION 0x004 // version; 1 is legacy
+#define VIRTIO_MMIO_DEVICE_ID 0x008 // device type; 1 is net, 2 is disk
+#define VIRTIO_MMIO_VENDOR_ID 0x00c // 0x554d4551
+#define VIRTIO_MMIO_DEVICE_FEATURES 0x010
+#define VIRTIO_MMIO_DRIVER_FEATURES 0x020
+#define VIRTIO_MMIO_GUEST_PAGE_SIZE 0x028 // page size for PFN, write-only
+#define VIRTIO_MMIO_QUEUE_SEL 0x030 // select queue, write-only
+#define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034 // max size of current queue, read-only
+#define VIRTIO_MMIO_QUEUE_NUM 0x038 // size of current queue, write-only
+#define VIRTIO_MMIO_QUEUE_ALIGN 0x03c // used ring alignment, write-only
+#define VIRTIO_MMIO_QUEUE_PFN 0x040 // physical page number for queue, read/write
+#define VIRTIO_MMIO_QUEUE_READY 0x044 // ready bit
+#define VIRTIO_MMIO_QUEUE_NOTIFY 0x050 // write-only
+#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060 // read-only
+#define VIRTIO_MMIO_INTERRUPT_ACK 0x064 // write-only
+#define VIRTIO_MMIO_STATUS 0x070 // read/write
+
+// status register bits, from qemu virtio_config.h
+#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1
+#define VIRTIO_CONFIG_S_DRIVER 2
+#define VIRTIO_CONFIG_S_DRIVER_OK 4
+#define VIRTIO_CONFIG_S_FEATURES_OK 8
+
+// device feature bits
+#define VIRTIO_BLK_F_RO 5 /* Disk is read-only */
+#define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */
+#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */
+#define VIRTIO_F_ANY_LAYOUT 27
+#define VIRTIO_RING_F_INDIRECT_DESC 28
+#define VIRTIO_RING_F_EVENT_IDX 29
+
+// this many virtio descriptors.
+// must be a power of two.
+#define NUM 8
+
+// a descriptor: one guest-physical buffer the device should
+// read or write, optionally chained to another descriptor.
+struct VRingDesc {
+  uint64 addr;  // guest-physical address of the buffer
+  uint32 len;   // buffer length in bytes
+  uint16 flags; // VRING_DESC_F_* bits below
+  uint16 next;  // next descriptor index, if F_NEXT is set
+};
+#define VRING_DESC_F_NEXT 1 // chained with another descriptor
+#define VRING_DESC_F_WRITE 2 // device writes (vs read)
+
+// one used-ring entry: a request the device has completed.
+struct VRingUsedElem {
+  uint32 id; // index of start of completed descriptor chain
+  uint32 len;
+};
+
+// for disk ops
+#define VIRTIO_BLK_T_IN 0 // read the disk
+#define VIRTIO_BLK_T_OUT 1 // write the disk
+
+// the used ring: written by the device as requests complete.
+struct UsedArea {
+  uint16 flags;
+  uint16 id; // where the device will write the next elems[] entry
+  struct VRingUsedElem elems[NUM];
+};
diff --git a/kernel/virtio_disk.c b/kernel/virtio_disk.c
new file mode 100644
index 0000000..3cff024
--- /dev/null
+++ b/kernel/virtio_disk.c
@@ -0,0 +1,269 @@
+//
+// driver for qemu's virtio disk device.
+// uses qemu's mmio interface to virtio.
+// qemu presents a "legacy" virtio interface.
+//
+// qemu ... -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0
+//
+
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "param.h"
+#include "memlayout.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "fs.h"
+#include "buf.h"
+#include "virtio.h"
+
+// the address of virtio mmio register r.
+#define R(r) ((volatile uint32 *)(VIRTIO0 + (r)))
+
+static struct disk {
+  // memory for virtio descriptors &c for queue 0.
+  // this is a global instead of allocated because it must
+  // be multiple contiguous pages, which kalloc()
+  // doesn't support, and page aligned.
+  char pages[2*PGSIZE];
+  // views into pages[], set up by virtio_disk_init():
+  // descriptor table first, then the avail ring, with the
+  // used ring on the second page.
+  struct VRingDesc *desc;
+  uint16 *avail;
+  struct UsedArea *used;
+
+  // our own book-keeping.
+  char free[NUM]; // is a descriptor free?
+  uint16 used_idx; // we've looked this far in used[2..NUM].
+
+  // track info about in-flight operations,
+  // for use when completion interrupt arrives.
+  // indexed by first descriptor index of chain.
+  struct {
+    struct buf *b;
+    char status;
+  } info[NUM];
+
+  // protects this struct and serializes access to the
+  // device's mmio registers.
+  struct spinlock vdisk_lock;
+
+} __attribute__ ((aligned (PGSIZE))) disk;
+
+// find and configure the virtio disk: sanity-check the mmio
+// device, negotiate features, and lay out the descriptor,
+// avail, and used rings for queue 0 in disk.pages[].
+void
+virtio_disk_init(void)
+{
+  uint32 status = 0;
+
+  initlock(&disk.vdisk_lock, "virtio_disk");
+
+  if(*R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 ||
+     *R(VIRTIO_MMIO_VERSION) != 1 ||
+     *R(VIRTIO_MMIO_DEVICE_ID) != 2 ||
+     *R(VIRTIO_MMIO_VENDOR_ID) != 0x554d4551){
+    panic("could not find virtio disk");
+  }
+
+  status |= VIRTIO_CONFIG_S_ACKNOWLEDGE;
+  *R(VIRTIO_MMIO_STATUS) = status;
+
+  status |= VIRTIO_CONFIG_S_DRIVER;
+  *R(VIRTIO_MMIO_STATUS) = status;
+
+  // negotiate features: turn off everything this driver
+  // doesn't understand or use.
+  uint64 features = *R(VIRTIO_MMIO_DEVICE_FEATURES);
+  features &= ~(1 << VIRTIO_BLK_F_RO);
+  features &= ~(1 << VIRTIO_BLK_F_SCSI);
+  features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE);
+  features &= ~(1 << VIRTIO_BLK_F_MQ);
+  features &= ~(1 << VIRTIO_F_ANY_LAYOUT);
+  features &= ~(1 << VIRTIO_RING_F_EVENT_IDX);
+  features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
+  *R(VIRTIO_MMIO_DRIVER_FEATURES) = features;
+
+  // tell device that feature negotiation is complete.
+  status |= VIRTIO_CONFIG_S_FEATURES_OK;
+  *R(VIRTIO_MMIO_STATUS) = status;
+
+  // tell device we're completely ready.
+  status |= VIRTIO_CONFIG_S_DRIVER_OK;
+  *R(VIRTIO_MMIO_STATUS) = status;
+
+  // page size the device should use for PFN calculations.
+  *R(VIRTIO_MMIO_GUEST_PAGE_SIZE) = PGSIZE;
+
+  // initialize queue 0.
+  *R(VIRTIO_MMIO_QUEUE_SEL) = 0;
+  uint32 max = *R(VIRTIO_MMIO_QUEUE_NUM_MAX);
+  if(max == 0)
+    panic("virtio disk has no queue 0");
+  if(max < NUM)
+    panic("virtio disk max queue too short");
+  *R(VIRTIO_MMIO_QUEUE_NUM) = NUM;
+  memset(disk.pages, 0, sizeof(disk.pages));
+  // hand the device the physical page number of the ring memory.
+  *R(VIRTIO_MMIO_QUEUE_PFN) = ((uint64)disk.pages) >> PGSHIFT;
+
+  // desc = pages -- num * VRingDesc
+  // avail = pages + 0x40 -- 2 * uint16, then num * uint16
+  // used = pages + 4096 -- 2 * uint16, then num * vRingUsedElem
+
+  disk.desc = (struct VRingDesc *) disk.pages;
+  disk.avail = (uint16*)(((char*)disk.desc) + NUM*sizeof(struct VRingDesc));
+  disk.used = (struct UsedArea *) (disk.pages + PGSIZE);
+
+  // all descriptors start out free.
+  for(int i = 0; i < NUM; i++)
+    disk.free[i] = 1;
+
+  // plic.c and trap.c arrange for interrupts from VIRTIO0_IRQ.
+}
+
+// find a free descriptor, mark it non-free, and return its
+// index, or -1 if none are available.
+// caller holds vdisk_lock.
+static int
+alloc_desc()
+{
+  int i;
+
+  for(i = 0; i < NUM; i++){
+    if(!disk.free[i])
+      continue;
+    disk.free[i] = 0;
+    return i;
+  }
+  return -1;
+}
+
+// mark a descriptor as free.
+// panics on an out-of-range index or a double free.
+static void
+free_desc(int i)
+{
+  if(i >= NUM)
+    panic("virtio_disk_intr 1");
+  if(disk.free[i])
+    panic("virtio_disk_intr 2");
+  disk.desc[i].addr = 0;
+  disk.free[i] = 1;
+  // a virtio_disk_rw() may be sleeping on &disk.free[0]
+  // waiting for descriptors to become available.
+  wakeup(&disk.free[0]);
+}
+
+// free a whole chain of descriptors, following the
+// VRING_DESC_F_NEXT links starting at descriptor i.
+static void
+free_chain(int i)
+{
+  for(;;){
+    // free_desc() only clears addr, so flags and next
+    // remain valid after the free.
+    int flags = disk.desc[i].flags;
+    int nxt = disk.desc[i].next;
+    free_desc(i);
+    if((flags & VRING_DESC_F_NEXT) == 0)
+      break;
+    i = nxt;
+  }
+}
+
+// allocate the three descriptors a disk request needs.
+// on success fills idx[0..2] and returns 0; on failure
+// frees whatever was allocated and returns -1.
+static int
+alloc3_desc(int *idx)
+{
+  int n;
+
+  for(n = 0; n < 3; n++){
+    int d = alloc_desc();
+    if(d >= 0){
+      idx[n] = d;
+      continue;
+    }
+    // out of descriptors: roll back the ones we got.
+    for(int j = 0; j < n; j++)
+      free_desc(idx[j]);
+    return -1;
+  }
+  return 0;
+}
+
+// read (write==0) or write (write!=0) buf b's block:
+// queue a three-descriptor request for the device and sleep
+// until virtio_disk_intr() reports completion.
+void
+virtio_disk_rw(struct buf *b, int write)
+{
+  // the device addresses 512-byte sectors; convert the
+  // BSIZE-sized block number.
+  uint64 sector = b->blockno * (BSIZE / 512);
+
+  acquire(&disk.vdisk_lock);
+
+  // the spec says that legacy block operations use three
+  // descriptors: one for type/reserved/sector, one for
+  // the data, one for a 1-byte status result.
+
+  // allocate the three descriptors; sleep if none are
+  // free -- free_desc() wakes this channel.
+  int idx[3];
+  while(1){
+    if(alloc3_desc(idx) == 0) {
+      break;
+    }
+    sleep(&disk.free[0], &disk.vdisk_lock);
+  }
+
+  // format the three descriptors.
+  // qemu's virtio-blk.c reads them.
+
+  struct virtio_blk_outhdr {
+    uint32 type;
+    uint32 reserved;
+    uint64 sector;
+  } buf0;
+
+  if(write)
+    buf0.type = VIRTIO_BLK_T_OUT; // write the disk
+  else
+    buf0.type = VIRTIO_BLK_T_IN; // read the disk
+  buf0.reserved = 0;
+  buf0.sector = sector;
+
+  // buf0 is on a kernel stack, which is not direct mapped,
+  // thus the call to kvmpa().
+  disk.desc[idx[0]].addr = (uint64) kvmpa((uint64) &buf0);
+  disk.desc[idx[0]].len = sizeof(buf0);
+  disk.desc[idx[0]].flags = VRING_DESC_F_NEXT;
+  disk.desc[idx[0]].next = idx[1];
+
+  disk.desc[idx[1]].addr = (uint64) b->data;
+  disk.desc[idx[1]].len = BSIZE;
+  if(write)
+    disk.desc[idx[1]].flags = 0; // device reads b->data
+  else
+    disk.desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data
+  disk.desc[idx[1]].flags |= VRING_DESC_F_NEXT;
+  disk.desc[idx[1]].next = idx[2];
+
+  disk.info[idx[0]].status = 0;
+  disk.desc[idx[2]].addr = (uint64) &disk.info[idx[0]].status;
+  disk.desc[idx[2]].len = 1;
+  disk.desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status
+  disk.desc[idx[2]].next = 0;
+
+  // record struct buf for virtio_disk_intr().
+  b->disk = 1;
+  disk.info[idx[0]].b = b;
+
+  // avail[0] is flags
+  // avail[1] tells the device how far to look in avail[2...].
+  // avail[2...] are desc[] indices the device should process.
+  // we only tell device the first index in our chain of descriptors.
+  disk.avail[2 + (disk.avail[1] % NUM)] = idx[0];
+  // barrier: the descriptor writes above must be visible
+  // before the device sees the updated avail index.
+  __sync_synchronize();
+  disk.avail[1] = disk.avail[1] + 1;
+
+  *R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number
+
+  // Wait for virtio_disk_intr() to say request has finished
+  // (it clears b->disk and wakes this channel).
+  while(b->disk == 1) {
+    sleep(b, &disk.vdisk_lock);
+  }
+
+  disk.info[idx[0]].b = 0;
+  free_chain(idx[0]);
+
+  release(&disk.vdisk_lock);
+}
+
+// the device interrupts when it has completed one or more
+// requests; walk the used ring and wake the waiting
+// virtio_disk_rw() for each finished chain.
+void
+virtio_disk_intr()
+{
+  acquire(&disk.vdisk_lock);
+
+  // disk.used_idx trails the device's used->id; each new
+  // used-ring element names the first descriptor of a
+  // completed chain.
+  while((disk.used_idx % NUM) != (disk.used->id % NUM)){
+    int id = disk.used->elems[disk.used_idx].id;
+
+    if(disk.info[id].status != 0)
+      panic("virtio_disk_intr status");
+
+    disk.info[id].b->disk = 0; // disk is done with buf
+    wakeup(disk.info[id].b);
+
+    disk.used_idx = (disk.used_idx + 1) % NUM;
+  }
+
+  release(&disk.vdisk_lock);
+}
diff --git a/kernel/vm.c b/kernel/vm.c
new file mode 100644
index 0000000..3631c9c
--- /dev/null
+++ b/kernel/vm.c
@@ -0,0 +1,441 @@
+#include "param.h"
+#include "types.h"
+#include "memlayout.h"
+#include "elf.h"
+#include "riscv.h"
+#include "defs.h"
+#include "fs.h"
+
+/*
+ * the kernel's page table.
+ */
+pagetable_t kernel_pagetable;
+
+extern char etext[]; // kernel.ld sets this to end of kernel code.
+
+extern char trampoline[]; // trampoline.S
+
+/*
+ * create a direct-map page table for the kernel and
+ * turn on paging. called early, in supervisor mode.
+ * the page allocator is already initialized.
+ */
+void
+kvminit()
+{
+  kernel_pagetable = (pagetable_t) kalloc();
+  memset(kernel_pagetable, 0, PGSIZE);
+
+  // all of the following are direct mappings (va == pa),
+  // except the trampoline at the top of the address space.
+
+  // uart registers
+  kvmmap(UART0, UART0, PGSIZE, PTE_R | PTE_W);
+
+  // virtio mmio disk interface
+  kvmmap(VIRTIO0, VIRTIO0, PGSIZE, PTE_R | PTE_W);
+
+  // CLINT
+  kvmmap(CLINT, CLINT, 0x10000, PTE_R | PTE_W);
+
+  // PLIC
+  kvmmap(PLIC, PLIC, 0x400000, PTE_R | PTE_W);
+
+  // map kernel text executable and read-only.
+  kvmmap(KERNBASE, KERNBASE, (uint64)etext-KERNBASE, PTE_R | PTE_X);
+
+  // map kernel data and the physical RAM we'll make use of.
+  kvmmap((uint64)etext, (uint64)etext, PHYSTOP-(uint64)etext, PTE_R | PTE_W);
+
+  // map the trampoline for trap entry/exit to
+  // the highest virtual address in the kernel.
+  kvmmap(TRAMPOLINE, (uint64)trampoline, PGSIZE, PTE_R | PTE_X);
+}
+
+// Switch h/w page table register to the kernel's page table,
+// and enable paging.
+void
+kvminithart()
+{
+  // install the kernel page table in satp first, then flush
+  // stale TLB entries. fencing before the satp write (as the
+  // old code did) would leave translations cached from before
+  // the switch.
+  w_satp(MAKE_SATP(kernel_pagetable));
+  sfence_vma();
+}
+
+// Return the address of the PTE in page table pagetable
+// that corresponds to virtual address va. If alloc!=0,
+// create any required page-table pages.
+//
+// The risc-v Sv39 scheme has three levels of page-table
+// pages. A page-table page contains 512 64-bit PTEs.
+// A 64-bit virtual address is split into five fields:
+// 39..63 -- must be zero.
+// 30..38 -- 9 bits of level-2 index.
+// 21..29 -- 9 bits of level-1 index.
+// 12..20 -- 9 bits of level-0 index.
+// 0..11 -- 12 bits of byte offset within the page.
+static pte_t *
+walk(pagetable_t pagetable, uint64 va, int alloc)
+{
+  if(va >= MAXVA)
+    panic("walk");
+
+  for(int level = 2; level > 0; level--) {
+    pte_t *pte = &pagetable[PX(level, va)];
+    if(*pte & PTE_V) {
+      // descend to the next-level page-table page.
+      pagetable = (pagetable_t)PTE2PA(*pte);
+    } else {
+      // allocate a page-table page on demand.
+      if(!alloc || (pagetable = (pde_t*)kalloc()) == 0)
+        return 0;
+      memset(pagetable, 0, PGSIZE);
+      *pte = PA2PTE(pagetable) | PTE_V;
+    }
+  }
+  return &pagetable[PX(0, va)];
+}
+
+// Look up a virtual address, return the physical address,
+// or 0 if not mapped.
+// Can only be used to look up user pages.
+uint64
+walkaddr(pagetable_t pagetable, uint64 va)
+{
+  pte_t *pte = walk(pagetable, va, 0);
+
+  // missing, invalid, and kernel-only pages all look
+  // unmapped to the caller.
+  if(pte == 0 || (*pte & PTE_V) == 0 || (*pte & PTE_U) == 0)
+    return 0;
+  return PTE2PA(*pte);
+}
+
+// install a mapping in the kernel page table; used only
+// when booting. does not flush the TLB or enable paging.
+void
+kvmmap(uint64 va, uint64 pa, uint64 sz, int perm)
+{
+  int rc = mappages(kernel_pagetable, va, sz, pa, perm);
+
+  if(rc != 0)
+    panic("kvmmap");
+}
+
+// translate a kernel virtual address to a physical address.
+// only needed for addresses on the stack, which are not
+// direct-mapped.
+uint64
+kvmpa(uint64 va)
+{
+  pte_t *pte = walk(kernel_pagetable, va, 0);
+
+  if(pte == 0 || (*pte & PTE_V) == 0)
+    panic("kvmpa");
+  return PTE2PA(*pte) + (va % PGSIZE);
+}
+
+// Create PTEs for virtual addresses starting at va that refer to
+// physical addresses starting at pa. va and size might not
+// be page-aligned. Returns 0 on success, -1 if walk() couldn't
+// allocate a needed page-table page.
+// Panics if size is 0 or if part of the range is already mapped.
+int
+mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm)
+{
+  uint64 a, last;
+  pte_t *pte;
+
+  // a zero-length mapping would make last wrap below a and
+  // the loop below would never reach its exit condition.
+  if(size == 0)
+    panic("mappages: size");
+
+  a = PGROUNDDOWN(va);
+  last = PGROUNDDOWN(va + size - 1);
+  for(;;){
+    if((pte = walk(pagetable, a, 1)) == 0)
+      return -1;
+    if(*pte & PTE_V)
+      panic("remap");
+    *pte = PA2PTE(pa) | perm | PTE_V;
+    if(a == last)
+      break;
+    a += PGSIZE;
+    pa += PGSIZE;
+  }
+  return 0;
+}
+
+// Remove mappings from a page table. The mappings in
+// the given range must exist. Optionally free the
+// physical memory.
+void
+uvmunmap(pagetable_t pagetable, uint64 va, uint64 size, int do_free)
+{
+  uint64 a, last;
+  pte_t *pte;
+
+  a = PGROUNDDOWN(va);
+  last = PGROUNDDOWN(va + size - 1);
+  for(;;){
+    if((pte = walk(pagetable, a, 0)) == 0)
+      panic("uvmunmap: walk");
+    if((*pte & PTE_V) == 0){
+      printf("va=%p pte=%p\n", a, *pte);
+      panic("uvmunmap: not mapped");
+    }
+    // a PTE with only PTE_V set points at a lower-level
+    // page table, not at a mapped page.
+    if(PTE_FLAGS(*pte) == PTE_V)
+      panic("uvmunmap: not a leaf");
+    if(do_free){
+      // the physical address comes from the PTE, so it need
+      // not be carried across iterations (the old code
+      // incremented an uninitialized pa when do_free was 0).
+      uint64 pa = PTE2PA(*pte);
+      kfree((void*)pa);
+    }
+    *pte = 0;
+    if(a == last)
+      break;
+    a += PGSIZE;
+  }
+}
+
+// create an empty user page table.
+// panics if out of memory.
+pagetable_t
+uvmcreate()
+{
+  pagetable_t pt = (pagetable_t) kalloc();
+
+  if(pt == 0)
+    panic("uvmcreate: out of memory");
+  memset(pt, 0, PGSIZE);
+  return pt;
+}
+
+// Load the user initcode into address 0 of pagetable,
+// for the very first process.
+// sz must be less than a page.
+void
+uvminit(pagetable_t pagetable, uchar *src, uint sz)
+{
+  char *mem;
+
+  if(sz >= PGSIZE)
+    panic("inituvm: more than a page");
+  // NOTE(review): kalloc() and mappages() results are not
+  // checked; presumably tolerable because this runs once at
+  // boot when memory cannot yet be exhausted -- confirm.
+  mem = kalloc();
+  memset(mem, 0, PGSIZE);
+  mappages(pagetable, 0, PGSIZE, (uint64)mem, PTE_W|PTE_R|PTE_X|PTE_U);
+  memmove(mem, src, sz);
+}
+
+// Allocate PTEs and physical memory to grow process from oldsz to
+// newsz, which need not be page aligned. Returns new size or 0 on error.
+// On error, any pages allocated so far are freed again.
+uint64
+uvmalloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)
+{
+  char *mem;
+  uint64 a;
+
+  if(newsz < oldsz)
+    return oldsz;
+
+  oldsz = PGROUNDUP(oldsz);
+  a = oldsz;
+  for(; a < newsz; a += PGSIZE){
+    mem = kalloc();
+    if(mem == 0){
+      // out of physical memory: undo the partial allocation.
+      uvmdealloc(pagetable, a, oldsz);
+      return 0;
+    }
+    memset(mem, 0, PGSIZE);
+    if(mappages(pagetable, a, PGSIZE, (uint64)mem, PTE_W|PTE_X|PTE_R|PTE_U) != 0){
+      // mapping failed: free this page, then undo the rest.
+      kfree(mem);
+      uvmdealloc(pagetable, a, oldsz);
+      return 0;
+    }
+  }
+  return newsz;
+}
+
+// Deallocate user pages to bring the process size from oldsz to
+// newsz. oldsz and newsz need not be page-aligned, nor does newsz
+// need to be less than oldsz. oldsz can be larger than the actual
+// process size. Returns the new process size.
+uint64
+uvmdealloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)
+{
+  if(newsz >= oldsz)
+    return oldsz;
+
+  // only unmap whole pages lying entirely above newsz; the
+  // page containing newsz may still be partly in use, and
+  // passing newsz straight to uvmunmap would free it because
+  // uvmunmap rounds the start address down.
+  uint64 newup = PGROUNDUP(newsz);
+  if(newup < PGROUNDUP(oldsz))
+    uvmunmap(pagetable, newup, oldsz - newup, 1);
+
+  return newsz;
+}
+
+// Recursively free page-table pages.
+// All leaf mappings must already have been removed.
+static void
+freewalk(pagetable_t pagetable)
+{
+  // there are 2^9 = 512 PTEs in a page table.
+  for(int idx = 0; idx < 512; idx++){
+    pte_t pte = pagetable[idx];
+    if((pte & PTE_V) == 0)
+      continue;
+    // a valid PTE with any of R/W/X set is a leaf mapping
+    // the caller should have removed already; otherwise it
+    // points to a lower-level page-table page.
+    if(pte & (PTE_R|PTE_W|PTE_X))
+      panic("freewalk: leaf");
+    freewalk((pagetable_t)PTE2PA(pte));
+    pagetable[idx] = 0;
+  }
+  kfree((void*)pagetable);
+}
+
+// Free user memory pages,
+// then free page-table pages.
+void
+uvmfree(pagetable_t pagetable, uint64 sz)
+{
+  // skip the unmap for an empty address space: uvmunmap's
+  // last = PGROUNDDOWN(va + size - 1) underflows when size
+  // is 0, making it walk (and panic on) unmapped addresses.
+  if(sz > 0)
+    uvmunmap(pagetable, 0, sz, 1);
+  freewalk(pagetable);
+}
+
+// Given a parent process's page table, copy
+// its memory into a child's page table.
+// Copies both the page table and the
+// physical memory.
+// returns 0 on success, -1 on failure.
+// frees any allocated pages on failure.
+int
+uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
+{
+  pte_t *pte;
+  uint64 pa, i;
+  uint flags;
+  char *mem;
+
+  for(i = 0; i < sz; i += PGSIZE){
+    if((pte = walk(old, i, 0)) == 0)
+      panic("copyuvm: pte should exist");
+    if((*pte & PTE_V) == 0)
+      panic("copyuvm: page not present");
+    pa = PTE2PA(*pte);
+    flags = PTE_FLAGS(*pte);
+    // allocate a fresh physical page, copy the parent's
+    // contents, and map it with the same permissions.
+    if((mem = kalloc()) == 0)
+      goto err;
+    memmove(mem, (char*)pa, PGSIZE);
+    if(mappages(new, i, PGSIZE, (uint64)mem, flags) != 0){
+      kfree(mem);
+      goto err;
+    }
+  }
+  return 0;
+
+ err:
+  // undo the pages copied so far; skip the unmap when the
+  // very first page failed (i == 0), since uvmunmap's range
+  // arithmetic underflows on a zero-length region.
+  if(i > 0)
+    uvmunmap(new, 0, i, 1);
+  return -1;
+}
+
+// mark a PTE invalid for user access.
+// used by exec for the user stack guard page.
+void
+uvmclear(pagetable_t pagetable, uint64 va)
+{
+  pte_t *pte = walk(pagetable, va, 0);
+
+  if(pte == 0)
+    panic("uvmclear");
+  *pte &= ~PTE_U;
+}
+
+// Copy from kernel to user.
+// Copy len bytes from src to virtual address dstva in a given page table.
+// Return 0 on success, -1 on error.
+int
+copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
+{
+  uint64 n, va0, pa0;
+
+  while(len > 0){
+    // the rounding must stay in 64 bits: the old (uint) cast
+    // truncated user addresses above 4GB.
+    va0 = PGROUNDDOWN(dstva);
+    pa0 = walkaddr(pagetable, va0);
+    if(pa0 == 0)
+      return -1;
+    // copy at most to the end of this page.
+    n = PGSIZE - (dstva - va0);
+    if(n > len)
+      n = len;
+    memmove((void *)(pa0 + (dstva - va0)), src, n);
+
+    len -= n;
+    src += n;
+    dstva = va0 + PGSIZE;
+  }
+  return 0;
+}
+
+// Copy from user to kernel.
+// Copy len bytes to dst from virtual address srcva in a given page table.
+// Return 0 on success, -1 on error.
+int
+copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len)
+{
+  uint64 n, va0, pa0;
+
+  while(len > 0){
+    // the rounding must stay in 64 bits: the old (uint) cast
+    // truncated user addresses above 4GB.
+    va0 = PGROUNDDOWN(srcva);
+    pa0 = walkaddr(pagetable, va0);
+    if(pa0 == 0)
+      return -1;
+    // copy at most to the end of this page.
+    n = PGSIZE - (srcva - va0);
+    if(n > len)
+      n = len;
+    memmove(dst, (void *)(pa0 + (srcva - va0)), n);
+
+    len -= n;
+    dst += n;
+    srcva = va0 + PGSIZE;
+  }
+  return 0;
+}
+
+// Copy a null-terminated string from user to kernel.
+// Copy bytes to dst from virtual address srcva in a given page table,
+// until a '\0', or max.
+// Return 0 on success, -1 on error.
+int
+copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max)
+{
+  uint64 n, va0, pa0;
+  int got_null = 0;
+
+  while(got_null == 0 && max > 0){
+    // the rounding must stay in 64 bits: the old (uint) cast
+    // truncated user addresses above 4GB.
+    va0 = PGROUNDDOWN(srcva);
+    pa0 = walkaddr(pagetable, va0);
+    if(pa0 == 0)
+      return -1;
+    // number of bytes available on this page.
+    n = PGSIZE - (srcva - va0);
+    if(n > max)
+      n = max;
+
+    char *p = (char *) (pa0 + (srcva - va0));
+    while(n > 0){
+      if(*p == '\0'){
+        *dst = '\0';
+        got_null = 1;
+        break;
+      } else {
+        *dst = *p;
+      }
+      --n;
+      --max;
+      p++;
+      dst++;
+    }
+
+    srcva = va0 + PGSIZE;
+  }
+  if(got_null){
+    return 0;
+  } else {
+    return -1;
+  }
+}