summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorRobert Morris <[email protected]>2019-06-11 09:57:14 -0400
committerRobert Morris <[email protected]>2019-06-11 09:57:14 -0400
commit5753553213df8f9de851adb68377db43faecb91f (patch)
tree3b629ff54897fca414146677532cb459a2ed11ba /kernel
parent91ba81110acd3163f7de3580b677eece0c57f5e7 (diff)
downloadxv6-labs-5753553213df8f9de851adb68377db43faecb91f.tar.gz
xv6-labs-5753553213df8f9de851adb68377db43faecb91f.tar.bz2
xv6-labs-5753553213df8f9de851adb68377db43faecb91f.zip
separate source into kernel/ user/ mkfs/
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bio.c145
-rw-r--r--kernel/buf.h14
-rw-r--r--kernel/console.c271
-rw-r--r--kernel/date.h8
-rw-r--r--kernel/defs.h205
-rw-r--r--kernel/elf.h42
-rw-r--r--kernel/entry.S25
-rw-r--r--kernel/exec.c149
-rw-r--r--kernel/fcntl.h4
-rw-r--r--kernel/file.c175
-rw-r--r--kernel/file.h37
-rw-r--r--kernel/fs.c681
-rw-r--r--kernel/fs.h60
-rw-r--r--kernel/kalloc.c80
-rw-r--r--kernel/kernel.ld31
-rw-r--r--kernel/kernelvec.S112
-rw-r--r--kernel/log.c235
-rw-r--r--kernel/main.c42
-rw-r--r--kernel/memlayout.h50
-rw-r--r--kernel/param.h13
-rw-r--r--kernel/pipe.c129
-rw-r--r--kernel/plic.c63
-rw-r--r--kernel/proc.c591
-rw-r--r--kernel/proc.h104
-rw-r--r--kernel/ramdisk.c45
-rw-r--r--kernel/riscv.h338
-rw-r--r--kernel/sleeplock.c55
-rw-r--r--kernel/sleeplock.h10
-rw-r--r--kernel/spinlock.c110
-rw-r--r--kernel/spinlock.h9
-rw-r--r--kernel/start.c56
-rw-r--r--kernel/stat.h11
-rw-r--r--kernel/string.c104
-rw-r--r--kernel/swtch.S42
-rw-r--r--kernel/syscall.c191
-rw-r--r--kernel/syscall.h22
-rw-r--r--kernel/sysfile.c465
-rw-r--r--kernel/sysproc.c90
-rw-r--r--kernel/trampoline.S137
-rw-r--r--kernel/trap.c184
-rw-r--r--kernel/types.h10
-rw-r--r--kernel/uart.c75
-rw-r--r--kernel/vm.c389
43 files changed, 5609 insertions, 0 deletions
diff --git a/kernel/bio.c b/kernel/bio.c
new file mode 100644
index 0000000..90f9af9
--- /dev/null
+++ b/kernel/bio.c
@@ -0,0 +1,145 @@
+// Buffer cache.
+//
+// The buffer cache is a linked list of buf structures holding
+// cached copies of disk block contents. Caching disk blocks
+// in memory reduces the number of disk reads and also provides
+// a synchronization point for disk blocks used by multiple processes.
+//
+// Interface:
+// * To get a buffer for a particular disk block, call bread.
+// * After changing buffer data, call bwrite to write it to disk.
+// * When done with the buffer, call brelse.
+// * Do not use the buffer after calling brelse.
+// * Only one process at a time can use a buffer,
+// so do not keep them longer than necessary.
+//
+// The implementation uses two state flags internally:
+// * B_VALID: the buffer data has been read from the disk.
+// * B_DIRTY: the buffer data has been modified
+// and needs to be written to disk.
+
+#include "types.h"
+#include "param.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "riscv.h"
+#include "defs.h"
+#include "fs.h"
+#include "buf.h"
+
+struct {
+ struct spinlock lock;
+ struct buf buf[NBUF];
+
+ // Linked list of all buffers, through prev/next.
+ // head.next is most recently used.
+ struct buf head;
+} bcache;
+
+void
+binit(void)
+{
+ struct buf *b;
+
+ initlock(&bcache.lock, "bcache");
+
+//PAGEBREAK!
+ // Create linked list of buffers
+ bcache.head.prev = &bcache.head;
+ bcache.head.next = &bcache.head;
+ for(b = bcache.buf; b < bcache.buf+NBUF; b++){
+ b->next = bcache.head.next;
+ b->prev = &bcache.head;
+ initsleeplock(&b->lock, "buffer");
+ bcache.head.next->prev = b;
+ bcache.head.next = b;
+ }
+}
+
// Look through buffer cache for block on device dev.
// If not found, allocate a buffer.
// In either case, return locked buffer.
//
// Two-phase locking: bcache.lock protects the list and refcnt;
// the per-buffer sleep-lock serializes use of the buffer's data.
// bcache.lock is released BEFORE acquiresleep so a process holding
// the buffer for a long time does not block the whole cache.
static struct buf*
bget(uint dev, uint blockno)
{
  struct buf *b;

  acquire(&bcache.lock);

  // Is the block already cached?
  // Scan from head.next: most-recently-used first.
  for(b = bcache.head.next; b != &bcache.head; b = b->next){
    if(b->dev == dev && b->blockno == blockno){
      b->refcnt++;
      release(&bcache.lock);
      acquiresleep(&b->lock);
      return b;
    }
  }

  // Not cached; recycle an unused buffer.
  // Scan backwards (least-recently-used first).
  // Even if refcnt==0, B_DIRTY indicates a buffer is in use
  // because log.c has modified it but not yet committed it.
  for(b = bcache.head.prev; b != &bcache.head; b = b->prev){
    if(b->refcnt == 0 && (b->flags & B_DIRTY) == 0) {
      b->dev = dev;
      b->blockno = blockno;
      b->flags = 0;     // clears B_VALID, so bread() will fetch from disk
      b->refcnt = 1;
      release(&bcache.lock);
      acquiresleep(&b->lock);
      return b;
    }
  }
  panic("bget: no buffers");
}
+
+// Return a locked buf with the contents of the indicated block.
+struct buf*
+bread(uint dev, uint blockno)
+{
+ struct buf *b;
+
+ b = bget(dev, blockno);
+ if((b->flags & B_VALID) == 0) {
+ ramdiskrw(b);
+ }
+ return b;
+}
+
+// Write b's contents to disk. Must be locked.
+void
+bwrite(struct buf *b)
+{
+ if(!holdingsleep(&b->lock))
+ panic("bwrite");
+ b->flags |= B_DIRTY;
+ ramdiskrw(b);
+}
+
// Release a locked buffer.
// Move to the head of the MRU list.
void
brelse(struct buf *b)
{
  if(!holdingsleep(&b->lock))
    panic("brelse");

  // Drop the sleep-lock first; list/refcnt updates only need bcache.lock.
  releasesleep(&b->lock);

  acquire(&bcache.lock);
  b->refcnt--;
  if (b->refcnt == 0) {
    // no one is waiting for it.
    // Unlink from current position, then re-link just after head
    // so recently used buffers are found quickly by bget().
    b->next->prev = b->prev;
    b->prev->next = b->next;
    b->next = bcache.head.next;
    b->prev = &bcache.head;
    bcache.head.next->prev = b;
    bcache.head.next = b;
  }

  release(&bcache.lock);
}
+//PAGEBREAK!
+// Blank page.
+
diff --git a/kernel/buf.h b/kernel/buf.h
new file mode 100644
index 0000000..3266495
--- /dev/null
+++ b/kernel/buf.h
@@ -0,0 +1,14 @@
// In-memory copy of a disk block, managed by bio.c.
struct buf {
  int flags;              // B_VALID / B_DIRTY bits, see below
  uint dev;               // device number this block belongs to
  uint blockno;           // block number on that device
  struct sleeplock lock;  // serializes use of the buffer's data
  uint refcnt;            // number of bget() references outstanding
  struct buf *prev; // LRU cache list
  struct buf *next;
  struct buf *qnext; // disk queue
  uchar data[BSIZE];      // the block contents themselves
};
#define B_VALID 0x2 // buffer has been read from disk
#define B_DIRTY 0x4 // buffer needs to be written to disk
+
diff --git a/kernel/console.c b/kernel/console.c
new file mode 100644
index 0000000..b20d4a9
--- /dev/null
+++ b/kernel/console.c
@@ -0,0 +1,271 @@
+// Console input and output.
+// Input is from the keyboard or serial port.
+// Output is written to the screen and serial port.
+
+#include <stdarg.h>
+
+#include "types.h"
+#include "param.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "fs.h"
+#include "file.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "defs.h"
+#include "proc.h"
+
+static void consputc(int);
+
+static volatile int panicked = 0;
+
+static struct {
+ struct spinlock lock;
+ int locking;
+} cons;
+
+static char digits[] = "0123456789abcdef";
+
+static void
+printint(int xx, int base, int sign)
+{
+ char buf[16];
+ int i;
+ uint x;
+
+ if(sign && (sign = xx < 0))
+ x = -xx;
+ else
+ x = xx;
+
+ i = 0;
+ do{
+ buf[i++] = digits[x % base];
+ }while((x /= base) != 0);
+
+ if(sign)
+ buf[i++] = '-';
+
+ while(--i >= 0)
+ consputc(buf[i]);
+}
+
+static void
+printptr(uint64 x) {
+ int i;
+ consputc('0');
+ consputc('x');
+ for (i = 0; i < (sizeof(uint64) * 2); i++, x <<= 4)
+ consputc(digits[x >> (sizeof(uint64) * 8 - 4)]);
+}
+
+
+//PAGEBREAK: 50
+
// Print to the console. only understands %d, %x, %p, %s.
// Takes cons.lock (unless locking is disabled, e.g. during panic)
// so output from different CPUs is not interleaved.
void
printf(char *fmt, ...)
{
  va_list ap;
  int i, c, locking;
  char *s;

  // Snapshot the locking flag once; panic() clears it to avoid
  // deadlocking on cons.lock while printing the panic message.
  locking = cons.locking;
  if(locking)
    acquire(&cons.lock);

  if (fmt == 0)
    panic("null fmt");

  va_start(ap, fmt);
  // Walk the format string byte by byte; & 0xff guards against
  // sign-extension of chars.
  for(i = 0; (c = fmt[i] & 0xff) != 0; i++){
    if(c != '%'){
      consputc(c);
      continue;
    }
    c = fmt[++i] & 0xff;
    if(c == 0)
      break;
    switch(c){
    case 'd':
      printint(va_arg(ap, int), 10, 1);
      break;
    case 'x':
      printint(va_arg(ap, int), 16, 1);
      break;
    case 'p':
      printptr(va_arg(ap, uint64));
      break;
    case 's':
      if((s = va_arg(ap, char*)) == 0)
        s = "(null)";
      for(; *s; s++)
        consputc(*s);
      break;
    case '%':
      consputc('%');
      break;
    default:
      // Print unknown % sequence to draw attention.
      consputc('%');
      consputc(c);
      break;
    }
  }

  if(locking)
    release(&cons.lock);
}
+
+void
+panic(char *s)
+{
+ cons.locking = 0;
+ printf("panic: ");
+ printf(s);
+ printf("\n");
+ panicked = 1; // freeze other CPU
+ for(;;)
+ ;
+}
+
+#define BACKSPACE 0x100
+
+void
+consputc(int c)
+{
+ if(panicked){
+ for(;;)
+ ;
+ }
+
+ if(c == BACKSPACE){
+ uartputc('\b'); uartputc(' '); uartputc('\b');
+ } else
+ uartputc(c);
+}
+
#define INPUT_BUF 128
// Console input ring buffer. Indices only ever grow; the slot is
// index % INPUT_BUF. Invariant: r <= w <= e.
struct {
  char buf[INPUT_BUF];
  uint r;  // Read index
  uint w;  // Write index
  uint e;  // Edit index
} input;
+
#define C(x) ((x)-'@') // Control-x (e.g. C('U') is ctrl-U)
+
// Read up to n bytes of console input into dst (a user or kernel
// address, selected by user_dst). Blocks until input is available.
// Returns the number of bytes read, or -1 if the process was killed.
int
consoleread(struct inode *ip, int user_dst, uint64 dst, int n)
{
  uint target;
  int c;
  char buf[1];

  // Drop the inode lock while sleeping for input, per devsw convention.
  iunlock(ip);
  target = n;
  acquire(&cons.lock);
  while(n > 0){
    // Wait until the interrupt handler has put some
    // input into input.buf.
    while(input.r == input.w){
      if(myproc()->killed){
        release(&cons.lock);
        ilock(ip);
        return -1;
      }
      sleep(&input.r, &cons.lock);
    }
    c = input.buf[input.r++ % INPUT_BUF];
    if(c == C('D')){ // EOF
      if(n < target){
        // Save ^D for next time, to make sure
        // caller gets a 0-byte result.
        input.r--;
      }
      break;
    }
    // Copy one byte at a time out to the caller's buffer.
    buf[0] = c;
    if(either_copyout(user_dst, dst, &buf[0], 1) == -1)
      break;
    dst++;
    --n;
    if(c == '\n')
      break;
  }
  release(&cons.lock);
  ilock(ip);

  return target - n;
}
+
+int
+consolewrite(struct inode *ip, int user_src, uint64 src, int n)
+{
+ int i;
+
+ iunlock(ip);
+ acquire(&cons.lock);
+ for(i = 0; i < n; i++){
+ char c;
+ if(either_copyin(&c, user_src, src+i, 1) == -1)
+ break;
+ consputc(c);
+ }
+ release(&cons.lock);
+ ilock(ip);
+
+ return n;
+}
+
// Console input interrupt handler: called with one input character.
// Handles line editing (^U kill-line, ^H/DEL backspace), echoes the
// character, and wakes consoleread() on newline, ^D, or a full buffer.
void
consoleintr(int c)
{
  int doprocdump = 0;

  acquire(&cons.lock);

  switch(c){
  case C('P'):  // Process list.
    // procdump() locks cons.lock indirectly; invoke later
    doprocdump = 1;
    break;
  case C('U'):  // Kill line.
    // Erase back to (but not past) the last committed character.
    while(input.e != input.w &&
          input.buf[(input.e-1) % INPUT_BUF] != '\n'){
      input.e--;
      consputc(BACKSPACE);
    }
    break;
  case C('H'): case '\x7f':  // Backspace
    if(input.e != input.w){
      input.e--;
      consputc(BACKSPACE);
    }
    break;
  default:
    // Store and echo the character if the ring buffer has room.
    if(c != 0 && input.e-input.r < INPUT_BUF){
      c = (c == '\r') ? '\n' : c;
      input.buf[input.e++ % INPUT_BUF] = c;
      consputc(c);
      // Commit the line (advance w) and wake readers on newline,
      // EOF, or when the buffer fills up.
      if(c == '\n' || c == C('D') || input.e == input.r+INPUT_BUF){
        input.w = input.e;
        wakeup(&input.r);
      }
    }
    break;
  }

  release(&cons.lock);

  if(doprocdump)
    procdump();
}
+
+void
+consoleinit(void)
+{
+ initlock(&cons.lock, "console");
+
+ devsw[CONSOLE].write = consolewrite;
+ devsw[CONSOLE].read = consoleread;
+ cons.locking = 1;
+}
diff --git a/kernel/date.h b/kernel/date.h
new file mode 100644
index 0000000..94aec4b
--- /dev/null
+++ b/kernel/date.h
@@ -0,0 +1,8 @@
// Calendar date/time broken into fields; filled in by cmostime()
// (declared in defs.h).
struct rtcdate {
  uint second;
  uint minute;
  uint hour;
  uint day;
  uint month;
  uint year;
};
diff --git a/kernel/defs.h b/kernel/defs.h
new file mode 100644
index 0000000..597e5b6
--- /dev/null
+++ b/kernel/defs.h
@@ -0,0 +1,205 @@
+struct buf;
+struct context;
+struct file;
+struct inode;
+struct pipe;
+struct proc;
+struct rtcdate;
+struct spinlock;
+struct sleeplock;
+struct stat;
+struct superblock;
+struct sysframe;
+
+// bio.c
+void binit(void);
+struct buf* bread(uint, uint);
+void brelse(struct buf*);
+void bwrite(struct buf*);
+
+// console.c
+void consoleinit(void);
+void printf(char*, ...);
+void consoleintr(int);
+void panic(char*) __attribute__((noreturn));
+
+// exec.c
+int exec(char*, char**);
+
+// file.c
+struct file* filealloc(void);
+void fileclose(struct file*);
+struct file* filedup(struct file*);
+void fileinit(void);
+int fileread(struct file*, uint64, int n);
+int filestat(struct file*, uint64 addr);
+int filewrite(struct file*, uint64, int n);
+
+// fs.c
+void readsb(int dev, struct superblock *sb);
+int dirlink(struct inode*, char*, uint);
+struct inode* dirlookup(struct inode*, char*, uint*);
+struct inode* ialloc(uint, short);
+struct inode* idup(struct inode*);
+void iinit(int dev);
+void ilock(struct inode*);
+void iput(struct inode*);
+void iunlock(struct inode*);
+void iunlockput(struct inode*);
+void iupdate(struct inode*);
+int namecmp(const char*, const char*);
+struct inode* namei(char*);
+struct inode* nameiparent(char*, char*);
+int readi(struct inode*, int, uint64, uint, uint);
+void stati(struct inode*, struct stat*);
+int writei(struct inode*, int, uint64, uint, uint);
+
+// ramdisk.c
+void ramdiskinit(void);
+void ramdiskintr(void);
+void ramdiskrw(struct buf*);
+
+// ioapic.c
+void ioapicenable(int irq, int cpu);
+extern uchar ioapicid;
+void ioapicinit(void);
+
+// kalloc.c
+void* kalloc(void);
+void kfree(void *);
+void kinit();
+
+// kbd.c
+void kbdintr(void);
+
+// lapic.c
+void cmostime(struct rtcdate *r);
+int lapicid(void);
+extern volatile uint* lapic;
+void lapiceoi(void);
+void lapicinit(void);
+void lapicstartap(uchar, uint);
+void microdelay(int);
+
+// log.c
+void initlog(int dev);
+void log_write(struct buf*);
+void begin_op();
+void end_op();
+
+// mp.c
+extern int ismp;
+void mpinit(void);
+
+// picirq.c
+void picenable(int);
+void picinit(void);
+
+// pipe.c
+int pipealloc(struct file**, struct file**);
+void pipeclose(struct pipe*, int);
+int piperead(struct pipe*, uint64, int);
+int pipewrite(struct pipe*, uint64, int);
+
+//PAGEBREAK: 16
+// proc.c
+int cpuid(void);
+void exit(void);
+int fork(void);
+int growproc(int);
+pagetable_t proc_pagetable(struct proc *);
+void proc_freepagetable(pagetable_t, uint64);
+int kill(int);
+struct cpu* mycpu(void);
+struct cpu* getmycpu(void);
+struct proc* myproc();
+void procinit(void);
+void scheduler(void) __attribute__((noreturn));
+void sched(void);
+void setproc(struct proc*);
+void sleep(void*, struct spinlock*);
+void userinit(void);
+int wait(void);
+void wakeup(void*);
+void yield(void);
+int either_copyout(int user_dst, uint64 dst, void *src, uint64 len);
+int either_copyin(void *dst, int user_src, uint64 src, uint64 len);
+void procdump(void);
+
+// swtch.S
+void swtch(struct context*, struct context*);
+
+// spinlock.c
+void acquire(struct spinlock*);
+int holding(struct spinlock*);
+void initlock(struct spinlock*, char*);
+void release(struct spinlock*);
+void push_off(void);
+void pop_off(void);
+
+// sleeplock.c
+void acquiresleep(struct sleeplock*);
+void releasesleep(struct sleeplock*);
+int holdingsleep(struct sleeplock*);
+void initsleeplock(struct sleeplock*, char*);
+
+// string.c
+int memcmp(const void*, const void*, uint);
+void* memmove(void*, const void*, uint);
+void* memset(void*, int, uint);
+char* safestrcpy(char*, const char*, int);
+int strlen(const char*);
+int strncmp(const char*, const char*, uint);
+char* strncpy(char*, const char*, int);
+
+// syscall.c
+int argint(int, int*);
+int argptr(int, uint64*, int);
+int argstr(int, char*, int);
+int argaddr(int, uint64 *);
+int fetchint(uint64, int*);
+int fetchstr(uint64, char*, int);
+int fetchaddr(uint64, uint64*);
+void syscall();
+
+// timer.c
+void timerinit(void);
+
+// trap.c
+extern uint ticks;
+void trapinit(void);
+void trapinithart(void);
+extern struct spinlock tickslock;
+void usertrapret(void);
+
+// uart.c
+void uartinit(void);
+void uartintr(void);
+void uartputc(int);
+int uartgetc(void);
+
+// vm.c
+void kvminit(void);
+void kvminithart(void);
+pagetable_t uvmcreate(void);
+void uvminit(pagetable_t, uchar *, uint);
+uint64 uvmalloc(pagetable_t, uint64, uint64);
+uint64 uvmdealloc(pagetable_t, uint64, uint64);
+void uvmcopy(pagetable_t, pagetable_t, uint64);
+void uvmfree(pagetable_t, uint64);
+void mappages(pagetable_t, uint64, uint64, uint64, int);
+void unmappages(pagetable_t, uint64, uint64, int);
+uint64 walkaddr(pagetable_t, uint64);
+int copyout(pagetable_t, uint64, char *, uint64);
+int copyin(pagetable_t, char *, uint64, uint64);
+int copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max);
+
+// plic.c
+void plicinit(void);
+void plicinithart(void);
+uint64 plic_pending(void);
+int plic_claim(void);
+void plic_complete(int);
+
+// number of elements in fixed-size array
+#define NELEM(x) (sizeof(x)/sizeof((x)[0]))
diff --git a/kernel/elf.h b/kernel/elf.h
new file mode 100644
index 0000000..84555fa
--- /dev/null
+++ b/kernel/elf.h
@@ -0,0 +1,42 @@
+// Format of an ELF executable file
+
+#define ELF_MAGIC 0x464C457FU // "\x7FELF" in little endian
+
+// File header
// File header (64-bit ELF layout).
struct elfhdr {
  uint magic;       // must equal ELF_MAGIC
  uchar elf[12];    // rest of e_ident (class, endianness, version, padding)
  ushort type;      // object file type
  ushort machine;   // target architecture
  uint version;
  uint64 entry;     // entry point virtual address
  uint64 phoff;     // program header table offset in file
  uint64 shoff;     // section header table offset in file
  uint flags;
  ushort ehsize;    // size of this header
  ushort phentsize; // size of one program header entry
  ushort phnum;     // number of program header entries
  ushort shentsize;
  ushort shnum;
  ushort shstrndx;
};
+
+// Program section header
// Program section header (64-bit ELF program header entry).
struct proghdr {
  uint32 type;   // segment kind; exec() only loads ELF_PROG_LOAD
  uint32 flags;  // ELF_PROG_FLAG_* permission bits
  uint64 off;    // offset of segment contents in the file
  uint64 vaddr;  // virtual address to load the segment at
  uint64 paddr;
  uint64 filesz; // bytes stored in the file
  uint64 memsz;  // bytes in memory (>= filesz; remainder is zeroed)
  uint64 align;
};
+
+// Values for Proghdr type
+#define ELF_PROG_LOAD 1
+
+// Flag bits for Proghdr flags
+#define ELF_PROG_FLAG_EXEC 1
+#define ELF_PROG_FLAG_WRITE 2
+#define ELF_PROG_FLAG_READ 4
diff --git a/kernel/entry.S b/kernel/entry.S
new file mode 100644
index 0000000..b3d2c55
--- /dev/null
+++ b/kernel/entry.S
@@ -0,0 +1,25 @@
+ # qemu -kernel starts at 0x1000. the instructions
+ # there seem to be provided by qemu, as if it
+ # were a ROM. the code at 0x1000 jumps to
+ # 0x8000000, the _start function here,
+ # in machine mode.
.section .data
.globl stack0
.section .text
.globl mstart
.section .text
.globl _entry
_entry:
  # set up a stack for C.
  # stack0 is declared in start,
  # with 4096 bytes per CPU.
  # sp = stack0 + (hartid + 1) * 4096, i.e. the TOP of this
  # hart's stack slice, since the stack grows downward.
  la sp, stack0
  li a0, 1024*4
  csrr a1, mhartid
  addi a1, a1, 1
  mul a0, a0, a1
  add sp, sp, a0
  # jump to mstart() in start.c
  call mstart
junk:
  # mstart() should not return; spin if it does.
  j junk
diff --git a/kernel/exec.c b/kernel/exec.c
new file mode 100644
index 0000000..c9af395
--- /dev/null
+++ b/kernel/exec.c
@@ -0,0 +1,149 @@
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "proc.h"
+#include "defs.h"
+#include "elf.h"
+
+static int loadseg(pde_t *pgdir, uint64 addr, struct inode *ip, uint offset, uint sz);
+
// Replace the current process image with the program at `path`,
// passing argv as the argument vector. On success returns argc
// (which lands in a0, becoming main's first argument) and never
// touches the old image until the new one is fully built; on any
// failure returns -1 with the old image intact.
int
exec(char *path, char **argv)
{
  char *s, *last;
  int i, off;
  uint64 argc, sz, sp, ustack[MAXARG+1], stackbase;
  struct elfhdr elf;
  struct inode *ip;
  struct proghdr ph;
  pagetable_t pagetable = 0, oldpagetable;
  struct proc *p = myproc();
  uint64 oldsz = p->sz;

  begin_op();

  if((ip = namei(path)) == 0){
    end_op();
    return -1;
  }
  ilock(ip);

  // Check ELF header
  if(readi(ip, 0, (uint64)&elf, 0, sizeof(elf)) != sizeof(elf))
    goto bad;
  if(elf.magic != ELF_MAGIC)
    goto bad;

  // Build the new image in a fresh page table; the old one stays
  // live until the commit point below.
  if((pagetable = proc_pagetable(p)) == 0)
    goto bad;

  // Load program into memory.
  sz = 0;
  for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){
    if(readi(ip, 0, (uint64)&ph, off, sizeof(ph)) != sizeof(ph))
      goto bad;
    if(ph.type != ELF_PROG_LOAD)
      continue;
    if(ph.memsz < ph.filesz)
      goto bad;
    // Reject segments whose vaddr+memsz wraps around (overflow check).
    if(ph.vaddr + ph.memsz < ph.vaddr)
      goto bad;
    if((sz = uvmalloc(pagetable, sz, ph.vaddr + ph.memsz)) == 0)
      goto bad;
    if(ph.vaddr % PGSIZE != 0)
      goto bad;
    if(loadseg(pagetable, ph.vaddr, ip, ph.off, ph.filesz) < 0)
      goto bad;
  }
  iunlockput(ip);
  end_op();
  ip = 0;

  // Allocate two pages at the next page boundary.
  // Use the second as the user stack.
  sz = PGROUNDUP(sz);
  if((sz = uvmalloc(pagetable, sz, sz + 2*PGSIZE)) == 0)
    goto bad;
  sp = sz;
  stackbase = sp - PGSIZE;

  // Push argument strings, prepare rest of stack in ustack.
  for(argc = 0; argv[argc]; argc++) {
    if(argc >= MAXARG)
      goto bad;
    sp -= strlen(argv[argc]) + 1;
    sp -= sp % 16; // riscv sp must be 16-byte aligned
    if(sp < stackbase)
      goto bad;
    if(copyout(pagetable, sp, argv[argc], strlen(argv[argc]) + 1) < 0)
      goto bad;
    ustack[argc] = sp;
  }
  ustack[argc] = 0;

  // push the array of argv[] pointers.
  sp -= (argc+1) * sizeof(uint64);
  sp -= sp % 16;
  if(sp < stackbase)
    goto bad;
  if(copyout(pagetable, sp, (char *)ustack, (argc+1)*sizeof(uint64)) < 0)
    goto bad;

  // arguments to user main(argc, argv)
  // argc is returned via the system call return
  // value, which goes in a0.
  p->tf->a1 = sp;

  // Save program name for debugging.
  for(last=s=path; *s; s++)
    if(*s == '/')
      last = s+1;
  safestrcpy(p->name, last, sizeof(p->name));

  // Commit to the user image. From here on, failure is no longer
  // possible, so it is safe to discard the old address space.
  oldpagetable = p->pagetable;
  p->pagetable = pagetable;
  p->sz = sz;
  p->tf->epc = elf.entry; // initial program counter = main
  p->tf->sp = sp; // initial stack pointer
  proc_freepagetable(oldpagetable, oldsz);
  return argc; // this ends up in a0, the first argument to main(argc, argv)

 bad:
  if(pagetable)
    proc_freepagetable(pagetable, sz);
  if(ip){
    iunlockput(ip);
    end_op();
  }
  return -1;
}
+
+// Load a program segment into pagetable at virtual address va.
+// va must be page-aligned
+// and the pages from va to va+sz must already be mapped.
+// Returns 0 on success, -1 on failure.
+static int
+loadseg(pagetable_t pagetable, uint64 va, struct inode *ip, uint offset, uint sz)
+{
+ uint i, n;
+ uint64 pa;
+
+ if((va % PGSIZE) != 0)
+ panic("loadseg: va must be page aligned");
+
+ for(i = 0; i < sz; i += PGSIZE){
+ pa = walkaddr(pagetable, va + i);
+ if(pa == 0)
+ panic("loadseg: address should exist");
+ if(sz - i < PGSIZE)
+ n = sz - i;
+ else
+ n = PGSIZE;
+ if(readi(ip, 0, (uint64)pa, offset+i, n) != n)
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/kernel/fcntl.h b/kernel/fcntl.h
new file mode 100644
index 0000000..d565483
--- /dev/null
+++ b/kernel/fcntl.h
@@ -0,0 +1,4 @@
#define O_RDONLY 0x000 // open for reading only
#define O_WRONLY 0x001 // open for writing only
#define O_RDWR 0x002 // open for reading and writing
#define O_CREATE 0x200 // create the file if it does not exist
diff --git a/kernel/file.c b/kernel/file.c
new file mode 100644
index 0000000..6f27f22
--- /dev/null
+++ b/kernel/file.c
@@ -0,0 +1,175 @@
+//
+// Support functions for system calls that involve file descriptors.
+//
+
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "param.h"
+#include "fs.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "file.h"
+#include "stat.h"
+#include "proc.h"
+
+struct devsw devsw[NDEV];
+struct {
+ struct spinlock lock;
+ struct file file[NFILE];
+} ftable;
+
+void
+fileinit(void)
+{
+ initlock(&ftable.lock, "ftable");
+}
+
+// Allocate a file structure.
+struct file*
+filealloc(void)
+{
+ struct file *f;
+
+ acquire(&ftable.lock);
+ for(f = ftable.file; f < ftable.file + NFILE; f++){
+ if(f->ref == 0){
+ f->ref = 1;
+ release(&ftable.lock);
+ return f;
+ }
+ }
+ release(&ftable.lock);
+ return 0;
+}
+
+// Increment ref count for file f.
+struct file*
+filedup(struct file *f)
+{
+ acquire(&ftable.lock);
+ if(f->ref < 1)
+ panic("filedup");
+ f->ref++;
+ release(&ftable.lock);
+ return f;
+}
+
// Close file f. (Decrement ref count, close when reaches 0.)
void
fileclose(struct file *f)
{
  struct file ff;

  acquire(&ftable.lock);
  if(f->ref < 1)
    panic("fileclose");
  if(--f->ref > 0){
    release(&ftable.lock);
    return;
  }
  // Last reference: copy the fields we still need, free the slot,
  // and drop ftable.lock BEFORE the pipe/inode teardown, which may
  // sleep (iput runs inside a log transaction).
  ff = *f;
  f->ref = 0;
  f->type = FD_NONE;
  release(&ftable.lock);

  if(ff.type == FD_PIPE)
    pipeclose(ff.pipe, ff.writable);
  else if(ff.type == FD_INODE){
    begin_op();
    iput(ff.ip);
    end_op();
  }
}
+
+// Get metadata about file f.
+// addr is a user virtual address, pointing to a struct stat.
+int
+filestat(struct file *f, uint64 addr)
+{
+ struct proc *p = myproc();
+ struct stat st;
+
+ if(f->type == FD_INODE){
+ ilock(f->ip);
+ stati(f->ip, &st);
+ iunlock(f->ip);
+ if(copyout(p->pagetable, addr, (char *)&st, sizeof(st)) < 0)
+ return -1;
+ return 0;
+ }
+ return -1;
+}
+
+// Read from file f.
+// addr is a user virtual address.
+int
+fileread(struct file *f, uint64 addr, int n)
+{
+ int r = 0;
+
+ if(f->readable == 0)
+ return -1;
+
+ if(f->type == FD_PIPE){
+ r = piperead(f->pipe, addr, n);
+ } else if(f->type == FD_INODE){
+ ilock(f->ip);
+ if((r = readi(f->ip, 1, addr, f->off, n)) > 0)
+ f->off += r;
+ iunlock(f->ip);
+ } else {
+ panic("fileread");
+ }
+
+ return r;
+}
+
//PAGEBREAK!
// Write to file f.
// addr is a user virtual address.
// Returns n on full success, -1 on error.
int
filewrite(struct file *f, uint64 addr, int n)
{
  int r, ret = 0;

  if(f->writable == 0)
    return -1;

  if(f->type == FD_PIPE){
    ret = pipewrite(f->pipe, addr, n);
  } else if(f->type == FD_INODE){
    // write a few blocks at a time to avoid exceeding
    // the maximum log transaction size, including
    // i-node, indirect block, allocation blocks,
    // and 2 blocks of slop for non-aligned writes.
    // this really belongs lower down, since writei()
    // might be writing a device like the console.
    // NOTE(review): 512 here looks like a per-block byte count —
    // confirm it should track BSIZE rather than being hard-coded.
    int max = ((MAXOPBLOCKS-1-1-2) / 2) * 512;
    int i = 0;
    while(i < n){
      int n1 = n - i;
      if(n1 > max)
        n1 = max;

      // Each chunk is its own log transaction.
      begin_op();
      ilock(f->ip);
      if ((r = writei(f->ip, 1, addr + i, f->off, n1)) > 0)
        f->off += r;
      iunlock(f->ip);
      end_op();

      if(r < 0)
        break;
      if(r != n1)
        panic("short filewrite");
      i += r;
    }
    // All-or-nothing from the caller's perspective.
    ret = (i == n ? n : -1);
  } else {
    panic("filewrite");
  }

  return ret;
}
+
diff --git a/kernel/file.h b/kernel/file.h
new file mode 100644
index 0000000..f28018f
--- /dev/null
+++ b/kernel/file.h
@@ -0,0 +1,37 @@
// One open file: a refcounted wrapper around either a pipe or an inode,
// shared between processes after fork().
struct file {
  enum { FD_NONE, FD_PIPE, FD_INODE } type;
  int ref; // reference count
  char readable;           // may this handle read?
  char writable;           // may this handle write?
  struct pipe *pipe;       // valid when type == FD_PIPE
  struct inode *ip;        // valid when type == FD_INODE
  uint off;                // current byte offset (inode files)
};


// in-memory copy of an inode
struct inode {
  uint dev;           // Device number
  uint inum;          // Inode number
  int ref;            // Reference count
  struct sleeplock lock; // protects everything below here
  int valid;          // inode has been read from disk?

  short type;         // copy of disk inode
  short major;
  short minor;
  short nlink;
  uint size;
  uint addrs[NDIRECT+1];
};

// table mapping major device number to
// device functions
struct devsw {
  // read/write signature: (inode, user address flag, address, byte count)
  int (*read)(struct inode*, int, uint64, int);
  int (*write)(struct inode*, int, uint64, int);
};

extern struct devsw devsw[];

#define CONSOLE 1
diff --git a/kernel/fs.c b/kernel/fs.c
new file mode 100644
index 0000000..ebe377a
--- /dev/null
+++ b/kernel/fs.c
@@ -0,0 +1,681 @@
+// File system implementation. Five layers:
+// + Blocks: allocator for raw disk blocks.
+// + Log: crash recovery for multi-step updates.
+// + Files: inode allocator, reading, writing, metadata.
+// + Directories: inode with special contents (list of other inodes!)
+// + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
+//
+// This file contains the low-level file system manipulation
+// routines. The (higher-level) system call implementations
+// are in sysfile.c.
+
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "param.h"
+#include "stat.h"
+#include "proc.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "fs.h"
+#include "buf.h"
+#include "file.h"
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+static void itrunc(struct inode*);
+// there should be one superblock per disk device, but we run with
+// only one device
+struct superblock sb;
+
+// Read the on-disk super block (always block 1) into *sb.
+void
+readsb(int dev, struct superblock *sb)
+{
+  struct buf *b = bread(dev, 1);
+  memmove(sb, b->data, sizeof(*sb));
+  brelse(b);
+}
+
+// Zero disk block bno, recording the write in the log.
+static void
+bzero(int dev, int bno)
+{
+  struct buf *b = bread(dev, bno);
+  memset(b->data, 0, BSIZE);
+  log_write(b);
+  brelse(b);
+}
+
+// Blocks.
+
+// Allocate a zeroed disk block.
+// Scans the free bitmap one bitmap block at a time;
+// panics if no free block exists.
+static uint
+balloc(uint dev)
+{
+  int b, bi, m;
+  struct buf *bp;
+
+  bp = 0;
+  // b advances in chunks of BPB blocks; each chunk's free/used
+  // bits live in a single bitmap block, BBLOCK(b, sb).
+  for(b = 0; b < sb.size; b += BPB){
+    bp = bread(dev, BBLOCK(b, sb));
+    for(bi = 0; bi < BPB && b + bi < sb.size; bi++){
+      m = 1 << (bi % 8);
+      if((bp->data[bi/8] & m) == 0){  // Is block free?
+        bp->data[bi/8] |= m;  // Mark block in use.
+        log_write(bp);
+        brelse(bp);
+        bzero(dev, b + bi);
+        return b + bi;
+      }
+    }
+    brelse(bp);
+  }
+  panic("balloc: out of blocks");
+}
+
+// Free disk block b by clearing its bit in the free bitmap.
+// Panics if the block is already free.
+static void
+bfree(int dev, uint b)
+{
+  struct buf *bp;
+  int bi, m;
+
+  // no need to re-read the superblock here: the global sb is
+  // loaded once at iinit() time and never changes afterwards.
+  bp = bread(dev, BBLOCK(b, sb));
+  bi = b % BPB;
+  m = 1 << (bi % 8);
+  if((bp->data[bi/8] & m) == 0)
+    panic("freeing free block");
+  bp->data[bi/8] &= ~m;
+  log_write(bp);
+  brelse(bp);
+}
+
+// Inodes.
+//
+// An inode describes a single unnamed file.
+// The inode disk structure holds metadata: the file's type,
+// its size, the number of links referring to it, and the
+// list of blocks holding the file's content.
+//
+// The inodes are laid out sequentially on disk at
+// sb.startinode. Each inode has a number, indicating its
+// position on the disk.
+//
+// The kernel keeps a cache of in-use inodes in memory
+// to provide a place for synchronizing access
+// to inodes used by multiple processes. The cached
+// inodes include book-keeping information that is
+// not stored on disk: ip->ref and ip->valid.
+//
+// An inode and its in-memory representation go through a
+// sequence of states before they can be used by the
+// rest of the file system code.
+//
+// * Allocation: an inode is allocated if its type (on disk)
+// is non-zero. ialloc() allocates, and iput() frees if
+// the reference and link counts have fallen to zero.
+//
+// * Referencing in cache: an entry in the inode cache
+// is free if ip->ref is zero. Otherwise ip->ref tracks
+// the number of in-memory pointers to the entry (open
+// files and current directories). iget() finds or
+// creates a cache entry and increments its ref; iput()
+// decrements ref.
+//
+// * Valid: the information (type, size, &c) in an inode
+// cache entry is only correct when ip->valid is 1.
+// ilock() reads the inode from
+// the disk and sets ip->valid, while iput() clears
+// ip->valid if ip->ref has fallen to zero.
+//
+// * Locked: file system code may only examine and modify
+// the information in an inode and its content if it
+// has first locked the inode.
+//
+// Thus a typical sequence is:
+// ip = iget(dev, inum)
+// ilock(ip)
+// ... examine and modify ip->xxx ...
+// iunlock(ip)
+// iput(ip)
+//
+// ilock() is separate from iget() so that system calls can
+// get a long-term reference to an inode (as for an open file)
+// and only lock it for short periods (e.g., in read()).
+// The separation also helps avoid deadlock and races during
+// pathname lookup. iget() increments ip->ref so that the inode
+// stays cached and pointers to it remain valid.
+//
+// Many internal file system functions expect the caller to
+// have locked the inodes involved; this lets callers create
+// multi-step atomic operations.
+//
+// The icache.lock spin-lock protects the allocation of icache
+// entries. Since ip->ref indicates whether an entry is free,
+// and ip->dev and ip->inum indicate which i-node an entry
+// holds, one must hold icache.lock while using any of those fields.
+//
+// An ip->lock sleep-lock protects all ip-> fields other than ref,
+// dev, and inum. One must hold ip->lock in order to
+// read or write that inode's ip->valid, ip->size, ip->type, &c.
+
+struct {
+ struct spinlock lock;
+ struct inode inode[NINODE];
+} icache;
+
+void
+iinit(int dev)
+{
+  int i = 0;
+
+  initlock(&icache.lock, "icache");
+  for(i = 0; i < NINODE; i++) {
+    initsleeplock(&icache.inode[i].lock, "inode");
+  }
+
+  // read the superblock once; the rest of fs.c relies on the
+  // global sb being valid from this point on.
+  readsb(dev, &sb);
+  if(sb.magic != FSMAGIC)
+    panic("invalid file system");
+}
+
+static struct inode* iget(uint dev, uint inum);
+
+//PAGEBREAK!
+// Allocate an inode on device dev.
+// Mark it as allocated by giving it type type.
+// Returns an unlocked but allocated and referenced inode.
+struct inode*
+ialloc(uint dev, short type)
+{
+  int inum;
+  struct buf *bp;
+  struct dinode *dip;
+
+  // inum 0 is reserved; scan the on-disk inodes for one
+  // whose type is 0 (free).
+  for(inum = 1; inum < sb.ninodes; inum++){
+    bp = bread(dev, IBLOCK(inum, sb));
+    dip = (struct dinode*)bp->data + inum%IPB;
+    if(dip->type == 0){  // a free inode
+      memset(dip, 0, sizeof(*dip));
+      dip->type = type;
+      log_write(bp);   // mark it allocated on the disk
+      brelse(bp);
+      return iget(dev, inum);
+    }
+    brelse(bp);
+  }
+  panic("ialloc: no inodes");
+}
+
+// Copy a modified in-memory inode to disk.
+// Must be called after every change to an ip->xxx field
+// that lives on disk, since i-node cache is write-through.
+// Caller must hold ip->lock.
+// Must be called inside a transaction (uses log_write).
+void
+iupdate(struct inode *ip)
+{
+  struct buf *bp;
+  struct dinode *dip;
+
+  bp = bread(ip->dev, IBLOCK(ip->inum, sb));
+  dip = (struct dinode*)bp->data + ip->inum%IPB;
+  dip->type = ip->type;
+  dip->major = ip->major;
+  dip->minor = ip->minor;
+  dip->nlink = ip->nlink;
+  dip->size = ip->size;
+  memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
+  log_write(bp);
+  brelse(bp);
+}
+
+// Find the inode with number inum on device dev
+// and return the in-memory copy. Does not lock
+// the inode and does not read it from disk.
+static struct inode*
+iget(uint dev, uint inum)
+{
+  struct inode *ip, *empty;
+
+  acquire(&icache.lock);
+
+  // Is the inode already cached?
+  empty = 0;
+  for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
+    if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
+      ip->ref++;
+      release(&icache.lock);
+      return ip;
+    }
+    if(empty == 0 && ip->ref == 0)    // Remember empty slot.
+      empty = ip;
+  }
+
+  // Recycle an inode cache entry.
+  if(empty == 0)
+    panic("iget: no inodes");
+
+  ip = empty;
+  ip->dev = dev;
+  ip->inum = inum;
+  ip->ref = 1;
+  // valid==0 forces the next ilock() to read this inode from disk.
+  ip->valid = 0;
+  release(&icache.lock);
+
+  return ip;
+}
+
+// Take an additional in-memory reference on ip.
+// Returns ip so callers can write ip = idup(ip1).
+struct inode*
+idup(struct inode *ip)
+{
+  acquire(&icache.lock);
+  ip->ref = ip->ref + 1;
+  release(&icache.lock);
+  return ip;
+}
+
+// Lock the given inode.
+// Reads the inode from disk if necessary.
+void
+ilock(struct inode *ip)
+{
+  struct buf *bp;
+  struct dinode *dip;
+
+  if(ip == 0 || ip->ref < 1)
+    panic("ilock");
+
+  acquiresleep(&ip->lock);
+
+  // first lock since the cache slot was (re)assigned:
+  // pull the on-disk copy into memory.
+  if(ip->valid == 0){
+    bp = bread(ip->dev, IBLOCK(ip->inum, sb));
+    dip = (struct dinode*)bp->data + ip->inum%IPB;
+    ip->type = dip->type;
+    ip->major = dip->major;
+    ip->minor = dip->minor;
+    ip->nlink = dip->nlink;
+    ip->size = dip->size;
+    memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
+    brelse(bp);
+    ip->valid = 1;
+    if(ip->type == 0)
+      panic("ilock: no type");
+  }
+}
+
+// Release the sleep-lock on ip.
+// Panics unless the caller actually holds a locked, referenced inode.
+void
+iunlock(struct inode *ip)
+{
+  if(ip == 0)
+    panic("iunlock");
+  if(!holdingsleep(&ip->lock) || ip->ref < 1)
+    panic("iunlock");
+
+  releasesleep(&ip->lock);
+}
+
+// Drop a reference to an in-memory inode.
+// If that was the last reference, the inode cache entry can
+// be recycled.
+// If that was the last reference and the inode has no links
+// to it, free the inode (and its content) on disk.
+// All calls to iput() must be inside a transaction in
+// case it has to free the inode.
+void
+iput(struct inode *ip)
+{
+  acquire(&icache.lock);
+
+  if(ip->ref == 1 && ip->valid && ip->nlink == 0){
+    // inode has no links and no other references: truncate and free.
+
+    // ip->ref == 1 means no other process can have ip locked,
+    // so this acquiresleep() won't block (or deadlock).
+    acquiresleep(&ip->lock);
+
+    // drop icache.lock: itrunc/iupdate do disk I/O and may sleep.
+    release(&icache.lock);
+
+    itrunc(ip);
+    ip->type = 0;
+    iupdate(ip);
+    // entry may now be recycled; force a disk read on next ilock().
+    ip->valid = 0;
+
+    releasesleep(&ip->lock);
+
+    acquire(&icache.lock);
+  }
+
+  ip->ref--;
+  release(&icache.lock);
+}
+
+// Common idiom: unlock, then put.
+// Like iput(), must be called inside a transaction.
+void
+iunlockput(struct inode *ip)
+{
+  iunlock(ip);
+  iput(ip);
+}
+
+//PAGEBREAK!
+// Inode content
+//
+// The content (data) associated with each inode is stored
+// in blocks on the disk. The first NDIRECT block numbers
+// are listed in ip->addrs[]. The next NINDIRECT blocks are
+// listed in block ip->addrs[NDIRECT].
+
+// Return the disk block address of the nth block in inode ip.
+// If there is no such block, bmap allocates one.
+// Caller must hold ip->lock. May modify ip->addrs[]; the
+// caller (e.g. writei) writes that back via iupdate().
+static uint
+bmap(struct inode *ip, uint bn)
+{
+  uint addr, *a;
+  struct buf *bp;
+
+  if(bn < NDIRECT){
+    if((addr = ip->addrs[bn]) == 0)
+      ip->addrs[bn] = addr = balloc(ip->dev);
+    return addr;
+  }
+  bn -= NDIRECT;
+
+  if(bn < NINDIRECT){
+    // Load indirect block, allocating if necessary.
+    if((addr = ip->addrs[NDIRECT]) == 0)
+      ip->addrs[NDIRECT] = addr = balloc(ip->dev);
+    bp = bread(ip->dev, addr);
+    a = (uint*)bp->data;
+    if((addr = a[bn]) == 0){
+      a[bn] = addr = balloc(ip->dev);
+      log_write(bp);
+    }
+    brelse(bp);
+    return addr;
+  }
+
+  panic("bmap: out of range");
+}
+
+// Truncate inode (discard contents).
+// Only called when the inode has no links
+// to it (no directory entries referring to it)
+// and has no in-memory reference to it (is
+// not an open file or current directory).
+static void
+itrunc(struct inode *ip)
+{
+  int i, j;
+  struct buf *bp;
+  uint *a;
+
+  // free the direct blocks.
+  for(i = 0; i < NDIRECT; i++){
+    if(ip->addrs[i]){
+      bfree(ip->dev, ip->addrs[i]);
+      ip->addrs[i] = 0;
+    }
+  }
+
+  // free every block listed in the indirect block,
+  // then the indirect block itself.
+  if(ip->addrs[NDIRECT]){
+    bp = bread(ip->dev, ip->addrs[NDIRECT]);
+    a = (uint*)bp->data;
+    for(j = 0; j < NINDIRECT; j++){
+      if(a[j])
+        bfree(ip->dev, a[j]);
+    }
+    brelse(bp);
+    bfree(ip->dev, ip->addrs[NDIRECT]);
+    ip->addrs[NDIRECT] = 0;
+  }
+
+  ip->size = 0;
+  iupdate(ip);
+}
+
+// Fill *st with the stat fields of inode ip.
+// Caller must hold ip->lock.
+void
+stati(struct inode *ip, struct stat *st)
+{
+  st->type = ip->type;
+  st->dev = ip->dev;
+  st->ino = ip->inum;
+  st->nlink = ip->nlink;
+  st->size = ip->size;
+}
+
+//PAGEBREAK!
+// Read data from inode.
+// Caller must hold ip->lock.
+// If user_dst==1, then dst is a user virtual address;
+// otherwise, dst is a kernel address.
+// Returns the number of bytes read, or -1 if the copy
+// to dst failed.
+int
+readi(struct inode *ip, int user_dst, uint64 dst, uint off, uint n)
+{
+  uint tot, m;
+  struct buf *bp;
+
+  if(ip->type == T_DEV){
+    if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
+      return -1;
+    return devsw[ip->major].read(ip, user_dst, dst, n);
+  }
+
+  if(off > ip->size || off + n < off)
+    return -1;
+  if(off + n > ip->size)
+    n = ip->size - off;
+
+  for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
+    bp = bread(ip->dev, bmap(ip, off/BSIZE));
+    m = min(n - tot, BSIZE - off%BSIZE);
+    if(either_copyout(user_dst, dst, bp->data + (off % BSIZE), m) == -1){
+      // must release the buffer before bailing out, or it
+      // stays locked in the buffer cache forever.
+      brelse(bp);
+      tot = -1;
+      break;
+    }
+    brelse(bp);
+  }
+  // tot == n on success, (uint)-1 on a failed copy.
+  return tot;
+}
+
+// PAGEBREAK!
+// Write data to inode.
+// Caller must hold ip->lock.
+// If user_src==1, then src is a user virtual address;
+// otherwise, src is a kernel address.
+// Returns the number of bytes actually written, which is
+// less than n only if the copy from src failed partway.
+int
+writei(struct inode *ip, int user_src, uint64 src, uint off, uint n)
+{
+  uint tot, m;
+  struct buf *bp;
+
+  if(ip->type == T_DEV){
+    if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write){
+      return -1;
+    }
+    return devsw[ip->major].write(ip, user_src, src, n);
+  }
+
+  if(off > ip->size || off + n < off)
+    return -1;
+  if(off + n > MAXFILE*BSIZE)
+    return -1;
+
+  for(tot=0; tot<n; tot+=m, off+=m, src+=m){
+    bp = bread(ip->dev, bmap(ip, off/BSIZE));
+    m = min(n - tot, BSIZE - off%BSIZE);
+    if(either_copyin(bp->data + (off % BSIZE), user_src, src, m) == -1){
+      // release the buffer before bailing out, or it would
+      // stay locked in the buffer cache forever.
+      brelse(bp);
+      break;
+    }
+    log_write(bp);
+    brelse(bp);
+  }
+
+  // grow the file if the write extended past the old size;
+  // off has advanced only over fully-written blocks.
+  if(tot > 0 && off > ip->size){
+    ip->size = off;
+    iupdate(ip);
+  }
+  return tot;
+}
+
+//PAGEBREAK!
+// Directories
+
+// Compare two directory-entry names, at most DIRSIZ bytes
+// (a full-length entry name need not be NUL-terminated).
+int
+namecmp(const char *s, const char *t)
+{
+  return strncmp(s, t, DIRSIZ);
+}
+
+// Look for a directory entry in a directory.
+// If found, set *poff to byte offset of entry.
+// Returns an unlocked, referenced inode (via iget),
+// or 0 if the name is not present.
+// Caller must hold dp->lock (readi requires it).
+struct inode*
+dirlookup(struct inode *dp, char *name, uint *poff)
+{
+  uint off, inum;
+  struct dirent de;
+
+  if(dp->type != T_DIR)
+    panic("dirlookup not DIR");
+
+  for(off = 0; off < dp->size; off += sizeof(de)){
+    if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
+      panic("dirlookup read");
+    if(de.inum == 0)  // unused slot
+      continue;
+    if(namecmp(name, de.name) == 0){
+      // entry matches path element
+      if(poff)
+        *poff = off;
+      inum = de.inum;
+      return iget(dp->dev, inum);
+    }
+  }
+
+  return 0;
+}
+
+// Write a new directory entry (name, inum) into the directory dp.
+// Returns 0 on success, -1 if name already exists.
+// Caller must hold dp->lock and be inside a transaction.
+int
+dirlink(struct inode *dp, char *name, uint inum)
+{
+  int off;
+  struct dirent de;
+  struct inode *ip;
+
+  // Check that name is not present.
+  if((ip = dirlookup(dp, name, 0)) != 0){
+    iput(ip);
+    return -1;
+  }
+
+  // Look for an empty dirent.
+  // If none is found, off == dp->size and writei extends dp.
+  for(off = 0; off < dp->size; off += sizeof(de)){
+    if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
+      panic("dirlink read");
+    if(de.inum == 0)
+      break;
+  }
+
+  strncpy(de.name, name, DIRSIZ);
+  de.inum = inum;
+  if(writei(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
+    panic("dirlink");
+
+  return 0;
+}
+
+//PAGEBREAK!
+// Paths
+
+// Copy the next path element from path into name.
+// Return a pointer to the element following the copied one.
+// The returned path has no leading slashes,
+// so the caller can check *path=='\0' to see if the name is the last one.
+// If no name to remove, return 0.
+//
+// Examples:
+//   skipelem("a/bb/c", name) = "bb/c", setting name = "a"
+//   skipelem("///a//bb", name) = "bb", setting name = "a"
+//   skipelem("a", name) = "", setting name = "a"
+//   skipelem("", name) = skipelem("////", name) = 0
+//
+static char*
+skipelem(char *path, char *name)
+{
+  char *s;
+  int len;
+
+  while(*path == '/')
+    path++;
+  if(*path == 0)
+    return 0;
+  s = path;
+  while(*path != '/' && *path != 0)
+    path++;
+  len = path - s;
+  // elements of DIRSIZ bytes or more are truncated to exactly
+  // DIRSIZ bytes with no NUL terminator (namecmp is bounded).
+  if(len >= DIRSIZ)
+    memmove(name, s, DIRSIZ);
+  else {
+    memmove(name, s, len);
+    name[len] = 0;
+  }
+  while(*path == '/')
+    path++;
+  return path;
+}
+
+// Look up and return the inode for a path name.
+// If parent != 0, return the inode for the parent and copy the final
+// path element into name, which must have room for DIRSIZ bytes.
+// Must be called inside a transaction since it calls iput().
+static struct inode*
+namex(char *path, int nameiparent, char *name)
+{
+  struct inode *ip, *next;
+
+  if(*path == '/')
+    ip = iget(ROOTDEV, ROOTINO);
+  else
+    ip = idup(myproc()->cwd);
+
+  // lock only one inode at a time: unlock ip before moving to
+  // next, so a lookup of "." cannot deadlock on its own lock.
+  while((path = skipelem(path, name)) != 0){
+    ilock(ip);
+    if(ip->type != T_DIR){
+      iunlockput(ip);
+      return 0;
+    }
+    if(nameiparent && *path == '\0'){
+      // Stop one level early.
+      iunlock(ip);
+      return ip;
+    }
+    if((next = dirlookup(ip, name, 0)) == 0){
+      iunlockput(ip);
+      return 0;
+    }
+    iunlockput(ip);
+    ip = next;
+  }
+  if(nameiparent){
+    // e.g. nameiparent("/") — no final element; fail.
+    iput(ip);
+    return 0;
+  }
+  return ip;
+}
+
+// Resolve path to an inode (referenced, unlocked).
+struct inode*
+namei(char *path)
+{
+  char buf[DIRSIZ];
+
+  return namex(path, 0, buf);
+}
+
+// Like namei(), but return the parent directory's inode and
+// copy the final path element into name (room for DIRSIZ bytes).
+struct inode*
+nameiparent(char *path, char *name)
+{
+  return namex(path, 1, name);
+}
diff --git a/kernel/fs.h b/kernel/fs.h
new file mode 100644
index 0000000..bc0805f
--- /dev/null
+++ b/kernel/fs.h
@@ -0,0 +1,60 @@
+// On-disk file system format.
+// Both the kernel and user programs use this header file.
+
+
+#define ROOTINO 1 // root i-number
+#define BSIZE 1024 // block size
+
+// Disk layout:
+// [ boot block | super block | log | inode blocks |
+// free bit map | data blocks]
+//
+// mkfs computes the super block and builds an initial file system. The
+// super block describes the disk layout:
+struct superblock {
+ uint magic; // Must be FSMAGIC
+ uint size; // Size of file system image (blocks)
+ uint nblocks; // Number of data blocks
+ uint ninodes; // Number of inodes.
+ uint nlog; // Number of log blocks
+ uint logstart; // Block number of first log block
+ uint inodestart; // Block number of first inode block
+ uint bmapstart; // Block number of first free map block
+};
+
+#define FSMAGIC 0x10203040
+
+#define NDIRECT 12
+#define NINDIRECT (BSIZE / sizeof(uint))
+#define MAXFILE (NDIRECT + NINDIRECT)
+
+// On-disk inode structure
+struct dinode {
+ short type; // File type
+ short major; // Major device number (T_DEV only)
+ short minor; // Minor device number (T_DEV only)
+ short nlink; // Number of links to inode in file system
+ uint size; // Size of file (bytes)
+ uint addrs[NDIRECT+1]; // Data block addresses
+};
+
+// Inodes per block.
+#define IPB (BSIZE / sizeof(struct dinode))
+
+// Block containing inode i
+#define IBLOCK(i, sb) ((i) / IPB + sb.inodestart)
+
+// Bitmap bits per block
+#define BPB (BSIZE*8)
+
+// Block of free map containing bit for block b
+#define BBLOCK(b, sb) ((b)/BPB + sb.bmapstart)
+
+// Directory is a file containing a sequence of dirent structures.
+#define DIRSIZ 14
+
+struct dirent {
+ ushort inum;
+ char name[DIRSIZ];
+};
+
diff --git a/kernel/kalloc.c b/kernel/kalloc.c
new file mode 100644
index 0000000..1ed1c49
--- /dev/null
+++ b/kernel/kalloc.c
@@ -0,0 +1,80 @@
+// Physical memory allocator, intended to allocate
+// memory for user processes, kernel stacks, page table pages,
+// and pipe buffers. Allocates 4096-byte pages.
+
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "spinlock.h"
+#include "riscv.h"
+#include "defs.h"
+
+void freerange(void *pa_start, void *pa_end);
+
+extern char end[]; // first address after kernel loaded from ELF file
+ // defined by the kernel linker script in kernel.ld
+
+struct run {
+ struct run *next;
+};
+
+struct {
+ struct spinlock lock;
+ struct run *freelist;
+} kmem;
+
+void
+kinit()
+{
+  initlock(&kmem.lock, "kmem");
+  // hand every whole page between the end of the kernel image
+  // and PHYSTOP to the free list.
+  freerange(end, (void*)PHYSTOP);
+}
+
+// Free every full page in [pa_start, pa_end).
+void
+freerange(void *pa_start, void *pa_end)
+{
+  char *p = (char*)PGROUNDUP((uint64)pa_start);
+
+  while(p + PGSIZE <= (char*)pa_end){
+    kfree(p);
+    p += PGSIZE;
+  }
+}
+//PAGEBREAK: 21
+// Free the page of physical memory pointed at by v,
+// which normally should have been returned by a
+// call to kalloc(). (The exception is when
+// initializing the allocator; see kinit above.)
+void
+kfree(void *pa)
+{
+  struct run *r;
+
+  // reject unaligned pointers and anything outside [end, PHYSTOP).
+  if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
+    panic("kfree");
+
+  // Fill with junk to catch dangling refs.
+  memset(pa, 1, PGSIZE);
+
+  acquire(&kmem.lock);
+  r = (struct run*)pa;
+  r->next = kmem.freelist;
+  kmem.freelist = r;
+  release(&kmem.lock);
+}
+
+// Allocate one 4096-byte page of physical memory.
+// Returns a kernel-usable pointer, or 0 if memory is exhausted.
+void *
+kalloc(void)
+{
+  struct run *page;
+
+  acquire(&kmem.lock);
+  page = kmem.freelist;
+  if(page != 0)
+    kmem.freelist = page->next;
+  release(&kmem.lock);
+
+  if(page != 0)
+    memset((char*)page, 5, PGSIZE); // fill with junk
+  return (void*)page;
+}
diff --git a/kernel/kernel.ld b/kernel/kernel.ld
new file mode 100644
index 0000000..53c9b90
--- /dev/null
+++ b/kernel/kernel.ld
@@ -0,0 +1,31 @@
+OUTPUT_ARCH( "riscv" )
+ENTRY( _entry )
+
+SECTIONS
+{
+ /*
+ * ensure that entry.S / _entry is at 0x80000000,
+ * where qemu's -kernel jumps.
+ */
+ . = 0x80000000;
+ .text :
+ {
+ *(.text)
+ . = ALIGN(0x1000);
+ *(trampoline)
+ }
+
+ . = ALIGN(0x1000);
+ PROVIDE(etext = .);
+
+ /*
+ * make sure end is after data and bss.
+ */
+ .data : {
+ *(.data)
+ }
+ bss : {
+ *(.bss)
+ PROVIDE(end = .);
+ }
+}
diff --git a/kernel/kernelvec.S b/kernel/kernelvec.S
new file mode 100644
index 0000000..4f52688
--- /dev/null
+++ b/kernel/kernelvec.S
@@ -0,0 +1,112 @@
+ #
+ # interrupts and exceptions while in supervisor
+ # mode come here.
+ #
+ # push all registers, call kerneltrap(), restore, return.
+ #
+.globl kerneltrap
+.globl kernelvec
+.align 4
+kernelvec:
+ addi sp, sp, -256
+
+ sd ra, 0(sp)
+ sd sp, 8(sp)
+ sd gp, 16(sp)
+ sd tp, 24(sp)
+ sd t0, 32(sp)
+ sd t1, 40(sp)
+ sd t2, 48(sp)
+ sd s0, 56(sp)
+ sd s1, 64(sp)
+ sd a0, 72(sp)
+ sd a1, 80(sp)
+ sd a2, 88(sp)
+ sd a3, 96(sp)
+ sd a4, 104(sp)
+ sd a5, 112(sp)
+ sd a6, 120(sp)
+ sd a7, 128(sp)
+ sd s2, 136(sp)
+ sd s3, 144(sp)
+ sd s4, 152(sp)
+ sd s5, 160(sp)
+ sd s6, 168(sp)
+ sd s7, 176(sp)
+ sd s8, 184(sp)
+ sd s9, 192(sp)
+ sd s10, 200(sp)
+ sd s11, 208(sp)
+ sd t3, 216(sp)
+ sd t4, 224(sp)
+ sd t5, 232(sp)
+ sd t6, 240(sp)
+
+ call kerneltrap
+
+ ld ra, 0(sp)
+ ld sp, 8(sp)
+ ld gp, 16(sp)
+ ld tp, 24(sp)
+ ld t0, 32(sp)
+ ld t1, 40(sp)
+ ld t2, 48(sp)
+ ld s0, 56(sp)
+ ld s1, 64(sp)
+ ld a0, 72(sp)
+ ld a1, 80(sp)
+ ld a2, 88(sp)
+ ld a3, 96(sp)
+ ld a4, 104(sp)
+ ld a5, 112(sp)
+ ld a6, 120(sp)
+ ld a7, 128(sp)
+ ld s2, 136(sp)
+ ld s3, 144(sp)
+ ld s4, 152(sp)
+ ld s5, 160(sp)
+ ld s6, 168(sp)
+ ld s7, 176(sp)
+ ld s8, 184(sp)
+ ld s9, 192(sp)
+ ld s10, 200(sp)
+ ld s11, 208(sp)
+ ld t3, 216(sp)
+ ld t4, 224(sp)
+ ld t5, 232(sp)
+ ld t6, 240(sp)
+
+ addi sp, sp, 256
+
+ sret
+
+ #
+ # machine-mode timer interrupt.
+ #
+.globl machinevec
+.align 4
+machinevec:
+ csrrw a0, mscratch, a0
+ sd a1, 0(a0)
+ sd a2, 8(a0)
+ sd a3, 16(a0)
+ sd a4, 24(a0)
+
+ # add another second to mtimecmp0.
+ ld a1, 32(a0) # CLINT_MTIMECMP(hart)
+ ld a2, 40(a0) # ticks per second
+ ld a3, 0(a1)
+ add a3, a3, a2
+ sd a3, 0(a1)
+
+ # raise a supervisor software interrupt.
+ li a1, 2
+ csrw sip, a1
+
+ ld a4, 24(a0)
+ ld a3, 16(a0)
+ ld a2, 8(a0)
+ ld a1, 0(a0)
+ csrrw a0, mscratch, a0
+
+ mret
diff --git a/kernel/log.c b/kernel/log.c
new file mode 100644
index 0000000..c8f7e62
--- /dev/null
+++ b/kernel/log.c
@@ -0,0 +1,235 @@
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "param.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "fs.h"
+#include "buf.h"
+
+// Simple logging that allows concurrent FS system calls.
+//
+// A log transaction contains the updates of multiple FS system
+// calls. The logging system only commits when there are
+// no FS system calls active. Thus there is never
+// any reasoning required about whether a commit might
+// write an uncommitted system call's updates to disk.
+//
+// A system call should call begin_op()/end_op() to mark
+// its start and end. Usually begin_op() just increments
+// the count of in-progress FS system calls and returns.
+// But if it thinks the log is close to running out, it
+// sleeps until the last outstanding end_op() commits.
+//
+// The log is a physical re-do log containing disk blocks.
+// The on-disk log format:
+// header block, containing block #s for block A, B, C, ...
+// block A
+// block B
+// block C
+// ...
+// Log appends are synchronous.
+
+// Contents of the header block, used for both the on-disk header block
+// and to keep track in memory of logged block# before commit.
+struct logheader {
+ int n;
+ int block[LOGSIZE];
+};
+
+struct log {
+ struct spinlock lock;
+ int start;
+ int size;
+ int outstanding; // how many FS sys calls are executing.
+ int committing; // in commit(), please wait.
+ int dev;
+ struct logheader lh;
+};
+struct log log;
+
+static void recover_from_log(void);
+static void commit();
+
+void
+initlog(int dev)
+{
+  struct superblock sb;
+
+  if (sizeof(struct logheader) >= BSIZE)
+    panic("initlog: too big logheader");
+
+  initlock(&log.lock, "log");
+  // the superblock tells us where the log lives and how big it is.
+  readsb(dev, &sb);
+  log.start = sb.logstart;
+  log.size = sb.nlog;
+  log.dev = dev;
+  recover_from_log();
+}
+
+// Copy committed blocks from log to their home location
+static void
+install_trans(void)
+{
+  int tail;
+
+  // log.start holds the header; data blocks start at log.start+1.
+  for (tail = 0; tail < log.lh.n; tail++) {
+    struct buf *lbuf = bread(log.dev, log.start+tail+1); // read log block
+    struct buf *dbuf = bread(log.dev, log.lh.block[tail]); // read dst
+    memmove(dbuf->data, lbuf->data, BSIZE);  // copy block to dst
+    bwrite(dbuf);  // write dst to disk
+    brelse(lbuf);
+    brelse(dbuf);
+  }
+}
+
+// Read the log header from disk into the in-memory log header
+static void
+read_head(void)
+{
+  int i;
+  struct buf *buf = bread(log.dev, log.start);
+  struct logheader *lh = (struct logheader *) (buf->data);
+
+  log.lh.n = lh->n;
+  for (i = 0; i < log.lh.n; i++)
+    log.lh.block[i] = lh->block[i];
+  brelse(buf);
+}
+
+// Write in-memory log header to disk.
+// This is the true point at which the
+// current transaction commits.
+static void
+write_head(void)
+{
+  struct buf *buf = bread(log.dev, log.start);
+  struct logheader *hb = (struct logheader *) (buf->data);
+  int i;
+  hb->n = log.lh.n;
+  for (i = 0; i < log.lh.n; i++) {
+    hb->block[i] = log.lh.block[i];
+  }
+  // synchronous write, bypassing the log (this IS the log).
+  bwrite(buf);
+  brelse(buf);
+}
+
+// Bring the disk to a consistent state after a (possible) crash:
+// replay any committed transaction found in the log, then empty it.
+static void
+recover_from_log(void)
+{
+  read_head();
+  install_trans(); // if committed, copy from log to disk
+  log.lh.n = 0;
+  write_head(); // clear the log
+}
+
+// called at the start of each FS system call.
+void
+begin_op(void)
+{
+  acquire(&log.lock);
+  while(1){
+    if(log.committing){
+      sleep(&log, &log.lock);
+    } else if(log.lh.n + (log.outstanding+1)*MAXOPBLOCKS > LOGSIZE){
+      // this op might exhaust log space; wait for commit.
+      // (reserves the worst case of MAXOPBLOCKS per open op.)
+      sleep(&log, &log.lock);
+    } else {
+      log.outstanding += 1;
+      release(&log.lock);
+      break;
+    }
+  }
+}
+
+// called at the end of each FS system call.
+// commits if this was the last outstanding operation.
+void
+end_op(void)
+{
+  int do_commit = 0;
+
+  acquire(&log.lock);
+  log.outstanding -= 1;
+  if(log.committing)
+    panic("log.committing");
+  if(log.outstanding == 0){
+    do_commit = 1;
+    log.committing = 1;
+  } else {
+    // begin_op() may be waiting for log space,
+    // and decrementing log.outstanding has decreased
+    // the amount of reserved space.
+    wakeup(&log);
+  }
+  release(&log.lock);
+
+  if(do_commit){
+    // call commit w/o holding locks, since not allowed
+    // to sleep with locks.
+    // committing==1 keeps new begin_op() callers out meanwhile.
+    commit();
+    acquire(&log.lock);
+    log.committing = 0;
+    wakeup(&log);
+    release(&log.lock);
+  }
+}
+
+// Copy modified blocks from cache to log.
+static void
+write_log(void)
+{
+  int tail;
+
+  // slot 0 is the header block, so log data lives at log.start+1.
+  for (tail = 0; tail < log.lh.n; tail++) {
+    struct buf *to = bread(log.dev, log.start+tail+1); // log block
+    struct buf *from = bread(log.dev, log.lh.block[tail]); // cache block
+    memmove(to->data, from->data, BSIZE);
+    bwrite(to);  // write the log
+    brelse(from);
+    brelse(to);
+  }
+}
+
+// Commit the current transaction, if any blocks were logged.
+static void
+commit()
+{
+  if (log.lh.n > 0) {
+    write_log();     // Write modified blocks from cache to log
+    write_head();    // Write header to disk -- the real commit
+    install_trans(); // Now install writes to home locations
+    log.lh.n = 0;
+    write_head();    // Erase the transaction from the log
+  }
+}
+
+// Caller has modified b->data and is done with the buffer.
+// Record the block number and pin in the cache with B_DIRTY.
+// commit()/write_log() will do the disk write.
+//
+// log_write() replaces bwrite(); a typical use is:
+//   bp = bread(...)
+//   modify bp->data[]
+//   log_write(bp)
+//   brelse(bp)
+void
+log_write(struct buf *b)
+{
+  int i;
+
+  // take the lock before touching log.lh.n / log.outstanding:
+  // concurrent FS calls update them under log.lock.
+  acquire(&log.lock);
+  if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1)
+    panic("too big a transaction");
+  if (log.outstanding < 1)
+    panic("log_write outside of trans");
+
+  for (i = 0; i < log.lh.n; i++) {
+    if (log.lh.block[i] == b->blockno)   // log absorbtion
+      break;
+  }
+  log.lh.block[i] = b->blockno;
+  if (i == log.lh.n)
+    log.lh.n++;
+  b->flags |= B_DIRTY; // prevent eviction
+  release(&log.lock);
+}
+
diff --git a/kernel/main.c b/kernel/main.c
new file mode 100644
index 0000000..2168b9f
--- /dev/null
+++ b/kernel/main.c
@@ -0,0 +1,42 @@
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "defs.h"
+
+volatile static int started = 0;
+
+// Bootstrap processor starts running C code here.
+// Allocate a real stack and switch to it, first
+// doing some setup required for memory allocator to work.
+void
+main()
+{
+  if(cpuid() == 0){
+    uartinit();      // serial port
+    consoleinit();
+    printf("hart %d starting\n", cpuid());
+    kinit();         // physical page allocator
+    kvminit();       // create kernel page table
+    kvminithart();   // turn on paging
+    procinit();      // process table
+    trapinit();      // trap vectors
+    trapinithart();  // install kernel trap vector
+    plicinit();      // set up interrupt controller
+    plicinithart();  // ask PLIC for device interrupts
+    binit();         // buffer cache
+    fileinit();      // file table
+    ramdiskinit();   // disk
+    userinit();      // first user process
+    started = 1;
+  } else {
+    // NOTE(review): non-boot harts spin on `started` with no
+    // explicit memory fence; presumably relies on volatile plus
+    // the hardware memory model — confirm.
+    while(started == 0)
+      ;
+    printf("hart %d starting\n", cpuid());
+    kvminithart();    // turn on paging
+    trapinithart();   // install kernel trap vector
+    plicinithart();   // ask PLIC for device interrupts
+  }
+
+  scheduler();
+}
diff --git a/kernel/memlayout.h b/kernel/memlayout.h
new file mode 100644
index 0000000..462986c
--- /dev/null
+++ b/kernel/memlayout.h
@@ -0,0 +1,50 @@
+// Physical memory layout
+
+// qemu -machine virt is set up like this,
+// based on qemu's hw/riscv/virt.c:
+//
+// 00001000 -- boot ROM, provided by qemu
+// 02000000 -- CLINT
+// 0C000000 -- PLIC
+// 10000000 -- uart0 registers
+// 80000000 -- boot ROM jumps here in machine mode
+// -kernel loads the kernel here
+// 88000000 -- -initrd fs.img ramdisk image.
+// unused RAM after 80000000.
+
+// the kernel uses physical memory thus:
+// 80000000 -- entry.S, then kernel text and data
+// end -- start of kernel page allocation area
+// PHYSTOP -- end RAM used by the kernel
+
+// qemu puts UART registers here in physical memory.
+#define UART0 0x10000000L
+#define UART0_IRQ 10
+
+// local interrupt controller, which contains the timer.
+#define CLINT 0x2000000L
+#define CLINT_MTIMECMP(hartid) (CLINT + 0x4000 + 8*(hartid))
+#define CLINT_MTIME (CLINT + 0xBFF8)
+
+// qemu puts programmable interrupt controller here.
+#define PLIC 0x0c000000L
+#define PLIC_PRIORITY (PLIC + 0x0)
+#define PLIC_PENDING (PLIC + 0x1000)
+#define PLIC_MENABLE(hart) (PLIC + 0x2000 + (hart)*0x100)
+#define PLIC_SENABLE(hart) (PLIC + 0x2080 + (hart)*0x100)
+#define PLIC_MPRIORITY(hart) (PLIC + 0x200000 + (hart)*0x2000)
+#define PLIC_SPRIORITY(hart) (PLIC + 0x201000 + (hart)*0x2000)
+#define PLIC_MCLAIM(hart) (PLIC + 0x200004 + (hart)*0x2000)
+#define PLIC_SCLAIM(hart) (PLIC + 0x201004 + (hart)*0x2000)
+
+#define RAMDISK 0x88000000L
+
+// the kernel expects there to be RAM
+// for use by the kernel and user pages
+// from physical address 0x80000000 to PHYSTOP.
+#define KERNBASE 0x80000000L
+#define PHYSTOP (KERNBASE + 128*1024*1024)
+
+// map the trampoline page to the highest address,
+// in both user and kernel space.
+#define TRAMPOLINE (MAXVA - PGSIZE)
diff --git a/kernel/param.h b/kernel/param.h
new file mode 100644
index 0000000..b5fdcb2
--- /dev/null
+++ b/kernel/param.h
@@ -0,0 +1,13 @@
+#define NPROC 64 // maximum number of processes
+#define NCPU 8 // maximum number of CPUs
+#define NOFILE 16 // open files per process
+#define NFILE 100 // open files per system
+#define NINODE 50 // maximum number of active i-nodes
+#define NDEV 10 // maximum major device number
+#define ROOTDEV 1 // device number of file system root disk
+#define MAXARG 32 // max exec arguments
+#define MAXOPBLOCKS 10 // max # of blocks any FS op writes
+#define LOGSIZE (MAXOPBLOCKS*3) // max data blocks in on-disk log
+#define NBUF (MAXOPBLOCKS*3) // size of disk block cache
+#define FSSIZE 1000 // size of file system in blocks
+#define MAXPATH 128 // maximum file path name
diff --git a/kernel/pipe.c b/kernel/pipe.c
new file mode 100644
index 0000000..31bf0cc
--- /dev/null
+++ b/kernel/pipe.c
@@ -0,0 +1,129 @@
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "param.h"
+#include "proc.h"
+#include "fs.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "file.h"
+
+#define PIPESIZE 512
+
+// in-kernel pipe buffer. nread and nwrite are cumulative byte
+// counts; data[] is indexed modulo PIPESIZE, so the pipe is full
+// when nwrite == nread + PIPESIZE and empty when nread == nwrite.
+// the whole struct fits in the single kalloc() page it lives in.
+struct pipe {
+  struct spinlock lock;
+  char data[PIPESIZE];
+  uint nread;     // number of bytes read
+  uint nwrite;    // number of bytes written
+  int readopen;   // read fd is still open
+  int writeopen;  // write fd is still open
+};
+
+// allocate a pipe plus two file structures referring to it:
+// *f0 is the read end, *f1 the write end.
+// returns 0 on success, -1 on failure; any partially-made
+// allocations are released on the "bad" path.
+int
+pipealloc(struct file **f0, struct file **f1)
+{
+  struct pipe *p;
+
+  p = 0;
+  *f0 = *f1 = 0;
+  if((*f0 = filealloc()) == 0 || (*f1 = filealloc()) == 0)
+    goto bad;
+  if((p = (struct pipe*)kalloc()) == 0)
+    goto bad;
+  p->readopen = 1;
+  p->writeopen = 1;
+  p->nwrite = 0;
+  p->nread = 0;
+  initlock(&p->lock, "pipe");
+  (*f0)->type = FD_PIPE;
+  (*f0)->readable = 1;
+  (*f0)->writable = 0;
+  (*f0)->pipe = p;
+  (*f1)->type = FD_PIPE;
+  (*f1)->readable = 0;
+  (*f1)->writable = 1;
+  (*f1)->pipe = p;
+  return 0;
+
+//PAGEBREAK: 20
+ bad:
+  if(p)
+    kfree((char*)p);
+  if(*f0)
+    fileclose(*f0);
+  if(*f1)
+    fileclose(*f1);
+  return -1;
+}
+
+// called when the last file reference to one end goes away.
+// wakes any sleepers on the opposite end so they see EOF/EPIPE;
+// the side that closes last frees the pipe itself.
+void
+pipeclose(struct pipe *p, int writable)
+{
+  acquire(&p->lock);
+  if(writable){
+    p->writeopen = 0;
+    wakeup(&p->nread);
+  } else {
+    p->readopen = 0;
+    wakeup(&p->nwrite);
+  }
+  if(p->readopen == 0 && p->writeopen == 0){
+    release(&p->lock);
+    kfree((char*)p);
+  } else
+    release(&p->lock);
+}
+
+//PAGEBREAK: 40
+// write n bytes from user memory at addr into the pipe.
+// sleeps while the pipe is full and a reader is still open.
+// returns the number of bytes actually written (may be < n when
+// copyin fails partway through), or -1 if the read end is closed
+// or this process has been killed.
+int
+pipewrite(struct pipe *p, uint64 addr, int n)
+{
+  int i;
+  char ch;
+  struct proc *pr = myproc();
+
+  acquire(&p->lock);
+  for(i = 0; i < n; i++){
+    while(p->nwrite == p->nread + PIPESIZE){ //DOC: pipewrite-full
+      if(p->readopen == 0 || pr->killed){
+        release(&p->lock);
+        return -1;
+      }
+      wakeup(&p->nread);
+      sleep(&p->nwrite, &p->lock); //DOC: pipewrite-sleep
+    }
+    // a bad user address stops the write early; report only
+    // what was actually transferred.
+    if(copyin(pr->pagetable, &ch, addr + i, 1) == -1)
+      break;
+    p->data[p->nwrite++ % PIPESIZE] = ch;
+  }
+  wakeup(&p->nread); //DOC: pipewrite-wakeup1
+  release(&p->lock);
+  return i;  // bytes copied, not the requested n.
+}
+
+// copy up to n bytes from the pipe into user memory at addr.
+// sleeps until data arrives or every write fd is closed.
+// returns the number of bytes copied (0 means EOF), or -1 if
+// this process has been killed while waiting.
+int
+piperead(struct pipe *p, uint64 addr, int n)
+{
+  int i;
+  struct proc *pr = myproc();
+  char ch;
+
+  acquire(&p->lock);
+  while(p->nread == p->nwrite && p->writeopen){ //DOC: pipe-empty
+    if(myproc()->killed){
+      release(&p->lock);
+      return -1;
+    }
+    sleep(&p->nread, &p->lock); //DOC: piperead-sleep
+  }
+  for(i = 0; i < n; i++){ //DOC: piperead-copy
+    if(p->nread == p->nwrite)
+      break;
+    ch = p->data[p->nread++ % PIPESIZE];
+    if(copyout(pr->pagetable, addr + i, &ch, 1) == -1)
+      break;
+  }
+  wakeup(&p->nwrite); //DOC: piperead-wakeup
+  release(&p->lock);
+  return i;
+}
diff --git a/kernel/plic.c b/kernel/plic.c
new file mode 100644
index 0000000..0f19ab0
--- /dev/null
+++ b/kernel/plic.c
@@ -0,0 +1,63 @@
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "defs.h"
+
+//
+// the riscv Platform Level Interrupt Controller (PLIC).
+//
+
+// one-time, whole-machine PLIC setup: runs once on one CPU.
+void
+plicinit(void)
+{
+  // set uart's priority to be non-zero (otherwise disabled).
+  *(uint32*)(PLIC + UART0_IRQ*4) = 1;
+}
+
+// per-hart PLIC setup: route the UART interrupt to this
+// hart's supervisor mode and accept any priority > 0.
+// (the commented-out lines show the raw addresses the
+// PLIC_* macros expand to.)
+void
+plicinithart(void)
+{
+  int hart = cpuid();
+
+  // set uart's enable bit for this hart's S-mode.
+  //*(uint32*)(PLIC + 0x2080)= (1 << UART0_IRQ);
+  *(uint32*)PLIC_SENABLE(hart)= (1 << UART0_IRQ);
+
+  // set this hart's S-mode priority threshold to 0.
+  //*(uint32*)(PLIC + 0x201000) = 0;
+  *(uint32*)PLIC_SPRIORITY(hart) = 0;
+}
+
+// return a bitmap of which IRQs are waiting
+// to be served.
+uint64
+plic_pending(void)
+{
+  uint64 mask;
+
+  //mask = *(uint32*)(PLIC + 0x1000);
+  //mask |= (uint64)*(uint32*)(PLIC + 0x1004) << 32;
+  mask = *(uint64*)PLIC_PENDING;
+
+  return mask;
+}
+
+// ask the PLIC what interrupt we should serve.
+// reading the claim register also marks the IRQ as being handled,
+// suppressing re-delivery until plic_complete().
+int
+plic_claim(void)
+{
+  int hart = cpuid();
+  //int irq = *(uint32*)(PLIC + 0x201004);
+  int irq = *(uint32*)PLIC_SCLAIM(hart);
+  return irq;
+}
+
+// tell the PLIC we've served this IRQ.
+void
+plic_complete(int irq)
+{
+  int hart = cpuid();
+  //*(uint32*)(PLIC + 0x201004) = irq;
+  *(uint32*)PLIC_SCLAIM(hart) = irq;
+}
diff --git a/kernel/proc.c b/kernel/proc.c
new file mode 100644
index 0000000..4ae34c8
--- /dev/null
+++ b/kernel/proc.c
@@ -0,0 +1,591 @@
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "proc.h"
+#include "spinlock.h"
+#include "defs.h"
+
+// the process table; ptable.lock protects every proc's state
+// field and nextpid.
+struct {
+  struct spinlock lock;
+  struct proc proc[NPROC];
+} ptable;
+
+struct cpu cpus[NCPU];
+
+struct proc *initproc;
+
+int nextpid = 1;  // next pid to hand out; incremented under ptable.lock.
+extern void forkret(void);
+
+// for returning out of the kernel
+extern void sysexit(void);
+
+static void wakeup1(void *chan);
+
+extern char trampout[]; // trampoline.S
+
+void
+procinit(void)
+{
+  initlock(&ptable.lock, "ptable");
+}
+
+// Must be called with interrupts disabled,
+// to prevent race with process being moved
+// to a different CPU.
+int
+cpuid()
+{
+  // tp is loaded with the hartid in mstart() and preserved thereafter.
+  int id = r_tp();
+  return id;
+}
+
+// Return this core's cpu struct.
+// Interrupts must be disabled.
+struct cpu*
+mycpu(void) {
+  int id = cpuid();
+  struct cpu *c = &cpus[id];
+  return c;
+}
+
+// Return the current struct proc *.
+struct proc*
+myproc(void) {
+  // disable interrupts so we aren't rescheduled onto another
+  // cpu between reading mycpu() and c->proc.
+  push_off();
+  struct cpu *c = mycpu();
+  struct proc *p = c->proc;
+  pop_off();
+  return p;
+}
+
+//PAGEBREAK: 32
+// Look in the process table for an UNUSED proc.
+// If found, change state to EMBRYO and initialize
+// state required to run in the kernel.
+// Otherwise return 0. On any allocation failure the
+// partially-built slot is returned to UNUSED and nothing leaks.
+static struct proc*
+allocproc(void)
+{
+  struct proc *p;
+
+  acquire(&ptable.lock);
+
+  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++)
+    if(p->state == UNUSED)
+      goto found;
+
+  release(&ptable.lock);
+  return 0;
+
+found:
+  p->state = EMBRYO;
+  p->pid = nextpid++;
+
+  release(&ptable.lock);
+
+  // Allocate a page for the kernel stack.
+  if((p->kstack = kalloc()) == 0){
+    p->state = UNUSED;
+    return 0;
+  }
+
+  // Allocate a trapframe page.
+  if((p->tf = (struct trapframe *)kalloc()) == 0){
+    // don't leak the kernel stack allocated just above.
+    kfree(p->kstack);
+    p->kstack = 0;
+    p->state = UNUSED;
+    return 0;
+  }
+
+  // An empty user page table.
+  // NOTE(review): proc_pagetable()'s result is not checked here;
+  // kalloc exhaustion inside it is unhandled — TODO confirm.
+  p->pagetable = proc_pagetable(p);
+
+  // Set up new context to start executing at forkret,
+  // which returns to user space.
+  memset(&p->context, 0, sizeof p->context);
+  p->context.ra = (uint64)forkret;
+  p->context.sp = (uint64)p->kstack + PGSIZE;
+
+  return p;
+}
+
+// Create a page table for a given process,
+// with no users pages, but with trampoline pages.
+// Called both when creating a process, and
+// by exec() when building tentative new memory image,
+// which might fail.
+pagetable_t
+proc_pagetable(struct proc *p)
+{
+  pagetable_t pagetable;
+
+  // An empty user page table.
+  pagetable = uvmcreate();
+
+  // NOTE(review): both mappages() return values are ignored below;
+  // an allocation failure would leave a partial mapping — TODO confirm
+  // whether mappages panics internally or this needs checking.
+
+  // map the trampoline code (for system call return)
+  // at the highest user virtual address.
+  // only the supervisor uses it, on the way
+  // to/from user space, so not PTE_U.
+  mappages(pagetable, TRAMPOLINE, PGSIZE,
+           (uint64)trampout, PTE_R | PTE_X);
+
+  // map the trapframe, for trampoline.S.
+  mappages(pagetable, (TRAMPOLINE - PGSIZE), PGSIZE,
+           (uint64)(p->tf), PTE_R | PTE_W);
+
+  return pagetable;
+}
+
+// Free a process's page table, and free the
+// physical memory the page table refers to.
+// Called both when a process exits and from
+// exec() if it fails.
+void
+proc_freepagetable(pagetable_t pagetable, uint64 sz)
+{
+  // unmap the two high pages without freeing them: the trampoline
+  // is shared kernel text, and the trapframe page is freed
+  // separately (see wait()).
+  unmappages(pagetable, TRAMPOLINE, PGSIZE, 0);
+  unmappages(pagetable, TRAMPOLINE-PGSIZE, PGSIZE, 0);
+  uvmfree(pagetable, sz);
+}
+
+// a user program that calls exec("/init")
+// od -t xC initcode
+uchar initcode[] = {
+  0x17, 0x05, 0x00, 0x00, 0x13, 0x05, 0x05, 0x02, 0x97, 0x05, 0x00, 0x00, 0x93, 0x85, 0x05, 0x02,
+  0x9d, 0x48, 0x73, 0x00, 0x00, 0x00, 0x89, 0x48, 0x73, 0x00, 0x00, 0x00, 0xef, 0xf0, 0xbf, 0xff,
+  0x2f, 0x69, 0x6e, 0x69, 0x74, 0x00, 0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00
+};
+
+//PAGEBREAK: 32
+// Set up first user process.
+// Called once from main(); creates the process that will
+// exec /init. Its user image is the initcode[] blob above,
+// loaded at virtual address 0.
+void
+userinit(void)
+{
+  struct proc *p;
+
+  p = allocproc();
+  initproc = p;
+
+  uvminit(p->pagetable, initcode, sizeof(initcode));
+  p->sz = PGSIZE;
+
+  // prepare for the very first kernel->user.
+  p->tf->epc = 0;      // user program counter: start of initcode.
+  p->tf->sp = PGSIZE;  // user stack pointer: top of the one page.
+
+  safestrcpy(p->name, "initcode", sizeof(p->name));
+  p->cwd = namei("/");
+
+  // this assignment to p->state lets other cores
+  // run this process. the acquire forces the above
+  // writes to be visible, and the lock is also needed
+  // because the assignment might not be atomic.
+  acquire(&ptable.lock);
+
+  p->state = RUNNABLE;
+
+  release(&ptable.lock);
+}
+
+// Grow (n > 0) or shrink (n < 0) current process's memory by n bytes.
+// Return 0 on success, -1 on failure.
+int
+growproc(int n)
+{
+  // p->sz is a uint64; use uint64 here so large process sizes
+  // aren't truncated to 32 bits.
+  uint64 sz;
+  struct proc *p = myproc();
+
+  sz = p->sz;
+  if(n > 0){
+    if((sz = uvmalloc(p->pagetable, sz, sz + n)) == 0)
+      return -1;
+  } else if(n < 0){
+    if((sz = uvmdealloc(p->pagetable, sz, sz + n)) == 0)
+      return -1;
+  }
+  p->sz = sz;
+  return 0;
+}
+
+// Create a new process, copying p as the parent.
+// Sets up child kernel stack to return as if from system call.
+// Returns the child's pid in the parent, 0 in the child
+// (via the a0 assignment below), or -1 on allocation failure.
+int
+fork(void)
+{
+  int i, pid;
+  struct proc *np;
+  struct proc *p = myproc();
+
+  // Allocate process.
+  if((np = allocproc()) == 0){
+    return -1;
+  }
+
+  // Copy user memory from parent to child.
+  // NOTE(review): uvmcopy's result is not checked; a mid-copy
+  // allocation failure is unhandled here — TODO confirm.
+  uvmcopy(p->pagetable, np->pagetable, p->sz);
+  np->sz = p->sz;
+
+  np->parent = p;
+
+  // copy saved user registers.
+  *(np->tf) = *(p->tf);
+
+  // Cause fork to return 0 in the child.
+  np->tf->a0 = 0;
+
+  // increment reference counts on open file descriptors.
+  for(i = 0; i < NOFILE; i++)
+    if(p->ofile[i])
+      np->ofile[i] = filedup(p->ofile[i]);
+  np->cwd = idup(p->cwd);
+
+  safestrcpy(np->name, p->name, sizeof(p->name));
+
+  pid = np->pid;
+
+  acquire(&ptable.lock);
+
+  np->state = RUNNABLE;
+
+  release(&ptable.lock);
+
+  return pid;
+}
+
+// Exit the current process.  Does not return.
+// An exited process remains in the zombie state
+// until its parent calls wait().
+// memory and the proc slot are reclaimed by the parent's wait(),
+// not here.
+void
+exit(void)
+{
+  struct proc *p = myproc();
+  struct proc *pp;
+  int fd;
+
+  if(p == initproc)
+    panic("init exiting");
+
+  // Close all open files.
+  for(fd = 0; fd < NOFILE; fd++){
+    if(p->ofile[fd]){
+      fileclose(p->ofile[fd]);
+      p->ofile[fd] = 0;
+    }
+  }
+
+  // drop the reference to the current directory inside a
+  // filesystem transaction, since iput may write the inode.
+  begin_op();
+  iput(p->cwd);
+  end_op();
+  p->cwd = 0;
+
+  acquire(&ptable.lock);
+
+  // Parent might be sleeping in wait().
+  wakeup1(p->parent);
+
+  // Pass abandoned children to init.
+  for(pp = ptable.proc; pp < &ptable.proc[NPROC]; pp++){
+    if(pp->parent == p){
+      pp->parent = initproc;
+      if(pp->state == ZOMBIE)
+        wakeup1(initproc);
+    }
+  }
+
+  // Jump into the scheduler, never to return.
+  p->state = ZOMBIE;
+  sched();
+  panic("zombie exit");
+}
+
+// Wait for a child process to exit and return its pid.
+// Return -1 if this process has no children.
+// Frees the zombie child's kernel stack, trapframe, and
+// page table, and recycles its proc slot.
+int
+wait(void)
+{
+  struct proc *np;
+  int havekids, pid;
+  struct proc *p = myproc();
+
+  acquire(&ptable.lock);
+  for(;;){
+    // Scan through table looking for exited children.
+    havekids = 0;
+    for(np = ptable.proc; np < &ptable.proc[NPROC]; np++){
+      if(np->parent != p)
+        continue;
+      havekids = 1;
+      if(np->state == ZOMBIE){
+        // Found one.
+        pid = np->pid;
+        kfree(np->kstack);
+        np->kstack = 0;
+        kfree((void*)np->tf);
+        np->tf = 0;
+        proc_freepagetable(np->pagetable, np->sz);
+        np->pagetable = 0;
+        np->pid = 0;
+        np->parent = 0;
+        np->name[0] = 0;
+        np->killed = 0;
+        np->state = UNUSED;
+        release(&ptable.lock);
+        return pid;
+      }
+    }
+
+    // No point waiting if we don't have any children.
+    if(!havekids || p->killed){
+      release(&ptable.lock);
+      return -1;
+    }
+
+    // Wait for children to exit.  (See wakeup1 call in proc_exit.)
+    sleep(p, &ptable.lock);  //DOC: wait-sleep
+  }
+}
+
+//PAGEBREAK: 42
+// Per-CPU process scheduler.
+// Each CPU calls scheduler() after setting itself up.
+// Scheduler never returns.  It loops, doing:
+//  - choose a process to run
+//  - swtch to start running that process
+//  - eventually that process transfers control
+//    via swtch back to the scheduler.
+void
+scheduler(void)
+{
+  struct proc *p;
+  struct cpu *c = mycpu();
+
+  c->proc = 0;
+  for(;;){
+    // Enable interrupts on this processor.
+    intr_on();
+
+    // Loop over process table looking for process to run.
+    acquire(&ptable.lock);
+    for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
+      if(p->state != RUNNABLE)
+        continue;
+
+      // Switch to chosen process.  It is the process's job
+      // to release ptable.lock and then reacquire it
+      // before jumping back to us.
+      c->proc = p;
+      p->state = RUNNING;
+
+      swtch(&c->scheduler, &p->context);
+
+      // Process is done running for now.
+      // It should have changed its p->state before coming back.
+      c->proc = 0;
+    }
+    release(&ptable.lock);
+  }
+}
+
+// Enter scheduler.  Must hold only ptable.lock
+// and have changed proc->state. Saves and restores
+// intena because intena is a property of this
+// kernel thread, not this CPU. It should
+// be proc->intena and proc->noff, but that would
+// break in the few places where a lock is held but
+// there's no process.
+void
+sched(void)
+{
+  int intena;
+  struct proc *p = myproc();
+
+  if(!holding(&ptable.lock))
+    panic("sched ptable.lock");
+  if(mycpu()->noff != 1)
+    panic("sched locks");
+  if(p->state == RUNNING)
+    panic("sched running");
+  if(intr_get())
+    panic("sched interruptible");
+
+  intena = mycpu()->intena;
+  swtch(&p->context, &mycpu()->scheduler);
+  mycpu()->intena = intena;
+}
+
+// Give up the CPU for one scheduling round.
+void
+yield(void)
+{
+  acquire(&ptable.lock);  //DOC: yieldlock
+  myproc()->state = RUNNABLE;
+  sched();
+  release(&ptable.lock);
+}
+
+// A fork child's very first scheduling by scheduler()
+// will swtch to forkret.
+void
+forkret(void)
+{
+  static int first = 1;
+
+  // Still holding ptable.lock from scheduler.
+  release(&ptable.lock);
+
+  if (first) {
+    // Some initialization functions must be run in the context
+    // of a regular process (e.g., they call sleep), and thus cannot
+    // be run from main().
+    first = 0;
+    iinit(ROOTDEV);
+    initlog(ROOTDEV);
+  }
+
+  usertrapret();
+}
+
+// Atomically release lock and sleep on chan.
+// Reacquires lock when awakened.
+void
+sleep(void *chan, struct spinlock *lk)
+{
+  struct proc *p = myproc();
+
+  if(p == 0)
+    panic("sleep");
+
+  if(lk == 0)
+    panic("sleep without lk");
+
+  // Must acquire ptable.lock in order to
+  // change p->state and then call sched.
+  // Once we hold ptable.lock, we can be
+  // guaranteed that we won't miss any wakeup
+  // (wakeup runs with ptable.lock locked),
+  // so it's okay to release lk.
+  if(lk != &ptable.lock){  //DOC: sleeplock0
+    acquire(&ptable.lock);  //DOC: sleeplock1
+    release(lk);
+  }
+  // Go to sleep.
+  p->chan = chan;
+  p->state = SLEEPING;
+
+  sched();
+
+  // Tidy up.
+  p->chan = 0;
+
+  // Reacquire original lock.
+  if(lk != &ptable.lock){  //DOC: sleeplock2
+    release(&ptable.lock);
+    acquire(lk);
+  }
+}
+
+//PAGEBREAK!
+// Wake up all processes sleeping on chan.
+// The ptable lock must be held.
+static void
+wakeup1(void *chan)
+{
+  struct proc *p;
+
+  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++)
+    if(p->state == SLEEPING && p->chan == chan)
+      p->state = RUNNABLE;
+}
+
+// Wake up all processes sleeping on chan.
+// Public wrapper around wakeup1 that takes ptable.lock.
+void
+wakeup(void *chan)
+{
+  acquire(&ptable.lock);
+  wakeup1(chan);
+  release(&ptable.lock);
+}
+
+// Kill the process with the given pid.
+// Process won't exit until it returns
+// to user space (see trap in trap.c).
+// Returns 0 on success, -1 if no such pid.
+int
+kill(int pid)
+{
+  struct proc *p;
+
+  acquire(&ptable.lock);
+  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
+    if(p->pid == pid){
+      p->killed = 1;
+      // Wake process from sleep if necessary.
+      if(p->state == SLEEPING)
+        p->state = RUNNABLE;
+      release(&ptable.lock);
+      return 0;
+    }
+  }
+  release(&ptable.lock);
+  return -1;
+}
+
+// Copy to either a user address, or kernel address,
+// depending on usr_dst.
+// Returns 0 on success, -1 on error.
+int
+either_copyout(int user_dst, uint64 dst, void *src, uint64 len)
+{
+  struct proc *p = myproc();
+  if(user_dst){
+    return copyout(p->pagetable, dst, src, len);
+  } else {
+    // kernel destination: a plain memmove cannot fail.
+    memmove((char *)dst, src, len);
+    return 0;
+  }
+}
+
+// Copy from either a user address, or kernel address,
+// depending on usr_src.
+// Returns 0 on success, -1 on error.
+int
+either_copyin(void *dst, int user_src, uint64 src, uint64 len)
+{
+  struct proc *p = myproc();
+  if(user_src){
+    return copyin(p->pagetable, dst, src, len);
+  } else {
+    // kernel source: a plain memmove cannot fail.
+    memmove(dst, (char*)src, len);
+    return 0;
+  }
+}
+
+// Print a process listing to console.  For debugging.
+// Runs when user types ^P on console.
+// No lock to avoid wedging a stuck machine further.
+void
+procdump(void)
+{
+  static char *states[] = {
+  [UNUSED]    "unused",
+  [EMBRYO]    "embryo",
+  [SLEEPING]  "sleep ",
+  [RUNNABLE]  "runble",
+  [RUNNING]   "run   ",
+  [ZOMBIE]    "zombie"
+  };
+  struct proc *p;
+  char *state;
+
+  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
+    if(p->state == UNUSED)
+      continue;
+    if(p->state >= 0 && p->state < NELEM(states) && states[p->state])
+      state = states[p->state];
+    else
+      state = "???";
+    printf("%d %s %s", p->pid, state, p->name);
+    printf("\n");
+  }
+}
+
diff --git a/kernel/proc.h b/kernel/proc.h
new file mode 100644
index 0000000..278e4cd
--- /dev/null
+++ b/kernel/proc.h
@@ -0,0 +1,104 @@
+// Saved registers for kernel context switches.
+// only ra, sp, and the callee-saved s-registers need saving:
+// swtch() is a function call, so caller-saved registers are
+// already saved by the compiler at the call site.
+struct context {
+  uint64 ra;
+  uint64 sp;
+
+  // callee-saved
+  uint64 s0;
+  uint64 s1;
+  uint64 s2;
+  uint64 s3;
+  uint64 s4;
+  uint64 s5;
+  uint64 s6;
+  uint64 s7;
+  uint64 s8;
+  uint64 s9;
+  uint64 s10;
+  uint64 s11;
+};
+
+// Per-CPU state
+struct cpu {
+  struct proc *proc;          // The process running on this cpu or null
+  struct context scheduler;   // swtch() here to enter scheduler
+  int noff;                   // Depth of push_off() nesting.
+  int intena;                 // Were interrupts enabled before push_off()?
+};
+
+extern struct cpu cpus[NCPU];
+
+//PAGEBREAK: 17
+
+// per-process data for the early trap handling code in trampoline.S.
+// sits in a page by itself just under the trampoline page in the
+// user page table. not specially mapped in the kernel page table.
+// the sscratch register points here.
+// trampoline.S saves user registers, then restores kernel_sp and
+// kernel_satp.
+// includes callee-saved registers like s0-s11 because the
+// return-to-user path via usertrapret() doesn't return through
+// the entire kernel call stack.
+// the /* offset */ annotations are the byte offsets trampoline.S
+// uses; the field order must not change without updating it.
+struct trapframe {
+  /*   0 */ uint64 kernel_satp;
+  /*   8 */ uint64 kernel_sp;
+  /*  16 */ uint64 kernel_trap; // usertrap()
+  /*  24 */ uint64 epc; // saved user program counter
+  /*  32 */ uint64 hartid;
+  /*  40 */ uint64 ra;
+  /*  48 */ uint64 sp;
+  /*  56 */ uint64 gp;
+  /*  64 */ uint64 tp;
+  /*  72 */ uint64 t0;
+  /*  80 */ uint64 t1;
+  /*  88 */ uint64 t2;
+  /*  96 */ uint64 s0;
+  /* 104 */ uint64 s1;
+  /* 112 */ uint64 a0;
+  /* 120 */ uint64 a1;
+  /* 128 */ uint64 a2;
+  /* 136 */ uint64 a3;
+  /* 144 */ uint64 a4;
+  /* 152 */ uint64 a5;
+  /* 160 */ uint64 a6;
+  /* 168 */ uint64 a7;
+  /* 176 */ uint64 s2;
+  /* 184 */ uint64 s3;
+  /* 192 */ uint64 s4;
+  /* 200 */ uint64 s5;
+  /* 208 */ uint64 s6;
+  /* 216 */ uint64 s7;
+  /* 224 */ uint64 s8;
+  /* 232 */ uint64 s9;
+  /* 240 */ uint64 s10;
+  /* 248 */ uint64 s11;
+  /* 256 */ uint64 t3;
+  /* 264 */ uint64 t4;
+  /* 272 */ uint64 t5;
+  /* 280 */ uint64 t6;
+};
+
+enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE };
+
+// Per-process state
+struct proc {
+  char *kstack;                // Bottom of kernel stack for this process
+  uint64 sz;                   // Size of process memory (bytes)
+  pagetable_t pagetable;       // Page table
+  enum procstate state;        // Process state
+  int pid;                     // Process ID
+  struct proc *parent;         // Parent process
+  struct trapframe *tf;        // data page for trampoline.S
+  struct context context;      // swtch() here to run process
+  void *chan;                  // If non-zero, sleeping on chan
+  int killed;                  // If non-zero, have been killed
+  struct file *ofile[NOFILE];  // Open files
+  struct inode *cwd;           // Current directory
+  char name[16];               // Process name (debugging)
+};
+
+// Process memory is laid out contiguously, low addresses first:
+//   text
+//   original data and bss
+//   fixed-size stack
+//   expandable heap
diff --git a/kernel/ramdisk.c b/kernel/ramdisk.c
new file mode 100644
index 0000000..9901294
--- /dev/null
+++ b/kernel/ramdisk.c
@@ -0,0 +1,45 @@
+//
+// ramdisk that uses the disk image loaded by qemu -rdinit fs.img
+//
+
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "param.h"
+#include "memlayout.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "fs.h"
+#include "buf.h"
+
+// nothing to set up: the disk image is already in RAM at RAMDISK,
+// placed there by qemu before the kernel starts.
+void
+ramdiskinit(void)
+{
+}
+
+// If B_DIRTY is set, write buf to disk, clear B_DIRTY, set B_VALID.
+// Else if B_VALID is not set, read buf from disk, set B_VALID.
+// "disk" here is just a memmove to/from the in-RAM image, so the
+// operation completes synchronously — no interrupt, no sleeping.
+void
+ramdiskrw(struct buf *b)
+{
+  if(!holdingsleep(&b->lock))
+    panic("ramdiskrw: buf not locked");
+  if((b->flags & (B_VALID|B_DIRTY)) == B_VALID)
+    panic("ramdiskrw: nothing to do");
+
+  if(b->blockno >= FSSIZE)
+    panic("ramdiskrw: blockno too big");
+
+  uint64 diskaddr = b->blockno * BSIZE;
+  char *addr = (char *)RAMDISK + diskaddr;
+
+  if(b->flags & B_DIRTY){
+    // write
+    memmove(addr, b->data, BSIZE);
+    b->flags &= ~B_DIRTY;
+  } else {
+    // read
+    memmove(b->data, addr, BSIZE);
+    b->flags |= B_VALID;
+  }
+}
diff --git a/kernel/riscv.h b/kernel/riscv.h
new file mode 100644
index 0000000..c3371a4
--- /dev/null
+++ b/kernel/riscv.h
@@ -0,0 +1,338 @@
+// inline accessors for RISC-V control/status registers (CSRs).
+// the machine-mode (m*) registers are only accessible before
+// mstart() drops to supervisor mode via mret.
+
+// which hart (core) is this?
+static inline uint64
+r_mhartid()
+{
+  uint64 x;
+  asm volatile("csrr %0, mhartid" : "=r" (x) );
+  return x;
+}
+
+// Machine Status Register, mstatus
+
+#define MSTATUS_MPP_MASK (3L << 11)
+#define MSTATUS_MPP_M (3L << 11)
+#define MSTATUS_MPP_S (1L << 11)
+#define MSTATUS_MPP_U (0L << 11)
+#define MSTATUS_MIE (1L << 3)
+
+static inline uint64
+r_mstatus()
+{
+  uint64 x;
+  asm volatile("csrr %0, mstatus" : "=r" (x) );
+  return x;
+}
+
+static inline void
+w_mstatus(uint64 x)
+{
+  asm volatile("csrw mstatus, %0" : : "r" (x));
+}
+
+// machine exception program counter, holds the
+// instruction address to which a return from
+// exception will go.
+static inline void
+w_mepc(uint64 x)
+{
+  asm volatile("csrw mepc, %0" : : "r" (x));
+}
+
+// Supervisor Status Register, sstatus
+
+#define SSTATUS_SPP (1L << 8)  // Previous mode, 1=Supervisor, 0=User
+#define SSTATUS_SPIE (1L << 5) // Supervisor Previous Interrupt Enable
+#define SSTATUS_UPIE (1L << 4) // User Previous Interrupt Enable
+#define SSTATUS_SIE (1L << 1)  // Supervisor Interrupt Enable
+#define SSTATUS_UIE (1L << 0)  // User Interrupt Enable
+
+static inline uint64
+r_sstatus()
+{
+  uint64 x;
+  asm volatile("csrr %0, sstatus" : "=r" (x) );
+  return x;
+}
+
+static inline void
+w_sstatus(uint64 x)
+{
+  asm volatile("csrw sstatus, %0" : : "r" (x));
+}
+
+// Supervisor Interrupt Pending
+static inline uint64
+r_sip()
+{
+  uint64 x;
+  asm volatile("csrr %0, sip" : "=r" (x) );
+  return x;
+}
+
+static inline void
+w_sip(uint64 x)
+{
+  asm volatile("csrw sip, %0" : : "r" (x));
+}
+
+// Supervisor Interrupt Enable
+#define SIE_SEIE (1L << 9) // external
+#define SIE_STIE (1L << 5) // timer
+#define SIE_SSIE (1L << 1) // software
+static inline uint64
+r_sie()
+{
+  uint64 x;
+  asm volatile("csrr %0, sie" : "=r" (x) );
+  return x;
+}
+
+static inline void
+w_sie(uint64 x)
+{
+  asm volatile("csrw sie, %0" : : "r" (x));
+}
+
+// Machine-mode Interrupt Enable
+#define MIE_MEIE (1L << 11) // external
+#define MIE_MTIE (1L << 7)  // timer
+#define MIE_MSIE (1L << 3)  // software
+static inline uint64
+r_mie()
+{
+  uint64 x;
+  asm volatile("csrr %0, mie" : "=r" (x) );
+  return x;
+}
+
+static inline void
+w_mie(uint64 x)
+{
+  asm volatile("csrw mie, %0" : : "r" (x));
+}
+
+// supervisor exception program counter, holds the
+// instruction address to which a return from
+// exception will go. (comment previously said "machine" —
+// these accessors operate on sepc, the supervisor-mode EPC.)
+static inline void
+w_sepc(uint64 x)
+{
+  asm volatile("csrw sepc, %0" : : "r" (x));
+}
+
+static inline uint64
+r_sepc()
+{
+  uint64 x;
+  asm volatile("csrr %0, sepc" : "=r" (x) );
+  return x;
+}
+
+// Machine Exception Delegation
+static inline uint64
+r_medeleg()
+{
+  uint64 x;
+  asm volatile("csrr %0, medeleg" : "=r" (x) );
+  return x;
+}
+
+static inline void
+w_medeleg(uint64 x)
+{
+  asm volatile("csrw medeleg, %0" : : "r" (x));
+}
+
+// Machine Interrupt Delegation
+static inline uint64
+r_mideleg()
+{
+  uint64 x;
+  asm volatile("csrr %0, mideleg" : "=r" (x) );
+  return x;
+}
+
+static inline void
+w_mideleg(uint64 x)
+{
+  asm volatile("csrw mideleg, %0" : : "r" (x));
+}
+
+// Supervisor Trap-Vector Base Address
+// low two bits are mode.
+static inline void
+w_stvec(uint64 x)
+{
+  asm volatile("csrw stvec, %0" : : "r" (x));
+}
+
+static inline uint64
+r_stvec()
+{
+  uint64 x;
+  asm volatile("csrr %0, stvec" : "=r" (x) );
+  return x;
+}
+
+// Machine-mode interrupt vector
+static inline void
+w_mtvec(uint64 x)
+{
+  asm volatile("csrw mtvec, %0" : : "r" (x));
+}
+
+// use riscv's sv39 page table scheme.
+#define SATP_SV39 (8L << 60)
+
+#define MAKE_SATP(pagetable) (SATP_SV39 | (((uint64)pagetable) >> 12))
+
+// supervisor address translation and protection;
+// holds the address of the page table.
+static inline void
+w_satp(uint64 x)
+{
+  asm volatile("csrw satp, %0" : : "r" (x));
+}
+
+static inline uint64
+r_satp()
+{
+  uint64 x;
+  asm volatile("csrr %0, satp" : "=r" (x) );
+  return x;
+}
+
+// Supervisor Scratch register, for early trap handler in trampoline.S.
+static inline void
+w_sscratch(uint64 x)
+{
+  asm volatile("csrw sscratch, %0" : : "r" (x));
+}
+
+// Machine Scratch register; mstart() points it at this hart's
+// mscratch0[] area for the machine-mode timer vector.
+static inline void
+w_mscratch(uint64 x)
+{
+  asm volatile("csrw mscratch, %0" : : "r" (x));
+}
+
+// Supervisor Trap Cause
+static inline uint64
+r_scause()
+{
+  uint64 x;
+  asm volatile("csrr %0, scause" : "=r" (x) );
+  return x;
+}
+
+// Supervisor Trap Value
+static inline uint64
+r_stval()
+{
+  uint64 x;
+  asm volatile("csrr %0, stval" : "=r" (x) );
+  return x;
+}
+
+// Machine-mode Counter-Enable
+static inline void
+w_mcounteren(uint64 x)
+{
+  asm volatile("csrw mcounteren, %0" : : "r" (x));
+}
+
+static inline uint64
+r_mcounteren()
+{
+  uint64 x;
+  asm volatile("csrr %0, mcounteren" : "=r" (x) );
+  return x;
+}
+
+// the time CSR: real-time counter ticks (not CPU cycles —
+// the comment previously said "cycle counter").
+static inline uint64
+r_time()
+{
+  uint64 x;
+  asm volatile("csrr %0, time" : "=r" (x) );
+  return x;
+}
+
+// enable interrupts
+static inline void
+intr_on()
+{
+  w_sie(r_sie() | SIE_SEIE | SIE_STIE | SIE_SSIE);
+  w_sstatus(r_sstatus() | SSTATUS_SIE);
+}
+
+// disable interrupts
+static inline void
+intr_off()
+{
+  w_sstatus(r_sstatus() & ~SSTATUS_SIE);
+}
+
+// are interrupts enabled?
+static inline int
+intr_get()
+{
+  uint64 x = r_sstatus();
+  return (x & SSTATUS_SIE) != 0;
+}
+
+static inline uint64
+r_sp()
+{
+  uint64 x;
+  asm volatile("mv %0, sp" : "=r" (x) );
+  return x;
+}
+
+// read and write tp, the thread pointer, which holds
+// this core's hartid (core number), the index into cpus[].
+static inline uint64
+r_tp()
+{
+  uint64 x;
+  asm volatile("mv %0, tp" : "=r" (x) );
+  return x;
+}
+
+static inline void
+w_tp(uint64 x)
+{
+  asm volatile("mv tp, %0" : : "r" (x));
+}
+
+#define PGSIZE 4096 // bytes per page
+#define PGSHIFT 12  // bits of offset within a page
+
+#define PGROUNDUP(sz)  (((sz)+PGSIZE-1) & ~(PGSIZE-1))
+#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1))
+
+#define PTE_V (1L << 0) // valid
+#define PTE_R (1L << 1)
+#define PTE_W (1L << 2)
+#define PTE_X (1L << 3)
+#define PTE_U (1L << 4) // 1 -> user can access
+
+// shift a physical address to the right place for a PTE.
+#define PA2PTE(pa) ((((uint64)pa) >> 12) << 10)
+
+#define PTE2PA(pte) (((pte) >> 10) << 12)
+
+#define PTE_FLAGS(pte) ((pte) & (PTE_V|PTE_R|PTE_W|PTE_X|PTE_U))
+
+// extract the three 9-bit page table indices from a virtual address.
+#define PXMASK          0x1FF // 9 bits
+#define PXSHIFT(level)  (PGSHIFT+(9*(level)))
+#define PX(level, va) ((((uint64) (va)) >> PXSHIFT(level)) & PXMASK)
+
+// one beyond the highest possible virtual address.
+// MAXVA is actually one bit less than the max allowed by
+// Sv39, to avoid having to sign-extend virtual addresses
+// that have the high bit set.
+#define MAXVA (1L << (9 + 9 + 9 + 12 - 1))
+
+typedef uint64 pte_t;
+typedef uint64 *pagetable_t; // 512 PTEs
diff --git a/kernel/sleeplock.c b/kernel/sleeplock.c
new file mode 100644
index 0000000..b490370
--- /dev/null
+++ b/kernel/sleeplock.c
@@ -0,0 +1,55 @@
+// Sleeping locks
+
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "param.h"
+#include "memlayout.h"
+#include "proc.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+
+// initialize a sleep lock: unlocked, owned by no process.
+void
+initsleeplock(struct sleeplock *lk, char *name)
+{
+  initlock(&lk->lk, "sleep lock");
+  lk->name = name;
+  lk->locked = 0;
+  lk->pid = 0;
+}
+
+// acquire lk, sleeping (yielding the CPU) rather than
+// spinning while another process holds it.
+void
+acquiresleep(struct sleeplock *lk)
+{
+  acquire(&lk->lk);
+  while (lk->locked) {
+    sleep(lk, &lk->lk);
+  }
+  lk->locked = 1;
+  lk->pid = myproc()->pid;
+  release(&lk->lk);
+}
+
+// release lk and wake any process sleeping in acquiresleep().
+void
+releasesleep(struct sleeplock *lk)
+{
+  acquire(&lk->lk);
+  lk->locked = 0;
+  lk->pid = 0;
+  wakeup(lk);
+  release(&lk->lk);
+}
+
+// is lk held by the current process?
+int
+holdingsleep(struct sleeplock *lk)
+{
+  int r;
+  
+  acquire(&lk->lk);
+  r = lk->locked && (lk->pid == myproc()->pid);
+  release(&lk->lk);
+  return r;
+}
+
+
+
+
+
+
diff --git a/kernel/sleeplock.h b/kernel/sleeplock.h
new file mode 100644
index 0000000..110e6f3
--- /dev/null
+++ b/kernel/sleeplock.h
@@ -0,0 +1,10 @@
+// Long-term locks for processes.
+// unlike a spinlock, a sleeplock may be held across sleeps
+// (e.g. while waiting for disk I/O); waiters yield the CPU.
+struct sleeplock {
+  uint locked;       // Is the lock held?
+  struct spinlock lk; // spinlock protecting this sleep lock
+  
+  // For debugging:
+  char *name;        // Name of lock.
+  int pid;           // Process holding lock
+};
+
+
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
new file mode 100644
index 0000000..bbb7cb5
--- /dev/null
+++ b/kernel/spinlock.c
@@ -0,0 +1,110 @@
+// Mutual exclusion spin locks.
+
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "spinlock.h"
+#include "riscv.h"
+#include "proc.h"
+#include "defs.h"
+
+// initialize a spinlock: unlocked, held by no cpu.
+void
+initlock(struct spinlock *lk, char *name)
+{
+  lk->name = name;
+  lk->locked = 0;
+  lk->cpu = 0;
+}
+
+// Acquire the lock.
+// Loops (spins) until the lock is acquired.
+// Holding a lock for a long time may cause
+// other CPUs to waste time spinning to acquire it.
+void
+acquire(struct spinlock *lk)
+{
+  push_off(); // disable interrupts to avoid deadlock.
+  if(holding(lk))
+    panic("acquire");
+
+  // The xchg is atomic.
+  //while(xchg(&lk->locked, 1) != 0)
+  //  ;
+  while(__sync_lock_test_and_set(&lk->locked, 1) != 0)
+    ;
+
+  // Tell the C compiler and the processor to not move loads or stores
+  // past this point, to ensure that the critical section's memory
+  // references happen after the lock is acquired.
+  __sync_synchronize();
+
+  // Record info about lock acquisition for holding() and debugging.
+  lk->cpu = mycpu();
+}
+
+// Release the lock.
+void
+release(struct spinlock *lk)
+{
+  if(!holding(lk))
+    panic("release");
+
+  // clear ownership before the lock word itself is released.
+  lk->cpu = 0;
+
+  // Tell the C compiler and the processor to not move loads or stores
+  // past this point, to ensure that all the stores in the critical
+  // section are visible to other cores before the lock is released.
+  // Both the C compiler and the hardware may re-order loads and
+  // stores; __sync_synchronize() tells them both not to.
+  // On RISC-V, this turns into a fence instruction.
+  __sync_synchronize();
+
+  // Release the lock, equivalent to lk->locked = 0.
+  // This code can't use a C assignment, since it might
+  // not be atomic. A real OS would use C atomics here.
+  // On RISC-V, use an amoswap instruction.
+  //asm volatile("movl $0, %0" : "+m" (lk->locked) : );
+  __sync_lock_release(&lk->locked);
+
+  pop_off();
+}
+
+// Check whether this cpu is holding the lock.
+// interrupts are pushed off so this cpu cannot be
+// rescheduled between reading locked and cpu.
+int
+holding(struct spinlock *lk)
+{
+  int r;
+  push_off();
+  r = (lk->locked && lk->cpu == mycpu());
+  pop_off();
+  return r;
+}
+
+// push_off/pop_off are like intr_off()/intr_on() except that they are matched:
+// it takes two pop_off to undo two push_off.  Also, if interrupts
+// are initially off, then push_off, pop_off leaves them off.
+
+void
+push_off(void)
+{
+  struct cpu *c = mycpu();
+  int old = intr_get();
+
+  intr_off();
+  // remember the pre-push interrupt state only at the
+  // outermost nesting level.
+  if(c->noff == 0)
+    c->intena = old;
+  c->noff += 1;
+}
+
+void
+pop_off(void)
+{
+  struct cpu *c = mycpu();
+  if(intr_get())
+    panic("pop_off - interruptible");
+  c->noff -= 1;
+  if(c->noff < 0)
+    panic("pop_off");
+  if(c->noff == 0 && c->intena)
+    intr_on();
+}
diff --git a/kernel/spinlock.h b/kernel/spinlock.h
new file mode 100644
index 0000000..4392820
--- /dev/null
+++ b/kernel/spinlock.h
@@ -0,0 +1,9 @@
// Mutual exclusion lock.
// Acquired/released with acquire()/release() in spinlock.c;
// acquire() also disables interrupts via push_off().
struct spinlock {
  uint locked;       // Is the lock held? (0/1, toggled atomically)

  // For debugging:
  char *name;        // Name of lock.
  struct cpu *cpu;   // The cpu holding the lock.
};
+
diff --git a/kernel/start.c b/kernel/start.c
new file mode 100644
index 0000000..c4689dc
--- /dev/null
+++ b/kernel/start.c
@@ -0,0 +1,56 @@
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "defs.h"
+
void main();

// entry.S needs one stack per CPU.
__attribute__ ((aligned (16))) char stack0[4096 * NCPU];

// scratch area for timer interrupt, one per CPU.
// mstart() stores the hart's MTIMECMP address in slot 4 and the
// tick interval in slot 5; the remaining slots are presumably
// register save space for machinevec — confirm in kernelvec.S.
uint64 mscratch0[NCPU * 32];

// assembly code in kernelvec for machine-mode timer interrupt.
extern void machinevec();
+
// entry.S jumps here in machine mode on stack0.
// Configures machine-mode state, then drops to supervisor
// mode and enters main() via mret.
void
mstart()
{
  // set M Previous Privilege mode to Supervisor, for mret.
  unsigned long x = r_mstatus();
  x &= ~MSTATUS_MPP_MASK;
  x |= MSTATUS_MPP_S;
  w_mstatus(x);

  // set M Exception Program Counter to main, for mret.
  // requires gcc -mcmodel=medany
  w_mepc((uint64)main);

  // disable paging for now.
  w_satp(0);

  // delegate all interrupts and exceptions to supervisor mode.
  w_medeleg(0xffff);
  w_mideleg(0xffff);

  // set up to receive timer interrupts in machine mode,
  // for pre-emptive switching and (on hart 0) to drive time.
  int id = r_mhartid();
  uint64 *scratch = &mscratch0[32 * id];
  // first timer fires 10000 cycles from now; scratch[5] holds the
  // re-arm interval used by machinevec (10000000 — confirm in kernelvec.S).
  *(uint64*)CLINT_MTIMECMP(id) = *(uint64*)CLINT_MTIME + 10000;
  scratch[4] = CLINT_MTIMECMP(id);
  scratch[5] = 10000000;
  w_mscratch((uint64)scratch);
  w_mtvec((uint64)machinevec);
  w_mstatus(r_mstatus() | MSTATUS_MIE);
  w_mie(r_mie() | MIE_MTIE);

  // keep each CPU's hartid in its tp register, for cpuid().
  w_tp(id);

  // call main() in supervisor mode (mret jumps to mepc with
  // privilege MPP, both set above).
  asm volatile("mret");
}
diff --git a/kernel/stat.h b/kernel/stat.h
new file mode 100644
index 0000000..8a80933
--- /dev/null
+++ b/kernel/stat.h
@@ -0,0 +1,11 @@
// On-disk/file types, stored in struct stat.type.
#define T_DIR 1 // Directory
#define T_FILE 2 // File
#define T_DEV 3 // Device

// Returned to user space by the fstat system call.
struct stat {
  short type; // Type of file
  int dev; // File system's disk device
  uint ino; // Inode number
  short nlink; // Number of links to file
  uint size; // Size of file in bytes
};
diff --git a/kernel/string.c b/kernel/string.c
new file mode 100644
index 0000000..d99e612
--- /dev/null
+++ b/kernel/string.c
@@ -0,0 +1,104 @@
+#include "types.h"
+
+void*
+memset(void *dst, int c, uint n)
+{
+ char *cdst = (char *) dst;
+ int i;
+ for(i = 0; i < n; i++){
+ cdst[i] = c;
+ }
+ return dst;
+}
+
+int
+memcmp(const void *v1, const void *v2, uint n)
+{
+ const uchar *s1, *s2;
+
+ s1 = v1;
+ s2 = v2;
+ while(n-- > 0){
+ if(*s1 != *s2)
+ return *s1 - *s2;
+ s1++, s2++;
+ }
+
+ return 0;
+}
+
+void*
+memmove(void *dst, const void *src, uint n)
+{
+ const char *s;
+ char *d;
+
+ s = src;
+ d = dst;
+ if(s < d && s + n > d){
+ s += n;
+ d += n;
+ while(n-- > 0)
+ *--d = *--s;
+ } else
+ while(n-- > 0)
+ *d++ = *s++;
+
+ return dst;
+}
+
// memcpy exists to placate GCC, which can emit calls to it on its
// own (e.g. for struct assignment). Kernel code should call memmove.
void*
memcpy(void *dst, const void *src, uint n)
{
  return memmove(dst, src, n);
}
+
+int
+strncmp(const char *p, const char *q, uint n)
+{
+ while(n > 0 && *p && *p == *q)
+ n--, p++, q++;
+ if(n == 0)
+ return 0;
+ return (uchar)*p - (uchar)*q;
+}
+
// Copy at most n bytes of t into s, zero-padding the remainder.
// Like the C library strncpy, s is NOT NUL-terminated if t is
// n bytes or longer; use safestrcpy for guaranteed termination.
char*
strncpy(char *s, const char *t, int n)
{
  char *orig = s;

  // copy from t, including its terminator, up to n bytes.
  while(n > 0){
    n--;
    if((*s++ = *t++) == 0)
      break;
  }
  // pad whatever budget is left with zero bytes.
  while(n > 0){
    n--;
    *s++ = 0;
  }
  return orig;
}
+
// Like strncpy but guaranteed to NUL-terminate.
char*
safestrcpy(char *s, const char *t, int n)
{
  char *orig = s;
  char ch;

  if(n <= 0)
    return orig;
  // copy up to n-1 bytes, stopping after t's terminator.
  while(n > 1){
    ch = *t++;
    *s++ = ch;
    n--;
    if(ch == 0)
      break;
  }
  *s = 0;
  return orig;
}
+
// Length of the NUL-terminated string s, not counting the NUL.
int
strlen(const char *s)
{
  const char *p = s;

  while(*p)
    p++;
  return p - s;
}
+
diff --git a/kernel/swtch.S b/kernel/swtch.S
new file mode 100644
index 0000000..17a8663
--- /dev/null
+++ b/kernel/swtch.S
@@ -0,0 +1,42 @@
# Context switch
#
#   void swtch(struct context *old, struct context *new);
#
# Save current registers in old. Load from new.
#
# Only the callee-saved registers (ra, sp, s0-s11) are switched;
# the RISC-V calling convention makes callers preserve everything
# else. The field offsets must match struct context in proc.h.
# Loading new's ra makes the final ret "return" into the new
# context's saved call site.

.globl swtch
swtch:
        sd ra, 0(a0)
        sd sp, 8(a0)
        sd s0, 16(a0)
        sd s1, 24(a0)
        sd s2, 32(a0)
        sd s3, 40(a0)
        sd s4, 48(a0)
        sd s5, 56(a0)
        sd s6, 64(a0)
        sd s7, 72(a0)
        sd s8, 80(a0)
        sd s9, 88(a0)
        sd s10, 96(a0)
        sd s11, 104(a0)

        ld ra, 0(a1)
        ld sp, 8(a1)
        ld s0, 16(a1)
        ld s1, 24(a1)
        ld s2, 32(a1)
        ld s3, 40(a1)
        ld s4, 48(a1)
        ld s5, 56(a1)
        ld s6, 64(a1)
        ld s7, 72(a1)
        ld s8, 80(a1)
        ld s9, 88(a1)
        ld s10, 96(a1)
        ld s11, 104(a1)

        ret
+
+
diff --git a/kernel/syscall.c b/kernel/syscall.c
new file mode 100644
index 0000000..ca34f2c
--- /dev/null
+++ b/kernel/syscall.c
@@ -0,0 +1,191 @@
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "proc.h"
+#include "syscall.h"
+#include "defs.h"
+
// User code makes a system call with the RISC-V ecall instruction.
// The system call number goes in register a7, the arguments in
// registers a0-a5 (read back from the trapframe by fetcharg()),
// and the return value is delivered in a0.
+
+// Fetch the int at addr from the current process.
+int
+fetchint(uint64 addr, int *ip)
+{
+ struct proc *p = myproc();
+
+ if(addr >= p->sz || addr+4 > p->sz)
+ return -1;
+ if(copyin(p->pagetable, (char *)ip, addr, sizeof(*ip)) != 0)
+ return -1;
+ return 0;
+}
+
+// Fetch the uint64 at addr from the current process.
+int
+fetchaddr(uint64 addr, uint64 *ip)
+{
+ struct proc *p = myproc();
+ if(addr >= p->sz || addr+sizeof(uint64) > p->sz)
+ return -1;
+ if(copyin(p->pagetable, (char *)ip, addr, sizeof(*ip)) != 0)
+ return -1;
+ return 0;
+}
+
// Copy the nul-terminated string at user address addr from the
// current process into buf, at most max bytes including the nul.
// (The old comment claimed no copy was made; copyinstr does copy.)
// Returns the length of the string, not including nul, or a
// negative error code from copyinstr.
int
fetchstr(uint64 addr, char *buf, int max)
{
  struct proc *p = myproc();
  int err = copyinstr(p->pagetable, buf, addr, max);
  if(err < 0)
    return err;
  return strlen(buf);
}
+
+static uint64
+fetcharg(int n)
+{
+ struct proc *p = myproc();
+ switch (n) {
+ case 0:
+ return p->tf->a0;
+ case 1:
+ return p->tf->a1;
+ case 2:
+ return p->tf->a2;
+ case 3:
+ return p->tf->a3;
+ case 4:
+ return p->tf->a4;
+ case 5:
+ return p->tf->a5;
+ }
+ panic("fetcharg");
+ return -1;
+}
+
// Fetch the nth 32-bit system call argument.
// Always succeeds (registers are always present); the int return
// exists to match xv6's argument-fetching convention.
int
argint(int n, int *ip)
{
  *ip = fetcharg(n);
  return 0;
}

// Fetch the nth system call argument as a 64-bit value/address.
int
argaddr(int n, uint64 *ip)
{
  *ip = fetcharg(n);
  return 0;
}
+
+// Fetch the nth word-sized system call argument as a pointer
+// to a block of memory of size bytes. Check that the pointer
+// lies within the process address space.
+int
+argptr(int n, uint64 *pp, int size)
+{
+ uint64 i;
+ struct proc *p = myproc();
+
+ if(argaddr(n, &i) < 0)
+ return -1;
+ if(size < 0 || (uint)i >= p->sz || (uint)i+size > p->sz)
+ return -1;
+ *pp = i;
+ return 0;
+}
+
// Fetch the nth word-sized system call argument as a null-terminated string.
// Copies into buf, at most max bytes.
// Returns the string length if OK (not including the nul — see
// fetchstr/strlen), or a negative value on error.
int
argstr(int n, char *buf, int max)
{
  uint64 addr;
  if(argaddr(n, &addr) < 0)
    return -1;
  return fetchstr(addr, buf, max);
}
+
// System call handlers, defined in sysproc.c and sysfile.c.
extern int sys_chdir(void);
extern int sys_close(void);
extern int sys_dup(void);
extern int sys_exec(void);
extern int sys_exit(void);
extern int sys_fork(void);
extern int sys_fstat(void);
extern int sys_getpid(void);
extern int sys_kill(void);
extern int sys_link(void);
extern int sys_mkdir(void);
extern int sys_mknod(void);
extern int sys_open(void);
extern int sys_pipe(void);
extern int sys_read(void);
extern int sys_sbrk(void);
extern int sys_sleep(void);
extern int sys_unlink(void);
extern int sys_wait(void);
extern int sys_write(void);
extern int sys_uptime(void);

// Dispatch table indexed by syscall number (syscall.h), filled
// with C99 designated initializers; unused slots stay 0 and are
// rejected by dosyscall().
static int (*syscalls[])(void) = {
[SYS_fork]    sys_fork,
[SYS_exit]    sys_exit,
[SYS_wait]    sys_wait,
[SYS_pipe]    sys_pipe,
[SYS_read]    sys_read,
[SYS_kill]    sys_kill,
[SYS_exec]    sys_exec,
[SYS_fstat]   sys_fstat,
[SYS_chdir]   sys_chdir,
[SYS_dup]     sys_dup,
[SYS_getpid]  sys_getpid,
[SYS_sbrk]    sys_sbrk,
[SYS_sleep]   sys_sleep,
[SYS_uptime]  sys_uptime,
[SYS_open]    sys_open,
[SYS_write]   sys_write,
[SYS_mknod]   sys_mknod,
[SYS_unlink]  sys_unlink,
[SYS_link]    sys_link,
[SYS_mkdir]   sys_mkdir,
[SYS_close]   sys_close,
};
+
+static void
+dosyscall(void)
+{
+ int num;
+ struct proc *p = myproc();
+
+ num = p->tf->a7;
+ if(num > 0 && num < NELEM(syscalls) && syscalls[num]) {
+ p->tf->a0 = syscalls[num]();
+ } else {
+ printf("%d %s: unknown sys call %d\n",
+ p->pid, p->name, num);
+ p->tf->a0 = -1;
+ }
+}
+
// Entry point for the system-call path, called from usertrap().
// exit() if the process has been killed either before or during
// the call (e.g. a kill() that arrived while it ran).
void
syscall()
{
  if(myproc()->killed)
    exit();
  dosyscall();
  if(myproc()->killed)
    exit();
  return;
}
+
diff --git a/kernel/syscall.h b/kernel/syscall.h
new file mode 100644
index 0000000..bc5f356
--- /dev/null
+++ b/kernel/syscall.h
@@ -0,0 +1,22 @@
// System call numbers.
// Must stay in sync with the user-side stubs and the syscalls[]
// dispatch table in kernel/syscall.c.
#define SYS_fork 1
#define SYS_exit 2
#define SYS_wait 3
#define SYS_pipe 4
#define SYS_read 5
#define SYS_kill 6
#define SYS_exec 7
#define SYS_fstat 8
#define SYS_chdir 9
#define SYS_dup 10
#define SYS_getpid 11
#define SYS_sbrk 12
#define SYS_sleep 13
#define SYS_uptime 14
#define SYS_open 15
#define SYS_write 16
#define SYS_mknod 17
#define SYS_unlink 18
#define SYS_link 19
#define SYS_mkdir 20
#define SYS_close 21
diff --git a/kernel/sysfile.c b/kernel/sysfile.c
new file mode 100644
index 0000000..83bb1ed
--- /dev/null
+++ b/kernel/sysfile.c
@@ -0,0 +1,465 @@
+//
+// File-system system calls.
+// Mostly argument checking, since we don't trust
+// user code, and calls into file.c and fs.c.
+//
+
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "param.h"
+#include "stat.h"
+#include "proc.h"
+#include "fs.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "file.h"
+#include "fcntl.h"
+
+// Fetch the nth word-sized system call argument as a file descriptor
+// and return both the descriptor and the corresponding struct file.
+static int
+argfd(int n, int *pfd, struct file **pf)
+{
+ int fd;
+ struct file *f;
+
+ if(argint(n, &fd) < 0)
+ return -1;
+ if(fd < 0 || fd >= NOFILE || (f=myproc()->ofile[fd]) == 0)
+ return -1;
+ if(pfd)
+ *pfd = fd;
+ if(pf)
+ *pf = f;
+ return 0;
+}
+
+// Allocate a file descriptor for the given file.
+// Takes over file reference from caller on success.
+static int
+fdalloc(struct file *f)
+{
+ int fd;
+ struct proc *p = myproc();
+
+ for(fd = 0; fd < NOFILE; fd++){
+ if(p->ofile[fd] == 0){
+ p->ofile[fd] = f;
+ return fd;
+ }
+ }
+ return -1;
+}
+
// dup(fd): return a new descriptor referring to the same file.
int
sys_dup(void)
{
  struct file *f;
  int fd;

  if(argfd(0, 0, &f) < 0)
    return -1;
  fd = fdalloc(f);
  if(fd < 0)
    return -1;
  filedup(f);  // second descriptor => one more reference
  return fd;
}
+
+int
+sys_read(void)
+{
+ struct file *f;
+ int n;
+ uint64 p;
+
+ if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argptr(1, &p, n) < 0)
+ return -1;
+ return fileread(f, p, n);
+}
+
+int
+sys_write(void)
+{
+ struct file *f;
+ int n;
+ uint64 p;
+
+ if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argptr(1, &p, n) < 0)
+ return -1;
+
+ return filewrite(f, p, n);
+}
+
+int
+sys_close(void)
+{
+ int fd;
+ struct file *f;
+
+ if(argfd(0, &fd, &f) < 0)
+ return -1;
+ myproc()->ofile[fd] = 0;
+ fileclose(f);
+ return 0;
+}
+
+int
+sys_fstat(void)
+{
+ struct file *f;
+ uint64 st; // user pointer to struct stat
+
+ if(argfd(0, 0, &f) < 0 || argptr(1, &st, sizeof(struct stat)) < 0)
+ return -1;
+ return filestat(f, st);
+}
+
// Create the path new as a link to the same inode as old.
// link(old, new) system call.
int
sys_link(void)
{
  char name[DIRSIZ], new[MAXPATH], old[MAXPATH];
  struct inode *dp, *ip;

  if(argstr(0, old, MAXPATH) < 0 || argstr(1, new, MAXPATH) < 0)
    return -1;

  begin_op();
  if((ip = namei(old)) == 0){
    end_op();
    return -1;
  }

  ilock(ip);
  if(ip->type == T_DIR){
    // hard links to directories are forbidden (they could create cycles).
    iunlockput(ip);
    end_op();
    return -1;
  }

  // Bump nlink before creating the new entry, so a crash never
  // leaves a directory entry pointing at an under-counted inode;
  // undone at bad: on failure.
  ip->nlink++;
  iupdate(ip);
  iunlock(ip);

  if((dp = nameiparent(new, name)) == 0)
    goto bad;
  ilock(dp);
  // the new entry must live on the same device as the inode.
  if(dp->dev != ip->dev || dirlink(dp, name, ip->inum) < 0){
    iunlockput(dp);
    goto bad;
  }
  iunlockput(dp);
  iput(ip);

  end_op();

  return 0;

bad:
  // roll back the nlink++ above.
  ilock(ip);
  ip->nlink--;
  iupdate(ip);
  iunlockput(ip);
  end_op();
  return -1;
}
+
+// Is the directory dp empty except for "." and ".." ?
+static int
+isdirempty(struct inode *dp)
+{
+ int off;
+ struct dirent de;
+
+ for(off=2*sizeof(de); off<dp->size; off+=sizeof(de)){
+ if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
+ panic("isdirempty: readi");
+ if(de.inum != 0)
+ return 0;
+ }
+ return 1;
+}
+
//PAGEBREAK!
// unlink(path): remove a directory entry, and free the inode
// when its link count drops to zero (done by iput via iunlockput).
int
sys_unlink(void)
{
  struct inode *ip, *dp;
  struct dirent de;
  char name[DIRSIZ], path[MAXPATH];
  uint off;

  if(argstr(0, path, MAXPATH) < 0)
    return -1;

  begin_op();
  if((dp = nameiparent(path, name)) == 0){
    end_op();
    return -1;
  }

  ilock(dp);

  // Cannot unlink "." or "..".
  if(namecmp(name, ".") == 0 || namecmp(name, "..") == 0)
    goto bad;

  if((ip = dirlookup(dp, name, &off)) == 0)
    goto bad;
  // lock order: parent dp first, then ip (same as namei-based callers).
  ilock(ip);

  if(ip->nlink < 1)
    panic("unlink: nlink < 1");
  // only empty directories may be removed.
  if(ip->type == T_DIR && !isdirempty(ip)){
    iunlockput(ip);
    goto bad;
  }

  // erase the directory entry by zeroing it in place.
  memset(&de, 0, sizeof(de));
  if(writei(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
    panic("unlink: writei");
  if(ip->type == T_DIR){
    // the removed directory's ".." no longer references dp.
    dp->nlink--;
    iupdate(dp);
  }
  iunlockput(dp);

  ip->nlink--;
  iupdate(ip);
  iunlockput(ip);

  end_op();

  return 0;

bad:
  iunlockput(dp);
  end_op();
  return -1;
}
+
// Create a new inode at path with the given type, returning it
// locked and referenced. If a regular file already exists at path
// and type is T_FILE, return that existing inode instead
// (open(O_CREATE) relies on this).
// Returns 0 on failure.
// NOTE(review): if the path exists but is a directory or device,
// this returns 0 (failure) — confirm that callers want O_CREATE
// on an existing T_DEV/T_DIR to fail rather than open it.
static struct inode*
create(char *path, short type, short major, short minor)
{
  uint off;
  struct inode *ip, *dp;
  char name[DIRSIZ];

  if((dp = nameiparent(path, name)) == 0)
    return 0;
  ilock(dp);

  if((ip = dirlookup(dp, name, &off)) != 0){
    iunlockput(dp);
    ilock(ip);
    if(type == T_FILE && ip->type == T_FILE)
      return ip;
    iunlockput(ip);
    return 0;
  }

  if((ip = ialloc(dp->dev, type)) == 0)
    panic("create: ialloc");

  ilock(ip);
  ip->major = major;
  ip->minor = minor;
  ip->nlink = 1;
  iupdate(ip);

  if(type == T_DIR){  // Create . and .. entries.
    dp->nlink++;  // for ".."
    iupdate(dp);
    // No ip->nlink++ for ".": avoid cyclic ref count.
    if(dirlink(ip, ".", ip->inum) < 0 || dirlink(ip, "..", dp->inum) < 0)
      panic("create dots");
  }

  if(dirlink(dp, name, ip->inum) < 0)
    panic("create: dirlink");

  iunlockput(dp);

  // ip is returned locked; caller must iunlock/iunlockput it.
  return ip;
}
+
// open(path, omode): open or (with O_CREATE) create a file,
// returning a new file descriptor or -1.
int
sys_open(void)
{
  char path[MAXPATH];
  int fd, omode;
  struct file *f;
  struct inode *ip;

  if(argstr(0, path, MAXPATH) < 0 || argint(1, &omode) < 0)
    return -1;

  begin_op();

  if(omode & O_CREATE){
    // create() returns the inode locked.
    ip = create(path, T_FILE, 0, 0);
    if(ip == 0){
      end_op();
      return -1;
    }
  } else {
    if((ip = namei(path)) == 0){
      end_op();
      return -1;
    }
    ilock(ip);
    // directories may only be opened read-only.
    if(ip->type == T_DIR && omode != O_RDONLY){
      iunlockput(ip);
      end_op();
      return -1;
    }
  }

  // allocate the file object and a descriptor for it; on failure
  // undo whichever allocation succeeded.
  if((f = filealloc()) == 0 || (fd = fdalloc(f)) < 0){
    if(f)
      fileclose(f);
    iunlockput(ip);
    end_op();
    return -1;
  }
  // keep the inode reference (f->ip) but release its lock.
  iunlock(ip);
  end_op();

  f->type = FD_INODE;
  f->ip = ip;
  f->off = 0;
  f->readable = !(omode & O_WRONLY);
  f->writable = (omode & O_WRONLY) || (omode & O_RDWR);
  return fd;
}
+
+int
+sys_mkdir(void)
+{
+ char path[MAXPATH];
+ struct inode *ip;
+
+ begin_op();
+ if(argstr(0, path, MAXPATH) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0){
+ end_op();
+ return -1;
+ }
+ iunlockput(ip);
+ end_op();
+ return 0;
+}
+
+int
+sys_mknod(void)
+{
+ struct inode *ip;
+ char path[MAXPATH];
+ int major, minor;
+
+ begin_op();
+ if((argstr(0, path, MAXPATH)) < 0 ||
+ argint(1, &major) < 0 ||
+ argint(2, &minor) < 0 ||
+ (ip = create(path, T_DEV, major, minor)) == 0){
+ end_op();
+ return -1;
+ }
+ iunlockput(ip);
+ end_op();
+ return 0;
+}
+
// chdir(path): change the current directory.
int
sys_chdir(void)
{
  char path[MAXPATH];
  struct inode *ip;
  struct proc *p = myproc();

  begin_op();
  if(argstr(0, path, MAXPATH) < 0 || (ip = namei(path)) == 0){
    end_op();
    return -1;
  }
  ilock(ip);
  if(ip->type != T_DIR){
    iunlockput(ip);
    end_op();
    return -1;
  }
  // keep the reference to ip but drop its lock before releasing
  // the old cwd, to avoid holding two inode locks at once.
  iunlock(ip);
  iput(p->cwd);
  end_op();
  p->cwd = ip;
  return 0;
}
+
+int
+sys_exec(void)
+{
+ char path[MAXPATH], *argv[MAXARG];
+ int i;
+ uint64 uargv, uarg;
+
+ if(argstr(0, path, MAXPATH) < 0 || argaddr(1, &uargv) < 0){
+ return -1;
+ }
+ memset(argv, 0, sizeof(argv));
+ for(i=0;; i++){
+ if(i >= NELEM(argv)){
+ return -1;
+ }
+ if(fetchaddr(uargv+sizeof(uint64)*i, (uint64*)&uarg) < 0){
+ return -1;
+ }
+ if(uarg == 0){
+ argv[i] = 0;
+ break;
+ }
+ argv[i] = kalloc();
+ if(argv[i] == 0)
+ panic("sys_exec kalloc");
+ if(fetchstr(uarg, argv[i], PGSIZE) < 0){
+ return -1;
+ }
+ }
+
+ int ret = exec(path, argv);
+
+ for(i = 0; i < NELEM(argv) && argv[i] != 0; i++)
+ kfree(argv[i]);
+
+ return ret;
+}
+
// pipe(fdarray): create a pipe and store the read/write
// descriptors into the user's two-int array.
int
sys_pipe(void)
{
  uint64 fdarray; // user pointer to array of two integers
  struct file *rf, *wf;
  int fd0, fd1;
  struct proc *p = myproc();

  if(argptr(0, &fdarray, 2*sizeof(int)) < 0)
    return -1;
  if(pipealloc(&rf, &wf) < 0)
    return -1;
  fd0 = -1;
  // allocate both descriptors; on failure undo fd0 (if it was
  // allocated) and drop both pipe ends.
  if((fd0 = fdalloc(rf)) < 0 || (fd1 = fdalloc(wf)) < 0){
    if(fd0 >= 0)
      p->ofile[fd0] = 0;
    fileclose(rf);
    fileclose(wf);
    return -1;
  }
  // copy the descriptor numbers out to user space; on failure
  // retract both descriptors and close both ends.
  if(copyout(p->pagetable, fdarray, (char*)&fd0, sizeof(fd0)) < 0 ||
     copyout(p->pagetable, fdarray+sizeof(fd0), (char *)&fd1, sizeof(fd1)) < 0){
    p->ofile[fd0] = 0;
    p->ofile[fd1] = 0;
    fileclose(rf);
    fileclose(wf);
    return -1;
  }
  return 0;
}
diff --git a/kernel/sysproc.c b/kernel/sysproc.c
new file mode 100644
index 0000000..e57e045
--- /dev/null
+++ b/kernel/sysproc.c
@@ -0,0 +1,90 @@
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "date.h"
+#include "param.h"
+#include "memlayout.h"
+#include "proc.h"
+
// exit(): terminate the current process; see exit() in proc.c.
int
sys_exit(void)
{
  exit();
  return 0;  // not reached
}

// getpid(): return the current process's pid.
int
sys_getpid(void)
{
  return myproc()->pid;
}

// fork(): create a new process; see fork() in proc.c.
int
sys_fork(void)
{
  return fork();
}

// wait(): wait for a child to exit; see wait() in proc.c.
int
sys_wait(void)
{
  return wait();
}
+
+int
+sys_sbrk(void)
+{
+ int addr;
+ int n;
+
+ if(argint(0, &n) < 0)
+ return -1;
+ addr = myproc()->sz;
+ if(growproc(n) < 0)
+ return -1;
+ return addr;
+}
+
+int
+sys_sleep(void)
+{
+ int n;
+ uint ticks0;
+
+ if(argint(0, &n) < 0)
+ return -1;
+ acquire(&tickslock);
+ ticks0 = ticks;
+ while(ticks - ticks0 < n){
+ if(myproc()->killed){
+ release(&tickslock);
+ return -1;
+ }
+ sleep(&ticks, &tickslock);
+ }
+ release(&tickslock);
+ return 0;
+}
+
// kill(pid): mark the process with the given pid as killed.
int
sys_kill(void)
{
  int pid;

  if(argint(0, &pid) < 0)
    return -1;
  return kill(pid);
}
+
+// return how many clock tick interrupts have occurred
+// since start.
+int
+sys_uptime(void)
+{
+ uint xticks;
+
+ acquire(&tickslock);
+ xticks = ticks;
+ release(&tickslock);
+ return xticks;
+}
diff --git a/kernel/trampoline.S b/kernel/trampoline.S
new file mode 100644
index 0000000..dd4eb02
--- /dev/null
+++ b/kernel/trampoline.S
@@ -0,0 +1,137 @@
        #
        # code to switch between user and kernel space.
        #
        # this code is mapped at the same virtual address
        # in user and kernel space so that it can switch
        # page tables.
        #
        # kernel.ld causes trampout to be aligned
        # to a page boundary.
        #
.globl usertrap
        .section trampoline
.globl trampout
trampout:
        # switch from kernel to user.
        # usertrapret() calls here.
        # a0: p->tf in user page table
        # a1: new value for satp, for user page table

        # switch to user page table.
        # safe because this trampoline page is mapped at the same
        # virtual address in both page tables.
        csrw satp, a1

        # put the saved user a0 in sscratch, so we
        # can swap it with our a0 (p->tf) in the last step.
        ld t0, 112(a0)
        csrw sscratch, t0

        # restore all but a0 from p->tf
        # (offsets must match struct trapframe in proc.h)
        ld ra, 40(a0)
        ld sp, 48(a0)
        ld gp, 56(a0)
        ld tp, 64(a0)
        ld t0, 72(a0)
        ld t1, 80(a0)
        ld t2, 88(a0)
        ld s0, 96(a0)
        ld s1, 104(a0)
        ld a1, 120(a0)
        ld a2, 128(a0)
        ld a3, 136(a0)
        ld a4, 144(a0)
        ld a5, 152(a0)
        ld a6, 160(a0)
        ld a7, 168(a0)
        ld s2, 176(a0)
        ld s3, 184(a0)
        ld s4, 192(a0)
        ld s5, 200(a0)
        ld s6, 208(a0)
        ld s7, 216(a0)
        ld s8, 224(a0)
        ld s9, 232(a0)
        ld s10, 240(a0)
        ld s11, 248(a0)
        ld t3, 256(a0)
        ld t4, 264(a0)
        ld t5, 272(a0)
        ld t6, 280(a0)

        # restore user a0, and save p->tf in sscratch
        # for the next trap's entry path (trampin).
        csrrw a0, sscratch, a0

        # return to user mode and user pc.
        # caller has set up sstatus and sepc.
        sret
+
.align 4
.globl trampin
trampin:
        #
        # trap.c set stvec to point here, so
        # user interrupts and exceptions start here,
        # in supervisor mode, but with a
        # user page table.
        #
        # sscratch points to where the process's p->tf is
        # mapped into user space (TRAMPOLINE - 4096).
        #

        # swap a0 and sscratch
        # so that a0 is p->tf
        csrrw a0, sscratch, a0

        # save the user registers in p->tf
        # (a0 itself is still in sscratch; saved below)
        sd ra, 40(a0)
        sd sp, 48(a0)
        sd gp, 56(a0)
        sd tp, 64(a0)
        sd t0, 72(a0)
        sd t1, 80(a0)
        sd t2, 88(a0)
        sd s0, 96(a0)
        sd s1, 104(a0)
        sd a1, 120(a0)
        sd a2, 128(a0)
        sd a3, 136(a0)
        sd a4, 144(a0)
        sd a5, 152(a0)
        sd a6, 160(a0)
        sd a7, 168(a0)
        sd s2, 176(a0)
        sd s3, 184(a0)
        sd s4, 192(a0)
        sd s5, 200(a0)
        sd s6, 208(a0)
        sd s7, 216(a0)
        sd s8, 224(a0)
        sd s9, 232(a0)
        sd s10, 240(a0)
        sd s11, 248(a0)
        sd t3, 256(a0)
        sd t4, 264(a0)
        sd t5, 272(a0)
        sd t6, 280(a0)

        # save the user a0 in p->tf->a0
        csrr t0, sscratch
        sd t0, 112(a0)

        # restore kernel stack pointer from p->tf->kernel_sp
        ld sp, 8(a0)

        # make tp hold the current hartid, from p->tf->hartid
        ld tp, 32(a0)

        # remember the address of usertrap(), p->tf->kernel_trap
        ld t0, 16(a0)

        # restore kernel page table from p->tf->kernel_satp
        ld t1, 0(a0)
        csrw satp, t1

        # a0 is no longer valid, since the kernel page
        # table does not specially map p->tf.

        # jump to usertrap(), which does not return
        jr t0
diff --git a/kernel/trap.c b/kernel/trap.c
new file mode 100644
index 0000000..050a94d
--- /dev/null
+++ b/kernel/trap.c
@@ -0,0 +1,184 @@
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "proc.h"
+#include "spinlock.h"
+#include "defs.h"
+
// protects ticks.
struct spinlock tickslock;
// number of timer interrupts since boot; advanced by hart 0 in devintr().
uint ticks;

// start of the trampoline page and its trap-entry label (trampoline.S).
extern char trampout[], trampin[];

// in kernelvec.S, calls kerneltrap().
void kernelvec();

extern int devintr();
+
// one-time trap setup: initialize the tick-counter lock.
void
trapinit(void)
{
  initlock(&tickslock, "time");
}

// set up to take exceptions and traps while in the kernel.
// Called on each hart; stvec stays kernelvec until usertrapret()
// redirects traps to the trampoline.
void
trapinithart(void)
{
  w_stvec((uint64)kernelvec);
}
+
//
// handle an interrupt, exception, or system call from user space.
// called from trampoline.S
//
void
usertrap(void)
{
  int which_dev = 0;

  // SPP == 0 means the trap came from user mode.
  if((r_sstatus() & SSTATUS_SPP) != 0)
    panic("usertrap: not from user mode");

  // send interrupts and exceptions to kerneltrap(),
  // since we're now in the kernel.
  w_stvec((uint64)kernelvec);

  struct proc *p = myproc();

  // save user program counter (sepc may be clobbered by a
  // nested trap once interrupts are re-enabled below).
  p->tf->epc = r_sepc();

  intr_on();

  if(r_scause() == 8){
    // system call (scause 8 == environment call from U-mode)

    // sepc points to the ecall instruction,
    // but we want to return to the next instruction.
    p->tf->epc += 4;

    syscall();
  } else if((which_dev = devintr()) != 0){
    // ok — device interrupt was handled by devintr().
  } else {
    printf("usertrap(): unexpected scause 0x%p pid=%d\n", r_scause(), p->pid);
    printf("            sepc=%p stval=%p\n", r_sepc(), r_stval());
    p->killed = 1;
  }

  if(p->killed)
    exit();

  // give up the CPU if this is a timer interrupt.
  if(which_dev == 2)
    yield();

  usertrapret();
}
+
//
// return to user space
//
void
usertrapret(void)
{
  struct proc *p = myproc();

  // turn off interrupts, since we're switching
  // now from kerneltrap() to usertrap().
  intr_off();

  // send interrupts and exceptions to trampoline.S
  // (trampin's address inside the TRAMPOLINE page mapping).
  w_stvec(TRAMPOLINE + (trampin - trampout));

  // set up values that trampoline.S will need when
  // the process next re-enters the kernel.
  p->tf->kernel_satp = r_satp();
  p->tf->kernel_sp = (uint64)p->kstack + PGSIZE;
  p->tf->kernel_trap = (uint64)usertrap;
  p->tf->hartid = r_tp();

  // set up the registers that trampoline.S's sret will use
  // to get to user space.

  // set S Previous Privilege mode to User.
  unsigned long x = r_sstatus();
  x &= ~SSTATUS_SPP; // clear SPP to 0 for user mode
  x |= SSTATUS_SPIE; // enable interrupts in user mode
  w_sstatus(x);

  // set S Exception Program Counter to the saved user pc.
  w_sepc(p->tf->epc);

  // tell trampoline.S the user page table to switch to.
  uint64 satp = MAKE_SATP(p->pagetable);

  // jump to trampoline.S (trampout) at the top of memory, which
  // switches to the user page table, restores user registers,
  // and switches to user mode with sret.
  ((void (*)(uint64,uint64))TRAMPOLINE)(TRAMPOLINE - PGSIZE, satp);
}
+
// interrupts and exceptions from kernel code go here via kernelvec,
// on whatever the current kernel stack is.
// must be 4-byte aligned to fit in stvec.
void
kerneltrap()
{
  uint64 sstatus = r_sstatus();
  uint64 scause = r_scause();

  // SPP set means the trap came from supervisor (kernel) mode.
  if((sstatus & SSTATUS_SPP) == 0)
    panic("kerneltrap: not from supervisor mode");
  if(intr_get() != 0)
    panic("kerneltrap: interrupts enabled");

  // only device interrupts are expected here; anything else
  // (page fault, illegal instruction, ...) is a kernel bug.
  if(devintr() == 0){
    printf("scause 0x%p\n", scause);
    printf("sepc=%p stval=%p\n", r_sepc(), r_stval());
    panic("kerneltrap");
  }
}
+
+// check if it's an external interrupt or software interrupt,
+// and handle it.
+// returns 2 if timer interrupt,
+// 1 if other device,
+// 0 if not recognized.
+int
+devintr()
+{
+ uint64 scause = r_scause();
+
+ if((scause & 0x8000000000000000L) &&
+ (scause & 0xff) == 9){
+ // supervisor external interrupt, via PLIC.
+ int irq = plic_claim();
+
+ if(irq == UART0_IRQ){
+ uartintr();
+ }
+
+ plic_complete(irq);
+ return 1;
+ } else if(scause == 0x8000000000000001){
+ // software interrupt from a machine-mode timer interrupt.
+
+ if(cpuid() == 0){
+ acquire(&tickslock);
+ ticks++;
+ wakeup(&ticks);
+ release(&tickslock);
+ }
+
+ // acknowledge.
+ w_sip(r_sip() & ~2);
+
+ return 2;
+ } else {
+ return 0;
+ }
+}
+
diff --git a/kernel/types.h b/kernel/types.h
new file mode 100644
index 0000000..ee73164
--- /dev/null
+++ b/kernel/types.h
@@ -0,0 +1,10 @@
// Short names for common unsigned types.
typedef unsigned int uint;
typedef unsigned short ushort;
typedef unsigned char uchar;

// Fixed-width types; uint64 assumes long is 64 bits (RV64 LP64 ABI).
typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned int uint32;
typedef unsigned long uint64;

// page-table entry; the "pde" name is x86 legacy — each RISC-V
// PTE is one 64-bit word.
typedef uint64 pde_t;
diff --git a/kernel/uart.c b/kernel/uart.c
new file mode 100644
index 0000000..35fac1b
--- /dev/null
+++ b/kernel/uart.c
@@ -0,0 +1,75 @@
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "proc.h"
+#include "spinlock.h"
+#include "defs.h"
+
+//
+// qemu -machine virt has a 16550a UART
+// qemu/hw/riscv/virt.c
+// http://byterunner.com/16550.html
+//
+// caller should lock.
+//
+
+// address of one of the registers
+#define R(reg) ((volatile unsigned char *)(UART0 + reg))
+
// Initialize the 16550a: program the baud rate and word format,
// reset the FIFOs, and enable receive interrupts.
// Register numbers/values follow the 16550 datasheet
// (http://byterunner.com/16550.html).
void
uartinit(void)
{
  // disable interrupts -- IER
  *R(1) = 0x00;

  // special mode to set baud rate (DLAB bit in LCR)
  *R(3) = 0x80;

  // LSB for baud rate of 38.4K
  *R(0) = 0x03;

  // MSB for baud rate of 38.4K
  *R(1) = 0x00;

  // leave set-baud mode,
  // and set word length to 8 bits, no parity.
  *R(3) = 0x03;

  // reset and enable FIFOs -- FCR.
  *R(2) = 0x07;

  // enable receive interrupts -- IER.
  *R(1) = 0x01;
}
+
+void
+uartputc(int c)
+{
+ // wait for Transmit Holding Empty to be set in LSR.
+ while((*R(5) & (1 << 5)) == 0)
+ ;
+ *R(0) = c;
+}
+
+int
+uartgetc(void)
+{
+ if(*R(5) & 0x01){
+ // input data is ready.
+ return *R(0);
+ } else {
+ return -1;
+ }
+}
+
// UART interrupt handler: drain all pending input into the console.
void
uartintr(void)
{
  int c;

  while((c = uartgetc()) != -1)
    consoleintr(c);
}
diff --git a/kernel/vm.c b/kernel/vm.c
new file mode 100644
index 0000000..0ea6bca
--- /dev/null
+++ b/kernel/vm.c
@@ -0,0 +1,389 @@
+#include "param.h"
+#include "types.h"
+#include "memlayout.h"
+#include "elf.h"
+#include "riscv.h"
+#include "defs.h"
+#include "fs.h"
+
+/*
+ * the kernel's page table, shared by all CPUs.
+ */
+pagetable_t kernel_pagetable;
+
+extern char etext[]; // kernel.ld sets this to end of kernel code.
+
+extern char trampout[]; // trampoline.S; trap entry/exit code, mapped at TRAMPOLINE.
+
+/*
+ * create a direct-map page table for the kernel.
+ * called early, in supervisor mode; the page allocator
+ * is already initialized. paging itself is turned on
+ * later, by kvminithart's write to satp.
+ *
+ * each region below is identity-mapped (virtual ==
+ * physical) so kernel pointers stay valid once paging
+ * is on; only the trampoline gets an extra, high
+ * virtual address.
+ */
+void
+kvminit()
+{
+  kernel_pagetable = (pagetable_t) kalloc();
+  memset(kernel_pagetable, 0, PGSIZE);
+
+  // uart registers
+  mappages(kernel_pagetable, UART0, PGSIZE,
+           UART0, PTE_R | PTE_W);
+
+  // CLINT (core-local interruptor; timer registers)
+  mappages(kernel_pagetable, CLINT, 0x10000,
+           CLINT, PTE_R | PTE_W);
+
+  // PLIC (platform-level interrupt controller)
+  mappages(kernel_pagetable, PLIC, 0x4000000,
+           PLIC, PTE_R | PTE_W);
+
+  // map kernel text executable and read-only.
+  mappages(kernel_pagetable, KERNBASE, (uint64)etext-KERNBASE,
+           KERNBASE, PTE_R | PTE_X);
+
+  // map kernel data and the physical RAM we'll make use of.
+  mappages(kernel_pagetable, (uint64)etext, PHYSTOP-(uint64)etext,
+           (uint64)etext, PTE_R | PTE_W);
+
+  // map the qemu -initrd fs.img ramdisk
+  mappages(kernel_pagetable, RAMDISK, FSSIZE * BSIZE,
+           RAMDISK, PTE_R | PTE_W);
+
+  // map the trampoline for trap entry/exit to
+  // the highest virtual address in the kernel.
+  mappages(kernel_pagetable, TRAMPOLINE, PGSIZE,
+           (uint64)trampout, PTE_R | PTE_X);
+}
+
+// Switch h/w page table register to the kernel's page table,
+// and enable paging.
+// NOTE(review): no sfence.vma is issued here; presumably the
+// TLB holds no stale supervisor entries this early in boot --
+// confirm against the RISC-V privileged spec if this is ever
+// called after other translations were active.
+void
+kvminithart()
+{
+  w_satp(MAKE_SATP(kernel_pagetable));
+}
+
+// Return the address of the PTE in page table pagetable
+// that corresponds to virtual address va. If alloc!=0,
+// create any required page table pages. Returns 0 if a
+// needed page-table page is absent (alloc==0) or cannot
+// be allocated.
+//
+// The risc-v Sv39 scheme has three levels of page table
+// pages. A page table page contains 512 64-bit PTEs.
+// A 64-bit virtual address is split into five fields:
+//   39..63 -- must be zero.
+//   30..38 -- 9 bits of level-2 index.
+//   21..29 -- 9 bits of level-1 index.
+//   12..20 -- 9 bits of level-0 index.
+//    0..11 -- 12 bits of byte offset within the page.
+static pte_t *
+walk(pagetable_t pagetable, uint64 va, int alloc)
+{
+  if(va >= MAXVA)
+    panic("walk");
+
+  // descend from the root (level 2) through level 1;
+  // the loop leaves pagetable at the level-0 page.
+  for(int level = 2; level > 0; level--) {
+    pte_t *pte = &pagetable[PX(level, va)];
+    if(*pte & PTE_V) {
+      pagetable = (pagetable_t)PTE2PA(*pte);
+    } else {
+      if(!alloc || (pagetable = (pde_t*)kalloc()) == 0)
+        return 0;
+      memset(pagetable, 0, PGSIZE);
+      *pte = PA2PTE(pagetable) | PTE_V;
+    }
+  }
+  return &pagetable[PX(0, va)];
+}
+
+// Look up virtual address va in pagetable and return the
+// physical address it maps to, or 0 if it is not mapped.
+// Can only be used to look up user pages (PTE_U must be set).
+uint64
+walkaddr(pagetable_t pagetable, uint64 va)
+{
+  pte_t *pte;
+
+  pte = walk(pagetable, va, 0);
+  if(pte == 0 || (*pte & PTE_V) == 0 || (*pte & PTE_U) == 0)
+    return 0;
+  return PTE2PA(*pte);
+}
+
+
+// Create PTEs so virtual addresses [va, va+size) refer to
+// physical addresses starting at pa. va and size might not
+// be page-aligned. Panics if a mapping already exists or a
+// page-table page cannot be allocated.
+void
+mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm)
+{
+  uint64 cur = PGROUNDDOWN(va);
+  uint64 end = PGROUNDDOWN(va + size - 1);
+  pte_t *pte;
+
+  // loop compares against end before incrementing so the
+  // last page never overflows cur.
+  for(;;){
+    pte = walk(pagetable, cur, 1);
+    if(pte == 0)
+      panic("mappages: walk");
+    if(*pte & PTE_V)
+      panic("remap");
+    *pte = PA2PTE(pa) | perm | PTE_V;
+    if(cur == end)
+      break;
+    cur += PGSIZE;
+    pa += PGSIZE;
+  }
+}
+
+// Remove mappings from a page table. The mappings in
+// the given range must exist (panics on a hole or a
+// non-leaf PTE). Optionally free the physical memory.
+void
+unmappages(pagetable_t pagetable, uint64 va, uint64 size, int do_free)
+{
+  uint64 a, last;
+  pte_t *pte;
+
+  a = PGROUNDDOWN(va);
+  last = PGROUNDDOWN(va + size - 1);
+  for(;;){
+    if((pte = walk(pagetable, a, 0)) == 0)
+      panic("unmappages: walk");
+    if((*pte & PTE_V) == 0){
+      printf("va=%p pte=%p\n", a, *pte);
+      panic("unmappages: not mapped");
+    }
+    // valid but with no R/W/X bits: an intermediate
+    // page-table page, not a leaf mapping.
+    if(PTE_FLAGS(*pte) == PTE_V)
+      panic("unmappages: not a leaf");
+    if(do_free){
+      // pa is derived from the PTE each iteration; the old
+      // code also did "pa += PGSIZE" outside this branch,
+      // reading an uninitialized variable when do_free==0.
+      uint64 pa = PTE2PA(*pte);
+      kfree((void*)pa);
+    }
+    *pte = 0;
+    if(a == last)
+      break;
+    a += PGSIZE;
+  }
+}
+
+// Allocate and return an empty (all-zero) user page table.
+// Panics if no memory is available.
+pagetable_t
+uvmcreate()
+{
+  pagetable_t pt = (pagetable_t) kalloc();
+  if(pt == 0)
+    panic("uvmcreate: out of memory");
+  memset(pt, 0, PGSIZE);
+  return pt;
+}
+
+// Load the user initcode into address 0 of pagetable,
+// for the very first process.
+// sz must be less than a page.
+// Panics if sz is too big or memory is exhausted.
+void
+uvminit(pagetable_t pagetable, uchar *src, uint sz)
+{
+  char *mem;
+
+  if(sz >= PGSIZE)
+    panic("uvminit: more than a page");
+  mem = kalloc();
+  // the old code passed an unchecked mem to memset,
+  // dereferencing a null pointer if kalloc failed.
+  if(mem == 0)
+    panic("uvminit: out of memory");
+  memset(mem, 0, PGSIZE);
+  mappages(pagetable, 0, PGSIZE, (uint64)mem, PTE_W|PTE_R|PTE_X|PTE_U);
+  memmove(mem, src, sz);
+}
+
+// Allocate PTEs and physical memory to grow a process from
+// oldsz to newsz, which need not be page-aligned.
+// Returns the new size, or 0 if allocation fails (any pages
+// already allocated here are freed again first).
+uint64
+uvmalloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)
+{
+  uint64 va;
+  char *mem;
+
+  if(newsz < oldsz)
+    return oldsz;
+
+  oldsz = PGROUNDUP(oldsz);
+  for(va = oldsz; va < newsz; va += PGSIZE){
+    mem = kalloc();
+    if(mem == 0){
+      // out of memory: back out the pages mapped so far.
+      uvmdealloc(pagetable, va, oldsz);
+      return 0;
+    }
+    memset(mem, 0, PGSIZE);
+    mappages(pagetable, va, PGSIZE, (uint64)mem, PTE_W|PTE_X|PTE_R|PTE_U);
+  }
+  return newsz;
+}
+
+// Deallocate user pages to bring the process size from oldsz
+// down to newsz. Neither needs to be page-aligned, and newsz
+// may exceed oldsz (then nothing happens). oldsz can be larger
+// than the actual process size. Returns the new process size.
+uint64
+uvmdealloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)
+{
+  if(oldsz > newsz){
+    unmappages(pagetable, newsz, oldsz - newsz, 1);
+    return newsz;
+  }
+  return oldsz;
+}
+
+// Recursively free the pages that make up a page table.
+// All leaf mappings must already have been removed;
+// panics if a valid leaf PTE is still present.
+static void
+freewalk(pagetable_t pagetable)
+{
+  int i;
+
+  // a page-table page holds 2^9 = 512 PTEs.
+  for(i = 0; i < 512; i++){
+    pte_t pte = pagetable[i];
+    if((pte & PTE_V) == 0)
+      continue;
+    if(pte & (PTE_R|PTE_W|PTE_X))
+      panic("freewalk: leaf");
+    // valid but not readable/writable/executable:
+    // this PTE refers to a lower-level page table.
+    freewalk((pagetable_t)PTE2PA(pte));
+    pagetable[i] = 0;
+  }
+  kfree((void*)pagetable);
+}
+
+// Free user memory pages,
+// then free page table pages.
+// sz is the process size in bytes; every page in [0, sz)
+// must still be mapped, since unmappages panics on holes.
+void
+uvmfree(pagetable_t pagetable, uint64 sz)
+{
+  unmappages(pagetable, 0, sz, 1);
+  freewalk(pagetable);
+}
+
+// Given a parent process's page table, copy its memory into
+// a child's page table: both the page-table entries and the
+// underlying physical pages. [0, sz) must be fully mapped.
+// Panics on a missing mapping or allocation failure.
+void
+uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
+{
+  uint64 va;
+
+  for(va = 0; va < sz; va += PGSIZE){
+    pte_t *pte = walk(old, va, 0);
+    if(pte == 0)
+      panic("copyuvm: pte should exist");
+    if((*pte & PTE_V) == 0)
+      panic("copyuvm: page not present");
+    uint64 pa = PTE2PA(*pte);
+    uint flags = PTE_FLAGS(*pte);
+    char *mem = kalloc();
+    if(mem == 0)
+      panic("uvmcopy: kalloc failed");
+    memmove(mem, (char*)pa, PGSIZE);
+    // child gets the same permission bits as the parent.
+    mappages(new, va, PGSIZE, (uint64)mem, flags);
+  }
+}
+
+// Copy from kernel to user.
+// Copy len bytes from src to virtual address dstva in a given page table.
+// Return 0 on success, -1 on error.
+int
+copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
+{
+  uint64 n, va0, pa0;
+
+  while(len > 0){
+    // no (uint) cast here: truncating the 64-bit user virtual
+    // address to 32 bits would round down to the wrong page
+    // for any address at or above 4GB.
+    va0 = PGROUNDDOWN(dstva);
+    pa0 = walkaddr(pagetable, va0);
+    if(pa0 == 0)
+      return -1;
+    // copy at most up to the end of this page.
+    n = PGSIZE - (dstva - va0);
+    if(n > len)
+      n = len;
+    memmove((void *)(pa0 + (dstva - va0)), src, n);
+
+    len -= n;
+    src += n;
+    dstva = va0 + PGSIZE;
+  }
+  return 0;
+}
+
+// Copy from user to kernel.
+// Copy len bytes to dst from virtual address srcva in a given page table.
+// Return 0 on success, -1 on error.
+int
+copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len)
+{
+  uint64 n, va0, pa0;
+
+  while(len > 0){
+    // no (uint) cast here: truncating the 64-bit user virtual
+    // address to 32 bits would round down to the wrong page
+    // for any address at or above 4GB.
+    va0 = PGROUNDDOWN(srcva);
+    pa0 = walkaddr(pagetable, va0);
+    if(pa0 == 0)
+      return -1;
+    // copy at most up to the end of this page.
+    n = PGSIZE - (srcva - va0);
+    if(n > len)
+      n = len;
+    memmove(dst, (void *)(pa0 + (srcva - va0)), n);
+
+    len -= n;
+    dst += n;
+    srcva = va0 + PGSIZE;
+  }
+  return 0;
+}
+
+// Copy a null-terminated string from user to kernel.
+// Copy bytes to dst from virtual address srcva in a given page table,
+// until a '\0', or max bytes.
+// Return 0 on success (the '\0' was found and copied), -1 on error.
+int
+copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max)
+{
+  uint64 n, va0, pa0;
+  int got_null = 0;
+
+  while(got_null == 0 && max > 0){
+    // no (uint) cast here: truncating the 64-bit user virtual
+    // address to 32 bits would round down to the wrong page
+    // for any address at or above 4GB.
+    va0 = PGROUNDDOWN(srcva);
+    pa0 = walkaddr(pagetable, va0);
+    if(pa0 == 0)
+      return -1;
+    // scan at most to the end of this page, and at most max bytes.
+    n = PGSIZE - (srcva - va0);
+    if(n > max)
+      n = max;
+
+    char *p = (char *) (pa0 + (srcva - va0));
+    while(n > 0){
+      if(*p == '\0'){
+        *dst = '\0';
+        got_null = 1;
+        break;
+      } else {
+        *dst = *p;
+      }
+      --n;
+      --max;
+      p++;
+      dst++;
+    }
+
+    srcva = va0 + PGSIZE;
+  }
+  if(got_null){
+    return 0;
+  } else {
+    return -1;
+  }
+}