| author | Robert Morris <[email protected]> | 2019-06-11 09:57:14 -0400 |
|---|---|---|
| committer | Robert Morris <[email protected]> | 2019-06-11 09:57:14 -0400 |
| commit | 5753553213df8f9de851adb68377db43faecb91f (patch) | |
| tree | 3b629ff54897fca414146677532cb459a2ed11ba /kernel | |
| parent | 91ba81110acd3163f7de3580b677eece0c57f5e7 (diff) | |
separate source into kernel/ user/ mkfs/
Diffstat (limited to 'kernel')
43 files changed, 5609 insertions, 0 deletions
diff --git a/kernel/bio.c b/kernel/bio.c new file mode 100644 index 0000000..90f9af9 --- /dev/null +++ b/kernel/bio.c @@ -0,0 +1,145 @@ +// Buffer cache. +// +// The buffer cache is a linked list of buf structures holding +// cached copies of disk block contents. Caching disk blocks +// in memory reduces the number of disk reads and also provides +// a synchronization point for disk blocks used by multiple processes. +// +// Interface: +// * To get a buffer for a particular disk block, call bread. +// * After changing buffer data, call bwrite to write it to disk. +// * When done with the buffer, call brelse. +// * Do not use the buffer after calling brelse. +// * Only one process at a time can use a buffer, +// so do not keep them longer than necessary. +// +// The implementation uses two state flags internally: +// * B_VALID: the buffer data has been read from the disk. +// * B_DIRTY: the buffer data has been modified +// and needs to be written to disk. + +#include "types.h" +#include "param.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "riscv.h" +#include "defs.h" +#include "fs.h" +#include "buf.h" + +struct { + struct spinlock lock; + struct buf buf[NBUF]; + + // Linked list of all buffers, through prev/next. + // head.next is most recently used. + struct buf head; +} bcache; + +void +binit(void) +{ + struct buf *b; + + initlock(&bcache.lock, "bcache"); + +//PAGEBREAK! + // Create linked list of buffers + bcache.head.prev = &bcache.head; + bcache.head.next = &bcache.head; + for(b = bcache.buf; b < bcache.buf+NBUF; b++){ + b->next = bcache.head.next; + b->prev = &bcache.head; + initsleeplock(&b->lock, "buffer"); + bcache.head.next->prev = b; + bcache.head.next = b; + } +} + +// Look through buffer cache for block on device dev. +// If not found, allocate a buffer. +// In either case, return locked buffer. +static struct buf* +bget(uint dev, uint blockno) +{ + struct buf *b; + + acquire(&bcache.lock); + + // Is the block already cached? + for(b = bcache.head.next; b != &bcache.head; b = b->next){ + if(b->dev == dev && b->blockno == blockno){ + b->refcnt++; + release(&bcache.lock); + acquiresleep(&b->lock); + return b; + } + } + + // Not cached; recycle an unused buffer. + // Even if refcnt==0, B_DIRTY indicates a buffer is in use + // because log.c has modified it but not yet committed it. + for(b = bcache.head.prev; b != &bcache.head; b = b->prev){ + if(b->refcnt == 0 && (b->flags & B_DIRTY) == 0) { + b->dev = dev; + b->blockno = blockno; + b->flags = 0; + b->refcnt = 1; + release(&bcache.lock); + acquiresleep(&b->lock); + return b; + } + } + panic("bget: no buffers"); +} + +// Return a locked buf with the contents of the indicated block. +struct buf* +bread(uint dev, uint blockno) +{ + struct buf *b; + + b = bget(dev, blockno); + if((b->flags & B_VALID) == 0) { + ramdiskrw(b); + } + return b; +} + +// Write b's contents to disk. Must be locked. +void +bwrite(struct buf *b) +{ + if(!holdingsleep(&b->lock)) + panic("bwrite"); + b->flags |= B_DIRTY; + ramdiskrw(b); +} + +// Release a locked buffer. +// Move to the head of the MRU list. +void +brelse(struct buf *b) +{ + if(!holdingsleep(&b->lock)) + panic("brelse"); + + releasesleep(&b->lock); + + acquire(&bcache.lock); + b->refcnt--; + if (b->refcnt == 0) { + // no one is waiting for it. + b->next->prev = b->prev; + b->prev->next = b->next; + b->next = bcache.head.next; + b->prev = &bcache.head; + bcache.head.next->prev = b; + bcache.head.next = b; + } + + release(&bcache.lock); +} +//PAGEBREAK! +// Blank page. 
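The header comment in bio.c above describes a fixed call pattern for the buffer cache: bread, optionally bwrite, then brelse. The following is a minimal illustrative sketch, not part of this commit; example_update_block is a hypothetical caller, and real xv6 callers that modify blocks inside a transaction use log_write() rather than calling bwrite() directly.

// Read block `blockno` from device `dev`, modify one byte of the cached
// copy, write it back, and release the buffer. bread() returns the buffer
// locked; brelse() unlocks it, and the buffer must not be used afterwards.
void
example_update_block(uint dev, uint blockno)
{
  struct buf *b;

  b = bread(dev, blockno);   // locked buffer, contents read from disk if needed
  b->data[0] ^= 1;           // change the in-memory copy
  bwrite(b);                 // write the modified block to disk
  brelse(b);                 // unlock and move to the front of the MRU list
}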
+ diff --git a/kernel/buf.h b/kernel/buf.h new file mode 100644 index 0000000..3266495 --- /dev/null +++ b/kernel/buf.h @@ -0,0 +1,14 @@ +struct buf { + int flags; + uint dev; + uint blockno; + struct sleeplock lock; + uint refcnt; + struct buf *prev; // LRU cache list + struct buf *next; + struct buf *qnext; // disk queue + uchar data[BSIZE]; +}; +#define B_VALID 0x2 // buffer has been read from disk +#define B_DIRTY 0x4 // buffer needs to be written to disk + diff --git a/kernel/console.c b/kernel/console.c new file mode 100644 index 0000000..b20d4a9 --- /dev/null +++ b/kernel/console.c @@ -0,0 +1,271 @@ +// Console input and output. +// Input is from the keyboard or serial port. +// Output is written to the screen and serial port. + +#include <stdarg.h> + +#include "types.h" +#include "param.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "file.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" +#include "proc.h" + +static void consputc(int); + +static volatile int panicked = 0; + +static struct { + struct spinlock lock; + int locking; +} cons; + +static char digits[] = "0123456789abcdef"; + +static void +printint(int xx, int base, int sign) +{ + char buf[16]; + int i; + uint x; + + if(sign && (sign = xx < 0)) + x = -xx; + else + x = xx; + + i = 0; + do{ + buf[i++] = digits[x % base]; + }while((x /= base) != 0); + + if(sign) + buf[i++] = '-'; + + while(--i >= 0) + consputc(buf[i]); +} + +static void +printptr(uint64 x) { + int i; + consputc('0'); + consputc('x'); + for (i = 0; i < (sizeof(uint64) * 2); i++, x <<= 4) + consputc(digits[x >> (sizeof(uint64) * 8 - 4)]); +} + + +//PAGEBREAK: 50 + +// Print to the console. only understands %d, %x, %p, %s. +void +printf(char *fmt, ...) +{ + va_list ap; + int i, c, locking; + char *s; + + locking = cons.locking; + if(locking) + acquire(&cons.lock); + + if (fmt == 0) + panic("null fmt"); + + va_start(ap, fmt); + for(i = 0; (c = fmt[i] & 0xff) != 0; i++){ + if(c != '%'){ + consputc(c); + continue; + } + c = fmt[++i] & 0xff; + if(c == 0) + break; + switch(c){ + case 'd': + printint(va_arg(ap, int), 10, 1); + break; + case 'x': + printint(va_arg(ap, int), 16, 1); + break; + case 'p': + printptr(va_arg(ap, uint64)); + break; + case 's': + if((s = va_arg(ap, char*)) == 0) + s = "(null)"; + for(; *s; s++) + consputc(*s); + break; + case '%': + consputc('%'); + break; + default: + // Print unknown % sequence to draw attention. + consputc('%'); + consputc(c); + break; + } + } + + if(locking) + release(&cons.lock); +} + +void +panic(char *s) +{ + cons.locking = 0; + printf("panic: "); + printf(s); + printf("\n"); + panicked = 1; // freeze other CPU + for(;;) + ; +} + +#define BACKSPACE 0x100 + +void +consputc(int c) +{ + if(panicked){ + for(;;) + ; + } + + if(c == BACKSPACE){ + uartputc('\b'); uartputc(' '); uartputc('\b'); + } else + uartputc(c); +} + +#define INPUT_BUF 128 +struct { + char buf[INPUT_BUF]; + uint r; // Read index + uint w; // Write index + uint e; // Edit index +} input; + +#define C(x) ((x)-'@') // Contro + +int +consoleread(struct inode *ip, int user_dst, uint64 dst, int n) +{ + uint target; + int c; + char buf[1]; + + iunlock(ip); + target = n; + acquire(&cons.lock); + while(n > 0){ + while(input.r == input.w){ + if(myproc()->killed){ + release(&cons.lock); + ilock(ip); + return -1; + } + sleep(&input.r, &cons.lock); + } + c = input.buf[input.r++ % INPUT_BUF]; + if(c == C('D')){ // EOF + if(n < target){ + // Save ^D for next time, to make sure + // caller gets a 0-byte result. 
+ input.r--; + } + break; + } + buf[0] = c; + if(either_copyout(user_dst, dst, &buf[0], 1) == -1) + break; + dst++; + --n; + if(c == '\n') + break; + } + release(&cons.lock); + ilock(ip); + + return target - n; +} + +int +consolewrite(struct inode *ip, int user_src, uint64 src, int n) +{ + int i; + + iunlock(ip); + acquire(&cons.lock); + for(i = 0; i < n; i++){ + char c; + if(either_copyin(&c, user_src, src+i, 1) == -1) + break; + consputc(c); + } + release(&cons.lock); + ilock(ip); + + return n; +} + +void +consoleintr(int c) +{ + int doprocdump = 0; + + acquire(&cons.lock); + + switch(c){ + case C('P'): // Process list. + // procdump() locks cons.lock indirectly; invoke later + doprocdump = 1; + break; + case C('U'): // Kill line. + while(input.e != input.w && + input.buf[(input.e-1) % INPUT_BUF] != '\n'){ + input.e--; + consputc(BACKSPACE); + } + break; + case C('H'): case '\x7f': // Backspace + if(input.e != input.w){ + input.e--; + consputc(BACKSPACE); + } + break; + default: + if(c != 0 && input.e-input.r < INPUT_BUF){ + c = (c == '\r') ? '\n' : c; + input.buf[input.e++ % INPUT_BUF] = c; + consputc(c); + if(c == '\n' || c == C('D') || input.e == input.r+INPUT_BUF){ + input.w = input.e; + wakeup(&input.r); + } + } + break; + } + + release(&cons.lock); + + if(doprocdump) + procdump(); +} + +void +consoleinit(void) +{ + initlock(&cons.lock, "console"); + + devsw[CONSOLE].write = consolewrite; + devsw[CONSOLE].read = consoleread; + cons.locking = 1; +} diff --git a/kernel/date.h b/kernel/date.h new file mode 100644 index 0000000..94aec4b --- /dev/null +++ b/kernel/date.h @@ -0,0 +1,8 @@ +struct rtcdate { + uint second; + uint minute; + uint hour; + uint day; + uint month; + uint year; +}; diff --git a/kernel/defs.h b/kernel/defs.h new file mode 100644 index 0000000..597e5b6 --- /dev/null +++ b/kernel/defs.h @@ -0,0 +1,205 @@ +struct buf; +struct context; +struct file; +struct inode; +struct pipe; +struct proc; +struct rtcdate; +struct spinlock; +struct sleeplock; +struct stat; +struct superblock; +struct sysframe; + +// bio.c +void binit(void); +struct buf* bread(uint, uint); +void brelse(struct buf*); +void bwrite(struct buf*); + +// console.c +void consoleinit(void); +void printf(char*, ...); +void consoleintr(int); +void panic(char*) __attribute__((noreturn)); + +// exec.c +int exec(char*, char**); + +// file.c +struct file* filealloc(void); +void fileclose(struct file*); +struct file* filedup(struct file*); +void fileinit(void); +int fileread(struct file*, uint64, int n); +int filestat(struct file*, uint64 addr); +int filewrite(struct file*, uint64, int n); + +// fs.c +void readsb(int dev, struct superblock *sb); +int dirlink(struct inode*, char*, uint); +struct inode* dirlookup(struct inode*, char*, uint*); +struct inode* ialloc(uint, short); +struct inode* idup(struct inode*); +void iinit(int dev); +void ilock(struct inode*); +void iput(struct inode*); +void iunlock(struct inode*); +void iunlockput(struct inode*); +void iupdate(struct inode*); +int namecmp(const char*, const char*); +struct inode* namei(char*); +struct inode* nameiparent(char*, char*); +int readi(struct inode*, int, uint64, uint, uint); +void stati(struct inode*, struct stat*); +int writei(struct inode*, int, uint64, uint, uint); + +// ramdisk.c +void ramdiskinit(void); +void ramdiskintr(void); +void ramdiskrw(struct buf*); + +// ioapic.c +void ioapicenable(int irq, int cpu); +extern uchar ioapicid; +void ioapicinit(void); + +// kalloc.c +void* kalloc(void); +void kfree(void *); +void kinit(); + +// kbd.c +void 
kbdintr(void); + +// lapic.c +void cmostime(struct rtcdate *r); +int lapicid(void); +extern volatile uint* lapic; +void lapiceoi(void); +void lapicinit(void); +void lapicstartap(uchar, uint); +void microdelay(int); + +// log.c +void initlog(int dev); +void log_write(struct buf*); +void begin_op(); +void end_op(); + +// mp.c +extern int ismp; +void mpinit(void); + +// picirq.c +void picenable(int); +void picinit(void); + +// pipe.c +int pipealloc(struct file**, struct file**); +void pipeclose(struct pipe*, int); +int piperead(struct pipe*, uint64, int); +int pipewrite(struct pipe*, uint64, int); + +//PAGEBREAK: 16 +// proc.c +int cpuid(void); +void exit(void); +int fork(void); +int growproc(int); +pagetable_t proc_pagetable(struct proc *); +void proc_freepagetable(pagetable_t, uint64); +int kill(int); +struct cpu* mycpu(void); +struct cpu* getmycpu(void); +struct proc* myproc(); +void procinit(void); +void scheduler(void) __attribute__((noreturn)); +void sched(void); +void setproc(struct proc*); +void sleep(void*, struct spinlock*); +void userinit(void); +int wait(void); +void wakeup(void*); +void yield(void); +int either_copyout(int user_dst, uint64 dst, void *src, uint64 len); +int either_copyin(void *dst, int user_src, uint64 src, uint64 len); +void procdump(void); + +// swtch.S +void swtch(struct context*, struct context*); + +// spinlock.c +void acquire(struct spinlock*); +int holding(struct spinlock*); +void initlock(struct spinlock*, char*); +void release(struct spinlock*); +void push_off(void); +void pop_off(void); + +// sleeplock.c +void acquiresleep(struct sleeplock*); +void releasesleep(struct sleeplock*); +int holdingsleep(struct sleeplock*); +void initsleeplock(struct sleeplock*, char*); + +// string.c +int memcmp(const void*, const void*, uint); +void* memmove(void*, const void*, uint); +void* memset(void*, int, uint); +char* safestrcpy(char*, const char*, int); +int strlen(const char*); +int strncmp(const char*, const char*, uint); +char* strncpy(char*, const char*, int); + +// syscall.c +int argint(int, int*); +int argptr(int, uint64*, int); +int argstr(int, char*, int); +int argaddr(int, uint64 *); +int fetchint(uint64, int*); +int fetchstr(uint64, char*, int); +int fetchaddr(uint64, uint64*); +void syscall(); + +// timer.c +void timerinit(void); + +// trap.c +extern uint ticks; +void trapinit(void); +void trapinithart(void); +extern struct spinlock tickslock; +void usertrapret(void); + +// uart.c +void uartinit(void); +void uartintr(void); +void uartputc(int); +int uartgetc(void); + +// vm.c +void kvminit(void); +void kvminithart(void); +pagetable_t uvmcreate(void); +void uvminit(pagetable_t, uchar *, uint); +uint64 uvmalloc(pagetable_t, uint64, uint64); +uint64 uvmdealloc(pagetable_t, uint64, uint64); +void uvmcopy(pagetable_t, pagetable_t, uint64); +void uvmfree(pagetable_t, uint64); +void mappages(pagetable_t, uint64, uint64, uint64, int); +void unmappages(pagetable_t, uint64, uint64, int); +uint64 walkaddr(pagetable_t, uint64); +int copyout(pagetable_t, uint64, char *, uint64); +int copyin(pagetable_t, char *, uint64, uint64); +int copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max); + +// plic.c +void plicinit(void); +void plicinithart(void); +uint64 plic_pending(void); +int plic_claim(void); +void plic_complete(int); + +// number of elements in fixed-size array +#define NELEM(x) (sizeof(x)/sizeof((x)[0])) diff --git a/kernel/elf.h b/kernel/elf.h new file mode 100644 index 0000000..84555fa --- /dev/null +++ b/kernel/elf.h @@ -0,0 +1,42 @@ +// Format 
of an ELF executable file + +#define ELF_MAGIC 0x464C457FU // "\x7FELF" in little endian + +// File header +struct elfhdr { + uint magic; // must equal ELF_MAGIC + uchar elf[12]; + ushort type; + ushort machine; + uint version; + uint64 entry; + uint64 phoff; + uint64 shoff; + uint flags; + ushort ehsize; + ushort phentsize; + ushort phnum; + ushort shentsize; + ushort shnum; + ushort shstrndx; +}; + +// Program section header +struct proghdr { + uint32 type; + uint32 flags; + uint64 off; + uint64 vaddr; + uint64 paddr; + uint64 filesz; + uint64 memsz; + uint64 align; +}; + +// Values for Proghdr type +#define ELF_PROG_LOAD 1 + +// Flag bits for Proghdr flags +#define ELF_PROG_FLAG_EXEC 1 +#define ELF_PROG_FLAG_WRITE 2 +#define ELF_PROG_FLAG_READ 4 diff --git a/kernel/entry.S b/kernel/entry.S new file mode 100644 index 0000000..b3d2c55 --- /dev/null +++ b/kernel/entry.S @@ -0,0 +1,25 @@ + # qemu -kernel starts at 0x1000. the instructions + # there seem to be provided by qemu, as if it + # were a ROM. the code at 0x1000 jumps to + # 0x8000000, the _start function here, + # in machine mode. +.section .data +.globl stack0 +.section .text +.globl mstart +.section .text +.globl _entry +_entry: + # set up a stack for C. + # stack0 is declared in start, + # with 4096 bytes per CPU. + la sp, stack0 + li a0, 1024*4 + csrr a1, mhartid + addi a1, a1, 1 + mul a0, a0, a1 + add sp, sp, a0 + # jump to mstart() in start.c + call mstart +junk: + j junk diff --git a/kernel/exec.c b/kernel/exec.c new file mode 100644 index 0000000..c9af395 --- /dev/null +++ b/kernel/exec.c @@ -0,0 +1,149 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "proc.h" +#include "defs.h" +#include "elf.h" + +static int loadseg(pde_t *pgdir, uint64 addr, struct inode *ip, uint offset, uint sz); + +int +exec(char *path, char **argv) +{ + char *s, *last; + int i, off; + uint64 argc, sz, sp, ustack[MAXARG+1], stackbase; + struct elfhdr elf; + struct inode *ip; + struct proghdr ph; + pagetable_t pagetable = 0, oldpagetable; + struct proc *p = myproc(); + uint64 oldsz = p->sz; + + begin_op(); + + if((ip = namei(path)) == 0){ + end_op(); + return -1; + } + ilock(ip); + + // Check ELF header + if(readi(ip, 0, (uint64)&elf, 0, sizeof(elf)) != sizeof(elf)) + goto bad; + if(elf.magic != ELF_MAGIC) + goto bad; + + if((pagetable = proc_pagetable(p)) == 0) + goto bad; + + // Load program into memory. + sz = 0; + for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){ + if(readi(ip, 0, (uint64)&ph, off, sizeof(ph)) != sizeof(ph)) + goto bad; + if(ph.type != ELF_PROG_LOAD) + continue; + if(ph.memsz < ph.filesz) + goto bad; + if(ph.vaddr + ph.memsz < ph.vaddr) + goto bad; + if((sz = uvmalloc(pagetable, sz, ph.vaddr + ph.memsz)) == 0) + goto bad; + if(ph.vaddr % PGSIZE != 0) + goto bad; + if(loadseg(pagetable, ph.vaddr, ip, ph.off, ph.filesz) < 0) + goto bad; + } + iunlockput(ip); + end_op(); + ip = 0; + + // Allocate two pages at the next page boundary. + // Use the second as the user stack. + sz = PGROUNDUP(sz); + if((sz = uvmalloc(pagetable, sz, sz + 2*PGSIZE)) == 0) + goto bad; + sp = sz; + stackbase = sp - PGSIZE; + + // Push argument strings, prepare rest of stack in ustack. 
+ for(argc = 0; argv[argc]; argc++) { + if(argc >= MAXARG) + goto bad; + sp -= strlen(argv[argc]) + 1; + sp -= sp % 16; // riscv sp must be 16-byte aligned + if(sp < stackbase) + goto bad; + if(copyout(pagetable, sp, argv[argc], strlen(argv[argc]) + 1) < 0) + goto bad; + ustack[argc] = sp; + } + ustack[argc] = 0; + + // push the array of argv[] pointers. + sp -= (argc+1) * sizeof(uint64); + sp -= sp % 16; + if(sp < stackbase) + goto bad; + if(copyout(pagetable, sp, (char *)ustack, (argc+1)*sizeof(uint64)) < 0) + goto bad; + + // arguments to user main(argc, argv) + // argc is returned via the system call return + // value, which goes in a0. + p->tf->a1 = sp; + + // Save program name for debugging. + for(last=s=path; *s; s++) + if(*s == '/') + last = s+1; + safestrcpy(p->name, last, sizeof(p->name)); + + // Commit to the user image. + oldpagetable = p->pagetable; + p->pagetable = pagetable; + p->sz = sz; + p->tf->epc = elf.entry; // initial program counter = main + p->tf->sp = sp; // initial stack pointer + proc_freepagetable(oldpagetable, oldsz); + return argc; // this ends up in a0, the first argument to main(argc, argv) + + bad: + if(pagetable) + proc_freepagetable(pagetable, sz); + if(ip){ + iunlockput(ip); + end_op(); + } + return -1; +} + +// Load a program segment into pagetable at virtual address va. +// va must be page-aligned +// and the pages from va to va+sz must already be mapped. +// Returns 0 on success, -1 on failure. +static int +loadseg(pagetable_t pagetable, uint64 va, struct inode *ip, uint offset, uint sz) +{ + uint i, n; + uint64 pa; + + if((va % PGSIZE) != 0) + panic("loadseg: va must be page aligned"); + + for(i = 0; i < sz; i += PGSIZE){ + pa = walkaddr(pagetable, va + i); + if(pa == 0) + panic("loadseg: address should exist"); + if(sz - i < PGSIZE) + n = sz - i; + else + n = PGSIZE; + if(readi(ip, 0, (uint64)pa, offset+i, n) != n) + return -1; + } + + return 0; +} diff --git a/kernel/fcntl.h b/kernel/fcntl.h new file mode 100644 index 0000000..d565483 --- /dev/null +++ b/kernel/fcntl.h @@ -0,0 +1,4 @@ +#define O_RDONLY 0x000 +#define O_WRONLY 0x001 +#define O_RDWR 0x002 +#define O_CREATE 0x200 diff --git a/kernel/file.c b/kernel/file.c new file mode 100644 index 0000000..6f27f22 --- /dev/null +++ b/kernel/file.c @@ -0,0 +1,175 @@ +// +// Support functions for system calls that involve file descriptors. +// + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "fs.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "file.h" +#include "stat.h" +#include "proc.h" + +struct devsw devsw[NDEV]; +struct { + struct spinlock lock; + struct file file[NFILE]; +} ftable; + +void +fileinit(void) +{ + initlock(&ftable.lock, "ftable"); +} + +// Allocate a file structure. +struct file* +filealloc(void) +{ + struct file *f; + + acquire(&ftable.lock); + for(f = ftable.file; f < ftable.file + NFILE; f++){ + if(f->ref == 0){ + f->ref = 1; + release(&ftable.lock); + return f; + } + } + release(&ftable.lock); + return 0; +} + +// Increment ref count for file f. +struct file* +filedup(struct file *f) +{ + acquire(&ftable.lock); + if(f->ref < 1) + panic("filedup"); + f->ref++; + release(&ftable.lock); + return f; +} + +// Close file f. (Decrement ref count, close when reaches 0.) 
+void +fileclose(struct file *f) +{ + struct file ff; + + acquire(&ftable.lock); + if(f->ref < 1) + panic("fileclose"); + if(--f->ref > 0){ + release(&ftable.lock); + return; + } + ff = *f; + f->ref = 0; + f->type = FD_NONE; + release(&ftable.lock); + + if(ff.type == FD_PIPE) + pipeclose(ff.pipe, ff.writable); + else if(ff.type == FD_INODE){ + begin_op(); + iput(ff.ip); + end_op(); + } +} + +// Get metadata about file f. +// addr is a user virtual address, pointing to a struct stat. +int +filestat(struct file *f, uint64 addr) +{ + struct proc *p = myproc(); + struct stat st; + + if(f->type == FD_INODE){ + ilock(f->ip); + stati(f->ip, &st); + iunlock(f->ip); + if(copyout(p->pagetable, addr, (char *)&st, sizeof(st)) < 0) + return -1; + return 0; + } + return -1; +} + +// Read from file f. +// addr is a user virtual address. +int +fileread(struct file *f, uint64 addr, int n) +{ + int r = 0; + + if(f->readable == 0) + return -1; + + if(f->type == FD_PIPE){ + r = piperead(f->pipe, addr, n); + } else if(f->type == FD_INODE){ + ilock(f->ip); + if((r = readi(f->ip, 1, addr, f->off, n)) > 0) + f->off += r; + iunlock(f->ip); + } else { + panic("fileread"); + } + + return r; +} + +//PAGEBREAK! +// Write to file f. +// addr is a user virtual address. +int +filewrite(struct file *f, uint64 addr, int n) +{ + int r, ret = 0; + + if(f->writable == 0) + return -1; + + if(f->type == FD_PIPE){ + ret = pipewrite(f->pipe, addr, n); + } else if(f->type == FD_INODE){ + // write a few blocks at a time to avoid exceeding + // the maximum log transaction size, including + // i-node, indirect block, allocation blocks, + // and 2 blocks of slop for non-aligned writes. + // this really belongs lower down, since writei() + // might be writing a device like the console. + int max = ((MAXOPBLOCKS-1-1-2) / 2) * 512; + int i = 0; + while(i < n){ + int n1 = n - i; + if(n1 > max) + n1 = max; + + begin_op(); + ilock(f->ip); + if ((r = writei(f->ip, 1, addr + i, f->off, n1)) > 0) + f->off += r; + iunlock(f->ip); + end_op(); + + if(r < 0) + break; + if(r != n1) + panic("short filewrite"); + i += r; + } + ret = (i == n ? n : -1); + } else { + panic("filewrite"); + } + + return ret; +} + diff --git a/kernel/file.h b/kernel/file.h new file mode 100644 index 0000000..f28018f --- /dev/null +++ b/kernel/file.h @@ -0,0 +1,37 @@ +struct file { + enum { FD_NONE, FD_PIPE, FD_INODE } type; + int ref; // reference count + char readable; + char writable; + struct pipe *pipe; + struct inode *ip; + uint off; +}; + + +// in-memory copy of an inode +struct inode { + uint dev; // Device number + uint inum; // Inode number + int ref; // Reference count + struct sleeplock lock; // protects everything below here + int valid; // inode has been read from disk? + + short type; // copy of disk inode + short major; + short minor; + short nlink; + uint size; + uint addrs[NDIRECT+1]; +}; + +// table mapping major device number to +// device functions +struct devsw { + int (*read)(struct inode*, int, uint64, int); + int (*write)(struct inode*, int, uint64, int); +}; + +extern struct devsw devsw[]; + +#define CONSOLE 1 diff --git a/kernel/fs.c b/kernel/fs.c new file mode 100644 index 0000000..ebe377a --- /dev/null +++ b/kernel/fs.c @@ -0,0 +1,681 @@ +// File system implementation. Five layers: +// + Blocks: allocator for raw disk blocks. +// + Log: crash recovery for multi-step updates. +// + Files: inode allocator, reading, writing, metadata. +// + Directories: inode with special contents (list of other inodes!) 
+// + Names: paths like /usr/rtm/xv6/fs.c for convenient naming. +// +// This file contains the low-level file system manipulation +// routines. The (higher-level) system call implementations +// are in sysfile.c. + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "stat.h" +#include "proc.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" +#include "file.h" + +#define min(a, b) ((a) < (b) ? (a) : (b)) +static void itrunc(struct inode*); +// there should be one superblock per disk device, but we run with +// only one device +struct superblock sb; + +// Read the super block. +void +readsb(int dev, struct superblock *sb) +{ + struct buf *bp; + + bp = bread(dev, 1); + memmove(sb, bp->data, sizeof(*sb)); + brelse(bp); +} + +// Zero a block. +static void +bzero(int dev, int bno) +{ + struct buf *bp; + + bp = bread(dev, bno); + memset(bp->data, 0, BSIZE); + log_write(bp); + brelse(bp); +} + +// Blocks. + +// Allocate a zeroed disk block. +static uint +balloc(uint dev) +{ + int b, bi, m; + struct buf *bp; + + bp = 0; + for(b = 0; b < sb.size; b += BPB){ + bp = bread(dev, BBLOCK(b, sb)); + for(bi = 0; bi < BPB && b + bi < sb.size; bi++){ + m = 1 << (bi % 8); + if((bp->data[bi/8] & m) == 0){ // Is block free? + bp->data[bi/8] |= m; // Mark block in use. + log_write(bp); + brelse(bp); + bzero(dev, b + bi); + return b + bi; + } + } + brelse(bp); + } + panic("balloc: out of blocks"); +} + +// Free a disk block. +static void +bfree(int dev, uint b) +{ + struct buf *bp; + int bi, m; + + readsb(dev, &sb); + bp = bread(dev, BBLOCK(b, sb)); + bi = b % BPB; + m = 1 << (bi % 8); + if((bp->data[bi/8] & m) == 0) + panic("freeing free block"); + bp->data[bi/8] &= ~m; + log_write(bp); + brelse(bp); +} + +// Inodes. +// +// An inode describes a single unnamed file. +// The inode disk structure holds metadata: the file's type, +// its size, the number of links referring to it, and the +// list of blocks holding the file's content. +// +// The inodes are laid out sequentially on disk at +// sb.startinode. Each inode has a number, indicating its +// position on the disk. +// +// The kernel keeps a cache of in-use inodes in memory +// to provide a place for synchronizing access +// to inodes used by multiple processes. The cached +// inodes include book-keeping information that is +// not stored on disk: ip->ref and ip->valid. +// +// An inode and its in-memory representation go through a +// sequence of states before they can be used by the +// rest of the file system code. +// +// * Allocation: an inode is allocated if its type (on disk) +// is non-zero. ialloc() allocates, and iput() frees if +// the reference and link counts have fallen to zero. +// +// * Referencing in cache: an entry in the inode cache +// is free if ip->ref is zero. Otherwise ip->ref tracks +// the number of in-memory pointers to the entry (open +// files and current directories). iget() finds or +// creates a cache entry and increments its ref; iput() +// decrements ref. +// +// * Valid: the information (type, size, &c) in an inode +// cache entry is only correct when ip->valid is 1. +// ilock() reads the inode from +// the disk and sets ip->valid, while iput() clears +// ip->valid if ip->ref has fallen to zero. +// +// * Locked: file system code may only examine and modify +// the information in an inode and its content if it +// has first locked the inode. +// +// Thus a typical sequence is: +// ip = iget(dev, inum) +// ilock(ip) +// ... examine and modify ip->xxx ... 
+// iunlock(ip) +// iput(ip) +// +// ilock() is separate from iget() so that system calls can +// get a long-term reference to an inode (as for an open file) +// and only lock it for short periods (e.g., in read()). +// The separation also helps avoid deadlock and races during +// pathname lookup. iget() increments ip->ref so that the inode +// stays cached and pointers to it remain valid. +// +// Many internal file system functions expect the caller to +// have locked the inodes involved; this lets callers create +// multi-step atomic operations. +// +// The icache.lock spin-lock protects the allocation of icache +// entries. Since ip->ref indicates whether an entry is free, +// and ip->dev and ip->inum indicate which i-node an entry +// holds, one must hold icache.lock while using any of those fields. +// +// An ip->lock sleep-lock protects all ip-> fields other than ref, +// dev, and inum. One must hold ip->lock in order to +// read or write that inode's ip->valid, ip->size, ip->type, &c. + +struct { + struct spinlock lock; + struct inode inode[NINODE]; +} icache; + +void +iinit(int dev) +{ + int i = 0; + + initlock(&icache.lock, "icache"); + for(i = 0; i < NINODE; i++) { + initsleeplock(&icache.inode[i].lock, "inode"); + } + + readsb(dev, &sb); + if(sb.magic != FSMAGIC) + panic("invalid file system"); +} + +static struct inode* iget(uint dev, uint inum); + +//PAGEBREAK! +// Allocate an inode on device dev. +// Mark it as allocated by giving it type type. +// Returns an unlocked but allocated and referenced inode. +struct inode* +ialloc(uint dev, short type) +{ + int inum; + struct buf *bp; + struct dinode *dip; + + for(inum = 1; inum < sb.ninodes; inum++){ + bp = bread(dev, IBLOCK(inum, sb)); + dip = (struct dinode*)bp->data + inum%IPB; + if(dip->type == 0){ // a free inode + memset(dip, 0, sizeof(*dip)); + dip->type = type; + log_write(bp); // mark it allocated on the disk + brelse(bp); + return iget(dev, inum); + } + brelse(bp); + } + panic("ialloc: no inodes"); +} + +// Copy a modified in-memory inode to disk. +// Must be called after every change to an ip->xxx field +// that lives on disk, since i-node cache is write-through. +// Caller must hold ip->lock. +void +iupdate(struct inode *ip) +{ + struct buf *bp; + struct dinode *dip; + + bp = bread(ip->dev, IBLOCK(ip->inum, sb)); + dip = (struct dinode*)bp->data + ip->inum%IPB; + dip->type = ip->type; + dip->major = ip->major; + dip->minor = ip->minor; + dip->nlink = ip->nlink; + dip->size = ip->size; + memmove(dip->addrs, ip->addrs, sizeof(ip->addrs)); + log_write(bp); + brelse(bp); +} + +// Find the inode with number inum on device dev +// and return the in-memory copy. Does not lock +// the inode and does not read it from disk. +static struct inode* +iget(uint dev, uint inum) +{ + struct inode *ip, *empty; + + acquire(&icache.lock); + + // Is the inode already cached? + empty = 0; + for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){ + if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){ + ip->ref++; + release(&icache.lock); + return ip; + } + if(empty == 0 && ip->ref == 0) // Remember empty slot. + empty = ip; + } + + // Recycle an inode cache entry. + if(empty == 0) + panic("iget: no inodes"); + + ip = empty; + ip->dev = dev; + ip->inum = inum; + ip->ref = 1; + ip->valid = 0; + release(&icache.lock); + + return ip; +} + +// Increment reference count for ip. +// Returns ip to enable ip = idup(ip1) idiom. 
+struct inode* +idup(struct inode *ip) +{ + acquire(&icache.lock); + ip->ref++; + release(&icache.lock); + return ip; +} + +// Lock the given inode. +// Reads the inode from disk if necessary. +void +ilock(struct inode *ip) +{ + struct buf *bp; + struct dinode *dip; + + if(ip == 0 || ip->ref < 1) + panic("ilock"); + + acquiresleep(&ip->lock); + + if(ip->valid == 0){ + bp = bread(ip->dev, IBLOCK(ip->inum, sb)); + dip = (struct dinode*)bp->data + ip->inum%IPB; + ip->type = dip->type; + ip->major = dip->major; + ip->minor = dip->minor; + ip->nlink = dip->nlink; + ip->size = dip->size; + memmove(ip->addrs, dip->addrs, sizeof(ip->addrs)); + brelse(bp); + ip->valid = 1; + if(ip->type == 0) + panic("ilock: no type"); + } +} + +// Unlock the given inode. +void +iunlock(struct inode *ip) +{ + if(ip == 0 || !holdingsleep(&ip->lock) || ip->ref < 1) + panic("iunlock"); + + releasesleep(&ip->lock); +} + +// Drop a reference to an in-memory inode. +// If that was the last reference, the inode cache entry can +// be recycled. +// If that was the last reference and the inode has no links +// to it, free the inode (and its content) on disk. +// All calls to iput() must be inside a transaction in +// case it has to free the inode. +void +iput(struct inode *ip) +{ + acquire(&icache.lock); + + if(ip->ref == 1 && ip->valid && ip->nlink == 0){ + // inode has no links and no other references: truncate and free. + + // ip->ref == 1 means no other process can have ip locked, + // so this acquiresleep() won't block (or deadlock). + acquiresleep(&ip->lock); + + release(&icache.lock); + + itrunc(ip); + ip->type = 0; + iupdate(ip); + ip->valid = 0; + + releasesleep(&ip->lock); + + acquire(&icache.lock); + } + + ip->ref--; + release(&icache.lock); +} + +// Common idiom: unlock, then put. +void +iunlockput(struct inode *ip) +{ + iunlock(ip); + iput(ip); +} + +//PAGEBREAK! +// Inode content +// +// The content (data) associated with each inode is stored +// in blocks on the disk. The first NDIRECT block numbers +// are listed in ip->addrs[]. The next NINDIRECT blocks are +// listed in block ip->addrs[NDIRECT]. + +// Return the disk block address of the nth block in inode ip. +// If there is no such block, bmap allocates one. +static uint +bmap(struct inode *ip, uint bn) +{ + uint addr, *a; + struct buf *bp; + + if(bn < NDIRECT){ + if((addr = ip->addrs[bn]) == 0) + ip->addrs[bn] = addr = balloc(ip->dev); + return addr; + } + bn -= NDIRECT; + + if(bn < NINDIRECT){ + // Load indirect block, allocating if necessary. + if((addr = ip->addrs[NDIRECT]) == 0) + ip->addrs[NDIRECT] = addr = balloc(ip->dev); + bp = bread(ip->dev, addr); + a = (uint*)bp->data; + if((addr = a[bn]) == 0){ + a[bn] = addr = balloc(ip->dev); + log_write(bp); + } + brelse(bp); + return addr; + } + + panic("bmap: out of range"); +} + +// Truncate inode (discard contents). +// Only called when the inode has no links +// to it (no directory entries referring to it) +// and has no in-memory reference to it (is +// not an open file or current directory). 
+static void +itrunc(struct inode *ip) +{ + int i, j; + struct buf *bp; + uint *a; + + for(i = 0; i < NDIRECT; i++){ + if(ip->addrs[i]){ + bfree(ip->dev, ip->addrs[i]); + ip->addrs[i] = 0; + } + } + + if(ip->addrs[NDIRECT]){ + bp = bread(ip->dev, ip->addrs[NDIRECT]); + a = (uint*)bp->data; + for(j = 0; j < NINDIRECT; j++){ + if(a[j]) + bfree(ip->dev, a[j]); + } + brelse(bp); + bfree(ip->dev, ip->addrs[NDIRECT]); + ip->addrs[NDIRECT] = 0; + } + + ip->size = 0; + iupdate(ip); +} + +// Copy stat information from inode. +// Caller must hold ip->lock. +void +stati(struct inode *ip, struct stat *st) +{ + st->dev = ip->dev; + st->ino = ip->inum; + st->type = ip->type; + st->nlink = ip->nlink; + st->size = ip->size; +} + +//PAGEBREAK! +// Read data from inode. +// Caller must hold ip->lock. +// If user_dst==1, then dst is a user virtual address; +// otherwise, dst is a kernel address. +int +readi(struct inode *ip, int user_dst, uint64 dst, uint off, uint n) +{ + uint tot, m; + struct buf *bp; + + if(ip->type == T_DEV){ + if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read) + return -1; + return devsw[ip->major].read(ip, user_dst, dst, n); + } + + if(off > ip->size || off + n < off) + return -1; + if(off + n > ip->size) + n = ip->size - off; + + for(tot=0; tot<n; tot+=m, off+=m, dst+=m){ + bp = bread(ip->dev, bmap(ip, off/BSIZE)); + m = min(n - tot, BSIZE - off%BSIZE); + if(either_copyout(user_dst, dst, bp->data + (off % BSIZE), m) == -1) + break; + brelse(bp); + } + return n; +} + +// PAGEBREAK! +// Write data to inode. +// Caller must hold ip->lock. +// If user_src==1, then src is a user virtual address; +// otherwise, src is a kernel address. +int +writei(struct inode *ip, int user_src, uint64 src, uint off, uint n) +{ + uint tot, m; + struct buf *bp; + + if(ip->type == T_DEV){ + if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write){ + return -1; + } + return devsw[ip->major].write(ip, user_src, src, n); + } + + if(off > ip->size || off + n < off) + return -1; + if(off + n > MAXFILE*BSIZE) + return -1; + + for(tot=0; tot<n; tot+=m, off+=m, src+=m){ + bp = bread(ip->dev, bmap(ip, off/BSIZE)); + m = min(n - tot, BSIZE - off%BSIZE); + if(either_copyin(bp->data + (off % BSIZE), user_src, src, m) == -1) + break; + log_write(bp); + brelse(bp); + } + + if(n > 0 && off > ip->size){ + ip->size = off; + iupdate(ip); + } + return n; +} + +//PAGEBREAK! +// Directories + +int +namecmp(const char *s, const char *t) +{ + return strncmp(s, t, DIRSIZ); +} + +// Look for a directory entry in a directory. +// If found, set *poff to byte offset of entry. +struct inode* +dirlookup(struct inode *dp, char *name, uint *poff) +{ + uint off, inum; + struct dirent de; + + if(dp->type != T_DIR) + panic("dirlookup not DIR"); + + for(off = 0; off < dp->size; off += sizeof(de)){ + if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) + panic("dirlookup read"); + if(de.inum == 0) + continue; + if(namecmp(name, de.name) == 0){ + // entry matches path element + if(poff) + *poff = off; + inum = de.inum; + return iget(dp->dev, inum); + } + } + + return 0; +} + +// Write a new directory entry (name, inum) into the directory dp. +int +dirlink(struct inode *dp, char *name, uint inum) +{ + int off; + struct dirent de; + struct inode *ip; + + // Check that name is not present. + if((ip = dirlookup(dp, name, 0)) != 0){ + iput(ip); + return -1; + } + + // Look for an empty dirent. 
+ for(off = 0; off < dp->size; off += sizeof(de)){ + if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) + panic("dirlink read"); + if(de.inum == 0) + break; + } + + strncpy(de.name, name, DIRSIZ); + de.inum = inum; + if(writei(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) + panic("dirlink"); + + return 0; +} + +//PAGEBREAK! +// Paths + +// Copy the next path element from path into name. +// Return a pointer to the element following the copied one. +// The returned path has no leading slashes, +// so the caller can check *path=='\0' to see if the name is the last one. +// If no name to remove, return 0. +// +// Examples: +// skipelem("a/bb/c", name) = "bb/c", setting name = "a" +// skipelem("///a//bb", name) = "bb", setting name = "a" +// skipelem("a", name) = "", setting name = "a" +// skipelem("", name) = skipelem("////", name) = 0 +// +static char* +skipelem(char *path, char *name) +{ + char *s; + int len; + + while(*path == '/') + path++; + if(*path == 0) + return 0; + s = path; + while(*path != '/' && *path != 0) + path++; + len = path - s; + if(len >= DIRSIZ) + memmove(name, s, DIRSIZ); + else { + memmove(name, s, len); + name[len] = 0; + } + while(*path == '/') + path++; + return path; +} + +// Look up and return the inode for a path name. +// If parent != 0, return the inode for the parent and copy the final +// path element into name, which must have room for DIRSIZ bytes. +// Must be called inside a transaction since it calls iput(). +static struct inode* +namex(char *path, int nameiparent, char *name) +{ + struct inode *ip, *next; + + if(*path == '/') + ip = iget(ROOTDEV, ROOTINO); + else + ip = idup(myproc()->cwd); + + while((path = skipelem(path, name)) != 0){ + ilock(ip); + if(ip->type != T_DIR){ + iunlockput(ip); + return 0; + } + if(nameiparent && *path == '\0'){ + // Stop one level early. + iunlock(ip); + return ip; + } + if((next = dirlookup(ip, name, 0)) == 0){ + iunlockput(ip); + return 0; + } + iunlockput(ip); + ip = next; + } + if(nameiparent){ + iput(ip); + return 0; + } + return ip; +} + +struct inode* +namei(char *path) +{ + char name[DIRSIZ]; + return namex(path, 0, name); +} + +struct inode* +nameiparent(char *path, char *name) +{ + return namex(path, 1, name); +} diff --git a/kernel/fs.h b/kernel/fs.h new file mode 100644 index 0000000..bc0805f --- /dev/null +++ b/kernel/fs.h @@ -0,0 +1,60 @@ +// On-disk file system format. +// Both the kernel and user programs use this header file. + + +#define ROOTINO 1 // root i-number +#define BSIZE 1024 // block size + +// Disk layout: +// [ boot block | super block | log | inode blocks | +// free bit map | data blocks] +// +// mkfs computes the super block and builds an initial file system. The +// super block describes the disk layout: +struct superblock { + uint magic; // Must be FSMAGIC + uint size; // Size of file system image (blocks) + uint nblocks; // Number of data blocks + uint ninodes; // Number of inodes. 
+ uint nlog; // Number of log blocks + uint logstart; // Block number of first log block + uint inodestart; // Block number of first inode block + uint bmapstart; // Block number of first free map block +}; + +#define FSMAGIC 0x10203040 + +#define NDIRECT 12 +#define NINDIRECT (BSIZE / sizeof(uint)) +#define MAXFILE (NDIRECT + NINDIRECT) + +// On-disk inode structure +struct dinode { + short type; // File type + short major; // Major device number (T_DEV only) + short minor; // Minor device number (T_DEV only) + short nlink; // Number of links to inode in file system + uint size; // Size of file (bytes) + uint addrs[NDIRECT+1]; // Data block addresses +}; + +// Inodes per block. +#define IPB (BSIZE / sizeof(struct dinode)) + +// Block containing inode i +#define IBLOCK(i, sb) ((i) / IPB + sb.inodestart) + +// Bitmap bits per block +#define BPB (BSIZE*8) + +// Block of free map containing bit for block b +#define BBLOCK(b, sb) ((b)/BPB + sb.bmapstart) + +// Directory is a file containing a sequence of dirent structures. +#define DIRSIZ 14 + +struct dirent { + ushort inum; + char name[DIRSIZ]; +}; + diff --git a/kernel/kalloc.c b/kernel/kalloc.c new file mode 100644 index 0000000..1ed1c49 --- /dev/null +++ b/kernel/kalloc.c @@ -0,0 +1,80 @@ +// Physical memory allocator, intended to allocate +// memory for user processes, kernel stacks, page table pages, +// and pipe buffers. Allocates 4096-byte pages. + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "spinlock.h" +#include "riscv.h" +#include "defs.h" + +void freerange(void *pa_start, void *pa_end); + +extern char end[]; // first address after kernel loaded from ELF file + // defined by the kernel linker script in kernel.ld + +struct run { + struct run *next; +}; + +struct { + struct spinlock lock; + struct run *freelist; +} kmem; + +void +kinit() +{ + initlock(&kmem.lock, "kmem"); + freerange(end, (void*)PHYSTOP); +} + +void +freerange(void *pa_start, void *pa_end) +{ + char *p; + p = (char*)PGROUNDUP((uint64)pa_start); + for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE) + kfree(p); +} +//PAGEBREAK: 21 +// Free the page of physical memory pointed at by v, +// which normally should have been returned by a +// call to kalloc(). (The exception is when +// initializing the allocator; see kinit above.) +void +kfree(void *pa) +{ + struct run *r; + + if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP) + panic("kfree"); + + // Fill with junk to catch dangling refs. + memset(pa, 1, PGSIZE); + + acquire(&kmem.lock); + r = (struct run*)pa; + r->next = kmem.freelist; + kmem.freelist = r; + release(&kmem.lock); +} + +// Allocate one 4096-byte page of physical memory. +// Returns a pointer that the kernel can use. +// Returns 0 if the memory cannot be allocated. +void * +kalloc(void) +{ + struct run *r; + + acquire(&kmem.lock); + r = kmem.freelist; + if(r) + kmem.freelist = r->next; + release(&kmem.lock); + if(r) + memset((char*)r, 5, PGSIZE); // fill with junk + return (void*)r; +} diff --git a/kernel/kernel.ld b/kernel/kernel.ld new file mode 100644 index 0000000..53c9b90 --- /dev/null +++ b/kernel/kernel.ld @@ -0,0 +1,31 @@ +OUTPUT_ARCH( "riscv" ) +ENTRY( _entry ) + +SECTIONS +{ + /* + * ensure that entry.S / _entry is at 0x80000000, + * where qemu's -kernel jumps. + */ + . = 0x80000000; + .text : + { + *(.text) + . = ALIGN(0x1000); + *(trampoline) + } + + . = ALIGN(0x1000); + PROVIDE(etext = .); + + /* + * make sure end is after data and bss. 
+ */ + .data : { + *(.data) + } + bss : { + *(.bss) + PROVIDE(end = .); + } +} diff --git a/kernel/kernelvec.S b/kernel/kernelvec.S new file mode 100644 index 0000000..4f52688 --- /dev/null +++ b/kernel/kernelvec.S @@ -0,0 +1,112 @@ + # + # interrupts and exceptions while in supervisor + # mode come here. + # + # push all registers, call kerneltrap(), restore, return. + # +.globl kerneltrap +.globl kernelvec +.align 4 +kernelvec: + addi sp, sp, -256 + + sd ra, 0(sp) + sd sp, 8(sp) + sd gp, 16(sp) + sd tp, 24(sp) + sd t0, 32(sp) + sd t1, 40(sp) + sd t2, 48(sp) + sd s0, 56(sp) + sd s1, 64(sp) + sd a0, 72(sp) + sd a1, 80(sp) + sd a2, 88(sp) + sd a3, 96(sp) + sd a4, 104(sp) + sd a5, 112(sp) + sd a6, 120(sp) + sd a7, 128(sp) + sd s2, 136(sp) + sd s3, 144(sp) + sd s4, 152(sp) + sd s5, 160(sp) + sd s6, 168(sp) + sd s7, 176(sp) + sd s8, 184(sp) + sd s9, 192(sp) + sd s10, 200(sp) + sd s11, 208(sp) + sd t3, 216(sp) + sd t4, 224(sp) + sd t5, 232(sp) + sd t6, 240(sp) + + call kerneltrap + + ld ra, 0(sp) + ld sp, 8(sp) + ld gp, 16(sp) + ld tp, 24(sp) + ld t0, 32(sp) + ld t1, 40(sp) + ld t2, 48(sp) + ld s0, 56(sp) + ld s1, 64(sp) + ld a0, 72(sp) + ld a1, 80(sp) + ld a2, 88(sp) + ld a3, 96(sp) + ld a4, 104(sp) + ld a5, 112(sp) + ld a6, 120(sp) + ld a7, 128(sp) + ld s2, 136(sp) + ld s3, 144(sp) + ld s4, 152(sp) + ld s5, 160(sp) + ld s6, 168(sp) + ld s7, 176(sp) + ld s8, 184(sp) + ld s9, 192(sp) + ld s10, 200(sp) + ld s11, 208(sp) + ld t3, 216(sp) + ld t4, 224(sp) + ld t5, 232(sp) + ld t6, 240(sp) + + addi sp, sp, 256 + + sret + + # + # machine-mode timer interrupt. + # +.globl machinevec +.align 4 +machinevec: + csrrw a0, mscratch, a0 + sd a1, 0(a0) + sd a2, 8(a0) + sd a3, 16(a0) + sd a4, 24(a0) + + # add another second to mtimecmp0. + ld a1, 32(a0) # CLINT_MTIMECMP(hart) + ld a2, 40(a0) # ticks per second + ld a3, 0(a1) + add a3, a3, a2 + sd a3, 0(a1) + + # raise a supervisor software interrupt. + li a1, 2 + csrw sip, a1 + + ld a4, 24(a0) + ld a3, 16(a0) + ld a2, 8(a0) + ld a1, 0(a0) + csrrw a0, mscratch, a0 + + mret diff --git a/kernel/log.c b/kernel/log.c new file mode 100644 index 0000000..c8f7e62 --- /dev/null +++ b/kernel/log.c @@ -0,0 +1,235 @@ +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" + +// Simple logging that allows concurrent FS system calls. +// +// A log transaction contains the updates of multiple FS system +// calls. The logging system only commits when there are +// no FS system calls active. Thus there is never +// any reasoning required about whether a commit might +// write an uncommitted system call's updates to disk. +// +// A system call should call begin_op()/end_op() to mark +// its start and end. Usually begin_op() just increments +// the count of in-progress FS system calls and returns. +// But if it thinks the log is close to running out, it +// sleeps until the last outstanding end_op() commits. +// +// The log is a physical re-do log containing disk blocks. +// The on-disk log format: +// header block, containing block #s for block A, B, C, ... +// block A +// block B +// block C +// ... +// Log appends are synchronous. + +// Contents of the header block, used for both the on-disk header block +// and to keep track in memory of logged block# before commit. +struct logheader { + int n; + int block[LOGSIZE]; +}; + +struct log { + struct spinlock lock; + int start; + int size; + int outstanding; // how many FS sys calls are executing. 
+ int committing; // in commit(), please wait. + int dev; + struct logheader lh; +}; +struct log log; + +static void recover_from_log(void); +static void commit(); + +void +initlog(int dev) +{ + if (sizeof(struct logheader) >= BSIZE) + panic("initlog: too big logheader"); + + struct superblock sb; + initlock(&log.lock, "log"); + readsb(dev, &sb); + log.start = sb.logstart; + log.size = sb.nlog; + log.dev = dev; + recover_from_log(); +} + +// Copy committed blocks from log to their home location +static void +install_trans(void) +{ + int tail; + + for (tail = 0; tail < log.lh.n; tail++) { + struct buf *lbuf = bread(log.dev, log.start+tail+1); // read log block + struct buf *dbuf = bread(log.dev, log.lh.block[tail]); // read dst + memmove(dbuf->data, lbuf->data, BSIZE); // copy block to dst + bwrite(dbuf); // write dst to disk + brelse(lbuf); + brelse(dbuf); + } +} + +// Read the log header from disk into the in-memory log header +static void +read_head(void) +{ + struct buf *buf = bread(log.dev, log.start); + struct logheader *lh = (struct logheader *) (buf->data); + int i; + log.lh.n = lh->n; + for (i = 0; i < log.lh.n; i++) { + log.lh.block[i] = lh->block[i]; + } + brelse(buf); +} + +// Write in-memory log header to disk. +// This is the true point at which the +// current transaction commits. +static void +write_head(void) +{ + struct buf *buf = bread(log.dev, log.start); + struct logheader *hb = (struct logheader *) (buf->data); + int i; + hb->n = log.lh.n; + for (i = 0; i < log.lh.n; i++) { + hb->block[i] = log.lh.block[i]; + } + bwrite(buf); + brelse(buf); +} + +static void +recover_from_log(void) +{ + read_head(); + install_trans(); // if committed, copy from log to disk + log.lh.n = 0; + write_head(); // clear the log +} + +// called at the start of each FS system call. +void +begin_op(void) +{ + acquire(&log.lock); + while(1){ + if(log.committing){ + sleep(&log, &log.lock); + } else if(log.lh.n + (log.outstanding+1)*MAXOPBLOCKS > LOGSIZE){ + // this op might exhaust log space; wait for commit. + sleep(&log, &log.lock); + } else { + log.outstanding += 1; + release(&log.lock); + break; + } + } +} + +// called at the end of each FS system call. +// commits if this was the last outstanding operation. +void +end_op(void) +{ + int do_commit = 0; + + acquire(&log.lock); + log.outstanding -= 1; + if(log.committing) + panic("log.committing"); + if(log.outstanding == 0){ + do_commit = 1; + log.committing = 1; + } else { + // begin_op() may be waiting for log space, + // and decrementing log.outstanding has decreased + // the amount of reserved space. + wakeup(&log); + } + release(&log.lock); + + if(do_commit){ + // call commit w/o holding locks, since not allowed + // to sleep with locks. + commit(); + acquire(&log.lock); + log.committing = 0; + wakeup(&log); + release(&log.lock); + } +} + +// Copy modified blocks from cache to log. 
+static void +write_log(void) +{ + int tail; + + for (tail = 0; tail < log.lh.n; tail++) { + struct buf *to = bread(log.dev, log.start+tail+1); // log block + struct buf *from = bread(log.dev, log.lh.block[tail]); // cache block + memmove(to->data, from->data, BSIZE); + bwrite(to); // write the log + brelse(from); + brelse(to); + } +} + +static void +commit() +{ + if (log.lh.n > 0) { + write_log(); // Write modified blocks from cache to log + write_head(); // Write header to disk -- the real commit + install_trans(); // Now install writes to home locations + log.lh.n = 0; + write_head(); // Erase the transaction from the log + } +} + +// Caller has modified b->data and is done with the buffer. +// Record the block number and pin in the cache with B_DIRTY. +// commit()/write_log() will do the disk write. +// +// log_write() replaces bwrite(); a typical use is: +// bp = bread(...) +// modify bp->data[] +// log_write(bp) +// brelse(bp) +void +log_write(struct buf *b) +{ + int i; + + if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1) + panic("too big a transaction"); + if (log.outstanding < 1) + panic("log_write outside of trans"); + + acquire(&log.lock); + for (i = 0; i < log.lh.n; i++) { + if (log.lh.block[i] == b->blockno) // log absorbtion + break; + } + log.lh.block[i] = b->blockno; + if (i == log.lh.n) + log.lh.n++; + b->flags |= B_DIRTY; // prevent eviction + release(&log.lock); +} + diff --git a/kernel/main.c b/kernel/main.c new file mode 100644 index 0000000..2168b9f --- /dev/null +++ b/kernel/main.c @@ -0,0 +1,42 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" + +volatile static int started = 0; + +// Bootstrap processor starts running C code here. +// Allocate a real stack and switch to it, first +// doing some setup required for memory allocator to work. +void +main() +{ + if(cpuid() == 0){ + uartinit(); // serial port + consoleinit(); + printf("hart %d starting\n", cpuid()); + kinit(); // physical page allocator + kvminit(); // create kernel page table + kvminithart(); // turn on paging + procinit(); // process table + trapinit(); // trap vectors + trapinithart(); // install kernel trap vector + plicinit(); // set up interrupt controller + plicinithart(); // ask PLIC for device interrupts + binit(); // buffer cache + fileinit(); // file table + ramdiskinit(); // disk + userinit(); // first user process + started = 1; + } else { + while(started == 0) + ; + printf("hart %d starting\n", cpuid()); + kvminithart(); // turn on paging + trapinithart(); // install kernel trap vector + plicinithart(); // ask PLIC for device interrupts + } + + scheduler(); +} diff --git a/kernel/memlayout.h b/kernel/memlayout.h new file mode 100644 index 0000000..462986c --- /dev/null +++ b/kernel/memlayout.h @@ -0,0 +1,50 @@ +// Physical memory layout + +// qemu -machine virt is set up like this, +// based on qemu's hw/riscv/virt.c: +// +// 00001000 -- boot ROM, provided by qemu +// 02000000 -- CLINT +// 0C000000 -- PLIC +// 10000000 -- uart0 registers +// 80000000 -- boot ROM jumps here in machine mode +// -kernel loads the kernel here +// 88000000 -- -initrd fs.img ramdisk image. +// unused RAM after 80000000. + +// the kernel uses physical memory thus: +// 80000000 -- entry.S, then kernel text and data +// end -- start of kernel page allocation area +// PHYSTOP -- end RAM used by the kernel + +// qemu puts UART registers here in physical memory. 
+#define UART0 0x10000000L +#define UART0_IRQ 10 + +// local interrupt controller, which contains the timer. +#define CLINT 0x2000000L +#define CLINT_MTIMECMP(hartid) (CLINT + 0x4000 + 8*(hartid)) +#define CLINT_MTIME (CLINT + 0xBFF8) + +// qemu puts programmable interrupt controller here. +#define PLIC 0x0c000000L +#define PLIC_PRIORITY (PLIC + 0x0) +#define PLIC_PENDING (PLIC + 0x1000) +#define PLIC_MENABLE(hart) (PLIC + 0x2000 + (hart)*0x100) +#define PLIC_SENABLE(hart) (PLIC + 0x2080 + (hart)*0x100) +#define PLIC_MPRIORITY(hart) (PLIC + 0x200000 + (hart)*0x2000) +#define PLIC_SPRIORITY(hart) (PLIC + 0x201000 + (hart)*0x2000) +#define PLIC_MCLAIM(hart) (PLIC + 0x200004 + (hart)*0x2000) +#define PLIC_SCLAIM(hart) (PLIC + 0x201004 + (hart)*0x2000) + +#define RAMDISK 0x88000000L + +// the kernel expects there to be RAM +// for use by the kernel and user pages +// from physical address 0x80000000 to PHYSTOP. +#define KERNBASE 0x80000000L +#define PHYSTOP (KERNBASE + 128*1024*1024) + +// map the trampoline page to the highest address, +// in both user and kernel space. +#define TRAMPOLINE (MAXVA - PGSIZE) diff --git a/kernel/param.h b/kernel/param.h new file mode 100644 index 0000000..b5fdcb2 --- /dev/null +++ b/kernel/param.h @@ -0,0 +1,13 @@ +#define NPROC 64 // maximum number of processes +#define NCPU 8 // maximum number of CPUs +#define NOFILE 16 // open files per process +#define NFILE 100 // open files per system +#define NINODE 50 // maximum number of active i-nodes +#define NDEV 10 // maximum major device number +#define ROOTDEV 1 // device number of file system root disk +#define MAXARG 32 // max exec arguments +#define MAXOPBLOCKS 10 // max # of blocks any FS op writes +#define LOGSIZE (MAXOPBLOCKS*3) // max data blocks in on-disk log +#define NBUF (MAXOPBLOCKS*3) // size of disk block cache +#define FSSIZE 1000 // size of file system in blocks +#define MAXPATH 128 // maximum file path name diff --git a/kernel/pipe.c b/kernel/pipe.c new file mode 100644 index 0000000..31bf0cc --- /dev/null +++ b/kernel/pipe.c @@ -0,0 +1,129 @@ +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "proc.h" +#include "fs.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "file.h" + +#define PIPESIZE 512 + +struct pipe { + struct spinlock lock; + char data[PIPESIZE]; + uint nread; // number of bytes read + uint nwrite; // number of bytes written + int readopen; // read fd is still open + int writeopen; // write fd is still open +}; + +int +pipealloc(struct file **f0, struct file **f1) +{ + struct pipe *p; + + p = 0; + *f0 = *f1 = 0; + if((*f0 = filealloc()) == 0 || (*f1 = filealloc()) == 0) + goto bad; + if((p = (struct pipe*)kalloc()) == 0) + goto bad; + p->readopen = 1; + p->writeopen = 1; + p->nwrite = 0; + p->nread = 0; + initlock(&p->lock, "pipe"); + (*f0)->type = FD_PIPE; + (*f0)->readable = 1; + (*f0)->writable = 0; + (*f0)->pipe = p; + (*f1)->type = FD_PIPE; + (*f1)->readable = 0; + (*f1)->writable = 1; + (*f1)->pipe = p; + return 0; + +//PAGEBREAK: 20 + bad: + if(p) + kfree((char*)p); + if(*f0) + fileclose(*f0); + if(*f1) + fileclose(*f1); + return -1; +} + +void +pipeclose(struct pipe *p, int writable) +{ + acquire(&p->lock); + if(writable){ + p->writeopen = 0; + wakeup(&p->nread); + } else { + p->readopen = 0; + wakeup(&p->nwrite); + } + if(p->readopen == 0 && p->writeopen == 0){ + release(&p->lock); + kfree((char*)p); + } else + release(&p->lock); +} + +//PAGEBREAK: 40 +int +pipewrite(struct pipe *p, uint64 addr, int n) +{ + int i; + char ch; + 
struct proc *pr = myproc(); + + acquire(&p->lock); + for(i = 0; i < n; i++){ + while(p->nwrite == p->nread + PIPESIZE){ //DOC: pipewrite-full + if(p->readopen == 0 || myproc()->killed){ + release(&p->lock); + return -1; + } + wakeup(&p->nread); + sleep(&p->nwrite, &p->lock); //DOC: pipewrite-sleep + } + if(copyin(pr->pagetable, &ch, addr + i, 1) == -1) + break; + p->data[p->nwrite++ % PIPESIZE] = ch; + } + wakeup(&p->nread); //DOC: pipewrite-wakeup1 + release(&p->lock); + return n; +} + +int +piperead(struct pipe *p, uint64 addr, int n) +{ + int i; + struct proc *pr = myproc(); + char ch; + + acquire(&p->lock); + while(p->nread == p->nwrite && p->writeopen){ //DOC: pipe-empty + if(myproc()->killed){ + release(&p->lock); + return -1; + } + sleep(&p->nread, &p->lock); //DOC: piperead-sleep + } + for(i = 0; i < n; i++){ //DOC: piperead-copy + if(p->nread == p->nwrite) + break; + ch = p->data[p->nread++ % PIPESIZE]; + if(copyout(pr->pagetable, addr + i, &ch, 1) == -1) + break; + } + wakeup(&p->nwrite); //DOC: piperead-wakeup + release(&p->lock); + return i; +} diff --git a/kernel/plic.c b/kernel/plic.c new file mode 100644 index 0000000..0f19ab0 --- /dev/null +++ b/kernel/plic.c @@ -0,0 +1,63 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" + +// +// the riscv Platform Level Interrupt Controller (PLIC). +// + +void +plicinit(void) +{ + // set uart's priority to be non-zero (otherwise disabled). + *(uint32*)(PLIC + UART0_IRQ*4) = 1; +} + +void +plicinithart(void) +{ + int hart = cpuid(); + + // set uart's enable bit for this hart's S-mode. + //*(uint32*)(PLIC + 0x2080)= (1 << UART0_IRQ); + *(uint32*)PLIC_SENABLE(hart)= (1 << UART0_IRQ); + + // set this hart's S-mode priority threshold to 0. + //*(uint32*)(PLIC + 0x201000) = 0; + *(uint32*)PLIC_SPRIORITY(hart) = 0; +} + +// return a bitmap of which IRQs are waiting +// to be served. +uint64 +plic_pending(void) +{ + uint64 mask; + + //mask = *(uint32*)(PLIC + 0x1000); + //mask |= (uint64)*(uint32*)(PLIC + 0x1004) << 32; + mask = *(uint64*)PLIC_PENDING; + + return mask; +} + +// ask the PLIC what interrupt we should serve. +int +plic_claim(void) +{ + int hart = cpuid(); + //int irq = *(uint32*)(PLIC + 0x201004); + int irq = *(uint32*)PLIC_SCLAIM(hart); + return irq; +} + +// tell the PLIC we've served this IRQ. +void +plic_complete(int irq) +{ + int hart = cpuid(); + //*(uint32*)(PLIC + 0x201004) = irq; + *(uint32*)PLIC_SCLAIM(hart) = irq; +} diff --git a/kernel/proc.c b/kernel/proc.c new file mode 100644 index 0000000..4ae34c8 --- /dev/null +++ b/kernel/proc.c @@ -0,0 +1,591 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "proc.h" +#include "spinlock.h" +#include "defs.h" + +struct { + struct spinlock lock; + struct proc proc[NPROC]; +} ptable; + +struct cpu cpus[NCPU]; + +struct proc *initproc; + +int nextpid = 1; +extern void forkret(void); + +// for returning out of the kernel +extern void sysexit(void); + +static void wakeup1(void *chan); + +extern char trampout[]; // trampoline.S + +void +procinit(void) +{ + initlock(&ptable.lock, "ptable"); +} + +// Must be called with interrupts disabled, +// to prevent race with process being moved +// to a different CPU. +int +cpuid() +{ + int id = r_tp(); + return id; +} + +// Return this core's cpu struct. +// Interrupts must be disabled. +struct cpu* +mycpu(void) { + int id = cpuid(); + struct cpu *c = &cpus[id]; + return c; +} + +// Return the current struct proc *. 
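+// The push_off()/pop_off() pair below briefly disables interrupts so
+// that this code cannot be rescheduled onto a different CPU between
+// looking up this CPU's struct cpu and reading its proc pointer.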
+struct proc* +myproc(void) { + push_off(); + struct cpu *c = mycpu(); + struct proc *p = c->proc; + pop_off(); + return p; +} + +//PAGEBREAK: 32 +// Look in the process table for an UNUSED proc. +// If found, change state to EMBRYO and initialize +// state required to run in the kernel. +// Otherwise return 0. +static struct proc* +allocproc(void) +{ + struct proc *p; + + acquire(&ptable.lock); + + for(p = ptable.proc; p < &ptable.proc[NPROC]; p++) + if(p->state == UNUSED) + goto found; + + release(&ptable.lock); + return 0; + +found: + p->state = EMBRYO; + p->pid = nextpid++; + + release(&ptable.lock); + + // Allocate a page for the kernel stack. + if((p->kstack = kalloc()) == 0){ + p->state = UNUSED; + return 0; + } + + // Allocate a trapframe page. + if((p->tf = (struct trapframe *)kalloc()) == 0){ + p->state = UNUSED; + return 0; + } + + // An empty user page table. + p->pagetable = proc_pagetable(p); + + // Set up new context to start executing at forkret, + // which returns to user space. + memset(&p->context, 0, sizeof p->context); + p->context.ra = (uint64)forkret; + p->context.sp = (uint64)p->kstack + PGSIZE; + + return p; +} + +// Create a page table for a given process, +// with no users pages, but with trampoline pages. +// Called both when creating a process, and +// by exec() when building tentative new memory image, +// which might fail. +pagetable_t +proc_pagetable(struct proc *p) +{ + pagetable_t pagetable; + + // An empty user page table. + pagetable = uvmcreate(); + + // map the trampoline code (for system call return) + // at the highest user virtual address. + // only the supervisor uses it, on the way + // to/from user space, so not PTE_U. + mappages(pagetable, TRAMPOLINE, PGSIZE, + (uint64)trampout, PTE_R | PTE_X); + + // map the trapframe, for trampoline.S. + mappages(pagetable, (TRAMPOLINE - PGSIZE), PGSIZE, + (uint64)(p->tf), PTE_R | PTE_W); + + return pagetable; +} + +// Free a process's page table, and free the +// physical memory the page table refers to. +// Called both when a process exits and from +// exec() if it fails. +void +proc_freepagetable(pagetable_t pagetable, uint64 sz) +{ + unmappages(pagetable, TRAMPOLINE, PGSIZE, 0); + unmappages(pagetable, TRAMPOLINE-PGSIZE, PGSIZE, 0); + uvmfree(pagetable, sz); +} + +// a user program that calls exec("/init") +// od -t xC initcode +uchar initcode[] = { + 0x17, 0x05, 0x00, 0x00, 0x13, 0x05, 0x05, 0x02, 0x97, 0x05, 0x00, 0x00, 0x93, 0x85, 0x05, 0x02, + 0x9d, 0x48, 0x73, 0x00, 0x00, 0x00, 0x89, 0x48, 0x73, 0x00, 0x00, 0x00, 0xef, 0xf0, 0xbf, 0xff, + 0x2f, 0x69, 0x6e, 0x69, 0x74, 0x00, 0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00 +}; + +//PAGEBREAK: 32 +// Set up first user process. +void +userinit(void) +{ + struct proc *p; + + p = allocproc(); + initproc = p; + + uvminit(p->pagetable, initcode, sizeof(initcode)); + p->sz = PGSIZE; + + // prepare for the very first kernel->user. + p->tf->epc = 0; + p->tf->sp = PGSIZE; + + safestrcpy(p->name, "initcode", sizeof(p->name)); + p->cwd = namei("/"); + + // this assignment to p->state lets other cores + // run this process. the acquire forces the above + // writes to be visible, and the lock is also needed + // because the assignment might not be atomic. + acquire(&ptable.lock); + + p->state = RUNNABLE; + + release(&ptable.lock); +} + +// Grow current process's memory by n bytes. +// Return 0 on success, -1 on failure. 
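+// (This is the kernel side of sbrk(): sys_sbrk in sysproc.c saves the
+// old p->sz, calls growproc(n), and returns the old size to user space
+// as the start of the newly allocated region.)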
+int +growproc(int n) +{ + uint sz; + struct proc *p = myproc(); + + sz = p->sz; + if(n > 0){ + if((sz = uvmalloc(p->pagetable, sz, sz + n)) == 0) + return -1; + } else if(n < 0){ + if((sz = uvmdealloc(p->pagetable, sz, sz + n)) == 0) + return -1; + } + p->sz = sz; + return 0; +} + +// Create a new process, copying p as the parent. +// Sets up child kernel stack to return as if from system call. +int +fork(void) +{ + int i, pid; + struct proc *np; + struct proc *p = myproc(); + + // Allocate process. + if((np = allocproc()) == 0){ + return -1; + } + + // Copy user memory from parent to child. + uvmcopy(p->pagetable, np->pagetable, p->sz); + np->sz = p->sz; + + np->parent = p; + + // copy saved user registers. + *(np->tf) = *(p->tf); + + // Cause fork to return 0 in the child. + np->tf->a0 = 0; + + // increment reference counts on open file descriptors. + for(i = 0; i < NOFILE; i++) + if(p->ofile[i]) + np->ofile[i] = filedup(p->ofile[i]); + np->cwd = idup(p->cwd); + + safestrcpy(np->name, p->name, sizeof(p->name)); + + pid = np->pid; + + acquire(&ptable.lock); + + np->state = RUNNABLE; + + release(&ptable.lock); + + return pid; +} + +// Exit the current process. Does not return. +// An exited process remains in the zombie state +// until its parent calls wait(). +void +exit(void) +{ + struct proc *p = myproc(); + struct proc *pp; + int fd; + + if(p == initproc) + panic("init exiting"); + + // Close all open files. + for(fd = 0; fd < NOFILE; fd++){ + if(p->ofile[fd]){ + fileclose(p->ofile[fd]); + p->ofile[fd] = 0; + } + } + + begin_op(); + iput(p->cwd); + end_op(); + p->cwd = 0; + + acquire(&ptable.lock); + + // Parent might be sleeping in wait(). + wakeup1(p->parent); + + // Pass abandoned children to init. + for(pp = ptable.proc; pp < &ptable.proc[NPROC]; pp++){ + if(pp->parent == p){ + pp->parent = initproc; + if(pp->state == ZOMBIE) + wakeup1(initproc); + } + } + + // Jump into the scheduler, never to return. + p->state = ZOMBIE; + sched(); + panic("zombie exit"); +} + +// Wait for a child process to exit and return its pid. +// Return -1 if this process has no children. +int +wait(void) +{ + struct proc *np; + int havekids, pid; + struct proc *p = myproc(); + + acquire(&ptable.lock); + for(;;){ + // Scan through table looking for exited children. + havekids = 0; + for(np = ptable.proc; np < &ptable.proc[NPROC]; np++){ + if(np->parent != p) + continue; + havekids = 1; + if(np->state == ZOMBIE){ + // Found one. + pid = np->pid; + kfree(np->kstack); + np->kstack = 0; + kfree((void*)np->tf); + np->tf = 0; + proc_freepagetable(np->pagetable, np->sz); + np->pagetable = 0; + np->pid = 0; + np->parent = 0; + np->name[0] = 0; + np->killed = 0; + np->state = UNUSED; + release(&ptable.lock); + return pid; + } + } + + // No point waiting if we don't have any children. + if(!havekids || p->killed){ + release(&ptable.lock); + return -1; + } + + // Wait for children to exit. (See wakeup1 call in proc_exit.) + sleep(p, &ptable.lock); //DOC: wait-sleep + } +} + +//PAGEBREAK: 42 +// Per-CPU process scheduler. +// Each CPU calls scheduler() after setting itself up. +// Scheduler never returns. It loops, doing: +// - choose a process to run +// - swtch to start running that process +// - eventually that process transfers control +// via swtch back to the scheduler. +void +scheduler(void) +{ + struct proc *p; + struct cpu *c = mycpu(); + + c->proc = 0; + for(;;){ + // Enable interrupts on this processor. + intr_on(); + + // Loop over process table looking for process to run. 
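+    // This CPU arrives here with interrupts off (sched() insists on
+    // that), so the intr_on() above is what lets device and timer
+    // interrupts in while the loop spins waiting for a RUNNABLE process.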
+ acquire(&ptable.lock); + for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ + if(p->state != RUNNABLE) + continue; + + // Switch to chosen process. It is the process's job + // to release ptable.lock and then reacquire it + // before jumping back to us. + c->proc = p; + p->state = RUNNING; + + swtch(&c->scheduler, &p->context); + + // Process is done running for now. + // It should have changed its p->state before coming back. + c->proc = 0; + } + release(&ptable.lock); + } +} + +// Enter scheduler. Must hold only ptable.lock +// and have changed proc->state. Saves and restores +// intena because intena is a property of this +// kernel thread, not this CPU. It should +// be proc->intena and proc->noff, but that would +// break in the few places where a lock is held but +// there's no process. +void +sched(void) +{ + int intena; + struct proc *p = myproc(); + + if(!holding(&ptable.lock)) + panic("sched ptable.lock"); + if(mycpu()->noff != 1) + panic("sched locks"); + if(p->state == RUNNING) + panic("sched running"); + if(intr_get()) + panic("sched interruptible"); + + intena = mycpu()->intena; + swtch(&p->context, &mycpu()->scheduler); + mycpu()->intena = intena; +} + +// Give up the CPU for one scheduling round. +void +yield(void) +{ + acquire(&ptable.lock); //DOC: yieldlock + myproc()->state = RUNNABLE; + sched(); + release(&ptable.lock); +} + +// A fork child's very first scheduling by scheduler() +// will swtch to forkret. +void +forkret(void) +{ + static int first = 1; + + // Still holding ptable.lock from scheduler. + release(&ptable.lock); + + if (first) { + // Some initialization functions must be run in the context + // of a regular process (e.g., they call sleep), and thus cannot + // be run from main(). + first = 0; + iinit(ROOTDEV); + initlog(ROOTDEV); + } + + usertrapret(); +} + +// Atomically release lock and sleep on chan. +// Reacquires lock when awakened. +void +sleep(void *chan, struct spinlock *lk) +{ + struct proc *p = myproc(); + + if(p == 0) + panic("sleep"); + + if(lk == 0) + panic("sleep without lk"); + + // Must acquire ptable.lock in order to + // change p->state and then call sched. + // Once we hold ptable.lock, we can be + // guaranteed that we won't miss any wakeup + // (wakeup runs with ptable.lock locked), + // so it's okay to release lk. + if(lk != &ptable.lock){ //DOC: sleeplock0 + acquire(&ptable.lock); //DOC: sleeplock1 + release(lk); + } + // Go to sleep. + p->chan = chan; + p->state = SLEEPING; + + sched(); + + // Tidy up. + p->chan = 0; + + // Reacquire original lock. + if(lk != &ptable.lock){ //DOC: sleeplock2 + release(&ptable.lock); + acquire(lk); + } +} + +//PAGEBREAK! +// Wake up all processes sleeping on chan. +// The ptable lock must be held. +static void +wakeup1(void *chan) +{ + struct proc *p; + + for(p = ptable.proc; p < &ptable.proc[NPROC]; p++) + if(p->state == SLEEPING && p->chan == chan) + p->state = RUNNABLE; +} + +// Wake up all processes sleeping on chan. +void +wakeup(void *chan) +{ + acquire(&ptable.lock); + wakeup1(chan); + release(&ptable.lock); +} + +// Kill the process with the given pid. +// Process won't exit until it returns +// to user space (see trap in trap.c). +int +kill(int pid) +{ + struct proc *p; + + acquire(&ptable.lock); + for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ + if(p->pid == pid){ + p->killed = 1; + // Wake process from sleep if necessary. 
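+      // Making a SLEEPING victim RUNNABLE lets it return from sleep(),
+      // notice p->killed in loops such as piperead() and sys_sleep(),
+      // and reach the killed check in usertrap(), which calls exit().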
+ if(p->state == SLEEPING) + p->state = RUNNABLE; + release(&ptable.lock); + return 0; + } + } + release(&ptable.lock); + return -1; +} + +// Copy to either a user address, or kernel address, +// depending on usr_dst. +// Returns 0 on success, -1 on error. +int +either_copyout(int user_dst, uint64 dst, void *src, uint64 len) +{ + struct proc *p = myproc(); + if(user_dst){ + return copyout(p->pagetable, dst, src, len); + } else { + memmove((char *)dst, src, len); + return 0; + } +} + +// Copy from either a user address, or kernel address, +// depending on usr_src. +// Returns 0 on success, -1 on error. +int +either_copyin(void *dst, int user_src, uint64 src, uint64 len) +{ + struct proc *p = myproc(); + if(user_src){ + return copyin(p->pagetable, dst, src, len); + } else { + memmove(dst, (char*)src, len); + return 0; + } +} + +// Print a process listing to console. For debugging. +// Runs when user types ^P on console. +// No lock to avoid wedging a stuck machine further. +void +procdump(void) +{ + static char *states[] = { + [UNUSED] "unused", + [EMBRYO] "embryo", + [SLEEPING] "sleep ", + [RUNNABLE] "runble", + [RUNNING] "run ", + [ZOMBIE] "zombie" + }; + struct proc *p; + char *state; + + for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ + if(p->state == UNUSED) + continue; + if(p->state >= 0 && p->state < NELEM(states) && states[p->state]) + state = states[p->state]; + else + state = "???"; + printf("%d %s %s", p->pid, state, p->name); + printf("\n"); + } +} + diff --git a/kernel/proc.h b/kernel/proc.h new file mode 100644 index 0000000..278e4cd --- /dev/null +++ b/kernel/proc.h @@ -0,0 +1,104 @@ +// Saved registers for kernel context switches. +struct context { + uint64 ra; + uint64 sp; + + // callee-saved + uint64 s0; + uint64 s1; + uint64 s2; + uint64 s3; + uint64 s4; + uint64 s5; + uint64 s6; + uint64 s7; + uint64 s8; + uint64 s9; + uint64 s10; + uint64 s11; +}; + +// Per-CPU state +struct cpu { + struct proc *proc; // The process running on this cpu or null + struct context scheduler; // swtch() here to enter scheduler + int noff; // Depth of push_off() nesting. + int intena; // Were interrupts enabled before push_off()? +}; + +extern struct cpu cpus[NCPU]; + +//PAGEBREAK: 17 + +// per-process data for the early trap handling code in trampoline.S. +// sits in a page by itself just under the trampoline page in the +// user page table. not specially mapped in the kernel page table. +// the sscratch register points here. +// trampoline.S saves user registers, then restores kernel_sp and +// kernel_satp. +// includes callee-saved registers like s0-s11 because the +// return-to-user path via usertrapret() doesn't return through +// the entire kernel call stack. 
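+//
+// the /* N */ byte offsets below are what trampoline.S uses to reach
+// each field, e.g. "ld sp, 8(a0)" reloads kernel_sp and "ld tp, 32(a0)"
+// reloads hartid on the way into the kernel.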
+struct trapframe { + /* 0 */ uint64 kernel_satp; + /* 8 */ uint64 kernel_sp; + /* 16 */ uint64 kernel_trap; // usertrap() + /* 24 */ uint64 epc; // saved user program counter + /* 32 */ uint64 hartid; + /* 40 */ uint64 ra; + /* 48 */ uint64 sp; + /* 56 */ uint64 gp; + /* 64 */ uint64 tp; + /* 72 */ uint64 t0; + /* 80 */ uint64 t1; + /* 88 */ uint64 t2; + /* 96 */ uint64 s0; + /* 104 */ uint64 s1; + /* 112 */ uint64 a0; + /* 120 */ uint64 a1; + /* 128 */ uint64 a2; + /* 136 */ uint64 a3; + /* 144 */ uint64 a4; + /* 152 */ uint64 a5; + /* 160 */ uint64 a6; + /* 168 */ uint64 a7; + /* 176 */ uint64 s2; + /* 184 */ uint64 s3; + /* 192 */ uint64 s4; + /* 200 */ uint64 s5; + /* 208 */ uint64 s6; + /* 216 */ uint64 s7; + /* 224 */ uint64 s8; + /* 232 */ uint64 s9; + /* 240 */ uint64 s10; + /* 248 */ uint64 s11; + /* 256 */ uint64 t3; + /* 264 */ uint64 t4; + /* 272 */ uint64 t5; + /* 280 */ uint64 t6; +}; + +enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE }; + +// Per-process state +struct proc { + char *kstack; // Bottom of kernel stack for this process + uint64 sz; // Size of process memory (bytes) + pagetable_t pagetable; // Page table + enum procstate state; // Process state + int pid; // Process ID + struct proc *parent; // Parent process + struct trapframe *tf; // data page for trampoline.S + struct context context; // swtch() here to run process + void *chan; // If non-zero, sleeping on chan + int killed; // If non-zero, have been killed + struct file *ofile[NOFILE]; // Open files + struct inode *cwd; // Current directory + char name[16]; // Process name (debugging) +}; + +// Process memory is laid out contiguously, low addresses first: +// text +// original data and bss +// fixed-size stack +// expandable heap diff --git a/kernel/ramdisk.c b/kernel/ramdisk.c new file mode 100644 index 0000000..9901294 --- /dev/null +++ b/kernel/ramdisk.c @@ -0,0 +1,45 @@ +// +// ramdisk that uses the disk image loaded by qemu -rdinit fs.img +// + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" + +void +ramdiskinit(void) +{ +} + +// If B_DIRTY is set, write buf to disk, clear B_DIRTY, set B_VALID. +// Else if B_VALID is not set, read buf from disk, set B_VALID. +void +ramdiskrw(struct buf *b) +{ + if(!holdingsleep(&b->lock)) + panic("ramdiskrw: buf not locked"); + if((b->flags & (B_VALID|B_DIRTY)) == B_VALID) + panic("ramdiskrw: nothing to do"); + + if(b->blockno >= FSSIZE) + panic("ramdiskrw: blockno too big"); + + uint64 diskaddr = b->blockno * BSIZE; + char *addr = (char *)RAMDISK + diskaddr; + + if(b->flags & B_DIRTY){ + // write + memmove(addr, b->data, BSIZE); + b->flags &= ~B_DIRTY; + } else { + // read + memmove(b->data, addr, BSIZE); + b->flags |= B_VALID; + } +} diff --git a/kernel/riscv.h b/kernel/riscv.h new file mode 100644 index 0000000..c3371a4 --- /dev/null +++ b/kernel/riscv.h @@ -0,0 +1,338 @@ +// which hart (core) is this? 
+static inline uint64 +r_mhartid() +{ + uint64 x; + asm volatile("csrr %0, mhartid" : "=r" (x) ); + return x; +} + +// Machine Status Register, mstatus + +#define MSTATUS_MPP_MASK (3L << 11) +#define MSTATUS_MPP_M (3L << 11) +#define MSTATUS_MPP_S (1L << 11) +#define MSTATUS_MPP_U (0L << 11) +#define MSTATUS_MIE (1L << 3) + +static inline uint64 +r_mstatus() +{ + uint64 x; + asm volatile("csrr %0, mstatus" : "=r" (x) ); + return x; +} + +static inline void +w_mstatus(uint64 x) +{ + asm volatile("csrw mstatus, %0" : : "r" (x)); +} + +// machine exception program counter, holds the +// instruction address to which a return from +// exception will go. +static inline void +w_mepc(uint64 x) +{ + asm volatile("csrw mepc, %0" : : "r" (x)); +} + +// Supervisor Status Register, sstatus + +#define SSTATUS_SPP (1L << 8) // Previous mode, 1=Supervisor, 0=User +#define SSTATUS_SPIE (1L << 5) // Supervisor Previous Interrupt Enable +#define SSTATUS_UPIE (1L << 4) // User Previous Interrupt Enable +#define SSTATUS_SIE (1L << 1) // Supervisor Interrupt Enable +#define SSTATUS_UIE (1L << 0) // User Interrupt Enable + +static inline uint64 +r_sstatus() +{ + uint64 x; + asm volatile("csrr %0, sstatus" : "=r" (x) ); + return x; +} + +static inline void +w_sstatus(uint64 x) +{ + asm volatile("csrw sstatus, %0" : : "r" (x)); +} + +// Supervisor Interrupt Pending +static inline uint64 +r_sip() +{ + uint64 x; + asm volatile("csrr %0, sip" : "=r" (x) ); + return x; +} + +static inline void +w_sip(uint64 x) +{ + asm volatile("csrw sip, %0" : : "r" (x)); +} + +// Supervisor Interrupt Enable +#define SIE_SEIE (1L << 9) // external +#define SIE_STIE (1L << 5) // timer +#define SIE_SSIE (1L << 1) // software +static inline uint64 +r_sie() +{ + uint64 x; + asm volatile("csrr %0, sie" : "=r" (x) ); + return x; +} + +static inline void +w_sie(uint64 x) +{ + asm volatile("csrw sie, %0" : : "r" (x)); +} + +// Machine-mode Interrupt Enable +#define MIE_MEIE (1L << 11) // external +#define MIE_MTIE (1L << 7) // timer +#define MIE_MSIE (1L << 3) // software +static inline uint64 +r_mie() +{ + uint64 x; + asm volatile("csrr %0, mie" : "=r" (x) ); + return x; +} + +static inline void +w_mie(uint64 x) +{ + asm volatile("csrw mie, %0" : : "r" (x)); +} + +// machine exception program counter, holds the +// instruction address to which a return from +// exception will go. +static inline void +w_sepc(uint64 x) +{ + asm volatile("csrw sepc, %0" : : "r" (x)); +} + +static inline uint64 +r_sepc() +{ + uint64 x; + asm volatile("csrr %0, sepc" : "=r" (x) ); + return x; +} + +// Machine Exception Delegation +static inline uint64 +r_medeleg() +{ + uint64 x; + asm volatile("csrr %0, medeleg" : "=r" (x) ); + return x; +} + +static inline void +w_medeleg(uint64 x) +{ + asm volatile("csrw medeleg, %0" : : "r" (x)); +} + +// Machine Interrupt Delegation +static inline uint64 +r_mideleg() +{ + uint64 x; + asm volatile("csrr %0, mideleg" : "=r" (x) ); + return x; +} + +static inline void +w_mideleg(uint64 x) +{ + asm volatile("csrw mideleg, %0" : : "r" (x)); +} + +// Supervisor Trap-Vector Base Address +// low two bits are mode. +static inline void +w_stvec(uint64 x) +{ + asm volatile("csrw stvec, %0" : : "r" (x)); +} + +static inline uint64 +r_stvec() +{ + uint64 x; + asm volatile("csrr %0, stvec" : "=r" (x) ); + return x; +} + +// Machine-mode interrupt vector +static inline void +w_mtvec(uint64 x) +{ + asm volatile("csrw mtvec, %0" : : "r" (x)); +} + +// use riscv's sv39 page table scheme. 
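+// satp's MODE field occupies bits 63..60 (8 selects Sv39) and its low
+// bits hold the physical page number of the root page-table page, which
+// is why MAKE_SATP shifts the page table's address right by 12.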
+#define SATP_SV39 (8L << 60) + +#define MAKE_SATP(pagetable) (SATP_SV39 | (((uint64)pagetable) >> 12)) + +// supervisor address translation and protection; +// holds the address of the page table. +static inline void +w_satp(uint64 x) +{ + asm volatile("csrw satp, %0" : : "r" (x)); +} + +static inline uint64 +r_satp() +{ + uint64 x; + asm volatile("csrr %0, satp" : "=r" (x) ); + return x; +} + +// Supervisor Scratch register, for early trap handler in trampoline.S. +static inline void +w_sscratch(uint64 x) +{ + asm volatile("csrw sscratch, %0" : : "r" (x)); +} + +static inline void +w_mscratch(uint64 x) +{ + asm volatile("csrw mscratch, %0" : : "r" (x)); +} + +// Supervisor Trap Cause +static inline uint64 +r_scause() +{ + uint64 x; + asm volatile("csrr %0, scause" : "=r" (x) ); + return x; +} + +// Supervisor Trap Value +static inline uint64 +r_stval() +{ + uint64 x; + asm volatile("csrr %0, stval" : "=r" (x) ); + return x; +} + +// Machine-mode Counter-Enable +static inline void +w_mcounteren(uint64 x) +{ + asm volatile("csrw mcounteren, %0" : : "r" (x)); +} + +static inline uint64 +r_mcounteren() +{ + uint64 x; + asm volatile("csrr %0, mcounteren" : "=r" (x) ); + return x; +} + +// machine-mode cycle counter +static inline uint64 +r_time() +{ + uint64 x; + asm volatile("csrr %0, time" : "=r" (x) ); + return x; +} + +// enable interrupts +static inline void +intr_on() +{ + w_sie(r_sie() | SIE_SEIE | SIE_STIE | SIE_SSIE); + w_sstatus(r_sstatus() | SSTATUS_SIE); +} + +// disable interrupts +static inline void +intr_off() +{ + w_sstatus(r_sstatus() & ~SSTATUS_SIE); +} + +// are interrupts enabled? +static inline int +intr_get() +{ + uint64 x = r_sstatus(); + return (x & SSTATUS_SIE) != 0; +} + +static inline uint64 +r_sp() +{ + uint64 x; + asm volatile("mv %0, sp" : "=r" (x) ); + return x; +} + +// read and write tp, the thread pointer, which holds +// this core's hartid (core number), the index into cpus[]. +static inline uint64 +r_tp() +{ + uint64 x; + asm volatile("mv %0, tp" : "=r" (x) ); + return x; +} + +static inline void +w_tp(uint64 x) +{ + asm volatile("mv tp, %0" : : "r" (x)); +} + +#define PGSIZE 4096 // bytes per page +#define PGSHIFT 12 // bits of offset within a page + +#define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1)) +#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1)) + +#define PTE_V (1L << 0) // valid +#define PTE_R (1L << 1) +#define PTE_W (1L << 2) +#define PTE_X (1L << 3) +#define PTE_U (1L << 4) // 1 -> user can access + +// shift a physical address to the right place for a PTE. +#define PA2PTE(pa) ((((uint64)pa) >> 12) << 10) + +#define PTE2PA(pte) (((pte) >> 10) << 12) + +#define PTE_FLAGS(pte) ((pte) & (PTE_V|PTE_R|PTE_W|PTE_X|PTE_U)) + +// extract the three 9-bit page table indices from a virtual address. +#define PXMASK 0x1FF // 9 bits +#define PXSHIFT(level) (PGSHIFT+(9*(level))) +#define PX(level, va) ((((uint64) (va)) >> PXSHIFT(level)) & PXMASK) + +// one beyond the highest possible virtual address. +// MAXVA is actually one bit less than the max allowed by +// Sv39, to avoid having to sign-extend virtual addresses +// that have the high bit set. 
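+// That is, MAXVA = 1L << 38 = 0x4000000000.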
+#define MAXVA (1L << (9 + 9 + 9 + 12 - 1)) + +typedef uint64 pte_t; +typedef uint64 *pagetable_t; // 512 PTEs diff --git a/kernel/sleeplock.c b/kernel/sleeplock.c new file mode 100644 index 0000000..b490370 --- /dev/null +++ b/kernel/sleeplock.c @@ -0,0 +1,55 @@ +// Sleeping locks + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "proc.h" +#include "spinlock.h" +#include "sleeplock.h" + +void +initsleeplock(struct sleeplock *lk, char *name) +{ + initlock(&lk->lk, "sleep lock"); + lk->name = name; + lk->locked = 0; + lk->pid = 0; +} + +void +acquiresleep(struct sleeplock *lk) +{ + acquire(&lk->lk); + while (lk->locked) { + sleep(lk, &lk->lk); + } + lk->locked = 1; + lk->pid = myproc()->pid; + release(&lk->lk); +} + +void +releasesleep(struct sleeplock *lk) +{ + acquire(&lk->lk); + lk->locked = 0; + lk->pid = 0; + wakeup(lk); + release(&lk->lk); +} + +int +holdingsleep(struct sleeplock *lk) +{ + int r; + + acquire(&lk->lk); + r = lk->locked && (lk->pid == myproc()->pid); + release(&lk->lk); + return r; +} + + + diff --git a/kernel/sleeplock.h b/kernel/sleeplock.h new file mode 100644 index 0000000..110e6f3 --- /dev/null +++ b/kernel/sleeplock.h @@ -0,0 +1,10 @@ +// Long-term locks for processes +struct sleeplock { + uint locked; // Is the lock held? + struct spinlock lk; // spinlock protecting this sleep lock + + // For debugging: + char *name; // Name of lock. + int pid; // Process holding lock +}; + diff --git a/kernel/spinlock.c b/kernel/spinlock.c new file mode 100644 index 0000000..bbb7cb5 --- /dev/null +++ b/kernel/spinlock.c @@ -0,0 +1,110 @@ +// Mutual exclusion spin locks. + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "spinlock.h" +#include "riscv.h" +#include "proc.h" +#include "defs.h" + +void +initlock(struct spinlock *lk, char *name) +{ + lk->name = name; + lk->locked = 0; + lk->cpu = 0; +} + +// Acquire the lock. +// Loops (spins) until the lock is acquired. +// Holding a lock for a long time may cause +// other CPUs to waste time spinning to acquire it. +void +acquire(struct spinlock *lk) +{ + push_off(); // disable interrupts to avoid deadlock. + if(holding(lk)) + panic("acquire"); + + // The xchg is atomic. + //while(xchg(&lk->locked, 1) != 0) + // ; + while(__sync_lock_test_and_set(&lk->locked, 1) != 0) + ; + + // Tell the C compiler and the processor to not move loads or stores + // past this point, to ensure that the critical section's memory + // references happen after the lock is acquired. + __sync_synchronize(); + + // Record info about lock acquisition for holding() and debugging. + lk->cpu = mycpu(); +} + +// Release the lock. +void +release(struct spinlock *lk) +{ + if(!holding(lk)) + panic("release"); + + lk->cpu = 0; + + // Tell the C compiler and the processor to not move loads or stores + // past this point, to ensure that all the stores in the critical + // section are visible to other cores before the lock is released. + // Both the C compiler and the hardware may re-order loads and + // stores; __sync_synchronize() tells them both not to. + // On RISC-V, this turns into a fence instruction. + __sync_synchronize(); + + // Release the lock, equivalent to lk->locked = 0. + // This code can't use a C assignment, since it might + // not be atomic. A real OS would use C atomics here. + // On RISC-V, use an amoswap instruction. 
+ //asm volatile("movl $0, %0" : "+m" (lk->locked) : ); + __sync_lock_release(&lk->locked); + + pop_off(); +} + +// Check whether this cpu is holding the lock. +int +holding(struct spinlock *lk) +{ + int r; + push_off(); + r = (lk->locked && lk->cpu == mycpu()); + pop_off(); + return r; +} + +// push_off/pop_off are like intr_off()/intr_on() except that they are matched: +// it takes two pop_off to undo two push_off. Also, if interrupts +// are initially off, then push_off, pop_off leaves them off. + +void +push_off(void) +{ + struct cpu *c = mycpu(); + int old = intr_get(); + + intr_off(); + if(c->noff == 0) + c->intena = old; + c->noff += 1; +} + +void +pop_off(void) +{ + struct cpu *c = mycpu(); + if(intr_get()) + panic("pop_off - interruptible"); + c->noff -= 1; + if(c->noff < 0) + panic("pop_off"); + if(c->noff == 0 && c->intena) + intr_on(); +} diff --git a/kernel/spinlock.h b/kernel/spinlock.h new file mode 100644 index 0000000..4392820 --- /dev/null +++ b/kernel/spinlock.h @@ -0,0 +1,9 @@ +// Mutual exclusion lock. +struct spinlock { + uint locked; // Is the lock held? + + // For debugging: + char *name; // Name of lock. + struct cpu *cpu; // The cpu holding the lock. +}; + diff --git a/kernel/start.c b/kernel/start.c new file mode 100644 index 0000000..c4689dc --- /dev/null +++ b/kernel/start.c @@ -0,0 +1,56 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" + +void main(); + +// entry.S needs one stack per CPU. +__attribute__ ((aligned (16))) char stack0[4096 * NCPU]; + +// scratch area for timer interrupt, one per CPU. +uint64 mscratch0[NCPU * 32]; + +// assembly code in kernelvec for machine-mode timer interrupt. +extern void machinevec(); + +// entry.S jumps here in machine mode on stack0. +void +mstart() +{ + // set M Previous Privilege mode to Supervisor, for mret. + unsigned long x = r_mstatus(); + x &= ~MSTATUS_MPP_MASK; + x |= MSTATUS_MPP_S; + w_mstatus(x); + + // set M Exception Program Counter to main, for mret. + // requires gcc -mcmodel=medany + w_mepc((uint64)main); + + // disable paging for now. + w_satp(0); + + // delegate all interrupts and exceptions to supervisor mode. + w_medeleg(0xffff); + w_mideleg(0xffff); + + // set up to receive timer interrupts in machine mode, + // for pre-emptive switching and (on hart 0) to drive time. + int id = r_mhartid(); + uint64 *scratch = &mscratch0[32 * id]; + *(uint64*)CLINT_MTIMECMP(id) = *(uint64*)CLINT_MTIME + 10000; + scratch[4] = CLINT_MTIMECMP(id); + scratch[5] = 10000000; + w_mscratch((uint64)scratch); + w_mtvec((uint64)machinevec); + w_mstatus(r_mstatus() | MSTATUS_MIE); + w_mie(r_mie() | MIE_MTIE); + + // keep each CPU's hartid in its tp register, for cpuid(). + w_tp(id); + + // call main() in supervisor mode. 
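+  // mret drops from machine mode to the privilege level saved in
+  // mstatus.MPP (set to Supervisor above) and jumps to mepc (set to
+  // main above).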
+ asm volatile("mret"); +} diff --git a/kernel/stat.h b/kernel/stat.h new file mode 100644 index 0000000..8a80933 --- /dev/null +++ b/kernel/stat.h @@ -0,0 +1,11 @@ +#define T_DIR 1 // Directory +#define T_FILE 2 // File +#define T_DEV 3 // Device + +struct stat { + short type; // Type of file + int dev; // File system's disk device + uint ino; // Inode number + short nlink; // Number of links to file + uint size; // Size of file in bytes +}; diff --git a/kernel/string.c b/kernel/string.c new file mode 100644 index 0000000..d99e612 --- /dev/null +++ b/kernel/string.c @@ -0,0 +1,104 @@ +#include "types.h" + +void* +memset(void *dst, int c, uint n) +{ + char *cdst = (char *) dst; + int i; + for(i = 0; i < n; i++){ + cdst[i] = c; + } + return dst; +} + +int +memcmp(const void *v1, const void *v2, uint n) +{ + const uchar *s1, *s2; + + s1 = v1; + s2 = v2; + while(n-- > 0){ + if(*s1 != *s2) + return *s1 - *s2; + s1++, s2++; + } + + return 0; +} + +void* +memmove(void *dst, const void *src, uint n) +{ + const char *s; + char *d; + + s = src; + d = dst; + if(s < d && s + n > d){ + s += n; + d += n; + while(n-- > 0) + *--d = *--s; + } else + while(n-- > 0) + *d++ = *s++; + + return dst; +} + +// memcpy exists to placate GCC. Use memmove. +void* +memcpy(void *dst, const void *src, uint n) +{ + return memmove(dst, src, n); +} + +int +strncmp(const char *p, const char *q, uint n) +{ + while(n > 0 && *p && *p == *q) + n--, p++, q++; + if(n == 0) + return 0; + return (uchar)*p - (uchar)*q; +} + +char* +strncpy(char *s, const char *t, int n) +{ + char *os; + + os = s; + while(n-- > 0 && (*s++ = *t++) != 0) + ; + while(n-- > 0) + *s++ = 0; + return os; +} + +// Like strncpy but guaranteed to NUL-terminate. +char* +safestrcpy(char *s, const char *t, int n) +{ + char *os; + + os = s; + if(n <= 0) + return os; + while(--n > 0 && (*s++ = *t++) != 0) + ; + *s = 0; + return os; +} + +int +strlen(const char *s) +{ + int n; + + for(n = 0; s[n]; n++) + ; + return n; +} + diff --git a/kernel/swtch.S b/kernel/swtch.S new file mode 100644 index 0000000..17a8663 --- /dev/null +++ b/kernel/swtch.S @@ -0,0 +1,42 @@ +# Context switch +# +# void swtch(struct context *old, struct context *new); +# +# Save current registers in old. Load from new. + + +.globl swtch +swtch: + sd ra, 0(a0) + sd sp, 8(a0) + sd s0, 16(a0) + sd s1, 24(a0) + sd s2, 32(a0) + sd s3, 40(a0) + sd s4, 48(a0) + sd s5, 56(a0) + sd s6, 64(a0) + sd s7, 72(a0) + sd s8, 80(a0) + sd s9, 88(a0) + sd s10, 96(a0) + sd s11, 104(a0) + + ld ra, 0(a1) + ld sp, 8(a1) + ld s0, 16(a1) + ld s1, 24(a1) + ld s2, 32(a1) + ld s3, 40(a1) + ld s4, 48(a1) + ld s5, 56(a1) + ld s6, 64(a1) + ld s7, 72(a1) + ld s8, 80(a1) + ld s9, 88(a1) + ld s10, 96(a1) + ld s11, 104(a1) + + ret + + diff --git a/kernel/syscall.c b/kernel/syscall.c new file mode 100644 index 0000000..ca34f2c --- /dev/null +++ b/kernel/syscall.c @@ -0,0 +1,191 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "proc.h" +#include "syscall.h" +#include "defs.h" + +// User code makes a system call with INT T_SYSCALL. +// System call number in %eax. +// Arguments on the stack, from the user call to the C +// library system call function. The saved user %esp points +// to a saved program counter, and then the first argument. + +// Fetch the int at addr from the current process. 
+int
+fetchint(uint64 addr, int *ip)
+{
+  struct proc *p = myproc();
+
+  if(addr >= p->sz || addr+4 > p->sz)
+    return -1;
+  if(copyin(p->pagetable, (char *)ip, addr, sizeof(*ip)) != 0)
+    return -1;
+  return 0;
+}
+
+// Fetch the uint64 at addr from the current process.
+int
+fetchaddr(uint64 addr, uint64 *ip)
+{
+  struct proc *p = myproc();
+  if(addr >= p->sz || addr+sizeof(uint64) > p->sz)
+    return -1;
+  if(copyin(p->pagetable, (char *)ip, addr, sizeof(*ip)) != 0)
+    return -1;
+  return 0;
+}
+
+// Fetch the nul-terminated string at addr from the current process.
+// Copies at most max bytes into buf, including the terminating nul.
+// Returns length of string, not including nul, or -1 for error.
+int
+fetchstr(uint64 addr, char *buf, int max)
+{
+  struct proc *p = myproc();
+  int err = copyinstr(p->pagetable, buf, addr, max);
+  if(err < 0)
+    return err;
+  return strlen(buf);
+}
+
+static uint64
+fetcharg(int n)
+{
+  struct proc *p = myproc();
+  switch (n) {
+  case 0:
+    return p->tf->a0;
+  case 1:
+    return p->tf->a1;
+  case 2:
+    return p->tf->a2;
+  case 3:
+    return p->tf->a3;
+  case 4:
+    return p->tf->a4;
+  case 5:
+    return p->tf->a5;
+  }
+  panic("fetcharg");
+  return -1;
+}
+
+// Fetch the nth 32-bit system call argument.
+int
+argint(int n, int *ip)
+{
+  *ip = fetcharg(n);
+  return 0;
+}
+
+int
+argaddr(int n, uint64 *ip)
+{
+  *ip = fetcharg(n);
+  return 0;
+}
+
+// Fetch the nth word-sized system call argument as a pointer
+// to a block of memory of size bytes.  Check that the pointer
+// lies within the process address space.
+int
+argptr(int n, uint64 *pp, int size)
+{
+  uint64 i;
+  struct proc *p = myproc();
+
+  if(argaddr(n, &i) < 0)
+    return -1;
+  if(size < 0 || (uint)i >= p->sz || (uint)i+size > p->sz)
+    return -1;
+  *pp = i;
+  return 0;
+}
+
+// Fetch the nth word-sized system call argument as a null-terminated string.
+// Copies into buf, at most max.
+// Returns the string length, not including nul, or -1 on error.
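+// Illustrative use from a system call (sys_open in sysfile.c follows
+// this pattern):
+//   char path[MAXPATH]; int omode;
+//   if(argstr(0, path, MAXPATH) < 0 || argint(1, &omode) < 0)
+//     return -1;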
+int +argstr(int n, char *buf, int max) +{ + uint64 addr; + if(argaddr(n, &addr) < 0) + return -1; + return fetchstr(addr, buf, max); +} + +extern int sys_chdir(void); +extern int sys_close(void); +extern int sys_dup(void); +extern int sys_exec(void); +extern int sys_exit(void); +extern int sys_fork(void); +extern int sys_fstat(void); +extern int sys_getpid(void); +extern int sys_kill(void); +extern int sys_link(void); +extern int sys_mkdir(void); +extern int sys_mknod(void); +extern int sys_open(void); +extern int sys_pipe(void); +extern int sys_read(void); +extern int sys_sbrk(void); +extern int sys_sleep(void); +extern int sys_unlink(void); +extern int sys_wait(void); +extern int sys_write(void); +extern int sys_uptime(void); + +static int (*syscalls[])(void) = { +[SYS_fork] sys_fork, +[SYS_exit] sys_exit, +[SYS_wait] sys_wait, +[SYS_pipe] sys_pipe, +[SYS_read] sys_read, +[SYS_kill] sys_kill, +[SYS_exec] sys_exec, +[SYS_fstat] sys_fstat, +[SYS_chdir] sys_chdir, +[SYS_dup] sys_dup, +[SYS_getpid] sys_getpid, +[SYS_sbrk] sys_sbrk, +[SYS_sleep] sys_sleep, +[SYS_uptime] sys_uptime, +[SYS_open] sys_open, +[SYS_write] sys_write, +[SYS_mknod] sys_mknod, +[SYS_unlink] sys_unlink, +[SYS_link] sys_link, +[SYS_mkdir] sys_mkdir, +[SYS_close] sys_close, +}; + +static void +dosyscall(void) +{ + int num; + struct proc *p = myproc(); + + num = p->tf->a7; + if(num > 0 && num < NELEM(syscalls) && syscalls[num]) { + p->tf->a0 = syscalls[num](); + } else { + printf("%d %s: unknown sys call %d\n", + p->pid, p->name, num); + p->tf->a0 = -1; + } +} + +void +syscall() +{ + if(myproc()->killed) + exit(); + dosyscall(); + if(myproc()->killed) + exit(); + return; +} + diff --git a/kernel/syscall.h b/kernel/syscall.h new file mode 100644 index 0000000..bc5f356 --- /dev/null +++ b/kernel/syscall.h @@ -0,0 +1,22 @@ +// System call numbers +#define SYS_fork 1 +#define SYS_exit 2 +#define SYS_wait 3 +#define SYS_pipe 4 +#define SYS_read 5 +#define SYS_kill 6 +#define SYS_exec 7 +#define SYS_fstat 8 +#define SYS_chdir 9 +#define SYS_dup 10 +#define SYS_getpid 11 +#define SYS_sbrk 12 +#define SYS_sleep 13 +#define SYS_uptime 14 +#define SYS_open 15 +#define SYS_write 16 +#define SYS_mknod 17 +#define SYS_unlink 18 +#define SYS_link 19 +#define SYS_mkdir 20 +#define SYS_close 21 diff --git a/kernel/sysfile.c b/kernel/sysfile.c new file mode 100644 index 0000000..83bb1ed --- /dev/null +++ b/kernel/sysfile.c @@ -0,0 +1,465 @@ +// +// File-system system calls. +// Mostly argument checking, since we don't trust +// user code, and calls into file.c and fs.c. +// + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "stat.h" +#include "proc.h" +#include "fs.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "file.h" +#include "fcntl.h" + +// Fetch the nth word-sized system call argument as a file descriptor +// and return both the descriptor and the corresponding struct file. +static int +argfd(int n, int *pfd, struct file **pf) +{ + int fd; + struct file *f; + + if(argint(n, &fd) < 0) + return -1; + if(fd < 0 || fd >= NOFILE || (f=myproc()->ofile[fd]) == 0) + return -1; + if(pfd) + *pfd = fd; + if(pf) + *pf = f; + return 0; +} + +// Allocate a file descriptor for the given file. +// Takes over file reference from caller on success. 
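+// On failure the caller keeps its reference and must fileclose() it
+// itself, as sys_open and sys_pipe below do.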
+static int +fdalloc(struct file *f) +{ + int fd; + struct proc *p = myproc(); + + for(fd = 0; fd < NOFILE; fd++){ + if(p->ofile[fd] == 0){ + p->ofile[fd] = f; + return fd; + } + } + return -1; +} + +int +sys_dup(void) +{ + struct file *f; + int fd; + + if(argfd(0, 0, &f) < 0) + return -1; + if((fd=fdalloc(f)) < 0) + return -1; + filedup(f); + return fd; +} + +int +sys_read(void) +{ + struct file *f; + int n; + uint64 p; + + if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argptr(1, &p, n) < 0) + return -1; + return fileread(f, p, n); +} + +int +sys_write(void) +{ + struct file *f; + int n; + uint64 p; + + if(argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argptr(1, &p, n) < 0) + return -1; + + return filewrite(f, p, n); +} + +int +sys_close(void) +{ + int fd; + struct file *f; + + if(argfd(0, &fd, &f) < 0) + return -1; + myproc()->ofile[fd] = 0; + fileclose(f); + return 0; +} + +int +sys_fstat(void) +{ + struct file *f; + uint64 st; // user pointer to struct stat + + if(argfd(0, 0, &f) < 0 || argptr(1, &st, sizeof(struct stat)) < 0) + return -1; + return filestat(f, st); +} + +// Create the path new as a link to the same inode as old. +int +sys_link(void) +{ + char name[DIRSIZ], new[MAXPATH], old[MAXPATH]; + struct inode *dp, *ip; + + if(argstr(0, old, MAXPATH) < 0 || argstr(1, new, MAXPATH) < 0) + return -1; + + begin_op(); + if((ip = namei(old)) == 0){ + end_op(); + return -1; + } + + ilock(ip); + if(ip->type == T_DIR){ + iunlockput(ip); + end_op(); + return -1; + } + + ip->nlink++; + iupdate(ip); + iunlock(ip); + + if((dp = nameiparent(new, name)) == 0) + goto bad; + ilock(dp); + if(dp->dev != ip->dev || dirlink(dp, name, ip->inum) < 0){ + iunlockput(dp); + goto bad; + } + iunlockput(dp); + iput(ip); + + end_op(); + + return 0; + +bad: + ilock(ip); + ip->nlink--; + iupdate(ip); + iunlockput(ip); + end_op(); + return -1; +} + +// Is the directory dp empty except for "." and ".." ? +static int +isdirempty(struct inode *dp) +{ + int off; + struct dirent de; + + for(off=2*sizeof(de); off<dp->size; off+=sizeof(de)){ + if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) + panic("isdirempty: readi"); + if(de.inum != 0) + return 0; + } + return 1; +} + +//PAGEBREAK! +int +sys_unlink(void) +{ + struct inode *ip, *dp; + struct dirent de; + char name[DIRSIZ], path[MAXPATH]; + uint off; + + if(argstr(0, path, MAXPATH) < 0) + return -1; + + begin_op(); + if((dp = nameiparent(path, name)) == 0){ + end_op(); + return -1; + } + + ilock(dp); + + // Cannot unlink "." or "..". 
+ if(namecmp(name, ".") == 0 || namecmp(name, "..") == 0) + goto bad; + + if((ip = dirlookup(dp, name, &off)) == 0) + goto bad; + ilock(ip); + + if(ip->nlink < 1) + panic("unlink: nlink < 1"); + if(ip->type == T_DIR && !isdirempty(ip)){ + iunlockput(ip); + goto bad; + } + + memset(&de, 0, sizeof(de)); + if(writei(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) + panic("unlink: writei"); + if(ip->type == T_DIR){ + dp->nlink--; + iupdate(dp); + } + iunlockput(dp); + + ip->nlink--; + iupdate(ip); + iunlockput(ip); + + end_op(); + + return 0; + +bad: + iunlockput(dp); + end_op(); + return -1; +} + +static struct inode* +create(char *path, short type, short major, short minor) +{ + uint off; + struct inode *ip, *dp; + char name[DIRSIZ]; + + if((dp = nameiparent(path, name)) == 0) + return 0; + ilock(dp); + + if((ip = dirlookup(dp, name, &off)) != 0){ + iunlockput(dp); + ilock(ip); + if(type == T_FILE && ip->type == T_FILE) + return ip; + iunlockput(ip); + return 0; + } + + if((ip = ialloc(dp->dev, type)) == 0) + panic("create: ialloc"); + + ilock(ip); + ip->major = major; + ip->minor = minor; + ip->nlink = 1; + iupdate(ip); + + if(type == T_DIR){ // Create . and .. entries. + dp->nlink++; // for ".." + iupdate(dp); + // No ip->nlink++ for ".": avoid cyclic ref count. + if(dirlink(ip, ".", ip->inum) < 0 || dirlink(ip, "..", dp->inum) < 0) + panic("create dots"); + } + + if(dirlink(dp, name, ip->inum) < 0) + panic("create: dirlink"); + + iunlockput(dp); + + return ip; +} + +int +sys_open(void) +{ + char path[MAXPATH]; + int fd, omode; + struct file *f; + struct inode *ip; + + if(argstr(0, path, MAXPATH) < 0 || argint(1, &omode) < 0) + return -1; + + begin_op(); + + if(omode & O_CREATE){ + ip = create(path, T_FILE, 0, 0); + if(ip == 0){ + end_op(); + return -1; + } + } else { + if((ip = namei(path)) == 0){ + end_op(); + return -1; + } + ilock(ip); + if(ip->type == T_DIR && omode != O_RDONLY){ + iunlockput(ip); + end_op(); + return -1; + } + } + + if((f = filealloc()) == 0 || (fd = fdalloc(f)) < 0){ + if(f) + fileclose(f); + iunlockput(ip); + end_op(); + return -1; + } + iunlock(ip); + end_op(); + + f->type = FD_INODE; + f->ip = ip; + f->off = 0; + f->readable = !(omode & O_WRONLY); + f->writable = (omode & O_WRONLY) || (omode & O_RDWR); + return fd; +} + +int +sys_mkdir(void) +{ + char path[MAXPATH]; + struct inode *ip; + + begin_op(); + if(argstr(0, path, MAXPATH) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0){ + end_op(); + return -1; + } + iunlockput(ip); + end_op(); + return 0; +} + +int +sys_mknod(void) +{ + struct inode *ip; + char path[MAXPATH]; + int major, minor; + + begin_op(); + if((argstr(0, path, MAXPATH)) < 0 || + argint(1, &major) < 0 || + argint(2, &minor) < 0 || + (ip = create(path, T_DEV, major, minor)) == 0){ + end_op(); + return -1; + } + iunlockput(ip); + end_op(); + return 0; +} + +int +sys_chdir(void) +{ + char path[MAXPATH]; + struct inode *ip; + struct proc *p = myproc(); + + begin_op(); + if(argstr(0, path, MAXPATH) < 0 || (ip = namei(path)) == 0){ + end_op(); + return -1; + } + ilock(ip); + if(ip->type != T_DIR){ + iunlockput(ip); + end_op(); + return -1; + } + iunlock(ip); + iput(p->cwd); + end_op(); + p->cwd = ip; + return 0; +} + +int +sys_exec(void) +{ + char path[MAXPATH], *argv[MAXARG]; + int i; + uint64 uargv, uarg; + + if(argstr(0, path, MAXPATH) < 0 || argaddr(1, &uargv) < 0){ + return -1; + } + memset(argv, 0, sizeof(argv)); + for(i=0;; i++){ + if(i >= NELEM(argv)){ + return -1; + } + if(fetchaddr(uargv+sizeof(uint64)*i, (uint64*)&uarg) < 0){ + return -1; + 
} + if(uarg == 0){ + argv[i] = 0; + break; + } + argv[i] = kalloc(); + if(argv[i] == 0) + panic("sys_exec kalloc"); + if(fetchstr(uarg, argv[i], PGSIZE) < 0){ + return -1; + } + } + + int ret = exec(path, argv); + + for(i = 0; i < NELEM(argv) && argv[i] != 0; i++) + kfree(argv[i]); + + return ret; +} + +int +sys_pipe(void) +{ + uint64 fdarray; // user pointer to array of two integers + struct file *rf, *wf; + int fd0, fd1; + struct proc *p = myproc(); + + if(argptr(0, &fdarray, 2*sizeof(int)) < 0) + return -1; + if(pipealloc(&rf, &wf) < 0) + return -1; + fd0 = -1; + if((fd0 = fdalloc(rf)) < 0 || (fd1 = fdalloc(wf)) < 0){ + if(fd0 >= 0) + p->ofile[fd0] = 0; + fileclose(rf); + fileclose(wf); + return -1; + } + if(copyout(p->pagetable, fdarray, (char*)&fd0, sizeof(fd0)) < 0 || + copyout(p->pagetable, fdarray+sizeof(fd0), (char *)&fd1, sizeof(fd1)) < 0){ + p->ofile[fd0] = 0; + p->ofile[fd1] = 0; + fileclose(rf); + fileclose(wf); + return -1; + } + return 0; +} diff --git a/kernel/sysproc.c b/kernel/sysproc.c new file mode 100644 index 0000000..e57e045 --- /dev/null +++ b/kernel/sysproc.c @@ -0,0 +1,90 @@ +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "date.h" +#include "param.h" +#include "memlayout.h" +#include "proc.h" + +int +sys_exit(void) +{ + exit(); + return 0; // not reached +} + +int +sys_getpid(void) +{ + return myproc()->pid; +} + +int +sys_fork(void) +{ + return fork(); +} + +int +sys_wait(void) +{ + return wait(); +} + +int +sys_sbrk(void) +{ + int addr; + int n; + + if(argint(0, &n) < 0) + return -1; + addr = myproc()->sz; + if(growproc(n) < 0) + return -1; + return addr; +} + +int +sys_sleep(void) +{ + int n; + uint ticks0; + + if(argint(0, &n) < 0) + return -1; + acquire(&tickslock); + ticks0 = ticks; + while(ticks - ticks0 < n){ + if(myproc()->killed){ + release(&tickslock); + return -1; + } + sleep(&ticks, &tickslock); + } + release(&tickslock); + return 0; +} + +int +sys_kill(void) +{ + int pid; + + if(argint(0, &pid) < 0) + return -1; + return kill(pid); +} + +// return how many clock tick interrupts have occurred +// since start. +int +sys_uptime(void) +{ + uint xticks; + + acquire(&tickslock); + xticks = ticks; + release(&tickslock); + return xticks; +} diff --git a/kernel/trampoline.S b/kernel/trampoline.S new file mode 100644 index 0000000..dd4eb02 --- /dev/null +++ b/kernel/trampoline.S @@ -0,0 +1,137 @@ + # + # code to switch between user and kernel space. + # + # this code is mapped at the same virtual address + # in user and kernel space so that it can switch + # page tables. + # + # kernel.ld causes trampout to be aligned + # to a page boundary. + # +.globl usertrap + .section trampoline +.globl trampout +trampout: + # switch from kernel to user. + # usertrapret() calls here. + # a0: p->tf in user page table + # a1: new value for satp, for user page table + + # switch to user page table + csrw satp, a1 + + # put the saved user a0 in sscratch, so we + # can swap it with our a0 (p->tf) in the last step. 
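+        # (112 is the byte offset of the a0 field in struct trapframe;
+        # the /* N */ comments in proc.h list the offsets used below.)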
+ ld t0, 112(a0) + csrw sscratch, t0 + + # restore all but a0 from p->tf + ld ra, 40(a0) + ld sp, 48(a0) + ld gp, 56(a0) + ld tp, 64(a0) + ld t0, 72(a0) + ld t1, 80(a0) + ld t2, 88(a0) + ld s0, 96(a0) + ld s1, 104(a0) + ld a1, 120(a0) + ld a2, 128(a0) + ld a3, 136(a0) + ld a4, 144(a0) + ld a5, 152(a0) + ld a6, 160(a0) + ld a7, 168(a0) + ld s2, 176(a0) + ld s3, 184(a0) + ld s4, 192(a0) + ld s5, 200(a0) + ld s6, 208(a0) + ld s7, 216(a0) + ld s8, 224(a0) + ld s9, 232(a0) + ld s10, 240(a0) + ld s11, 248(a0) + ld t3, 256(a0) + ld t4, 264(a0) + ld t5, 272(a0) + ld t6, 280(a0) + + # restore user a0, and save p->tf + csrrw a0, sscratch, a0 + + # return to user mode and user pc. + # caller has set up sstatus and sepc. + sret + +.align 4 +.globl trampin +trampin: + # + # trap.c set stvec to point here, so + # user interrupts and exceptions start here, + # in supervisor mode, but with a + # user page table. + # + # sscratch points to where the process's p->tf is + # mapped into user space (TRAMPOLINE - 4096). + # + + # swap a0 and sscratch + # so that a0 is p->tf + csrrw a0, sscratch, a0 + + # save the user registers in p->tf + sd ra, 40(a0) + sd sp, 48(a0) + sd gp, 56(a0) + sd tp, 64(a0) + sd t0, 72(a0) + sd t1, 80(a0) + sd t2, 88(a0) + sd s0, 96(a0) + sd s1, 104(a0) + sd a1, 120(a0) + sd a2, 128(a0) + sd a3, 136(a0) + sd a4, 144(a0) + sd a5, 152(a0) + sd a6, 160(a0) + sd a7, 168(a0) + sd s2, 176(a0) + sd s3, 184(a0) + sd s4, 192(a0) + sd s5, 200(a0) + sd s6, 208(a0) + sd s7, 216(a0) + sd s8, 224(a0) + sd s9, 232(a0) + sd s10, 240(a0) + sd s11, 248(a0) + sd t3, 256(a0) + sd t4, 264(a0) + sd t5, 272(a0) + sd t6, 280(a0) + + # save the user a0 in p->tf->a0 + csrr t0, sscratch + sd t0, 112(a0) + + # restore kernel stack pointer from p->tf->kernel_sp + ld sp, 8(a0) + + # make tp hold the current hartid, from p->tf->hartid + ld tp, 32(a0) + + # remember the address of usertrap(), p->tf->kernel_trap + ld t0, 16(a0) + + # restore kernel page table from p->tf->kernel_satp + ld t1, 0(a0) + csrw satp, t1 + + # a0 is no longer valid, since the kernel page + # table does not specially map p->td. + + # jump to usertrap(), which does not return + jr t0 diff --git a/kernel/trap.c b/kernel/trap.c new file mode 100644 index 0000000..050a94d --- /dev/null +++ b/kernel/trap.c @@ -0,0 +1,184 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "proc.h" +#include "spinlock.h" +#include "defs.h" + +struct spinlock tickslock; +uint ticks; + +extern char trampout[], trampin[]; + +// in kernelvec.S, calls kerneltrap(). +void kernelvec(); + +extern int devintr(); + +void +trapinit(void) +{ + initlock(&tickslock, "time"); +} + +// set up to take exceptions and traps while in the kernel. +void +trapinithart(void) +{ + w_stvec((uint64)kernelvec); +} + +// +// handle an interrupt, exception, or system call from user space. +// called from trampoline.S +// +void +usertrap(void) +{ + int which_dev = 0; + + if((r_sstatus() & SSTATUS_SPP) != 0) + panic("usertrap: not from user mode"); + + // send interrupts and exceptions to kerneltrap(), + // since we're now in the kernel. + w_stvec((uint64)kernelvec); + + struct proc *p = myproc(); + + // save user program counter. + p->tf->epc = r_sepc(); + + intr_on(); + + if(r_scause() == 8){ + // system call + + // sepc points to the ecall instruction, + // but we want to return to the next instruction. 
+ p->tf->epc += 4; + + syscall(); + } else if((which_dev = devintr()) != 0){ + // ok + } else { + printf("usertrap(): unexpected scause 0x%p pid=%d\n", r_scause(), p->pid); + printf(" sepc=%p stval=%p\n", r_sepc(), r_stval()); + p->killed = 1; + } + + if(p->killed) + exit(); + + // give up the CPU if this is a timer interrupt. + if(which_dev == 2) + yield(); + + usertrapret(); +} + +// +// return to user space +// +void +usertrapret(void) +{ + struct proc *p = myproc(); + + // turn off interrupts, since we're switching + // now from kerneltrap() to usertrap(). + intr_off(); + + // send interrupts and exceptions to trampoline.S + w_stvec(TRAMPOLINE + (trampin - trampout)); + + // set up values that trampoline.S will need when + // the process next re-enters the kernel. + p->tf->kernel_satp = r_satp(); + p->tf->kernel_sp = (uint64)p->kstack + PGSIZE; + p->tf->kernel_trap = (uint64)usertrap; + p->tf->hartid = r_tp(); + + // set up the registers that trampoline.S's sret will use + // to get to user space. + + // set S Previous Privilege mode to User. + unsigned long x = r_sstatus(); + x &= ~SSTATUS_SPP; // clear SPP to 0 for user mode + x |= SSTATUS_SPIE; // enable interrupts in user mode + w_sstatus(x); + + // set S Exception Program Counter to the saved user pc. + w_sepc(p->tf->epc); + + // tell trampline.S the user page table to switch to. + uint64 satp = MAKE_SATP(p->pagetable); + + // jump to trampoline.S at the top of memory, which + // switches to the user page table, restores user registers, + // and switches to user mode with sret. + ((void (*)(uint64,uint64))TRAMPOLINE)(TRAMPOLINE - PGSIZE, satp); +} + +// interrupts and exceptions from kernel code go here, +// on whatever the current kernel stack is. +// must be 4-byte aligned to fit in stvec. +void +kerneltrap() +{ + uint64 sstatus = r_sstatus(); + uint64 scause = r_scause(); + + if((sstatus & SSTATUS_SPP) == 0) + panic("kerneltrap: not from supervisor mode"); + if(intr_get() != 0) + panic("kerneltrap: interrupts enabled"); + + if(devintr() == 0){ + printf("scause 0x%p\n", scause); + printf("sepc=%p stval=%p\n", r_sepc(), r_stval()); + panic("kerneltrap"); + } +} + +// check if it's an external interrupt or software interrupt, +// and handle it. +// returns 2 if timer interrupt, +// 1 if other device, +// 0 if not recognized. +int +devintr() +{ + uint64 scause = r_scause(); + + if((scause & 0x8000000000000000L) && + (scause & 0xff) == 9){ + // supervisor external interrupt, via PLIC. + int irq = plic_claim(); + + if(irq == UART0_IRQ){ + uartintr(); + } + + plic_complete(irq); + return 1; + } else if(scause == 0x8000000000000001){ + // software interrupt from a machine-mode timer interrupt. + + if(cpuid() == 0){ + acquire(&tickslock); + ticks++; + wakeup(&ticks); + release(&tickslock); + } + + // acknowledge. 
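+    // sip bit 1 is SSIP, the supervisor software-interrupt pending bit
+    // (the 1L << 1 in SIE_SSIE); clearing it marks this interrupt handled.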
+ w_sip(r_sip() & ~2); + + return 2; + } else { + return 0; + } +} + diff --git a/kernel/types.h b/kernel/types.h new file mode 100644 index 0000000..ee73164 --- /dev/null +++ b/kernel/types.h @@ -0,0 +1,10 @@ +typedef unsigned int uint; +typedef unsigned short ushort; +typedef unsigned char uchar; + +typedef unsigned char uint8; +typedef unsigned short uint16; +typedef unsigned int uint32; +typedef unsigned long uint64; + +typedef uint64 pde_t; diff --git a/kernel/uart.c b/kernel/uart.c new file mode 100644 index 0000000..35fac1b --- /dev/null +++ b/kernel/uart.c @@ -0,0 +1,75 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "proc.h" +#include "spinlock.h" +#include "defs.h" + +// +// qemu -machine virt has a 16550a UART +// qemu/hw/riscv/virt.c +// http://byterunner.com/16550.html +// +// caller should lock. +// + +// address of one of the registers +#define R(reg) ((volatile unsigned char *)(UART0 + reg)) + +void +uartinit(void) +{ + // disable interrupts -- IER + *R(1) = 0x00; + + // special mode to set baud rate + *R(3) = 0x80; + + // LSB for baud rate of 38.4K + *R(0) = 0x03; + + // MSB for baud rate of 38.4K + *R(1) = 0x00; + + // leave set-baud mode, + // and set word length to 8 bits, no parity. + *R(3) = 0x03; + + // reset and enable FIFOs -- FCR. + *R(2) = 0x07; + + // enable receive interrupts -- IER. + *R(1) = 0x01; +} + +void +uartputc(int c) +{ + // wait for Transmit Holding Empty to be set in LSR. + while((*R(5) & (1 << 5)) == 0) + ; + *R(0) = c; +} + +int +uartgetc(void) +{ + if(*R(5) & 0x01){ + // input data is ready. + return *R(0); + } else { + return -1; + } +} + +void +uartintr(void) +{ + while(1){ + int c = uartgetc(); + if(c == -1) + break; + consoleintr(c); + } +} diff --git a/kernel/vm.c b/kernel/vm.c new file mode 100644 index 0000000..0ea6bca --- /dev/null +++ b/kernel/vm.c @@ -0,0 +1,389 @@ +#include "param.h" +#include "types.h" +#include "memlayout.h" +#include "elf.h" +#include "riscv.h" +#include "defs.h" +#include "fs.h" + +/* + * the kernel's page table. + */ +pagetable_t kernel_pagetable; + +extern char etext[]; // kernel.ld sets this to end of kernel code. + +extern char trampout[]; // trampoline.S + +/* + * create a direct-map page table for the kernel and + * turn on paging. called early, in supervisor mode. + * the page allocator is already initialized. + */ +void +kvminit() +{ + kernel_pagetable = (pagetable_t) kalloc(); + memset(kernel_pagetable, 0, PGSIZE); + + // uart registers + mappages(kernel_pagetable, UART0, PGSIZE, + UART0, PTE_R | PTE_W); + + // CLINT + mappages(kernel_pagetable, CLINT, 0x10000, + CLINT, PTE_R | PTE_W); + + // PLIC + mappages(kernel_pagetable, PLIC, 0x4000000, + PLIC, PTE_R | PTE_W); + + // map kernel text executable and read-only. + mappages(kernel_pagetable, KERNBASE, (uint64)etext-KERNBASE, + KERNBASE, PTE_R | PTE_X); + + // map kernel data and the physical RAM we'll make use of. + mappages(kernel_pagetable, (uint64)etext, PHYSTOP-(uint64)etext, + (uint64)etext, PTE_R | PTE_W); + + // map the qemu -initrd fs.img ramdisk + mappages(kernel_pagetable, RAMDISK, FSSIZE * BSIZE, + RAMDISK, PTE_R | PTE_W); + + // map the trampoline for trap entry/exit to + // the highest virtual address in the kernel. + mappages(kernel_pagetable, TRAMPOLINE, PGSIZE, + (uint64)trampout, PTE_R | PTE_X); +} + +// Switch h/w page table register to the kernel's page table, +// and enable paging. 
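+// Because kvminit() built a direct map (each kernel virtual address
+// equals its physical address), execution continues at the same
+// addresses once translation is turned on.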
+// Switch h/w page table register to the kernel's page table,
+// and enable paging.
+void
+kvminithart()
+{
+  w_satp(MAKE_SATP(kernel_pagetable));
+}
+
+// Return the address of the PTE in page table pagetable
+// that corresponds to virtual address va. If alloc!=0,
+// create any required page table pages.
+//
+// The risc-v Sv39 scheme has three levels of page table
+// pages. A page table page contains 512 64-bit PTEs.
+// A 64-bit virtual address is split into five fields:
+//   39..63 -- must be zero.
+//   30..38 -- 9 bits of level-2 index.
+//   21..29 -- 9 bits of level-1 index.
+//   12..20 -- 9 bits of level-0 index.
+//    0..11 -- 12 bits of byte offset within the page.
+static pte_t *
+walk(pagetable_t pagetable, uint64 va, int alloc)
+{
+  if(va >= MAXVA)
+    panic("walk");
+
+  for(int level = 2; level > 0; level--) {
+    pte_t *pte = &pagetable[PX(level, va)];
+    if(*pte & PTE_V) {
+      pagetable = (pagetable_t)PTE2PA(*pte);
+    } else {
+      if(!alloc || (pagetable = (pde_t*)kalloc()) == 0)
+        return 0;
+      memset(pagetable, 0, PGSIZE);
+      *pte = PA2PTE(pagetable) | PTE_V;
+    }
+  }
+  return &pagetable[PX(0, va)];
+}
+
+// Look up a virtual address, return the physical address,
+// or 0 if not mapped.
+// Can only be used to look up user pages.
+uint64
+walkaddr(pagetable_t pagetable, uint64 va)
+{
+  pte_t *pte;
+  uint64 pa;
+
+  pte = walk(pagetable, va, 0);
+  if(pte == 0)
+    return 0;
+  if((*pte & PTE_V) == 0)
+    return 0;
+  if((*pte & PTE_U) == 0)
+    return 0;
+  pa = PTE2PA(*pte);
+  return pa;
+}
+
+
+// Create PTEs for virtual addresses starting at va that refer to
+// physical addresses starting at pa. va and size might not
+// be page-aligned.
+void
+mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm)
+{
+  uint64 a, last;
+  pte_t *pte;
+
+  a = PGROUNDDOWN(va);
+  last = PGROUNDDOWN(va + size - 1);
+  for(;;){
+    if((pte = walk(pagetable, a, 1)) == 0)
+      panic("mappages: walk");
+    if(*pte & PTE_V)
+      panic("remap");
+    *pte = PA2PTE(pa) | perm | PTE_V;
+    if(a == last)
+      break;
+    a += PGSIZE;
+    pa += PGSIZE;
+  }
+}
+
+// Remove mappings from a page table. The mappings in
+// the given range must exist. Optionally free the
+// physical memory.
+void
+unmappages(pagetable_t pagetable, uint64 va, uint64 size, int do_free)
+{
+  uint64 a, last;
+  pte_t *pte;
+  uint64 pa;
+
+  a = PGROUNDDOWN(va);
+  last = PGROUNDDOWN(va + size - 1);
+  for(;;){
+    if((pte = walk(pagetable, a, 0)) == 0)
+      panic("unmappages: walk");
+    if((*pte & PTE_V) == 0){
+      printf("va=%p pte=%p\n", a, *pte);
+      panic("unmappages: not mapped");
+    }
+    if(PTE_FLAGS(*pte) == PTE_V)
+      panic("unmappages: not a leaf");
+    if(do_free){
+      pa = PTE2PA(*pte);
+      kfree((void*)pa);
+    }
+    *pte = 0;
+    if(a == last)
+      break;
+    a += PGSIZE;
+    pa += PGSIZE;
+  }
+}
+
+// create an empty user page table.
+pagetable_t
+uvmcreate()
+{
+  pagetable_t pagetable;
+  pagetable = (pagetable_t) kalloc();
+  if(pagetable == 0)
+    panic("uvmcreate: out of memory");
+  memset(pagetable, 0, PGSIZE);
+  return pagetable;
+}
+
+// Load the user initcode into address 0 of pagetable,
+// for the very first process.
+// sz must be less than a page.
+void
+uvminit(pagetable_t pagetable, uchar *src, uint sz)
+{
+  char *mem;
+
+  if(sz >= PGSIZE)
+    panic("uvminit: more than a page");
+  mem = kalloc();
+  memset(mem, 0, PGSIZE);
+  mappages(pagetable, 0, PGSIZE, (uint64)mem, PTE_W|PTE_R|PTE_X|PTE_U);
+  memmove(mem, src, sz);
+}
+
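The walk() routine above indexes one page-table page per level using 9-bit slices of the virtual address, as its comment describes. The host-side program below is not part of this commit; it mirrors the index extraction (xv6's PX() macro computes the same shift-and-mask) for an arbitrary Sv39 user-range address.

  // split an Sv39 virtual address into its three 9-bit
  // page-table indices and 12-bit page offset.
  #include <stdio.h>
  #include <stdint.h>

  static int px(int level, uint64_t va)
  {
    return (va >> (12 + 9 * level)) & 0x1ff;   // 9-bit index for this level
  }

  int main(void)
  {
    uint64_t va = 0x0000003fffffe000ULL;        // arbitrary Sv39 address
    printf("L2=%d L1=%d L0=%d offset=%llu\n",
           px(2, va), px(1, va), px(0, va),
           (unsigned long long)(va & 0xfff));
    return 0;
  }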
+// Allocate PTEs and physical memory to grow process from oldsz to
+// newsz, which need not be page aligned. Returns new size or 0 on error.
+uint64
+uvmalloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)
+{
+  char *mem;
+  uint64 a;
+
+  if(newsz < oldsz)
+    return oldsz;
+
+  oldsz = PGROUNDUP(oldsz);
+  a = oldsz;
+  for(; a < newsz; a += PGSIZE){
+    mem = kalloc();
+    if(mem == 0){
+      uvmdealloc(pagetable, a, oldsz);
+      return 0;
+    }
+    memset(mem, 0, PGSIZE);
+    mappages(pagetable, a, PGSIZE, (uint64)mem, PTE_W|PTE_X|PTE_R|PTE_U);
+  }
+  return newsz;
+}
+
+// Deallocate user pages to bring the process size from oldsz to
+// newsz. oldsz and newsz need not be page-aligned, nor does newsz
+// need to be less than oldsz. oldsz can be larger than the actual
+// process size. Returns the new process size.
+uint64
+uvmdealloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)
+{
+  if(newsz >= oldsz)
+    return oldsz;
+  unmappages(pagetable, newsz, oldsz - newsz, 1);
+  return newsz;
+}
+
+// Recursively free page table pages.
+// All leaf mappings must already have been removed.
+static void
+freewalk(pagetable_t pagetable)
+{
+  // there are 2^9 = 512 PTEs in a page table.
+  for(int i = 0; i < 512; i++){
+    pte_t pte = pagetable[i];
+    if((pte & PTE_V) && (pte & (PTE_R|PTE_W|PTE_X)) == 0){
+      // this PTE points to a lower-level page table.
+      uint64 child = PTE2PA(pte);
+      freewalk((pagetable_t)child);
+      pagetable[i] = 0;
+    } else if(pte & PTE_V){
+      panic("freewalk: leaf");
+    }
+  }
+  kfree((void*)pagetable);
+}
+
+// Free user memory pages,
+// then free page table pages.
+void
+uvmfree(pagetable_t pagetable, uint64 sz)
+{
+  unmappages(pagetable, 0, sz, 1);
+  freewalk(pagetable);
+}
+
+// Given a parent process's page table, copy
+// its memory into a child's page table.
+// Copies both the page table and the
+// physical memory.
+void
+uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
+{
+  pte_t *pte;
+  uint64 pa, i;
+  uint flags;
+  char *mem;
+
+  for(i = 0; i < sz; i += PGSIZE){
+    if((pte = walk(old, i, 0)) == 0)
+      panic("uvmcopy: pte should exist");
+    if((*pte & PTE_V) == 0)
+      panic("uvmcopy: page not present");
+    pa = PTE2PA(*pte);
+    flags = PTE_FLAGS(*pte);
+    if((mem = kalloc()) == 0)
+      panic("uvmcopy: kalloc failed");
+    memmove(mem, (char*)pa, PGSIZE);
+    mappages(new, i, PGSIZE, (uint64)mem, flags);
+  }
+}
+
+// Copy from kernel to user.
+// Copy len bytes from src to virtual address dstva in a given page table.
+// Return 0 on success, -1 on error.
+int
+copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
+{
+  uint64 n, va0, pa0;
+
+  while(len > 0){
+    va0 = PGROUNDDOWN(dstva);
+    pa0 = walkaddr(pagetable, va0);
+    if(pa0 == 0)
+      return -1;
+    n = PGSIZE - (dstva - va0);
+    if(n > len)
+      n = len;
+    memmove((void *)(pa0 + (dstva - va0)), src, n);
+
+    len -= n;
+    src += n;
+    dstva = va0 + PGSIZE;
+  }
+  return 0;
+}
+
+// Copy from user to kernel.
+// Copy len bytes to dst from virtual address srcva in a given page table.
+// Return 0 on success, -1 on error.
+int
+copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len)
+{
+  uint64 n, va0, pa0;
+
+  while(len > 0){
+    va0 = PGROUNDDOWN(srcva);
+    pa0 = walkaddr(pagetable, va0);
+    if(pa0 == 0)
+      return -1;
+    n = PGSIZE - (srcva - va0);
+    if(n > len)
+      n = len;
+    memmove(dst, (void *)(pa0 + (srcva - va0)), n);
+
+    len -= n;
+    dst += n;
+    srcva = va0 + PGSIZE;
+  }
+  return 0;
+}
+
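copyout() and copyin() above rely on the same chunking idea: translate the page containing the current user address, copy at most up to the end of that page, then continue from the start of the next page. The standalone sketch below is not part of this commit; it reproduces only the chunk arithmetic for a hypothetical starting address, leaving out the page-table lookup.

  // split a copy of `len` bytes starting at `dstva` into
  // per-page chunks, the way copyout()/copyin() do.
  #include <stdio.h>
  #include <stdint.h>

  #define PGSIZE 4096
  #define PGROUNDDOWN(a) ((a) & ~(uint64_t)(PGSIZE - 1))

  int main(void)
  {
    uint64_t dstva = 0x3ff0;   // hypothetical address near a page boundary
    uint64_t len = 100;

    while(len > 0){
      uint64_t va0 = PGROUNDDOWN(dstva);
      uint64_t n = PGSIZE - (dstva - va0);   // bytes left in this page
      if(n > len)
        n = len;
      printf("copy %llu bytes at va %#llx\n",
             (unsigned long long)n, (unsigned long long)dstva);
      len -= n;
      dstva = va0 + PGSIZE;                  // start of the next page
    }
    return 0;
  }

Run on the sample values, this prints a 16-byte chunk at 0x3ff0 followed by an 84-byte chunk at 0x4000.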
+// Copy a null-terminated string from user to kernel.
+// Copy bytes to dst from virtual address srcva in a given page table,
+// until a '\0', or max.
+// Return 0 on success, -1 on error.
+int
+copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max)
+{
+  uint64 n, va0, pa0;
+  int got_null = 0;
+
+  while(got_null == 0 && max > 0){
+    va0 = PGROUNDDOWN(srcva);
+    pa0 = walkaddr(pagetable, va0);
+    if(pa0 == 0)
+      return -1;
+    n = PGSIZE - (srcva - va0);
+    if(n > max)
+      n = max;
+
+    char *p = (char *) (pa0 + (srcva - va0));
+    while(n > 0){
+      if(*p == '\0'){
+        *dst = '\0';
+        got_null = 1;
+        break;
+      } else {
+        *dst = *p;
+      }
+      --n;
+      --max;
+      p++;
+      dst++;
+    }
+
+    srcva = va0 + PGSIZE;
+  }
+  if(got_null){
+    return 0;
+  } else {
+    return -1;
+  }
+}
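copyinstr() combines the per-page chunking with a scan for the terminating '\0'. Stripped of the paging detail, its contract looks like the user-space sketch below; copystr is a made-up name for this illustration, not an xv6 function. It copies at most max bytes and succeeds only if the terminator was seen and copied.

  // bounded string copy with copyinstr()-style return values.
  #include <stdio.h>
  #include <stdint.h>

  int copystr(char *dst, const char *src, uint64_t max)
  {
    while(max > 0){
      *dst = *src;
      if(*src == '\0')
        return 0;          // success: terminator copied
      dst++;
      src++;
      max--;
    }
    return -1;             // ran out of room before seeing '\0'
  }

  int main(void)
  {
    char buf[8];
    printf("%d\n", copystr(buf, "hi", sizeof(buf)));              // 0
    printf("%d\n", copystr(buf, "a longer string", sizeof(buf))); // -1
    return 0;
  }

Running it prints 0 for the short string and -1 for the one that does not fit, matching copyinstr()'s return convention.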