author    | Mole Shang <[email protected]> | 2024-02-16 10:20:27 +0800
committer | Mole Shang <[email protected]> | 2024-02-16 10:20:27 +0800
commit    | a98c56a811142e5ede3332a7a444cca45f628769
tree      | c93ff7090da7b6ef911932be283818c2f6a03784 /kernel
parent    | 0d65be5d1d880afafbf08c2adb605cf9f72216e2
parent    | 99015f3a985b2fd051606636743a2a2969b216e8
Merge branch 'lock' into thread
Conflicts:
.gitignore
Makefile
conf/lab.mk
Diffstat (limited to 'kernel')
38 files changed, 2552 insertions, 82 deletions
diff --git a/kernel/bio.c b/kernel/bio.c index 60d91a6..a1f4474 100644 --- a/kernel/bio.c +++ b/kernel/bio.c @@ -23,16 +23,26 @@ #include "fs.h" #include "buf.h" -struct { - struct spinlock lock; - struct buf buf[NBUF]; +#define N_BUCKETS 13 +struct bucket { + struct spinlock lock; // Linked list of all buffers, through prev/next. // Sorted by how recently the buffer was used. // head.next is most recent, head.prev is least. + struct buf head; +}; +struct { + struct spinlock lock; + struct buf buf[NBUF]; + struct bucket buckets[N_BUCKETS]; } bcache; +static inline uint hash(uint blockno) { + return blockno % N_BUCKETS; +} + void binit(void) { @@ -40,15 +50,24 @@ binit(void) initlock(&bcache.lock, "bcache"); + // init each bucket; initlock() keeps the name pointer, + // so every bucket needs its own name buffer + for (int i = 0; i < N_BUCKETS; i++) { + static char lock_names[N_BUCKETS][16]; + snprintf(lock_names[i], sizeof(lock_names[i]), "bio.bucket.%d", i); + initlock(&bcache.buckets[i].lock, lock_names[i]); + bcache.buckets[i].head.prev = &bcache.buckets[i].head; + bcache.buckets[i].head.next = &bcache.buckets[i].head; + } + // Create linked list of buffers - bcache.head.prev = &bcache.head; - bcache.head.next = &bcache.head; + // every blockno is still zero at this point, + // so all buffers start out in bucket 0 for(b = bcache.buf; b < bcache.buf+NBUF; b++){ - b->next = bcache.head.next; - b->prev = &bcache.head; + b->next = bcache.buckets[0].head.next; + b->prev = &bcache.buckets[0].head; initsleeplock(&b->lock, "buffer"); - bcache.head.next->prev = b; - bcache.head.next = b; + bcache.buckets[0].head.next->prev = b; + bcache.buckets[0].head.next = b; } } @@ -59,32 +78,92 @@ static struct buf* bget(uint dev, uint blockno) { struct buf *b; + uint bucket_index = hash(blockno); - acquire(&bcache.lock); + acquire(&bcache.buckets[bucket_index].lock); // Is the block already cached? - for(b = bcache.head.next; b != &bcache.head; b = b->next){ + for(b = bcache.buckets[bucket_index].head.next; b != &bcache.buckets[bucket_index].head; b = b->next){ if(b->dev == dev && b->blockno == blockno){ b->refcnt++; - release(&bcache.lock); + release(&bcache.buckets[bucket_index].lock); acquiresleep(&b->lock); return b; } } // Not cached. - // Recycle the least recently used (LRU) unused buffer. - for(b = bcache.head.prev; b != &bcache.head; b = b->prev){ + // Recycle the least recently used (LRU) unused buffer + // from our own bucket's list first. + struct buf *unused = (struct buf *)0; + for(b = bcache.buckets[bucket_index].head.prev; b != &bcache.buckets[bucket_index].head; b = b->prev){ if(b->refcnt == 0) { - b->dev = dev; - b->blockno = blockno; - b->valid = 0; - b->refcnt = 1; - release(&bcache.lock); - acquiresleep(&b->lock); - return b; + if (!unused || unused->ticks > b->ticks) { + unused = b; + } } } + if (unused) { + unused->dev = dev; + unused->blockno = blockno; + unused->valid = 0; + unused->refcnt = 1; + unused->ticks = ticks; + release(&bcache.buckets[bucket_index].lock); + acquiresleep(&unused->lock); + return unused; + } + // Still no usable buf: + // steal the least recently used (LRU) unused buffer from another + // bucket's list and move it to our hash bucket. + // To avoid deadlocks, scan buckets in a fixed order, 0 -> N_BUCKETS.
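+ // With a single global order, two CPUs can never each hold a bucket lock while waiting for the other's, so circular wait is impossible.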
+ + // release our bucket's lock first, so the locks below are + // only ever acquired in ascending bucket order + release(&bcache.buckets[bucket_index].lock); + int i; + for (i = 0; i < N_BUCKETS; i++) { + if (i == bucket_index) { + acquire(&bcache.buckets[bucket_index].lock); + } else { + acquire(&bcache.buckets[i].lock); + for(b = bcache.buckets[i].head.prev; b != &bcache.buckets[i].head; b = b->prev){ + if(b->refcnt == 0) { + if (!unused || unused->ticks > b->ticks) { + unused = b; + } + } + } + if (unused) { + if (i < bucket_index) { + acquire(&bcache.buckets[bucket_index].lock); + } + break; + } else { + release(&bcache.buckets[i].lock); + } + } + } + if (unused) { + // remove from old bucket + unused->prev->next = unused->next; + unused->next->prev = unused->prev; + release(&bcache.buckets[i].lock); + + unused->dev = dev; + unused->blockno = blockno; + unused->valid = 0; + unused->refcnt = 1; + unused->ticks = ticks; + // add to our hash bucket, after head + unused->next = bcache.buckets[bucket_index].head.next; + unused->prev = &bcache.buckets[bucket_index].head; + bcache.buckets[bucket_index].head.next->prev = unused; + bcache.buckets[bucket_index].head.next = unused; + release(&bcache.buckets[bucket_index].lock); + + acquiresleep(&unused->lock); + return unused; + } panic("bget: no buffers"); } @@ -121,33 +200,38 @@ brelse(struct buf *b) releasesleep(&b->lock); - acquire(&bcache.lock); + uint bucket_index = hash(b->blockno); + acquire(&bcache.buckets[bucket_index].lock); + b->refcnt--; if (b->refcnt == 0) { - // no one is waiting for it. + // no one is waiting for it, move it to MRU b->next->prev = b->prev; b->prev->next = b->next; - b->next = bcache.head.next; - b->prev = &bcache.head; - bcache.head.next->prev = b; - bcache.head.next = b; + b->next = bcache.buckets[bucket_index].head.next; + b->prev = &bcache.buckets[bucket_index].head; + bcache.buckets[bucket_index].head.next->prev = b; + bcache.buckets[bucket_index].head.next = b; + b->ticks = ticks; } - release(&bcache.lock); + release(&bcache.buckets[bucket_index].lock); } void bpin(struct buf *b) { - acquire(&bcache.lock); + uint id = hash(b->blockno); + acquire(&bcache.buckets[id].lock); b->refcnt++; - release(&bcache.lock); + release(&bcache.buckets[id].lock); } void bunpin(struct buf *b) { - acquire(&bcache.lock); + uint id = hash(b->blockno); + acquire(&bcache.buckets[id].lock); b->refcnt--; - release(&bcache.lock); + release(&bcache.buckets[id].lock); } diff --git a/kernel/buf.h b/kernel/buf.h index 4616e9e..bbef407 100644 --- a/kernel/buf.h +++ b/kernel/buf.h @@ -8,5 +8,6 @@ struct buf { struct buf *prev; // LRU cache list struct buf *next; uchar data[BSIZE]; + uint ticks; // snapshot of the global ticks (trap.c), taken when the buf was last used }; diff --git a/kernel/cow.c b/kernel/cow.c new file mode 100644 index 0000000..b3634fc --- /dev/null +++ b/kernel/cow.c @@ -0,0 +1,30 @@ +// COW pagefault handler +#include "types.h" +#include "riscv.h" +#include "defs.h" + +int +cow_handler(pagetable_t pagetable, uint64 va) +{ + // you can't really write to ridiculous pointers + if(va >= MAXVA || PGROUNDDOWN(va) == 0) + return -1; + pte_t *pte = walk(pagetable, va, 0); + if(pte == 0 || (*pte & PTE_V) == 0 || (*pte & PTE_U) == 0) + return -1; + if(*pte & PTE_C){ + uint64 pa_orig = PTE2PA(*pte); + uint64 pa_new = (uint64)kalloc(); + if(pa_new == 0){ + printf("cow pagefault: kalloc failed\n"); + return -1; + } + // copy the page and add write permission + memmove((void*)pa_new, (void*)pa_orig, PGSIZE); + uint64 flags = (PTE_FLAGS(*pte) | PTE_W) & ~PTE_C; + *pte =
PA2PTE(pa_new) | flags; + kfree((void*)pa_orig); + } else if ((*pte & PTE_W) == 0) + return -1; + return 0; +} diff --git a/kernel/defs.h b/kernel/defs.h index a3c962b..541c97e 100644 --- a/kernel/defs.h +++ b/kernel/defs.h @@ -1,3 +1,7 @@ +#ifdef LAB_MMAP +typedef unsigned long size_t; +typedef long int off_t; +#endif struct buf; struct context; struct file; @@ -8,6 +12,11 @@ struct spinlock; struct sleeplock; struct stat; struct superblock; +#ifdef LAB_NET +struct mbuf; +struct sock; +#endif +struct sysinfo; // bio.c void binit(void); @@ -22,6 +31,9 @@ void consoleinit(void); void consoleintr(int); void consputc(int); +// cow.c +int cow_handler(pagetable_t, uint64); + // exec.c int exec(char*, char**); @@ -60,9 +72,12 @@ void ramdiskintr(void); void ramdiskrw(struct buf*); // kalloc.c +int refcnt_inc(uint64); +int refcnt_dec(uint64); void* kalloc(void); void kfree(void *); void kinit(void); +int get_freemem(void); // log.c void initlog(int, struct superblock*); @@ -80,6 +95,7 @@ int pipewrite(struct pipe*, uint64, int); void printf(char*, ...); void panic(char*) __attribute__((noreturn)); void printfinit(void); +void backtrace(void); // proc.c int cpuid(void); @@ -106,6 +122,8 @@ void yield(void); int either_copyout(int user_dst, uint64 dst, void *src, uint64 len); int either_copyin(void *dst, int user_src, uint64 src, uint64 len); void procdump(void); +int get_nproc(void); +int pgaccess(uint64 base, int len, uint64 mask); // swtch.S void swtch(struct context*, struct context*); @@ -117,6 +135,10 @@ void initlock(struct spinlock*, char*); void release(struct spinlock*); void push_off(void); void pop_off(void); +int atomic_read4(int *addr); +#ifdef LAB_LOCK +void freelock(struct spinlock*); +#endif // sleeplock.c void acquiresleep(struct sleeplock*); @@ -141,6 +163,9 @@ int fetchstr(uint64, char*, int); int fetchaddr(uint64, uint64*); void syscall(); +// sysinfo.c +int sys_info(uint64); + // trap.c extern uint ticks; void trapinit(void); @@ -173,6 +198,7 @@ uint64 walkaddr(pagetable_t, uint64); int copyout(pagetable_t, uint64, char *, uint64); int copyin(pagetable_t, char *, uint64, uint64); int copyinstr(pagetable_t, char *, uint64, uint64); +void vmprint(pagetable_t); // plic.c void plicinit(void); @@ -187,3 +213,44 @@ void virtio_disk_intr(void); // number of elements in fixed-size array #define NELEM(x) (sizeof(x)/sizeof((x)[0])) + + + +#ifdef LAB_PGTBL +// vmcopyin.c +int copyin_new(pagetable_t, char *, uint64, uint64); +int copyinstr_new(pagetable_t, char *, uint64, uint64); +#endif + +// stats.c +void statsinit(void); +void statsinc(void); + +// sprintf.c +int snprintf(char*, int, char*, ...); + +#ifdef KCSAN +void kcsaninit(); +#endif + +#ifdef LAB_NET +// pci.c +void pci_init(); + +// e1000.c +void e1000_init(uint32 *); +void e1000_intr(void); +int e1000_transmit(struct mbuf*); + +// net.c +void net_rx(struct mbuf*); +void net_tx_udp(struct mbuf*, uint32, uint16, uint16); + +// sysnet.c +void sockinit(void); +int sockalloc(struct file **, uint32, uint16, uint16); +void sockclose(struct sock *); +int sockread(struct sock *, uint64, int); +int sockwrite(struct sock *, uint64, int); +void sockrecvudp(struct mbuf*, uint32, uint16, uint16); +#endif diff --git a/kernel/e1000.c b/kernel/e1000.c new file mode 100644 index 0000000..c9ba9e2 --- /dev/null +++ b/kernel/e1000.c @@ -0,0 +1,178 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "spinlock.h" +#include "proc.h" +#include "defs.h" +#include "e1000_dev.h" +#include "net.h" + +#define 
TX_RING_SIZE 16 +static struct tx_desc tx_ring[TX_RING_SIZE] __attribute__((aligned(16))); +static struct mbuf *tx_mbufs[TX_RING_SIZE]; + +#define RX_RING_SIZE 16 +static struct rx_desc rx_ring[RX_RING_SIZE] __attribute__((aligned(16))); +static struct mbuf *rx_mbufs[RX_RING_SIZE]; + +// remember where the e1000's registers live. +static volatile uint32 *regs; + +struct spinlock e1000_lock; + +// called by pci_init(). +// xregs is the memory address at which the +// e1000's registers are mapped. +void +e1000_init(uint32 *xregs) +{ + int i; + + initlock(&e1000_lock, "e1000"); + + regs = xregs; + + // Reset the device + regs[E1000_IMS] = 0; // disable interrupts + regs[E1000_CTL] |= E1000_CTL_RST; + regs[E1000_IMS] = 0; // re-disable interrupts + __sync_synchronize(); + + // [E1000 14.5] Transmit initialization + memset(tx_ring, 0, sizeof(tx_ring)); + for (i = 0; i < TX_RING_SIZE; i++) { + tx_ring[i].status = E1000_TXD_STAT_DD; + tx_mbufs[i] = 0; + } + regs[E1000_TDBAL] = (uint64) tx_ring; + if(sizeof(tx_ring) % 128 != 0) + panic("e1000"); + regs[E1000_TDLEN] = sizeof(tx_ring); + regs[E1000_TDH] = regs[E1000_TDT] = 0; + + // [E1000 14.4] Receive initialization + memset(rx_ring, 0, sizeof(rx_ring)); + for (i = 0; i < RX_RING_SIZE; i++) { + rx_mbufs[i] = mbufalloc(0); + if (!rx_mbufs[i]) + panic("e1000"); + rx_ring[i].addr = (uint64) rx_mbufs[i]->head; + } + regs[E1000_RDBAL] = (uint64) rx_ring; + if(sizeof(rx_ring) % 128 != 0) + panic("e1000"); + regs[E1000_RDH] = 0; + regs[E1000_RDT] = RX_RING_SIZE - 1; + regs[E1000_RDLEN] = sizeof(rx_ring); + + // filter by qemu's MAC address, 52:54:00:12:34:56 + regs[E1000_RA] = 0x12005452; + regs[E1000_RA+1] = 0x5634 | (1<<31); + // multicast table + for (int i = 0; i < 4096/32; i++) + regs[E1000_MTA + i] = 0; + + // transmitter control bits. + regs[E1000_TCTL] = E1000_TCTL_EN | // enable + E1000_TCTL_PSP | // pad short packets + (0x10 << E1000_TCTL_CT_SHIFT) | // collision stuff + (0x40 << E1000_TCTL_COLD_SHIFT); + regs[E1000_TIPG] = 10 | (8<<10) | (6<<20); // inter-pkt gap + + // receiver control bits. + regs[E1000_RCTL] = E1000_RCTL_EN | // enable receiver + E1000_RCTL_BAM | // enable broadcast + E1000_RCTL_SZ_2048 | // 2048-byte rx buffers + E1000_RCTL_SECRC; // strip CRC + + // ask e1000 for receive interrupts. + regs[E1000_RDTR] = 0; // interrupt after every received packet (no timer) + regs[E1000_RADV] = 0; // interrupt after every packet (no timer) + regs[E1000_IMS] = (1 << 7); // RXDW -- Receiver Descriptor Write Back +} + +int +e1000_transmit(struct mbuf *m) +{ + // the mbuf contains an ethernet frame; program it into + // the TX descriptor ring so that the e1000 sends it. Stash + // a pointer so that it can be freed after sending. + acquire(&e1000_lock); + + int cur_idx = regs[E1000_TDT]; + + // check if the STAT_DD bit is set in the current descriptor; + // if it is not set, a previous tx in this descriptor is still in flight, so return an error.
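+ // (The NIC sets E1000_TXD_STAT_DD in a descriptor once it has finished sending it, so a clear DD bit means the slot is still owned by the hardware.)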
+ + if(!(tx_ring[cur_idx].status & E1000_TXD_STAT_DD)){ + release(&e1000_lock); + return -1; + } + + // free previous mbuf and update current descriptor + if(tx_mbufs[cur_idx]) + mbuffree(tx_mbufs[cur_idx]); + tx_ring[cur_idx].addr = (uint64)m->head; + tx_ring[cur_idx].length = (uint16)m->len; + tx_ring[cur_idx].cmd = E1000_TXD_CMD_RS | E1000_TXD_CMD_EOP; + // also clear status bits + tx_ring[cur_idx].status = 0; + + // stash current mbuf in tx_mbufs (it is freed the next time this slot is reused) + tx_mbufs[cur_idx] = m; + + // update the ring position to point to the next descriptor + regs[E1000_TDT] = (cur_idx + 1) % TX_RING_SIZE; + + release(&e1000_lock); + return 0; +} + +static void +e1000_recv(void) +{ + // Check for packets that have arrived from the e1000 + // Create and deliver an mbuf for each packet (using net_rx()). + while(1){ + acquire(&e1000_lock); + int cur_idx = (regs[E1000_RDT]+1) % RX_RING_SIZE; + + // check if a new packet has completed. If not, drop the lock and stop. + if(!(rx_ring[cur_idx].status & E1000_RXD_STAT_DD)){ + release(&e1000_lock); + break; + } + + // update the mbuf's length to the len reported by rx_desc + // mbufput(rx_mbufs[cur_idx], rx_ring[cur_idx].length); + rx_mbufs[cur_idx]->len = rx_ring[cur_idx].length; + + // stash mbuf, for later net_rx() + struct mbuf *rx_buf = rx_mbufs[cur_idx]; + + // net_rx() takes ownership of the mbuf and eventually frees it, so allocate a fresh one for this ring slot + rx_mbufs[cur_idx] = mbufalloc(0); + if(!rx_mbufs[cur_idx]) + panic("e1000_recv: mbufalloc"); + + // update buffer addr and clear status bits + rx_ring[cur_idx].addr = (uint64)rx_mbufs[cur_idx]->head; + rx_ring[cur_idx].status = 0; + + // update the E1000_RDT register to point to next position + regs[E1000_RDT] = cur_idx; + release(&e1000_lock); + + // pass to the network stack; the lock must not be held here, + // since net_rx() can call back into e1000_transmit(), which acquires it + net_rx(rx_buf); + } +} + +void +e1000_intr(void) +{ + // tell the e1000 we've seen this interrupt; + // without this the e1000 won't raise any + // further interrupts. + regs[E1000_ICR] = 0xffffffff; + + e1000_recv(); +} diff --git a/kernel/e1000_dev.h b/kernel/e1000_dev.h new file mode 100644 index 0000000..9b462df --- /dev/null +++ b/kernel/e1000_dev.h @@ -0,0 +1,125 @@ +// +// E1000 hardware definitions: registers and DMA ring format. +// from the Intel 82540EP/EM &c manual.
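+// Register offsets below are divided by 4 because the driver indexes them through a uint32 pointer (regs[] in e1000.c).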
+// + +/* Registers */ +#define E1000_CTL (0x00000/4) /* Device Control Register - RW */ +#define E1000_ICR (0x000C0/4) /* Interrupt Cause Read - R */ +#define E1000_IMS (0x000D0/4) /* Interrupt Mask Set - RW */ +#define E1000_RCTL (0x00100/4) /* RX Control - RW */ +#define E1000_TCTL (0x00400/4) /* TX Control - RW */ +#define E1000_TIPG (0x00410/4) /* TX Inter-packet gap - RW */ +#define E1000_RDBAL (0x02800/4) /* RX Descriptor Base Address Low - RW */ +#define E1000_RDTR (0x02820/4) /* RX Delay Timer */ +#define E1000_RADV (0x0282C/4) /* RX Interrupt Absolute Delay Timer */ +#define E1000_RDH (0x02810/4) /* RX Descriptor Head - RW */ +#define E1000_RDT (0x02818/4) /* RX Descriptor Tail - RW */ +#define E1000_RDLEN (0x02808/4) /* RX Descriptor Length - RW */ +#define E1000_RSRPD (0x02C00/4) /* RX Small Packet Detect Interrupt */ +#define E1000_TDBAL (0x03800/4) /* TX Descriptor Base Address Low - RW */ +#define E1000_TDLEN (0x03808/4) /* TX Descriptor Length - RW */ +#define E1000_TDH (0x03810/4) /* TX Descriptor Head - RW */ +#define E1000_TDT (0x03818/4) /* TX Descriptor Tail - RW */ +#define E1000_MTA (0x05200/4) /* Multicast Table Array - RW Array */ +#define E1000_RA (0x05400/4) /* Receive Address - RW Array */ + +/* Device Control */ +#define E1000_CTL_SLU 0x00000040 /* set link up */ +#define E1000_CTL_FRCSPD 0x00000800 /* force speed */ +#define E1000_CTL_FRCDPLX 0x00001000 /* force duplex */ +#define E1000_CTL_RST 0x00400000 /* full reset */ + +/* Transmit Control */ +#define E1000_TCTL_RST 0x00000001 /* software reset */ +#define E1000_TCTL_EN 0x00000002 /* enable tx */ +#define E1000_TCTL_BCE 0x00000004 /* busy check enable */ +#define E1000_TCTL_PSP 0x00000008 /* pad short packets */ +#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */ +#define E1000_TCTL_CT_SHIFT 4 +#define E1000_TCTL_COLD 0x003ff000 /* collision distance */ +#define E1000_TCTL_COLD_SHIFT 12 +#define E1000_TCTL_SWXOFF 0x00400000 /* SW Xoff transmission */ +#define E1000_TCTL_PBE 0x00800000 /* Packet Burst Enable */ +#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ +#define E1000_TCTL_NRTU 0x02000000 /* No Re-transmit on underrun */ +#define E1000_TCTL_MULR 0x10000000 /* Multiple request support */ + +/* Receive Control */ +#define E1000_RCTL_RST 0x00000001 /* Software reset */ +#define E1000_RCTL_EN 0x00000002 /* enable */ +#define E1000_RCTL_SBP 0x00000004 /* store bad packet */ +#define E1000_RCTL_UPE 0x00000008 /* unicast promiscuous enable */ +#define E1000_RCTL_MPE 0x00000010 /* multicast promiscuous enab */ +#define E1000_RCTL_LPE 0x00000020 /* long packet enable */ +#define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */ +#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */ +#define E1000_RCTL_LBM_SLP 0x00000080 /* serial link loopback mode */ +#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ +#define E1000_RCTL_DTYP_MASK 0x00000C00 /* Descriptor type mask */ +#define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */ +#define E1000_RCTL_RDMTS_HALF 0x00000000 /* rx desc min threshold size */ +#define E1000_RCTL_RDMTS_QUAT 0x00000100 /* rx desc min threshold size */ +#define E1000_RCTL_RDMTS_EIGTH 0x00000200 /* rx desc min threshold size */ +#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */ +#define E1000_RCTL_MO_0 0x00000000 /* multicast offset 11:0 */ +#define E1000_RCTL_MO_1 0x00001000 /* multicast offset 12:1 */ +#define E1000_RCTL_MO_2 0x00002000 /* multicast offset 13:2 */ +#define E1000_RCTL_MO_3 0x00003000 /* multicast
offset 15:4 */ +#define E1000_RCTL_MDR 0x00004000 /* multicast desc ring 0 */ +#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */ +/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */ +#define E1000_RCTL_SZ_2048 0x00000000 /* rx buffer size 2048 */ +#define E1000_RCTL_SZ_1024 0x00010000 /* rx buffer size 1024 */ +#define E1000_RCTL_SZ_512 0x00020000 /* rx buffer size 512 */ +#define E1000_RCTL_SZ_256 0x00030000 /* rx buffer size 256 */ +/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */ +#define E1000_RCTL_SZ_16384 0x00010000 /* rx buffer size 16384 */ +#define E1000_RCTL_SZ_8192 0x00020000 /* rx buffer size 8192 */ +#define E1000_RCTL_SZ_4096 0x00030000 /* rx buffer size 4096 */ +#define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */ +#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */ +#define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */ +#define E1000_RCTL_DPF 0x00400000 /* discard pause frames */ +#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ +#define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */ +#define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ +#define E1000_RCTL_FLXBUF_MASK 0x78000000 /* Flexible buffer size */ +#define E1000_RCTL_FLXBUF_SHIFT 27 /* Flexible buffer shift */ + +#define DATA_MAX 1518 + +/* Transmit Descriptor command definitions [E1000 3.3.3.1] */ +#define E1000_TXD_CMD_EOP 0x01 /* End of Packet */ +#define E1000_TXD_CMD_RS 0x08 /* Report Status */ + +/* Transmit Descriptor status definitions [E1000 3.3.3.2] */ +#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */ + +// [E1000 3.3.3] +struct tx_desc +{ + uint64 addr; + uint16 length; + uint8 cso; + uint8 cmd; + uint8 status; + uint8 css; + uint16 special; +}; + +/* Receive Descriptor bit definitions [E1000 3.2.3.1] */ +#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ +#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */ + +// [E1000 3.2.3] +struct rx_desc +{ + uint64 addr; /* Address of the descriptor's data buffer */ + uint16 length; /* Length of data DMAed into data buffer */ + uint16 csum; /* Packet checksum */ + uint8 status; /* Descriptor status */ + uint8 errors; /* Descriptor Errors */ + uint16 special; +}; + diff --git a/kernel/exec.c b/kernel/exec.c index e18bbb6..35b35f5 100644 --- a/kernel/exec.c +++ b/kernel/exec.c @@ -128,6 +128,10 @@ exec(char *path, char **argv) p->trapframe->sp = sp; // initial stack pointer proc_freepagetable(oldpagetable, oldsz); + if(p->pid == 1){ + vmprint(p->pagetable); + } + return argc; // this ends up in a0, the first argument to main(argc, argv) bad: diff --git a/kernel/file.c b/kernel/file.c index 25fa226..0fba21b 100644 --- a/kernel/file.c +++ b/kernel/file.c @@ -80,6 +80,11 @@ fileclose(struct file *f) iput(ff.ip); end_op(); } +#ifdef LAB_NET + else if(ff.type == FD_SOCK){ + sockclose(ff.sock); + } +#endif } // Get metadata about file f. @@ -122,7 +127,13 @@ fileread(struct file *f, uint64 addr, int n) if((r = readi(f->ip, 1, addr, f->off, n)) > 0) f->off += r; iunlock(f->ip); - } else { + } +#ifdef LAB_NET + else if(f->type == FD_SOCK){ + r = sockread(f->sock, addr, n); + } +#endif + else { panic("fileread"); } @@ -173,7 +184,13 @@ filewrite(struct file *f, uint64 addr, int n) i += r; } ret = (i == n ? 
n : -1); - } else { +#ifdef LAB_NET + else if(f->type == FD_SOCK){ + ret = sockwrite(f->sock, addr, n); + } +#endif + else { panic("filewrite"); } diff --git a/kernel/file.h b/kernel/file.h index b076d1d..1eb5107 100644 --- a/kernel/file.h +++ b/kernel/file.h @@ -1,10 +1,17 @@ struct file { +#ifdef LAB_NET + enum { FD_NONE, FD_PIPE, FD_INODE, FD_DEVICE, FD_SOCK } type; +#else enum { FD_NONE, FD_PIPE, FD_INODE, FD_DEVICE } type; +#endif int ref; // reference count char readable; char writable; struct pipe *pipe; // FD_PIPE struct inode *ip; // FD_INODE and FD_DEVICE +#ifdef LAB_NET + struct sock *sock; // FD_SOCK +#endif uint off; // FD_INODE short major; // FD_DEVICE }; @@ -38,3 +45,4 @@ struct devsw { extern struct devsw devsw[]; #define CONSOLE 1 +#define STATS 2 diff --git a/kernel/fs.c b/kernel/fs.c index c6bab15..6c4079e 100644 --- a/kernel/fs.c +++ b/kernel/fs.c @@ -295,11 +295,11 @@ ilock(struct inode *ip) struct buf *bp; struct dinode *dip; - if(ip == 0 || ip->ref < 1) + if(ip == 0 || atomic_read4(&ip->ref) < 1) panic("ilock"); acquiresleep(&ip->lock); - + if(ip->valid == 0){ bp = bread(ip->dev, IBLOCK(ip->inum, sb)); dip = (struct dinode*)bp->data + ip->inum%IPB; @@ -320,7 +320,7 @@ ilock(struct inode *ip) void iunlock(struct inode *ip) { - if(ip == 0 || !holdingsleep(&ip->lock) || ip->ref < 1) + if(ip == 0 || !holdingsleep(&ip->lock) || atomic_read4(&ip->ref) < 1) panic("iunlock"); releasesleep(&ip->lock); @@ -416,7 +416,7 @@ bmap(struct inode *ip, uint bn) brelse(bp); return addr; } panic("bmap: out of range"); } @@ -447,7 +447,7 @@ itrunc(struct inode *ip) bfree(ip->dev, ip->addrs[NDIRECT]); ip->addrs[NDIRECT] = 0; } - + ip->size = 0; iupdate(ip); } diff --git a/kernel/kalloc.c b/kernel/kalloc.c index 0699e7e..85cf6b7 100644 --- a/kernel/kalloc.c +++ b/kernel/kalloc.c @@ -9,6 +9,9 @@ #include "riscv.h" #include "defs.h" +// NOTE: use this macro only with interrupts disabled (push_off()), +// so that cpuid() stays stable and the per-CPU list cannot change under us. +#define CUR_KMEM (kmem_list[cpuid()]) + void freerange(void *pa_start, void *pa_end); extern char end[]; // first address after kernel.
@@ -18,15 +21,55 @@ struct run { struct run *next; }; -struct { +struct kmem { struct spinlock lock; struct run *freelist; -} kmem; +}; + +struct kmem kmem_list[NCPU]; + +int phypg_refcnt[PHYSTOP/PGSIZE]; + +struct spinlock refcnt_lock; + +// Increase the refcnt, returning the new value +int +refcnt_inc(uint64 pa) +{ + acquire(&refcnt_lock); + int *prefcnt = &phypg_refcnt[pa/PGSIZE]; + if(pa >= PHYSTOP || *prefcnt < 1) + panic("increase refcnt"); + int cnt = ++(*prefcnt); + release(&refcnt_lock); + return cnt; +} + +// Decrease the refcnt, returning the new value +int +refcnt_dec(uint64 pa) +{ + acquire(&refcnt_lock); + int *prefcnt = &phypg_refcnt[pa/PGSIZE]; + if(pa >= PHYSTOP || *prefcnt < 1) + panic("decrease refcnt"); + int cnt = --(*prefcnt); + release(&refcnt_lock); + return cnt; +} void kinit() { - initlock(&kmem.lock, "kmem"); + for(int i = 0; i < NCPU; i++){ + // initlock() keeps the name pointer, so each cpu needs its own buffer + static char lock_names[NCPU][8]; + snprintf(lock_names[i], sizeof(lock_names[i]), "kmem.%d", i); + initlock(&kmem_list[i].lock, lock_names[i]); + } + // init all refcnts to 1; freerange() below will kfree() each page down to 0 + for(uint64 p = PGROUNDUP((uint64)end); p + PGSIZE <= PHYSTOP; p += PGSIZE) + phypg_refcnt[p/PGSIZE] = 1; + initlock(&refcnt_lock, "refcnt"); freerange(end, (void*)PHYSTOP); } @@ -51,15 +94,24 @@ kfree(void *pa) if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP) panic("kfree"); + // drop one ref; if refs to this phy page remain, do not actually free it yet + if(refcnt_dec((uint64)pa) > 0) + return; + // Fill with junk to catch dangling refs. memset(pa, 1, PGSIZE); r = (struct run*)pa; - acquire(&kmem.lock); - r->next = kmem.freelist; - kmem.freelist = r; - release(&kmem.lock); + push_off(); + struct kmem *kmem = &CUR_KMEM; + acquire(&kmem->lock); + r->next = kmem->freelist; + kmem->freelist = r; + release(&kmem->lock); + pop_off(); } // Allocate one 4096-byte page of physical memory.
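// Returns 0 if no page is available on any CPU's freelist.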
@@ -70,13 +122,67 @@ kalloc(void) { struct run *r; - acquire(&kmem.lock); - r = kmem.freelist; - if(r) - kmem.freelist = r->next; - release(&kmem.lock); + push_off(); + struct kmem *kmem = &CUR_KMEM; + acquire(&kmem->lock); + r = kmem->freelist; + if(r){ + acquire(&refcnt_lock); + if(phypg_refcnt[(uint64)r/PGSIZE]) + panic("kalloc: invalid refcnt"); + phypg_refcnt[(uint64)r/PGSIZE] = 1; + release(&refcnt_lock); + kmem->freelist = r->next; + } + + // release our own freelist lock before touching other CPUs' locks, to avoid deadlock + release(&kmem->lock); + + if(!r){ + // try to steal mem from other cpus' kmem + for(int i = 0; i < NCPU; i++){ + if(kmem == &kmem_list[i]) + continue; + + acquire(&kmem_list[i].lock); + struct run *f = kmem_list[i].freelist; + if(f){ + r = f; + kmem_list[i].freelist = f->next; + } + if(r){ + // take refcnt_lock before setting the refcnt, to prevent races + acquire(&refcnt_lock); + // the donor's freelist lock can be dropped now + release(&kmem_list[i].lock); + if(phypg_refcnt[(uint64)r/PGSIZE]) + panic("kalloc: invalid refcnt"); + phypg_refcnt[(uint64)r/PGSIZE] = 1; + release(&refcnt_lock); + break; + } + release(&kmem_list[i].lock); + } + } if(r) memset((char*)r, 5, PGSIZE); // fill with junk + pop_off(); return (void*)r; } + +int +get_freemem(void) +{ + int n = 0; + struct run *r; + + for(int i = 0; i < NCPU; i++){ + acquire(&kmem_list[i].lock); + for(r = kmem_list[i].freelist; r; r = r->next) + n++; + release(&kmem_list[i].lock); + } + return n * PGSIZE; +} diff --git a/kernel/kcsan.c b/kernel/kcsan.c new file mode 100644 index 0000000..90861ba --- /dev/null +++ b/kernel/kcsan.c @@ -0,0 +1,323 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "spinlock.h" +#include "riscv.h" +#include "proc.h" +#include "defs.h" + +// +// Race detector using gcc's thread sanitizer. It delays all stores +// and loads and monitors if any other CPU is using the same address. +// If so, we have a race and print out the backtrace of the thread +// that raced and the thread that set the watchpoint. +// + +// +// To run with kcsan: +// make clean +// make KCSAN=1 qemu +// + +// The number of watch points. +#define NWATCH (NCPU) + +// The number of cycles to delay stores, whatever that means on qemu.
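+// (A longer delay keeps each watchpoint armed longer, catching more racing accesses at the cost of slower stores.)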
+//#define DELAY_CYCLES 20000 +#define DELAY_CYCLES 200000 + +#define MAXTRACE 20 + +int +trace(uint64 *trace, int maxtrace) +{ + uint64 i = 0; + + push_off(); + + uint64 fp = r_fp(); + uint64 ra, low = PGROUNDDOWN(fp) + 16, high = PGROUNDUP(fp); + + while(!(fp & 7) && fp >= low && fp < high){ + ra = *(uint64*)(fp - 8); + fp = *(uint64*)(fp - 16); + trace[i++] = ra; + if(i >= maxtrace) + break; + } + + pop_off(); + + return i; +} + +struct watch { + uint64 addr; + int write; + int race; + uint64 trace[MAXTRACE]; + int tracesz; +}; + +struct { + struct spinlock lock; + struct watch points[NWATCH]; + int on; +} tsan; + +static struct watch* +wp_lookup(uint64 addr) +{ + for(struct watch *w = &tsan.points[0]; w < &tsan.points[NWATCH]; w++) { + if(w->addr == addr) { + return w; + } + } + return 0; +} + +static int +wp_install(uint64 addr, int write) +{ + for(struct watch *w = &tsan.points[0]; w < &tsan.points[NWATCH]; w++) { + if(w->addr == 0) { + w->addr = addr; + w->write = write; + w->tracesz = trace(w->trace, MAXTRACE); + return 1; + } + } + panic("wp_install"); + return 0; +} + +static void +wp_remove(uint64 addr) +{ + for(struct watch *w = &tsan.points[0]; w < &tsan.points[NWATCH]; w++) { + if(w->addr == addr) { + w->addr = 0; + w->tracesz = 0; + return; + } + } + panic("remove"); +} + +static void +printtrace(uint64 *t, int n) +{ + int i; + + for(i = 0; i < n; i++) { + printf("%p\n", t[i]); + } +} + +static void +race(char *s, struct watch *w) { + uint64 t[MAXTRACE]; + int n; + + n = trace(t, MAXTRACE); + printf("== race detected ==\n"); + printf("backtrace for racing %s\n", s); + printtrace(t, n); + printf("backtrace for watchpoint:\n"); + printtrace(w->trace, w->tracesz); + printf("==========\n"); +} + +// cycle counter +static inline uint64 +r_cycle() +{ + uint64 x; + asm volatile("rdcycle %0" : "=r" (x) ); + return x; +} + +static void delay(void) __attribute__((noinline)); +static void delay() { + uint64 stop = r_cycle() + DELAY_CYCLES; + uint64 c = r_cycle(); + while(c < stop) { + c = r_cycle(); + } +} + +static void +kcsan_read(uint64 addr, int sz) +{ + struct watch *w; + + acquire(&tsan.lock); + if((w = wp_lookup(addr)) != 0) { + if(w->write) { + race("load", w); + } + release(&tsan.lock); + return; + } + release(&tsan.lock); +} + +static void +kcsan_write(uint64 addr, int sz) +{ + struct watch *w; + + acquire(&tsan.lock); + if((w = wp_lookup(addr)) != 0) { + race("store", w); + release(&tsan.lock); + return; + } + + // no watchpoint; try to install one + if(wp_install(addr, 1)) { + + release(&tsan.lock); + + // XXX maybe read value at addr before and after delay to catch + // races of unknown origins (e.g., device). + + delay(); + + acquire(&tsan.lock); + + wp_remove(addr); + } + release(&tsan.lock); +} + +// tsan.on will only have effect with "make KCSAN=1" +void +kcsaninit(void) +{ + initlock(&tsan.lock, "tsan"); + tsan.on = 1; + __sync_synchronize(); +} + +// +// Calls inserted by compiler into kernel binary, except for this file.
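+// (With KCSAN=1, gcc's -fsanitize=thread pass wraps every load and store with these __tsan_* hooks.)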
+// + +void +__tsan_init(void) +{ +} + +void +__tsan_read1(uint64 addr) +{ + if(!tsan.on) + return; + // kcsan_read(addr, 1); +} + +void +__tsan_read2(uint64 addr) +{ + if(!tsan.on) + return; + kcsan_read(addr, 2); +} + +void +__tsan_read4(uint64 addr) +{ + if(!tsan.on) + return; + kcsan_read(addr, 4); +} + +void +__tsan_read8(uint64 addr) +{ + if(!tsan.on) + return; + kcsan_read(addr, 8); +} + +void +__tsan_read_range(uint64 addr, uint64 size) +{ + if(!tsan.on) + return; + kcsan_read(addr, size); +} + +void +__tsan_write1(uint64 addr) +{ + if(!tsan.on) + return; + // kcsan_write(addr, 1); +} + +void +__tsan_write2(uint64 addr) +{ + if(!tsan.on) + return; + kcsan_write(addr, 2); +} + +void +__tsan_write4(uint64 addr) +{ + if(!tsan.on) + return; + kcsan_write(addr, 4); +} + +void +__tsan_write8(uint64 addr) +{ + if(!tsan.on) + return; + kcsan_write(addr, 8); +} + +void +__tsan_write_range(uint64 addr, uint64 size) +{ + if(!tsan.on) + return; + kcsan_write(addr, size); +} + +void +__tsan_atomic_thread_fence(int order) +{ + __sync_synchronize(); +} + +uint32 +__tsan_atomic32_load(uint *ptr, uint *val, int order) +{ + uint t; + __atomic_load(ptr, &t, __ATOMIC_SEQ_CST); + return t; +} + +void +__tsan_atomic32_store(uint *ptr, uint val, int order) +{ + __atomic_store(ptr, &val, __ATOMIC_SEQ_CST); +} + +// We don't use this +void +__tsan_func_entry(uint64 pc) +{ +} + +// We don't use this +void +__tsan_func_exit(void) +{ +} + + diff --git a/kernel/main.c b/kernel/main.c index f0d3171..48c9555 100644 --- a/kernel/main.c +++ b/kernel/main.c @@ -12,6 +12,9 @@ main() { if(cpuid() == 0){ consoleinit(); +#if defined(LAB_LOCK) + statsinit(); +#endif printfinit(); printf("\n"); printf("xv6 kernel is booting\n"); @@ -28,11 +31,18 @@ main() iinit(); // inode table fileinit(); // file table virtio_disk_init(); // emulated hard disk +#ifdef LAB_NET + pci_init(); + sockinit(); +#endif userinit(); // first user process +#ifdef KCSAN + kcsaninit(); +#endif __sync_synchronize(); started = 1; } else { - while(started == 0) + while(atomic_read4((int *) &started) == 0) ; __sync_synchronize(); printf("hart %d starting\n", cpuid()); diff --git a/kernel/memlayout.h b/kernel/memlayout.h index cac3cb1..74d2fd4 100644 --- a/kernel/memlayout.h +++ b/kernel/memlayout.h @@ -25,6 +25,10 @@ #define VIRTIO0 0x10001000 #define VIRTIO0_IRQ 1 +#ifdef LAB_NET +#define E1000_IRQ 33 +#endif + // core local interruptor (CLINT), which contains the timer. #define CLINT 0x2000000L #define CLINT_MTIMECMP(hartid) (CLINT + 0x4000 + 8*(hartid)) @@ -34,8 +38,11 @@ #define PLIC 0x0c000000L #define PLIC_PRIORITY (PLIC + 0x0) #define PLIC_PENDING (PLIC + 0x1000) +#define PLIC_MENABLE(hart) (PLIC + 0x2000 + (hart)*0x100) #define PLIC_SENABLE(hart) (PLIC + 0x2080 + (hart)*0x100) +#define PLIC_MPRIORITY(hart) (PLIC + 0x200000 + (hart)*0x2000) #define PLIC_SPRIORITY(hart) (PLIC + 0x201000 + (hart)*0x2000) +#define PLIC_MCLAIM(hart) (PLIC + 0x200004 + (hart)*0x2000) #define PLIC_SCLAIM(hart) (PLIC + 0x201004 + (hart)*0x2000) // the kernel expects there to be RAM @@ -50,7 +57,7 @@ // map kernel stacks beneath the trampoline, // each surrounded by invalid guard pages. -#define KSTACK(p) (TRAMPOLINE - ((p)+1)* 2*PGSIZE) +#define KSTACK(p) (TRAMPOLINE - (p)*2*PGSIZE - 3*PGSIZE) // User memory layout. // Address zero first: @@ -59,6 +66,14 @@ // fixed-size stack // expandable heap // ... 
+// USYSCALL (shared with kernel) // TRAPFRAME (p->trapframe, used by the trampoline) // TRAMPOLINE (the same page as in the kernel) #define TRAPFRAME (TRAMPOLINE - PGSIZE) +#ifdef LAB_PGTBL +#define USYSCALL (TRAPFRAME - PGSIZE) + +struct usyscall { + int pid; // Process ID +}; +#endif diff --git a/kernel/net.c b/kernel/net.c new file mode 100644 index 0000000..137ea2b --- /dev/null +++ b/kernel/net.c @@ -0,0 +1,374 @@ +// +// networking protocol support (IP, UDP, ARP, etc.). +// + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "spinlock.h" +#include "proc.h" +#include "net.h" +#include "defs.h" + +static uint32 local_ip = MAKE_IP_ADDR(10, 0, 2, 15); // qemu's idea of the guest IP +static uint8 local_mac[ETHADDR_LEN] = { 0x52, 0x54, 0x00, 0x12, 0x34, 0x56 }; +static uint8 broadcast_mac[ETHADDR_LEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; + +// Strips data from the start of the buffer and returns a pointer to it. +// Returns 0 if less than the full requested length is available. +char * +mbufpull(struct mbuf *m, unsigned int len) +{ + char *tmp = m->head; + if (m->len < len) + return 0; + m->len -= len; + m->head += len; + return tmp; +} + +// Prepends data to the beginning of the buffer and returns a pointer to it. +char * +mbufpush(struct mbuf *m, unsigned int len) +{ + m->head -= len; + if (m->head < m->buf) + panic("mbufpush"); + m->len += len; + return m->head; +} + +// Appends data to the end of the buffer and returns a pointer to it. +char * +mbufput(struct mbuf *m, unsigned int len) +{ + char *tmp = m->head + m->len; + m->len += len; + if (m->len > MBUF_SIZE) + panic("mbufput"); + return tmp; +} + +// Strips data from the end of the buffer and returns a pointer to it. +// Returns 0 if less than the full requested length is available. +char * +mbuftrim(struct mbuf *m, unsigned int len) +{ + if (len > m->len) + return 0; + m->len -= len; + return m->head + m->len; +} + +// Allocates a packet buffer. +struct mbuf * +mbufalloc(unsigned int headroom) +{ + struct mbuf *m; + + if (headroom > MBUF_SIZE) + return 0; + m = kalloc(); + if (m == 0) + return 0; + m->next = 0; + m->head = (char *)m->buf + headroom; + m->len = 0; + memset(m->buf, 0, sizeof(m->buf)); + return m; +} + +// Frees a packet buffer. +void +mbuffree(struct mbuf *m) +{ + kfree(m); +} + +// Pushes an mbuf to the end of the queue. +void +mbufq_pushtail(struct mbufq *q, struct mbuf *m) +{ + m->next = 0; + if (!q->head){ + q->head = q->tail = m; + return; + } + q->tail->next = m; + q->tail = m; +} + +// Pops an mbuf from the start of the queue. +struct mbuf * +mbufq_pophead(struct mbufq *q) +{ + struct mbuf *head = q->head; + if (!head) + return 0; + q->head = head->next; + return head; +} + +// Returns one (nonzero) if the queue is empty. +int +mbufq_empty(struct mbufq *q) +{ + return q->head == 0; +} + +// Initializes a queue of mbufs. +void +mbufq_init(struct mbufq *q) +{ + q->head = 0; +} + +// This code is lifted from FreeBSD's ping.c, and is copyright by the Regents +// of the University of California. +static unsigned short +in_cksum(const unsigned char *addr, int len) +{ + int nleft = len; + const unsigned short *w = (const unsigned short *)addr; + unsigned int sum = 0; + unsigned short answer = 0; + + /* + * Our algorithm is simple, using a 32 bit accumulator (sum), we add + * sequential 16 bit words to it, and at the end, fold back all the + * carry bits from the top 16 bits into the lower 16 bits.
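+ * (This is the standard one's-complement Internet checksum; RFC 1071 describes the same folding technique.)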
+ */ + while (nleft > 1) { + sum += *w++; + nleft -= 2; + } + + /* mop up an odd byte, if necessary */ + if (nleft == 1) { + *(unsigned char *)(&answer) = *(const unsigned char *)w; + sum += answer; + } + + /* add back carry outs from top 16 bits to low 16 bits */ + sum = (sum & 0xffff) + (sum >> 16); + sum += (sum >> 16); + /* guaranteed now that the lower 16 bits of sum are correct */ + + answer = ~sum; /* truncate to 16 bits */ + return answer; +} + +// sends an ethernet packet +static void +net_tx_eth(struct mbuf *m, uint16 ethtype) +{ + struct eth *ethhdr; + + ethhdr = mbufpushhdr(m, *ethhdr); + memmove(ethhdr->shost, local_mac, ETHADDR_LEN); + // In a real networking stack, dhost would be set to the address discovered + // through ARP. Because we don't support enough of the ARP protocol, set it + // to broadcast instead. + memmove(ethhdr->dhost, broadcast_mac, ETHADDR_LEN); + ethhdr->type = htons(ethtype); + if (e1000_transmit(m)) { + mbuffree(m); + } +} + +// sends an IP packet +static void +net_tx_ip(struct mbuf *m, uint8 proto, uint32 dip) +{ + struct ip *iphdr; + + // push the IP header + iphdr = mbufpushhdr(m, *iphdr); + memset(iphdr, 0, sizeof(*iphdr)); + iphdr->ip_vhl = (4 << 4) | (20 >> 2); + iphdr->ip_p = proto; + iphdr->ip_src = htonl(local_ip); + iphdr->ip_dst = htonl(dip); + iphdr->ip_len = htons(m->len); + iphdr->ip_ttl = 100; + iphdr->ip_sum = in_cksum((unsigned char *)iphdr, sizeof(*iphdr)); + + // now on to the ethernet layer + net_tx_eth(m, ETHTYPE_IP); +} + +// sends a UDP packet +void +net_tx_udp(struct mbuf *m, uint32 dip, + uint16 sport, uint16 dport) +{ + struct udp *udphdr; + + // put the UDP header + udphdr = mbufpushhdr(m, *udphdr); + udphdr->sport = htons(sport); + udphdr->dport = htons(dport); + udphdr->ulen = htons(m->len); + udphdr->sum = 0; // zero means no checksum is provided + + // now on to the IP layer + net_tx_ip(m, IPPROTO_UDP, dip); +} + +// sends an ARP packet +static int +net_tx_arp(uint16 op, uint8 dmac[ETHADDR_LEN], uint32 dip) +{ + struct mbuf *m; + struct arp *arphdr; + + m = mbufalloc(MBUF_DEFAULT_HEADROOM); + if (!m) + return -1; + + // generic part of ARP header + arphdr = mbufputhdr(m, *arphdr); + arphdr->hrd = htons(ARP_HRD_ETHER); + arphdr->pro = htons(ETHTYPE_IP); + arphdr->hln = ETHADDR_LEN; + arphdr->pln = sizeof(uint32); + arphdr->op = htons(op); + + // ethernet + IP part of ARP header + memmove(arphdr->sha, local_mac, ETHADDR_LEN); + arphdr->sip = htonl(local_ip); + memmove(arphdr->tha, dmac, ETHADDR_LEN); + arphdr->tip = htonl(dip); + + // header is ready, send the packet + net_tx_eth(m, ETHTYPE_ARP); + return 0; +} + +// receives an ARP packet +static void +net_rx_arp(struct mbuf *m) +{ + struct arp *arphdr; + uint8 smac[ETHADDR_LEN]; + uint32 sip, tip; + + arphdr = mbufpullhdr(m, *arphdr); + if (!arphdr) + goto done; + + // validate the ARP header + if (ntohs(arphdr->hrd) != ARP_HRD_ETHER || + ntohs(arphdr->pro) != ETHTYPE_IP || + arphdr->hln != ETHADDR_LEN || + arphdr->pln != sizeof(uint32)) { + goto done; + } + + // only requests are supported so far + // check if our IP was solicited + tip = ntohl(arphdr->tip); // target IP address + if (ntohs(arphdr->op) != ARP_OP_REQUEST || tip != local_ip) + goto done; + + // handle the ARP request + memmove(smac, arphdr->sha, ETHADDR_LEN); // sender's ethernet address + sip = ntohl(arphdr->sip); // sender's IP address (qemu's slirp) + net_tx_arp(ARP_OP_REPLY, smac, sip); + +done: + mbuffree(m); +} + +// receives a UDP packet +static void +net_rx_udp(struct mbuf *m, uint16 len, struct ip 
*iphdr) +{ + struct udp *udphdr; + uint32 sip; + uint16 sport, dport; + + + udphdr = mbufpullhdr(m, *udphdr); + if (!udphdr) + goto fail; + + // TODO: validate UDP checksum + + // validate lengths reported in headers + if (ntohs(udphdr->ulen) != len) + goto fail; + len -= sizeof(*udphdr); + if (len > m->len) + goto fail; + // minimum packet size could be larger than the payload + mbuftrim(m, m->len - len); + + // parse the necessary fields + sip = ntohl(iphdr->ip_src); + sport = ntohs(udphdr->sport); + dport = ntohs(udphdr->dport); + sockrecvudp(m, sip, dport, sport); + return; + +fail: + mbuffree(m); +} + +// receives an IP packet +static void +net_rx_ip(struct mbuf *m) +{ + struct ip *iphdr; + uint16 len; + + iphdr = mbufpullhdr(m, *iphdr); + if (!iphdr) + goto fail; + + // check IP version and header len + if (iphdr->ip_vhl != ((4 << 4) | (20 >> 2))) + goto fail; + // validate IP checksum + if (in_cksum((unsigned char *)iphdr, sizeof(*iphdr))) + goto fail; + // can't support fragmented IP packets + if (htons(iphdr->ip_off) != 0) + goto fail; + // is the packet addressed to us? + if (htonl(iphdr->ip_dst) != local_ip) + goto fail; + // can only support UDP + if (iphdr->ip_p != IPPROTO_UDP) + goto fail; + + len = ntohs(iphdr->ip_len) - sizeof(*iphdr); + net_rx_udp(m, len, iphdr); + return; + +fail: + mbuffree(m); +} + +// called by e1000 driver's interrupt handler to deliver a packet to the +// networking stack +void net_rx(struct mbuf *m) +{ + struct eth *ethhdr; + uint16 type; + + ethhdr = mbufpullhdr(m, *ethhdr); + if (!ethhdr) { + mbuffree(m); + return; + } + + type = ntohs(ethhdr->type); + if (type == ETHTYPE_IP) + net_rx_ip(m); + else if (type == ETHTYPE_ARP) + net_rx_arp(m); + else + mbuffree(m); +} diff --git a/kernel/net.h b/kernel/net.h new file mode 100644 index 0000000..9e6fefe --- /dev/null +++ b/kernel/net.h @@ -0,0 +1,173 @@ +// +// packet buffer management +// + +#define MBUF_SIZE 2048 +#define MBUF_DEFAULT_HEADROOM 128 + +struct mbuf { + struct mbuf *next; // the next mbuf in the chain + char *head; // the current start position of the buffer + unsigned int len; // the length of the buffer + char buf[MBUF_SIZE]; // the backing store +}; + +char *mbufpull(struct mbuf *m, unsigned int len); +char *mbufpush(struct mbuf *m, unsigned int len); +char *mbufput(struct mbuf *m, unsigned int len); +char *mbuftrim(struct mbuf *m, unsigned int len); + +// The above functions manipulate the size and position of the buffer: +// <- push <- trim +// -> pull -> put +// [-headroom-][------buffer------][-tailroom-] +// |----------------MBUF_SIZE-----------------| +// +// These marcos automatically typecast and determine the size of header structs. +// In most situations you should use these instead of the raw ops above. 
+#define mbufpullhdr(mbuf, hdr) (typeof(hdr)*)mbufpull(mbuf, sizeof(hdr)) +#define mbufpushhdr(mbuf, hdr) (typeof(hdr)*)mbufpush(mbuf, sizeof(hdr)) +#define mbufputhdr(mbuf, hdr) (typeof(hdr)*)mbufput(mbuf, sizeof(hdr)) +#define mbuftrimhdr(mbuf, hdr) (typeof(hdr)*)mbuftrim(mbuf, sizeof(hdr)) + +struct mbuf *mbufalloc(unsigned int headroom); +void mbuffree(struct mbuf *m); + +struct mbufq { + struct mbuf *head; // the first element in the queue + struct mbuf *tail; // the last element in the queue +}; + +void mbufq_pushtail(struct mbufq *q, struct mbuf *m); +struct mbuf *mbufq_pophead(struct mbufq *q); +int mbufq_empty(struct mbufq *q); +void mbufq_init(struct mbufq *q); + + +// +// endianness support +// + +static inline uint16 bswaps(uint16 val) +{ + return (((val & 0x00ffU) << 8) | + ((val & 0xff00U) >> 8)); +} + +static inline uint32 bswapl(uint32 val) +{ + return (((val & 0x000000ffUL) << 24) | + ((val & 0x0000ff00UL) << 8) | + ((val & 0x00ff0000UL) >> 8) | + ((val & 0xff000000UL) >> 24)); +} + +// Use these macros to convert network bytes to the native byte order. +// Note that RISC-V uses little endian while network order is big endian. +#define ntohs bswaps +#define ntohl bswapl +#define htons bswaps +#define htonl bswapl + + +// +// useful networking headers +// + +#define ETHADDR_LEN 6 + +// an Ethernet packet header (start of the packet). +struct eth { + uint8 dhost[ETHADDR_LEN]; + uint8 shost[ETHADDR_LEN]; + uint16 type; +} __attribute__((packed)); + +#define ETHTYPE_IP 0x0800 // Internet protocol +#define ETHTYPE_ARP 0x0806 // Address resolution protocol + +// an IP packet header (comes after an Ethernet header). +struct ip { + uint8 ip_vhl; // version << 4 | header length >> 2 + uint8 ip_tos; // type of service + uint16 ip_len; // total length + uint16 ip_id; // identification + uint16 ip_off; // fragment offset field + uint8 ip_ttl; // time to live + uint8 ip_p; // protocol + uint16 ip_sum; // checksum + uint32 ip_src, ip_dst; +}; + +#define IPPROTO_ICMP 1 // Control message protocol +#define IPPROTO_TCP 6 // Transmission control protocol +#define IPPROTO_UDP 17 // User datagram protocol + +#define MAKE_IP_ADDR(a, b, c, d) \ + (((uint32)a << 24) | ((uint32)b << 16) | \ + ((uint32)c << 8) | (uint32)d) + +// a UDP packet header (comes after an IP header). +struct udp { + uint16 sport; // source port + uint16 dport; // destination port + uint16 ulen; // length, including udp header, not including IP header + uint16 sum; // checksum +}; + +// an ARP packet (comes after an Ethernet header). +struct arp { + uint16 hrd; // format of hardware address + uint16 pro; // format of protocol address + uint8 hln; // length of hardware address + uint8 pln; // length of protocol address + uint16 op; // operation + + char sha[ETHADDR_LEN]; // sender hardware address + uint32 sip; // sender IP address + char tha[ETHADDR_LEN]; // target hardware address + uint32 tip; // target IP address +} __attribute__((packed)); + +#define ARP_HRD_ETHER 1 // Ethernet + +enum { + ARP_OP_REQUEST = 1, // requests hw addr given protocol addr + ARP_OP_REPLY = 2, // replies with a hw addr given protocol addr +}; + +// a DNS packet (comes after a UDP header).
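+// (id and the four counts below are big-endian on the wire, so convert them with ntohs() before use; the bit-fields map the two DNS flag bytes.)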
+struct dns { + uint16 id; // request ID + + uint8 rd: 1; // recursion desired + uint8 tc: 1; // truncated + uint8 aa: 1; // authoritative + uint8 opcode: 4; + uint8 qr: 1; // query/response + uint8 rcode: 4; // response code + uint8 cd: 1; // checking disabled + uint8 ad: 1; // authenticated data + uint8 z: 1; + uint8 ra: 1; // recursion available + + uint16 qdcount; // number of question entries + uint16 ancount; // number of resource records in answer section + uint16 nscount; // number of NS resource records in authority section + uint16 arcount; // number of resource records in additional records section +} __attribute__((packed)); + +struct dns_question { + uint16 qtype; + uint16 qclass; +} __attribute__((packed)); + +#define ARECORD (0x0001) +#define QCLASS (0x0001) + +struct dns_data { + uint16 type; + uint16 class; + uint32 ttl; + uint16 len; +} __attribute__((packed)); diff --git a/kernel/pci.c b/kernel/pci.c new file mode 100644 index 0000000..3e361c5 --- /dev/null +++ b/kernel/pci.c @@ -0,0 +1,61 @@ +// +// simple PCI-Express initialization, only +// works for qemu and its e1000 card. +// + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "spinlock.h" +#include "proc.h" +#include "defs.h" + +void +pci_init() +{ + // we'll place the e1000 registers at this address. + // vm.c maps this range. + uint64 e1000_regs = 0x40000000L; + + // qemu -machine virt puts PCIe config space here. + // vm.c maps this range. + uint32 *ecam = (uint32 *) 0x30000000L; + + // look at each possible PCI device on bus 0. + for(int dev = 0; dev < 32; dev++){ + int bus = 0; + int func = 0; + int offset = 0; + uint32 off = (bus << 16) | (dev << 11) | (func << 8) | (offset); + volatile uint32 *base = ecam + off; + uint32 id = base[0]; + + // 100e:8086 is an e1000 + if(id == 0x100e8086){ + // command and status register. + // bit 0 : I/O access enable + // bit 1 : memory access enable + // bit 2 : enable mastering + base[1] = 0b111; + __sync_synchronize(); + + for(int i = 0; i < 6; i++){ + uint32 old = base[4+i]; + + // writing all 1's to the BAR causes it to be + // replaced with its size. + base[4+i] = 0xffffffff; + __sync_synchronize(); + + base[4+i] = old; + } + + // tell the e1000 to reveal its registers at + // physical address 0x40000000. + base[4+0] = e1000_regs; + + e1000_init((uint32*)e1000_regs); + } + } +} diff --git a/kernel/pipe.c b/kernel/pipe.c index f6b501a..41a9c5e 100644 --- a/kernel/pipe.c +++ b/kernel/pipe.c @@ -68,6 +68,9 @@ pipeclose(struct pipe *pi, int writable) } if(pi->readopen == 0 && pi->writeopen == 0){ release(&pi->lock); +#ifdef LAB_LOCK + freelock(&pi->lock); +#endif kfree((char*)pi); } else release(&pi->lock); diff --git a/kernel/plic.c b/kernel/plic.c index 4175db9..5c9d96a 100644 --- a/kernel/plic.c +++ b/kernel/plic.c @@ -14,6 +14,13 @@ plicinit(void) // set desired IRQ priorities non-zero (otherwise disabled). *(uint32*)(PLIC + UART0_IRQ*4) = 1; *(uint32*)(PLIC + VIRTIO0_IRQ*4) = 1; + +#ifdef LAB_NET + // PCIE IRQs are 32 to 35 + for(int irq = 1; irq < 0x35; irq++){ + *(uint32*)(PLIC + irq*4) = 1; + } +#endif } void @@ -25,6 +32,11 @@ plicinithart(void) // for the uart and virtio disk. *(uint32*)PLIC_SENABLE(hart) = (1 << UART0_IRQ) | (1 << VIRTIO0_IRQ); +#ifdef LAB_NET + // hack to get at next 32 IRQs for e1000 + *(uint32*)(PLIC_SENABLE(hart)+4) = 0xffffffff; +#endif + + // set this hart's S-mode priority threshold to 0.
*(uint32*)PLIC_SPRIORITY(hart) = 0; } diff --git a/kernel/printf.c b/kernel/printf.c index 1a50203..509c1c5 100644 --- a/kernel/printf.c +++ b/kernel/printf.c @@ -122,6 +122,8 @@ panic(char *s) printf("panic: "); printf(s); printf("\n"); + backtrace(); + panicked = 1; // freeze uart output from other CPUs for(;;) ; @@ -133,3 +135,18 @@ printfinit(void) initlock(&pr.lock, "pr"); pr.locking = 1; } + +void +backtrace(void) +{ + uint64 fp = r_fp(); + printf("backtrace:\n"); + uint64 stackpg = PGROUNDDOWN(fp); + // Wherever fp points should still be within the stack page + while(PGROUNDDOWN(fp) == stackpg){ + // print the return addr (stored at fp-8) + printf("%p\n", *(uint64 *)(fp-8)); + // load previous (upper stack) fp + fp = *(uint64 *)(fp-16); + } +} diff --git a/kernel/proc.c b/kernel/proc.c index 58a8a0b..9a9bae9 100644 --- a/kernel/proc.c +++ b/kernel/proc.c @@ -39,6 +39,7 @@ proc_mapstacks(pagetable_t kpgtbl) if(pa == 0) panic("kalloc"); uint64 va = KSTACK((int) (p - proc)); + p->alarm_tickspassed = 0; kvmmap(kpgtbl, va, (uint64)pa, PGSIZE, PTE_R | PTE_W); } } @@ -132,6 +133,25 @@ found: return 0; } + // Allocate a usyscall page and fill pid. + if((p->usyscall = (struct usyscall *)kalloc()) == 0){ + freeproc(p); + release(&p->lock); + return 0; + } + p->usyscall->pid = p->pid; + + // reset sigalarm properties + p->alarm_interval = 0; + p->alarm_handler = 0; + p->alarm_tickspassed = 0; + p->alarm_caninvoke = 1; + if((p->atpfm = (struct trapframe *)kalloc()) == 0){ + freeproc(p); + release(&p->lock); + return 0; + } + // An empty user page table. p->pagetable = proc_pagetable(p); if(p->pagetable == 0){ @@ -158,8 +178,18 @@ freeproc(struct proc *p) if(p->trapframe) kfree((void*)p->trapframe); p->trapframe = 0; + if(p->usyscall) + kfree((void*)p->usyscall); + p->usyscall = 0; if(p->pagetable) proc_freepagetable(p->pagetable, p->sz); + if(p->atpfm) + kfree((void*)p->atpfm); + p->atpfm = 0; + p->alarm_interval = 0; + p->alarm_handler = 0; + p->alarm_tickspassed = 0; + p->alarm_caninvoke = 1; p->pagetable = 0; p->sz = 0; p->pid = 0; @@ -172,7 +202,7 @@ freeproc(struct proc *p) } // Create a user page table for a given process, with no user memory, -// but with trampoline and trapframe pages. +// but with trampoline, trapframe and usyscall pages. pagetable_t proc_pagetable(struct proc *p) { @@ -202,6 +232,14 @@ proc_pagetable(struct proc *p) return 0; } + // map the usyscall page below the trapframe page, for + // ugetpid(). + if(mappages(pagetable, USYSCALL, PGSIZE, + (uint64)(p->usyscall), PTE_R | PTE_U) < 0){ + // USYSCALL itself was never mapped, so undo the two earlier mappings + uvmunmap(pagetable, TRAMPOLINE, 1, 0); + uvmunmap(pagetable, TRAPFRAME, 1, 0); + uvmfree(pagetable, 0); + return 0; + } return pagetable; } @@ -212,6 +250,7 @@ proc_freepagetable(pagetable_t pagetable, uint64 sz) { uvmunmap(pagetable, TRAMPOLINE, 1, 0); uvmunmap(pagetable, TRAPFRAME, 1, 0); + uvmunmap(pagetable, USYSCALL, 1, 0); uvmfree(pagetable, sz); } @@ -299,6 +338,9 @@ fork(void) // copy saved user registers. *(np->trapframe) = *(p->trapframe); + // inherit trace_mask + np->trace_mask = p->trace_mask; + // Cause fork to return 0 in the child.
np->trapframe->a0 = 0; @@ -686,3 +728,43 @@ procdump(void) printf("\n"); } } + +int +get_nproc(void) +{ + int n = 0; + struct proc *p; + + for(int i = 0; i < NPROC; i++) { + p = &proc[i]; + acquire(&p->lock); + if(p->state != UNUSED) + n++; + release(&p->lock); + } + + return n; +} + +// lab pagetable: report which pages have been accessed (r/w) +// according to PTE_A, storing the result as a bit mask at the user address in the 3rd param +int +pgaccess(uint64 base, int len, uint64 mask_addr) +{ + struct proc *p = myproc(); + pagetable_t pgtbl = p->pagetable; + pte_t *pte; + int mask = 0; + + // iterate through the pages + for(int i = 0; i < len; i++) { + pte = walk(pgtbl, base + i * PGSIZE, 0); + if(pte == 0) + continue; // no PTE here; walk() can return 0 + if(*pte & PTE_A) { + *pte &= (~PTE_A); // clear PTE_A to avoid setting it forever + mask |= (1L << i); + } + } + + // now copyout the mask to user memory + return copyout(pgtbl, mask_addr, (char *)&mask, sizeof(mask)); +} diff --git a/kernel/proc.h b/kernel/proc.h index d021857..a195b02 100644 --- a/kernel/proc.h +++ b/kernel/proc.h @@ -91,6 +91,7 @@ struct proc { int killed; // If non-zero, have been killed int xstate; // Exit status to be returned to parent's wait int pid; // Process ID + int trace_mask; // SYS_trace mask (1 << SYS_xxx) // wait_lock must be held when using this: struct proc *parent; // Parent process @@ -100,8 +101,14 @@ struct proc { uint64 sz; // Size of process memory (bytes) pagetable_t pagetable; // User page table struct trapframe *trapframe; // data page for trampoline.S + struct usyscall *usyscall; // data page for usyscall struct context context; // swtch() here to run process struct file *ofile[NOFILE]; // Open files struct inode *cwd; // Current directory char name[16]; // Process name (debugging) + int alarm_interval; // sigalarm syscall interval + uint64 alarm_handler; // sigalarm syscall handler + int alarm_tickspassed; // record how many ticks passed since last sigalarm handler call + int alarm_caninvoke; // prevent re-entrant calls to handler + struct trapframe *atpfm; // trapframe to resume after handling, must hold p->lock }; diff --git a/kernel/riscv.h b/kernel/riscv.h index 20a01db..af18972 100644 --- a/kernel/riscv.h +++ b/kernel/riscv.h @@ -327,6 +327,15 @@ sfence_vma() asm volatile("sfence.vma zero, zero"); } +// read the frame pointer of the currently executing function +static inline uint64 +r_fp() +{ + uint64 x; + asm volatile("mv %0, s0" : "=r" (x) ); + return x; +} + typedef uint64 pte_t; typedef uint64 *pagetable_t; // 512 PTEs @@ -343,6 +352,11 @@ typedef uint64 *pagetable_t; // 512 PTEs #define PTE_W (1L << 2) #define PTE_X (1L << 3) #define PTE_U (1L << 4) // user can access +#define PTE_A (1L << 6) // RISC-V accessed bit +#define PTE_C (1L << 8) // RSW low bit, use it to mark whether a page is COW + + + // shift a physical address to the right place for a PTE.
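// (Sv39: the PPN moves from PA bits 12 and up into PTE bits 10 and up; the low 10 PTE bits hold flags like PTE_V..PTE_C above.)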
#define PA2PTE(pa) ((((uint64)pa) >> 12) << 10) diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 9840302..266a698 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -8,12 +8,52 @@ #include "proc.h" #include "defs.h" +#ifdef LAB_LOCK +#define NLOCK 500 + +static struct spinlock *locks[NLOCK]; +struct spinlock lock_locks; + +void +freelock(struct spinlock *lk) +{ + acquire(&lock_locks); + int i; + for (i = 0; i < NLOCK; i++) { + if(locks[i] == lk) { + locks[i] = 0; + break; + } + } + release(&lock_locks); +} + +static void +findslot(struct spinlock *lk) { + acquire(&lock_locks); + int i; + for (i = 0; i < NLOCK; i++) { + if(locks[i] == 0) { + locks[i] = lk; + release(&lock_locks); + return; + } + } + panic("findslot"); +} +#endif + void initlock(struct spinlock *lk, char *name) { lk->name = name; lk->locked = 0; lk->cpu = 0; +#ifdef LAB_LOCK + lk->nts = 0; + lk->n = 0; + findslot(lk); +#endif } // Acquire the lock. @@ -25,12 +65,21 @@ acquire(struct spinlock *lk) if(holding(lk)) panic("acquire"); +#ifdef LAB_LOCK + __sync_fetch_and_add(&(lk->n), 1); +#endif + // On RISC-V, sync_lock_test_and_set turns into an atomic swap: // a5 = 1 // s1 = &lk->locked // amoswap.w.aq a5, a5, (s1) - while(__sync_lock_test_and_set(&lk->locked, 1) != 0) - ; + while(__sync_lock_test_and_set(&lk->locked, 1) != 0) { +#ifdef LAB_LOCK + __sync_fetch_and_add(&(lk->nts), 1); +#else + ; +#endif + } // Tell the C compiler and the processor to not move loads or stores // past this point, to ensure that the critical section's memory @@ -108,3 +157,61 @@ pop_off(void) if(c->noff == 0 && c->intena) intr_on(); } + +// Read a shared 32-bit value without holding a lock +int +atomic_read4(int *addr) { + uint32 val; + __atomic_load(addr, &val, __ATOMIC_SEQ_CST); + return val; +} + +#ifdef LAB_LOCK +int +snprint_lock(char *buf, int sz, struct spinlock *lk) +{ + int n = 0; + if(lk->n > 0) { + n = snprintf(buf, sz, "lock: %s: #test-and-set %d #acquire() %d\n", + lk->name, lk->nts, lk->n); + } + return n; +} + +int +statslock(char *buf, int sz) { + int n; + int tot = 0; + + acquire(&lock_locks); + n = snprintf(buf, sz, "--- lock kmem/bcache stats\n"); + for(int i = 0; i < NLOCK; i++) { + if(locks[i] == 0) + break; + if(strncmp(locks[i]->name, "bcache", strlen("bcache")) == 0 || + strncmp(locks[i]->name, "kmem", strlen("kmem")) == 0) { + tot += locks[i]->nts; + n += snprint_lock(buf +n, sz-n, locks[i]); + } + } + + n += snprintf(buf+n, sz-n, "--- top 5 contended locks:\n"); + int last = 100000000; + // stupid way to compute top 5 contended locks + for(int t = 0; t < 5; t++) { + int top = 0; + for(int i = 0; i < NLOCK; i++) { + if(locks[i] == 0) + break; + if(locks[i]->nts > locks[top]->nts && locks[i]->nts < last) { + top = i; + } + } + n += snprint_lock(buf+n, sz-n, locks[top]); + last = locks[top]->nts; + } + n += snprintf(buf+n, sz-n, "tot= %d\n", tot); + release(&lock_locks); + return n; +} +#endif diff --git a/kernel/spinlock.h b/kernel/spinlock.h index 4392820..9bac216 100644 --- a/kernel/spinlock.h +++ b/kernel/spinlock.h @@ -5,5 +5,9 @@ struct spinlock { // For debugging: char *name; // Name of lock. struct cpu *cpu; // The cpu holding the lock. 
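/*
 * The two counters added below feed snprint_lock()/statslock() in
 * kernel/spinlock.c above: acquire() bumps n once per call and nts once
 * per failed test-and-set.  statslock() samples them without holding
 * the counted lock, which is the kind of read atomic_read4() exists to
 * make safe, e.g.:
 *
 *   int spins = atomic_read4(&lk->nts);  // untorn 32-bit load, no lock
 */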
+#ifdef LAB_LOCK
+  int nts;           // # failed test-and-set spins (contention)
+  int n;             // # acquire() calls
+#endif
 };
diff --git a/kernel/sprintf.c b/kernel/sprintf.c
new file mode 100644
index 0000000..050eb85
--- /dev/null
+++ b/kernel/sprintf.c
@@ -0,0 +1,91 @@
+#include <stdarg.h>
+
+#include "types.h"
+#include "param.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "fs.h"
+#include "file.h"
+#include "riscv.h"
+#include "defs.h"
+
+static char digits[] = "0123456789abcdef";
+
+static int
+sputc(char *s, char c)
+{
+  *s = c;
+  return 1;
+}
+
+static int
+sprintint(char *s, int xx, int base, int sign)
+{
+  char buf[16];
+  int i, n;
+  uint x;
+
+  if(sign && (sign = xx < 0))
+    x = -xx;
+  else
+    x = xx;
+
+  i = 0;
+  do {
+    buf[i++] = digits[x % base];
+  } while((x /= base) != 0);
+
+  if(sign)
+    buf[i++] = '-';
+
+  n = 0;
+  while(--i >= 0)
+    n += sputc(s+n, buf[i]);
+  return n;
+}
+
+int
+snprintf(char *buf, int sz, char *fmt, ...)
+{
+  va_list ap;
+  int i, c;
+  int off = 0;
+  char *s;
+
+  if (fmt == 0)
+    panic("null fmt");
+
+  va_start(ap, fmt);
+  for(i = 0; off < sz && (c = fmt[i] & 0xff) != 0; i++){
+    if(c != '%'){
+      off += sputc(buf+off, c);
+      continue;
+    }
+    c = fmt[++i] & 0xff;
+    if(c == 0)
+      break;
+    switch(c){
+    case 'd':
+      off += sprintint(buf+off, va_arg(ap, int), 10, 1);
+      break;
+    case 'x':
+      off += sprintint(buf+off, va_arg(ap, int), 16, 1);
+      break;
+    case 's':
+      if((s = va_arg(ap, char*)) == 0)
+        s = "(null)";
+      for(; *s && off < sz; s++)
+        off += sputc(buf+off, *s);
+      break;
+    case '%':
+      off += sputc(buf+off, '%');
+      break;
+    default:
+      // Print unknown % sequence to draw attention.
+      off += sputc(buf+off, '%');
+      off += sputc(buf+off, c);
+      break;
+    }
+  }
+  return off;
+}
diff --git a/kernel/start.c b/kernel/start.c
index e16f18a..bf03bc0 100644
--- a/kernel/start.c
+++ b/kernel/start.c
@@ -38,6 +38,11 @@ start()
   w_mideleg(0xffff);
   w_sie(r_sie() | SIE_SEIE | SIE_STIE | SIE_SSIE);
 
+#ifdef KCSAN
+  // allow supervisor to read cycle counter register
+  w_mcounteren(r_mcounteren()|0x3);
+#endif
+
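/*
 * Two caveats are visible in kernel/sprintf.c above: snprintf() never
 * writes a terminating '\0', and a %d/%x that lands near the end of the
 * buffer can overrun sz, because sprintint() itself is unbounded.  A
 * cautious caller (hypothetical sketch) leaves worst-case slack -- 17
 * bytes, a sign plus sprintint()'s 16-byte digit buffer -- and
 * terminates by hand:
 */
char buf[32];
int n = snprintf(buf, sizeof(buf) - 17, "lock %s: %d", "kmem", 42);
buf[n] = '\0';   // snprintf() returns the length but does not NUL-terminate

   // configure Physical Memory Protection to give supervisor mode
   // access to all of physical memory.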
  w_pmpaddr0(0x3fffffffffffffull);
diff --git a/kernel/stats.c b/kernel/stats.c
new file mode 100644
index 0000000..b7a8e5f
--- /dev/null
+++ b/kernel/stats.c
@@ -0,0 +1,66 @@
+#include <stdarg.h>
+
+#include "types.h"
+#include "param.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "fs.h"
+#include "file.h"
+#include "riscv.h"
+#include "defs.h"
+
+#define BUFSZ 4096
+static struct {
+  struct spinlock lock;
+  char buf[BUFSZ];
+  int sz;
+  int off;
+} stats;
+
+int statscopyin(char*, int);
+int statslock(char*, int);
+
+int
+statswrite(int user_src, uint64 src, int n)
+{
+  return -1;
+}
+
+int
+statsread(int user_dst, uint64 dst, int n)
+{
+  int m;
+
+  acquire(&stats.lock);
+
+  if(stats.sz == 0) {
+#ifdef LAB_LOCK
+    stats.sz = statslock(stats.buf, BUFSZ);
+#endif
+  }
+  m = stats.sz - stats.off;
+
+  if (m > 0) {
+    if(m > n)
+      m = n;
+    if(either_copyout(user_dst, dst, stats.buf+stats.off, m) != -1) {
+      stats.off += m;
+    }
+  } else {
+    m = -1;
+    stats.sz = 0;
+    stats.off = 0;
+  }
+  release(&stats.lock);
+  return m;
+}
+
+void
+statsinit(void)
+{
+  initlock(&stats.lock, "stats");
+
+  devsw[STATS].read = statsread;
+  devsw[STATS].write = statswrite;
+}
+
diff --git a/kernel/syscall.c b/kernel/syscall.c
index ed65409..172c5ea 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -101,6 +101,18 @@ extern uint64 sys_unlink(void);
 extern uint64 sys_link(void);
 extern uint64 sys_mkdir(void);
 extern uint64 sys_close(void);
+extern uint64 sys_trace(void);
+extern uint64 sys_sysinfo(void);
+
+#ifdef LAB_NET
+extern uint64 sys_connect(void);
+#endif
+#ifdef LAB_PGTBL
+extern uint64 sys_pgaccess(void);
+#endif
+extern uint64 sys_sigalarm(void);
+extern uint64 sys_sigreturn(void);
+
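/*
 * With statsinit() wiring statsread() into devsw[STATS] above, the lock
 * report is one file read away.  A hypothetical user-side dump; the
 * device-node name and the STATS major number are assumptions here:
 */
int
main(void)
{
  char buf[512];
  int fd, n;

  mknod("statistics", STATS, 0);         // create the device node once
  if((fd = open("statistics", 0)) < 0)   // 0 = read-only
    exit(1);
  while((n = read(fd, buf, sizeof(buf))) > 0)
    write(1, buf, n);                    // copy the report to the console
  close(fd);
  exit(0);
}

 // An array mapping syscall numbers from syscall.h
 // to the function that handles the system call.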
@@ -126,8 +138,54 @@ static uint64 (*syscalls[])(void) = {
 [SYS_link]    sys_link,
 [SYS_mkdir]   sys_mkdir,
 [SYS_close]   sys_close,
+#ifdef LAB_NET
+[SYS_connect] sys_connect,
+#endif
+#ifdef LAB_PGTBL
+[SYS_pgaccess] sys_pgaccess,
+#endif
+[SYS_trace]   sys_trace,
+[SYS_sysinfo] sys_sysinfo,
+[SYS_sigalarm] sys_sigalarm,
+[SYS_sigreturn] sys_sigreturn,
+};
+
+// syscall name maps for SYS_trace:
+static char *syscall_names[] = {
+[SYS_fork]    "fork",
+[SYS_exit]    "exit",
+[SYS_wait]    "wait",
+[SYS_pipe]    "pipe",
+[SYS_read]    "read",
+[SYS_kill]    "kill",
+[SYS_exec]    "exec",
+[SYS_fstat]   "fstat",
+[SYS_chdir]   "chdir",
+[SYS_dup]     "dup",
+[SYS_getpid]  "getpid",
+[SYS_sbrk]    "sbrk",
+[SYS_sleep]   "sleep",
+[SYS_uptime]  "uptime",
+[SYS_open]    "open",
+[SYS_write]   "write",
+[SYS_mknod]   "mknod",
+[SYS_unlink]  "unlink",
+[SYS_link]    "link",
+[SYS_mkdir]   "mkdir",
+[SYS_close]   "close",
+#ifdef LAB_NET
+[SYS_connect] "connect",
+#endif
+#ifdef LAB_PGTBL
+[SYS_pgaccess] "pgaccess",
+#endif
+[SYS_trace]   "trace",
+[SYS_sysinfo] "sysinfo",
+[SYS_sigalarm] "sigalarm",
+[SYS_sigreturn] "sigreturn",
 };
 
+
 void
 syscall(void)
 {
@@ -139,9 +197,17 @@ syscall(void)
     // Use num to lookup the system call function for num, call it,
     // and store its return value in p->trapframe->a0
     p->trapframe->a0 = syscalls[num]();
+
+    // SYS_trace: print a line for every syscall whose number is at or
+    // below the traced one (p->trace_mask == 1 << SYS_xxx)
+    if(p->trace_mask >> num) {
+      printf("%d: syscall %s -> %d\n", p->pid, syscall_names[num], p->trapframe->a0);
+    }
+
   } else {
     printf("%d %s: unknown sys call %d\n",
            p->pid, p->name, num);
     p->trapframe->a0 = -1;
   }
 }
+
diff --git a/kernel/syscall.h b/kernel/syscall.h
index bc5f356..8da572e 100644
--- a/kernel/syscall.h
+++ b/kernel/syscall.h
@@ -20,3 +20,14 @@
 #define SYS_link   19
 #define SYS_mkdir  20
 #define SYS_close  21
+
+// System calls for labs
+#define SYS_trace     22
+#define SYS_sysinfo   23
+#define SYS_sigalarm  24
+#define SYS_sigreturn 25
+#define SYS_symlink   26
+#define SYS_mmap      27
+#define SYS_munmap    28
+#define SYS_connect   29
+#define SYS_pgaccess  30
diff --git a/kernel/sysfile.c b/kernel/sysfile.c
index 16b668c..4b2189a 100644
--- a/kernel/sysfile.c
+++ b/kernel/sysfile.c
@@ -503,3 +503,29 @@ sys_pipe(void)
   }
   return 0;
 }
+
+
+#ifdef LAB_NET
+int
+sys_connect(void)
+{
+  struct file *f;
+  int fd;
+  uint32 raddr;
+  uint32 rport;
+  uint32 lport;
+
+  argint(0, (int*)&raddr);
+  argint(1, (int*)&lport);
+  argint(2, (int*)&rport);
+
+  if(sockalloc(&f, raddr, lport, rport) < 0)
+    return -1;
+  if((fd=fdalloc(f)) < 0){
+    fileclose(f);
+    return -1;
+  }
+
+  return fd;
+}
+#endif
diff --git a/kernel/sysinfo.c b/kernel/sysinfo.c
new file mode 100644
index 0000000..c66324d
--- /dev/null
+++ b/kernel/sysinfo.c
@@ -0,0 +1,24 @@
+#include "types.h"
+#include "riscv.h"
+#include "param.h"
+#include "spinlock.h"
+#include "defs.h"
+#include "sysinfo.h"
+#include "proc.h"
+
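/*
 * From user space the whole sysinfo path is one call.  A sketch modeled
 * on the sysinfo lab's sysinfotest (assumes the sysinfo() stub has been
 * added to user/user.h and usys.pl):
 */
#include "kernel/sysinfo.h"

void
print_sysinfo(void)
{
  struct sysinfo info;

  if(sysinfo(&info) < 0){
    printf("sysinfo failed\n");
    return;
  }
  printf("free memory: %d bytes, processes: %d\n",
         (int)info.freemem, (int)info.nproc);
}

+// Get current system info
+// addr is a user virtual address, pointing to a struct sysinfo.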
+int
+sys_info(uint64 addr) {
+  struct proc *p = myproc();
+  struct sysinfo info;
+
+  // Fill in the free-memory and process counts
+  info.freemem = get_freemem();
+  info.nproc = get_nproc();
+
+  if(copyout(p->pagetable, addr, (char *)&info, sizeof(info)) < 0)
+    return -1;
+  return 0;
+}
+
diff --git a/kernel/sysinfo.h b/kernel/sysinfo.h
new file mode 100644
index 0000000..fb878e6
--- /dev/null
+++ b/kernel/sysinfo.h
@@ -0,0 +1,4 @@
+struct sysinfo {
+  uint64 freemem;   // amount of free memory (bytes)
+  uint64 nproc;     // number of processes
+};
diff --git a/kernel/sysnet.c b/kernel/sysnet.c
new file mode 100644
index 0000000..1c48cb3
--- /dev/null
+++ b/kernel/sysnet.c
@@ -0,0 +1,185 @@
+//
+// network system calls.
+//
+
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "spinlock.h"
+#include "proc.h"
+#include "defs.h"
+#include "fs.h"
+#include "sleeplock.h"
+#include "file.h"
+#include "net.h"
+
+struct sock {
+  struct sock *next;    // the next socket in the list
+  uint32 raddr;         // the remote IPv4 address
+  uint16 lport;         // the local UDP port number
+  uint16 rport;         // the remote UDP port number
+  struct spinlock lock; // protects the rxq
+  struct mbufq rxq;     // a queue of packets waiting to be received
+};
+
+static struct spinlock lock;
+static struct sock *sockets;
+
+void
+sockinit(void)
+{
+  initlock(&lock, "socktbl");
+}
+
+int
+sockalloc(struct file **f, uint32 raddr, uint16 lport, uint16 rport)
+{
+  struct sock *si, *pos;
+
+  si = 0;
+  *f = 0;
+  if ((*f = filealloc()) == 0)
+    goto bad;
+  if ((si = (struct sock*)kalloc()) == 0)
+    goto bad;
+
+  // initialize objects
+  si->raddr = raddr;
+  si->lport = lport;
+  si->rport = rport;
+  initlock(&si->lock, "sock");
+  mbufq_init(&si->rxq);
+  (*f)->type = FD_SOCK;
+  (*f)->readable = 1;
+  (*f)->writable = 1;
+  (*f)->sock = si;
+
+  // add to list of sockets
+  acquire(&lock);
+  pos = sockets;
+  while (pos) {
+    if (pos->raddr == raddr &&
+        pos->lport == lport &&
+        pos->rport == rport) {
+      release(&lock);
+      goto bad;
+    }
+    pos = pos->next;
+  }
+  si->next = sockets;
+  sockets = si;
+  release(&lock);
+  return 0;
+
+bad:
+  if (si)
+    kfree((char*)si);
+  if (*f)
+    fileclose(*f);
+  return -1;
+}
+
+void
+sockclose(struct sock *si)
+{
+  struct sock **pos;
+  struct mbuf *m;
+
+  // remove from list of sockets
+  acquire(&lock);
+  pos = &sockets;
+  while (*pos) {
+    if (*pos == si){
+      *pos = si->next;
+      break;
+    }
+    pos = &(*pos)->next;
+  }
+  release(&lock);
+
+  // free any pending mbufs
+  while (!mbufq_empty(&si->rxq)) {
+    m = mbufq_pophead(&si->rxq);
+    mbuffree(m);
+  }
+
+  kfree((char*)si);
+}
+
+int
+sockread(struct sock *si, uint64 addr, int n)
+{
+  struct proc *pr = myproc();
+  struct mbuf *m;
+  int len;
+
+  acquire(&si->lock);
+  while (mbufq_empty(&si->rxq) && !pr->killed) {
+    sleep(&si->rxq, &si->lock);
+  }
+  if (pr->killed) {
+    release(&si->lock);
+    return -1;
+  }
+  m = mbufq_pophead(&si->rxq);
+  release(&si->lock);
+
+  len = m->len;
+  if (len > n)
+    len = n;
+  if (copyout(pr->pagetable, addr, m->head, len) == -1) {
+    mbuffree(m);
+    return -1;
+  }
+  mbuffree(m);
+  return len;
+}
+
+int
+sockwrite(struct sock *si, uint64 addr, int n)
+{
+  struct proc *pr = myproc();
+  struct mbuf *m;
+
+  m = mbufalloc(MBUF_DEFAULT_HEADROOM);
+  if (!m)
+    return -1;
+
+  if (copyin(pr->pagetable, mbufput(m, n), addr, n) == -1) {
+    mbuffree(m);
+    return -1;
+  }
+  net_tx_udp(m, si->raddr, si->lport, si->rport);
+  return n;
+}
+
+// called by protocol handler layer to deliver UDP packets
+void
+sockrecvudp(struct mbuf *m, uint32 raddr, uint16 lport, uint16 rport)
+{
+  //
+  // Find the socket that handles this mbuf and deliver it, waking
+  // any sleeping reader. Free the mbuf if there are no sockets
+  // registered to handle it.
+  //
+  struct sock *si;
+
+  acquire(&lock);
+  si = sockets;
+  while (si) {
+    if (si->raddr == raddr && si->lport == lport && si->rport == rport)
+      goto found;
+    si = si->next;
+  }
+  release(&lock);
+  mbuffree(m);
+  return;
+
+found:
+  acquire(&si->lock);
+  mbufq_pushtail(&si->rxq, m);
+  wakeup(&si->rxq);
+  release(&si->lock);
+  release(&lock);
+}
diff --git a/kernel/sysproc.c b/kernel/sysproc.c
index 3b4d5bd..715a511 100644
--- a/kernel/sysproc.c
+++ b/kernel/sysproc.c
@@ -1,7 +1,7 @@
 #include "types.h"
 #include "riscv.h"
-#include "defs.h"
 #include "param.h"
+#include "defs.h"
 #include "memlayout.h"
 #include "spinlock.h"
 #include "proc.h"
@@ -54,9 +54,8 @@ sys_sleep(void)
   int n;
   uint ticks0;
 
+
   argint(0, &n);
-  if(n < 0)
-    n = 0;
   acquire(&tickslock);
   ticks0 = ticks;
   while(ticks - ticks0 < n){
@@ -66,10 +65,29 @@ sys_sleep(void)
     }
     sleep(&ticks, &tickslock);
   }
+
+  // backtrace();
+
   release(&tickslock);
   return 0;
 }
+
+#ifdef LAB_PGTBL
+int
+sys_pgaccess(void)
+{
+  uint64 base, mask;
+  int len;
+
+
+  argaddr(0, &base);
+  argint(1, &len);
+  argaddr(2, &mask);
+  return pgaccess(base, len, mask);
+}
+#endif
+
 uint64
 sys_kill(void)
 {
@@ -91,3 +109,44 @@ sys_uptime(void)
   release(&tickslock);
   return xticks;
 }
+
+uint64
+sys_trace(void)
+{
+  argint(0, &myproc()->trace_mask);
+
+  return -(myproc()->trace_mask <= 1);  // fail unless a bit above 1<<0 is set
+}
+
+uint64
+sys_sysinfo(void)
+{
+  uint64 si; // user pointer to struct sysinfo
+
+  argaddr(0, &si);
+  return sys_info(si);
+}
+
+uint64
+sys_sigalarm(void)
+{
+  struct proc *p = myproc();
+  uint64 handler;
+
+  argint(0, &p->alarm_interval);
+  argaddr(1, &handler);
+  p->alarm_handler = handler;
+
+  return 0;
+}
+
+uint64 sys_sigreturn(void)
+{
+  struct proc *p = myproc();
+  // restore the saved trapframe to resume where the alarm interrupted
+  memmove(p->trapframe, p->atpfm, sizeof(struct trapframe));
+  p->alarm_tickspassed = 0;
+  p->alarm_caninvoke = 1;
+  // return the restored a0 so this syscall's return value doesn't clobber it (test3)
+  return p->trapframe->a0;
+}
diff --git a/kernel/trap.c b/kernel/trap.c
index 512c850..7cc69b1 100644
--- a/kernel/trap.c
+++ b/kernel/trap.c
@@ -6,6 +6,12 @@
 #include "proc.h"
 #include "defs.h"
 
+/*
+ * Always remember that RISC-V disables interrupts when it starts to take a trap,
+ * so there's no need to call intr_off() at the beginning of trap handling.
+ * Reference: xv6-riscv-book 4.5
+ */
+
 struct spinlock tickslock;
 uint ticks;
 
@@ -67,18 +73,41 @@ usertrap(void)
     syscall();
   } else if((which_dev = devintr()) != 0){
     // ok
+  } else if(r_scause() == 13 || r_scause() == 15){
+    // page fault (scause 13 = load, 15 = store): try copy-on-write
+    uint64 va = r_stval();
+    if(cow_handler(p->pagetable, va) < 0)
+      goto err;
   } else {
+
+
     printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid);
     printf("            sepc=%p stval=%p\n", r_sepc(), r_stval());
+err:
+    printf("killing the process...\n");
     setkilled(p);
   }
 
   if(killed(p))
     exit(-1);
 
+
-  // give up the CPU if this is a timer interrupt.
-  if(which_dev == 2)
+  if(which_dev == 2){
+    // timer interrupt
+    if(p->alarm_interval > 0 && p->alarm_caninvoke){
+      // count this tick toward the pending alarm
+      p->alarm_tickspassed++;
+      if(p->alarm_tickspassed == p->alarm_interval){
+        // park the interrupted trapframe in p->atpfm
+        memmove(p->atpfm, p->trapframe, sizeof(struct trapframe));
+        p->alarm_tickspassed = 0;
+        p->alarm_caninvoke = 0;
+        p->trapframe->epc = p->alarm_handler;
+      }
+    }
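/*
 * End-to-end, the alarm machinery above behaves like this user program
 * (a sketch modeled on the traps lab's alarmtest): the handler runs in
 * user mode while the interrupted trapframe is parked in p->atpfm, and
 * sigreturn() puts it back.
 */
void
periodic(void)
{
  printf("alarm!\n");
  sigreturn();            // restore the saved trapframe; re-arm caninvoke
}

int
main(void)
{
  sigalarm(10, periodic); // call periodic() every 10 ticks
  for(volatile int i = 0; i < 100000000; i++)
    ;                     // burn CPU so timer interrupts land here
  sigalarm(0, 0);         // turn alarms off
  exit(0);
}

+    // give up the CPU.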
yield(); + } usertrapret(); } @@ -190,7 +219,13 @@ devintr() uartintr(); } else if(irq == VIRTIO0_IRQ){ virtio_disk_intr(); - } else if(irq){ + } +#ifdef LAB_NET + else if(irq == E1000_IRQ){ + e1000_intr(); + } +#endif + else if(irq){ printf("unexpected interrupt irq=%d\n", irq); } diff --git a/kernel/virtio_disk.c b/kernel/virtio_disk.c index ae6c164..dfca2bc 100644 --- a/kernel/virtio_disk.c +++ b/kernel/virtio_disk.c @@ -212,6 +212,28 @@ alloc3_desc(int *idx) return 0; } +#ifdef LAB_LOCK +// +// check that there are at most NBUF distinct +// struct buf's, which the lock lab requires. +// +static struct buf *xbufs[NBUF]; +static void +checkbuf(struct buf *b) +{ + for(int i = 0; i < NBUF; i++){ + if(xbufs[i] == b){ + return; + } + if(xbufs[i] == 0){ + xbufs[i] = b; + return; + } + } + panic("more than NBUF bufs"); +} +#endif + void virtio_disk_rw(struct buf *b, int write) { @@ -219,6 +241,10 @@ virtio_disk_rw(struct buf *b, int write) acquire(&disk.vdisk_lock); +#ifdef LAB_LOCK + checkbuf(b); +#endif + // the spec's Section 5.2 says that legacy block operations use // three descriptors: one for type/reserved/sector, one for the // data, one for a 1-byte status result. diff --git a/kernel/vm.c b/kernel/vm.c index 5c31e87..be7d042 100644 --- a/kernel/vm.c +++ b/kernel/vm.c @@ -4,6 +4,8 @@ #include "elf.h" #include "riscv.h" #include "defs.h" +#include "spinlock.h" +#include "proc.h" #include "fs.h" /* @@ -30,6 +32,14 @@ kvmmake(void) // virtio mmio disk interface kvmmap(kpgtbl, VIRTIO0, VIRTIO0, PGSIZE, PTE_R | PTE_W); +#ifdef LAB_NET + // PCI-E ECAM (configuration space), for pci.c + kvmmap(kpgtbl, 0x30000000L, 0x30000000L, 0x10000000, PTE_R | PTE_W); + + // pci.c maps the e1000's registers here. + kvmmap(kpgtbl, 0x40000000L, 0x40000000L, 0x20000, PTE_R | PTE_W); +#endif + // PLIC kvmmap(kpgtbl, PLIC, PLIC, 0x400000, PTE_R | PTE_W); @@ -136,9 +146,8 @@ kvmmap(pagetable_t kpgtbl, uint64 va, uint64 pa, uint64 sz, int perm) } // Create PTEs for virtual addresses starting at va that refer to -// physical addresses starting at pa. -// va and size MUST be page-aligned. -// Returns 0 on success, -1 if walk() couldn't +// physical addresses starting at pa. va and size might not +// be page-aligned. Returns 0 on success, -1 if walk() couldn't // allocate a needed page-table page. 
 int
 mappages(pagetable_t pagetable, uint64 va, uint64 size,
          uint64 pa, int perm)
@@ -146,17 +155,11 @@ mappages(pagetable_t pagetable, uint64 va, uint64 size,
 {
   uint64 a, last;
   pte_t *pte;
 
-  if((va % PGSIZE) != 0)
-    panic("mappages: va not aligned");
-
-  if((size % PGSIZE) != 0)
-    panic("mappages: size not aligned");
-
   if(size == 0)
     panic("mappages: size");
 
-  a = va;
-  last = va + size - PGSIZE;
+  a = PGROUNDDOWN(va);
+  last = PGROUNDDOWN(va + size - 1);
   for(;;){
     if((pte = walk(pagetable, a, 1)) == 0)
       return -1;
@@ -186,8 +189,10 @@ uvmunmap(pagetable_t pagetable, uint64 va, uint64 npages, int do_free)
   for(a = va; a < va + npages*PGSIZE; a += PGSIZE){
     if((pte = walk(pagetable, a, 0)) == 0)
       panic("uvmunmap: walk");
-    if((*pte & PTE_V) == 0)
+    if((*pte & PTE_V) == 0) {
+      printf("va=%p pte=%p\n", a, *pte);
       panic("uvmunmap: not mapped");
+    }
     if(PTE_FLAGS(*pte) == PTE_V)
       panic("uvmunmap: not a leaf");
     if(do_free){
@@ -315,20 +320,26 @@ uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
   pte_t *pte;
   uint64 pa, i;
   uint flags;
-  char *mem;
+  // char *mem;
 
   for(i = 0; i < sz; i += PGSIZE){
     if((pte = walk(old, i, 0)) == 0)
      panic("uvmcopy: pte should exist");
     if((*pte & PTE_V) == 0)
       panic("uvmcopy: page not present");
-    pa = PTE2PA(*pte);
-    flags = PTE_FLAGS(*pte);
+    // don't copy the page; bump its refcnt and map it read-only, marked COW
+    /*
     if((mem = kalloc()) == 0)
       goto err;
     memmove(mem, (char*)pa, PGSIZE);
-    if(mappages(new, i, PGSIZE, (uint64)mem, flags) != 0){
-      kfree(mem);
+    */
+    *pte &= ~PTE_W;
+    *pte |= PTE_C;
+    pa = PTE2PA(*pte);
+    refcnt_inc(pa);
+    flags = PTE_FLAGS(*pte);
+    if(mappages(new, i, PGSIZE, (uint64)pa, flags) != 0){
+      // kfree(mem);
       goto err;
     }
   }
@@ -359,17 +370,24 @@ int
 copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
 {
   uint64 n, va0, pa0;
-  pte_t *pte;
+  // pte_t *pte;
 
   while(len > 0){
     va0 = PGROUNDDOWN(dstva);
-    if(va0 >= MAXVA)
+
+    if(cow_handler(pagetable, va0) < 0)
       return -1;
+
+    /*
     pte = walk(pagetable, va0, 0);
     if(pte == 0 || (*pte & PTE_V) == 0 ||
        (*pte & PTE_U) == 0 || (*pte & PTE_W) == 0)
       return -1;
     pa0 = PTE2PA(*pte);
+    */
+    pa0 = walkaddr(pagetable, va0);
+    if(pa0 == 0)
+      return -1;
     n = PGSIZE - (dstva - va0);
     if(n > len)
       n = len;
@@ -389,7 +407,7 @@ int
 copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len)
 {
   uint64 n, va0, pa0;
-
+  
   while(len > 0){
     va0 = PGROUNDDOWN(srcva);
     pa0 = walkaddr(pagetable, va0);
@@ -449,3 +467,30 @@ copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max)
     return -1;
   }
 }
+
+static void
+walkprint(pagetable_t pgtbl, int level)
+{
+  for(int i = 0; i < 512; i++){
+    pte_t pte = pgtbl[i];
+    if(pte & PTE_V){
+      for(int j = 0; j < level; j++){
+        printf(" ..");
+      }
+      printf("%d: pte %p pa %p\n", i, pte, PTE2PA(pte));
+      if((pte & (PTE_R|PTE_W|PTE_X)) == 0){
+        // this PTE points to a lower-level page table.
+        walkprint((pagetable_t)PTE2PA(pte), level+1);
+      }
+    }
+  }
+}
+
+// Print the contents of a page table
+void
+vmprint(pagetable_t pgtbl)
+{
+  printf("page table %p\n", pgtbl);
+
+  walkprint(pgtbl, 1);
+}
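/*
 * usertrap() and copyout() above both lean on cow_handler(), which this
 * hunk does not show (it lives elsewhere in the tree, next to the
 * refcnt_inc() bookkeeping).  A hypothetical sketch consistent with the
 * PTE_C convention used in uvmcopy():
 */
int
cow_handler(pagetable_t pagetable, uint64 va)
{
  pte_t *pte;
  uint64 pa;
  char *mem;

  if(va >= MAXVA)
    return -1;
  if((pte = walk(pagetable, PGROUNDDOWN(va), 0)) == 0)
    return -1;
  if((*pte & PTE_V) == 0 || (*pte & PTE_U) == 0)
    return -1;
  if((*pte & PTE_C) == 0)
    return 0;                      // not a COW page; nothing to do
  pa = PTE2PA(*pte);
  if((mem = kalloc()) == 0)
    return -1;
  memmove(mem, (char*)pa, PGSIZE); // private copy for the faulting side
  *pte = PA2PTE((uint64)mem) | ((PTE_FLAGS(*pte) | PTE_W) & ~PTE_C);
  kfree((char*)pa);                // drops one reference to the shared page
  return 0;
}

/*
 * vmprint() is the pgtbl lab's debugging aid; the lab has exec() call
 * it for the first process:
 *
 *   if(p->pid == 1)
 *     vmprint(p->pagetable);
 */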