diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/cow.c | 30 | ||||
| -rw-r--r-- | kernel/defs.h | 18 | ||||
| -rw-r--r-- | kernel/e1000.c | 178 | ||||
| -rw-r--r-- | kernel/e1000_dev.h | 125 | ||||
| -rw-r--r-- | kernel/exec.c | 4 | ||||
| -rw-r--r-- | kernel/file.c | 21 | ||||
| -rw-r--r-- | kernel/kalloc.c | 57 | ||||
| -rw-r--r-- | kernel/memlayout.h | 17 | ||||
| -rw-r--r-- | kernel/net.c | 374 | ||||
| -rw-r--r-- | kernel/net.h | 173 | ||||
| -rw-r--r-- | kernel/pci.c | 61 | ||||
| -rw-r--r-- | kernel/plic.c | 12 | ||||
| -rw-r--r-- | kernel/printf.c | 17 | ||||
| -rw-r--r-- | kernel/proc.c | 84 | ||||
| -rw-r--r-- | kernel/proc.h | 7 | ||||
| -rw-r--r-- | kernel/riscv.h | 19 | ||||
| -rw-r--r-- | kernel/stats.c | 3 | ||||
| -rw-r--r-- | kernel/syscall.c | 72 | ||||
| -rw-r--r-- | kernel/syscall.h | 11 | ||||
| -rw-r--r-- | kernel/sysfile.c | 26 | ||||
| -rw-r--r-- | kernel/sysinfo.c | 24 | ||||
| -rw-r--r-- | kernel/sysinfo.h | 4 | ||||
| -rw-r--r-- | kernel/sysnet.c | 185 | ||||
| -rw-r--r-- | kernel/sysproc.c | 65 | ||||
| -rw-r--r-- | kernel/trap.c | 41 | ||||
| -rw-r--r-- | kernel/vm.c | 85 | 
26 files changed, 1667 insertions, 46 deletions
| diff --git a/kernel/cow.c b/kernel/cow.c new file mode 100644 index 0000000..b3634fc --- /dev/null +++ b/kernel/cow.c @@ -0,0 +1,30 @@ +// COW pagefault handler +#include "types.h" +#include "riscv.h" +#include "defs.h" + +int +cow_handler(pagetable_t pagetable, uint64 va) +{ +  // you can't really write to rediculous pointers +  if(va >= MAXVA || PGROUNDDOWN(va) == 0) +    return -1; +  pte_t *pte = walk(pagetable, va, 0); +  if(pte == 0 || (*pte & PTE_V) == 0 || (*pte & PTE_U) == 0) +    return -1; +  if(*pte & PTE_C){ +    uint64 pa_orig = PTE2PA(*pte); +    uint64 pa_new = (uint64)kalloc(); +    if(pa_new == 0){ +      printf("cow pagefault: kalloc failed\n"); +      return -1; +    } +    // copy the page and add write permission +    memmove((void*)pa_new, (void*)pa_orig, PGSIZE); +    uint64 flags = (PTE_FLAGS(*pte) | PTE_W) & ~PTE_C; +    *pte = PA2PTE(pa_new) | flags; +    kfree((void*)pa_orig); +  } else if ((*pte & PTE_W) == 0) +    return -1; +  return 0; +} diff --git a/kernel/defs.h b/kernel/defs.h index 859fc41..541c97e 100644 --- a/kernel/defs.h +++ b/kernel/defs.h @@ -16,6 +16,7 @@ struct superblock;  struct mbuf;  struct sock;  #endif +struct sysinfo;  // bio.c  void            binit(void); @@ -30,6 +31,9 @@ void            consoleinit(void);  void            consoleintr(int);  void            consputc(int); +// cow.c +int             cow_handler(pagetable_t, uint64); +  // exec.c  int             exec(char*, char**); @@ -68,9 +72,12 @@ void            ramdiskintr(void);  void            ramdiskrw(struct buf*);  // kalloc.c +int             refcnt_inc(uint64); +int             refcnt_dec(uint64);  void*           kalloc(void);  void            kfree(void *);  void            kinit(void); +int             get_freemem(void);  // log.c  void            initlog(int, struct superblock*); @@ -88,6 +95,7 @@ int             pipewrite(struct pipe*, uint64, int);  void            printf(char*, ...);  void            panic(char*) __attribute__((noreturn)); 
 void            printfinit(void); +void            backtrace(void);  // proc.c  int             cpuid(void); @@ -114,6 +122,8 @@ void            yield(void);  int             either_copyout(int user_dst, uint64 dst, void *src, uint64 len);  int             either_copyin(void *dst, int user_src, uint64 src, uint64 len);  void            procdump(void); +int             get_nproc(void); +int             pgaccess(uint64 base, int len, uint64 mask);  // swtch.S  void            swtch(struct context*, struct context*); @@ -125,9 +135,7 @@ void            initlock(struct spinlock*, char*);  void            release(struct spinlock*);  void            push_off(void);  void            pop_off(void); -#if defined(LAB_LOCK) || defined(LAB_NET)  int             atomic_read4(int *addr); -#endif  #ifdef LAB_LOCK  void            freelock(struct spinlock*);  #endif @@ -155,6 +163,9 @@ int             fetchstr(uint64, char*, int);  int             fetchaddr(uint64, uint64*);  void            syscall(); +// sysinfo.c +int             sys_info(uint64); +  // trap.c  extern uint     ticks;  void            trapinit(void); @@ -187,6 +198,7 @@ uint64          walkaddr(pagetable_t, uint64);  int             copyout(pagetable_t, uint64, char *, uint64);  int             copyin(pagetable_t, char *, uint64, uint64);  int             copyinstr(pagetable_t, char *, uint64, uint64); +void            vmprint(pagetable_t);  // plic.c  void            plicinit(void); @@ -210,14 +222,12 @@ int             copyin_new(pagetable_t, char *, uint64, uint64);  int             copyinstr_new(pagetable_t, char *, uint64, uint64);  #endif -#ifdef LAB_LOCK  // stats.c  void            statsinit(void);  void            statsinc(void);  // sprintf.c  int             snprintf(char*, int, char*, ...); -#endif  #ifdef KCSAN  void            kcsaninit(); diff --git a/kernel/e1000.c b/kernel/e1000.c new file mode 100644 index 0000000..c9ba9e2 --- /dev/null +++ b/kernel/e1000.c @@ -0,0 +1,178 @@ +#include 
"types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "spinlock.h" +#include "proc.h" +#include "defs.h" +#include "e1000_dev.h" +#include "net.h" + +#define TX_RING_SIZE 16 +static struct tx_desc tx_ring[TX_RING_SIZE] __attribute__((aligned(16))); +static struct mbuf *tx_mbufs[TX_RING_SIZE]; + +#define RX_RING_SIZE 16 +static struct rx_desc rx_ring[RX_RING_SIZE] __attribute__((aligned(16))); +static struct mbuf *rx_mbufs[RX_RING_SIZE]; + +// remember where the e1000's registers live. +static volatile uint32 *regs; + +struct spinlock e1000_lock; + +// called by pci_init(). +// xregs is the memory address at which the +// e1000's registers are mapped. +void +e1000_init(uint32 *xregs) +{ +  int i; + +  initlock(&e1000_lock, "e1000"); + +  regs = xregs; + +  // Reset the device +  regs[E1000_IMS] = 0; // disable interrupts +  regs[E1000_CTL] |= E1000_CTL_RST; +  regs[E1000_IMS] = 0; // redisable interrupts +  __sync_synchronize(); + +  // [E1000 14.5] Transmit initialization +  memset(tx_ring, 0, sizeof(tx_ring)); +  for (i = 0; i < TX_RING_SIZE; i++) { +    tx_ring[i].status = E1000_TXD_STAT_DD; +    tx_mbufs[i] = 0; +  } +  regs[E1000_TDBAL] = (uint64) tx_ring; +  if(sizeof(tx_ring) % 128 != 0) +    panic("e1000"); +  regs[E1000_TDLEN] = sizeof(tx_ring); +  regs[E1000_TDH] = regs[E1000_TDT] = 0; +   +  // [E1000 14.4] Receive initialization +  memset(rx_ring, 0, sizeof(rx_ring)); +  for (i = 0; i < RX_RING_SIZE; i++) { +    rx_mbufs[i] = mbufalloc(0); +    if (!rx_mbufs[i]) +      panic("e1000"); +    rx_ring[i].addr = (uint64) rx_mbufs[i]->head; +  } +  regs[E1000_RDBAL] = (uint64) rx_ring; +  if(sizeof(rx_ring) % 128 != 0) +    panic("e1000"); +  regs[E1000_RDH] = 0; +  regs[E1000_RDT] = RX_RING_SIZE - 1; +  regs[E1000_RDLEN] = sizeof(rx_ring); + +  // filter by qemu's MAC address, 52:54:00:12:34:56 +  regs[E1000_RA] = 0x12005452; +  regs[E1000_RA+1] = 0x5634 | (1<<31); +  // multicast table +  for (int i = 0; i < 4096/32; i++) +    
regs[E1000_MTA + i] = 0; + +  // transmitter control bits. +  regs[E1000_TCTL] = E1000_TCTL_EN |  // enable +    E1000_TCTL_PSP |                  // pad short packets +    (0x10 << E1000_TCTL_CT_SHIFT) |   // collision stuff +    (0x40 << E1000_TCTL_COLD_SHIFT); +  regs[E1000_TIPG] = 10 | (8<<10) | (6<<20); // inter-pkt gap + +  // receiver control bits. +  regs[E1000_RCTL] = E1000_RCTL_EN | // enable receiver +    E1000_RCTL_BAM |                 // enable broadcast +    E1000_RCTL_SZ_2048 |             // 2048-byte rx buffers +    E1000_RCTL_SECRC;                // strip CRC +   +  // ask e1000 for receive interrupts. +  regs[E1000_RDTR] = 0; // interrupt after every received packet (no timer) +  regs[E1000_RADV] = 0; // interrupt after every packet (no timer) +  regs[E1000_IMS] = (1 << 7); // RXDW -- Receiver Descriptor Write Back +} + +int +e1000_transmit(struct mbuf *m) +{ +  // the mbuf contains an ethernet frame; program it into +  // the TX descriptor ring so that the e1000 sends it. Stash +  // a pointer so that it can be freed after sending. +  acquire(&e1000_lock); + +  int cur_idx = regs[E1000_TDT]; +   +  // check if the STAT_DD bit is set in current descriptor +  // if not set, means a previous tx in this descripter is still in flight, return an error. 
+  if(!(tx_ring[cur_idx].status & E1000_TXD_STAT_DD)){ +    release(&e1000_lock); +    return -1; +  } + +  // free previous mbuf and update current descriptor +  if(tx_mbufs[cur_idx]) +    mbuffree(tx_mbufs[cur_idx]); +  tx_ring[cur_idx].addr = (uint64)m->head; +  tx_ring[cur_idx].length = (uint64)m->len; +  tx_ring[cur_idx].cmd = E1000_TXD_CMD_RS | E1000_TXD_CMD_EOP; +  // also clear status bits +  tx_ring[cur_idx].status = 0; + +  // stash current mbuf to tx_mbufs (would be freed later) +  tx_mbufs[cur_idx] = m; + +  // update the ring position to point to the next descriptor; +  regs[E1000_TDT] = (cur_idx + 1) % TX_RING_SIZE; + +  release(&e1000_lock); +  return 0; +} + +static void +e1000_recv(void) +{ +  // Check for packets that have arrived from the e1000 +  // Create and deliver an mbuf for each packet (using net_rx()). +  while(1){ +    acquire(&e1000_lock); +    int cur_idx = (regs[E1000_RDT]+1) % RX_RING_SIZE; + +    // check if last rx is completed (DD bit set, tested with &). If not, the +    // ring is drained: release the lock before leaving the loop so that later +    // callers of acquire(&e1000_lock) are not blocked forever. +    if(!(rx_ring[cur_idx].status & E1000_RXD_STAT_DD)){ +      release(&e1000_lock); +      break; +    } + +    // update the mbuf's length to the len reported by rx_desc +    // mbufput(rx_mbufs[cur_idx], rx_ring[cur_idx].length); +    rx_mbufs[cur_idx]->len = rx_ring[cur_idx].length; + +    // stash mbuf, for later net_rx() +    struct mbuf *rx_buf = rx_mbufs[cur_idx]; + +    // net_rx() would free the passed mbuf invisibly, so we need to re-alloc it +    rx_mbufs[cur_idx] = mbufalloc(0); +    if(!rx_mbufs[cur_idx]) +      panic("e1000_recv: mbufalloc"); +     +    // update buffer addr and clear status bits +    rx_ring[cur_idx].addr = (uint64)rx_mbufs[cur_idx]->head; +    rx_ring[cur_idx].status = 0; + +    // update the E1000_RDT register to point to next position +    regs[E1000_RDT] = cur_idx; +    release(&e1000_lock); +     +    // pass to the network stack, must not hold the lock coz it can lead to deadlocks under different cpus +    net_rx(rx_buf); +  } +} + +void +e1000_intr(void) +{ +  // tell 
the e1000 we've seen this interrupt; +  // without this the e1000 won't raise any +  // further interrupts. +  regs[E1000_ICR] = 0xffffffff; + +  e1000_recv(); +} diff --git a/kernel/e1000_dev.h b/kernel/e1000_dev.h new file mode 100644 index 0000000..9b462df --- /dev/null +++ b/kernel/e1000_dev.h @@ -0,0 +1,125 @@ +// +// E1000 hardware definitions: registers and DMA ring format. +// from the Intel 82540EP/EM &c manual. +// + +/* Registers */ +#define E1000_CTL      (0x00000/4)  /* Device Control Register - RW */ +#define E1000_ICR      (0x000C0/4)  /* Interrupt Cause Read - R */ +#define E1000_IMS      (0x000D0/4)  /* Interrupt Mask Set - RW */ +#define E1000_RCTL     (0x00100/4)  /* RX Control - RW */ +#define E1000_TCTL     (0x00400/4)  /* TX Control - RW */ +#define E1000_TIPG     (0x00410/4)  /* TX Inter-packet gap -RW */ +#define E1000_RDBAL    (0x02800/4)  /* RX Descriptor Base Address Low - RW */ +#define E1000_RDTR     (0x02820/4)  /* RX Delay Timer */ +#define E1000_RADV     (0x0282C/4)  /* RX Interrupt Absolute Delay Timer */ +#define E1000_RDH      (0x02810/4)  /* RX Descriptor Head - RW */ +#define E1000_RDT      (0x02818/4)  /* RX Descriptor Tail - RW */ +#define E1000_RDLEN    (0x02808/4)  /* RX Descriptor Length - RW */ +#define E1000_RSRPD    (0x02C00/4)  /* RX Small Packet Detect Interrupt */ +#define E1000_TDBAL    (0x03800/4)  /* TX Descriptor Base Address Low - RW */ +#define E1000_TDLEN    (0x03808/4)  /* TX Descriptor Length - RW */ +#define E1000_TDH      (0x03810/4)  /* TX Descriptor Head - RW */ +#define E1000_TDT      (0x03818/4)  /* TX Descripotr Tail - RW */ +#define E1000_MTA      (0x05200/4)  /* Multicast Table Array - RW Array */ +#define E1000_RA       (0x05400/4)  /* Receive Address - RW Array */ + +/* Device Control */ +#define E1000_CTL_SLU     0x00000040    /* set link up */ +#define E1000_CTL_FRCSPD  0x00000800    /* force speed */ +#define E1000_CTL_FRCDPLX 0x00001000    /* force duplex */ +#define E1000_CTL_RST     
0x00400000    /* full reset */ + +/* Transmit Control */ +#define E1000_TCTL_RST    0x00000001    /* software reset */ +#define E1000_TCTL_EN     0x00000002    /* enable tx */ +#define E1000_TCTL_BCE    0x00000004    /* busy check enable */ +#define E1000_TCTL_PSP    0x00000008    /* pad short packets */ +#define E1000_TCTL_CT     0x00000ff0    /* collision threshold */ +#define E1000_TCTL_CT_SHIFT 4 +#define E1000_TCTL_COLD   0x003ff000    /* collision distance */ +#define E1000_TCTL_COLD_SHIFT 12 +#define E1000_TCTL_SWXOFF 0x00400000    /* SW Xoff transmission */ +#define E1000_TCTL_PBE    0x00800000    /* Packet Burst Enable */ +#define E1000_TCTL_RTLC   0x01000000    /* Re-transmit on late collision */ +#define E1000_TCTL_NRTU   0x02000000    /* No Re-transmit on underrun */ +#define E1000_TCTL_MULR   0x10000000    /* Multiple request support */ + +/* Receive Control */ +#define E1000_RCTL_RST            0x00000001    /* Software reset */ +#define E1000_RCTL_EN             0x00000002    /* enable */ +#define E1000_RCTL_SBP            0x00000004    /* store bad packet */ +#define E1000_RCTL_UPE            0x00000008    /* unicast promiscuous enable */ +#define E1000_RCTL_MPE            0x00000010    /* multicast promiscuous enab */ +#define E1000_RCTL_LPE            0x00000020    /* long packet enable */ +#define E1000_RCTL_LBM_NO         0x00000000    /* no loopback mode */ +#define E1000_RCTL_LBM_MAC        0x00000040    /* MAC loopback mode */ +#define E1000_RCTL_LBM_SLP        0x00000080    /* serial link loopback mode */ +#define E1000_RCTL_LBM_TCVR       0x000000C0    /* tcvr loopback mode */ +#define E1000_RCTL_DTYP_MASK      0x00000C00    /* Descriptor type mask */ +#define E1000_RCTL_DTYP_PS        0x00000400    /* Packet Split descriptor */ +#define E1000_RCTL_RDMTS_HALF     0x00000000    /* rx desc min threshold size */ +#define E1000_RCTL_RDMTS_QUAT     0x00000100    /* rx desc min threshold size */ +#define E1000_RCTL_RDMTS_EIGTH    0x00000200    /* 
rx desc min threshold size */ +#define E1000_RCTL_MO_SHIFT       12            /* multicast offset shift */ +#define E1000_RCTL_MO_0           0x00000000    /* multicast offset 11:0 */ +#define E1000_RCTL_MO_1           0x00001000    /* multicast offset 12:1 */ +#define E1000_RCTL_MO_2           0x00002000    /* multicast offset 13:2 */ +#define E1000_RCTL_MO_3           0x00003000    /* multicast offset 15:4 */ +#define E1000_RCTL_MDR            0x00004000    /* multicast desc ring 0 */ +#define E1000_RCTL_BAM            0x00008000    /* broadcast enable */ +/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */ +#define E1000_RCTL_SZ_2048        0x00000000    /* rx buffer size 2048 */ +#define E1000_RCTL_SZ_1024        0x00010000    /* rx buffer size 1024 */ +#define E1000_RCTL_SZ_512         0x00020000    /* rx buffer size 512 */ +#define E1000_RCTL_SZ_256         0x00030000    /* rx buffer size 256 */ +/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */ +#define E1000_RCTL_SZ_16384       0x00010000    /* rx buffer size 16384 */ +#define E1000_RCTL_SZ_8192        0x00020000    /* rx buffer size 8192 */ +#define E1000_RCTL_SZ_4096        0x00030000    /* rx buffer size 4096 */ +#define E1000_RCTL_VFE            0x00040000    /* vlan filter enable */ +#define E1000_RCTL_CFIEN          0x00080000    /* canonical form enable */ +#define E1000_RCTL_CFI            0x00100000    /* canonical form indicator */ +#define E1000_RCTL_DPF            0x00400000    /* discard pause frames */ +#define E1000_RCTL_PMCF           0x00800000    /* pass MAC control frames */ +#define E1000_RCTL_BSEX           0x02000000    /* Buffer size extension */ +#define E1000_RCTL_SECRC          0x04000000    /* Strip Ethernet CRC */ +#define E1000_RCTL_FLXBUF_MASK    0x78000000    /* Flexible buffer size */ +#define E1000_RCTL_FLXBUF_SHIFT   27            /* Flexible buffer shift */ + +#define DATA_MAX 1518 + +/* Transmit Descriptor command definitions [E1000 3.3.3.1] */ +#define 
E1000_TXD_CMD_EOP    0x01 /* End of Packet */ +#define E1000_TXD_CMD_RS     0x08 /* Report Status */ + +/* Transmit Descriptor status definitions [E1000 3.3.3.2] */ +#define E1000_TXD_STAT_DD    0x00000001 /* Descriptor Done */ + +// [E1000 3.3.3] +struct tx_desc +{ +  uint64 addr; +  uint16 length; +  uint8 cso; +  uint8 cmd; +  uint8 status; +  uint8 css; +  uint16 special; +}; + +/* Receive Descriptor bit definitions [E1000 3.2.3.1] */ +#define E1000_RXD_STAT_DD       0x01    /* Descriptor Done */ +#define E1000_RXD_STAT_EOP      0x02    /* End of Packet */ + +// [E1000 3.2.3] +struct rx_desc +{ +  uint64 addr;       /* Address of the descriptor's data buffer */ +  uint16 length;     /* Length of data DMAed into data buffer */ +  uint16 csum;       /* Packet checksum */ +  uint8 status;      /* Descriptor status */ +  uint8 errors;      /* Descriptor Errors */ +  uint16 special; +}; + diff --git a/kernel/exec.c b/kernel/exec.c index e18bbb6..35b35f5 100644 --- a/kernel/exec.c +++ b/kernel/exec.c @@ -128,6 +128,10 @@ exec(char *path, char **argv)    p->trapframe->sp = sp; // initial stack pointer    proc_freepagetable(oldpagetable, oldsz); +  if(p->pid == 1){ +    vmprint(p->pagetable); +  } +    return argc; // this ends up in a0, the first argument to main(argc, argv)   bad: diff --git a/kernel/file.c b/kernel/file.c index 25fa226..0fba21b 100644 --- a/kernel/file.c +++ b/kernel/file.c @@ -80,6 +80,11 @@ fileclose(struct file *f)      iput(ff.ip);      end_op();    } +#ifdef LAB_NET +  else if(ff.type == FD_SOCK){ +    sockclose(ff.sock); +  } +#endif  }  // Get metadata about file f. 
@@ -122,7 +127,13 @@ fileread(struct file *f, uint64 addr, int n)      if((r = readi(f->ip, 1, addr, f->off, n)) > 0)        f->off += r;      iunlock(f->ip); -  } else { +  } +#ifdef LAB_NET +  else if(f->type == FD_SOCK){ +    r = sockread(f->sock, addr, n); +  } +#endif +  else {      panic("fileread");    } @@ -173,7 +184,13 @@ filewrite(struct file *f, uint64 addr, int n)        i += r;      }      ret = (i == n ? n : -1); -  } else { +  } +#ifdef LAB_NET +  else if(f->type == FD_SOCK){ +    ret = sockwrite(f->sock, addr, n); +  } +#endif +  else {      panic("filewrite");    } diff --git a/kernel/kalloc.c b/kernel/kalloc.c index 0699e7e..581f0f6 100644 --- a/kernel/kalloc.c +++ b/kernel/kalloc.c @@ -23,10 +23,41 @@ struct {    struct run *freelist;  } kmem; +int phypg_refcnt[PHYSTOP/PGSIZE]; + +// Increase the refcnt of physical page pa; returns the new count +int +refcnt_inc(uint64 pa) +{ +  acquire(&kmem.lock); +  if(pa >= PHYSTOP || phypg_refcnt[pa/PGSIZE] < 1) +    panic("increase refcnt"); +  // snapshot the new count while kmem.lock is still held +  int cnt = ++phypg_refcnt[pa/PGSIZE]; +  release(&kmem.lock); +  return cnt; +} + +// Decrease the refcnt of physical page pa; returns the new count +int +refcnt_dec(uint64 pa) +{ +  acquire(&kmem.lock); +  if(pa >= PHYSTOP || phypg_refcnt[pa/PGSIZE] < 1) +    panic("decrease refcnt"); +  // snapshot the new count while kmem.lock is still held +  int cnt = --phypg_refcnt[pa/PGSIZE]; +  release(&kmem.lock); +  return cnt; +} +  void  kinit()  {    initlock(&kmem.lock, "kmem"); +  // init all refcnt to 1, which would later be freed to 0 by kfree() +  for(uint64 p = PGROUNDUP((uint64)end); p + PGSIZE <= PHYSTOP; p += PGSIZE) +    phypg_refcnt[p/PGSIZE] = 1;    freerange(end, (void*)PHYSTOP);  } @@ -51,6 +82,12 @@ kfree(void *pa)    if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)      panic("kfree"); +  // Drop one reference and decide on the count refcnt_dec() saw under kmem.lock; +  // re-reading phypg_refcnt here unlocked could let two CPUs both free the page. +  if(refcnt_dec((uint64)pa) > 0) +    // We still have refs to this phy page, do not actually free it +    return; +    // Fill with junk to catch dangling refs.    
memset(pa, 1, PGSIZE); @@ -72,11 +109,29 @@ kalloc(void)    acquire(&kmem.lock);    r = kmem.freelist; -  if(r) +  if(r){ +    if(phypg_refcnt[(uint64)r/PGSIZE]) +      panic("kalloc: invalid refcnt"); +    phypg_refcnt[(uint64)r/PGSIZE] = 1;      kmem.freelist = r->next; +  }    release(&kmem.lock);    if(r)      memset((char*)r, 5, PGSIZE); // fill with junk    return (void*)r;  } + +int +get_freemem(void) +{ +  int n; +  struct run *r; + +  acquire(&kmem.lock); +  for (n = 0, r = kmem.freelist; r; r = r->next) +    n++; +  release(&kmem.lock); + +  return n * PGSIZE; +} diff --git a/kernel/memlayout.h b/kernel/memlayout.h index cac3cb1..74d2fd4 100644 --- a/kernel/memlayout.h +++ b/kernel/memlayout.h @@ -25,6 +25,10 @@  #define VIRTIO0 0x10001000  #define VIRTIO0_IRQ 1 +#ifdef LAB_NET +#define E1000_IRQ 33 +#endif +  // core local interruptor (CLINT), which contains the timer.  #define CLINT 0x2000000L  #define CLINT_MTIMECMP(hartid) (CLINT + 0x4000 + 8*(hartid)) @@ -34,8 +38,11 @@  #define PLIC 0x0c000000L  #define PLIC_PRIORITY (PLIC + 0x0)  #define PLIC_PENDING (PLIC + 0x1000) +#define PLIC_MENABLE(hart) (PLIC + 0x2000 + (hart)*0x100)  #define PLIC_SENABLE(hart) (PLIC + 0x2080 + (hart)*0x100) +#define PLIC_MPRIORITY(hart) (PLIC + 0x200000 + (hart)*0x2000)  #define PLIC_SPRIORITY(hart) (PLIC + 0x201000 + (hart)*0x2000) +#define PLIC_MCLAIM(hart) (PLIC + 0x200004 + (hart)*0x2000)  #define PLIC_SCLAIM(hart) (PLIC + 0x201004 + (hart)*0x2000)  // the kernel expects there to be RAM @@ -50,7 +57,7 @@  // map kernel stacks beneath the trampoline,  // each surrounded by invalid guard pages. -#define KSTACK(p) (TRAMPOLINE - ((p)+1)* 2*PGSIZE) +#define KSTACK(p) (TRAMPOLINE - (p)*2*PGSIZE - 3*PGSIZE)  // User memory layout.  // Address zero first: @@ -59,6 +66,14 @@  //   fixed-size stack  //   expandable heap  //   ... 
+//   USYSCALL (shared with kernel)  //   TRAPFRAME (p->trapframe, used by the trampoline)  //   TRAMPOLINE (the same page as in the kernel)  #define TRAPFRAME (TRAMPOLINE - PGSIZE) +#ifdef LAB_PGTBL +#define USYSCALL (TRAPFRAME - PGSIZE) + +struct usyscall { +  int pid;  // Process ID +}; +#endif diff --git a/kernel/net.c b/kernel/net.c new file mode 100644 index 0000000..137ea2b --- /dev/null +++ b/kernel/net.c @@ -0,0 +1,374 @@ +// +// networking protocol support (IP, UDP, ARP, etc.). +// + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "spinlock.h" +#include "proc.h" +#include "net.h" +#include "defs.h" + +static uint32 local_ip = MAKE_IP_ADDR(10, 0, 2, 15); // qemu's idea of the guest IP +static uint8 local_mac[ETHADDR_LEN] = { 0x52, 0x54, 0x00, 0x12, 0x34, 0x56 }; +static uint8 broadcast_mac[ETHADDR_LEN] = { 0xFF, 0XFF, 0XFF, 0XFF, 0XFF, 0XFF }; + +// Strips data from the start of the buffer and returns a pointer to it. +// Returns 0 if less than the full requested length is available. +char * +mbufpull(struct mbuf *m, unsigned int len) +{ +  char *tmp = m->head; +  if (m->len < len) +    return 0; +  m->len -= len; +  m->head += len; +  return tmp; +} + +// Prepends data to the beginning of the buffer and returns a pointer to it. +char * +mbufpush(struct mbuf *m, unsigned int len) +{ +  m->head -= len; +  if (m->head < m->buf) +    panic("mbufpush"); +  m->len += len; +  return m->head; +} + +// Appends data to the end of the buffer and returns a pointer to it. +char * +mbufput(struct mbuf *m, unsigned int len) +{ +  char *tmp = m->head + m->len; +  m->len += len; +  if (m->len > MBUF_SIZE) +    panic("mbufput"); +  return tmp; +} + +// Strips data from the end of the buffer and returns a pointer to it. +// Returns 0 if less than the full requested length is available. 
+char * +mbuftrim(struct mbuf *m, unsigned int len) +{ +  if (len > m->len) +    return 0; +  m->len -= len; +  return m->head + m->len; +} + +// Allocates a packet buffer. +struct mbuf * +mbufalloc(unsigned int headroom) +{ +  struct mbuf *m; +  +  if (headroom > MBUF_SIZE) +    return 0; +  m = kalloc(); +  if (m == 0) +    return 0; +  m->next = 0; +  m->head = (char *)m->buf + headroom; +  m->len = 0; +  memset(m->buf, 0, sizeof(m->buf)); +  return m; +} + +// Frees a packet buffer. +void +mbuffree(struct mbuf *m) +{ +  kfree(m); +} + +// Pushes an mbuf to the end of the queue. +void +mbufq_pushtail(struct mbufq *q, struct mbuf *m) +{ +  m->next = 0; +  if (!q->head){ +    q->head = q->tail = m; +    return; +  } +  q->tail->next = m; +  q->tail = m; +} + +// Pops an mbuf from the start of the queue. +struct mbuf * +mbufq_pophead(struct mbufq *q) +{ +  struct mbuf *head = q->head; +  if (!head) +    return 0; +  q->head = head->next; +  return head; +} + +// Returns one (nonzero) if the queue is empty. +int +mbufq_empty(struct mbufq *q) +{ +  return q->head == 0; +} + +// Intializes a queue of mbufs. +void +mbufq_init(struct mbufq *q) +{ +  q->head = 0; +} + +// This code is lifted from FreeBSD's ping.c, and is copyright by the Regents +// of the University of California. +static unsigned short +in_cksum(const unsigned char *addr, int len) +{ +  int nleft = len; +  const unsigned short *w = (const unsigned short *)addr; +  unsigned int sum = 0; +  unsigned short answer = 0; + +  /* +   * Our algorithm is simple, using a 32 bit accumulator (sum), we add +   * sequential 16 bit words to it, and at the end, fold back all the +   * carry bits from the top 16 bits into the lower 16 bits. 
+   */ +  while (nleft > 1)  { +    sum += *w++; +    nleft -= 2; +  } + +  /* mop up an odd byte, if necessary */ +  if (nleft == 1) { +    *(unsigned char *)(&answer) = *(const unsigned char *)w; +    sum += answer; +  } + +  /* add back carry outs from top 16 bits to low 16 bits */ +  sum = (sum & 0xffff) + (sum >> 16); +  sum += (sum >> 16); +  /* guaranteed now that the lower 16 bits of sum are correct */ + +  answer = ~sum; /* truncate to 16 bits */ +  return answer; +} + +// sends an ethernet packet +static void +net_tx_eth(struct mbuf *m, uint16 ethtype) +{ +  struct eth *ethhdr; + +  ethhdr = mbufpushhdr(m, *ethhdr); +  memmove(ethhdr->shost, local_mac, ETHADDR_LEN); +  // In a real networking stack, dhost would be set to the address discovered +  // through ARP. Because we don't support enough of the ARP protocol, set it +  // to broadcast instead. +  memmove(ethhdr->dhost, broadcast_mac, ETHADDR_LEN); +  ethhdr->type = htons(ethtype); +  if (e1000_transmit(m)) { +    mbuffree(m); +  } +} + +// sends an IP packet +static void +net_tx_ip(struct mbuf *m, uint8 proto, uint32 dip) +{ +  struct ip *iphdr; + +  // push the IP header +  iphdr = mbufpushhdr(m, *iphdr); +  memset(iphdr, 0, sizeof(*iphdr)); +  iphdr->ip_vhl = (4 << 4) | (20 >> 2); +  iphdr->ip_p = proto; +  iphdr->ip_src = htonl(local_ip); +  iphdr->ip_dst = htonl(dip); +  iphdr->ip_len = htons(m->len); +  iphdr->ip_ttl = 100; +  iphdr->ip_sum = in_cksum((unsigned char *)iphdr, sizeof(*iphdr)); + +  // now on to the ethernet layer +  net_tx_eth(m, ETHTYPE_IP); +} + +// sends a UDP packet +void +net_tx_udp(struct mbuf *m, uint32 dip, +           uint16 sport, uint16 dport) +{ +  struct udp *udphdr; + +  // put the UDP header +  udphdr = mbufpushhdr(m, *udphdr); +  udphdr->sport = htons(sport); +  udphdr->dport = htons(dport); +  udphdr->ulen = htons(m->len); +  udphdr->sum = 0; // zero means no checksum is provided + +  // now on to the IP layer +  net_tx_ip(m, IPPROTO_UDP, dip); +} + +// sends an 
ARP packet +static int +net_tx_arp(uint16 op, uint8 dmac[ETHADDR_LEN], uint32 dip) +{ +  struct mbuf *m; +  struct arp *arphdr; + +  m = mbufalloc(MBUF_DEFAULT_HEADROOM); +  if (!m) +    return -1; + +  // generic part of ARP header +  arphdr = mbufputhdr(m, *arphdr); +  arphdr->hrd = htons(ARP_HRD_ETHER); +  arphdr->pro = htons(ETHTYPE_IP); +  arphdr->hln = ETHADDR_LEN; +  arphdr->pln = sizeof(uint32); +  arphdr->op = htons(op); + +  // ethernet + IP part of ARP header +  memmove(arphdr->sha, local_mac, ETHADDR_LEN); +  arphdr->sip = htonl(local_ip); +  memmove(arphdr->tha, dmac, ETHADDR_LEN); +  arphdr->tip = htonl(dip); + +  // header is ready, send the packet +  net_tx_eth(m, ETHTYPE_ARP); +  return 0; +} + +// receives an ARP packet +static void +net_rx_arp(struct mbuf *m) +{ +  struct arp *arphdr; +  uint8 smac[ETHADDR_LEN]; +  uint32 sip, tip; + +  arphdr = mbufpullhdr(m, *arphdr); +  if (!arphdr) +    goto done; + +  // validate the ARP header +  if (ntohs(arphdr->hrd) != ARP_HRD_ETHER || +      ntohs(arphdr->pro) != ETHTYPE_IP || +      arphdr->hln != ETHADDR_LEN || +      arphdr->pln != sizeof(uint32)) { +    goto done; +  } + +  // only requests are supported so far +  // check if our IP was solicited +  tip = ntohl(arphdr->tip); // target IP address +  if (ntohs(arphdr->op) != ARP_OP_REQUEST || tip != local_ip) +    goto done; + +  // handle the ARP request +  memmove(smac, arphdr->sha, ETHADDR_LEN); // sender's ethernet address +  sip = ntohl(arphdr->sip); // sender's IP address (qemu's slirp) +  net_tx_arp(ARP_OP_REPLY, smac, sip); + +done: +  mbuffree(m); +} + +// receives a UDP packet +static void +net_rx_udp(struct mbuf *m, uint16 len, struct ip *iphdr) +{ +  struct udp *udphdr; +  uint32 sip; +  uint16 sport, dport; + + +  udphdr = mbufpullhdr(m, *udphdr); +  if (!udphdr) +    goto fail; + +  // TODO: validate UDP checksum + +  // validate lengths reported in headers +  if (ntohs(udphdr->ulen) != len) +    goto fail; +  len -= sizeof(*udphdr); +  
if (len > m->len) +    goto fail; +  // minimum packet size could be larger than the payload +  mbuftrim(m, m->len - len); + +  // parse the necessary fields +  sip = ntohl(iphdr->ip_src); +  sport = ntohs(udphdr->sport); +  dport = ntohs(udphdr->dport); +  sockrecvudp(m, sip, dport, sport); +  return; + +fail: +  mbuffree(m); +} + +// receives an IP packet +static void +net_rx_ip(struct mbuf *m) +{ +  struct ip *iphdr; +  uint16 len; + +  iphdr = mbufpullhdr(m, *iphdr); +  if (!iphdr) +	  goto fail; + +  // check IP version and header len +  if (iphdr->ip_vhl != ((4 << 4) | (20 >> 2))) +    goto fail; +  // validate IP checksum +  if (in_cksum((unsigned char *)iphdr, sizeof(*iphdr))) +    goto fail; +  // can't support fragmented IP packets +  if (htons(iphdr->ip_off) != 0) +    goto fail; +  // is the packet addressed to us? +  if (htonl(iphdr->ip_dst) != local_ip) +    goto fail; +  // can only support UDP +  if (iphdr->ip_p != IPPROTO_UDP) +    goto fail; + +  len = ntohs(iphdr->ip_len) - sizeof(*iphdr); +  net_rx_udp(m, len, iphdr); +  return; + +fail: +  mbuffree(m); +} + +// called by e1000 driver's interrupt handler to deliver a packet to the +// networking stack +void net_rx(struct mbuf *m) +{ +  struct eth *ethhdr; +  uint16 type; + +  ethhdr = mbufpullhdr(m, *ethhdr); +  if (!ethhdr) { +    mbuffree(m); +    return; +  } + +  type = ntohs(ethhdr->type); +  if (type == ETHTYPE_IP) +    net_rx_ip(m); +  else if (type == ETHTYPE_ARP) +    net_rx_arp(m); +  else +    mbuffree(m); +} diff --git a/kernel/net.h b/kernel/net.h new file mode 100644 index 0000000..9e6fefe --- /dev/null +++ b/kernel/net.h @@ -0,0 +1,173 @@ +// +// packet buffer management +// + +#define MBUF_SIZE              2048 +#define MBUF_DEFAULT_HEADROOM  128 + +struct mbuf { +  struct mbuf  *next; // the next mbuf in the chain +  char         *head; // the current start position of the buffer +  unsigned int len;   // the length of the buffer +  char         buf[MBUF_SIZE]; // the backing 
store +}; + +char *mbufpull(struct mbuf *m, unsigned int len); +char *mbufpush(struct mbuf *m, unsigned int len); +char *mbufput(struct mbuf *m, unsigned int len); +char *mbuftrim(struct mbuf *m, unsigned int len); + +// The above functions manipulate the size and position of the buffer: +//            <- push            <- trim +//             -> pull            -> put +// [-headroom-][------buffer------][-tailroom-] +// |----------------MBUF_SIZE-----------------| +// +// These marcos automatically typecast and determine the size of header structs. +// In most situations you should use these instead of the raw ops above. +#define mbufpullhdr(mbuf, hdr) (typeof(hdr)*)mbufpull(mbuf, sizeof(hdr)) +#define mbufpushhdr(mbuf, hdr) (typeof(hdr)*)mbufpush(mbuf, sizeof(hdr)) +#define mbufputhdr(mbuf, hdr) (typeof(hdr)*)mbufput(mbuf, sizeof(hdr)) +#define mbuftrimhdr(mbuf, hdr) (typeof(hdr)*)mbuftrim(mbuf, sizeof(hdr)) + +struct mbuf *mbufalloc(unsigned int headroom); +void mbuffree(struct mbuf *m); + +struct mbufq { +  struct mbuf *head;  // the first element in the queue +  struct mbuf *tail;  // the last element in the queue +}; + +void mbufq_pushtail(struct mbufq *q, struct mbuf *m); +struct mbuf *mbufq_pophead(struct mbufq *q); +int mbufq_empty(struct mbufq *q); +void mbufq_init(struct mbufq *q); + + +// +// endianness support +// + +static inline uint16 bswaps(uint16 val) +{ +  return (((val & 0x00ffU) << 8) | +          ((val & 0xff00U) >> 8)); +} + +static inline uint32 bswapl(uint32 val) +{ +  return (((val & 0x000000ffUL) << 24) | +          ((val & 0x0000ff00UL) << 8) | +          ((val & 0x00ff0000UL) >> 8) | +          ((val & 0xff000000UL) >> 24)); +} + +// Use these macros to convert network bytes to the native byte order. +// Note that Risc-V uses little endian while network order is big endian. 
+#define ntohs bswaps +#define ntohl bswapl +#define htons bswaps +#define htonl bswapl + + +// +// useful networking headers +// + +#define ETHADDR_LEN 6 + +// an Ethernet packet header (start of the packet). +struct eth { +  uint8  dhost[ETHADDR_LEN]; +  uint8  shost[ETHADDR_LEN]; +  uint16 type; +} __attribute__((packed)); + +#define ETHTYPE_IP  0x0800 // Internet protocol +#define ETHTYPE_ARP 0x0806 // Address resolution protocol + +// an IP packet header (comes after an Ethernet header). +struct ip { +  uint8  ip_vhl; // version << 4 | header length >> 2 +  uint8  ip_tos; // type of service +  uint16 ip_len; // total length +  uint16 ip_id;  // identification +  uint16 ip_off; // fragment offset field +  uint8  ip_ttl; // time to live +  uint8  ip_p;   // protocol +  uint16 ip_sum; // checksum +  uint32 ip_src, ip_dst; +}; + +#define IPPROTO_ICMP 1  // Control message protocol +#define IPPROTO_TCP  6  // Transmission control protocol +#define IPPROTO_UDP  17 // User datagram protocol + +#define MAKE_IP_ADDR(a, b, c, d)           \ +  (((uint32)a << 24) | ((uint32)b << 16) | \ +   ((uint32)c << 8) | (uint32)d) + +// a UDP packet header (comes after an IP header). +struct udp { +  uint16 sport; // source port +  uint16 dport; // destination port +  uint16 ulen;  // length, including udp header, not including IP header +  uint16 sum;   // checksum +}; + +// an ARP packet (comes after an Ethernet header). 
+struct arp { +  uint16 hrd; // format of hardware address +  uint16 pro; // format of protocol address +  uint8  hln; // length of hardware address +  uint8  pln; // length of protocol address +  uint16 op;  // operation + +  char   sha[ETHADDR_LEN]; // sender hardware address +  uint32 sip;              // sender IP address +  char   tha[ETHADDR_LEN]; // target hardware address +  uint32 tip;              // target IP address +} __attribute__((packed)); + +#define ARP_HRD_ETHER 1 // Ethernet + +enum { +  ARP_OP_REQUEST = 1, // requests hw addr given protocol addr +  ARP_OP_REPLY = 2,   // replies a hw addr given protocol addr +}; + +// a DNS packet (comes after a UDP header). +struct dns { +  uint16 id;  // request ID + +  uint8 rd: 1;  // recursion desired +  uint8 tc: 1;  // truncated +  uint8 aa: 1;  // authoritative +  uint8 opcode: 4;  +  uint8 qr: 1;  // query/response +  uint8 rcode: 4; // response code +  uint8 cd: 1;  // checking disabled +  uint8 ad: 1;  // authenticated data +  uint8 z:  1;   +  uint8 ra: 1;  // recursion available +   +  uint16 qdcount; // number of question entries +  uint16 ancount; // number of resource records in answer section +  uint16 nscount; // number of NS resource records in authority section +  uint16 arcount; // number of resource records in additional records +} __attribute__((packed)); + +struct dns_question { +  uint16 qtype; +  uint16 qclass; +} __attribute__((packed)); +   +#define ARECORD (0x0001) +#define QCLASS  (0x0001) + +struct dns_data { +  uint16 type; +  uint16 class; +  uint32 ttl; +  uint16 len; +} __attribute__((packed)); diff --git a/kernel/pci.c b/kernel/pci.c new file mode 100644 index 0000000..3e361c5 --- /dev/null +++ b/kernel/pci.c @@ -0,0 +1,61 @@ +// +// simple PCI-Express initialization, only +// works for qemu and its e1000 card. 
+// + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "spinlock.h" +#include "proc.h" +#include "defs.h" + +void +pci_init() +{ +  // we'll place the e1000 registers at this address. +  // vm.c maps this range. +  uint64 e1000_regs = 0x40000000L; + +  // qemu -machine virt puts PCIe config space here. +  // vm.c maps this range. +  uint32  *ecam = (uint32 *) 0x30000000L; +   +  // look at each possible PCI device on bus 0. +  for(int dev = 0; dev < 32; dev++){ +    int bus = 0; +    int func = 0; +    int offset = 0; +    uint32 off = (bus << 16) | (dev << 11) | (func << 8) | (offset); +    volatile uint32 *base = ecam + off; +    uint32 id = base[0]; +     +    // 100e:8086 is an e1000 +    if(id == 0x100e8086){ +      // command and status register. +      // bit 0 : I/O access enable +      // bit 1 : memory access enable +      // bit 2 : enable mastering +      base[1] = 0b111; +      __sync_synchronize(); + +      for(int i = 0; i < 6; i++){ +        uint32 old = base[4+i]; + +        // writing all 1's to the BAR causes it to be +        // replaced with its size. +        base[4+i] = 0xffffffff; +        __sync_synchronize(); + +        base[4+i] = old; +      } + +      // tell the e1000 to reveal its registers at +      // physical address 0x40000000. +      base[4+0] = e1000_regs; + +      e1000_init((uint32*)e1000_regs); +    } +  } +} diff --git a/kernel/plic.c b/kernel/plic.c index 4175db9..5c9d96a 100644 --- a/kernel/plic.c +++ b/kernel/plic.c @@ -14,6 +14,13 @@ plicinit(void)    // set desired IRQ priorities non-zero (otherwise disabled).    *(uint32*)(PLIC + UART0_IRQ*4) = 1;    *(uint32*)(PLIC + VIRTIO0_IRQ*4) = 1; +   +#ifdef LAB_NET +  // PCIE IRQs are 32 to 35 +  for(int irq = 1; irq < 0x35; irq++){ +    *(uint32*)(PLIC + irq*4) = 1; +  } +#endif    }  void @@ -25,6 +32,11 @@ plicinithart(void)    // for the uart and virtio disk.    
*(uint32*)PLIC_SENABLE(hart) = (1 << UART0_IRQ) | (1 << VIRTIO0_IRQ); +#ifdef LAB_NET +  // hack to get at next 32 IRQs for e1000 +  *(uint32*)(PLIC_SENABLE(hart)+4) = 0xffffffff; +#endif +      // set this hart's S-mode priority threshold to 0.    *(uint32*)PLIC_SPRIORITY(hart) = 0;  } diff --git a/kernel/printf.c b/kernel/printf.c index 1a50203..509c1c5 100644 --- a/kernel/printf.c +++ b/kernel/printf.c @@ -122,6 +122,8 @@ panic(char *s)    printf("panic: ");    printf(s);    printf("\n"); +  backtrace(); +    panicked = 1; // freeze uart output from other CPUs    for(;;)      ; @@ -133,3 +135,18 @@ printfinit(void)    initlock(&pr.lock, "pr");    pr.locking = 1;  } + +void +backtrace(void) +{ +  uint64 fp = r_fp(); +  printf("backtrace:\n"); +  uint64 stackpg = PGROUNDDOWN(fp); +  // Wherever fp points to should always live in the stack page +  while(PGROUNDDOWN(fp) == stackpg){ +    // print the return addr (stored in fp-8) +    printf("%p\n", *(uint64 *)(fp-8)); +    // load previous (upper stack) fp +    fp = *(uint64 *)(fp-16); +  } +} diff --git a/kernel/proc.c b/kernel/proc.c index 58a8a0b..9a9bae9 100644 --- a/kernel/proc.c +++ b/kernel/proc.c @@ -39,6 +39,7 @@ proc_mapstacks(pagetable_t kpgtbl)      if(pa == 0)        panic("kalloc");      uint64 va = KSTACK((int) (p - proc)); +  p->alarm_tickspassed = 0;      kvmmap(kpgtbl, va, (uint64)pa, PGSIZE, PTE_R | PTE_W);    }  } @@ -132,6 +133,25 @@ found:      return 0;    } +  // Allocate a usyscall page and fill pid. +  if((p->usyscall = (struct usyscall *)kalloc()) == 0){ +    freeproc(p); +    release(&p->lock); +    return 0; +  } +  p->usyscall->pid = p->pid; + +  // reset sigalarm props  +  p->alarm_interval = 0; +  p->alarm_handler = 0; +  p->alarm_tickspassed = 0; +  p->alarm_caninvoke = 1; +  if((p->atpfm = (struct trapframe *)kalloc()) == 0){ +    freeproc(p); +    release(&p->lock); +    return 0; +  } +    // An empty user page table.    
p->pagetable = proc_pagetable(p);    if(p->pagetable == 0){ @@ -158,8 +178,18 @@ freeproc(struct proc *p)    if(p->trapframe)      kfree((void*)p->trapframe);    p->trapframe = 0; +  if(p->usyscall) +    kfree((void*)p->usyscall); +  p->usyscall = 0;    if(p->pagetable)      proc_freepagetable(p->pagetable, p->sz); +  if(p->atpfm) +    kfree((void*)p->atpfm); +  p->atpfm = 0; +  p->alarm_interval = 0; +  p->alarm_handler = 0; +  p->alarm_tickspassed = 0; +  p->alarm_caninvoke = 1;    p->pagetable = 0;    p->sz = 0;    p->pid = 0; @@ -172,7 +202,7 @@ freeproc(struct proc *p)  }  // Create a user page table for a given process, with no user memory, -// but with trampoline and trapframe pages. +// but with trampoline, trapframe and usyscall pages.  pagetable_t  proc_pagetable(struct proc *p)  { @@ -202,6 +232,14 @@ proc_pagetable(struct proc *p)      return 0;    } +  // map the usyscall page below the trapframe page, for +  // ugetpid(); on failure undo the trampoline and trapframe mappings. +  if(mappages(pagetable, USYSCALL, PGSIZE, +              (uint64)(p->usyscall), PTE_R | PTE_U) < 0){ +    uvmunmap(pagetable, TRAMPOLINE, 1, 0); uvmunmap(pagetable, TRAPFRAME, 1, 0); +    uvmfree(pagetable, 0); +    return 0; +  }    return pagetable;  } @@ -212,6 +250,7 @@ proc_freepagetable(pagetable_t pagetable, uint64 sz)  {    uvmunmap(pagetable, TRAMPOLINE, 1, 0);    uvmunmap(pagetable, TRAPFRAME, 1, 0); +  uvmunmap(pagetable, USYSCALL, 1, 0);    uvmfree(pagetable, sz);  } @@ -299,6 +338,9 @@ fork(void)    // copy saved user registers.    *(np->trapframe) = *(p->trapframe); +  // inherit trace_mask +  np->trace_mask = p->trace_mask; +    // Cause fork to return 0 in the child.    
np->trapframe->a0 = 0; @@ -686,3 +728,43 @@ procdump(void)      printf("\n");    }  } + +int +get_nproc(void) +{ +  int n = 0; +  struct proc *p; + +  for(int i = 0; i < NPROC; i++) { +    p = &proc[i]; +    acquire(&p->lock); +    if(p->state != UNUSED) +      n++; +    release(&p->lock); +  } + +  return n; +} + +// lab pagetable: report which pages have been accessed (r/w) +// according to PTE_A and store it in a bit mask (3rd param) +int +pgaccess(uint64 base, int len, uint64 mask_addr) +{ +  struct proc *p = myproc(); +  pagetable_t pgtbl = p->pagetable; +  pte_t *pte; +  int mask = 0; +   +  // iterate through pages: at most 32 (bits in mask), all below MAXVA +  for(int i = 0; i < len && i < 32 && base + i * PGSIZE < MAXVA; i++) { +    pte = walk(pgtbl, base + i * PGSIZE, 0); +    if(pte != 0 && (*pte & PTE_V) && (*pte & PTE_A)) { +      *pte &= (~PTE_A); // clear PTE_A to avoid setting it forever +      mask |= (1L << i); +    } +  } + +  // now copyout the mask to user memory +  return copyout(pgtbl, mask_addr, (char *)&mask, sizeof(mask)); +} diff --git a/kernel/proc.h b/kernel/proc.h index d021857..a195b02 100644 --- a/kernel/proc.h +++ b/kernel/proc.h @@ -91,6 +91,7 @@ struct proc {    int killed;                  // If non-zero, have been killed    int xstate;                  // Exit status to be returned to parent's wait    int pid;                     // Process ID +  int trace_mask;              // SYS_trace mask (1 << SYS_xxx)    // wait_lock must be held when using this:    struct proc *parent;         // Parent process @@ -100,8 +101,14 @@ struct proc {    uint64 sz;                   // Size of process memory (bytes)    pagetable_t pagetable;       // User page table    struct trapframe *trapframe; // data page for trampoline.S +  struct usyscall *usyscall;   // data page for usyscall    struct context context;      // swtch() here to run process    struct file *ofile[NOFILE];  // Open files    struct inode *cwd;           // Current directory    char name[16];               // Process name (debugging) +  int alarm_interval;          // sigalarm syscall 
interval +  uint64 alarm_handler;        // sigalarm syscall handler +  int alarm_tickspassed;       // record how many ticks passed since last sigalarm handler call +  int alarm_caninvoke;         // prevent re-entrant calls to handler +  struct trapframe *atpfm;     // trapframe to resume after handling, must hold p->lock  }; diff --git a/kernel/riscv.h b/kernel/riscv.h index adc3e38..af18972 100644 --- a/kernel/riscv.h +++ b/kernel/riscv.h @@ -295,14 +295,6 @@ r_sp()    return x;  } -static inline uint64 -r_fp() -{ -  uint64 x; -  asm volatile("mv %0, s0" : "=r" (x) ); -  return x; -} -  // read and write tp, the thread pointer, which xv6 uses to hold  // this core's hartid (core number), the index into cpus[].  static inline uint64 @@ -335,6 +327,15 @@ sfence_vma()    asm volatile("sfence.vma zero, zero");  } +// read the frame pointer of currently executing func +static inline uint64 +r_fp() +{ +  uint64 x; +  asm volatile("mv %0, s0" : "=r" (x) ); +  return x; +} +  typedef uint64 pte_t;  typedef uint64 *pagetable_t; // 512 PTEs @@ -351,6 +352,8 @@ typedef uint64 *pagetable_t; // 512 PTEs  #define PTE_W (1L << 2)  #define PTE_X (1L << 3)  #define PTE_U (1L << 4) // user can access +#define PTE_A (1L << 6) // riscv access bit                       +#define PTE_C (1L << 8) // RSW low bit, use it to mark whether a page is COW diff --git a/kernel/stats.c b/kernel/stats.c index 9659bb9..b7a8e5f 100644 --- a/kernel/stats.c +++ b/kernel/stats.c @@ -34,9 +34,6 @@ statsread(int user_dst, uint64 dst, int n)    acquire(&stats.lock);    if(stats.sz == 0) { -#ifdef LAB_PGTBL -    stats.sz = statscopyin(stats.buf, BUFSZ); -#endif  #ifdef LAB_LOCK      stats.sz = statslock(stats.buf, BUFSZ);  #endif diff --git a/kernel/syscall.c b/kernel/syscall.c index ed65409..172c5ea 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -101,6 +101,24 @@ extern uint64 sys_unlink(void);  extern uint64 sys_link(void);  extern uint64 sys_mkdir(void);  extern uint64 sys_close(void); 
+extern uint64 sys_trace(void); +extern uint64 sys_sysinfo(void); + +#ifdef LAB_NET +extern uint64 sys_connect(void); +#endif +#ifdef LAB_PGTBL +extern uint64 sys_pgaccess(void); +#endif +extern uint64 sys_sigalarm(void); +extern uint64 sys_sigreturn(void); + +#ifdef LAB_NET +extern uint64 sys_connect(void); +#endif +#ifdef LAB_PGTBL +extern uint64 sys_pgaccess(void); +#endif  // An array mapping syscall numbers from syscall.h  // to the function that handles the system call. @@ -126,8 +144,54 @@ static uint64 (*syscalls[])(void) = {  [SYS_link]    sys_link,  [SYS_mkdir]   sys_mkdir,  [SYS_close]   sys_close, +#ifdef LAB_NET +[SYS_connect] sys_connect, +#endif +#ifdef LAB_PGTBL +[SYS_pgaccess] sys_pgaccess, +#endif +[SYS_trace]   sys_trace, +[SYS_sysinfo] sys_sysinfo, +[SYS_sigalarm] sys_sigalarm, +[SYS_sigreturn] sys_sigreturn, +}; + +// syscall name maps for SYS_trace: +static char *syscall_names[] = { +[SYS_fork]    "fork", +[SYS_exit]    "exit", +[SYS_wait]    "wait", +[SYS_pipe]    "pipe", +[SYS_read]    "read", +[SYS_kill]    "kill", +[SYS_exec]    "exec", +[SYS_fstat]   "fstat", +[SYS_chdir]   "chdir", +[SYS_dup]     "dup", +[SYS_getpid]  "getpid", +[SYS_sbrk]    "sbrk", +[SYS_sleep]   "sleep", +[SYS_uptime]  "uptime", +[SYS_open]    "open", +[SYS_write]   "write", +[SYS_mknod]   "mknod", +[SYS_unlink]  "unlink", +[SYS_link]    "link", +[SYS_mkdir]   "mkdir", +[SYS_close]   "close", +#ifdef LAB_NET +[SYS_connect] "connect", +#endif +#ifdef LAB_PGTBL +[SYS_pgaccess] "pgaccess", +#endif +[SYS_trace]   "trace", +[SYS_sysinfo] "sysinfo", +[SYS_sigalarm]  "sigalarm", +[SYS_sigreturn] "sigreturn",  }; +  void  syscall(void)  { @@ -139,9 +203,17 @@ syscall(void)      // Use num to lookup the system call function for num, call it,      // and store its return value in p->trapframe->a0      p->trapframe->a0 = syscalls[num](); +     +    // SYS_trace: report only the syscalls whose own bit is set in the mask, +    // i.e. bit SYS_xxx of p->trace_mask selects syscall SYS_xxx +    if((p->trace_mask >> num) & 1) { 
+      printf("%d: syscall %s -> %d\n", p->pid, syscall_names[num], p->trapframe->a0); +    } +    } else {      printf("%d %s: unknown sys call %d\n",              p->pid, p->name, num);      p->trapframe->a0 = -1;    }  } + diff --git a/kernel/syscall.h b/kernel/syscall.h index bc5f356..8da572e 100644 --- a/kernel/syscall.h +++ b/kernel/syscall.h @@ -20,3 +20,14 @@  #define SYS_link   19  #define SYS_mkdir  20  #define SYS_close  21 + +// System calls for labs +#define SYS_trace     22 +#define SYS_sysinfo   23 +#define SYS_sigalarm  24 +#define SYS_sigreturn 25 +#define SYS_symlink   26 +#define SYS_mmap      27 +#define SYS_munmap    28 +#define SYS_connect   29 +#define SYS_pgaccess  30 diff --git a/kernel/sysfile.c b/kernel/sysfile.c index 16b668c..4b2189a 100644 --- a/kernel/sysfile.c +++ b/kernel/sysfile.c @@ -503,3 +503,29 @@ sys_pipe(void)    }    return 0;  } + + +#ifdef LAB_NET +int +sys_connect(void) +{ +  struct file *f; +  int fd; +  uint32 raddr; +  uint32 rport; +  uint32 lport; + +  argint(0, (int*)&raddr); +  argint(1, (int*)&lport); +  argint(2, (int*)&rport); + +  if(sockalloc(&f, raddr, lport, rport) < 0) +    return -1; +  if((fd=fdalloc(f)) < 0){ +    fileclose(f); +    return -1; +  } + +  return fd; +} +#endif diff --git a/kernel/sysinfo.c b/kernel/sysinfo.c new file mode 100644 index 0000000..c66324d --- /dev/null +++ b/kernel/sysinfo.c @@ -0,0 +1,24 @@ +#include "types.h" +#include "riscv.h" +#include "param.h" +#include "spinlock.h" +#include "defs.h" +#include "sysinfo.h" +#include "proc.h" + +// Get current system info +// addr is a user virtual address, pointing to a struct sysinfo. 
+int +sys_info(uint64 addr) { +  struct proc *p = myproc(); +  struct sysinfo info; + +  // Fill nums into the sysinfo struct +  info.freemem = get_freemem(); +  info.nproc = get_nproc(); + +  if(copyout(p->pagetable, addr, (char *)&info, sizeof(info)) < 0) +    return -1; +  return 0; +} + diff --git a/kernel/sysinfo.h b/kernel/sysinfo.h new file mode 100644 index 0000000..fb878e6 --- /dev/null +++ b/kernel/sysinfo.h @@ -0,0 +1,4 @@ +struct sysinfo { +  uint64 freemem;   // amount of free memory (bytes) +  uint64 nproc;     // number of process +}; diff --git a/kernel/sysnet.c b/kernel/sysnet.c new file mode 100644 index 0000000..1c48cb3 --- /dev/null +++ b/kernel/sysnet.c @@ -0,0 +1,185 @@ +// +// network system calls. +// + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "spinlock.h" +#include "proc.h" +#include "defs.h" +#include "fs.h" +#include "sleeplock.h" +#include "file.h" +#include "net.h" + +struct sock { +  struct sock *next; // the next socket in the list +  uint32 raddr;      // the remote IPv4 address +  uint16 lport;      // the local UDP port number +  uint16 rport;      // the remote UDP port number +  struct spinlock lock; // protects the rxq +  struct mbufq rxq;  // a queue of packets waiting to be received +}; + +static struct spinlock lock; +static struct sock *sockets; + +void +sockinit(void) +{ +  initlock(&lock, "socktbl"); +} + +int +sockalloc(struct file **f, uint32 raddr, uint16 lport, uint16 rport) +{ +  struct sock *si, *pos; + +  si = 0; +  *f = 0; +  if ((*f = filealloc()) == 0) +    goto bad; +  if ((si = (struct sock*)kalloc()) == 0) +    goto bad; + +  // initialize objects +  si->raddr = raddr; +  si->lport = lport; +  si->rport = rport; +  initlock(&si->lock, "sock"); +  mbufq_init(&si->rxq); +  (*f)->type = FD_SOCK; +  (*f)->readable = 1; +  (*f)->writable = 1; +  (*f)->sock = si; + +  // add to list of sockets +  acquire(&lock); +  pos = sockets; +  while (pos) { +    if 
(pos->raddr == raddr && +        pos->lport == lport && +	pos->rport == rport) { +      release(&lock); +      goto bad; +    } +    pos = pos->next; +  } +  si->next = sockets; +  sockets = si; +  release(&lock); +  return 0; + +bad: +  if (si) +    kfree((char*)si); +  if (*f) +    fileclose(*f); +  return -1; +} + +void +sockclose(struct sock *si) +{ +  struct sock **pos; +  struct mbuf *m; + +  // remove from list of sockets +  acquire(&lock); +  pos = &sockets; +  while (*pos) { +    if (*pos == si){ +      *pos = si->next; +      break; +    } +    pos = &(*pos)->next; +  } +  release(&lock); + +  // free any pending mbufs +  while (!mbufq_empty(&si->rxq)) { +    m = mbufq_pophead(&si->rxq); +    mbuffree(m); +  } + +  kfree((char*)si); +} + +int +sockread(struct sock *si, uint64 addr, int n) +{ +  struct proc *pr = myproc(); +  struct mbuf *m; +  int len; + +  acquire(&si->lock); +  while (mbufq_empty(&si->rxq) && !pr->killed) { +    sleep(&si->rxq, &si->lock); +  } +  if (pr->killed) { +    release(&si->lock); +    return -1; +  } +  m = mbufq_pophead(&si->rxq); +  release(&si->lock); + +  len = m->len; +  if (len > n) +    len = n; +  if (copyout(pr->pagetable, addr, m->head, len) == -1) { +    mbuffree(m); +    return -1; +  } +  mbuffree(m); +  return len; +} + +int +sockwrite(struct sock *si, uint64 addr, int n) +{ +  struct proc *pr = myproc(); +  struct mbuf *m; + +  m = mbufalloc(MBUF_DEFAULT_HEADROOM); +  if (!m) +    return -1; + +  if (copyin(pr->pagetable, mbufput(m, n), addr, n) == -1) { +    mbuffree(m); +    return -1; +  } +  net_tx_udp(m, si->raddr, si->lport, si->rport); +  return n; +} + +// called by protocol handler layer to deliver UDP packets +void +sockrecvudp(struct mbuf *m, uint32 raddr, uint16 lport, uint16 rport) +{ +  // +  // Find the socket that handles this mbuf and deliver it, waking +  // any sleeping reader. Free the mbuf if there are no sockets +  // registered to handle it. 
+  // +  struct sock *si; + +  acquire(&lock); +  si = sockets; +  while (si) { +    if (si->raddr == raddr && si->lport == lport && si->rport == rport) +      goto found; +    si = si->next; +  } +  release(&lock); +  mbuffree(m); +  return; + +found: +  acquire(&si->lock); +  mbufq_pushtail(&si->rxq, m); +  wakeup(&si->rxq); +  release(&si->lock); +  release(&lock); +} diff --git a/kernel/sysproc.c b/kernel/sysproc.c index 3b4d5bd..715a511 100644 --- a/kernel/sysproc.c +++ b/kernel/sysproc.c @@ -1,7 +1,7 @@  #include "types.h"  #include "riscv.h" -#include "defs.h"  #include "param.h" +#include "defs.h"  #include "memlayout.h"  #include "spinlock.h"  #include "proc.h" @@ -54,9 +54,8 @@ sys_sleep(void)    int n;    uint ticks0; +    argint(0, &n); -  if(n < 0) -    n = 0;    acquire(&tickslock);    ticks0 = ticks;    while(ticks - ticks0 < n){ @@ -66,10 +65,29 @@ sys_sleep(void)      }      sleep(&ticks, &tickslock);    } + +  // backtrace(); +    release(&tickslock);    return 0;  } + +#ifdef LAB_PGTBL +int +sys_pgaccess(void) +{ +  uint64 base, mask; +  int len; + +   +  argaddr(0, &base); +  argint(1, &len); +  argaddr(2, &mask); +  return pgaccess(base, len, mask); +} +#endif +  uint64  sys_kill(void)  { @@ -91,3 +109,44 @@ sys_uptime(void)    release(&tickslock);    return xticks;  } + +uint64 +sys_trace(void) +{ +  argint(0, &myproc()->trace_mask); + +  return -(myproc()->trace_mask <= 1); +} + +uint64 +sys_sysinfo(void) +{ +  uint64 si; // user pointer to struct sysinfo + +  argaddr(0, &si); +  return sys_info(si); +} + +uint64 +sys_sigalarm(void) +{ +  struct proc *p = myproc(); +  uint64 handler; + +  argint(0, &p->alarm_interval); +  argaddr(1, &handler); +  p->alarm_handler = handler; + +  return 0; +} + +uint64 sys_sigreturn(void) +{ +  struct proc *p = myproc(); +  // restore saved trapframe to resume +  memmove(p->trapframe, p->atpfm, sizeof(struct trapframe)); +  p->alarm_tickspassed = 0; +  p->alarm_caninvoke = 1; +  // make sure return the original 
a0 in trapframe to pass test3 +  return p->trapframe->a0; +} diff --git a/kernel/trap.c b/kernel/trap.c index 512c850..7cc69b1 100644 --- a/kernel/trap.c +++ b/kernel/trap.c @@ -6,6 +6,12 @@  #include "proc.h"  #include "defs.h" +/* + * Always remember that RISC-V disables interrupts when it starts to take a trap, + * so there's no need to call intr_off() at the beginning of trap handling. + * Reference: xv6-riscv-book 4.5 + */ +  struct spinlock tickslock;  uint ticks; @@ -67,18 +73,41 @@ usertrap(void)      syscall();    } else if((which_dev = devintr()) != 0){      // ok +  } else if(r_scause() == 13 || r_scause() == 15){ +    // deal with page fault +    uint64 va = r_stval(); +    if(cow_handler(p->pagetable, va) < 0) +      goto err;    } else { + +          printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid);      printf("            sepc=%p stval=%p\n", r_sepc(), r_stval()); +  err: +    printf("killing the process...\n");      setkilled(p);    }    if(killed(p))      exit(-1); +   -  // give up the CPU if this is a timer interrupt. -  if(which_dev == 2) +  if(which_dev == 2){ +    // timer interrupt +    if(p->alarm_interval > 0 && p->alarm_caninvoke){ +      // record sigalarm +      p->alarm_tickspassed++; +      if(p->alarm_tickspassed == p->alarm_interval){ +        // store original trapframe in p->atpfm +        memmove(p->atpfm, p->trapframe, sizeof(struct trapframe)); +        p->alarm_tickspassed = 0; +        p->alarm_caninvoke = 0; +        p->trapframe->epc = p->alarm_handler; +      } +    } +    // give up the CPU.      
yield(); +  }    usertrapret();  } @@ -190,7 +219,13 @@ devintr()        uartintr();      } else if(irq == VIRTIO0_IRQ){        virtio_disk_intr(); -    } else if(irq){ +    } +#ifdef LAB_NET +    else if(irq == E1000_IRQ){ +      e1000_intr(); +    } +#endif +    else if(irq){        printf("unexpected interrupt irq=%d\n", irq);      } diff --git a/kernel/vm.c b/kernel/vm.c index 5c31e87..be7d042 100644 --- a/kernel/vm.c +++ b/kernel/vm.c @@ -4,6 +4,8 @@  #include "elf.h"  #include "riscv.h"  #include "defs.h" +#include "spinlock.h" +#include "proc.h"  #include "fs.h"  /* @@ -30,6 +32,14 @@ kvmmake(void)    // virtio mmio disk interface    kvmmap(kpgtbl, VIRTIO0, VIRTIO0, PGSIZE, PTE_R | PTE_W); +#ifdef LAB_NET +  // PCI-E ECAM (configuration space), for pci.c +  kvmmap(kpgtbl, 0x30000000L, 0x30000000L, 0x10000000, PTE_R | PTE_W); + +  // pci.c maps the e1000's registers here. +  kvmmap(kpgtbl, 0x40000000L, 0x40000000L, 0x20000, PTE_R | PTE_W); +#endif   +    // PLIC    kvmmap(kpgtbl, PLIC, PLIC, 0x400000, PTE_R | PTE_W); @@ -136,9 +146,8 @@ kvmmap(pagetable_t kpgtbl, uint64 va, uint64 pa, uint64 sz, int perm)  }  // Create PTEs for virtual addresses starting at va that refer to -// physical addresses starting at pa. -// va and size MUST be page-aligned. -// Returns 0 on success, -1 if walk() couldn't +// physical addresses starting at pa. va and size might not +// be page-aligned. Returns 0 on success, -1 if walk() couldn't  // allocate a needed page-table page.  
int  mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm) @@ -146,17 +155,11 @@ mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm)    uint64 a, last;    pte_t *pte; -  if((va % PGSIZE) != 0) -    panic("mappages: va not aligned"); - -  if((size % PGSIZE) != 0) -    panic("mappages: size not aligned"); -    if(size == 0)      panic("mappages: size"); -  a = va; -  last = va + size - PGSIZE; +  a = PGROUNDDOWN(va); +  last = PGROUNDDOWN(va + size - 1);    for(;;){      if((pte = walk(pagetable, a, 1)) == 0)        return -1; @@ -186,8 +189,10 @@ uvmunmap(pagetable_t pagetable, uint64 va, uint64 npages, int do_free)    for(a = va; a < va + npages*PGSIZE; a += PGSIZE){      if((pte = walk(pagetable, a, 0)) == 0)        panic("uvmunmap: walk"); -    if((*pte & PTE_V) == 0) +    if((*pte & PTE_V) == 0) { +      printf("va=%p pte=%p\n", a, *pte);        panic("uvmunmap: not mapped"); +    }      if(PTE_FLAGS(*pte) == PTE_V)        panic("uvmunmap: not a leaf");      if(do_free){ @@ -315,20 +320,26 @@ uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)    pte_t *pte;    uint64 pa, i;    uint flags; -  char *mem; +  // char *mem;    for(i = 0; i < sz; i += PGSIZE){      if((pte = walk(old, i, 0)) == 0)        panic("uvmcopy: pte should exist");      if((*pte & PTE_V) == 0)        panic("uvmcopy: page not present"); -    pa = PTE2PA(*pte); -    flags = PTE_FLAGS(*pte); +    // do not do the actual copy, just increase the refcnt and mark pages readonly COW +    /*      if((mem = kalloc()) == 0)        goto err;      memmove(mem, (char*)pa, PGSIZE); -    if(mappages(new, i, PGSIZE, (uint64)mem, flags) != 0){ -      kfree(mem); +    */ +    *pte &= ~PTE_W; +    *pte |= PTE_C; +    pa = PTE2PA(*pte); +    refcnt_inc(pa); +    flags = PTE_FLAGS(*pte); +    if(mappages(new, i, PGSIZE, (uint64)pa, flags) != 0){ +      // kfree(mem);        goto err;      }    } @@ -359,17 +370,24 @@ int  copyout(pagetable_t pagetable, uint64 dstva, 
char *src, uint64 len)  {    uint64 n, va0, pa0; -  pte_t *pte; +  // pte_t *pte;    while(len > 0){      va0 = PGROUNDDOWN(dstva); -    if(va0 >= MAXVA) + +    if(cow_handler(pagetable, va0) < 0)        return -1; +     +    /*      pte = walk(pagetable, va0, 0);      if(pte == 0 || (*pte & PTE_V) == 0 || (*pte & PTE_U) == 0 ||         (*pte & PTE_W) == 0)        return -1;      pa0 = PTE2PA(*pte); +    */ +    pa0 = walkaddr(pagetable, va0); +    if(pa0 == 0) +      return -1;      n = PGSIZE - (dstva - va0);      if(n > len)        n = len; @@ -389,7 +407,7 @@ int  copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len)  {    uint64 n, va0, pa0; - +      while(len > 0){      va0 = PGROUNDDOWN(srcva);      pa0 = walkaddr(pagetable, va0); @@ -449,3 +467,30 @@ copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max)      return -1;    }  } + +static void +walkprint(pagetable_t pgtbl, int level) +{ +  for(int i = 0; i < 512; i++){ +    pte_t pte = pgtbl[i]; +    if(pte & PTE_V){ +      for(int j = 0; j < level; j++){ +        printf(" .."); +      } +      printf("%d: pte %p pa %p\n", i, pte, PTE2PA(pte)); +      if((pte & (PTE_R|PTE_W|PTE_X)) == 0){ +        // this PTE points to a lower-level page table. +        walkprint((pagetable_t)PTE2PA(pte), level+1); +      } +    } +  } +} + +// Print the contents of a page table +void +vmprint(pagetable_t pgtbl) +{ +  printf("page table %p\n", pgtbl); + +  walkprint(pgtbl, 1); +} | 
