diff options
| author | Robert Morris <rtm@csail.mit.edu> | 2019-06-13 06:49:02 -0400 | 
|---|---|---|
| committer | Robert Morris <rtm@csail.mit.edu> | 2019-06-13 06:49:02 -0400 | 
| commit | de9d72c9086ec935d5b2b889f50ff611135f80fa (patch) | |
| tree | 071952e383057de705c59f9e7894f3154e4bbad2 /kernel | |
| parent | 5753553213df8f9de851adb68377db43faecb91f (diff) | |
| download | xv6-labs-de9d72c9086ec935d5b2b889f50ff611135f80fa.tar.gz xv6-labs-de9d72c9086ec935d5b2b889f50ff611135f80fa.tar.bz2 xv6-labs-de9d72c9086ec935d5b2b889f50ff611135f80fa.zip | |
virtio disk driver
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/bio.c | 6 | ||||
| -rw-r--r-- | kernel/defs.h | 5 | ||||
| -rw-r--r-- | kernel/kalloc.c | 1 | ||||
| -rw-r--r-- | kernel/kernel.ld | 1 | ||||
| -rw-r--r-- | kernel/main.c | 3 | ||||
| -rw-r--r-- | kernel/memlayout.h | 6 | ||||
| -rw-r--r-- | kernel/plic.c | 7 | ||||
| -rw-r--r-- | kernel/trap.c | 2 | ||||
| -rw-r--r-- | kernel/virtio.h | 59 | ||||
| -rw-r--r-- | kernel/virtio_disk.c | 268 | ||||
| -rw-r--r-- | kernel/vm.c | 4 | 
11 files changed, 354 insertions, 8 deletions
| diff --git a/kernel/bio.c b/kernel/bio.c index 90f9af9..07ea030 100644 --- a/kernel/bio.c +++ b/kernel/bio.c @@ -101,7 +101,8 @@ bread(uint dev, uint blockno)    b = bget(dev, blockno);    if((b->flags & B_VALID) == 0) { -    ramdiskrw(b); +    //ramdiskrw(b); +    virtio_disk_rw(b);    }    return b;  } @@ -113,7 +114,8 @@ bwrite(struct buf *b)    if(!holdingsleep(&b->lock))      panic("bwrite");    b->flags |= B_DIRTY; -  ramdiskrw(b); +  //ramdiskrw(b); +  virtio_disk_rw(b);  }  // Release a locked buffer. diff --git a/kernel/defs.h b/kernel/defs.h index 597e5b6..1b397fe 100644 --- a/kernel/defs.h +++ b/kernel/defs.h @@ -201,5 +201,10 @@ uint64          plic_pending(void);  int             plic_claim(void);  void            plic_complete(int); +// virtio_disk.c +void            virtio_disk_init(void); +void            virtio_disk_rw(struct buf *); +void            virtio_disk_intr(); +  // number of elements in fixed-size array  #define NELEM(x) (sizeof(x)/sizeof((x)[0])) diff --git a/kernel/kalloc.c b/kernel/kalloc.c index 1ed1c49..afadb02 100644 --- a/kernel/kalloc.c +++ b/kernel/kalloc.c @@ -35,6 +35,7 @@ freerange(void *pa_start, void *pa_end)  {    char *p;    p = (char*)PGROUNDUP((uint64)pa_start); +  p += 4096; // XXX I can't get kernel.ld to place end beyond the last bss symbol.    for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE)      kfree(p);  } diff --git a/kernel/kernel.ld b/kernel/kernel.ld index 53c9b90..dec8e4f 100644 --- a/kernel/kernel.ld +++ b/kernel/kernel.ld @@ -28,4 +28,5 @@ SECTIONS      *(.bss)      PROVIDE(end = .);    } +  } diff --git a/kernel/main.c b/kernel/main.c index 2168b9f..d44c82c 100644 --- a/kernel/main.c +++ b/kernel/main.c @@ -26,7 +26,8 @@ main()      plicinithart();  // ask PLIC for device interrupts      binit();         // buffer cache      fileinit();      // file table -    ramdiskinit();   // disk +    virtio_disk_init(); // emulated hard disk +    ramdiskinit();   // in-memory disk      userinit();      // first user process      started = 1;    } else { diff --git a/kernel/memlayout.h b/kernel/memlayout.h index 462986c..6d86166 100644 --- a/kernel/memlayout.h +++ b/kernel/memlayout.h @@ -6,7 +6,8 @@  // 00001000 -- boot ROM, provided by qemu  // 02000000 -- CLINT  // 0C000000 -- PLIC -// 10000000 -- uart0 registers +// 10000000 -- uart0  +// 10001000 -- virtio disk   // 80000000 -- boot ROM jumps here in machine mode  //             -kernel loads the kernel here  // 88000000 -- -initrd fs.img ramdisk image. @@ -21,6 +22,9 @@  #define UART0 0x10000000L  #define UART0_IRQ 10 +#define VIRTIO 0x10001000 +#define VIRTIO_IRQ 1 // really the first of 8 units +  // local interrupt controller, which contains the timer.  #define CLINT 0x2000000L  #define CLINT_MTIMECMP(hartid) (CLINT + 0x4000 + 8*(hartid)) diff --git a/kernel/plic.c b/kernel/plic.c index 0f19ab0..cc9a97e 100644 --- a/kernel/plic.c +++ b/kernel/plic.c @@ -11,8 +11,9 @@  void  plicinit(void)  { -  // set uart's priority to be non-zero (otherwise disabled). +  // set desired IRQ priorities non-zero (otherwise disabled).    *(uint32*)(PLIC + UART0_IRQ*4) = 1; +  *(uint32*)(PLIC + VIRTIO_IRQ*4) = 1;  }  void @@ -21,11 +22,9 @@ plicinithart(void)    int hart = cpuid();    // set uart's enable bit for this hart's S-mode.  -  //*(uint32*)(PLIC + 0x2080)= (1 << UART0_IRQ); -  *(uint32*)PLIC_SENABLE(hart)= (1 << UART0_IRQ); +  *(uint32*)PLIC_SENABLE(hart)= (1 << UART0_IRQ) | (1 << VIRTIO_IRQ);    // set this hart's S-mode priority threshold to 0. -  //*(uint32*)(PLIC + 0x201000) = 0;    *(uint32*)PLIC_SPRIORITY(hart) = 0;  } diff --git a/kernel/trap.c b/kernel/trap.c index 050a94d..13ad362 100644 --- a/kernel/trap.c +++ b/kernel/trap.c @@ -159,6 +159,8 @@ devintr()      if(irq == UART0_IRQ){        uartintr(); +    } else if(irq == VIRTIO_IRQ){ +      virtio_disk_intr();      }      plic_complete(irq); diff --git a/kernel/virtio.h b/kernel/virtio.h new file mode 100644 index 0000000..258d107 --- /dev/null +++ b/kernel/virtio.h @@ -0,0 +1,59 @@ +// +// virtio device definitions. +// for both the mmio interface, and virtio descriptors. +// only tested with qemu. +// this is the "legacy" virtio interface. +// + +// virtio mmio control registers, mapped starting at 0x10001000. +// from qemu virtio_mmio.h +#define VIRTIO_MMIO_MAGIC_VALUE		0x000 // 0x74726976 +#define VIRTIO_MMIO_VERSION		0x004 // 1 -- version, 1 is legacy +#define VIRTIO_MMIO_DEVICE_ID		0x008 // 2 -- block device type +#define VIRTIO_MMIO_VENDOR_ID		0x00c // 0x554d4551 +#define VIRTIO_MMIO_DEVICE_FEATURES	0x010 +#define VIRTIO_MMIO_DRIVER_FEATURES	0x020 +#define VIRTIO_MMIO_GUEST_PAGE_SIZE	0x028 // page size for PFN, write-only +#define VIRTIO_MMIO_QUEUE_SEL		0x030 // select queue, write-only +#define VIRTIO_MMIO_QUEUE_NUM_MAX	0x034 // max size of current queue, read-only +#define VIRTIO_MMIO_QUEUE_NUM		0x038 // size of current queue, write-only +#define VIRTIO_MMIO_QUEUE_ALIGN		0x03c // used ring alignment, write-only +#define VIRTIO_MMIO_QUEUE_PFN		0x040 // physical page number for queue, read/write +#define VIRTIO_MMIO_QUEUE_READY		0x044 // ready bit +#define VIRTIO_MMIO_QUEUE_NOTIFY	0x050 // write-only +#define VIRTIO_MMIO_INTERRUPT_STATUS	0x060 // read-only +#define VIRTIO_MMIO_INTERRUPT_ACK	0x064 // write-only +#define VIRTIO_MMIO_STATUS		0x070 // read/write + +// status register bits, from qemu virtio_config.h +#define VIRTIO_CONFIG_S_ACKNOWLEDGE	1 +#define VIRTIO_CONFIG_S_DRIVER		2 +#define VIRTIO_CONFIG_S_DRIVER_OK	4 +#define VIRTIO_CONFIG_S_FEATURES_OK	8 + +// device feature bits +#define VIRTIO_BLK_F_RO		5	/* Disk is read-only */ +#define VIRTIO_BLK_F_SCSI	7	/* Supports scsi command passthru */ +#define VIRTIO_BLK_F_CONFIG_WCE	11	/* Writeback mode available in config */ +#define VIRTIO_BLK_F_MQ		12	/* support more than one vq */ +#define VIRTIO_F_ANY_LAYOUT		27 +#define VIRTIO_RING_F_INDIRECT_DESC	28 +#define VIRTIO_RING_F_EVENT_IDX		29 + +struct VRingDesc { +  uint64 addr; +  uint32 len; +  uint16 flags; +  uint16 next; +}; +#define VRING_DESC_F_NEXT	1 +#define VRING_DESC_F_WRITE	2 // device writes (vs read) + +struct VRingUsedElem { +  uint32 id; // index of start of completed descriptor chain +  uint32 len; +}; + +// for disk ops +#define VIRTIO_BLK_T_IN		0 +#define VIRTIO_BLK_T_OUT	1 diff --git a/kernel/virtio_disk.c b/kernel/virtio_disk.c new file mode 100644 index 0000000..558d3b0 --- /dev/null +++ b/kernel/virtio_disk.c @@ -0,0 +1,268 @@ +// +// driver for qemu's virtio disk device. +// uses qemu's mmio interface to virtio. +// qemu presents a "legacy" virtio interface. +// +// qemu ... -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0 +// + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" +#include "virtio.h" + +// the address of a virtio mmio register. +#define R(off) ((volatile uint32 *)(VIRTIO + (off))) + +struct spinlock virtio_disk_lock; + +// this many virtio descriptors. +// must be a power of two. +#define NUM 8 + +// memory for virtio descriptors &c for queue 0. +// this is a global instead of allocated because it has +// to be multiple contiguous pages, which kalloc() +// doesn't support. +__attribute__ ((aligned (PGSIZE))) +static char pages[2*PGSIZE]; +static struct VRingDesc *desc; +static uint16 *avail; +static char *used; + +// our own book-keeping. +static char free[NUM];  // is a descriptor free? +static uint16 used_idx; // we've looked this far in used[2..NUM]. + +// track info about in-flight operations, +// for use when completion interrupt arrives. +// indexed by first descriptor index of chain. +static struct { +  struct buf *b; +} info[NUM]; + +void +virtio_disk_init(void) +{ +  uint32 status = 0; + +  initlock(&virtio_disk_lock, "virtio_disk"); + +  // qemu's virtio-mmio.c + +  if(*R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 || +     *R(VIRTIO_MMIO_VERSION) != 1 || +     *R(VIRTIO_MMIO_DEVICE_ID) != 2 || +     *R(VIRTIO_MMIO_VENDOR_ID) != 0x554d4551){ +    panic("could not find virtio disk"); +  } +   +  status |= VIRTIO_CONFIG_S_ACKNOWLEDGE; +  *R(VIRTIO_MMIO_STATUS) = status; + +  status |= VIRTIO_CONFIG_S_DRIVER; +  *R(VIRTIO_MMIO_STATUS) = status; + +  // negotiate features +  uint64 features = *R(VIRTIO_MMIO_DEVICE_FEATURES); +  features &= ~(1 << VIRTIO_BLK_F_RO); +  features &= ~(1 << VIRTIO_BLK_F_SCSI); +  features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE); +  features &= ~(1 << VIRTIO_BLK_F_MQ); +  features &= ~(1 << VIRTIO_F_ANY_LAYOUT); +  features &= ~(1 << VIRTIO_RING_F_EVENT_IDX); +  features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC); +  *R(VIRTIO_MMIO_DRIVER_FEATURES) = features; + +  // tell device that feature negotiation is complete. +  status |= VIRTIO_CONFIG_S_FEATURES_OK; +  *R(VIRTIO_MMIO_STATUS) = status; + +  // tell device we're completely ready. +  status |= VIRTIO_CONFIG_S_DRIVER_OK; +  *R(VIRTIO_MMIO_STATUS) = status; + +  *R(VIRTIO_MMIO_GUEST_PAGE_SIZE) = PGSIZE; + +  // qemu's hw/virtio/virtio.c + +  // initialize queue 0 +  *R(VIRTIO_MMIO_QUEUE_SEL) = 0; +  uint32 max = *R(VIRTIO_MMIO_QUEUE_NUM_MAX); +  if(max == 0) +    panic("virtio disk has no queue 0"); +  if(max < NUM) +    panic("virtio disk max queue too short"); +  *R(VIRTIO_MMIO_QUEUE_NUM) = NUM; +  memset(pages, 0, sizeof(pages)); +  *R(VIRTIO_MMIO_QUEUE_PFN) = ((uint64)pages) >> PGSHIFT; + +  // desc = pages -- num * VRingDesc +  // avail = pages + 0x40 -- 2 * uint16, then num * uint16 +  // used = pages + 4096 -- 2 * uint16, then num * vRingUsedElem + +  desc = (struct VRingDesc *) pages; +  avail = (uint16*)(((char*)desc) + NUM*sizeof(struct VRingDesc)); +  used = pages + PGSIZE; + +  for(int i = 0; i < NUM; i++) +    free[i] = 1; +} + +// find a free descriptor, mark it non-free, return its index. +static int +alloc_desc() +{ +  for(int i = 0; i < NUM; i++){ +    if(free[i]){ +      free[i] = 0; +      return i; +    } +  } +  return -1; +} + +void +free_desc(int i) +{ +  if(i >= NUM) +    panic("virtio_disk_intr 1"); +  if(free[i]) +    panic("virtio_disk_intr 2"); +  free[i] = 1; +} + +void +virtio_disk_rw(struct buf *b) +{ +  uint64 sector = b->blockno * (BSIZE / 512); + +  acquire(&virtio_disk_lock); + +  // the spec says that legacy block operations always use three +  // descriptors: one for type/reserved/sector, one for +  // the data, one for a 1-byte status result. + +  // allocate the three descriptors. +  int idx[3]; +  while(1){ +    int done = 1; +    for(int i = 0; i < 3; i++){ +      idx[i] = alloc_desc(); +      if(idx[i] < 0){ +        for(int j = 0; j < i; j++) +          free_desc(idx[j]); +        wakeup(&free[0]); +        done = 0; +        break; +      } +    } +    if(done) +      break; +    sleep(&free[0], &virtio_disk_lock); +  } + +  // format the three descriptors. +  // qemu's virtio-blk.c reads them. + +  struct virtio_blk_outhdr { +    uint32 type; +    uint32 reserved; +    uint64 sector; +  } buf0; + +  if(b->flags & B_DIRTY) +    buf0.type = VIRTIO_BLK_T_OUT; // write the disk +  else +    buf0.type = VIRTIO_BLK_T_IN; // read the disk +  buf0.reserved = 0; +  buf0.sector = sector; + +  desc[idx[0]].addr = (uint64) &buf0; +  desc[idx[0]].len = sizeof(buf0); +  desc[idx[0]].flags = VRING_DESC_F_NEXT; +  desc[idx[0]].next = idx[1]; + +  desc[idx[1]].addr = (uint64) b->data; +  desc[idx[1]].len = BSIZE; +  if(b->flags & B_DIRTY) +    desc[idx[1]].flags = 0; // device reads b->data +  else +    desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data +  desc[idx[1]].flags |= VRING_DESC_F_NEXT; +  desc[idx[1]].next = idx[2]; + +  char status = 0; +  desc[idx[2]].addr = (uint64) &status; +  desc[idx[2]].len = 1; +  desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status +  desc[idx[2]].next = 0; + +  // record struct buf for virtio_disk_intr(). +  info[idx[0]].b = b; + +  // avail[0] is flags +  // avail[1] tells the device how far to look in avail[2...]. +  // avail[2...] are desc[] indices the device should process. +  // we only tell device the first index in our chain of descriptors. +  avail[2 + (avail[1] % NUM)] = idx[0]; +  __sync_synchronize(); +  avail[1] = avail[1] + 1; + +  *R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number + +  // Wait for virtio_disk_intr() to say request has finished. +  while((b->flags & (B_VALID|B_DIRTY)) != B_VALID){ +    sleep(b, &virtio_disk_lock); +  } + +  release(&virtio_disk_lock); +} + +void +virtio_disk_intr() +{ +  // the used area is: +  // uint16 flags +  // uint16 idx +  // array of VRingUsedElem + +  // XXX spec says to read INTERRUPT_STATUS and +  // write INTERRUPT_ACK + +  acquire(&virtio_disk_lock); +   +  while((used_idx % NUM) != (*(volatile uint16 *)(used+2) % NUM)){ +    struct VRingUsedElem *ue = (struct VRingUsedElem *) (used + 4 + 8*used_idx); + +    // XXX check the one-byte status in the 3rd descriptor. + +    info[ue->id].b->flags |= B_VALID; +    info[ue->id].b->flags &= ~B_DIRTY; + +    wakeup(info[ue->id].b); + +    info[ue->id].b = 0; + +    uint i = ue->id; +    while(1){ +      desc[i].addr = 0; +      free_desc(i); +      if(desc[i].flags & VRING_DESC_F_NEXT) +        i = desc[i].next; +      else +        break; +    } +    wakeup(&free[0]); + +    used_idx = (used_idx + 1) % NUM; +  } + +  release(&virtio_disk_lock); +} diff --git a/kernel/vm.c b/kernel/vm.c index 0ea6bca..0d0a9d9 100644 --- a/kernel/vm.c +++ b/kernel/vm.c @@ -30,6 +30,10 @@ kvminit()    mappages(kernel_pagetable, UART0, PGSIZE,             UART0, PTE_R | PTE_W); +  // virtio disk interface +  mappages(kernel_pagetable, VIRTIO, PGSIZE, +           VIRTIO, PTE_R | PTE_W); +    // CLINT    mappages(kernel_pagetable, CLINT, 0x10000,             CLINT, PTE_R | PTE_W); | 
