diff options
Diffstat (limited to 'kernel/virtio_disk.c')
-rw-r--r-- | kernel/virtio_disk.c | 268 |
1 files changed, 268 insertions, 0 deletions
diff --git a/kernel/virtio_disk.c b/kernel/virtio_disk.c new file mode 100644 index 0000000..558d3b0 --- /dev/null +++ b/kernel/virtio_disk.c @@ -0,0 +1,268 @@ +// +// driver for qemu's virtio disk device. +// uses qemu's mmio interface to virtio. +// qemu presents a "legacy" virtio interface. +// +// qemu ... -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0 +// + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" +#include "virtio.h" + +// the address of a virtio mmio register. +#define R(off) ((volatile uint32 *)(VIRTIO + (off))) + +struct spinlock virtio_disk_lock; + +// this many virtio descriptors. +// must be a power of two. +#define NUM 8 + +// memory for virtio descriptors &c for queue 0. +// this is a global instead of allocated because it has +// to be multiple contiguous pages, which kalloc() +// doesn't support. +__attribute__ ((aligned (PGSIZE))) +static char pages[2*PGSIZE]; +static struct VRingDesc *desc; +static uint16 *avail; +static char *used; + +// our own book-keeping. +static char free[NUM]; // is a descriptor free? +static uint16 used_idx; // we've looked this far in used[2..NUM]. + +// track info about in-flight operations, +// for use when completion interrupt arrives. +// indexed by first descriptor index of chain. +static struct { + struct buf *b; +} info[NUM]; + +void +virtio_disk_init(void) +{ + uint32 status = 0; + + initlock(&virtio_disk_lock, "virtio_disk"); + + // qemu's virtio-mmio.c + + if(*R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 || + *R(VIRTIO_MMIO_VERSION) != 1 || + *R(VIRTIO_MMIO_DEVICE_ID) != 2 || + *R(VIRTIO_MMIO_VENDOR_ID) != 0x554d4551){ + panic("could not find virtio disk"); + } + + status |= VIRTIO_CONFIG_S_ACKNOWLEDGE; + *R(VIRTIO_MMIO_STATUS) = status; + + status |= VIRTIO_CONFIG_S_DRIVER; + *R(VIRTIO_MMIO_STATUS) = status; + + // negotiate features + uint64 features = *R(VIRTIO_MMIO_DEVICE_FEATURES); + features &= ~(1 << VIRTIO_BLK_F_RO); + features &= ~(1 << VIRTIO_BLK_F_SCSI); + features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE); + features &= ~(1 << VIRTIO_BLK_F_MQ); + features &= ~(1 << VIRTIO_F_ANY_LAYOUT); + features &= ~(1 << VIRTIO_RING_F_EVENT_IDX); + features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC); + *R(VIRTIO_MMIO_DRIVER_FEATURES) = features; + + // tell device that feature negotiation is complete. + status |= VIRTIO_CONFIG_S_FEATURES_OK; + *R(VIRTIO_MMIO_STATUS) = status; + + // tell device we're completely ready. + status |= VIRTIO_CONFIG_S_DRIVER_OK; + *R(VIRTIO_MMIO_STATUS) = status; + + *R(VIRTIO_MMIO_GUEST_PAGE_SIZE) = PGSIZE; + + // qemu's hw/virtio/virtio.c + + // initialize queue 0 + *R(VIRTIO_MMIO_QUEUE_SEL) = 0; + uint32 max = *R(VIRTIO_MMIO_QUEUE_NUM_MAX); + if(max == 0) + panic("virtio disk has no queue 0"); + if(max < NUM) + panic("virtio disk max queue too short"); + *R(VIRTIO_MMIO_QUEUE_NUM) = NUM; + memset(pages, 0, sizeof(pages)); + *R(VIRTIO_MMIO_QUEUE_PFN) = ((uint64)pages) >> PGSHIFT; + + // desc = pages -- num * VRingDesc + // avail = pages + 0x40 -- 2 * uint16, then num * uint16 + // used = pages + 4096 -- 2 * uint16, then num * vRingUsedElem + + desc = (struct VRingDesc *) pages; + avail = (uint16*)(((char*)desc) + NUM*sizeof(struct VRingDesc)); + used = pages + PGSIZE; + + for(int i = 0; i < NUM; i++) + free[i] = 1; +} + +// find a free descriptor, mark it non-free, return its index. +static int +alloc_desc() +{ + for(int i = 0; i < NUM; i++){ + if(free[i]){ + free[i] = 0; + return i; + } + } + return -1; +} + +void +free_desc(int i) +{ + if(i >= NUM) + panic("virtio_disk_intr 1"); + if(free[i]) + panic("virtio_disk_intr 2"); + free[i] = 1; +} + +void +virtio_disk_rw(struct buf *b) +{ + uint64 sector = b->blockno * (BSIZE / 512); + + acquire(&virtio_disk_lock); + + // the spec says that legacy block operations always use three + // descriptors: one for type/reserved/sector, one for + // the data, one for a 1-byte status result. + + // allocate the three descriptors. + int idx[3]; + while(1){ + int done = 1; + for(int i = 0; i < 3; i++){ + idx[i] = alloc_desc(); + if(idx[i] < 0){ + for(int j = 0; j < i; j++) + free_desc(idx[j]); + wakeup(&free[0]); + done = 0; + break; + } + } + if(done) + break; + sleep(&free[0], &virtio_disk_lock); + } + + // format the three descriptors. + // qemu's virtio-blk.c reads them. + + struct virtio_blk_outhdr { + uint32 type; + uint32 reserved; + uint64 sector; + } buf0; + + if(b->flags & B_DIRTY) + buf0.type = VIRTIO_BLK_T_OUT; // write the disk + else + buf0.type = VIRTIO_BLK_T_IN; // read the disk + buf0.reserved = 0; + buf0.sector = sector; + + desc[idx[0]].addr = (uint64) &buf0; + desc[idx[0]].len = sizeof(buf0); + desc[idx[0]].flags = VRING_DESC_F_NEXT; + desc[idx[0]].next = idx[1]; + + desc[idx[1]].addr = (uint64) b->data; + desc[idx[1]].len = BSIZE; + if(b->flags & B_DIRTY) + desc[idx[1]].flags = 0; // device reads b->data + else + desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data + desc[idx[1]].flags |= VRING_DESC_F_NEXT; + desc[idx[1]].next = idx[2]; + + char status = 0; + desc[idx[2]].addr = (uint64) &status; + desc[idx[2]].len = 1; + desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status + desc[idx[2]].next = 0; + + // record struct buf for virtio_disk_intr(). + info[idx[0]].b = b; + + // avail[0] is flags + // avail[1] tells the device how far to look in avail[2...]. + // avail[2...] are desc[] indices the device should process. + // we only tell device the first index in our chain of descriptors. + avail[2 + (avail[1] % NUM)] = idx[0]; + __sync_synchronize(); + avail[1] = avail[1] + 1; + + *R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number + + // Wait for virtio_disk_intr() to say request has finished. + while((b->flags & (B_VALID|B_DIRTY)) != B_VALID){ + sleep(b, &virtio_disk_lock); + } + + release(&virtio_disk_lock); +} + +void +virtio_disk_intr() +{ + // the used area is: + // uint16 flags + // uint16 idx + // array of VRingUsedElem + + // XXX spec says to read INTERRUPT_STATUS and + // write INTERRUPT_ACK + + acquire(&virtio_disk_lock); + + while((used_idx % NUM) != (*(volatile uint16 *)(used+2) % NUM)){ + struct VRingUsedElem *ue = (struct VRingUsedElem *) (used + 4 + 8*used_idx); + + // XXX check the one-byte status in the 3rd descriptor. + + info[ue->id].b->flags |= B_VALID; + info[ue->id].b->flags &= ~B_DIRTY; + + wakeup(info[ue->id].b); + + info[ue->id].b = 0; + + uint i = ue->id; + while(1){ + desc[i].addr = 0; + free_desc(i); + if(desc[i].flags & VRING_DESC_F_NEXT) + i = desc[i].next; + else + break; + } + wakeup(&free[0]); + + used_idx = (used_idx + 1) % NUM; + } + + release(&virtio_disk_lock); +} |