summaryrefslogtreecommitdiff
path: root/kernel/virtio_disk.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/virtio_disk.c')
-rw-r--r--kernel/virtio_disk.c269
1 files changed, 269 insertions, 0 deletions
diff --git a/kernel/virtio_disk.c b/kernel/virtio_disk.c
new file mode 100644
index 0000000..3cff024
--- /dev/null
+++ b/kernel/virtio_disk.c
@@ -0,0 +1,269 @@
+//
+// driver for qemu's virtio disk device.
+// uses qemu's mmio interface to virtio.
+// qemu presents a "legacy" virtio interface.
+//
+// qemu ... -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0
+//
+
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "param.h"
+#include "memlayout.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "fs.h"
+#include "buf.h"
+#include "virtio.h"
+
+// R(r): pointer to the 32-bit virtio mmio register at byte offset r
+// from VIRTIO0. the pointer is volatile so that every read or write
+// through it is actually performed rather than optimized away.
+#define R(r) ((volatile uint32 *)(VIRTIO0 + (r)))
+
+// all of the driver's state for the one virtio disk; there is a
+// single static instance, protected by vdisk_lock.
+static struct disk {
+ // memory for virtio descriptors &c for queue 0.
+ // this is a global instead of allocated because it must
+ // be multiple contiguous pages, which kalloc()
+ // doesn't support, and page aligned.
+ // pages[] is the first member, so the PGSIZE alignment of the
+ // whole struct (see the attribute below) also aligns pages[];
+ // do not put another member in front of it.
+ char pages[2*PGSIZE];
+ // the next three pointers point into pages[] and are set up
+ // by virtio_disk_init():
+ struct VRingDesc *desc; // descriptor table, at the start of pages[].
+ uint16 *avail; // available ring, right after the NUM descriptors.
+ struct UsedArea *used; // used ring, at the start of the second page.
+
+ // our own book-keeping.
+ char free[NUM]; // is a descriptor free?
+ uint16 used_idx; // next entry of used->elems[] to examine, kept modulo NUM.
+
+ // track info about in-flight operations,
+ // for use when completion interrupt arrives.
+ // indexed by first descriptor index of chain.
+ struct {
+ struct buf *b; // the buf being transferred; set by virtio_disk_rw().
+ char status; // the chain's third descriptor points here; the device writes it.
+ } info[NUM];
+
+ // held by virtio_disk_rw() and virtio_disk_intr() while they
+ // use the fields above.
+ struct spinlock vdisk_lock;
+
+} __attribute__ ((aligned (PGSIZE))) disk;
+
+// find qemu's virtio disk on the mmio bus, negotiate features,
+// and set up virtqueue 0 inside disk.pages.
+// panics if the device is missing or its queue is unusable.
+void
+virtio_disk_init(void)
+{
+  uint32 status = 0;
+
+  initlock(&disk.vdisk_lock, "virtio_disk");
+
+  // 0x74726976 is "virt" and 0x554d4551 is "QEMU", both read as
+  // little-endian ascii; version 1 is the legacy mmio interface
+  // and device id 2 is a block device.
+  if(*R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 ||
+     *R(VIRTIO_MMIO_VERSION) != 1 ||
+     *R(VIRTIO_MMIO_DEVICE_ID) != 2 ||
+     *R(VIRTIO_MMIO_VENDOR_ID) != 0x554d4551){
+    panic("could not find virtio disk");
+  }
+
+  // the status register is built up one bit at a time; each write
+  // repeats the bits already set.
+  status |= VIRTIO_CONFIG_S_ACKNOWLEDGE;
+  *R(VIRTIO_MMIO_STATUS) = status;
+
+  status |= VIRTIO_CONFIG_S_DRIVER;
+  *R(VIRTIO_MMIO_STATUS) = status;
+
+  // negotiate features: accept whatever the device offers except
+  // the features this driver does not implement.
+  // the register is 32 bits wide, so a 32-bit variable is enough.
+  uint32 features = *R(VIRTIO_MMIO_DEVICE_FEATURES);
+  features &= ~(1U << VIRTIO_BLK_F_RO);
+  features &= ~(1U << VIRTIO_BLK_F_SCSI);
+  features &= ~(1U << VIRTIO_BLK_F_CONFIG_WCE);
+  features &= ~(1U << VIRTIO_BLK_F_MQ);
+  features &= ~(1U << VIRTIO_F_ANY_LAYOUT);
+  features &= ~(1U << VIRTIO_RING_F_EVENT_IDX);
+  features &= ~(1U << VIRTIO_RING_F_INDIRECT_DESC);
+  *R(VIRTIO_MMIO_DRIVER_FEATURES) = features;
+
+  // tell device that feature negotiation is complete.
+  status |= VIRTIO_CONFIG_S_FEATURES_OK;
+  *R(VIRTIO_MMIO_STATUS) = status;
+
+  // the device interprets QUEUE_PFN in units of this page size,
+  // so it has to be written before QUEUE_PFN.
+  *R(VIRTIO_MMIO_GUEST_PAGE_SIZE) = PGSIZE;
+
+  // initialize queue 0.
+  *R(VIRTIO_MMIO_QUEUE_SEL) = 0;
+  uint32 max = *R(VIRTIO_MMIO_QUEUE_NUM_MAX);
+  if(max == 0)
+    panic("virtio disk has no queue 0");
+  if(max < NUM)
+    panic("virtio disk max queue too short");
+  *R(VIRTIO_MMIO_QUEUE_NUM) = NUM;
+  memset(disk.pages, 0, sizeof(disk.pages));
+  *R(VIRTIO_MMIO_QUEUE_PFN) = ((uint64)disk.pages) >> PGSHIFT;
+
+  // layout of the queue inside disk.pages:
+  // desc  = pages                                -- NUM * VRingDesc
+  // avail = pages + NUM*sizeof(struct VRingDesc) -- 2 * uint16, then NUM * uint16
+  // used  = pages + PGSIZE                       -- struct UsedArea
+
+  disk.desc = (struct VRingDesc *) disk.pages;
+  disk.avail = (uint16*)(((char*)disk.desc) + NUM*sizeof(struct VRingDesc));
+  disk.used = (struct UsedArea *) (disk.pages + PGSIZE);
+
+  // every descriptor starts out unused.
+  for(int i = 0; i < NUM; i++)
+    disk.free[i] = 1;
+
+  // tell device we're completely ready. this is deliberately the
+  // last step: DRIVER_OK makes the device live, so the queue must
+  // already be configured when it is written.
+  status |= VIRTIO_CONFIG_S_DRIVER_OK;
+  *R(VIRTIO_MMIO_STATUS) = status;
+
+  // plic.c and trap.c arrange for interrupts from VIRTIO0_IRQ.
+}
+
+// scan free[] for the lowest-numbered free descriptor, claim it,
+// and return its index. returns -1 when every descriptor is in use.
+static int
+alloc_desc()
+{
+  int found = -1;
+
+  for(int d = 0; d < NUM && found < 0; d++){
+    if(disk.free[d]){
+      disk.free[d] = 0;
+      found = d;
+    }
+  }
+  return found;
+}
+
+// mark descriptor i as free and wake up anyone sleeping in
+// virtio_disk_rw() waiting for a descriptor.
+// panics if i is not a valid descriptor index or if the
+// descriptor is already free.
+static void
+free_desc(int i)
+{
+  // i is a signed int, so reject negative values as well as
+  // values past the end before indexing free[] and desc[].
+  if(i < 0 || i >= NUM)
+    panic("virtio_disk_intr 1");
+  if(disk.free[i])
+    panic("virtio_disk_intr 2");
+  // only addr is cleared; flags and next are left alone because
+  // free_chain() reads them after this call to follow the chain.
+  disk.desc[i].addr = 0;
+  disk.free[i] = 1;
+  // &disk.free[0] is the channel virtio_disk_rw() sleeps on.
+  wakeup(&disk.free[0]);
+}
+
+// release every descriptor of the chain that starts at i,
+// following the next links for as long as VRING_DESC_F_NEXT is set.
+static void
+free_chain(int i)
+{
+  int more;
+
+  do {
+    free_desc(i);
+    // free_desc() leaves flags and next intact, so they can still
+    // be consulted here to find the following descriptor.
+    more = disk.desc[i].flags & VRING_DESC_F_NEXT;
+    if(more)
+      i = disk.desc[i].next;
+  } while(more);
+}
+
+// allocate three descriptors and store their indices in idx[0..2].
+// all-or-nothing: if fewer than three are available, the ones
+// already taken are released again and -1 is returned.
+// returns 0 on success.
+static int
+alloc3_desc(int *idx)
+{
+  int got = 0;
+
+  while(got < 3){
+    int d = alloc_desc();
+    idx[got] = d;
+    if(d < 0){
+      // out of descriptors: give back what we already hold.
+      for(int k = 0; k < got; k++)
+        free_desc(idx[k]);
+      return -1;
+    }
+    got++;
+  }
+  return 0;
+}
+
+// transfer one block between b and the disk, and sleep until the
+// device reports that the request has finished.
+//   b:     the buffer; b->blockno selects the block and b->data
+//          holds the BSIZE bytes to transfer.
+//   write: non-zero to write b->data to the disk,
+//          zero to read the disk into b->data.
+void
+virtio_disk_rw(struct buf *b, int write)
+{
+  // the device addresses the disk in 512-byte sectors.
+  // widen before multiplying so the product is computed in 64 bits.
+  uint64 sector = (uint64) b->blockno * (BSIZE / 512);
+
+  acquire(&disk.vdisk_lock);
+
+  // the spec says that legacy block operations use three
+  // descriptors: one for type/reserved/sector, one for
+  // the data, one for a 1-byte status result.
+
+  // allocate the three descriptors, sleeping until
+  // free_desc() signals that some have been released.
+  int idx[3];
+  while(alloc3_desc(idx) != 0)
+    sleep(&disk.free[0], &disk.vdisk_lock);
+
+  // format the three descriptors.
+  // qemu's virtio-blk.c reads them.
+
+  struct virtio_blk_outhdr {
+    uint32 type;
+    uint32 reserved;
+    uint64 sector;
+  } buf0;
+
+  buf0.type = write ? VIRTIO_BLK_T_OUT  // write the disk
+                    : VIRTIO_BLK_T_IN;  // read the disk
+  buf0.reserved = 0;
+  buf0.sector = sector;
+
+  // descriptor 0: the request header.
+  // buf0 is on a kernel stack, which is not direct mapped,
+  // thus the call to kvmpa().
+  disk.desc[idx[0]].addr = (uint64) kvmpa((uint64) &buf0);
+  disk.desc[idx[0]].len = sizeof(buf0);
+  disk.desc[idx[0]].flags = VRING_DESC_F_NEXT;
+  disk.desc[idx[0]].next = idx[1];
+
+  // descriptor 1: the data.
+  disk.desc[idx[1]].addr = (uint64) b->data;
+  disk.desc[idx[1]].len = BSIZE;
+  if(write)
+    disk.desc[idx[1]].flags = 0; // device reads b->data
+  else
+    disk.desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data
+  disk.desc[idx[1]].flags |= VRING_DESC_F_NEXT;
+  disk.desc[idx[1]].next = idx[2];
+
+  // descriptor 2: the one-byte status, which lives in disk.info[]
+  // so that virtio_disk_intr() can check it.
+  disk.info[idx[0]].status = 0;
+  disk.desc[idx[2]].addr = (uint64) &disk.info[idx[0]].status;
+  disk.desc[idx[2]].len = 1;
+  disk.desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status
+  disk.desc[idx[2]].next = 0;
+
+  // record struct buf for virtio_disk_intr().
+  b->disk = 1;
+  disk.info[idx[0]].b = b;
+
+  // avail[0] is flags
+  // avail[1] tells the device how far to look in avail[2...].
+  // avail[2...] are desc[] indices the device should process.
+  // we only tell device the first index in our chain of descriptors.
+  disk.avail[2 + (disk.avail[1] % NUM)] = idx[0];
+
+  // the descriptors and the ring slot must be visible before the
+  // index that publishes them.
+  __sync_synchronize();
+  disk.avail[1] = disk.avail[1] + 1;
+
+  // the new index must in turn be visible before the device is
+  // notified, otherwise it could look at the ring and find nothing.
+  __sync_synchronize();
+
+  *R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number
+
+  // Wait for virtio_disk_intr() to say request has finished.
+  // buf0 must stay alive until then, which it does because this
+  // function does not return before the request is complete.
+  while(b->disk == 1) {
+    sleep(b, &disk.vdisk_lock);
+  }
+
+  disk.info[idx[0]].b = 0;
+  free_chain(idx[0]);
+
+  release(&disk.vdisk_lock);
+}
+
+// handle a virtio disk interrupt: for every request the device
+// has added to the used ring since we last looked, mark its buf
+// as done and wake up the virtio_disk_rw() that is sleeping on it.
+// panics if the device reported a non-zero status for a request.
+//
+// NOTE(review): nothing here acknowledges the interrupt to the
+// device; confirm whether a write to the mmio interrupt-ack
+// register is needed on the target platform.
+void
+virtio_disk_intr()
+{
+  acquire(&disk.vdisk_lock);
+
+  for(;;){
+    // stop once our cursor has caught up with the device's index
+    // (both compared modulo NUM).
+    if((disk.used_idx % NUM) == (disk.used->id % NUM))
+      break;
+
+    // the used element names the first descriptor of the finished
+    // chain, which is also the index into disk.info[].
+    int head = disk.used->elems[disk.used_idx].id;
+
+    if(disk.info[head].status != 0)
+      panic("virtio_disk_intr status");
+
+    struct buf *done = disk.info[head].b;
+    done->disk = 0; // disk is done with buf
+    wakeup(done);
+
+    disk.used_idx = (disk.used_idx + 1) % NUM;
+  }
+
+  release(&disk.vdisk_lock);
+}