summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
author Robert Morris <[email protected]> 2019-06-13 06:49:02 -0400
committer Robert Morris <[email protected]> 2019-06-13 06:49:02 -0400
commit de9d72c9086ec935d5b2b889f50ff611135f80fa (patch)
tree 071952e383057de705c59f9e7894f3154e4bbad2 /kernel
parent 5753553213df8f9de851adb68377db43faecb91f (diff)
download xv6-labs-de9d72c9086ec935d5b2b889f50ff611135f80fa.tar.gz
xv6-labs-de9d72c9086ec935d5b2b889f50ff611135f80fa.tar.bz2
xv6-labs-de9d72c9086ec935d5b2b889f50ff611135f80fa.zip
virtio disk driver
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/bio.c 6
-rw-r--r-- kernel/defs.h 5
-rw-r--r-- kernel/kalloc.c 1
-rw-r--r-- kernel/kernel.ld 1
-rw-r--r-- kernel/main.c 3
-rw-r--r-- kernel/memlayout.h 6
-rw-r--r-- kernel/plic.c 7
-rw-r--r-- kernel/trap.c 2
-rw-r--r-- kernel/virtio.h 59
-rw-r--r-- kernel/virtio_disk.c 268
-rw-r--r-- kernel/vm.c 4
11 files changed, 354 insertions, 8 deletions
diff --git a/kernel/bio.c b/kernel/bio.c
index 90f9af9..07ea030 100644
--- a/kernel/bio.c
+++ b/kernel/bio.c
@@ -101,7 +101,8 @@ bread(uint dev, uint blockno)
b = bget(dev, blockno);
if((b->flags & B_VALID) == 0) {
- ramdiskrw(b);
+ //ramdiskrw(b);
+ virtio_disk_rw(b);
}
return b;
}
@@ -113,7 +114,8 @@ bwrite(struct buf *b)
if(!holdingsleep(&b->lock))
panic("bwrite");
b->flags |= B_DIRTY;
- ramdiskrw(b);
+ //ramdiskrw(b);
+ virtio_disk_rw(b);
}
// Release a locked buffer.
diff --git a/kernel/defs.h b/kernel/defs.h
index 597e5b6..1b397fe 100644
--- a/kernel/defs.h
+++ b/kernel/defs.h
@@ -201,5 +201,10 @@ uint64 plic_pending(void);
int plic_claim(void);
void plic_complete(int);
+// virtio_disk.c
+void virtio_disk_init(void);
+void virtio_disk_rw(struct buf *);
+void virtio_disk_intr();
+
// number of elements in fixed-size array
#define NELEM(x) (sizeof(x)/sizeof((x)[0]))
diff --git a/kernel/kalloc.c b/kernel/kalloc.c
index 1ed1c49..afadb02 100644
--- a/kernel/kalloc.c
+++ b/kernel/kalloc.c
@@ -35,6 +35,7 @@ freerange(void *pa_start, void *pa_end)
{
char *p;
p = (char*)PGROUNDUP((uint64)pa_start);
+ p += 4096; // XXX I can't get kernel.ld to place end beyond the last bss symbol.
for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE)
kfree(p);
}
diff --git a/kernel/kernel.ld b/kernel/kernel.ld
index 53c9b90..dec8e4f 100644
--- a/kernel/kernel.ld
+++ b/kernel/kernel.ld
@@ -28,4 +28,5 @@ SECTIONS
*(.bss)
PROVIDE(end = .);
}
+
}
diff --git a/kernel/main.c b/kernel/main.c
index 2168b9f..d44c82c 100644
--- a/kernel/main.c
+++ b/kernel/main.c
@@ -26,7 +26,8 @@ main()
plicinithart(); // ask PLIC for device interrupts
binit(); // buffer cache
fileinit(); // file table
- ramdiskinit(); // disk
+ virtio_disk_init(); // emulated hard disk
+ ramdiskinit(); // in-memory disk
userinit(); // first user process
started = 1;
} else {
diff --git a/kernel/memlayout.h b/kernel/memlayout.h
index 462986c..6d86166 100644
--- a/kernel/memlayout.h
+++ b/kernel/memlayout.h
@@ -6,7 +6,8 @@
// 00001000 -- boot ROM, provided by qemu
// 02000000 -- CLINT
// 0C000000 -- PLIC
-// 10000000 -- uart0 registers
+// 10000000 -- uart0
+// 10001000 -- virtio disk
// 80000000 -- boot ROM jumps here in machine mode
// -kernel loads the kernel here
// 88000000 -- -initrd fs.img ramdisk image.
@@ -21,6 +22,9 @@
#define UART0 0x10000000L
#define UART0_IRQ 10
+#define VIRTIO 0x10001000
+#define VIRTIO_IRQ 1 // really the first of 8 units
+
// local interrupt controller, which contains the timer.
#define CLINT 0x2000000L
#define CLINT_MTIMECMP(hartid) (CLINT + 0x4000 + 8*(hartid))
diff --git a/kernel/plic.c b/kernel/plic.c
index 0f19ab0..cc9a97e 100644
--- a/kernel/plic.c
+++ b/kernel/plic.c
@@ -11,8 +11,9 @@
void
plicinit(void)
{
- // set uart's priority to be non-zero (otherwise disabled).
+ // set desired IRQ priorities non-zero (otherwise disabled).
*(uint32*)(PLIC + UART0_IRQ*4) = 1;
+ *(uint32*)(PLIC + VIRTIO_IRQ*4) = 1;
}
void
@@ -21,11 +22,9 @@ plicinithart(void)
int hart = cpuid();
// set uart's enable bit for this hart's S-mode.
- //*(uint32*)(PLIC + 0x2080)= (1 << UART0_IRQ);
- *(uint32*)PLIC_SENABLE(hart)= (1 << UART0_IRQ);
+ *(uint32*)PLIC_SENABLE(hart)= (1 << UART0_IRQ) | (1 << VIRTIO_IRQ);
// set this hart's S-mode priority threshold to 0.
- //*(uint32*)(PLIC + 0x201000) = 0;
*(uint32*)PLIC_SPRIORITY(hart) = 0;
}
diff --git a/kernel/trap.c b/kernel/trap.c
index 050a94d..13ad362 100644
--- a/kernel/trap.c
+++ b/kernel/trap.c
@@ -159,6 +159,8 @@ devintr()
if(irq == UART0_IRQ){
uartintr();
+ } else if(irq == VIRTIO_IRQ){
+ virtio_disk_intr();
}
plic_complete(irq);
diff --git a/kernel/virtio.h b/kernel/virtio.h
new file mode 100644
index 0000000..258d107
--- /dev/null
+++ b/kernel/virtio.h
@@ -0,0 +1,59 @@
+//
+// virtio device definitions.
+// for both the mmio interface, and virtio descriptors.
+// only tested with qemu.
+// this is the "legacy" virtio interface.
+//
+
+// virtio mmio control registers: byte offsets from the device base, which is mapped at 0x10001000.
+// from qemu virtio_mmio.h
+#define VIRTIO_MMIO_MAGIC_VALUE 0x000 // the driver expects to read 0x74726976 here
+#define VIRTIO_MMIO_VERSION 0x004 // the driver expects 1 -- version, 1 is legacy
+#define VIRTIO_MMIO_DEVICE_ID 0x008 // the driver expects 2 -- block device type
+#define VIRTIO_MMIO_VENDOR_ID 0x00c // the driver expects to read 0x554d4551 here
+#define VIRTIO_MMIO_DEVICE_FEATURES 0x010 // feature bits offered by the device; read during negotiation
+#define VIRTIO_MMIO_DRIVER_FEATURES 0x020 // feature bits the driver accepts; written during negotiation
+#define VIRTIO_MMIO_GUEST_PAGE_SIZE 0x028 // page size for PFN, write-only
+#define VIRTIO_MMIO_QUEUE_SEL 0x030 // select queue, write-only
+#define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034 // max size of current queue, read-only
+#define VIRTIO_MMIO_QUEUE_NUM 0x038 // size of current queue, write-only
+#define VIRTIO_MMIO_QUEUE_ALIGN 0x03c // used ring alignment, write-only
+#define VIRTIO_MMIO_QUEUE_PFN 0x040 // physical page number for queue, read/write
+#define VIRTIO_MMIO_QUEUE_READY 0x044 // ready bit
+#define VIRTIO_MMIO_QUEUE_NOTIFY 0x050 // write-only; the value written is the queue number
+#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060 // read-only
+#define VIRTIO_MMIO_INTERRUPT_ACK 0x064 // write-only
+#define VIRTIO_MMIO_STATUS 0x070 // read/write; holds the VIRTIO_CONFIG_S_* bits
+
+// status register bits (written to VIRTIO_MMIO_STATUS), from qemu virtio_config.h
+#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1 // first bit the driver sets during initialization
+#define VIRTIO_CONFIG_S_DRIVER 2 // set next, before feature negotiation
+#define VIRTIO_CONFIG_S_DRIVER_OK 4 // the driver is completely ready
+#define VIRTIO_CONFIG_S_FEATURES_OK 8 // feature negotiation is complete
+
+// device feature bits. these are bit positions (used as 1 << N), not masks.
+#define VIRTIO_BLK_F_RO 5 /* Disk is read-only */
+#define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */
+#define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */
+#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */
+#define VIRTIO_F_ANY_LAYOUT 27
+#define VIRTIO_RING_F_INDIRECT_DESC 28
+#define VIRTIO_RING_F_EVENT_IDX 29
+
+struct VRingDesc { // one entry of the descriptor table shared with the device
+ uint64 addr; // address of the buffer this descriptor refers to
+ uint32 len; // length of that buffer in bytes
+ uint16 flags; // VRING_DESC_F_* bits
+ uint16 next; // index of the next descriptor in the chain; used when VRING_DESC_F_NEXT is set
+};
+#define VRING_DESC_F_NEXT 1 // chained with another descriptor, named by the next field
+#define VRING_DESC_F_WRITE 2 // device writes (vs read)
+
+struct VRingUsedElem { // one entry of the used ring, read by the driver's interrupt handler
+ uint32 id; // index of start of completed descriptor chain
+ uint32 len; // not examined by this driver
+};
+
+// for disk ops: the request type stored in the header of each block request
+#define VIRTIO_BLK_T_IN 0 // read the disk
+#define VIRTIO_BLK_T_OUT 1 // write the disk
diff --git a/kernel/virtio_disk.c b/kernel/virtio_disk.c
new file mode 100644
index 0000000..558d3b0
--- /dev/null
+++ b/kernel/virtio_disk.c
@@ -0,0 +1,268 @@
+//
+// driver for qemu's virtio disk device.
+// uses qemu's mmio interface to virtio.
+// qemu presents a "legacy" virtio interface.
+//
+// qemu ... -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0
+//
+
+#include "types.h"
+#include "riscv.h"
+#include "defs.h"
+#include "param.h"
+#include "memlayout.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "fs.h"
+#include "buf.h"
+#include "virtio.h"
+
+// the address of a virtio mmio register, given its byte offset from VIRTIO.
+#define R(off) ((volatile uint32 *)(VIRTIO + (off)))
+
+struct spinlock virtio_disk_lock; // held by virtio_disk_rw() and virtio_disk_intr() while they use the state below
+
+// this many virtio descriptors.
+// must be a power of two.
+#define NUM 8
+
+// memory for the descriptor table, avail ring, and used ring of queue 0.
+// this is a global instead of allocated because it has
+// to be multiple contiguous pages, which kalloc()
+// doesn't support.
+__attribute__ ((aligned (PGSIZE)))
+static char pages[2*PGSIZE];
+static struct VRingDesc *desc; // descriptor table: NUM entries at the start of pages[]
+static uint16 *avail; // [0] is flags, [1] is the index, [2...] are descriptor indices
+static char *used; // used ring at pages + PGSIZE; parsed by hand as raw bytes
+
+// our own book-keeping.
+static char free[NUM]; // is a descriptor free? 1 means free.
+static uint16 used_idx; // next used-ring element virtio_disk_intr() will examine, kept modulo NUM.
+
+// track info about in-flight operations,
+// for use when completion interrupt arrives.
+// indexed by first descriptor index of chain.
+static struct {
+ struct buf *b; // buffer being read or written; 0 when no operation is in flight
+} info[NUM];
+
+// find qemu's legacy virtio-mmio block device, negotiate features,
+// and set up virtqueue 0 in the statically allocated pages[].
+// panics if the device is absent or its queue is too small.
+// called once at boot, before any call to virtio_disk_rw().
+void
+virtio_disk_init(void)
+{
+  uint32 status = 0;
+
+  initlock(&virtio_disk_lock, "virtio_disk");
+
+  // qemu's virtio-mmio.c
+
+  if(*R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 ||
+     *R(VIRTIO_MMIO_VERSION) != 1 ||
+     *R(VIRTIO_MMIO_DEVICE_ID) != 2 ||
+     *R(VIRTIO_MMIO_VENDOR_ID) != 0x554d4551){
+    panic("could not find virtio disk");
+  }
+
+  status |= VIRTIO_CONFIG_S_ACKNOWLEDGE;
+  *R(VIRTIO_MMIO_STATUS) = status;
+
+  status |= VIRTIO_CONFIG_S_DRIVER;
+  *R(VIRTIO_MMIO_STATUS) = status;
+
+  // negotiate features: accept whatever the device offers
+  // except for the features this driver does not implement.
+  uint64 features = *R(VIRTIO_MMIO_DEVICE_FEATURES);
+  features &= ~(1 << VIRTIO_BLK_F_RO);
+  features &= ~(1 << VIRTIO_BLK_F_SCSI);
+  features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE);
+  features &= ~(1 << VIRTIO_BLK_F_MQ);
+  features &= ~(1 << VIRTIO_F_ANY_LAYOUT);
+  features &= ~(1 << VIRTIO_RING_F_EVENT_IDX);
+  features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
+  *R(VIRTIO_MMIO_DRIVER_FEATURES) = features;
+
+  // tell device that feature negotiation is complete.
+  status |= VIRTIO_CONFIG_S_FEATURES_OK;
+  *R(VIRTIO_MMIO_STATUS) = status;
+
+  // the legacy interface locates the queue by page number,
+  // so the device must know our page size first.
+  *R(VIRTIO_MMIO_GUEST_PAGE_SIZE) = PGSIZE;
+
+  // qemu's hw/virtio/virtio.c
+
+  // initialize queue 0
+  *R(VIRTIO_MMIO_QUEUE_SEL) = 0;
+  uint32 max = *R(VIRTIO_MMIO_QUEUE_NUM_MAX);
+  if(max == 0)
+    panic("virtio disk has no queue 0");
+  if(max < NUM)
+    panic("virtio disk max queue too short");
+  *R(VIRTIO_MMIO_QUEUE_NUM) = NUM;
+  memset(pages, 0, sizeof(pages));
+  // the used ring is placed at pages + PGSIZE below, so tell the
+  // device that alignment explicitly rather than relying on its default.
+  *R(VIRTIO_MMIO_QUEUE_ALIGN) = PGSIZE;
+  *R(VIRTIO_MMIO_QUEUE_PFN) = ((uint64)pages) >> PGSHIFT;
+
+  // desc = pages -- num * VRingDesc
+  // avail = pages + 0x40 -- 2 * uint16, then num * uint16
+  // used = pages + 4096 -- 2 * uint16, then num * vRingUsedElem
+
+  desc = (struct VRingDesc *) pages;
+  avail = (uint16*)(((char*)desc) + NUM*sizeof(struct VRingDesc));
+  used = pages + PGSIZE;
+
+  for(int i = 0; i < NUM; i++)
+    free[i] = 1;
+
+  // tell device we're completely ready. this must come last:
+  // the queue has to be fully set up before DRIVER_OK is set.
+  status |= VIRTIO_CONFIG_S_DRIVER_OK;
+  *R(VIRTIO_MMIO_STATUS) = status;
+}
+
+// claim the lowest-numbered free descriptor and return its index.
+// returns -1 when all NUM descriptors are in use.
+static int
+alloc_desc()
+{
+  int d = 0;
+
+  // skip over descriptors that are already taken.
+  while(d < NUM && !free[d])
+    d++;
+  if(d == NUM)
+    return -1;
+  free[d] = 0;
+  return d;
+}
+
+// mark descriptor i as free again.
+// panics if i is not a valid descriptor index (including negative
+// values, which would otherwise index before free[]) or if the
+// descriptor is already free.
+void
+free_desc(int i)
+{
+  if(i < 0 || i >= NUM)
+    panic("free_desc 1");
+  if(free[i])
+    panic("free_desc 2");
+  free[i] = 1;
+}
+
+// read or write one disk block through the virtio device.
+// if B_DIRTY is set in b->flags, b->data is written to the disk;
+// otherwise the block is read from the disk into b->data.
+// sleeps until virtio_disk_intr() has marked the buffer B_VALID and
+// cleared B_DIRTY, so the caller must be in a context that can sleep.
+void
+virtio_disk_rw(struct buf *b)
+{
+  // widen before multiplying so a large block number cannot wrap in 32 bits.
+  uint64 sector = (uint64) b->blockno * (BSIZE / 512);
+
+  acquire(&virtio_disk_lock);
+
+  // the spec says that legacy block operations always use three
+  // descriptors: one for type/reserved/sector, one for
+  // the data, one for a 1-byte status result.
+
+  // allocate the three descriptors, all or nothing: if fewer than
+  // three are free, give back what we took and wait for more.
+  int idx[3];
+  while(1){
+    int done = 1;
+    for(int i = 0; i < 3; i++){
+      idx[i] = alloc_desc();
+      if(idx[i] < 0){
+        for(int j = 0; j < i; j++)
+          free_desc(idx[j]);
+        wakeup(&free[0]);
+        done = 0;
+        break;
+      }
+    }
+    if(done)
+      break;
+    sleep(&free[0], &virtio_disk_lock);
+  }
+
+  // format the three descriptors.
+  // qemu's virtio-blk.c reads them.
+  // buf0 and status live on this stack frame; that is valid only
+  // because this function does not return until the request completes.
+
+  struct virtio_blk_outhdr {
+    uint32 type;
+    uint32 reserved;
+    uint64 sector;
+  } buf0;
+
+  if(b->flags & B_DIRTY)
+    buf0.type = VIRTIO_BLK_T_OUT; // write the disk
+  else
+    buf0.type = VIRTIO_BLK_T_IN; // read the disk
+  buf0.reserved = 0;
+  buf0.sector = sector;
+
+  desc[idx[0]].addr = (uint64) &buf0;
+  desc[idx[0]].len = sizeof(buf0);
+  desc[idx[0]].flags = VRING_DESC_F_NEXT;
+  desc[idx[0]].next = idx[1];
+
+  desc[idx[1]].addr = (uint64) b->data;
+  desc[idx[1]].len = BSIZE;
+  if(b->flags & B_DIRTY)
+    desc[idx[1]].flags = 0; // device reads b->data
+  else
+    desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data
+  desc[idx[1]].flags |= VRING_DESC_F_NEXT;
+  desc[idx[1]].next = idx[2];
+
+  char status = 0;
+  desc[idx[2]].addr = (uint64) &status;
+  desc[idx[2]].len = 1;
+  desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status
+  desc[idx[2]].next = 0;
+
+  // record struct buf for virtio_disk_intr().
+  info[idx[0]].b = b;
+
+  // avail[0] is flags
+  // avail[1] tells the device how far to look in avail[2...].
+  // avail[2...] are desc[] indices the device should process.
+  // we only tell device the first index in our chain of descriptors.
+  avail[2 + (avail[1] % NUM)] = idx[0];
+  // the ring entry and descriptors must be visible before the index moves.
+  __sync_synchronize();
+  avail[1] = avail[1] + 1;
+  // the new index must be visible before the device is notified;
+  // avail is not volatile, so without this barrier the store could
+  // be ordered after the mmio write below.
+  __sync_synchronize();
+
+  *R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number
+
+  // Wait for virtio_disk_intr() to say request has finished.
+  while((b->flags & (B_VALID|B_DIRTY)) != B_VALID){
+    sleep(b, &virtio_disk_lock);
+  }
+
+  release(&virtio_disk_lock);
+}
+
+// interrupt handler for the virtio disk, called from devintr().
+// for every completion the device has added to the used ring,
+// marks the buffer B_VALID and not B_DIRTY, wakes the process
+// sleeping in virtio_disk_rw(), and frees the descriptor chain.
+void
+virtio_disk_intr()
+{
+  // the used area is:
+  // uint16 flags
+  // uint16 idx
+  // array of VRingUsedElem
+
+  acquire(&virtio_disk_lock);
+
+  // tell the device we have seen this interrupt, as the spec requires.
+  // bits 0 and 1 of the status are the used-ring and configuration-change
+  // causes. if the device adds completions after this point they
+  // are simply handled by the loop below, and the following
+  // interrupt finds nothing to do, which is harmless.
+  *R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3;
+  __sync_synchronize();
+
+  while((used_idx % NUM) != (*(volatile uint16 *)(used+2) % NUM)){
+    // read the element only after having seen the updated index.
+    __sync_synchronize();
+    struct VRingUsedElem *ue =
+      (struct VRingUsedElem *) (used + 4 + sizeof(struct VRingUsedElem)*used_idx);
+
+    // the id comes from the device; never index info[] or desc[] with junk.
+    uint id = ue->id;
+    if(id >= NUM)
+      panic("virtio_disk_intr id");
+    struct buf *b = info[id].b;
+    if(b == 0)
+      panic("virtio_disk_intr buf");
+
+    // XXX check the one-byte status in the 3rd descriptor.
+
+    b->flags |= B_VALID;
+    b->flags &= ~B_DIRTY;
+
+    wakeup(b);
+
+    info[id].b = 0;
+
+    // free every descriptor of the completed chain.
+    uint i = id;
+    while(1){
+      desc[i].addr = 0;
+      free_desc(i);
+      if(desc[i].flags & VRING_DESC_F_NEXT)
+        i = desc[i].next;
+      else
+        break;
+    }
+    // descriptors are available again; wake anyone waiting to allocate.
+    wakeup(&free[0]);
+
+    used_idx = (used_idx + 1) % NUM;
+  }
+
+  release(&virtio_disk_lock);
+}
diff --git a/kernel/vm.c b/kernel/vm.c
index 0ea6bca..0d0a9d9 100644
--- a/kernel/vm.c
+++ b/kernel/vm.c
@@ -30,6 +30,10 @@ kvminit()
mappages(kernel_pagetable, UART0, PGSIZE,
UART0, PTE_R | PTE_W);
+ // virtio disk interface
+ mappages(kernel_pagetable, VIRTIO, PGSIZE,
+ VIRTIO, PTE_R | PTE_W);
+
// CLINT
mappages(kernel_pagetable, CLINT, 0x10000,
CLINT, PTE_R | PTE_W);