-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvirtio_disk.c
315 lines (257 loc) · 8.51 KB
/
virtio_disk.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
//
// driver for qemu's virtio disk device.
// uses qemu's mmio interface to virtio.
// qemu presents a "legacy" virtio interface.
//
// qemu ... -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0
//
#include "types.h"
#include "riscv.h"
#include "defs.h"
#include "param.h"
#include "memlayout.h"
#include "spinlock.h"
#include "sleeplock.h"
#include "fs.h"
#include "buf.h"
#include "virtio.h"
// the address of virtio mmio register r.
#define R(r) ((volatile uint32 *)(VIRTIO0 + (r)))
static struct disk {
// the virtio driver and device mostly communicate through a set of
// structures in RAM. pages[] allocates that memory. pages[] is a
// global (instead of calls to kalloc()) because it must consist of
// two contiguous pages of page-aligned physical memory.
char pages[2*PGSIZE];
// pages[] is divided into three regions (descriptors, avail, and
// used), as explained in Section 2.6 of the virtio specification
// for the legacy interface.
// https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.pdf
// the first region of pages[] is a set (not a ring) of DMA
// descriptors, with which the driver tells the device where to read
// and write individual disk operations. there are NUM descriptors.
// most commands consist of a "chain" (a linked list) of a couple of
// these descriptors.
// points into pages[].
struct virtq_desc *desc;
// next is a ring in which the driver writes descriptor numbers
// that the driver would like the device to process. it only
// includes the head descriptor of each chain. the ring has
// NUM elements.
// points into pages[].
struct virtq_avail *avail;
// finally a ring in which the device writes descriptor numbers that
// the device has finished processing (just the head of each chain).
// there are NUM used ring entries.
// points into pages[].
struct virtq_used *used;
// our own book-keeping.
char free[NUM]; // is a descriptor free?
uint16 used_idx; // we've looked this far in used[2..NUM].
// track info about in-flight operations,
// for use when completion interrupt arrives.
// indexed by first descriptor index of chain.
struct {
struct buf *b;
char status;
} info[NUM];
// disk command headers.
// one-for-one with descriptors, for convenience.
struct virtio_blk_req ops[NUM];
struct spinlock vdisk_lock;
} __attribute__ ((aligned (PGSIZE))) disk;
void
virtio_disk_init(void)
{
uint32 status = 0;
initlock(&disk.vdisk_lock, "virtio_disk");
if(*R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 ||
*R(VIRTIO_MMIO_VERSION) != 1 ||
*R(VIRTIO_MMIO_DEVICE_ID) != 2 ||
*R(VIRTIO_MMIO_VENDOR_ID) != 0x554d4551){
panic("could not find virtio disk");
}
status |= VIRTIO_CONFIG_S_ACKNOWLEDGE;
*R(VIRTIO_MMIO_STATUS) = status;
status |= VIRTIO_CONFIG_S_DRIVER;
*R(VIRTIO_MMIO_STATUS) = status;
// negotiate features
uint64 features = *R(VIRTIO_MMIO_DEVICE_FEATURES);
features &= ~(1 << VIRTIO_BLK_F_RO);
features &= ~(1 << VIRTIO_BLK_F_SCSI);
features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE);
features &= ~(1 << VIRTIO_BLK_F_MQ);
features &= ~(1 << VIRTIO_F_ANY_LAYOUT);
features &= ~(1 << VIRTIO_RING_F_EVENT_IDX);
features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
*R(VIRTIO_MMIO_DRIVER_FEATURES) = features;
// tell device that feature negotiation is complete.
status |= VIRTIO_CONFIG_S_FEATURES_OK;
*R(VIRTIO_MMIO_STATUS) = status;
// tell device we're completely ready.
status |= VIRTIO_CONFIG_S_DRIVER_OK;
*R(VIRTIO_MMIO_STATUS) = status;
*R(VIRTIO_MMIO_GUEST_PAGE_SIZE) = PGSIZE;
// initialize queue 0.
*R(VIRTIO_MMIO_QUEUE_SEL) = 0;
uint32 max = *R(VIRTIO_MMIO_QUEUE_NUM_MAX);
if(max == 0)
panic("virtio disk has no queue 0");
if(max < NUM)
panic("virtio disk max queue too short");
*R(VIRTIO_MMIO_QUEUE_NUM) = NUM;
memset(disk.pages, 0, sizeof(disk.pages));
*R(VIRTIO_MMIO_QUEUE_PFN) = ((uint64)disk.pages) >> PGSHIFT;
// desc = pages -- num * virtq_desc
// avail = pages + 0x40 -- 2 * uint16, then num * uint16
// used = pages + 4096 -- 2 * uint16, then num * vRingUsedElem
disk.desc = (struct virtq_desc *) disk.pages;
disk.avail = (struct virtq_avail *)(disk.pages + NUM*sizeof(struct virtq_desc));
disk.used = (struct virtq_used *) (disk.pages + PGSIZE);
// all NUM descriptors start out unused.
for(int i = 0; i < NUM; i++)
disk.free[i] = 1;
// plic.c and trap.c arrange for interrupts from VIRTIO0_IRQ.
}
// find a free descriptor, mark it non-free, return its index.
static int
alloc_desc()
{
for(int i = 0; i < NUM; i++){
if(disk.free[i]){
disk.free[i] = 0;
return i;
}
}
return -1;
}
// mark a descriptor as free.
static void
free_desc(int i)
{
if(i >= NUM)
panic("free_desc 1");
if(disk.free[i])
panic("free_desc 2");
disk.desc[i].addr = 0;
disk.desc[i].len = 0;
disk.desc[i].flags = 0;
disk.desc[i].next = 0;
disk.free[i] = 1;
wakeup(&disk.free[0]);
}
// free a chain of descriptors.
static void
free_chain(int i)
{
while(1){
int flag = disk.desc[i].flags;
int nxt = disk.desc[i].next;
free_desc(i);
if(flag & VRING_DESC_F_NEXT)
i = nxt;
else
break;
}
}
// allocate three descriptors (they need not be contiguous).
// disk transfers always use three descriptors.
static int
alloc3_desc(int *idx)
{
for(int i = 0; i < 3; i++){
idx[i] = alloc_desc();
if(idx[i] < 0){
for(int j = 0; j < i; j++)
free_desc(idx[j]);
return -1;
}
}
return 0;
}
void
virtio_disk_rw(struct buf *b, int write)
{
uint64 sector = b->blockno * (BSIZE / 512);
acquire(&disk.vdisk_lock);
// the spec's Section 5.2 says that legacy block operations use
// three descriptors: one for type/reserved/sector, one for the
// data, one for a 1-byte status result.
// allocate the three descriptors.
int idx[3];
while(1){
if(alloc3_desc(idx) == 0) {
break;
}
sleep(&disk.free[0], &disk.vdisk_lock);
}
// format the three descriptors.
// qemu's virtio-blk.c reads them.
struct virtio_blk_req *buf0 = &disk.ops[idx[0]];
if(write)
buf0->type = VIRTIO_BLK_T_OUT; // write the disk
else
buf0->type = VIRTIO_BLK_T_IN; // read the disk
buf0->reserved = 0;
buf0->sector = sector;
disk.desc[idx[0]].addr = (uint64) buf0;
disk.desc[idx[0]].len = sizeof(struct virtio_blk_req);
disk.desc[idx[0]].flags = VRING_DESC_F_NEXT;
disk.desc[idx[0]].next = idx[1];
disk.desc[idx[1]].addr = (uint64) b->data;
disk.desc[idx[1]].len = BSIZE;
if(write)
disk.desc[idx[1]].flags = 0; // device reads b->data
else
disk.desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data
disk.desc[idx[1]].flags |= VRING_DESC_F_NEXT;
disk.desc[idx[1]].next = idx[2];
disk.info[idx[0]].status = 0xff; // device writes 0 on success
disk.desc[idx[2]].addr = (uint64) &disk.info[idx[0]].status;
disk.desc[idx[2]].len = 1;
disk.desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status
disk.desc[idx[2]].next = 0;
// record struct buf for virtio_disk_intr().
b->disk = 1;
disk.info[idx[0]].b = b;
// tell the device the first index in our chain of descriptors.
disk.avail->ring[disk.avail->idx % NUM] = idx[0];
__sync_synchronize();
// tell the device another avail ring entry is available.
disk.avail->idx += 1; // not % NUM ...
__sync_synchronize();
*R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number
// Wait for virtio_disk_intr() to say request has finished.
while(b->disk == 1) {
sleep(b, &disk.vdisk_lock);
}
disk.info[idx[0]].b = 0;
free_chain(idx[0]);
release(&disk.vdisk_lock);
}
void
virtio_disk_intr()
{
acquire(&disk.vdisk_lock);
// the device won't raise another interrupt until we tell it
// we've seen this interrupt, which the following line does.
// this may race with the device writing new entries to
// the "used" ring, in which case we may process the new
// completion entries in this interrupt, and have nothing to do
// in the next interrupt, which is harmless.
*R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3;
__sync_synchronize();
// the device increments disk.used->idx when it
// adds an entry to the used ring.
while(disk.used_idx != disk.used->idx){
__sync_synchronize();
int id = disk.used->ring[disk.used_idx % NUM].id;
if(disk.info[id].status != 0)
panic("virtio_disk_intr status");
struct buf *b = disk.info[id].b;
b->disk = 0; // disk is done with buf
wakeup(b);
disk.used_idx += 1;
}
release(&disk.vdisk_lock);
}