/*
 * blkfront.c
 *
 * XenLinux virtual block device driver.
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
 * Copyright (c) 2004, Christian Limpach
 * Copyright (c) 2004, Andrew Warfield
 * Copyright (c) 2005, Christopher Clark
 * Copyright (c) 2005, XenSource Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/interrupt.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/cdrom.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/bitmap.h>
#include <linux/list.h>

#include <xen/xen.h>
#include <xen/xenbus.h>
#include <xen/grant_table.h>
#include <xen/events.h>
#include <xen/page.h>
#include <xen/platform_pci.h>

#include <xen/interface/grant_table.h>
#include <xen/interface/io/blkif.h>
#include <xen/interface/io/protocols.h>

#include <asm/xen/hypervisor.h>

enum blkif_state {
        BLKIF_STATE_DISCONNECTED,
        BLKIF_STATE_CONNECTED,
        BLKIF_STATE_SUSPENDED,
};

struct grant {
        grant_ref_t gref;
        unsigned long pfn;
        struct list_head node;
};

struct blk_shadow {
        struct blkif_request req;
        struct request *request;
        unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};

static DEFINE_MUTEX(blkfront_mutex);
static const struct block_device_operations xlvbd_block_fops;

#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)

/*
 * We have one of these per vbd, whether ide, scsi or 'other'.  They
 * hang in private_data off the gendisk structure. We may end up
 * putting all kinds of interesting stuff here :-)
 */
struct blkfront_info
{
        spinlock_t io_lock;
        struct mutex mutex;
        struct xenbus_device *xbdev;
        struct gendisk *gd;
        int vdevice;
        blkif_vdev_t handle;
        enum blkif_state connected;
        int ring_ref;
        struct blkif_front_ring ring;
        struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int evtchn, irq;
        struct request_queue *rq;
        struct work_struct work;
        struct gnttab_free_callback callback;
        struct blk_shadow shadow[BLK_RING_SIZE];
        struct list_head persistent_gnts;
        unsigned int persistent_gnts_c;
        unsigned long shadow_free;
        unsigned int feature_flush;
        unsigned int flush_op;
        unsigned int feature_discard:1;
        unsigned int feature_secdiscard:1;
        unsigned int discard_granularity;
        unsigned int discard_alignment;
        unsigned int feature_persistent:1;
        int is_ready;
};

static unsigned int nr_minors;
static unsigned long *minors;
static DEFINE_SPINLOCK(minor_lock);

#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
        (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
#define GRANT_INVALID_REF 0

#define PARTS_PER_DISK 16
#define PARTS_PER_EXT_DISK 256

#define BLKIF_MAJOR(dev) ((dev)>>8)
#define BLKIF_MINOR(dev) ((dev) & 0xff)

#define EXT_SHIFT 28
#define EXTENDED (1<<EXT_SHIFT)
#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
#define EMULATED_HD_DISK_MINOR_OFFSET (0)
#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
#define EMULATED_SD_DISK_MINOR_OFFSET (0)
#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256)

#define DEV_NAME "xvd" /* name in /dev */

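/*
 * The shadow ring tracks in-flight requests.  Free shadow entries are
 * chained through req.u.rw.id, with info->shadow_free pointing at the
 * head of the chain, so allocating and releasing an id is O(1).
 */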
static int get_id_from_freelist(struct blkfront_info *info)
{
        unsigned long free = info->shadow_free;
        BUG_ON(free >= BLK_RING_SIZE);
        info->shadow_free = info->shadow[free].req.u.rw.id;
        info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
        return free;
}

static int add_id_to_freelist(struct blkfront_info *info,
                              unsigned long id)
{
        if (info->shadow[id].req.u.rw.id != id)
                return -EINVAL;
        if (info->shadow[id].request == NULL)
                return -EINVAL;
        info->shadow[id].req.u.rw.id = info->shadow_free;
        info->shadow[id].request = NULL;
        info->shadow_free = id;
        return 0;
}

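/*
 * Pre-allocate the pages that will back persistent grants.  Grant
 * references are assigned lazily in get_grant(), the first time each
 * page is handed to the backend.
 */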
static int fill_grant_buffer(struct blkfront_info *info, int num)
{
        struct page *granted_page;
        struct grant *gnt_list_entry, *n;
        int i = 0;

        while (i < num) {
                gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO);
                if (!gnt_list_entry)
                        goto out_of_memory;

                granted_page = alloc_page(GFP_NOIO);
                if (!granted_page) {
                        kfree(gnt_list_entry);
                        goto out_of_memory;
                }

                gnt_list_entry->pfn = page_to_pfn(granted_page);
                gnt_list_entry->gref = GRANT_INVALID_REF;
                list_add(&gnt_list_entry->node, &info->persistent_gnts);
                i++;
        }

        return 0;

out_of_memory:
        list_for_each_entry_safe(gnt_list_entry, n,
                                 &info->persistent_gnts, node) {
                list_del(&gnt_list_entry->node);
                __free_page(pfn_to_page(gnt_list_entry->pfn));
                kfree(gnt_list_entry);
                i--;
        }
        BUG_ON(i != 0);
        return -ENOMEM;
}

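/*
 * Take a grant from the head of the persistent-grant list.  If it has
 * already been granted to the backend (gref valid) it can be reused
 * directly; otherwise claim a fresh grant reference and share the page
 * with the backend.
 */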
static struct grant *get_grant(grant_ref_t *gref_head,
                               struct blkfront_info *info)
{
        struct grant *gnt_list_entry;
        unsigned long buffer_mfn;

        BUG_ON(list_empty(&info->persistent_gnts));
        gnt_list_entry = list_first_entry(&info->persistent_gnts, struct grant,
                                          node);
        list_del(&gnt_list_entry->node);

        if (gnt_list_entry->gref != GRANT_INVALID_REF) {
                info->persistent_gnts_c--;
                return gnt_list_entry;
        }

        /* Assign a gref to this page */
        gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
        BUG_ON(gnt_list_entry->gref == -ENOSPC);
        buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
        gnttab_grant_foreign_access_ref(gnt_list_entry->gref,
                                        info->xbdev->otherend_id,
                                        buffer_mfn, 0);
        return gnt_list_entry;
}

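/* Human-readable names for the ring operations, used in error reports. */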
static const char *op_name(int op)
{
        static const char *const names[] = {
                [BLKIF_OP_READ] = "read",
                [BLKIF_OP_WRITE] = "write",
                [BLKIF_OP_WRITE_BARRIER] = "barrier",
                [BLKIF_OP_FLUSH_DISKCACHE] = "flush",
                [BLKIF_OP_DISCARD] = "discard" };

        if (op < 0 || op >= ARRAY_SIZE(names))
                return "unknown";

        if (!names[op])
                return "reserved";

        return names[op];
}

static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
{
        unsigned int end = minor + nr;
        int rc;

        if (end > nr_minors) {
                unsigned long *bitmap, *old;

                bitmap = kcalloc(BITS_TO_LONGS(end), sizeof(*bitmap),
                                 GFP_KERNEL);
                if (bitmap == NULL)
                        return -ENOMEM;

                spin_lock(&minor_lock);
                if (end > nr_minors) {
                        old = minors;
                        memcpy(bitmap, minors,
                               BITS_TO_LONGS(nr_minors) * sizeof(*bitmap));
                        minors = bitmap;
                        nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG;
                } else
                        old = bitmap;
                spin_unlock(&minor_lock);
                kfree(old);
        }

        spin_lock(&minor_lock);
        if (find_next_bit(minors, end, minor) >= end) {
                bitmap_set(minors, minor, nr);
                rc = 0;
        } else
                rc = -EBUSY;
        spin_unlock(&minor_lock);

        return rc;
}

static void xlbd_release_minors(unsigned int minor, unsigned int nr)
{
        unsigned int end = minor + nr;

        BUG_ON(end > nr_minors);
        spin_lock(&minor_lock);
        bitmap_clear(minors, minor, nr);
        spin_unlock(&minor_lock);
}

static void blkif_restart_queue_callback(void *arg)
{
        struct blkfront_info *info = (struct blkfront_info *)arg;
        schedule_work(&info->work);
}

static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
{
        /* We don't have real geometry info, but let's at least return
           values consistent with the size of the device */
        sector_t nsect = get_capacity(bd->bd_disk);
        sector_t cylinders = nsect;

        hg->heads = 0xff;
        hg->sectors = 0x3f;
        sector_div(cylinders, hg->heads * hg->sectors);
        hg->cylinders = cylinders;
        if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
                hg->cylinders = 0xffff;
        return 0;
}

static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
                       unsigned command, unsigned long argument)
{
        struct blkfront_info *info = bdev->bd_disk->private_data;
        int i;

        dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n",
                command, (long)argument);

        switch (command) {
        case CDROMMULTISESSION:
                dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n");
                for (i = 0; i < sizeof(struct cdrom_multisession); i++)
                        if (put_user(0, (char __user *)(argument + i)))
                                return -EFAULT;
                return 0;

        case CDROM_GET_CAPABILITY: {
                struct gendisk *gd = info->gd;
                if (gd->flags & GENHD_FL_CD)
                        return 0;
                return -EINVAL;
        }

        default:
                /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
                  command);*/
                return -EINVAL; /* same return as native Linux */
        }

        return 0;
}

/*
 * Generate a Xen blkfront IO request from a blk layer request.  Reads
 * and writes are handled as expected.
 *
 * @req: a request struct
 */
static int blkif_queue_request(struct request *req)
{
        struct blkfront_info *info = req->rq_disk->private_data;
        unsigned long buffer_mfn;
        struct blkif_request *ring_req;
        unsigned long id;
        unsigned int fsect, lsect;
        int i, ref;

        /*
         * Used to store if we are able to queue the request by just using
         * existing persistent grants, or if we have to get new grants,
         * as there are not sufficiently many free.
         */
        bool new_persistent_gnts;
        grant_ref_t gref_head;
        struct grant *gnt_list_entry = NULL;
        struct scatterlist *sg;

        if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
                return 1;

        /* Check if we have enough grants to allocate a request */
        if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) {
                new_persistent_gnts = 1;
                if (gnttab_alloc_grant_references(
                    BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c,
                    &gref_head) < 0) {
                        gnttab_request_free_callback(
                                &info->callback,
                                blkif_restart_queue_callback,
                                info,
                                BLKIF_MAX_SEGMENTS_PER_REQUEST);
                        return 1;
                }
        } else
                new_persistent_gnts = 0;

        /* Fill out a communications ring structure. */
        ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
        id = get_id_from_freelist(info);
        info->shadow[id].request = req;

        ring_req->u.rw.id = id;
        ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
        ring_req->u.rw.handle = info->handle;

        ring_req->operation = rq_data_dir(req) ?
                BLKIF_OP_WRITE : BLKIF_OP_READ;

        if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
                /*
                 * Ideally we can do an unordered flush-to-disk.  In case the
                 * backend only supports barriers, use that.  A barrier request
                 * is a superset of FUA, so we can implement it the same
                 * way.  (It's also a FLUSH+FUA, since it is
                 * guaranteed ordered WRT previous writes.)
                 */
                ring_req->operation = info->flush_op;
        }

        if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) {
                /* id, sector_number and handle are set above. */
                ring_req->operation = BLKIF_OP_DISCARD;
                ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
                if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
                        ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
                else
                        ring_req->u.discard.flag = 0;
        } else {
                ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req,
                                                           info->sg);
                BUG_ON(ring_req->u.rw.nr_segments >
                       BLKIF_MAX_SEGMENTS_PER_REQUEST);

                for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
                        fsect = sg->offset >> 9;
                        lsect = fsect + (sg->length >> 9) - 1;

                        gnt_list_entry = get_grant(&gref_head, info);
                        ref = gnt_list_entry->gref;
                        buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);

                        info->shadow[id].grants_used[i] = gnt_list_entry;

                        if (rq_data_dir(req)) {
                                char *bvec_data;
                                void *shared_data;

                                BUG_ON(sg->offset + sg->length > PAGE_SIZE);

                                shared_data = kmap_atomic(
                                        pfn_to_page(gnt_list_entry->pfn));
                                bvec_data = kmap_atomic(sg_page(sg));

                                /*
                                 * this does not wipe data stored outside the
                                 * range sg->offset..sg->offset+sg->length.
                                 * Therefore, blkback *could* see data from
                                 * previous requests. This is OK as long as
                                 * persistent grants are shared with just one
                                 * domain. It may need refactoring if this
                                 * changes
                                 */
                                memcpy(shared_data + sg->offset,
                                       bvec_data   + sg->offset,
                                       sg->length);

                                kunmap_atomic(bvec_data);
                                kunmap_atomic(shared_data);
                        }

                        info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
                        ring_req->u.rw.seg[i] =
                                        (struct blkif_request_segment) {
                                                .gref = ref,
                                                .first_sect = fsect,
                                                .last_sect = lsect };
                }
        }

        info->ring.req_prod_pvt++;

        /* Keep a private copy so we can reissue requests when recovering. */
        info->shadow[id].req = *ring_req;

        if (new_persistent_gnts)
                gnttab_free_grant_references(gref_head);

        return 0;
}

static inline void flush_requests(struct blkfront_info *info)
{
        int notify;

        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);

        if (notify)
                notify_remote_via_irq(info->irq);
}

/*
 * do_blkif_request
 *  pull requests off the block layer queue and submit them to the backend
 */
static void do_blkif_request(struct request_queue *rq)
{
        struct blkfront_info *info = NULL;
        struct request *req;
        int queued;

        pr_debug("Entered do_blkif_request\n");

        queued = 0;

        while ((req = blk_peek_request(rq)) != NULL) {
                info = req->rq_disk->private_data;

                if (RING_FULL(&info->ring))
                        goto wait;

                blk_start_request(req);

                if ((req->cmd_type != REQ_TYPE_FS) ||
                    ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) &&
                     !info->flush_op)) {
                        __blk_end_request_all(req, -EIO);
                        continue;
                }

                pr_debug("do_blk_req %p: cmd %p, sec %lx, "
                         "(%u/%u) buffer:%p [%s]\n",
                         req, req->cmd, (unsigned long)blk_rq_pos(req),
                         blk_rq_cur_sectors(req), blk_rq_sectors(req),
                         req->buffer, rq_data_dir(req) ? "write" : "read");

                if (blkif_queue_request(req)) {
                        blk_requeue_request(rq, req);
wait:
                        /* Avoid pointless unplugs. */
                        blk_stop_queue(rq);
                        break;
                }

                queued++;
        }

        if (queued != 0)
                flush_requests(info);
}

static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
{
        struct request_queue *rq;
        struct blkfront_info *info = gd->private_data;

        rq = blk_init_queue(do_blkif_request, &info->io_lock);
        if (rq == NULL)
                return -1;

        queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);

        if (info->feature_discard) {
                queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
                blk_queue_max_discard_sectors(rq, get_capacity(gd));
                rq->limits.discard_granularity = info->discard_granularity;
                rq->limits.discard_alignment = info->discard_alignment;
                if (info->feature_secdiscard)
                        queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
        }

        /* Hard sector size and max sectors impersonate the equiv. hardware. */
        blk_queue_logical_block_size(rq, sector_size);
        blk_queue_max_hw_sectors(rq, 512);

        /* Each segment in a request is up to an aligned page in size. */
        blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
        blk_queue_max_segment_size(rq, PAGE_SIZE);

        /* Ensure a merged request will fit in a single I/O ring slot. */
        blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);

        /* Make sure buffer addresses are sector-aligned. */
        blk_queue_dma_alignment(rq, 511);

        /* Make sure we don't use bounce buffers. */
        blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);

        gd->queue = rq;

        return 0;
}


static void xlvbd_flush(struct blkfront_info *info)
{
        blk_queue_flush(info->rq, info->feature_flush);
        printk(KERN_INFO "blkfront: %s: %s: %s %s\n",
               info->gd->disk_name,
               info->flush_op == BLKIF_OP_WRITE_BARRIER ?
               "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
               "flush diskcache" : "barrier or flush"),
               info->feature_flush ? "enabled" : "disabled",
               info->feature_persistent ? "using persistent grants" : "");
}

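/*
 * Map a virtual device number from an emulated major (IDE/SCSI) onto the
 * xvd minor space, and compute the offset used to build the disk name.
 */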
static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
{
        int major;
        major = BLKIF_MAJOR(vdevice);
        *minor = BLKIF_MINOR(vdevice);
        switch (major) {
        case XEN_IDE0_MAJOR:
                *offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET;
                *minor = ((*minor / 64) * PARTS_PER_DISK) +
                        EMULATED_HD_DISK_MINOR_OFFSET;
                break;
        case XEN_IDE1_MAJOR:
                *offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET;
                *minor = (((*minor / 64) + 2) * PARTS_PER_DISK) +
                        EMULATED_HD_DISK_MINOR_OFFSET;
                break;
        case XEN_SCSI_DISK0_MAJOR:
                *offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET;
                *minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET;
                break;
        case XEN_SCSI_DISK1_MAJOR:
        case XEN_SCSI_DISK2_MAJOR:
        case XEN_SCSI_DISK3_MAJOR:
        case XEN_SCSI_DISK4_MAJOR:
        case XEN_SCSI_DISK5_MAJOR:
        case XEN_SCSI_DISK6_MAJOR:
        case XEN_SCSI_DISK7_MAJOR:
                *offset = (*minor / PARTS_PER_DISK) +
                        ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
                        EMULATED_SD_DISK_NAME_OFFSET;
                *minor = *minor +
                        ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) +
                        EMULATED_SD_DISK_MINOR_OFFSET;
                break;
        case XEN_SCSI_DISK8_MAJOR:
        case XEN_SCSI_DISK9_MAJOR:
        case XEN_SCSI_DISK10_MAJOR:
        case XEN_SCSI_DISK11_MAJOR:
        case XEN_SCSI_DISK12_MAJOR:
        case XEN_SCSI_DISK13_MAJOR:
        case XEN_SCSI_DISK14_MAJOR:
        case XEN_SCSI_DISK15_MAJOR:
                *offset = (*minor / PARTS_PER_DISK) +
                        ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
                        EMULATED_SD_DISK_NAME_OFFSET;
                *minor = *minor +
                        ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) +
                        EMULATED_SD_DISK_MINOR_OFFSET;
                break;
        case XENVBD_MAJOR:
                *offset = *minor / PARTS_PER_DISK;
                break;
        default:
                printk(KERN_WARNING "blkfront: your disk configuration is "
                       "incorrect, please use an xvd device instead\n");
                return -ENODEV;
        }
        return 0;
}

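/*
 * Encode a disk index as a base-26, 'a'-based suffix (xvda, ..., xvdz,
 * xvdaa, ...), mirroring the sd driver's naming scheme.
 */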
static char *encode_disk_name(char *ptr, unsigned int n)
{
        if (n >= 26)
                ptr = encode_disk_name(ptr, n / 26 - 1);
        *ptr = 'a' + n % 26;
        return ptr + 1;
}

static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
                               struct blkfront_info *info,
                               u16 vdisk_info, u16 sector_size)
{
        struct gendisk *gd;
        int nr_minors = 1;
        int err;
        unsigned int offset;
        int minor;
        int nr_parts;
        char *ptr;

        BUG_ON(info->gd != NULL);
        BUG_ON(info->rq != NULL);

        if ((info->vdevice>>EXT_SHIFT) > 1) {
                /* this is above the extended range; something is wrong */
                printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice);
                return -ENODEV;
        }

        if (!VDEV_IS_EXTENDED(info->vdevice)) {
                err = xen_translate_vdev(info->vdevice, &minor, &offset);
                if (err)
                        return err;
                nr_parts = PARTS_PER_DISK;
        } else {
                minor = BLKIF_MINOR_EXT(info->vdevice);
                nr_parts = PARTS_PER_EXT_DISK;
                offset = minor / nr_parts;
                if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4)
                        printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
                               "emulated IDE disks,\n\t choose an xvd device name "
                               "from xvde on\n", info->vdevice);
        }
        if (minor >> MINORBITS) {
                pr_warn("blkfront: %#x's minor (%#x) out of range; ignoring\n",
                        info->vdevice, minor);
                return -ENODEV;
        }

        if ((minor % nr_parts) == 0)
                nr_minors = nr_parts;

        err = xlbd_reserve_minors(minor, nr_minors);
        if (err)
                goto out;
        err = -ENODEV;

        gd = alloc_disk(nr_minors);
        if (gd == NULL)
                goto release;

        strcpy(gd->disk_name, DEV_NAME);
        ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset);
        BUG_ON(ptr >= gd->disk_name + DISK_NAME_LEN);
        if (nr_minors > 1)
                *ptr = 0;
        else
                snprintf(ptr, gd->disk_name + DISK_NAME_LEN - ptr,
                         "%d", minor & (nr_parts - 1));

        gd->major = XENVBD_MAJOR;
        gd->first_minor = minor;
        gd->fops = &xlvbd_block_fops;
        gd->private_data = info;
        gd->driverfs_dev = &(info->xbdev->dev);
        set_capacity(gd, capacity);

        if (xlvbd_init_blk_queue(gd, sector_size)) {
                del_gendisk(gd);
                goto release;
        }

        info->rq = gd->queue;
        info->gd = gd;

        xlvbd_flush(info);

        if (vdisk_info & VDISK_READONLY)
                set_disk_ro(gd, 1);

        if (vdisk_info & VDISK_REMOVABLE)
                gd->flags |= GENHD_FL_REMOVABLE;

        if (vdisk_info & VDISK_CDROM)
                gd->flags |= GENHD_FL_CD;

        return 0;

 release:
        xlbd_release_minors(minor, nr_minors);
 out:
        return err;
}

static void xlvbd_release_gendisk(struct blkfront_info *info)
{
        unsigned int minor, nr_minors;
        unsigned long flags;

        if (info->rq == NULL)
                return;

        spin_lock_irqsave(&info->io_lock, flags);

        /* No more blkif_request(). */
        blk_stop_queue(info->rq);

        /* No more gnttab callback work. */
        gnttab_cancel_free_callback(&info->callback);
        spin_unlock_irqrestore(&info->io_lock, flags);

        /* Flush gnttab callback work. Must be done with no locks held. */
        flush_work(&info->work);

        del_gendisk(info->gd);

        minor = info->gd->first_minor;
        nr_minors = info->gd->minors;
        xlbd_release_minors(minor, nr_minors);

        blk_cleanup_queue(info->rq);
        info->rq = NULL;

        put_disk(info->gd);
        info->gd = NULL;
}

static void kick_pending_request_queues(struct blkfront_info *info)
{
        if (!RING_FULL(&info->ring)) {
                /* Re-enable calldowns. */
                blk_start_queue(info->rq);
                /* Kick things off immediately. */
                do_blkif_request(info->rq);
        }
}

static void blkif_restart_queue(struct work_struct *work)
{
        struct blkfront_info *info = container_of(work, struct blkfront_info, work);

        spin_lock_irq(&info->io_lock);
        if (info->connected == BLKIF_STATE_CONNECTED)
                kick_pending_request_queues(info);
        spin_unlock_irq(&info->io_lock);
}

static void blkif_free(struct blkfront_info *info, int suspend)
{
        struct grant *persistent_gnt;
        struct grant *n;

        /* Prevent new requests being issued until we fix things up. */
        spin_lock_irq(&info->io_lock);
        info->connected = suspend ?
                BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
        /* No more blkif_request(). */
        if (info->rq)
                blk_stop_queue(info->rq);

        /* Remove all persistent grants */
        if (!list_empty(&info->persistent_gnts)) {
                list_for_each_entry_safe(persistent_gnt, n,
                                         &info->persistent_gnts, node) {
                        list_del(&persistent_gnt->node);
                        if (persistent_gnt->gref != GRANT_INVALID_REF) {
                                gnttab_end_foreign_access(persistent_gnt->gref,
                                                          0, 0UL);
                                info->persistent_gnts_c--;
                        }
                        __free_page(pfn_to_page(persistent_gnt->pfn));
                        kfree(persistent_gnt);
                }
        }
        BUG_ON(info->persistent_gnts_c != 0);

        /* No more gnttab callback work. */
        gnttab_cancel_free_callback(&info->callback);
        spin_unlock_irq(&info->io_lock);

        /* Flush gnttab callback work. Must be done with no locks held. */
        flush_work(&info->work);

        /* Free resources associated with old device channel. */
        if (info->ring_ref != GRANT_INVALID_REF) {
                gnttab_end_foreign_access(info->ring_ref, 0,
                                          (unsigned long)info->ring.sring);
                info->ring_ref = GRANT_INVALID_REF;
                info->ring.sring = NULL;
        }
        if (info->irq)
                unbind_from_irqhandler(info->irq, info);
        info->evtchn = info->irq = 0;

}

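/*
 * Complete the shadow bookkeeping for a finished request: for reads,
 * copy the data out of the persistent grant pages into the bio_vecs,
 * then return all grants used by the request to the free list.
 */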
static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
                             struct blkif_response *bret)
{
        int i = 0;
        struct bio_vec *bvec;
        struct req_iterator iter;
        unsigned long flags;
        char *bvec_data;
        void *shared_data;
        unsigned int offset = 0;

        if (bret->operation == BLKIF_OP_READ) {
                /*
                 * Copy the data received from the backend into the bvec.
                 * Since bv_offset can be different than 0, and bv_len different
                 * than PAGE_SIZE, we have to keep track of the current offset,
                 * to be sure we are copying the data from the right shared page.
                 */
                rq_for_each_segment(bvec, s->request, iter) {
                        BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
                        if (bvec->bv_offset < offset)
                                i++;
                        BUG_ON(i >= s->req.u.rw.nr_segments);
                        shared_data = kmap_atomic(
                                pfn_to_page(s->grants_used[i]->pfn));
                        bvec_data = bvec_kmap_irq(bvec, &flags);
                        memcpy(bvec_data, shared_data + bvec->bv_offset,
                               bvec->bv_len);
                        bvec_kunmap_irq(bvec_data, &flags);
                        kunmap_atomic(shared_data);
                        offset = bvec->bv_offset + bvec->bv_len;
                }
        }
        /* Add the persistent grant into the list of free grants */
        for (i = 0; i < s->req.u.rw.nr_segments; i++) {
                list_add(&s->grants_used[i]->node, &info->persistent_gnts);
                info->persistent_gnts_c++;
        }
}

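/*
 * Ring interrupt handler: walk the response ring, complete the
 * corresponding block layer requests, and restart the queue.
 */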
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
{
        struct request *req;
        struct blkif_response *bret;
        RING_IDX i, rp;
        unsigned long flags;
        struct blkfront_info *info = (struct blkfront_info *)dev_id;
        int error;

        spin_lock_irqsave(&info->io_lock, flags);

        if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
                spin_unlock_irqrestore(&info->io_lock, flags);
                return IRQ_HANDLED;
        }

 again:
        rp = info->ring.sring->rsp_prod;
        rmb(); /* Ensure we see queued responses up to 'rp'. */

        for (i = info->ring.rsp_cons; i != rp; i++) {
                unsigned long id;

                bret = RING_GET_RESPONSE(&info->ring, i);
                id = bret->id;
                /*
                 * The backend has messed up and given us an id that we would
                 * never have given to it (we stamp it up to BLK_RING_SIZE -
                 * look in get_id_from_freelist).
                 */
                if (id >= BLK_RING_SIZE) {
                        WARN(1, "%s: response to %s has incorrect id (%ld)\n",
                             info->gd->disk_name, op_name(bret->operation), id);
                        /* We can't safely get the 'struct request' as
                         * the id is busted. */
                        continue;
                }
                req = info->shadow[id].request;

                if (bret->operation != BLKIF_OP_DISCARD)
                        blkif_completion(&info->shadow[id], info, bret);

                if (add_id_to_freelist(info, id)) {
                        WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
                             info->gd->disk_name, op_name(bret->operation), id);
                        continue;
                }

                error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
                switch (bret->operation) {
                case BLKIF_OP_DISCARD:
                        if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
                                struct request_queue *rq = info->rq;
                                printk(KERN_WARNING "blkfront: %s: %s op failed\n",
                                       info->gd->disk_name, op_name(bret->operation));
                                error = -EOPNOTSUPP;
                                info->feature_discard = 0;
                                info->feature_secdiscard = 0;
                                queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
                                queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
                        }
                        __blk_end_request_all(req, error);
                        break;
                case BLKIF_OP_FLUSH_DISKCACHE:
                case BLKIF_OP_WRITE_BARRIER:
                        if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
                                printk(KERN_WARNING "blkfront: %s: %s op failed\n",
                                       info->gd->disk_name, op_name(bret->operation));
                                error = -EOPNOTSUPP;
                        }
                        if (unlikely(bret->status == BLKIF_RSP_ERROR &&
                                     info->shadow[id].req.u.rw.nr_segments == 0)) {
                                printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
                                       info->gd->disk_name, op_name(bret->operation));
                                error = -EOPNOTSUPP;
                        }
                        if (unlikely(error)) {
                                if (error == -EOPNOTSUPP)
                                        error = 0;
                                info->feature_flush = 0;
                                info->flush_op = 0;
                                xlvbd_flush(info);
                        }
                        /* fall through */
                case BLKIF_OP_READ:
                case BLKIF_OP_WRITE:
                        if (unlikely(bret->status != BLKIF_RSP_OKAY))
                                dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
                                        "request: %x\n", bret->status);

                        __blk_end_request_all(req, error);
                        break;
                default:
                        BUG();
                }
        }

        info->ring.rsp_cons = i;

        if (i != info->ring.req_prod_pvt) {
                int more_to_do;
                RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
                if (more_to_do)
                        goto again;
        } else
                info->ring.sring->rsp_event = i + 1;

        kick_pending_request_queues(info);

        spin_unlock_irqrestore(&info->io_lock, flags);

        return IRQ_HANDLED;
}

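/*
 * Allocate and share the ring page with the backend, pre-populate the
 * persistent grant pool, and bind the event channel to our interrupt
 * handler.
 */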
static int setup_blkring(struct xenbus_device *dev,
                         struct blkfront_info *info)
{
        struct blkif_sring *sring;
        int err;

        info->ring_ref = GRANT_INVALID_REF;

        sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
        if (!sring) {
                xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
                return -ENOMEM;
        }
        SHARED_RING_INIT(sring);
        FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);

        sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);

        /* Allocate memory for grants */
        err = fill_grant_buffer(info, BLK_RING_SIZE *
                                      BLKIF_MAX_SEGMENTS_PER_REQUEST);
        if (err)
                goto fail;

        err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
        if (err < 0) {
                free_page((unsigned long)sring);
                info->ring.sring = NULL;
                goto fail;
        }
        info->ring_ref = err;

        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
                goto fail;

        err = bind_evtchn_to_irqhandler(info->evtchn, blkif_interrupt, 0,
                                        "blkif", info);
        if (err <= 0) {
                xenbus_dev_fatal(dev, err,
                                 "bind_evtchn_to_irqhandler failed");
                goto fail;
        }
        info->irq = err;

        return 0;
fail:
        blkif_free(info, 0);
        return err;
}


/* Common code used when first setting up, and when resuming. */
static int talk_to_blkback(struct xenbus_device *dev,
                           struct blkfront_info *info)
{
        const char *message = NULL;
        struct xenbus_transaction xbt;
        int err;

        /* Create shared ring, alloc event channel. */
        err = setup_blkring(dev, info);
        if (err)
                goto out;

again:
        err = xenbus_transaction_start(&xbt);
        if (err) {
                xenbus_dev_fatal(dev, err, "starting transaction");
                goto destroy_blkring;
        }

        err = xenbus_printf(xbt, dev->nodename,
                            "ring-ref", "%u", info->ring_ref);
        if (err) {
                message = "writing ring-ref";
                goto abort_transaction;
        }
        err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", info->evtchn);
        if (err) {
                message = "writing event-channel";
                goto abort_transaction;
        }
        err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
                            XEN_IO_PROTO_ABI_NATIVE);
        if (err) {
                message = "writing protocol";
                goto abort_transaction;
        }
        err = xenbus_printf(xbt, dev->nodename,
                            "feature-persistent", "%u", 1);
        if (err)
                dev_warn(&dev->dev,
                         "writing persistent grants feature to xenbus");

        err = xenbus_transaction_end(xbt, 0);
        if (err) {
                if (err == -EAGAIN)
                        goto again;
                xenbus_dev_fatal(dev, err, "completing transaction");
                goto destroy_blkring;
        }

        xenbus_switch_state(dev, XenbusStateInitialised);

        return 0;

 abort_transaction:
        xenbus_transaction_end(xbt, 1);
        if (message)
                xenbus_dev_fatal(dev, err, "%s", message);
 destroy_blkring:
        blkif_free(info, 0);
 out:
        return err;
}

/**
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures and the ring buffer for communication with the backend, and
 * inform the backend of the appropriate details for those.  Switch to
 * Initialised state.
 */
static int blkfront_probe(struct xenbus_device *dev,
                          const struct xenbus_device_id *id)
{
        int err, vdevice, i;
        struct blkfront_info *info;

        /* FIXME: Use dynamic device id if this is not set. */
        err = xenbus_scanf(XBT_NIL, dev->nodename,
                           "virtual-device", "%i", &vdevice);
        if (err != 1) {
                /* go looking in the extended area instead */
                err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext",
                                   "%i", &vdevice);
                if (err != 1) {
                        xenbus_dev_fatal(dev, err, "reading virtual-device");
                        return err;
                }
        }

        if (xen_hvm_domain()) {
                char *type;
                int len;
                /* no unplug has been done: do not hook devices != xen vbds */
                if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) {
                        int major;

                        if (!VDEV_IS_EXTENDED(vdevice))
                                major = BLKIF_MAJOR(vdevice);
                        else
                                major = XENVBD_MAJOR;

                        if (major != XENVBD_MAJOR) {
                                printk(KERN_INFO
                                       "%s: HVM does not support vbd %d as xen block device\n",
                                       __FUNCTION__, vdevice);
                                return -ENODEV;
                        }
                }
                /* do not create a PV cdrom device if we are an HVM guest */
                type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len);
                if (IS_ERR(type))
                        return -ENODEV;
                if (strncmp(type, "cdrom", 5) == 0) {
                        kfree(type);
                        return -ENODEV;
                }
                kfree(type);
        }
        info = kzalloc(sizeof(*info), GFP_KERNEL);
        if (!info) {
                xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
                return -ENOMEM;
        }

        mutex_init(&info->mutex);
        spin_lock_init(&info->io_lock);
        info->xbdev = dev;
        info->vdevice = vdevice;
        INIT_LIST_HEAD(&info->persistent_gnts);
        info->persistent_gnts_c = 0;
        info->connected = BLKIF_STATE_DISCONNECTED;
        INIT_WORK(&info->work, blkif_restart_queue);

        for (i = 0; i < BLK_RING_SIZE; i++)
                info->shadow[i].req.u.rw.id = i+1;
        info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

        /* Front end dir is a number, which is used as the id. */
        info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
        dev_set_drvdata(&dev->dev, info);

        err = talk_to_blkback(dev, info);
        if (err) {
                kfree(info);
                dev_set_drvdata(&dev->dev, NULL);
                return err;
        }

        return 0;
}

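/*
 * Requeue the requests that were in flight when the connection was
 * lost: re-grant their pages to the (possibly new) backend and push
 * them onto the fresh ring.
 */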
static int blkif_recover(struct blkfront_info *info)
{
        int i;
        struct blkif_request *req;
        struct blk_shadow *copy;
        int j;

        /* Stage 1: Make a safe copy of the shadow state. */
        copy = kmemdup(info->shadow, sizeof(info->shadow),
                       GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
        if (!copy)
                return -ENOMEM;

        /* Stage 2: Set up free list. */
        memset(&info->shadow, 0, sizeof(info->shadow));
        for (i = 0; i < BLK_RING_SIZE; i++)
                info->shadow[i].req.u.rw.id = i+1;
        info->shadow_free = info->ring.req_prod_pvt;
        info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;

        /* Stage 3: Find pending requests and requeue them. */
        for (i = 0; i < BLK_RING_SIZE; i++) {
                /* Not in use? */
                if (!copy[i].request)
                        continue;

                /* Grab a request slot and copy shadow state into it. */
                req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
                *req = copy[i].req;

                /* We get a new request id, and must reset the shadow state. */
                req->u.rw.id = get_id_from_freelist(info);
                memcpy(&info->shadow[req->u.rw.id], &copy[i], sizeof(copy[i]));

                if (req->operation != BLKIF_OP_DISCARD) {
                /* Rewrite any grant references invalidated by susp/resume. */
                        for (j = 0; j < req->u.rw.nr_segments; j++)
                                gnttab_grant_foreign_access_ref(
                                        req->u.rw.seg[j].gref,
                                        info->xbdev->otherend_id,
                                        pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
                                        0);
                }
                info->shadow[req->u.rw.id].req = *req;

                info->ring.req_prod_pvt++;
        }

        kfree(copy);

        xenbus_switch_state(info->xbdev, XenbusStateConnected);

        spin_lock_irq(&info->io_lock);

        /* Now safe for us to use the shared ring */
        info->connected = BLKIF_STATE_CONNECTED;

        /* Send off requeued requests */
        flush_requests(info);

        /* Kick any other new requests queued since we resumed */
        kick_pending_request_queues(info);

        spin_unlock_irq(&info->io_lock);

        return 0;
}

/**
 * We are reconnecting to the backend, due to a suspend/resume, or a backend
 * driver restart.  We tear down our blkif structure and recreate it, but
 * leave the device-layer structures intact so that this is transparent to the
 * rest of the kernel.
 */
static int blkfront_resume(struct xenbus_device *dev)
{
        struct blkfront_info *info = dev_get_drvdata(&dev->dev);
        int err;

        dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);

        blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);

        err = talk_to_blkback(dev, info);
        if (info->connected == BLKIF_STATE_SUSPENDED && !err)
                err = blkif_recover(info);

        return err;
}

static void
blkfront_closing(struct blkfront_info *info)
{
        struct xenbus_device *xbdev = info->xbdev;
        struct block_device *bdev = NULL;

        mutex_lock(&info->mutex);

        if (xbdev->state == XenbusStateClosing) {
                mutex_unlock(&info->mutex);
                return;
        }

        if (info->gd)
                bdev = bdget_disk(info->gd, 0);

        mutex_unlock(&info->mutex);

        if (!bdev) {
                xenbus_frontend_closed(xbdev);
                return;
        }

        mutex_lock(&bdev->bd_mutex);

        if (bdev->bd_openers) {
                xenbus_dev_error(xbdev, -EBUSY,
                                 "Device in use; refusing to close");
                xenbus_switch_state(xbdev, XenbusStateClosing);
        } else {
                xlvbd_release_gendisk(info);
                xenbus_frontend_closed(xbdev);
        }

        mutex_unlock(&bdev->bd_mutex);
        bdput(bdev);
}

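/*
 * Read the backend's discard capabilities from xenbus: "phy" backends
 * advertise granularity/alignment (and optionally secure discard),
 * while "file" backends simply support plain discard.
 */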
static void blkfront_setup_discard(struct blkfront_info *info)
{
        int err;
        char *type;
        unsigned int discard_granularity;
        unsigned int discard_alignment;
        unsigned int discard_secure;

        type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
        if (IS_ERR(type))
                return;

        info->feature_secdiscard = 0;
        if (strncmp(type, "phy", 3) == 0) {
                err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
                        "discard-granularity", "%u", &discard_granularity,
                        "discard-alignment", "%u", &discard_alignment,
                        NULL);
                if (!err) {
                        info->feature_discard = 1;
                        info->discard_granularity = discard_granularity;
                        info->discard_alignment = discard_alignment;
                }
                err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
                        "discard-secure", "%d", &discard_secure,
                        NULL);
                if (!err)
                        info->feature_secdiscard = discard_secure;

        } else if (strncmp(type, "file", 4) == 0)
                info->feature_discard = 1;

        kfree(type);
}


/*
 * Invoked when the backend is finally 'ready' (and has told us the details
 * about the physical device - #sectors, size, etc).
 */
static void blkfront_connect(struct blkfront_info *info)
{
	unsigned long long sectors;
	unsigned long sector_size;
	unsigned int binfo;
	int err;
	int barrier, flush, discard, persistent;

	switch (info->connected) {
	case BLKIF_STATE_CONNECTED:
		/*
		 * Potentially, the back-end may be signalling
		 * a capacity change; update the capacity.
		 */
		err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
				   "sectors", "%Lu", &sectors);
		if (XENBUS_EXIST_ERR(err))
			return;
		printk(KERN_INFO "Setting capacity to %Lu\n",
		       sectors);
		set_capacity(info->gd, sectors);
		revalidate_disk(info->gd);

		/* fall through */
	case BLKIF_STATE_SUSPENDED:
		return;

	default:
		break;
	}

	dev_dbg(&info->xbdev->dev, "%s:%s.\n",
		__func__, info->xbdev->otherend);

	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "sectors", "%llu", &sectors,
			    "info", "%u", &binfo,
			    "sector-size", "%lu", &sector_size,
			    NULL);
	if (err) {
		xenbus_dev_fatal(info->xbdev, err,
				 "reading backend fields at %s",
				 info->xbdev->otherend);
		return;
	}

	info->feature_flush = 0;
	info->flush_op = 0;

	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "feature-barrier", "%d", &barrier,
			    NULL);

	/*
	 * If there's no "feature-barrier" defined, then it means
	 * we're dealing with a very old backend which writes
	 * synchronously; nothing to do.
	 *
	 * If there are barriers, then we use flush.
	 */
	if (!err && barrier) {
		info->feature_flush = REQ_FLUSH | REQ_FUA;
		info->flush_op = BLKIF_OP_WRITE_BARRIER;
	}
	/*
	 * And if there is "feature-flush-cache" use that above
	 * barriers.
	 */
	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "feature-flush-cache", "%d", &flush,
			    NULL);

	if (!err && flush) {
		info->feature_flush = REQ_FLUSH;
		info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
	}

	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "feature-discard", "%d", &discard,
			    NULL);

	if (!err && discard)
		blkfront_setup_discard(info);
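
	/*
	 * Persistent grants: when the backend advertises "feature-persistent",
	 * both ends keep reusing the same set of granted pages for data
	 * segments (copying through them) instead of granting and revoking
	 * pages on every request.
	 */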
	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "feature-persistent", "%u", &persistent,
			    NULL);
	if (err)
		info->feature_persistent = 0;
	else
		info->feature_persistent = persistent;

	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
	if (err) {
		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
				 info->xbdev->otherend);
		return;
	}

	xenbus_switch_state(info->xbdev, XenbusStateConnected);

	/* Kick pending requests. */
	spin_lock_irq(&info->io_lock);
	info->connected = BLKIF_STATE_CONNECTED;
	kick_pending_request_queues(info);
	spin_unlock_irq(&info->io_lock);

	add_disk(info->gd);

	info->is_ready = 1;
}

/**
 * Callback received when the backend's state changes.
 */
static void blkback_changed(struct xenbus_device *dev,
			    enum xenbus_state backend_state)
{
	struct blkfront_info *info = dev_get_drvdata(&dev->dev);

	dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state);

	switch (backend_state) {
	case XenbusStateInitialising:
	case XenbusStateInitWait:
	case XenbusStateInitialised:
	case XenbusStateReconfiguring:
	case XenbusStateReconfigured:
	case XenbusStateUnknown:
	case XenbusStateClosed:
		break;

	case XenbusStateConnected:
		blkfront_connect(info);
		break;

	case XenbusStateClosing:
		blkfront_closing(info);
		break;
	}
}
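
/*
 * The xenbus device is going away.  Free the ring and grants right away, but
 * the gendisk and the blkfront_info may only be torn down once nobody holds
 * the block device open any more.
 */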
static int blkfront_remove(struct xenbus_device *xbdev)
{
	struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
	struct block_device *bdev = NULL;
	struct gendisk *disk;

	dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);

	blkif_free(info, 0);

	mutex_lock(&info->mutex);

	disk = info->gd;
	if (disk)
		bdev = bdget_disk(disk, 0);

	info->xbdev = NULL;
	mutex_unlock(&info->mutex);

	if (!bdev) {
		kfree(info);
		return 0;
	}

	/*
	 * The xbdev was removed before we reached the Closed
	 * state. See if it's safe to remove the disk. If the bdev
	 * isn't closed yet, we let release take care of it.
	 */

	mutex_lock(&bdev->bd_mutex);
	info = disk->private_data;

	dev_warn(disk_to_dev(disk),
		 "%s was hot-unplugged, %d stale handles\n",
		 xbdev->nodename, bdev->bd_openers);

	if (info && !bdev->bd_openers) {
		xlvbd_release_gendisk(info);
		disk->private_data = NULL;
		kfree(info);
	}

	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);

	return 0;
}

static int blkfront_is_ready(struct xenbus_device *dev)
{
	struct blkfront_info *info = dev_get_drvdata(&dev->dev);

	return info->is_ready && info->xbdev;
}
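
/*
 * Open is only admission control: fail with -ERESTARTSYS if the underlying
 * xenbus device has already been removed or closed under us.
 */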
static int blkif_open(struct block_device *bdev, fmode_t mode)
{
	struct gendisk *disk = bdev->bd_disk;
	struct blkfront_info *info;
	int err = 0;

	mutex_lock(&blkfront_mutex);

	info = disk->private_data;
	if (!info) {
		/* xbdev gone */
		err = -ERESTARTSYS;
		goto out;
	}

	mutex_lock(&info->mutex);

	if (!info->gd)
		/* xbdev is closed */
		err = -ERESTARTSYS;

	mutex_unlock(&info->mutex);

out:
	mutex_unlock(&blkfront_mutex);
	return err;
}
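
/*
 * On last close, finish any deferred teardown: either the backend requested
 * a close while the device was still in use, or the device was hot-unplugged
 * and blkfront_remove() left the cleanup to us.
 */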
static int blkif_release(struct gendisk *disk, fmode_t mode)
{
	struct blkfront_info *info = disk->private_data;
	struct block_device *bdev;
	struct xenbus_device *xbdev;

	mutex_lock(&blkfront_mutex);

	bdev = bdget_disk(disk, 0);

	if (bdev->bd_openers)
		goto out;

	/*
	 * Check if we have been instructed to close. We will have
	 * deferred this request, because the bdev was still open.
	 */

	mutex_lock(&info->mutex);
	xbdev = info->xbdev;

	if (xbdev && xbdev->state == XenbusStateClosing) {
		/* pending switch to state closed */
		dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
		xlvbd_release_gendisk(info);
		xenbus_frontend_closed(info->xbdev);
	}

	mutex_unlock(&info->mutex);

	if (!xbdev) {
		/* sudden device removal */
		dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
		xlvbd_release_gendisk(info);
		disk->private_data = NULL;
		kfree(info);
	}

out:
	bdput(bdev);
	mutex_unlock(&blkfront_mutex);
	return 0;
}
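
/*
 * Block-layer entry points.  Note that reads and writes never pass through
 * here; they are submitted via the request queue set up when the gendisk is
 * created in xlvbd_alloc_gendisk().
 */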
static const struct block_device_operations xlvbd_block_fops =
{
	.owner = THIS_MODULE,
	.open = blkif_open,
	.release = blkif_release,
	.getgeo = blkif_getgeo,
	.ioctl = blkif_ioctl,
};


static const struct xenbus_device_id blkfront_ids[] = {
	{ "vbd" },
	{ "" }
};
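
/*
 * Xenbus driver glue.  The "vbd" id above is the device type the toolstack
 * writes under the frontend's xenstore directory, and is what binds this
 * driver to a virtual block device.
 */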
static DEFINE_XENBUS_DRIVER(blkfront, ,
	.probe = blkfront_probe,
	.remove = blkfront_remove,
	.resume = blkfront_resume,
	.otherend_changed = blkback_changed,
	.is_ready = blkfront_is_ready,
);
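
/*
 * Module init: nothing to do unless we are running on Xen, and, for an HVM
 * guest, unless the emulated disks have been unplugged (otherwise the same
 * storage would show up twice).  Claim the major, then register the xenbus
 * frontend.
 */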
static int __init xlblk_init(void)
{
	int ret;

	if (!xen_domain())
		return -ENODEV;

	if (xen_hvm_domain() && !xen_platform_pci_unplug)
		return -ENODEV;

	if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
		printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
		       XENVBD_MAJOR, DEV_NAME);
		return -ENODEV;
	}

	ret = xenbus_register_frontend(&blkfront_driver);
	if (ret) {
		unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
		return ret;
	}

	return 0;
}
module_init(xlblk_init);


static void __exit xlblk_exit(void)
{
	xenbus_unregister_driver(&blkfront_driver);
	unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
	kfree(minors);
}
module_exit(xlblk_exit);

MODULE_DESCRIPTION("Xen virtual block device frontend");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR);
MODULE_ALIAS("xen:vbd");
MODULE_ALIAS("xenblk");