blob: 621dee8b8cb29f40a2e8a53c9e608b4fbe09bea5 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * scsi_lib.c Copyright (C) 1999 Eric Youngdale
3 *
4 * SCSI queueing library.
5 * Initial versions: Eric Youngdale (eric@andante.org).
6 * Based upon conversations with large numbers
7 * of people at Linux Expo.
8 */
9
10#include <linux/bio.h>
11#include <linux/blkdev.h>
12#include <linux/completion.h>
13#include <linux/kernel.h>
14#include <linux/mempool.h>
15#include <linux/slab.h>
16#include <linux/init.h>
17#include <linux/pci.h>
18#include <linux/delay.h>
19
20#include <scsi/scsi.h>
21#include <scsi/scsi_dbg.h>
22#include <scsi/scsi_device.h>
23#include <scsi/scsi_driver.h>
24#include <scsi/scsi_eh.h>
25#include <scsi/scsi_host.h>
26#include <scsi/scsi_request.h>
27
28#include "scsi_priv.h"
29#include "scsi_logging.h"
30
31
32#define SG_MEMPOOL_NR (sizeof(scsi_sg_pools)/sizeof(struct scsi_host_sg_pool))
33#define SG_MEMPOOL_SIZE 32
34
35struct scsi_host_sg_pool {
36 size_t size;
37 char *name;
38 kmem_cache_t *slab;
39 mempool_t *pool;
40};
41
42#if (SCSI_MAX_PHYS_SEGMENTS < 32)
43#error SCSI_MAX_PHYS_SEGMENTS is too small
44#endif
45
46#define SP(x) { x, "sgpool-" #x }
Adrian Bunk52c1da32005-06-23 22:05:33 -070047static struct scsi_host_sg_pool scsi_sg_pools[] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -070048 SP(8),
49 SP(16),
50 SP(32),
51#if (SCSI_MAX_PHYS_SEGMENTS > 32)
52 SP(64),
53#if (SCSI_MAX_PHYS_SEGMENTS > 64)
54 SP(128),
55#if (SCSI_MAX_PHYS_SEGMENTS > 128)
56 SP(256),
57#if (SCSI_MAX_PHYS_SEGMENTS > 256)
58#error SCSI_MAX_PHYS_SEGMENTS is too large
59#endif
60#endif
61#endif
62#endif
63};
64#undef SP
65
66
67/*
68 * Function: scsi_insert_special_req()
69 *
70 * Purpose: Insert pre-formed request into request queue.
71 *
72 * Arguments: sreq - request that is ready to be queued.
73 * at_head - boolean. True if we should insert at head
74 * of queue, false if we should insert at tail.
75 *
76 * Lock status: Assumed that lock is not held upon entry.
77 *
78 * Returns: Nothing
79 *
80 * Notes: This function is called from character device and from
81 * ioctl types of functions where the caller knows exactly
82 * what SCSI command needs to be issued. The idea is that
83 * we merely inject the command into the queue (at the head
84 * for now), and then call the queue request function to actually
85 * process it.
86 */
87int scsi_insert_special_req(struct scsi_request *sreq, int at_head)
88{
89 /*
90 * Because users of this function are apt to reuse requests with no
91 * modification, we have to sanitise the request flags here
92 */
93 sreq->sr_request->flags &= ~REQ_DONTPREP;
94 blk_insert_request(sreq->sr_device->request_queue, sreq->sr_request,
Tejun Heo 867d1192005-04-24 02:06:05 -050095 at_head, sreq);
Linus Torvalds1da177e2005-04-16 15:20:36 -070096 return 0;
97}
98
Tejun Heo a1bf9d1d2005-04-24 02:08:52 -050099static void scsi_run_queue(struct request_queue *q);
100
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101/*
102 * Function: scsi_queue_insert()
103 *
104 * Purpose: Insert a command in the midlevel queue.
105 *
106 * Arguments: cmd - command that we are adding to queue.
107 * reason - why we are inserting command to queue.
108 *
109 * Lock status: Assumed that lock is not held upon entry.
110 *
111 * Returns: Nothing.
112 *
113 * Notes: We do this for one of two cases. Either the host is busy
114 * and it cannot accept any more commands for the time being,
115 * or the device returned QUEUE_FULL and can accept no more
116 * commands.
117 * Notes: This could be called either from an interrupt context or a
118 * normal process context.
119 */
120int scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
121{
122 struct Scsi_Host *host = cmd->device->host;
123 struct scsi_device *device = cmd->device;
Tejun Heo a1bf9d1d2005-04-24 02:08:52 -0500124 struct request_queue *q = device->request_queue;
125 unsigned long flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700126
127 SCSI_LOG_MLQUEUE(1,
128 printk("Inserting command %p into mlqueue\n", cmd));
129
130 /*
Tejun Heo d8c37e72005-05-14 00:46:08 +0900131 * Set the appropriate busy bit for the device/host.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132 *
133 * If the host/device isn't busy, assume that something actually
134 * completed, and that we should be able to queue a command now.
135 *
136 * Note that the prior mid-layer assumption that any host could
137 * always queue at least one command is now broken. The mid-layer
138 * will implement a user specifiable stall (see
139 * scsi_host.max_host_blocked and scsi_device.max_device_blocked)
140 * if a command is requeued with no other commands outstanding
141 * either for the device or for the host.
142 */
143 if (reason == SCSI_MLQUEUE_HOST_BUSY)
144 host->host_blocked = host->max_host_blocked;
145 else if (reason == SCSI_MLQUEUE_DEVICE_BUSY)
146 device->device_blocked = device->max_device_blocked;
147
148 /*
149 * Register the fact that we own the thing for now.
150 */
151 cmd->state = SCSI_STATE_MLQUEUE;
152 cmd->owner = SCSI_OWNER_MIDLEVEL;
153
154 /*
155 * Decrement the counters, since these commands are no longer
156 * active on the host/device.
157 */
158 scsi_device_unbusy(device);
159
160 /*
Tejun Heo a1bf9d1d2005-04-24 02:08:52 -0500161 * Requeue this command. It will go before all other commands
162 * that are already in the queue.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163 *
164 * NOTE: there is magic here about the way the queue is plugged if
165 * we have no outstanding commands.
166 *
Tejun Heo a1bf9d1d2005-04-24 02:08:52 -0500167 * Although we *don't* plug the queue, we call the request
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168 * function. The SCSI request function detects the blocked condition
169 * and plugs the queue appropriately.
Tejun Heo a1bf9d1d2005-04-24 02:08:52 -0500170 */
171 spin_lock_irqsave(q->queue_lock, flags);
172 blk_requeue_request(q, cmd->request);
173 spin_unlock_irqrestore(q->queue_lock, flags);
174
175 scsi_run_queue(q);
176
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177 return 0;
178}
179
180/*
181 * Function: scsi_do_req
182 *
183 * Purpose: Queue a SCSI request
184 *
185 * Arguments: sreq - command descriptor.
186 * cmnd - actual SCSI command to be performed.
187 * buffer - data buffer.
188 * bufflen - size of data buffer.
189 * done - completion function to be run.
190 * timeout - how long to let it run before timeout.
191 * retries - number of retries we allow.
192 *
193 * Lock status: No locks held upon entry.
194 *
195 * Returns: Nothing.
196 *
197 * Notes: This function is only used for queueing requests for things
198 * like ioctls and character device requests - this is because
199 * we essentially just inject a request into the queue for the
200 * device.
201 *
202 * In order to support the scsi_device_quiesce function, we
203 * now inject requests on the *head* of the device queue
204 * rather than the tail.
205 */
206void scsi_do_req(struct scsi_request *sreq, const void *cmnd,
207 void *buffer, unsigned bufflen,
208 void (*done)(struct scsi_cmnd *),
209 int timeout, int retries)
210{
211 /*
212 * If the upper level driver is reusing these things, then
213 * we should release the low-level block now. Another one will
214 * be allocated later when this request is getting queued.
215 */
216 __scsi_release_request(sreq);
217
218 /*
219 * Our own function scsi_done (which marks the host as not busy,
220 * disables the timeout counter, etc) will be called by us or by the
221 * scsi_hosts[host].queuecommand() function needs to also call
222 * the completion function for the high level driver.
223 */
224 memcpy(sreq->sr_cmnd, cmnd, sizeof(sreq->sr_cmnd));
225 sreq->sr_bufflen = bufflen;
226 sreq->sr_buffer = buffer;
227 sreq->sr_allowed = retries;
228 sreq->sr_done = done;
229 sreq->sr_timeout_per_command = timeout;
230
231 if (sreq->sr_cmd_len == 0)
232 sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]);
233
234 /*
235 * head injection *required* here otherwise quiesce won't work
236 */
237 scsi_insert_special_req(sreq, 1);
238}
239EXPORT_SYMBOL(scsi_do_req);
240
241static void scsi_wait_done(struct scsi_cmnd *cmd)
242{
243 struct request *req = cmd->request;
244 struct request_queue *q = cmd->device->request_queue;
245 unsigned long flags;
246
247 req->rq_status = RQ_SCSI_DONE; /* Busy, but indicate request done */
248
249 spin_lock_irqsave(q->queue_lock, flags);
250 if (blk_rq_tagged(req))
251 blk_queue_end_tag(q, req);
252 spin_unlock_irqrestore(q->queue_lock, flags);
253
254 if (req->waiting)
255 complete(req->waiting);
256}
257
258/* This is the end routine we get to if a command was never attached
259 * to the request. Simply complete the request without changing
260 * rq_status; this will cause a DRIVER_ERROR. */
261static void scsi_wait_req_end_io(struct request *req)
262{
263 BUG_ON(!req->waiting);
264
265 complete(req->waiting);
266}
267
268void scsi_wait_req(struct scsi_request *sreq, const void *cmnd, void *buffer,
269 unsigned bufflen, int timeout, int retries)
270{
271 DECLARE_COMPLETION(wait);
272
273 sreq->sr_request->waiting = &wait;
274 sreq->sr_request->rq_status = RQ_SCSI_BUSY;
275 sreq->sr_request->end_io = scsi_wait_req_end_io;
276 scsi_do_req(sreq, cmnd, buffer, bufflen, scsi_wait_done,
277 timeout, retries);
278 wait_for_completion(&wait);
279 sreq->sr_request->waiting = NULL;
280 if (sreq->sr_request->rq_status != RQ_SCSI_DONE)
281 sreq->sr_result |= (DRIVER_ERROR << 24);
282
283 __scsi_release_request(sreq);
284}
285EXPORT_SYMBOL(scsi_wait_req);
286
287/*
288 * Function: scsi_init_cmd_errh()
289 *
290 * Purpose: Initialize cmd fields related to error handling.
291 *
292 * Arguments: cmd - command that is ready to be queued.
293 *
294 * Returns: Nothing
295 *
296 * Notes: This function has the job of initializing a number of
297 * fields related to error handling. Typically this will
298 * be called once for each command, as required.
299 */
300static int scsi_init_cmd_errh(struct scsi_cmnd *cmd)
301{
302 cmd->owner = SCSI_OWNER_MIDLEVEL;
303 cmd->serial_number = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304 cmd->abort_reason = 0;
305
306 memset(cmd->sense_buffer, 0, sizeof cmd->sense_buffer);
307
308 if (cmd->cmd_len == 0)
309 cmd->cmd_len = COMMAND_SIZE(cmd->cmnd[0]);
310
311 /*
312 * We need saved copies of a number of fields - this is because
313 * error handling may need to overwrite these with different values
314 * to run different commands, and once error handling is complete,
315 * we will need to restore these values prior to running the actual
316 * command.
317 */
318 cmd->old_use_sg = cmd->use_sg;
319 cmd->old_cmd_len = cmd->cmd_len;
320 cmd->sc_old_data_direction = cmd->sc_data_direction;
321 cmd->old_underflow = cmd->underflow;
322 memcpy(cmd->data_cmnd, cmd->cmnd, sizeof(cmd->cmnd));
323 cmd->buffer = cmd->request_buffer;
324 cmd->bufflen = cmd->request_bufflen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325 cmd->abort_reason = 0;
326
327 return 1;
328}
329
330/*
331 * Function: scsi_setup_cmd_retry()
332 *
333 * Purpose: Restore the command state for a retry
334 *
335 * Arguments: cmd - command to be restored
336 *
337 * Returns: Nothing
338 *
339 * Notes: Immediately prior to retrying a command, we need
340 * to restore certain fields that we saved above.
341 */
342void scsi_setup_cmd_retry(struct scsi_cmnd *cmd)
343{
344 memcpy(cmd->cmnd, cmd->data_cmnd, sizeof(cmd->data_cmnd));
345 cmd->request_buffer = cmd->buffer;
346 cmd->request_bufflen = cmd->bufflen;
347 cmd->use_sg = cmd->old_use_sg;
348 cmd->cmd_len = cmd->old_cmd_len;
349 cmd->sc_data_direction = cmd->sc_old_data_direction;
350 cmd->underflow = cmd->old_underflow;
351}
352
353void scsi_device_unbusy(struct scsi_device *sdev)
354{
355 struct Scsi_Host *shost = sdev->host;
356 unsigned long flags;
357
358 spin_lock_irqsave(shost->host_lock, flags);
359 shost->host_busy--;
360 if (unlikely(test_bit(SHOST_RECOVERY, &shost->shost_state) &&
361 shost->host_failed))
362 scsi_eh_wakeup(shost);
363 spin_unlock(shost->host_lock);
152587d2005-04-12 16:22:06 -0500364 spin_lock(sdev->request_queue->queue_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 sdev->device_busy--;
152587d2005-04-12 16:22:06 -0500366 spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367}
368
369/*
370 * Called for single_lun devices on IO completion. Clear starget_sdev_user,
371 * and call blk_run_queue for all the scsi_devices on the target -
372 * including current_sdev first.
373 *
374 * Called with *no* scsi locks held.
375 */
376static void scsi_single_lun_run(struct scsi_device *current_sdev)
377{
378 struct Scsi_Host *shost = current_sdev->host;
379 struct scsi_device *sdev, *tmp;
380 struct scsi_target *starget = scsi_target(current_sdev);
381 unsigned long flags;
382
383 spin_lock_irqsave(shost->host_lock, flags);
384 starget->starget_sdev_user = NULL;
385 spin_unlock_irqrestore(shost->host_lock, flags);
386
387 /*
388 * Call blk_run_queue for all LUNs on the target, starting with
389 * current_sdev. We race with others (to set starget_sdev_user),
390 * but in most cases, we will be first. Ideally, each LU on the
391 * target would get some limited time or requests on the target.
392 */
393 blk_run_queue(current_sdev->request_queue);
394
395 spin_lock_irqsave(shost->host_lock, flags);
396 if (starget->starget_sdev_user)
397 goto out;
398 list_for_each_entry_safe(sdev, tmp, &starget->devices,
399 same_target_siblings) {
400 if (sdev == current_sdev)
401 continue;
402 if (scsi_device_get(sdev))
403 continue;
404
405 spin_unlock_irqrestore(shost->host_lock, flags);
406 blk_run_queue(sdev->request_queue);
407 spin_lock_irqsave(shost->host_lock, flags);
408
409 scsi_device_put(sdev);
410 }
411 out:
412 spin_unlock_irqrestore(shost->host_lock, flags);
413}
414
415/*
416 * Function: scsi_run_queue()
417 *
418 * Purpose: Select a proper request queue to serve next
419 *
420 * Arguments: q - last request's queue
421 *
422 * Returns: Nothing
423 *
424 * Notes: The previous command was completely finished, start
425 * a new one if possible.
426 */
427static void scsi_run_queue(struct request_queue *q)
428{
429 struct scsi_device *sdev = q->queuedata;
430 struct Scsi_Host *shost = sdev->host;
431 unsigned long flags;
432
433 if (sdev->single_lun)
434 scsi_single_lun_run(sdev);
435
436 spin_lock_irqsave(shost->host_lock, flags);
437 while (!list_empty(&shost->starved_list) &&
438 !shost->host_blocked && !shost->host_self_blocked &&
439 !((shost->can_queue > 0) &&
440 (shost->host_busy >= shost->can_queue))) {
441 /*
442 * As long as shost is accepting commands and we have
443 * starved queues, call blk_run_queue. scsi_request_fn
444 * drops the queue_lock and can add us back to the
445 * starved_list.
446 *
447 * host_lock protects the starved_list and starved_entry.
448 * scsi_request_fn must get the host_lock before checking
449 * or modifying starved_list or starved_entry.
450 */
451 sdev = list_entry(shost->starved_list.next,
452 struct scsi_device, starved_entry);
453 list_del_init(&sdev->starved_entry);
454 spin_unlock_irqrestore(shost->host_lock, flags);
455
456 blk_run_queue(sdev->request_queue);
457
458 spin_lock_irqsave(shost->host_lock, flags);
459 if (unlikely(!list_empty(&sdev->starved_entry)))
460 /*
461 * sdev lost a race, and was put back on the
462 * starved list. This is unlikely but without this
463 * in theory we could loop forever.
464 */
465 break;
466 }
467 spin_unlock_irqrestore(shost->host_lock, flags);
468
469 blk_run_queue(q);
470}
471
472/*
473 * Function: scsi_requeue_command()
474 *
475 * Purpose: Handle post-processing of completed commands.
476 *
477 * Arguments: q - queue to operate on
478 * cmd - command that may need to be requeued.
479 *
480 * Returns: Nothing
481 *
482 * Notes: After command completion, there may be blocks left
483 * over which weren't finished by the previous command
484 * this can be for a number of reasons - the main one is
485 * I/O errors in the middle of the request, in which case
486 * we need to request the blocks that come after the bad
487 * sector.
488 */
489static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd)
490{
Tejun Heo 283369c2005-04-24 02:06:36 -0500491 unsigned long flags;
492
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493 cmd->request->flags &= ~REQ_DONTPREP;
Tejun Heo 283369c2005-04-24 02:06:36 -0500494
495 spin_lock_irqsave(q->queue_lock, flags);
496 blk_requeue_request(q, cmd->request);
497 spin_unlock_irqrestore(q->queue_lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498
499 scsi_run_queue(q);
500}
501
502void scsi_next_command(struct scsi_cmnd *cmd)
503{
504 struct request_queue *q = cmd->device->request_queue;
505
506 scsi_put_command(cmd);
507 scsi_run_queue(q);
508}
509
510void scsi_run_host_queues(struct Scsi_Host *shost)
511{
512 struct scsi_device *sdev;
513
514 shost_for_each_device(sdev, shost)
515 scsi_run_queue(sdev->request_queue);
516}
517
518/*
519 * Function: scsi_end_request()
520 *
521 * Purpose: Post-processing of completed commands (usually invoked at end
522 * of upper level post-processing and scsi_io_completion).
523 *
524 * Arguments: cmd - command that is complete.
525 * uptodate - 1 if I/O indicates success, <= 0 for I/O error.
526 * bytes - number of bytes of completed I/O
527 * requeue - indicates whether we should requeue leftovers.
528 *
529 * Lock status: Assumed that lock is not held upon entry.
530 *
531 * Returns: cmd if requeue done or required, NULL otherwise
532 *
533 * Notes: This is called for block device requests in order to
534 * mark some number of sectors as complete.
535 *
536 * We are guaranteeing that the request queue will be goosed
537 * at some point during this call.
538 */
539static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate,
540 int bytes, int requeue)
541{
542 request_queue_t *q = cmd->device->request_queue;
543 struct request *req = cmd->request;
544 unsigned long flags;
545
546 /*
547 * If there are blocks left over at the end, set up the command
548 * to queue the remainder of them.
549 */
550 if (end_that_request_chunk(req, uptodate, bytes)) {
551 int leftover = (req->hard_nr_sectors << 9);
552
553 if (blk_pc_request(req))
554 leftover = req->data_len;
555
556 /* kill remainder if no retrys */
557 if (!uptodate && blk_noretry_request(req))
558 end_that_request_chunk(req, 0, leftover);
559 else {
560 if (requeue)
561 /*
562 * Bleah. Leftovers again. Stick the
563 * leftovers in the front of the
564 * queue, and goose the queue again.
565 */
566 scsi_requeue_command(q, cmd);
567
568 return cmd;
569 }
570 }
571
572 add_disk_randomness(req->rq_disk);
573
574 spin_lock_irqsave(q->queue_lock, flags);
575 if (blk_rq_tagged(req))
576 blk_queue_end_tag(q, req);
577 end_that_request_last(req);
578 spin_unlock_irqrestore(q->queue_lock, flags);
579
580 /*
581 * This will goose the queue request function at the end, so we don't
582 * need to worry about launching another command.
583 */
584 scsi_next_command(cmd);
585 return NULL;
586}
587
588static struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, int gfp_mask)
589{
590 struct scsi_host_sg_pool *sgp;
591 struct scatterlist *sgl;
592
593 BUG_ON(!cmd->use_sg);
594
595 switch (cmd->use_sg) {
596 case 1 ... 8:
597 cmd->sglist_len = 0;
598 break;
599 case 9 ... 16:
600 cmd->sglist_len = 1;
601 break;
602 case 17 ... 32:
603 cmd->sglist_len = 2;
604 break;
605#if (SCSI_MAX_PHYS_SEGMENTS > 32)
606 case 33 ... 64:
607 cmd->sglist_len = 3;
608 break;
609#if (SCSI_MAX_PHYS_SEGMENTS > 64)
610 case 65 ... 128:
611 cmd->sglist_len = 4;
612 break;
613#if (SCSI_MAX_PHYS_SEGMENTS > 128)
614 case 129 ... 256:
615 cmd->sglist_len = 5;
616 break;
617#endif
618#endif
619#endif
620 default:
621 return NULL;
622 }
623
624 sgp = scsi_sg_pools + cmd->sglist_len;
625 sgl = mempool_alloc(sgp->pool, gfp_mask);
626 if (sgl)
627 memset(sgl, 0, sgp->size);
628 return sgl;
629}
630
631static void scsi_free_sgtable(struct scatterlist *sgl, int index)
632{
633 struct scsi_host_sg_pool *sgp;
634
635 BUG_ON(index > SG_MEMPOOL_NR);
636
637 sgp = scsi_sg_pools + index;
638 mempool_free(sgl, sgp->pool);
639}
640
641/*
642 * Function: scsi_release_buffers()
643 *
644 * Purpose: Completion processing for block device I/O requests.
645 *
646 * Arguments: cmd - command that we are bailing.
647 *
648 * Lock status: Assumed that no lock is held upon entry.
649 *
650 * Returns: Nothing
651 *
652 * Notes: In the event that an upper level driver rejects a
653 * command, we must release resources allocated during
654 * the __init_io() function. Primarily this would involve
655 * the scatter-gather table, and potentially any bounce
656 * buffers.
657 */
658static void scsi_release_buffers(struct scsi_cmnd *cmd)
659{
660 struct request *req = cmd->request;
661
662 /*
663 * Free up any indirection buffers we allocated for DMA purposes.
664 */
665 if (cmd->use_sg)
666 scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len);
667 else if (cmd->request_buffer != req->buffer)
668 kfree(cmd->request_buffer);
669
670 /*
671 * Zero these out. They now point to freed memory, and it is
672 * dangerous to hang onto the pointers.
673 */
674 cmd->buffer = NULL;
675 cmd->bufflen = 0;
676 cmd->request_buffer = NULL;
677 cmd->request_bufflen = 0;
678}
679
680/*
681 * Function: scsi_io_completion()
682 *
683 * Purpose: Completion processing for block device I/O requests.
684 *
685 * Arguments: cmd - command that is finished.
686 *
687 * Lock status: Assumed that no lock is held upon entry.
688 *
689 * Returns: Nothing
690 *
691 * Notes: This function is matched in terms of capabilities to
692 * the function that created the scatter-gather list.
693 * In other words, if there are no bounce buffers
694 * (the normal case for most drivers), we don't need
695 * the logic to deal with cleaning up afterwards.
696 *
697 * We must do one of several things here:
698 *
699 * a) Call scsi_end_request. This will finish off the
700 * specified number of sectors. If we are done, the
701 * command block will be released, and the queue
702 * function will be goosed. If we are not done, then
703 * scsi_end_request will directly goose the queue.
704 *
705 * b) We can just use scsi_requeue_command() here. This would
706 * be used if we just wanted to retry, for example.
707 */
708void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes,
709 unsigned int block_bytes)
710{
711 int result = cmd->result;
712 int this_count = cmd->bufflen;
713 request_queue_t *q = cmd->device->request_queue;
714 struct request *req = cmd->request;
715 int clear_errors = 1;
716 struct scsi_sense_hdr sshdr;
717 int sense_valid = 0;
718 int sense_deferred = 0;
719
720 if (blk_complete_barrier_rq(q, req, good_bytes >> 9))
721 return;
722
723 /*
724 * Free up any indirection buffers we allocated for DMA purposes.
725 * For the case of a READ, we need to copy the data out of the
726 * bounce buffer and into the real buffer.
727 */
728 if (cmd->use_sg)
729 scsi_free_sgtable(cmd->buffer, cmd->sglist_len);
730 else if (cmd->buffer != req->buffer) {
731 if (rq_data_dir(req) == READ) {
732 unsigned long flags;
733 char *to = bio_kmap_irq(req->bio, &flags);
734 memcpy(to, cmd->buffer, cmd->bufflen);
735 bio_kunmap_irq(to, &flags);
736 }
737 kfree(cmd->buffer);
738 }
739
740 if (result) {
741 sense_valid = scsi_command_normalize_sense(cmd, &sshdr);
742 if (sense_valid)
743 sense_deferred = scsi_sense_is_deferred(&sshdr);
744 }
745 if (blk_pc_request(req)) { /* SG_IO ioctl from block level */
746 req->errors = result;
747 if (result) {
748 clear_errors = 0;
749 if (sense_valid && req->sense) {
750 /*
751 * SG_IO wants current and deferred errors
752 */
753 int len = 8 + cmd->sense_buffer[7];
754
755 if (len > SCSI_SENSE_BUFFERSIZE)
756 len = SCSI_SENSE_BUFFERSIZE;
757 memcpy(req->sense, cmd->sense_buffer, len);
758 req->sense_len = len;
759 }
760 } else
761 req->data_len = cmd->resid;
762 }
763
764 /*
765 * Zero these out. They now point to freed memory, and it is
766 * dangerous to hang onto the pointers.
767 */
768 cmd->buffer = NULL;
769 cmd->bufflen = 0;
770 cmd->request_buffer = NULL;
771 cmd->request_bufflen = 0;
772
773 /*
774 * Next deal with any sectors which we were able to correctly
775 * handle.
776 */
777 if (good_bytes >= 0) {
778 SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d bytes done.\n",
779 req->nr_sectors, good_bytes));
780 SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n", cmd->use_sg));
781
782 if (clear_errors)
783 req->errors = 0;
784 /*
785 * If multiple sectors are requested in one buffer, then
786 * they will have been finished off by the first command.
787 * If not, then we have a multi-buffer command.
788 *
789 * If block_bytes != 0, it means we had a medium error
790 * of some sort, and that we want to mark some number of
791 * sectors as not uptodate. Thus we want to inhibit
792 * requeueing right here - we will requeue down below
793 * when we handle the bad sectors.
794 */
795 cmd = scsi_end_request(cmd, 1, good_bytes, result == 0);
796
797 /*
798 * If the command completed without error, then either finish off the
799 * rest of the command, or start a new one.
800 */
801 if (result == 0 || cmd == NULL ) {
802 return;
803 }
804 }
805 /*
806 * Now, if we were good little boys and girls, Santa left us a request
807 * sense buffer. We can extract information from this, so we
808 * can choose a block to remap, etc.
809 */
810 if (sense_valid && !sense_deferred) {
811 switch (sshdr.sense_key) {
812 case UNIT_ATTENTION:
813 if (cmd->device->removable) {
814 /* detected disc change. set a bit
815 * and quietly refuse further access.
816 */
817 cmd->device->changed = 1;
818 cmd = scsi_end_request(cmd, 0,
819 this_count, 1);
820 return;
821 } else {
822 /*
823 * Must have been a power glitch, or a
824 * bus reset. Could not have been a
825 * media change, so we just retry the
826 * request and see what happens.
827 */
828 scsi_requeue_command(q, cmd);
829 return;
830 }
831 break;
832 case ILLEGAL_REQUEST:
833 /*
834 * If we had an ILLEGAL REQUEST returned, then we may
835 * have performed an unsupported command. The only
836 * thing this should be would be a ten byte read where
837 * only a six byte read was supported. Also, on a
838 * system where READ CAPACITY failed, we may have read
839 * past the end of the disk.
840 */
841 if (cmd->device->use_10_for_rw &&
842 (cmd->cmnd[0] == READ_10 ||
843 cmd->cmnd[0] == WRITE_10)) {
844 cmd->device->use_10_for_rw = 0;
845 /*
846 * This will cause a retry with a 6-byte
847 * command.
848 */
849 scsi_requeue_command(q, cmd);
850 result = 0;
851 } else {
852 cmd = scsi_end_request(cmd, 0, this_count, 1);
853 return;
854 }
855 break;
856 case NOT_READY:
857 /*
858 * If the device is in the process of becoming ready,
859 * retry.
860 */
861 if (sshdr.asc == 0x04 && sshdr.ascq == 0x01) {
862 scsi_requeue_command(q, cmd);
863 return;
864 }
865 printk(KERN_INFO "Device %s not ready.\n",
866 req->rq_disk ? req->rq_disk->disk_name : "");
867 cmd = scsi_end_request(cmd, 0, this_count, 1);
868 return;
869 case VOLUME_OVERFLOW:
870 printk(KERN_INFO "Volume overflow <%d %d %d %d> CDB: ",
871 cmd->device->host->host_no,
872 (int)cmd->device->channel,
873 (int)cmd->device->id, (int)cmd->device->lun);
874 __scsi_print_command(cmd->data_cmnd);
875 scsi_print_sense("", cmd);
876 cmd = scsi_end_request(cmd, 0, block_bytes, 1);
877 return;
878 default:
879 break;
880 }
881 } /* driver byte != 0 */
882 if (host_byte(result) == DID_RESET) {
883 /*
884 * Third party bus reset or reset for error
885 * recovery reasons. Just retry the request
886 * and see what happens.
887 */
888 scsi_requeue_command(q, cmd);
889 return;
890 }
891 if (result) {
892 printk(KERN_INFO "SCSI error : <%d %d %d %d> return code "
893 "= 0x%x\n", cmd->device->host->host_no,
894 cmd->device->channel,
895 cmd->device->id,
896 cmd->device->lun, result);
897
898 if (driver_byte(result) & DRIVER_SENSE)
899 scsi_print_sense("", cmd);
900 /*
901 * Mark a single buffer as not uptodate. Queue the remainder.
902 * We sometimes get this cruft in the event that a medium error
903 * isn't properly reported.
904 */
905 block_bytes = req->hard_cur_sectors << 9;
906 if (!block_bytes)
907 block_bytes = req->data_len;
908 cmd = scsi_end_request(cmd, 0, block_bytes, 1);
909 }
910}
911EXPORT_SYMBOL(scsi_io_completion);
912
913/*
914 * Function: scsi_init_io()
915 *
916 * Purpose: SCSI I/O initialize function.
917 *
918 * Arguments: cmd - Command descriptor we wish to initialize
919 *
920 * Returns: 0 on success
921 * BLKPREP_DEFER if the failure is retryable
922 * BLKPREP_KILL if the failure is fatal
923 */
924static int scsi_init_io(struct scsi_cmnd *cmd)
925{
926 struct request *req = cmd->request;
927 struct scatterlist *sgpnt;
928 int count;
929
930 /*
931 * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer
932 */
933 if ((req->flags & REQ_BLOCK_PC) && !req->bio) {
934 cmd->request_bufflen = req->data_len;
935 cmd->request_buffer = req->data;
936 req->buffer = req->data;
937 cmd->use_sg = 0;
938 return 0;
939 }
940
941 /*
942 * we used to not use scatter-gather for single segment request,
943 * but now we do (it makes highmem I/O easier to support without
944 * kmapping pages)
945 */
946 cmd->use_sg = req->nr_phys_segments;
947
948 /*
949 * if sg table allocation fails, requeue request later.
950 */
951 sgpnt = scsi_alloc_sgtable(cmd, GFP_ATOMIC);
Tejun Heo beb66172005-04-24 02:04:53 -0500952 if (unlikely(!sgpnt))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 return BLKPREP_DEFER;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954
955 cmd->request_buffer = (char *) sgpnt;
956 cmd->request_bufflen = req->nr_sectors << 9;
957 if (blk_pc_request(req))
958 cmd->request_bufflen = req->data_len;
959 req->buffer = NULL;
960
961 /*
962 * Next, walk the list, and fill in the addresses and sizes of
963 * each segment.
964 */
965 count = blk_rq_map_sg(req->q, req, cmd->request_buffer);
966
967 /*
968 * mapped well, send it off
969 */
970 if (likely(count <= cmd->use_sg)) {
971 cmd->use_sg = count;
972 return 0;
973 }
974
975 printk(KERN_ERR "Incorrect number of segments after building list\n");
976 printk(KERN_ERR "counted %d, received %d\n", count, cmd->use_sg);
977 printk(KERN_ERR "req nr_sec %lu, cur_nr_sec %u\n", req->nr_sectors,
978 req->current_nr_sectors);
979
980 /* release the command and kill it */
981 scsi_release_buffers(cmd);
982 scsi_put_command(cmd);
983 return BLKPREP_KILL;
984}
985
986static int scsi_prepare_flush_fn(request_queue_t *q, struct request *rq)
987{
988 struct scsi_device *sdev = q->queuedata;
989 struct scsi_driver *drv;
990
991 if (sdev->sdev_state == SDEV_RUNNING) {
992 drv = *(struct scsi_driver **) rq->rq_disk->private_data;
993
994 if (drv->prepare_flush)
995 return drv->prepare_flush(q, rq);
996 }
997
998 return 0;
999}
1000
1001static void scsi_end_flush_fn(request_queue_t *q, struct request *rq)
1002{
1003 struct scsi_device *sdev = q->queuedata;
1004 struct request *flush_rq = rq->end_io_data;
1005 struct scsi_driver *drv;
1006
1007 if (flush_rq->errors) {
1008 printk("scsi: barrier error, disabling flush support\n");
1009 blk_queue_ordered(q, QUEUE_ORDERED_NONE);
1010 }
1011
1012 if (sdev->sdev_state == SDEV_RUNNING) {
1013 drv = *(struct scsi_driver **) rq->rq_disk->private_data;
1014 drv->end_flush(q, rq);
1015 }
1016}
1017
1018static int scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk,
1019 sector_t *error_sector)
1020{
1021 struct scsi_device *sdev = q->queuedata;
1022 struct scsi_driver *drv;
1023
1024 if (sdev->sdev_state != SDEV_RUNNING)
1025 return -ENXIO;
1026
1027 drv = *(struct scsi_driver **) disk->private_data;
1028 if (drv->issue_flush)
1029 return drv->issue_flush(&sdev->sdev_gendev, error_sector);
1030
1031 return -EOPNOTSUPP;
1032}
1033
/*
 * scsi_prep_fn - request preparation callback installed on SCSI queues
 * (see blk_queue_prep_rq() in scsi_alloc_queue()).
 *
 * Checks the device state, attaches a struct scsi_cmnd to @req through
 * req->special and, for REQ_CMD / REQ_BLOCK_PC requests, sets up the
 * scatter-gather table and lets the upper-level driver initialise the
 * command.
 *
 * Returns:
 *   BLKPREP_OK    - request fully prepared; REQ_DONTPREP has been set.
 *   BLKPREP_DEFER - no command could be allocated, or the device is in
 *                   SDEV_QUIESCE / SDEV_BLOCK and the request is not special.
 *   BLKPREP_KILL  - device offline/deleted/being removed, unknown request
 *                   type, or the driver's ->init_command() returned 0.
 *   Any non-zero value returned by scsi_init_io() is passed straight back.
 */
static int scsi_prep_fn(struct request_queue *q, struct request *req)
{
	struct scsi_device *sdev = q->queuedata;
	struct scsi_cmnd *cmd;
	/* 0 while SDEV_RUNNING, otherwise the current (non-running) state */
	int specials_only = 0;

	/*
	 * Just check to see if the device is online.  If it isn't, we
	 * refuse to process any commands.  The device must be brought
	 * online before trying any recovery commands.
	 */
	if (unlikely(!scsi_device_online(sdev))) {
		printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to offline device\n",
		       sdev->host->host_no, sdev->id, sdev->lun);
		return BLKPREP_KILL;
	}
	if (unlikely(sdev->sdev_state != SDEV_RUNNING)) {
		/* OK, we're not in a running state, so don't prep
		 * user commands */
		if (sdev->sdev_state == SDEV_DEL) {
			/* Device is fully deleted, no commands
			 * at all allowed down */
			printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to dead device\n",
			       sdev->host->host_no, sdev->id, sdev->lun);
			return BLKPREP_KILL;
		}
		/* OK, we only allow special commands (i.e. not
		 * user initiated ones); remember which state we are in */
		specials_only = sdev->sdev_state;
	}

	/*
	 * Find the actual device driver associated with this command.
	 * The SPECIAL requests are things like character device or
	 * ioctls, which did not originate from ll_rw_blk.  Note that
	 * the special field is also used to indicate the cmd for
	 * the remainder of a partially fulfilled request that can
	 * come up when there is a medium error.  We have to treat
	 * these two cases differently.  The code below tells them apart
	 * by checking whether req->special carries SCSI_REQ_MAGIC (a
	 * struct scsi_request) or not (already a struct scsi_cmnd).
	 */
	if (req->flags & REQ_SPECIAL) {
		struct scsi_request *sreq = req->special;

		if (sreq->sr_magic == SCSI_REQ_MAGIC) {
			/* scsi_request: allocate a command and copy it over */
			cmd = scsi_get_command(sreq->sr_device, GFP_ATOMIC);
			if (unlikely(!cmd))
				goto defer;
			scsi_init_cmd_from_req(cmd, sreq);
		} else
			cmd = req->special;
	} else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {

		if(unlikely(specials_only)) {
			/* quiesced or blocked: hold the request for later */
			if(specials_only == SDEV_QUIESCE ||
					specials_only == SDEV_BLOCK)
				return BLKPREP_DEFER;

			printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to device being removed\n",
			       sdev->host->host_no, sdev->id, sdev->lun);
			return BLKPREP_KILL;
		}


		/*
		 * Now try and find a command block that we can use.
		 * A non-NULL req->special is reused as the command.
		 */
		if (!req->special) {
			cmd = scsi_get_command(sdev, GFP_ATOMIC);
			if (unlikely(!cmd))
				goto defer;
		} else
			cmd = req->special;

		/* pull a tag out of the request if we have one */
		cmd->tag = req->tag;
	} else {
		blk_dump_rq_flags(req, "SCSI bad req");
		return BLKPREP_KILL;
	}

	/* note the overloading of req->special.  When the tag
	 * is active it always means cmd.  If the tag goes
	 * back for re-queueing, it may be reset */
	req->special = cmd;
	cmd->request = req;

	/*
	 * FIXME: drop the lock here because the functions below
	 * expect to be called without the queue lock held.  Also,
	 * previously, we dequeued the request before dropping the
	 * lock.  We hope REQ_STARTED prevents anything untoward from
	 * happening now.
	 *
	 * NOTE(review): no lock is actually dropped in this function.
	 */
	if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
		struct scsi_driver *drv;
		int ret;

		/*
		 * This will do a couple of things:
		 *  1) Fill in the actual SCSI command.
		 *  2) Fill in any other upper-level specific fields
		 * (timeout).
		 *
		 * If ->init_command() returns 0, it means that the request
		 * failed (reading past end of disk, reading offline device,
		 * etc).  This won't actually talk to the device, but
		 * some kinds of consistency checking may cause the
		 * request to be rejected immediately.
		 */

		/*
		 * This sets up the scatter-gather table (allocating if
		 * required).
		 */
		ret = scsi_init_io(cmd);
		if (ret)	/* BLKPREP_KILL return also releases the command */
			return ret;

		/*
		 * Initialize the actual SCSI command for this request.
		 */
		drv = *(struct scsi_driver **)req->rq_disk->private_data;
		if (unlikely(!drv->init_command(cmd))) {
			/* driver rejected it: undo scsi_init_io() and free cmd */
			scsi_release_buffers(cmd);
			scsi_put_command(cmd);
			return BLKPREP_KILL;
		}
	}

	/*
	 * The request is now prepped, no need to come back here
	 */
	req->flags |= REQ_DONTPREP;
	return BLKPREP_OK;

 defer:
	/* If we defer, the elv_next_request() returns NULL, but the
	 * queue must be restarted, so we plug here if no returning
	 * command will automatically do that. */
	if (sdev->device_busy == 0)
		blk_plug_device(q);
	return BLKPREP_DEFER;
}
1178
1179/*
1180 * scsi_dev_queue_ready: if we can send requests to sdev, return 1 else
1181 * return 0.
1182 *
1183 * Called with the queue_lock held.
1184 */
1185static inline int scsi_dev_queue_ready(struct request_queue *q,
1186 struct scsi_device *sdev)
1187{
1188 if (sdev->device_busy >= sdev->queue_depth)
1189 return 0;
1190 if (sdev->device_busy == 0 && sdev->device_blocked) {
1191 /*
1192 * unblock after device_blocked iterates to zero
1193 */
1194 if (--sdev->device_blocked == 0) {
1195 SCSI_LOG_MLQUEUE(3,
1196 printk("scsi%d (%d:%d) unblocking device at"
1197 " zero depth\n", sdev->host->host_no,
1198 sdev->id, sdev->lun));
1199 } else {
1200 blk_plug_device(q);
1201 return 0;
1202 }
1203 }
1204 if (sdev->device_blocked)
1205 return 0;
1206
1207 return 1;
1208}
1209
1210/*
1211 * scsi_host_queue_ready: if we can send requests to shost, return 1 else
1212 * return 0. We must end up running the queue again whenever 0 is
1213 * returned, else IO can hang.
1214 *
1215 * Called with host_lock held.
1216 */
1217static inline int scsi_host_queue_ready(struct request_queue *q,
1218 struct Scsi_Host *shost,
1219 struct scsi_device *sdev)
1220{
1221 if (test_bit(SHOST_RECOVERY, &shost->shost_state))
1222 return 0;
1223 if (shost->host_busy == 0 && shost->host_blocked) {
1224 /*
1225 * unblock after host_blocked iterates to zero
1226 */
1227 if (--shost->host_blocked == 0) {
1228 SCSI_LOG_MLQUEUE(3,
1229 printk("scsi%d unblocking host at zero depth\n",
1230 shost->host_no));
1231 } else {
1232 blk_plug_device(q);
1233 return 0;
1234 }
1235 }
1236 if ((shost->can_queue > 0 && shost->host_busy >= shost->can_queue) ||
1237 shost->host_blocked || shost->host_self_blocked) {
1238 if (list_empty(&sdev->starved_entry))
1239 list_add_tail(&sdev->starved_entry, &shost->starved_list);
1240 return 0;
1241 }
1242
1243 /* We're OK to process the command, so we can't be starved */
1244 if (!list_empty(&sdev->starved_entry))
1245 list_del_init(&sdev->starved_entry);
1246
1247 return 1;
1248}
1249
1250/*
1251 * Kill requests for a dead device
1252 */
1253static void scsi_kill_requests(request_queue_t *q)
1254{
1255 struct request *req;
1256
1257 while ((req = elv_next_request(q)) != NULL) {
1258 blkdev_dequeue_request(req);
1259 req->flags |= REQ_QUIET;
1260 while (end_that_request_first(req, 0, req->nr_sectors))
1261 ;
1262 end_that_request_last(req);
1263 }
1264}
1265
/*
 * Function:    scsi_request_fn()
 *
 * Purpose:     Main strategy routine for SCSI.  Pulls prepared requests
 *              off the queue and dispatches them to the low-level driver
 *              until the queue is plugged or empty, or the device or host
 *              cannot accept more.
 *
 * Arguments:   q       - Pointer to actual queue.
 *
 * Returns:     Nothing
 *
 * Lock status: IO request lock (q->queue_lock) assumed to be held when
 *              called; it is dropped and re-taken internally and is held
 *              again on return.
 */
static void scsi_request_fn(struct request_queue *q)
{
	struct scsi_device *sdev = q->queuedata;
	struct Scsi_Host *shost;
	struct scsi_cmnd *cmd;
	struct request *req;

	/* queue without a device: fail everything still pending */
	if (!sdev) {
		printk("scsi: killing requests for dead queue\n");
		scsi_kill_requests(q);
		return;
	}

	/* hold a device reference for the duration; dropped at "out" */
	if(!get_device(&sdev->sdev_gendev))
		/* We must be tearing the block queue down already */
		return;

	/*
	 * To start with, we keep looping until the queue is empty, or until
	 * the host is no longer able to accept any more requests.
	 */
	shost = sdev->host;
	while (!blk_queue_plugged(q)) {
		int rtn;
		/*
		 * get next queueable request.  We do this early to make sure
		 * that the request is fully prepared even if we cannot
		 * accept it.
		 */
		req = elv_next_request(q);
		if (!req || !scsi_dev_queue_ready(q, sdev))
			break;

		/* offline device: complete the request quietly, try the next */
		if (unlikely(!scsi_device_online(sdev))) {
			printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to offline device\n",
			       sdev->host->host_no, sdev->id, sdev->lun);
			blkdev_dequeue_request(req);
			req->flags |= REQ_QUIET;
			while (end_that_request_first(req, 0, req->nr_sectors))
				;
			end_that_request_last(req);
			continue;
		}


		/*
		 * Remove the request from the request list.  The explicit
		 * dequeue is skipped only when the queue is tagged and
		 * blk_queue_start_tag() returned 0.
		 */
		if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
			blkdev_dequeue_request(req);
		sdev->device_busy++;

		/* trade the queue lock for the host lock (non-irq variants) */
		spin_unlock(q->queue_lock);
		spin_lock(shost->host_lock);

		if (!scsi_host_queue_ready(q, shost, sdev))
			goto not_ready;
		if (sdev->single_lun) {
			/* only one device per target may be active at a time */
			if (scsi_target(sdev)->starget_sdev_user &&
			    scsi_target(sdev)->starget_sdev_user != sdev)
				goto not_ready;
			scsi_target(sdev)->starget_sdev_user = sdev;
		}
		shost->host_busy++;

		/*
		 * XXX(hch): This is rather suboptimal, scsi_dispatch_cmd will
		 *		take the lock again.
		 */
		spin_unlock_irq(shost->host_lock);

		/* scsi_prep_fn() stored the command in req->special */
		cmd = req->special;
		if (unlikely(cmd == NULL)) {
			/* NOTE(review): this message has no trailing newline */
			printk(KERN_CRIT "impossible request in %s.\n"
					 "please mail a stack trace to "
					 "linux-scsi@vger.kernel.org",
					 __FUNCTION__);
			BUG();
		}

		/*
		 * Finally, initialize any error handling parameters, and set up
		 * the timers for timeouts.
		 */
		scsi_init_cmd_errh(cmd);

		/*
		 * Dispatch the command to the low-level driver.
		 */
		rtn = scsi_dispatch_cmd(cmd);
		spin_lock_irq(q->queue_lock);
		if(rtn) {
			/* we're refusing the command; because of
			 * the way locks get dropped, we need to
			 * check here if plugging is required */
			if(sdev->device_busy == 0)
				blk_plug_device(q);

			break;
		}
	}

	goto out;

 not_ready:
	/* reached with host_lock held and queue_lock released */
	spin_unlock_irq(shost->host_lock);

	/*
	 * lock q, handle tag, requeue req, and decrement device_busy.  We
	 * must return with queue_lock held.
	 *
	 * Decrementing device_busy without checking it is OK, as all such
	 * cases (host limits or settings) should run the queue at some
	 * later time.
	 */
	spin_lock_irq(q->queue_lock);
	blk_requeue_request(q, req);
	sdev->device_busy--;
	if(sdev->device_busy == 0)
		blk_plug_device(q);
 out:
	/* must be careful here...if we trigger the ->remove() function
	 * we cannot be holding the q lock */
	spin_unlock_irq(q->queue_lock);
	put_device(&sdev->sdev_gendev);
	spin_lock_irq(q->queue_lock);
}
1404
1405u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
1406{
1407 struct device *host_dev;
1408 u64 bounce_limit = 0xffffffff;
1409
1410 if (shost->unchecked_isa_dma)
1411 return BLK_BOUNCE_ISA;
1412 /*
1413 * Platforms with virtual-DMA translation
1414 * hardware have no practical limit.
1415 */
1416 if (!PCI_DMA_BUS_IS_PHYS)
1417 return BLK_BOUNCE_ANY;
1418
1419 host_dev = scsi_get_device(shost);
1420 if (host_dev && host_dev->dma_mask)
1421 bounce_limit = *host_dev->dma_mask;
1422
1423 return bounce_limit;
1424}
1425EXPORT_SYMBOL(scsi_calculate_bounce_limit);
1426
1427struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
1428{
1429 struct Scsi_Host *shost = sdev->host;
1430 struct request_queue *q;
1431
152587d2005-04-12 16:22:06 -05001432 q = blk_init_queue(scsi_request_fn, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001433 if (!q)
1434 return NULL;
1435
1436 blk_queue_prep_rq(q, scsi_prep_fn);
1437
1438 blk_queue_max_hw_segments(q, shost->sg_tablesize);
1439 blk_queue_max_phys_segments(q, SCSI_MAX_PHYS_SEGMENTS);
1440 blk_queue_max_sectors(q, shost->max_sectors);
1441 blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
1442 blk_queue_segment_boundary(q, shost->dma_boundary);
1443 blk_queue_issue_flush_fn(q, scsi_issue_flush_fn);
1444
1445 /*
1446 * ordered tags are superior to flush ordering
1447 */
1448 if (shost->ordered_tag)
1449 blk_queue_ordered(q, QUEUE_ORDERED_TAG);
1450 else if (shost->ordered_flush) {
1451 blk_queue_ordered(q, QUEUE_ORDERED_FLUSH);
1452 q->prepare_flush_fn = scsi_prepare_flush_fn;
1453 q->end_flush_fn = scsi_end_flush_fn;
1454 }
1455
1456 if (!shost->use_clustering)
1457 clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
1458 return q;
1459}
1460
/*
 * scsi_free_queue - release a request queue by handing it to
 * blk_cleanup_queue().
 */
void scsi_free_queue(struct request_queue *q)
{
	blk_cleanup_queue(q);
}
1465
1466/*
1467 * Function: scsi_block_requests()
1468 *
1469 * Purpose: Utility function used by low-level drivers to prevent further
1470 * commands from being queued to the device.
1471 *
1472 * Arguments: shost - Host in question
1473 *
1474 * Returns: Nothing
1475 *
1476 * Lock status: No locks are assumed held.
1477 *
1478 * Notes: There is no timer nor any other means by which the requests
1479 * get unblocked other than the low-level driver calling
1480 * scsi_unblock_requests().
1481 */
1482void scsi_block_requests(struct Scsi_Host *shost)
1483{
1484 shost->host_self_blocked = 1;
1485}
1486EXPORT_SYMBOL(scsi_block_requests);
1487
1488/*
1489 * Function: scsi_unblock_requests()
1490 *
1491 * Purpose: Utility function used by low-level drivers to allow further
1492 * commands from being queued to the device.
1493 *
1494 * Arguments: shost - Host in question
1495 *
1496 * Returns: Nothing
1497 *
1498 * Lock status: No locks are assumed held.
1499 *
1500 * Notes: There is no timer nor any other means by which the requests
1501 * get unblocked other than the low-level driver calling
1502 * scsi_unblock_requests().
1503 *
1504 * This is done as an API function so that changes to the
1505 * internals of the scsi mid-layer won't require wholesale
1506 * changes to drivers that use this feature.
1507 */
1508void scsi_unblock_requests(struct Scsi_Host *shost)
1509{
1510 shost->host_self_blocked = 0;
1511 scsi_run_host_queues(shost);
1512}
1513EXPORT_SYMBOL(scsi_unblock_requests);
1514
1515int __init scsi_init_queue(void)
1516{
1517 int i;
1518
1519 for (i = 0; i < SG_MEMPOOL_NR; i++) {
1520 struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
1521 int size = sgp->size * sizeof(struct scatterlist);
1522
1523 sgp->slab = kmem_cache_create(sgp->name, size, 0,
1524 SLAB_HWCACHE_ALIGN, NULL, NULL);
1525 if (!sgp->slab) {
1526 printk(KERN_ERR "SCSI: can't init sg slab %s\n",
1527 sgp->name);
1528 }
1529
1530 sgp->pool = mempool_create(SG_MEMPOOL_SIZE,
1531 mempool_alloc_slab, mempool_free_slab,
1532 sgp->slab);
1533 if (!sgp->pool) {
1534 printk(KERN_ERR "SCSI: can't init sg mempool %s\n",
1535 sgp->name);
1536 }
1537 }
1538
1539 return 0;
1540}
1541
1542void scsi_exit_queue(void)
1543{
1544 int i;
1545
1546 for (i = 0; i < SG_MEMPOOL_NR; i++) {
1547 struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
1548 mempool_destroy(sgp->pool);
1549 kmem_cache_destroy(sgp->slab);
1550 }
1551}
1552/**
1553 * __scsi_mode_sense - issue a mode sense, falling back from 10 to
1554 * six bytes if necessary.
1555 * @sreq: SCSI request to fill in with the MODE_SENSE
1556 * @dbd: set if mode sense will allow block descriptors to be returned
1557 * @modepage: mode page being requested
1558 * @buffer: request buffer (may not be smaller than eight bytes)
1559 * @len: length of request buffer.
1560 * @timeout: command timeout
1561 * @retries: number of retries before failing
1562 * @data: returns a structure abstracting the mode header data
1563 *
1564 * Returns zero if unsuccessful, or the header offset (either 4
1565 * or 8 depending on whether a six or ten byte command was
1566 * issued) if successful.
1567 **/
1568int
1569__scsi_mode_sense(struct scsi_request *sreq, int dbd, int modepage,
1570 unsigned char *buffer, int len, int timeout, int retries,
1571 struct scsi_mode_data *data) {
1572 unsigned char cmd[12];
1573 int use_10_for_ms;
1574 int header_length;
1575
1576 memset(data, 0, sizeof(*data));
1577 memset(&cmd[0], 0, 12);
1578 cmd[1] = dbd & 0x18; /* allows DBD and LLBA bits */
1579 cmd[2] = modepage;
1580
1581 retry:
1582 use_10_for_ms = sreq->sr_device->use_10_for_ms;
1583
1584 if (use_10_for_ms) {
1585 if (len < 8)
1586 len = 8;
1587
1588 cmd[0] = MODE_SENSE_10;
1589 cmd[8] = len;
1590 header_length = 8;
1591 } else {
1592 if (len < 4)
1593 len = 4;
1594
1595 cmd[0] = MODE_SENSE;
1596 cmd[4] = len;
1597 header_length = 4;
1598 }
1599
1600 sreq->sr_cmd_len = 0;
1601 memset(sreq->sr_sense_buffer, 0, sizeof(sreq->sr_sense_buffer));
1602 sreq->sr_data_direction = DMA_FROM_DEVICE;
1603
1604 memset(buffer, 0, len);
1605
1606 scsi_wait_req(sreq, cmd, buffer, len, timeout, retries);
1607
1608 /* This code looks awful: what it's doing is making sure an
1609 * ILLEGAL REQUEST sense return identifies the actual command
1610 * byte as the problem. MODE_SENSE commands can return
1611 * ILLEGAL REQUEST if the code page isn't supported */
1612
1613 if (use_10_for_ms && !scsi_status_is_good(sreq->sr_result) &&
1614 (driver_byte(sreq->sr_result) & DRIVER_SENSE)) {
1615 struct scsi_sense_hdr sshdr;
1616
1617 if (scsi_request_normalize_sense(sreq, &sshdr)) {
1618 if ((sshdr.sense_key == ILLEGAL_REQUEST) &&
1619 (sshdr.asc == 0x20) && (sshdr.ascq == 0)) {
1620 /*
1621 * Invalid command operation code
1622 */
1623 sreq->sr_device->use_10_for_ms = 0;
1624 goto retry;
1625 }
1626 }
1627 }
1628
1629 if(scsi_status_is_good(sreq->sr_result)) {
1630 data->header_length = header_length;
1631 if(use_10_for_ms) {
1632 data->length = buffer[0]*256 + buffer[1] + 2;
1633 data->medium_type = buffer[2];
1634 data->device_specific = buffer[3];
1635 data->longlba = buffer[4] & 0x01;
1636 data->block_descriptor_length = buffer[6]*256
1637 + buffer[7];
1638 } else {
1639 data->length = buffer[0] + 1;
1640 data->medium_type = buffer[1];
1641 data->device_specific = buffer[2];
1642 data->block_descriptor_length = buffer[3];
1643 }
1644 }
1645
1646 return sreq->sr_result;
1647}
1648EXPORT_SYMBOL(__scsi_mode_sense);
1649
1650/**
1651 * scsi_mode_sense - issue a mode sense, falling back from 10 to
1652 * six bytes if necessary.
1653 * @sdev: scsi device to send command to.
1654 * @dbd: set if mode sense will disable block descriptors in the return
1655 * @modepage: mode page being requested
1656 * @buffer: request buffer (may not be smaller than eight bytes)
1657 * @len: length of request buffer.
1658 * @timeout: command timeout
1659 * @retries: number of retries before failing
1660 *
1661 * Returns zero if unsuccessful, or the header offset (either 4
1662 * or 8 depending on whether a six or ten byte command was
1663 * issued) if successful.
1664 **/
1665int
1666scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage,
1667 unsigned char *buffer, int len, int timeout, int retries,
1668 struct scsi_mode_data *data)
1669{
1670 struct scsi_request *sreq = scsi_allocate_request(sdev, GFP_KERNEL);
1671 int ret;
1672
1673 if (!sreq)
1674 return -1;
1675
1676 ret = __scsi_mode_sense(sreq, dbd, modepage, buffer, len,
1677 timeout, retries, data);
1678
1679 scsi_release_request(sreq);
1680
1681 return ret;
1682}
1683EXPORT_SYMBOL(scsi_mode_sense);
1684
1685int
1686scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries)
1687{
1688 struct scsi_request *sreq;
1689 char cmd[] = {
1690 TEST_UNIT_READY, 0, 0, 0, 0, 0,
1691 };
1692 int result;
1693
1694 sreq = scsi_allocate_request(sdev, GFP_KERNEL);
1695 if (!sreq)
1696 return -ENOMEM;
1697
1698 sreq->sr_data_direction = DMA_NONE;
1699 scsi_wait_req(sreq, cmd, NULL, 0, timeout, retries);
1700
1701 if ((driver_byte(sreq->sr_result) & DRIVER_SENSE) && sdev->removable) {
1702 struct scsi_sense_hdr sshdr;
1703
1704 if ((scsi_request_normalize_sense(sreq, &sshdr)) &&
1705 ((sshdr.sense_key == UNIT_ATTENTION) ||
1706 (sshdr.sense_key == NOT_READY))) {
1707 sdev->changed = 1;
1708 sreq->sr_result = 0;
1709 }
1710 }
1711 result = sreq->sr_result;
1712 scsi_release_request(sreq);
1713 return result;
1714}
1715EXPORT_SYMBOL(scsi_test_unit_ready);
1716
1717/**
1718 * scsi_device_set_state - Take the given device through the device
1719 * state model.
1720 * @sdev: scsi device to change the state of.
1721 * @state: state to change to.
1722 *
1723 * Returns zero if unsuccessful or an error if the requested
1724 * transition is illegal.
1725 **/
1726int
1727scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state)
1728{
1729 enum scsi_device_state oldstate = sdev->sdev_state;
1730
1731 if (state == oldstate)
1732 return 0;
1733
1734 switch (state) {
1735 case SDEV_CREATED:
1736 /* There are no legal states that come back to
1737 * created. This is the manually initialised start
1738 * state */
1739 goto illegal;
1740
1741 case SDEV_RUNNING:
1742 switch (oldstate) {
1743 case SDEV_CREATED:
1744 case SDEV_OFFLINE:
1745 case SDEV_QUIESCE:
1746 case SDEV_BLOCK:
1747 break;
1748 default:
1749 goto illegal;
1750 }
1751 break;
1752
1753 case SDEV_QUIESCE:
1754 switch (oldstate) {
1755 case SDEV_RUNNING:
1756 case SDEV_OFFLINE:
1757 break;
1758 default:
1759 goto illegal;
1760 }
1761 break;
1762
1763 case SDEV_OFFLINE:
1764 switch (oldstate) {
1765 case SDEV_CREATED:
1766 case SDEV_RUNNING:
1767 case SDEV_QUIESCE:
1768 case SDEV_BLOCK:
1769 break;
1770 default:
1771 goto illegal;
1772 }
1773 break;
1774
1775 case SDEV_BLOCK:
1776 switch (oldstate) {
1777 case SDEV_CREATED:
1778 case SDEV_RUNNING:
1779 break;
1780 default:
1781 goto illegal;
1782 }
1783 break;
1784
1785 case SDEV_CANCEL:
1786 switch (oldstate) {
1787 case SDEV_CREATED:
1788 case SDEV_RUNNING:
1789 case SDEV_OFFLINE:
1790 case SDEV_BLOCK:
1791 break;
1792 default:
1793 goto illegal;
1794 }
1795 break;
1796
1797 case SDEV_DEL:
1798 switch (oldstate) {
1799 case SDEV_CANCEL:
1800 break;
1801 default:
1802 goto illegal;
1803 }
1804 break;
1805
1806 }
1807 sdev->sdev_state = state;
1808 return 0;
1809
1810 illegal:
1811 SCSI_LOG_ERROR_RECOVERY(1,
1812 dev_printk(KERN_ERR, &sdev->sdev_gendev,
1813 "Illegal state transition %s->%s\n",
1814 scsi_device_state_name(oldstate),
1815 scsi_device_state_name(state))
1816 );
1817 return -EINVAL;
1818}
1819EXPORT_SYMBOL(scsi_device_set_state);
1820
1821/**
1822 * scsi_device_quiesce - Block user issued commands.
1823 * @sdev: scsi device to quiesce.
1824 *
1825 * This works by trying to transition to the SDEV_QUIESCE state
1826 * (which must be a legal transition). When the device is in this
1827 * state, only special requests will be accepted, all others will
1828 * be deferred. Since special requests may also be requeued requests,
1829 * a successful return doesn't guarantee the device will be
1830 * totally quiescent.
1831 *
1832 * Must be called with user context, may sleep.
1833 *
1834 * Returns zero if unsuccessful or an error if not.
1835 **/
1836int
1837scsi_device_quiesce(struct scsi_device *sdev)
1838{
1839 int err = scsi_device_set_state(sdev, SDEV_QUIESCE);
1840 if (err)
1841 return err;
1842
1843 scsi_run_queue(sdev->request_queue);
1844 while (sdev->device_busy) {
1845 msleep_interruptible(200);
1846 scsi_run_queue(sdev->request_queue);
1847 }
1848 return 0;
1849}
1850EXPORT_SYMBOL(scsi_device_quiesce);
1851
1852/**
1853 * scsi_device_resume - Restart user issued commands to a quiesced device.
1854 * @sdev: scsi device to resume.
1855 *
1856 * Moves the device from quiesced back to running and restarts the
1857 * queues.
1858 *
1859 * Must be called with user context, may sleep.
1860 **/
1861void
1862scsi_device_resume(struct scsi_device *sdev)
1863{
1864 if(scsi_device_set_state(sdev, SDEV_RUNNING))
1865 return;
1866 scsi_run_queue(sdev->request_queue);
1867}
1868EXPORT_SYMBOL(scsi_device_resume);
1869
/*
 * starget_for_each_device() callback: quiesce one device.  @data is unused
 * and the result of scsi_device_quiesce() is ignored.
 */
static void device_quiesce_fn(struct scsi_device *sdev, void *data)
{
	scsi_device_quiesce(sdev);
}
1875
1876void
1877scsi_target_quiesce(struct scsi_target *starget)
1878{
1879 starget_for_each_device(starget, NULL, device_quiesce_fn);
1880}
1881EXPORT_SYMBOL(scsi_target_quiesce);
1882
/*
 * starget_for_each_device() callback: resume one device.  @data is unused.
 */
static void device_resume_fn(struct scsi_device *sdev, void *data)
{
	scsi_device_resume(sdev);
}
1888
1889void
1890scsi_target_resume(struct scsi_target *starget)
1891{
1892 starget_for_each_device(starget, NULL, device_resume_fn);
1893}
1894EXPORT_SYMBOL(scsi_target_resume);
1895
1896/**
1897 * scsi_internal_device_block - internal function to put a device
1898 * temporarily into the SDEV_BLOCK state
1899 * @sdev: device to block
1900 *
1901 * Block request made by scsi lld's to temporarily stop all
1902 * scsi commands on the specified device. Called from interrupt
1903 * or normal process context.
1904 *
1905 * Returns zero if successful or error if not
1906 *
1907 * Notes:
1908 * This routine transitions the device to the SDEV_BLOCK state
1909 * (which must be a legal transition). When the device is in this
1910 * state, all commands are deferred until the scsi lld reenables
1911 * the device with scsi_device_unblock or device_block_tmo fires.
1912 * This routine assumes the host_lock is held on entry.
1913 **/
1914int
1915scsi_internal_device_block(struct scsi_device *sdev)
1916{
1917 request_queue_t *q = sdev->request_queue;
1918 unsigned long flags;
1919 int err = 0;
1920
1921 err = scsi_device_set_state(sdev, SDEV_BLOCK);
1922 if (err)
1923 return err;
1924
1925 /*
1926 * The device has transitioned to SDEV_BLOCK. Stop the
1927 * block layer from calling the midlayer with this device's
1928 * request queue.
1929 */
1930 spin_lock_irqsave(q->queue_lock, flags);
1931 blk_stop_queue(q);
1932 spin_unlock_irqrestore(q->queue_lock, flags);
1933
1934 return 0;
1935}
1936EXPORT_SYMBOL_GPL(scsi_internal_device_block);
1937
1938/**
1939 * scsi_internal_device_unblock - resume a device after a block request
1940 * @sdev: device to resume
1941 *
1942 * Called by scsi lld's or the midlayer to restart the device queue
1943 * for the previously suspended scsi device. Called from interrupt or
1944 * normal process context.
1945 *
1946 * Returns zero if successful or error if not.
1947 *
1948 * Notes:
1949 * This routine transitions the device to the SDEV_RUNNING state
1950 * (which must be a legal transition) allowing the midlayer to
1951 * goose the queue for this device. This routine assumes the
1952 * host_lock is held upon entry.
1953 **/
1954int
1955scsi_internal_device_unblock(struct scsi_device *sdev)
1956{
1957 request_queue_t *q = sdev->request_queue;
1958 int err;
1959 unsigned long flags;
1960
1961 /*
1962 * Try to transition the scsi device to SDEV_RUNNING
1963 * and goose the device queue if successful.
1964 */
1965 err = scsi_device_set_state(sdev, SDEV_RUNNING);
1966 if (err)
1967 return err;
1968
1969 spin_lock_irqsave(q->queue_lock, flags);
1970 blk_start_queue(q);
1971 spin_unlock_irqrestore(q->queue_lock, flags);
1972
1973 return 0;
1974}
1975EXPORT_SYMBOL_GPL(scsi_internal_device_unblock);
1976
/*
 * starget_for_each_device() callback: block one device.  @data is unused
 * and the result of scsi_internal_device_block() is ignored.
 */
static void device_block(struct scsi_device *sdev, void *data)
{
	scsi_internal_device_block(sdev);
}
1982
1983static int
1984target_block(struct device *dev, void *data)
1985{
1986 if (scsi_is_target_device(dev))
1987 starget_for_each_device(to_scsi_target(dev), NULL,
1988 device_block);
1989 return 0;
1990}
1991
1992void
1993scsi_target_block(struct device *dev)
1994{
1995 if (scsi_is_target_device(dev))
1996 starget_for_each_device(to_scsi_target(dev), NULL,
1997 device_block);
1998 else
1999 device_for_each_child(dev, NULL, target_block);
2000}
2001EXPORT_SYMBOL_GPL(scsi_target_block);
2002
/*
 * starget_for_each_device() callback: unblock one device.  @data is unused
 * and the result of scsi_internal_device_unblock() is ignored.
 */
static void device_unblock(struct scsi_device *sdev, void *data)
{
	scsi_internal_device_unblock(sdev);
}
2008
2009static int
2010target_unblock(struct device *dev, void *data)
2011{
2012 if (scsi_is_target_device(dev))
2013 starget_for_each_device(to_scsi_target(dev), NULL,
2014 device_unblock);
2015 return 0;
2016}
2017
2018void
2019scsi_target_unblock(struct device *dev)
2020{
2021 if (scsi_is_target_device(dev))
2022 starget_for_each_device(to_scsi_target(dev), NULL,
2023 device_unblock);
2024 else
2025 device_for_each_child(dev, NULL, target_unblock);
2026}
2027EXPORT_SYMBOL_GPL(scsi_target_unblock);