Jens Axboe | fd0928d | 2008-01-24 08:52:45 +0100 | [diff] [blame] | 1 | #ifndef IOCONTEXT_H |
| 2 | #define IOCONTEXT_H |
| 3 | |
Jens Axboe | 4ac845a | 2008-01-24 08:44:49 +0100 | [diff] [blame] | 4 | #include <linux/radix-tree.h> |
Fabio Checconi | 34e6bbf | 2008-04-02 14:31:02 +0200 | [diff] [blame] | 5 | #include <linux/rcupdate.h> |
Tejun Heo | b2efa05 | 2011-12-14 00:33:39 +0100 | [diff] [blame] | 6 | #include <linux/workqueue.h> |
Jens Axboe | 4ac845a | 2008-01-24 08:44:49 +0100 | [diff] [blame] | 7 | |
/*
 * ICQ_* flag bits.  Presumably these are the bits kept in io_cq->flags;
 * confirm against the users in the block layer.
 */
enum {
	ICQ_IOPRIO_CHANGED	= 1 << 0,
	ICQ_CGROUP_CHANGED	= 1 << 1,
	ICQ_EXITED		= 1 << 2,

	/* union of the two "*_CHANGED" bits above; ICQ_EXITED is excluded */
	ICQ_CHANGED_MASK	= ICQ_IOPRIO_CHANGED | ICQ_CGROUP_CHANGED,
};
| 15 | |
Tejun Heo | f1f8cc9 | 2011-12-14 00:33:42 +0100 | [diff] [blame] | 16 | /* |
| 17 | * An io_cq (icq) is association between an io_context (ioc) and a |
| 18 | * request_queue (q). This is used by elevators which need to track |
| 19 | * information per ioc - q pair. |
| 20 | * |
| 21 | * Elevator can request use of icq by setting elevator_type->icq_size and |
| 22 | * ->icq_align. Both size and align must be larger than that of struct |
| 23 | * io_cq and elevator can use the tail area for private information. The |
| 24 | * recommended way to do this is defining a struct which contains io_cq as |
| 25 | * the first member followed by private members and using its size and |
| 26 | * align. For example, |
| 27 | * |
| 28 | * struct snail_io_cq { |
| 29 | * struct io_cq icq; |
| 30 | * int poke_snail; |
| 31 | * int feed_snail; |
| 32 | * }; |
| 33 | * |
| 34 | * struct elevator_type snail_elv_type { |
| 35 | * .ops = { ... }, |
| 36 | * .icq_size = sizeof(struct snail_io_cq), |
| 37 | * .icq_align = __alignof__(struct snail_io_cq), |
| 38 | * ... |
| 39 | * }; |
| 40 | * |
| 41 | * If icq_size is set, block core will manage icq's. All requests will |
| 42 | * have its ->elv.icq field set before elevator_ops->elevator_set_req_fn() |
| 43 | * is called and be holding a reference to the associated io_context. |
| 44 | * |
| 45 | * Whenever a new icq is created, elevator_ops->elevator_init_icq_fn() is |
| 46 | * called and, on destruction, ->elevator_exit_icq_fn(). Both functions |
| 47 | * are called with both the associated io_context and queue locks held. |
| 48 | * |
| 49 | * Elevator is allowed to lookup icq using ioc_lookup_icq() while holding |
| 50 | * queue lock but the returned icq is valid only until the queue lock is |
| 51 | * released. Elevators can not and should not try to create or destroy |
| 52 | * icq's. |
| 53 | * |
| 54 | * As icq's are linked from both ioc and q, the locking rules are a bit |
| 55 | * complex. |
| 56 | * |
| 57 | * - ioc lock nests inside q lock. |
| 58 | * |
| 59 | * - ioc->icq_list and icq->ioc_node are protected by ioc lock. |
| 60 | * q->icq_list and icq->q_node by q lock. |
| 61 | * |
| 62 | * - ioc->icq_tree and ioc->icq_hint are protected by ioc lock, while icq |
| 63 | * itself is protected by q lock. However, both the indexes and icq |
| 64 | * itself are also RCU managed and lookup can be performed holding only |
| 65 | * the q lock. |
| 66 | * |
| 67 | * - icq's are not reference counted. They are destroyed when either the |
| 68 | * ioc or q goes away. Each request with icq set holds an extra |
| 69 | * reference to ioc to ensure it stays until the request is completed. |
| 70 | * |
| 71 | * - Linking and unlinking icq's are performed while holding both ioc and q |
| 72 | * locks. Due to the lock ordering, q exit is simple but ioc exit |
| 73 | * requires reverse-order double lock dance. |
| 74 | */ |
Tejun Heo | c586980 | 2011-12-14 00:33:41 +0100 | [diff] [blame] | 75 | struct io_cq { |
| 76 | struct request_queue *q; |
| 77 | struct io_context *ioc; |
Jens Axboe | fd0928d | 2008-01-24 08:52:45 +0100 | [diff] [blame] | 78 | |
Tejun Heo | 7e5a879 | 2011-12-14 00:33:42 +0100 | [diff] [blame] | 79 | /* |
| 80 | * q_node and ioc_node link io_cq through icq_list of q and ioc |
| 81 | * respectively. Both fields are unused once ioc_exit_icq() is |
| 82 | * called and shared with __rcu_icq_cache and __rcu_head which are |
| 83 | * used for RCU free of io_cq. |
| 84 | */ |
| 85 | union { |
| 86 | struct list_head q_node; |
| 87 | struct kmem_cache *__rcu_icq_cache; |
| 88 | }; |
| 89 | union { |
| 90 | struct hlist_node ioc_node; |
| 91 | struct rcu_head __rcu_head; |
| 92 | }; |
Jens Axboe | fd0928d | 2008-01-24 08:52:45 +0100 | [diff] [blame] | 93 | |
Tejun Heo | d705ae6 | 2012-02-15 09:45:49 +0100 | [diff] [blame] | 94 | unsigned int flags; |
Jens Axboe | fd0928d | 2008-01-24 08:52:45 +0100 | [diff] [blame] | 95 | }; |
| 96 | |
| 97 | /* |
Jens Axboe | d38ecf9 | 2008-01-24 08:53:35 +0100 | [diff] [blame] | 98 | * I/O subsystem state of the associated processes. It is refcounted |
| 99 | * and kmalloc'ed. These could be shared between processes. |
Jens Axboe | fd0928d | 2008-01-24 08:52:45 +0100 | [diff] [blame] | 100 | */ |
| 101 | struct io_context { |
Nikanth Karthikesan | d9c7d39 | 2009-06-10 12:57:06 -0700 | [diff] [blame] | 102 | atomic_long_t refcount; |
Tejun Heo | f6e8d01 | 2012-03-05 13:15:26 -0800 | [diff] [blame^] | 103 | atomic_t active_ref; |
Jens Axboe | d38ecf9 | 2008-01-24 08:53:35 +0100 | [diff] [blame] | 104 | atomic_t nr_tasks; |
| 105 | |
| 106 | /* all the fields below are protected by this lock */ |
| 107 | spinlock_t lock; |
Jens Axboe | fd0928d | 2008-01-24 08:52:45 +0100 | [diff] [blame] | 108 | |
| 109 | unsigned short ioprio; |
Vivek Goyal | 31e4c28 | 2009-12-03 12:59:42 -0500 | [diff] [blame] | 110 | |
Jens Axboe | fd0928d | 2008-01-24 08:52:45 +0100 | [diff] [blame] | 111 | /* |
| 112 | * For request batching |
| 113 | */ |
Jens Axboe | fd0928d | 2008-01-24 08:52:45 +0100 | [diff] [blame] | 114 | int nr_batch_requests; /* Number of requests left in the batch */ |
Richard Kennedy | 58c24a6 | 2010-02-26 14:00:43 +0100 | [diff] [blame] | 115 | unsigned long last_waited; /* Time last woken after wait for request */ |
Jens Axboe | fd0928d | 2008-01-24 08:52:45 +0100 | [diff] [blame] | 116 | |
Tejun Heo | c586980 | 2011-12-14 00:33:41 +0100 | [diff] [blame] | 117 | struct radix_tree_root icq_tree; |
| 118 | struct io_cq __rcu *icq_hint; |
| 119 | struct hlist_head icq_list; |
Tejun Heo | b2efa05 | 2011-12-14 00:33:39 +0100 | [diff] [blame] | 120 | |
| 121 | struct work_struct release_work; |
Jens Axboe | fd0928d | 2008-01-24 08:52:45 +0100 | [diff] [blame] | 122 | }; |
| 123 | |
Tejun Heo | f6e8d01 | 2012-03-05 13:15:26 -0800 | [diff] [blame^] | 124 | /** |
| 125 | * get_io_context_active - get active reference on ioc |
| 126 | * @ioc: ioc of interest |
| 127 | * |
| 128 | * Only iocs with active reference can issue new IOs. This function |
| 129 | * acquires an active reference on @ioc. The caller must already have an |
| 130 | * active reference on @ioc. |
| 131 | */ |
| 132 | static inline void get_io_context_active(struct io_context *ioc) |
Jens Axboe | d38ecf9 | 2008-01-24 08:53:35 +0100 | [diff] [blame] | 133 | { |
Tejun Heo | 3d48749 | 2012-03-05 13:15:25 -0800 | [diff] [blame] | 134 | WARN_ON_ONCE(atomic_long_read(&ioc->refcount) <= 0); |
Tejun Heo | f6e8d01 | 2012-03-05 13:15:26 -0800 | [diff] [blame^] | 135 | WARN_ON_ONCE(atomic_read(&ioc->active_ref) <= 0); |
Tejun Heo | 3d48749 | 2012-03-05 13:15:25 -0800 | [diff] [blame] | 136 | atomic_long_inc(&ioc->refcount); |
Tejun Heo | f6e8d01 | 2012-03-05 13:15:26 -0800 | [diff] [blame^] | 137 | atomic_inc(&ioc->active_ref); |
| 138 | } |
| 139 | |
| 140 | static inline void ioc_task_link(struct io_context *ioc) |
| 141 | { |
| 142 | get_io_context_active(ioc); |
| 143 | |
| 144 | WARN_ON_ONCE(atomic_read(&ioc->nr_tasks) <= 0); |
Tejun Heo | 3d48749 | 2012-03-05 13:15:25 -0800 | [diff] [blame] | 145 | atomic_inc(&ioc->nr_tasks); |
Jens Axboe | d38ecf9 | 2008-01-24 08:53:35 +0100 | [diff] [blame] | 146 | } |
| 147 | |
struct task_struct;
#ifdef CONFIG_BLOCK
/* declarations only; the definitions are not in this header */
void put_io_context(struct io_context *ioc);
void put_io_context_active(struct io_context *ioc);
void exit_io_context(struct task_struct *task);
struct io_context *get_task_io_context(struct task_struct *task,
				       gfp_t gfp_flags, int node);
void ioc_ioprio_changed(struct io_context *ioc, int ioprio);
void ioc_cgroup_changed(struct io_context *ioc);
unsigned int icq_get_changed(struct io_cq *icq);
#else
/* !CONFIG_BLOCK: only these two helpers get empty inline stubs */
struct io_context;
static inline void put_io_context(struct io_context *ioc) { }
static inline void exit_io_context(struct task_struct *task) { }
#endif
| 163 | |
Jens Axboe | fd0928d | 2008-01-24 08:52:45 +0100 | [diff] [blame] | 164 | #endif |