io context sharing: preliminary support
Detach task state from the ioc; instead, keep track of how many processes
are accessing the ioc.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index b9bb02e..d4550ec 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3854,12 +3854,13 @@
}
/*
- * IO Context helper functions
+ * IO Context helper functions. put_io_context() returns 1 if there are no
+ * more users of this io context, 0 otherwise.
*/
-void put_io_context(struct io_context *ioc)
+int put_io_context(struct io_context *ioc)
{
if (ioc == NULL)
- return;
+ return 1;
BUG_ON(atomic_read(&ioc->refcount) == 0);
@@ -3878,7 +3879,9 @@
rcu_read_unlock();
kmem_cache_free(iocontext_cachep, ioc);
+ return 1;
}
+ return 0;
}
EXPORT_SYMBOL(put_io_context);
@@ -3893,15 +3896,17 @@
current->io_context = NULL;
task_unlock(current);
- ioc->task = NULL;
- if (ioc->aic && ioc->aic->exit)
- ioc->aic->exit(ioc->aic);
- if (ioc->cic_root.rb_node != NULL) {
- cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node);
- cic->exit(ioc);
- }
+ if (atomic_dec_and_test(&ioc->nr_tasks)) {
+ if (ioc->aic && ioc->aic->exit)
+ ioc->aic->exit(ioc->aic);
+ if (ioc->cic_root.rb_node != NULL) {
+ cic = rb_entry(rb_first(&ioc->cic_root),
+ struct cfq_io_context, rb_node);
+ cic->exit(ioc);
+ }
- put_io_context(ioc);
+ put_io_context(ioc);
+ }
}
struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
@@ -3911,7 +3916,8 @@
ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
if (ret) {
atomic_set(&ret->refcount, 1);
- ret->task = current;
+ atomic_set(&ret->nr_tasks, 1);
+ spin_lock_init(&ret->lock);
ret->ioprio_changed = 0;
ret->ioprio = 0;
ret->last_waited = jiffies; /* doesn't matter... */
@@ -3959,10 +3965,18 @@
*/
struct io_context *get_io_context(gfp_t gfp_flags, int node)
{
- struct io_context *ret;
- ret = current_io_context(gfp_flags, node);
- if (likely(ret))
- atomic_inc(&ret->refcount);
+ struct io_context *ret = NULL;
+
+ /*
+ * Check for unlikely race with exiting task. ioc ref count is
+ * zero when ioc is being detached.
+ */
+ do {
+ ret = current_io_context(gfp_flags, node);
+ if (unlikely(!ret))
+ break;
+ } while (!atomic_inc_not_zero(&ret->refcount));
+
return ret;
}
EXPORT_SYMBOL(get_io_context);
diff --git a/fs/ioprio.c b/fs/ioprio.c
index a760040..06b5d97 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -54,7 +54,6 @@
break;
}
task->io_context = ioc;
- ioc->task = task;
} while (1);
if (!err) {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 510a18b..2483a05 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -34,7 +34,7 @@
#define BLKDEV_MIN_RQ 4
#define BLKDEV_MAX_RQ 128 /* Default maximum */
-void put_io_context(struct io_context *ioc);
+int put_io_context(struct io_context *ioc);
void exit_io_context(void);
struct io_context *get_io_context(gfp_t gfp_flags, int node);
struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 186807e..cd44d45 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -54,13 +54,15 @@
};
/*
- * This is the per-process I/O subsystem state. It is refcounted and
- * kmalloc'ed. Currently all fields are modified in process io context
- * (apart from the atomic refcount), so require no locking.
+ * I/O subsystem state of the associated processes. It is refcounted
+ * and kmalloc'ed. It can be shared between processes.
*/
struct io_context {
atomic_t refcount;
- struct task_struct *task;
+ atomic_t nr_tasks;
+
+ /* all the fields below are protected by this lock */
+ spinlock_t lock;
unsigned short ioprio;
unsigned short ioprio_changed;
@@ -76,4 +78,16 @@
void *ioc_data;
};
+/*
+ * Attach one more task to @ioc so that it can be shared.
+ *
+ * Takes a reference on the io context and accounts for the new task in
+ * nr_tasks. Returns @ioc on success, or NULL if @ioc is NULL or its
+ * reference count has already dropped to zero.
+ */
+static inline struct io_context *ioc_task_link(struct io_context *ioc)
+{
+	/*
+	 * if ref count is zero, don't allow sharing (ioc is going away, it's
+	 * a race).
+	 */
+	if (ioc && atomic_inc_not_zero(&ioc->refcount)) {
+		/*
+		 * exit_io_context() decrements nr_tasks once per exiting
+		 * task and tears the context down when it reaches zero, so
+		 * every linked task must be counted here.
+		 */
+		atomic_inc(&ioc->nr_tasks);
+		return ioc;
+	}
+
+	return NULL;
+}
+
#endif
diff --git a/kernel/fork.c b/kernel/fork.c
index 2a86c9d..1987c57 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -805,7 +805,6 @@
if (unlikely(!tsk->io_context))
return -ENOMEM;
- tsk->io_context->task = tsk;
tsk->io_context->ioprio = ioc->ioprio;
}
#endif