#include <linux/wait.h>
#include <linux/backing-dev.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/writeback.h>
#include <linux/device.h>
#include <trace/events/writeback.h>

static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);

struct backing_dev_info noop_backing_dev_info = {
	.name		= "noop",
	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
};
EXPORT_SYMBOL_GPL(noop_backing_dev_info);

static struct class *bdi_class;

/*
 * bdi_lock protects updates to bdi_list. bdi_list has RCU reader side
 * locking.
 */
DEFINE_SPINLOCK(bdi_lock);
LIST_HEAD(bdi_list);

/* bdi_wq serves all asynchronous writeback tasks */
struct workqueue_struct *bdi_wq;

#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
#include <linux/seq_file.h>

static struct dentry *bdi_debug_root;

static void bdi_debug_init(void)
{
	bdi_debug_root = debugfs_create_dir("bdi", NULL);
}

static int bdi_debug_stats_show(struct seq_file *m, void *v)
{
	struct backing_dev_info *bdi = m->private;
	struct bdi_writeback *wb = &bdi->wb;
	unsigned long background_thresh;
	unsigned long dirty_thresh;
	unsigned long bdi_thresh;
	unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time;
	struct inode *inode;

	nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0;
	spin_lock(&wb->list_lock);
	list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
		nr_dirty++;
	list_for_each_entry(inode, &wb->b_io, i_wb_list)
		nr_io++;
	list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
		nr_more_io++;
	list_for_each_entry(inode, &wb->b_dirty_time, i_wb_list)
		if (inode->i_state & I_DIRTY_TIME)
			nr_dirty_time++;
	spin_unlock(&wb->list_lock);

	global_dirty_limits(&background_thresh, &dirty_thresh);
	bdi_thresh = wb_dirty_limit(wb, dirty_thresh);

#define K(x) ((x) << (PAGE_SHIFT - 10))	/* pages to kB */
	seq_printf(m,
		   "BdiWriteback:       %10lu kB\n"
		   "BdiReclaimable:     %10lu kB\n"
		   "BdiDirtyThresh:     %10lu kB\n"
		   "DirtyThresh:        %10lu kB\n"
		   "BackgroundThresh:   %10lu kB\n"
		   "BdiDirtied:         %10lu kB\n"
		   "BdiWritten:         %10lu kB\n"
		   "BdiWriteBandwidth:  %10lu kBps\n"
		   "b_dirty:            %10lu\n"
		   "b_io:               %10lu\n"
		   "b_more_io:          %10lu\n"
		   "b_dirty_time:       %10lu\n"
		   "bdi_list:           %10u\n"
		   "state:              %10lx\n",
		   (unsigned long) K(wb_stat(wb, WB_WRITEBACK)),
		   (unsigned long) K(wb_stat(wb, WB_RECLAIMABLE)),
		   K(bdi_thresh),
		   K(dirty_thresh),
		   K(background_thresh),
		   (unsigned long) K(wb_stat(wb, WB_DIRTIED)),
		   (unsigned long) K(wb_stat(wb, WB_WRITTEN)),
		   (unsigned long) K(wb->write_bandwidth),
		   nr_dirty,
		   nr_io,
		   nr_more_io,
		   nr_dirty_time,
		   !list_empty(&bdi->bdi_list), bdi->wb.state);
#undef K

	return 0;
}
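
/*
 * For orientation, reading /sys/kernel/debug/bdi/<name>/stats (where
 * <name> is typically "major:minor" for a block device) yields output in
 * the format built above.  The numbers below are purely illustrative,
 * not taken from a real device:
 *
 *	BdiWriteback:                0 kB
 *	BdiReclaimable:            880 kB
 *	BdiDirtyThresh:         193712 kB
 *	DirtyThresh:            193712 kB
 *	BackgroundThresh:        96856 kB
 *	BdiDirtied:            1764096 kB
 *	BdiWritten:            1763232 kB
 *	BdiWriteBandwidth:      101920 kBps
 *	b_dirty:                     4
 *	b_io:                        0
 *	b_more_io:                   0
 *	b_dirty_time:                0
 *	bdi_list:                    1
 *	state:                       4
 */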

static int bdi_debug_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, bdi_debug_stats_show, inode->i_private);
}

static const struct file_operations bdi_debug_stats_fops = {
	.open		= bdi_debug_stats_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
{
	bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root);
	bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir,
					       bdi, &bdi_debug_stats_fops);
}

static void bdi_debug_unregister(struct backing_dev_info *bdi)
{
	debugfs_remove(bdi->debug_stats);
	debugfs_remove(bdi->debug_dir);
}
#else
static inline void bdi_debug_init(void)
{
}
static inline void bdi_debug_register(struct backing_dev_info *bdi,
				      const char *name)
{
}
static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
{
}
#endif

static ssize_t read_ahead_kb_store(struct device *dev,
				   struct device_attribute *attr,
				   const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	unsigned long read_ahead_kb;
	ssize_t ret;

	ret = kstrtoul(buf, 10, &read_ahead_kb);
	if (ret < 0)
		return ret;

	bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10);

	return count;
}
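
/*
 * Worked example (illustrative device name): with 4 KiB pages,
 * PAGE_SHIFT - 10 == 2, so
 *
 *	echo 512 > /sys/class/bdi/8:0/read_ahead_kb
 *
 * stores 512 >> 2 == 128 pages in ->ra_pages, i.e. a 512 kB readahead
 * window.
 */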

#define K(pages) ((pages) << (PAGE_SHIFT - 10))

#define BDI_SHOW(name, expr)						\
static ssize_t name##_show(struct device *dev,				\
			   struct device_attribute *attr, char *page)	\
{									\
	struct backing_dev_info *bdi = dev_get_drvdata(dev);		\
									\
	return snprintf(page, PAGE_SIZE-1, "%lld\n", (long long)expr);	\
}									\
static DEVICE_ATTR_RW(name);

BDI_SHOW(read_ahead_kb, K(bdi->ra_pages))
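
/*
 * For reference, the invocation above expands (modulo whitespace) to:
 *
 *	static ssize_t read_ahead_kb_show(struct device *dev,
 *					  struct device_attribute *attr,
 *					  char *page)
 *	{
 *		struct backing_dev_info *bdi = dev_get_drvdata(dev);
 *
 *		return snprintf(page, PAGE_SIZE-1, "%lld\n",
 *				(long long)K(bdi->ra_pages));
 *	}
 *	static DEVICE_ATTR_RW(read_ahead_kb);
 *
 * DEVICE_ATTR_RW(name) pairs name##_show with the name##_store defined by
 * hand, yielding the dev_attr_read_ahead_kb used in bdi_dev_attrs below.
 */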

static ssize_t min_ratio_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	unsigned int ratio;
	ssize_t ret;

	ret = kstrtouint(buf, 10, &ratio);
	if (ret < 0)
		return ret;

	ret = bdi_set_min_ratio(bdi, ratio);
	if (!ret)
		ret = count;

	return ret;
}
BDI_SHOW(min_ratio, bdi->min_ratio)

static ssize_t max_ratio_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	unsigned int ratio;
	ssize_t ret;

	ret = kstrtouint(buf, 10, &ratio);
	if (ret < 0)
		return ret;

	ret = bdi_set_max_ratio(bdi, ratio);
	if (!ret)
		ret = count;

	return ret;
}
BDI_SHOW(max_ratio, bdi->max_ratio)

static ssize_t stable_pages_required_show(struct device *dev,
					  struct device_attribute *attr,
					  char *page)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);

	return snprintf(page, PAGE_SIZE-1, "%d\n",
			bdi_cap_stable_pages_required(bdi) ? 1 : 0);
}
static DEVICE_ATTR_RO(stable_pages_required);

static struct attribute *bdi_dev_attrs[] = {
	&dev_attr_read_ahead_kb.attr,
	&dev_attr_min_ratio.attr,
	&dev_attr_max_ratio.attr,
	&dev_attr_stable_pages_required.attr,
	NULL,
};
ATTRIBUTE_GROUPS(bdi_dev);

static __init int bdi_class_init(void)
{
	bdi_class = class_create(THIS_MODULE, "bdi");
	if (IS_ERR(bdi_class))
		return PTR_ERR(bdi_class);

	bdi_class->dev_groups = bdi_dev_groups;
	bdi_debug_init();
	return 0;
}
postcore_initcall(bdi_class_init);

static int __init default_bdi_init(void)
{
	int err;

	bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE |
					      WQ_UNBOUND | WQ_SYSFS, 0);
	if (!bdi_wq)
		return -ENOMEM;

	err = bdi_init(&noop_backing_dev_info);

	return err;
}
subsys_initcall(default_bdi_init);

/*
 * This function is used when the first inode for this wb is marked dirty. It
 * wakes up the corresponding writeback work which should then take care of
 * the periodic background write-out of dirty inodes. Since the write-out
 * would start only 'dirty_writeback_interval' centisecs from now anyway, we
 * just set up a timer which wakes the writeback work up later.
 *
 * Note, we wouldn't bother setting up the timer, but this function is on the
 * fast-path (used by '__mark_inode_dirty()'), so we save a few context
 * switches by delaying the wake-up.
 *
 * We have to be careful not to postpone flush work if it is scheduled for
 * earlier. Thus we use queue_delayed_work(), which is a no-op if the work is
 * already pending, instead of mod_delayed_work(), which would push an
 * earlier expiry further out.
 */
void wb_wakeup_delayed(struct bdi_writeback *wb)
{
	unsigned long timeout;

	timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
	spin_lock_bh(&wb->work_lock);
	if (test_bit(WB_registered, &wb->state))
		queue_delayed_work(bdi_wq, &wb->dwork, timeout);
	spin_unlock_bh(&wb->work_lock);
}

/*
 * Initial write bandwidth: 100 MB/s.  Bandwidth is tracked in pages per
 * second: 100 << 20 is 100 MiB in bytes, and shifting that right by
 * PAGE_SHIFT converts bytes to pages, i.e. 25600 pages/s with 4 KiB pages.
 */
#define INIT_BW	(100 << (20 - PAGE_SHIFT))

static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
		   gfp_t gfp)
{
	int i, err;

	memset(wb, 0, sizeof(*wb));

	wb->bdi = bdi;
	wb->last_old_flush = jiffies;
	INIT_LIST_HEAD(&wb->b_dirty);
	INIT_LIST_HEAD(&wb->b_io);
	INIT_LIST_HEAD(&wb->b_more_io);
	INIT_LIST_HEAD(&wb->b_dirty_time);
	spin_lock_init(&wb->list_lock);

	wb->bw_time_stamp = jiffies;
	wb->balanced_dirty_ratelimit = INIT_BW;
	wb->dirty_ratelimit = INIT_BW;
	wb->write_bandwidth = INIT_BW;
	wb->avg_write_bandwidth = INIT_BW;

	spin_lock_init(&wb->work_lock);
	INIT_LIST_HEAD(&wb->work_list);
	INIT_DELAYED_WORK(&wb->dwork, wb_workfn);

	err = fprop_local_init_percpu(&wb->completions, gfp);
	if (err)
		return err;

	for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
		err = percpu_counter_init(&wb->stat[i], 0, gfp);
		if (err) {
			/*
			 * Unwind only the counters that were initialized.
			 * "while (--i)" would skip stat[0] and underflow
			 * when the very first init fails.
			 */
			while (i--)
				percpu_counter_destroy(&wb->stat[i]);
			fprop_local_destroy_percpu(&wb->completions);
			return err;
		}
	}

	return 0;
}

/*
 * Shut down @wb's dwork: prevent further work from being queued and drain
 * whatever is already pending.  (Removal from the global bdi_list is done
 * separately, by bdi_remove_from_list().)
 */
static void wb_shutdown(struct bdi_writeback *wb)
{
	/* Make sure nobody queues further work */
	spin_lock_bh(&wb->work_lock);
	if (!test_and_clear_bit(WB_registered, &wb->state)) {
		spin_unlock_bh(&wb->work_lock);
		return;
	}
	spin_unlock_bh(&wb->work_lock);

	/*
	 * Drain work list and shutdown the delayed_work.  !WB_registered
	 * tells wb_workfn() that @wb is dying and its work_list needs to
	 * be drained no matter what.
	 */
	mod_delayed_work(bdi_wq, &wb->dwork, 0);
	flush_delayed_work(&wb->dwork);
	WARN_ON(!list_empty(&wb->work_list));
}

static void wb_exit(struct bdi_writeback *wb)
{
	int i;

	WARN_ON(delayed_work_pending(&wb->dwork));

	for (i = 0; i < NR_WB_STAT_ITEMS; i++)
		percpu_counter_destroy(&wb->stat[i]);

	fprop_local_destroy_percpu(&wb->completions);
}

#ifdef CONFIG_CGROUP_WRITEBACK

#include <linux/memcontrol.h>

/*
 * cgwb_lock protects bdi->cgwb_tree, bdi->cgwb_congested_tree,
 * blkcg->cgwb_list, and memcg->cgwb_list.  bdi->cgwb_tree is also RCU
 * protected.  cgwb_release_wait is used to wait for the completion of cgwb
 * releases from the bdi destruction path.
 */
static DEFINE_SPINLOCK(cgwb_lock);
static DECLARE_WAIT_QUEUE_HEAD(cgwb_release_wait);

/**
 * wb_congested_get_create - get or create a wb_congested
 * @bdi: associated bdi
 * @blkcg_id: ID of the associated blkcg
 * @gfp: allocation mask
 *
 * Look up the wb_congested for @blkcg_id on @bdi.  If missing, create one.
 * The returned wb_congested has its reference count incremented.  Returns
 * NULL on failure.
 */
struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
{
	struct bdi_writeback_congested *new_congested = NULL, *congested;
	struct rb_node **node, *parent;
	unsigned long flags;

	if (blkcg_id == 1)
		return &bdi->wb_congested;
retry:
	spin_lock_irqsave(&cgwb_lock, flags);

	node = &bdi->cgwb_congested_tree.rb_node;
	parent = NULL;

	while (*node != NULL) {
		parent = *node;
		congested = container_of(parent, struct bdi_writeback_congested,
					 rb_node);
		if (congested->blkcg_id < blkcg_id)
			node = &parent->rb_left;
		else if (congested->blkcg_id > blkcg_id)
			node = &parent->rb_right;
		else
			goto found;
	}

	if (new_congested) {
		/* !found and storage for new one already allocated, insert */
		congested = new_congested;
		new_congested = NULL;
		rb_link_node(&congested->rb_node, parent, node);
		rb_insert_color(&congested->rb_node, &bdi->cgwb_congested_tree);
		atomic_inc(&bdi->usage_cnt);
		goto found;
	}

	spin_unlock_irqrestore(&cgwb_lock, flags);

	/* allocate storage for new one and retry */
	new_congested = kzalloc(sizeof(*new_congested), gfp);
	if (!new_congested)
		return NULL;

	atomic_set(&new_congested->refcnt, 0);
	new_congested->bdi = bdi;
	new_congested->blkcg_id = blkcg_id;
	goto retry;

found:
	atomic_inc(&congested->refcnt);
	spin_unlock_irqrestore(&cgwb_lock, flags);
	kfree(new_congested);
	return congested;
}
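
/*
 * Usage sketch (hypothetical caller, for illustration only): every
 * successful wb_congested_get_create() must be paired with a
 * wb_congested_put(), defined below.
 *
 *	struct bdi_writeback_congested *congested;
 *
 *	congested = wb_congested_get_create(bdi, blkcg_css->id, GFP_KERNEL);
 *	if (!congested)
 *		return -ENOMEM;
 *	set_wb_congested(congested, BLK_RW_ASYNC);
 *	...
 *	clear_wb_congested(congested, BLK_RW_ASYNC);
 *	wb_congested_put(congested);
 */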

/**
 * wb_congested_put - put a wb_congested
 * @congested: wb_congested to put
 *
 * Put @congested and destroy it if the refcnt reaches zero.
 */
void wb_congested_put(struct bdi_writeback_congested *congested)
{
	struct backing_dev_info *bdi = congested->bdi;
	unsigned long flags;

	if (congested->blkcg_id == 1)
		return;

	local_irq_save(flags);
	if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
		local_irq_restore(flags);
		return;
	}

	rb_erase(&congested->rb_node, &congested->bdi->cgwb_congested_tree);
	spin_unlock_irqrestore(&cgwb_lock, flags);
	kfree(congested);

	if (atomic_dec_and_test(&bdi->usage_cnt))
		wake_up_all(&cgwb_release_wait);
}

static void cgwb_release_workfn(struct work_struct *work)
{
	struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
						release_work);
	struct backing_dev_info *bdi = wb->bdi;

	wb_shutdown(wb);

	css_put(wb->memcg_css);
	css_put(wb->blkcg_css);
	wb_congested_put(wb->congested);

	percpu_ref_exit(&wb->refcnt);
	wb_exit(wb);
	kfree_rcu(wb, rcu);

	if (atomic_dec_and_test(&bdi->usage_cnt))
		wake_up_all(&cgwb_release_wait);
}

static void cgwb_release(struct percpu_ref *refcnt)
{
	struct bdi_writeback *wb = container_of(refcnt, struct bdi_writeback,
						refcnt);
	schedule_work(&wb->release_work);
}

static void cgwb_kill(struct bdi_writeback *wb)
{
	lockdep_assert_held(&cgwb_lock);

	WARN_ON(!radix_tree_delete(&wb->bdi->cgwb_tree, wb->memcg_css->id));
	list_del(&wb->memcg_node);
	list_del(&wb->blkcg_node);
	percpu_ref_kill(&wb->refcnt);
}

static int cgwb_create(struct backing_dev_info *bdi,
		       struct cgroup_subsys_state *memcg_css, gfp_t gfp)
{
	struct mem_cgroup *memcg;
	struct cgroup_subsys_state *blkcg_css;
	struct blkcg *blkcg;
	struct list_head *memcg_cgwb_list, *blkcg_cgwb_list;
	struct bdi_writeback *wb;
	unsigned long flags;
	int ret = 0;

	memcg = mem_cgroup_from_css(memcg_css);
	blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &blkio_cgrp_subsys);
	blkcg = css_to_blkcg(blkcg_css);
	memcg_cgwb_list = mem_cgroup_cgwb_list(memcg);
	blkcg_cgwb_list = &blkcg->cgwb_list;

	/* look up again under lock and discard on blkcg mismatch */
	spin_lock_irqsave(&cgwb_lock, flags);
	wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
	if (wb && wb->blkcg_css != blkcg_css) {
		cgwb_kill(wb);
		wb = NULL;
	}
	spin_unlock_irqrestore(&cgwb_lock, flags);
	if (wb)
		goto out_put;

	/* need to create a new one */
	wb = kmalloc(sizeof(*wb), gfp);
	if (!wb) {
		/* don't leak the blkcg_css reference acquired above */
		ret = -ENOMEM;
		goto out_put;
	}

	ret = wb_init(wb, bdi, gfp);
	if (ret)
		goto err_free;

	ret = percpu_ref_init(&wb->refcnt, cgwb_release, 0, gfp);
	if (ret)
		goto err_wb_exit;

	wb->congested = wb_congested_get_create(bdi, blkcg_css->id, gfp);
	if (!wb->congested) {
		ret = -ENOMEM;
		goto err_ref_exit;
	}

	wb->memcg_css = memcg_css;
	wb->blkcg_css = blkcg_css;
	INIT_WORK(&wb->release_work, cgwb_release_workfn);
	set_bit(WB_registered, &wb->state);

	/*
	 * The root wb determines the registered state of the whole bdi and
	 * memcg_cgwb_list and blkcg_cgwb_list's next pointers indicate
	 * whether they're still online.  Don't link @wb if any is dead.
	 * See wb_memcg_offline() and wb_blkcg_offline().
	 */
	ret = -ENODEV;
	spin_lock_irqsave(&cgwb_lock, flags);
	if (test_bit(WB_registered, &bdi->wb.state) &&
	    blkcg_cgwb_list->next && memcg_cgwb_list->next) {
		/* we might have raced another instance of this function */
		ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb);
		if (!ret) {
			atomic_inc(&bdi->usage_cnt);
			list_add(&wb->memcg_node, memcg_cgwb_list);
			list_add(&wb->blkcg_node, blkcg_cgwb_list);
			css_get(memcg_css);
			css_get(blkcg_css);
		}
	}
	spin_unlock_irqrestore(&cgwb_lock, flags);
	if (ret) {
		if (ret == -EEXIST)
			ret = 0;
		goto err_put_congested;
	}
	goto out_put;

err_put_congested:
	wb_congested_put(wb->congested);
err_ref_exit:
	percpu_ref_exit(&wb->refcnt);
err_wb_exit:
	wb_exit(wb);
err_free:
	kfree(wb);
out_put:
	css_put(blkcg_css);
	return ret;
}

/**
 * wb_get_create - get wb for a given memcg, create if necessary
 * @bdi: target bdi
 * @memcg_css: cgroup_subsys_state of the target memcg (must have positive ref)
 * @gfp: allocation mask to use
 *
 * Try to get the wb for @memcg_css on @bdi.  If it doesn't exist, try to
 * create one.  The returned wb has its refcount incremented.
 *
 * This function uses css_get() on @memcg_css and thus expects its refcnt
 * to be positive on invocation.  IOW, rcu_read_lock() protection on
 * @memcg_css isn't enough; acquire a reference (e.g. with css_tryget())
 * before calling this function.
 *
 * A wb is keyed by its associated memcg.  As blkcg implicitly enables
 * memcg on the default hierarchy, the memcg association is guaranteed to
 * be more specific (equal to or a descendant of the associated blkcg) and
 * thus can identify both the memcg and blkcg associations.
 *
 * Because the blkcg associated with a memcg may change as blkcg is enabled
 * and disabled closer to root in the hierarchy, each wb keeps track of
 * both the memcg and blkcg associated with it and verifies the blkcg on
 * each lookup.  On mismatch, the existing wb is discarded and a new one is
 * created.
 */
struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
				    struct cgroup_subsys_state *memcg_css,
				    gfp_t gfp)
{
	struct bdi_writeback *wb;

	might_sleep_if(gfp & __GFP_WAIT);

	if (!memcg_css->parent)
		return &bdi->wb;

	do {
		rcu_read_lock();
		wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
		if (wb) {
			struct cgroup_subsys_state *blkcg_css;

			/* see whether the blkcg association has changed */
			blkcg_css = cgroup_get_e_css(memcg_css->cgroup,
						     &blkio_cgrp_subsys);
			if (unlikely(wb->blkcg_css != blkcg_css ||
				     !wb_tryget(wb)))
				wb = NULL;
			css_put(blkcg_css);
		}
		rcu_read_unlock();
	} while (!wb && !cgwb_create(bdi, memcg_css, gfp));

	return wb;
}

void __inode_attach_wb(struct inode *inode, struct page *page)
{
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	struct bdi_writeback *wb = NULL;

	if (inode_cgwb_enabled(inode)) {
		struct cgroup_subsys_state *memcg_css;

		if (page) {
			memcg_css = mem_cgroup_css_from_page(page);
			wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
		} else {
			/* must pin memcg_css, see wb_get_create() */
			memcg_css = task_get_css(current, memory_cgrp_id);
			wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
			css_put(memcg_css);
		}
	}

	if (!wb)
		wb = &bdi->wb;

	/*
	 * There may be multiple instances of this function racing to
	 * update the same inode.  Use cmpxchg() to tell the winner.
	 */
	if (unlikely(cmpxchg(&inode->i_wb, NULL, wb)))
		wb_put(wb);
}

static void cgwb_bdi_init(struct backing_dev_info *bdi)
{
	bdi->wb.memcg_css = mem_cgroup_root_css;
	bdi->wb.blkcg_css = blkcg_root_css;
	bdi->wb_congested.blkcg_id = 1;
	INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
	bdi->cgwb_congested_tree = RB_ROOT;
	atomic_set(&bdi->usage_cnt, 1);
}

static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
{
	struct radix_tree_iter iter;
	void **slot;

	WARN_ON(test_bit(WB_registered, &bdi->wb.state));

	spin_lock_irq(&cgwb_lock);
	radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
		cgwb_kill(*slot);
	spin_unlock_irq(&cgwb_lock);

	/*
	 * All cgwb's and their congested states must be shutdown and
	 * released before returning.  Drain the usage counter to wait for
	 * all cgwb's and cgwb_congested's ever created on @bdi.
	 */
	atomic_dec(&bdi->usage_cnt);
	wait_event(cgwb_release_wait, !atomic_read(&bdi->usage_cnt));
}

/**
 * wb_memcg_offline - kill all wb's associated with a memcg being offlined
 * @memcg: memcg being offlined
 *
 * Also prevents creation of any new wb's associated with @memcg.
 */
void wb_memcg_offline(struct mem_cgroup *memcg)
{
	struct list_head *memcg_cgwb_list = mem_cgroup_cgwb_list(memcg);
	struct bdi_writeback *wb, *next;

	spin_lock_irq(&cgwb_lock);
	list_for_each_entry_safe(wb, next, memcg_cgwb_list, memcg_node)
		cgwb_kill(wb);
	memcg_cgwb_list->next = NULL;	/* prevent new wb's */
	spin_unlock_irq(&cgwb_lock);
}

/**
 * wb_blkcg_offline - kill all wb's associated with a blkcg being offlined
 * @blkcg: blkcg being offlined
 *
 * Also prevents creation of any new wb's associated with @blkcg.
 */
void wb_blkcg_offline(struct blkcg *blkcg)
{
	struct bdi_writeback *wb, *next;

	spin_lock_irq(&cgwb_lock);
	list_for_each_entry_safe(wb, next, &blkcg->cgwb_list, blkcg_node)
		cgwb_kill(wb);
	blkcg->cgwb_list.next = NULL;	/* prevent new wb's */
	spin_unlock_irq(&cgwb_lock);
}

#else	/* CONFIG_CGROUP_WRITEBACK */

static void cgwb_bdi_init(struct backing_dev_info *bdi) { }
static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { }

#endif	/* CONFIG_CGROUP_WRITEBACK */

int bdi_init(struct backing_dev_info *bdi)
{
	int err;

	bdi->dev = NULL;

	bdi->min_ratio = 0;
	bdi->max_ratio = 100;
	bdi->max_prop_frac = FPROP_FRAC_BASE;
	INIT_LIST_HEAD(&bdi->bdi_list);
	init_waitqueue_head(&bdi->wb_waitq);

	err = wb_init(&bdi->wb, bdi, GFP_KERNEL);
	if (err)
		return err;

	bdi->wb_congested.state = 0;
	bdi->wb.congested = &bdi->wb_congested;

	cgwb_bdi_init(bdi);
	return 0;
}
EXPORT_SYMBOL(bdi_init);

int bdi_register(struct backing_dev_info *bdi, struct device *parent,
		 const char *fmt, ...)
{
	va_list args;
	struct device *dev;

	if (bdi->dev)	/* The driver needs to use separate queues per device */
		return 0;

	va_start(args, fmt);
	dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
	va_end(args);
	if (IS_ERR(dev))
		return PTR_ERR(dev);

	bdi->dev = dev;

	bdi_debug_register(bdi, dev_name(dev));
	set_bit(WB_registered, &bdi->wb.state);

	spin_lock_bh(&bdi_lock);
	list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
	spin_unlock_bh(&bdi_lock);

	trace_writeback_bdi_register(bdi);
	return 0;
}
EXPORT_SYMBOL(bdi_register);

int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
{
	return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev));
}
EXPORT_SYMBOL(bdi_register_dev);

/*
 * Remove bdi from bdi_list, and ensure that it is no longer visible
 */
static void bdi_remove_from_list(struct backing_dev_info *bdi)
{
	spin_lock_bh(&bdi_lock);
	list_del_rcu(&bdi->bdi_list);
	spin_unlock_bh(&bdi_lock);

	synchronize_rcu_expedited();
}

/*
 * Called when the device behind @bdi has been removed or ejected.
 *
 * We can't really do much here except for reducing the dirty ratio at
 * the moment.  In the future we should be able to set a flag so that
 * the filesystem can handle errors at mark_inode_dirty time instead
 * of only at writeback time.
 */
void bdi_unregister(struct backing_dev_info *bdi)
{
	if (WARN_ON_ONCE(!bdi->dev))
		return;

	bdi_set_min_ratio(bdi, 0);
}
EXPORT_SYMBOL(bdi_unregister);

void bdi_destroy(struct backing_dev_info *bdi)
{
	/* make sure nobody finds us on the bdi_list anymore */
	bdi_remove_from_list(bdi);
	wb_shutdown(&bdi->wb);
	cgwb_bdi_destroy(bdi);

	if (bdi->dev) {
		bdi_debug_unregister(bdi);
		device_unregister(bdi->dev);
		bdi->dev = NULL;
	}

	wb_exit(&bdi->wb);
}
EXPORT_SYMBOL(bdi_destroy);

/*
 * For use from filesystems to quickly init and register a bdi associated
 * with dirty writeback
 */
int bdi_setup_and_register(struct backing_dev_info *bdi, char *name)
{
	int err;

	bdi->name = name;
	bdi->capabilities = 0;
	err = bdi_init(bdi);
	if (err)
		return err;

	err = bdi_register(bdi, NULL, "%.28s-%ld", name,
			   atomic_long_inc_return(&bdi_seq));
	if (err) {
		bdi_destroy(bdi);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL(bdi_setup_and_register);
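
/*
 * Sketch of typical filesystem usage (hypothetical names, for illustration
 * only): a filesystem without a backing block device sets this up while
 * filling its superblock, then tears it down with bdi_destroy() once the
 * superblock is gone.
 *
 *	static int examplefs_fill_super(struct super_block *sb, void *data,
 *					int silent)
 *	{
 *		struct examplefs_sb_info *sbi = ...;
 *		int err;
 *
 *		err = bdi_setup_and_register(&sbi->bdi, "examplefs");
 *		if (err)
 *			return err;
 *		sb->s_bdi = &sbi->bdi;
 *		...
 *	}
 */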

static wait_queue_head_t congestion_wqh[2] = {
	__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
	__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
};
static atomic_t nr_wb_congested[2];

void clear_wb_congested(struct bdi_writeback_congested *congested, int sync)
{
	wait_queue_head_t *wqh = &congestion_wqh[sync];
	enum wb_state bit;

	bit = sync ? WB_sync_congested : WB_async_congested;
	if (test_and_clear_bit(bit, &congested->state))
		atomic_dec(&nr_wb_congested[sync]);
	smp_mb__after_atomic();
	if (waitqueue_active(wqh))
		wake_up(wqh);
}
EXPORT_SYMBOL(clear_wb_congested);

void set_wb_congested(struct bdi_writeback_congested *congested, int sync)
{
	enum wb_state bit;

	bit = sync ? WB_sync_congested : WB_async_congested;
	if (!test_and_set_bit(bit, &congested->state))
		atomic_inc(&nr_wb_congested[sync]);
}
EXPORT_SYMBOL(set_wb_congested);

/**
 * congestion_wait - wait for a backing_dev to become uncongested
 * @sync: SYNC or ASYNC IO
 * @timeout: timeout in jiffies
 *
 * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
 * write congestion.  If no backing_devs are congested then just wait for the
 * next write to be completed.
 */
long congestion_wait(int sync, long timeout)
{
	long ret;
	unsigned long start = jiffies;
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = &congestion_wqh[sync];

	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	ret = io_schedule_timeout(timeout);
	finish_wait(wqh, &wait);

	trace_writeback_congestion_wait(jiffies_to_usecs(timeout),
					jiffies_to_usecs(jiffies - start));

	return ret;
}
EXPORT_SYMBOL(congestion_wait);
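
/*
 * Typical use, e.g. from memory reclaim: back off for up to a tenth of a
 * second while writeback catches up:
 *
 *	congestion_wait(BLK_RW_ASYNC, HZ/10);
 *
 * BLK_RW_ASYNC selects the async congestion queue; the return value is the
 * number of jiffies left of the timeout (0 if it fully expired).
 */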

/**
 * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a zone to complete writes
 * @zone: A zone to check if it is heavily congested
 * @sync: SYNC or ASYNC IO
 * @timeout: timeout in jiffies
 *
 * If any backing_dev is congested and the given @zone has experienced
 * recent congestion, this waits for up to @timeout jiffies for either a
 * BDI to exit congestion of the given @sync queue or a write to complete.
 *
 * In the absence of zone congestion, cond_resched() is called to yield
 * the processor if necessary but otherwise does not sleep.
 *
 * The return value is 0 if the sleep is for the full timeout.  Otherwise,
 * it is the number of jiffies that were still remaining when the function
 * returned.  return_value == timeout implies the function did not sleep.
 */
long wait_iff_congested(struct zone *zone, int sync, long timeout)
{
	long ret;
	unsigned long start = jiffies;
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = &congestion_wqh[sync];

	/*
	 * If there is no congestion, or heavy congestion is not being
	 * encountered in the current zone, yield if necessary instead
	 * of sleeping on the congestion queue
	 */
	if (atomic_read(&nr_wb_congested[sync]) == 0 ||
	    !test_bit(ZONE_CONGESTED, &zone->flags)) {
		cond_resched();

		/* In case we scheduled, work out time remaining */
		ret = timeout - (jiffies - start);
		if (ret < 0)
			ret = 0;

		goto out;
	}

	/* Sleep until uncongested or a write happens */
	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	ret = io_schedule_timeout(timeout);
	finish_wait(wqh, &wait);

out:
	trace_writeback_wait_iff_congested(jiffies_to_usecs(timeout),
					   jiffies_to_usecs(jiffies - start));

	return ret;
}
EXPORT_SYMBOL(wait_iff_congested);
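
/*
 * Typical use from direct reclaim (sketch, with a hypothetical predicate):
 *
 *	if (should_throttle)
 *		wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
 *
 * When neither a wb nor the zone is marked congested, this degenerates to
 * a cond_resched() and returns without sleeping.
 */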

int pdflush_proc_obsolete(struct ctl_table *table, int write,
			  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	char kbuf[] = "0\n";

	if (*ppos || *lenp < sizeof(kbuf)) {
		*lenp = 0;
		return 0;
	}

	if (copy_to_user(buffer, kbuf, sizeof(kbuf)))
		return -EFAULT;
	printk_once(KERN_WARNING "%s exported in /proc is scheduled for removal\n",
		    table->procname);

	*lenp = 2;
	*ppos += *lenp;
	return 2;
}