// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2010 Red Hat, Inc.
 * Copyright (C) 2016-2019 Christoph Hellwig.
 */
#include <linux/module.h>
#include <linux/compiler.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <linux/buffer_head.h>
#include <linux/dax.h>
#include <linux/writeback.h>
#include <linux/list_sort.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/sched/signal.h>
#include <linux/migrate.h>
#include "trace.h"

#include "../internal.h"

/*
 * Structure allocated for each page when block size < PAGE_SIZE to track
 * sub-page uptodate status and I/O completions.
 */
struct iomap_page {
	atomic_t		read_count;
	atomic_t		write_count;
	spinlock_t		uptodate_lock;
	DECLARE_BITMAP(uptodate, PAGE_SIZE / 512);
};
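
/*
 * Worked example (illustrative, not part of the original source): the bitmap
 * above reserves one bit per 512-byte sector, i.e. PAGE_SIZE / 512 = 8 bits
 * for a 4K page. With 1K filesystem blocks only the first
 * PAGE_SIZE >> inode->i_blkbits = 4 bits are actually used, one per block,
 * since the helpers below all index the bitmap at block granularity.
 */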

static inline struct iomap_page *to_iomap_page(struct page *page)
{
	if (page_has_private(page))
		return (struct iomap_page *)page_private(page);
	return NULL;
}

static struct bio_set iomap_ioend_bioset;

static struct iomap_page *
iomap_page_create(struct inode *inode, struct page *page)
{
	struct iomap_page *iop = to_iomap_page(page);

	if (iop || i_blocksize(inode) == PAGE_SIZE)
		return iop;

	iop = kmalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL);
	atomic_set(&iop->read_count, 0);
	atomic_set(&iop->write_count, 0);
	spin_lock_init(&iop->uptodate_lock);
	bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);

	/*
	 * migrate_page_move_mapping() assumes that pages with private data
	 * have their count elevated by 1.
	 */
	get_page(page);
	set_page_private(page, (unsigned long)iop);
	SetPagePrivate(page);
	return iop;
}

static void
iomap_page_release(struct page *page)
{
	struct iomap_page *iop = to_iomap_page(page);

	if (!iop)
		return;
	WARN_ON_ONCE(atomic_read(&iop->read_count));
	WARN_ON_ONCE(atomic_read(&iop->write_count));
	ClearPagePrivate(page);
	set_page_private(page, 0);
	put_page(page);
	kfree(iop);
}

/*
 * Calculate the range inside the page that we actually need to read.
 */
static void
iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
		loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp)
{
	loff_t orig_pos = *pos;
	loff_t isize = i_size_read(inode);
	unsigned block_bits = inode->i_blkbits;
	unsigned block_size = (1 << block_bits);
	unsigned poff = offset_in_page(*pos);
	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
	unsigned first = poff >> block_bits;
	unsigned last = (poff + plen - 1) >> block_bits;

	/*
	 * If the block size is smaller than the page size we need to check the
	 * per-block uptodate status and adjust the offset and length if needed
	 * to avoid reading in already uptodate ranges.
	 */
	if (iop) {
		unsigned int i;

		/* move forward for each leading block marked uptodate */
		for (i = first; i <= last; i++) {
			if (!test_bit(i, iop->uptodate))
				break;
			*pos += block_size;
			poff += block_size;
			plen -= block_size;
			first++;
		}

		/* truncate len if we find any trailing uptodate block(s) */
		for ( ; i <= last; i++) {
			if (test_bit(i, iop->uptodate)) {
				plen -= (last - i + 1) * block_size;
				last = i - 1;
				break;
			}
		}
	}

	/*
	 * If the extent spans the block that contains the i_size we need to
	 * handle both halves separately so that we properly zero data in the
	 * page cache for blocks that are entirely outside of i_size.
	 */
	if (orig_pos <= isize && orig_pos + length > isize) {
		unsigned end = offset_in_page(isize - 1) >> block_bits;

		if (first <= end && last > end)
			plen -= (last - end) * block_size;
	}

	*offp = poff;
	*lenp = plen;
}
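
/*
 * Worked example for the helper above (illustrative, not part of the
 * original source): take a 4K page of 1K blocks whose uptodate bitmap is
 * {1, 0, 0, 1} and a read of the whole page. The first loop advances
 * *pos/poff past block 0 (plen drops to 3072), the second loop trims the
 * trailing uptodate block 3 (plen drops to 2048), so only bytes 1024..3071,
 * i.e. blocks 1 and 2, are actually read.
 */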

static void
iomap_iop_set_range_uptodate(struct page *page, unsigned off, unsigned len)
{
	struct iomap_page *iop = to_iomap_page(page);
	struct inode *inode = page->mapping->host;
	unsigned first = off >> inode->i_blkbits;
	unsigned last = (off + len - 1) >> inode->i_blkbits;
	bool uptodate = true;
	unsigned long flags;
	unsigned int i;

	spin_lock_irqsave(&iop->uptodate_lock, flags);
	for (i = 0; i < PAGE_SIZE / i_blocksize(inode); i++) {
		if (i >= first && i <= last)
			set_bit(i, iop->uptodate);
		else if (!test_bit(i, iop->uptodate))
			uptodate = false;
	}

	if (uptodate)
		SetPageUptodate(page);
	spin_unlock_irqrestore(&iop->uptodate_lock, flags);
}

static void
iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len)
{
	if (PageError(page))
		return;

	if (page_has_private(page))
		iomap_iop_set_range_uptodate(page, off, len);
	else
		SetPageUptodate(page);
}

static void
iomap_read_finish(struct iomap_page *iop, struct page *page)
{
	if (!iop || atomic_dec_and_test(&iop->read_count))
		unlock_page(page);
}

static void
iomap_read_page_end_io(struct bio_vec *bvec, int error)
{
	struct page *page = bvec->bv_page;
	struct iomap_page *iop = to_iomap_page(page);

	if (unlikely(error)) {
		ClearPageUptodate(page);
		SetPageError(page);
	} else {
		iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len);
	}

	iomap_read_finish(iop, page);
}

static void
iomap_read_end_io(struct bio *bio)
{
	int error = blk_status_to_errno(bio->bi_status);
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all)
		iomap_read_page_end_io(bvec, error);
	bio_put(bio);
}

struct iomap_readpage_ctx {
	struct page		*cur_page;
	bool			cur_page_in_bio;
	bool			is_readahead;
	struct bio		*bio;
	struct list_head	*pages;
};

static void
iomap_read_inline_data(struct inode *inode, struct page *page,
		struct iomap *iomap)
{
	size_t size = i_size_read(inode);
	void *addr;

	if (PageUptodate(page))
		return;

	BUG_ON(page->index);
	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));

	addr = kmap_atomic(page);
	memcpy(addr, iomap->inline_data, size);
	memset(addr + size, 0, PAGE_SIZE - size);
	kunmap_atomic(addr);
	SetPageUptodate(page);
}

static inline bool iomap_block_needs_zeroing(struct inode *inode,
		struct iomap *iomap, loff_t pos)
{
	return iomap->type != IOMAP_MAPPED ||
		(iomap->flags & IOMAP_F_NEW) ||
		pos >= i_size_read(inode);
}

static loff_t
iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
		struct iomap *iomap, struct iomap *srcmap)
{
	struct iomap_readpage_ctx *ctx = data;
	struct page *page = ctx->cur_page;
	struct iomap_page *iop = iomap_page_create(inode, page);
	bool same_page = false, is_contig = false;
	loff_t orig_pos = pos;
	unsigned poff, plen;
	sector_t sector;

	if (iomap->type == IOMAP_INLINE) {
		WARN_ON_ONCE(pos);
		iomap_read_inline_data(inode, page, iomap);
		return PAGE_SIZE;
	}

	/* zero post-eof blocks as the page may be mapped */
	iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
	if (plen == 0)
		goto done;

	if (iomap_block_needs_zeroing(inode, iomap, pos)) {
		zero_user(page, poff, plen);
		iomap_set_range_uptodate(page, poff, plen);
		goto done;
	}

	ctx->cur_page_in_bio = true;

	/*
	 * Try to merge into a previous segment if we can.
	 */
	sector = iomap_sector(iomap, pos);
	if (ctx->bio && bio_end_sector(ctx->bio) == sector)
		is_contig = true;

	if (is_contig &&
	    __bio_try_merge_page(ctx->bio, page, plen, poff, &same_page)) {
		if (!same_page && iop)
			atomic_inc(&iop->read_count);
		goto done;
	}

	/*
	 * If we start a new segment we need to increase the read count, and we
	 * need to do so before submitting any previous full bio to make sure
	 * that we don't prematurely unlock the page.
	 */
	if (iop)
		atomic_inc(&iop->read_count);

	if (!ctx->bio || !is_contig || bio_full(ctx->bio, plen)) {
		gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
		int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;

		if (ctx->bio)
			submit_bio(ctx->bio);

		if (ctx->is_readahead) /* same as readahead_gfp_mask */
			gfp |= __GFP_NORETRY | __GFP_NOWARN;
		ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs));
		ctx->bio->bi_opf = REQ_OP_READ;
		if (ctx->is_readahead)
			ctx->bio->bi_opf |= REQ_RAHEAD;
		ctx->bio->bi_iter.bi_sector = sector;
		bio_set_dev(ctx->bio, iomap->bdev);
		ctx->bio->bi_end_io = iomap_read_end_io;
	}

	bio_add_page(ctx->bio, page, plen, poff);
done:
	/*
	 * Move the caller beyond our range so that it keeps making progress.
	 * For that we have to include any leading non-uptodate ranges, but
	 * we can skip trailing ones as they will be handled in the next
	 * iteration.
	 */
	return pos - orig_pos + plen;
}

int
iomap_readpage(struct page *page, const struct iomap_ops *ops)
{
	struct iomap_readpage_ctx ctx = { .cur_page = page };
	struct inode *inode = page->mapping->host;
	unsigned poff;
	loff_t ret;

	trace_iomap_readpage(page->mapping->host, 1);

	for (poff = 0; poff < PAGE_SIZE; poff += ret) {
		ret = iomap_apply(inode, page_offset(page) + poff,
				PAGE_SIZE - poff, 0, ops, &ctx,
				iomap_readpage_actor);
		if (ret <= 0) {
			WARN_ON_ONCE(ret == 0);
			SetPageError(page);
			break;
		}
	}

	if (ctx.bio) {
		submit_bio(ctx.bio);
		WARN_ON_ONCE(!ctx.cur_page_in_bio);
	} else {
		WARN_ON_ONCE(ctx.cur_page_in_bio);
		unlock_page(page);
	}

	/*
	 * Just like mpage_readpages and block_read_full_page we always
	 * return 0 and just mark the page as PageError on errors. This
	 * should be cleaned up all through the stack eventually.
	 */
	return 0;
}
EXPORT_SYMBOL_GPL(iomap_readpage);
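
/*
 * A minimal, hypothetical wiring of the read path (the myfs_* names are
 * illustrative, not from this file):
 *
 *	static int myfs_readpage(struct file *unused, struct page *page)
 *	{
 *		return iomap_readpage(page, &myfs_iomap_ops);
 *	}
 *
 * where myfs_iomap_ops supplies the ->iomap_begin()/->iomap_end() callbacks
 * that iomap_apply() uses to map file offsets to extents.
 */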

static struct page *
iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
		loff_t length, loff_t *done)
{
	while (!list_empty(pages)) {
		struct page *page = lru_to_page(pages);

		if (page_offset(page) >= (u64)pos + length)
			break;

		list_del(&page->lru);
		if (!add_to_page_cache_lru(page, inode->i_mapping, page->index,
				GFP_NOFS))
			return page;

		/*
		 * If we already have a page in the page cache at index we are
		 * done. Upper layers don't care if it is uptodate after the
		 * readpages call itself as every page gets checked again once
		 * actually needed.
		 */
		*done += PAGE_SIZE;
		put_page(page);
	}

	return NULL;
}

static loff_t
iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
		void *data, struct iomap *iomap, struct iomap *srcmap)
{
	struct iomap_readpage_ctx *ctx = data;
	loff_t done, ret;

	for (done = 0; done < length; done += ret) {
		if (ctx->cur_page && offset_in_page(pos + done) == 0) {
			if (!ctx->cur_page_in_bio)
				unlock_page(ctx->cur_page);
			put_page(ctx->cur_page);
			ctx->cur_page = NULL;
		}
		if (!ctx->cur_page) {
			ctx->cur_page = iomap_next_page(inode, ctx->pages,
					pos, length, &done);
			if (!ctx->cur_page)
				break;
			ctx->cur_page_in_bio = false;
		}
		ret = iomap_readpage_actor(inode, pos + done, length - done,
				ctx, iomap, srcmap);
	}

	return done;
}

int
iomap_readpages(struct address_space *mapping, struct list_head *pages,
		unsigned nr_pages, const struct iomap_ops *ops)
{
	struct iomap_readpage_ctx ctx = {
		.pages		= pages,
		.is_readahead	= true,
	};
	loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
	loff_t last = page_offset(list_entry(pages->next, struct page, lru));
	loff_t length = last - pos + PAGE_SIZE, ret = 0;

	trace_iomap_readpages(mapping->host, nr_pages);

	while (length > 0) {
		ret = iomap_apply(mapping->host, pos, length, 0, ops,
				&ctx, iomap_readpages_actor);
		if (ret <= 0) {
			WARN_ON_ONCE(ret == 0);
			goto done;
		}
		pos += ret;
		length -= ret;
	}
	ret = 0;
done:
	if (ctx.bio)
		submit_bio(ctx.bio);
	if (ctx.cur_page) {
		if (!ctx.cur_page_in_bio)
			unlock_page(ctx.cur_page);
		put_page(ctx.cur_page);
	}

	/*
	 * Check that we didn't lose a page due to the arcane calling
	 * conventions.
	 */
	WARN_ON_ONCE(!ret && !list_empty(ctx.pages));
	return ret;
}
EXPORT_SYMBOL_GPL(iomap_readpages);

/*
 * iomap_is_partially_uptodate checks whether blocks within a page are
 * uptodate or not.
 *
 * Returns true if all blocks which correspond to the file portion
 * we want to read within the page are uptodate.
 */
int
iomap_is_partially_uptodate(struct page *page, unsigned long from,
		unsigned long count)
{
	struct iomap_page *iop = to_iomap_page(page);
	struct inode *inode = page->mapping->host;
	unsigned len, first, last;
	unsigned i;

	/* Limit range to one page */
	len = min_t(unsigned, PAGE_SIZE - from, count);

	/* First and last blocks in range within page */
	first = from >> inode->i_blkbits;
	last = (from + len - 1) >> inode->i_blkbits;

	if (iop) {
		for (i = first; i <= last; i++)
			if (!test_bit(i, iop->uptodate))
				return 0;
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
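
/*
 * Worked example (illustrative): with 1K blocks, a query for from=512,
 * count=1024 spans blocks 0 and 1 of the page, so this returns 1 only if
 * both uptodate bits are set. A page without an iomap_page (block size ==
 * PAGE_SIZE) always reports 0 here, which is safe: callers treat 0 merely
 * as "cannot prove the range is uptodate" and fall back to a full read.
 */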

int
iomap_releasepage(struct page *page, gfp_t gfp_mask)
{
	trace_iomap_releasepage(page->mapping->host, page_offset(page),
			PAGE_SIZE);

	/*
	 * mm accommodates an old ext3 case where clean pages might not have
	 * had the dirty bit cleared. Thus, it can send actual dirty pages to
	 * ->releasepage() via shrink_active_list(); skip those here.
	 */
	if (PageDirty(page) || PageWriteback(page))
		return 0;
	iomap_page_release(page);
	return 1;
}
EXPORT_SYMBOL_GPL(iomap_releasepage);

void
iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len)
{
	trace_iomap_invalidatepage(page->mapping->host, offset, len);

	/*
	 * If we are invalidating the entire page, clear the dirty state from
	 * it and release it to avoid unnecessary buildup of the LRU.
	 */
	if (offset == 0 && len == PAGE_SIZE) {
		WARN_ON_ONCE(PageWriteback(page));
		cancel_dirty_page(page);
		iomap_page_release(page);
	}
}
EXPORT_SYMBOL_GPL(iomap_invalidatepage);

#ifdef CONFIG_MIGRATION
int
iomap_migrate_page(struct address_space *mapping, struct page *newpage,
		struct page *page, enum migrate_mode mode)
{
	int ret;

	ret = migrate_page_move_mapping(mapping, newpage, page, 0);
	if (ret != MIGRATEPAGE_SUCCESS)
		return ret;

	if (page_has_private(page)) {
		ClearPagePrivate(page);
		get_page(newpage);
		set_page_private(newpage, page_private(page));
		set_page_private(page, 0);
		put_page(page);
		SetPagePrivate(newpage);
	}

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);
	return MIGRATEPAGE_SUCCESS;
}
EXPORT_SYMBOL_GPL(iomap_migrate_page);
#endif /* CONFIG_MIGRATION */

enum {
	IOMAP_WRITE_F_UNSHARE = (1 << 0),
};

static void
iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
{
	loff_t i_size = i_size_read(inode);

	/*
	 * Only truncate newly allocated pages beyond EOF, even if the
	 * write started inside the existing inode size.
	 */
	if (pos + len > i_size)
		truncate_pagecache_range(inode, max(pos, i_size), pos + len);
}

static int
iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff,
		unsigned plen, struct iomap *iomap)
{
	struct bio_vec bvec;
	struct bio bio;

	bio_init(&bio, &bvec, 1);
	bio.bi_opf = REQ_OP_READ;
	bio.bi_iter.bi_sector = iomap_sector(iomap, block_start);
	bio_set_dev(&bio, iomap->bdev);
	__bio_add_page(&bio, page, plen, poff);
	return submit_bio_wait(&bio);
}

static int
__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
		struct page *page, struct iomap *srcmap)
{
	struct iomap_page *iop = iomap_page_create(inode, page);
	loff_t block_size = i_blocksize(inode);
	loff_t block_start = pos & ~(block_size - 1);
	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
	unsigned from = offset_in_page(pos), to = from + len, poff, plen;
	int status;

	if (PageUptodate(page))
		return 0;

	do {
		iomap_adjust_read_range(inode, iop, &block_start,
				block_end - block_start, &poff, &plen);
		if (plen == 0)
			break;

		if (!(flags & IOMAP_WRITE_F_UNSHARE) &&
		    (from <= poff || from >= poff + plen) &&
		    (to <= poff || to >= poff + plen))
			continue;

		if (iomap_block_needs_zeroing(inode, srcmap, block_start)) {
			if (WARN_ON_ONCE(flags & IOMAP_WRITE_F_UNSHARE))
				return -EIO;
			zero_user_segments(page, poff, from, to, poff + plen);
			iomap_set_range_uptodate(page, poff, plen);
			continue;
		}

		status = iomap_read_page_sync(block_start, page, poff, plen,
				srcmap);
		if (status)
			return status;
	} while ((block_start += plen) < block_end);

	return 0;
}
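
/*
 * Worked example for the rounding above (illustrative): with 1K blocks, a
 * write at pos=1500 for len=100 gives block_start=1024, block_end=2048,
 * from=1500 and to=1600, so only the single containing block is read in
 * (or zeroed) before the copy, and only when the page is not yet uptodate.
 */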

static int
iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
{
	const struct iomap_page_ops *page_ops = iomap->page_ops;
	struct page *page;
	int status = 0;

	BUG_ON(pos + len > iomap->offset + iomap->length);
	if (srcmap != iomap)
		BUG_ON(pos + len > srcmap->offset + srcmap->length);

	if (fatal_signal_pending(current))
		return -EINTR;

	if (page_ops && page_ops->page_prepare) {
		status = page_ops->page_prepare(inode, pos, len, iomap);
		if (status)
			return status;
	}

	page = grab_cache_page_write_begin(inode->i_mapping, pos >> PAGE_SHIFT,
			AOP_FLAG_NOFS);
	if (!page) {
		status = -ENOMEM;
		goto out_no_page;
	}

	if (srcmap->type == IOMAP_INLINE)
		iomap_read_inline_data(inode, page, srcmap);
	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
		status = __block_write_begin_int(page, pos, len, NULL, srcmap);
	else
		status = __iomap_write_begin(inode, pos, len, flags, page,
				srcmap);

	if (unlikely(status))
		goto out_unlock;

	*pagep = page;
	return 0;

out_unlock:
	unlock_page(page);
	put_page(page);
	iomap_write_failed(inode, pos, len);

out_no_page:
	if (page_ops && page_ops->page_done)
		page_ops->page_done(inode, pos, 0, NULL, iomap);
	return status;
}

int
iomap_set_page_dirty(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int newly_dirty;

	if (unlikely(!mapping))
		return !TestSetPageDirty(page);

	/*
	 * Lock out page->mem_cgroup migration to keep PageDirty
	 * synchronized with per-memcg dirty page counters.
	 */
	lock_page_memcg(page);
	newly_dirty = !TestSetPageDirty(page);
	if (newly_dirty)
		__set_page_dirty(page, mapping, 0);
	unlock_page_memcg(page);

	if (newly_dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
	return newly_dirty;
}
EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
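
/*
 * Taken together, the exported helpers above cover the buffered read side
 * of an address_space. A hypothetical filesystem (names illustrative, not
 * from this file) might wire them up as:
 *
 *	static const struct address_space_operations myfs_aops = {
 *		.readpage		= myfs_readpage,
 *		.readpages		= myfs_readpages,
 *		.set_page_dirty		= iomap_set_page_dirty,
 *		.releasepage		= iomap_releasepage,
 *		.invalidatepage		= iomap_invalidatepage,
 *		.is_partially_uptodate	= iomap_is_partially_uptodate,
 *	#ifdef CONFIG_MIGRATION
 *		.migratepage		= iomap_migrate_page,
 *	#endif
 *	};
 *
 * where myfs_readpage()/myfs_readpages() are thin wrappers that pass the
 * filesystem's iomap_ops to iomap_readpage()/iomap_readpages().
 */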

static int
__iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
		unsigned copied, struct page *page)
{
	flush_dcache_page(page);

	/*
	 * The blocks that were entirely written will now be uptodate, so we
	 * don't have to worry about a readpage reading them and overwriting a
	 * partial write. However, if we have encountered a short write and
	 * only partially written into a block, it will not be marked uptodate,
	 * so a readpage might come in and destroy our partial write.
	 *
	 * Do the simplest thing, and just treat any short write to a
	 * non-uptodate page as a zero-length write, and force the caller to
	 * redo the whole thing.
	 */
	if (unlikely(copied < len && !PageUptodate(page)))
		return 0;
	iomap_set_range_uptodate(page, offset_in_page(pos), len);
	iomap_set_page_dirty(page);
	return copied;
}

static int
iomap_write_end_inline(struct inode *inode, struct page *page,
		struct iomap *iomap, loff_t pos, unsigned copied)
{
	void *addr;

	WARN_ON_ONCE(!PageUptodate(page));
	BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));

	addr = kmap_atomic(page);
	memcpy(iomap->inline_data + pos, addr + pos, copied);
	kunmap_atomic(addr);

	mark_inode_dirty(inode);
	return copied;
}

static int
iomap_write_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied,
		struct page *page, struct iomap *iomap, struct iomap *srcmap)
{
	const struct iomap_page_ops *page_ops = iomap->page_ops;
	loff_t old_size = inode->i_size;
	int ret;

	if (srcmap->type == IOMAP_INLINE) {
		ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
	} else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
		ret = block_write_end(NULL, inode->i_mapping, pos, len, copied,
				page, NULL);
	} else {
		ret = __iomap_write_end(inode, pos, len, copied, page);
	}

	/*
	 * Update the in-memory inode size after copying the data into the page
	 * cache. It's up to the file system to write the updated size to disk,
	 * preferably after I/O completion so that no stale data is exposed.
	 */
	if (pos + ret > old_size) {
		i_size_write(inode, pos + ret);
		iomap->flags |= IOMAP_F_SIZE_CHANGED;
	}
	unlock_page(page);

	if (old_size < pos)
		pagecache_isize_extended(inode, old_size, pos);
	if (page_ops && page_ops->page_done)
		page_ops->page_done(inode, pos, ret, page, iomap);
	put_page(page);

	if (ret < len)
		iomap_write_failed(inode, pos, len);
	return ret;
}

static loff_t
iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
		struct iomap *iomap, struct iomap *srcmap)
{
	struct iov_iter *i = data;
	long status = 0;
	ssize_t written = 0;

	do {
		struct page *page;
		unsigned long offset;	/* Offset into pagecache page */
		unsigned long bytes;	/* Bytes to write to page */
		size_t copied;		/* Bytes copied from user */

		offset = offset_in_page(pos);
		bytes = min_t(unsigned long, PAGE_SIZE - offset,
						iov_iter_count(i));
again:
		if (bytes > length)
			bytes = length;

		/*
		 * Bring in the user page that we will copy from _first_.
		 * Otherwise there's a nasty deadlock on copying from the
		 * same page as we're writing to, without it being marked
		 * up-to-date.
		 *
		 * Not only is this an optimisation, but it is also required
		 * to check that the address is actually valid, when atomic
		 * usercopies are used, below.
		 */
		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
			status = -EFAULT;
			break;
		}

		status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap,
				srcmap);
		if (unlikely(status))
			break;

		if (mapping_writably_mapped(inode->i_mapping))
			flush_dcache_page(page);

		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);

		flush_dcache_page(page);

		status = iomap_write_end(inode, pos, bytes, copied, page, iomap,
				srcmap);
		if (unlikely(status < 0))
			break;
		copied = status;

		cond_resched();

		iov_iter_advance(i, copied);
		if (unlikely(copied == 0)) {
			/*
			 * If we were unable to copy any data at all, we must
			 * fall back to a single segment length write.
			 *
			 * If we didn't fall back here, we could livelock
			 * because not all segments in the iov can be copied at
			 * once without a pagefault.
			 */
			bytes = min_t(unsigned long, PAGE_SIZE - offset,
						iov_iter_single_seg_count(i));
			goto again;
		}
		pos += copied;
		written += copied;
		length -= copied;

		balance_dirty_pages_ratelimited(inode->i_mapping);
	} while (iov_iter_count(i) && length);

	return written ? written : status;
}

ssize_t
iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter,
		const struct iomap_ops *ops)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;
	loff_t pos = iocb->ki_pos, ret = 0, written = 0;

	while (iov_iter_count(iter)) {
		ret = iomap_apply(inode, pos, iov_iter_count(iter),
				IOMAP_WRITE, ops, iter, iomap_write_actor);
		if (ret <= 0)
			break;
		pos += ret;
		written += ret;
	}

	return written ? written : ret;
}
EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
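
/*
 * A minimal, hypothetical caller (names illustrative): the filesystem's
 * ->write_iter() typically takes the inode lock, runs the generic write
 * checks, and then does something like
 *
 *	written = iomap_file_buffered_write(iocb, from, &myfs_iomap_ops);
 *	if (written > 0)
 *		iocb->ki_pos += written;
 *
 * before syncing the range for O_SYNC writes. Note that this helper does
 * not advance iocb->ki_pos itself.
 */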

static loff_t
iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
		struct iomap *iomap, struct iomap *srcmap)
{
	long status = 0;
	ssize_t written = 0;

	/* don't bother with blocks that are not shared to start with */
	if (!(iomap->flags & IOMAP_F_SHARED))
		return length;
	/* don't bother with holes or unwritten extents */
	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
		return length;

	do {
		unsigned long offset = offset_in_page(pos);
		unsigned long bytes = min_t(loff_t, PAGE_SIZE - offset, length);
		struct page *page;

		status = iomap_write_begin(inode, pos, bytes,
				IOMAP_WRITE_F_UNSHARE, &page, iomap, srcmap);
		if (unlikely(status))
			return status;

		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
				srcmap);
		if (unlikely(status <= 0)) {
			if (WARN_ON_ONCE(status == 0))
				return -EIO;
			return status;
		}

		cond_resched();

		pos += status;
		written += status;
		length -= status;

		balance_dirty_pages_ratelimited(inode->i_mapping);
	} while (length);

	return written;
}

int
iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
		const struct iomap_ops *ops)
{
	loff_t ret;

	while (len) {
		ret = iomap_apply(inode, pos, len, IOMAP_WRITE, ops, NULL,
				iomap_unshare_actor);
		if (ret <= 0)
			return ret;
		pos += ret;
		len -= ret;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(iomap_file_unshare);

static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
		unsigned bytes, struct iomap *iomap, struct iomap *srcmap)
{
	struct page *page;
	int status;

	status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap);
	if (status)
		return status;

	zero_user(page, offset, bytes);
	mark_page_accessed(page);

	return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
}

static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
		struct iomap *iomap)
{
	return __dax_zero_page_range(iomap->bdev, iomap->dax_dev,
			iomap_sector(iomap, pos & PAGE_MASK), offset, bytes);
}

static loff_t
iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
		void *data, struct iomap *iomap, struct iomap *srcmap)
{
	bool *did_zero = data;
	loff_t written = 0;
	int status;

	/* already zeroed? we're done. */
	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
		return count;

	do {
		unsigned offset, bytes;

		offset = offset_in_page(pos);
		bytes = min_t(loff_t, PAGE_SIZE - offset, count);

		if (IS_DAX(inode))
			status = iomap_dax_zero(pos, offset, bytes, iomap);
		else
			status = iomap_zero(inode, pos, offset, bytes, iomap,
					srcmap);
		if (status < 0)
			return status;

		pos += bytes;
		count -= bytes;
		written += bytes;
		if (did_zero)
			*did_zero = true;
	} while (count > 0);

	return written;
}

int
iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
		const struct iomap_ops *ops)
{
	loff_t ret;

	while (len > 0) {
		ret = iomap_apply(inode, pos, len, IOMAP_ZERO,
				ops, did_zero, iomap_zero_range_actor);
		if (ret <= 0)
			return ret;

		pos += ret;
		len -= ret;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(iomap_zero_range);

int
iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
		const struct iomap_ops *ops)
{
	unsigned int blocksize = i_blocksize(inode);
	unsigned int off = pos & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!off)
		return 0;
	return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops);
}
EXPORT_SYMBOL_GPL(iomap_truncate_page);
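
/*
 * Worked example (illustrative): truncating a file to pos=2600 with 1K
 * blocks gives off = 2600 & 1023 = 552, so iomap_zero_range() zeroes the
 * remaining 1024 - 552 = 472 bytes of the partial block, up to the next
 * block boundary at 3072.
 */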

static loff_t
iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
		void *data, struct iomap *iomap, struct iomap *srcmap)
{
	struct page *page = data;
	int ret;

	if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
		ret = __block_write_begin_int(page, pos, length, NULL, iomap);
		if (ret)
			return ret;
		block_commit_write(page, 0, length);
	} else {
		WARN_ON_ONCE(!PageUptodate(page));
		iomap_page_create(inode, page);
		set_page_dirty(page);
	}

	return length;
}

vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	unsigned long length;
	loff_t offset;
	ssize_t ret;

	lock_page(page);
	ret = page_mkwrite_check_truncate(page, inode);
	if (ret < 0)
		goto out_unlock;
	length = ret;

	offset = page_offset(page);
	while (length > 0) {
		ret = iomap_apply(inode, offset, length,
				IOMAP_WRITE | IOMAP_FAULT, ops, page,
				iomap_page_mkwrite_actor);
		if (unlikely(ret <= 0))
			goto out_unlock;
		offset += ret;
		length -= ret;
	}

	wait_for_stable_page(page);
	return VM_FAULT_LOCKED;
out_unlock:
	unlock_page(page);
	return block_page_mkwrite_return(ret);
}
EXPORT_SYMBOL_GPL(iomap_page_mkwrite);

static void
iomap_finish_page_writeback(struct inode *inode, struct page *page,
		int error)
{
	struct iomap_page *iop = to_iomap_page(page);

	if (error) {
		SetPageError(page);
		mapping_set_error(inode->i_mapping, -EIO);
	}

	WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE && !iop);
	WARN_ON_ONCE(iop && atomic_read(&iop->write_count) <= 0);

	if (!iop || atomic_dec_and_test(&iop->write_count))
		end_page_writeback(page);
}

/*
 * We're now finished for good with this ioend structure. Update the page
 * state, release holds on bios, and finally free up memory. Do not use the
 * ioend after this.
 */
static void
iomap_finish_ioend(struct iomap_ioend *ioend, int error)
{
	struct inode *inode = ioend->io_inode;
	struct bio *bio = &ioend->io_inline_bio;
	struct bio *last = ioend->io_bio, *next;
	u64 start = bio->bi_iter.bi_sector;
	loff_t offset = ioend->io_offset;
	bool quiet = bio_flagged(bio, BIO_QUIET);

	for (bio = &ioend->io_inline_bio; bio; bio = next) {
		struct bio_vec *bv;
		struct bvec_iter_all iter_all;

		/*
		 * For the last bio, bi_private points to the ioend, so we
		 * need to explicitly end the iteration here.
		 */
		if (bio == last)
			next = NULL;
		else
			next = bio->bi_private;

		/* walk each page on bio, ending page IO on them */
		bio_for_each_segment_all(bv, bio, iter_all)
			iomap_finish_page_writeback(inode, bv->bv_page, error);
		bio_put(bio);
	}
	/* The ioend has been freed by bio_put() */

	if (unlikely(error && !quiet)) {
		printk_ratelimited(KERN_ERR
"%s: writeback error on inode %lu, offset %lld, sector %llu",
			inode->i_sb->s_id, inode->i_ino, offset, start);
	}
}

void
iomap_finish_ioends(struct iomap_ioend *ioend, int error)
{
	struct list_head tmp;

	list_replace_init(&ioend->io_list, &tmp);
	iomap_finish_ioend(ioend, error);

	while (!list_empty(&tmp)) {
		ioend = list_first_entry(&tmp, struct iomap_ioend, io_list);
		list_del_init(&ioend->io_list);
		iomap_finish_ioend(ioend, error);
	}
}
EXPORT_SYMBOL_GPL(iomap_finish_ioends);

/*
 * We can merge two adjacent ioends if they have the same set of work to do.
 */
static bool
iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next)
{
	if (ioend->io_bio->bi_status != next->io_bio->bi_status)
		return false;
	if ((ioend->io_flags & IOMAP_F_SHARED) ^
	    (next->io_flags & IOMAP_F_SHARED))
		return false;
	if ((ioend->io_type == IOMAP_UNWRITTEN) ^
	    (next->io_type == IOMAP_UNWRITTEN))
		return false;
	if (ioend->io_offset + ioend->io_size != next->io_offset)
		return false;
	return true;
}

void
iomap_ioend_try_merge(struct iomap_ioend *ioend, struct list_head *more_ioends,
		void (*merge_private)(struct iomap_ioend *ioend,
				struct iomap_ioend *next))
{
	struct iomap_ioend *next;

	INIT_LIST_HEAD(&ioend->io_list);

	while ((next = list_first_entry_or_null(more_ioends, struct iomap_ioend,
			io_list))) {
		if (!iomap_ioend_can_merge(ioend, next))
			break;
		list_move_tail(&next->io_list, &ioend->io_list);
		ioend->io_size += next->io_size;
		if (next->io_private && merge_private)
			merge_private(ioend, next);
	}
}
EXPORT_SYMBOL_GPL(iomap_ioend_try_merge);

static int
iomap_ioend_compare(void *priv, struct list_head *a, struct list_head *b)
{
	struct iomap_ioend *ia = container_of(a, struct iomap_ioend, io_list);
	struct iomap_ioend *ib = container_of(b, struct iomap_ioend, io_list);

	if (ia->io_offset < ib->io_offset)
		return -1;
	if (ia->io_offset > ib->io_offset)
		return 1;
	return 0;
}

void
iomap_sort_ioends(struct list_head *ioend_list)
{
	list_sort(NULL, ioend_list, iomap_ioend_compare);
}
EXPORT_SYMBOL_GPL(iomap_sort_ioends);
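
/*
 * A typical (hypothetical) completion-side use of the three helpers above:
 * the filesystem collects completed ioends on a private list from its I/O
 * completion handler and then, in process context, does
 *
 *	iomap_sort_ioends(&completion_list);
 *	while ((ioend = list_first_entry_or_null(&completion_list,
 *			struct iomap_ioend, io_list))) {
 *		list_del_init(&ioend->io_list);
 *		iomap_ioend_try_merge(ioend, &completion_list, NULL);
 *		iomap_finish_ioends(ioend, error);
 *	}
 *
 * so that adjacent ioends with identical work are completed in one pass.
 */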

static void iomap_writepage_end_bio(struct bio *bio)
{
	struct iomap_ioend *ioend = bio->bi_private;

	iomap_finish_ioend(ioend, blk_status_to_errno(bio->bi_status));
}

/*
 * Submit the final bio for an ioend.
 *
 * If @error is non-zero, it means that we have a situation where some part of
 * the submission process has failed after we have marked pages for writeback
 * and unlocked them. In this situation, we need to fail the bio instead of
 * submitting it. This typically only happens on a filesystem shutdown.
 */
static int
iomap_submit_ioend(struct iomap_writepage_ctx *wpc, struct iomap_ioend *ioend,
		int error)
{
	ioend->io_bio->bi_private = ioend;
	ioend->io_bio->bi_end_io = iomap_writepage_end_bio;

	if (wpc->ops->prepare_ioend)
		error = wpc->ops->prepare_ioend(ioend, error);
	if (error) {
		/*
		 * If we are failing the IO now, just mark the ioend with an
		 * error and finish it. This will run IO completion immediately
		 * as there is only one reference to the ioend at this point in
		 * time.
		 */
		ioend->io_bio->bi_status = errno_to_blk_status(error);
		bio_endio(ioend->io_bio);
		return error;
	}

	submit_bio(ioend->io_bio);
	return 0;
}

static struct iomap_ioend *
iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
		loff_t offset, sector_t sector, struct writeback_control *wbc)
{
	struct iomap_ioend *ioend;
	struct bio *bio;

	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &iomap_ioend_bioset);
	bio_set_dev(bio, wpc->iomap.bdev);
	bio->bi_iter.bi_sector = sector;
	bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
	bio->bi_write_hint = inode->i_write_hint;
	wbc_init_bio(wbc, bio);

	ioend = container_of(bio, struct iomap_ioend, io_inline_bio);
	INIT_LIST_HEAD(&ioend->io_list);
	ioend->io_type = wpc->iomap.type;
	ioend->io_flags = wpc->iomap.flags;
	ioend->io_inode = inode;
	ioend->io_size = 0;
	ioend->io_offset = offset;
	ioend->io_private = NULL;
	ioend->io_bio = bio;
	return ioend;
}

/*
 * Allocate a new bio, and chain the old bio to the new one.
 *
 * Note that we have to perform the chaining in this unintuitive order
 * so that the bi_private linkage is set up in the right direction for the
 * traversal in iomap_finish_ioend().
 */
static struct bio *
iomap_chain_bio(struct bio *prev)
{
	struct bio *new;

	new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
	bio_copy_dev(new, prev);	/* also copies over blkcg information */
	new->bi_iter.bi_sector = bio_end_sector(prev);
	new->bi_opf = prev->bi_opf;
	new->bi_write_hint = prev->bi_write_hint;

	bio_chain(prev, new);
	bio_get(prev);		/* for iomap_finish_ioend */
	submit_bio(prev);
	return new;
}

static bool
iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset,
		sector_t sector)
{
	if ((wpc->iomap.flags & IOMAP_F_SHARED) !=
	    (wpc->ioend->io_flags & IOMAP_F_SHARED))
		return false;
	if (wpc->iomap.type != wpc->ioend->io_type)
		return false;
	if (offset != wpc->ioend->io_offset + wpc->ioend->io_size)
		return false;
	if (sector != bio_end_sector(wpc->ioend->io_bio))
		return false;
	return true;
}

/*
 * Test to see if we have an existing ioend structure that we could append to
 * first; otherwise finish off the current ioend and start another.
 */
static void
iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page,
		struct iomap_page *iop, struct iomap_writepage_ctx *wpc,
		struct writeback_control *wbc, struct list_head *iolist)
{
	sector_t sector = iomap_sector(&wpc->iomap, offset);
	unsigned len = i_blocksize(inode);
	unsigned poff = offset & (PAGE_SIZE - 1);
	bool merged, same_page = false;

	if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, offset, sector)) {
		if (wpc->ioend)
			list_add(&wpc->ioend->io_list, iolist);
		wpc->ioend = iomap_alloc_ioend(inode, wpc, offset, sector, wbc);
	}

	merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff,
			&same_page);
	if (iop && !same_page)
		atomic_inc(&iop->write_count);

	if (!merged) {
		if (bio_full(wpc->ioend->io_bio, len)) {
			wpc->ioend->io_bio =
				iomap_chain_bio(wpc->ioend->io_bio);
		}
		bio_add_page(wpc->ioend->io_bio, page, len, poff);
	}

	wpc->ioend->io_size += len;
	wbc_account_cgroup_owner(wbc, page, len);
}

/*
 * We implement an immediate ioend submission policy here to avoid needing to
 * chain multiple ioends and hence nest mempool allocations which can violate
 * forward progress guarantees we need to provide. The current ioend we are
 * adding blocks to is cached on the writepage context, and if the new block
 * does not append to the cached ioend, it will create a new ioend and cache
 * that instead.
 *
 * If a new ioend is created and cached, the old ioend is returned and queued
 * locally for submission once the entire page is processed or an error has
 * been detected. While ioends are submitted immediately after they are
 * completed, batching optimisations are provided by higher level block
 * plugging.
 *
 * At the end of a writeback pass, there will be a cached ioend remaining on
 * the writepage context that the caller will need to submit.
 */
static int
iomap_writepage_map(struct iomap_writepage_ctx *wpc,
		struct writeback_control *wbc, struct inode *inode,
		struct page *page, u64 end_offset)
{
	struct iomap_page *iop = to_iomap_page(page);
	struct iomap_ioend *ioend, *next;
	unsigned len = i_blocksize(inode);
	u64 file_offset; /* file offset of page */
	int error = 0, count = 0, i;
	LIST_HEAD(submit_list);

	WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE && !iop);
	WARN_ON_ONCE(iop && atomic_read(&iop->write_count) != 0);

	/*
	 * Walk through the page to find areas to write back. If we run off the
	 * end of the current map or find the current map invalid, grab a new
	 * one.
	 */
	for (i = 0, file_offset = page_offset(page);
	     i < (PAGE_SIZE >> inode->i_blkbits) && file_offset < end_offset;
	     i++, file_offset += len) {
		if (iop && !test_bit(i, iop->uptodate))
			continue;

		error = wpc->ops->map_blocks(wpc, inode, file_offset);
		if (error)
			break;
		if (WARN_ON_ONCE(wpc->iomap.type == IOMAP_INLINE))
			continue;
		if (wpc->iomap.type == IOMAP_HOLE)
			continue;
		iomap_add_to_ioend(inode, file_offset, page, iop, wpc, wbc,
				&submit_list);
		count++;
	}

	WARN_ON_ONCE(!wpc->ioend && !list_empty(&submit_list));
	WARN_ON_ONCE(!PageLocked(page));
	WARN_ON_ONCE(PageWriteback(page));

	/*
	 * We cannot cancel the ioend directly here on error. We may have
	 * already set other pages under writeback and hence we have to run I/O
	 * completion to mark the error state of the pages under writeback
	 * appropriately.
	 */
	if (unlikely(error)) {
		if (!count) {
			/*
			 * If the current page hasn't been added to an ioend,
			 * it won't be affected by I/O completion and we must
			 * discard and unlock it right here.
			 */
			if (wpc->ops->discard_page)
				wpc->ops->discard_page(page);
			ClearPageUptodate(page);
			unlock_page(page);
			goto done;
		}

		/*
		 * If the page was not fully cleaned, we need to ensure that the
		 * higher layers come back to it correctly. That means we need
		 * to keep the page dirty, and for WB_SYNC_ALL writeback we need
		 * to ensure the PAGECACHE_TAG_TOWRITE index mark is not removed
		 * so another attempt to write this page in this writeback sweep
		 * will be made.
		 */
		set_page_writeback_keepwrite(page);
	} else {
		clear_page_dirty_for_io(page);
		set_page_writeback(page);
	}

	unlock_page(page);

	/*
	 * Preserve the original error if there was one; otherwise catch
	 * submission errors here and propagate into subsequent ioend
	 * submissions.
	 */
	list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
		int error2;

		list_del_init(&ioend->io_list);
		error2 = iomap_submit_ioend(wpc, ioend, error);
		if (error2 && !error)
			error = error2;
	}

	/*
	 * We can end up here with no error and nothing to write only if we
	 * race with a partial page truncate on a sub-page block sized
	 * filesystem.
	 */
	if (!count)
		end_page_writeback(page);
done:
	mapping_set_error(page->mapping, error);
	return error;
}

/*
 * Write out a dirty page.
 *
 * For delalloc space on the page we need to allocate space and flush it.
 * For unwritten space on the page we need to start the conversion to
 * regular allocated space.
 */
static int
iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data)
{
	struct iomap_writepage_ctx *wpc = data;
	struct inode *inode = page->mapping->host;
	pgoff_t end_index;
	u64 end_offset;
	loff_t offset;

	trace_iomap_writepage(inode, page_offset(page), PAGE_SIZE);

	/*
	 * Refuse to write the page out if we are called from reclaim context.
	 *
	 * This avoids stack overflows when called from deeply used stacks in
	 * random callers for direct reclaim or memcg reclaim. We explicitly
	 * allow reclaim from kswapd as the stack usage there is relatively
	 * low.
	 *
	 * This should never happen except in the case of a VM regression, so
	 * warn about it.
	 */
	if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
			PF_MEMALLOC))
		goto redirty;

	/*
	 * Given that we do not allow direct reclaim to call us, we should
	 * never be called in a recursive filesystem reclaim context.
	 */
	if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS))
		goto redirty;

	/*
	 * Is this page beyond the end of the file?
	 *
	 * The page index is less than the end_index, adjust the end_offset
	 * to the highest offset that this page should represent.
	 * -----------------------------------------------------
	 * |			file mapping	       | <EOF> |
	 * -----------------------------------------------------
	 * | Page ... | Page N-2 | Page N-1 |  Page N  |       |
	 * ^--------------------------------^----------|--------
	 * |     desired writeback range    |      see else    |
	 * ---------------------------------^------------------|
	 */
	offset = i_size_read(inode);
	end_index = offset >> PAGE_SHIFT;
	if (page->index < end_index)
		end_offset = (loff_t)(page->index + 1) << PAGE_SHIFT;
	else {
		/*
		 * Check whether the page to write out is beyond or straddles
		 * i_size or not.
		 * -------------------------------------------------------
		 * |		file mapping		        | <EOF>  |
		 * -------------------------------------------------------
		 * | Page ... | Page N-2 | Page N-1 |  Page N   | Beyond |
		 * ^--------------------------------^-----------|---------
		 * |				    |      Straddles     |
		 * ---------------------------------^-----------|--------|
		 */
		unsigned offset_into_page = offset & (PAGE_SIZE - 1);

		/*
		 * Skip the page if it is fully outside i_size, e.g. due to a
		 * truncate operation that is in progress. We must redirty the
		 * page so that reclaim stops reclaiming it. Otherwise
		 * iomap_vm_releasepage() is called on it and gets confused.
		 *
		 * Note that the end_index is unsigned long. It would overflow
		 * if the given offset is greater than 16TB on a 32-bit system
		 * and if we do check the page is fully outside i_size or not
		 * via "if (page->index >= end_index + 1)", as "end_index + 1"
		 * will be evaluated to 0. Hence this page would be redirtied
		 * and written out repeatedly, which would result in an
		 * infinite loop; the user program that performs this operation
		 * would hang. Instead, we can detect this situation by
		 * checking if the page to write is totally beyond the i_size
		 * or if its offset is just equal to the EOF.
		 */
		if (page->index > end_index ||
		    (page->index == end_index && offset_into_page == 0))
			goto redirty;

		/*
		 * The page straddles i_size. It must be zeroed out on each
		 * and every writepage invocation because it may be mmapped.
		 * "A file is mapped in multiples of the page size. For a file
		 * that is not a multiple of the page size, the remaining
		 * memory is zeroed when mapped, and writes to that region are
		 * not written out to the file."
		 */
		zero_user_segment(page, offset_into_page, PAGE_SIZE);

		/* Adjust the end_offset to the end of file */
		end_offset = offset;
	}

	return iomap_writepage_map(wpc, wbc, inode, page, end_offset);

redirty:
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);
	return 0;
}

int
iomap_writepage(struct page *page, struct writeback_control *wbc,
		struct iomap_writepage_ctx *wpc,
		const struct iomap_writeback_ops *ops)
{
	int ret;

	wpc->ops = ops;
	ret = iomap_do_writepage(page, wbc, wpc);
	if (!wpc->ioend)
		return ret;
	return iomap_submit_ioend(wpc, wpc->ioend, ret);
}
EXPORT_SYMBOL_GPL(iomap_writepage);

int
iomap_writepages(struct address_space *mapping, struct writeback_control *wbc,
		struct iomap_writepage_ctx *wpc,
		const struct iomap_writeback_ops *ops)
{
	int ret;

	wpc->ops = ops;
	ret = write_cache_pages(mapping, wbc, iomap_do_writepage, wpc);
	if (!wpc->ioend)
		return ret;
	return iomap_submit_ioend(wpc, wpc->ioend, ret);
}
EXPORT_SYMBOL_GPL(iomap_writepages);
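
/*
 * A minimal, hypothetical wiring of the writeback side (names illustrative,
 * not from this file):
 *
 *	static const struct iomap_writeback_ops myfs_writeback_ops = {
 *		.map_blocks	= myfs_map_blocks,
 *	};
 *
 *	static int myfs_writepages(struct address_space *mapping,
 *			struct writeback_control *wbc)
 *	{
 *		struct iomap_writepage_ctx wpc = { };
 *
 *		return iomap_writepages(mapping, wbc, &wpc,
 *				&myfs_writeback_ops);
 *	}
 *
 * The context caches the current iomap and ioend across pages; the optional
 * ->prepare_ioend() and ->discard_page() callbacks used above may be left
 * NULL.
 */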

static int __init iomap_init(void)
{
	return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
			   offsetof(struct iomap_ioend, io_inline_bio),
			   BIOSET_NEED_BVECS);
}
fs_initcall(iomap_init);