Blame - fs/ocfs2/file.c - SHIFTPHONES/mainline/linux

blob: 9e8cc4346b761e6246494416787b5b78c193f01e [file] [log] [blame]

Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1	/* -*- mode: c; c-basic-offset: 8; -*-
				2	* vim: noexpandtab sw=8 ts=8 sts=0:
				3	*
				4	* file.c
				5	*
				6	* File open, close, extend, truncate
				7	*
				8	* Copyright (C) 2002, 2004 Oracle. All rights reserved.
				9	*
				10	* This program is free software; you can redistribute it and/or
				11	* modify it under the terms of the GNU General Public
				12	* License as published by the Free Software Foundation; either
				13	* version 2 of the License, or (at your option) any later version.
				14	*
				15	* This program is distributed in the hope that it will be useful,
				16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				18	* General Public License for more details.
				19	*
				20	* You should have received a copy of the GNU General Public
				21	* License along with this program; if not, write to the
				22	* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
				23	* Boston, MA 02111-1307, USA.
				24	*/
				25
Randy Dunlap	16f7e0f	2006-01-11 12:17:46 -0800	[diff] [blame]	26	#include <linux/capability.h>
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	27	#include <linux/fs.h>
				28	#include <linux/types.h>
				29	#include <linux/slab.h>
				30	#include <linux/highmem.h>
				31	#include <linux/pagemap.h>
				32	#include <linux/uio.h>
Mark Fasheh	e2057c5	2006-10-03 17:53:05 -0700	[diff] [blame]	33	#include <linux/sched.h>
Jens Axboe	d6b29d7	2007-06-04 09:59:47 +0200	[diff] [blame]	34	#include <linux/splice.h>
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	35	#include <linux/mount.h>
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	36	#include <linux/writeback.h>
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	37	#include <linux/falloc.h>
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	38	#include <linux/quotaops.h>
Jan Kara	04eda1a	2010-08-05 20:32:45 +0200	[diff] [blame]	39	#include <linux/blkdev.h>
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	40
				41	#define MLOG_MASK_PREFIX ML_INODE
				42	#include <cluster/masklog.h>
				43
				44	#include "ocfs2.h"
				45
				46	#include "alloc.h"
				47	#include "aops.h"
				48	#include "dir.h"
				49	#include "dlmglue.h"
				50	#include "extent_map.h"
				51	#include "file.h"
				52	#include "sysfile.h"
				53	#include "inode.h"
Herbert Poetzl	ca4d147	2006-07-03 17:27:12 -0700	[diff] [blame]	54	#include "ioctl.h"
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	55	#include "journal.h"
Mark Fasheh	53fc622	2007-12-20 16:49:04 -0800	[diff] [blame]	56	#include "locks.h"
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	57	#include "mmap.h"
				58	#include "suballoc.h"
				59	#include "super.h"
Tiger Yang	cf1d6c7	2008-08-18 17:11:00 +0800	[diff] [blame]	60	#include "xattr.h"
Tiger Yang	23fc270	2008-11-14 11:17:18 +0800	[diff] [blame]	61	#include "acl.h"
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	62	#include "quota.h"
Tao Ma	293b2f7	2009-08-25 08:02:48 +0800	[diff] [blame]	63	#include "refcounttree.h"
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	64
				65	#include "buffer_head_io.h"
				66
Mark Fasheh	53fc622	2007-12-20 16:49:04 -0800	[diff] [blame]	67	static int ocfs2_init_file_private(struct inode inode, struct file file)
				68	{
				69	struct ocfs2_file_private *fp;
				70
				71	fp = kzalloc(sizeof(struct ocfs2_file_private), GFP_KERNEL);
				72	if (!fp)
				73	return -ENOMEM;
				74
				75	fp->fp_file = file;
				76	mutex_init(&fp->fp_mutex);
				77	ocfs2_file_lock_res_init(&fp->fp_flock, fp);
				78	file->private_data = fp;
				79
				80	return 0;
				81	}
				82
				83	static void ocfs2_free_file_private(struct inode inode, struct file file)
				84	{
				85	struct ocfs2_file_private *fp = file->private_data;
				86	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				87
				88	if (fp) {
				89	ocfs2_simple_drop_lockres(osb, &fp->fp_flock);
				90	ocfs2_lock_res_free(&fp->fp_flock);
				91	kfree(fp);
				92	file->private_data = NULL;
				93	}
				94	}
				95
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	96	static int ocfs2_file_open(struct inode inode, struct file file)
				97	{
				98	int status;
				99	int mode = file->f_flags;
				100	struct ocfs2_inode_info *oi = OCFS2_I(inode);
				101
				102	mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
Josef Sipek	d28c917	2006-12-08 02:37:25 -0800	[diff] [blame]	103	file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	104
Christoph Hellwig	907f455	2010-03-03 09:05:06 -0500	[diff] [blame]	105	if (file->f_mode & FMODE_WRITE)
Christoph Hellwig	871a293	2010-03-03 09:05:07 -0500	[diff] [blame]	106	dquot_initialize(inode);
Christoph Hellwig	907f455	2010-03-03 09:05:06 -0500	[diff] [blame]	107
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	108	spin_lock(&oi->ip_lock);
				109
				110	/* Check that the inode hasn't been wiped from disk by another
				111	* node. If it hasn't then we're safe as long as we hold the
				112	* spin lock until our increment of open count. */
				113	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
				114	spin_unlock(&oi->ip_lock);
				115
				116	status = -ENOENT;
				117	goto leave;
				118	}
				119
				120	if (mode & O_DIRECT)
				121	oi->ip_flags \|= OCFS2_INODE_OPEN_DIRECT;
				122
				123	oi->ip_open_count++;
				124	spin_unlock(&oi->ip_lock);
Mark Fasheh	53fc622	2007-12-20 16:49:04 -0800	[diff] [blame]	125
				126	status = ocfs2_init_file_private(inode, file);
				127	if (status) {
				128	/*
				129	* We want to set open count back if we're failing the
				130	* open.
				131	*/
				132	spin_lock(&oi->ip_lock);
				133	oi->ip_open_count--;
				134	spin_unlock(&oi->ip_lock);
				135	}
				136
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	137	leave:
				138	mlog_exit(status);
				139	return status;
				140	}
				141
				142	static int ocfs2_file_release(struct inode inode, struct file file)
				143	{
				144	struct ocfs2_inode_info *oi = OCFS2_I(inode);
				145
				146	mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
Josef Sipek	d28c917	2006-12-08 02:37:25 -0800	[diff] [blame]	147	file->f_path.dentry->d_name.len,
				148	file->f_path.dentry->d_name.name);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	149
				150	spin_lock(&oi->ip_lock);
				151	if (!--oi->ip_open_count)
				152	oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT;
				153	spin_unlock(&oi->ip_lock);
				154
Mark Fasheh	53fc622	2007-12-20 16:49:04 -0800	[diff] [blame]	155	ocfs2_free_file_private(inode, file);
				156
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	157	mlog_exit(0);
				158
				159	return 0;
				160	}
				161
/*
 * Open a directory: only the per-open private data is set up.
 * Returns whatever ocfs2_init_file_private() returns.
 */
static int ocfs2_dir_open(struct inode *inode, struct file *file)
{
	return ocfs2_init_file_private(inode, file);
}
				166
				167	static int ocfs2_dir_release(struct inode inode, struct file file)
				168	{
				169	ocfs2_free_file_private(inode, file);
				170	return 0;
				171	}
				172
Christoph Hellwig	7ea8085	2010-05-26 17:53:25 +0200	[diff] [blame]	173	static int ocfs2_sync_file(struct file *file, int datasync)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	174	{
				175	int err = 0;
				176	journal_t *journal;
Christoph Hellwig	7ea8085	2010-05-26 17:53:25 +0200	[diff] [blame]	177	struct inode *inode = file->f_mapping->host;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	178	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				179
Joel Becker	c0e1a3e	2010-09-15 16:56:54 -0700	[diff] [blame]	180	mlog_entry("(0x%p, %d, 0x%p, '%.*s')\n", file, datasync,
				181	file->f_path.dentry, file->f_path.dentry->d_name.len,
				182	file->f_path.dentry->d_name.name);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	183
Jan Kara	04eda1a	2010-08-05 20:32:45 +0200	[diff] [blame]	184	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) {
				185	/*
				186	* We still have to flush drive's caches to get data to the
				187	* platter
				188	*/
				189	if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
				190	blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
				191	NULL, BLKDEV_IFL_WAIT);
Hisashi Hifumi	e04cc15	2009-06-09 16:47:45 +0900	[diff] [blame]	192	goto bail;
Jan Kara	04eda1a	2010-08-05 20:32:45 +0200	[diff] [blame]	193	}
Hisashi Hifumi	e04cc15	2009-06-09 16:47:45 +0900	[diff] [blame]	194
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	195	journal = osb->journal->j_journal;
Joel Becker	2b4e30f	2008-09-03 20:03:41 -0700	[diff] [blame]	196	err = jbd2_journal_force_commit(journal);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	197
				198	bail:
				199	mlog_exit(err);
				200
				201	return (err < 0) ? -EIO : 0;
				202	}
				203
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	204	int ocfs2_should_update_atime(struct inode *inode,
				205	struct vfsmount *vfsmnt)
				206	{
				207	struct timespec now;
				208	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				209
				210	if (ocfs2_is_hard_readonly(osb) \|\| ocfs2_is_soft_readonly(osb))
				211	return 0;
				212
				213	if ((inode->i_flags & S_NOATIME) \|\|
				214	((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)))
				215	return 0;
				216
Mark Fasheh	6c2aad0	2006-12-19 15:25:52 -0800	[diff] [blame]	217	/*
				218	* We can be called with no vfsmnt structure - NFSD will
				219	* sometimes do this.
				220	*
				221	* Note that our action here is different than touch_atime() -
				222	* if we can't tell whether this is a noatime mount, then we
				223	* don't know whether to trust the value of s_atime_quantum.
				224	*/
				225	if (vfsmnt == NULL)
				226	return 0;
				227
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	228	if ((vfsmnt->mnt_flags & MNT_NOATIME) \|\|
				229	((vfsmnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
				230	return 0;
				231
Mark Fasheh	7e913c5	2006-12-13 00:34:35 -0800	[diff] [blame]	232	if (vfsmnt->mnt_flags & MNT_RELATIME) {
				233	if ((timespec_compare(&inode->i_atime, &inode->i_mtime) <= 0) \|\|
				234	(timespec_compare(&inode->i_atime, &inode->i_ctime) <= 0))
				235	return 1;
				236
				237	return 0;
				238	}
				239
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	240	now = CURRENT_TIME;
				241	if ((now.tv_sec - inode->i_atime.tv_sec <= osb->s_atime_quantum))
				242	return 0;
				243	else
				244	return 1;
				245	}
				246
				247	int ocfs2_update_inode_atime(struct inode *inode,
				248	struct buffer_head *bh)
				249	{
				250	int ret;
				251	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				252	handle_t *handle;
Mark Fasheh	c11e9fa	2007-07-20 11:24:53 -0700	[diff] [blame]	253	struct ocfs2_dinode di = (struct ocfs2_dinode ) bh->b_data;
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	254
				255	mlog_entry_void();
				256
				257	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
Jan Kara	fa38e92	2008-10-20 19:23:51 +0200	[diff] [blame]	258	if (IS_ERR(handle)) {
				259	ret = PTR_ERR(handle);
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	260	mlog_errno(ret);
				261	goto out;
				262	}
				263
Joel Becker	0cf2f76	2009-02-12 16:41:25 -0800	[diff] [blame]	264	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
Joel Becker	13723d0	2008-10-17 19:25:01 -0700	[diff] [blame]	265	OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fasheh	c11e9fa	2007-07-20 11:24:53 -0700	[diff] [blame]	266	if (ret) {
				267	mlog_errno(ret);
				268	goto out_commit;
				269	}
				270
				271	/*
				272	* Don't use ocfs2_mark_inode_dirty() here as we don't always
				273	* have i_mutex to guard against concurrent changes to other
				274	* inode fields.
				275	*/
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	276	inode->i_atime = CURRENT_TIME;
Mark Fasheh	c11e9fa	2007-07-20 11:24:53 -0700	[diff] [blame]	277	di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
				278	di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
Joel Becker	ec20cec	2010-03-19 14:13:52 -0700	[diff] [blame]	279	ocfs2_journal_dirty(handle, bh);
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	280
Mark Fasheh	c11e9fa	2007-07-20 11:24:53 -0700	[diff] [blame]	281	out_commit:
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	282	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
				283	out:
				284	mlog_exit(ret);
				285	return ret;
				286	}
				287
Adrian Bunk	6cb129f	2007-04-26 00:29:35 -0700	[diff] [blame]	288	static int ocfs2_set_inode_size(handle_t *handle,
				289	struct inode *inode,
				290	struct buffer_head *fe_bh,
				291	u64 new_i_size)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	292	{
				293	int status;
				294
				295	mlog_entry_void();
				296	i_size_write(inode, new_i_size);
Mark Fasheh	8110b07	2007-03-22 16:53:23 -0700	[diff] [blame]	297	inode->i_blocks = ocfs2_inode_sector_count(inode);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	298	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
				299
				300	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
				301	if (status < 0) {
				302	mlog_errno(status);
				303	goto bail;
				304	}
				305
				306	bail:
				307	mlog_exit(status);
				308	return status;
				309	}
				310
Jan Kara	9e33d69	2008-08-25 19:56:50 +0200	[diff] [blame]	311	int ocfs2_simple_size_update(struct inode *inode,
				312	struct buffer_head *di_bh,
				313	u64 new_i_size)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	314	{
				315	int ret;
				316	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	317	handle_t *handle = NULL;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	318
Mark Fasheh	65eff9c	2006-10-09 17:26:22 -0700	[diff] [blame]	319	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
Jan Kara	fa38e92	2008-10-20 19:23:51 +0200	[diff] [blame]	320	if (IS_ERR(handle)) {
				321	ret = PTR_ERR(handle);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	322	mlog_errno(ret);
				323	goto out;
				324	}
				325
				326	ret = ocfs2_set_inode_size(handle, inode, di_bh,
				327	new_i_size);
				328	if (ret < 0)
				329	mlog_errno(ret);
				330
Mark Fasheh	02dc1af	2006-10-09 16:48:10 -0700	[diff] [blame]	331	ocfs2_commit_trans(osb, handle);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	332	out:
				333	return ret;
				334	}
				335
Tao Ma	37f8a2b	2009-08-26 09:47:28 +0800	[diff] [blame]	336	static int ocfs2_cow_file_pos(struct inode *inode,
				337	struct buffer_head *fe_bh,
				338	u64 offset)
				339	{
				340	int status;
				341	u32 phys, cpos = offset >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
				342	unsigned int num_clusters = 0;
				343	unsigned int ext_flags = 0;
				344
				345	/*
				346	* If the new offset is aligned to the range of the cluster, there is
				347	* no space for ocfs2_zero_range_for_truncate to fill, so no need to
				348	* CoW either.
				349	*/
				350	if ((offset & (OCFS2_SB(inode->i_sb)->s_clustersize - 1)) == 0)
				351	return 0;
				352
				353	status = ocfs2_get_clusters(inode, cpos, &phys,
				354	&num_clusters, &ext_flags);
				355	if (status) {
				356	mlog_errno(status);
				357	goto out;
				358	}
				359
				360	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
				361	goto out;
				362
Tao Ma	1550271	2010-08-12 10:36:38 +0800	[diff] [blame]	363	return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1);
Tao Ma	37f8a2b	2009-08-26 09:47:28 +0800	[diff] [blame]	364
				365	out:
				366	return status;
				367	}
				368
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	369	static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
				370	struct inode *inode,
				371	struct buffer_head *fe_bh,
				372	u64 new_i_size)
				373	{
				374	int status;
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	375	handle_t *handle;
Mark Fasheh	60b1139	2007-02-16 11:46:50 -0800	[diff] [blame]	376	struct ocfs2_dinode *di;
Mark Fasheh	35edec1	2007-07-06 14:41:18 -0700	[diff] [blame]	377	u64 cluster_bytes;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	378
				379	mlog_entry_void();
				380
Tao Ma	37f8a2b	2009-08-26 09:47:28 +0800	[diff] [blame]	381	/*
				382	* We need to CoW the cluster contains the offset if it is reflinked
				383	* since we will call ocfs2_zero_range_for_truncate later which will
				384	* write "0" from offset to the end of the cluster.
				385	*/
				386	status = ocfs2_cow_file_pos(inode, fe_bh, new_i_size);
				387	if (status) {
				388	mlog_errno(status);
				389	return status;
				390	}
				391
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	392	/* TODO: This needs to actually orphan the inode in this
				393	* transaction. */
				394
Mark Fasheh	65eff9c	2006-10-09 17:26:22 -0700	[diff] [blame]	395	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	396	if (IS_ERR(handle)) {
				397	status = PTR_ERR(handle);
				398	mlog_errno(status);
				399	goto out;
				400	}
				401
Joel Becker	0cf2f76	2009-02-12 16:41:25 -0800	[diff] [blame]	402	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
Joel Becker	13723d0	2008-10-17 19:25:01 -0700	[diff] [blame]	403	OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fasheh	60b1139	2007-02-16 11:46:50 -0800	[diff] [blame]	404	if (status < 0) {
				405	mlog_errno(status);
				406	goto out_commit;
				407	}
				408
				409	/*
				410	* Do this before setting i_size.
				411	*/
Mark Fasheh	35edec1	2007-07-06 14:41:18 -0700	[diff] [blame]	412	cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, new_i_size);
				413	status = ocfs2_zero_range_for_truncate(inode, handle, new_i_size,
				414	cluster_bytes);
Mark Fasheh	60b1139	2007-02-16 11:46:50 -0800	[diff] [blame]	415	if (status) {
				416	mlog_errno(status);
				417	goto out_commit;
				418	}
				419
				420	i_size_write(inode, new_i_size);
Mark Fasheh	60b1139	2007-02-16 11:46:50 -0800	[diff] [blame]	421	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
				422
				423	di = (struct ocfs2_dinode *) fe_bh->b_data;
				424	di->i_size = cpu_to_le64(new_i_size);
				425	di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
				426	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
				427
Joel Becker	ec20cec	2010-03-19 14:13:52 -0700	[diff] [blame]	428	ocfs2_journal_dirty(handle, fe_bh);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	429
Mark Fasheh	60b1139	2007-02-16 11:46:50 -0800	[diff] [blame]	430	out_commit:
Mark Fasheh	02dc1af	2006-10-09 16:48:10 -0700	[diff] [blame]	431	ocfs2_commit_trans(osb, handle);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	432	out:
Mark Fasheh	60b1139	2007-02-16 11:46:50 -0800	[diff] [blame]	433
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	434	mlog_exit(status);
				435	return status;
				436	}
				437
				438	static int ocfs2_truncate_file(struct inode *inode,
				439	struct buffer_head *di_bh,
				440	u64 new_i_size)
				441	{
				442	int status = 0;
				443	struct ocfs2_dinode *fe = NULL;
				444	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	445
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame]	446	mlog_entry("(inode = %llu, new_i_size = %llu\n",
				447	(unsigned long long)OCFS2_I(inode)->ip_blkno,
				448	(unsigned long long)new_i_size);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	449
Joel Becker	b657c95	2008-11-13 14:49:11 -0800	[diff] [blame]	450	/* We trust di_bh because it comes from ocfs2_inode_lock(), which
				451	* already validated it */
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	452	fe = (struct ocfs2_dinode *) di_bh->b_data;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	453
				454	mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame]	455	"Inode %llu, inode i_size = %lld != di "
				456	"i_size = %llu, i_flags = 0x%x\n",
				457	(unsigned long long)OCFS2_I(inode)->ip_blkno,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	458	i_size_read(inode),
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame]	459	(unsigned long long)le64_to_cpu(fe->i_size),
				460	le32_to_cpu(fe->i_flags));
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	461
				462	if (new_i_size > le64_to_cpu(fe->i_size)) {
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame]	463	mlog(0, "asked to truncate file with size (%llu) to size (%llu)!\n",
				464	(unsigned long long)le64_to_cpu(fe->i_size),
				465	(unsigned long long)new_i_size);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	466	status = -EINVAL;
				467	mlog_errno(status);
				468	goto bail;
				469	}
				470
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame]	471	mlog(0, "inode %llu, i_size = %llu, new_i_size = %llu\n",
				472	(unsigned long long)le64_to_cpu(fe->i_blkno),
				473	(unsigned long long)le64_to_cpu(fe->i_size),
				474	(unsigned long long)new_i_size);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	475
				476	/* lets handle the simple truncate cases before doing any more
				477	* cluster locking. */
				478	if (new_i_size == le64_to_cpu(fe->i_size))
				479	goto bail;
				480
Mark Fasheh	2e89b2e	2007-05-09 13:40:18 -0700	[diff] [blame]	481	down_write(&OCFS2_I(inode)->ip_alloc_sem);
				482
Mark Fasheh	4fe370a	2009-12-07 13:15:40 -0800	[diff] [blame]	483	ocfs2_resv_discard(&osb->osb_la_resmap,
				484	&OCFS2_I(inode)->ip_la_data_resv);
				485
Mark Fasheh	c934a92	2007-10-18 15:23:46 -0700	[diff] [blame]	486	/*
				487	* The inode lock forced other nodes to sync and drop their
				488	* pages, which (correctly) happens even if we have a truncate
				489	* without allocation change - ocfs2 cluster sizes can be much
				490	* greater than page size, so we have to truncate them
				491	* anyway.
				492	*/
Mark Fasheh	2e89b2e	2007-05-09 13:40:18 -0700	[diff] [blame]	493	unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
				494	truncate_inode_pages(inode->i_mapping, new_i_size);
				495
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	496	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
				497	status = ocfs2_truncate_inline(inode, di_bh, new_i_size,
Mark Fasheh	b1967d0	2007-11-20 11:56:39 -0800	[diff] [blame]	498	i_size_read(inode), 1);
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	499	if (status)
				500	mlog_errno(status);
				501
Mark Fasheh	c934a92	2007-10-18 15:23:46 -0700	[diff] [blame]	502	goto bail_unlock_sem;
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	503	}
				504
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	505	/* alright, we're going to need to do a full blown alloc size
				506	* change. Orphan the inode so that recovery can complete the
				507	* truncate if necessary. This does the task of marking
				508	* i_size. */
				509	status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size);
				510	if (status < 0) {
				511	mlog_errno(status);
Mark Fasheh	c934a92	2007-10-18 15:23:46 -0700	[diff] [blame]	512	goto bail_unlock_sem;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	513	}
				514
Tristan Ye	78f9467	2010-05-11 17:54:42 +0800	[diff] [blame]	515	status = ocfs2_commit_truncate(osb, inode, di_bh);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	516	if (status < 0) {
				517	mlog_errno(status);
Mark Fasheh	c934a92	2007-10-18 15:23:46 -0700	[diff] [blame]	518	goto bail_unlock_sem;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	519	}
				520
				521	/* TODO: orphan dir cleanup here. */
Mark Fasheh	c934a92	2007-10-18 15:23:46 -0700	[diff] [blame]	522	bail_unlock_sem:
Mark Fasheh	2e89b2e	2007-05-09 13:40:18 -0700	[diff] [blame]	523	up_write(&OCFS2_I(inode)->ip_alloc_sem);
				524
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	525	bail:
Tao Ma	8b2c0db	2009-08-18 11:43:49 +0800	[diff] [blame]	526	if (!status && OCFS2_I(inode)->ip_clusters == 0)
				527	status = ocfs2_try_remove_refcount_tree(inode, di_bh);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	528
				529	mlog_exit(status);
				530	return status;
				531	}
				532
				533	/*
Tao Ma	0eb8d47e	2008-08-18 17:38:45 +0800	[diff] [blame]	534	* extend file allocation only here.
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	535	* we'll update all the disk stuff, and oip->alloc_size
				536	*
				537	* expect stuff to be locked, a transaction started and enough data /
				538	* metadata reservations in the contexts.
				539	*
				540	* Will return -EAGAIN, and a reason if a restart is needed.
				541	* If passed in, *reason will always be set, even in error.
				542	*/
Tao Ma	0eb8d47e	2008-08-18 17:38:45 +0800	[diff] [blame]	543	int ocfs2_add_inode_data(struct ocfs2_super *osb,
				544	struct inode *inode,
				545	u32 *logical_offset,
				546	u32 clusters_to_add,
				547	int mark_unwritten,
				548	struct buffer_head *fe_bh,
				549	handle_t *handle,
				550	struct ocfs2_alloc_context *data_ac,
				551	struct ocfs2_alloc_context *meta_ac,
				552	enum ocfs2_alloc_restarted *reason_ret)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	553	{
Joel Becker	f99b9b7	2008-08-20 19:36:33 -0700	[diff] [blame]	554	int ret;
				555	struct ocfs2_extent_tree et;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	556
Joel Becker	5e404e9	2009-02-13 03:54:22 -0800	[diff] [blame]	557	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), fe_bh);
Joel Becker	cbee7e1	2009-02-13 03:34:15 -0800	[diff] [blame]	558	ret = ocfs2_add_clusters_in_btree(handle, &et, logical_offset,
				559	clusters_to_add, mark_unwritten,
				560	data_ac, meta_ac, reason_ret);
Joel Becker	f99b9b7	2008-08-20 19:36:33 -0700	[diff] [blame]	561
				562	return ret;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	563	}
				564
Mark Fasheh	2ae99a6	2007-03-09 16:43:28 -0800	[diff] [blame]	565	static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
				566	u32 clusters_to_add, int mark_unwritten)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	567	{
				568	int status = 0;
				569	int restart_func = 0;
Mark Fasheh	abf8b15	2007-01-17 13:07:24 -0800	[diff] [blame]	570	int credits;
Mark Fasheh	2ae99a6	2007-03-09 16:43:28 -0800	[diff] [blame]	571	u32 prev_clusters;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	572	struct buffer_head *bh = NULL;
				573	struct ocfs2_dinode *fe = NULL;
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	574	handle_t *handle = NULL;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	575	struct ocfs2_alloc_context *data_ac = NULL;
				576	struct ocfs2_alloc_context *meta_ac = NULL;
				577	enum ocfs2_alloc_restarted why;
				578	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
Joel Becker	f99b9b7	2008-08-20 19:36:33 -0700	[diff] [blame]	579	struct ocfs2_extent_tree et;
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	580	int did_quota = 0;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	581
				582	mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);
				583
Mark Fasheh	dcd0538	2007-01-16 11:32:23 -0800	[diff] [blame]	584	/*
				585	* This function only exists for file systems which don't
				586	* support holes.
				587	*/
Mark Fasheh	2ae99a6	2007-03-09 16:43:28 -0800	[diff] [blame]	588	BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
Mark Fasheh	dcd0538	2007-01-16 11:32:23 -0800	[diff] [blame]	589
Joel Becker	b657c95	2008-11-13 14:49:11 -0800	[diff] [blame]	590	status = ocfs2_read_inode_block(inode, &bh);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	591	if (status < 0) {
				592	mlog_errno(status);
				593	goto leave;
				594	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	595	fe = (struct ocfs2_dinode *) bh->b_data;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	596
				597	restart_all:
				598	BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
				599
Tao Ma	e7d4cb6	2008-08-18 17:38:44 +0800	[diff] [blame]	600	mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
				601	"clusters_to_add = %u\n",
				602	(unsigned long long)OCFS2_I(inode)->ip_blkno,
				603	(long long)i_size_read(inode), le32_to_cpu(fe->i_clusters),
				604	clusters_to_add);
Joel Becker	5e404e9	2009-02-13 03:54:22 -0800	[diff] [blame]	605	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), bh);
Joel Becker	f99b9b7	2008-08-20 19:36:33 -0700	[diff] [blame]	606	status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
				607	&data_ac, &meta_ac);
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	608	if (status) {
				609	mlog_errno(status);
				610	goto leave;
				611	}
				612
Tao Ma	811f933	2008-08-18 17:38:43 +0800	[diff] [blame]	613	credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list,
				614	clusters_to_add);
Mark Fasheh	65eff9c	2006-10-09 17:26:22 -0700	[diff] [blame]	615	handle = ocfs2_start_trans(osb, credits);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	616	if (IS_ERR(handle)) {
				617	status = PTR_ERR(handle);
				618	handle = NULL;
				619	mlog_errno(status);
				620	goto leave;
				621	}
				622
				623	restarted_transaction:
Christoph Hellwig	5dd4056	2010-03-03 09:05:00 -0500	[diff] [blame]	624	status = dquot_alloc_space_nodirty(inode,
				625	ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
				626	if (status)
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	627	goto leave;
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	628	did_quota = 1;
				629
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	630	/* reserve a write to the file entry early on - that we if we
				631	* run out of credits in the allocation path, we can still
				632	* update i_size. */
Joel Becker	0cf2f76	2009-02-12 16:41:25 -0800	[diff] [blame]	633	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
Joel Becker	13723d0	2008-10-17 19:25:01 -0700	[diff] [blame]	634	OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	635	if (status < 0) {
				636	mlog_errno(status);
				637	goto leave;
				638	}
				639
				640	prev_clusters = OCFS2_I(inode)->ip_clusters;
				641
Tao Ma	0eb8d47e	2008-08-18 17:38:45 +0800	[diff] [blame]	642	status = ocfs2_add_inode_data(osb,
				643	inode,
				644	&logical_start,
				645	clusters_to_add,
				646	mark_unwritten,
				647	bh,
				648	handle,
				649	data_ac,
				650	meta_ac,
				651	&why);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	652	if ((status < 0) && (status != -EAGAIN)) {
				653	if (status != -ENOSPC)
				654	mlog_errno(status);
				655	goto leave;
				656	}
				657
Joel Becker	ec20cec	2010-03-19 14:13:52 -0700	[diff] [blame]	658	ocfs2_journal_dirty(handle, bh);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	659
				660	spin_lock(&OCFS2_I(inode)->ip_lock);
				661	clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
				662	spin_unlock(&OCFS2_I(inode)->ip_lock);
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	663	/* Release unused quota reservation */
Christoph Hellwig	5dd4056	2010-03-03 09:05:00 -0500	[diff] [blame]	664	dquot_free_space(inode,
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	665	ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
				666	did_quota = 0;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	667
				668	if (why != RESTART_NONE && clusters_to_add) {
				669	if (why == RESTART_META) {
				670	mlog(0, "restarting function.\n");
				671	restart_func = 1;
Tao Ma	7968184	2010-04-16 13:59:25 +0800	[diff] [blame]	672	status = 0;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	673	} else {
				674	BUG_ON(why != RESTART_TRANS);
				675
				676	mlog(0, "restarting transaction.\n");
				677	/* TODO: This can be more intelligent. */
				678	credits = ocfs2_calc_extend_credits(osb->sb,
Tao Ma	811f933	2008-08-18 17:38:43 +0800	[diff] [blame]	679	&fe->id2.i_list,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	680	clusters_to_add);
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	681	status = ocfs2_extend_trans(handle, credits);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	682	if (status < 0) {
				683	/* handle still has to be committed at
				684	* this point. */
				685	status = -ENOMEM;
				686	mlog_errno(status);
				687	goto leave;
				688	}
				689	goto restarted_transaction;
				690	}
				691	}
				692
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame]	693	mlog(0, "fe: i_clusters = %u, i_size=%llu\n",
Mark Fasheh	1ca1a11	2007-04-27 16:01:25 -0700	[diff] [blame]	694	le32_to_cpu(fe->i_clusters),
				695	(unsigned long long)le64_to_cpu(fe->i_size));
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	696	mlog(0, "inode: ip_clusters=%u, i_size=%lld\n",
Jan Kara	634bf74	2007-12-19 15:25:42 +0100	[diff] [blame]	697	OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode));
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	698
				699	leave:
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	700	if (status < 0 && did_quota)
Christoph Hellwig	5dd4056	2010-03-03 09:05:00 -0500	[diff] [blame]	701	dquot_free_space(inode,
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	702	ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	703	if (handle) {
Mark Fasheh	02dc1af	2006-10-09 16:48:10 -0700	[diff] [blame]	704	ocfs2_commit_trans(osb, handle);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	705	handle = NULL;
				706	}
				707	if (data_ac) {
				708	ocfs2_free_alloc_context(data_ac);
				709	data_ac = NULL;
				710	}
				711	if (meta_ac) {
				712	ocfs2_free_alloc_context(meta_ac);
				713	meta_ac = NULL;
				714	}
				715	if ((!status) && restart_func) {
				716	restart_func = 0;
				717	goto restart_all;
				718	}
Mark Fasheh	a81cb88	2008-10-07 14:25:16 -0700	[diff] [blame]	719	brelse(bh);
				720	bh = NULL;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	721
				722	mlog_exit(status);
				723	return status;
				724	}
				725
Joel Becker	a4bfb4c	2010-07-06 14:36:06 -0700	[diff] [blame]	726	/*
				727	* While a write will already be ordering the data, a truncate will not.
				728	* Thus, we need to explicitly order the zeroed pages.
				729	*/
				730	static handle_t ocfs2_zero_start_ordered_transaction(struct inode inode)
				731	{
				732	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				733	handle_t *handle = NULL;
				734	int ret = 0;
				735
				736	if (!ocfs2_should_order_data(inode))
				737	goto out;
				738
				739	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
				740	if (IS_ERR(handle)) {
				741	ret = -ENOMEM;
				742	mlog_errno(ret);
				743	goto out;
				744	}
				745
				746	ret = ocfs2_jbd2_file_inode(handle, inode);
				747	if (ret < 0)
				748	mlog_errno(ret);
				749
				750	out:
				751	if (ret) {
				752	if (!IS_ERR(handle))
				753	ocfs2_commit_trans(osb, handle);
				754	handle = ERR_PTR(ret);
				755	}
				756	return handle;
				757	}
				758
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	759	/* Some parts of this taken from generic_cont_expand, which turned out
				760	* to be too fragile to do exactly what we need without us having to
Nick Piggin	4e02ed4	2008-10-29 14:00:55 -0700	[diff] [blame]	761	* worry about recursive locking in ->write_begin() and ->write_end(). */
Joel Becker	a4bfb4c	2010-07-06 14:36:06 -0700	[diff] [blame]	762	static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
				763	u64 abs_to)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	764	{
				765	struct address_space *mapping = inode->i_mapping;
				766	struct page *page;
Joel Becker	a4bfb4c	2010-07-06 14:36:06 -0700	[diff] [blame]	767	unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	768	handle_t *handle = NULL;
Joel Becker	5453258	2010-07-16 13:32:33 -0700	[diff] [blame]	769	int ret = 0;
Joel Becker	a4bfb4c	2010-07-06 14:36:06 -0700	[diff] [blame]	770	unsigned zero_from, zero_to, block_start, block_end;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	771
Joel Becker	a4bfb4c	2010-07-06 14:36:06 -0700	[diff] [blame]	772	BUG_ON(abs_from >= abs_to);
				773	BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
				774	BUG_ON(abs_from & (inode->i_blkbits - 1));
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	775
Jan Kara	9b4c0ff	2010-08-24 14:28:03 +0200	[diff] [blame]	776	page = find_or_create_page(mapping, index, GFP_NOFS);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	777	if (!page) {
				778	ret = -ENOMEM;
				779	mlog_errno(ret);
				780	goto out;
				781	}
				782
Joel Becker	a4bfb4c	2010-07-06 14:36:06 -0700	[diff] [blame]	783	/* Get the offsets within the page that we want to zero */
				784	zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
				785	zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
				786	if (!zero_to)
				787	zero_to = PAGE_CACHE_SIZE;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	788
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	789	mlog(0,
				790	"abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n",
				791	(unsigned long long)abs_from, (unsigned long long)abs_to,
				792	index, zero_from, zero_to);
				793
Joel Becker	a4bfb4c	2010-07-06 14:36:06 -0700	[diff] [blame]	794	/* We know that zero_from is block aligned */
				795	for (block_start = zero_from; block_start < zero_to;
				796	block_start = block_end) {
				797	block_end = block_start + (1 << inode->i_blkbits);
				798
				799	/*
				800	* block_start is block-aligned. Bump it by one to
				801	* force ocfs2_{prepare,commit}_write() to zero the
				802	* whole block.
				803	*/
				804	ret = ocfs2_prepare_write_nolock(inode, page,
				805	block_start + 1,
				806	block_start + 1);
				807	if (ret < 0) {
				808	mlog_errno(ret);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	809	goto out_unlock;
				810	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	811
Joel Becker	a4bfb4c	2010-07-06 14:36:06 -0700	[diff] [blame]	812	if (!handle) {
				813	handle = ocfs2_zero_start_ordered_transaction(inode);
				814	if (IS_ERR(handle)) {
				815	ret = PTR_ERR(handle);
				816	handle = NULL;
				817	break;
				818	}
				819	}
				820
				821	/* must not update i_size! */
				822	ret = block_commit_write(page, block_start + 1,
				823	block_start + 1);
				824	if (ret < 0)
				825	mlog_errno(ret);
				826	else
				827	ret = 0;
				828	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	829
				830	if (handle)
Mark Fasheh	02dc1af	2006-10-09 16:48:10 -0700	[diff] [blame]	831	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
Joel Becker	a4bfb4c	2010-07-06 14:36:06 -0700	[diff] [blame]	832
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	833	out_unlock:
				834	unlock_page(page);
				835	page_cache_release(page);
				836	out:
				837	return ret;
				838	}
				839
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	840	/*
				841	* Find the next range to zero. We do this in terms of bytes because
				842	* that's what ocfs2_zero_extend() wants, and it is dealing with the
				843	* pagecache. We may return multiple extents.
				844	*
				845	* zero_start and zero_end are ocfs2_zero_extend()s current idea of what
				846	* needs to be zeroed. range_start and range_end return the next zeroing
				847	* range. A subsequent call should pass the previous range_end as its
				848	* zero_start. If range_end is 0, there's nothing to do.
				849	*
				850	* Unwritten extents are skipped over. Refcounted extents are CoWd.
				851	*/
				852	static int ocfs2_zero_extend_get_range(struct inode *inode,
				853	struct buffer_head *di_bh,
				854	u64 zero_start, u64 zero_end,
				855	u64 range_start, u64 range_end)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	856	{
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	857	int rc = 0, needs_cow = 0;
				858	u32 p_cpos, zero_clusters = 0;
				859	u32 zero_cpos =
				860	zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
				861	u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
				862	unsigned int num_clusters = 0;
				863	unsigned int ext_flags = 0;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	864
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	865	while (zero_cpos < last_cpos) {
				866	rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
				867	&num_clusters, &ext_flags);
				868	if (rc) {
				869	mlog_errno(rc);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	870	goto out;
				871	}
				872
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	873	if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
				874	zero_clusters = num_clusters;
				875	if (ext_flags & OCFS2_EXT_REFCOUNTED)
				876	needs_cow = 1;
				877	break;
				878	}
				879
				880	zero_cpos += num_clusters;
				881	}
				882	if (!zero_clusters) {
				883	*range_end = 0;
				884	goto out;
				885	}
				886
				887	while ((zero_cpos + zero_clusters) < last_cpos) {
				888	rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
				889	&p_cpos, &num_clusters,
				890	&ext_flags);
				891	if (rc) {
				892	mlog_errno(rc);
				893	goto out;
				894	}
				895
				896	if (!p_cpos \|\| (ext_flags & OCFS2_EXT_UNWRITTEN))
				897	break;
				898	if (ext_flags & OCFS2_EXT_REFCOUNTED)
				899	needs_cow = 1;
				900	zero_clusters += num_clusters;
				901	}
				902	if ((zero_cpos + zero_clusters) > last_cpos)
				903	zero_clusters = last_cpos - zero_cpos;
				904
				905	if (needs_cow) {
Tao Ma	1550271	2010-08-12 10:36:38 +0800	[diff] [blame]	906	rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos,
				907	zero_clusters, UINT_MAX);
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	908	if (rc) {
				909	mlog_errno(rc);
				910	goto out;
				911	}
				912	}
				913
				914	*range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
				915	*range_end = ocfs2_clusters_to_bytes(inode->i_sb,
				916	zero_cpos + zero_clusters);
				917
				918	out:
				919	return rc;
				920	}
				921
				922	/*
				923	* Zero one range returned from ocfs2_zero_extend_get_range(). The caller
				924	* has made sure that the entire range needs zeroing.
				925	*/
				926	static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
				927	u64 range_end)
				928	{
				929	int rc = 0;
				930	u64 next_pos;
				931	u64 zero_pos = range_start;
				932
				933	mlog(0, "range_start = %llu, range_end = %llu\n",
				934	(unsigned long long)range_start,
				935	(unsigned long long)range_end);
				936	BUG_ON(range_start >= range_end);
				937
				938	while (zero_pos < range_end) {
				939	next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
				940	if (next_pos > range_end)
				941	next_pos = range_end;
				942	rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
				943	if (rc < 0) {
				944	mlog_errno(rc);
				945	break;
				946	}
				947	zero_pos = next_pos;
Mark Fasheh	e2057c5	2006-10-03 17:53:05 -0700	[diff] [blame]	948
				949	/*
				950	* Very large extends have the potential to lock up
				951	* the cpu for extended periods of time.
				952	*/
				953	cond_resched();
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	954	}
				955
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	956	return rc;
				957	}
				958
				959	int ocfs2_zero_extend(struct inode inode, struct buffer_head di_bh,
				960	loff_t zero_to_size)
				961	{
				962	int ret = 0;
				963	u64 zero_start, range_start = 0, range_end = 0;
				964	struct super_block *sb = inode->i_sb;
				965
				966	zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
				967	mlog(0, "zero_start %llu for i_size %llu\n",
				968	(unsigned long long)zero_start,
				969	(unsigned long long)i_size_read(inode));
				970	while (zero_start < zero_to_size) {
				971	ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
				972	zero_to_size,
				973	&range_start,
				974	&range_end);
				975	if (ret) {
				976	mlog_errno(ret);
				977	break;
				978	}
				979	if (!range_end)
				980	break;
				981	/* Trim the ends */
				982	if (range_start < zero_start)
				983	range_start = zero_start;
				984	if (range_end > zero_to_size)
				985	range_end = zero_to_size;
				986
				987	ret = ocfs2_zero_extend_range(inode, range_start,
				988	range_end);
				989	if (ret) {
				990	mlog_errno(ret);
				991	break;
				992	}
				993	zero_start = range_end;
				994	}
				995
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	996	return ret;
				997	}
				998
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	999	int ocfs2_extend_no_holes(struct inode inode, struct buffer_head di_bh,
				1000	u64 new_i_size, u64 zero_to)
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	1001	{
				1002	int ret;
				1003	u32 clusters_to_add;
				1004	struct ocfs2_inode_info *oi = OCFS2_I(inode);
				1005
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	1006	/*
				1007	* Only quota files call this without a bh, and they can't be
				1008	* refcounted.
				1009	*/
				1010	BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
				1011	BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
				1012
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	1013	clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
				1014	if (clusters_to_add < oi->ip_clusters)
				1015	clusters_to_add = 0;
				1016	else
				1017	clusters_to_add -= oi->ip_clusters;
				1018
				1019	if (clusters_to_add) {
				1020	ret = __ocfs2_extend_allocation(inode, oi->ip_clusters,
				1021	clusters_to_add, 0);
				1022	if (ret) {
				1023	mlog_errno(ret);
				1024	goto out;
				1025	}
				1026	}
				1027
				1028	/*
				1029	* Call this even if we don't add any clusters to the tree. We
				1030	* still need to zero the area between the old i_size and the
				1031	* new i_size.
				1032	*/
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	1033	ret = ocfs2_zero_extend(inode, di_bh, zero_to);
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	1034	if (ret < 0)
				1035	mlog_errno(ret);
				1036
				1037	out:
				1038	return ret;
				1039	}
				1040
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1041	static int ocfs2_extend_file(struct inode *inode,
				1042	struct buffer_head *di_bh,
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	1043	u64 new_i_size)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1044	{
Mark Fasheh	c934a92	2007-10-18 15:23:46 -0700	[diff] [blame]	1045	int ret = 0;
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1046	struct ocfs2_inode_info *oi = OCFS2_I(inode);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1047
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	1048	BUG_ON(!di_bh);
Mark Fasheh	53013cb	2006-05-05 19:04:03 -0700	[diff] [blame]	1049
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1050	/* setattr sometimes calls us like this. */
				1051	if (new_i_size == 0)
				1052	goto out;
				1053
				1054	if (i_size_read(inode) == new_i_size)
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	1055	goto out;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1056	BUG_ON(new_i_size < i_size_read(inode));
				1057
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1058	/*
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	1059	* The alloc sem blocks people in read/write from reading our
				1060	* allocation until we're done changing it. We depend on
				1061	* i_mutex to block other extend/truncate calls while we're
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	1062	* here. We even have to hold it for sparse files because there
				1063	* might be some tail zeroing.
Mark Fasheh	0effef7	2006-10-03 17:44:42 -0700	[diff] [blame]	1064	*/
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1065	down_write(&oi->ip_alloc_sem);
				1066
				1067	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
				1068	/*
				1069	* We can optimize small extends by keeping the inodes
				1070	* inline data.
				1071	*/
				1072	if (ocfs2_size_fits_inline_data(di_bh, new_i_size)) {
				1073	up_write(&oi->ip_alloc_sem);
				1074	goto out_update_size;
				1075	}
				1076
				1077	ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
				1078	if (ret) {
				1079	up_write(&oi->ip_alloc_sem);
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1080	mlog_errno(ret);
Mark Fasheh	c934a92	2007-10-18 15:23:46 -0700	[diff] [blame]	1081	goto out;
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1082	}
				1083	}
				1084
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	1085	if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
				1086	ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
				1087	else
				1088	ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
				1089	new_i_size);
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1090
				1091	up_write(&oi->ip_alloc_sem);
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	1092
Mark Fasheh	0effef7	2006-10-03 17:44:42 -0700	[diff] [blame]	1093	if (ret < 0) {
				1094	mlog_errno(ret);
Mark Fasheh	c934a92	2007-10-18 15:23:46 -0700	[diff] [blame]	1095	goto out;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1096	}
				1097
Mark Fasheh	3a0782d	2007-01-17 12:53:31 -0800	[diff] [blame]	1098	out_update_size:
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	1099	ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
				1100	if (ret < 0)
				1101	mlog_errno(ret);
Mark Fasheh	53013cb	2006-05-05 19:04:03 -0700	[diff] [blame]	1102
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1103	out:
				1104	return ret;
				1105	}
				1106
				1107	int ocfs2_setattr(struct dentry dentry, struct iattr attr)
				1108	{
				1109	int status = 0, size_change;
				1110	struct inode *inode = dentry->d_inode;
				1111	struct super_block *sb = inode->i_sb;
				1112	struct ocfs2_super *osb = OCFS2_SB(sb);
				1113	struct buffer_head *bh = NULL;
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	1114	handle_t *handle = NULL;
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1115	struct dquot *transfer_to[MAXQUOTAS] = { };
Jan Kara	52a9ee2	2010-05-13 20:18:45 +0200	[diff] [blame]	1116	int qtype;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1117
				1118	mlog_entry("(0x%p, '%.*s')\n", dentry,
				1119	dentry->d_name.len, dentry->d_name.name);
				1120
Sunil Mushran	bc53580	2008-04-18 10:23:53 -0700	[diff] [blame]	1121	/* ensuring we don't even attempt to truncate a symlink */
				1122	if (S_ISLNK(inode->i_mode))
				1123	attr->ia_valid &= ~ATTR_SIZE;
				1124
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1125	if (attr->ia_valid & ATTR_MODE)
				1126	mlog(0, "mode change: %d\n", attr->ia_mode);
				1127	if (attr->ia_valid & ATTR_UID)
				1128	mlog(0, "uid change: %d\n", attr->ia_uid);
				1129	if (attr->ia_valid & ATTR_GID)
				1130	mlog(0, "gid change: %d\n", attr->ia_gid);
				1131	if (attr->ia_valid & ATTR_SIZE)
				1132	mlog(0, "size change...\n");
				1133	if (attr->ia_valid & (ATTR_ATIME \| ATTR_MTIME \| ATTR_CTIME))
				1134	mlog(0, "time change...\n");
				1135
				1136	#define OCFS2_VALID_ATTRS (ATTR_ATIME \| ATTR_MTIME \| ATTR_CTIME \| ATTR_SIZE \
				1137	\| ATTR_GID \| ATTR_UID \| ATTR_MODE)
				1138	if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) {
				1139	mlog(0, "can't handle attrs: 0x%x\n", attr->ia_valid);
				1140	return 0;
				1141	}
				1142
				1143	status = inode_change_ok(inode, attr);
				1144	if (status)
				1145	return status;
				1146
Dmitry Monakhov	1275562	2010-04-08 22:04:20 +0400	[diff] [blame]	1147	if (is_quota_modification(inode, attr))
				1148	dquot_initialize(inode);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1149	size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
				1150	if (size_change) {
				1151	status = ocfs2_rw_lock(inode, 1);
				1152	if (status < 0) {
				1153	mlog_errno(status);
				1154	goto bail;
				1155	}
				1156	}
				1157
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1158	status = ocfs2_inode_lock(inode, &bh, 1);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1159	if (status < 0) {
				1160	if (status != -ENOENT)
				1161	mlog_errno(status);
				1162	goto bail_unlock_rw;
				1163	}
				1164
				1165	if (size_change && attr->ia_size != i_size_read(inode)) {
Wengang Wang	5051f76	2010-02-26 18:18:25 +0800	[diff] [blame]	1166	status = inode_newsize_ok(inode, attr->ia_size);
				1167	if (status)
Mark Fasheh	ce76fd3	2007-07-20 12:02:14 -0700	[diff] [blame]	1168	goto bail_unlock;
Mark Fasheh	ce76fd3	2007-07-20 12:02:14 -0700	[diff] [blame]	1169
Joel Becker	2b4e30f	2008-09-03 20:03:41 -0700	[diff] [blame]	1170	if (i_size_read(inode) > attr->ia_size) {
				1171	if (ocfs2_should_order_data(inode)) {
				1172	status = ocfs2_begin_ordered_truncate(inode,
				1173	attr->ia_size);
				1174	if (status)
				1175	goto bail_unlock;
				1176	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1177	status = ocfs2_truncate_file(inode, bh, attr->ia_size);
Joel Becker	2b4e30f	2008-09-03 20:03:41 -0700	[diff] [blame]	1178	} else
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	1179	status = ocfs2_extend_file(inode, bh, attr->ia_size);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1180	if (status < 0) {
				1181	if (status != -ENOSPC)
				1182	mlog_errno(status);
				1183	status = -ENOSPC;
				1184	goto bail_unlock;
				1185	}
				1186	}
				1187
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	1188	if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) \|\|
				1189	(attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1190	/*
				1191	* Gather pointers to quota structures so that allocation /
				1192	* freeing of quota structures happens here and not inside
Christoph Hellwig	b43fa82	2010-03-03 09:05:03 -0500	[diff] [blame]	1193	* dquot_transfer() where we have problems with lock ordering
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1194	*/
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	1195	if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid
				1196	&& OCFS2_HAS_RO_COMPAT_FEATURE(sb,
				1197	OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1198	transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid,
				1199	USRQUOTA);
Jan Kara	52a9ee2	2010-05-13 20:18:45 +0200	[diff] [blame]	1200	if (!transfer_to[USRQUOTA]) {
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1201	status = -ESRCH;
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	1202	goto bail_unlock;
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1203	}
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	1204	}
				1205	if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid
				1206	&& OCFS2_HAS_RO_COMPAT_FEATURE(sb,
				1207	OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1208	transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid,
				1209	GRPQUOTA);
Jan Kara	52a9ee2	2010-05-13 20:18:45 +0200	[diff] [blame]	1210	if (!transfer_to[GRPQUOTA]) {
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1211	status = -ESRCH;
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	1212	goto bail_unlock;
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1213	}
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	1214	}
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1215	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS +
				1216	2 * ocfs2_quota_trans_credits(sb));
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	1217	if (IS_ERR(handle)) {
				1218	status = PTR_ERR(handle);
				1219	mlog_errno(status);
				1220	goto bail_unlock;
				1221	}
Jan Kara	52a9ee2	2010-05-13 20:18:45 +0200	[diff] [blame]	1222	status = __dquot_transfer(inode, transfer_to);
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	1223	if (status < 0)
				1224	goto bail_commit;
				1225	} else {
				1226	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
				1227	if (IS_ERR(handle)) {
				1228	status = PTR_ERR(handle);
				1229	mlog_errno(status);
				1230	goto bail_unlock;
				1231	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1232	}
				1233
Mark Fasheh	7307de8	2007-05-09 15:16:19 -0700	[diff] [blame]	1234	/*
Christoph Hellwig	2c27c65	2010-06-04 11:30:04 +0200	[diff] [blame]	1235	* This will intentionally not wind up calling truncate_setsize(),
Mark Fasheh	7307de8	2007-05-09 15:16:19 -0700	[diff] [blame]	1236	* since all the work for a size change has been done above.
				1237	* Otherwise, we could get into problems with truncate as
				1238	* ip_alloc_sem is used there to protect against i_size
				1239	* changes.
Christoph Hellwig	1025774	2010-06-04 11:30:02 +0200	[diff] [blame]	1240	*
				1241	* XXX: this means the conditional below can probably be removed.
Mark Fasheh	7307de8	2007-05-09 15:16:19 -0700	[diff] [blame]	1242	*/
Christoph Hellwig	1025774	2010-06-04 11:30:02 +0200	[diff] [blame]	1243	if ((attr->ia_valid & ATTR_SIZE) &&
				1244	attr->ia_size != i_size_read(inode)) {
				1245	status = vmtruncate(inode, attr->ia_size);
				1246	if (status) {
				1247	mlog_errno(status);
				1248	goto bail_commit;
				1249	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1250	}
				1251
Christoph Hellwig	1025774	2010-06-04 11:30:02 +0200	[diff] [blame]	1252	setattr_copy(inode, attr);
				1253	mark_inode_dirty(inode);
				1254
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1255	status = ocfs2_mark_inode_dirty(handle, inode, bh);
				1256	if (status < 0)
				1257	mlog_errno(status);
				1258
				1259	bail_commit:
Mark Fasheh	02dc1af	2006-10-09 16:48:10 -0700	[diff] [blame]	1260	ocfs2_commit_trans(osb, handle);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1261	bail_unlock:
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1262	ocfs2_inode_unlock(inode, 1);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1263	bail_unlock_rw:
				1264	if (size_change)
				1265	ocfs2_rw_unlock(inode, 1);
				1266	bail:
Mark Fasheh	a81cb88	2008-10-07 14:25:16 -0700	[diff] [blame]	1267	brelse(bh);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1268
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1269	/* Release quota pointers in case we acquired them */
Jan Kara	52a9ee2	2010-05-13 20:18:45 +0200	[diff] [blame]	1270	for (qtype = 0; qtype < MAXQUOTAS; qtype++)
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1271	dqput(transfer_to[qtype]);
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1272
Tiger Yang	060bc66	2008-11-14 11:17:29 +0800	[diff] [blame]	1273	if (!status && attr->ia_valid & ATTR_MODE) {
				1274	status = ocfs2_acl_chmod(inode);
				1275	if (status < 0)
				1276	mlog_errno(status);
				1277	}
				1278
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1279	mlog_exit(status);
				1280	return status;
				1281	}
				1282
				1283	int ocfs2_getattr(struct vfsmount *mnt,
				1284	struct dentry *dentry,
				1285	struct kstat *stat)
				1286	{
				1287	struct inode *inode = dentry->d_inode;
				1288	struct super_block *sb = dentry->d_inode->i_sb;
				1289	struct ocfs2_super *osb = sb->s_fs_info;
				1290	int err;
				1291
				1292	mlog_entry_void();
				1293
				1294	err = ocfs2_inode_revalidate(dentry);
				1295	if (err) {
				1296	if (err != -ENOENT)
				1297	mlog_errno(err);
				1298	goto bail;
				1299	}
				1300
				1301	generic_fillattr(inode, stat);
				1302
				1303	/* We set the blksize from the cluster size for performance */
				1304	stat->blksize = osb->s_clustersize;
				1305
				1306	bail:
				1307	mlog_exit(err);
				1308
				1309	return err;
				1310	}
				1311
Al Viro	e6305c4	2008-07-15 21:03:57 -0400	[diff] [blame]	1312	int ocfs2_permission(struct inode *inode, int mask)
Tiger Yang	d38eb8d	2006-11-27 09:59:21 +0800	[diff] [blame]	1313	{
				1314	int ret;
				1315
				1316	mlog_entry_void();
				1317
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1318	ret = ocfs2_inode_lock(inode, NULL, 0);
Tiger Yang	d38eb8d	2006-11-27 09:59:21 +0800	[diff] [blame]	1319	if (ret) {
Mark Fasheh	a9f5f70	2007-04-26 11:43:43 -0700	[diff] [blame]	1320	if (ret != -ENOENT)
				1321	mlog_errno(ret);
Tiger Yang	d38eb8d	2006-11-27 09:59:21 +0800	[diff] [blame]	1322	goto out;
				1323	}
				1324
Tiger Yang	23fc270	2008-11-14 11:17:18 +0800	[diff] [blame]	1325	ret = generic_permission(inode, mask, ocfs2_check_acl);
Tiger Yang	d38eb8d	2006-11-27 09:59:21 +0800	[diff] [blame]	1326
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1327	ocfs2_inode_unlock(inode, 0);
Tiger Yang	d38eb8d	2006-11-27 09:59:21 +0800	[diff] [blame]	1328	out:
				1329	mlog_exit(ret);
				1330	return ret;
				1331	}
				1332
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1333	static int __ocfs2_write_remove_suid(struct inode *inode,
				1334	struct buffer_head *bh)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1335	{
				1336	int ret;
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	1337	handle_t *handle;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1338	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				1339	struct ocfs2_dinode *di;
				1340
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame]	1341	mlog_entry("(Inode %llu, mode 0%o)\n",
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1342	(unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1343
Mark Fasheh	65eff9c	2006-10-09 17:26:22 -0700	[diff] [blame]	1344	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
Jan Kara	fa38e92	2008-10-20 19:23:51 +0200	[diff] [blame]	1345	if (IS_ERR(handle)) {
				1346	ret = PTR_ERR(handle);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1347	mlog_errno(ret);
				1348	goto out;
				1349	}
				1350
Joel Becker	0cf2f76	2009-02-12 16:41:25 -0800	[diff] [blame]	1351	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
Joel Becker	13723d0	2008-10-17 19:25:01 -0700	[diff] [blame]	1352	OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1353	if (ret < 0) {
				1354	mlog_errno(ret);
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1355	goto out_trans;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1356	}
				1357
				1358	inode->i_mode &= ~S_ISUID;
				1359	if ((inode->i_mode & S_ISGID) && (inode->i_mode & S_IXGRP))
				1360	inode->i_mode &= ~S_ISGID;
				1361
				1362	di = (struct ocfs2_dinode *) bh->b_data;
				1363	di->i_mode = cpu_to_le16(inode->i_mode);
				1364
Joel Becker	ec20cec	2010-03-19 14:13:52 -0700	[diff] [blame]	1365	ocfs2_journal_dirty(handle, bh);
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1366
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1367	out_trans:
Mark Fasheh	02dc1af	2006-10-09 16:48:10 -0700	[diff] [blame]	1368	ocfs2_commit_trans(osb, handle);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1369	out:
				1370	mlog_exit(ret);
				1371	return ret;
				1372	}
				1373
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	1374	/*
				1375	* Will look for holes and unwritten extents in the range starting at
				1376	* pos for count bytes (inclusive).
				1377	*/
				1378	static int ocfs2_check_range_for_holes(struct inode *inode, loff_t pos,
				1379	size_t count)
				1380	{
				1381	int ret = 0;
Mark Fasheh	49cb8d2	2007-03-09 16:21:46 -0800	[diff] [blame]	1382	unsigned int extent_flags;
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	1383	u32 cpos, clusters, extent_len, phys_cpos;
				1384	struct super_block *sb = inode->i_sb;
				1385
				1386	cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
				1387	clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;
				1388
				1389	while (clusters) {
Mark Fasheh	49cb8d2	2007-03-09 16:21:46 -0800	[diff] [blame]	1390	ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
				1391	&extent_flags);
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	1392	if (ret < 0) {
				1393	mlog_errno(ret);
				1394	goto out;
				1395	}
				1396
Mark Fasheh	49cb8d2	2007-03-09 16:21:46 -0800	[diff] [blame]	1397	if (phys_cpos == 0 \|\| (extent_flags & OCFS2_EXT_UNWRITTEN)) {
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	1398	ret = 1;
				1399	break;
				1400	}
				1401
				1402	if (extent_len > clusters)
				1403	extent_len = clusters;
				1404
				1405	clusters -= extent_len;
				1406	cpos += extent_len;
				1407	}
				1408	out:
				1409	return ret;
				1410	}
				1411
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1412	static int ocfs2_write_remove_suid(struct inode *inode)
				1413	{
				1414	int ret;
				1415	struct buffer_head *bh = NULL;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1416
Joel Becker	b657c95	2008-11-13 14:49:11 -0800	[diff] [blame]	1417	ret = ocfs2_read_inode_block(inode, &bh);
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1418	if (ret < 0) {
				1419	mlog_errno(ret);
				1420	goto out;
				1421	}
				1422
				1423	ret = __ocfs2_write_remove_suid(inode, bh);
				1424	out:
				1425	brelse(bh);
				1426	return ret;
				1427	}
				1428
Mark Fasheh	2ae99a6	2007-03-09 16:43:28 -0800	[diff] [blame]	1429	/*
				1430	* Allocate enough extents to cover the region starting at byte offset
				1431	* start for len bytes. Existing extents are skipped, any extents
				1432	* added are marked as "unwritten".
				1433	*/
				1434	static int ocfs2_allocate_unwritten_extents(struct inode *inode,
				1435	u64 start, u64 len)
				1436	{
				1437	int ret;
				1438	u32 cpos, phys_cpos, clusters, alloc_size;
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1439	u64 end = start + len;
				1440	struct buffer_head *di_bh = NULL;
				1441
				1442	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
Joel Becker	b657c95	2008-11-13 14:49:11 -0800	[diff] [blame]	1443	ret = ocfs2_read_inode_block(inode, &di_bh);
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1444	if (ret) {
				1445	mlog_errno(ret);
				1446	goto out;
				1447	}
				1448
				1449	/*
				1450	* Nothing to do if the requested reservation range
				1451	* fits within the inode.
				1452	*/
				1453	if (ocfs2_size_fits_inline_data(di_bh, end))
				1454	goto out;
				1455
				1456	ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
				1457	if (ret) {
				1458	mlog_errno(ret);
				1459	goto out;
				1460	}
				1461	}
Mark Fasheh	2ae99a6	2007-03-09 16:43:28 -0800	[diff] [blame]	1462
				1463	/*
				1464	* We consider both start and len to be inclusive.
				1465	*/
				1466	cpos = start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
				1467	clusters = ocfs2_clusters_for_bytes(inode->i_sb, start + len);
				1468	clusters -= cpos;
				1469
				1470	while (clusters) {
				1471	ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
				1472	&alloc_size, NULL);
				1473	if (ret) {
				1474	mlog_errno(ret);
				1475	goto out;
				1476	}
				1477
				1478	/*
				1479	* Hole or existing extent len can be arbitrary, so
				1480	* cap it to our own allocation request.
				1481	*/
				1482	if (alloc_size > clusters)
				1483	alloc_size = clusters;
				1484
				1485	if (phys_cpos) {
				1486	/*
				1487	* We already have an allocation at this
				1488	* region so we can safely skip it.
				1489	*/
				1490	goto next;
				1491	}
				1492
				1493	ret = __ocfs2_extend_allocation(inode, cpos, alloc_size, 1);
				1494	if (ret) {
				1495	if (ret != -ENOSPC)
				1496	mlog_errno(ret);
				1497	goto out;
				1498	}
				1499
				1500	next:
				1501	cpos += alloc_size;
				1502	clusters -= alloc_size;
				1503	}
				1504
				1505	ret = 0;
				1506	out:
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1507
				1508	brelse(di_bh);
Mark Fasheh	2ae99a6	2007-03-09 16:43:28 -0800	[diff] [blame]	1509	return ret;
				1510	}
				1511
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1512	/*
				1513	* Truncate a byte range, avoiding pages within partial clusters. This
				1514	* preserves those pages for the zeroing code to write to.
				1515	*/
				1516	static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start,
				1517	u64 byte_len)
				1518	{
				1519	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				1520	loff_t start, end;
				1521	struct address_space *mapping = inode->i_mapping;
				1522
				1523	start = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start);
				1524	end = byte_start + byte_len;
				1525	end = end & ~(osb->s_clustersize - 1);
				1526
				1527	if (start < end) {
				1528	unmap_mapping_range(mapping, start, end - start, 0);
				1529	truncate_inode_pages_range(mapping, start, end - 1);
				1530	}
				1531	}
				1532
				1533	static int ocfs2_zero_partial_clusters(struct inode *inode,
				1534	u64 start, u64 len)
				1535	{
				1536	int ret = 0;
				1537	u64 tmpend, end = start + len;
				1538	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				1539	unsigned int csize = osb->s_clustersize;
				1540	handle_t *handle;
				1541
				1542	/*
				1543	* The "start" and "end" values are NOT necessarily part of
				1544	* the range whose allocation is being deleted. Rather, this
				1545	* is what the user passed in with the request. We must zero
				1546	* partial clusters here. There's no need to worry about
				1547	* physical allocation - the zeroing code knows to skip holes.
				1548	*/
				1549	mlog(0, "byte start: %llu, end: %llu\n",
				1550	(unsigned long long)start, (unsigned long long)end);
				1551
				1552	/*
				1553	* If both edges are on a cluster boundary then there's no
				1554	* zeroing required as the region is part of the allocation to
				1555	* be truncated.
				1556	*/
				1557	if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0)
				1558	goto out;
				1559
				1560	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
Jan Kara	fa38e92	2008-10-20 19:23:51 +0200	[diff] [blame]	1561	if (IS_ERR(handle)) {
				1562	ret = PTR_ERR(handle);
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1563	mlog_errno(ret);
				1564	goto out;
				1565	}
				1566
				1567	/*
				1568	* We want to get the byte offset of the end of the 1st cluster.
				1569	*/
				1570	tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1));
				1571	if (tmpend > end)
				1572	tmpend = end;
				1573
				1574	mlog(0, "1st range: start: %llu, tmpend: %llu\n",
				1575	(unsigned long long)start, (unsigned long long)tmpend);
				1576
				1577	ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend);
				1578	if (ret)
				1579	mlog_errno(ret);
				1580
				1581	if (tmpend < end) {
				1582	/*
				1583	* This may make start and end equal, but the zeroing
				1584	* code will skip any work in that case so there's no
				1585	* need to catch it up here.
				1586	*/
				1587	start = end & ~(osb->s_clustersize - 1);
				1588
				1589	mlog(0, "2nd range: start: %llu, end: %llu\n",
				1590	(unsigned long long)start, (unsigned long long)end);
				1591
				1592	ret = ocfs2_zero_range_for_truncate(inode, handle, start, end);
				1593	if (ret)
				1594	mlog_errno(ret);
				1595	}
				1596
				1597	ocfs2_commit_trans(osb, handle);
				1598	out:
				1599	return ret;
				1600	}
				1601
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1602	static int ocfs2_find_rec(struct ocfs2_extent_list *el, u32 pos)
				1603	{
				1604	int i;
				1605	struct ocfs2_extent_rec *rec = NULL;
				1606
				1607	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
				1608
				1609	rec = &el->l_recs[i];
				1610
				1611	if (le32_to_cpu(rec->e_cpos) < pos)
				1612	break;
				1613	}
				1614
				1615	return i;
				1616	}
				1617
				1618	/*
				1619	* Helper to calculate the punching pos and length in one run, we handle the
				1620	* following three cases in order:
				1621	*
				1622	* - remove the entire record
				1623	* - remove a partial record
				1624	* - no record needs to be removed (hole-punching completed)
				1625	*/
				1626	static void ocfs2_calc_trunc_pos(struct inode *inode,
				1627	struct ocfs2_extent_list *el,
				1628	struct ocfs2_extent_rec *rec,
				1629	u32 trunc_start, u32 *trunc_cpos,
				1630	u32 trunc_len, u32 trunc_end,
				1631	u64 blkno, int done)
				1632	{
				1633	int ret = 0;
				1634	u32 coff, range;
				1635
				1636	range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
				1637
				1638	if (le32_to_cpu(rec->e_cpos) >= trunc_start) {
				1639	*trunc_cpos = le32_to_cpu(rec->e_cpos);
				1640	/*
				1641	* Skip holes if any.
				1642	*/
				1643	if (range < *trunc_end)
				1644	*trunc_end = range;
				1645	trunc_len = trunc_end - le32_to_cpu(rec->e_cpos);
				1646	*blkno = le64_to_cpu(rec->e_blkno);
				1647	*trunc_end = le32_to_cpu(rec->e_cpos);
				1648	} else if (range > trunc_start) {
				1649	*trunc_cpos = trunc_start;
				1650	trunc_len = trunc_end - trunc_start;
				1651	coff = trunc_start - le32_to_cpu(rec->e_cpos);
				1652	*blkno = le64_to_cpu(rec->e_blkno) +
				1653	ocfs2_clusters_to_blocks(inode->i_sb, coff);
				1654	*trunc_end = trunc_start;
				1655	} else {
				1656	/*
				1657	* It may have two following possibilities:
				1658	*
				1659	* - last record has been removed
				1660	* - trunc_start was within a hole
				1661	*
				1662	* both two cases mean the completion of hole punching.
				1663	*/
				1664	ret = 1;
				1665	}
				1666
				1667	*done = ret;
				1668	}
				1669
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1670	static int ocfs2_remove_inode_range(struct inode *inode,
				1671	struct buffer_head *di_bh, u64 byte_start,
				1672	u64 byte_len)
				1673	{
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1674	int ret = 0, flags = 0, done = 0, i;
				1675	u32 trunc_start, trunc_len, trunc_end, trunc_cpos, phys_cpos;
				1676	u32 cluster_in_el;
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1677	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				1678	struct ocfs2_cached_dealloc_ctxt dealloc;
Mark Fasheh	b1967d0	2007-11-20 11:56:39 -0800	[diff] [blame]	1679	struct address_space *mapping = inode->i_mapping;
Mark Fasheh	fecc011	2008-11-12 15:16:38 -0800	[diff] [blame]	1680	struct ocfs2_extent_tree et;
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1681	struct ocfs2_path *path = NULL;
				1682	struct ocfs2_extent_list *el = NULL;
				1683	struct ocfs2_extent_rec *rec = NULL;
Tristan Ye	e8aec06	2010-05-11 17:54:43 +0800	[diff] [blame]	1684	struct ocfs2_dinode di = (struct ocfs2_dinode )di_bh->b_data;
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1685	u64 blkno, refcount_loc = le64_to_cpu(di->i_refcount_loc);
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1686
Joel Becker	5e404e9	2009-02-13 03:54:22 -0800	[diff] [blame]	1687	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1688	ocfs2_init_dealloc_ctxt(&dealloc);
				1689
				1690	if (byte_len == 0)
				1691	return 0;
				1692
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1693	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
				1694	ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
Mark Fasheh	b1967d0	2007-11-20 11:56:39 -0800	[diff] [blame]	1695	byte_start + byte_len, 0);
				1696	if (ret) {
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1697	mlog_errno(ret);
Mark Fasheh	b1967d0	2007-11-20 11:56:39 -0800	[diff] [blame]	1698	goto out;
				1699	}
				1700	/*
				1701	* There's no need to get fancy with the page cache
				1702	* truncate of an inline-data inode. We're talking
				1703	* about less than a page here, which will be cached
				1704	* in the dinode buffer anyway.
				1705	*/
				1706	unmap_mapping_range(mapping, 0, 0, 0);
				1707	truncate_inode_pages(mapping, 0);
				1708	goto out;
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1709	}
				1710
Tristan Ye	e8aec06	2010-05-11 17:54:43 +0800	[diff] [blame]	1711	/*
				1712	* For reflinks, we may need to CoW 2 clusters which might be
				1713	* partially zero'd later, if hole's start and end offset were
				1714	* within one cluster(means is not exactly aligned to clustersize).
				1715	*/
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1716
Tristan Ye	e8aec06	2010-05-11 17:54:43 +0800	[diff] [blame]	1717	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
				1718
				1719	ret = ocfs2_cow_file_pos(inode, di_bh, byte_start);
				1720	if (ret) {
				1721	mlog_errno(ret);
				1722	goto out;
				1723	}
				1724
				1725	ret = ocfs2_cow_file_pos(inode, di_bh, byte_start + byte_len);
				1726	if (ret) {
				1727	mlog_errno(ret);
				1728	goto out;
				1729	}
				1730	}
				1731
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1732	trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1733	trunc_end = (byte_start + byte_len) >> osb->s_clustersize_bits;
				1734	cluster_in_el = trunc_end;
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1735
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1736	mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, cend: %u\n",
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1737	(unsigned long long)OCFS2_I(inode)->ip_blkno,
				1738	(unsigned long long)byte_start,
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1739	(unsigned long long)byte_len, trunc_start, trunc_end);
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1740
				1741	ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
				1742	if (ret) {
				1743	mlog_errno(ret);
				1744	goto out;
				1745	}
				1746
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1747	path = ocfs2_new_path_from_et(&et);
				1748	if (!path) {
				1749	ret = -ENOMEM;
				1750	mlog_errno(ret);
				1751	goto out;
				1752	}
				1753
				1754	while (trunc_end > trunc_start) {
				1755
				1756	ret = ocfs2_find_path(INODE_CACHE(inode), path,
				1757	cluster_in_el);
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1758	if (ret) {
				1759	mlog_errno(ret);
				1760	goto out;
				1761	}
				1762
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1763	el = path_leaf_el(path);
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1764
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1765	i = ocfs2_find_rec(el, trunc_end);
				1766	/*
				1767	* Need to go to previous extent block.
				1768	*/
				1769	if (i < 0) {
				1770	if (path->p_tree_depth == 0)
				1771	break;
				1772
				1773	ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
				1774	path,
				1775	&cluster_in_el);
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1776	if (ret) {
				1777	mlog_errno(ret);
				1778	goto out;
				1779	}
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1780
				1781	/*
				1782	* We've reached the leftmost extent block,
				1783	* it's safe to leave.
				1784	*/
				1785	if (cluster_in_el == 0)
				1786	break;
				1787
				1788	/*
				1789	* The 'pos' searched for previous extent block is
				1790	* always one cluster less than actual trunc_end.
				1791	*/
				1792	trunc_end = cluster_in_el + 1;
				1793
				1794	ocfs2_reinit_path(path, 1);
				1795
				1796	continue;
				1797
				1798	} else
				1799	rec = &el->l_recs[i];
				1800
				1801	ocfs2_calc_trunc_pos(inode, el, rec, trunc_start, &trunc_cpos,
				1802	&trunc_len, &trunc_end, &blkno, &done);
				1803	if (done)
				1804	break;
				1805
				1806	flags = rec->e_flags;
				1807	phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
				1808
				1809	ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
				1810	phys_cpos, trunc_len, flags,
				1811	&dealloc, refcount_loc);
				1812	if (ret < 0) {
				1813	mlog_errno(ret);
				1814	goto out;
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1815	}
				1816
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1817	cluster_in_el = trunc_end;
				1818
				1819	ocfs2_reinit_path(path, 1);
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1820	}
				1821
				1822	ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);
				1823
				1824	out:
				1825	ocfs2_schedule_truncate_log_flush(osb, 1);
				1826	ocfs2_run_deallocs(osb, &dealloc);
				1827
				1828	return ret;
				1829	}
				1830
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1831	/*
				1832	* Parts of this function taken from xfs_change_file_space()
				1833	*/
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	1834	static int __ocfs2_change_file_space(struct file file, struct inode inode,
				1835	loff_t f_pos, unsigned int cmd,
				1836	struct ocfs2_space_resv *sr,
				1837	int change_size)
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1838	{
				1839	int ret;
				1840	s64 llen;
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	1841	loff_t size;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1842	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				1843	struct buffer_head *di_bh = NULL;
				1844	handle_t *handle;
Mark Fasheh	a00cce3	2007-07-20 11:28:30 -0700	[diff] [blame]	1845	unsigned long long max_off = inode->i_sb->s_maxbytes;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1846
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1847	if (ocfs2_is_hard_readonly(osb) \|\| ocfs2_is_soft_readonly(osb))
				1848	return -EROFS;
				1849
				1850	mutex_lock(&inode->i_mutex);
				1851
				1852	/*
				1853	* This prevents concurrent writes on other nodes
				1854	*/
				1855	ret = ocfs2_rw_lock(inode, 1);
				1856	if (ret) {
				1857	mlog_errno(ret);
				1858	goto out;
				1859	}
				1860
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1861	ret = ocfs2_inode_lock(inode, &di_bh, 1);
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1862	if (ret) {
				1863	mlog_errno(ret);
				1864	goto out_rw_unlock;
				1865	}
				1866
				1867	if (inode->i_flags & (S_IMMUTABLE\|S_APPEND)) {
				1868	ret = -EPERM;
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1869	goto out_inode_unlock;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1870	}
				1871
				1872	switch (sr->l_whence) {
				1873	case 0: /SEEK_SET/
				1874	break;
				1875	case 1: /SEEK_CUR/
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	1876	sr->l_start += f_pos;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1877	break;
				1878	case 2: /SEEK_END/
				1879	sr->l_start += i_size_read(inode);
				1880	break;
				1881	default:
				1882	ret = -EINVAL;
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1883	goto out_inode_unlock;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1884	}
				1885	sr->l_whence = 0;
				1886
				1887	llen = sr->l_len > 0 ? sr->l_len - 1 : sr->l_len;
				1888
				1889	if (sr->l_start < 0
				1890	\|\| sr->l_start > max_off
				1891	\|\| (sr->l_start + llen) < 0
				1892	\|\| (sr->l_start + llen) > max_off) {
				1893	ret = -EINVAL;
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1894	goto out_inode_unlock;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1895	}
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	1896	size = sr->l_start + sr->l_len;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1897
				1898	if (cmd == OCFS2_IOC_RESVSP \|\| cmd == OCFS2_IOC_RESVSP64) {
				1899	if (sr->l_len <= 0) {
				1900	ret = -EINVAL;
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1901	goto out_inode_unlock;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1902	}
				1903	}
				1904
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	1905	if (file && should_remove_suid(file->f_path.dentry)) {
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1906	ret = __ocfs2_write_remove_suid(inode, di_bh);
				1907	if (ret) {
				1908	mlog_errno(ret);
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1909	goto out_inode_unlock;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1910	}
				1911	}
				1912
				1913	down_write(&OCFS2_I(inode)->ip_alloc_sem);
				1914	switch (cmd) {
				1915	case OCFS2_IOC_RESVSP:
				1916	case OCFS2_IOC_RESVSP64:
				1917	/*
				1918	* This takes unsigned offsets, but the signed ones we
				1919	* pass have been checked against overflow above.
				1920	*/
				1921	ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start,
				1922	sr->l_len);
				1923	break;
				1924	case OCFS2_IOC_UNRESVSP:
				1925	case OCFS2_IOC_UNRESVSP64:
				1926	ret = ocfs2_remove_inode_range(inode, di_bh, sr->l_start,
				1927	sr->l_len);
				1928	break;
				1929	default:
				1930	ret = -EINVAL;
				1931	}
				1932	up_write(&OCFS2_I(inode)->ip_alloc_sem);
				1933	if (ret) {
				1934	mlog_errno(ret);
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1935	goto out_inode_unlock;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1936	}
				1937
				1938	/*
				1939	* We update c/mtime for these changes
				1940	*/
				1941	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
				1942	if (IS_ERR(handle)) {
				1943	ret = PTR_ERR(handle);
				1944	mlog_errno(ret);
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1945	goto out_inode_unlock;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1946	}
				1947
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	1948	if (change_size && i_size_read(inode) < size)
				1949	i_size_write(inode, size);
				1950
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1951	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
				1952	ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
				1953	if (ret < 0)
				1954	mlog_errno(ret);
				1955
				1956	ocfs2_commit_trans(osb, handle);
				1957
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1958	out_inode_unlock:
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1959	brelse(di_bh);
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1960	ocfs2_inode_unlock(inode, 1);
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1961	out_rw_unlock:
				1962	ocfs2_rw_unlock(inode, 1);
				1963
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1964	out:
Julia Lawall	c259ae5	2008-07-21 09:59:15 +0200	[diff] [blame]	1965	mutex_unlock(&inode->i_mutex);
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1966	return ret;
				1967	}
				1968
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	1969	int ocfs2_change_file_space(struct file *file, unsigned int cmd,
				1970	struct ocfs2_space_resv *sr)
				1971	{
				1972	struct inode *inode = file->f_path.dentry->d_inode;
Fernando Carrijo	c19a28e	2009-01-07 18:09:08 -0800	[diff] [blame]	1973	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	1974
				1975	if ((cmd == OCFS2_IOC_RESVSP \|\| cmd == OCFS2_IOC_RESVSP64) &&
				1976	!ocfs2_writes_unwritten_extents(osb))
				1977	return -ENOTTY;
				1978	else if ((cmd == OCFS2_IOC_UNRESVSP \|\| cmd == OCFS2_IOC_UNRESVSP64) &&
				1979	!ocfs2_sparse_alloc(osb))
				1980	return -ENOTTY;
				1981
				1982	if (!S_ISREG(inode->i_mode))
				1983	return -EINVAL;
				1984
				1985	if (!(file->f_mode & FMODE_WRITE))
				1986	return -EBADF;
				1987
				1988	return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
				1989	}
				1990
				1991	static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset,
				1992	loff_t len)
				1993	{
				1994	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				1995	struct ocfs2_space_resv sr;
				1996	int change_size = 1;
				1997
				1998	if (!ocfs2_writes_unwritten_extents(osb))
				1999	return -EOPNOTSUPP;
				2000
				2001	if (S_ISDIR(inode->i_mode))
				2002	return -ENODEV;
				2003
				2004	if (mode & FALLOC_FL_KEEP_SIZE)
				2005	change_size = 0;
				2006
				2007	sr.l_whence = 0;
				2008	sr.l_start = (s64)offset;
				2009	sr.l_len = (s64)len;
				2010
				2011	return __ocfs2_change_file_space(NULL, inode, offset,
				2012	OCFS2_IOC_RESVSP64, &sr, change_size);
				2013	}
				2014
Tao Ma	293b2f7	2009-08-25 08:02:48 +0800	[diff] [blame]	2015	int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos,
				2016	size_t count)
				2017	{
				2018	int ret = 0;
				2019	unsigned int extent_flags;
				2020	u32 cpos, clusters, extent_len, phys_cpos;
				2021	struct super_block *sb = inode->i_sb;
				2022
				2023	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)) \|\|
Tao Ma	2f48d59	2009-10-15 11:10:49 +0800	[diff] [blame]	2024	!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) \|\|
				2025	OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
Tao Ma	293b2f7	2009-08-25 08:02:48 +0800	[diff] [blame]	2026	return 0;
				2027
				2028	cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
				2029	clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;
				2030
				2031	while (clusters) {
				2032	ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
				2033	&extent_flags);
				2034	if (ret < 0) {
				2035	mlog_errno(ret);
				2036	goto out;
				2037	}
				2038
				2039	if (phys_cpos && (extent_flags & OCFS2_EXT_REFCOUNTED)) {
				2040	ret = 1;
				2041	break;
				2042	}
				2043
				2044	if (extent_len > clusters)
				2045	extent_len = clusters;
				2046
				2047	clusters -= extent_len;
				2048	cpos += extent_len;
				2049	}
				2050	out:
				2051	return ret;
				2052	}
				2053
				2054	static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
Tao Ma	b890823	2010-08-12 10:27:14 +0800	[diff] [blame]	2055	struct file *file,
Tao Ma	293b2f7	2009-08-25 08:02:48 +0800	[diff] [blame]	2056	loff_t pos, size_t count,
				2057	int *meta_level)
				2058	{
				2059	int ret;
				2060	struct buffer_head *di_bh = NULL;
				2061	u32 cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
				2062	u32 clusters =
				2063	ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos;
				2064
				2065	ret = ocfs2_inode_lock(inode, &di_bh, 1);
				2066	if (ret) {
				2067	mlog_errno(ret);
				2068	goto out;
				2069	}
				2070
				2071	*meta_level = 1;
				2072
Tao Ma	1550271	2010-08-12 10:36:38 +0800	[diff] [blame]	2073	ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX);
Tao Ma	293b2f7	2009-08-25 08:02:48 +0800	[diff] [blame]	2074	if (ret)
				2075	mlog_errno(ret);
				2076	out:
				2077	brelse(di_bh);
				2078	return ret;
				2079	}
				2080
Tao Ma	b890823	2010-08-12 10:27:14 +0800	[diff] [blame]	2081	static int ocfs2_prepare_inode_for_write(struct file *file,
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2082	loff_t *ppos,
				2083	size_t count,
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2084	int appending,
Tao Ma	86470e9	2009-12-03 21:55:05 +0800	[diff] [blame]	2085	int *direct_io,
				2086	int *has_refcount)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2087	{
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	2088	int ret = 0, meta_level = 0;
Tao Ma	b890823	2010-08-12 10:27:14 +0800	[diff] [blame]	2089	struct dentry *dentry = file->f_path.dentry;
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2090	struct inode *inode = dentry->d_inode;
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	2091	loff_t saved_pos, end;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2092
Sunil Mushran	2bd6321	2010-01-25 16:57:38 -0800	[diff] [blame]	2093	/*
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	2094	* We start with a read level meta lock and only jump to an ex
				2095	* if we need to make modifications here.
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2096	*/
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2097	for(;;) {
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	2098	ret = ocfs2_inode_lock(inode, NULL, meta_level);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2099	if (ret < 0) {
				2100	meta_level = -1;
				2101	mlog_errno(ret);
				2102	goto out;
				2103	}
				2104
				2105	/* Clear suid / sgid if necessary. We do this here
				2106	* instead of later in the write path because
				2107	* remove_suid() calls ->setattr without any hint that
				2108	* we may have already done our cluster locking. Since
				2109	* ocfs2_setattr() must take cluster locks to
				2110	* proceeed, this will lead us to recursively lock the
				2111	* inode. There's also the dinode i_size state which
				2112	* can be lost via setattr during extending writes (we
				2113	* set inode->i_size at the end of a write. */
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2114	if (should_remove_suid(dentry)) {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2115	if (meta_level == 0) {
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	2116	ocfs2_inode_unlock(inode, meta_level);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2117	meta_level = 1;
				2118	continue;
				2119	}
				2120
				2121	ret = ocfs2_write_remove_suid(inode);
				2122	if (ret < 0) {
				2123	mlog_errno(ret);
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2124	goto out_unlock;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2125	}
				2126	}
				2127
				2128	/* work on a copy of ppos until we're sure that we won't have
				2129	* to recalculate it due to relocking. */
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2130	if (appending) {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2131	saved_pos = i_size_read(inode);
				2132	mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos);
				2133	} else {
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2134	saved_pos = *ppos;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2135	}
Mark Fasheh	3a0782d	2007-01-17 12:53:31 -0800	[diff] [blame]	2136
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	2137	end = saved_pos + count;
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2138
Tao Ma	293b2f7	2009-08-25 08:02:48 +0800	[diff] [blame]	2139	ret = ocfs2_check_range_for_refcount(inode, saved_pos, count);
				2140	if (ret == 1) {
				2141	ocfs2_inode_unlock(inode, meta_level);
				2142	meta_level = -1;
				2143
				2144	ret = ocfs2_prepare_inode_for_refcount(inode,
Tao Ma	b890823	2010-08-12 10:27:14 +0800	[diff] [blame]	2145	file,
Tao Ma	293b2f7	2009-08-25 08:02:48 +0800	[diff] [blame]	2146	saved_pos,
				2147	count,
				2148	&meta_level);
Tao Ma	86470e9	2009-12-03 21:55:05 +0800	[diff] [blame]	2149	if (has_refcount)
				2150	*has_refcount = 1;
Wengang Wang	96a1cc7	2010-02-09 14:57:45 +0800	[diff] [blame]	2151	if (direct_io)
				2152	*direct_io = 0;
Tao Ma	293b2f7	2009-08-25 08:02:48 +0800	[diff] [blame]	2153	}
				2154
				2155	if (ret < 0) {
				2156	mlog_errno(ret);
				2157	goto out_unlock;
				2158	}
				2159
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	2160	/*
				2161	* Skip the O_DIRECT checks if we don't need
				2162	* them.
				2163	*/
				2164	if (!direct_io \|\| !(*direct_io))
				2165	break;
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2166
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	2167	/*
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	2168	* There's no sane way to do direct writes to an inode
				2169	* with inline data.
				2170	*/
				2171	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
				2172	*direct_io = 0;
				2173	break;
				2174	}
				2175
				2176	/*
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	2177	* Allowing concurrent direct writes means
				2178	* i_size changes wouldn't be synchronized, so
				2179	* one node could wind up truncating another
				2180	* nodes writes.
				2181	*/
				2182	if (end > i_size_read(inode)) {
				2183	*direct_io = 0;
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2184	break;
				2185	}
				2186
Mark Fasheh	3a0782d	2007-01-17 12:53:31 -0800	[diff] [blame]	2187	/*
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	2188	* We don't fill holes during direct io, so
				2189	* check for them here. If any are found, the
				2190	* caller will have to retake some cluster
				2191	* locks and initiate the io as buffered.
Mark Fasheh	3a0782d	2007-01-17 12:53:31 -0800	[diff] [blame]	2192	*/
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	2193	ret = ocfs2_check_range_for_holes(inode, saved_pos, count);
				2194	if (ret == 1) {
				2195	*direct_io = 0;
				2196	ret = 0;
				2197	} else if (ret < 0)
				2198	mlog_errno(ret);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2199	break;
				2200	}
				2201
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2202	if (appending)
				2203	*ppos = saved_pos;
				2204
				2205	out_unlock:
Tao Ma	293b2f7	2009-08-25 08:02:48 +0800	[diff] [blame]	2206	if (meta_level >= 0)
				2207	ocfs2_inode_unlock(inode, meta_level);
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2208
				2209	out:
				2210	return ret;
				2211	}
				2212
				2213	static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
				2214	const struct iovec *iov,
				2215	unsigned long nr_segs,
				2216	loff_t pos)
				2217	{
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2218	int ret, direct_io, appending, rw_level, have_alloc_sem = 0;
Tao Ma	86470e9	2009-12-03 21:55:05 +0800	[diff] [blame]	2219	int can_do_direct, has_refcount = 0;
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2220	ssize_t written = 0;
				2221	size_t ocount; /* original count */
				2222	size_t count; /* after file limit checks */
Mark Fasheh	9ea2d32	2007-10-18 14:14:45 -0700	[diff] [blame]	2223	loff_t old_size, *ppos = &iocb->ki_pos;
				2224	u32 old_clusters;
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2225	struct file *file = iocb->ki_filp;
				2226	struct inode *inode = file->f_path.dentry->d_inode;
Mark Fasheh	9ea2d32	2007-10-18 14:14:45 -0700	[diff] [blame]	2227	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
Tristan Ye	7bdb0d1	2010-10-11 16:46:39 +0800	[diff] [blame^]	2228	int full_coherency = !(osb->s_mount_opt &
				2229	OCFS2_MOUNT_COHERENCY_BUFFERED);
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2230
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2231	mlog_entry("(0x%p, %u, '%.*s')\n", file,
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2232	(unsigned int)nr_segs,
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2233	file->f_path.dentry->d_name.len,
				2234	file->f_path.dentry->d_name.name);
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2235
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2236	if (iocb->ki_left == 0)
				2237	return 0;
				2238
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2239	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
				2240
				2241	appending = file->f_flags & O_APPEND ? 1 : 0;
				2242	direct_io = file->f_flags & O_DIRECT ? 1 : 0;
				2243
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2244	mutex_lock(&inode->i_mutex);
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2245
				2246	relock:
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2247	/* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2248	if (direct_io) {
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2249	down_read(&inode->i_alloc_sem);
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2250	have_alloc_sem = 1;
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2251	}
				2252
Tristan Ye	7bdb0d1	2010-10-11 16:46:39 +0800	[diff] [blame^]	2253	/*
				2254	* Concurrent O_DIRECT writes are allowed with
				2255	* mount_option "coherency=buffered".
				2256	*/
				2257	rw_level = (!direct_io \|\| full_coherency);
				2258
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2259	ret = ocfs2_rw_lock(inode, rw_level);
				2260	if (ret < 0) {
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2261	mlog_errno(ret);
				2262	goto out_sems;
				2263	}
				2264
Tristan Ye	7bdb0d1	2010-10-11 16:46:39 +0800	[diff] [blame^]	2265	/*
				2266	* O_DIRECT writes with "coherency=full" need to take EX cluster
				2267	* inode_lock to guarantee coherency.
				2268	*/
				2269	if (direct_io && full_coherency) {
				2270	/*
				2271	* We need to take and drop the inode lock to force
				2272	* other nodes to drop their caches. Buffered I/O
				2273	* already does this in write_begin().
				2274	*/
				2275	ret = ocfs2_inode_lock(inode, NULL, 1);
				2276	if (ret < 0) {
				2277	mlog_errno(ret);
				2278	goto out_sems;
				2279	}
				2280
				2281	ocfs2_inode_unlock(inode, 1);
				2282	}
				2283
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2284	can_do_direct = direct_io;
Tao Ma	b890823	2010-08-12 10:27:14 +0800	[diff] [blame]	2285	ret = ocfs2_prepare_inode_for_write(file, ppos,
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2286	iocb->ki_left, appending,
Tao Ma	86470e9	2009-12-03 21:55:05 +0800	[diff] [blame]	2287	&can_do_direct, &has_refcount);
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2288	if (ret < 0) {
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2289	mlog_errno(ret);
				2290	goto out;
				2291	}
				2292
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2293	/*
				2294	* We can't complete the direct I/O as requested, fall back to
				2295	* buffered I/O.
				2296	*/
				2297	if (direct_io && !can_do_direct) {
				2298	ocfs2_rw_unlock(inode, rw_level);
				2299	up_read(&inode->i_alloc_sem);
				2300
				2301	have_alloc_sem = 0;
				2302	rw_level = -1;
				2303
				2304	direct_io = 0;
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2305	goto relock;
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2306	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2307
Mark Fasheh	9ea2d32	2007-10-18 14:14:45 -0700	[diff] [blame]	2308	/*
				2309	* To later detect whether a journal commit for sync writes is
				2310	* necessary, we sample i_size, and cluster count here.
				2311	*/
				2312	old_size = i_size_read(inode);
				2313	old_clusters = OCFS2_I(inode)->ip_clusters;
				2314
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2315	/* communicate with ocfs2_dio_end_io */
Mark Fasheh	7cdfc3a	2007-04-16 17:28:51 -0700	[diff] [blame]	2316	ocfs2_iocb_set_rw_locked(iocb, rw_level);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2317
Li Dongyang	6b933c8	2010-04-17 17:49:10 +0800	[diff] [blame]	2318	ret = generic_segment_checks(iov, &nr_segs, &ocount,
				2319	VERIFY_READ);
				2320	if (ret)
				2321	goto out_dio;
				2322
				2323	count = ocount;
				2324	ret = generic_write_checks(file, ppos, &count,
				2325	S_ISBLK(inode->i_mode));
				2326	if (ret)
				2327	goto out_dio;
				2328
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2329	if (direct_io) {
				2330	written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
				2331	ppos, count, ocount);
				2332	if (written < 0) {
				2333	ret = written;
				2334	goto out_dio;
				2335	}
				2336	} else {
Li Dongyang	6b933c8	2010-04-17 17:49:10 +0800	[diff] [blame]	2337	current->backing_dev_info = file->f_mapping->backing_dev_info;
				2338	written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos,
				2339	ppos, count, 0);
				2340	current->backing_dev_info = NULL;
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2341	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2342
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2343	out_dio:
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2344	/* buffered aio wouldn't have proper lock coverage today */
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2345	BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2346
Tao Ma	60c4867	2010-02-03 09:56:04 +0800	[diff] [blame]	2347	if (((file->f_flags & O_DSYNC) && !direct_io) \|\| IS_SYNC(inode) \|\|
Tristan Ye	81c8c82	2010-08-19 15:15:00 +0800	[diff] [blame]	2348	((file->f_flags & O_DIRECT) && !direct_io)) {
Jan Kara	918941a	2009-08-17 18:50:08 +0200	[diff] [blame]	2349	ret = filemap_fdatawrite_range(file->f_mapping, pos,
				2350	pos + count - 1);
				2351	if (ret < 0)
				2352	written = ret;
				2353
Coly Li	a03ab78	2010-03-26 05:15:12 +0800	[diff] [blame]	2354	if (!ret && ((old_size != i_size_read(inode)) \|\|
				2355	(old_clusters != OCFS2_I(inode)->ip_clusters) \|\|
				2356	has_refcount)) {
Joel Becker	2b4e30f	2008-09-03 20:03:41 -0700	[diff] [blame]	2357	ret = jbd2_journal_force_commit(osb->journal->j_journal);
Mark Fasheh	9ea2d32	2007-10-18 14:14:45 -0700	[diff] [blame]	2358	if (ret < 0)
				2359	written = ret;
				2360	}
Jan Kara	918941a	2009-08-17 18:50:08 +0200	[diff] [blame]	2361
				2362	if (!ret)
				2363	ret = filemap_fdatawait_range(file->f_mapping, pos,
				2364	pos + count - 1);
Mark Fasheh	9ea2d32	2007-10-18 14:14:45 -0700	[diff] [blame]	2365	}
				2366
Sunil Mushran	2bd6321	2010-01-25 16:57:38 -0800	[diff] [blame]	2367	/*
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2368	* deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
				2369	* function pointer which is called when o_direct io completes so that
				2370	* it can unlock our rw lock. (it's the clustered equivalent of
				2371	* i_alloc_sem; protects truncate from racing with pending ios).
				2372	* Unfortunately there are error cases which call end_io and others
				2373	* that don't. so we don't have to unlock the rw_lock if either an
				2374	* async dio is going to do it in the future or an end_io after an
				2375	* error has already done it.
				2376	*/
Coly Li	66b116c	2010-02-25 14:57:13 +0800	[diff] [blame]	2377	if ((ret == -EIOCBQUEUED) \|\| (!ocfs2_iocb_is_rw_locked(iocb))) {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2378	rw_level = -1;
				2379	have_alloc_sem = 0;
				2380	}
				2381
				2382	out:
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2383	if (rw_level != -1)
				2384	ocfs2_rw_unlock(inode, rw_level);
				2385
				2386	out_sems:
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2387	if (have_alloc_sem)
				2388	up_read(&inode->i_alloc_sem);
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2389
Jes Sorensen	1b1dcc1	2006-01-09 15:59:24 -0800	[diff] [blame]	2390	mutex_unlock(&inode->i_mutex);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2391
Wengang Wang	812e7a6	2009-07-10 13:26:04 +0800	[diff] [blame]	2392	if (written)
				2393	ret = written;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2394	mlog_exit(ret);
Wengang Wang	812e7a6	2009-07-10 13:26:04 +0800	[diff] [blame]	2395	return ret;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2396	}
				2397
Miklos Szeredi	328eaab	2009-04-14 19:48:39 +0200	[diff] [blame]	2398	static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
				2399	struct file *out,
				2400	struct splice_desc *sd)
				2401	{
				2402	int ret;
				2403
Tao Ma	b890823	2010-08-12 10:27:14 +0800	[diff] [blame]	2404	ret = ocfs2_prepare_inode_for_write(out, &sd->pos,
Tao Ma	86470e9	2009-12-03 21:55:05 +0800	[diff] [blame]	2405	sd->total_len, 0, NULL, NULL);
Miklos Szeredi	328eaab	2009-04-14 19:48:39 +0200	[diff] [blame]	2406	if (ret < 0) {
				2407	mlog_errno(ret);
				2408	return ret;
				2409	}
				2410
				2411	return splice_from_pipe_feed(pipe, sd, pipe_to_file);
				2412	}
				2413
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2414	static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
				2415	struct file *out,
				2416	loff_t *ppos,
				2417	size_t len,
				2418	unsigned int flags)
				2419	{
				2420	int ret;
Miklos Szeredi	328eaab	2009-04-14 19:48:39 +0200	[diff] [blame]	2421	struct address_space *mapping = out->f_mapping;
				2422	struct inode *inode = mapping->host;
				2423	struct splice_desc sd = {
				2424	.total_len = len,
				2425	.flags = flags,
				2426	.pos = *ppos,
				2427	.u.file = out,
				2428	};
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2429
				2430	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,
				2431	(unsigned int)len,
Josef Sipek	d28c917	2006-12-08 02:37:25 -0800	[diff] [blame]	2432	out->f_path.dentry->d_name.len,
				2433	out->f_path.dentry->d_name.name);
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2434
Miklos Szeredi	7bfac9e	2009-04-06 17:41:00 +0200	[diff] [blame]	2435	if (pipe->inode)
Miklos Szeredi	328eaab	2009-04-14 19:48:39 +0200	[diff] [blame]	2436	mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT);
				2437
				2438	splice_from_pipe_begin(&sd);
				2439	do {
				2440	ret = splice_from_pipe_next(pipe, &sd);
				2441	if (ret <= 0)
				2442	break;
				2443
				2444	mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
				2445	ret = ocfs2_rw_lock(inode, 1);
				2446	if (ret < 0)
				2447	mlog_errno(ret);
				2448	else {
				2449	ret = ocfs2_splice_to_file(pipe, out, &sd);
				2450	ocfs2_rw_unlock(inode, 1);
				2451	}
				2452	mutex_unlock(&inode->i_mutex);
				2453	} while (ret > 0);
				2454	splice_from_pipe_end(pipe, &sd);
				2455
Miklos Szeredi	7bfac9e	2009-04-06 17:41:00 +0200	[diff] [blame]	2456	if (pipe->inode)
				2457	mutex_unlock(&pipe->inode->i_mutex);
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2458
Miklos Szeredi	328eaab	2009-04-14 19:48:39 +0200	[diff] [blame]	2459	if (sd.num_spliced)
				2460	ret = sd.num_spliced;
				2461
				2462	if (ret > 0) {
				2463	unsigned long nr_pages;
Jan Kara	d23c937	2009-08-18 18:24:31 +0200	[diff] [blame]	2464	int err;
Miklos Szeredi	328eaab	2009-04-14 19:48:39 +0200	[diff] [blame]	2465
Miklos Szeredi	328eaab	2009-04-14 19:48:39 +0200	[diff] [blame]	2466	nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
				2467
Jan Kara	d23c937	2009-08-18 18:24:31 +0200	[diff] [blame]	2468	err = generic_write_sync(out, *ppos, ret);
				2469	if (err)
				2470	ret = err;
				2471	else
				2472	*ppos += ret;
Miklos Szeredi	328eaab	2009-04-14 19:48:39 +0200	[diff] [blame]	2473
Miklos Szeredi	328eaab	2009-04-14 19:48:39 +0200	[diff] [blame]	2474	balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
				2475	}
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2476
				2477	mlog_exit(ret);
				2478	return ret;
				2479	}
				2480
				2481	static ssize_t ocfs2_file_splice_read(struct file *in,
				2482	loff_t *ppos,
				2483	struct pipe_inode_info *pipe,
				2484	size_t len,
				2485	unsigned int flags)
				2486	{
Tao Ma	1962f39	2009-06-19 15:36:52 +0800	[diff] [blame]	2487	int ret = 0, lock_level = 0;
Josef Sipek	d28c917	2006-12-08 02:37:25 -0800	[diff] [blame]	2488	struct inode *inode = in->f_path.dentry->d_inode;
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2489
				2490	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,
				2491	(unsigned int)len,
Josef Sipek	d28c917	2006-12-08 02:37:25 -0800	[diff] [blame]	2492	in->f_path.dentry->d_name.len,
				2493	in->f_path.dentry->d_name.name);
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2494
				2495	/*
				2496	* See the comment in ocfs2_file_aio_read()
				2497	*/
Tao Ma	1962f39	2009-06-19 15:36:52 +0800	[diff] [blame]	2498	ret = ocfs2_inode_lock_atime(inode, in->f_vfsmnt, &lock_level);
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2499	if (ret < 0) {
				2500	mlog_errno(ret);
				2501	goto bail;
				2502	}
Tao Ma	1962f39	2009-06-19 15:36:52 +0800	[diff] [blame]	2503	ocfs2_inode_unlock(inode, lock_level);
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2504
				2505	ret = generic_file_splice_read(in, ppos, pipe, len, flags);
				2506
				2507	bail:
				2508	mlog_exit(ret);
				2509	return ret;
				2510	}
				2511
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2512	static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
Badari Pulavarty	027445c	2006-09-30 23:28:46 -0700	[diff] [blame]	2513	const struct iovec *iov,
				2514	unsigned long nr_segs,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2515	loff_t pos)
				2516	{
Tiger Yang	25899de	2006-11-15 15:49:02 +0800	[diff] [blame]	2517	int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2518	struct file *filp = iocb->ki_filp;
Josef Sipek	d28c917	2006-12-08 02:37:25 -0800	[diff] [blame]	2519	struct inode *inode = filp->f_path.dentry->d_inode;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2520
Badari Pulavarty	027445c	2006-09-30 23:28:46 -0700	[diff] [blame]	2521	mlog_entry("(0x%p, %u, '%.*s')\n", filp,
				2522	(unsigned int)nr_segs,
Josef Sipek	d28c917	2006-12-08 02:37:25 -0800	[diff] [blame]	2523	filp->f_path.dentry->d_name.len,
				2524	filp->f_path.dentry->d_name.name);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2525
				2526	if (!inode) {
				2527	ret = -EINVAL;
				2528	mlog_errno(ret);
				2529	goto bail;
				2530	}
				2531
Sunil Mushran	2bd6321	2010-01-25 16:57:38 -0800	[diff] [blame]	2532	/*
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2533	* buffered reads protect themselves in ->readpage(). O_DIRECT reads
				2534	* need locks to protect pending reads from racing with truncate.
				2535	*/
				2536	if (filp->f_flags & O_DIRECT) {
				2537	down_read(&inode->i_alloc_sem);
				2538	have_alloc_sem = 1;
				2539
				2540	ret = ocfs2_rw_lock(inode, 0);
				2541	if (ret < 0) {
				2542	mlog_errno(ret);
				2543	goto bail;
				2544	}
				2545	rw_level = 0;
				2546	/* communicate with ocfs2_dio_end_io */
Mark Fasheh	7cdfc3a	2007-04-16 17:28:51 -0700	[diff] [blame]	2547	ocfs2_iocb_set_rw_locked(iocb, rw_level);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2548	}
				2549
Mark Fasheh	c4374f8	2006-05-05 19:04:35 -0700	[diff] [blame]	2550	/*
				2551	* We're fine letting folks race truncates and extending
				2552	* writes with read across the cluster, just like they can
				2553	* locally. Hence no rw_lock during read.
Sunil Mushran	2bd6321	2010-01-25 16:57:38 -0800	[diff] [blame]	2554	*
Mark Fasheh	c4374f8	2006-05-05 19:04:35 -0700	[diff] [blame]	2555	* Take and drop the meta data lock to update inode fields
				2556	* like i_size. This allows the checks down below
Sunil Mushran	2bd6321	2010-01-25 16:57:38 -0800	[diff] [blame]	2557	* generic_file_aio_read() a chance of actually working.
Mark Fasheh	c4374f8	2006-05-05 19:04:35 -0700	[diff] [blame]	2558	*/
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	2559	ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level);
Mark Fasheh	c4374f8	2006-05-05 19:04:35 -0700	[diff] [blame]	2560	if (ret < 0) {
				2561	mlog_errno(ret);
				2562	goto bail;
				2563	}
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	2564	ocfs2_inode_unlock(inode, lock_level);
Mark Fasheh	c4374f8	2006-05-05 19:04:35 -0700	[diff] [blame]	2565
Badari Pulavarty	027445c	2006-09-30 23:28:46 -0700	[diff] [blame]	2566	ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2567	if (ret == -EINVAL)
Sunil Mushran	56753bd	2008-06-09 11:24:41 -0700	[diff] [blame]	2568	mlog(0, "generic_file_aio_read returned -EINVAL\n");
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2569
				2570	/* buffered aio wouldn't have proper lock coverage today */
				2571	BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));
				2572
				2573	/* see ocfs2_file_aio_write */
				2574	if (ret == -EIOCBQUEUED \|\| !ocfs2_iocb_is_rw_locked(iocb)) {
				2575	rw_level = -1;
				2576	have_alloc_sem = 0;
				2577	}
				2578
				2579	bail:
				2580	if (have_alloc_sem)
				2581	up_read(&inode->i_alloc_sem);
Sunil Mushran	2bd6321	2010-01-25 16:57:38 -0800	[diff] [blame]	2582	if (rw_level != -1)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2583	ocfs2_rw_unlock(inode, rw_level);
				2584	mlog_exit(ret);
				2585
				2586	return ret;
				2587	}
				2588
Arjan van de Ven	92e1d5b	2007-02-12 00:55:39 -0800	[diff] [blame]	2589	const struct inode_operations ocfs2_file_iops = {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2590	.setattr = ocfs2_setattr,
				2591	.getattr = ocfs2_getattr,
Tiger Yang	d38eb8d	2006-11-27 09:59:21 +0800	[diff] [blame]	2592	.permission = ocfs2_permission,
Tiger Yang	cf1d6c7	2008-08-18 17:11:00 +0800	[diff] [blame]	2593	.setxattr = generic_setxattr,
				2594	.getxattr = generic_getxattr,
				2595	.listxattr = ocfs2_listxattr,
				2596	.removexattr = generic_removexattr,
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	2597	.fallocate = ocfs2_fallocate,
Mark Fasheh	00dc417	2008-10-03 17:32:11 -0400	[diff] [blame]	2598	.fiemap = ocfs2_fiemap,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2599	};
				2600
Arjan van de Ven	92e1d5b	2007-02-12 00:55:39 -0800	[diff] [blame]	2601	const struct inode_operations ocfs2_special_file_iops = {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2602	.setattr = ocfs2_setattr,
				2603	.getattr = ocfs2_getattr,
Tiger Yang	d38eb8d	2006-11-27 09:59:21 +0800	[diff] [blame]	2604	.permission = ocfs2_permission,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2605	};
				2606
Mark Fasheh	53da493	2008-07-21 14:29:16 -0700	[diff] [blame]	2607	/*
				2608	* Other than ->lock, keep ocfs2_fops and ocfs2_dops in sync with
				2609	* ocfs2_fops_no_plocks and ocfs2_dops_no_plocks!
				2610	*/
Arjan van de Ven	4b6f5d2	2006-03-28 01:56:42 -0800	[diff] [blame]	2611	const struct file_operations ocfs2_fops = {
Jan Kara	32c3c0e	2007-12-19 15:24:52 +0100	[diff] [blame]	2612	.llseek = generic_file_llseek,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2613	.read = do_sync_read,
				2614	.write = do_sync_write,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2615	.mmap = ocfs2_mmap,
				2616	.fsync = ocfs2_sync_file,
				2617	.release = ocfs2_file_release,
				2618	.open = ocfs2_file_open,
				2619	.aio_read = ocfs2_file_aio_read,
				2620	.aio_write = ocfs2_file_aio_write,
Andi Kleen	c9ec148	2008-01-27 03:17:17 +0100	[diff] [blame]	2621	.unlocked_ioctl = ocfs2_ioctl,
Mark Fasheh	586d232	2007-03-09 15:56:28 -0800	[diff] [blame]	2622	#ifdef CONFIG_COMPAT
				2623	.compat_ioctl = ocfs2_compat_ioctl,
				2624	#endif
Mark Fasheh	53da493	2008-07-21 14:29:16 -0700	[diff] [blame]	2625	.lock = ocfs2_lock,
				2626	.flock = ocfs2_flock,
				2627	.splice_read = ocfs2_file_splice_read,
				2628	.splice_write = ocfs2_file_splice_write,
				2629	};
				2630
				2631	const struct file_operations ocfs2_dops = {
				2632	.llseek = generic_file_llseek,
				2633	.read = generic_read_dir,
				2634	.readdir = ocfs2_readdir,
				2635	.fsync = ocfs2_sync_file,
				2636	.release = ocfs2_dir_release,
				2637	.open = ocfs2_dir_open,
				2638	.unlocked_ioctl = ocfs2_ioctl,
				2639	#ifdef CONFIG_COMPAT
				2640	.compat_ioctl = ocfs2_compat_ioctl,
				2641	#endif
				2642	.lock = ocfs2_lock,
				2643	.flock = ocfs2_flock,
				2644	};
				2645
				2646	/*
				2647	* POSIX-lockless variants of our file_operations.
				2648	*
				2649	* These will be used if the underlying cluster stack does not support
				2650	* posix file locking, if the user passes the "localflocks" mount
				2651	* option, or if we have a local-only fs.
				2652	*
				2653	* ocfs2_flock is in here because all stacks handle UNIX file locks,
				2654	* so we still want it in the case of no stack support for
				2655	* plocks. Internally, it will do the right thing when asked to ignore
				2656	* the cluster.
				2657	*/
				2658	const struct file_operations ocfs2_fops_no_plocks = {
				2659	.llseek = generic_file_llseek,
				2660	.read = do_sync_read,
				2661	.write = do_sync_write,
				2662	.mmap = ocfs2_mmap,
				2663	.fsync = ocfs2_sync_file,
				2664	.release = ocfs2_file_release,
				2665	.open = ocfs2_file_open,
				2666	.aio_read = ocfs2_file_aio_read,
				2667	.aio_write = ocfs2_file_aio_write,
				2668	.unlocked_ioctl = ocfs2_ioctl,
				2669	#ifdef CONFIG_COMPAT
				2670	.compat_ioctl = ocfs2_compat_ioctl,
				2671	#endif
Mark Fasheh	53fc622	2007-12-20 16:49:04 -0800	[diff] [blame]	2672	.flock = ocfs2_flock,
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2673	.splice_read = ocfs2_file_splice_read,
				2674	.splice_write = ocfs2_file_splice_write,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2675	};
				2676
Mark Fasheh	53da493	2008-07-21 14:29:16 -0700	[diff] [blame]	2677	const struct file_operations ocfs2_dops_no_plocks = {
Jan Kara	32c3c0e	2007-12-19 15:24:52 +0100	[diff] [blame]	2678	.llseek = generic_file_llseek,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2679	.read = generic_read_dir,
				2680	.readdir = ocfs2_readdir,
				2681	.fsync = ocfs2_sync_file,
Mark Fasheh	53fc622	2007-12-20 16:49:04 -0800	[diff] [blame]	2682	.release = ocfs2_dir_release,
				2683	.open = ocfs2_dir_open,
Andi Kleen	c9ec148	2008-01-27 03:17:17 +0100	[diff] [blame]	2684	.unlocked_ioctl = ocfs2_ioctl,
Mark Fasheh	586d232	2007-03-09 15:56:28 -0800	[diff] [blame]	2685	#ifdef CONFIG_COMPAT
				2686	.compat_ioctl = ocfs2_compat_ioctl,
				2687	#endif
Mark Fasheh	53fc622	2007-12-20 16:49:04 -0800	[diff] [blame]	2688	.flock = ocfs2_flock,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2689	};