vfs: don't allow writes to swap files

Don't let userspace write to an active swap file because the kernel
effectively has a long term lease on the storage and things could get
seriously corrupted if we let this happen.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 07e46b7..ff1479a 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1704,6 +1704,9 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (bdev_read_only(I_BDEV(bd_inode)))
 		return -EPERM;
 
+	if (IS_SWAPFILE(bd_inode))
+		return -ETXTBSY;
+
 	if (!iov_iter_count(from))
 		return 0;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5904004..0c07020 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3238,4 +3238,15 @@ extern void inode_nohighmem(struct inode *inode);
 int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags,
 			     unsigned int flags);
 
+/*
+ * Flush file data before changing attributes.  Caller must hold any locks
+ * required to prevent further writes to this file until we're done setting
+ * flags.
+ */
+static inline int inode_drain_writes(struct inode *inode)
+{
+	inode_dio_wait(inode);
+	return filemap_write_and_wait(inode->i_mapping);
+}
+
 #endif /* _LINUX_FS_H */
diff --git a/mm/filemap.c b/mm/filemap.c
index 2fc6466..f527714 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2550,6 +2550,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 	unsigned long limit = rlimit(RLIMIT_FSIZE);
 	loff_t pos;
 
+	if (IS_SWAPFILE(inode))
+		return -ETXTBSY;
+
 	if (!iov_iter_count(from))
 		return 0;
 
diff --git a/mm/memory.c b/mm/memory.c
index e6a5a1f..ab3f604 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2051,6 +2051,10 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page,
 	vmf.page = page;
 	vmf.cow_page = NULL;
 
+	if (vma->vm_file &&
+	    IS_SWAPFILE(vma->vm_file->f_mapping->host))
+		return VM_FAULT_SIGBUS;
+
 	ret = vma->vm_ops->page_mkwrite(vma, &vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
 		return ret;
diff --git a/mm/mmap.c b/mm/mmap.c
index 145d3d5..6f3dabc 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1386,8 +1386,12 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 
 		switch (flags & MAP_TYPE) {
 		case MAP_SHARED:
-			if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
-				return -EACCES;
+			if (prot & PROT_WRITE) {
+				if (!(file->f_mode & FMODE_WRITE))
+					return -EACCES;
+				if (IS_SWAPFILE(file->f_mapping->host))
+					return -ETXTBSY;
+			}
 
 			/*
 			 * Make sure we don't allow writing to an append-only
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 510f5bb..b03b0b0 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2539,6 +2539,17 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		}
 	}
 
+	/*
+	 * Flush any pending IO and dirty mappings before we start using this
+	 * swap device.
+	 */
+	inode->i_flags |= S_SWAPFILE;
+	error = inode_drain_writes(inode);
+	if (error) {
+		inode->i_flags &= ~S_SWAPFILE;
+		goto bad_swap;
+	}
+
 	mutex_lock(&swapon_mutex);
 	prio = -1;
 	if (swap_flags & SWAP_FLAG_PREFER)
@@ -2559,7 +2570,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	atomic_inc(&proc_poll_event);
 	wake_up_interruptible(&proc_poll_wait);
 
-	inode->i_flags |= S_SWAPFILE;
 	error = 0;
 	goto out;
 bad_swap: