Blame - fs/btrfs/verity.c - SHIFTPHONES/mainline/linux

blob: 90eb5c2830a9273c9e3c3c717b5243b79224343f [file] [log] [blame]

Boris Burkov	1460540	2021-06-30 13:01:49 -0700	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2
				3	#include <linux/init.h>
				4	#include <linux/fs.h>
				5	#include <linux/slab.h>
				6	#include <linux/rwsem.h>
				7	#include <linux/xattr.h>
				8	#include <linux/security.h>
				9	#include <linux/posix_acl_xattr.h>
				10	#include <linux/iversion.h>
				11	#include <linux/fsverity.h>
				12	#include <linux/sched/mm.h>
				13	#include "ctree.h"
				14	#include "btrfs_inode.h"
				15	#include "transaction.h"
				16	#include "disk-io.h"
				17	#include "locking.h"
				18
				/*
				 * Implementation of the interface defined in struct fsverity_operations.
				 *
				 * The main question is how and where to store the verity descriptor and the
				 * Merkle tree. We store both in dedicated btree items in the filesystem tree,
				 * together with the rest of the inode metadata. This means we'll need to do
				 * extra work to encrypt them once encryption is supported in btrfs, but btrfs
				 * has a lot of careful code around i_size and it seems better to make a new key
				 * type than try and adjust all of our expectations for i_size.
				 *
				 * Note that this differs from the implementation in ext4 and f2fs, where
				 * this data is stored as if it were in the file, but past EOF. However, btrfs
				 * does not have a widespread mechanism for caching opaque metadata pages, so we
				 * do pretend that the Merkle tree pages themselves are past EOF for the
				 * purposes of caching them (as opposed to creating a virtual inode).
				 *
				 * fs-verity items are stored under two different key types on disk.
				 * The descriptor items:
				 * [ inode objectid, BTRFS_VERITY_DESC_ITEM_KEY, offset ]
				 *
				 * At offset 0, we store a btrfs_verity_descriptor_item which tracks the
				 * size of the descriptor item and some extra data for encryption.
				 * Starting at offset 1, these hold the generic fs-verity descriptor.
				 * The latter are opaque to btrfs, we just read and write them as a blob for
				 * the higher level verity code. The most common descriptor size is 256 bytes.
				 *
				 * The Merkle tree items:
				 * [ inode objectid, BTRFS_VERITY_MERKLE_ITEM_KEY, offset ]
				 *
				 * These also start at offset 0, and correspond to the Merkle tree bytes.
				 * So when fsverity asks for page 0 of the Merkle tree, we pull up one page
				 * starting at offset 0 for this key type. These are also opaque to btrfs,
				 * we're blindly storing whatever fsverity sends down.
				 *
				 * Another important consideration is the fact that the Merkle tree data scales
				 * linearly with the size of the file (with 4K pages/blocks and SHA-256, it's
				 * ~1/127th the size) so for large files, writing the tree can be a lengthy
				 * operation. For that reason, we guard the whole enable verity operation
				 * (between begin_enable_verity and end_enable_verity) with an orphan item.
				 * Again, because the data can be pretty large, it's quite possible that we
				 * could run out of space writing it, so we try our best to handle errors by
				 * stopping and rolling back rather than aborting the victim transaction.
				 */
				62
				63	#define MERKLE_START_ALIGN 65536
				64
				65	/*
				66	* Compute the logical file offset where we cache the Merkle tree.
				67	*
				68	* @inode: inode of the verity file
				69	*
				70	* For the purposes of caching the Merkle tree pages, as required by
				71	* fs-verity, it is convenient to do size computations in terms of a file
				72	* offset, rather than in terms of page indices.
				73	*
				74	* Use 64K to be sure it's past the last page in the file, even with 64K pages.
				75	* That rounding operation itself can overflow loff_t, so we do it in u64 and
				76	* check.
				77	*
				78	* Returns the file offset on success, negative error code on failure.
				79	*/
				80	static loff_t merkle_file_pos(const struct inode *inode)
				81	{
				82	u64 sz = inode->i_size;
				83	u64 rounded = round_up(sz, MERKLE_START_ALIGN);
				84
				85	if (rounded > inode->i_sb->s_maxbytes)
				86	return -EFBIG;
				87
				88	return rounded;
				89	}
				90
				91	/*
				92	* Drop all the items for this inode with this key_type.
				93	*
				94	* @inode: inode to drop items for
				95	* @key_type: type of items to drop (BTRFS_VERITY_DESC_ITEM or
				96	* BTRFS_VERITY_MERKLE_ITEM)
				97	*
				98	* Before doing a verity enable we cleanup any existing verity items.
				99	* This is also used to clean up if a verity enable failed half way through.
				100	*
				101	* Returns number of dropped items on success, negative error code on failure.
				102	*/
				103	static int drop_verity_items(struct btrfs_inode *inode, u8 key_type)
				104	{
				105	struct btrfs_trans_handle *trans;
				106	struct btrfs_root *root = inode->root;
				107	struct btrfs_path *path;
				108	struct btrfs_key key;
				109	int count = 0;
				110	int ret;
				111
				112	path = btrfs_alloc_path();
				113	if (!path)
				114	return -ENOMEM;
				115
				116	while (1) {
				117	/* 1 for the item being dropped */
				118	trans = btrfs_start_transaction(root, 1);
				119	if (IS_ERR(trans)) {
				120	ret = PTR_ERR(trans);
				121	goto out;
				122	}
				123
				124	/*
				125	* Walk backwards through all the items until we find one that
				126	* isn't from our key type or objectid
				127	*/
				128	key.objectid = btrfs_ino(inode);
				129	key.type = key_type;
				130	key.offset = (u64)-1;
				131
				132	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
				133	if (ret > 0) {
				134	ret = 0;
				135	/* No more keys of this type, we're done */
				136	if (path->slots[0] == 0)
				137	break;
				138	path->slots[0]--;
				139	} else if (ret < 0) {
				140	btrfs_end_transaction(trans);
				141	goto out;
				142	}
				143
				144	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
				145
				146	/* No more keys of this type, we're done */
				147	if (key.objectid != btrfs_ino(inode) \|\| key.type != key_type)
				148	break;
				149
				150	/*
				151	* This shouldn't be a performance sensitive function because
				152	* it's not used as part of truncate. If it ever becomes
				153	* perf sensitive, change this to walk forward and bulk delete
				154	* items
				155	*/
				156	ret = btrfs_del_items(trans, root, path, path->slots[0], 1);
				157	if (ret) {
				158	btrfs_end_transaction(trans);
				159	goto out;
				160	}
				161	count++;
				162	btrfs_release_path(path);
				163	btrfs_end_transaction(trans);
				164	}
				165	ret = count;
				166	btrfs_end_transaction(trans);
				167	out:
				168	btrfs_free_path(path);
				169	return ret;
				170	}
				171
				172	/*
				173	* Drop all verity items
				174	*
				175	* @inode: inode to drop verity items for
				176	*
				177	* In most contexts where we are dropping verity items, we want to do it for all
				178	* the types of verity items, not a particular one.
				179	*
				180	* Returns: 0 on success, negative error code on failure.
				181	*/
				182	int btrfs_drop_verity_items(struct btrfs_inode *inode)
				183	{
				184	int ret;
				185
				186	ret = drop_verity_items(inode, BTRFS_VERITY_DESC_ITEM_KEY);
				187	if (ret < 0)
				188	return ret;
				189	ret = drop_verity_items(inode, BTRFS_VERITY_MERKLE_ITEM_KEY);
				190	if (ret < 0)
				191	return ret;
				192
				193	return 0;
				194	}
				195
				196	/*
				197	* Insert and write inode items with a given key type and offset.
				198	*
				199	* @inode: inode to insert for
				200	* @key_type: key type to insert
				201	* @offset: item offset to insert at
				202	* @src: source data to write
				203	* @len: length of source data to write
				204	*
				205	* Write len bytes from src into items of up to 2K length.
				206	* The inserted items will have key (ino, key_type, offset + off) where off is
				207	* consecutively increasing from 0 up to the last item ending at offset + len.
				208	*
				209	* Returns 0 on success and a negative error code on failure.
				210	*/
				211	static int write_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
				212	const char *src, u64 len)
				213	{
				214	struct btrfs_trans_handle *trans;
				215	struct btrfs_path *path;
				216	struct btrfs_root *root = inode->root;
				217	struct extent_buffer *leaf;
				218	struct btrfs_key key;
				219	unsigned long copy_bytes;
				220	unsigned long src_offset = 0;
				221	void *data;
				222	int ret = 0;
				223
				224	path = btrfs_alloc_path();
				225	if (!path)
				226	return -ENOMEM;
				227
				228	while (len > 0) {
				229	/* 1 for the new item being inserted */
				230	trans = btrfs_start_transaction(root, 1);
				231	if (IS_ERR(trans)) {
				232	ret = PTR_ERR(trans);
				233	break;
				234	}
				235
				236	key.objectid = btrfs_ino(inode);
				237	key.type = key_type;
				238	key.offset = offset;
				239
				240	/*
				241	* Insert 2K at a time mostly to be friendly for smaller leaf
				242	* size filesystems
				243	*/
				244	copy_bytes = min_t(u64, len, 2048);
				245
				246	ret = btrfs_insert_empty_item(trans, root, path, &key, copy_bytes);
				247	if (ret) {
				248	btrfs_end_transaction(trans);
				249	break;
				250	}
				251
				252	leaf = path->nodes[0];
				253
				254	data = btrfs_item_ptr(leaf, path->slots[0], void);
				255	write_extent_buffer(leaf, src + src_offset,
				256	(unsigned long)data, copy_bytes);
				257	offset += copy_bytes;
				258	src_offset += copy_bytes;
				259	len -= copy_bytes;
				260
				261	btrfs_release_path(path);
				262	btrfs_end_transaction(trans);
				263	}
				264
				265	btrfs_free_path(path);
				266	return ret;
				267	}
				268
				269	/*
				270	* Read inode items of the given key type and offset from the btree.
				271	*
				272	* @inode: inode to read items of
				273	* @key_type: key type to read
				274	* @offset: item offset to read from
				275	* @dest: Buffer to read into. This parameter has slightly tricky
				276	* semantics. If it is NULL, the function will not do any copying
				277	* and will just return the size of all the items up to len bytes.
				278	* If dest_page is passed, then the function will kmap_local the
				279	* page and ignore dest, but it must still be non-NULL to avoid the
				280	* counting-only behavior.
				281	* @len: length in bytes to read
				282	* @dest_page: copy into this page instead of the dest buffer
				283	*
				284	* Helper function to read items from the btree. This returns the number of
				285	* bytes read or < 0 for errors. We can return short reads if the items don't
				286	* exist on disk or aren't big enough to fill the desired length. Supports
				287	* reading into a provided buffer (dest) or into the page cache
				288	*
				289	* Returns number of bytes read or a negative error code on failure.
				290	*/
				291	static int read_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
				292	char dest, u64 len, struct page dest_page)
				293	{
				294	struct btrfs_path *path;
				295	struct btrfs_root *root = inode->root;
				296	struct extent_buffer *leaf;
				297	struct btrfs_key key;
				298	u64 item_end;
				299	u64 copy_end;
				300	int copied = 0;
				301	u32 copy_offset;
				302	unsigned long copy_bytes;
				303	unsigned long dest_offset = 0;
				304	void *data;
				305	char *kaddr = dest;
				306	int ret;
				307
				308	path = btrfs_alloc_path();
				309	if (!path)
				310	return -ENOMEM;
				311
				312	if (dest_page)
				313	path->reada = READA_FORWARD;
				314
				315	key.objectid = btrfs_ino(inode);
				316	key.type = key_type;
				317	key.offset = offset;
				318
				319	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				320	if (ret < 0) {
				321	goto out;
				322	} else if (ret > 0) {
				323	ret = 0;
				324	if (path->slots[0] == 0)
				325	goto out;
				326	path->slots[0]--;
				327	}
				328
				329	while (len > 0) {
				330	leaf = path->nodes[0];
				331	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
				332
				333	if (key.objectid != btrfs_ino(inode) \|\| key.type != key_type)
				334	break;
				335
Josef Bacik	3212fa1	2021-10-21 14:58:35 -0400	[diff] [blame]	336	item_end = btrfs_item_size(leaf, path->slots[0]) + key.offset;
Boris Burkov	1460540	2021-06-30 13:01:49 -0700	[diff] [blame]	337
				338	if (copied > 0) {
				339	/*
				340	* Once we've copied something, we want all of the items
				341	* to be sequential
				342	*/
				343	if (key.offset != offset)
				344	break;
				345	} else {
				346	/*
				347	* Our initial offset might be in the middle of an
				348	* item. Make sure it all makes sense.
				349	*/
				350	if (key.offset > offset)
				351	break;
				352	if (item_end <= offset)
				353	break;
				354	}
				355
				356	/* desc = NULL to just sum all the item lengths */
				357	if (!dest)
				358	copy_end = item_end;
				359	else
				360	copy_end = min(offset + len, item_end);
				361
				362	/* Number of bytes in this item we want to copy */
				363	copy_bytes = copy_end - offset;
				364
				365	/* Offset from the start of item for copying */
				366	copy_offset = offset - key.offset;
				367
				368	if (dest) {
				369	if (dest_page)
				370	kaddr = kmap_local_page(dest_page);
				371
				372	data = btrfs_item_ptr(leaf, path->slots[0], void);
				373	read_extent_buffer(leaf, kaddr + dest_offset,
				374	(unsigned long)data + copy_offset,
				375	copy_bytes);
				376
				377	if (dest_page)
				378	kunmap_local(kaddr);
				379	}
				380
				381	offset += copy_bytes;
				382	dest_offset += copy_bytes;
				383	len -= copy_bytes;
				384	copied += copy_bytes;
				385
				386	path->slots[0]++;
				387	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
				388	/*
				389	* We've reached the last slot in this leaf and we need
				390	* to go to the next leaf.
				391	*/
				392	ret = btrfs_next_leaf(root, path);
				393	if (ret < 0) {
				394	break;
				395	} else if (ret > 0) {
				396	ret = 0;
				397	break;
				398	}
				399	}
				400	}
				401	out:
				402	btrfs_free_path(path);
				403	if (!ret)
				404	ret = copied;
				405	return ret;
				406	}
				407
				408	/*
Boris Burkov	7052425	2021-06-30 13:01:50 -0700	[diff] [blame]	409	* Delete an fsverity orphan
				410	*
				411	* @trans: transaction to do the delete in
				412	* @inode: inode to orphan
				413	*
				414	* Capture verity orphan specific logic that is repeated in the couple places
				415	* we delete verity orphans. Specifically, handling ENOENT and ignoring inodes
				416	* with 0 links.
				417	*
				418	* Returns zero on success or a negative error code on failure.
				419	*/
				420	static int del_orphan(struct btrfs_trans_handle trans, struct btrfs_inode inode)
				421	{
				422	struct btrfs_root *root = inode->root;
				423	int ret;
				424
				425	/*
				426	* If the inode has no links, it is either already unlinked, or was
				427	* created with O_TMPFILE. In either case, it should have an orphan from
				428	* that other operation. Rather than reference count the orphans, we
				429	* simply ignore them here, because we only invoke the verity path in
				430	* the orphan logic when i_nlink is 1.
				431	*/
				432	if (!inode->vfs_inode.i_nlink)
				433	return 0;
				434
				435	ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
				436	if (ret == -ENOENT)
				437	ret = 0;
				438	return ret;
				439	}
				440
				441	/*
Boris Burkov	1460540	2021-06-30 13:01:49 -0700	[diff] [blame]	442	* Rollback in-progress verity if we encounter an error.
				443	*
				444	* @inode: inode verity had an error for
				445	*
				446	* We try to handle recoverable errors while enabling verity by rolling it back
				447	* and just failing the operation, rather than having an fs level error no
				448	* matter what. However, any error in rollback is unrecoverable.
				449	*
				450	* Returns 0 on success, negative error code on failure.
				451	*/
				452	static int rollback_verity(struct btrfs_inode *inode)
				453	{
Filipe Manana	acbee9a	2021-09-08 16:29:26 +0100	[diff] [blame]	454	struct btrfs_trans_handle *trans = NULL;
Boris Burkov	1460540	2021-06-30 13:01:49 -0700	[diff] [blame]	455	struct btrfs_root *root = inode->root;
				456	int ret;
				457
				458	ASSERT(inode_is_locked(&inode->vfs_inode));
				459	truncate_inode_pages(inode->vfs_inode.i_mapping, inode->vfs_inode.i_size);
				460	clear_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags);
				461	ret = btrfs_drop_verity_items(inode);
				462	if (ret) {
				463	btrfs_handle_fs_error(root->fs_info, ret,
				464	"failed to drop verity items in rollback %llu",
				465	(u64)inode->vfs_inode.i_ino);
				466	goto out;
				467	}
				468
Boris Burkov	7052425	2021-06-30 13:01:50 -0700	[diff] [blame]	469	/*
				470	* 1 for updating the inode flag
				471	* 1 for deleting the orphan
				472	*/
				473	trans = btrfs_start_transaction(root, 2);
Boris Burkov	1460540	2021-06-30 13:01:49 -0700	[diff] [blame]	474	if (IS_ERR(trans)) {
				475	ret = PTR_ERR(trans);
Filipe Manana	acbee9a	2021-09-08 16:29:26 +0100	[diff] [blame]	476	trans = NULL;
Boris Burkov	1460540	2021-06-30 13:01:49 -0700	[diff] [blame]	477	btrfs_handle_fs_error(root->fs_info, ret,
				478	"failed to start transaction in verity rollback %llu",
				479	(u64)inode->vfs_inode.i_ino);
				480	goto out;
				481	}
				482	inode->ro_flags &= ~BTRFS_INODE_RO_VERITY;
				483	btrfs_sync_inode_flags_to_i_flags(&inode->vfs_inode);
				484	ret = btrfs_update_inode(trans, root, inode);
				485	if (ret) {
				486	btrfs_abort_transaction(trans, ret);
				487	goto out;
				488	}
Boris Burkov	7052425	2021-06-30 13:01:50 -0700	[diff] [blame]	489	ret = del_orphan(trans, inode);
				490	if (ret) {
				491	btrfs_abort_transaction(trans, ret);
				492	goto out;
				493	}
Boris Burkov	1460540	2021-06-30 13:01:49 -0700	[diff] [blame]	494	out:
Filipe Manana	acbee9a	2021-09-08 16:29:26 +0100	[diff] [blame]	495	if (trans)
				496	btrfs_end_transaction(trans);
Boris Burkov	1460540	2021-06-30 13:01:49 -0700	[diff] [blame]	497	return ret;
				498	}
				499
				500	/*
				501	* Finalize making the file a valid verity file
				502	*
				503	* @inode: inode to be marked as verity
				504	* @desc: contents of the verity descriptor to write (not NULL)
				505	* @desc_size: size of the verity descriptor
				506	*
				507	* Do the actual work of finalizing verity after successfully writing the Merkle
				508	* tree:
				509	*
				510	* - write out the descriptor items
				511	* - mark the inode with the verity flag
Boris Burkov	7052425	2021-06-30 13:01:50 -0700	[diff] [blame]	512	* - delete the orphan item
Boris Burkov	1460540	2021-06-30 13:01:49 -0700	[diff] [blame]	513	* - mark the ro compat bit
				514	* - clear the in progress bit
				515	*
				516	* Returns 0 on success, negative error code on failure.
				517	*/
				518	static int finish_verity(struct btrfs_inode inode, const void desc,
				519	size_t desc_size)
				520	{
				521	struct btrfs_trans_handle *trans = NULL;
				522	struct btrfs_root *root = inode->root;
				523	struct btrfs_verity_descriptor_item item;
				524	int ret;
				525
				526	/* Write out the descriptor item */
				527	memset(&item, 0, sizeof(item));
				528	btrfs_set_stack_verity_descriptor_size(&item, desc_size);
				529	ret = write_key_bytes(inode, BTRFS_VERITY_DESC_ITEM_KEY, 0,
				530	(const char *)&item, sizeof(item));
				531	if (ret)
				532	goto out;
				533
				534	/* Write out the descriptor itself */
				535	ret = write_key_bytes(inode, BTRFS_VERITY_DESC_ITEM_KEY, 1,
				536	desc, desc_size);
				537	if (ret)
				538	goto out;
				539
Boris Burkov	7052425	2021-06-30 13:01:50 -0700	[diff] [blame]	540	/*
				541	* 1 for updating the inode flag
				542	* 1 for deleting the orphan
				543	*/
				544	trans = btrfs_start_transaction(root, 2);
Boris Burkov	1460540	2021-06-30 13:01:49 -0700	[diff] [blame]	545	if (IS_ERR(trans)) {
				546	ret = PTR_ERR(trans);
				547	goto out;
				548	}
				549	inode->ro_flags \|= BTRFS_INODE_RO_VERITY;
				550	btrfs_sync_inode_flags_to_i_flags(&inode->vfs_inode);
				551	ret = btrfs_update_inode(trans, root, inode);
				552	if (ret)
				553	goto end_trans;
Boris Burkov	7052425	2021-06-30 13:01:50 -0700	[diff] [blame]	554	ret = del_orphan(trans, inode);
				555	if (ret)
				556	goto end_trans;
Boris Burkov	1460540	2021-06-30 13:01:49 -0700	[diff] [blame]	557	clear_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags);
				558	btrfs_set_fs_compat_ro(root->fs_info, VERITY);
				559	end_trans:
				560	btrfs_end_transaction(trans);
				561	out:
				562	return ret;
				563
				564	}
				565
				566	/*
				567	* fsverity op that begins enabling verity.
				568	*
				569	* @filp: file to enable verity on
				570	*
Boris Burkov	7052425	2021-06-30 13:01:50 -0700	[diff] [blame]	571	* Begin enabling fsverity for the file. We drop any existing verity items, add
				572	* an orphan and set the in progress bit.
Boris Burkov	1460540	2021-06-30 13:01:49 -0700	[diff] [blame]	573	*
				574	* Returns 0 on success, negative error code on failure.
				575	*/
				576	static int btrfs_begin_enable_verity(struct file *filp)
				577	{
				578	struct btrfs_inode *inode = BTRFS_I(file_inode(filp));
Boris Burkov	7052425	2021-06-30 13:01:50 -0700	[diff] [blame]	579	struct btrfs_root *root = inode->root;
				580	struct btrfs_trans_handle *trans;
Boris Burkov	1460540	2021-06-30 13:01:49 -0700	[diff] [blame]	581	int ret;
				582
				583	ASSERT(inode_is_locked(file_inode(filp)));
				584
				585	if (test_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags))
				586	return -EBUSY;
				587
Boris Burkov	7052425	2021-06-30 13:01:50 -0700	[diff] [blame]	588	/*
				589	* This should almost never do anything, but theoretically, it's
				590	* possible that we failed to enable verity on a file, then were
				591	* interrupted or failed while rolling back, failed to cleanup the
				592	* orphan, and finally attempt to enable verity again.
				593	*/
Boris Burkov	1460540	2021-06-30 13:01:49 -0700	[diff] [blame]	594	ret = btrfs_drop_verity_items(inode);
				595	if (ret)
				596	return ret;
				597
Boris Burkov	7052425	2021-06-30 13:01:50 -0700	[diff] [blame]	598	/* 1 for the orphan item */
				599	trans = btrfs_start_transaction(root, 1);
				600	if (IS_ERR(trans))
				601	return PTR_ERR(trans);
				602
				603	ret = btrfs_orphan_add(trans, inode);
				604	if (!ret)
				605	set_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags);
				606	btrfs_end_transaction(trans);
Boris Burkov	1460540	2021-06-30 13:01:49 -0700	[diff] [blame]	607
				608	return 0;
				609	}
				610
				611	/*
				612	* fsverity op that ends enabling verity.
				613	*
				614	* @filp: file we are finishing enabling verity on
				615	* @desc: verity descriptor to write out (NULL in error conditions)
				616	* @desc_size: size of the verity descriptor (variable with signatures)
				617	* @merkle_tree_size: size of the merkle tree in bytes
				618	*
				619	* If desc is null, then VFS is signaling an error occurred during verity
				620	* enable, and we should try to rollback. Otherwise, attempt to finish verity.
				621	*
				622	* Returns 0 on success, negative error code on error.
				623	*/
				624	static int btrfs_end_enable_verity(struct file filp, const void desc,
				625	size_t desc_size, u64 merkle_tree_size)
				626	{
				627	struct btrfs_inode *inode = BTRFS_I(file_inode(filp));
				628	int ret = 0;
				629	int rollback_ret;
				630
				631	ASSERT(inode_is_locked(file_inode(filp)));
				632
				633	if (desc == NULL)
				634	goto rollback;
				635
				636	ret = finish_verity(inode, desc, desc_size);
				637	if (ret)
				638	goto rollback;
				639	return ret;
				640
				641	rollback:
				642	rollback_ret = rollback_verity(inode);
				643	if (rollback_ret)
				644	btrfs_err(inode->root->fs_info,
				645	"failed to rollback verity items: %d", rollback_ret);
				646	return ret;
				647	}
				648
				649	/*
				650	* fsverity op that gets the struct fsverity_descriptor.
				651	*
				652	* @inode: inode to get the descriptor of
				653	* @buf: output buffer for the descriptor contents
				654	* @buf_size: size of the output buffer. 0 to query the size
				655	*
				656	* fsverity does a two pass setup for reading the descriptor, in the first pass
				657	* it calls with buf_size = 0 to query the size of the descriptor, and then in
				658	* the second pass it actually reads the descriptor off disk.
				659	*
				660	* Returns the size on success or a negative error code on failure.
				661	*/
				662	static int btrfs_get_verity_descriptor(struct inode inode, void buf,
				663	size_t buf_size)
				664	{
				665	u64 true_size;
				666	int ret = 0;
				667	struct btrfs_verity_descriptor_item item;
				668
				669	memset(&item, 0, sizeof(item));
				670	ret = read_key_bytes(BTRFS_I(inode), BTRFS_VERITY_DESC_ITEM_KEY, 0,
				671	(char *)&item, sizeof(item), NULL);
				672	if (ret < 0)
				673	return ret;
				674
				675	if (item.reserved[0] != 0 \|\| item.reserved[1] != 0)
				676	return -EUCLEAN;
				677
				678	true_size = btrfs_stack_verity_descriptor_size(&item);
				679	if (true_size > INT_MAX)
				680	return -EUCLEAN;
				681
				682	if (buf_size == 0)
				683	return true_size;
				684	if (buf_size < true_size)
				685	return -ERANGE;
				686
				687	ret = read_key_bytes(BTRFS_I(inode), BTRFS_VERITY_DESC_ITEM_KEY, 1,
				688	buf, buf_size, NULL);
				689	if (ret < 0)
				690	return ret;
				691	if (ret != true_size)
				692	return -EIO;
				693
				694	return true_size;
				695	}
				696
				697	/*
				698	* fsverity op that reads and caches a merkle tree page.
				699	*
				700	* @inode: inode to read a merkle tree page for
				701	* @index: page index relative to the start of the merkle tree
				702	* @num_ra_pages: number of pages to readahead. Optional, we ignore it
				703	*
				704	* The Merkle tree is stored in the filesystem btree, but its pages are cached
				705	* with a logical position past EOF in the inode's mapping.
				706	*
				707	* Returns the page we read, or an ERR_PTR on error.
				708	*/
				709	static struct page btrfs_read_merkle_tree_page(struct inode inode,
				710	pgoff_t index,
				711	unsigned long num_ra_pages)
				712	{
				713	struct page *page;
				714	u64 off = (u64)index << PAGE_SHIFT;
				715	loff_t merkle_pos = merkle_file_pos(inode);
				716	int ret;
				717
				718	if (merkle_pos < 0)
				719	return ERR_PTR(merkle_pos);
				720	if (merkle_pos > inode->i_sb->s_maxbytes - off - PAGE_SIZE)
				721	return ERR_PTR(-EFBIG);
				722	index += merkle_pos >> PAGE_SHIFT;
				723	again:
				724	page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
				725	if (page) {
				726	if (PageUptodate(page))
				727	return page;
				728
				729	lock_page(page);
				730	/*
				731	* We only insert uptodate pages, so !Uptodate has to be
				732	* an error
				733	*/
				734	if (!PageUptodate(page)) {
				735	unlock_page(page);
				736	put_page(page);
				737	return ERR_PTR(-EIO);
				738	}
				739	unlock_page(page);
				740	return page;
				741	}
				742
				743	page = __page_cache_alloc(mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
				744	if (!page)
				745	return ERR_PTR(-ENOMEM);
				746
				747	/*
				748	* Merkle item keys are indexed from byte 0 in the merkle tree.
				749	* They have the form:
				750	*
				751	* [ inode objectid, BTRFS_MERKLE_ITEM_KEY, offset in bytes ]
				752	*/
				753	ret = read_key_bytes(BTRFS_I(inode), BTRFS_VERITY_MERKLE_ITEM_KEY, off,
				754	page_address(page), PAGE_SIZE, page);
				755	if (ret < 0) {
				756	put_page(page);
				757	return ERR_PTR(ret);
				758	}
				759	if (ret < PAGE_SIZE)
				760	memzero_page(page, ret, PAGE_SIZE - ret);
				761
				762	SetPageUptodate(page);
				763	ret = add_to_page_cache_lru(page, inode->i_mapping, index, GFP_NOFS);
				764
				765	if (!ret) {
				766	/* Inserted and ready for fsverity */
				767	unlock_page(page);
				768	} else {
				769	put_page(page);
				770	/* Did someone race us into inserting this page? */
				771	if (ret == -EEXIST)
				772	goto again;
				773	page = ERR_PTR(ret);
				774	}
				775	return page;
				776	}
				777
				778	/*
				779	* fsverity op that writes a Merkle tree block into the btree.
				780	*
				781	* @inode: inode to write a Merkle tree block for
				782	* @buf: Merkle tree data block to write
				783	* @index: index of the block in the Merkle tree
				784	* @log_blocksize: log base 2 of the Merkle tree block size
				785	*
				786	* Note that the block size could be different from the page size, so it is not
				787	* safe to assume that index is a page index.
				788	*
				789	* Returns 0 on success or negative error code on failure
				790	*/
				791	static int btrfs_write_merkle_tree_block(struct inode inode, const void buf,
				792	u64 index, int log_blocksize)
				793	{
				794	u64 off = index << log_blocksize;
				795	u64 len = 1ULL << log_blocksize;
				796	loff_t merkle_pos = merkle_file_pos(inode);
				797
				798	if (merkle_pos < 0)
				799	return merkle_pos;
				800	if (merkle_pos > inode->i_sb->s_maxbytes - off - len)
				801	return -EFBIG;
				802
				803	return write_key_bytes(BTRFS_I(inode), BTRFS_VERITY_MERKLE_ITEM_KEY,
				804	off, buf, len);
				805	}
				806
				807	const struct fsverity_operations btrfs_verityops = {
				808	.begin_enable_verity = btrfs_begin_enable_verity,
				809	.end_enable_verity = btrfs_end_enable_verity,
				810	.get_verity_descriptor = btrfs_get_verity_descriptor,
				811	.read_merkle_tree_page = btrfs_read_merkle_tree_page,
				812	.write_merkle_tree_block = btrfs_write_merkle_tree_block,
				813	};