Blame - fs/btrfs/ref-verify.c - SHIFTPHONES/mainline/linux

blob: 171f3cce30e6badc8468abb95e8261f681e11e62 [file] [log] [blame]

Josef Bacik	fd708b8	2017-09-29 15:43:50 -0400	[diff] [blame]	1	/*
				2	* Copyright (C) 2014 Facebook. All rights reserved.
				3	*
				4	* This program is free software; you can redistribute it and/or
				5	* modify it under the terms of the GNU General Public
				6	* License v2 as published by the Free Software Foundation.
				7	*
				8	* This program is distributed in the hope that it will be useful,
				9	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				10	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				11	* General Public License for more details.
				12	*
				13	* You should have received a copy of the GNU General Public
				14	* License along with this program; if not, write to the
				15	* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
				16	* Boston, MA 021110-1307, USA.
				17	*/
				18
				19	#include <linux/sched.h>
				20	#include <linux/stacktrace.h>
				21	#include "ctree.h"
				22	#include "disk-io.h"
				23	#include "locking.h"
				24	#include "delayed-ref.h"
				25	#include "ref-verify.h"
				26
				27	/*
				28	* Used to keep track the roots and number of refs each root has for a given
				29	* bytenr. This just tracks the number of direct references, no shared
				30	* references.
				31	*/
				32	struct root_entry {
				33	u64 root_objectid;
				34	u64 num_refs;
				35	struct rb_node node;
				36	};
				37
				38	/*
				39	* These are meant to represent what should exist in the extent tree, these can
				40	* be used to verify the extent tree is consistent as these should all match
				41	* what the extent tree says.
				42	*/
				43	struct ref_entry {
				44	u64 root_objectid;
				45	u64 parent;
				46	u64 owner;
				47	u64 offset;
				48	u64 num_refs;
				49	struct rb_node node;
				50	};
				51
				52	#define MAX_TRACE 16
				53
				54	/*
				55	* Whenever we add/remove a reference we record the action. The action maps
				56	* back to the delayed ref action. We hold the ref we are changing in the
				57	* action so we can account for the history properly, and we record the root we
				58	* were called with since it could be different from ref_root. We also store
				59	* stack traces because thats how I roll.
				60	*/
				61	struct ref_action {
				62	int action;
				63	u64 root;
				64	struct ref_entry ref;
				65	struct list_head list;
				66	unsigned long trace[MAX_TRACE];
				67	unsigned int trace_len;
				68	};
				69
				70	/*
				71	* One of these for every block we reference, it holds the roots and references
				72	* to it as well as all of the ref actions that have occured to it. We never
				73	* free it until we unmount the file system in order to make sure re-allocations
				74	* are happening properly.
				75	*/
				76	struct block_entry {
				77	u64 bytenr;
				78	u64 len;
				79	u64 num_refs;
				80	int metadata;
				81	int from_disk;
				82	struct rb_root roots;
				83	struct rb_root refs;
				84	struct rb_node node;
				85	struct list_head actions;
				86	};
				87
				88	static struct block_entry insert_block_entry(struct rb_root root,
				89	struct block_entry *be)
				90	{
				91	struct rb_node **p = &root->rb_node;
				92	struct rb_node *parent_node = NULL;
				93	struct block_entry *entry;
				94
				95	while (*p) {
				96	parent_node = *p;
				97	entry = rb_entry(parent_node, struct block_entry, node);
				98	if (entry->bytenr > be->bytenr)
				99	p = &(*p)->rb_left;
				100	else if (entry->bytenr < be->bytenr)
				101	p = &(*p)->rb_right;
				102	else
				103	return entry;
				104	}
				105
				106	rb_link_node(&be->node, parent_node, p);
				107	rb_insert_color(&be->node, root);
				108	return NULL;
				109	}
				110
				111	static struct block_entry lookup_block_entry(struct rb_root root, u64 bytenr)
				112	{
				113	struct rb_node *n;
				114	struct block_entry *entry = NULL;
				115
				116	n = root->rb_node;
				117	while (n) {
				118	entry = rb_entry(n, struct block_entry, node);
				119	if (entry->bytenr < bytenr)
				120	n = n->rb_right;
				121	else if (entry->bytenr > bytenr)
				122	n = n->rb_left;
				123	else
				124	return entry;
				125	}
				126	return NULL;
				127	}
				128
				129	static struct root_entry insert_root_entry(struct rb_root root,
				130	struct root_entry *re)
				131	{
				132	struct rb_node **p = &root->rb_node;
				133	struct rb_node *parent_node = NULL;
				134	struct root_entry *entry;
				135
				136	while (*p) {
				137	parent_node = *p;
				138	entry = rb_entry(parent_node, struct root_entry, node);
				139	if (entry->root_objectid > re->root_objectid)
				140	p = &(*p)->rb_left;
				141	else if (entry->root_objectid < re->root_objectid)
				142	p = &(*p)->rb_right;
				143	else
				144	return entry;
				145	}
				146
				147	rb_link_node(&re->node, parent_node, p);
				148	rb_insert_color(&re->node, root);
				149	return NULL;
				150
				151	}
				152
				153	static int comp_refs(struct ref_entry ref1, struct ref_entry ref2)
				154	{
				155	if (ref1->root_objectid < ref2->root_objectid)
				156	return -1;
				157	if (ref1->root_objectid > ref2->root_objectid)
				158	return 1;
				159	if (ref1->parent < ref2->parent)
				160	return -1;
				161	if (ref1->parent > ref2->parent)
				162	return 1;
				163	if (ref1->owner < ref2->owner)
				164	return -1;
				165	if (ref1->owner > ref2->owner)
				166	return 1;
				167	if (ref1->offset < ref2->offset)
				168	return -1;
				169	if (ref1->offset > ref2->offset)
				170	return 1;
				171	return 0;
				172	}
				173
				174	static struct ref_entry insert_ref_entry(struct rb_root root,
				175	struct ref_entry *ref)
				176	{
				177	struct rb_node **p = &root->rb_node;
				178	struct rb_node *parent_node = NULL;
				179	struct ref_entry *entry;
				180	int cmp;
				181
				182	while (*p) {
				183	parent_node = *p;
				184	entry = rb_entry(parent_node, struct ref_entry, node);
				185	cmp = comp_refs(entry, ref);
				186	if (cmp > 0)
				187	p = &(*p)->rb_left;
				188	else if (cmp < 0)
				189	p = &(*p)->rb_right;
				190	else
				191	return entry;
				192	}
				193
				194	rb_link_node(&ref->node, parent_node, p);
				195	rb_insert_color(&ref->node, root);
				196	return NULL;
				197
				198	}
				199
				200	static struct root_entry lookup_root_entry(struct rb_root root, u64 objectid)
				201	{
				202	struct rb_node *n;
				203	struct root_entry *entry = NULL;
				204
				205	n = root->rb_node;
				206	while (n) {
				207	entry = rb_entry(n, struct root_entry, node);
				208	if (entry->root_objectid < objectid)
				209	n = n->rb_right;
				210	else if (entry->root_objectid > objectid)
				211	n = n->rb_left;
				212	else
				213	return entry;
				214	}
				215	return NULL;
				216	}
				217
				218	#ifdef CONFIG_STACKTRACE
				219	static void __save_stack_trace(struct ref_action *ra)
				220	{
				221	struct stack_trace stack_trace;
				222
				223	stack_trace.max_entries = MAX_TRACE;
				224	stack_trace.nr_entries = 0;
				225	stack_trace.entries = ra->trace;
				226	stack_trace.skip = 2;
				227	save_stack_trace(&stack_trace);
				228	ra->trace_len = stack_trace.nr_entries;
				229	}
				230
				231	static void __print_stack_trace(struct btrfs_fs_info *fs_info,
				232	struct ref_action *ra)
				233	{
				234	struct stack_trace trace;
				235
				236	if (ra->trace_len == 0) {
				237	btrfs_err(fs_info, " ref-verify: no stacktrace");
				238	return;
				239	}
				240	trace.nr_entries = ra->trace_len;
				241	trace.entries = ra->trace;
				242	print_stack_trace(&trace, 2);
				243	}
				244	#else
				245	static void inline __save_stack_trace(struct ref_action *ra)
				246	{
				247	}
				248
				249	static void inline __print_stack_trace(struct btrfs_fs_info *fs_info,
				250	struct ref_action *ra)
				251	{
				252	btrfs_err(fs_info, " ref-verify: no stacktrace support");
				253	}
				254	#endif
				255
				256	static void free_block_entry(struct block_entry *be)
				257	{
				258	struct root_entry *re;
				259	struct ref_entry *ref;
				260	struct ref_action *ra;
				261	struct rb_node *n;
				262
				263	while ((n = rb_first(&be->roots))) {
				264	re = rb_entry(n, struct root_entry, node);
				265	rb_erase(&re->node, &be->roots);
				266	kfree(re);
				267	}
				268
				269	while((n = rb_first(&be->refs))) {
				270	ref = rb_entry(n, struct ref_entry, node);
				271	rb_erase(&ref->node, &be->refs);
				272	kfree(ref);
				273	}
				274
				275	while (!list_empty(&be->actions)) {
				276	ra = list_first_entry(&be->actions, struct ref_action,
				277	list);
				278	list_del(&ra->list);
				279	kfree(ra);
				280	}
				281	kfree(be);
				282	}
				283
				284	static struct block_entry add_block_entry(struct btrfs_fs_info fs_info,
				285	u64 bytenr, u64 len,
				286	u64 root_objectid)
				287	{
				288	struct block_entry be = NULL, exist;
				289	struct root_entry *re = NULL;
				290
				291	re = kzalloc(sizeof(struct root_entry), GFP_KERNEL);
				292	be = kzalloc(sizeof(struct block_entry), GFP_KERNEL);
				293	if (!be \|\| !re) {
				294	kfree(re);
				295	kfree(be);
				296	return ERR_PTR(-ENOMEM);
				297	}
				298	be->bytenr = bytenr;
				299	be->len = len;
				300
				301	re->root_objectid = root_objectid;
				302	re->num_refs = 0;
				303
				304	spin_lock(&fs_info->ref_verify_lock);
				305	exist = insert_block_entry(&fs_info->block_tree, be);
				306	if (exist) {
				307	if (root_objectid) {
				308	struct root_entry *exist_re;
				309
				310	exist_re = insert_root_entry(&exist->roots, re);
				311	if (exist_re)
				312	kfree(re);
				313	}
				314	kfree(be);
				315	return exist;
				316	}
				317
				318	be->num_refs = 0;
				319	be->metadata = 0;
				320	be->from_disk = 0;
				321	be->roots = RB_ROOT;
				322	be->refs = RB_ROOT;
				323	INIT_LIST_HEAD(&be->actions);
				324	if (root_objectid)
				325	insert_root_entry(&be->roots, re);
				326	else
				327	kfree(re);
				328	return be;
				329	}
				330
				331	static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root,
				332	u64 parent, u64 bytenr, int level)
				333	{
				334	struct block_entry *be;
				335	struct root_entry *re;
				336	struct ref_entry ref = NULL, exist;
				337
				338	ref = kmalloc(sizeof(struct ref_entry), GFP_KERNEL);
				339	if (!ref)
				340	return -ENOMEM;
				341
				342	if (parent)
				343	ref->root_objectid = 0;
				344	else
				345	ref->root_objectid = ref_root;
				346	ref->parent = parent;
				347	ref->owner = level;
				348	ref->offset = 0;
				349	ref->num_refs = 1;
				350
				351	be = add_block_entry(fs_info, bytenr, fs_info->nodesize, ref_root);
				352	if (IS_ERR(be)) {
				353	kfree(ref);
				354	return PTR_ERR(be);
				355	}
				356	be->num_refs++;
				357	be->from_disk = 1;
				358	be->metadata = 1;
				359
				360	if (!parent) {
				361	ASSERT(ref_root);
				362	re = lookup_root_entry(&be->roots, ref_root);
				363	ASSERT(re);
				364	re->num_refs++;
				365	}
				366	exist = insert_ref_entry(&be->refs, ref);
				367	if (exist) {
				368	exist->num_refs++;
				369	kfree(ref);
				370	}
				371	spin_unlock(&fs_info->ref_verify_lock);
				372
				373	return 0;
				374	}
				375
				376	static int add_shared_data_ref(struct btrfs_fs_info *fs_info,
				377	u64 parent, u32 num_refs, u64 bytenr,
				378	u64 num_bytes)
				379	{
				380	struct block_entry *be;
				381	struct ref_entry *ref;
				382
				383	ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
				384	if (!ref)
				385	return -ENOMEM;
				386	be = add_block_entry(fs_info, bytenr, num_bytes, 0);
				387	if (IS_ERR(be)) {
				388	kfree(ref);
				389	return PTR_ERR(be);
				390	}
				391	be->num_refs += num_refs;
				392
				393	ref->parent = parent;
				394	ref->num_refs = num_refs;
				395	if (insert_ref_entry(&be->refs, ref)) {
				396	spin_unlock(&fs_info->ref_verify_lock);
				397	btrfs_err(fs_info, "existing shared ref when reading from disk?");
				398	kfree(ref);
				399	return -EINVAL;
				400	}
				401	spin_unlock(&fs_info->ref_verify_lock);
				402	return 0;
				403	}
				404
				405	static int add_extent_data_ref(struct btrfs_fs_info *fs_info,
				406	struct extent_buffer *leaf,
				407	struct btrfs_extent_data_ref *dref,
				408	u64 bytenr, u64 num_bytes)
				409	{
				410	struct block_entry *be;
				411	struct ref_entry *ref;
				412	struct root_entry *re;
				413	u64 ref_root = btrfs_extent_data_ref_root(leaf, dref);
				414	u64 owner = btrfs_extent_data_ref_objectid(leaf, dref);
				415	u64 offset = btrfs_extent_data_ref_offset(leaf, dref);
				416	u32 num_refs = btrfs_extent_data_ref_count(leaf, dref);
				417
				418	ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
				419	if (!ref)
				420	return -ENOMEM;
				421	be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
				422	if (IS_ERR(be)) {
				423	kfree(ref);
				424	return PTR_ERR(be);
				425	}
				426	be->num_refs += num_refs;
				427
				428	ref->parent = 0;
				429	ref->owner = owner;
				430	ref->root_objectid = ref_root;
				431	ref->offset = offset;
				432	ref->num_refs = num_refs;
				433	if (insert_ref_entry(&be->refs, ref)) {
				434	spin_unlock(&fs_info->ref_verify_lock);
				435	btrfs_err(fs_info, "existing ref when reading from disk?");
				436	kfree(ref);
				437	return -EINVAL;
				438	}
				439
				440	re = lookup_root_entry(&be->roots, ref_root);
				441	if (!re) {
				442	spin_unlock(&fs_info->ref_verify_lock);
				443	btrfs_err(fs_info, "missing root in new block entry?");
				444	return -EINVAL;
				445	}
				446	re->num_refs += num_refs;
				447	spin_unlock(&fs_info->ref_verify_lock);
				448	return 0;
				449	}
				450
				451	static int process_extent_item(struct btrfs_fs_info *fs_info,
				452	struct btrfs_path path, struct btrfs_key key,
				453	int slot, int *tree_block_level)
				454	{
				455	struct btrfs_extent_item *ei;
				456	struct btrfs_extent_inline_ref *iref;
				457	struct btrfs_extent_data_ref *dref;
				458	struct btrfs_shared_data_ref *sref;
				459	struct extent_buffer *leaf = path->nodes[0];
				460	u32 item_size = btrfs_item_size_nr(leaf, slot);
				461	unsigned long end, ptr;
				462	u64 offset, flags, count;
				463	int type, ret;
				464
				465	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
				466	flags = btrfs_extent_flags(leaf, ei);
				467
				468	if ((key->type == BTRFS_EXTENT_ITEM_KEY) &&
				469	flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
				470	struct btrfs_tree_block_info *info;
				471
				472	info = (struct btrfs_tree_block_info *)(ei + 1);
				473	*tree_block_level = btrfs_tree_block_level(leaf, info);
				474	iref = (struct btrfs_extent_inline_ref *)(info + 1);
				475	} else {
				476	if (key->type == BTRFS_METADATA_ITEM_KEY)
				477	*tree_block_level = key->offset;
				478	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
				479	}
				480
				481	ptr = (unsigned long)iref;
				482	end = (unsigned long)ei + item_size;
				483	while (ptr < end) {
				484	iref = (struct btrfs_extent_inline_ref *)ptr;
				485	type = btrfs_extent_inline_ref_type(leaf, iref);
				486	offset = btrfs_extent_inline_ref_offset(leaf, iref);
				487	switch (type) {
				488	case BTRFS_TREE_BLOCK_REF_KEY:
				489	ret = add_tree_block(fs_info, offset, 0, key->objectid,
				490	*tree_block_level);
				491	break;
				492	case BTRFS_SHARED_BLOCK_REF_KEY:
				493	ret = add_tree_block(fs_info, 0, offset, key->objectid,
				494	*tree_block_level);
				495	break;
				496	case BTRFS_EXTENT_DATA_REF_KEY:
				497	dref = (struct btrfs_extent_data_ref *)(&iref->offset);
				498	ret = add_extent_data_ref(fs_info, leaf, dref,
				499	key->objectid, key->offset);
				500	break;
				501	case BTRFS_SHARED_DATA_REF_KEY:
				502	sref = (struct btrfs_shared_data_ref *)(iref + 1);
				503	count = btrfs_shared_data_ref_count(leaf, sref);
				504	ret = add_shared_data_ref(fs_info, offset, count,
				505	key->objectid, key->offset);
				506	break;
				507	default:
				508	btrfs_err(fs_info, "invalid key type in iref");
				509	ret = -EINVAL;
				510	break;
				511	}
				512	if (ret)
				513	break;
				514	ptr += btrfs_extent_inline_ref_size(type);
				515	}
				516	return ret;
				517	}
				518
				519	static int process_leaf(struct btrfs_root *root,
				520	struct btrfs_path path, u64 bytenr, u64 *num_bytes)
				521	{
				522	struct btrfs_fs_info *fs_info = root->fs_info;
				523	struct extent_buffer *leaf = path->nodes[0];
				524	struct btrfs_extent_data_ref *dref;
				525	struct btrfs_shared_data_ref *sref;
				526	u32 count;
				527	int i = 0, tree_block_level = 0, ret;
				528	struct btrfs_key key;
				529	int nritems = btrfs_header_nritems(leaf);
				530
				531	for (i = 0; i < nritems; i++) {
				532	btrfs_item_key_to_cpu(leaf, &key, i);
				533	switch (key.type) {
				534	case BTRFS_EXTENT_ITEM_KEY:
				535	*num_bytes = key.offset;
				536	case BTRFS_METADATA_ITEM_KEY:
				537	*bytenr = key.objectid;
				538	ret = process_extent_item(fs_info, path, &key, i,
				539	&tree_block_level);
				540	break;
				541	case BTRFS_TREE_BLOCK_REF_KEY:
				542	ret = add_tree_block(fs_info, key.offset, 0,
				543	key.objectid, tree_block_level);
				544	break;
				545	case BTRFS_SHARED_BLOCK_REF_KEY:
				546	ret = add_tree_block(fs_info, 0, key.offset,
				547	key.objectid, tree_block_level);
				548	break;
				549	case BTRFS_EXTENT_DATA_REF_KEY:
				550	dref = btrfs_item_ptr(leaf, i,
				551	struct btrfs_extent_data_ref);
				552	ret = add_extent_data_ref(fs_info, leaf, dref, *bytenr,
				553	*num_bytes);
				554	break;
				555	case BTRFS_SHARED_DATA_REF_KEY:
				556	sref = btrfs_item_ptr(leaf, i,
				557	struct btrfs_shared_data_ref);
				558	count = btrfs_shared_data_ref_count(leaf, sref);
				559	ret = add_shared_data_ref(fs_info, key.offset, count,
				560	bytenr, num_bytes);
				561	break;
				562	default:
				563	break;
				564	}
				565	if (ret)
				566	break;
				567	}
				568	return ret;
				569	}
				570
				571	/* Walk down to the leaf from the given level */
				572	static int walk_down_tree(struct btrfs_root root, struct btrfs_path path,
				573	int level, u64 bytenr, u64 num_bytes)
				574	{
				575	struct btrfs_fs_info *fs_info = root->fs_info;
				576	struct extent_buffer *eb;
				577	u64 block_bytenr, gen;
				578	int ret = 0;
				579
				580	while (level >= 0) {
				581	if (level) {
				582	block_bytenr = btrfs_node_blockptr(path->nodes[level],
				583	path->slots[level]);
				584	gen = btrfs_node_ptr_generation(path->nodes[level],
				585	path->slots[level]);
				586	eb = read_tree_block(fs_info, block_bytenr, gen);
				587	if (IS_ERR(eb))
				588	return PTR_ERR(eb);
				589	if (!extent_buffer_uptodate(eb)) {
				590	free_extent_buffer(eb);
				591	return -EIO;
				592	}
				593	btrfs_tree_read_lock(eb);
				594	btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
				595	path->nodes[level-1] = eb;
				596	path->slots[level-1] = 0;
				597	path->locks[level-1] = BTRFS_READ_LOCK_BLOCKING;
				598	} else {
				599	ret = process_leaf(root, path, bytenr, num_bytes);
				600	if (ret)
				601	break;
				602	}
				603	level--;
				604	}
				605	return ret;
				606	}
				607
				608	/* Walk up to the next node that needs to be processed */
Geert Uytterhoeven	02cfe77	2017-11-15 16:04:40 +0100	[diff] [blame^]	609	static int walk_up_tree(struct btrfs_path path, int level)
Josef Bacik	fd708b8	2017-09-29 15:43:50 -0400	[diff] [blame]	610	{
				611	int l;
				612
				613	for (l = 0; l < BTRFS_MAX_LEVEL; l++) {
				614	if (!path->nodes[l])
				615	continue;
				616	if (l) {
				617	path->slots[l]++;
				618	if (path->slots[l] <
				619	btrfs_header_nritems(path->nodes[l])) {
				620	*level = l;
				621	return 0;
				622	}
				623	}
				624	btrfs_tree_unlock_rw(path->nodes[l], path->locks[l]);
				625	free_extent_buffer(path->nodes[l]);
				626	path->nodes[l] = NULL;
				627	path->slots[l] = 0;
				628	path->locks[l] = 0;
				629	}
				630
				631	return 1;
				632	}
				633
				634	static void dump_ref_action(struct btrfs_fs_info *fs_info,
				635	struct ref_action *ra)
				636	{
				637	btrfs_err(fs_info,
				638	" Ref action %d, root %llu, ref_root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
				639	ra->action, ra->root, ra->ref.root_objectid, ra->ref.parent,
				640	ra->ref.owner, ra->ref.offset, ra->ref.num_refs);
				641	__print_stack_trace(fs_info, ra);
				642	}
				643
				644	/*
				645	* Dumps all the information from the block entry to printk, it's going to be
				646	* awesome.
				647	*/
				648	static void dump_block_entry(struct btrfs_fs_info *fs_info,
				649	struct block_entry *be)
				650	{
				651	struct ref_entry *ref;
				652	struct root_entry *re;
				653	struct ref_action *ra;
				654	struct rb_node *n;
				655
				656	btrfs_err(fs_info,
				657	"dumping block entry [%llu %llu], num_refs %llu, metadata %d, from disk %d",
				658	be->bytenr, be->len, be->num_refs, be->metadata,
				659	be->from_disk);
				660
				661	for (n = rb_first(&be->refs); n; n = rb_next(n)) {
				662	ref = rb_entry(n, struct ref_entry, node);
				663	btrfs_err(fs_info,
				664	" ref root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
				665	ref->root_objectid, ref->parent, ref->owner,
				666	ref->offset, ref->num_refs);
				667	}
				668
				669	for (n = rb_first(&be->roots); n; n = rb_next(n)) {
				670	re = rb_entry(n, struct root_entry, node);
				671	btrfs_err(fs_info, " root entry %llu, num_refs %llu",
				672	re->root_objectid, re->num_refs);
				673	}
				674
				675	list_for_each_entry(ra, &be->actions, list)
				676	dump_ref_action(fs_info, ra);
				677	}
				678
				679	/*
				680	* btrfs_ref_tree_mod: called when we modify a ref for a bytenr
				681	* @root: the root we are making this modification from.
				682	* @bytenr: the bytenr we are modifying.
				683	* @num_bytes: number of bytes.
				684	* @parent: the parent bytenr.
				685	* @ref_root: the original root owner of the bytenr.
				686	* @owner: level in the case of metadata, inode in the case of data.
				687	* @offset: 0 for metadata, file offset for data.
				688	* @action: the action that we are doing, this is the same as the delayed ref
				689	* action.
				690	*
				691	* This will add an action item to the given bytenr and do sanity checks to make
				692	* sure we haven't messed something up. If we are making a new allocation and
				693	* this block entry has history we will delete all previous actions as long as
				694	* our sanity checks pass as they are no longer needed.
				695	*/
				696	int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
				697	u64 parent, u64 ref_root, u64 owner, u64 offset,
				698	int action)
				699	{
				700	struct btrfs_fs_info *fs_info = root->fs_info;
				701	struct ref_entry ref = NULL, exist;
				702	struct ref_action *ra = NULL;
				703	struct block_entry *be = NULL;
				704	struct root_entry *re = NULL;
				705	int ret = 0;
				706	bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
				707
				708	if (!btrfs_test_opt(root->fs_info, REF_VERIFY))
				709	return 0;
				710
				711	ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
				712	ra = kmalloc(sizeof(struct ref_action), GFP_NOFS);
				713	if (!ra \|\| !ref) {
				714	kfree(ref);
				715	kfree(ra);
				716	ret = -ENOMEM;
				717	goto out;
				718	}
				719
				720	if (parent) {
				721	ref->parent = parent;
				722	} else {
				723	ref->root_objectid = ref_root;
				724	ref->owner = owner;
				725	ref->offset = offset;
				726	}
				727	ref->num_refs = (action == BTRFS_DROP_DELAYED_REF) ? -1 : 1;
				728
				729	memcpy(&ra->ref, ref, sizeof(struct ref_entry));
				730	/*
				731	* Save the extra info from the delayed ref in the ref action to make it
				732	* easier to figure out what is happening. The real ref's we add to the
				733	* ref tree need to reflect what we save on disk so it matches any
				734	* on-disk refs we pre-loaded.
				735	*/
				736	ra->ref.owner = owner;
				737	ra->ref.offset = offset;
				738	ra->ref.root_objectid = ref_root;
				739	__save_stack_trace(ra);
				740
				741	INIT_LIST_HEAD(&ra->list);
				742	ra->action = action;
				743	ra->root = root->objectid;
				744
				745	/*
				746	* This is an allocation, preallocate the block_entry in case we haven't
				747	* used it before.
				748	*/
				749	ret = -EINVAL;
				750	if (action == BTRFS_ADD_DELAYED_EXTENT) {
				751	/*
				752	* For subvol_create we'll just pass in whatever the parent root
				753	* is and the new root objectid, so let's not treat the passed
				754	* in root as if it really has a ref for this bytenr.
				755	*/
				756	be = add_block_entry(root->fs_info, bytenr, num_bytes, ref_root);
				757	if (IS_ERR(be)) {
				758	kfree(ra);
				759	ret = PTR_ERR(be);
				760	goto out;
				761	}
				762	be->num_refs++;
				763	if (metadata)
				764	be->metadata = 1;
				765
				766	if (be->num_refs != 1) {
				767	btrfs_err(fs_info,
				768	"re-allocated a block that still has references to it!");
				769	dump_block_entry(fs_info, be);
				770	dump_ref_action(fs_info, ra);
				771	goto out_unlock;
				772	}
				773
				774	while (!list_empty(&be->actions)) {
				775	struct ref_action *tmp;
				776
				777	tmp = list_first_entry(&be->actions, struct ref_action,
				778	list);
				779	list_del(&tmp->list);
				780	kfree(tmp);
				781	}
				782	} else {
				783	struct root_entry *tmp;
				784
				785	if (!parent) {
				786	re = kmalloc(sizeof(struct root_entry), GFP_NOFS);
				787	if (!re) {
				788	kfree(ref);
				789	kfree(ra);
				790	ret = -ENOMEM;
				791	goto out;
				792	}
				793	/*
				794	* This is the root that is modifying us, so it's the
				795	* one we want to lookup below when we modify the
				796	* re->num_refs.
				797	*/
				798	ref_root = root->objectid;
				799	re->root_objectid = root->objectid;
				800	re->num_refs = 0;
				801	}
				802
				803	spin_lock(&root->fs_info->ref_verify_lock);
				804	be = lookup_block_entry(&root->fs_info->block_tree, bytenr);
				805	if (!be) {
				806	btrfs_err(fs_info,
				807	"trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
				808	action, (unsigned long long)bytenr,
				809	(unsigned long long)num_bytes);
				810	dump_ref_action(fs_info, ra);
				811	kfree(ref);
				812	kfree(ra);
				813	goto out_unlock;
				814	}
				815
				816	if (!parent) {
				817	tmp = insert_root_entry(&be->roots, re);
				818	if (tmp) {
				819	kfree(re);
				820	re = tmp;
				821	}
				822	}
				823	}
				824
				825	exist = insert_ref_entry(&be->refs, ref);
				826	if (exist) {
				827	if (action == BTRFS_DROP_DELAYED_REF) {
				828	if (exist->num_refs == 0) {
				829	btrfs_err(fs_info,
				830	"dropping a ref for a existing root that doesn't have a ref on the block");
				831	dump_block_entry(fs_info, be);
				832	dump_ref_action(fs_info, ra);
				833	kfree(ra);
				834	goto out_unlock;
				835	}
				836	exist->num_refs--;
				837	if (exist->num_refs == 0) {
				838	rb_erase(&exist->node, &be->refs);
				839	kfree(exist);
				840	}
				841	} else if (!be->metadata) {
				842	exist->num_refs++;
				843	} else {
				844	btrfs_err(fs_info,
				845	"attempting to add another ref for an existing ref on a tree block");
				846	dump_block_entry(fs_info, be);
				847	dump_ref_action(fs_info, ra);
				848	kfree(ra);
				849	goto out_unlock;
				850	}
				851	kfree(ref);
				852	} else {
				853	if (action == BTRFS_DROP_DELAYED_REF) {
				854	btrfs_err(fs_info,
				855	"dropping a ref for a root that doesn't have a ref on the block");
				856	dump_block_entry(fs_info, be);
				857	dump_ref_action(fs_info, ra);
				858	kfree(ra);
				859	goto out_unlock;
				860	}
				861	}
				862
				863	if (!parent && !re) {
				864	re = lookup_root_entry(&be->roots, ref_root);
				865	if (!re) {
				866	/*
				867	* This shouldn't happen because we will add our re
				868	* above when we lookup the be with !parent, but just in
				869	* case catch this case so we don't panic because I
				870	* didn't thik of some other corner case.
				871	*/
				872	btrfs_err(fs_info, "failed to find root %llu for %llu",
				873	root->objectid, be->bytenr);
				874	dump_block_entry(fs_info, be);
				875	dump_ref_action(fs_info, ra);
				876	kfree(ra);
				877	goto out_unlock;
				878	}
				879	}
				880	if (action == BTRFS_DROP_DELAYED_REF) {
				881	if (re)
				882	re->num_refs--;
				883	be->num_refs--;
				884	} else if (action == BTRFS_ADD_DELAYED_REF) {
				885	be->num_refs++;
				886	if (re)
				887	re->num_refs++;
				888	}
				889	list_add_tail(&ra->list, &be->actions);
				890	ret = 0;
				891	out_unlock:
				892	spin_unlock(&root->fs_info->ref_verify_lock);
				893	out:
				894	if (ret)
				895	btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
				896	return ret;
				897	}
				898
				899	/* Free up the ref cache */
				900	void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
				901	{
				902	struct block_entry *be;
				903	struct rb_node *n;
				904
				905	if (!btrfs_test_opt(fs_info, REF_VERIFY))
				906	return;
				907
				908	spin_lock(&fs_info->ref_verify_lock);
				909	while ((n = rb_first(&fs_info->block_tree))) {
				910	be = rb_entry(n, struct block_entry, node);
				911	rb_erase(&be->node, &fs_info->block_tree);
				912	free_block_entry(be);
				913	cond_resched_lock(&fs_info->ref_verify_lock);
				914	}
				915	spin_unlock(&fs_info->ref_verify_lock);
				916	}
				917
				918	void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
				919	u64 len)
				920	{
				921	struct block_entry be = NULL, entry;
				922	struct rb_node *n;
				923
				924	if (!btrfs_test_opt(fs_info, REF_VERIFY))
				925	return;
				926
				927	spin_lock(&fs_info->ref_verify_lock);
				928	n = fs_info->block_tree.rb_node;
				929	while (n) {
				930	entry = rb_entry(n, struct block_entry, node);
				931	if (entry->bytenr < start) {
				932	n = n->rb_right;
				933	} else if (entry->bytenr > start) {
				934	n = n->rb_left;
				935	} else {
				936	be = entry;
				937	break;
				938	}
				939	/* We want to get as close to start as possible */
				940	if (be == NULL \|\|
				941	(entry->bytenr < start && be->bytenr > start) \|\|
				942	(entry->bytenr < start && entry->bytenr > be->bytenr))
				943	be = entry;
				944	}
				945
				946	/*
				947	* Could have an empty block group, maybe have something to check for
				948	* this case to verify we were actually empty?
				949	*/
				950	if (!be) {
				951	spin_unlock(&fs_info->ref_verify_lock);
				952	return;
				953	}
				954
				955	n = &be->node;
				956	while (n) {
				957	be = rb_entry(n, struct block_entry, node);
				958	n = rb_next(n);
				959	if (be->bytenr < start && be->bytenr + be->len > start) {
				960	btrfs_err(fs_info,
				961	"block entry overlaps a block group [%llu,%llu]!",
				962	start, len);
				963	dump_block_entry(fs_info, be);
				964	continue;
				965	}
				966	if (be->bytenr < start)
				967	continue;
				968	if (be->bytenr >= start + len)
				969	break;
				970	if (be->bytenr + be->len > start + len) {
				971	btrfs_err(fs_info,
				972	"block entry overlaps a block group [%llu,%llu]!",
				973	start, len);
				974	dump_block_entry(fs_info, be);
				975	}
				976	rb_erase(&be->node, &fs_info->block_tree);
				977	free_block_entry(be);
				978	}
				979	spin_unlock(&fs_info->ref_verify_lock);
				980	}
				981
				982	/* Walk down all roots and build the ref tree, meant to be called at mount */
				983	int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
				984	{
				985	struct btrfs_path *path;
Josef Bacik	fd708b8	2017-09-29 15:43:50 -0400	[diff] [blame]	986	struct extent_buffer *eb;
				987	u64 bytenr = 0, num_bytes = 0;
				988	int ret, level;
				989
				990	if (!btrfs_test_opt(fs_info, REF_VERIFY))
				991	return 0;
				992
				993	path = btrfs_alloc_path();
				994	if (!path)
				995	return -ENOMEM;
				996
				997	eb = btrfs_read_lock_root_node(fs_info->extent_root);
				998	btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
				999	level = btrfs_header_level(eb);
				1000	path->nodes[level] = eb;
				1001	path->slots[level] = 0;
				1002	path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
				1003
				1004	while (1) {
				1005	/*
				1006	* We have to keep track of the bytenr/num_bytes we last hit
				1007	* because we could have run out of space for an inline ref, and
				1008	* would have had to added a ref key item which may appear on a
				1009	* different leaf from the original extent item.
				1010	*/
				1011	ret = walk_down_tree(fs_info->extent_root, path, level,
				1012	&bytenr, &num_bytes);
				1013	if (ret)
				1014	break;
Geert Uytterhoeven	02cfe77	2017-11-15 16:04:40 +0100	[diff] [blame^]	1015	ret = walk_up_tree(path, &level);
Josef Bacik	fd708b8	2017-09-29 15:43:50 -0400	[diff] [blame]	1016	if (ret < 0)
				1017	break;
				1018	if (ret > 0) {
				1019	ret = 0;
				1020	break;
				1021	}
				1022	}
				1023	if (ret) {
				1024	btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
				1025	btrfs_free_ref_cache(fs_info);
				1026	}
				1027	btrfs_free_path(path);
				1028	return ret;
				1029	}