// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2011 STRATO. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/btrfs.h>

#include "ctree.h"
#include "transaction.h"
#include "disk-io.h"
#include "locking.h"
#include "ulist.h"
#include "backref.h"
#include "extent_io.h"
#include "qgroup.h"
#include "block-group.h"
#include "sysfs.h"

/* TODO XXX FIXME
 *  - subvol delete -> delete when ref goes to 0? delete limits also?
 *  - reorganize keys
 *  - compressed
 *  - sync
 *  - copy also limits on subvol creation
 *  - limit
 *  - caches for ulists
 *  - performance benchmarks
 *  - check all ioctl parameters
 */

/*
 * Helpers to access qgroup reservation
 *
 * Callers should ensure the lock context and type are valid
 */

static u64 qgroup_rsv_total(const struct btrfs_qgroup *qgroup)
{
	u64 ret = 0;
	int i;

	for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
		ret += qgroup->rsv.values[i];

	return ret;
}

#ifdef CONFIG_BTRFS_DEBUG
static const char *qgroup_rsv_type_str(enum btrfs_qgroup_rsv_type type)
{
	if (type == BTRFS_QGROUP_RSV_DATA)
		return "data";
	if (type == BTRFS_QGROUP_RSV_META_PERTRANS)
		return "meta_pertrans";
	if (type == BTRFS_QGROUP_RSV_META_PREALLOC)
		return "meta_prealloc";
	return NULL;
}
#endif

static void qgroup_rsv_add(struct btrfs_fs_info *fs_info,
			   struct btrfs_qgroup *qgroup, u64 num_bytes,
			   enum btrfs_qgroup_rsv_type type)
{
	trace_qgroup_update_reserve(fs_info, qgroup, num_bytes, type);
	qgroup->rsv.values[type] += num_bytes;
}

static void qgroup_rsv_release(struct btrfs_fs_info *fs_info,
			       struct btrfs_qgroup *qgroup, u64 num_bytes,
			       enum btrfs_qgroup_rsv_type type)
{
	trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes, type);
	if (qgroup->rsv.values[type] >= num_bytes) {
		qgroup->rsv.values[type] -= num_bytes;
		return;
	}
#ifdef CONFIG_BTRFS_DEBUG
	WARN_RATELIMIT(1,
		"qgroup %llu %s reserved space underflow, have %llu to free %llu",
		qgroup->qgroupid, qgroup_rsv_type_str(type),
		qgroup->rsv.values[type], num_bytes);
#endif
	qgroup->rsv.values[type] = 0;
}
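
/*
 * Example: releasing more than was reserved clamps the counter to zero
 * rather than letting it wrap. E.g. after
 *
 *	qgroup_rsv_add(fs_info, qg, 4096, BTRFS_QGROUP_RSV_DATA);
 *	qgroup_rsv_release(fs_info, qg, 8192, BTRFS_QGROUP_RSV_DATA);
 *
 * qg->rsv.values[BTRFS_QGROUP_RSV_DATA] is 0, and under CONFIG_BTRFS_DEBUG
 * a rate-limited underflow warning is emitted.
 */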

static void qgroup_rsv_add_by_qgroup(struct btrfs_fs_info *fs_info,
				     struct btrfs_qgroup *dest,
				     struct btrfs_qgroup *src)
{
	int i;

	for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
		qgroup_rsv_add(fs_info, dest, src->rsv.values[i], i);
}

static void qgroup_rsv_release_by_qgroup(struct btrfs_fs_info *fs_info,
					 struct btrfs_qgroup *dest,
					 struct btrfs_qgroup *src)
{
	int i;

	for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
		qgroup_rsv_release(fs_info, dest, src->rsv.values[i], i);
}

static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
					   int mod)
{
	if (qg->old_refcnt < seq)
		qg->old_refcnt = seq;
	qg->old_refcnt += mod;
}

static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
					   int mod)
{
	if (qg->new_refcnt < seq)
		qg->new_refcnt = seq;
	qg->new_refcnt += mod;
}

static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
{
	if (qg->old_refcnt < seq)
		return 0;
	return qg->old_refcnt - seq;
}

static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
{
	if (qg->new_refcnt < seq)
		return 0;
	return qg->new_refcnt - seq;
}

/*
 * glue structure to represent the relations between qgroups.
 */
struct btrfs_qgroup_list {
	struct list_head next_group;
	struct list_head next_member;
	struct btrfs_qgroup *group;
	struct btrfs_qgroup *member;
};

static inline u64 qgroup_to_aux(struct btrfs_qgroup *qg)
{
	return (u64)(uintptr_t)qg;
}

static inline struct btrfs_qgroup* unode_aux_to_qgroup(struct ulist_node *n)
{
	return (struct btrfs_qgroup *)(uintptr_t)n->aux;
}
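
/*
 * The two helpers above form a round trip: a qgroup pointer is stashed in
 * ulist_node::aux as a u64 and later recovered, e.g.
 *
 *	ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC);
 *	...
 *	qg = unode_aux_to_qgroup(unode);
 *
 * as done in __qgroup_excl_accounting() below.
 */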

static int
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
		   int init_flags);
static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);

/* must be called with qgroup_ioctl_lock held */
static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
					   u64 qgroupid)
{
	struct rb_node *n = fs_info->qgroup_tree.rb_node;
	struct btrfs_qgroup *qgroup;

	while (n) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		if (qgroup->qgroupid < qgroupid)
			n = n->rb_left;
		else if (qgroup->qgroupid > qgroupid)
			n = n->rb_right;
		else
			return qgroup;
	}
	return NULL;
}

/* must be called with qgroup_lock held */
static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
					  u64 qgroupid)
{
	struct rb_node **p = &fs_info->qgroup_tree.rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_qgroup *qgroup;

	while (*p) {
		parent = *p;
		qgroup = rb_entry(parent, struct btrfs_qgroup, node);

		if (qgroup->qgroupid < qgroupid)
			p = &(*p)->rb_left;
		else if (qgroup->qgroupid > qgroupid)
			p = &(*p)->rb_right;
		else
			return qgroup;
	}

	qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
	if (!qgroup)
		return ERR_PTR(-ENOMEM);

	qgroup->qgroupid = qgroupid;
	INIT_LIST_HEAD(&qgroup->groups);
	INIT_LIST_HEAD(&qgroup->members);
	INIT_LIST_HEAD(&qgroup->dirty);

	rb_link_node(&qgroup->node, parent, p);
	rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);

	return qgroup;
}
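
/*
 * Note: both rb-tree walkers above descend left when the node's qgroupid is
 * smaller than the key searched for, which is inverted relative to the usual
 * convention; since lookup and insert agree on the direction, the tree stays
 * internally consistent.
 */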

static void __del_qgroup_rb(struct btrfs_fs_info *fs_info,
			    struct btrfs_qgroup *qgroup)
{
	struct btrfs_qgroup_list *list;

	btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
	list_del(&qgroup->dirty);
	while (!list_empty(&qgroup->groups)) {
		list = list_first_entry(&qgroup->groups,
					struct btrfs_qgroup_list, next_group);
		list_del(&list->next_group);
		list_del(&list->next_member);
		kfree(list);
	}

	while (!list_empty(&qgroup->members)) {
		list = list_first_entry(&qgroup->members,
					struct btrfs_qgroup_list, next_member);
		list_del(&list->next_group);
		list_del(&list->next_member);
		kfree(list);
	}
	kfree(qgroup);
}

/* must be called with qgroup_lock held */
static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
{
	struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);

	if (!qgroup)
		return -ENOENT;

	rb_erase(&qgroup->node, &fs_info->qgroup_tree);
	__del_qgroup_rb(fs_info, qgroup);
	return 0;
}

/* must be called with qgroup_lock held */
static int add_relation_rb(struct btrfs_fs_info *fs_info,
			   u64 memberid, u64 parentid)
{
	struct btrfs_qgroup *member;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup_list *list;

	member = find_qgroup_rb(fs_info, memberid);
	parent = find_qgroup_rb(fs_info, parentid);
	if (!member || !parent)
		return -ENOENT;

	list = kzalloc(sizeof(*list), GFP_ATOMIC);
	if (!list)
		return -ENOMEM;

	list->group = parent;
	list->member = member;
	list_add_tail(&list->next_group, &member->groups);
	list_add_tail(&list->next_member, &parent->members);

	return 0;
}
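
/*
 * Each btrfs_qgroup_list allocated above represents one member <-> parent
 * edge and is linked into two lists at once: member->groups via next_group
 * and parent->members via next_member, so the relation can be walked from
 * either end.
 */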

/* must be called with qgroup_lock held */
static int del_relation_rb(struct btrfs_fs_info *fs_info,
			   u64 memberid, u64 parentid)
{
	struct btrfs_qgroup *member;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup_list *list;

	member = find_qgroup_rb(fs_info, memberid);
	parent = find_qgroup_rb(fs_info, parentid);
	if (!member || !parent)
		return -ENOENT;

	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent) {
			list_del(&list->next_group);
			list_del(&list->next_member);
			kfree(list);
			return 0;
		}
	}
	return -ENOENT;
}

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
			       u64 rfer, u64 excl)
{
	struct btrfs_qgroup *qgroup;

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup)
		return -EINVAL;
	if (qgroup->rfer != rfer || qgroup->excl != excl)
		return -EINVAL;
	return 0;
}
#endif

/*
 * The full config is read in one go, only called from open_ctree().
 * It doesn't use any locking, as at this point we're still single-threaded.
 */
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
{
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_path *path = NULL;
	struct extent_buffer *l;
	int slot;
	int ret = 0;
	u64 flags = 0;
	u64 rescan_progress = 0;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return 0;

	fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
	if (!fs_info->qgroup_ulist) {
		ret = -ENOMEM;
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	ret = btrfs_sysfs_add_qgroups(fs_info);
	if (ret < 0)
		goto out;
	/* default this to quota off, in case no status key is found */
	fs_info->qgroup_flags = 0;

	/*
	 * pass 1: read status, all qgroup infos and limits
	 */
	key.objectid = 0;
	key.type = 0;
	key.offset = 0;
	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
	if (ret)
		goto out;

	while (1) {
		struct btrfs_qgroup *qgroup;

		slot = path->slots[0];
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
			struct btrfs_qgroup_status_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_status_item);

			if (btrfs_qgroup_status_version(l, ptr) !=
			    BTRFS_QGROUP_STATUS_VERSION) {
				btrfs_err(fs_info,
					  "old qgroup version, quota disabled");
				goto out;
			}
			if (btrfs_qgroup_status_generation(l, ptr) !=
			    fs_info->generation) {
				flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				btrfs_err(fs_info,
					"qgroup generation mismatch, marked as inconsistent");
			}
			fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
									  ptr);
			rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
			goto next1;
		}

		if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
		    found_key.type != BTRFS_QGROUP_LIMIT_KEY)
			goto next1;

		qgroup = find_qgroup_rb(fs_info, found_key.offset);
		if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
		    (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
			btrfs_err(fs_info, "inconsistent qgroup config");
			flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		}
		if (!qgroup) {
			qgroup = add_qgroup_rb(fs_info, found_key.offset);
			if (IS_ERR(qgroup)) {
				ret = PTR_ERR(qgroup);
				goto out;
			}
		}
		ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
		if (ret < 0)
			goto out;

		switch (found_key.type) {
		case BTRFS_QGROUP_INFO_KEY: {
			struct btrfs_qgroup_info_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_info_item);
			qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
			qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
			qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
			qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
			/* generation currently unused */
			break;
		}
		case BTRFS_QGROUP_LIMIT_KEY: {
			struct btrfs_qgroup_limit_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_limit_item);
			qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
			qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
			qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
			qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
			qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
			break;
		}
		}
next1:
		ret = btrfs_next_item(quota_root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;
	}
	btrfs_release_path(path);

	/*
	 * pass 2: read all qgroup relations
	 */
	key.objectid = 0;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = 0;
	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
	if (ret)
		goto out;
	while (1) {
		slot = path->slots[0];
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
			goto next2;

		if (found_key.objectid > found_key.offset) {
			/* parent <- member, not needed to build config */
			/* FIXME should we omit the key completely? */
			goto next2;
		}

		ret = add_relation_rb(fs_info, found_key.objectid,
				      found_key.offset);
		if (ret == -ENOENT) {
			btrfs_warn(fs_info,
				   "orphan qgroup relation 0x%llx->0x%llx",
				   found_key.objectid, found_key.offset);
			ret = 0;	/* ignore the error */
		}
		if (ret)
			goto out;
next2:
		ret = btrfs_next_item(quota_root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;
	}
out:
	btrfs_free_path(path);
	fs_info->qgroup_flags |= flags;
	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
		clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
	else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
		 ret >= 0)
		ret = qgroup_rescan_init(fs_info, rescan_progress, 0);

	if (ret < 0) {
		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
		btrfs_sysfs_del_qgroups(fs_info);
	}

	return ret < 0 ? ret : 0;
}

/*
 * Called in close_ctree() when quota is still enabled. This verifies we don't
 * leak some reserved space.
 *
 * Return false if no reserved space is left.
 * Return true if some reserved space is leaked.
 */
bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info)
{
	struct rb_node *node;
	bool ret = false;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return ret;
	/*
	 * Since we're unmounting, there is no race and no need to grab the
	 * qgroup lock. And here we don't go post-order, to provide a more
	 * user-friendly sorted result.
	 */
	for (node = rb_first(&fs_info->qgroup_tree); node; node = rb_next(node)) {
		struct btrfs_qgroup *qgroup;
		int i;

		qgroup = rb_entry(node, struct btrfs_qgroup, node);
		for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) {
			if (qgroup->rsv.values[i]) {
				ret = true;
				btrfs_warn(fs_info,
		"qgroup %hu/%llu has unreleased space, type %d rsv %llu",
					   btrfs_qgroup_level(qgroup->qgroupid),
					   btrfs_qgroup_subvolid(qgroup->qgroupid),
					   i, qgroup->rsv.values[i]);
			}
		}
	}
	return ret;
}
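
/*
 * A qgroupid encodes both fields printed above: the level lives in the top
 * 16 bits and the subvolume id in the low 48 bits, so e.g.
 * (1ULL << 48) | 5 is the level-1 qgroup "1/5" (see btrfs_qgroup_level()
 * and btrfs_qgroup_subvolid() in qgroup.h).
 */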

/*
 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable().
 * The first two are single-threaded paths, and for the third one we have set
 * quota_root to NULL with qgroup_lock held before, so it is safe to clean up
 * the in-memory structures without holding qgroup_lock.
 */
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
{
	struct rb_node *n;
	struct btrfs_qgroup *qgroup;

	while ((n = rb_first(&fs_info->qgroup_tree))) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		rb_erase(n, &fs_info->qgroup_tree);
		__del_qgroup_rb(fs_info, qgroup);
	}
	/*
	 * We call btrfs_free_qgroup_config() when unmounting the filesystem
	 * and when disabling quota, so set qgroup_ulist to NULL here to
	 * avoid a double free.
	 */
	ulist_free(fs_info->qgroup_ulist);
	fs_info->qgroup_ulist = NULL;
	btrfs_sysfs_del_qgroups(fs_info);
}

static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
				    u64 dst)
{
	int ret;
	struct btrfs_root *quota_root = trans->fs_info->quota_root;
	struct btrfs_path *path;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = src;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = dst;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);

	btrfs_mark_buffer_dirty(path->nodes[0]);

	btrfs_free_path(path);
	return ret;
}

static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
				    u64 dst)
{
	int ret;
	struct btrfs_root *quota_root = trans->fs_info->quota_root;
	struct btrfs_path *path;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = src;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = dst;

	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, quota_root, path);
out:
	btrfs_free_path(path);
	return ret;
}
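
/*
 * Note that a relation is stored as a pair of items in the quota tree,
 * (src, BTRFS_QGROUP_RELATION_KEY, dst) and the mirrored
 * (dst, BTRFS_QGROUP_RELATION_KEY, src); callers such as
 * btrfs_add_qgroup_relation() below insert and delete both directions.
 */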

static int add_qgroup_item(struct btrfs_trans_handle *trans,
			   struct btrfs_root *quota_root, u64 qgroupid)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_qgroup_info_item *qgroup_info;
	struct btrfs_qgroup_limit_item *qgroup_limit;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	if (btrfs_is_testing(quota_root->fs_info))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroupid;

	/*
	 * Avoid a transaction abort by catching -EEXIST here. In that
	 * case, we proceed by re-initializing the existing structure
	 * on disk.
	 */

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*qgroup_info));
	if (ret && ret != -EEXIST)
		goto out;

	leaf = path->nodes[0];
	qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_qgroup_info_item);
	btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
	btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);

	btrfs_mark_buffer_dirty(leaf);

	btrfs_release_path(path);

	key.type = BTRFS_QGROUP_LIMIT_KEY;
	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*qgroup_limit));
	if (ret && ret != -EEXIST)
		goto out;

	leaf = path->nodes[0];
	qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_qgroup_limit_item);
	btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);

	btrfs_mark_buffer_dirty(leaf);

	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}

static int del_qgroup_item(struct btrfs_trans_handle *trans, u64 qgroupid)
{
	int ret;
	struct btrfs_root *quota_root = trans->fs_info->quota_root;
	struct btrfs_path *path;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroupid;
	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, quota_root, path);
	if (ret)
		goto out;

	btrfs_release_path(path);

	key.type = BTRFS_QGROUP_LIMIT_KEY;
	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, quota_root, path);

out:
	btrfs_free_path(path);
	return ret;
}

static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
				    struct btrfs_qgroup *qgroup)
{
	struct btrfs_root *quota_root = trans->fs_info->quota_root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_qgroup_limit_item *qgroup_limit;
	int ret;
	int slot;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_LIMIT_KEY;
	key.offset = qgroup->qgroupid;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
	if (ret > 0)
		ret = -ENOENT;

	if (ret)
		goto out;

	l = path->nodes[0];
	slot = path->slots[0];
	qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
	btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags);
	btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer);
	btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
	btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
	btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);

	btrfs_mark_buffer_dirty(l);

out:
	btrfs_free_path(path);
	return ret;
}

static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
				   struct btrfs_qgroup *qgroup)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_qgroup_info_item *qgroup_info;
	int ret;
	int slot;

	if (btrfs_is_testing(fs_info))
		return 0;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroup->qgroupid;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
	if (ret > 0)
		ret = -ENOENT;

	if (ret)
		goto out;

	l = path->nodes[0];
	slot = path->slots[0];
	qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
	btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
	btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
	btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
	btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
	btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);

	btrfs_mark_buffer_dirty(l);

out:
	btrfs_free_path(path);
	return ret;
}

static int update_qgroup_status_item(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_qgroup_status_item *ptr;
	int ret;
	int slot;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_STATUS_KEY;
	key.offset = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
	if (ret > 0)
		ret = -ENOENT;

	if (ret)
		goto out;

	l = path->nodes[0];
	slot = path->slots[0];
	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
	btrfs_set_qgroup_status_rescan(l, ptr,
				       fs_info->qgroup_rescan_progress.objectid);

	btrfs_mark_buffer_dirty(l);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * called with qgroup_lock held
 */
static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *leaf = NULL;
	int ret;
	int nr = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;

	key.objectid = 0;
	key.offset = 0;
	key.type = 0;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0)
			goto out;
		leaf = path->nodes[0];
		nr = btrfs_header_nritems(leaf);
		if (!nr)
			break;
		/*
		 * Delete the leaves one by one, since the whole tree is
		 * going to be deleted anyway.
		 */
		path->slots[0] = 0;
		ret = btrfs_del_items(trans, root, path, 0, nr);
		if (ret)
			goto out;

		btrfs_release_path(path);
	}
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_path *path = NULL;
	struct btrfs_qgroup_status_item *ptr;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_qgroup *qgroup = NULL;
	struct btrfs_trans_handle *trans = NULL;
	int ret = 0;
	int slot;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (fs_info->quota_root)
		goto out;

	fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
	if (!fs_info->qgroup_ulist) {
		ret = -ENOMEM;
		goto out;
	}

	ret = btrfs_sysfs_add_qgroups(fs_info);
	if (ret < 0)
		goto out;
	/*
	 * 1 for quota root item
	 * 1 for BTRFS_QGROUP_STATUS item
	 *
	 * Yet we also need 2*n items for the QGROUP_INFO/QGROUP_LIMIT items
	 * per subvolume. However, those are not currently reserved since it
	 * would be a lot of overkill.
	 */
	trans = btrfs_start_transaction(tree_root, 2);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		trans = NULL;
		goto out;
	}

	/*
	 * initially create the quota tree
	 */
	quota_root = btrfs_create_tree(trans, BTRFS_QUOTA_TREE_OBJECTID);
	if (IS_ERR(quota_root)) {
		ret = PTR_ERR(quota_root);
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		goto out_free_root;
	}

	key.objectid = 0;
	key.type = BTRFS_QGROUP_STATUS_KEY;
	key.offset = 0;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*ptr));
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_free_path;
	}

	leaf = path->nodes[0];
	ptr = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_qgroup_status_item);
	btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
	btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);

	btrfs_mark_buffer_dirty(leaf);

	key.objectid = 0;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = 0;

	btrfs_release_path(path);
	ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
	if (ret > 0)
		goto out_add_root;
	if (ret < 0) {
		btrfs_abort_transaction(trans, ret);
		goto out_free_path;
	}

	while (1) {
		slot = path->slots[0];
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		if (found_key.type == BTRFS_ROOT_REF_KEY) {

			/* Release locks on tree_root before we access quota_root */
			btrfs_release_path(path);

			ret = add_qgroup_item(trans, quota_root,
					      found_key.offset);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				goto out_free_path;
			}

			qgroup = add_qgroup_rb(fs_info, found_key.offset);
			if (IS_ERR(qgroup)) {
				ret = PTR_ERR(qgroup);
				btrfs_abort_transaction(trans, ret);
				goto out_free_path;
			}
			ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
			if (ret < 0) {
				btrfs_abort_transaction(trans, ret);
				goto out_free_path;
			}
			ret = btrfs_search_slot_for_read(tree_root, &found_key,
							 path, 1, 0);
			if (ret < 0) {
				btrfs_abort_transaction(trans, ret);
				goto out_free_path;
			}
			if (ret > 0) {
				/*
				 * Shouldn't happen, but in case it does we
				 * don't need to do the btrfs_next_item, just
				 * continue.
				 */
				continue;
			}
		}
		ret = btrfs_next_item(tree_root, path);
		if (ret < 0) {
			btrfs_abort_transaction(trans, ret);
			goto out_free_path;
		}
		if (ret)
			break;
	}

out_add_root:
	btrfs_release_path(path);
	ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_free_path;
	}

	qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
	if (IS_ERR(qgroup)) {
		ret = PTR_ERR(qgroup);
		btrfs_abort_transaction(trans, ret);
		goto out_free_path;
	}
	ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
	if (ret < 0) {
		btrfs_abort_transaction(trans, ret);
		goto out_free_path;
	}

	ret = btrfs_commit_transaction(trans);
	trans = NULL;
	if (ret)
		goto out_free_path;

	/*
	 * Set quota enabled flag after committing the transaction, to avoid
	 * deadlocks on fs_info->qgroup_ioctl_lock with concurrent snapshot
	 * creation.
	 */
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_root = quota_root;
	set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
	spin_unlock(&fs_info->qgroup_lock);

	ret = qgroup_rescan_init(fs_info, 0, 1);
	if (!ret) {
		qgroup_rescan_zero_tracking(fs_info);
		fs_info->qgroup_rescan_running = true;
		btrfs_queue_work(fs_info->qgroup_rescan_workers,
				 &fs_info->qgroup_rescan_work);
	}

out_free_path:
	btrfs_free_path(path);
out_free_root:
	if (ret)
		btrfs_put_root(quota_root);
out:
	if (ret) {
		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
		if (trans)
			btrfs_end_transaction(trans);
		btrfs_sysfs_del_qgroups(fs_info);
	}
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root;
	struct btrfs_trans_handle *trans = NULL;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_root)
		goto out;

	/*
	 * 1 For the root item
	 *
	 * We should also reserve enough items for the quota tree deletion in
	 * btrfs_clean_quota_tree but this is not done.
	 */
	trans = btrfs_start_transaction(fs_info->tree_root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
	btrfs_qgroup_wait_for_completion(fs_info, false);
	spin_lock(&fs_info->qgroup_lock);
	quota_root = fs_info->quota_root;
	fs_info->quota_root = NULL;
	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
	spin_unlock(&fs_info->qgroup_lock);

	btrfs_free_qgroup_config(fs_info);

	ret = btrfs_clean_quota_tree(trans, quota_root);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto end_trans;
	}

	ret = btrfs_del_root(trans, &quota_root->root_key);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto end_trans;
	}

	list_del(&quota_root->dirty_list);

	btrfs_tree_lock(quota_root->node);
	btrfs_clean_tree_block(quota_root->node);
	btrfs_tree_unlock(quota_root->node);
	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);

	btrfs_put_root(quota_root);

end_trans:
	ret = btrfs_end_transaction(trans);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

static void qgroup_dirty(struct btrfs_fs_info *fs_info,
			 struct btrfs_qgroup *qgroup)
{
	if (list_empty(&qgroup->dirty))
		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
}
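
/*
 * Qgroups queued on fs_info->dirty_qgroups here are written back to the
 * quota tree later, at transaction commit (via btrfs_run_qgroups(), using
 * the update_qgroup_*_item() helpers above).
 */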

/*
 * The easy accounting: we are updating a qgroup relationship where the child
 * qgroup has only exclusive extents.
 *
 * In this case, all exclusive extents will also be exclusive for the parent,
 * so excl/rfer just get added/removed.
 *
 * The same applies to qgroup reservation space, which should also be added
 * to/removed from the parent. Otherwise, when the child tries to release
 * reservation space, the parent would underflow its reservation (in the
 * relationship-adding case).
 *
 * Caller should hold fs_info->qgroup_lock.
 */
static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
				    struct ulist *tmp, u64 ref_root,
				    struct btrfs_qgroup *src, int sign)
{
	struct btrfs_qgroup *qgroup;
	struct btrfs_qgroup_list *glist;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	u64 num_bytes = src->excl;
	int ret = 0;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	qgroup->rfer += sign * num_bytes;
	qgroup->rfer_cmpr += sign * num_bytes;

	WARN_ON(sign < 0 && qgroup->excl < num_bytes);
	qgroup->excl += sign * num_bytes;
	qgroup->excl_cmpr += sign * num_bytes;

	if (sign > 0)
		qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
	else
		qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);

	qgroup_dirty(fs_info, qgroup);

	/* Get all of the parent groups that contain this qgroup */
	list_for_each_entry(glist, &qgroup->groups, next_group) {
		ret = ulist_add(tmp, glist->group->qgroupid,
				qgroup_to_aux(glist->group), GFP_ATOMIC);
		if (ret < 0)
			goto out;
	}

	/* Iterate all of the parents and adjust their reference counts */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		qgroup = unode_aux_to_qgroup(unode);
		qgroup->rfer += sign * num_bytes;
		qgroup->rfer_cmpr += sign * num_bytes;
		WARN_ON(sign < 0 && qgroup->excl < num_bytes);
		qgroup->excl += sign * num_bytes;
		if (sign > 0)
			qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
		else
			qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);
		qgroup->excl_cmpr += sign * num_bytes;
		qgroup_dirty(fs_info, qgroup);

		/* Add any parents of the parents */
		list_for_each_entry(glist, &qgroup->groups, next_group) {
			ret = ulist_add(tmp, glist->group->qgroupid,
					qgroup_to_aux(glist->group), GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	ret = 0;
out:
	return ret;
}
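
/*
 * Example: assigning a child qgroup whose rfer == excl == 1 MiB to a parent
 * runs the above with sign == 1, adding 1 MiB to rfer/excl (and copying the
 * reservations) of the parent and every ancestor collected via the ulist;
 * removing the relation repeats the walk with sign == -1 to subtract the
 * same amounts.
 */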

/*
 * Quick path for updating a qgroup with only excl refs.
 *
 * In that case, just updating all parents is enough.
 * Otherwise we need to do a full rescan.
 * Caller should also hold fs_info->qgroup_lock.
 *
 * Return 0 for quick update, >0 if a full rescan is needed (the
 * INCONSISTENT flag gets set), and <0 for other errors.
 */
static int quick_update_accounting(struct btrfs_fs_info *fs_info,
				   struct ulist *tmp, u64 src, u64 dst,
				   int sign)
{
	struct btrfs_qgroup *qgroup;
	int ret = 1;
	int err = 0;

	qgroup = find_qgroup_rb(fs_info, src);
	if (!qgroup)
		goto out;
	if (qgroup->excl == qgroup->rfer) {
		ret = 0;
		err = __qgroup_excl_accounting(fs_info, tmp, dst,
					       qgroup, sign);
		if (err < 0) {
			ret = err;
			goto out;
		}
	}
out:
	if (ret)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	return ret;
}
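
/*
 * The quick path applies only when qgroup->excl == qgroup->rfer, i.e. every
 * extent the child references is exclusive to it; if it shares any extent,
 * the parent's counters cannot be derived locally and a full rescan is
 * required.
 */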

int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
			      u64 dst)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup *member;
	struct btrfs_qgroup_list *list;
	struct ulist *tmp;
	int ret = 0;

	/* Check the level of src and dst first */
	if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
		return -EINVAL;

	tmp = ulist_alloc(GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_root) {
		ret = -ENOTCONN;
		goto out;
	}
	member = find_qgroup_rb(fs_info, src);
	parent = find_qgroup_rb(fs_info, dst);
	if (!member || !parent) {
		ret = -EINVAL;
		goto out;
	}

	/* Check if such a qgroup relation exists first */
	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent) {
			ret = -EEXIST;
			goto out;
		}
	}

	ret = add_qgroup_relation_item(trans, src, dst);
	if (ret)
		goto out;

	ret = add_qgroup_relation_item(trans, dst, src);
	if (ret) {
		del_qgroup_relation_item(trans, src, dst);
		goto out;
	}

	spin_lock(&fs_info->qgroup_lock);
	ret = add_relation_rb(fs_info, src, dst);
	if (ret < 0) {
		spin_unlock(&fs_info->qgroup_lock);
		goto out;
	}
	ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	ulist_free(tmp);
	return ret;
}

static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
				 u64 dst)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup *member;
	struct btrfs_qgroup_list *list;
	struct ulist *tmp;
	bool found = false;
	int ret = 0;
	int ret2;

	tmp = ulist_alloc(GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;

	if (!fs_info->quota_root) {
		ret = -ENOTCONN;
		goto out;
	}

	member = find_qgroup_rb(fs_info, src);
	parent = find_qgroup_rb(fs_info, dst);
	/*
	 * If the parent/member pair doesn't exist, only try to delete the
	 * dead relation items.
	 */
	if (!member || !parent)
		goto delete_item;

	/* Check if such a qgroup relation exists first */
	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent) {
			found = true;
			break;
		}
	}

delete_item:
	ret = del_qgroup_relation_item(trans, src, dst);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	ret2 = del_qgroup_relation_item(trans, dst, src);
	if (ret2 < 0 && ret2 != -ENOENT)
		goto out;

	/* At least one deletion succeeded, return 0 */
	if (!ret || !ret2)
		ret = 0;

	if (found) {
		spin_lock(&fs_info->qgroup_lock);
		del_relation_rb(fs_info, src, dst);
		ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
		spin_unlock(&fs_info->qgroup_lock);
	}
out:
	ulist_free(tmp);
	return ret;
}

int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
			      u64 dst)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	ret = __del_qgroup_relation(trans, src, dst);
	mutex_unlock(&fs_info->qgroup_ioctl_lock);

	return ret;
}

int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_root) {
		ret = -ENOTCONN;
		goto out;
	}
	quota_root = fs_info->quota_root;
	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (qgroup) {
		ret = -EEXIST;
		goto out;
	}

	ret = add_qgroup_item(trans, quota_root, qgroupid);
	if (ret)
		goto out;

	spin_lock(&fs_info->qgroup_lock);
	qgroup = add_qgroup_rb(fs_info, qgroupid);
	spin_unlock(&fs_info->qgroup_lock);

	if (IS_ERR(qgroup)) {
		ret = PTR_ERR(qgroup);
		goto out;
	}
	ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_qgroup *qgroup;
	struct btrfs_qgroup_list *list;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_root) {
		ret = -ENOTCONN;
		goto out;
	}

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup) {
		ret = -ENOENT;
		goto out;
	}

	/* Check if there are no children of this qgroup */
	if (!list_empty(&qgroup->members)) {
		ret = -EBUSY;
		goto out;
	}

	ret = del_qgroup_item(trans, qgroupid);
	if (ret && ret != -ENOENT)
		goto out;

	while (!list_empty(&qgroup->groups)) {
		list = list_first_entry(&qgroup->groups,
					struct btrfs_qgroup_list, next_group);
		ret = __del_qgroup_relation(trans, qgroupid,
					    list->group->qgroupid);
		if (ret)
			goto out;
	}

	spin_lock(&fs_info->qgroup_lock);
	del_qgroup_rb(fs_info, qgroupid);
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
		       struct btrfs_qgroup_limit *limit)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_qgroup *qgroup;
	int ret = 0;
	/*
	 * Sometimes we would want to clear the limit on this qgroup.
	 * To meet this requirement, we treat -1 as a special value
	 * which tells the kernel to clear the limit on this qgroup.
	 */
	const u64 CLEAR_VALUE = -1;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_root) {
		ret = -ENOTCONN;
		goto out;
	}

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup) {
		ret = -ENOENT;
		goto out;
	}

	spin_lock(&fs_info->qgroup_lock);
	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) {
		if (limit->max_rfer == CLEAR_VALUE) {
			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
			limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
			qgroup->max_rfer = 0;
		} else {
			qgroup->max_rfer = limit->max_rfer;
		}
	}
	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
		if (limit->max_excl == CLEAR_VALUE) {
			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
			limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
			qgroup->max_excl = 0;
		} else {
			qgroup->max_excl = limit->max_excl;
		}
	}
	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) {
		if (limit->rsv_rfer == CLEAR_VALUE) {
			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
			limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
			qgroup->rsv_rfer = 0;
		} else {
			qgroup->rsv_rfer = limit->rsv_rfer;
		}
	}
	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) {
		if (limit->rsv_excl == CLEAR_VALUE) {
			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
			limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
			qgroup->rsv_excl = 0;
		} else {
			qgroup->rsv_excl = limit->rsv_excl;
		}
	}
	qgroup->lim_flags |= limit->flags;

	spin_unlock(&fs_info->qgroup_lock);

	ret = update_qgroup_limit_item(trans, qgroup);
	if (ret) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		btrfs_info(fs_info, "unable to update quota limit for %llu",
			   qgroupid);
	}

out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
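
/*
 * Illustrative userspace sketch (assumed, not taken from this file): from
 * the ioctl side, clearing a previously set limit uses the same -1
 * convention, roughly:
 *
 *	struct btrfs_ioctl_qgroup_limit_args args = {
 *		.qgroupid = qgroupid,
 *		.lim = {
 *			.flags = BTRFS_QGROUP_LIMIT_MAX_RFER,
 *			.max_rfer = (__u64)-1,
 *		},
 *	};
 *	ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args);
 *
 * where (__u64)-1 is the CLEAR_VALUE convention handled above.
 */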

int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
				     struct btrfs_delayed_ref_root *delayed_refs,
				     struct btrfs_qgroup_extent_record *record)
{
	struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_qgroup_extent_record *entry;
	u64 bytenr = record->bytenr;

	lockdep_assert_held(&delayed_refs->lock);
	trace_btrfs_qgroup_trace_extent(fs_info, record);

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
				 node);
		if (bytenr < entry->bytenr) {
			p = &(*p)->rb_left;
		} else if (bytenr > entry->bytenr) {
			p = &(*p)->rb_right;
		} else {
			if (record->data_rsv && !entry->data_rsv) {
				entry->data_rsv = record->data_rsv;
				entry->data_rsv_refroot =
					record->data_rsv_refroot;
			}
			return 1;
		}
	}

	rb_link_node(&record->node, parent_node, p);
	rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
	return 0;
}
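
/*
 * Descriptive note: a return value of 1 means an extent record for this
 * bytenr already sits in the dirty tree and @record was not linked in, so
 * the caller still owns the allocation; btrfs_qgroup_trace_extent() below
 * kfree()s the record in that case.
 */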

int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
				   struct btrfs_qgroup_extent_record *qrecord)
{
	struct ulist *old_root;
	u64 bytenr = qrecord->bytenr;
	int ret;

	ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
	if (ret < 0) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		btrfs_warn(fs_info,
"error accounting new delayed refs extent (err code: %d), quota inconsistent",
			   ret);
		return 0;
	}

	/*
	 * Here we don't need to take the lock of
	 * trans->transaction->delayed_refs, since the inserted qrecord won't
	 * be deleted; only qrecord->node may be modified (by a new qrecord
	 * insert).
	 *
	 * So modifying qrecord->old_roots is safe here.
	 */
	qrecord->old_roots = old_root;
	return 0;
}

int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
			      u64 num_bytes, gfp_t gfp_flag)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_qgroup_extent_record *record;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)
	    || bytenr == 0 || num_bytes == 0)
		return 0;
	record = kzalloc(sizeof(*record), gfp_flag);
	if (!record)
		return -ENOMEM;

	delayed_refs = &trans->transaction->delayed_refs;
	record->bytenr = bytenr;
	record->num_bytes = num_bytes;
	record->old_roots = NULL;

	spin_lock(&delayed_refs->lock);
	ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
	spin_unlock(&delayed_refs->lock);
	if (ret > 0) {
		kfree(record);
		return 0;
	}
	return btrfs_qgroup_trace_extent_post(fs_info, record);
}
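
/*
 * For orientation: btrfs_qgroup_trace_leaf_items() below calls
 * btrfs_qgroup_trace_extent() for every data extent in a leaf, and the
 * subtree tracing helpers further down use it for tree blocks themselves,
 * so every extent whose root set may change gets a dirty extent record in
 * this transaction.
 */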

int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
				  struct extent_buffer *eb)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int nr = btrfs_header_nritems(eb);
	int i, extent_type, ret;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	u64 bytenr, num_bytes;

	/* We can be called directly from walk_up_proc() */
	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return 0;

	for (i = 0; i < nr; i++) {
		btrfs_item_key_to_cpu(eb, &key, i);

		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;

		fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
		/* Filter out non qgroup-accountable extents */
		extent_type = btrfs_file_extent_type(eb, fi);

		if (extent_type == BTRFS_FILE_EXTENT_INLINE)
			continue;

		bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
		if (!bytenr)
			continue;

		num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);

		ret = btrfs_qgroup_trace_extent(trans, bytenr, num_bytes,
						GFP_NOFS);
		if (ret)
			return ret;
	}
	cond_resched();
	return 0;
}

/*
 * Walk up the tree from the bottom, freeing leaves and any interior
 * nodes which have had all slots visited. If a node (leaf or
 * interior) is freed, the node above it will have its slot
 * incremented. The root node will never be freed.
 *
 * At the end of this function, we should have a path which has all
 * slots incremented to the next position for a search. If we need to
 * read a new node it will be NULL and the node above it will have the
 * correct slot selected for a later read.
 *
 * If we increment the root node's slot counter past the number of
 * elements, 1 is returned to signal completion of the search.
 */
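/*
 * Illustrative example (assumed 3-level path, root_level == 2): if the walk
 * ended with slots[0] and slots[1] at their last entries, this helper frees
 * nodes[0] and nodes[1], zeroes their slots, and bumps slots[2]; the caller
 * then reads back down the next branch. Only when slots[2] also runs past
 * the root's item count does it return 1 to end the search.
 */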
static int adjust_slots_upwards(struct btrfs_path *path, int root_level)
{
	int level = 0;
	int nr, slot;
	struct extent_buffer *eb;

	if (root_level == 0)
		return 1;

	while (level <= root_level) {
		eb = path->nodes[level];
		nr = btrfs_header_nritems(eb);
		path->slots[level]++;
		slot = path->slots[level];
		if (slot >= nr || level == 0) {
			/*
			 * Don't free the root - we will detect this
			 * condition after our loop and return a
			 * positive value for caller to stop walking the tree.
			 */
			if (level != root_level) {
				btrfs_tree_unlock_rw(eb, path->locks[level]);
				path->locks[level] = 0;

				free_extent_buffer(eb);
				path->nodes[level] = NULL;
				path->slots[level] = 0;
			}
		} else {
			/*
			 * We have a valid slot to walk back down
			 * from. Stop here so caller can process these
			 * new nodes.
			 */
			break;
		}

		level++;
	}

	eb = path->nodes[root_level];
	if (path->slots[root_level] >= btrfs_header_nritems(eb))
		return 1;

	return 0;
}

/*
 * Helper function to trace a subtree tree block swap.
 *
 * The swap will happen at the highest tree block, but there may be a lot of
 * tree blocks involved.
 *
 * For example:
 *  OO = Old tree blocks
 *  NN = New tree blocks allocated during balance
 *
 *           File tree (257)                  Reloc tree for 257
 * L2              OO                                NN
 *               /    \                            /    \
 * L1          OO      OO (a)                    OO      NN (a)
 *            / \     / \                       / \     / \
 * L0       OO   OO OO   OO                   OO   OO NN   NN
 *                  (b)  (c)                          (b)  (c)
 *
 * When calling qgroup_trace_extent_swap(), we will pass:
 * @src_eb = OO(a)
 * @dst_path = [ nodes[1] = NN(a), nodes[0] = NN(c) ]
 * @dst_level = 0
 * @root_level = 1
 *
 * In that case, qgroup_trace_extent_swap() will search from OO(a) to
 * reach OO(c), then mark both OO(c) and NN(c) as qgroup dirty.
 *
 * The main work of qgroup_trace_extent_swap() can be split into 3 parts:
 *
 * 1) Tree search from @src_eb
 *    It should act as a simplified btrfs_search_slot().
 *    The key for the search can be extracted from @dst_path->nodes[dst_level]
 *    (first key).
 *
 * 2) Mark the final tree blocks in @src_path and @dst_path qgroup dirty
 *    NOTE: In the above case, OO(a) and NN(a) won't be marked qgroup dirty.
 *    They should be marked during the previous (@dst_level = 1) iteration.
 *
 * 3) Mark file extents in leaves dirty
 *    We don't have a good way to pick out new file extents only.
 *    So we still follow the old method by scanning all file extents in
 *    the leaf.
 *
 * This function can free us from keeping two paths, thus later we only need
 * to care about how to iterate all new tree blocks in reloc tree.
 */
static int qgroup_trace_extent_swap(struct btrfs_trans_handle *trans,
				    struct extent_buffer *src_eb,
				    struct btrfs_path *dst_path,
				    int dst_level, int root_level,
				    bool trace_leaf)
{
	struct btrfs_key key;
	struct btrfs_path *src_path;
	struct btrfs_fs_info *fs_info = trans->fs_info;
	u32 nodesize = fs_info->nodesize;
	int cur_level = root_level;
	int ret;

	BUG_ON(dst_level > root_level);
	/* Level mismatch */
	if (btrfs_header_level(src_eb) != root_level)
		return -EINVAL;

	src_path = btrfs_alloc_path();
	if (!src_path) {
		ret = -ENOMEM;
		goto out;
	}

	if (dst_level)
		btrfs_node_key_to_cpu(dst_path->nodes[dst_level], &key, 0);
	else
		btrfs_item_key_to_cpu(dst_path->nodes[dst_level], &key, 0);

	/* For src_path */
	atomic_inc(&src_eb->refs);
	src_path->nodes[root_level] = src_eb;
	src_path->slots[root_level] = dst_path->slots[root_level];
	src_path->locks[root_level] = 0;

	/* A simplified version of btrfs_search_slot() */
	while (cur_level >= dst_level) {
		struct btrfs_key src_key;
		struct btrfs_key dst_key;

		if (src_path->nodes[cur_level] == NULL) {
			struct btrfs_key first_key;
			struct extent_buffer *eb;
			int parent_slot;
			u64 child_gen;
			u64 child_bytenr;

			eb = src_path->nodes[cur_level + 1];
			parent_slot = src_path->slots[cur_level + 1];
			child_bytenr = btrfs_node_blockptr(eb, parent_slot);
			child_gen = btrfs_node_ptr_generation(eb, parent_slot);
			btrfs_node_key_to_cpu(eb, &first_key, parent_slot);

			eb = read_tree_block(fs_info, child_bytenr, child_gen,
					     cur_level, &first_key);
			if (IS_ERR(eb)) {
				ret = PTR_ERR(eb);
				goto out;
			} else if (!extent_buffer_uptodate(eb)) {
				free_extent_buffer(eb);
				ret = -EIO;
				goto out;
			}

			src_path->nodes[cur_level] = eb;

			btrfs_tree_read_lock(eb);
			btrfs_set_lock_blocking_read(eb);
			src_path->locks[cur_level] = BTRFS_READ_LOCK_BLOCKING;
		}

		src_path->slots[cur_level] = dst_path->slots[cur_level];
		if (cur_level) {
			btrfs_node_key_to_cpu(dst_path->nodes[cur_level],
					&dst_key, dst_path->slots[cur_level]);
			btrfs_node_key_to_cpu(src_path->nodes[cur_level],
					&src_key, src_path->slots[cur_level]);
		} else {
			btrfs_item_key_to_cpu(dst_path->nodes[cur_level],
					&dst_key, dst_path->slots[cur_level]);
			btrfs_item_key_to_cpu(src_path->nodes[cur_level],
					&src_key, src_path->slots[cur_level]);
		}
		/* Content mismatch, something went wrong */
		if (btrfs_comp_cpu_keys(&dst_key, &src_key)) {
			ret = -ENOENT;
			goto out;
		}
		cur_level--;
	}

	/*
	 * Now both @dst_path and @src_path have been populated, record the tree
	 * blocks for qgroup accounting.
	 */
	ret = btrfs_qgroup_trace_extent(trans, src_path->nodes[dst_level]->start,
					nodesize, GFP_NOFS);
	if (ret < 0)
		goto out;
	ret = btrfs_qgroup_trace_extent(trans,
					dst_path->nodes[dst_level]->start,
					nodesize, GFP_NOFS);
	if (ret < 0)
		goto out;

	/* Record leaf file extents */
	if (dst_level == 0 && trace_leaf) {
		ret = btrfs_qgroup_trace_leaf_items(trans, src_path->nodes[0]);
		if (ret < 0)
			goto out;
		ret = btrfs_qgroup_trace_leaf_items(trans, dst_path->nodes[0]);
	}
out:
	btrfs_free_path(src_path);
	return ret;
}

/*
 * Helper function to do recursive generation-aware depth-first search, to
 * locate all new tree blocks in a subtree of reloc tree.
 *
 * E.g. (OO = Old tree blocks, NN = New tree blocks, whose gen == last_snapshot)
 *         reloc tree
 * L2         NN (a)
 *          /    \
 * L1    OO      NN (b)
 *      /  \    /  \
 * L0  OO  OO OO  NN
 *            (c) (d)
 * If we pass:
 * @dst_path = [ nodes[1] = NN(b), nodes[0] = NULL ],
 * @cur_level = 1
 * @root_level = 1
 *
 * We will iterate through tree blocks NN(b), NN(d) and inform the qgroup code
 * to trace the above tree blocks along with their counterparts in the file
 * tree.
 * During the search, old tree blocks such as OO(c) will be skipped, as tree
 * block swap won't affect OO(c).
 */
static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle *trans,
					   struct extent_buffer *src_eb,
					   struct btrfs_path *dst_path,
					   int cur_level, int root_level,
					   u64 last_snapshot, bool trace_leaf)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct extent_buffer *eb;
	bool need_cleanup = false;
	int ret = 0;
	int i;

	/* Level sanity check */
	if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 ||
	    root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 ||
	    root_level < cur_level) {
		btrfs_err_rl(fs_info,
			"%s: bad levels, cur_level=%d root_level=%d",
			__func__, cur_level, root_level);
		return -EUCLEAN;
	}

	/* Read the tree block if needed */
	if (dst_path->nodes[cur_level] == NULL) {
		struct btrfs_key first_key;
		int parent_slot;
		u64 child_gen;
		u64 child_bytenr;

		/*
		 * dst_path->nodes[root_level] must be initialized before
		 * calling this function.
		 */
		if (cur_level == root_level) {
			btrfs_err_rl(fs_info,
	"%s: dst_path->nodes[%d] not initialized, root_level=%d cur_level=%d",
				     __func__, root_level, root_level,
				     cur_level);
			return -EUCLEAN;
		}

		/*
		 * We need to get child blockptr/gen from parent before we can
		 * read it.
		 */
		eb = dst_path->nodes[cur_level + 1];
		parent_slot = dst_path->slots[cur_level + 1];
		child_bytenr = btrfs_node_blockptr(eb, parent_slot);
		child_gen = btrfs_node_ptr_generation(eb, parent_slot);
		btrfs_node_key_to_cpu(eb, &first_key, parent_slot);

		/* This node is old, no need to trace */
		if (child_gen < last_snapshot)
			goto out;

		eb = read_tree_block(fs_info, child_bytenr, child_gen,
				     cur_level, &first_key);
		if (IS_ERR(eb)) {
			ret = PTR_ERR(eb);
			goto out;
		} else if (!extent_buffer_uptodate(eb)) {
			free_extent_buffer(eb);
			ret = -EIO;
			goto out;
		}

		dst_path->nodes[cur_level] = eb;
		dst_path->slots[cur_level] = 0;

		btrfs_tree_read_lock(eb);
		btrfs_set_lock_blocking_read(eb);
		dst_path->locks[cur_level] = BTRFS_READ_LOCK_BLOCKING;
		need_cleanup = true;
	}

	/* Now record this tree block and its counter part for qgroups */
	ret = qgroup_trace_extent_swap(trans, src_eb, dst_path, cur_level,
				       root_level, trace_leaf);
	if (ret < 0)
		goto cleanup;

	eb = dst_path->nodes[cur_level];

	if (cur_level > 0) {
		/* Iterate all child tree blocks */
		for (i = 0; i < btrfs_header_nritems(eb); i++) {
			/* Skip old tree blocks as they won't be swapped */
			if (btrfs_node_ptr_generation(eb, i) < last_snapshot)
				continue;
			dst_path->slots[cur_level] = i;

			/* Recursive call (at most 7 times) */
			ret = qgroup_trace_new_subtree_blocks(trans, src_eb,
					dst_path, cur_level - 1, root_level,
					last_snapshot, trace_leaf);
			if (ret < 0)
				goto cleanup;
		}
	}

cleanup:
	if (need_cleanup) {
		/* Clean up */
		btrfs_tree_unlock_rw(dst_path->nodes[cur_level],
				     dst_path->locks[cur_level]);
		free_extent_buffer(dst_path->nodes[cur_level]);
		dst_path->nodes[cur_level] = NULL;
		dst_path->slots[cur_level] = 0;
		dst_path->locks[cur_level] = 0;
	}
out:
	return ret;
}

static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
				struct extent_buffer *src_eb,
				struct extent_buffer *dst_eb,
				u64 last_snapshot, bool trace_leaf)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_path *dst_path = NULL;
	int level;
	int ret;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return 0;

	/* Wrong parameter order */
	if (btrfs_header_generation(src_eb) > btrfs_header_generation(dst_eb)) {
		btrfs_err_rl(fs_info,
		"%s: bad parameter order, src_gen=%llu dst_gen=%llu", __func__,
			     btrfs_header_generation(src_eb),
			     btrfs_header_generation(dst_eb));
		return -EUCLEAN;
	}

	if (!extent_buffer_uptodate(src_eb) || !extent_buffer_uptodate(dst_eb)) {
		ret = -EIO;
		goto out;
	}

	level = btrfs_header_level(dst_eb);
	dst_path = btrfs_alloc_path();
	if (!dst_path) {
		ret = -ENOMEM;
		goto out;
	}
	/* For dst_path */
	atomic_inc(&dst_eb->refs);
	dst_path->nodes[level] = dst_eb;
	dst_path->slots[level] = 0;
	dst_path->locks[level] = 0;

	/* Do the generation-aware depth-first search */
	ret = qgroup_trace_new_subtree_blocks(trans, src_eb, dst_path, level,
					      level, last_snapshot, trace_leaf);
	if (ret < 0)
		goto out;
	ret = 0;

out:
	btrfs_free_path(dst_path);
	if (ret < 0)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	return ret;
}

int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
			       struct extent_buffer *root_eb,
			       u64 root_gen, int root_level)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int ret = 0;
	int level;
	struct extent_buffer *eb = root_eb;
	struct btrfs_path *path = NULL;

	BUG_ON(root_level < 0 || root_level >= BTRFS_MAX_LEVEL);
	BUG_ON(root_eb == NULL);

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return 0;

	if (!extent_buffer_uptodate(root_eb)) {
		ret = btrfs_read_buffer(root_eb, root_gen, root_level, NULL);
		if (ret)
			goto out;
	}

	if (root_level == 0) {
		ret = btrfs_qgroup_trace_leaf_items(trans, root_eb);
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * Walk down the tree.  Missing extent blocks are filled in as
	 * we go. Metadata is accounted every time we read a new
	 * extent block.
	 *
	 * When we reach a leaf, we account for file extent items in it,
	 * walk back up the tree (adjusting slot pointers as we go)
	 * and restart the search process.
	 */
	atomic_inc(&root_eb->refs);	/* For path */
	path->nodes[root_level] = root_eb;
	path->slots[root_level] = 0;
	path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
walk_down:
	level = root_level;
	while (level >= 0) {
		if (path->nodes[level] == NULL) {
			struct btrfs_key first_key;
			int parent_slot;
			u64 child_gen;
			u64 child_bytenr;

			/*
			 * We need to get child blockptr/gen from parent before
			 * we can read it.
			 */
			eb = path->nodes[level + 1];
			parent_slot = path->slots[level + 1];
			child_bytenr = btrfs_node_blockptr(eb, parent_slot);
			child_gen = btrfs_node_ptr_generation(eb, parent_slot);
			btrfs_node_key_to_cpu(eb, &first_key, parent_slot);

			eb = read_tree_block(fs_info, child_bytenr, child_gen,
					     level, &first_key);
			if (IS_ERR(eb)) {
				ret = PTR_ERR(eb);
				goto out;
			} else if (!extent_buffer_uptodate(eb)) {
				free_extent_buffer(eb);
				ret = -EIO;
				goto out;
			}

			path->nodes[level] = eb;
			path->slots[level] = 0;

			btrfs_tree_read_lock(eb);
			btrfs_set_lock_blocking_read(eb);
			path->locks[level] = BTRFS_READ_LOCK_BLOCKING;

			ret = btrfs_qgroup_trace_extent(trans, child_bytenr,
							fs_info->nodesize,
							GFP_NOFS);
			if (ret)
				goto out;
		}

		if (level == 0) {
			ret = btrfs_qgroup_trace_leaf_items(trans,
							    path->nodes[level]);
			if (ret)
				goto out;

			/* Nonzero return here means we completed our search */
			ret = adjust_slots_upwards(path, root_level);
			if (ret)
				break;

			/* Restart search with new slots */
			goto walk_down;
		}

		level--;
	}

	ret = 0;
out:
	btrfs_free_path(path);

	return ret;
}

#define UPDATE_NEW	0
#define UPDATE_OLD	1
/*
 * Walk all of the roots that point to the bytenr and adjust their refcnts.
 */
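/*
 * Illustrative example (assumed qgroup IDs): if @roots = {257, 258} and both
 * subvolume qgroups 0/257 and 0/258 are members of qgroup 1/100, the walk
 * below bumps the per-@seq old or new refcnt (per @update_old) once on
 * 0/257 and 0/258 and twice on 1/100, since @tmp is reinitialized per root
 * while @qgroups accumulates the union of everything visited.
 */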
static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
				struct ulist *roots, struct ulist *tmp,
				struct ulist *qgroups, u64 seq, int update_old)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	struct btrfs_qgroup *qg;
	int ret = 0;

	if (!roots)
		return 0;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;

		ulist_reinit(tmp);
		ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg),
				GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = unode_aux_to_qgroup(tmp_unode);
			if (update_old)
				btrfs_qgroup_update_old_refcnt(qg, seq, 1);
			else
				btrfs_qgroup_update_new_refcnt(qg, seq, 1);
			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(qgroups, glist->group->qgroupid,
						qgroup_to_aux(glist->group),
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
				ret = ulist_add(tmp, glist->group->qgroupid,
						qgroup_to_aux(glist->group),
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}
	return 0;
}

/*
 * Update qgroup rfer/excl counters.
 * The rfer update is easy, the code can explain itself.
 *
 * The excl update is tricky, the update is split into 2 parts.
 * Part 1: Possible exclusive <-> sharing detect:
 *	|	A	|	!A	|
 *  -------------------------------------
 *  B	|	*	|	-	|
 *  -------------------------------------
 *  !B	|	+	|	**	|
 *  -------------------------------------
 *
 * Conditions:
 * A:	cur_old_roots < nr_old_roots	(not exclusive before)
 * !A:	cur_old_roots == nr_old_roots	(possible exclusive before)
 * B:	cur_new_roots < nr_new_roots	(not exclusive now)
 * !B:	cur_new_roots == nr_new_roots	(possible exclusive now)
 *
 * Results:
 * +: Possible sharing -> exclusive	-: Possible exclusive -> sharing
 * *: Definitely not changed.		**: Possibly unchanged.
 *
 * For the !A and !B conditions, the exception is the cur_old/new_roots == 0
 * case.
 *
 * To make the logic clear, we first use conditions A and B to split the
 * combination into 4 results.
 *
 * Then, for results "+" and "-", check the old/new_roots == 0 case, as in
 * them only one variant may be 0.
 *
 * Lastly, check result **, since there are 2 variants that may be 0, split
 * them again (2x2).
 * But this time we don't need to consider other things; the code and logic
 * are easy to understand now.
 */
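/*
 * Worked example (illustrative numbers): say num_bytes = 16K,
 * nr_old_roots = 1 and nr_new_roots = 2 after an extent becomes shared. For
 * the original owner's qgroup, cur_old_count == nr_old_roots and
 * cur_new_count (1) < nr_new_roots, so the extent goes exclusive -> shared:
 * excl -= 16K while rfer stays. For the new sharer, cur_old_count == 0 and
 * cur_new_count > 0, so only rfer += 16K.
 */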
static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
				  struct ulist *qgroups,
				  u64 nr_old_roots,
				  u64 nr_new_roots,
				  u64 num_bytes, u64 seq)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	u64 cur_new_count, cur_old_count;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(qgroups, &uiter))) {
		bool dirty = false;

		qg = unode_aux_to_qgroup(unode);
		cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
		cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);

		trace_qgroup_update_counters(fs_info, qg, cur_old_count,
					     cur_new_count);

		/* Rfer update part */
		if (cur_old_count == 0 && cur_new_count > 0) {
			qg->rfer += num_bytes;
			qg->rfer_cmpr += num_bytes;
			dirty = true;
		}
		if (cur_old_count > 0 && cur_new_count == 0) {
			qg->rfer -= num_bytes;
			qg->rfer_cmpr -= num_bytes;
			dirty = true;
		}

		/* Excl update part */
		/* Exclusive/none -> shared case */
		if (cur_old_count == nr_old_roots &&
		    cur_new_count < nr_new_roots) {
			/* Exclusive -> shared */
			if (cur_old_count != 0) {
				qg->excl -= num_bytes;
				qg->excl_cmpr -= num_bytes;
				dirty = true;
			}
		}

		/* Shared -> exclusive/none case */
		if (cur_old_count < nr_old_roots &&
		    cur_new_count == nr_new_roots) {
			/* Shared->exclusive */
			if (cur_new_count != 0) {
				qg->excl += num_bytes;
				qg->excl_cmpr += num_bytes;
				dirty = true;
			}
		}

		/* Exclusive/none -> exclusive/none case */
		if (cur_old_count == nr_old_roots &&
		    cur_new_count == nr_new_roots) {
			if (cur_old_count == 0) {
				/* None -> exclusive/none */

				if (cur_new_count != 0) {
					/* None -> exclusive */
					qg->excl += num_bytes;
					qg->excl_cmpr += num_bytes;
					dirty = true;
				}
				/* None -> none, nothing changed */
			} else {
				/* Exclusive -> exclusive/none */

				if (cur_new_count == 0) {
					/* Exclusive -> none */
					qg->excl -= num_bytes;
					qg->excl_cmpr -= num_bytes;
					dirty = true;
				}
				/* Exclusive -> exclusive, nothing changed */
			}
		}

		if (dirty)
			qgroup_dirty(fs_info, qg);
	}
	return 0;
}

/*
 * Check if the @roots ulist could potentially be a list of fs tree roots
 *
 * Return 0 for a ulist that is definitely not fs/subvol tree roots
 * Return 1 for possible fs/subvol tree roots in the list (considering an empty
 * one as well)
 */
static int maybe_fs_roots(struct ulist *roots)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;

	/* Empty one, still possible for fs roots */
	if (!roots || roots->nnodes == 0)
		return 1;

	ULIST_ITER_INIT(&uiter);
	unode = ulist_next(roots, &uiter);
	if (!unode)
		return 1;

	/*
	 * If it contains fs tree roots, then it must belong to fs/subvol
	 * trees.
	 * If it contains a non-fs tree, it won't be shared with fs/subvol trees.
	 */
	return is_fstree(unode->val);
}

int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
				u64 num_bytes, struct ulist *old_roots,
				struct ulist *new_roots)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct ulist *qgroups = NULL;
	struct ulist *tmp = NULL;
	u64 seq;
	u64 nr_new_roots = 0;
	u64 nr_old_roots = 0;
	int ret = 0;

	/*
	 * If quotas get disabled meanwhile, the resources need to be freed and
	 * we can't just exit here.
	 */
	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		goto out_free;

	if (new_roots) {
		if (!maybe_fs_roots(new_roots))
			goto out_free;
		nr_new_roots = new_roots->nnodes;
	}
	if (old_roots) {
		if (!maybe_fs_roots(old_roots))
			goto out_free;
		nr_old_roots = old_roots->nnodes;
	}

	/* Quick exit, either not fs tree roots, or won't affect any qgroup */
	if (nr_old_roots == 0 && nr_new_roots == 0)
		goto out_free;

	BUG_ON(!fs_info->quota_root);

	trace_btrfs_qgroup_account_extent(fs_info, trans->transid, bytenr,
					num_bytes, nr_old_roots, nr_new_roots);

	qgroups = ulist_alloc(GFP_NOFS);
	if (!qgroups) {
		ret = -ENOMEM;
		goto out_free;
	}
	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp) {
		ret = -ENOMEM;
		goto out_free;
	}

	mutex_lock(&fs_info->qgroup_rescan_lock);
	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
		if (fs_info->qgroup_rescan_progress.objectid <= bytenr) {
			mutex_unlock(&fs_info->qgroup_rescan_lock);
			ret = 0;
			goto out_free;
		}
	}
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	spin_lock(&fs_info->qgroup_lock);
	seq = fs_info->qgroup_seq;

	/* Update old refcnts using old_roots */
	ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq,
				   UPDATE_OLD);
	if (ret < 0)
		goto out;

	/* Update new refcnts using new_roots */
	ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq,
				   UPDATE_NEW);
	if (ret < 0)
		goto out;

	qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots,
			       num_bytes, seq);

	/*
	 * Bump qgroup_seq to avoid seq overlap
	 */
	fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1;
out:
	spin_unlock(&fs_info->qgroup_lock);
out_free:
	ulist_free(tmp);
	ulist_free(qgroups);
	ulist_free(old_roots);
	ulist_free(new_roots);
	return ret;
}
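
/*
 * Note on the qgroup_seq bump above (interpretive): the per-qgroup old/new
 * refcnts are encoded relative to @seq, and one accounting round may
 * increment them up to max(nr_old_roots, nr_new_roots) times, so advancing
 * the sequence by that amount plus one keeps the next extent's refcnts from
 * colliding with this one's.
 */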

int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_qgroup_extent_record *record;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct ulist *new_roots = NULL;
	struct rb_node *node;
	u64 num_dirty_extents = 0;
	u64 qgroup_to_skip;
	int ret = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	qgroup_to_skip = delayed_refs->qgroup_to_skip;
	while ((node = rb_first(&delayed_refs->dirty_extent_root))) {
		record = rb_entry(node, struct btrfs_qgroup_extent_record,
				  node);

		num_dirty_extents++;
		trace_btrfs_qgroup_account_extents(fs_info, record);

		if (!ret) {
			/*
			 * Old roots should be searched when inserting the
			 * qgroup extent record
			 */
			if (WARN_ON(!record->old_roots)) {
				/* Search commit root to find old_roots */
				ret = btrfs_find_all_roots(NULL, fs_info,
						record->bytenr, 0,
						&record->old_roots, false);
				if (ret < 0)
					goto cleanup;
			}

			/* Free the reserved data space */
			btrfs_qgroup_free_refroot(fs_info,
					record->data_rsv_refroot,
					record->data_rsv,
					BTRFS_QGROUP_RSV_DATA);
			/*
			 * Use SEQ_LAST as time_seq to do a special search,
			 * which doesn't lock the tree or delayed_refs and
			 * searches the current root. It's safe inside
			 * commit_transaction().
			 */
			ret = btrfs_find_all_roots(trans, fs_info,
				record->bytenr, SEQ_LAST, &new_roots, false);
			if (ret < 0)
				goto cleanup;
			if (qgroup_to_skip) {
				ulist_del(new_roots, qgroup_to_skip, 0);
				ulist_del(record->old_roots, qgroup_to_skip,
					  0);
			}
			ret = btrfs_qgroup_account_extent(trans, record->bytenr,
							  record->num_bytes,
							  record->old_roots,
							  new_roots);
			record->old_roots = NULL;
			new_roots = NULL;
		}
cleanup:
		ulist_free(record->old_roots);
		ulist_free(new_roots);
		new_roots = NULL;
		rb_erase(node, &delayed_refs->dirty_extent_root);
		kfree(record);

	}
	trace_qgroup_num_dirty_extents(fs_info, trans->transid,
				       num_dirty_extents);
	return ret;
}

/*
 * Called from commit_transaction. Writes all changed qgroups to disk.
 */
int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int ret = 0;

	if (!fs_info->quota_root)
		return ret;

	spin_lock(&fs_info->qgroup_lock);
	while (!list_empty(&fs_info->dirty_qgroups)) {
		struct btrfs_qgroup *qgroup;
		qgroup = list_first_entry(&fs_info->dirty_qgroups,
					  struct btrfs_qgroup, dirty);
		list_del_init(&qgroup->dirty);
		spin_unlock(&fs_info->qgroup_lock);
		ret = update_qgroup_info_item(trans, qgroup);
		if (ret)
			fs_info->qgroup_flags |=
					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		ret = update_qgroup_limit_item(trans, qgroup);
		if (ret)
			fs_info->qgroup_flags |=
					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		spin_lock(&fs_info->qgroup_lock);
	}
	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
	else
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
	spin_unlock(&fs_info->qgroup_lock);

	ret = update_qgroup_status_item(trans);
	if (ret)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;

	return ret;
}

/*
 * Copy the accounting information between qgroups. This is necessary
 * when a snapshot or a subvolume is created. Throwing an error will
 * cause a transaction abort so we take extra care here to only error
 * when a readonly fs is a reasonable outcome.
 */
Lu Fengqia93774222018-07-18 14:45:41 +08002698int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
2699 u64 objectid, struct btrfs_qgroup_inherit *inherit)
Arne Jansenbed92ea2012-06-28 18:03:02 +02002700{
2701 int ret = 0;
2702 int i;
2703 u64 *i_qgroups;
Qu Wenruoe88439d2019-06-13 17:31:24 +08002704 bool committing = false;
Lu Fengqia93774222018-07-18 14:45:41 +08002705 struct btrfs_fs_info *fs_info = trans->fs_info;
Filipe Manana552f0322018-11-19 16:20:34 +00002706 struct btrfs_root *quota_root;
Arne Jansenbed92ea2012-06-28 18:03:02 +02002707 struct btrfs_qgroup *srcgroup;
2708 struct btrfs_qgroup *dstgroup;
Qu Wenruocbab8ad2020-04-02 14:37:35 +08002709 bool need_rescan = false;
Arne Jansenbed92ea2012-06-28 18:03:02 +02002710 u32 level_size = 0;
Wang Shilong3f5e2d32013-04-07 10:50:19 +00002711 u64 nums;
Arne Jansenbed92ea2012-06-28 18:03:02 +02002712
Qu Wenruoe88439d2019-06-13 17:31:24 +08002713 /*
2714 * There are only two callers of this function.
2715 *
2716 * One in create_subvol() in the ioctl context, which needs to hold
2717 * the qgroup_ioctl_lock.
2718 *
2719 * The other one in create_pending_snapshot() where no other qgroup
2720 * code can modify the fs as they all need to either start a new trans
2721 * or hold a trans handler, thus we don't need to hold
2722 * qgroup_ioctl_lock.
2723 * This would avoid long and complex lock chain and make lockdep happy.
2724 */
2725 spin_lock(&fs_info->trans_lock);
2726 if (trans->transaction->state == TRANS_STATE_COMMIT_DOING)
2727 committing = true;
2728 spin_unlock(&fs_info->trans_lock);
2729
2730 if (!committing)
2731 mutex_lock(&fs_info->qgroup_ioctl_lock);
Josef Bacikafcdd122016-09-02 15:40:02 -04002732 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
Wang Shilongf2f6ed32013-04-07 10:50:16 +00002733 goto out;
Arne Jansenbed92ea2012-06-28 18:03:02 +02002734
Filipe Manana552f0322018-11-19 16:20:34 +00002735 quota_root = fs_info->quota_root;
Wang Shilongf2f6ed32013-04-07 10:50:16 +00002736 if (!quota_root) {
2737 ret = -EINVAL;
2738 goto out;
2739 }
Arne Jansenbed92ea2012-06-28 18:03:02 +02002740
Wang Shilong3f5e2d32013-04-07 10:50:19 +00002741 if (inherit) {
2742 i_qgroups = (u64 *)(inherit + 1);
2743 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
2744 2 * inherit->num_excl_copies;
2745 for (i = 0; i < nums; ++i) {
2746 srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
Dongsheng Yang09870d22014-11-11 07:18:22 -05002747
Mark Fasheh918c2ee2016-03-30 17:57:48 -07002748 /*
2749 * Zero out invalid groups so we can ignore
2750 * them later.
2751 */
2752 if (!srcgroup ||
2753 ((srcgroup->qgroupid >> 48) <= (objectid >> 48)))
2754 *i_qgroups = 0ULL;
2755
Wang Shilong3f5e2d32013-04-07 10:50:19 +00002756 ++i_qgroups;
2757 }
2758 }
2759
Arne Jansenbed92ea2012-06-28 18:03:02 +02002760 /*
2761 * create a tracking group for the subvol itself
2762 */
2763 ret = add_qgroup_item(trans, quota_root, objectid);
2764 if (ret)
2765 goto out;
2766
Arne Jansenbed92ea2012-06-28 18:03:02 +02002767 /*
2768 * add qgroup to all inherited groups
2769 */
2770 if (inherit) {
2771 i_qgroups = (u64 *)(inherit + 1);
Mark Fasheh918c2ee2016-03-30 17:57:48 -07002772 for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) {
2773 if (*i_qgroups == 0)
2774 continue;
Lu Fengqi711169c2018-07-18 14:45:24 +08002775 ret = add_qgroup_relation_item(trans, objectid,
2776 *i_qgroups);
Mark Fasheh918c2ee2016-03-30 17:57:48 -07002777 if (ret && ret != -EEXIST)
Arne Jansenbed92ea2012-06-28 18:03:02 +02002778 goto out;
Lu Fengqi711169c2018-07-18 14:45:24 +08002779 ret = add_qgroup_relation_item(trans, *i_qgroups,
2780 objectid);
Mark Fasheh918c2ee2016-03-30 17:57:48 -07002781 if (ret && ret != -EEXIST)
Arne Jansenbed92ea2012-06-28 18:03:02 +02002782 goto out;
Arne Jansenbed92ea2012-06-28 18:03:02 +02002783 }
Mark Fasheh918c2ee2016-03-30 17:57:48 -07002784 ret = 0;
Arne Jansenbed92ea2012-06-28 18:03:02 +02002785 }
2786
2787
2788 spin_lock(&fs_info->qgroup_lock);
2789
2790 dstgroup = add_qgroup_rb(fs_info, objectid);
Dan Carpenter57a5a882012-07-30 02:15:43 -06002791 if (IS_ERR(dstgroup)) {
2792 ret = PTR_ERR(dstgroup);
Arne Jansenbed92ea2012-06-28 18:03:02 +02002793 goto unlock;
Dan Carpenter57a5a882012-07-30 02:15:43 -06002794 }
Arne Jansenbed92ea2012-06-28 18:03:02 +02002795
Dongsheng Yange8c85412014-11-20 20:58:34 -05002796 if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
Dongsheng Yange8c85412014-11-20 20:58:34 -05002797 dstgroup->lim_flags = inherit->lim.flags;
2798 dstgroup->max_rfer = inherit->lim.max_rfer;
2799 dstgroup->max_excl = inherit->lim.max_excl;
2800 dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
2801 dstgroup->rsv_excl = inherit->lim.rsv_excl;
Dongsheng Yang1510e712014-11-20 21:01:41 -05002802
Lu Fengqiac8a8662018-07-18 14:45:27 +08002803 ret = update_qgroup_limit_item(trans, dstgroup);
Dongsheng Yang1510e712014-11-20 21:01:41 -05002804 if (ret) {
2805 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
Jeff Mahoney5d163e02016-09-20 10:05:00 -04002806 btrfs_info(fs_info,
2807 "unable to update quota limit for %llu",
2808 dstgroup->qgroupid);
Dongsheng Yang1510e712014-11-20 21:01:41 -05002809 goto unlock;
2810 }
Dongsheng Yange8c85412014-11-20 20:58:34 -05002811 }
2812
Arne Jansenbed92ea2012-06-28 18:03:02 +02002813 if (srcid) {
2814 srcgroup = find_qgroup_rb(fs_info, srcid);
Chris Masonf3a87f12012-09-14 20:06:30 -04002815 if (!srcgroup)
Arne Jansenbed92ea2012-06-28 18:03:02 +02002816 goto unlock;
Josef Bacikfcebe452014-05-13 17:30:47 -07002817
2818 /*
2819 * We call inherit after we clone the root in order to make sure
2820 * our counts don't go crazy, so at this point the only
2821 * difference between the two roots should be the root node.
2822 */
Lu Fengqic8389d42018-07-17 16:58:22 +08002823 level_size = fs_info->nodesize;
Josef Bacikfcebe452014-05-13 17:30:47 -07002824 dstgroup->rfer = srcgroup->rfer;
2825 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
2826 dstgroup->excl = level_size;
2827 dstgroup->excl_cmpr = level_size;
Arne Jansenbed92ea2012-06-28 18:03:02 +02002828 srcgroup->excl = level_size;
2829 srcgroup->excl_cmpr = level_size;
Dongsheng Yang3eeb4d52014-11-20 20:14:38 -05002830
2831 /* inherit the limit info */
2832 dstgroup->lim_flags = srcgroup->lim_flags;
2833 dstgroup->max_rfer = srcgroup->max_rfer;
2834 dstgroup->max_excl = srcgroup->max_excl;
2835 dstgroup->rsv_rfer = srcgroup->rsv_rfer;
2836 dstgroup->rsv_excl = srcgroup->rsv_excl;
2837
Arne Jansenbed92ea2012-06-28 18:03:02 +02002838 qgroup_dirty(fs_info, dstgroup);
2839 qgroup_dirty(fs_info, srcgroup);
2840 }
2841
Chris Masonf3a87f12012-09-14 20:06:30 -04002842 if (!inherit)
Arne Jansenbed92ea2012-06-28 18:03:02 +02002843 goto unlock;
Arne Jansenbed92ea2012-06-28 18:03:02 +02002844
2845 i_qgroups = (u64 *)(inherit + 1);
2846 for (i = 0; i < inherit->num_qgroups; ++i) {
Mark Fasheh918c2ee2016-03-30 17:57:48 -07002847 if (*i_qgroups) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002848 ret = add_relation_rb(fs_info, objectid, *i_qgroups);
Mark Fasheh918c2ee2016-03-30 17:57:48 -07002849 if (ret)
2850 goto unlock;
2851 }
Arne Jansenbed92ea2012-06-28 18:03:02 +02002852 ++i_qgroups;
Qu Wenruocbab8ad2020-04-02 14:37:35 +08002853
2854 /*
2855 * If we're doing a snapshot, and adding the snapshot to a new
2856 * qgroup, the numbers are guaranteed to be incorrect.
2857 */
2858 if (srcid)
2859 need_rescan = true;
Arne Jansenbed92ea2012-06-28 18:03:02 +02002860 }
2861
Mark Fasheh918c2ee2016-03-30 17:57:48 -07002862 for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) {
Arne Jansenbed92ea2012-06-28 18:03:02 +02002863 struct btrfs_qgroup *src;
2864 struct btrfs_qgroup *dst;
2865
Mark Fasheh918c2ee2016-03-30 17:57:48 -07002866 if (!i_qgroups[0] || !i_qgroups[1])
2867 continue;
2868
Arne Jansenbed92ea2012-06-28 18:03:02 +02002869 src = find_qgroup_rb(fs_info, i_qgroups[0]);
2870 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2871
2872 if (!src || !dst) {
2873 ret = -EINVAL;
2874 goto unlock;
2875 }
2876
2877 dst->rfer = src->rfer - level_size;
2878 dst->rfer_cmpr = src->rfer_cmpr - level_size;
Qu Wenruocbab8ad2020-04-02 14:37:35 +08002879
2880 /* Manually tweaking numbers certainly needs a rescan */
2881 need_rescan = true;
Arne Jansenbed92ea2012-06-28 18:03:02 +02002882 }
Mark Fasheh918c2ee2016-03-30 17:57:48 -07002883 for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) {
Arne Jansenbed92ea2012-06-28 18:03:02 +02002884 struct btrfs_qgroup *src;
2885 struct btrfs_qgroup *dst;
2886
Mark Fasheh918c2ee2016-03-30 17:57:48 -07002887 if (!i_qgroups[0] || !i_qgroups[1])
2888 continue;
2889
Arne Jansenbed92ea2012-06-28 18:03:02 +02002890 src = find_qgroup_rb(fs_info, i_qgroups[0]);
2891 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2892
2893 if (!src || !dst) {
2894 ret = -EINVAL;
2895 goto unlock;
2896 }
2897
2898 dst->excl = src->excl + level_size;
2899 dst->excl_cmpr = src->excl_cmpr + level_size;
Qu Wenruocbab8ad2020-04-02 14:37:35 +08002900 need_rescan = true;
Arne Jansenbed92ea2012-06-28 18:03:02 +02002901 }
2902
2903unlock:
2904 spin_unlock(&fs_info->qgroup_lock);
Qu Wenruo49e5fb42020-06-28 13:07:15 +08002905 if (!ret)
2906 ret = btrfs_sysfs_add_one_qgroup(fs_info, dstgroup);
Arne Jansenbed92ea2012-06-28 18:03:02 +02002907out:
Qu Wenruoe88439d2019-06-13 17:31:24 +08002908 if (!committing)
2909 mutex_unlock(&fs_info->qgroup_ioctl_lock);
Qu Wenruocbab8ad2020-04-02 14:37:35 +08002910 if (need_rescan)
2911 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
Arne Jansenbed92ea2012-06-28 18:03:02 +02002912 return ret;
2913}
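
/*
 * For reference, a sketch of the flattened u64 array that trails struct
 * btrfs_qgroup_inherit and that the loops above consume (the concrete
 * counts below are invented for illustration):
 *
 *	u64 *i_qgroups = (u64 *)(inherit + 1);
 *
 *	[0 .. num_qgroups)             qgroupids the new subvolume joins
 *	[.. + 2 * num_ref_copies)      (src, dst) pairs for the rfer copy loop
 *	[.. + 2 * num_excl_copies)     (src, dst) pairs for the excl copy loop
 *
 * E.g. num_qgroups = 2, num_ref_copies = 1, num_excl_copies = 0 gives an
 * array of four u64s: two qgroupids followed by one (src, dst) pair.
 */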
2914
Qu Wenruoadca4d92020-07-13 18:50:49 +08002915static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
Jeff Mahoney003d7c52017-01-25 09:50:33 -05002916{
2917 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
Qu Wenruodba21322017-12-12 15:34:25 +08002918 qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer)
Jeff Mahoney003d7c52017-01-25 09:50:33 -05002919 return false;
2920
2921 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
Qu Wenruodba21322017-12-12 15:34:25 +08002922 qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl)
Jeff Mahoney003d7c52017-01-25 09:50:33 -05002923 return false;
2924
2925 return true;
2926}
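
/*
 * A worked example for qgroup_check_limits() above, with invented numbers:
 * assume lim_flags has BTRFS_QGROUP_LIMIT_MAX_RFER set, max_rfer = 1024K,
 * rfer = 900K and qgroup_rsv_total() = 100K.  A new 64K reservation is
 * then rejected:
 *
 *	qgroup_rsv_total(qg) + qg->rfer + num_bytes
 *	= 100K + 900K + 64K = 1064K > 1024K = max_rfer
 *
 * while a 16K reservation (100K + 900K + 16K = 1016K) still fits.
 */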
2927
Qu Wenruodba21322017-12-12 15:34:25 +08002928static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
2929 enum btrfs_qgroup_rsv_type type)
Arne Jansenbed92ea2012-06-28 18:03:02 +02002930{
Arne Jansenbed92ea2012-06-28 18:03:02 +02002931 struct btrfs_qgroup *qgroup;
2932 struct btrfs_fs_info *fs_info = root->fs_info;
2933 u64 ref_root = root->root_key.objectid;
2934 int ret = 0;
Arne Jansenbed92ea2012-06-28 18:03:02 +02002935 struct ulist_node *unode;
2936 struct ulist_iterator uiter;
2937
2938 if (!is_fstree(ref_root))
2939 return 0;
2940
2941 if (num_bytes == 0)
2942 return 0;
Sargun Dhillonf29efe22017-05-11 21:17:33 +00002943
2944 if (test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags) &&
2945 capable(CAP_SYS_RESOURCE))
2946 enforce = false;
2947
Arne Jansenbed92ea2012-06-28 18:03:02 +02002948 spin_lock(&fs_info->qgroup_lock);
Marcos Paulo de Souzae3b0edd2019-11-25 21:58:50 -03002949 if (!fs_info->quota_root)
Arne Jansenbed92ea2012-06-28 18:03:02 +02002950 goto out;
2951
2952 qgroup = find_qgroup_rb(fs_info, ref_root);
2953 if (!qgroup)
2954 goto out;
2955
2956 /*
	2957	 * In the first step, check all affected qgroups to see whether any
	2958	 * limit would be exceeded
2959 */
Wang Shilong1e8f9152013-05-06 11:03:27 +00002960 ulist_reinit(fs_info->qgroup_ulist);
2961 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
David Sterbaa1840b52018-03-27 19:04:50 +02002962 qgroup_to_aux(qgroup), GFP_ATOMIC);
Wang Shilong3c971852013-04-17 14:00:36 +00002963 if (ret < 0)
2964 goto out;
Arne Jansenbed92ea2012-06-28 18:03:02 +02002965 ULIST_ITER_INIT(&uiter);
Wang Shilong1e8f9152013-05-06 11:03:27 +00002966 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
Arne Jansenbed92ea2012-06-28 18:03:02 +02002967 struct btrfs_qgroup *qg;
2968 struct btrfs_qgroup_list *glist;
2969
David Sterbaef2fff62016-10-26 16:23:50 +02002970 qg = unode_aux_to_qgroup(unode);
Arne Jansenbed92ea2012-06-28 18:03:02 +02002971
Qu Wenruoadca4d92020-07-13 18:50:49 +08002972 if (enforce && !qgroup_check_limits(qg, num_bytes)) {
Arne Jansenbed92ea2012-06-28 18:03:02 +02002973 ret = -EDQUOT;
Wang Shilong720f1e22013-03-06 11:51:47 +00002974 goto out;
2975 }
Arne Jansenbed92ea2012-06-28 18:03:02 +02002976
2977 list_for_each_entry(glist, &qg->groups, next_group) {
Wang Shilong1e8f9152013-05-06 11:03:27 +00002978 ret = ulist_add(fs_info->qgroup_ulist,
2979 glist->group->qgroupid,
David Sterbaa1840b52018-03-27 19:04:50 +02002980 qgroup_to_aux(glist->group), GFP_ATOMIC);
Wang Shilong3c971852013-04-17 14:00:36 +00002981 if (ret < 0)
2982 goto out;
Arne Jansenbed92ea2012-06-28 18:03:02 +02002983 }
2984 }
Wang Shilong3c971852013-04-17 14:00:36 +00002985 ret = 0;
Arne Jansenbed92ea2012-06-28 18:03:02 +02002986 /*
	2987	 * No limit was exceeded, now record the reservation into all qgroups
2988 */
2989 ULIST_ITER_INIT(&uiter);
Wang Shilong1e8f9152013-05-06 11:03:27 +00002990 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
Arne Jansenbed92ea2012-06-28 18:03:02 +02002991 struct btrfs_qgroup *qg;
2992
David Sterbaef2fff62016-10-26 16:23:50 +02002993 qg = unode_aux_to_qgroup(unode);
Arne Jansenbed92ea2012-06-28 18:03:02 +02002994
Qu Wenruo64ee4e72017-12-12 15:34:27 +08002995 qgroup_rsv_add(fs_info, qg, num_bytes, type);
Arne Jansenbed92ea2012-06-28 18:03:02 +02002996 }
2997
2998out:
2999 spin_unlock(&fs_info->qgroup_lock);
Arne Jansenbed92ea2012-06-28 18:03:02 +02003000 return ret;
3001}
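
/*
 * A worked trace of the two-pass walk in qgroup_reserve() above, assuming
 * a hypothetical hierarchy where level 0 qgroup 0/257 is a member of 1/100
 * and 4K is reserved against root 257:
 *
 *	ulist after setup:	{ 0/257, 1/100 }
 *	first pass:		qgroup_check_limits(0/257), then (1/100)
 *	second pass:		qgroup_rsv_add(0/257, 4K), then (1/100, 4K)
 *
 * Either every qgroup in the hierarchy is charged or, on -EDQUOT, none is.
 */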
3002
Qu Wenruoe1211d02017-12-12 15:34:30 +08003003/*
	3004	 * Free @num_bytes of reserved space with @type for a qgroup (normally a
	3005	 * level 0 qgroup).
	3006	 *
	3007	 * Will handle all higher level qgroups too.
3008 *
3009 * NOTE: If @num_bytes is (u64)-1, this means to free all bytes of this qgroup.
3010 * This special case is only used for META_PERTRANS type.
3011 */
Qu Wenruo297d7502015-09-08 17:08:37 +08003012void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
Qu Wenruod4e5c922017-12-12 15:34:23 +08003013 u64 ref_root, u64 num_bytes,
3014 enum btrfs_qgroup_rsv_type type)
Arne Jansenbed92ea2012-06-28 18:03:02 +02003015{
Arne Jansenbed92ea2012-06-28 18:03:02 +02003016 struct btrfs_qgroup *qgroup;
Arne Jansenbed92ea2012-06-28 18:03:02 +02003017 struct ulist_node *unode;
3018 struct ulist_iterator uiter;
Wang Shilong3c971852013-04-17 14:00:36 +00003019 int ret = 0;
Arne Jansenbed92ea2012-06-28 18:03:02 +02003020
3021 if (!is_fstree(ref_root))
3022 return;
3023
3024 if (num_bytes == 0)
3025 return;
3026
Qu Wenruoe1211d02017-12-12 15:34:30 +08003027 if (num_bytes == (u64)-1 && type != BTRFS_QGROUP_RSV_META_PERTRANS) {
3028 WARN(1, "%s: Invalid type to free", __func__);
3029 return;
3030 }
Arne Jansenbed92ea2012-06-28 18:03:02 +02003031 spin_lock(&fs_info->qgroup_lock);
3032
Marcos Paulo de Souzae3b0edd2019-11-25 21:58:50 -03003033 if (!fs_info->quota_root)
Arne Jansenbed92ea2012-06-28 18:03:02 +02003034 goto out;
3035
3036 qgroup = find_qgroup_rb(fs_info, ref_root);
3037 if (!qgroup)
3038 goto out;
3039
Qu Wenruoe1211d02017-12-12 15:34:30 +08003040 if (num_bytes == (u64)-1)
Qu Wenruo82874752017-12-12 15:34:34 +08003041 /*
3042 * We're freeing all pertrans rsv, get reserved value from
3043 * level 0 qgroup as real num_bytes to free.
3044 */
Qu Wenruoe1211d02017-12-12 15:34:30 +08003045 num_bytes = qgroup->rsv.values[type];
3046
Wang Shilong1e8f9152013-05-06 11:03:27 +00003047 ulist_reinit(fs_info->qgroup_ulist);
3048 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
David Sterbaa1840b52018-03-27 19:04:50 +02003049 qgroup_to_aux(qgroup), GFP_ATOMIC);
Wang Shilong3c971852013-04-17 14:00:36 +00003050 if (ret < 0)
3051 goto out;
Arne Jansenbed92ea2012-06-28 18:03:02 +02003052 ULIST_ITER_INIT(&uiter);
Wang Shilong1e8f9152013-05-06 11:03:27 +00003053 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
Arne Jansenbed92ea2012-06-28 18:03:02 +02003054 struct btrfs_qgroup *qg;
3055 struct btrfs_qgroup_list *glist;
3056
David Sterbaef2fff62016-10-26 16:23:50 +02003057 qg = unode_aux_to_qgroup(unode);
Arne Jansenbed92ea2012-06-28 18:03:02 +02003058
Qu Wenruo64ee4e72017-12-12 15:34:27 +08003059 qgroup_rsv_release(fs_info, qg, num_bytes, type);
Arne Jansenbed92ea2012-06-28 18:03:02 +02003060
3061 list_for_each_entry(glist, &qg->groups, next_group) {
Wang Shilong1e8f9152013-05-06 11:03:27 +00003062 ret = ulist_add(fs_info->qgroup_ulist,
3063 glist->group->qgroupid,
David Sterbaa1840b52018-03-27 19:04:50 +02003064 qgroup_to_aux(glist->group), GFP_ATOMIC);
Wang Shilong3c971852013-04-17 14:00:36 +00003065 if (ret < 0)
3066 goto out;
Arne Jansenbed92ea2012-06-28 18:03:02 +02003067 }
3068 }
3069
3070out:
3071 spin_unlock(&fs_info->qgroup_lock);
Arne Jansenbed92ea2012-06-28 18:03:02 +02003072}
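
/*
 * Example of the (u64)-1 special case above, as used later in this file by
 * btrfs_qgroup_free_meta_all_pertrans() to drop every pertrans byte of a
 * subvolume without knowing the exact amount:
 *
 *	btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid,
 *				  (u64)-1, BTRFS_QGROUP_RSV_META_PERTRANS);
 */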
3073
Jan Schmidt2f232032013-04-25 16:04:51 +00003074/*
Qu Wenruoff3d27a02018-05-14 09:38:13 +08003075 * Check if the leaf is the last leaf, which means all node pointers
3076 * are at their last position.
3077 */
3078static bool is_last_leaf(struct btrfs_path *path)
3079{
3080 int i;
3081
3082 for (i = 1; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
3083 if (path->slots[i] != btrfs_header_nritems(path->nodes[i]) - 1)
3084 return false;
3085 }
3086 return true;
3087}
3088
3089/*
Jan Schmidt2f232032013-04-25 16:04:51 +00003090 * Returns < 0 on error, 0 when more leaves are to be scanned.
Qu Wenruo33931682015-02-27 16:24:24 +08003091 * Returns 1 when done.
Jan Schmidt2f232032013-04-25 16:04:51 +00003092 */
Lu Fengqi62088ca2018-07-18 14:45:42 +08003093static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
3094 struct btrfs_path *path)
Jan Schmidt2f232032013-04-25 16:04:51 +00003095{
Lu Fengqi62088ca2018-07-18 14:45:42 +08003096 struct btrfs_fs_info *fs_info = trans->fs_info;
Jan Schmidt2f232032013-04-25 16:04:51 +00003097 struct btrfs_key found;
Qu Wenruo0a0e8b892015-10-26 09:19:43 +08003098 struct extent_buffer *scratch_leaf = NULL;
Jan Schmidt2f232032013-04-25 16:04:51 +00003099 struct ulist *roots = NULL;
Josef Bacikfcebe452014-05-13 17:30:47 -07003100 u64 num_bytes;
Qu Wenruoff3d27a02018-05-14 09:38:13 +08003101 bool done;
Jan Schmidt2f232032013-04-25 16:04:51 +00003102 int slot;
3103 int ret;
3104
Jan Schmidt2f232032013-04-25 16:04:51 +00003105 mutex_lock(&fs_info->qgroup_rescan_lock);
3106 ret = btrfs_search_slot_for_read(fs_info->extent_root,
3107 &fs_info->qgroup_rescan_progress,
3108 path, 1, 0);
3109
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04003110 btrfs_debug(fs_info,
3111 "current progress key (%llu %u %llu), search_slot ret %d",
3112 fs_info->qgroup_rescan_progress.objectid,
3113 fs_info->qgroup_rescan_progress.type,
3114 fs_info->qgroup_rescan_progress.offset, ret);
Jan Schmidt2f232032013-04-25 16:04:51 +00003115
3116 if (ret) {
3117 /*
3118 * The rescan is about to end, we will not be scanning any
3119 * further blocks. We cannot unset the RESCAN flag here, because
3120 * we want to commit the transaction if everything went well.
3121 * To make the live accounting work in this phase, we set our
3122 * scan progress pointer such that every real extent objectid
3123 * will be smaller.
3124 */
3125 fs_info->qgroup_rescan_progress.objectid = (u64)-1;
3126 btrfs_release_path(path);
3127 mutex_unlock(&fs_info->qgroup_rescan_lock);
3128 return ret;
3129 }
Qu Wenruoff3d27a02018-05-14 09:38:13 +08003130 done = is_last_leaf(path);
Jan Schmidt2f232032013-04-25 16:04:51 +00003131
3132 btrfs_item_key_to_cpu(path->nodes[0], &found,
3133 btrfs_header_nritems(path->nodes[0]) - 1);
3134 fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
3135
Qu Wenruo0a0e8b892015-10-26 09:19:43 +08003136 scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]);
3137 if (!scratch_leaf) {
3138 ret = -ENOMEM;
3139 mutex_unlock(&fs_info->qgroup_rescan_lock);
3140 goto out;
3141 }
Jan Schmidt2f232032013-04-25 16:04:51 +00003142 slot = path->slots[0];
3143 btrfs_release_path(path);
3144 mutex_unlock(&fs_info->qgroup_rescan_lock);
3145
3146 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
3147 btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
Josef Bacik3a6d75e2014-01-23 16:45:10 -05003148 if (found.type != BTRFS_EXTENT_ITEM_KEY &&
3149 found.type != BTRFS_METADATA_ITEM_KEY)
Jan Schmidt2f232032013-04-25 16:04:51 +00003150 continue;
Josef Bacik3a6d75e2014-01-23 16:45:10 -05003151 if (found.type == BTRFS_METADATA_ITEM_KEY)
Jeff Mahoneyda170662016-06-15 09:22:56 -04003152 num_bytes = fs_info->nodesize;
Josef Bacik3a6d75e2014-01-23 16:45:10 -05003153 else
3154 num_bytes = found.offset;
3155
Josef Bacikfcebe452014-05-13 17:30:47 -07003156 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
Zygo Blaxellc995ab32017-09-22 13:58:45 -04003157 &roots, false);
Jan Schmidt2f232032013-04-25 16:04:51 +00003158 if (ret < 0)
3159 goto out;
Qu Wenruo9d220c92015-04-13 11:02:16 +08003160 /* For rescan, just pass old_roots as NULL */
Lu Fengqi8696d762018-07-18 14:45:39 +08003161 ret = btrfs_qgroup_account_extent(trans, found.objectid,
3162 num_bytes, NULL, roots);
Qu Wenruo9d220c92015-04-13 11:02:16 +08003163 if (ret < 0)
Jan Schmidt2f232032013-04-25 16:04:51 +00003164 goto out;
Jan Schmidt2f232032013-04-25 16:04:51 +00003165 }
Jan Schmidt2f232032013-04-25 16:04:51 +00003166out:
Nikolay Borisovdf449712018-08-15 18:26:56 +03003167 if (scratch_leaf)
Qu Wenruo0a0e8b892015-10-26 09:19:43 +08003168 free_extent_buffer(scratch_leaf);
Jan Schmidt2f232032013-04-25 16:04:51 +00003169
Qu Wenruo6f7de192018-06-27 18:19:55 +08003170 if (done && !ret) {
Qu Wenruoff3d27a02018-05-14 09:38:13 +08003171 ret = 1;
Qu Wenruo6f7de192018-06-27 18:19:55 +08003172 fs_info->qgroup_rescan_progress.objectid = (u64)-1;
3173 }
Jan Schmidt2f232032013-04-25 16:04:51 +00003174 return ret;
3175}
3176
Qu Wenruod458b052014-02-28 10:46:19 +08003177static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
Jan Schmidt2f232032013-04-25 16:04:51 +00003178{
Jan Schmidtb382a322013-05-28 15:47:24 +00003179 struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
3180 qgroup_rescan_work);
Jan Schmidt2f232032013-04-25 16:04:51 +00003181 struct btrfs_path *path;
3182 struct btrfs_trans_handle *trans = NULL;
Jan Schmidt2f232032013-04-25 16:04:51 +00003183 int err = -ENOMEM;
Qu Wenruo53b7cde2015-02-27 16:24:25 +08003184 int ret = 0;
Jan Schmidt2f232032013-04-25 16:04:51 +00003185
3186 path = btrfs_alloc_path();
3187 if (!path)
3188 goto out;
Qu Wenruob6debf12018-05-14 09:38:12 +08003189 /*
	3190	 * Rescan should only search the commit root; any later difference is
	3191	 * recorded by the qgroup accounting code
3192 */
3193 path->search_commit_root = 1;
3194 path->skip_locking = 1;
Jan Schmidt2f232032013-04-25 16:04:51 +00003195
3196 err = 0;
Justin Maggard7343dd62015-11-04 15:56:16 -08003197 while (!err && !btrfs_fs_closing(fs_info)) {
Jan Schmidt2f232032013-04-25 16:04:51 +00003198 trans = btrfs_start_transaction(fs_info->fs_root, 0);
3199 if (IS_ERR(trans)) {
3200 err = PTR_ERR(trans);
3201 break;
3202 }
Josef Bacikafcdd122016-09-02 15:40:02 -04003203 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
Jan Schmidt2f232032013-04-25 16:04:51 +00003204 err = -EINTR;
3205 } else {
Lu Fengqi62088ca2018-07-18 14:45:42 +08003206 err = qgroup_rescan_leaf(trans, path);
Jan Schmidt2f232032013-04-25 16:04:51 +00003207 }
3208 if (err > 0)
Jeff Mahoney3a45bb22016-09-09 21:39:03 -04003209 btrfs_commit_transaction(trans);
Jan Schmidt2f232032013-04-25 16:04:51 +00003210 else
Jeff Mahoney3a45bb22016-09-09 21:39:03 -04003211 btrfs_end_transaction(trans);
Jan Schmidt2f232032013-04-25 16:04:51 +00003212 }
3213
3214out:
Jan Schmidt2f232032013-04-25 16:04:51 +00003215 btrfs_free_path(path);
Jan Schmidt2f232032013-04-25 16:04:51 +00003216
3217 mutex_lock(&fs_info->qgroup_rescan_lock);
Qu Wenruo33931682015-02-27 16:24:24 +08003218 if (err > 0 &&
Jan Schmidt2f232032013-04-25 16:04:51 +00003219 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
3220 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
3221 } else if (err < 0) {
3222 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
3223 }
3224 mutex_unlock(&fs_info->qgroup_rescan_lock);
3225
Qu Wenruo53b7cde2015-02-27 16:24:25 +08003226 /*
Nicholas D Steeves01327612016-05-19 21:18:45 -04003227	 * Only update the status, since the previous part has already updated the
Qu Wenruo53b7cde2015-02-27 16:24:25 +08003228 * qgroup info.
3229 */
3230 trans = btrfs_start_transaction(fs_info->quota_root, 1);
3231 if (IS_ERR(trans)) {
3232 err = PTR_ERR(trans);
Filipe Manana13fc1d22019-09-24 10:49:54 +01003233 trans = NULL;
Qu Wenruo53b7cde2015-02-27 16:24:25 +08003234 btrfs_err(fs_info,
David Sterba913e1532017-07-13 15:32:18 +02003235 "fail to start transaction for status update: %d",
Qu Wenruo53b7cde2015-02-27 16:24:25 +08003236 err);
Qu Wenruo53b7cde2015-02-27 16:24:25 +08003237 }
Filipe Manana13fc1d22019-09-24 10:49:54 +01003238
3239 mutex_lock(&fs_info->qgroup_rescan_lock);
3240 if (!btrfs_fs_closing(fs_info))
3241 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
3242 if (trans) {
3243 ret = update_qgroup_status_item(trans);
3244 if (ret < 0) {
3245 err = ret;
3246 btrfs_err(fs_info, "fail to update qgroup status: %d",
3247 err);
3248 }
Qu Wenruo53b7cde2015-02-27 16:24:25 +08003249 }
Filipe Manana13fc1d22019-09-24 10:49:54 +01003250 fs_info->qgroup_rescan_running = false;
3251 complete_all(&fs_info->qgroup_rescan_completion);
3252 mutex_unlock(&fs_info->qgroup_rescan_lock);
3253
3254 if (!trans)
3255 return;
3256
Jeff Mahoney3a45bb22016-09-09 21:39:03 -04003257 btrfs_end_transaction(trans);
Qu Wenruo53b7cde2015-02-27 16:24:25 +08003258
Justin Maggard7343dd62015-11-04 15:56:16 -08003259 if (btrfs_fs_closing(fs_info)) {
3260 btrfs_info(fs_info, "qgroup scan paused");
3261 } else if (err >= 0) {
Frank Holtonefe120a2013-12-20 11:37:06 -05003262 btrfs_info(fs_info, "qgroup scan completed%s",
Qu Wenruo33931682015-02-27 16:24:24 +08003263 err > 0 ? " (inconsistency flag cleared)" : "");
Jan Schmidt2f232032013-04-25 16:04:51 +00003264 } else {
Frank Holtonefe120a2013-12-20 11:37:06 -05003265 btrfs_err(fs_info, "qgroup scan failed with %d", err);
Jan Schmidt2f232032013-04-25 16:04:51 +00003266 }
3267}
3268
Jan Schmidtb382a322013-05-28 15:47:24 +00003269/*
3270 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
3271 * memory required for the rescan context.
3272 */
3273static int
3274qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
3275 int init_flags)
Jan Schmidt2f232032013-04-25 16:04:51 +00003276{
3277 int ret = 0;
Jan Schmidt2f232032013-04-25 16:04:51 +00003278
Qu Wenruo9593bf492018-05-02 13:28:03 +08003279 if (!init_flags) {
3280 /* we're resuming qgroup rescan at mount time */
Filipe Mananae4e7ede2018-06-27 00:43:15 +01003281 if (!(fs_info->qgroup_flags &
3282 BTRFS_QGROUP_STATUS_FLAG_RESCAN)) {
Qu Wenruo9593bf492018-05-02 13:28:03 +08003283 btrfs_warn(fs_info,
Nikolay Borisov37d02592019-11-18 14:16:44 +02003284 "qgroup rescan init failed, qgroup rescan is not queued");
Filipe Mananae4e7ede2018-06-27 00:43:15 +01003285 ret = -EINVAL;
3286 } else if (!(fs_info->qgroup_flags &
3287 BTRFS_QGROUP_STATUS_FLAG_ON)) {
Qu Wenruo9593bf492018-05-02 13:28:03 +08003288 btrfs_warn(fs_info,
Nikolay Borisov37d02592019-11-18 14:16:44 +02003289 "qgroup rescan init failed, qgroup is not enabled");
Filipe Mananae4e7ede2018-06-27 00:43:15 +01003290 ret = -EINVAL;
3291 }
3292
3293 if (ret)
3294 return ret;
Jan Schmidtb382a322013-05-28 15:47:24 +00003295 }
Jan Schmidt2f232032013-04-25 16:04:51 +00003296
3297 mutex_lock(&fs_info->qgroup_rescan_lock);
Jan Schmidtb382a322013-05-28 15:47:24 +00003298
3299 if (init_flags) {
Qu Wenruo9593bf492018-05-02 13:28:03 +08003300 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
3301 btrfs_warn(fs_info,
3302 "qgroup rescan is already in progress");
Jan Schmidtb382a322013-05-28 15:47:24 +00003303 ret = -EINPROGRESS;
Qu Wenruo9593bf492018-05-02 13:28:03 +08003304 } else if (!(fs_info->qgroup_flags &
3305 BTRFS_QGROUP_STATUS_FLAG_ON)) {
3306 btrfs_warn(fs_info,
3307 "qgroup rescan init failed, qgroup is not enabled");
Jan Schmidtb382a322013-05-28 15:47:24 +00003308 ret = -EINVAL;
Qu Wenruo9593bf492018-05-02 13:28:03 +08003309 }
Jan Schmidtb382a322013-05-28 15:47:24 +00003310
3311 if (ret) {
Jan Schmidtb382a322013-05-28 15:47:24 +00003312 mutex_unlock(&fs_info->qgroup_rescan_lock);
Qu Wenruo9593bf492018-05-02 13:28:03 +08003313 return ret;
Jan Schmidtb382a322013-05-28 15:47:24 +00003314 }
Jan Schmidtb382a322013-05-28 15:47:24 +00003315 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
3316 }
3317
3318 memset(&fs_info->qgroup_rescan_progress, 0,
3319 sizeof(fs_info->qgroup_rescan_progress));
3320 fs_info->qgroup_rescan_progress.objectid = progress_objectid;
Filipe Manana190631f2015-11-05 10:06:23 +00003321 init_completion(&fs_info->qgroup_rescan_completion);
Jan Schmidtb382a322013-05-28 15:47:24 +00003322 mutex_unlock(&fs_info->qgroup_rescan_lock);
3323
Qu Wenruofc97fab2014-02-28 10:46:16 +08003324 btrfs_init_work(&fs_info->qgroup_rescan_work,
3325 btrfs_qgroup_rescan_worker, NULL, NULL);
Jan Schmidtb382a322013-05-28 15:47:24 +00003326 return 0;
3327}
Jan Schmidt2f232032013-04-25 16:04:51 +00003328
Jan Schmidtb382a322013-05-28 15:47:24 +00003329static void
3330qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
3331{
3332 struct rb_node *n;
3333 struct btrfs_qgroup *qgroup;
3334
3335 spin_lock(&fs_info->qgroup_lock);
Jan Schmidt2f232032013-04-25 16:04:51 +00003336 /* clear all current qgroup tracking information */
3337 for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
3338 qgroup = rb_entry(n, struct btrfs_qgroup, node);
3339 qgroup->rfer = 0;
3340 qgroup->rfer_cmpr = 0;
3341 qgroup->excl = 0;
3342 qgroup->excl_cmpr = 0;
Qu Wenruo9c7b0c22018-08-10 10:20:26 +08003343 qgroup_dirty(fs_info, qgroup);
Jan Schmidt2f232032013-04-25 16:04:51 +00003344 }
3345 spin_unlock(&fs_info->qgroup_lock);
Jan Schmidtb382a322013-05-28 15:47:24 +00003346}
Jan Schmidt2f232032013-04-25 16:04:51 +00003347
Jan Schmidtb382a322013-05-28 15:47:24 +00003348int
3349btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
3350{
3351 int ret = 0;
3352 struct btrfs_trans_handle *trans;
3353
3354 ret = qgroup_rescan_init(fs_info, 0, 1);
3355 if (ret)
3356 return ret;
3357
3358 /*
3359 * We have set the rescan_progress to 0, which means no more
3360 * delayed refs will be accounted by btrfs_qgroup_account_ref.
3361 * However, btrfs_qgroup_account_ref may be right after its call
3362 * to btrfs_find_all_roots, in which case it would still do the
3363 * accounting.
3364 * To solve this, we're committing the transaction, which will
3365 * ensure we run all delayed refs and only after that, we are
3366 * going to clear all tracking information for a clean start.
3367 */
3368
3369 trans = btrfs_join_transaction(fs_info->fs_root);
3370 if (IS_ERR(trans)) {
3371 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
3372 return PTR_ERR(trans);
3373 }
Jeff Mahoney3a45bb22016-09-09 21:39:03 -04003374 ret = btrfs_commit_transaction(trans);
Jan Schmidtb382a322013-05-28 15:47:24 +00003375 if (ret) {
3376 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
3377 return ret;
3378 }
3379
3380 qgroup_rescan_zero_tracking(fs_info);
3381
Qu Wenruod61acbb2020-02-07 13:38:20 +08003382 mutex_lock(&fs_info->qgroup_rescan_lock);
3383 fs_info->qgroup_rescan_running = true;
Qu Wenruofc97fab2014-02-28 10:46:16 +08003384 btrfs_queue_work(fs_info->qgroup_rescan_workers,
3385 &fs_info->qgroup_rescan_work);
Qu Wenruod61acbb2020-02-07 13:38:20 +08003386 mutex_unlock(&fs_info->qgroup_rescan_lock);
Jan Schmidt2f232032013-04-25 16:04:51 +00003387
3388 return 0;
3389}
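
/*
 * A sketch of how a caller (e.g. the rescan ioctl, which lives outside
 * this file) might drive a full rescan with the helper above and the wait
 * helper below, error handling trimmed:
 *
 *	ret = btrfs_qgroup_rescan(fs_info);
 *	if (!ret)
 *		ret = btrfs_qgroup_wait_for_completion(fs_info, true);
 */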
Jan Schmidt57254b6e2013-05-06 19:14:17 +00003390
Jeff Mahoneyd06f23d2016-08-08 22:08:06 -04003391int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
3392 bool interruptible)
Jan Schmidt57254b6e2013-05-06 19:14:17 +00003393{
3394 int running;
3395 int ret = 0;
3396
3397 mutex_lock(&fs_info->qgroup_rescan_lock);
Jeff Mahoneyd2c609b2016-08-15 12:10:33 -04003398 running = fs_info->qgroup_rescan_running;
Jan Schmidt57254b6e2013-05-06 19:14:17 +00003399 mutex_unlock(&fs_info->qgroup_rescan_lock);
3400
Jeff Mahoneyd06f23d2016-08-08 22:08:06 -04003401 if (!running)
3402 return 0;
3403
3404 if (interruptible)
Jan Schmidt57254b6e2013-05-06 19:14:17 +00003405 ret = wait_for_completion_interruptible(
3406 &fs_info->qgroup_rescan_completion);
Jeff Mahoneyd06f23d2016-08-08 22:08:06 -04003407 else
3408 wait_for_completion(&fs_info->qgroup_rescan_completion);
Jan Schmidt57254b6e2013-05-06 19:14:17 +00003409
3410 return ret;
3411}
Jan Schmidtb382a322013-05-28 15:47:24 +00003412
3413/*
	3414	 * This is only called from open_ctree() where we're still single threaded,
	3415	 * thus locking is omitted here.
3416 */
3417void
3418btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
3419{
Qu Wenruod61acbb2020-02-07 13:38:20 +08003420 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
3421 mutex_lock(&fs_info->qgroup_rescan_lock);
3422 fs_info->qgroup_rescan_running = true;
Qu Wenruofc97fab2014-02-28 10:46:16 +08003423 btrfs_queue_work(fs_info->qgroup_rescan_workers,
3424 &fs_info->qgroup_rescan_work);
Qu Wenruod61acbb2020-02-07 13:38:20 +08003425 mutex_unlock(&fs_info->qgroup_rescan_lock);
3426 }
Jan Schmidtb382a322013-05-28 15:47:24 +00003427}
Qu Wenruo52472552015-10-12 16:05:40 +08003428
Qu Wenruo263da812020-07-08 14:24:45 +08003429#define rbtree_iterate_from_safe(node, next, start) \
3430 for (node = start; node && ({ next = rb_next(node); 1;}); node = next)
3431
3432static int qgroup_unreserve_range(struct btrfs_inode *inode,
3433 struct extent_changeset *reserved, u64 start,
3434 u64 len)
3435{
3436 struct rb_node *node;
3437 struct rb_node *next;
Dan Carpenterf07728d2020-10-23 14:26:33 +03003438 struct ulist_node *entry;
Qu Wenruo263da812020-07-08 14:24:45 +08003439 int ret = 0;
3440
3441 node = reserved->range_changed.root.rb_node;
Dan Carpenterf07728d2020-10-23 14:26:33 +03003442 if (!node)
3443 return 0;
Qu Wenruo263da812020-07-08 14:24:45 +08003444 while (node) {
3445 entry = rb_entry(node, struct ulist_node, rb_node);
3446 if (entry->val < start)
3447 node = node->rb_right;
Qu Wenruo263da812020-07-08 14:24:45 +08003448 else
Dan Carpenterf07728d2020-10-23 14:26:33 +03003449 node = node->rb_left;
Qu Wenruo263da812020-07-08 14:24:45 +08003450 }
3451
Qu Wenruo263da812020-07-08 14:24:45 +08003452 if (entry->val > start && rb_prev(&entry->rb_node))
3453 entry = rb_entry(rb_prev(&entry->rb_node), struct ulist_node,
3454 rb_node);
3455
3456 rbtree_iterate_from_safe(node, next, &entry->rb_node) {
3457 u64 entry_start;
3458 u64 entry_end;
3459 u64 entry_len;
3460 int clear_ret;
3461
3462 entry = rb_entry(node, struct ulist_node, rb_node);
3463 entry_start = entry->val;
3464 entry_end = entry->aux;
3465 entry_len = entry_end - entry_start + 1;
3466
3467 if (entry_start >= start + len)
3468 break;
3469 if (entry_start + entry_len <= start)
3470 continue;
3471 /*
3472 * Now the entry is in [start, start + len), revert the
3473 * EXTENT_QGROUP_RESERVED bit.
3474 */
3475 clear_ret = clear_extent_bits(&inode->io_tree, entry_start,
3476 entry_end, EXTENT_QGROUP_RESERVED);
3477 if (!ret && clear_ret < 0)
3478 ret = clear_ret;
3479
3480 ulist_del(&reserved->range_changed, entry->val, entry->aux);
3481 if (likely(reserved->bytes_changed >= entry_len)) {
3482 reserved->bytes_changed -= entry_len;
3483 } else {
3484 WARN_ON(1);
3485 reserved->bytes_changed = 0;
3486 }
3487 }
3488
3489 return ret;
3490}
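
/*
 * Note on granularity, with invented numbers: the loop above reverts any
 * recorded range intersecting [start, start + len) in full.  E.g. with a
 * recorded entry covering [4K, 16K) and an unreserve of [8K, 16K), the
 * whole 12K entry is cleared and bytes_changed drops by 12K, not 8K,
 * since @reserved only tracks what this reservation attempt newly set.
 */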
3491
Qu Wenruo52472552015-10-12 16:05:40 +08003492/*
Qu Wenruoc53e9652020-07-13 18:50:48 +08003493 * Try to free some space for qgroup.
Qu Wenruo52472552015-10-12 16:05:40 +08003494 *
Qu Wenruoc53e9652020-07-13 18:50:48 +08003495 * For qgroup, there are only 3 ways to free qgroup space:
3496 * - Flush nodatacow write
3497 * Any nodatacow write will free its reserved data space at run_delalloc_range().
3498 * In theory, we should only flush nodatacow inodes, but it's not yet
3499 * possible, so we need to flush the whole root.
Qu Wenruo52472552015-10-12 16:05:40 +08003500 *
Qu Wenruoc53e9652020-07-13 18:50:48 +08003501 * - Wait for ordered extents
3502 * When ordered extents are finished, their reserved metadata is finally
3503 * converted to per_trans status, which can be freed by later commit
3504 * transaction.
Qu Wenruo52472552015-10-12 16:05:40 +08003505 *
Qu Wenruoc53e9652020-07-13 18:50:48 +08003506 * - Commit transaction
3507 * This would free the meta_per_trans space.
	3508	 * In theory this shouldn't provide much space, but when we are this
	3509	 * close to the limit, any freed space helps.
Qu Wenruo52472552015-10-12 16:05:40 +08003510 */
Qu Wenruoc53e9652020-07-13 18:50:48 +08003511static int try_flush_qgroup(struct btrfs_root *root)
3512{
3513 struct btrfs_trans_handle *trans;
3514 int ret;
Qu Wenruo6f232772020-11-11 19:38:18 +08003515 bool can_commit = true;
Qu Wenruoc53e9652020-07-13 18:50:48 +08003516
3517 /*
3518 * We don't want to run flush again and again, so if there is a running
3519 * one, we won't try to start a new flush, but exit directly.
3520 */
3521 if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) {
3522 wait_event(root->qgroup_flush_wait,
3523 !test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state));
3524 return 0;
3525 }
3526
Qu Wenruo6f232772020-11-11 19:38:18 +08003527 /*
3528 * If current process holds a transaction, we shouldn't flush, as we
3529 * assume all space reservation happens before a transaction handle is
3530 * held.
3531 *
3532 * But there are cases like btrfs_delayed_item_reserve_metadata() where
	3533	 * we try to reserve space with one transaction handle already held.
3534 * In that case we can't commit transaction, but at least try to end it
3535 * and hope the started data writes can free some space.
3536 */
3537 if (current->journal_info &&
3538 current->journal_info != BTRFS_SEND_TRANS_STUB)
3539 can_commit = false;
3540
Qu Wenruoc53e9652020-07-13 18:50:48 +08003541 ret = btrfs_start_delalloc_snapshot(root);
3542 if (ret < 0)
3543 goto out;
3544 btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
3545
3546 trans = btrfs_join_transaction(root);
3547 if (IS_ERR(trans)) {
3548 ret = PTR_ERR(trans);
3549 goto out;
3550 }
3551
Qu Wenruo6f232772020-11-11 19:38:18 +08003552 if (can_commit)
3553 ret = btrfs_commit_transaction(trans);
3554 else
3555 ret = btrfs_end_transaction(trans);
Qu Wenruoc53e9652020-07-13 18:50:48 +08003556out:
3557 clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state);
3558 wake_up(&root->qgroup_flush_wait);
3559 return ret;
3560}
3561
3562static int qgroup_reserve_data(struct btrfs_inode *inode,
Qu Wenruo364ecf32017-02-27 15:10:38 +08003563 struct extent_changeset **reserved_ret, u64 start,
3564 u64 len)
Qu Wenruo52472552015-10-12 16:05:40 +08003565{
Nikolay Borisov7661a3e2020-06-03 08:55:37 +03003566 struct btrfs_root *root = inode->root;
Qu Wenruo364ecf32017-02-27 15:10:38 +08003567 struct extent_changeset *reserved;
Qu Wenruo263da812020-07-08 14:24:45 +08003568 bool new_reserved = false;
Qu Wenruo364ecf32017-02-27 15:10:38 +08003569 u64 orig_reserved;
3570 u64 to_reserve;
Qu Wenruo52472552015-10-12 16:05:40 +08003571 int ret;
3572
Josef Bacikafcdd122016-09-02 15:40:02 -04003573 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) ||
Misono Tomohiro4fd786e2018-08-06 14:25:24 +09003574 !is_fstree(root->root_key.objectid) || len == 0)
Qu Wenruo52472552015-10-12 16:05:40 +08003575 return 0;
3576
Qu Wenruo364ecf32017-02-27 15:10:38 +08003577 /* @reserved parameter is mandatory for qgroup */
3578 if (WARN_ON(!reserved_ret))
3579 return -EINVAL;
3580 if (!*reserved_ret) {
Qu Wenruo263da812020-07-08 14:24:45 +08003581 new_reserved = true;
Qu Wenruo364ecf32017-02-27 15:10:38 +08003582 *reserved_ret = extent_changeset_alloc();
3583 if (!*reserved_ret)
3584 return -ENOMEM;
3585 }
3586 reserved = *reserved_ret;
3587 /* Record already reserved space */
3588 orig_reserved = reserved->bytes_changed;
Nikolay Borisov7661a3e2020-06-03 08:55:37 +03003589 ret = set_record_extent_bits(&inode->io_tree, start,
Qu Wenruo364ecf32017-02-27 15:10:38 +08003590 start + len -1, EXTENT_QGROUP_RESERVED, reserved);
3591
3592 /* Newly reserved space */
3593 to_reserve = reserved->bytes_changed - orig_reserved;
Nikolay Borisov7661a3e2020-06-03 08:55:37 +03003594 trace_btrfs_qgroup_reserve_data(&inode->vfs_inode, start, len,
Qu Wenruo364ecf32017-02-27 15:10:38 +08003595 to_reserve, QGROUP_RESERVE);
Qu Wenruo52472552015-10-12 16:05:40 +08003596 if (ret < 0)
Qu Wenruo263da812020-07-08 14:24:45 +08003597 goto out;
Qu Wenruodba21322017-12-12 15:34:25 +08003598 ret = qgroup_reserve(root, to_reserve, true, BTRFS_QGROUP_RSV_DATA);
Qu Wenruo52472552015-10-12 16:05:40 +08003599 if (ret < 0)
3600 goto cleanup;
3601
Qu Wenruo52472552015-10-12 16:05:40 +08003602 return ret;
3603
3604cleanup:
Qu Wenruo263da812020-07-08 14:24:45 +08003605 qgroup_unreserve_range(inode, reserved, start, len);
3606out:
3607 if (new_reserved) {
3608 extent_changeset_release(reserved);
3609 kfree(reserved);
3610 *reserved_ret = NULL;
3611 }
Qu Wenruo52472552015-10-12 16:05:40 +08003612 return ret;
3613}
Qu Wenruof695fdc2015-10-12 16:28:06 +08003614
Qu Wenruoc53e9652020-07-13 18:50:48 +08003615/*
3616 * Reserve qgroup space for range [start, start + len).
3617 *
3618 * This function will either reserve space from related qgroups or do nothing
3619 * if the range is already reserved.
3620 *
3621 * Return 0 for successful reservation
	3622	 * Return <0 for error (including -EDQUOT)
	3623	 *
	3624	 * NOTE: This function may sleep for memory allocation, dirty page flushing and
	3625	 * transaction commit, so the caller should not hold any dirty page locked.
3626 */
3627int btrfs_qgroup_reserve_data(struct btrfs_inode *inode,
3628 struct extent_changeset **reserved_ret, u64 start,
3629 u64 len)
3630{
3631 int ret;
3632
3633 ret = qgroup_reserve_data(inode, reserved_ret, start, len);
3634 if (ret <= 0 && ret != -EDQUOT)
3635 return ret;
3636
3637 ret = try_flush_qgroup(inode->root);
3638 if (ret < 0)
3639 return ret;
3640 return qgroup_reserve_data(inode, reserved_ret, start, len);
3641}
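
/*
 * A sketch of a typical data path using the helper above (the local names
 * are invented; the real write paths live outside this file):
 *
 *	struct extent_changeset *data_reserved = NULL;
 *
 *	ret = btrfs_qgroup_reserve_data(inode, &data_reserved, pos, count);
 *	if (ret < 0)
 *		return ret;	// still -EDQUOT even after flushing
 *	...
 *	extent_changeset_free(data_reserved);
 *
 * The first qgroup_reserve_data() attempt is cheap; only on -EDQUOT does
 * try_flush_qgroup() start delalloc, wait for ordered extents and commit
 * a transaction before the single retry.
 */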
3642
Qu Wenruobc42bda2017-02-27 15:10:39 +08003643/* Free ranges specified by @reserved, normally in error path */
Nikolay Borisovdf2cfd12020-06-03 08:55:09 +03003644static int qgroup_free_reserved_data(struct btrfs_inode *inode,
Qu Wenruobc42bda2017-02-27 15:10:39 +08003645 struct extent_changeset *reserved, u64 start, u64 len)
3646{
Nikolay Borisovdf2cfd12020-06-03 08:55:09 +03003647 struct btrfs_root *root = inode->root;
Qu Wenruobc42bda2017-02-27 15:10:39 +08003648 struct ulist_node *unode;
3649 struct ulist_iterator uiter;
3650 struct extent_changeset changeset;
3651 int freed = 0;
3652 int ret;
3653
3654 extent_changeset_init(&changeset);
3655 len = round_up(start + len, root->fs_info->sectorsize);
3656 start = round_down(start, root->fs_info->sectorsize);
3657
3658 ULIST_ITER_INIT(&uiter);
3659 while ((unode = ulist_next(&reserved->range_changed, &uiter))) {
3660 u64 range_start = unode->val;
3661 /* unode->aux is the inclusive end */
3662 u64 range_len = unode->aux - range_start + 1;
3663 u64 free_start;
3664 u64 free_len;
3665
3666 extent_changeset_release(&changeset);
3667
3668 /* Only free range in range [start, start + len) */
3669 if (range_start >= start + len ||
3670 range_start + range_len <= start)
3671 continue;
3672 free_start = max(range_start, start);
3673 free_len = min(start + len, range_start + range_len) -
3674 free_start;
3675 /*
	3676	 * TODO: Also modify reserved->ranges_reserved to reflect
	3677	 * the modification.
	3678	 *
	3679	 * However, as long as we free qgroup reserved space according to
	3680	 * EXTENT_QGROUP_RESERVED, we won't double free.
	3681	 * So there is no need to rush.
3682 */
Nikolay Borisovdf2cfd12020-06-03 08:55:09 +03003683 ret = clear_record_extent_bits(&inode->io_tree, free_start,
3684 free_start + free_len - 1,
Qu Wenruobc42bda2017-02-27 15:10:39 +08003685 EXTENT_QGROUP_RESERVED, &changeset);
3686 if (ret < 0)
3687 goto out;
3688 freed += changeset.bytes_changed;
3689 }
Misono Tomohiro4fd786e2018-08-06 14:25:24 +09003690 btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, freed,
Qu Wenruod4e5c922017-12-12 15:34:23 +08003691 BTRFS_QGROUP_RSV_DATA);
Qu Wenruobc42bda2017-02-27 15:10:39 +08003692 ret = freed;
3693out:
3694 extent_changeset_release(&changeset);
3695 return ret;
3696}
3697
Nikolay Borisov8769af92020-06-03 08:55:10 +03003698static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
Qu Wenruobc42bda2017-02-27 15:10:39 +08003699 struct extent_changeset *reserved, u64 start, u64 len,
3700 int free)
Qu Wenruof695fdc2015-10-12 16:28:06 +08003701{
3702 struct extent_changeset changeset;
Qu Wenruo81fb6f72015-09-28 16:57:53 +08003703 int trace_op = QGROUP_RELEASE;
Qu Wenruof695fdc2015-10-12 16:28:06 +08003704 int ret;
3705
Nikolay Borisov8769af92020-06-03 08:55:10 +03003706 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &inode->root->fs_info->flags))
Qu Wenruo3628b4c2018-10-09 14:36:45 +08003707 return 0;
3708
Qu Wenruobc42bda2017-02-27 15:10:39 +08003709 /* In release case, we shouldn't have @reserved */
3710 WARN_ON(!free && reserved);
3711 if (free && reserved)
Nikolay Borisov8769af92020-06-03 08:55:10 +03003712 return qgroup_free_reserved_data(inode, reserved, start, len);
Qu Wenruo364ecf32017-02-27 15:10:38 +08003713 extent_changeset_init(&changeset);
Nikolay Borisov8769af92020-06-03 08:55:10 +03003714 ret = clear_record_extent_bits(&inode->io_tree, start, start + len -1,
3715 EXTENT_QGROUP_RESERVED, &changeset);
Qu Wenruof695fdc2015-10-12 16:28:06 +08003716 if (ret < 0)
3717 goto out;
3718
Qu Wenruod51ea5d2017-03-13 15:52:09 +08003719 if (free)
3720 trace_op = QGROUP_FREE;
Nikolay Borisov8769af92020-06-03 08:55:10 +03003721 trace_btrfs_qgroup_release_data(&inode->vfs_inode, start, len,
Qu Wenruod51ea5d2017-03-13 15:52:09 +08003722 changeset.bytes_changed, trace_op);
3723 if (free)
Nikolay Borisov8769af92020-06-03 08:55:10 +03003724 btrfs_qgroup_free_refroot(inode->root->fs_info,
3725 inode->root->root_key.objectid,
Qu Wenruod4e5c922017-12-12 15:34:23 +08003726 changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
Qu Wenruo7bc329c2017-02-27 15:10:36 +08003727 ret = changeset.bytes_changed;
Qu Wenruof695fdc2015-10-12 16:28:06 +08003728out:
Qu Wenruo364ecf32017-02-27 15:10:38 +08003729 extent_changeset_release(&changeset);
Qu Wenruof695fdc2015-10-12 16:28:06 +08003730 return ret;
3731}
3732
3733/*
3734 * Free a reserved space range from io_tree and related qgroups
3735 *
	3736	 * Should be called when a range of pages gets invalidated before reaching
	3737	 * disk, or in the error cleanup case.
Qu Wenruobc42bda2017-02-27 15:10:39 +08003738 * if @reserved is given, only reserved range in [@start, @start + @len) will
3739 * be freed.
Qu Wenruof695fdc2015-10-12 16:28:06 +08003740 *
3741 * For data written to disk, use btrfs_qgroup_release_data().
3742 *
3743 * NOTE: This function may sleep for memory allocation.
3744 */
Nikolay Borisov8b8a9792020-06-03 08:55:11 +03003745int btrfs_qgroup_free_data(struct btrfs_inode *inode,
Qu Wenruobc42bda2017-02-27 15:10:39 +08003746 struct extent_changeset *reserved, u64 start, u64 len)
Qu Wenruof695fdc2015-10-12 16:28:06 +08003747{
Nikolay Borisov8b8a9792020-06-03 08:55:11 +03003748 return __btrfs_qgroup_release_data(inode, reserved, start, len, 1);
Qu Wenruof695fdc2015-10-12 16:28:06 +08003749}
3750
3751/*
3752 * Release a reserved space range from io_tree only.
3753 *
3754 * Should be called when a range of pages get written to disk and corresponding
3755 * FILE_EXTENT is inserted into corresponding root.
3756 *
	3757	 * Since the new qgroup accounting framework only updates qgroup numbers at
3758 * commit_transaction() time, its reserved space shouldn't be freed from
3759 * related qgroups.
3760 *
3761 * But we should release the range from io_tree, to allow further write to be
3762 * COWed.
3763 *
3764 * NOTE: This function may sleep for memory allocation.
3765 */
Nikolay Borisov72b7d152020-06-03 08:55:18 +03003766int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len)
Qu Wenruof695fdc2015-10-12 16:28:06 +08003767{
Nikolay Borisov72b7d152020-06-03 08:55:18 +03003768 return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
Qu Wenruof695fdc2015-10-12 16:28:06 +08003769}
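
/*
 * Putting the three data helpers together, the intended lifecycle is
 * (a sketch, error handling trimmed):
 *
 *	btrfs_qgroup_reserve_data(inode, &reserved, start, len);
 *	...
 *	if the data reached disk:
 *		btrfs_qgroup_release_data(inode, start, len);
 *		// keeps the qgroup charge, only clears io_tree bits
 *	else (invalidated page or error path):
 *		btrfs_qgroup_free_data(inode, reserved, start, len);
 *		// returns the charge to the qgroups as well
 */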
Qu Wenruo55eeaf02015-09-08 17:08:38 +08003770
Qu Wenruo82874752017-12-12 15:34:34 +08003771static void add_root_meta_rsv(struct btrfs_root *root, int num_bytes,
3772 enum btrfs_qgroup_rsv_type type)
3773{
3774 if (type != BTRFS_QGROUP_RSV_META_PREALLOC &&
3775 type != BTRFS_QGROUP_RSV_META_PERTRANS)
3776 return;
3777 if (num_bytes == 0)
3778 return;
3779
3780 spin_lock(&root->qgroup_meta_rsv_lock);
3781 if (type == BTRFS_QGROUP_RSV_META_PREALLOC)
3782 root->qgroup_meta_rsv_prealloc += num_bytes;
3783 else
3784 root->qgroup_meta_rsv_pertrans += num_bytes;
3785 spin_unlock(&root->qgroup_meta_rsv_lock);
3786}
3787
3788static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes,
3789 enum btrfs_qgroup_rsv_type type)
3790{
3791 if (type != BTRFS_QGROUP_RSV_META_PREALLOC &&
3792 type != BTRFS_QGROUP_RSV_META_PERTRANS)
3793 return 0;
3794 if (num_bytes == 0)
3795 return 0;
3796
3797 spin_lock(&root->qgroup_meta_rsv_lock);
3798 if (type == BTRFS_QGROUP_RSV_META_PREALLOC) {
3799 num_bytes = min_t(u64, root->qgroup_meta_rsv_prealloc,
3800 num_bytes);
3801 root->qgroup_meta_rsv_prealloc -= num_bytes;
3802 } else {
3803 num_bytes = min_t(u64, root->qgroup_meta_rsv_pertrans,
3804 num_bytes);
3805 root->qgroup_meta_rsv_pertrans -= num_bytes;
3806 }
3807 spin_unlock(&root->qgroup_meta_rsv_lock);
3808 return num_bytes;
3809}
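
/*
 * Why the min_t() clamping above matters, with invented numbers: suppose
 * 16K of META_PREALLOC was reserved while quota was disabled (so nothing
 * was recorded), quota is then enabled and another 4K is reserved, making
 * qgroup_meta_rsv_prealloc = 4K.  A later free of 20K is clamped to 4K,
 * so only the 4K the qgroups were actually charged is released and the
 * underflow warning in qgroup_rsv_release() never fires.
 */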
3810
Qu Wenruoc53e9652020-07-13 18:50:48 +08003811static int qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
Qu Wenruo733e03a2017-12-12 15:34:29 +08003812 enum btrfs_qgroup_rsv_type type, bool enforce)
Qu Wenruo55eeaf02015-09-08 17:08:38 +08003813{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003814 struct btrfs_fs_info *fs_info = root->fs_info;
Qu Wenruo55eeaf02015-09-08 17:08:38 +08003815 int ret;
3816
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003817 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
Misono Tomohiro4fd786e2018-08-06 14:25:24 +09003818 !is_fstree(root->root_key.objectid) || num_bytes == 0)
Qu Wenruo55eeaf02015-09-08 17:08:38 +08003819 return 0;
3820
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003821 BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
Qu Wenruofd2b0072019-10-17 10:38:36 +08003822 trace_qgroup_meta_reserve(root, (s64)num_bytes, type);
Qu Wenruo733e03a2017-12-12 15:34:29 +08003823 ret = qgroup_reserve(root, num_bytes, enforce, type);
Qu Wenruo55eeaf02015-09-08 17:08:38 +08003824 if (ret < 0)
3825 return ret;
Qu Wenruo82874752017-12-12 15:34:34 +08003826 /*
	3827	 * Record what we have reserved into the root, to avoid an underflow
	3828	 * across a quota disabled->enabled cycle.
	3829	 * In that case we may try to free space we haven't reserved (since
	3830	 * quota was disabled), so record what we reserved into the root and
	3831	 * ensure a later release won't underflow this number.
3832 * And ensure later release won't underflow this number.
3833 */
3834 add_root_meta_rsv(root, num_bytes, type);
Qu Wenruo55eeaf02015-09-08 17:08:38 +08003835 return ret;
3836}
3837
Qu Wenruoc53e9652020-07-13 18:50:48 +08003838int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
3839 enum btrfs_qgroup_rsv_type type, bool enforce)
3840{
3841 int ret;
3842
3843 ret = qgroup_reserve_meta(root, num_bytes, type, enforce);
3844 if (ret <= 0 && ret != -EDQUOT)
3845 return ret;
3846
3847 ret = try_flush_qgroup(root);
3848 if (ret < 0)
3849 return ret;
3850 return qgroup_reserve_meta(root, num_bytes, type, enforce);
3851}
3852
Qu Wenruo733e03a2017-12-12 15:34:29 +08003853void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root)
Qu Wenruo55eeaf02015-09-08 17:08:38 +08003854{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003855 struct btrfs_fs_info *fs_info = root->fs_info;
Qu Wenruo55eeaf02015-09-08 17:08:38 +08003856
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003857 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
Misono Tomohiro4fd786e2018-08-06 14:25:24 +09003858 !is_fstree(root->root_key.objectid))
Qu Wenruo55eeaf02015-09-08 17:08:38 +08003859 return;
3860
Qu Wenruoe1211d02017-12-12 15:34:30 +08003861 /* TODO: Update trace point to handle such free */
Qu Wenruo4ee0d882017-12-12 15:34:35 +08003862 trace_qgroup_meta_free_all_pertrans(root);
Qu Wenruoe1211d02017-12-12 15:34:30 +08003863 /* Special value -1 means to free all reserved space */
Misono Tomohiro4fd786e2018-08-06 14:25:24 +09003864 btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid, (u64)-1,
Qu Wenruo733e03a2017-12-12 15:34:29 +08003865 BTRFS_QGROUP_RSV_META_PERTRANS);
Qu Wenruo55eeaf02015-09-08 17:08:38 +08003866}
3867
Qu Wenruo733e03a2017-12-12 15:34:29 +08003868void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
3869 enum btrfs_qgroup_rsv_type type)
Qu Wenruo55eeaf02015-09-08 17:08:38 +08003870{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003871 struct btrfs_fs_info *fs_info = root->fs_info;
3872
3873 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
Misono Tomohiro4fd786e2018-08-06 14:25:24 +09003874 !is_fstree(root->root_key.objectid))
Qu Wenruo55eeaf02015-09-08 17:08:38 +08003875 return;
3876
Qu Wenruo82874752017-12-12 15:34:34 +08003877 /*
	3878	 * A reservation for META_PREALLOC can happen before quota is enabled,
	3879	 * which can lead to underflow.
	3880	 * Ensure here that we only free what we really have reserved.
3881 */
3882 num_bytes = sub_root_meta_rsv(root, num_bytes, type);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003883 BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
Qu Wenruofd2b0072019-10-17 10:38:36 +08003884 trace_qgroup_meta_reserve(root, -(s64)num_bytes, type);
Misono Tomohiro4fd786e2018-08-06 14:25:24 +09003885 btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid,
3886 num_bytes, type);
Qu Wenruo55eeaf02015-09-08 17:08:38 +08003887}
Qu Wenruo56fa9d02015-10-13 09:53:10 +08003888
Qu Wenruo64cfaef2017-12-12 15:34:31 +08003889static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root,
3890 int num_bytes)
3891{
Qu Wenruo64cfaef2017-12-12 15:34:31 +08003892 struct btrfs_qgroup *qgroup;
3893 struct ulist_node *unode;
3894 struct ulist_iterator uiter;
3895 int ret = 0;
3896
3897 if (num_bytes == 0)
3898 return;
Marcos Paulo de Souzae3b0edd2019-11-25 21:58:50 -03003899 if (!fs_info->quota_root)
Qu Wenruo64cfaef2017-12-12 15:34:31 +08003900 return;
3901
3902 spin_lock(&fs_info->qgroup_lock);
3903 qgroup = find_qgroup_rb(fs_info, ref_root);
3904 if (!qgroup)
3905 goto out;
3906 ulist_reinit(fs_info->qgroup_ulist);
3907 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
David Sterbaa1840b52018-03-27 19:04:50 +02003908 qgroup_to_aux(qgroup), GFP_ATOMIC);
Qu Wenruo64cfaef2017-12-12 15:34:31 +08003909 if (ret < 0)
3910 goto out;
3911 ULIST_ITER_INIT(&uiter);
3912 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
3913 struct btrfs_qgroup *qg;
3914 struct btrfs_qgroup_list *glist;
3915
3916 qg = unode_aux_to_qgroup(unode);
3917
3918 qgroup_rsv_release(fs_info, qg, num_bytes,
3919 BTRFS_QGROUP_RSV_META_PREALLOC);
3920 qgroup_rsv_add(fs_info, qg, num_bytes,
3921 BTRFS_QGROUP_RSV_META_PERTRANS);
3922 list_for_each_entry(glist, &qg->groups, next_group) {
3923 ret = ulist_add(fs_info->qgroup_ulist,
3924 glist->group->qgroupid,
David Sterbaa1840b52018-03-27 19:04:50 +02003925 qgroup_to_aux(glist->group), GFP_ATOMIC);
Qu Wenruo64cfaef2017-12-12 15:34:31 +08003926 if (ret < 0)
3927 goto out;
3928 }
3929 }
3930out:
3931 spin_unlock(&fs_info->qgroup_lock);
3932}
3933
3934void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
3935{
3936 struct btrfs_fs_info *fs_info = root->fs_info;
3937
3938 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
Misono Tomohiro4fd786e2018-08-06 14:25:24 +09003939 !is_fstree(root->root_key.objectid))
Qu Wenruo64cfaef2017-12-12 15:34:31 +08003940 return;
Qu Wenruo82874752017-12-12 15:34:34 +08003941 /* Same as btrfs_qgroup_free_meta_prealloc() */
3942 num_bytes = sub_root_meta_rsv(root, num_bytes,
3943 BTRFS_QGROUP_RSV_META_PREALLOC);
Qu Wenruo4ee0d882017-12-12 15:34:35 +08003944 trace_qgroup_meta_convert(root, num_bytes);
Misono Tomohiro4fd786e2018-08-06 14:25:24 +09003945 qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes);
Qu Wenruo64cfaef2017-12-12 15:34:31 +08003946}
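
/*
 * Sketch of the intended metadata flow around the conversion above; all
 * helpers named here are defined in this file:
 *
 *	// before the transaction: charge PREALLOC, this may fail
 *	ret = __btrfs_qgroup_reserve_meta(root, nbytes,
 *					  BTRFS_QGROUP_RSV_META_PREALLOC, true);
 *	...
 *	// metadata really used inside the transaction:
 *	btrfs_qgroup_convert_reserved_meta(root, nbytes);
 *	// or, if the operation aborts before use:
 *	__btrfs_qgroup_free_meta(root, nbytes, BTRFS_QGROUP_RSV_META_PREALLOC);
 *
 * Converted bytes become META_PERTRANS and are dropped wholesale at commit
 * by btrfs_qgroup_free_meta_all_pertrans().
 */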
3947
Qu Wenruo56fa9d02015-10-13 09:53:10 +08003948/*
Nicholas D Steeves01327612016-05-19 21:18:45 -04003949 * Check for leaked qgroup reserved space, normally at inode destruction
Qu Wenruo56fa9d02015-10-13 09:53:10 +08003950 * time
3951 */
Nikolay Borisovcfdd4592020-06-03 08:55:46 +03003952void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode)
Qu Wenruo56fa9d02015-10-13 09:53:10 +08003953{
3954 struct extent_changeset changeset;
3955 struct ulist_node *unode;
3956 struct ulist_iterator iter;
3957 int ret;
3958
Qu Wenruo364ecf32017-02-27 15:10:38 +08003959 extent_changeset_init(&changeset);
Nikolay Borisovcfdd4592020-06-03 08:55:46 +03003960 ret = clear_record_extent_bits(&inode->io_tree, 0, (u64)-1,
David Sterbaf734c442016-04-26 23:54:39 +02003961 EXTENT_QGROUP_RESERVED, &changeset);
Qu Wenruo56fa9d02015-10-13 09:53:10 +08003962
3963 WARN_ON(ret < 0);
3964 if (WARN_ON(changeset.bytes_changed)) {
3965 ULIST_ITER_INIT(&iter);
David Sterba53d32352017-02-13 13:42:29 +01003966 while ((unode = ulist_next(&changeset.range_changed, &iter))) {
Nikolay Borisovcfdd4592020-06-03 08:55:46 +03003967 btrfs_warn(inode->root->fs_info,
3968 "leaking qgroup reserved space, ino: %llu, start: %llu, end: %llu",
3969 btrfs_ino(inode), unode->val, unode->aux);
Qu Wenruo56fa9d02015-10-13 09:53:10 +08003970 }
Nikolay Borisovcfdd4592020-06-03 08:55:46 +03003971 btrfs_qgroup_free_refroot(inode->root->fs_info,
3972 inode->root->root_key.objectid,
Qu Wenruod4e5c922017-12-12 15:34:23 +08003973 changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
David Sterba0b08e1f2017-02-13 14:24:35 +01003974
Qu Wenruo56fa9d02015-10-13 09:53:10 +08003975 }
Qu Wenruo364ecf32017-02-27 15:10:38 +08003976 extent_changeset_release(&changeset);
Qu Wenruo56fa9d02015-10-13 09:53:10 +08003977}
Qu Wenruo370a11b2019-01-23 15:15:16 +08003978
3979void btrfs_qgroup_init_swapped_blocks(
3980 struct btrfs_qgroup_swapped_blocks *swapped_blocks)
3981{
3982 int i;
3983
3984 spin_lock_init(&swapped_blocks->lock);
3985 for (i = 0; i < BTRFS_MAX_LEVEL; i++)
3986 swapped_blocks->blocks[i] = RB_ROOT;
3987 swapped_blocks->swapped = false;
3988}
3989
3990/*
3991 * Delete all swapped blocks record of @root.
3992 * Every record here means we skipped a full subtree scan for qgroup.
3993 *
3994 * Gets called when committing one transaction.
3995 */
3996void btrfs_qgroup_clean_swapped_blocks(struct btrfs_root *root)
3997{
3998 struct btrfs_qgroup_swapped_blocks *swapped_blocks;
3999 int i;
4000
4001 swapped_blocks = &root->swapped_blocks;
4002
4003 spin_lock(&swapped_blocks->lock);
4004 if (!swapped_blocks->swapped)
4005 goto out;
4006 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
4007 struct rb_root *cur_root = &swapped_blocks->blocks[i];
4008 struct btrfs_qgroup_swapped_block *entry;
4009 struct btrfs_qgroup_swapped_block *next;
4010
4011 rbtree_postorder_for_each_entry_safe(entry, next, cur_root,
4012 node)
4013 kfree(entry);
4014 swapped_blocks->blocks[i] = RB_ROOT;
4015 }
4016 swapped_blocks->swapped = false;
4017out:
4018 spin_unlock(&swapped_blocks->lock);
4019}
4020
4021/*
4022 * Add subtree roots record into @subvol_root.
4023 *
4024 * @subvol_root: tree root of the subvolume tree get swapped
4025 * @bg: block group under balance
4026 * @subvol_parent/slot: pointer to the subtree root in subvolume tree
4027 * @reloc_parent/slot: pointer to the subtree root in reloc tree
4028 * BOTH POINTERS ARE BEFORE TREE SWAP
4029 * @last_snapshot: last snapshot generation of the subvolume tree
4030 */
4031int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
4032 struct btrfs_root *subvol_root,
David Sterba32da53862019-10-29 19:20:18 +01004033 struct btrfs_block_group *bg,
Qu Wenruo370a11b2019-01-23 15:15:16 +08004034 struct extent_buffer *subvol_parent, int subvol_slot,
4035 struct extent_buffer *reloc_parent, int reloc_slot,
4036 u64 last_snapshot)
4037{
4038 struct btrfs_fs_info *fs_info = subvol_root->fs_info;
4039 struct btrfs_qgroup_swapped_blocks *blocks = &subvol_root->swapped_blocks;
4040 struct btrfs_qgroup_swapped_block *block;
4041 struct rb_node **cur;
4042 struct rb_node *parent = NULL;
4043 int level = btrfs_header_level(subvol_parent) - 1;
4044 int ret = 0;
4045
4046 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
4047 return 0;
4048
4049 if (btrfs_node_ptr_generation(subvol_parent, subvol_slot) >
4050 btrfs_node_ptr_generation(reloc_parent, reloc_slot)) {
4051 btrfs_err_rl(fs_info,
4052 "%s: bad parameter order, subvol_gen=%llu reloc_gen=%llu",
4053 __func__,
4054 btrfs_node_ptr_generation(subvol_parent, subvol_slot),
4055 btrfs_node_ptr_generation(reloc_parent, reloc_slot));
4056 return -EUCLEAN;
4057 }
4058
4059 block = kmalloc(sizeof(*block), GFP_NOFS);
4060 if (!block) {
4061 ret = -ENOMEM;
4062 goto out;
4063 }
4064
4065 /*
4066 * @reloc_parent/slot is still before swap, while @block is going to
4067 * record the bytenr after swap, so we do the swap here.
4068 */
4069 block->subvol_bytenr = btrfs_node_blockptr(reloc_parent, reloc_slot);
4070 block->subvol_generation = btrfs_node_ptr_generation(reloc_parent,
4071 reloc_slot);
4072 block->reloc_bytenr = btrfs_node_blockptr(subvol_parent, subvol_slot);
4073 block->reloc_generation = btrfs_node_ptr_generation(subvol_parent,
4074 subvol_slot);
4075 block->last_snapshot = last_snapshot;
4076 block->level = level;
Qu Wenruo57949d02019-05-21 19:28:08 +08004077
4078 /*
	4079	 * If we have bg == NULL, we're called from btrfs_recover_relocation().
	4080	 * No one else can modify tree blocks then, thus the qgroup numbers will
	4081	 * not change no matter the value of trace_leaf.
4082 */
4083 if (bg && bg->flags & BTRFS_BLOCK_GROUP_DATA)
Qu Wenruo370a11b2019-01-23 15:15:16 +08004084 block->trace_leaf = true;
4085 else
4086 block->trace_leaf = false;
4087 btrfs_node_key_to_cpu(reloc_parent, &block->first_key, reloc_slot);
4088
4089 /* Insert @block into @blocks */
4090 spin_lock(&blocks->lock);
4091 cur = &blocks->blocks[level].rb_node;
4092 while (*cur) {
4093 struct btrfs_qgroup_swapped_block *entry;
4094
4095 parent = *cur;
4096 entry = rb_entry(parent, struct btrfs_qgroup_swapped_block,
4097 node);
4098
4099 if (entry->subvol_bytenr < block->subvol_bytenr) {
4100 cur = &(*cur)->rb_left;
4101 } else if (entry->subvol_bytenr > block->subvol_bytenr) {
4102 cur = &(*cur)->rb_right;
4103 } else {
4104 if (entry->subvol_generation !=
4105 block->subvol_generation ||
4106 entry->reloc_bytenr != block->reloc_bytenr ||
4107 entry->reloc_generation !=
4108 block->reloc_generation) {
4109 /*
	4110	 * Duplicate but mismatched entry found.
4111 * Shouldn't happen.
4112 *
4113 * Marking qgroup inconsistent should be enough
4114 * for end users.
4115 */
4116 WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
4117 ret = -EEXIST;
4118 }
4119 kfree(block);
4120 goto out_unlock;
4121 }
4122 }
4123 rb_link_node(&block->node, parent, cur);
4124 rb_insert_color(&block->node, &blocks->blocks[level]);
4125 blocks->swapped = true;
4126out_unlock:
4127 spin_unlock(&blocks->lock);
4128out:
4129 if (ret < 0)
4130 fs_info->qgroup_flags |=
4131 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
4132 return ret;
4133}
Qu Wenruof616f5c2019-01-23 15:15:17 +08004134
4135/*
4136 * Check if the tree block is a subtree root, and if so do the needed
4137 * delayed subtree trace for qgroup.
4138 *
4139 * This is called during btrfs_cow_block().
4140 */
4141int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
4142 struct btrfs_root *root,
4143 struct extent_buffer *subvol_eb)
4144{
4145 struct btrfs_fs_info *fs_info = root->fs_info;
4146 struct btrfs_qgroup_swapped_blocks *blocks = &root->swapped_blocks;
4147 struct btrfs_qgroup_swapped_block *block;
4148 struct extent_buffer *reloc_eb = NULL;
4149 struct rb_node *node;
4150 bool found = false;
4151 bool swapped = false;
4152 int level = btrfs_header_level(subvol_eb);
4153 int ret = 0;
4154 int i;
4155
4156 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
4157 return 0;
4158 if (!is_fstree(root->root_key.objectid) || !root->reloc_root)
4159 return 0;
4160
4161 spin_lock(&blocks->lock);
4162 if (!blocks->swapped) {
4163 spin_unlock(&blocks->lock);
4164 return 0;
4165 }
4166 node = blocks->blocks[level].rb_node;
4167
4168 while (node) {
4169 block = rb_entry(node, struct btrfs_qgroup_swapped_block, node);
4170 if (block->subvol_bytenr < subvol_eb->start) {
4171 node = node->rb_left;
4172 } else if (block->subvol_bytenr > subvol_eb->start) {
4173 node = node->rb_right;
4174 } else {
4175 found = true;
4176 break;
4177 }
4178 }
4179 if (!found) {
4180 spin_unlock(&blocks->lock);
4181 goto out;
4182 }
4183 /* Found one, remove it from @blocks first and update blocks->swapped */
4184 rb_erase(&block->node, &blocks->blocks[level]);
4185 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
4186 if (RB_EMPTY_ROOT(&blocks->blocks[i])) {
4187 swapped = true;
4188 break;
4189 }
4190 }
4191 blocks->swapped = swapped;
4192 spin_unlock(&blocks->lock);
4193
4194 /* Read out reloc subtree root */
4195 reloc_eb = read_tree_block(fs_info, block->reloc_bytenr,
4196 block->reloc_generation, block->level,
4197 &block->first_key);
4198 if (IS_ERR(reloc_eb)) {
4199 ret = PTR_ERR(reloc_eb);
4200 reloc_eb = NULL;
4201 goto free_out;
4202 }
4203 if (!extent_buffer_uptodate(reloc_eb)) {
4204 ret = -EIO;
4205 goto free_out;
4206 }
4207
4208 ret = qgroup_trace_subtree_swap(trans, reloc_eb, subvol_eb,
4209 block->last_snapshot, block->trace_leaf);
4210free_out:
4211 kfree(block);
4212 free_extent_buffer(reloc_eb);
4213out:
4214 if (ret < 0) {
4215 btrfs_err_rl(fs_info,
4216 "failed to account subtree at bytenr %llu: %d",
4217 subvol_eb->start, ret);
4218 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
4219 }
4220 return ret;
4221}
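
/*
 * How the two swapped-block hooks pair up during balance (a high level
 * sketch; the relocation side lives in relocation.c):
 *
 *	// at tree block swap time (relocation):
 *	btrfs_qgroup_add_swapped_blocks(trans, subvol_root, bg,
 *					subvol_parent, subvol_slot,
 *					reloc_parent, reloc_slot,
 *					last_snapshot);
 *
 *	// later, when the subvolume tree block gets COWed:
 *	btrfs_qgroup_trace_subtree_after_cow(trans, root, subvol_eb);
 *
 * Each record is consumed here, or dropped at transaction commit by
 * btrfs_qgroup_clean_swapped_blocks(), so a swapped subtree is traced at
 * most once instead of being fully rescanned.
 */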
Jeff Mahoney81f7eb02020-02-11 15:25:37 +08004222
4223void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans)
4224{
4225 struct btrfs_qgroup_extent_record *entry;
4226 struct btrfs_qgroup_extent_record *next;
4227 struct rb_root *root;
4228
4229 root = &trans->delayed_refs.dirty_extent_root;
4230 rbtree_postorder_for_each_entry_safe(entry, next, root, node) {
4231 ulist_free(entry->old_roots);
4232 kfree(entry);
4233 }
4234}