David Sterba | 9888c34 | 2018-04-03 19:16:55 +0200 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 2 | /* |
| 3 | * Copyright (C) 2014 Facebook. All rights reserved. |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 4 | */ |
| 5 | |
David Sterba | 9888c34 | 2018-04-03 19:16:55 +0200 | [diff] [blame] | 6 | #ifndef BTRFS_QGROUP_H |
| 7 | #define BTRFS_QGROUP_H |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 8 | |
Qu Wenruo | 3368d00 | 2015-04-16 14:34:17 +0800 | [diff] [blame] | 9 | #include "ulist.h" |
| 10 | #include "delayed-ref.h" |
| 11 | |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 12 | /* |
Qu Wenruo | 1d2beaa | 2016-10-18 09:31:26 +0800 | [diff] [blame] | 13 | * Btrfs qgroup overview |
| 14 | * |
| 15 | * Btrfs qgroup is split into 3 main parts: |
| 16 | * 1) Reserve |
| 17 | * Reserve metadata/data space for incoming operations |
| 18 | * Affect how qgroup limit works |
| 19 | * |
| 20 | * 2) Trace |
| 21 | * Tell btrfs qgroup to trace dirty extents. |
| 22 | * |
| 23 | * Dirty extents include: |
| 24 | * - Newly allocated extents |
| 25 | * - Extents going to be deleted (in this trans) |
| 26 | * - Extents whose owner is going to be modified |
| 27 | * |
| 28 | * This is the main part that affects whether qgroup numbers will stay |
| 29 | * consistent. |
| 30 | * Btrfs qgroup can trace clean extents and won't cause any problem, |
| 31 | * but it will consume extra CPU time, it should be avoided if possible. |
| 32 | * |
| 33 | * 3) Account |
| 34 | * Btrfs qgroup will update its numbers, based on dirty extents traced |
| 35 | * in the previous step. |
| 36 | * |
| 37 | * Normally at qgroup rescan and transaction commit time. |
| 38 | */ |
| 39 | |
| 40 | /* |
Qu Wenruo | 3368d00 | 2015-04-16 14:34:17 +0800 | [diff] [blame] | 41 | * Record a dirty extent, and info qgroup to update quota on it |
| 42 | * TODO: Use kmem cache to alloc it. |
| 43 | */ |
| 44 | struct btrfs_qgroup_extent_record { |
| 45 | struct rb_node node; |
| 46 | u64 bytenr; |
| 47 | u64 num_bytes; |
| 48 | struct ulist *old_roots; |
| 49 | }; |
| 50 | |
/*
 * Types of qgroup reservation.
 *
 * DATA:
 *	Space reserved for data.
 *
 * META_PERTRANS:
 *	Metadata space reserved per transaction.
 *	Qgroup data is only updated at transaction commit time, so space
 *	reserved for metadata must be kept until the transaction commits.
 *	Any metadata reservation used in btrfs_start_transaction() should
 *	be of this type.
 *
 * META_PREALLOC:
 *	Metadata space reserved before a transaction is started, where
 *	btrfs_join_transaction() is used afterwards to get a trans handle.
 *	Any metadata reserved for such usage should be of this type.
 *	After join_transaction(), part (or all) of such a reservation should
 *	be converted into META_PERTRANS.
 *
 * LAST is not a reservation type of its own; it follows the last real type
 * and is used to size btrfs_qgroup_rsv::values.
 */
enum btrfs_qgroup_rsv_type {
	BTRFS_QGROUP_RSV_DATA = 0,		/* data space */
	BTRFS_QGROUP_RSV_META_PERTRANS,		/* metadata, kept until commit */
	BTRFS_QGROUP_RSV_META_PREALLOC,		/* metadata, reserved up front */
	BTRFS_QGROUP_RSV_LAST,			/* number of types above */
};
| 78 | |
| 79 | /* |
| 80 | * Represents how many bytes we have reserved for this qgroup. |
| 81 | * |
| 82 | * Each type should have different reservation behavior. |
| 83 | * E.g, data follows its io_tree flag modification, while |
| 84 | * *currently* meta is just reserve-and-clear during transcation. |
| 85 | * |
| 86 | * TODO: Add new type for reservation which can survive transaction commit. |
| 87 | * Currect metadata reservation behavior is not suitable for such case. |
| 88 | */ |
| 89 | struct btrfs_qgroup_rsv { |
| 90 | u64 values[BTRFS_QGROUP_RSV_LAST]; |
| 91 | }; |
| 92 | |
Qu Wenruo | 81fb6f7 | 2015-09-28 16:57:53 +0800 | [diff] [blame] | 93 | /* |
Qu Wenruo | 3159fe7 | 2017-03-13 15:52:08 +0800 | [diff] [blame] | 94 | * one struct for each qgroup, organized in fs_info->qgroup_tree. |
| 95 | */ |
| 96 | struct btrfs_qgroup { |
| 97 | u64 qgroupid; |
| 98 | |
| 99 | /* |
| 100 | * state |
| 101 | */ |
| 102 | u64 rfer; /* referenced */ |
| 103 | u64 rfer_cmpr; /* referenced compressed */ |
| 104 | u64 excl; /* exclusive */ |
| 105 | u64 excl_cmpr; /* exclusive compressed */ |
| 106 | |
| 107 | /* |
| 108 | * limits |
| 109 | */ |
| 110 | u64 lim_flags; /* which limits are set */ |
| 111 | u64 max_rfer; |
| 112 | u64 max_excl; |
| 113 | u64 rsv_rfer; |
| 114 | u64 rsv_excl; |
| 115 | |
| 116 | /* |
| 117 | * reservation tracking |
| 118 | */ |
Qu Wenruo | d4e5c92 | 2017-12-12 15:34:23 +0800 | [diff] [blame] | 119 | struct btrfs_qgroup_rsv rsv; |
Qu Wenruo | 3159fe7 | 2017-03-13 15:52:08 +0800 | [diff] [blame] | 120 | |
| 121 | /* |
| 122 | * lists |
| 123 | */ |
| 124 | struct list_head groups; /* groups this group is member of */ |
| 125 | struct list_head members; /* groups that are members of this group */ |
| 126 | struct list_head dirty; /* dirty groups */ |
| 127 | struct rb_node node; /* tree of qgroups */ |
| 128 | |
| 129 | /* |
| 130 | * temp variables for accounting operations |
| 131 | * Refer to qgroup_shared_accounting() for details. |
| 132 | */ |
| 133 | u64 old_refcnt; |
| 134 | u64 new_refcnt; |
| 135 | }; |
| 136 | |
/*
 * Event flags, used by the qgroup event trace points only.
 * Each flag occupies its own bit.
 */
#define QGROUP_RESERVE		(1 << 0)
#define QGROUP_RELEASE		(1 << 1)
#define QGROUP_FREE		(1 << 2)
| 143 | |
/* Quota enable/disable entry points; only declared in this header. */
int btrfs_quota_enable(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info);
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info);
/* Qgroup rescan entry points; only declared in this header. */
int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
				     bool interruptible);
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 152 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, |
| 153 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); |
| 154 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, |
| 155 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); |
| 156 | int btrfs_create_qgroup(struct btrfs_trans_handle *trans, |
Dongsheng Yang | 4087cf2 | 2015-01-18 10:59:23 -0500 | [diff] [blame] | 157 | struct btrfs_fs_info *fs_info, u64 qgroupid); |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 158 | int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, |
| 159 | struct btrfs_fs_info *fs_info, u64 qgroupid); |
| 160 | int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, |
| 161 | struct btrfs_fs_info *fs_info, u64 qgroupid, |
| 162 | struct btrfs_qgroup_limit *limit); |
/* Read and free the qgroup configuration of @fs_info. */
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);

/* Forward declaration only; the type is not defined in this header. */
struct btrfs_delayed_extent_op;
Qu Wenruo | d1b8b94 | 2017-02-27 15:10:35 +0800 | [diff] [blame] | 166 | |
/*
 * Ask qgroup to trace one dirty extent whose info is recorded in @record, so
 * that qgroup can account it at transaction committing time.
 *
 * This is the no-lock version: the caller must acquire the delayed ref lock
 * and allocate the memory itself, and then call
 * btrfs_qgroup_trace_extent_post() after exiting the lock context.
 *
 * Return 0 if @record was inserted successfully.
 * Return >0 if a record already exists; the caller can free @record safely.
 * Error is not possible.
 */
int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
				     struct btrfs_delayed_ref_root *delayed_refs,
				     struct btrfs_qgroup_extent_record *record);
| 182 | |
/*
 * Post handler, to be run after btrfs_qgroup_trace_extent_nolock().
 *
 * NOTE: Qgroup currently does the expensive backref walk at transaction
 * committing time, with TRANS_STATE_COMMIT_DOING, which blocks incoming new
 * transactions.
 * This is designed so that btrfs_find_all_roots() gets the correct new_roots
 * result.
 *
 * For old_roots, however, there is no need to do the backref walk at that
 * time: commit roots are searched to walk the backrefs, so the result will
 * always be correct.
 *
 * Due to the nature of the no-lock version, the backref walk cannot be done
 * there.  So btrfs_qgroup_trace_extent_post() must be called after exiting
 * the spinlock context.
 *
 * TODO: If we can fix and prove btrfs_find_all_roots() can get correct result
 * using current root, then we can move all expensive backref walk out of
 * transaction committing, but not now as qgroup accounting will be wrong again.
 */
int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
				   struct btrfs_qgroup_extent_record *qrecord);
| 206 | |
| 207 | /* |
Qu Wenruo | 50b3e04 | 2016-10-18 09:31:27 +0800 | [diff] [blame] | 208 | * Inform qgroup to trace one dirty extent, specified by @bytenr and |
| 209 | * @num_bytes. |
| 210 | * So qgroup can account it at commit trans time. |
Qu Wenruo | cb93b52 | 2016-08-15 10:36:50 +0800 | [diff] [blame] | 211 | * |
Qu Wenruo | fb235dc | 2017-02-15 10:43:03 +0800 | [diff] [blame] | 212 | * Better encapsulated version, with memory allocation and backref walk for |
| 213 | * commit roots. |
| 214 | * So this can sleep. |
Qu Wenruo | cb93b52 | 2016-08-15 10:36:50 +0800 | [diff] [blame] | 215 | * |
| 216 | * Return 0 if the operation is done. |
| 217 | * Return <0 for error, like memory allocation failure or invalid parameter |
| 218 | * (NULL trans) |
| 219 | */ |
Qu Wenruo | 50b3e04 | 2016-10-18 09:31:27 +0800 | [diff] [blame] | 220 | int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, |
Qu Wenruo | cb93b52 | 2016-08-15 10:36:50 +0800 | [diff] [blame] | 221 | struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, |
| 222 | gfp_t gfp_flag); |
| 223 | |
/*
 * Ask qgroup to trace all leaf items of data.
 *
 * Return 0 for success.
 * Return <0 for error (ENOMEM).
 */
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
				  struct btrfs_fs_info *fs_info,
				  struct extent_buffer *eb);
| 233 | /* |
| 234 | * Inform qgroup to trace a whole subtree, including all its child tree |
| 235 | * blocks and data. |
| 236 | * The root tree block is specified by @root_eb. |
| 237 | * |
| 238 | * Normally used by relocation(tree block swap) and subvolume deletion. |
| 239 | * |
| 240 | * Return 0 for success |
| 241 | * Return <0 for error(ENOMEM or tree search error) |
| 242 | */ |
| 243 | int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, |
| 244 | struct btrfs_root *root, |
| 245 | struct extent_buffer *root_eb, |
| 246 | u64 root_gen, int root_level); |
Qu Wenruo | 442244c | 2015-04-16 17:18:36 +0800 | [diff] [blame] | 247 | int |
| 248 | btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, |
| 249 | struct btrfs_fs_info *fs_info, |
| 250 | u64 bytenr, u64 num_bytes, |
| 251 | struct ulist *old_roots, struct ulist *new_roots); |
Nikolay Borisov | 460fb20 | 2018-03-15 16:00:25 +0200 | [diff] [blame] | 252 | int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans); |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 253 | int btrfs_run_qgroups(struct btrfs_trans_handle *trans, |
| 254 | struct btrfs_fs_info *fs_info); |
| 255 | int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, |
| 256 | struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, |
| 257 | struct btrfs_qgroup_inherit *inherit); |
Qu Wenruo | 297d750 | 2015-09-08 17:08:37 +0800 | [diff] [blame] | 258 | void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, |
Qu Wenruo | d4e5c92 | 2017-12-12 15:34:23 +0800 | [diff] [blame] | 259 | u64 ref_root, u64 num_bytes, |
| 260 | enum btrfs_qgroup_rsv_type type); |
Qu Wenruo | 297d750 | 2015-09-08 17:08:37 +0800 | [diff] [blame] | 261 | static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info, |
| 262 | u64 ref_root, u64 num_bytes) |
| 263 | { |
Jeff Mahoney | bc07452 | 2016-06-09 17:27:55 -0400 | [diff] [blame] | 264 | trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes); |
Qu Wenruo | d4e5c92 | 2017-12-12 15:34:23 +0800 | [diff] [blame] | 265 | btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes, |
| 266 | BTRFS_QGROUP_RSV_DATA); |
Qu Wenruo | 297d750 | 2015-09-08 17:08:37 +0800 | [diff] [blame] | 267 | } |
Josef Bacik | fcebe45 | 2014-05-13 17:30:47 -0700 | [diff] [blame] | 268 | |
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/* Only declared when the btrfs sanity tests are configured in. */
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info,
			       u64 qgroupid,
			       u64 rfer, u64 excl);
#endif
| 273 | |
Qu Wenruo | 5247255 | 2015-10-12 16:05:40 +0800 | [diff] [blame] | 274 | /* New io_tree based accurate qgroup reserve API */ |
Qu Wenruo | 364ecf3 | 2017-02-27 15:10:38 +0800 | [diff] [blame] | 275 | int btrfs_qgroup_reserve_data(struct inode *inode, |
| 276 | struct extent_changeset **reserved, u64 start, u64 len); |
Qu Wenruo | f695fdc | 2015-10-12 16:28:06 +0800 | [diff] [blame] | 277 | int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len); |
Qu Wenruo | bc42bda | 2017-02-27 15:10:39 +0800 | [diff] [blame] | 278 | int btrfs_qgroup_free_data(struct inode *inode, |
| 279 | struct extent_changeset *reserved, u64 start, u64 len); |
Qu Wenruo | 55eeaf0 | 2015-09-08 17:08:38 +0800 | [diff] [blame] | 280 | |
/*
 * Type-generic metadata reservation.  The btrfs_qgroup_reserve_meta_*()
 * inline helpers in this header call it with a fixed @type.
 */
int __btrfs_qgroup_reserve_meta(struct btrfs_root *root,
				int num_bytes,
				enum btrfs_qgroup_rsv_type type,
				bool enforce);
| 283 | /* Reserve metadata space for pertrans and prealloc type */ |
| 284 | static inline int btrfs_qgroup_reserve_meta_pertrans(struct btrfs_root *root, |
| 285 | int num_bytes, bool enforce) |
| 286 | { |
| 287 | return __btrfs_qgroup_reserve_meta(root, num_bytes, |
| 288 | BTRFS_QGROUP_RSV_META_PERTRANS, enforce); |
| 289 | } |
| 290 | static inline int btrfs_qgroup_reserve_meta_prealloc(struct btrfs_root *root, |
| 291 | int num_bytes, bool enforce) |
| 292 | { |
| 293 | return __btrfs_qgroup_reserve_meta(root, num_bytes, |
| 294 | BTRFS_QGROUP_RSV_META_PREALLOC, enforce); |
| 295 | } |
| 296 | |
/*
 * Type-generic metadata free.  The btrfs_qgroup_free_meta_*() inline helpers
 * in this header call it with a fixed @type.
 */
void __btrfs_qgroup_free_meta(struct btrfs_root *root,
			      int num_bytes,
			      enum btrfs_qgroup_rsv_type type);
| 299 | |
| 300 | /* Free per-transaction meta reservation for error handling */ |
| 301 | static inline void btrfs_qgroup_free_meta_pertrans(struct btrfs_root *root, |
| 302 | int num_bytes) |
| 303 | { |
| 304 | __btrfs_qgroup_free_meta(root, num_bytes, |
| 305 | BTRFS_QGROUP_RSV_META_PERTRANS); |
| 306 | } |
| 307 | |
| 308 | /* Pre-allocated meta reservation can be freed at need */ |
| 309 | static inline void btrfs_qgroup_free_meta_prealloc(struct btrfs_root *root, |
| 310 | int num_bytes) |
| 311 | { |
| 312 | __btrfs_qgroup_free_meta(root, num_bytes, |
| 313 | BTRFS_QGROUP_RSV_META_PREALLOC); |
| 314 | } |
| 315 | |
/*
 * All per-transaction meta reservation should be freed at transaction commit
 * time.
 */
void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root);
| 321 | |
/*
 * Convert @num_bytes of META_PREALLOC reservation into META_PERTRANS.
 *
 * To be called when a preallocated meta reservation needs to be used,
 * normally after the btrfs_join_transaction() call.
 */
void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root,
					int num_bytes);
| 329 | |
/* Only declared in this header; operates on a single @inode. */
void btrfs_qgroup_check_reserved_leak(struct inode *inode);
David Sterba | 9888c34 | 2018-04-03 19:16:55 +0200 | [diff] [blame] | 331 | |
| 332 | #endif |