blob: 2de05062c78b0aaa5799329aeb0065f0630f5f54 [file] [log] [blame]
Konstantin Komarov3f3b4422021-08-13 17:21:29 +03001// SPDX-License-Identifier: GPL-2.0
2/*
3 *
4 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
5 *
6 * This code builds two trees of free clusters extents.
7 * Trees are sorted by start of extent and by length of extent.
8 * NTFS_MAX_WND_EXTENTS defines the maximum number of elements in trees.
9 * In extreme case code reads on-disk bitmap to find free clusters
10 *
11 */
12
13#include <linux/blkdev.h>
14#include <linux/buffer_head.h>
15#include <linux/fs.h>
16#include <linux/nls.h>
17
18#include "debug.h"
19#include "ntfs.h"
20#include "ntfs_fs.h"
21
22/*
23 * Maximum number of extents in tree.
24 */
25#define NTFS_MAX_WND_EXTENTS (32u * 1024u)
26
27struct rb_node_key {
28 struct rb_node node;
29 size_t key;
30};
31
32/*
33 * Tree is sorted by start (key)
34 */
35struct e_node {
36 struct rb_node_key start; /* Tree sorted by start */
37 struct rb_node_key count; /* Tree sorted by len*/
38};
39
40static int wnd_rescan(struct wnd_bitmap *wnd);
41static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw);
42static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits);
43
44static struct kmem_cache *ntfs_enode_cachep;
45
46int __init ntfs3_init_bitmap(void)
47{
48 ntfs_enode_cachep =
49 kmem_cache_create("ntfs3_enode_cache", sizeof(struct e_node), 0,
50 SLAB_RECLAIM_ACCOUNT, NULL);
51 return ntfs_enode_cachep ? 0 : -ENOMEM;
52}
53
54void ntfs3_exit_bitmap(void)
55{
56 kmem_cache_destroy(ntfs_enode_cachep);
57}
58
59static inline u32 wnd_bits(const struct wnd_bitmap *wnd, size_t i)
60{
61 return i + 1 == wnd->nwnd ? wnd->bits_last : wnd->sb->s_blocksize * 8;
62}
63
64/*
65 * b_pos + b_len - biggest fragment
66 * Scan range [wpos wbits) window 'buf'
67 * Returns -1 if not found
68 */
69static size_t wnd_scan(const ulong *buf, size_t wbit, u32 wpos, u32 wend,
70 size_t to_alloc, size_t *prev_tail, size_t *b_pos,
71 size_t *b_len)
72{
73 while (wpos < wend) {
74 size_t free_len;
75 u32 free_bits, end;
76 u32 used = find_next_zero_bit(buf, wend, wpos);
77
78 if (used >= wend) {
79 if (*b_len < *prev_tail) {
80 *b_pos = wbit - *prev_tail;
81 *b_len = *prev_tail;
82 }
83
84 *prev_tail = 0;
85 return -1;
86 }
87
88 if (used > wpos) {
89 wpos = used;
90 if (*b_len < *prev_tail) {
91 *b_pos = wbit - *prev_tail;
92 *b_len = *prev_tail;
93 }
94
95 *prev_tail = 0;
96 }
97
98 /*
99 * Now we have a fragment [wpos, wend) staring with 0
100 */
101 end = wpos + to_alloc - *prev_tail;
102 free_bits = find_next_bit(buf, min(end, wend), wpos);
103
104 free_len = *prev_tail + free_bits - wpos;
105
106 if (*b_len < free_len) {
107 *b_pos = wbit + wpos - *prev_tail;
108 *b_len = free_len;
109 }
110
111 if (free_len >= to_alloc)
112 return wbit + wpos - *prev_tail;
113
114 if (free_bits >= wend) {
115 *prev_tail += free_bits - wpos;
116 return -1;
117 }
118
119 wpos = free_bits + 1;
120
121 *prev_tail = 0;
122 }
123
124 return -1;
125}
126
127/*
128 * wnd_close
129 *
130 * Frees all resources
131 */
132void wnd_close(struct wnd_bitmap *wnd)
133{
134 struct rb_node *node, *next;
135
Kari Argillander195c52b2021-08-24 21:37:07 +0300136 kfree(wnd->free_bits);
Konstantin Komarov3f3b4422021-08-13 17:21:29 +0300137 run_close(&wnd->run);
138
139 node = rb_first(&wnd->start_tree);
140
141 while (node) {
142 next = rb_next(node);
143 rb_erase(node, &wnd->start_tree);
144 kmem_cache_free(ntfs_enode_cachep,
145 rb_entry(node, struct e_node, start.node));
146 node = next;
147 }
148}
149
150static struct rb_node *rb_lookup(struct rb_root *root, size_t v)
151{
152 struct rb_node **p = &root->rb_node;
153 struct rb_node *r = NULL;
154
155 while (*p) {
156 struct rb_node_key *k;
157
158 k = rb_entry(*p, struct rb_node_key, node);
159 if (v < k->key) {
160 p = &(*p)->rb_left;
161 } else if (v > k->key) {
162 r = &k->node;
163 p = &(*p)->rb_right;
164 } else {
165 return &k->node;
166 }
167 }
168
169 return r;
170}
171
172/*
173 * rb_insert_count
174 *
175 * Helper function to insert special kind of 'count' tree
176 */
177static inline bool rb_insert_count(struct rb_root *root, struct e_node *e)
178{
179 struct rb_node **p = &root->rb_node;
180 struct rb_node *parent = NULL;
181 size_t e_ckey = e->count.key;
182 size_t e_skey = e->start.key;
183
184 while (*p) {
185 struct e_node *k =
186 rb_entry(parent = *p, struct e_node, count.node);
187
188 if (e_ckey > k->count.key) {
189 p = &(*p)->rb_left;
190 } else if (e_ckey < k->count.key) {
191 p = &(*p)->rb_right;
192 } else if (e_skey < k->start.key) {
193 p = &(*p)->rb_left;
194 } else if (e_skey > k->start.key) {
195 p = &(*p)->rb_right;
196 } else {
197 WARN_ON(1);
198 return false;
199 }
200 }
201
202 rb_link_node(&e->count.node, parent, p);
203 rb_insert_color(&e->count.node, root);
204 return true;
205}
206
207/*
208 * inline bool rb_insert_start
209 *
210 * Helper function to insert special kind of 'start' tree
211 */
212static inline bool rb_insert_start(struct rb_root *root, struct e_node *e)
213{
214 struct rb_node **p = &root->rb_node;
215 struct rb_node *parent = NULL;
216 size_t e_skey = e->start.key;
217
218 while (*p) {
219 struct e_node *k;
220
221 parent = *p;
222
223 k = rb_entry(parent, struct e_node, start.node);
224 if (e_skey < k->start.key) {
225 p = &(*p)->rb_left;
226 } else if (e_skey > k->start.key) {
227 p = &(*p)->rb_right;
228 } else {
229 WARN_ON(1);
230 return false;
231 }
232 }
233
234 rb_link_node(&e->start.node, parent, p);
235 rb_insert_color(&e->start.node, root);
236 return true;
237}
238
239/*
240 * wnd_add_free_ext
241 *
242 * adds a new extent of free space
243 * build = 1 when building tree
244 */
245static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len,
246 bool build)
247{
248 struct e_node *e, *e0 = NULL;
249 size_t ib, end_in = bit + len;
250 struct rb_node *n;
251
252 if (build) {
253 /* Use extent_min to filter too short extents */
254 if (wnd->count >= NTFS_MAX_WND_EXTENTS &&
255 len <= wnd->extent_min) {
256 wnd->uptodated = -1;
257 return;
258 }
259 } else {
260 /* Try to find extent before 'bit' */
261 n = rb_lookup(&wnd->start_tree, bit);
262
263 if (!n) {
264 n = rb_first(&wnd->start_tree);
265 } else {
266 e = rb_entry(n, struct e_node, start.node);
267 n = rb_next(n);
268 if (e->start.key + e->count.key == bit) {
269 /* Remove left */
270 bit = e->start.key;
271 len += e->count.key;
272 rb_erase(&e->start.node, &wnd->start_tree);
273 rb_erase(&e->count.node, &wnd->count_tree);
274 wnd->count -= 1;
275 e0 = e;
276 }
277 }
278
279 while (n) {
280 size_t next_end;
281
282 e = rb_entry(n, struct e_node, start.node);
283 next_end = e->start.key + e->count.key;
284 if (e->start.key > end_in)
285 break;
286
287 /* Remove right */
288 n = rb_next(n);
289 len += next_end - end_in;
290 end_in = next_end;
291 rb_erase(&e->start.node, &wnd->start_tree);
292 rb_erase(&e->count.node, &wnd->count_tree);
293 wnd->count -= 1;
294
295 if (!e0)
296 e0 = e;
297 else
298 kmem_cache_free(ntfs_enode_cachep, e);
299 }
300
301 if (wnd->uptodated != 1) {
302 /* Check bits before 'bit' */
303 ib = wnd->zone_bit == wnd->zone_end ||
304 bit < wnd->zone_end
305 ? 0
306 : wnd->zone_end;
307
308 while (bit > ib && wnd_is_free_hlp(wnd, bit - 1, 1)) {
309 bit -= 1;
310 len += 1;
311 }
312
313 /* Check bits after 'end_in' */
314 ib = wnd->zone_bit == wnd->zone_end ||
315 end_in > wnd->zone_bit
316 ? wnd->nbits
317 : wnd->zone_bit;
318
319 while (end_in < ib && wnd_is_free_hlp(wnd, end_in, 1)) {
320 end_in += 1;
321 len += 1;
322 }
323 }
324 }
325 /* Insert new fragment */
326 if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
327 if (e0)
328 kmem_cache_free(ntfs_enode_cachep, e0);
329
330 wnd->uptodated = -1;
331
332 /* Compare with smallest fragment */
333 n = rb_last(&wnd->count_tree);
334 e = rb_entry(n, struct e_node, count.node);
335 if (len <= e->count.key)
336 goto out; /* Do not insert small fragments */
337
338 if (build) {
339 struct e_node *e2;
340
341 n = rb_prev(n);
342 e2 = rb_entry(n, struct e_node, count.node);
343 /* smallest fragment will be 'e2->count.key' */
344 wnd->extent_min = e2->count.key;
345 }
346
347 /* Replace smallest fragment by new one */
348 rb_erase(&e->start.node, &wnd->start_tree);
349 rb_erase(&e->count.node, &wnd->count_tree);
350 wnd->count -= 1;
351 } else {
352 e = e0 ? e0 : kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
353 if (!e) {
354 wnd->uptodated = -1;
355 goto out;
356 }
357
358 if (build && len <= wnd->extent_min)
359 wnd->extent_min = len;
360 }
361 e->start.key = bit;
362 e->count.key = len;
363 if (len > wnd->extent_max)
364 wnd->extent_max = len;
365
366 rb_insert_start(&wnd->start_tree, e);
367 rb_insert_count(&wnd->count_tree, e);
368 wnd->count += 1;
369
370out:;
371}
372
373/*
374 * wnd_remove_free_ext
375 *
376 * removes a run from the cached free space
377 */
378static void wnd_remove_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len)
379{
380 struct rb_node *n, *n3;
381 struct e_node *e, *e3;
382 size_t end_in = bit + len;
383 size_t end3, end, new_key, new_len, max_new_len;
384
385 /* Try to find extent before 'bit' */
386 n = rb_lookup(&wnd->start_tree, bit);
387
388 if (!n)
389 return;
390
391 e = rb_entry(n, struct e_node, start.node);
392 end = e->start.key + e->count.key;
393
394 new_key = new_len = 0;
395 len = e->count.key;
396
397 /* Range [bit,end_in) must be inside 'e' or outside 'e' and 'n' */
398 if (e->start.key > bit)
399 ;
400 else if (end_in <= end) {
401 /* Range [bit,end_in) inside 'e' */
402 new_key = end_in;
403 new_len = end - end_in;
404 len = bit - e->start.key;
405 } else if (bit > end) {
406 bool bmax = false;
407
408 n3 = rb_next(n);
409
410 while (n3) {
411 e3 = rb_entry(n3, struct e_node, start.node);
412 if (e3->start.key >= end_in)
413 break;
414
415 if (e3->count.key == wnd->extent_max)
416 bmax = true;
417
418 end3 = e3->start.key + e3->count.key;
419 if (end3 > end_in) {
420 e3->start.key = end_in;
421 rb_erase(&e3->count.node, &wnd->count_tree);
422 e3->count.key = end3 - end_in;
423 rb_insert_count(&wnd->count_tree, e3);
424 break;
425 }
426
427 n3 = rb_next(n3);
428 rb_erase(&e3->start.node, &wnd->start_tree);
429 rb_erase(&e3->count.node, &wnd->count_tree);
430 wnd->count -= 1;
431 kmem_cache_free(ntfs_enode_cachep, e3);
432 }
433 if (!bmax)
434 return;
435 n3 = rb_first(&wnd->count_tree);
436 wnd->extent_max =
437 n3 ? rb_entry(n3, struct e_node, count.node)->count.key
438 : 0;
439 return;
440 }
441
442 if (e->count.key != wnd->extent_max) {
443 ;
444 } else if (rb_prev(&e->count.node)) {
445 ;
446 } else {
447 n3 = rb_next(&e->count.node);
448 max_new_len = len > new_len ? len : new_len;
449 if (!n3) {
450 wnd->extent_max = max_new_len;
451 } else {
452 e3 = rb_entry(n3, struct e_node, count.node);
453 wnd->extent_max = max(e3->count.key, max_new_len);
454 }
455 }
456
457 if (!len) {
458 if (new_len) {
459 e->start.key = new_key;
460 rb_erase(&e->count.node, &wnd->count_tree);
461 e->count.key = new_len;
462 rb_insert_count(&wnd->count_tree, e);
463 } else {
464 rb_erase(&e->start.node, &wnd->start_tree);
465 rb_erase(&e->count.node, &wnd->count_tree);
466 wnd->count -= 1;
467 kmem_cache_free(ntfs_enode_cachep, e);
468 }
469 goto out;
470 }
471 rb_erase(&e->count.node, &wnd->count_tree);
472 e->count.key = len;
473 rb_insert_count(&wnd->count_tree, e);
474
475 if (!new_len)
476 goto out;
477
478 if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
479 wnd->uptodated = -1;
480
481 /* Get minimal extent */
482 e = rb_entry(rb_last(&wnd->count_tree), struct e_node,
483 count.node);
484 if (e->count.key > new_len)
485 goto out;
486
487 /* Replace minimum */
488 rb_erase(&e->start.node, &wnd->start_tree);
489 rb_erase(&e->count.node, &wnd->count_tree);
490 wnd->count -= 1;
491 } else {
492 e = kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
493 if (!e)
494 wnd->uptodated = -1;
495 }
496
497 if (e) {
498 e->start.key = new_key;
499 e->count.key = new_len;
500 rb_insert_start(&wnd->start_tree, e);
501 rb_insert_count(&wnd->count_tree, e);
502 wnd->count += 1;
503 }
504
505out:
506 if (!wnd->count && 1 != wnd->uptodated)
507 wnd_rescan(wnd);
508}
509
510/*
511 * wnd_rescan
512 *
513 * Scan all bitmap. used while initialization.
514 */
515static int wnd_rescan(struct wnd_bitmap *wnd)
516{
517 int err = 0;
518 size_t prev_tail = 0;
519 struct super_block *sb = wnd->sb;
520 struct ntfs_sb_info *sbi = sb->s_fs_info;
521 u64 lbo, len = 0;
522 u32 blocksize = sb->s_blocksize;
523 u8 cluster_bits = sbi->cluster_bits;
524 u32 wbits = 8 * sb->s_blocksize;
525 u32 used, frb;
526 const ulong *buf;
527 size_t wpos, wbit, iw, vbo;
528 struct buffer_head *bh = NULL;
529 CLST lcn, clen;
530
531 wnd->uptodated = 0;
532 wnd->extent_max = 0;
533 wnd->extent_min = MINUS_ONE_T;
534 wnd->total_zeroes = 0;
535
536 vbo = 0;
537
538 for (iw = 0; iw < wnd->nwnd; iw++) {
539 if (iw + 1 == wnd->nwnd)
540 wbits = wnd->bits_last;
541
542 if (wnd->inited) {
543 if (!wnd->free_bits[iw]) {
544 /* all ones */
545 if (prev_tail) {
546 wnd_add_free_ext(wnd,
547 vbo * 8 - prev_tail,
548 prev_tail, true);
549 prev_tail = 0;
550 }
551 goto next_wnd;
552 }
553 if (wbits == wnd->free_bits[iw]) {
554 /* all zeroes */
555 prev_tail += wbits;
556 wnd->total_zeroes += wbits;
557 goto next_wnd;
558 }
559 }
560
561 if (!len) {
562 u32 off = vbo & sbi->cluster_mask;
563
564 if (!run_lookup_entry(&wnd->run, vbo >> cluster_bits,
565 &lcn, &clen, NULL)) {
566 err = -ENOENT;
567 goto out;
568 }
569
570 lbo = ((u64)lcn << cluster_bits) + off;
571 len = ((u64)clen << cluster_bits) - off;
572 }
573
574 bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
575 if (!bh) {
576 err = -EIO;
577 goto out;
578 }
579
580 buf = (ulong *)bh->b_data;
581
582 used = __bitmap_weight(buf, wbits);
583 if (used < wbits) {
584 frb = wbits - used;
585 wnd->free_bits[iw] = frb;
586 wnd->total_zeroes += frb;
587 }
588
589 wpos = 0;
590 wbit = vbo * 8;
591
592 if (wbit + wbits > wnd->nbits)
593 wbits = wnd->nbits - wbit;
594
595 do {
596 used = find_next_zero_bit(buf, wbits, wpos);
597
598 if (used > wpos && prev_tail) {
599 wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
600 prev_tail, true);
601 prev_tail = 0;
602 }
603
604 wpos = used;
605
606 if (wpos >= wbits) {
607 /* No free blocks */
608 prev_tail = 0;
609 break;
610 }
611
612 frb = find_next_bit(buf, wbits, wpos);
613 if (frb >= wbits) {
614 /* keep last free block */
615 prev_tail += frb - wpos;
616 break;
617 }
618
619 wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
620 frb + prev_tail - wpos, true);
621
622 /* Skip free block and first '1' */
623 wpos = frb + 1;
624 /* Reset previous tail */
625 prev_tail = 0;
626 } while (wpos < wbits);
627
628next_wnd:
629
630 if (bh)
631 put_bh(bh);
632 bh = NULL;
633
634 vbo += blocksize;
635 if (len) {
636 len -= blocksize;
637 lbo += blocksize;
638 }
639 }
640
641 /* Add last block */
642 if (prev_tail)
643 wnd_add_free_ext(wnd, wnd->nbits - prev_tail, prev_tail, true);
644
645 /*
646 * Before init cycle wnd->uptodated was 0
647 * If any errors or limits occurs while initialization then
648 * wnd->uptodated will be -1
649 * If 'uptodated' is still 0 then Tree is really updated
650 */
651 if (!wnd->uptodated)
652 wnd->uptodated = 1;
653
654 if (wnd->zone_bit != wnd->zone_end) {
655 size_t zlen = wnd->zone_end - wnd->zone_bit;
656
657 wnd->zone_end = wnd->zone_bit;
658 wnd_zone_set(wnd, wnd->zone_bit, zlen);
659 }
660
661out:
662 return err;
663}
664
665/*
666 * wnd_init
667 */
668int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits)
669{
670 int err;
671 u32 blocksize = sb->s_blocksize;
672 u32 wbits = blocksize * 8;
673
674 init_rwsem(&wnd->rw_lock);
675
676 wnd->sb = sb;
677 wnd->nbits = nbits;
678 wnd->total_zeroes = nbits;
679 wnd->extent_max = MINUS_ONE_T;
680 wnd->zone_bit = wnd->zone_end = 0;
681 wnd->nwnd = bytes_to_block(sb, bitmap_size(nbits));
682 wnd->bits_last = nbits & (wbits - 1);
683 if (!wnd->bits_last)
684 wnd->bits_last = wbits;
685
Kari Argillander345482b2021-08-24 21:37:08 +0300686 wnd->free_bits = kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS);
Konstantin Komarov3f3b4422021-08-13 17:21:29 +0300687 if (!wnd->free_bits)
688 return -ENOMEM;
689
690 err = wnd_rescan(wnd);
691 if (err)
692 return err;
693
694 wnd->inited = true;
695
696 return 0;
697}
698
699/*
700 * wnd_map
701 *
702 * call sb_bread for requested window
703 */
704static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw)
705{
706 size_t vbo;
707 CLST lcn, clen;
708 struct super_block *sb = wnd->sb;
709 struct ntfs_sb_info *sbi;
710 struct buffer_head *bh;
711 u64 lbo;
712
713 sbi = sb->s_fs_info;
714 vbo = (u64)iw << sb->s_blocksize_bits;
715
716 if (!run_lookup_entry(&wnd->run, vbo >> sbi->cluster_bits, &lcn, &clen,
717 NULL)) {
718 return ERR_PTR(-ENOENT);
719 }
720
721 lbo = ((u64)lcn << sbi->cluster_bits) + (vbo & sbi->cluster_mask);
722
723 bh = ntfs_bread(wnd->sb, lbo >> sb->s_blocksize_bits);
724 if (!bh)
725 return ERR_PTR(-EIO);
726
727 return bh;
728}
729
730/*
731 * wnd_set_free
732 *
733 * Marks the bits range from bit to bit + bits as free
734 */
735int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
736{
737 int err = 0;
738 struct super_block *sb = wnd->sb;
739 size_t bits0 = bits;
740 u32 wbits = 8 * sb->s_blocksize;
741 size_t iw = bit >> (sb->s_blocksize_bits + 3);
742 u32 wbit = bit & (wbits - 1);
743 struct buffer_head *bh;
744
745 while (iw < wnd->nwnd && bits) {
746 u32 tail, op;
747 ulong *buf;
748
749 if (iw + 1 == wnd->nwnd)
750 wbits = wnd->bits_last;
751
752 tail = wbits - wbit;
753 op = tail < bits ? tail : bits;
754
755 bh = wnd_map(wnd, iw);
756 if (IS_ERR(bh)) {
757 err = PTR_ERR(bh);
758 break;
759 }
760
761 buf = (ulong *)bh->b_data;
762
763 lock_buffer(bh);
764
765 __bitmap_clear(buf, wbit, op);
766
767 wnd->free_bits[iw] += op;
768
769 set_buffer_uptodate(bh);
770 mark_buffer_dirty(bh);
771 unlock_buffer(bh);
772 put_bh(bh);
773
774 wnd->total_zeroes += op;
775 bits -= op;
776 wbit = 0;
777 iw += 1;
778 }
779
780 wnd_add_free_ext(wnd, bit, bits0, false);
781
782 return err;
783}
784
785/*
786 * wnd_set_used
787 *
788 * Marks the bits range from bit to bit + bits as used
789 */
790int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
791{
792 int err = 0;
793 struct super_block *sb = wnd->sb;
794 size_t bits0 = bits;
795 size_t iw = bit >> (sb->s_blocksize_bits + 3);
796 u32 wbits = 8 * sb->s_blocksize;
797 u32 wbit = bit & (wbits - 1);
798 struct buffer_head *bh;
799
800 while (iw < wnd->nwnd && bits) {
801 u32 tail, op;
802 ulong *buf;
803
804 if (unlikely(iw + 1 == wnd->nwnd))
805 wbits = wnd->bits_last;
806
807 tail = wbits - wbit;
808 op = tail < bits ? tail : bits;
809
810 bh = wnd_map(wnd, iw);
811 if (IS_ERR(bh)) {
812 err = PTR_ERR(bh);
813 break;
814 }
815 buf = (ulong *)bh->b_data;
816
817 lock_buffer(bh);
818
819 __bitmap_set(buf, wbit, op);
820 wnd->free_bits[iw] -= op;
821
822 set_buffer_uptodate(bh);
823 mark_buffer_dirty(bh);
824 unlock_buffer(bh);
825 put_bh(bh);
826
827 wnd->total_zeroes -= op;
828 bits -= op;
829 wbit = 0;
830 iw += 1;
831 }
832
833 if (!RB_EMPTY_ROOT(&wnd->start_tree))
834 wnd_remove_free_ext(wnd, bit, bits0);
835
836 return err;
837}
838
839/*
840 * wnd_is_free_hlp
841 *
842 * Returns true if all clusters [bit, bit+bits) are free (bitmap only)
843 */
844static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits)
845{
846 struct super_block *sb = wnd->sb;
847 size_t iw = bit >> (sb->s_blocksize_bits + 3);
848 u32 wbits = 8 * sb->s_blocksize;
849 u32 wbit = bit & (wbits - 1);
850
851 while (iw < wnd->nwnd && bits) {
852 u32 tail, op;
853
854 if (unlikely(iw + 1 == wnd->nwnd))
855 wbits = wnd->bits_last;
856
857 tail = wbits - wbit;
858 op = tail < bits ? tail : bits;
859
860 if (wbits != wnd->free_bits[iw]) {
861 bool ret;
862 struct buffer_head *bh = wnd_map(wnd, iw);
863
864 if (IS_ERR(bh))
865 return false;
866
867 ret = are_bits_clear((ulong *)bh->b_data, wbit, op);
868
869 put_bh(bh);
870 if (!ret)
871 return false;
872 }
873
874 bits -= op;
875 wbit = 0;
876 iw += 1;
877 }
878
879 return true;
880}
881
882/*
883 * wnd_is_free
884 *
885 * Returns true if all clusters [bit, bit+bits) are free
886 */
887bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
888{
889 bool ret;
890 struct rb_node *n;
891 size_t end;
892 struct e_node *e;
893
894 if (RB_EMPTY_ROOT(&wnd->start_tree))
895 goto use_wnd;
896
897 n = rb_lookup(&wnd->start_tree, bit);
898 if (!n)
899 goto use_wnd;
900
901 e = rb_entry(n, struct e_node, start.node);
902
903 end = e->start.key + e->count.key;
904
905 if (bit < end && bit + bits <= end)
906 return true;
907
908use_wnd:
909 ret = wnd_is_free_hlp(wnd, bit, bits);
910
911 return ret;
912}
913
914/*
915 * wnd_is_used
916 *
917 * Returns true if all clusters [bit, bit+bits) are used
918 */
919bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
920{
921 bool ret = false;
922 struct super_block *sb = wnd->sb;
923 size_t iw = bit >> (sb->s_blocksize_bits + 3);
924 u32 wbits = 8 * sb->s_blocksize;
925 u32 wbit = bit & (wbits - 1);
926 size_t end;
927 struct rb_node *n;
928 struct e_node *e;
929
930 if (RB_EMPTY_ROOT(&wnd->start_tree))
931 goto use_wnd;
932
933 end = bit + bits;
934 n = rb_lookup(&wnd->start_tree, end - 1);
935 if (!n)
936 goto use_wnd;
937
938 e = rb_entry(n, struct e_node, start.node);
939 if (e->start.key + e->count.key > bit)
940 return false;
941
942use_wnd:
943 while (iw < wnd->nwnd && bits) {
944 u32 tail, op;
945
946 if (unlikely(iw + 1 == wnd->nwnd))
947 wbits = wnd->bits_last;
948
949 tail = wbits - wbit;
950 op = tail < bits ? tail : bits;
951
952 if (wnd->free_bits[iw]) {
953 bool ret;
954 struct buffer_head *bh = wnd_map(wnd, iw);
955
956 if (IS_ERR(bh))
957 goto out;
958
959 ret = are_bits_set((ulong *)bh->b_data, wbit, op);
960 put_bh(bh);
961 if (!ret)
962 goto out;
963 }
964
965 bits -= op;
966 wbit = 0;
967 iw += 1;
968 }
969 ret = true;
970
971out:
972 return ret;
973}
974
975/*
976 * wnd_find
977 * - flags - BITMAP_FIND_XXX flags
978 *
979 * looks for free space
980 * Returns 0 if not found
981 */
982size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint,
983 size_t flags, size_t *allocated)
984{
985 struct super_block *sb;
986 u32 wbits, wpos, wzbit, wzend;
987 size_t fnd, max_alloc, b_len, b_pos;
988 size_t iw, prev_tail, nwnd, wbit, ebit, zbit, zend;
989 size_t to_alloc0 = to_alloc;
990 const ulong *buf;
991 const struct e_node *e;
992 const struct rb_node *pr, *cr;
993 u8 log2_bits;
994 bool fbits_valid;
995 struct buffer_head *bh;
996
997 /* fast checking for available free space */
998 if (flags & BITMAP_FIND_FULL) {
999 size_t zeroes = wnd_zeroes(wnd);
1000
1001 zeroes -= wnd->zone_end - wnd->zone_bit;
1002 if (zeroes < to_alloc0)
1003 goto no_space;
1004
1005 if (to_alloc0 > wnd->extent_max)
1006 goto no_space;
1007 } else {
1008 if (to_alloc > wnd->extent_max)
1009 to_alloc = wnd->extent_max;
1010 }
1011
1012 if (wnd->zone_bit <= hint && hint < wnd->zone_end)
1013 hint = wnd->zone_end;
1014
1015 max_alloc = wnd->nbits;
1016 b_len = b_pos = 0;
1017
1018 if (hint >= max_alloc)
1019 hint = 0;
1020
1021 if (RB_EMPTY_ROOT(&wnd->start_tree)) {
1022 if (wnd->uptodated == 1) {
1023 /* extents tree is updated -> no free space */
1024 goto no_space;
1025 }
1026 goto scan_bitmap;
1027 }
1028
1029 e = NULL;
1030 if (!hint)
1031 goto allocate_biggest;
1032
1033 /* Use hint: enumerate extents by start >= hint */
1034 pr = NULL;
1035 cr = wnd->start_tree.rb_node;
1036
1037 for (;;) {
1038 e = rb_entry(cr, struct e_node, start.node);
1039
1040 if (e->start.key == hint)
1041 break;
1042
1043 if (e->start.key < hint) {
1044 pr = cr;
1045 cr = cr->rb_right;
1046 if (!cr)
1047 break;
1048 continue;
1049 }
1050
1051 cr = cr->rb_left;
1052 if (!cr) {
1053 e = pr ? rb_entry(pr, struct e_node, start.node) : NULL;
1054 break;
1055 }
1056 }
1057
1058 if (!e)
1059 goto allocate_biggest;
1060
1061 if (e->start.key + e->count.key > hint) {
1062 /* We have found extension with 'hint' inside */
1063 size_t len = e->start.key + e->count.key - hint;
1064
1065 if (len >= to_alloc && hint + to_alloc <= max_alloc) {
1066 fnd = hint;
1067 goto found;
1068 }
1069
1070 if (!(flags & BITMAP_FIND_FULL)) {
1071 if (len > to_alloc)
1072 len = to_alloc;
1073
1074 if (hint + len <= max_alloc) {
1075 fnd = hint;
1076 to_alloc = len;
1077 goto found;
1078 }
1079 }
1080 }
1081
1082allocate_biggest:
1083 /* Allocate from biggest free extent */
1084 e = rb_entry(rb_first(&wnd->count_tree), struct e_node, count.node);
1085 if (e->count.key != wnd->extent_max)
1086 wnd->extent_max = e->count.key;
1087
1088 if (e->count.key < max_alloc) {
1089 if (e->count.key >= to_alloc) {
1090 ;
1091 } else if (flags & BITMAP_FIND_FULL) {
1092 if (e->count.key < to_alloc0) {
1093 /* Biggest free block is less then requested */
1094 goto no_space;
1095 }
1096 to_alloc = e->count.key;
1097 } else if (-1 != wnd->uptodated) {
1098 to_alloc = e->count.key;
1099 } else {
1100 /* Check if we can use more bits */
1101 size_t op, max_check;
1102 struct rb_root start_tree;
1103
1104 memcpy(&start_tree, &wnd->start_tree,
1105 sizeof(struct rb_root));
1106 memset(&wnd->start_tree, 0, sizeof(struct rb_root));
1107
1108 max_check = e->start.key + to_alloc;
1109 if (max_check > max_alloc)
1110 max_check = max_alloc;
1111 for (op = e->start.key + e->count.key; op < max_check;
1112 op++) {
1113 if (!wnd_is_free(wnd, op, 1))
1114 break;
1115 }
1116 memcpy(&wnd->start_tree, &start_tree,
1117 sizeof(struct rb_root));
1118 to_alloc = op - e->start.key;
1119 }
1120
1121 /* Prepare to return */
1122 fnd = e->start.key;
1123 if (e->start.key + to_alloc > max_alloc)
1124 to_alloc = max_alloc - e->start.key;
1125 goto found;
1126 }
1127
1128 if (wnd->uptodated == 1) {
1129 /* extents tree is updated -> no free space */
1130 goto no_space;
1131 }
1132
1133 b_len = e->count.key;
1134 b_pos = e->start.key;
1135
1136scan_bitmap:
1137 sb = wnd->sb;
1138 log2_bits = sb->s_blocksize_bits + 3;
1139
1140 /* At most two ranges [hint, max_alloc) + [0, hint) */
1141Again:
1142
1143 /* TODO: optimize request for case nbits > wbits */
1144 iw = hint >> log2_bits;
1145 wbits = sb->s_blocksize * 8;
1146 wpos = hint & (wbits - 1);
1147 prev_tail = 0;
1148 fbits_valid = true;
1149
1150 if (max_alloc == wnd->nbits) {
1151 nwnd = wnd->nwnd;
1152 } else {
1153 size_t t = max_alloc + wbits - 1;
1154
1155 nwnd = likely(t > max_alloc) ? (t >> log2_bits) : wnd->nwnd;
1156 }
1157
1158 /* Enumerate all windows */
1159 for (; iw < nwnd; iw++) {
1160 wbit = iw << log2_bits;
1161
1162 if (!wnd->free_bits[iw]) {
1163 if (prev_tail > b_len) {
1164 b_pos = wbit - prev_tail;
1165 b_len = prev_tail;
1166 }
1167
1168 /* Skip full used window */
1169 prev_tail = 0;
1170 wpos = 0;
1171 continue;
1172 }
1173
1174 if (unlikely(iw + 1 == nwnd)) {
1175 if (max_alloc == wnd->nbits) {
1176 wbits = wnd->bits_last;
1177 } else {
1178 size_t t = max_alloc & (wbits - 1);
1179
1180 if (t) {
1181 wbits = t;
1182 fbits_valid = false;
1183 }
1184 }
1185 }
1186
1187 if (wnd->zone_end > wnd->zone_bit) {
1188 ebit = wbit + wbits;
1189 zbit = max(wnd->zone_bit, wbit);
1190 zend = min(wnd->zone_end, ebit);
1191
1192 /* Here we have a window [wbit, ebit) and zone [zbit, zend) */
1193 if (zend <= zbit) {
1194 /* Zone does not overlap window */
1195 } else {
1196 wzbit = zbit - wbit;
1197 wzend = zend - wbit;
1198
1199 /* Zone overlaps window */
1200 if (wnd->free_bits[iw] == wzend - wzbit) {
1201 prev_tail = 0;
1202 wpos = 0;
1203 continue;
1204 }
1205
1206 /* Scan two ranges window: [wbit, zbit) and [zend, ebit) */
1207 bh = wnd_map(wnd, iw);
1208
1209 if (IS_ERR(bh)) {
1210 /* TODO: error */
1211 prev_tail = 0;
1212 wpos = 0;
1213 continue;
1214 }
1215
1216 buf = (ulong *)bh->b_data;
1217
1218 /* Scan range [wbit, zbit) */
1219 if (wpos < wzbit) {
1220 /* Scan range [wpos, zbit) */
1221 fnd = wnd_scan(buf, wbit, wpos, wzbit,
1222 to_alloc, &prev_tail,
1223 &b_pos, &b_len);
1224 if (fnd != MINUS_ONE_T) {
1225 put_bh(bh);
1226 goto found;
1227 }
1228 }
1229
1230 prev_tail = 0;
1231
1232 /* Scan range [zend, ebit) */
1233 if (wzend < wbits) {
1234 fnd = wnd_scan(buf, wbit,
1235 max(wzend, wpos), wbits,
1236 to_alloc, &prev_tail,
1237 &b_pos, &b_len);
1238 if (fnd != MINUS_ONE_T) {
1239 put_bh(bh);
1240 goto found;
1241 }
1242 }
1243
1244 wpos = 0;
1245 put_bh(bh);
1246 continue;
1247 }
1248 }
1249
1250 /* Current window does not overlap zone */
1251 if (!wpos && fbits_valid && wnd->free_bits[iw] == wbits) {
1252 /* window is empty */
1253 if (prev_tail + wbits >= to_alloc) {
1254 fnd = wbit + wpos - prev_tail;
1255 goto found;
1256 }
1257
1258 /* Increase 'prev_tail' and process next window */
1259 prev_tail += wbits;
1260 wpos = 0;
1261 continue;
1262 }
1263
1264 /* read window */
1265 bh = wnd_map(wnd, iw);
1266 if (IS_ERR(bh)) {
1267 // TODO: error
1268 prev_tail = 0;
1269 wpos = 0;
1270 continue;
1271 }
1272
1273 buf = (ulong *)bh->b_data;
1274
1275 /* Scan range [wpos, eBits) */
1276 fnd = wnd_scan(buf, wbit, wpos, wbits, to_alloc, &prev_tail,
1277 &b_pos, &b_len);
1278 put_bh(bh);
1279 if (fnd != MINUS_ONE_T)
1280 goto found;
1281 }
1282
1283 if (b_len < prev_tail) {
1284 /* The last fragment */
1285 b_len = prev_tail;
1286 b_pos = max_alloc - prev_tail;
1287 }
1288
1289 if (hint) {
1290 /*
1291 * We have scanned range [hint max_alloc)
1292 * Prepare to scan range [0 hint + to_alloc)
1293 */
1294 size_t nextmax = hint + to_alloc;
1295
1296 if (likely(nextmax >= hint) && nextmax < max_alloc)
1297 max_alloc = nextmax;
1298 hint = 0;
1299 goto Again;
1300 }
1301
1302 if (!b_len)
1303 goto no_space;
1304
1305 wnd->extent_max = b_len;
1306
1307 if (flags & BITMAP_FIND_FULL)
1308 goto no_space;
1309
1310 fnd = b_pos;
1311 to_alloc = b_len;
1312
1313found:
1314 if (flags & BITMAP_FIND_MARK_AS_USED) {
1315 /* TODO optimize remove extent (pass 'e'?) */
1316 if (wnd_set_used(wnd, fnd, to_alloc))
1317 goto no_space;
1318 } else if (wnd->extent_max != MINUS_ONE_T &&
1319 to_alloc > wnd->extent_max) {
1320 wnd->extent_max = to_alloc;
1321 }
1322
1323 *allocated = fnd;
1324 return to_alloc;
1325
1326no_space:
1327 return 0;
1328}
1329
1330/*
1331 * wnd_extend
1332 *
1333 * Extend bitmap ($MFT bitmap)
1334 */
1335int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
1336{
1337 int err;
1338 struct super_block *sb = wnd->sb;
1339 struct ntfs_sb_info *sbi = sb->s_fs_info;
1340 u32 blocksize = sb->s_blocksize;
1341 u32 wbits = blocksize * 8;
1342 u32 b0, new_last;
1343 size_t bits, iw, new_wnd;
1344 size_t old_bits = wnd->nbits;
1345 u16 *new_free;
1346
1347 if (new_bits <= old_bits)
1348 return -EINVAL;
1349
1350 /* align to 8 byte boundary */
1351 new_wnd = bytes_to_block(sb, bitmap_size(new_bits));
1352 new_last = new_bits & (wbits - 1);
1353 if (!new_last)
1354 new_last = wbits;
1355
1356 if (new_wnd != wnd->nwnd) {
Kari Argillander195c52b2021-08-24 21:37:07 +03001357 new_free = kmalloc(new_wnd * sizeof(u16), GFP_NOFS);
Konstantin Komarov3f3b4422021-08-13 17:21:29 +03001358 if (!new_free)
1359 return -ENOMEM;
1360
1361 if (new_free != wnd->free_bits)
1362 memcpy(new_free, wnd->free_bits,
1363 wnd->nwnd * sizeof(short));
1364 memset(new_free + wnd->nwnd, 0,
1365 (new_wnd - wnd->nwnd) * sizeof(short));
Kari Argillander195c52b2021-08-24 21:37:07 +03001366 kfree(wnd->free_bits);
Konstantin Komarov3f3b4422021-08-13 17:21:29 +03001367 wnd->free_bits = new_free;
1368 }
1369
1370 /* Zero bits [old_bits,new_bits) */
1371 bits = new_bits - old_bits;
1372 b0 = old_bits & (wbits - 1);
1373
1374 for (iw = old_bits >> (sb->s_blocksize_bits + 3); bits; iw += 1) {
1375 u32 op;
1376 size_t frb;
1377 u64 vbo, lbo, bytes;
1378 struct buffer_head *bh;
1379 ulong *buf;
1380
1381 if (iw + 1 == new_wnd)
1382 wbits = new_last;
1383
1384 op = b0 + bits > wbits ? wbits - b0 : bits;
1385 vbo = (u64)iw * blocksize;
1386
1387 err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes);
1388 if (err)
1389 break;
1390
1391 bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
1392 if (!bh)
1393 return -EIO;
1394
1395 lock_buffer(bh);
1396 buf = (ulong *)bh->b_data;
1397
1398 __bitmap_clear(buf, b0, blocksize * 8 - b0);
1399 frb = wbits - __bitmap_weight(buf, wbits);
1400 wnd->total_zeroes += frb - wnd->free_bits[iw];
1401 wnd->free_bits[iw] = frb;
1402
1403 set_buffer_uptodate(bh);
1404 mark_buffer_dirty(bh);
1405 unlock_buffer(bh);
1406 /*err = sync_dirty_buffer(bh);*/
1407
1408 b0 = 0;
1409 bits -= op;
1410 }
1411
1412 wnd->nbits = new_bits;
1413 wnd->nwnd = new_wnd;
1414 wnd->bits_last = new_last;
1415
1416 wnd_add_free_ext(wnd, old_bits, new_bits - old_bits, false);
1417
1418 return 0;
1419}
1420
1421/*
1422 * wnd_zone_set
1423 */
1424void wnd_zone_set(struct wnd_bitmap *wnd, size_t lcn, size_t len)
1425{
1426 size_t zlen;
1427
1428 zlen = wnd->zone_end - wnd->zone_bit;
1429 if (zlen)
1430 wnd_add_free_ext(wnd, wnd->zone_bit, zlen, false);
1431
1432 if (!RB_EMPTY_ROOT(&wnd->start_tree) && len)
1433 wnd_remove_free_ext(wnd, lcn, len);
1434
1435 wnd->zone_bit = lcn;
1436 wnd->zone_end = lcn + len;
1437}
1438
1439int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range)
1440{
1441 int err = 0;
1442 struct super_block *sb = sbi->sb;
1443 struct wnd_bitmap *wnd = &sbi->used.bitmap;
1444 u32 wbits = 8 * sb->s_blocksize;
1445 CLST len = 0, lcn = 0, done = 0;
1446 CLST minlen = bytes_to_cluster(sbi, range->minlen);
1447 CLST lcn_from = bytes_to_cluster(sbi, range->start);
1448 size_t iw = lcn_from >> (sb->s_blocksize_bits + 3);
1449 u32 wbit = lcn_from & (wbits - 1);
1450 const ulong *buf;
1451 CLST lcn_to;
1452
1453 if (!minlen)
1454 minlen = 1;
1455
1456 if (range->len == (u64)-1)
1457 lcn_to = wnd->nbits;
1458 else
1459 lcn_to = bytes_to_cluster(sbi, range->start + range->len);
1460
1461 down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
1462
1463 for (; iw < wnd->nbits; iw++, wbit = 0) {
1464 CLST lcn_wnd = iw * wbits;
1465 struct buffer_head *bh;
1466
1467 if (lcn_wnd > lcn_to)
1468 break;
1469
1470 if (!wnd->free_bits[iw])
1471 continue;
1472
1473 if (iw + 1 == wnd->nwnd)
1474 wbits = wnd->bits_last;
1475
1476 if (lcn_wnd + wbits > lcn_to)
1477 wbits = lcn_to - lcn_wnd;
1478
1479 bh = wnd_map(wnd, iw);
1480 if (IS_ERR(bh)) {
1481 err = PTR_ERR(bh);
1482 break;
1483 }
1484
1485 buf = (ulong *)bh->b_data;
1486
1487 for (; wbit < wbits; wbit++) {
1488 if (!test_bit(wbit, buf)) {
1489 if (!len)
1490 lcn = lcn_wnd + wbit;
1491 len += 1;
1492 continue;
1493 }
1494 if (len >= minlen) {
1495 err = ntfs_discard(sbi, lcn, len);
1496 if (err)
1497 goto out;
1498 done += len;
1499 }
1500 len = 0;
1501 }
1502 put_bh(bh);
1503 }
1504
1505 /* Process the last fragment */
1506 if (len >= minlen) {
1507 err = ntfs_discard(sbi, lcn, len);
1508 if (err)
1509 goto out;
1510 done += len;
1511 }
1512
1513out:
1514 range->len = (u64)done << sbi->cluster_bits;
1515
1516 up_read(&wnd->rw_lock);
1517
1518 return err;
1519}