blob: d00a6f20ac7b69bc7746f14d72fb2bc81f73e562 [file] [log] [blame]
Thomas Gleixner2b27bdc2019-05-29 16:57:50 -07001// SPDX-License-Identifier: GPL-2.0-only
Artem Bityutskiy1e517642008-07-14 19:08:37 +03002/*
3 * This file is part of UBIFS.
4 *
5 * Copyright (C) 2006-2008 Nokia Corporation.
6 *
Artem Bityutskiy1e517642008-07-14 19:08:37 +03007 * Authors: Artem Bityutskiy (Битюцкий Артём)
8 * Adrian Hunter
9 */
10
11/*
12 * This file implements UBIFS shrinker which evicts clean znodes from the TNC
13 * tree when Linux VM needs more RAM.
14 *
15 * We do not implement any LRU lists to find oldest znodes to free because it
16 * would add additional overhead to the file system fast paths. So the shrinker
17 * just walks the TNC tree when searching for znodes to free.
18 *
19 * If the root of a TNC sub-tree is clean and old enough, then the children are
20 * also clean and old enough. So the shrinker walks the TNC in level order and
21 * dumps entire sub-trees.
22 *
23 * The age of znodes is just the time-stamp when they were last looked at.
24 * The current shrinker first tries to evict old znodes, then young ones.
25 *
26 * Since the shrinker is global, it has to protect against races with FS
27 * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
28 */
29
30#include "ubifs.h"
31
32/* List of all UBIFS file-system instances */
33LIST_HEAD(ubifs_infos);
34
35/*
36 * We number each shrinker run and record the number on the ubifs_info structure
37 * so that we can easily work out which ubifs_info structures have already been
38 * done by the current run.
39 */
40static unsigned int shrinker_run_no;
41
42/* Protects 'ubifs_infos' list */
43DEFINE_SPINLOCK(ubifs_infos_lock);
44
45/* Global clean znode counter (for all mounted UBIFS instances) */
46atomic_long_t ubifs_clean_zn_cnt;
47
48/**
49 * shrink_tnc - shrink TNC tree.
50 * @c: UBIFS file-system description object
51 * @nr: number of znodes to free
52 * @age: the age of znodes to free
53 * @contention: if any contention, this is set to %1
54 *
55 * This function traverses TNC tree and frees clean znodes. It does not free
56 * clean znodes which younger then @age. Returns number of freed znodes.
57 */
58static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
59{
60 int total_freed = 0;
61 struct ubifs_znode *znode, *zprev;
Arnd Bergmann6cff5732018-07-13 16:31:56 +020062 time64_t time = ktime_get_seconds();
Artem Bityutskiy1e517642008-07-14 19:08:37 +030063
Richard Weinberger6eb61d52018-07-12 13:01:57 +020064 ubifs_assert(c, mutex_is_locked(&c->umount_mutex));
65 ubifs_assert(c, mutex_is_locked(&c->tnc_mutex));
Artem Bityutskiy1e517642008-07-14 19:08:37 +030066
67 if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
68 return 0;
69
70 /*
71 * Traverse the TNC tree in levelorder manner, so that it is possible
72 * to destroy large sub-trees. Indeed, if a znode is old, then all its
73 * children are older or of the same age.
74 *
75 * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
76 * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
77 * changed only when the 'c->tnc_mutex' is held.
78 */
79 zprev = NULL;
Richard Weinberger6eb61d52018-07-12 13:01:57 +020080 znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL);
Artem Bityutskiy1e517642008-07-14 19:08:37 +030081 while (znode && total_freed < nr &&
82 atomic_long_read(&c->clean_zn_cnt) > 0) {
83 int freed;
84
85 /*
86 * If the znode is clean, but it is in the 'c->cnext' list, this
87 * means that this znode has just been written to flash as a
88 * part of commit and was marked clean. They will be removed
89 * from the list at end commit. We cannot change the list,
90 * because it is not protected by any mutex (design decision to
91 * make commit really independent and parallel to main I/O). So
92 * we just skip these znodes.
93 *
94 * Note, the 'clean_zn_cnt' counters are not updated until
95 * after the commit, so the UBIFS shrinker does not report
96 * the znodes which are in the 'c->cnext' list as freeable.
97 *
98 * Also note, if the root of a sub-tree is not in 'c->cnext',
99 * then the whole sub-tree is not in 'c->cnext' as well, so it
100 * is safe to dump whole sub-tree.
101 */
102
103 if (znode->cnext) {
104 /*
105 * Very soon these znodes will be removed from the list
106 * and become freeable.
107 */
108 *contention = 1;
109 } else if (!ubifs_zn_dirty(znode) &&
110 abs(time - znode->time) >= age) {
111 if (znode->parent)
112 znode->parent->zbranch[znode->iip].znode = NULL;
113 else
114 c->zroot.znode = NULL;
115
Richard Weinberger6eb61d52018-07-12 13:01:57 +0200116 freed = ubifs_destroy_tnc_subtree(c, znode);
Artem Bityutskiy1e517642008-07-14 19:08:37 +0300117 atomic_long_sub(freed, &ubifs_clean_zn_cnt);
118 atomic_long_sub(freed, &c->clean_zn_cnt);
Artem Bityutskiy1e517642008-07-14 19:08:37 +0300119 total_freed += freed;
120 znode = zprev;
121 }
122
123 if (unlikely(!c->zroot.znode))
124 break;
125
126 zprev = znode;
Richard Weinberger6eb61d52018-07-12 13:01:57 +0200127 znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode);
Artem Bityutskiy1e517642008-07-14 19:08:37 +0300128 cond_resched();
129 }
130
131 return total_freed;
132}
133
134/**
135 * shrink_tnc_trees - shrink UBIFS TNC trees.
136 * @nr: number of znodes to free
137 * @age: the age of znodes to free
138 * @contention: if any contention, this is set to %1
139 *
140 * This function walks the list of mounted UBIFS file-systems and frees clean
Frederik Schwarzer025dfda2008-10-16 19:02:37 +0200141 * znodes which are older than @age, until at least @nr znodes are freed.
Artem Bityutskiy1e517642008-07-14 19:08:37 +0300142 * Returns the number of freed znodes.
143 */
144static int shrink_tnc_trees(int nr, int age, int *contention)
145{
146 struct ubifs_info *c;
147 struct list_head *p;
148 unsigned int run_no;
149 int freed = 0;
150
151 spin_lock(&ubifs_infos_lock);
152 do {
153 run_no = ++shrinker_run_no;
154 } while (run_no == 0);
155 /* Iterate over all mounted UBIFS file-systems and try to shrink them */
156 p = ubifs_infos.next;
157 while (p != &ubifs_infos) {
158 c = list_entry(p, struct ubifs_info, infos_list);
159 /*
160 * We move the ones we do to the end of the list, so we stop
161 * when we see one we have already done.
162 */
163 if (c->shrinker_run_no == run_no)
164 break;
165 if (!mutex_trylock(&c->umount_mutex)) {
166 /* Some un-mount is in progress, try next FS */
167 *contention = 1;
168 p = p->next;
169 continue;
170 }
171 /*
172 * We're holding 'c->umount_mutex', so the file-system won't go
173 * away.
174 */
175 if (!mutex_trylock(&c->tnc_mutex)) {
176 mutex_unlock(&c->umount_mutex);
177 *contention = 1;
178 p = p->next;
179 continue;
180 }
181 spin_unlock(&ubifs_infos_lock);
182 /*
183 * OK, now we have TNC locked, the file-system cannot go away -
184 * it is safe to reap the cache.
185 */
186 c->shrinker_run_no = run_no;
187 freed += shrink_tnc(c, nr, age, contention);
188 mutex_unlock(&c->tnc_mutex);
189 spin_lock(&ubifs_infos_lock);
190 /* Get the next list element before we move this one */
191 p = p->next;
192 /*
193 * Move this one to the end of the list to provide some
194 * fairness.
195 */
Eric Sesterhennec328162009-02-13 09:13:11 +0100196 list_move_tail(&c->infos_list, &ubifs_infos);
Artem Bityutskiy1e517642008-07-14 19:08:37 +0300197 mutex_unlock(&c->umount_mutex);
198 if (freed >= nr)
199 break;
200 }
201 spin_unlock(&ubifs_infos_lock);
202 return freed;
203}
204
205/**
206 * kick_a_thread - kick a background thread to start commit.
207 *
208 * This function kicks a background thread to start background commit. Returns
209 * %-1 if a thread was kicked or there is another reason to assume the memory
210 * will soon be freed or become freeable. If there are no dirty znodes, returns
211 * %0.
212 */
213static int kick_a_thread(void)
214{
215 int i;
216 struct ubifs_info *c;
217
218 /*
219 * Iterate over all mounted UBIFS file-systems and find out if there is
220 * already an ongoing commit operation there. If no, then iterate for
221 * the second time and initiate background commit.
222 */
223 spin_lock(&ubifs_infos_lock);
224 for (i = 0; i < 2; i++) {
225 list_for_each_entry(c, &ubifs_infos, infos_list) {
226 long dirty_zn_cnt;
227
228 if (!mutex_trylock(&c->umount_mutex)) {
229 /*
230 * Some un-mount is in progress, it will
231 * certainly free memory, so just return.
232 */
233 spin_unlock(&ubifs_infos_lock);
234 return -1;
235 }
236
237 dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
238
239 if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
Artem Bityutskiy2ef13292010-09-19 18:34:26 +0300240 c->ro_mount || c->ro_error) {
Artem Bityutskiy1e517642008-07-14 19:08:37 +0300241 mutex_unlock(&c->umount_mutex);
242 continue;
243 }
244
245 if (c->cmt_state != COMMIT_RESTING) {
246 spin_unlock(&ubifs_infos_lock);
247 mutex_unlock(&c->umount_mutex);
248 return -1;
249 }
250
251 if (i == 1) {
Eric Sesterhennec328162009-02-13 09:13:11 +0100252 list_move_tail(&c->infos_list, &ubifs_infos);
Artem Bityutskiy1e517642008-07-14 19:08:37 +0300253 spin_unlock(&ubifs_infos_lock);
254
255 ubifs_request_bg_commit(c);
256 mutex_unlock(&c->umount_mutex);
257 return -1;
258 }
259 mutex_unlock(&c->umount_mutex);
260 }
261 }
262 spin_unlock(&ubifs_infos_lock);
263
264 return 0;
265}
266
Dave Chinner1ab6c492013-08-28 10:18:09 +1000267unsigned long ubifs_shrink_count(struct shrinker *shrink,
268 struct shrink_control *sc)
Artem Bityutskiy1e517642008-07-14 19:08:37 +0300269{
Artem Bityutskiy1e517642008-07-14 19:08:37 +0300270 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
271
Dave Chinner1ab6c492013-08-28 10:18:09 +1000272 /*
273 * Due to the way UBIFS updates the clean znode counter it may
274 * temporarily be negative.
275 */
276 return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
277}
278
279unsigned long ubifs_shrink_scan(struct shrinker *shrink,
280 struct shrink_control *sc)
281{
282 unsigned long nr = sc->nr_to_scan;
283 int contention = 0;
284 unsigned long freed;
285 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
Artem Bityutskiy1e517642008-07-14 19:08:37 +0300286
287 if (!clean_zn_cnt) {
288 /*
289 * No clean znodes, nothing to reap. All we can do in this case
290 * is to kick background threads to start commit, which will
291 * probably make clean znodes which, in turn, will be freeable.
292 * And we return -1 which means will make VM call us again
293 * later.
294 */
295 dbg_tnc("no clean znodes, kick a thread");
296 return kick_a_thread();
297 }
298
299 freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
300 if (freed >= nr)
301 goto out;
302
303 dbg_tnc("not enough old znodes, try to free young ones");
304 freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
305 if (freed >= nr)
306 goto out;
307
308 dbg_tnc("not enough young znodes, free all");
309 freed += shrink_tnc_trees(nr - freed, 0, &contention);
310
311 if (!freed && contention) {
312 dbg_tnc("freed nothing, but contention");
Dave Chinner1ab6c492013-08-28 10:18:09 +1000313 return SHRINK_STOP;
Artem Bityutskiy1e517642008-07-14 19:08:37 +0300314 }
315
316out:
Dave Chinner1ab6c492013-08-28 10:18:09 +1000317 dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
Artem Bityutskiy1e517642008-07-14 19:08:37 +0300318 return freed;
319}