// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 */
#include "internal.h"
#include <linux/pagevec.h>

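/*
 * Grab a page for internal use: reuse one from the caller-supplied @pool
 * when it isn't empty (such pages must hold exactly one reference),
 * otherwise fall back to a fresh alloc_page(gfp).
 */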
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
	struct page *page;

	if (!list_empty(pool)) {
		page = lru_to_page(pool);
		DBG_BUGON(page_ref_count(page) != 1);
		list_del(&page->lru);
	} else {
		page = alloc_page(gfp);
	}
	return page;
}

#if (EROFS_PCPUBUF_NR_PAGES > 0)
static struct {
	u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];

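/*
 * Map the @pagenr-th page of this CPU's bounce buffer.  Preemption is
 * disabled here; the caller is responsible for re-enabling it once it is
 * done with the buffer.
 */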
void *erofs_get_pcpubuf(unsigned int pagenr)
{
	preempt_disable();
	return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
}
#endif

#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

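/*
 * Conditionally take a reference on @grp without any lock: wait out any
 * freezer first, bail out with -1 if the workgroup is already dying
 * (observed refcount <= 0), and retry the cmpxchg if the refcount moved
 * underneath us.  Reviving a workgroup whose only remaining reference was
 * the workstation itself (old refcount == 1) makes it unshrinkable again,
 * hence the global shrink count is decreased.
 */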
static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
	int o;

repeat:
	o = erofs_wait_on_workgroup_freezed(grp);
	if (o <= 0)
		return -1;

	if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
		goto repeat;

	/*
	 * decrease the global shrink count; this pairs with the
	 * increase in erofs_workgroup_put()
	 */
	if (o == 1)
		atomic_long_dec(&erofs_global_shrink_cnt);
	return 0;
}

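/*
 * Look up the workgroup at @index under RCU protection and try to take a
 * reference on it; if the workgroup is being released concurrently, drop
 * the RCU read lock and retry the lookup from scratch.
 */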
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = xa_load(&sbi->managed_pslots, index);
	if (grp) {
		if (erofs_workgroup_get(grp)) {
			/* prefer to relax rcu read side */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

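/*
 * Try to publish @grp at @grp->index in the per-sb XArray.  On success the
 * new workgroup itself is returned; if somebody else won the race, the
 * pre-existing in-tree workgroup is returned instead with a reference
 * taken on it, and an ERR_PTR() is returned on XArray allocation failure.
 * A caller is therefore expected to do roughly the following (a sketch
 * only, not a verbatim caller from this file):
 *
 *	ret = erofs_insert_workgroup(sb, grp);
 *	if (IS_ERR(ret))
 *		return PTR_ERR(ret);
 *	if (ret != grp)
 *		; /+ lost the race: free the local one and use "ret" +/
 */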
struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
					       struct erofs_workgroup *grp)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	struct erofs_workgroup *pre;

	/*
	 * Bump up the reference count before making this workgroup
	 * visible to others in the XArray, in order to avoid a
	 * potential UAF since lookups aren't serialized by xa_lock.
	 */
	atomic_inc(&grp->refcount);

repeat:
	xa_lock(&sbi->managed_pslots);
	pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
			   NULL, grp, GFP_NOFS);
	if (pre) {
		if (xa_is_err(pre)) {
			pre = ERR_PTR(xa_err(pre));
		} else if (erofs_workgroup_get(pre)) {
			/* try to legitimize the current in-tree one */
			xa_unlock(&sbi->managed_pslots);
			cond_resched();
			goto repeat;
		}
		atomic_dec(&grp->refcount);
		grp = pre;
	}
	xa_unlock(&sbi->managed_pslots);
	return grp;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

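/*
 * Drop a reference on @grp.  Once only the workstation reference is left
 * (refcount hits 1), the workgroup becomes reclaimable and the global
 * shrink count goes up; when the last reference goes away, free it via RCU.
 */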
int erofs_workgroup_put(struct erofs_workgroup *grp)
{
	int count = atomic_dec_return(&grp->refcount);

	if (count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	else if (!count)
		__erofs_workgroup_free(grp);
	return count;
}

static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
{
	erofs_workgroup_unfreeze(grp, 0);
	__erofs_workgroup_free(grp);
}

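/*
 * Attempt to reclaim one workgroup: freeze it against concurrent users,
 * detach all of its cached pages, erase it from the XArray and finally
 * free it.  Returns false (leaving the workgroup untouched) if it is still
 * in use or some cached pages cannot be released.
 */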
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp)
{
	/*
	 * If the managed cache is enabled, the refcount of workgroups
	 * themselves could be < 0 (frozen).  In other words, there is
	 * no guarantee that all refcounts are > 0.
	 */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;

	/*
	 * Note that all cached pages should be unattached before the
	 * workgroup is deleted from the XArray.  Otherwise some cached
	 * pages could still be attached to the orphaned old workgroup
	 * when the new one is available in the tree.
	 */
	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
		erofs_workgroup_unfreeze(grp, 1);
		return false;
	}

	/*
	 * It's impossible to fail after the workgroup is frozen, but
	 * in order to catch unexpected race conditions, add a DBG_BUGON
	 * to observe this in advance.
	 */
	DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp);

	/*
	 * If the managed cache is enabled, the last refcount should be
	 * held by the related workstation.
	 */
	erofs_workgroup_unfreeze_final(grp);
	return true;
}

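/*
 * Walk the whole per-sb workstation and try to release workgroups until
 * either @nr_shrink of them have been freed or the XArray is exhausted;
 * the number of workgroups actually freed is returned.
 */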
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
					      unsigned long nr_shrink)
{
	struct erofs_workgroup *grp;
	unsigned int freed = 0;
	unsigned long index;

	xa_for_each(&sbi->managed_pslots, index, grp) {
		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp))
			continue;

		++freed;
		if (!--nr_shrink)
			break;
	}
	return freed;
}

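/*
 * A single global shrinker instance is shared by all mounted erofs
 * filesystems: each superblock registers itself on 'erofs_sb_list' below
 * and is visited round-robin by erofs_shrink_scan().
 */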
/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	/* clean up all remaining workgroups in memory */
	erofs_shrink_workstation(sbi, ~0UL);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}

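/* ->count_objects: report how many workgroups are currently reclaimable */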
static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}

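/*
 * ->scan_objects: walk the mounted superblocks and shrink their
 * workstations.  Every pass gets a fresh non-zero 'shrinker_run_no' stamp,
 * and visited superblocks are stamped and rotated to the list tail so the
 * walk terminates once an already-visited entry is seen again.
 */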
static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

		freed += erofs_shrink_workstation(sbi, nr - freed);

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}

static struct shrinker erofs_shrinker_info = {
	.scan_objects = erofs_shrink_scan,
	.count_objects = erofs_shrink_count,
	.seeks = DEFAULT_SEEKS,
};

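/*
 * Register/unregister the global shrinker; these are intended to be called
 * once from the module init/exit paths (the callers live outside this file).
 */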
int __init erofs_init_shrinker(void)
{
	return register_shrinker(&erofs_shrinker_info);
}

void erofs_exit_shrinker(void)
{
	unregister_shrinker(&erofs_shrinker_info);
}
#endif	/* !CONFIG_EROFS_FS_ZIP */