blob: 6cb356c4217b26d32c47fda6080b8a47770d3aa5 [file] [log] [blame]
Gao Xiang29b24f62019-07-31 23:57:31 +08001// SPDX-License-Identifier: GPL-2.0-only
Gao Xiang02827e12018-07-26 20:21:58 +08002/*
Gao Xiang02827e12018-07-26 20:21:58 +08003 * Copyright (C) 2018 HUAWEI, Inc.
Alexander A. Klimov592e7cd2020-07-13 15:09:44 +02004 * https://www.huawei.com/
Gao Xiang02827e12018-07-26 20:21:58 +08005 * Created by Gao Xiang <gaoxiang25@huawei.com>
Gao Xiang02827e12018-07-26 20:21:58 +08006 */
Gao Xiang57b78c92019-07-31 23:57:32 +08007#include "zdata.h"
Gao Xiang27481232019-06-24 15:22:54 +08008#include "compress.h"
Gao Xiang3883a792018-07-26 20:22:06 +08009#include <linux/prefetch.h>
10
Chen Gong284db122018-09-18 22:27:27 +080011#include <trace/events/erofs.h>
12
/*
 * Placeholder value stored in a compressed_pages[] slot so that the slot
 * is not filled with a file page for in-place decompression.
 */
#define PAGE_UNALLOCATED     ((void *)0x5F0E4B1D)
18
/* policy used when preloading cached pages for a pcluster */
enum z_erofs_cache_alloctype {
	/* never allocate cached pages */
	DONTALLOC,
	/* defer the allocation until the I/O is submitted */
	DELAYEDALLOC,
	/*
	 * use cached I/O when a page can be allocated, otherwise fall back
	 * to in-place I/O so that no direct reclaim is triggered.
	 */
	TRYALLOC,
};
29
30/*
31 * tagged pointer with 1-bit tag for all compressed pages
32 * tag 0 - the page is just found with an extra page reference
33 */
34typedef tagptr1_t compressed_page_t;
35
36#define tag_compressed_page_justfound(page) \
37 tagptr_fold(compressed_page_t, page, 1)
38
Gao Xiang3883a792018-07-26 20:22:06 +080039static struct workqueue_struct *z_erofs_workqueue __read_mostly;
Gao Xiang97e86a82019-07-31 23:57:47 +080040static struct kmem_cache *pcluster_cachep __read_mostly;
Gao Xiang3883a792018-07-26 20:22:06 +080041
42void z_erofs_exit_zip_subsystem(void)
43{
Gao Xiang3883a792018-07-26 20:22:06 +080044 destroy_workqueue(z_erofs_workqueue);
Gao Xiang97e86a82019-07-31 23:57:47 +080045 kmem_cache_destroy(pcluster_cachep);
Gao Xiang3883a792018-07-26 20:22:06 +080046}
47
Gao Xiang99634bf2019-09-04 10:09:05 +080048static inline int z_erofs_init_workqueue(void)
Gao Xiang3883a792018-07-26 20:22:06 +080049{
Thomas Weißschuh7dd68b12018-09-10 21:41:14 +020050 const unsigned int onlinecpus = num_possible_cpus();
Gao Xiang3883a792018-07-26 20:22:06 +080051
52 /*
Gao Xiang97e86a82019-07-31 23:57:47 +080053 * no need to spawn too many threads, limiting threads could minimum
54 * scheduling overhead, perhaps per-CPU threads should be better?
Gao Xiang3883a792018-07-26 20:22:06 +080055 */
Gao Xiang0e62ea32020-07-31 10:40:49 +080056 z_erofs_workqueue = alloc_workqueue("erofs_unzipd",
57 WQ_UNBOUND | WQ_HIGHPRI,
Gao Xiang97e86a82019-07-31 23:57:47 +080058 onlinecpus + onlinecpus / 4);
Cristian Sicilia42d40b42018-11-12 21:43:57 +010059 return z_erofs_workqueue ? 0 : -ENOMEM;
Gao Xiang3883a792018-07-26 20:22:06 +080060}
61
Gao Xiang99634bf2019-09-04 10:09:05 +080062static void z_erofs_pcluster_init_once(void *ptr)
Gao Xiang48d4bf32018-11-23 01:21:46 +080063{
Gao Xiang97e86a82019-07-31 23:57:47 +080064 struct z_erofs_pcluster *pcl = ptr;
65 struct z_erofs_collection *cl = z_erofs_primarycollection(pcl);
Gao Xiang48d4bf32018-11-23 01:21:46 +080066 unsigned int i;
67
Gao Xiang97e86a82019-07-31 23:57:47 +080068 mutex_init(&cl->lock);
69 cl->nr_pages = 0;
70 cl->vcnt = 0;
Gao Xiang48d4bf32018-11-23 01:21:46 +080071 for (i = 0; i < Z_EROFS_CLUSTER_MAX_PAGES; ++i)
Gao Xiang97e86a82019-07-31 23:57:47 +080072 pcl->compressed_pages[i] = NULL;
Gao Xiang48d4bf32018-11-23 01:21:46 +080073}
74
Gao Xiang0a0b7e62018-10-09 21:43:53 +080075int __init z_erofs_init_zip_subsystem(void)
Gao Xiang3883a792018-07-26 20:22:06 +080076{
Gao Xiang97e86a82019-07-31 23:57:47 +080077 pcluster_cachep = kmem_cache_create("erofs_compress",
78 Z_EROFS_WORKGROUP_SIZE, 0,
Gao Xiang99634bf2019-09-04 10:09:05 +080079 SLAB_RECLAIM_ACCOUNT,
80 z_erofs_pcluster_init_once);
Gao Xiang97e86a82019-07-31 23:57:47 +080081 if (pcluster_cachep) {
Gao Xiang99634bf2019-09-04 10:09:05 +080082 if (!z_erofs_init_workqueue())
Gao Xiang3883a792018-07-26 20:22:06 +080083 return 0;
84
Gao Xiang97e86a82019-07-31 23:57:47 +080085 kmem_cache_destroy(pcluster_cachep);
Gao Xiang3883a792018-07-26 20:22:06 +080086 }
87 return -ENOMEM;
88}
89
/*
 * Relationship between the collector and the pcluster it currently works
 * on. The enumerator order matters: the code in this file compares modes
 * with relational operators.
 */
enum z_erofs_collectmode {
	COLLECT_SECONDARY,
	/* the pcluster belongs to a chain but is not the end of that chain */
	COLLECT_PRIMARY,
	/*
	 * The current collection was the tail of an existing chain, and all
	 * previously processed chained collections have been decided to be
	 * hooked up to it.
	 * A new chain will be created for the remaining collections that are
	 * not processed yet. Therefore, unlike COLLECT_PRIMARY_FOLLOWED,
	 * the next collection cannot safely reuse the whole page in
	 * the following scenario:
	 *  ________________________________________________________________
	 * |      tail (partial) page     |       head (partial) page       |
	 * |   (belongs to the next cl)   |   (belongs to the current cl)   |
	 * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________|
	 */
	COLLECT_PRIMARY_HOOKED,
	/* PRIMARY_FOLLOWED downgraded by preload_compressed_pages() */
	COLLECT_PRIMARY_FOLLOWED_NOINPLACE,
	/*
	 * The current collection has been linked with the owned chain and
	 * may also be linked with the remaining collections. So when the
	 * page being processed is the tail page of the collection, the
	 * current collection can safely use the whole page for in-place I/O
	 * (the previous collection is under control), as illustrated below:
	 *  ________________________________________________________________
	 * |  tail (partial) page |          head (partial) page           |
	 * |  (of the current cl) |      (of the previous collection)      |
	 * |  PRIMARY_FOLLOWED or |                                        |
	 * |_____PRIMARY_HOOKED___|____________PRIMARY_FOLLOWED____________|
	 *
	 * [  (*) the above page can be used as inplace I/O.               ]
	 */
	COLLECT_PRIMARY_FOLLOWED,
};
125
Gao Xiang97e86a82019-07-31 23:57:47 +0800126struct z_erofs_collector {
Gao Xiang3883a792018-07-26 20:22:06 +0800127 struct z_erofs_pagevec_ctor vector;
128
Gao Xiangbfc4ccb2019-08-21 11:09:08 +0800129 struct z_erofs_pcluster *pcl, *tailpcl;
Gao Xiang97e86a82019-07-31 23:57:47 +0800130 struct z_erofs_collection *cl;
131 struct page **compressedpages;
132 z_erofs_next_pcluster_t owned_head;
133
134 enum z_erofs_collectmode mode;
Gao Xiang3883a792018-07-26 20:22:06 +0800135};
136
Gao Xiang97e86a82019-07-31 23:57:47 +0800137struct z_erofs_decompress_frontend {
138 struct inode *const inode;
139
140 struct z_erofs_collector clt;
141 struct erofs_map_blocks map;
142
Gao Xiang6ea5aad2020-09-19 15:27:30 +0800143 bool readahead;
Gao Xiang97e86a82019-07-31 23:57:47 +0800144 /* used for applying cache strategy on the fly */
145 bool backmost;
146 erofs_off_t headoffset;
147};
148
149#define COLLECTOR_INIT() { \
150 .owned_head = Z_EROFS_PCLUSTER_TAIL, \
151 .mode = COLLECT_PRIMARY_FOLLOWED }
152
153#define DECOMPRESS_FRONTEND_INIT(__i) { \
154 .inode = __i, .clt = COLLECTOR_INIT(), \
155 .backmost = true, }
156
157static struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES];
158static DEFINE_MUTEX(z_pagemap_global_lock);
Gao Xiang3883a792018-07-26 20:22:06 +0800159
Gao Xiang97e86a82019-07-31 23:57:47 +0800160static void preload_compressed_pages(struct z_erofs_collector *clt,
Gao Xiang92e6efd2018-12-08 00:19:16 +0800161 struct address_space *mc,
Gao Xiang1825c8d2020-12-09 20:37:17 +0800162 enum z_erofs_cache_alloctype type,
163 struct list_head *pagepool)
Gao Xiang105d4ad2018-07-26 20:22:07 +0800164{
Gao Xiang97e86a82019-07-31 23:57:47 +0800165 const struct z_erofs_pcluster *pcl = clt->pcl;
166 const unsigned int clusterpages = BIT(pcl->clusterbits);
167 struct page **pages = clt->compressedpages;
168 pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages);
Gao Xiang92e6efd2018-12-08 00:19:16 +0800169 bool standalone = true;
Gao Xiang1825c8d2020-12-09 20:37:17 +0800170 gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
171 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
Gao Xiang105d4ad2018-07-26 20:22:07 +0800172
Gao Xiang97e86a82019-07-31 23:57:47 +0800173 if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
Gao Xiang92e6efd2018-12-08 00:19:16 +0800174 return;
Gao Xiang105d4ad2018-07-26 20:22:07 +0800175
Gao Xiang97e86a82019-07-31 23:57:47 +0800176 for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
Gao Xiang92e6efd2018-12-08 00:19:16 +0800177 struct page *page;
178 compressed_page_t t;
Gao Xiang1825c8d2020-12-09 20:37:17 +0800179 struct page *newpage = NULL;
Gao Xiang92e6efd2018-12-08 00:19:16 +0800180
181 /* the compressed page was loaded before */
Gao Xiang97e86a82019-07-31 23:57:47 +0800182 if (READ_ONCE(*pages))
Gao Xiang105d4ad2018-07-26 20:22:07 +0800183 continue;
184
Gao Xiang97e86a82019-07-31 23:57:47 +0800185 page = find_get_page(mc, index);
Gao Xiang92e6efd2018-12-08 00:19:16 +0800186
187 if (page) {
188 t = tag_compressed_page_justfound(page);
189 } else if (type == DELAYEDALLOC) {
190 t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
Gao Xiang1825c8d2020-12-09 20:37:17 +0800191 } else if (type == TRYALLOC) {
192 newpage = erofs_allocpage(pagepool, gfp);
193 if (!newpage)
194 goto dontalloc;
195
196 set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
197 t = tag_compressed_page_justfound(newpage);
Gao Xiang92e6efd2018-12-08 00:19:16 +0800198 } else { /* DONTALLOC */
Gao Xiang1825c8d2020-12-09 20:37:17 +0800199dontalloc:
Gao Xiang92e6efd2018-12-08 00:19:16 +0800200 if (standalone)
Gao Xiang97e86a82019-07-31 23:57:47 +0800201 clt->compressedpages = pages;
Gao Xiang92e6efd2018-12-08 00:19:16 +0800202 standalone = false;
203 continue;
Gao Xiang105d4ad2018-07-26 20:22:07 +0800204 }
205
Gao Xiang97e86a82019-07-31 23:57:47 +0800206 if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
Gao Xiang105d4ad2018-07-26 20:22:07 +0800207 continue;
208
Gao Xiang1825c8d2020-12-09 20:37:17 +0800209 if (page) {
Gao Xiang92e6efd2018-12-08 00:19:16 +0800210 put_page(page);
Gao Xiang1825c8d2020-12-09 20:37:17 +0800211 } else if (newpage) {
212 set_page_private(newpage, 0);
213 list_add(&newpage->lru, pagepool);
214 }
Gao Xiang105d4ad2018-07-26 20:22:07 +0800215 }
Gao Xiang92e6efd2018-12-08 00:19:16 +0800216
Gao Xiang97e86a82019-07-31 23:57:47 +0800217 if (standalone) /* downgrade to PRIMARY_FOLLOWED_NOINPLACE */
218 clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
Gao Xiang105d4ad2018-07-26 20:22:07 +0800219}
220
221/* called by erofs_shrinker to get rid of all compressed_pages */
Gao Xiang47e541a2018-07-29 13:34:58 +0800222int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
Gao Xiang97e86a82019-07-31 23:57:47 +0800223 struct erofs_workgroup *grp)
Gao Xiang105d4ad2018-07-26 20:22:07 +0800224{
Gao Xiang97e86a82019-07-31 23:57:47 +0800225 struct z_erofs_pcluster *const pcl =
226 container_of(grp, struct z_erofs_pcluster, obj);
Gao Xiangc1448fa2018-12-08 00:19:13 +0800227 struct address_space *const mapping = MNGD_MAPPING(sbi);
Gao Xiang97e86a82019-07-31 23:57:47 +0800228 const unsigned int clusterpages = BIT(pcl->clusterbits);
Gao Xiang105d4ad2018-07-26 20:22:07 +0800229 int i;
230
231 /*
232 * refcount of workgroup is now freezed as 1,
233 * therefore no need to worry about available decompression users.
234 */
235 for (i = 0; i < clusterpages; ++i) {
Gao Xiang97e86a82019-07-31 23:57:47 +0800236 struct page *page = pcl->compressed_pages[i];
Gao Xiang105d4ad2018-07-26 20:22:07 +0800237
Gao Xiang97e86a82019-07-31 23:57:47 +0800238 if (!page)
Gao Xiang105d4ad2018-07-26 20:22:07 +0800239 continue;
240
241 /* block other users from reclaiming or migrating the page */
242 if (!trylock_page(page))
243 return -EBUSY;
244
Gao Xiang8d8a09b2019-08-30 00:38:27 +0800245 if (page->mapping != mapping)
Gao Xiang97e86a82019-07-31 23:57:47 +0800246 continue;
Gao Xiang105d4ad2018-07-26 20:22:07 +0800247
Gao Xiang97e86a82019-07-31 23:57:47 +0800248 /* barrier is implied in the following 'unlock_page' */
249 WRITE_ONCE(pcl->compressed_pages[i], NULL);
Gao Xiang6aaa7b02020-12-08 17:58:32 +0800250 detach_page_private(page);
Gao Xiang105d4ad2018-07-26 20:22:07 +0800251 unlock_page(page);
Gao Xiang105d4ad2018-07-26 20:22:07 +0800252 }
253 return 0;
254}
255
Gao Xiang47e541a2018-07-29 13:34:58 +0800256int erofs_try_to_free_cached_page(struct address_space *mapping,
257 struct page *page)
Gao Xiang105d4ad2018-07-26 20:22:07 +0800258{
Gao Xiang97e86a82019-07-31 23:57:47 +0800259 struct z_erofs_pcluster *const pcl = (void *)page_private(page);
260 const unsigned int clusterpages = BIT(pcl->clusterbits);
Gao Xiang105d4ad2018-07-26 20:22:07 +0800261 int ret = 0; /* 0 - busy */
262
Gao Xiang97e86a82019-07-31 23:57:47 +0800263 if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) {
Gao Xiang105d4ad2018-07-26 20:22:07 +0800264 unsigned int i;
265
266 for (i = 0; i < clusterpages; ++i) {
Gao Xiang97e86a82019-07-31 23:57:47 +0800267 if (pcl->compressed_pages[i] == page) {
268 WRITE_ONCE(pcl->compressed_pages[i], NULL);
Gao Xiang105d4ad2018-07-26 20:22:07 +0800269 ret = 1;
270 break;
271 }
272 }
Gao Xiang97e86a82019-07-31 23:57:47 +0800273 erofs_workgroup_unfreeze(&pcl->obj, 1);
Gao Xiang105d4ad2018-07-26 20:22:07 +0800274
Gao Xiang6aaa7b02020-12-08 17:58:32 +0800275 if (ret)
276 detach_page_private(page);
Gao Xiang105d4ad2018-07-26 20:22:07 +0800277 }
278 return ret;
279}
Gao Xiang105d4ad2018-07-26 20:22:07 +0800280
Gao Xiang3883a792018-07-26 20:22:06 +0800281/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */
Gao Xiang99634bf2019-09-04 10:09:05 +0800282static inline bool z_erofs_try_inplace_io(struct z_erofs_collector *clt,
283 struct page *page)
Gao Xiang3883a792018-07-26 20:22:06 +0800284{
Gao Xiang97e86a82019-07-31 23:57:47 +0800285 struct z_erofs_pcluster *const pcl = clt->pcl;
286 const unsigned int clusterpages = BIT(pcl->clusterbits);
287
288 while (clt->compressedpages < pcl->compressed_pages + clusterpages) {
289 if (!cmpxchg(clt->compressedpages++, NULL, page))
Gao Xiang3883a792018-07-26 20:22:06 +0800290 return true;
291 }
Gao Xiang3883a792018-07-26 20:22:06 +0800292 return false;
293}
294
Gao Xiang97e86a82019-07-31 23:57:47 +0800295/* callers must be with collection lock held */
296static int z_erofs_attach_page(struct z_erofs_collector *clt,
297 struct page *page,
298 enum z_erofs_page_type type)
Gao Xiang3883a792018-07-26 20:22:06 +0800299{
300 int ret;
301 bool occupied;
302
Gao Xiang97e86a82019-07-31 23:57:47 +0800303 /* give priority for inplaceio */
304 if (clt->mode >= COLLECT_PRIMARY &&
Julian Merida447a3622019-03-18 20:58:41 -0300305 type == Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
Gao Xiang99634bf2019-09-04 10:09:05 +0800306 z_erofs_try_inplace_io(clt, page))
Gao Xiang3883a792018-07-26 20:22:06 +0800307 return 0;
308
Gao Xiang97e86a82019-07-31 23:57:47 +0800309 ret = z_erofs_pagevec_enqueue(&clt->vector,
Gao Xiang046d64e2019-07-31 23:57:45 +0800310 page, type, &occupied);
Gao Xiang97e86a82019-07-31 23:57:47 +0800311 clt->cl->vcnt += (unsigned int)ret;
Gao Xiang3883a792018-07-26 20:22:06 +0800312
313 return ret ? 0 : -EAGAIN;
314}
315
Gao Xiang473e15b2020-12-08 17:58:34 +0800316static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt)
Gao Xiang3883a792018-07-26 20:22:06 +0800317{
Gao Xiang473e15b2020-12-08 17:58:34 +0800318 struct z_erofs_pcluster *pcl = clt->pcl;
319 z_erofs_next_pcluster_t *owned_head = &clt->owned_head;
Gao Xiang3883a792018-07-26 20:22:06 +0800320
Gao Xiang473e15b2020-12-08 17:58:34 +0800321 /* type 1, nil pcluster (this pcluster doesn't belong to any chain.) */
322 if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL,
323 *owned_head) == Z_EROFS_PCLUSTER_NIL) {
Gao Xiang97e86a82019-07-31 23:57:47 +0800324 *owned_head = &pcl->next;
Gao Xiang473e15b2020-12-08 17:58:34 +0800325 /* so we can attach this pcluster to our submission chain. */
326 clt->mode = COLLECT_PRIMARY_FOLLOWED;
327 return;
Gao Xianga1121522019-02-27 13:33:32 +0800328 }
Gao Xiang473e15b2020-12-08 17:58:34 +0800329
330 /*
331 * type 2, link to the end of an existing open chain, be careful
332 * that its submission is controlled by the original attached chain.
333 */
334 if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
335 *owned_head) == Z_EROFS_PCLUSTER_TAIL) {
336 *owned_head = Z_EROFS_PCLUSTER_TAIL;
337 clt->mode = COLLECT_PRIMARY_HOOKED;
338 clt->tailpcl = NULL;
339 return;
340 }
341 /* type 3, it belongs to a chain, but it isn't the end of the chain */
342 clt->mode = COLLECT_PRIMARY;
Gao Xiang3883a792018-07-26 20:22:06 +0800343}
344
Gao Xiang9e579fc2019-10-08 20:56:12 +0800345static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
346 struct inode *inode,
347 struct erofs_map_blocks *map)
Gao Xiang3883a792018-07-26 20:22:06 +0800348{
Gao Xiang64094a02020-02-20 10:46:42 +0800349 struct z_erofs_pcluster *pcl = clt->pcl;
Gao Xiang97e86a82019-07-31 23:57:47 +0800350 struct z_erofs_collection *cl;
351 unsigned int length;
Gao Xiang3883a792018-07-26 20:22:06 +0800352
Gao Xiang64094a02020-02-20 10:46:42 +0800353 /* to avoid unexpected loop formed by corrupted images */
Gao Xiangbfc4ccb2019-08-21 11:09:08 +0800354 if (clt->owned_head == &pcl->next || pcl == clt->tailpcl) {
355 DBG_BUGON(1);
Gao Xiang9e579fc2019-10-08 20:56:12 +0800356 return -EFSCORRUPTED;
Gao Xiangbfc4ccb2019-08-21 11:09:08 +0800357 }
Gao Xiang97e86a82019-07-31 23:57:47 +0800358
359 cl = z_erofs_primarycollection(pcl);
Gao Xiang8d8a09b2019-08-30 00:38:27 +0800360 if (cl->pageofs != (map->m_la & ~PAGE_MASK)) {
Gao Xiang97e86a82019-07-31 23:57:47 +0800361 DBG_BUGON(1);
Gao Xiang9e579fc2019-10-08 20:56:12 +0800362 return -EFSCORRUPTED;
Gao Xiang3883a792018-07-26 20:22:06 +0800363 }
364
Gao Xiang97e86a82019-07-31 23:57:47 +0800365 length = READ_ONCE(pcl->length);
366 if (length & Z_EROFS_PCLUSTER_FULL_LENGTH) {
367 if ((map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) > length) {
368 DBG_BUGON(1);
Gao Xiang9e579fc2019-10-08 20:56:12 +0800369 return -EFSCORRUPTED;
Gao Xiang97e86a82019-07-31 23:57:47 +0800370 }
371 } else {
372 unsigned int llen = map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT;
Gao Xiang3883a792018-07-26 20:22:06 +0800373
Gao Xiang97e86a82019-07-31 23:57:47 +0800374 if (map->m_flags & EROFS_MAP_FULL_MAPPED)
375 llen |= Z_EROFS_PCLUSTER_FULL_LENGTH;
Gao Xiang3883a792018-07-26 20:22:06 +0800376
Gao Xiang97e86a82019-07-31 23:57:47 +0800377 while (llen > length &&
378 length != cmpxchg_relaxed(&pcl->length, length, llen)) {
379 cpu_relax();
380 length = READ_ONCE(pcl->length);
381 }
382 }
383 mutex_lock(&cl->lock);
Gao Xiangbfc4ccb2019-08-21 11:09:08 +0800384 /* used to check tail merging loop due to corrupted images */
385 if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
386 clt->tailpcl = pcl;
Gao Xiang473e15b2020-12-08 17:58:34 +0800387
388 z_erofs_try_to_claim_pcluster(clt);
Gao Xiang97e86a82019-07-31 23:57:47 +0800389 clt->cl = cl;
Gao Xiang9e579fc2019-10-08 20:56:12 +0800390 return 0;
Gao Xiang3883a792018-07-26 20:22:06 +0800391}
392
Gao Xiang9e579fc2019-10-08 20:56:12 +0800393static int z_erofs_register_collection(struct z_erofs_collector *clt,
394 struct inode *inode,
395 struct erofs_map_blocks *map)
Gao Xiang3883a792018-07-26 20:22:06 +0800396{
Gao Xiang97e86a82019-07-31 23:57:47 +0800397 struct z_erofs_pcluster *pcl;
398 struct z_erofs_collection *cl;
Gao Xiang64094a02020-02-20 10:46:42 +0800399 struct erofs_workgroup *grp;
Gao Xiang97e86a82019-07-31 23:57:47 +0800400 int err;
Gao Xiange5e3abb2018-09-19 13:49:07 +0800401
Gao Xiang3883a792018-07-26 20:22:06 +0800402 /* no available workgroup, let's allocate one */
Gao Xiang97e86a82019-07-31 23:57:47 +0800403 pcl = kmem_cache_alloc(pcluster_cachep, GFP_NOFS);
Gao Xiang8d8a09b2019-08-30 00:38:27 +0800404 if (!pcl)
Gao Xiang9e579fc2019-10-08 20:56:12 +0800405 return -ENOMEM;
Gao Xiang3883a792018-07-26 20:22:06 +0800406
Gao Xiang64094a02020-02-20 10:46:42 +0800407 atomic_set(&pcl->obj.refcount, 1);
Gao Xiang97e86a82019-07-31 23:57:47 +0800408 pcl->obj.index = map->m_pa >> PAGE_SHIFT;
Gao Xiang3883a792018-07-26 20:22:06 +0800409
Gao Xiang97e86a82019-07-31 23:57:47 +0800410 pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
411 (map->m_flags & EROFS_MAP_FULL_MAPPED ?
412 Z_EROFS_PCLUSTER_FULL_LENGTH : 0);
Gao Xiang3883a792018-07-26 20:22:06 +0800413
Gao Xiang97e86a82019-07-31 23:57:47 +0800414 if (map->m_flags & EROFS_MAP_ZIPPED)
415 pcl->algorithmformat = Z_EROFS_COMPRESSION_LZ4;
416 else
417 pcl->algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
Gao Xiangb6a76182019-06-24 15:22:58 +0800418
Gao Xianga5876e22019-09-04 10:08:56 +0800419 pcl->clusterbits = EROFS_I(inode)->z_physical_clusterbits[0];
Gao Xiang97e86a82019-07-31 23:57:47 +0800420 pcl->clusterbits -= PAGE_SHIFT;
Gao Xiang3883a792018-07-26 20:22:06 +0800421
Gao Xiang97e86a82019-07-31 23:57:47 +0800422 /* new pclusters should be claimed as type 1, primary and followed */
423 pcl->next = clt->owned_head;
424 clt->mode = COLLECT_PRIMARY_FOLLOWED;
425
426 cl = z_erofs_primarycollection(pcl);
Gao Xiang64094a02020-02-20 10:46:42 +0800427
428 /* must be cleaned before freeing to slab */
429 DBG_BUGON(cl->nr_pages);
430 DBG_BUGON(cl->vcnt);
431
Gao Xiang97e86a82019-07-31 23:57:47 +0800432 cl->pageofs = map->m_la & ~PAGE_MASK;
Gao Xiang3883a792018-07-26 20:22:06 +0800433
Gao Xiang23edf3a2018-11-23 01:21:47 +0800434 /*
435 * lock all primary followed works before visible to others
Gao Xiang97e86a82019-07-31 23:57:47 +0800436 * and mutex_trylock *never* fails for a new pcluster.
Gao Xiang23edf3a2018-11-23 01:21:47 +0800437 */
Gao Xiang64094a02020-02-20 10:46:42 +0800438 DBG_BUGON(!mutex_trylock(&cl->lock));
Gao Xiang23edf3a2018-11-23 01:21:47 +0800439
Gao Xiang64094a02020-02-20 10:46:42 +0800440 grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
441 if (IS_ERR(grp)) {
442 err = PTR_ERR(grp);
443 goto err_out;
444 }
445
446 if (grp != &pcl->obj) {
447 clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
448 err = -EEXIST;
449 goto err_out;
Gao Xiang3883a792018-07-26 20:22:06 +0800450 }
Gao Xiangbfc4ccb2019-08-21 11:09:08 +0800451 /* used to check tail merging loop due to corrupted images */
452 if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
453 clt->tailpcl = pcl;
Gao Xiang97e86a82019-07-31 23:57:47 +0800454 clt->owned_head = &pcl->next;
455 clt->pcl = pcl;
456 clt->cl = cl;
Gao Xiang9e579fc2019-10-08 20:56:12 +0800457 return 0;
Gao Xiang64094a02020-02-20 10:46:42 +0800458
459err_out:
460 mutex_unlock(&cl->lock);
461 kmem_cache_free(pcluster_cachep, pcl);
462 return err;
Gao Xiang3883a792018-07-26 20:22:06 +0800463}
464
Gao Xiang97e86a82019-07-31 23:57:47 +0800465static int z_erofs_collector_begin(struct z_erofs_collector *clt,
466 struct inode *inode,
467 struct erofs_map_blocks *map)
Gao Xiang3883a792018-07-26 20:22:06 +0800468{
Gao Xiang64094a02020-02-20 10:46:42 +0800469 struct erofs_workgroup *grp;
Gao Xiang9e579fc2019-10-08 20:56:12 +0800470 int ret;
Gao Xiang3883a792018-07-26 20:22:06 +0800471
Gao Xiang97e86a82019-07-31 23:57:47 +0800472 DBG_BUGON(clt->cl);
Gao Xiang3883a792018-07-26 20:22:06 +0800473
Gao Xiang97e86a82019-07-31 23:57:47 +0800474 /* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous collection */
475 DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL);
476 DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
Gao Xiang3883a792018-07-26 20:22:06 +0800477
Gao Xiang97e86a82019-07-31 23:57:47 +0800478 if (!PAGE_ALIGNED(map->m_pa)) {
479 DBG_BUGON(1);
480 return -EINVAL;
481 }
Gao Xiang3883a792018-07-26 20:22:06 +0800482
Gao Xiang64094a02020-02-20 10:46:42 +0800483 grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
484 if (grp) {
485 clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
486 } else {
Gao Xiang9e579fc2019-10-08 20:56:12 +0800487 ret = z_erofs_register_collection(clt, inode, map);
Gao Xiangb27661c2018-09-19 13:49:06 +0800488
Gao Xiang64094a02020-02-20 10:46:42 +0800489 if (!ret)
490 goto out;
491 if (ret != -EEXIST)
492 return ret;
Gao Xiang3883a792018-07-26 20:22:06 +0800493 }
494
Gao Xiang64094a02020-02-20 10:46:42 +0800495 ret = z_erofs_lookup_collection(clt, inode, map);
496 if (ret) {
497 erofs_workgroup_put(&clt->pcl->obj);
Gao Xiang9e579fc2019-10-08 20:56:12 +0800498 return ret;
Gao Xiang64094a02020-02-20 10:46:42 +0800499 }
Gao Xiang3883a792018-07-26 20:22:06 +0800500
Gao Xiang64094a02020-02-20 10:46:42 +0800501out:
Gao Xiang97e86a82019-07-31 23:57:47 +0800502 z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS,
Gao Xiang9e579fc2019-10-08 20:56:12 +0800503 clt->cl->pagevec, clt->cl->vcnt);
Gao Xiang3883a792018-07-26 20:22:06 +0800504
Gao Xiang97e86a82019-07-31 23:57:47 +0800505 clt->compressedpages = clt->pcl->compressed_pages;
506 if (clt->mode <= COLLECT_PRIMARY) /* cannot do in-place I/O */
507 clt->compressedpages += Z_EROFS_CLUSTER_MAX_PAGES;
Gao Xiang3883a792018-07-26 20:22:06 +0800508 return 0;
509}
510
511/*
Gao Xiang97e86a82019-07-31 23:57:47 +0800512 * keep in mind that no referenced pclusters will be freed
513 * only after a RCU grace period.
Gao Xiang3883a792018-07-26 20:22:06 +0800514 */
515static void z_erofs_rcu_callback(struct rcu_head *head)
516{
Gao Xiang97e86a82019-07-31 23:57:47 +0800517 struct z_erofs_collection *const cl =
518 container_of(head, struct z_erofs_collection, rcu);
Gao Xiang3883a792018-07-26 20:22:06 +0800519
Gao Xiang97e86a82019-07-31 23:57:47 +0800520 kmem_cache_free(pcluster_cachep,
521 container_of(cl, struct z_erofs_pcluster,
522 primary_collection));
Gao Xiang3883a792018-07-26 20:22:06 +0800523}
524
525void erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
526{
Gao Xiang97e86a82019-07-31 23:57:47 +0800527 struct z_erofs_pcluster *const pcl =
528 container_of(grp, struct z_erofs_pcluster, obj);
529 struct z_erofs_collection *const cl = z_erofs_primarycollection(pcl);
Gao Xiang3883a792018-07-26 20:22:06 +0800530
Gao Xiang97e86a82019-07-31 23:57:47 +0800531 call_rcu(&cl->rcu, z_erofs_rcu_callback);
Gao Xiang3883a792018-07-26 20:22:06 +0800532}
533
Gao Xiang97e86a82019-07-31 23:57:47 +0800534static void z_erofs_collection_put(struct z_erofs_collection *cl)
Gao Xiang3883a792018-07-26 20:22:06 +0800535{
Gao Xiang97e86a82019-07-31 23:57:47 +0800536 struct z_erofs_pcluster *const pcl =
537 container_of(cl, struct z_erofs_pcluster, primary_collection);
538
539 erofs_workgroup_put(&pcl->obj);
Gao Xiang3883a792018-07-26 20:22:06 +0800540}
541
Gao Xiang97e86a82019-07-31 23:57:47 +0800542static bool z_erofs_collector_end(struct z_erofs_collector *clt)
Gao Xiang3883a792018-07-26 20:22:06 +0800543{
Gao Xiang97e86a82019-07-31 23:57:47 +0800544 struct z_erofs_collection *cl = clt->cl;
Gao Xiang3883a792018-07-26 20:22:06 +0800545
Gao Xiang97e86a82019-07-31 23:57:47 +0800546 if (!cl)
Gao Xiang3883a792018-07-26 20:22:06 +0800547 return false;
548
Gao Xiang97e86a82019-07-31 23:57:47 +0800549 z_erofs_pagevec_ctor_exit(&clt->vector, false);
550 mutex_unlock(&cl->lock);
Gao Xiang3883a792018-07-26 20:22:06 +0800551
552 /*
Gao Xiang97e86a82019-07-31 23:57:47 +0800553 * if all pending pages are added, don't hold its reference
554 * any longer if the pcluster isn't hosted by ourselves.
Gao Xiang3883a792018-07-26 20:22:06 +0800555 */
Gao Xiang97e86a82019-07-31 23:57:47 +0800556 if (clt->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE)
557 z_erofs_collection_put(cl);
Gao Xiang3883a792018-07-26 20:22:06 +0800558
Gao Xiang97e86a82019-07-31 23:57:47 +0800559 clt->cl = NULL;
Gao Xiang3883a792018-07-26 20:22:06 +0800560 return true;
561}
562
Gao Xiang97e86a82019-07-31 23:57:47 +0800563static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
Gao Xiang4279f3f2019-07-31 23:57:49 +0800564 unsigned int cachestrategy,
Gao Xiang97e86a82019-07-31 23:57:47 +0800565 erofs_off_t la)
Gao Xiang92e6efd2018-12-08 00:19:16 +0800566{
Gao Xiang4279f3f2019-07-31 23:57:49 +0800567 if (cachestrategy <= EROFS_ZIP_CACHE_DISABLED)
568 return false;
569
Gao Xiang92e6efd2018-12-08 00:19:16 +0800570 if (fe->backmost)
571 return true;
572
Gao Xiang4279f3f2019-07-31 23:57:49 +0800573 return cachestrategy >= EROFS_ZIP_CACHE_READAROUND &&
574 la < fe->headoffset;
Gao Xiang92e6efd2018-12-08 00:19:16 +0800575}
Gao Xiang92e6efd2018-12-08 00:19:16 +0800576
Gao Xiang97e86a82019-07-31 23:57:47 +0800577static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
Gao Xiang1825c8d2020-12-09 20:37:17 +0800578 struct page *page, struct list_head *pagepool)
Gao Xiang3883a792018-07-26 20:22:06 +0800579{
Gao Xiang97e86a82019-07-31 23:57:47 +0800580 struct inode *const inode = fe->inode;
Gao Xiangbda17a42019-10-08 20:56:13 +0800581 struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
Chao Yu3b423412019-01-15 09:42:21 +0800582 struct erofs_map_blocks *const map = &fe->map;
Gao Xiang97e86a82019-07-31 23:57:47 +0800583 struct z_erofs_collector *const clt = &fe->clt;
Gao Xiang3883a792018-07-26 20:22:06 +0800584 const loff_t offset = page_offset(page);
Gao Xiangdc76ea82019-09-22 18:04:34 +0800585 bool tight = true;
Gao Xiang3883a792018-07-26 20:22:06 +0800586
Gao Xiang92e6efd2018-12-08 00:19:16 +0800587 enum z_erofs_cache_alloctype cache_strategy;
Gao Xiang3883a792018-07-26 20:22:06 +0800588 enum z_erofs_page_type page_type;
Thomas Weißschuh7dd68b12018-09-10 21:41:14 +0200589 unsigned int cur, end, spiltted, index;
Gao Xiang1e05ff32018-09-18 22:27:25 +0800590 int err = 0;
Gao Xiang3883a792018-07-26 20:22:06 +0800591
592 /* register locked file pages as online pages in pack */
593 z_erofs_onlinepage_init(page);
594
595 spiltted = 0;
596 end = PAGE_SIZE;
597repeat:
598 cur = end - 1;
599
600 /* lucky, within the range of the current map_blocks */
601 if (offset + cur >= map->m_la &&
Julian Merida447a3622019-03-18 20:58:41 -0300602 offset + cur < map->m_la + map->m_llen) {
Gao Xiang97e86a82019-07-31 23:57:47 +0800603 /* didn't get a valid collection previously (very rare) */
604 if (!clt->cl)
Gao Xiang1e5ceea2019-02-27 13:33:31 +0800605 goto restart_now;
Gao Xiang3883a792018-07-26 20:22:06 +0800606 goto hitted;
Gao Xiang1e5ceea2019-02-27 13:33:31 +0800607 }
Gao Xiang3883a792018-07-26 20:22:06 +0800608
609 /* go ahead the next map_blocks */
Gao Xiang4f761fa2019-09-04 10:09:09 +0800610 erofs_dbg("%s: [out-of-range] pos %llu", __func__, offset + cur);
Gao Xiang3883a792018-07-26 20:22:06 +0800611
Gao Xiang97e86a82019-07-31 23:57:47 +0800612 if (z_erofs_collector_end(clt))
Gao Xiangf0c519f2018-11-23 01:21:49 +0800613 fe->backmost = false;
Gao Xiang3883a792018-07-26 20:22:06 +0800614
615 map->m_la = offset + cur;
616 map->m_llen = 0;
Gao Xiang97e86a82019-07-31 23:57:47 +0800617 err = z_erofs_map_blocks_iter(inode, map, 0);
Gao Xiang8d8a09b2019-08-30 00:38:27 +0800618 if (err)
Gao Xiang3883a792018-07-26 20:22:06 +0800619 goto err_out;
620
Gao Xiang1e5ceea2019-02-27 13:33:31 +0800621restart_now:
Gao Xiang8d8a09b2019-08-30 00:38:27 +0800622 if (!(map->m_flags & EROFS_MAP_MAPPED))
Gao Xiang3883a792018-07-26 20:22:06 +0800623 goto hitted;
624
Gao Xiang97e86a82019-07-31 23:57:47 +0800625 err = z_erofs_collector_begin(clt, inode, map);
Gao Xiang8d8a09b2019-08-30 00:38:27 +0800626 if (err)
Gao Xiang3883a792018-07-26 20:22:06 +0800627 goto err_out;
628
Gao Xiang92e6efd2018-12-08 00:19:16 +0800629 /* preload all compressed pages (maybe downgrade role if necessary) */
Chao Yuf57a3fe2020-05-29 18:48:36 +0800630 if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
Gao Xiang1825c8d2020-12-09 20:37:17 +0800631 cache_strategy = TRYALLOC;
Gao Xiang92e6efd2018-12-08 00:19:16 +0800632 else
633 cache_strategy = DONTALLOC;
Gao Xiang105d4ad2018-07-26 20:22:07 +0800634
Gao Xiang1825c8d2020-12-09 20:37:17 +0800635 preload_compressed_pages(clt, MNGD_MAPPING(sbi),
636 cache_strategy, pagepool);
Gao Xiang105d4ad2018-07-26 20:22:07 +0800637
Gao Xiang3883a792018-07-26 20:22:06 +0800638hitted:
Gao Xiangdc76ea82019-09-22 18:04:34 +0800639 /*
640 * Ensure the current partial page belongs to this submit chain rather
641 * than other concurrent submit chains or the noio(bypass) chain since
642 * those chains are handled asynchronously thus the page cannot be used
643 * for inplace I/O or pagevec (should be processed in strict order.)
644 */
645 tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED &&
646 clt->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE);
647
Thomas Weißschuh7dd68b12018-09-10 21:41:14 +0200648 cur = end - min_t(unsigned int, offset + end - map->m_la, end);
Gao Xiang8d8a09b2019-08-30 00:38:27 +0800649 if (!(map->m_flags & EROFS_MAP_MAPPED)) {
Gao Xiang3883a792018-07-26 20:22:06 +0800650 zero_user_segment(page, cur, end);
651 goto next_part;
652 }
653
654 /* let's derive page type */
655 page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD :
656 (!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
657 (tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
658 Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));
659
Gao Xianga1121522019-02-27 13:33:32 +0800660 if (cur)
Gao Xiang97e86a82019-07-31 23:57:47 +0800661 tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED);
Gao Xianga1121522019-02-27 13:33:32 +0800662
Gao Xiang3883a792018-07-26 20:22:06 +0800663retry:
Gao Xiang97e86a82019-07-31 23:57:47 +0800664 err = z_erofs_attach_page(clt, page, page_type);
Gao Xiang6aaa7b02020-12-08 17:58:32 +0800665 /* should allocate an additional short-lived page for pagevec */
Gao Xiang3883a792018-07-26 20:22:06 +0800666 if (err == -EAGAIN) {
667 struct page *const newpage =
Chao Yue3f78d52020-09-17 09:18:21 +0800668 alloc_page(GFP_NOFS | __GFP_NOFAIL);
Gao Xiang3883a792018-07-26 20:22:06 +0800669
Gao Xiang6aaa7b02020-12-08 17:58:32 +0800670 set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
Gao Xiang97e86a82019-07-31 23:57:47 +0800671 err = z_erofs_attach_page(clt, newpage,
672 Z_EROFS_PAGE_TYPE_EXCLUSIVE);
Gao Xiang8d8a09b2019-08-30 00:38:27 +0800673 if (!err)
Gao Xiang3883a792018-07-26 20:22:06 +0800674 goto retry;
675 }
676
Gao Xiang8d8a09b2019-08-30 00:38:27 +0800677 if (err)
Gao Xiang3883a792018-07-26 20:22:06 +0800678 goto err_out;
679
Gao Xiang97e86a82019-07-31 23:57:47 +0800680 index = page->index - (map->m_la >> PAGE_SHIFT);
Gao Xiang3883a792018-07-26 20:22:06 +0800681
Gao Xiang3883a792018-07-26 20:22:06 +0800682 z_erofs_onlinepage_fixup(page, index, true);
Gao Xiang3883a792018-07-26 20:22:06 +0800683
Gao Xiang1e05ff32018-09-18 22:27:25 +0800684 /* bump up the number of spiltted parts of a page */
685 ++spiltted;
686 /* also update nr_pages */
Gao Xiang97e86a82019-07-31 23:57:47 +0800687 clt->cl->nr_pages = max_t(pgoff_t, clt->cl->nr_pages, index + 1);
Gao Xiang3883a792018-07-26 20:22:06 +0800688next_part:
689 /* can be used for verification */
690 map->m_llen = offset + cur - map->m_la;
691
Kristaps Čivkulis2bc75962018-08-05 18:21:01 +0300692 end = cur;
693 if (end > 0)
Gao Xiang3883a792018-07-26 20:22:06 +0800694 goto repeat;
695
Gao Xiang1e05ff32018-09-18 22:27:25 +0800696out:
Gao Xiang3883a792018-07-26 20:22:06 +0800697 z_erofs_onlinepage_endio(page);
698
Gao Xiang4f761fa2019-09-04 10:09:09 +0800699 erofs_dbg("%s, finish page: %pK spiltted: %u map->m_llen %llu",
700 __func__, page, spiltted, map->m_llen);
Gao Xiang3883a792018-07-26 20:22:06 +0800701 return err;
Gao Xiang1e05ff32018-09-18 22:27:25 +0800702
703 /* if some error occurred while processing this page */
704err_out:
705 SetPageError(page);
706 goto out;
Gao Xiang3883a792018-07-26 20:22:06 +0800707}
708
Gao Xianga4b1fab2019-10-08 20:56:15 +0800709static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
710 bool sync, int bios)
Gao Xiang3883a792018-07-26 20:22:06 +0800711{
Gao Xianga4b1fab2019-10-08 20:56:15 +0800712 /* wake up the caller thread for sync decompression */
713 if (sync) {
Gao Xiang848bd9a2018-12-08 00:19:12 +0800714 unsigned long flags;
Gao Xiang3883a792018-07-26 20:22:06 +0800715
Gao Xiang848bd9a2018-12-08 00:19:12 +0800716 spin_lock_irqsave(&io->u.wait.lock, flags);
717 if (!atomic_add_return(bios, &io->pending_bios))
718 wake_up_locked(&io->u.wait);
719 spin_unlock_irqrestore(&io->u.wait.lock, flags);
720 return;
721 }
722
723 if (!atomic_add_return(bios, &io->pending_bios))
Gao Xiang3883a792018-07-26 20:22:06 +0800724 queue_work(z_erofs_workqueue, &io->u.work);
Gao Xiang3883a792018-07-26 20:22:06 +0800725}
726
Gao Xiang6aaa7b02020-12-08 17:58:32 +0800727static bool z_erofs_page_is_invalidated(struct page *page)
728{
729 return !page->mapping && !z_erofs_is_shortlived_page(page);
730}
731
Gao Xiang0c638f72019-11-08 11:37:33 +0800732static void z_erofs_decompressqueue_endio(struct bio *bio)
Gao Xiang3883a792018-07-26 20:22:06 +0800733{
Gao Xianga4b1fab2019-10-08 20:56:15 +0800734 tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
735 struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t);
Gao Xiang14a56ec2019-03-25 11:40:09 +0800736 blk_status_t err = bio->bi_status;
Gao Xiang3883a792018-07-26 20:22:06 +0800737 struct bio_vec *bvec;
Ming Lei6dc4f102019-02-15 19:13:19 +0800738 struct bvec_iter_all iter_all;
Gao Xiang3883a792018-07-26 20:22:06 +0800739
Christoph Hellwig2b070cf2019-04-25 09:03:00 +0200740 bio_for_each_segment_all(bvec, bio, iter_all) {
Gao Xiang3883a792018-07-26 20:22:06 +0800741 struct page *page = bvec->bv_page;
742
743 DBG_BUGON(PageUptodate(page));
Gao Xiang6aaa7b02020-12-08 17:58:32 +0800744 DBG_BUGON(z_erofs_page_is_invalidated(page));
Gao Xiang3883a792018-07-26 20:22:06 +0800745
Gao Xiang8d8a09b2019-08-30 00:38:27 +0800746 if (err)
Gao Xiang3883a792018-07-26 20:22:06 +0800747 SetPageError(page);
Gao Xiang105d4ad2018-07-26 20:22:07 +0800748
Gao Xianga4b1fab2019-10-08 20:56:15 +0800749 if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
750 if (!err)
751 SetPageUptodate(page);
Gao Xiang105d4ad2018-07-26 20:22:07 +0800752 unlock_page(page);
Gao Xianga4b1fab2019-10-08 20:56:15 +0800753 }
Gao Xiang3883a792018-07-26 20:22:06 +0800754 }
Gao Xianga4b1fab2019-10-08 20:56:15 +0800755 z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1);
Gao Xiang3883a792018-07-26 20:22:06 +0800756 bio_put(bio);
757}
758
/*
 * Decompress a single pcluster.
 *
 * With the primary collection's lock held, this gathers the output pages
 * into a pages[] array (from the collection's pagevec and from compressed
 * pages that are not managed and not short-lived), calls
 * z_erofs_decompress(), then clears the non-managed compressed_pages[]
 * slots, ends I/O on the output pages, resets the collection
 * (nr_pages, vcnt, pcl->next) and drops it with z_erofs_collection_put().
 *
 * @sb:       super block the pcluster belongs to
 * @pcl:      pcluster to decompress
 * @pagepool: list used to recycle short-lived pages
 *
 * Returns 0, -EIO, -EFSCORRUPTED, or the result of z_erofs_decompress().
 */
Gao Xiang97e86a82019-07-31 23:57:47 +0800759static int z_erofs_decompress_pcluster(struct super_block *sb,
760 struct z_erofs_pcluster *pcl,
761 struct list_head *pagepool)
Gao Xiang3883a792018-07-26 20:22:06 +0800762{
763 struct erofs_sb_info *const sbi = EROFS_SB(sb);
Gao Xiang97e86a82019-07-31 23:57:47 +0800764 const unsigned int clusterpages = BIT(pcl->clusterbits);
Gao Xiang3883a792018-07-26 20:22:06 +0800765 struct z_erofs_pagevec_ctor ctor;
Gao Xiang97e86a82019-07-31 23:57:47 +0800766 unsigned int i, outputsize, llen, nr_pages;
767 struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES];
Gao Xiang3883a792018-07-26 20:22:06 +0800768 struct page **pages, **compressed_pages, *page;
Gao Xiang3883a792018-07-26 20:22:06 +0800769
770 enum z_erofs_page_type page_type;
Gao Xiangb6a76182019-06-24 15:22:58 +0800771 bool overlapped, partial;
Gao Xiang97e86a82019-07-31 23:57:47 +0800772 struct z_erofs_collection *cl;
Gao Xiang3883a792018-07-26 20:22:06 +0800773 int err;
774
775 might_sleep();
Gao Xiang97e86a82019-07-31 23:57:47 +0800776 cl = z_erofs_primarycollection(pcl);
777 DBG_BUGON(!READ_ONCE(cl->nr_pages));
Gao Xiang3883a792018-07-26 20:22:06 +0800778
Gao Xiang97e86a82019-07-31 23:57:47 +0800779 mutex_lock(&cl->lock);
780 nr_pages = cl->nr_pages;
Gao Xiang3883a792018-07-26 20:22:06 +0800781
	/*
	 * choose the storage for pages[] (nr_pages entries): the on-stack
	 * array, the global pagemap (guarded by z_pagemap_global_lock) or
	 * a kvmalloc'ed array.
	 */
Gao Xiang8d8a09b2019-08-30 00:38:27 +0800782 if (nr_pages <= Z_EROFS_VMAP_ONSTACK_PAGES) {
Gao Xiang3883a792018-07-26 20:22:06 +0800783 pages = pages_onstack;
Gao Xiang97e86a82019-07-31 23:57:47 +0800784 } else if (nr_pages <= Z_EROFS_VMAP_GLOBAL_PAGES &&
785 mutex_trylock(&z_pagemap_global_lock)) {
Gao Xiang3883a792018-07-26 20:22:06 +0800786 pages = z_pagemap_global;
Gao Xiang97e86a82019-07-31 23:57:47 +0800787 } else {
Chao Yu441dfcc2019-07-16 17:44:22 +0800788 gfp_t gfp_flags = GFP_KERNEL;
789
	/* too many pages for the global pagemap fallback below: must not fail */
Gao Xiang97e86a82019-07-31 23:57:47 +0800790 if (nr_pages > Z_EROFS_VMAP_GLOBAL_PAGES)
Chao Yu441dfcc2019-07-16 17:44:22 +0800791 gfp_flags |= __GFP_NOFAIL;
792
Julian Merida447a3622019-03-18 20:58:41 -0300793 pages = kvmalloc_array(nr_pages, sizeof(struct page *),
Chao Yu441dfcc2019-07-16 17:44:22 +0800794 gfp_flags);
Gao Xiang3883a792018-07-26 20:22:06 +0800795
796 /* fallback to global pagemap for the lowmem scenario */
Gao Xiang8d8a09b2019-08-30 00:38:27 +0800797 if (!pages) {
Chao Yu441dfcc2019-07-16 17:44:22 +0800798 mutex_lock(&z_pagemap_global_lock);
799 pages = z_pagemap_global;
Gao Xiang3883a792018-07-26 20:22:06 +0800800 }
801 }
802
803 for (i = 0; i < nr_pages; ++i)
804 pages[i] = NULL;
805
Gao Xiange12a0ce2019-08-21 22:01:52 +0800806 err = 0;
Gao Xiangfa61a332019-06-24 15:22:53 +0800807 z_erofs_pagevec_ctor_init(&ctor, Z_EROFS_NR_INLINE_PAGEVECS,
Gao Xiang97e86a82019-07-31 23:57:47 +0800808 cl->pagevec, 0);
Gao Xiang3883a792018-07-26 20:22:06 +0800809
	/* pass 1: move each of the cl->vcnt pagevec entries into pages[] */
Gao Xiang97e86a82019-07-31 23:57:47 +0800810 for (i = 0; i < cl->vcnt; ++i) {
Thomas Weißschuh7dd68b12018-09-10 21:41:14 +0200811 unsigned int pagenr;
Gao Xiang3883a792018-07-26 20:22:06 +0800812
Gao Xiang046d64e2019-07-31 23:57:45 +0800813 page = z_erofs_pagevec_dequeue(&ctor, &page_type);
Gao Xiang3883a792018-07-26 20:22:06 +0800814
815 /* all pages in pagevec ought to be valid */
Cristian Sicilia42d40b42018-11-12 21:43:57 +0100816 DBG_BUGON(!page);
Gao Xiang6aaa7b02020-12-08 17:58:32 +0800817 DBG_BUGON(z_erofs_page_is_invalidated(page));
Gao Xiang3883a792018-07-26 20:22:06 +0800818
	/* short-lived pages are recycled via pagepool and take no pages[] slot */
Gao Xiang6aaa7b02020-12-08 17:58:32 +0800819 if (z_erofs_put_shortlivedpage(pagepool, page))
Gao Xiang3883a792018-07-26 20:22:06 +0800820 continue;
821
	/* a HEAD page always goes to slot 0; others use their online index */
822 if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
823 pagenr = 0;
824 else
825 pagenr = z_erofs_onlinepage_index(page);
826
Gao Xiang70b17992018-12-11 15:17:49 +0800827 DBG_BUGON(pagenr >= nr_pages);
Gao Xiange5e3abb2018-09-19 13:49:07 +0800828
Gao Xiange12a0ce2019-08-21 22:01:52 +0800829 /*
830 * currently EROFS doesn't support multiref(dedup),
831 * so here erroring out one multiref page.
832 */
Gao Xiang8d8a09b2019-08-30 00:38:27 +0800833 if (pages[pagenr]) {
Gao Xiange12a0ce2019-08-21 22:01:52 +0800834 DBG_BUGON(1);
835 SetPageError(pages[pagenr]);
836 z_erofs_onlinepage_endio(pages[pagenr]);
837 err = -EFSCORRUPTED;
838 }
Gao Xiang3883a792018-07-26 20:22:06 +0800839 pages[pagenr] = page;
840 }
Gao Xiang3883a792018-07-26 20:22:06 +0800841 z_erofs_pagevec_ctor_exit(&ctor, true);
842
843 overlapped = false;
Gao Xiang97e86a82019-07-31 23:57:47 +0800844 compressed_pages = pcl->compressed_pages;
Gao Xiang3883a792018-07-26 20:22:06 +0800845
	/*
	 * pass 2: check every compressed page; pages that are neither
	 * short-lived nor managed are also output pages (in-place I/O),
	 * so they are registered in pages[] as well.
	 */
846 for (i = 0; i < clusterpages; ++i) {
Thomas Weißschuh7dd68b12018-09-10 21:41:14 +0200847 unsigned int pagenr;
Gao Xiang3883a792018-07-26 20:22:06 +0800848
849 page = compressed_pages[i];
850
851 /* all compressed pages ought to be valid */
Cristian Sicilia42d40b42018-11-12 21:43:57 +0100852 DBG_BUGON(!page);
Gao Xiang6aaa7b02020-12-08 17:58:32 +0800853 DBG_BUGON(z_erofs_page_is_invalidated(page));
Gao Xiang3883a792018-07-26 20:22:06 +0800854
Gao Xiang6aaa7b02020-12-08 17:58:32 +0800855 if (!z_erofs_is_shortlived_page(page)) {
	/* managed pages only have to be up-to-date, otherwise fail with -EIO */
Gao Xiangd61fbb62019-03-25 11:40:08 +0800856 if (erofs_page_is_managed(sbi, page)) {
Gao Xiang8d8a09b2019-08-30 00:38:27 +0800857 if (!PageUptodate(page))
Gao Xiang11152492019-03-25 11:40:07 +0800858 err = -EIO;
859 continue;
860 }
Gao Xiang3883a792018-07-26 20:22:06 +0800861
Gao Xiang11152492019-03-25 11:40:07 +0800862 /*
863 * only if non-head page can be selected
864 * for inplace decompression
865 */
866 pagenr = z_erofs_onlinepage_index(page);
Gao Xiang3883a792018-07-26 20:22:06 +0800867
Gao Xiang11152492019-03-25 11:40:07 +0800868 DBG_BUGON(pagenr >= nr_pages);
	/* same multiref check as in pass 1 */
Gao Xiang8d8a09b2019-08-30 00:38:27 +0800869 if (pages[pagenr]) {
Gao Xiange12a0ce2019-08-21 22:01:52 +0800870 DBG_BUGON(1);
871 SetPageError(pages[pagenr]);
872 z_erofs_onlinepage_endio(pages[pagenr]);
873 err = -EFSCORRUPTED;
874 }
Gao Xiang11152492019-03-25 11:40:07 +0800875 pages[pagenr] = page;
Gao Xiang3883a792018-07-26 20:22:06 +0800876
Gao Xiang11152492019-03-25 11:40:07 +0800877 overlapped = true;
878 }
879
Gao Xiang6aaa7b02020-12-08 17:58:32 +0800880 /* PG_error needs checking for all non-managed pages */
Gao Xiang8d8a09b2019-08-30 00:38:27 +0800881 if (PageError(page)) {
Gao Xiang11152492019-03-25 11:40:07 +0800882 DBG_BUGON(PageUptodate(page));
883 err = -EIO;
884 }
Gao Xiang3883a792018-07-26 20:22:06 +0800885 }
886
	/* any error found so far skips decompression but still runs the cleanup */
Gao Xiang8d8a09b2019-08-30 00:38:27 +0800887 if (err)
Gao Xiang11152492019-03-25 11:40:07 +0800888 goto out;
889
	/*
	 * decode the whole recorded length if pages[] can hold it;
	 * otherwise decode only what fits and flag the request as partial.
	 */
Gao Xiang97e86a82019-07-31 23:57:47 +0800890 llen = pcl->length >> Z_EROFS_PCLUSTER_LENGTH_BIT;
891 if (nr_pages << PAGE_SHIFT >= cl->pageofs + llen) {
892 outputsize = llen;
893 partial = !(pcl->length & Z_EROFS_PCLUSTER_FULL_LENGTH);
Gao Xiangb6a76182019-06-24 15:22:58 +0800894 } else {
Gao Xiang97e86a82019-07-31 23:57:47 +0800895 outputsize = (nr_pages << PAGE_SHIFT) - cl->pageofs;
Gao Xiangb6a76182019-06-24 15:22:58 +0800896 partial = true;
897 }
Gao Xiang3883a792018-07-26 20:22:06 +0800898
Gao Xiang88aaf5a2019-06-24 15:22:57 +0800899 err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
900 .sb = sb,
901 .in = compressed_pages,
902 .out = pages,
Gao Xiang97e86a82019-07-31 23:57:47 +0800903 .pageofs_out = cl->pageofs,
Gao Xiang88aaf5a2019-06-24 15:22:57 +0800904 .inputsize = PAGE_SIZE,
905 .outputsize = outputsize,
Gao Xiang97e86a82019-07-31 23:57:47 +0800906 .alg = pcl->algorithmformat,
Gao Xiang88aaf5a2019-06-24 15:22:57 +0800907 .inplace_io = overlapped,
Gao Xiangb6a76182019-06-24 15:22:58 +0800908 .partial_decoding = partial
Gao Xiang97e86a82019-07-31 23:57:47 +0800909 }, pagepool);
Gao Xiang3883a792018-07-26 20:22:06 +0800910
911out:
Gao Xiangaf692e12019-02-27 13:33:30 +0800912 /* must handle all compressed pages before ending pages */
Gao Xiang3883a792018-07-26 20:22:06 +0800913 for (i = 0; i < clusterpages; ++i) {
914 page = compressed_pages[i];
915
	/* managed pages keep their compressed_pages[] slot */
Gao Xiangd61fbb62019-03-25 11:40:08 +0800916 if (erofs_page_is_managed(sbi, page))
Gao Xiang105d4ad2018-07-26 20:22:07 +0800917 continue;
Gao Xiangd61fbb62019-03-25 11:40:08 +0800918
Gao Xiang6aaa7b02020-12-08 17:58:32 +0800919 /* recycle all individual short-lived pages */
920 (void)z_erofs_put_shortlivedpage(pagepool, page);
Gao Xiang3883a792018-07-26 20:22:06 +0800921
922 WRITE_ONCE(compressed_pages[i], NULL);
923 }
924
	/* finish the output pages: flag errors and end I/O on each of them */
Gao Xiangaf692e12019-02-27 13:33:30 +0800925 for (i = 0; i < nr_pages; ++i) {
926 page = pages[i];
927 if (!page)
928 continue;
929
Gao Xiang6aaa7b02020-12-08 17:58:32 +0800930 DBG_BUGON(z_erofs_page_is_invalidated(page));
Gao Xiangaf692e12019-02-27 13:33:30 +0800931
Gao Xiang6aaa7b02020-12-08 17:58:32 +0800932 /* recycle all individual short-lived pages */
933 if (z_erofs_put_shortlivedpage(pagepool, page))
Gao Xiangaf692e12019-02-27 13:33:30 +0800934 continue;
935
Gao Xiang8d8a09b2019-08-30 00:38:27 +0800936 if (err < 0)
Gao Xiangaf692e12019-02-27 13:33:30 +0800937 SetPageError(page);
938
939 z_erofs_onlinepage_endio(page);
940 }
941
	/* release whichever pages[] storage was chosen at the top */
Gao Xiang3883a792018-07-26 20:22:06 +0800942 if (pages == z_pagemap_global)
943 mutex_unlock(&z_pagemap_global_lock);
Gao Xiang8d8a09b2019-08-30 00:38:27 +0800944 else if (pages != pages_onstack)
Gao Xiang3883a792018-07-26 20:22:06 +0800945 kvfree(pages);
946
Gao Xiang97e86a82019-07-31 23:57:47 +0800947 cl->nr_pages = 0;
948 cl->vcnt = 0;
Gao Xiang3883a792018-07-26 20:22:06 +0800949
Gao Xiang97e86a82019-07-31 23:57:47 +0800950 /* all cl locks MUST be taken before the following line */
951 WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_NIL);
Gao Xiang3883a792018-07-26 20:22:06 +0800952
Gao Xiang97e86a82019-07-31 23:57:47 +0800953 /* all cl locks SHOULD be released right now */
954 mutex_unlock(&cl->lock);
Gao Xiang3883a792018-07-26 20:22:06 +0800955
Gao Xiang97e86a82019-07-31 23:57:47 +0800956 z_erofs_collection_put(cl);
Gao Xiang3883a792018-07-26 20:22:06 +0800957 return err;
958}
959
Gao Xiang0c638f72019-11-08 11:37:33 +0800960static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
961 struct list_head *pagepool)
Gao Xiang3883a792018-07-26 20:22:06 +0800962{
Gao Xiang97e86a82019-07-31 23:57:47 +0800963 z_erofs_next_pcluster_t owned = io->head;
Gao Xiang3883a792018-07-26 20:22:06 +0800964
Gao Xiang97e86a82019-07-31 23:57:47 +0800965 while (owned != Z_EROFS_PCLUSTER_TAIL_CLOSED) {
966 struct z_erofs_pcluster *pcl;
Gao Xiang3883a792018-07-26 20:22:06 +0800967
968 /* no possible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */
Gao Xiang97e86a82019-07-31 23:57:47 +0800969 DBG_BUGON(owned == Z_EROFS_PCLUSTER_TAIL);
Gao Xiang3883a792018-07-26 20:22:06 +0800970
971 /* no possible that 'owned' equals NULL */
Gao Xiang97e86a82019-07-31 23:57:47 +0800972 DBG_BUGON(owned == Z_EROFS_PCLUSTER_NIL);
Gao Xiang3883a792018-07-26 20:22:06 +0800973
Gao Xiang97e86a82019-07-31 23:57:47 +0800974 pcl = container_of(owned, struct z_erofs_pcluster, next);
975 owned = READ_ONCE(pcl->next);
Gao Xiang3883a792018-07-26 20:22:06 +0800976
Gao Xianga4b1fab2019-10-08 20:56:15 +0800977 z_erofs_decompress_pcluster(io->sb, pcl, pagepool);
Gao Xiang3978c8e2018-08-06 11:27:53 +0800978 }
Gao Xiang3883a792018-07-26 20:22:06 +0800979}
980
Gao Xiang0c638f72019-11-08 11:37:33 +0800981static void z_erofs_decompressqueue_work(struct work_struct *work)
Gao Xiang3883a792018-07-26 20:22:06 +0800982{
Gao Xianga4b1fab2019-10-08 20:56:15 +0800983 struct z_erofs_decompressqueue *bgq =
984 container_of(work, struct z_erofs_decompressqueue, u.work);
Gao Xiang97e86a82019-07-31 23:57:47 +0800985 LIST_HEAD(pagepool);
Gao Xiang3883a792018-07-26 20:22:06 +0800986
Gao Xianga4b1fab2019-10-08 20:56:15 +0800987 DBG_BUGON(bgq->head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
Gao Xiang0c638f72019-11-08 11:37:33 +0800988 z_erofs_decompress_queue(bgq, &pagepool);
Gao Xiang3883a792018-07-26 20:22:06 +0800989
Gao Xiang97e86a82019-07-31 23:57:47 +0800990 put_pages_list(&pagepool);
Gao Xianga4b1fab2019-10-08 20:56:15 +0800991 kvfree(bgq);
Gao Xiang3883a792018-07-26 20:22:06 +0800992}
993
/*
 * Pick the page to be used for slot @nr of pcl->compressed_pages[].
 *
 * @pcl:      pcluster being prepared for submission
 * @nr:       slot index within pcl->compressed_pages[]
 * @pagepool: page pool used by erofs_allocpage() and for giving pages back
 * @mc:       address space of the managed cache
 * @gfp:      allocation flags (__GFP_NOFAIL is added for page allocation)
 *
 * Returns the page for the slot, or NULL when the page is still in the
 * managed cache and already up-to-date (no I/O needed for this slot).
 */
Gao Xiang97e86a82019-07-31 23:57:47 +0800994static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
995 unsigned int nr,
996 struct list_head *pagepool,
997 struct address_space *mc,
998 gfp_t gfp)
Gao Xiang9248fce2018-12-08 00:19:15 +0800999{
Gao Xiang97e86a82019-07-31 23:57:47 +08001000 const pgoff_t index = pcl->obj.index;
Gao Xiang9248fce2018-12-08 00:19:15 +08001001 bool tocache = false;
1002
1003 struct address_space *mapping;
1004 struct page *oldpage, *page;
1005
Gao Xiang92e6efd2018-12-08 00:19:16 +08001006 compressed_page_t t;
1007 int justfound;
1008
Gao Xiang9248fce2018-12-08 00:19:15 +08001009repeat:
Gao Xiang97e86a82019-07-31 23:57:47 +08001010 page = READ_ONCE(pcl->compressed_pages[nr]);
	/* remember the raw slot value for the cmpxchg() at out_allocpage */
Gao Xiang9248fce2018-12-08 00:19:15 +08001011 oldpage = page;
1012
	/* empty slot: allocate a page (tocache stays false, so it is not cached) */
1013 if (!page)
1014 goto out_allocpage;
1015
1016 /*
1017 * the cached page has not been allocated and
1018 * a placeholder is out there, prepare it now.
1019 */
Gao Xiangbda17a42019-10-08 20:56:13 +08001020 if (page == PAGE_UNALLOCATED) {
Gao Xiang9248fce2018-12-08 00:19:15 +08001021 tocache = true;
1022 goto out_allocpage;
1023 }
1024
Gao Xiang92e6efd2018-12-08 00:19:16 +08001025 /* process the target tagged pointer */
1026 t = tagptr_init(compressed_page_t, page);
1027 justfound = tagptr_unfold_tags(t);
1028 page = tagptr_unfold_ptr(t);
1029
Gao Xiang1825c8d2020-12-09 20:37:17 +08001030 /*
1031 * preallocated cached pages, which is used to avoid direct reclaim
1032 * otherwise, it will go inplace I/O path instead.
1033 */
1034 if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
1035 WRITE_ONCE(pcl->compressed_pages[nr], page);
1036 set_page_private(page, 0);
1037 tocache = true;
1038 goto out_tocache;
1039 }
Gao Xiang9248fce2018-12-08 00:19:15 +08001040 mapping = READ_ONCE(page->mapping);
1041
1042 /*
Gao Xiang6aaa7b02020-12-08 17:58:32 +08001043 * file-backed online pages in pcluster are all locked steady,
Gao Xiang9248fce2018-12-08 00:19:15 +08001044 * therefore it is impossible for `mapping' to be NULL.
1045 */
1046 if (mapping && mapping != mc)
1047 /* ought to be unmanaged pages */
1048 goto out;
1049
Gao Xiang6aaa7b02020-12-08 17:58:32 +08001050 /* directly return for shortlived page as well */
1051 if (z_erofs_is_shortlived_page(page))
1052 goto out;
1053
Gao Xiang9248fce2018-12-08 00:19:15 +08001054 lock_page(page);
1055
Gao Xiang92e6efd2018-12-08 00:19:16 +08001056 /* only true if page reclaim goes wrong, should never happen */
1057 DBG_BUGON(justfound && PagePrivate(page));
1058
Gao Xiang9248fce2018-12-08 00:19:15 +08001059 /* the page is still in manage cache */
1060 if (page->mapping == mc) {
	/* store the untagged pointer back into the slot */
Gao Xiang97e86a82019-07-31 23:57:47 +08001061 WRITE_ONCE(pcl->compressed_pages[nr], page);
Gao Xiang9248fce2018-12-08 00:19:15 +08001062
Gao Xiang11152492019-03-25 11:40:07 +08001063 ClearPageError(page);
Gao Xiang9248fce2018-12-08 00:19:15 +08001064 if (!PagePrivate(page)) {
Gao Xiang92e6efd2018-12-08 00:19:16 +08001065 /*
1066 * impossible to be !PagePrivate(page) for
1067 * the current restriction as well if
1068 * the page is already in compressed_pages[].
1069 */
1070 DBG_BUGON(!justfound);
1071
1072 justfound = 0;
Gao Xiang97e86a82019-07-31 23:57:47 +08001073 set_page_private(page, (unsigned long)pcl);
Gao Xiang9248fce2018-12-08 00:19:15 +08001074 SetPagePrivate(page);
1075 }
1076
1077 /* no need to submit io if it is already up-to-date */
1078 if (PageUptodate(page)) {
1079 unlock_page(page);
1080 page = NULL;
1081 }
1082 goto out;
1083 }
1084
1085 /*
1086 * the managed page has been truncated, it's unsafe to
1087 * reuse this one, let's allocate a new cache-managed page.
1088 */
1089 DBG_BUGON(page->mapping);
Gao Xiang92e6efd2018-12-08 00:19:16 +08001090 DBG_BUGON(!justfound);
Gao Xiang9248fce2018-12-08 00:19:15 +08001091
1092 tocache = true;
1093 unlock_page(page);
1094 put_page(page);
	/*
	 * install a newly allocated page into the slot; if the slot no longer
	 * holds `oldpage', give the new page back to pagepool and start over.
	 */
1095out_allocpage:
Gao Xiang5ddcee12019-11-21 21:59:54 +08001096 page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
Gao Xiang5ddcee12019-11-21 21:59:54 +08001097 if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) {
Gao Xiangbf225072020-12-08 17:58:33 +08001098 list_add(&page->lru, pagepool);
Gao Xiang5ddcee12019-11-21 21:59:54 +08001099 cond_resched();
1100 goto repeat;
1101 }
Gao Xiang1825c8d2020-12-09 20:37:17 +08001102out_tocache:
	/* caching not wanted, or adding to the managed cache at index + nr failed */
Gao Xiangbf225072020-12-08 17:58:33 +08001103 if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
1104 /* turn into temporary page if fails (1 ref) */
1105 set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
1106 goto out;
Gao Xianga30573b2020-10-22 22:57:21 +08001107 }
Gao Xiangbf225072020-12-08 17:58:33 +08001108 attach_page_private(page, pcl);
1109 /* drop a refcount added by allocpage (then we have 2 refs here) */
1110 put_page(page);
1111
Gao Xiang9248fce2018-12-08 00:19:15 +08001112out: /* the only exit (for tracing and debugging) */
1113 return page;
1114}
1115
Gao Xianga4b1fab2019-10-08 20:56:15 +08001116static struct z_erofs_decompressqueue *
1117jobqueue_init(struct super_block *sb,
1118 struct z_erofs_decompressqueue *fgq, bool *fg)
Gao Xiang3883a792018-07-26 20:22:06 +08001119{
Gao Xianga4b1fab2019-10-08 20:56:15 +08001120 struct z_erofs_decompressqueue *q;
Gao Xiang3883a792018-07-26 20:22:06 +08001121
Gao Xianga4b1fab2019-10-08 20:56:15 +08001122 if (fg && !*fg) {
1123 q = kvzalloc(sizeof(*q), GFP_KERNEL | __GFP_NOWARN);
1124 if (!q) {
1125 *fg = true;
1126 goto fg_out;
1127 }
Gao Xiang0c638f72019-11-08 11:37:33 +08001128 INIT_WORK(&q->u.work, z_erofs_decompressqueue_work);
Gao Xianga4b1fab2019-10-08 20:56:15 +08001129 } else {
1130fg_out:
1131 q = fgq;
1132 init_waitqueue_head(&fgq->u.wait);
1133 atomic_set(&fgq->pending_bios, 0);
Gao Xiang3883a792018-07-26 20:22:06 +08001134 }
Gao Xianga4b1fab2019-10-08 20:56:15 +08001135 q->sb = sb;
1136 q->head = Z_EROFS_PCLUSTER_TAIL_CLOSED;
1137 return q;
Gao Xiang3883a792018-07-26 20:22:06 +08001138}
1139
/* decompression jobqueue types, also used as jobqueue array indices */
enum {
	/* queue for pclusters that need no device I/O */
	JQ_BYPASS,
	/* queue for pclusters whose pages are submitted for I/O */
	JQ_SUBMIT,
	/* number of jobqueues (array size) */
	NR_JOBQUEUES,
};
1146
1147static void *jobqueueset_init(struct super_block *sb,
Gao Xianga4b1fab2019-10-08 20:56:15 +08001148 struct z_erofs_decompressqueue *q[],
1149 struct z_erofs_decompressqueue *fgq, bool *fg)
Gao Xiang7146a4f2018-12-08 00:19:18 +08001150{
Gao Xiang7146a4f2018-12-08 00:19:18 +08001151 /*
1152 * if managed cache is enabled, bypass jobqueue is needed,
Gao Xiang97e86a82019-07-31 23:57:47 +08001153 * no need to read from device for all pclusters in this queue.
Gao Xiang7146a4f2018-12-08 00:19:18 +08001154 */
Gao Xianga4b1fab2019-10-08 20:56:15 +08001155 q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, NULL);
1156 q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, fg);
Gao Xiang7146a4f2018-12-08 00:19:18 +08001157
Gao Xianga4b1fab2019-10-08 20:56:15 +08001158 return tagptr_cast_ptr(tagptr_fold(tagptr1_t, q[JQ_SUBMIT], *fg));
Gao Xiang7146a4f2018-12-08 00:19:18 +08001159}
1160
Gao Xiang97e86a82019-07-31 23:57:47 +08001161static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
1162 z_erofs_next_pcluster_t qtail[],
1163 z_erofs_next_pcluster_t owned_head)
Gao Xiang7146a4f2018-12-08 00:19:18 +08001164{
Gao Xiang97e86a82019-07-31 23:57:47 +08001165 z_erofs_next_pcluster_t *const submit_qtail = qtail[JQ_SUBMIT];
1166 z_erofs_next_pcluster_t *const bypass_qtail = qtail[JQ_BYPASS];
Gao Xiang7146a4f2018-12-08 00:19:18 +08001167
Gao Xiang97e86a82019-07-31 23:57:47 +08001168 DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
1169 if (owned_head == Z_EROFS_PCLUSTER_TAIL)
1170 owned_head = Z_EROFS_PCLUSTER_TAIL_CLOSED;
Gao Xiang7146a4f2018-12-08 00:19:18 +08001171
Gao Xiang97e86a82019-07-31 23:57:47 +08001172 WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_TAIL_CLOSED);
Gao Xiang7146a4f2018-12-08 00:19:18 +08001173
1174 WRITE_ONCE(*submit_qtail, owned_head);
Gao Xiang97e86a82019-07-31 23:57:47 +08001175 WRITE_ONCE(*bypass_qtail, &pcl->next);
Gao Xiang7146a4f2018-12-08 00:19:18 +08001176
Gao Xiang97e86a82019-07-31 23:57:47 +08001177 qtail[JQ_BYPASS] = &pcl->next;
Gao Xiang7146a4f2018-12-08 00:19:18 +08001178}
1179
/*
 * Submit read bios for all pclusters chained from f->clt.owned_head.
 *
 * Each pcluster's compressed page slots are resolved through
 * pickup_page_for_submission(); pages that need I/O are added to bios,
 * and pclusters for which no page needs I/O are moved to the bypass
 * jobqueue.  Finally the submit queue is kicked off with the number of
 * bios issued, or freed if a background queue ended up with no bios.
 *
 * @sb:       super block to read from
 * @f:        decompress frontend holding the owned pcluster chain
 * @pagepool: page pool passed down to pickup_page_for_submission()
 * @fgq:      caller-provided foreground queues (indexed by JQ_*)
 * @force_fg: in/out; passed to jobqueueset_init(), then read back here
 */
Gao Xiang1e4a2952020-01-21 14:48:19 +08001180static void z_erofs_submit_queue(struct super_block *sb,
Gao Xiang6ea5aad2020-09-19 15:27:30 +08001181 struct z_erofs_decompress_frontend *f,
Gao Xiang0c638f72019-11-08 11:37:33 +08001182 struct list_head *pagepool,
1183 struct z_erofs_decompressqueue *fgq,
1184 bool *force_fg)
Gao Xiang3883a792018-07-26 20:22:06 +08001185{
Gao Xiangbda17a42019-10-08 20:56:13 +08001186 struct erofs_sb_info *const sbi = EROFS_SB(sb);
Gao Xiang97e86a82019-07-31 23:57:47 +08001187 z_erofs_next_pcluster_t qtail[NR_JOBQUEUES];
Gao Xianga4b1fab2019-10-08 20:56:15 +08001188 struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
Gao Xiang7146a4f2018-12-08 00:19:18 +08001189 void *bi_private;
Gao Xiang6ea5aad2020-09-19 15:27:30 +08001190 z_erofs_next_pcluster_t owned_head = f->clt.owned_head;
Gao Xiang3883a792018-07-26 20:22:06 +08001191 /* since bio will be NULL, no need to initialize last_index */
Kees Cook3f649ab2020-06-03 13:09:38 -07001192 pgoff_t last_index;
Gao Xiang1e4a2952020-01-21 14:48:19 +08001193 unsigned int nr_bios = 0;
1194 struct bio *bio = NULL;
Gao Xiang3883a792018-07-26 20:22:06 +08001195
	/* qtail[] tracks where the next pcluster gets linked in each queue */
Gao Xianga4b1fab2019-10-08 20:56:15 +08001196 bi_private = jobqueueset_init(sb, q, fgq, force_fg);
1197 qtail[JQ_BYPASS] = &q[JQ_BYPASS]->head;
1198 qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head;
Gao Xiang3883a792018-07-26 20:22:06 +08001199
1200 /* by default, all need io submission */
Gao Xiang7146a4f2018-12-08 00:19:18 +08001201 q[JQ_SUBMIT]->head = owned_head;
Gao Xiang3883a792018-07-26 20:22:06 +08001202
1203 do {
Gao Xiang97e86a82019-07-31 23:57:47 +08001204 struct z_erofs_pcluster *pcl;
Gao Xiang1e4a2952020-01-21 14:48:19 +08001205 pgoff_t cur, end;
1206 unsigned int i = 0;
1207 bool bypass = true;
Gao Xiang3883a792018-07-26 20:22:06 +08001208
1209 /* no possible 'owned_head' equals the following */
Gao Xiang97e86a82019-07-31 23:57:47 +08001210 DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
1211 DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_NIL);
Gao Xiang3883a792018-07-26 20:22:06 +08001212
Gao Xiang97e86a82019-07-31 23:57:47 +08001213 pcl = container_of(owned_head, struct z_erofs_pcluster, next);
1214
	/* [cur, end) is the index range covered by this pcluster */
Gao Xiang1e4a2952020-01-21 14:48:19 +08001215 cur = pcl->obj.index;
1216 end = cur + BIT(pcl->clusterbits);
Gao Xiang3883a792018-07-26 20:22:06 +08001217
1218 /* close the main owned chain at first */
Gao Xiang97e86a82019-07-31 23:57:47 +08001219 owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
1220 Z_EROFS_PCLUSTER_TAIL_CLOSED);
Gao Xiang3883a792018-07-26 20:22:06 +08001221
	/* visit every compressed page slot of this pcluster */
Gao Xiang1e4a2952020-01-21 14:48:19 +08001222 do {
1223 struct page *page;
Gao Xiang9248fce2018-12-08 00:19:15 +08001224
Gao Xiang1e4a2952020-01-21 14:48:19 +08001225 page = pickup_page_for_submission(pcl, i++, pagepool,
1226 MNGD_MAPPING(sbi),
1227 GFP_NOFS);
	/* no I/O for this slot; `continue' still evaluates ++cur < end */
1228 if (!page)
1229 continue;
Gao Xiang3883a792018-07-26 20:22:06 +08001230
	/* not contiguous with the last added index: submit the current bio */
Gao Xiang1e4a2952020-01-21 14:48:19 +08001231 if (bio && cur != last_index + 1) {
Gao Xiang3883a792018-07-26 20:22:06 +08001232submit_bio_retry:
Gao Xiang1e4a2952020-01-21 14:48:19 +08001233 submit_bio(bio);
1234 bio = NULL;
1235 }
Gao Xiang3883a792018-07-26 20:22:06 +08001236
Gao Xiang1e4a2952020-01-21 14:48:19 +08001237 if (!bio) {
1238 bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
Gao Xianga5c0b782019-09-04 10:09:02 +08001239
Gao Xiang1e4a2952020-01-21 14:48:19 +08001240 bio->bi_end_io = z_erofs_decompressqueue_endio;
1241 bio_set_dev(bio, sb->s_bdev);
1242 bio->bi_iter.bi_sector = (sector_t)cur <<
1243 LOG_SECTORS_PER_BLOCK;
1244 bio->bi_private = bi_private;
1245 bio->bi_opf = REQ_OP_READ;
Gao Xiang6ea5aad2020-09-19 15:27:30 +08001246 if (f->readahead)
1247 bio->bi_opf |= REQ_RAHEAD;
Gao Xiang1e4a2952020-01-21 14:48:19 +08001248 ++nr_bios;
1249 }
Gao Xiang94e4e152019-09-04 10:09:04 +08001250
	/* page was not fully added: submit this bio, retry the page with a new one */
Gao Xiang6c3e4852020-09-19 15:27:28 +08001251 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
Gao Xiang1e4a2952020-01-21 14:48:19 +08001252 goto submit_bio_retry;
Gao Xiang3883a792018-07-26 20:22:06 +08001253
Gao Xiang1e4a2952020-01-21 14:48:19 +08001254 last_index = cur;
1255 bypass = false;
1256 } while (++cur < end);
Gao Xiang3883a792018-07-26 20:22:06 +08001257
	/* keep on the submit chain if any page needs I/O, else move to bypass */
Gao Xiang1e4a2952020-01-21 14:48:19 +08001258 if (!bypass)
Gao Xiang97e86a82019-07-31 23:57:47 +08001259 qtail[JQ_SUBMIT] = &pcl->next;
Gao Xiang7146a4f2018-12-08 00:19:18 +08001260 else
Gao Xiang97e86a82019-07-31 23:57:47 +08001261 move_to_bypass_jobqueue(pcl, qtail, owned_head);
1262 } while (owned_head != Z_EROFS_PCLUSTER_TAIL);
Gao Xiang3883a792018-07-26 20:22:06 +08001263
	/* submit the last bio still being built, if any */
Cristian Sicilia42d40b42018-11-12 21:43:57 +01001264 if (bio)
Gao Xiang94e4e152019-09-04 10:09:04 +08001265 submit_bio(bio);
Gao Xiang3883a792018-07-26 20:22:06 +08001266
Gao Xiang587a67b2020-01-21 14:47:47 +08001267 /*
1268 * although background is preferred, no one is pending for submission.
1269 * don't issue workqueue for decompression but drop it directly instead.
1270 */
1271 if (!*force_fg && !nr_bios) {
1272 kvfree(q[JQ_SUBMIT]);
Gao Xiang1e4a2952020-01-21 14:48:19 +08001273 return;
Gao Xiang587a67b2020-01-21 14:47:47 +08001274 }
	/* account the submitted bios; completions each subtract one */
Gao Xianga4b1fab2019-10-08 20:56:15 +08001275 z_erofs_decompress_kickoff(q[JQ_SUBMIT], *force_fg, nr_bios);
Gao Xiang3883a792018-07-26 20:22:06 +08001276}
1277
Gao Xiang0c638f72019-11-08 11:37:33 +08001278static void z_erofs_runqueue(struct super_block *sb,
Gao Xiang6ea5aad2020-09-19 15:27:30 +08001279 struct z_erofs_decompress_frontend *f,
Gao Xiang0c638f72019-11-08 11:37:33 +08001280 struct list_head *pagepool, bool force_fg)
Gao Xiang3883a792018-07-26 20:22:06 +08001281{
Gao Xianga4b1fab2019-10-08 20:56:15 +08001282 struct z_erofs_decompressqueue io[NR_JOBQUEUES];
Gao Xiang3883a792018-07-26 20:22:06 +08001283
Gao Xiang6ea5aad2020-09-19 15:27:30 +08001284 if (f->clt.owned_head == Z_EROFS_PCLUSTER_TAIL)
Gao Xiang3883a792018-07-26 20:22:06 +08001285 return;
Gao Xiang6ea5aad2020-09-19 15:27:30 +08001286 z_erofs_submit_queue(sb, f, pagepool, io, &force_fg);
Gao Xiang3883a792018-07-26 20:22:06 +08001287
Gao Xiang0c638f72019-11-08 11:37:33 +08001288 /* handle bypass queue (no i/o pclusters) immediately */
1289 z_erofs_decompress_queue(&io[JQ_BYPASS], pagepool);
Gao Xiang4279f3f2019-07-31 23:57:49 +08001290
Gao Xiang3883a792018-07-26 20:22:06 +08001291 if (!force_fg)
1292 return;
1293
1294 /* wait until all bios are completed */
Gao Xianga93f8c32019-10-08 20:56:16 +08001295 io_wait_event(io[JQ_SUBMIT].u.wait,
1296 !atomic_read(&io[JQ_SUBMIT].pending_bios));
Gao Xiang3883a792018-07-26 20:22:06 +08001297
Gao Xiang0c638f72019-11-08 11:37:33 +08001298 /* handle synchronous decompress queue in the caller context */
1299 z_erofs_decompress_queue(&io[JQ_SUBMIT], pagepool);
Gao Xiang3883a792018-07-26 20:22:06 +08001300}
1301
Gao Xiang0c638f72019-11-08 11:37:33 +08001302static int z_erofs_readpage(struct file *file, struct page *page)
Gao Xiang3883a792018-07-26 20:22:06 +08001303{
1304 struct inode *const inode = page->mapping->host;
Gao Xiang97e86a82019-07-31 23:57:47 +08001305 struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
Gao Xiang3883a792018-07-26 20:22:06 +08001306 int err;
1307 LIST_HEAD(pagepool);
1308
Gao Xiangba9ce772018-11-23 01:15:58 +08001309 trace_erofs_readpage(page, false);
1310
Gao Xiangf0c519f2018-11-23 01:21:49 +08001311 f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
1312
Gao Xiang1825c8d2020-12-09 20:37:17 +08001313 err = z_erofs_do_read_page(&f, page, &pagepool);
Gao Xiang97e86a82019-07-31 23:57:47 +08001314 (void)z_erofs_collector_end(&f.clt);
Gao Xiang3883a792018-07-26 20:22:06 +08001315
Gao Xiangee451972019-08-19 18:34:21 +08001316 /* if some compressed cluster ready, need submit them anyway */
Gao Xiang6ea5aad2020-09-19 15:27:30 +08001317 z_erofs_runqueue(inode->i_sb, &f, &pagepool, true);
Gao Xiangee451972019-08-19 18:34:21 +08001318
1319 if (err)
Gao Xiang4f761fa2019-09-04 10:09:09 +08001320 erofs_err(inode->i_sb, "failed to read, err [%d]", err);
Gao Xiangee451972019-08-19 18:34:21 +08001321
Chao Yu3b423412019-01-15 09:42:21 +08001322 if (f.map.mpage)
1323 put_page(f.map.mpage);
Gao Xiang3883a792018-07-26 20:22:06 +08001324
1325 /* clean up the remaining free pages */
1326 put_pages_list(&pagepool);
Gao Xiangee451972019-08-19 18:34:21 +08001327 return err;
Gao Xiang3883a792018-07-26 20:22:06 +08001328}
1329
Matthew Wilcox (Oracle)06150902020-06-01 21:47:13 -07001330static void z_erofs_readahead(struct readahead_control *rac)
Gao Xiang3883a792018-07-26 20:22:06 +08001331{
Matthew Wilcox (Oracle)06150902020-06-01 21:47:13 -07001332 struct inode *const inode = rac->mapping->host;
Gao Xiang5fb76bb2018-09-20 00:06:56 +08001333 struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
Gao Xiang3883a792018-07-26 20:22:06 +08001334
Gao Xiangbf9a1232020-09-19 15:27:29 +08001335 unsigned int nr_pages = readahead_count(rac);
1336 bool sync = (nr_pages <= sbi->ctx.max_sync_decompress_pages);
Gao Xiang97e86a82019-07-31 23:57:47 +08001337 struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
Matthew Wilcox (Oracle)06150902020-06-01 21:47:13 -07001338 struct page *page, *head = NULL;
Gao Xiang3883a792018-07-26 20:22:06 +08001339 LIST_HEAD(pagepool);
1340
Gao Xiangbf9a1232020-09-19 15:27:29 +08001341 trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false);
Chen Gong284db122018-09-18 22:27:27 +08001342
Gao Xiang6ea5aad2020-09-19 15:27:30 +08001343 f.readahead = true;
Matthew Wilcox (Oracle)06150902020-06-01 21:47:13 -07001344 f.headoffset = readahead_pos(rac);
Gao Xiangf0c519f2018-11-23 01:21:49 +08001345
Matthew Wilcox (Oracle)06150902020-06-01 21:47:13 -07001346 while ((page = readahead_page(rac))) {
Gao Xiang3883a792018-07-26 20:22:06 +08001347 prefetchw(&page->flags);
Gao Xiang3883a792018-07-26 20:22:06 +08001348
Gao Xiang2d9b5dc2018-11-23 01:21:48 +08001349 /*
1350 * A pure asynchronous readahead is indicated if
1351 * a PG_readahead marked page is hitted at first.
1352 * Let's also do asynchronous decompression for this case.
1353 */
1354 sync &= !(PageReadahead(page) && !head);
1355
Gao Xiang3883a792018-07-26 20:22:06 +08001356 set_page_private(page, (unsigned long)head);
1357 head = page;
1358 }
1359
Cristian Sicilia42d40b42018-11-12 21:43:57 +01001360 while (head) {
Gao Xiang3883a792018-07-26 20:22:06 +08001361 struct page *page = head;
1362 int err;
1363
1364 /* traversal in reverse order */
1365 head = (void *)page_private(page);
1366
Gao Xiang1825c8d2020-12-09 20:37:17 +08001367 err = z_erofs_do_read_page(&f, page, &pagepool);
Gao Xianga5876e22019-09-04 10:08:56 +08001368 if (err)
Gao Xiang4f761fa2019-09-04 10:09:09 +08001369 erofs_err(inode->i_sb,
1370 "readahead error at page %lu @ nid %llu",
1371 page->index, EROFS_I(inode)->nid);
Gao Xiang3883a792018-07-26 20:22:06 +08001372 put_page(page);
1373 }
1374
Gao Xiang97e86a82019-07-31 23:57:47 +08001375 (void)z_erofs_collector_end(&f.clt);
Gao Xiang3883a792018-07-26 20:22:06 +08001376
Gao Xiang6ea5aad2020-09-19 15:27:30 +08001377 z_erofs_runqueue(inode->i_sb, &f, &pagepool, sync);
Gao Xiang3883a792018-07-26 20:22:06 +08001378
Chao Yu3b423412019-01-15 09:42:21 +08001379 if (f.map.mpage)
1380 put_page(f.map.mpage);
Gao Xiang3883a792018-07-26 20:22:06 +08001381
1382 /* clean up the remaining free pages */
1383 put_pages_list(&pagepool);
Gao Xiang3883a792018-07-26 20:22:06 +08001384}
1385
Gao Xiang0c638f72019-11-08 11:37:33 +08001386const struct address_space_operations z_erofs_aops = {
1387 .readpage = z_erofs_readpage,
Matthew Wilcox (Oracle)06150902020-06-01 21:47:13 -07001388 .readahead = z_erofs_readahead,
Gao Xiang3883a792018-07-26 20:22:06 +08001389};
Gao Xiang02827e12018-07-26 20:21:58 +08001390