// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 */
#include "zdata.h"
#include "compress.h"
#include <linux/prefetch.h>

#include <trace/events/erofs.h>

/*
 * a compressed_pages[] placeholder in order to avoid
 * being filled with file pages for in-place decompression.
 */
#define PAGE_UNALLOCATED	((void *)0x5F0E4B1D)

/* how to allocate cached pages for a pcluster */
enum z_erofs_cache_alloctype {
	DONTALLOC,	/* don't allocate any cached pages */
	DELAYEDALLOC,	/* delayed allocation (at the time of submitting io) */
};

/*
 * tagged pointer with 1-bit tag for all compressed pages
 * tag 0 - the page is just found with an extra page reference
 */
typedef tagptr1_t compressed_page_t;

#define tag_compressed_page_justfound(page) \
	tagptr_fold(compressed_page_t, page, 1)

static struct workqueue_struct *z_erofs_workqueue __read_mostly;
static struct kmem_cache *pcluster_cachep __read_mostly;

void z_erofs_exit_zip_subsystem(void)
{
	destroy_workqueue(z_erofs_workqueue);
	kmem_cache_destroy(pcluster_cachep);
}

static inline int z_erofs_init_workqueue(void)
{
	const unsigned int onlinecpus = num_possible_cpus();

	/*
	 * no need to spawn too many threads; limiting threads can minimize
	 * scheduling overhead, perhaps per-CPU threads would be better?
	 */
	z_erofs_workqueue = alloc_workqueue("erofs_unzipd",
					    WQ_UNBOUND | WQ_HIGHPRI,
					    onlinecpus + onlinecpus / 4);
	return z_erofs_workqueue ? 0 : -ENOMEM;
}

static void z_erofs_pcluster_init_once(void *ptr)
{
	struct z_erofs_pcluster *pcl = ptr;
	struct z_erofs_collection *cl = z_erofs_primarycollection(pcl);
	unsigned int i;

	mutex_init(&cl->lock);
	cl->nr_pages = 0;
	cl->vcnt = 0;
	for (i = 0; i < Z_EROFS_CLUSTER_MAX_PAGES; ++i)
		pcl->compressed_pages[i] = NULL;
}

int __init z_erofs_init_zip_subsystem(void)
{
	pcluster_cachep = kmem_cache_create("erofs_compress",
					    Z_EROFS_WORKGROUP_SIZE, 0,
					    SLAB_RECLAIM_ACCOUNT,
					    z_erofs_pcluster_init_once);
	if (pcluster_cachep) {
		if (!z_erofs_init_workqueue())
			return 0;

		kmem_cache_destroy(pcluster_cachep);
	}
	return -ENOMEM;
}

enum z_erofs_collectmode {
	COLLECT_SECONDARY,
	COLLECT_PRIMARY,
	/*
	 * The current collection was the tail of an existing chain, and the
	 * previously processed chained collections have all been decided to
	 * be hooked up to it.
	 * A new chain will be created for the remaining collections which are
	 * not processed yet, so unlike COLLECT_PRIMARY_FOLLOWED,
	 * the next collection cannot reuse the whole page safely in
	 * the following scenario:
	 *  ________________________________________________________________
	 * |      tail (partial) page     |       head (partial) page       |
	 * |   (belongs to the next cl)   |   (belongs to the current cl)   |
	 * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________|
	 */
	COLLECT_PRIMARY_HOOKED,
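	/*
	 * a weak form of COLLECT_PRIMARY_FOLLOWED; the difference is that it
	 * could be dispatched to the bypass queue later due to uptodate
	 * managed pages, so the related online pages cannot be reused for
	 * in-place I/O (or pagevec) since the pcluster may be decoded
	 * without extra I/O submission.
	 */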
	COLLECT_PRIMARY_FOLLOWED_NOINPLACE,
	/*
	 * The current collection has been linked with the owned chain, and
	 * could also be linked with the remaining collections, which means
	 * if the processing page is the tail page of the collection, the
	 * current collection can safely use the whole page (since
	 * the previous collection is under control) for in-place I/O, as
	 * illustrated below:
	 *  ________________________________________________________________
	 * |  tail (partial) page |          head (partial) page           |
	 * |  (of the current cl) |      (of the previous collection)      |
	 * |  PRIMARY_FOLLOWED or |                                        |
	 * |_____PRIMARY_HOOKED___|____________PRIMARY_FOLLOWED____________|
	 *
	 * [  (*) the above page can be used as inplace I/O.               ]
	 */
	COLLECT_PRIMARY_FOLLOWED,
};

struct z_erofs_collector {
	struct z_erofs_pagevec_ctor vector;

	struct z_erofs_pcluster *pcl, *tailpcl;
	struct z_erofs_collection *cl;
	struct page **compressedpages;
	z_erofs_next_pcluster_t owned_head;

	enum z_erofs_collectmode mode;
};

struct z_erofs_decompress_frontend {
	struct inode *const inode;

	struct z_erofs_collector clt;
	struct erofs_map_blocks map;

	bool readahead;
	/* used for applying cache strategy on the fly */
	bool backmost;
	erofs_off_t headoffset;
};

#define COLLECTOR_INIT() { \
	.owned_head = Z_EROFS_PCLUSTER_TAIL, \
	.mode = COLLECT_PRIMARY_FOLLOWED }

#define DECOMPRESS_FRONTEND_INIT(__i) { \
	.inode = __i, .clt = COLLECTOR_INIT(), \
	.backmost = true, }

static struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES];
static DEFINE_MUTEX(z_pagemap_global_lock);

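/*
 * Fill the pcluster's compressed_pages[] from the managed cache @mc:
 * already-cached pages are tagged as justfound, missing ones are either
 * marked PAGE_UNALLOCATED (DELAYEDALLOC) or left empty (DONTALLOC); if
 * every slot is settled without file pages (standalone), the collector is
 * downgraded to PRIMARY_FOLLOWED_NOINPLACE.
 */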
static void preload_compressed_pages(struct z_erofs_collector *clt,
				     struct address_space *mc,
				     enum z_erofs_cache_alloctype type)
{
	const struct z_erofs_pcluster *pcl = clt->pcl;
	const unsigned int clusterpages = BIT(pcl->clusterbits);
	struct page **pages = clt->compressedpages;
	pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages);
	bool standalone = true;

	if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
		return;

	for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
		struct page *page;
		compressed_page_t t;

		/* the compressed page was loaded before */
		if (READ_ONCE(*pages))
			continue;

		page = find_get_page(mc, index);

		if (page) {
			t = tag_compressed_page_justfound(page);
		} else if (type == DELAYEDALLOC) {
			t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
		} else {	/* DONTALLOC */
			if (standalone)
				clt->compressedpages = pages;
			standalone = false;
			continue;
		}

		if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
			continue;

		if (page)
			put_page(page);
	}

	if (standalone)		/* downgrade to PRIMARY_FOLLOWED_NOINPLACE */
		clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
}

/* called by erofs_shrinker to get rid of all compressed_pages */
int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
				       struct erofs_workgroup *grp)
{
	struct z_erofs_pcluster *const pcl =
		container_of(grp, struct z_erofs_pcluster, obj);
	struct address_space *const mapping = MNGD_MAPPING(sbi);
	const unsigned int clusterpages = BIT(pcl->clusterbits);
	int i;

	/*
	 * the refcount of the workgroup is now frozen at 1,
	 * therefore no need to worry about available decompression users.
	 */
	for (i = 0; i < clusterpages; ++i) {
		struct page *page = pcl->compressed_pages[i];

		if (!page)
			continue;

		/* block other users from reclaiming or migrating the page */
		if (!trylock_page(page))
			return -EBUSY;

		if (page->mapping != mapping)
			continue;

		/* barrier is implied in the following 'unlock_page' */
		WRITE_ONCE(pcl->compressed_pages[i], NULL);
		detach_page_private(page);
		unlock_page(page);
	}
	return 0;
}

int erofs_try_to_free_cached_page(struct address_space *mapping,
				  struct page *page)
{
	struct z_erofs_pcluster *const pcl = (void *)page_private(page);
	const unsigned int clusterpages = BIT(pcl->clusterbits);
	int ret = 0;	/* 0 - busy */

	if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) {
		unsigned int i;

		for (i = 0; i < clusterpages; ++i) {
			if (pcl->compressed_pages[i] == page) {
				WRITE_ONCE(pcl->compressed_pages[i], NULL);
				ret = 1;
				break;
			}
		}
		erofs_workgroup_unfreeze(&pcl->obj, 1);

		if (ret)
			detach_page_private(page);
	}
	return ret;
}

/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */
static inline bool z_erofs_try_inplace_io(struct z_erofs_collector *clt,
					  struct page *page)
{
	struct z_erofs_pcluster *const pcl = clt->pcl;
	const unsigned int clusterpages = BIT(pcl->clusterbits);

	while (clt->compressedpages < pcl->compressed_pages + clusterpages) {
		if (!cmpxchg(clt->compressedpages++, NULL, page))
			return true;
	}
	return false;
}

/* callers must hold the collection lock */
static int z_erofs_attach_page(struct z_erofs_collector *clt,
			       struct page *page,
			       enum z_erofs_page_type type)
{
	int ret;
	bool occupied;

	/* give priority to in-place I/O */
	if (clt->mode >= COLLECT_PRIMARY &&
	    type == Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
	    z_erofs_try_inplace_io(clt, page))
		return 0;

	ret = z_erofs_pagevec_enqueue(&clt->vector,
				      page, type, &occupied);
	clt->cl->vcnt += (unsigned int)ret;

	return ret ? 0 : -EAGAIN;
}

static enum z_erofs_collectmode
try_to_claim_pcluster(struct z_erofs_pcluster *pcl,
		      z_erofs_next_pcluster_t *owned_head)
{
	/* let's claim these following types of pclusters */
retry:
	if (pcl->next == Z_EROFS_PCLUSTER_NIL) {
		/* type 1, nil pcluster */
		if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL,
			    *owned_head) != Z_EROFS_PCLUSTER_NIL)
			goto retry;

		*owned_head = &pcl->next;
		/* lucky, I am the followee :) */
		return COLLECT_PRIMARY_FOLLOWED;
	} else if (pcl->next == Z_EROFS_PCLUSTER_TAIL) {
		/*
		 * type 2, link to the end of an existing open chain;
		 * be careful that its submission is governed
		 * by the original owned chain.
		 */
		if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
			    *owned_head) != Z_EROFS_PCLUSTER_TAIL)
			goto retry;
		*owned_head = Z_EROFS_PCLUSTER_TAIL;
		return COLLECT_PRIMARY_HOOKED;
	}
	return COLLECT_PRIMARY;	/* :( better luck next time */
}

static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
				     struct inode *inode,
				     struct erofs_map_blocks *map)
{
	struct z_erofs_pcluster *pcl = clt->pcl;
	struct z_erofs_collection *cl;
	unsigned int length;

	/* to avoid unexpected loop formed by corrupted images */
	if (clt->owned_head == &pcl->next || pcl == clt->tailpcl) {
		DBG_BUGON(1);
		return -EFSCORRUPTED;
	}

	cl = z_erofs_primarycollection(pcl);
	if (cl->pageofs != (map->m_la & ~PAGE_MASK)) {
		DBG_BUGON(1);
		return -EFSCORRUPTED;
	}

	length = READ_ONCE(pcl->length);
	if (length & Z_EROFS_PCLUSTER_FULL_LENGTH) {
		if ((map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) > length) {
			DBG_BUGON(1);
			return -EFSCORRUPTED;
		}
	} else {
		unsigned int llen = map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT;

		if (map->m_flags & EROFS_MAP_FULL_MAPPED)
			llen |= Z_EROFS_PCLUSTER_FULL_LENGTH;

		while (llen > length &&
		       length != cmpxchg_relaxed(&pcl->length, length, llen)) {
			cpu_relax();
			length = READ_ONCE(pcl->length);
		}
	}
	mutex_lock(&cl->lock);
	/* used to check tail merging loop due to corrupted images */
	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
		clt->tailpcl = pcl;
	clt->mode = try_to_claim_pcluster(pcl, &clt->owned_head);
	/* clean tailpcl if the current owned_head is Z_EROFS_PCLUSTER_TAIL */
	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
		clt->tailpcl = NULL;
	clt->cl = cl;
	return 0;
}

static int z_erofs_register_collection(struct z_erofs_collector *clt,
				       struct inode *inode,
				       struct erofs_map_blocks *map)
{
	struct z_erofs_pcluster *pcl;
	struct z_erofs_collection *cl;
	struct erofs_workgroup *grp;
	int err;

	/* no available workgroup, let's allocate one */
	pcl = kmem_cache_alloc(pcluster_cachep, GFP_NOFS);
	if (!pcl)
		return -ENOMEM;

	atomic_set(&pcl->obj.refcount, 1);
	pcl->obj.index = map->m_pa >> PAGE_SHIFT;

	pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
		(map->m_flags & EROFS_MAP_FULL_MAPPED ?
			Z_EROFS_PCLUSTER_FULL_LENGTH : 0);

	if (map->m_flags & EROFS_MAP_ZIPPED)
		pcl->algorithmformat = Z_EROFS_COMPRESSION_LZ4;
	else
		pcl->algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;

	pcl->clusterbits = EROFS_I(inode)->z_physical_clusterbits[0];
	pcl->clusterbits -= PAGE_SHIFT;

	/* new pclusters should be claimed as type 1, primary and followed */
	pcl->next = clt->owned_head;
	clt->mode = COLLECT_PRIMARY_FOLLOWED;

	cl = z_erofs_primarycollection(pcl);

	/* must be cleaned before freeing to slab */
	DBG_BUGON(cl->nr_pages);
	DBG_BUGON(cl->vcnt);

	cl->pageofs = map->m_la & ~PAGE_MASK;

	/*
	 * lock all primary followed works before they become visible to
	 * others, and mutex_trylock *never* fails for a new pcluster.
	 */
	DBG_BUGON(!mutex_trylock(&cl->lock));

	grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
	if (IS_ERR(grp)) {
		err = PTR_ERR(grp);
		goto err_out;
	}

	if (grp != &pcl->obj) {
		clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
		err = -EEXIST;
		goto err_out;
	}
	/* used to check tail merging loop due to corrupted images */
	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
		clt->tailpcl = pcl;
	clt->owned_head = &pcl->next;
	clt->pcl = pcl;
	clt->cl = cl;
	return 0;

err_out:
	mutex_unlock(&cl->lock);
	kmem_cache_free(pcluster_cachep, pcl);
	return err;
}

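/*
 * Find or create the pcluster for the extent described by @map and set up
 * the inline pagevec ctor; collectors at or below COLLECT_PRIMARY skip the
 * compressed page slots since they cannot do in-place I/O.
 */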
static int z_erofs_collector_begin(struct z_erofs_collector *clt,
				   struct inode *inode,
				   struct erofs_map_blocks *map)
{
	struct erofs_workgroup *grp;
	int ret;

	DBG_BUGON(clt->cl);

	/* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous collection */
	DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL);
	DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);

	if (!PAGE_ALIGNED(map->m_pa)) {
		DBG_BUGON(1);
		return -EINVAL;
	}

	grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
	if (grp) {
		clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
	} else {
		ret = z_erofs_register_collection(clt, inode, map);

		if (!ret)
			goto out;
		if (ret != -EEXIST)
			return ret;
	}

	ret = z_erofs_lookup_collection(clt, inode, map);
	if (ret) {
		erofs_workgroup_put(&clt->pcl->obj);
		return ret;
	}

out:
	z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS,
				  clt->cl->pagevec, clt->cl->vcnt);

	clt->compressedpages = clt->pcl->compressed_pages;
	if (clt->mode <= COLLECT_PRIMARY)	/* cannot do in-place I/O */
		clt->compressedpages += Z_EROFS_CLUSTER_MAX_PAGES;
	return 0;
}

/*
 * keep in mind that referenced pclusters will be freed
 * only after an RCU grace period.
 */
static void z_erofs_rcu_callback(struct rcu_head *head)
{
	struct z_erofs_collection *const cl =
		container_of(head, struct z_erofs_collection, rcu);

	kmem_cache_free(pcluster_cachep,
			container_of(cl, struct z_erofs_pcluster,
				     primary_collection));
}

void erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
{
	struct z_erofs_pcluster *const pcl =
		container_of(grp, struct z_erofs_pcluster, obj);
	struct z_erofs_collection *const cl = z_erofs_primarycollection(pcl);

	call_rcu(&cl->rcu, z_erofs_rcu_callback);
}

static void z_erofs_collection_put(struct z_erofs_collection *cl)
{
	struct z_erofs_pcluster *const pcl =
		container_of(cl, struct z_erofs_pcluster, primary_collection);

	erofs_workgroup_put(&pcl->obj);
}

static bool z_erofs_collector_end(struct z_erofs_collector *clt)
{
	struct z_erofs_collection *cl = clt->cl;

	if (!cl)
		return false;

	z_erofs_pagevec_ctor_exit(&clt->vector, false);
	mutex_unlock(&cl->lock);

	/*
	 * once all pending pages are added, don't hold the reference
	 * any longer if the pcluster isn't hosted by ourselves.
	 */
	if (clt->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE)
		z_erofs_collection_put(cl);

	clt->cl = NULL;
	return true;
}

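/*
 * Decide whether to populate the managed cache for this extent: caching is
 * off under EROFS_ZIP_CACHE_DISABLED, always on for the backmost extent,
 * and under EROFS_ZIP_CACHE_READAROUND only for data before the
 * user-requested start offset.
 */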
static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
				       unsigned int cachestrategy,
				       erofs_off_t la)
{
	if (cachestrategy <= EROFS_ZIP_CACHE_DISABLED)
		return false;

	if (fe->backmost)
		return true;

	return cachestrategy >= EROFS_ZIP_CACHE_READAROUND &&
		la < fe->headoffset;
}

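/*
 * Run a single file page through the read path: map it, attach it to a
 * collection (a page may be split across several pclusters) and mark it
 * online so endio is deferred until every part has been decompressed.
 */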
static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
				struct page *page)
{
	struct inode *const inode = fe->inode;
	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
	struct erofs_map_blocks *const map = &fe->map;
	struct z_erofs_collector *const clt = &fe->clt;
	const loff_t offset = page_offset(page);
	bool tight = true;

	enum z_erofs_cache_alloctype cache_strategy;
	enum z_erofs_page_type page_type;
	unsigned int cur, end, spiltted, index;
	int err = 0;

	/* register locked file pages as online pages in pack */
	z_erofs_onlinepage_init(page);

	spiltted = 0;
	end = PAGE_SIZE;
repeat:
	cur = end - 1;

	/* lucky, within the range of the current map_blocks */
	if (offset + cur >= map->m_la &&
	    offset + cur < map->m_la + map->m_llen) {
		/* didn't get a valid collection previously (very rare) */
		if (!clt->cl)
			goto restart_now;
		goto hitted;
	}

	/* go on to the next map_blocks */
	erofs_dbg("%s: [out-of-range] pos %llu", __func__, offset + cur);

	if (z_erofs_collector_end(clt))
		fe->backmost = false;

	map->m_la = offset + cur;
	map->m_llen = 0;
	err = z_erofs_map_blocks_iter(inode, map, 0);
	if (err)
		goto err_out;

restart_now:
	if (!(map->m_flags & EROFS_MAP_MAPPED))
		goto hitted;

	err = z_erofs_collector_begin(clt, inode, map);
	if (err)
		goto err_out;

	/* preload all compressed pages (maybe downgrade role if necessary) */
	if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
		cache_strategy = DELAYEDALLOC;
	else
		cache_strategy = DONTALLOC;

	preload_compressed_pages(clt, MNGD_MAPPING(sbi), cache_strategy);

hitted:
	/*
	 * Ensure the current partial page belongs to this submit chain rather
	 * than other concurrent submit chains or the noio(bypass) chain since
	 * those chains are handled asynchronously, thus the page cannot be
	 * used for inplace I/O or pagevec (should be processed in strict
	 * order.)
	 */
	tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED &&
		  clt->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE);

	cur = end - min_t(unsigned int, offset + end - map->m_la, end);
	if (!(map->m_flags & EROFS_MAP_MAPPED)) {
		zero_user_segment(page, cur, end);
		goto next_part;
	}

	/* let's derive page type */
	page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD :
		(!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
			(tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
				Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));

	if (cur)
		tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED);

retry:
	err = z_erofs_attach_page(clt, page, page_type);
	/* should allocate an additional short-lived page for pagevec */
	if (err == -EAGAIN) {
		struct page *const newpage =
				alloc_page(GFP_NOFS | __GFP_NOFAIL);

		set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
		err = z_erofs_attach_page(clt, newpage,
					  Z_EROFS_PAGE_TYPE_EXCLUSIVE);
		if (!err)
			goto retry;
	}

	if (err)
		goto err_out;

	index = page->index - (map->m_la >> PAGE_SHIFT);

	z_erofs_onlinepage_fixup(page, index, true);

	/* bump up the number of spiltted parts of a page */
	++spiltted;
	/* also update nr_pages */
	clt->cl->nr_pages = max_t(pgoff_t, clt->cl->nr_pages, index + 1);
next_part:
	/* can be used for verification */
	map->m_llen = offset + cur - map->m_la;

	end = cur;
	if (end > 0)
		goto repeat;

out:
	z_erofs_onlinepage_endio(page);

	erofs_dbg("%s, finish page: %pK spiltted: %u map->m_llen %llu",
		  __func__, page, spiltted, map->m_llen);
	return err;

	/* if an error occurred while processing this page */
err_out:
	SetPageError(page);
	goto out;
}

static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
				       bool sync, int bios)
{
	/* wake up the caller thread for sync decompression */
	if (sync) {
		unsigned long flags;

		spin_lock_irqsave(&io->u.wait.lock, flags);
		if (!atomic_add_return(bios, &io->pending_bios))
			wake_up_locked(&io->u.wait);
		spin_unlock_irqrestore(&io->u.wait.lock, flags);
		return;
	}

	if (!atomic_add_return(bios, &io->pending_bios))
		queue_work(z_erofs_workqueue, &io->u.work);
}

static bool z_erofs_page_is_invalidated(struct page *page)
{
	return !page->mapping && !z_erofs_is_shortlived_page(page);
}

static void z_erofs_decompressqueue_endio(struct bio *bio)
{
	tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
	struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t);
	blk_status_t err = bio->bi_status;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *page = bvec->bv_page;

		DBG_BUGON(PageUptodate(page));
		DBG_BUGON(z_erofs_page_is_invalidated(page));

		if (err)
			SetPageError(page);

		if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
			if (!err)
				SetPageUptodate(page);
			unlock_page(page);
		}
	}
	z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1);
	bio_put(bio);
}

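/*
 * The decompression backend for one pcluster: gather decompressed (online)
 * pages from the pagevec, pick up in-place compressed pages, feed both
 * arrays to z_erofs_decompress() and then end all attached pages.
 */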
static int z_erofs_decompress_pcluster(struct super_block *sb,
				       struct z_erofs_pcluster *pcl,
				       struct list_head *pagepool)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	const unsigned int clusterpages = BIT(pcl->clusterbits);
	struct z_erofs_pagevec_ctor ctor;
	unsigned int i, outputsize, llen, nr_pages;
	struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES];
	struct page **pages, **compressed_pages, *page;

	enum z_erofs_page_type page_type;
	bool overlapped, partial;
	struct z_erofs_collection *cl;
	int err;

	might_sleep();
	cl = z_erofs_primarycollection(pcl);
	DBG_BUGON(!READ_ONCE(cl->nr_pages));

	mutex_lock(&cl->lock);
	nr_pages = cl->nr_pages;

	if (nr_pages <= Z_EROFS_VMAP_ONSTACK_PAGES) {
		pages = pages_onstack;
	} else if (nr_pages <= Z_EROFS_VMAP_GLOBAL_PAGES &&
		   mutex_trylock(&z_pagemap_global_lock)) {
		pages = z_pagemap_global;
	} else {
		gfp_t gfp_flags = GFP_KERNEL;

		if (nr_pages > Z_EROFS_VMAP_GLOBAL_PAGES)
			gfp_flags |= __GFP_NOFAIL;

		pages = kvmalloc_array(nr_pages, sizeof(struct page *),
				       gfp_flags);

		/* fallback to global pagemap for the lowmem scenario */
		if (!pages) {
			mutex_lock(&z_pagemap_global_lock);
			pages = z_pagemap_global;
		}
	}

	for (i = 0; i < nr_pages; ++i)
		pages[i] = NULL;

	err = 0;
	z_erofs_pagevec_ctor_init(&ctor, Z_EROFS_NR_INLINE_PAGEVECS,
				  cl->pagevec, 0);

	for (i = 0; i < cl->vcnt; ++i) {
		unsigned int pagenr;

		page = z_erofs_pagevec_dequeue(&ctor, &page_type);

		/* all pages in pagevec ought to be valid */
		DBG_BUGON(!page);
		DBG_BUGON(z_erofs_page_is_invalidated(page));

		if (z_erofs_put_shortlivedpage(pagepool, page))
			continue;

		if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
			pagenr = 0;
		else
			pagenr = z_erofs_onlinepage_index(page);

		DBG_BUGON(pagenr >= nr_pages);

		/*
		 * currently EROFS doesn't support multiref (dedup),
		 * so error out if a multiref page is found.
		 */
		if (pages[pagenr]) {
			DBG_BUGON(1);
			SetPageError(pages[pagenr]);
			z_erofs_onlinepage_endio(pages[pagenr]);
			err = -EFSCORRUPTED;
		}
		pages[pagenr] = page;
	}
	z_erofs_pagevec_ctor_exit(&ctor, true);

	overlapped = false;
	compressed_pages = pcl->compressed_pages;

	for (i = 0; i < clusterpages; ++i) {
		unsigned int pagenr;

		page = compressed_pages[i];

		/* all compressed pages ought to be valid */
		DBG_BUGON(!page);
		DBG_BUGON(z_erofs_page_is_invalidated(page));

		if (!z_erofs_is_shortlived_page(page)) {
			if (erofs_page_is_managed(sbi, page)) {
				if (!PageUptodate(page))
					err = -EIO;
				continue;
			}

			/*
			 * only non-head pages can be selected
			 * for inplace decompression
			 */
			pagenr = z_erofs_onlinepage_index(page);

			DBG_BUGON(pagenr >= nr_pages);
			if (pages[pagenr]) {
				DBG_BUGON(1);
				SetPageError(pages[pagenr]);
				z_erofs_onlinepage_endio(pages[pagenr]);
				err = -EFSCORRUPTED;
			}
			pages[pagenr] = page;

			overlapped = true;
		}

		/* PG_error needs checking for all non-managed pages */
		if (PageError(page)) {
			DBG_BUGON(PageUptodate(page));
			err = -EIO;
		}
	}

	if (err)
		goto out;

	llen = pcl->length >> Z_EROFS_PCLUSTER_LENGTH_BIT;
	if (nr_pages << PAGE_SHIFT >= cl->pageofs + llen) {
		outputsize = llen;
		partial = !(pcl->length & Z_EROFS_PCLUSTER_FULL_LENGTH);
	} else {
		outputsize = (nr_pages << PAGE_SHIFT) - cl->pageofs;
		partial = true;
	}

	err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
				 .sb = sb,
				 .in = compressed_pages,
				 .out = pages,
				 .pageofs_out = cl->pageofs,
				 .inputsize = PAGE_SIZE,
				 .outputsize = outputsize,
				 .alg = pcl->algorithmformat,
				 .inplace_io = overlapped,
				 .partial_decoding = partial
				 }, pagepool);

out:
	/* must handle all compressed pages before ending file pages */
	for (i = 0; i < clusterpages; ++i) {
		page = compressed_pages[i];

		if (erofs_page_is_managed(sbi, page))
			continue;

		/* recycle all individual short-lived pages */
		(void)z_erofs_put_shortlivedpage(pagepool, page);

		WRITE_ONCE(compressed_pages[i], NULL);
	}

	for (i = 0; i < nr_pages; ++i) {
		page = pages[i];
		if (!page)
			continue;

		DBG_BUGON(z_erofs_page_is_invalidated(page));

		/* recycle all individual short-lived pages */
		if (z_erofs_put_shortlivedpage(pagepool, page))
			continue;

		if (err < 0)
			SetPageError(page);

		z_erofs_onlinepage_endio(page);
	}

	if (pages == z_pagemap_global)
		mutex_unlock(&z_pagemap_global_lock);
	else if (pages != pages_onstack)
		kvfree(pages);

	cl->nr_pages = 0;
	cl->vcnt = 0;

	/* all cl locks MUST be taken before the following line */
	WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_NIL);

	/* all cl locks SHOULD be released right now */
	mutex_unlock(&cl->lock);

	z_erofs_collection_put(cl);
	return err;
}

static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
				     struct list_head *pagepool)
{
	z_erofs_next_pcluster_t owned = io->head;

	while (owned != Z_EROFS_PCLUSTER_TAIL_CLOSED) {
		struct z_erofs_pcluster *pcl;

		/* impossible that 'owned' equals Z_EROFS_PCLUSTER_TAIL */
		DBG_BUGON(owned == Z_EROFS_PCLUSTER_TAIL);

		/* impossible that 'owned' equals Z_EROFS_PCLUSTER_NIL */
		DBG_BUGON(owned == Z_EROFS_PCLUSTER_NIL);

		pcl = container_of(owned, struct z_erofs_pcluster, next);
		owned = READ_ONCE(pcl->next);

		z_erofs_decompress_pcluster(io->sb, pcl, pagepool);
	}
}

static void z_erofs_decompressqueue_work(struct work_struct *work)
{
	struct z_erofs_decompressqueue *bgq =
		container_of(work, struct z_erofs_decompressqueue, u.work);
	LIST_HEAD(pagepool);

	DBG_BUGON(bgq->head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
	z_erofs_decompress_queue(bgq, &pagepool);

	put_pages_list(&pagepool);
	kvfree(bgq);
}

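/*
 * Pick up one compressed page for I/O submission: reuse the page cached in
 * the managed mapping @mc when it is still valid, otherwise allocate a new
 * one and (when tocache is set) insert it into the cache.
 */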
static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
					       unsigned int nr,
					       struct list_head *pagepool,
					       struct address_space *mc,
					       gfp_t gfp)
{
	const pgoff_t index = pcl->obj.index;
	bool tocache = false;

	struct address_space *mapping;
	struct page *oldpage, *page;

	compressed_page_t t;
	int justfound;

repeat:
	page = READ_ONCE(pcl->compressed_pages[nr]);
	oldpage = page;

	if (!page)
		goto out_allocpage;

	/*
	 * the cached page has not been allocated yet and
	 * a placeholder is out there, prepare it now.
	 */
	if (page == PAGE_UNALLOCATED) {
		tocache = true;
		goto out_allocpage;
	}

	/* process the target tagged pointer */
	t = tagptr_init(compressed_page_t, page);
	justfound = tagptr_unfold_tags(t);
	page = tagptr_unfold_ptr(t);

	mapping = READ_ONCE(page->mapping);

	/*
	 * file-backed online pages in the pcluster are all locked steady,
	 * therefore it is impossible for `mapping' to be NULL.
	 */
	if (mapping && mapping != mc)
		/* ought to be unmanaged pages */
		goto out;

	/* directly return for shortlived page as well */
	if (z_erofs_is_shortlived_page(page))
		goto out;

	lock_page(page);

	/* only true if page reclaim goes wrong, should never happen */
	DBG_BUGON(justfound && PagePrivate(page));

	/* the page is still in manage cache */
	if (page->mapping == mc) {
		WRITE_ONCE(pcl->compressed_pages[nr], page);

		ClearPageError(page);
		if (!PagePrivate(page)) {
			/*
			 * impossible to be !PagePrivate(page) for
			 * the current restriction as well if
			 * the page is already in compressed_pages[].
			 */
			DBG_BUGON(!justfound);

			justfound = 0;
			set_page_private(page, (unsigned long)pcl);
			SetPagePrivate(page);
		}

		/* no need to submit io if it is already up-to-date */
		if (PageUptodate(page)) {
			unlock_page(page);
			page = NULL;
		}
		goto out;
	}

	/*
	 * the managed page has been truncated, it's unsafe to
	 * reuse this one, let's allocate a new cache-managed page.
	 */
	DBG_BUGON(page->mapping);
	DBG_BUGON(!justfound);

	tocache = true;
	unlock_page(page);
	put_page(page);
out_allocpage:
	page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
	if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
		/* turn into a short-lived page if cache insertion fails */
		set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
		tocache = false;
	}

	if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) {
		if (tocache) {
			/* since it was added to the managed cache successfully */
			unlock_page(page);
			put_page(page);
		} else {
			list_add(&page->lru, pagepool);
		}
		cond_resched();
		goto repeat;
	}

	if (tocache) {
		attach_page_private(page, pcl);
		/* drop a ref added by allocpage (then we have 2 refs here) */
		put_page(page);
	}
out:	/* the only exit (for tracing and debugging) */
	return page;
}

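/*
 * For synchronous decompression, use the caller-provided on-stack queue
 * @fgq; otherwise allocate a background queue handled by the workqueue,
 * falling back to the foreground path if the allocation fails.
 */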
static struct z_erofs_decompressqueue *
jobqueue_init(struct super_block *sb,
	      struct z_erofs_decompressqueue *fgq, bool *fg)
{
	struct z_erofs_decompressqueue *q;

	if (fg && !*fg) {
		q = kvzalloc(sizeof(*q), GFP_KERNEL | __GFP_NOWARN);
		if (!q) {
			*fg = true;
			goto fg_out;
		}
		INIT_WORK(&q->u.work, z_erofs_decompressqueue_work);
	} else {
fg_out:
		q = fgq;
		init_waitqueue_head(&fgq->u.wait);
		atomic_set(&fgq->pending_bios, 0);
	}
	q->sb = sb;
	q->head = Z_EROFS_PCLUSTER_TAIL_CLOSED;
	return q;
}

/* define decompression jobqueue types */
enum {
	JQ_BYPASS,
	JQ_SUBMIT,
	NR_JOBQUEUES,
};

static void *jobqueueset_init(struct super_block *sb,
			      struct z_erofs_decompressqueue *q[],
			      struct z_erofs_decompressqueue *fgq, bool *fg)
{
	/*
	 * if managed cache is enabled, a bypass jobqueue is needed since
	 * pclusters in that queue don't need to be read from the device.
	 */
	q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, NULL);
	q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, fg);

	return tagptr_cast_ptr(tagptr_fold(tagptr1_t, q[JQ_SUBMIT], *fg));
}

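/*
 * Detach the pcluster from the submit chain and stitch it into the bypass
 * queue, keeping the relative order of both chains intact.
 */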
Gao Xiang97e86a82019-07-31 23:57:47 +08001140static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
1141 z_erofs_next_pcluster_t qtail[],
1142 z_erofs_next_pcluster_t owned_head)
Gao Xiang7146a4f2018-12-08 00:19:18 +08001143{
Gao Xiang97e86a82019-07-31 23:57:47 +08001144 z_erofs_next_pcluster_t *const submit_qtail = qtail[JQ_SUBMIT];
1145 z_erofs_next_pcluster_t *const bypass_qtail = qtail[JQ_BYPASS];
Gao Xiang7146a4f2018-12-08 00:19:18 +08001146
Gao Xiang97e86a82019-07-31 23:57:47 +08001147 DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
1148 if (owned_head == Z_EROFS_PCLUSTER_TAIL)
1149 owned_head = Z_EROFS_PCLUSTER_TAIL_CLOSED;
Gao Xiang7146a4f2018-12-08 00:19:18 +08001150
Gao Xiang97e86a82019-07-31 23:57:47 +08001151 WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_TAIL_CLOSED);
Gao Xiang7146a4f2018-12-08 00:19:18 +08001152
1153 WRITE_ONCE(*submit_qtail, owned_head);
Gao Xiang97e86a82019-07-31 23:57:47 +08001154 WRITE_ONCE(*bypass_qtail, &pcl->next);
Gao Xiang7146a4f2018-12-08 00:19:18 +08001155
Gao Xiang97e86a82019-07-31 23:57:47 +08001156 qtail[JQ_BYPASS] = &pcl->next;
Gao Xiang7146a4f2018-12-08 00:19:18 +08001157}
1158
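/*
 * z_erofs_submit_queue() walks the chain of owned pclusters, closing it
 * as it goes, and builds read bios for every compressed page that still
 * needs I/O; pclusters that need no I/O at all are moved over to the
 * bypass jobqueue for direct decompression.
 */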
Gao Xiang1e4a2952020-01-21 14:48:19 +08001159static void z_erofs_submit_queue(struct super_block *sb,
Gao Xiang6ea5aad2020-09-19 15:27:30 +08001160 struct z_erofs_decompress_frontend *f,
Gao Xiang0c638f72019-11-08 11:37:33 +08001161 struct list_head *pagepool,
1162 struct z_erofs_decompressqueue *fgq,
1163 bool *force_fg)
Gao Xiang3883a792018-07-26 20:22:06 +08001164{
Gao Xiangbda17a42019-10-08 20:56:13 +08001165 struct erofs_sb_info *const sbi = EROFS_SB(sb);
Gao Xiang97e86a82019-07-31 23:57:47 +08001166 z_erofs_next_pcluster_t qtail[NR_JOBQUEUES];
Gao Xianga4b1fab2019-10-08 20:56:15 +08001167 struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
Gao Xiang7146a4f2018-12-08 00:19:18 +08001168 void *bi_private;
Gao Xiang6ea5aad2020-09-19 15:27:30 +08001169 z_erofs_next_pcluster_t owned_head = f->clt.owned_head;
Gao Xiang3883a792018-07-26 20:22:06 +08001170 /* since bio will be NULL, no need to initialize last_index */
Kees Cook3f649ab2020-06-03 13:09:38 -07001171 pgoff_t last_index;
Gao Xiang1e4a2952020-01-21 14:48:19 +08001172 unsigned int nr_bios = 0;
1173 struct bio *bio = NULL;
Gao Xiang3883a792018-07-26 20:22:06 +08001174
Gao Xianga4b1fab2019-10-08 20:56:15 +08001175 bi_private = jobqueueset_init(sb, q, fgq, force_fg);
1176 qtail[JQ_BYPASS] = &q[JQ_BYPASS]->head;
1177 qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head;
Gao Xiang3883a792018-07-26 20:22:06 +08001178
 1179 /* by default, all pclusters need I/O submission */
Gao Xiang7146a4f2018-12-08 00:19:18 +08001180 q[JQ_SUBMIT]->head = owned_head;
Gao Xiang3883a792018-07-26 20:22:06 +08001181
1182 do {
Gao Xiang97e86a82019-07-31 23:57:47 +08001183 struct z_erofs_pcluster *pcl;
Gao Xiang1e4a2952020-01-21 14:48:19 +08001184 pgoff_t cur, end;
1185 unsigned int i = 0;
1186 bool bypass = true;
Gao Xiang3883a792018-07-26 20:22:06 +08001187
 1188 /* 'owned_head' can never equal either of the following states */
Gao Xiang97e86a82019-07-31 23:57:47 +08001189 DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
1190 DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_NIL);
Gao Xiang3883a792018-07-26 20:22:06 +08001191
Gao Xiang97e86a82019-07-31 23:57:47 +08001192 pcl = container_of(owned_head, struct z_erofs_pcluster, next);
1193
Gao Xiang1e4a2952020-01-21 14:48:19 +08001194 cur = pcl->obj.index;
1195 end = cur + BIT(pcl->clusterbits);
Gao Xiang3883a792018-07-26 20:22:06 +08001196
 1197 /* close the main owned chain first */
Gao Xiang97e86a82019-07-31 23:57:47 +08001198 owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
1199 Z_EROFS_PCLUSTER_TAIL_CLOSED);
Gao Xiang3883a792018-07-26 20:22:06 +08001200
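		/*
		 * submit each compressed page of this pcluster: physically
		 * contiguous pages are merged into the current bio, while a
		 * gap in the block index forces the bio to be submitted and
		 * a fresh one to be allocated.
		 */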
Gao Xiang1e4a2952020-01-21 14:48:19 +08001201 do {
1202 struct page *page;
Gao Xiang9248fce2018-12-08 00:19:15 +08001203
Gao Xiang1e4a2952020-01-21 14:48:19 +08001204 page = pickup_page_for_submission(pcl, i++, pagepool,
1205 MNGD_MAPPING(sbi),
1206 GFP_NOFS);
1207 if (!page)
1208 continue;
Gao Xiang3883a792018-07-26 20:22:06 +08001209
Gao Xiang1e4a2952020-01-21 14:48:19 +08001210 if (bio && cur != last_index + 1) {
Gao Xiang3883a792018-07-26 20:22:06 +08001211submit_bio_retry:
Gao Xiang1e4a2952020-01-21 14:48:19 +08001212 submit_bio(bio);
1213 bio = NULL;
1214 }
Gao Xiang3883a792018-07-26 20:22:06 +08001215
Gao Xiang1e4a2952020-01-21 14:48:19 +08001216 if (!bio) {
1217 bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
Gao Xianga5c0b782019-09-04 10:09:02 +08001218
Gao Xiang1e4a2952020-01-21 14:48:19 +08001219 bio->bi_end_io = z_erofs_decompressqueue_endio;
1220 bio_set_dev(bio, sb->s_bdev);
1221 bio->bi_iter.bi_sector = (sector_t)cur <<
1222 LOG_SECTORS_PER_BLOCK;
1223 bio->bi_private = bi_private;
1224 bio->bi_opf = REQ_OP_READ;
Gao Xiang6ea5aad2020-09-19 15:27:30 +08001225 if (f->readahead)
1226 bio->bi_opf |= REQ_RAHEAD;
Gao Xiang1e4a2952020-01-21 14:48:19 +08001227 ++nr_bios;
1228 }
Gao Xiang94e4e152019-09-04 10:09:04 +08001229
Gao Xiang6c3e4852020-09-19 15:27:28 +08001230 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
Gao Xiang1e4a2952020-01-21 14:48:19 +08001231 goto submit_bio_retry;
Gao Xiang3883a792018-07-26 20:22:06 +08001232
Gao Xiang1e4a2952020-01-21 14:48:19 +08001233 last_index = cur;
1234 bypass = false;
1235 } while (++cur < end);
Gao Xiang3883a792018-07-26 20:22:06 +08001236
Gao Xiang1e4a2952020-01-21 14:48:19 +08001237 if (!bypass)
Gao Xiang97e86a82019-07-31 23:57:47 +08001238 qtail[JQ_SUBMIT] = &pcl->next;
Gao Xiang7146a4f2018-12-08 00:19:18 +08001239 else
Gao Xiang97e86a82019-07-31 23:57:47 +08001240 move_to_bypass_jobqueue(pcl, qtail, owned_head);
1241 } while (owned_head != Z_EROFS_PCLUSTER_TAIL);
Gao Xiang3883a792018-07-26 20:22:06 +08001242
Cristian Sicilia42d40b42018-11-12 21:43:57 +01001243 if (bio)
Gao Xiang94e4e152019-09-04 10:09:04 +08001244 submit_bio(bio);
Gao Xiang3883a792018-07-26 20:22:06 +08001245
Gao Xiang587a67b2020-01-21 14:47:47 +08001246 /*
 1247 * although background is preferred, nothing is pending for submission,
 1248 * so don't kick the decompression workqueue; drop the queue directly instead.
1249 */
1250 if (!*force_fg && !nr_bios) {
1251 kvfree(q[JQ_SUBMIT]);
Gao Xiang1e4a2952020-01-21 14:48:19 +08001252 return;
Gao Xiang587a67b2020-01-21 14:47:47 +08001253 }
Gao Xianga4b1fab2019-10-08 20:56:15 +08001254 z_erofs_decompress_kickoff(q[JQ_SUBMIT], *force_fg, nr_bios);
Gao Xiang3883a792018-07-26 20:22:06 +08001255}
1256
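/*
 * z_erofs_runqueue() drives a full round of I/O plus decompression:
 * submit the owned chain, decompress no-I/O (bypass) pclusters right
 * away in this context, and, for synchronous requests, wait until all
 * bios complete before decompressing the submit queue here as well.
 */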
Gao Xiang0c638f72019-11-08 11:37:33 +08001257static void z_erofs_runqueue(struct super_block *sb,
Gao Xiang6ea5aad2020-09-19 15:27:30 +08001258 struct z_erofs_decompress_frontend *f,
Gao Xiang0c638f72019-11-08 11:37:33 +08001259 struct list_head *pagepool, bool force_fg)
Gao Xiang3883a792018-07-26 20:22:06 +08001260{
Gao Xianga4b1fab2019-10-08 20:56:15 +08001261 struct z_erofs_decompressqueue io[NR_JOBQUEUES];
Gao Xiang3883a792018-07-26 20:22:06 +08001262
Gao Xiang6ea5aad2020-09-19 15:27:30 +08001263 if (f->clt.owned_head == Z_EROFS_PCLUSTER_TAIL)
Gao Xiang3883a792018-07-26 20:22:06 +08001264 return;
Gao Xiang6ea5aad2020-09-19 15:27:30 +08001265 z_erofs_submit_queue(sb, f, pagepool, io, &force_fg);
Gao Xiang3883a792018-07-26 20:22:06 +08001266
Gao Xiang0c638f72019-11-08 11:37:33 +08001267 /* handle bypass queue (no i/o pclusters) immediately */
1268 z_erofs_decompress_queue(&io[JQ_BYPASS], pagepool);
Gao Xiang4279f3f2019-07-31 23:57:49 +08001269
Gao Xiang3883a792018-07-26 20:22:06 +08001270 if (!force_fg)
1271 return;
1272
1273 /* wait until all bios are completed */
Gao Xianga93f8c32019-10-08 20:56:16 +08001274 io_wait_event(io[JQ_SUBMIT].u.wait,
1275 !atomic_read(&io[JQ_SUBMIT].pending_bios));
Gao Xiang3883a792018-07-26 20:22:06 +08001276
Gao Xiang0c638f72019-11-08 11:37:33 +08001277 /* handle synchronous decompress queue in the caller context */
1278 z_erofs_decompress_queue(&io[JQ_SUBMIT], pagepool);
Gao Xiang3883a792018-07-26 20:22:06 +08001279}
1280
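/*
 * .readpage() entry: always runs the queue in foreground mode, so
 * decompression of this page has finished by the time it returns.
 */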
Gao Xiang0c638f72019-11-08 11:37:33 +08001281static int z_erofs_readpage(struct file *file, struct page *page)
Gao Xiang3883a792018-07-26 20:22:06 +08001282{
1283 struct inode *const inode = page->mapping->host;
Gao Xiang97e86a82019-07-31 23:57:47 +08001284 struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
Gao Xiang3883a792018-07-26 20:22:06 +08001285 int err;
1286 LIST_HEAD(pagepool);
1287
Gao Xiangba9ce772018-11-23 01:15:58 +08001288 trace_erofs_readpage(page, false);
1289
Gao Xiangf0c519f2018-11-23 01:21:49 +08001290 f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
1291
Chao Yue3f78d52020-09-17 09:18:21 +08001292 err = z_erofs_do_read_page(&f, page);
Gao Xiang97e86a82019-07-31 23:57:47 +08001293 (void)z_erofs_collector_end(&f.clt);
Gao Xiang3883a792018-07-26 20:22:06 +08001294
Gao Xiangee451972019-08-19 18:34:21 +08001295 /* if some compressed clusters are ready, submit them anyway */
Gao Xiang6ea5aad2020-09-19 15:27:30 +08001296 z_erofs_runqueue(inode->i_sb, &f, &pagepool, true);
Gao Xiangee451972019-08-19 18:34:21 +08001297
1298 if (err)
Gao Xiang4f761fa2019-09-04 10:09:09 +08001299 erofs_err(inode->i_sb, "failed to read, err [%d]", err);
Gao Xiangee451972019-08-19 18:34:21 +08001300
Chao Yu3b423412019-01-15 09:42:21 +08001301 if (f.map.mpage)
1302 put_page(f.map.mpage);
Gao Xiang3883a792018-07-26 20:22:06 +08001303
1304 /* clean up the remaining free pages */
1305 put_pages_list(&pagepool);
Gao Xiangee451972019-08-19 18:34:21 +08001306 return err;
Gao Xiang3883a792018-07-26 20:22:06 +08001307}
1308
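/*
 * .readahead() entry: pages are chained in reverse order through
 * page_private and then processed; decompression stays synchronous only
 * when the request is small enough and the first page hit does not
 * carry PG_readahead.
 */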
Matthew Wilcox (Oracle)06150902020-06-01 21:47:13 -07001309static void z_erofs_readahead(struct readahead_control *rac)
Gao Xiang3883a792018-07-26 20:22:06 +08001310{
Matthew Wilcox (Oracle)06150902020-06-01 21:47:13 -07001311 struct inode *const inode = rac->mapping->host;
Gao Xiang5fb76bb2018-09-20 00:06:56 +08001312 struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
Gao Xiang3883a792018-07-26 20:22:06 +08001313
Gao Xiangbf9a1232020-09-19 15:27:29 +08001314 unsigned int nr_pages = readahead_count(rac);
1315 bool sync = (nr_pages <= sbi->ctx.max_sync_decompress_pages);
Gao Xiang97e86a82019-07-31 23:57:47 +08001316 struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
Matthew Wilcox (Oracle)06150902020-06-01 21:47:13 -07001317 struct page *page, *head = NULL;
Gao Xiang3883a792018-07-26 20:22:06 +08001318 LIST_HEAD(pagepool);
1319
Gao Xiangbf9a1232020-09-19 15:27:29 +08001320 trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false);
Chen Gong284db122018-09-18 22:27:27 +08001321
Gao Xiang6ea5aad2020-09-19 15:27:30 +08001322 f.readahead = true;
Matthew Wilcox (Oracle)06150902020-06-01 21:47:13 -07001323 f.headoffset = readahead_pos(rac);
Gao Xiangf0c519f2018-11-23 01:21:49 +08001324
Matthew Wilcox (Oracle)06150902020-06-01 21:47:13 -07001325 while ((page = readahead_page(rac))) {
Gao Xiang3883a792018-07-26 20:22:06 +08001326 prefetchw(&page->flags);
Gao Xiang3883a792018-07-26 20:22:06 +08001327
Gao Xiang2d9b5dc2018-11-23 01:21:48 +08001328 /*
1329 * A pure asynchronous readahead is indicated if
 1330 * a PG_readahead-marked page is hit first.
1331 * Let's also do asynchronous decompression for this case.
1332 */
1333 sync &= !(PageReadahead(page) && !head);
1334
Gao Xiang3883a792018-07-26 20:22:06 +08001335 set_page_private(page, (unsigned long)head);
1336 head = page;
1337 }
1338
Cristian Sicilia42d40b42018-11-12 21:43:57 +01001339 while (head) {
Gao Xiang3883a792018-07-26 20:22:06 +08001340 struct page *page = head;
1341 int err;
1342
1343 /* traversal in reverse order */
1344 head = (void *)page_private(page);
1345
Chao Yue3f78d52020-09-17 09:18:21 +08001346 err = z_erofs_do_read_page(&f, page);
Gao Xianga5876e22019-09-04 10:08:56 +08001347 if (err)
Gao Xiang4f761fa2019-09-04 10:09:09 +08001348 erofs_err(inode->i_sb,
1349 "readahead error at page %lu @ nid %llu",
1350 page->index, EROFS_I(inode)->nid);
Gao Xiang3883a792018-07-26 20:22:06 +08001351 put_page(page);
1352 }
1353
Gao Xiang97e86a82019-07-31 23:57:47 +08001354 (void)z_erofs_collector_end(&f.clt);
Gao Xiang3883a792018-07-26 20:22:06 +08001355
Gao Xiang6ea5aad2020-09-19 15:27:30 +08001356 z_erofs_runqueue(inode->i_sb, &f, &pagepool, sync);
Gao Xiang3883a792018-07-26 20:22:06 +08001357
Chao Yu3b423412019-01-15 09:42:21 +08001358 if (f.map.mpage)
1359 put_page(f.map.mpage);
Gao Xiang3883a792018-07-26 20:22:06 +08001360
1361 /* clean up the remaining free pages */
1362 put_pages_list(&pagepool);
Gao Xiang3883a792018-07-26 20:22:06 +08001363}
1364
Gao Xiang0c638f72019-11-08 11:37:33 +08001365const struct address_space_operations z_erofs_aops = {
1366 .readpage = z_erofs_readpage,
Matthew Wilcox (Oracle)06150902020-06-01 21:47:13 -07001367 .readahead = z_erofs_readahead,
Gao Xiang3883a792018-07-26 20:22:06 +08001368};
Gao Xiang02827e12018-07-26 20:21:58 +08001369