blob: 3acf0d9f0b1511d967a6613e05c5845046b97549 [file] [log] [blame]
Konstantin Komarov522e0102021-08-13 17:21:30 +03001// SPDX-License-Identifier: GPL-2.0
2/*
3 *
4 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
5 *
6 */
Kari Argillandere8b8e972021-08-03 14:57:09 +03007
Konstantin Komarov522e0102021-08-13 17:21:30 +03008#include <linux/blkdev.h>
9#include <linux/buffer_head.h>
10#include <linux/fs.h>
11#include <linux/nls.h>
12
13#include "debug.h"
14#include "ntfs.h"
15#include "ntfs_fs.h"
16
17// clang-format off
Kari Argillandere8b8e972021-08-03 14:57:09 +030018/* Src buffer is zero. */
Konstantin Komarov522e0102021-08-13 17:21:30 +030019#define LZNT_ERROR_ALL_ZEROS 1
20#define LZNT_CHUNK_SIZE 0x1000
21// clang-format on
22
23struct lznt_hash {
24 const u8 *p1;
25 const u8 *p2;
26};
27
28struct lznt {
29 const u8 *unc;
30 const u8 *unc_end;
31 const u8 *best_match;
32 size_t max_len;
33 bool std;
34
35 struct lznt_hash hash[LZNT_CHUNK_SIZE];
36};
37
38static inline size_t get_match_len(const u8 *ptr, const u8 *end, const u8 *prev,
39 size_t max_len)
40{
41 size_t len = 0;
42
43 while (ptr + len < end && ptr[len] == prev[len] && ++len < max_len)
44 ;
45 return len;
46}
47
48static size_t longest_match_std(const u8 *src, struct lznt *ctx)
49{
50 size_t hash_index;
51 size_t len1 = 0, len2 = 0;
52 const u8 **hash;
53
54 hash_index =
55 ((40543U * ((((src[0] << 4) ^ src[1]) << 4) ^ src[2])) >> 4) &
56 (LZNT_CHUNK_SIZE - 1);
57
58 hash = &(ctx->hash[hash_index].p1);
59
60 if (hash[0] >= ctx->unc && hash[0] < src && hash[0][0] == src[0] &&
61 hash[0][1] == src[1] && hash[0][2] == src[2]) {
62 len1 = 3;
63 if (ctx->max_len > 3)
64 len1 += get_match_len(src + 3, ctx->unc_end,
65 hash[0] + 3, ctx->max_len - 3);
66 }
67
68 if (hash[1] >= ctx->unc && hash[1] < src && hash[1][0] == src[0] &&
69 hash[1][1] == src[1] && hash[1][2] == src[2]) {
70 len2 = 3;
71 if (ctx->max_len > 3)
72 len2 += get_match_len(src + 3, ctx->unc_end,
73 hash[1] + 3, ctx->max_len - 3);
74 }
75
Kari Argillandere8b8e972021-08-03 14:57:09 +030076 /* Compare two matches and select the best one. */
Konstantin Komarov522e0102021-08-13 17:21:30 +030077 if (len1 < len2) {
78 ctx->best_match = hash[1];
79 len1 = len2;
80 } else {
81 ctx->best_match = hash[0];
82 }
83
84 hash[1] = hash[0];
85 hash[0] = src;
86 return len1;
87}
88
89static size_t longest_match_best(const u8 *src, struct lznt *ctx)
90{
91 size_t max_len;
92 const u8 *ptr;
93
94 if (ctx->unc >= src || !ctx->max_len)
95 return 0;
96
97 max_len = 0;
98 for (ptr = ctx->unc; ptr < src; ++ptr) {
99 size_t len =
100 get_match_len(src, ctx->unc_end, ptr, ctx->max_len);
101 if (len >= max_len) {
102 max_len = len;
103 ctx->best_match = ptr;
104 }
105 }
106
107 return max_len >= 3 ? max_len : 0;
108}
109
110static const size_t s_max_len[] = {
111 0x1002, 0x802, 0x402, 0x202, 0x102, 0x82, 0x42, 0x22, 0x12,
112};
113
114static const size_t s_max_off[] = {
115 0x10, 0x20, 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
116};
117
118static inline u16 make_pair(size_t offset, size_t len, size_t index)
119{
120 return ((offset - 1) << (12 - index)) |
121 ((len - 3) & (((1 << (12 - index)) - 1)));
122}
123
124static inline size_t parse_pair(u16 pair, size_t *offset, size_t index)
125{
126 *offset = 1 + (pair >> (12 - index));
127 return 3 + (pair & ((1 << (12 - index)) - 1));
128}
129
130/*
131 * compress_chunk
132 *
Kari Argillandere8b8e972021-08-03 14:57:09 +0300133 * Return:
134 * * 0 - Ok, @cmpr contains @cmpr_chunk_size bytes of compressed data.
135 * * 1 - Input buffer is full zero.
136 * * -2 - The compressed buffer is too small to hold the compressed data.
Konstantin Komarov522e0102021-08-13 17:21:30 +0300137 */
138static inline int compress_chunk(size_t (*match)(const u8 *, struct lznt *),
139 const u8 *unc, const u8 *unc_end, u8 *cmpr,
140 u8 *cmpr_end, size_t *cmpr_chunk_size,
141 struct lznt *ctx)
142{
143 size_t cnt = 0;
144 size_t idx = 0;
145 const u8 *up = unc;
146 u8 *cp = cmpr + 3;
147 u8 *cp2 = cmpr + 2;
148 u8 not_zero = 0;
Kari Argillandere8b8e972021-08-03 14:57:09 +0300149 /* Control byte of 8-bit values: ( 0 - means byte as is, 1 - short pair ). */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300150 u8 ohdr = 0;
151 u8 *last;
152 u16 t16;
153
154 if (unc + LZNT_CHUNK_SIZE < unc_end)
155 unc_end = unc + LZNT_CHUNK_SIZE;
156
157 last = min(cmpr + LZNT_CHUNK_SIZE + sizeof(short), cmpr_end);
158
159 ctx->unc = unc;
160 ctx->unc_end = unc_end;
161 ctx->max_len = s_max_len[0];
162
163 while (up < unc_end) {
164 size_t max_len;
165
166 while (unc + s_max_off[idx] < up)
167 ctx->max_len = s_max_len[++idx];
168
Kari Argillandere8b8e972021-08-03 14:57:09 +0300169 /* Find match. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300170 max_len = up + 3 <= unc_end ? (*match)(up, ctx) : 0;
171
172 if (!max_len) {
173 if (cp >= last)
174 goto NotCompressed;
175 not_zero |= *cp++ = *up++;
176 } else if (cp + 1 >= last) {
177 goto NotCompressed;
178 } else {
179 t16 = make_pair(up - ctx->best_match, max_len, idx);
180 *cp++ = t16;
181 *cp++ = t16 >> 8;
182
183 ohdr |= 1 << cnt;
184 up += max_len;
185 }
186
187 cnt = (cnt + 1) & 7;
188 if (!cnt) {
189 *cp2 = ohdr;
190 ohdr = 0;
191 cp2 = cp;
192 cp += 1;
193 }
194 }
195
196 if (cp2 < last)
197 *cp2 = ohdr;
198 else
199 cp -= 1;
200
201 *cmpr_chunk_size = cp - cmpr;
202
203 t16 = (*cmpr_chunk_size - 3) | 0xB000;
204 cmpr[0] = t16;
205 cmpr[1] = t16 >> 8;
206
207 return not_zero ? 0 : LZNT_ERROR_ALL_ZEROS;
208
209NotCompressed:
210
211 if ((cmpr + LZNT_CHUNK_SIZE + sizeof(short)) > last)
212 return -2;
213
214 /*
Kari Argillandere8b8e972021-08-03 14:57:09 +0300215 * Copy non cmpr data.
Konstantin Komarov522e0102021-08-13 17:21:30 +0300216 * 0x3FFF == ((LZNT_CHUNK_SIZE + 2 - 3) | 0x3000)
217 */
218 cmpr[0] = 0xff;
219 cmpr[1] = 0x3f;
220
221 memcpy(cmpr + sizeof(short), unc, LZNT_CHUNK_SIZE);
222 *cmpr_chunk_size = LZNT_CHUNK_SIZE + sizeof(short);
223
224 return 0;
225}
226
227static inline ssize_t decompress_chunk(u8 *unc, u8 *unc_end, const u8 *cmpr,
228 const u8 *cmpr_end)
229{
230 u8 *up = unc;
231 u8 ch = *cmpr++;
232 size_t bit = 0;
233 size_t index = 0;
234 u16 pair;
235 size_t offset, length;
236
Kari Argillandere8b8e972021-08-03 14:57:09 +0300237 /* Do decompression until pointers are inside range. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300238 while (up < unc_end && cmpr < cmpr_end) {
239 /* Correct index */
240 while (unc + s_max_off[index] < up)
241 index += 1;
242
Kari Argillandere8b8e972021-08-03 14:57:09 +0300243 /* Check the current flag for zero. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300244 if (!(ch & (1 << bit))) {
Kari Argillandere8b8e972021-08-03 14:57:09 +0300245 /* Just copy byte. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300246 *up++ = *cmpr++;
247 goto next;
248 }
249
Kari Argillandere8b8e972021-08-03 14:57:09 +0300250 /* Check for boundary. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300251 if (cmpr + 1 >= cmpr_end)
252 return -EINVAL;
253
Kari Argillandere8b8e972021-08-03 14:57:09 +0300254 /* Read a short from little endian stream. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300255 pair = cmpr[1];
256 pair <<= 8;
257 pair |= cmpr[0];
258
259 cmpr += 2;
260
Kari Argillandere8b8e972021-08-03 14:57:09 +0300261 /* Translate packed information into offset and length. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300262 length = parse_pair(pair, &offset, index);
263
Kari Argillandere8b8e972021-08-03 14:57:09 +0300264 /* Check offset for boundary. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300265 if (unc + offset > up)
266 return -EINVAL;
267
Kari Argillandere8b8e972021-08-03 14:57:09 +0300268 /* Truncate the length if necessary. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300269 if (up + length >= unc_end)
270 length = unc_end - up;
271
272 /* Now we copy bytes. This is the heart of LZ algorithm. */
273 for (; length > 0; length--, up++)
274 *up = *(up - offset);
275
276next:
Kari Argillandere8b8e972021-08-03 14:57:09 +0300277 /* Advance flag bit value. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300278 bit = (bit + 1) & 7;
279
280 if (!bit) {
281 if (cmpr >= cmpr_end)
282 break;
283
284 ch = *cmpr++;
285 }
286 }
287
Kari Argillandere8b8e972021-08-03 14:57:09 +0300288 /* Return the size of uncompressed data. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300289 return up - unc;
290}
291
292/*
Kari Argillandere8b8e972021-08-03 14:57:09 +0300293 * get_lznt_ctx
294 * @level: 0 - Standard compression.
295 * !0 - Best compression, requires a lot of cpu.
Konstantin Komarov522e0102021-08-13 17:21:30 +0300296 */
297struct lznt *get_lznt_ctx(int level)
298{
Kari Argillander195c52b2021-08-24 21:37:07 +0300299 struct lznt *r = kzalloc(level ? offsetof(struct lznt, hash) :
300 sizeof(struct lznt), GFP_NOFS);
Konstantin Komarov522e0102021-08-13 17:21:30 +0300301
302 if (r)
303 r->std = !level;
304 return r;
305}
306
307/*
Kari Argillandere8b8e972021-08-03 14:57:09 +0300308 * compress_lznt - Compresses @unc into @cmpr
Konstantin Komarov522e0102021-08-13 17:21:30 +0300309 *
Kari Argillandere8b8e972021-08-03 14:57:09 +0300310 * Return:
311 * * +x - Ok, @cmpr contains 'final_compressed_size' bytes of compressed data.
312 * * 0 - Input buffer is full zero.
Konstantin Komarov522e0102021-08-13 17:21:30 +0300313 */
314size_t compress_lznt(const void *unc, size_t unc_size, void *cmpr,
315 size_t cmpr_size, struct lznt *ctx)
316{
317 int err;
318 size_t (*match)(const u8 *src, struct lznt *ctx);
319 u8 *p = cmpr;
320 u8 *end = p + cmpr_size;
321 const u8 *unc_chunk = unc;
322 const u8 *unc_end = unc_chunk + unc_size;
323 bool is_zero = true;
324
325 if (ctx->std) {
326 match = &longest_match_std;
327 memset(ctx->hash, 0, sizeof(ctx->hash));
328 } else {
329 match = &longest_match_best;
330 }
331
Kari Argillandere8b8e972021-08-03 14:57:09 +0300332 /* Compression cycle. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300333 for (; unc_chunk < unc_end; unc_chunk += LZNT_CHUNK_SIZE) {
334 cmpr_size = 0;
335 err = compress_chunk(match, unc_chunk, unc_end, p, end,
336 &cmpr_size, ctx);
337 if (err < 0)
338 return unc_size;
339
340 if (is_zero && err != LZNT_ERROR_ALL_ZEROS)
341 is_zero = false;
342
343 p += cmpr_size;
344 }
345
346 if (p <= end - 2)
347 p[0] = p[1] = 0;
348
349 return is_zero ? 0 : PtrOffset(cmpr, p);
350}
351
352/*
Kari Argillandere8b8e972021-08-03 14:57:09 +0300353 * decompress_lznt - Decompress @cmpr into @unc.
Konstantin Komarov522e0102021-08-13 17:21:30 +0300354 */
355ssize_t decompress_lznt(const void *cmpr, size_t cmpr_size, void *unc,
356 size_t unc_size)
357{
358 const u8 *cmpr_chunk = cmpr;
359 const u8 *cmpr_end = cmpr_chunk + cmpr_size;
360 u8 *unc_chunk = unc;
361 u8 *unc_end = unc_chunk + unc_size;
362 u16 chunk_hdr;
363
364 if (cmpr_size < sizeof(short))
365 return -EINVAL;
366
Kari Argillandere8b8e972021-08-03 14:57:09 +0300367 /* Read chunk header. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300368 chunk_hdr = cmpr_chunk[1];
369 chunk_hdr <<= 8;
370 chunk_hdr |= cmpr_chunk[0];
371
Kari Argillandere8b8e972021-08-03 14:57:09 +0300372 /* Loop through decompressing chunks. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300373 for (;;) {
374 size_t chunk_size_saved;
375 size_t unc_use;
376 size_t cmpr_use = 3 + (chunk_hdr & (LZNT_CHUNK_SIZE - 1));
377
Kari Argillandere8b8e972021-08-03 14:57:09 +0300378 /* Check that the chunk actually fits the supplied buffer. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300379 if (cmpr_chunk + cmpr_use > cmpr_end)
380 return -EINVAL;
381
Kari Argillandere8b8e972021-08-03 14:57:09 +0300382 /* First make sure the chunk contains compressed data. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300383 if (chunk_hdr & 0x8000) {
Kari Argillandere8b8e972021-08-03 14:57:09 +0300384 /* Decompress a chunk and return if we get an error. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300385 ssize_t err =
386 decompress_chunk(unc_chunk, unc_end,
387 cmpr_chunk + sizeof(chunk_hdr),
388 cmpr_chunk + cmpr_use);
389 if (err < 0)
390 return err;
391 unc_use = err;
392 } else {
Kari Argillandere8b8e972021-08-03 14:57:09 +0300393 /* This chunk does not contain compressed data. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300394 unc_use = unc_chunk + LZNT_CHUNK_SIZE > unc_end
395 ? unc_end - unc_chunk
396 : LZNT_CHUNK_SIZE;
397
398 if (cmpr_chunk + sizeof(chunk_hdr) + unc_use >
399 cmpr_end) {
400 return -EINVAL;
401 }
402
403 memcpy(unc_chunk, cmpr_chunk + sizeof(chunk_hdr),
404 unc_use);
405 }
406
Kari Argillandere8b8e972021-08-03 14:57:09 +0300407 /* Advance pointers. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300408 cmpr_chunk += cmpr_use;
409 unc_chunk += unc_use;
410
Kari Argillandere8b8e972021-08-03 14:57:09 +0300411 /* Check for the end of unc buffer. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300412 if (unc_chunk >= unc_end)
413 break;
414
Kari Argillandere8b8e972021-08-03 14:57:09 +0300415 /* Proceed the next chunk. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300416 if (cmpr_chunk > cmpr_end - 2)
417 break;
418
419 chunk_size_saved = LZNT_CHUNK_SIZE;
420
Kari Argillandere8b8e972021-08-03 14:57:09 +0300421 /* Read chunk header. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300422 chunk_hdr = cmpr_chunk[1];
423 chunk_hdr <<= 8;
424 chunk_hdr |= cmpr_chunk[0];
425
426 if (!chunk_hdr)
427 break;
428
Kari Argillandere8b8e972021-08-03 14:57:09 +0300429 /* Check the size of unc buffer. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300430 if (unc_use < chunk_size_saved) {
431 size_t t1 = chunk_size_saved - unc_use;
432 u8 *t2 = unc_chunk + t1;
433
Kari Argillandere8b8e972021-08-03 14:57:09 +0300434 /* 'Zero' memory. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300435 if (t2 >= unc_end)
436 break;
437
438 memset(unc_chunk, 0, t1);
439 unc_chunk = t2;
440 }
441 }
442
Kari Argillandere8b8e972021-08-03 14:57:09 +0300443 /* Check compression boundary. */
Konstantin Komarov522e0102021-08-13 17:21:30 +0300444 if (cmpr_chunk > cmpr_end)
445 return -EINVAL;
446
447 /*
448 * The unc size is just a difference between current
Kari Argillandere8b8e972021-08-03 14:57:09 +0300449 * pointer and original one.
Konstantin Komarov522e0102021-08-13 17:21:30 +0300450 */
451 return PtrOffset(unc, unc_chunk);
452}