blob: a0c48a20705b774aedd14e354c068143616aa1a2 [file] [log] [blame]
Josh Gao2f0f9eb2020-03-04 19:34:08 -08001/*
2 * Copyright (C) 2020 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#pragma once
18
Josh Gaobfcd8ff2020-03-26 19:33:25 -070019#include <algorithm>
20#include <memory>
Josh Gao2f0f9eb2020-03-04 19:34:08 -080021#include <span>
22
Josh Gaobfcd8ff2020-03-26 19:33:25 -070023#include <android-base/logging.h>
24
Josh Gao2f0f9eb2020-03-04 19:34:08 -080025#include <brotli/decode.h>
26#include <brotli/encode.h>
Josh Gaofb386cc2020-03-26 22:02:03 -070027#include <lz4frame.h>
Josh Gao2f0f9eb2020-03-04 19:34:08 -080028
29#include "types.h"
30
// Outcome reported by a single Decoder::Decode() call.
enum class DecodeResult {
    // The decoder hit an unrecoverable failure.
    Error,
    // The stream has been fully decoded.
    Done,
    // The decoder cannot make progress until more input is appended.
    NeedInput,
    // There is more to emit; call Decode() again.
    MoreOutput,
};
37
// Outcome reported by a single Encoder::Encode() call.
enum class EncodeResult {
    // The encoder hit an unrecoverable failure.
    Error,
    // The stream has been fully encoded and emitted.
    Done,
    // The encoder cannot make progress until more input is appended.
    NeedInput,
    // There is more to emit; call Encode() again.
    MoreOutput,
};
44
Josh Gaobfcd8ff2020-03-26 19:33:25 -070045struct Decoder {
46 void Append(Block&& block) { input_buffer_.append(std::move(block)); }
47 bool Finish() {
48 bool old = std::exchange(finished_, true);
49 if (old) {
50 LOG(FATAL) << "Decoder::Finish called while already finished?";
51 return false;
52 }
53 return true;
54 }
55
56 virtual DecodeResult Decode(std::span<char>* output) = 0;
57
58 protected:
59 Decoder(std::span<char> output_buffer) : output_buffer_(output_buffer) {}
60 ~Decoder() = default;
61
62 bool finished_ = false;
63 IOVector input_buffer_;
64 std::span<char> output_buffer_;
65};
66
67struct Encoder {
68 void Append(Block input) { input_buffer_.append(std::move(input)); }
69 bool Finish() {
70 bool old = std::exchange(finished_, true);
71 if (old) {
72 LOG(FATAL) << "Decoder::Finish called while already finished?";
73 return false;
74 }
75 return true;
76 }
77
78 virtual EncodeResult Encode(Block* output) = 0;
79
80 protected:
81 explicit Encoder(size_t output_block_size) : output_block_size_(output_block_size) {}
82 ~Encoder() = default;
83
84 const size_t output_block_size_;
85 bool finished_ = false;
86 IOVector input_buffer_;
87};
88
89struct NullDecoder final : public Decoder {
90 explicit NullDecoder(std::span<char> output_buffer) : Decoder(output_buffer) {}
91
92 DecodeResult Decode(std::span<char>* output) final {
93 size_t available_out = output_buffer_.size();
94 void* p = output_buffer_.data();
95 while (available_out > 0 && !input_buffer_.empty()) {
96 size_t len = std::min(available_out, input_buffer_.front_size());
97 p = mempcpy(p, input_buffer_.front_data(), len);
98 available_out -= len;
99 input_buffer_.drop_front(len);
100 }
101 *output = std::span(output_buffer_.data(), static_cast<char*>(p));
102 if (input_buffer_.empty()) {
103 return finished_ ? DecodeResult::Done : DecodeResult::NeedInput;
104 }
105 return DecodeResult::MoreOutput;
106 }
107};
108
109struct NullEncoder final : public Encoder {
110 explicit NullEncoder(size_t output_block_size) : Encoder(output_block_size) {}
111
112 EncodeResult Encode(Block* output) final {
113 output->clear();
114 output->resize(output_block_size_);
115
116 size_t available_out = output->size();
117 void* p = output->data();
118
119 while (available_out > 0 && !input_buffer_.empty()) {
120 size_t len = std::min(available_out, input_buffer_.front_size());
121 p = mempcpy(p, input_buffer_.front_data(), len);
122 available_out -= len;
123 input_buffer_.drop_front(len);
124 }
125
126 output->resize(output->size() - available_out);
127
128 if (input_buffer_.empty()) {
129 return finished_ ? EncodeResult::Done : EncodeResult::NeedInput;
130 }
131 return EncodeResult::MoreOutput;
132 }
133};
134
135struct BrotliDecoder final : public Decoder {
Josh Gao2f0f9eb2020-03-04 19:34:08 -0800136 explicit BrotliDecoder(std::span<char> output_buffer)
Josh Gaobfcd8ff2020-03-26 19:33:25 -0700137 : Decoder(output_buffer),
Josh Gao2f0f9eb2020-03-04 19:34:08 -0800138 decoder_(BrotliDecoderCreateInstance(nullptr, nullptr, nullptr),
139 BrotliDecoderDestroyInstance) {}
140
Josh Gaobfcd8ff2020-03-26 19:33:25 -0700141 DecodeResult Decode(std::span<char>* output) final {
Josh Gao2f0f9eb2020-03-04 19:34:08 -0800142 size_t available_in = input_buffer_.front_size();
143 const uint8_t* next_in = reinterpret_cast<const uint8_t*>(input_buffer_.front_data());
144
145 size_t available_out = output_buffer_.size();
146 uint8_t* next_out = reinterpret_cast<uint8_t*>(output_buffer_.data());
147
148 BrotliDecoderResult r = BrotliDecoderDecompressStream(
149 decoder_.get(), &available_in, &next_in, &available_out, &next_out, nullptr);
150
151 size_t bytes_consumed = input_buffer_.front_size() - available_in;
152 input_buffer_.drop_front(bytes_consumed);
153
154 size_t bytes_emitted = output_buffer_.size() - available_out;
155 *output = std::span<char>(output_buffer_.data(), bytes_emitted);
156
157 switch (r) {
158 case BROTLI_DECODER_RESULT_SUCCESS:
Josh Gaobfcd8ff2020-03-26 19:33:25 -0700159 // We need to wait for ID_DONE from the other end.
160 return finished_ ? DecodeResult::Done : DecodeResult::NeedInput;
Josh Gao2f0f9eb2020-03-04 19:34:08 -0800161 case BROTLI_DECODER_RESULT_ERROR:
Josh Gao521e9992020-03-26 14:06:55 -0700162 return DecodeResult::Error;
Josh Gao2f0f9eb2020-03-04 19:34:08 -0800163 case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT:
164 // Brotli guarantees as one of its invariants that if it returns NEEDS_MORE_INPUT,
165 // it will consume the entire input buffer passed in, so we don't have to worry
166 // about bytes left over in the front block with more input remaining.
Josh Gao521e9992020-03-26 14:06:55 -0700167 return DecodeResult::NeedInput;
Josh Gao2f0f9eb2020-03-04 19:34:08 -0800168 case BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT:
Josh Gao521e9992020-03-26 14:06:55 -0700169 return DecodeResult::MoreOutput;
Josh Gao2f0f9eb2020-03-04 19:34:08 -0800170 }
171 }
172
173 private:
Josh Gao2f0f9eb2020-03-04 19:34:08 -0800174 std::unique_ptr<BrotliDecoderState, void (*)(BrotliDecoderState*)> decoder_;
175};
176
Josh Gaobfcd8ff2020-03-26 19:33:25 -0700177struct BrotliEncoder final : public Encoder {
178 explicit BrotliEncoder(size_t output_block_size)
179 : Encoder(output_block_size),
180 output_block_(output_block_size_),
181 output_bytes_left_(output_block_size_),
Josh Gao2f0f9eb2020-03-04 19:34:08 -0800182 encoder_(BrotliEncoderCreateInstance(nullptr, nullptr, nullptr),
183 BrotliEncoderDestroyInstance) {
184 BrotliEncoderSetParameter(encoder_.get(), BROTLI_PARAM_QUALITY, 1);
185 }
186
Josh Gaobfcd8ff2020-03-26 19:33:25 -0700187 EncodeResult Encode(Block* output) final {
Josh Gao2f0f9eb2020-03-04 19:34:08 -0800188 output->clear();
Josh Gaobfcd8ff2020-03-26 19:33:25 -0700189
Josh Gao2f0f9eb2020-03-04 19:34:08 -0800190 while (true) {
191 size_t available_in = input_buffer_.front_size();
192 const uint8_t* next_in = reinterpret_cast<const uint8_t*>(input_buffer_.front_data());
193
194 size_t available_out = output_bytes_left_;
Josh Gaobfcd8ff2020-03-26 19:33:25 -0700195 uint8_t* next_out = reinterpret_cast<uint8_t*>(
196 output_block_.data() + (output_block_size_ - output_bytes_left_));
Josh Gao2f0f9eb2020-03-04 19:34:08 -0800197
198 BrotliEncoderOperation op = BROTLI_OPERATION_PROCESS;
199 if (finished_) {
200 op = BROTLI_OPERATION_FINISH;
201 }
202
203 if (!BrotliEncoderCompressStream(encoder_.get(), op, &available_in, &next_in,
204 &available_out, &next_out, nullptr)) {
Josh Gao521e9992020-03-26 14:06:55 -0700205 return EncodeResult::Error;
Josh Gao2f0f9eb2020-03-04 19:34:08 -0800206 }
207
208 size_t bytes_consumed = input_buffer_.front_size() - available_in;
209 input_buffer_.drop_front(bytes_consumed);
210
211 output_bytes_left_ = available_out;
212
213 if (BrotliEncoderIsFinished(encoder_.get())) {
Josh Gaobfcd8ff2020-03-26 19:33:25 -0700214 output_block_.resize(output_block_size_ - output_bytes_left_);
Josh Gao2f0f9eb2020-03-04 19:34:08 -0800215 *output = std::move(output_block_);
Josh Gao521e9992020-03-26 14:06:55 -0700216 return EncodeResult::Done;
Josh Gao2f0f9eb2020-03-04 19:34:08 -0800217 } else if (output_bytes_left_ == 0) {
218 *output = std::move(output_block_);
Josh Gaobfcd8ff2020-03-26 19:33:25 -0700219 output_block_.resize(output_block_size_);
220 output_bytes_left_ = output_block_size_;
Josh Gao521e9992020-03-26 14:06:55 -0700221 return EncodeResult::MoreOutput;
Josh Gao2f0f9eb2020-03-04 19:34:08 -0800222 } else if (input_buffer_.empty()) {
Josh Gao521e9992020-03-26 14:06:55 -0700223 return EncodeResult::NeedInput;
Josh Gao2f0f9eb2020-03-04 19:34:08 -0800224 }
225 }
226 }
227
228 private:
Josh Gao2f0f9eb2020-03-04 19:34:08 -0800229 Block output_block_;
230 size_t output_bytes_left_;
231 std::unique_ptr<BrotliEncoderState, void (*)(BrotliEncoderState*)> encoder_;
232};
Josh Gaofb386cc2020-03-26 22:02:03 -0700233
234struct LZ4Decoder final : public Decoder {
235 explicit LZ4Decoder(std::span<char> output_buffer)
236 : Decoder(output_buffer), decoder_(nullptr, nullptr) {
237 LZ4F_dctx* dctx;
238 if (LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION) != 0) {
239 LOG(FATAL) << "failed to initialize LZ4 decompression context";
240 }
241 decoder_ = std::unique_ptr<LZ4F_dctx, decltype(&LZ4F_freeDecompressionContext)>(
242 dctx, LZ4F_freeDecompressionContext);
243 }
244
245 DecodeResult Decode(std::span<char>* output) final {
246 size_t available_in = input_buffer_.front_size();
247 const char* next_in = input_buffer_.front_data();
248
249 size_t available_out = output_buffer_.size();
250 char* next_out = output_buffer_.data();
251
252 size_t rc = LZ4F_decompress(decoder_.get(), next_out, &available_out, next_in,
253 &available_in, nullptr);
254 if (LZ4F_isError(rc)) {
255 LOG(ERROR) << "LZ4F_decompress failed: " << LZ4F_getErrorName(rc);
256 return DecodeResult::Error;
257 }
258
259 input_buffer_.drop_front(available_in);
260
261 if (rc == 0) {
262 if (!input_buffer_.empty()) {
263 LOG(ERROR) << "LZ4 stream hit end before reading all data";
264 return DecodeResult::Error;
265 }
266 lz4_done_ = true;
267 }
268
269 *output = std::span<char>(output_buffer_.data(), available_out);
270
271 if (finished_) {
272 return input_buffer_.empty() && lz4_done_ ? DecodeResult::Done
273 : DecodeResult::MoreOutput;
274 }
275
276 return DecodeResult::NeedInput;
277 }
278
279 private:
280 bool lz4_done_ = false;
281 std::unique_ptr<LZ4F_dctx, LZ4F_errorCode_t (*)(LZ4F_dctx*)> decoder_;
282};
283
284struct LZ4Encoder final : public Encoder {
285 explicit LZ4Encoder(size_t output_block_size)
286 : Encoder(output_block_size), encoder_(nullptr, nullptr) {
287 LZ4F_cctx* cctx;
288 if (LZ4F_createCompressionContext(&cctx, LZ4F_VERSION) != 0) {
289 LOG(FATAL) << "failed to initialize LZ4 compression context";
290 }
291 encoder_ = std::unique_ptr<LZ4F_cctx, decltype(&LZ4F_freeCompressionContext)>(
292 cctx, LZ4F_freeCompressionContext);
293 Block header(LZ4F_HEADER_SIZE_MAX);
294 size_t rc = LZ4F_compressBegin(encoder_.get(), header.data(), header.size(), nullptr);
295 if (LZ4F_isError(rc)) {
296 LOG(FATAL) << "LZ4F_compressBegin failed: %s", LZ4F_getErrorName(rc);
297 }
298 header.resize(rc);
299 output_buffer_.append(std::move(header));
300 }
301
302 // As an optimization, only emit a block if we have an entire output block ready, or we're done.
303 bool OutputReady() const {
304 return output_buffer_.size() >= output_block_size_ || lz4_finalized_;
305 }
306
307 // TODO: Switch the output type to IOVector to remove a copy?
308 EncodeResult Encode(Block* output) final {
309 size_t available_in = input_buffer_.front_size();
310 const char* next_in = input_buffer_.front_data();
311
312 // LZ4 makes no guarantees about being able to recover from trying to compress with an
313 // insufficiently large output buffer. LZ4F_compressBound tells us how much buffer we
314 // need to compress a given number of bytes, but the smallest value seems to be bigger
315 // than SYNC_DATA_MAX, so we need to buffer ourselves.
316
317 // Input size chosen to be a local maximum for LZ4F_compressBound (i.e. the block size).
318 constexpr size_t max_input_size = 65536;
319 const size_t encode_block_size = LZ4F_compressBound(max_input_size, nullptr);
320
321 if (available_in != 0) {
322 if (lz4_finalized_) {
323 LOG(ERROR) << "LZ4Encoder received data after Finish?";
324 return EncodeResult::Error;
325 }
326
327 available_in = std::min(available_in, max_input_size);
328
329 Block encode_block(encode_block_size);
330 size_t available_out = encode_block.capacity();
331 char* next_out = encode_block.data();
332
333 size_t rc = LZ4F_compressUpdate(encoder_.get(), next_out, available_out, next_in,
334 available_in, nullptr);
335 if (LZ4F_isError(rc)) {
336 LOG(ERROR) << "LZ4F_compressUpdate failed: " << LZ4F_getErrorName(rc);
337 return EncodeResult::Error;
338 }
339
340 input_buffer_.drop_front(available_in);
341
342 available_out -= rc;
343 next_out += rc;
344
345 encode_block.resize(encode_block_size - available_out);
346 output_buffer_.append(std::move(encode_block));
347 }
348
349 if (finished_ && !lz4_finalized_) {
350 lz4_finalized_ = true;
351
352 Block final_block(encode_block_size + 4);
353 size_t rc = LZ4F_compressEnd(encoder_.get(), final_block.data(), final_block.size(),
354 nullptr);
355 if (LZ4F_isError(rc)) {
356 LOG(ERROR) << "LZ4F_compressEnd failed: " << LZ4F_getErrorName(rc);
357 return EncodeResult::Error;
358 }
359
360 final_block.resize(rc);
361 output_buffer_.append(std::move(final_block));
362 }
363
364 if (OutputReady()) {
365 size_t len = std::min(output_block_size_, output_buffer_.size());
366 *output = output_buffer_.take_front(len).coalesce();
367 } else {
368 output->clear();
369 }
370
371 if (lz4_finalized_ && output_buffer_.empty()) {
372 return EncodeResult::Done;
373 } else if (OutputReady()) {
374 return EncodeResult::MoreOutput;
375 }
376 return EncodeResult::NeedInput;
377 }
378
379 private:
380 bool lz4_finalized_ = false;
381 std::unique_ptr<LZ4F_cctx, LZ4F_errorCode_t (*)(LZ4F_cctx*)> encoder_;
382 IOVector output_buffer_;
383};