blob: 7766e242a4d6d64730447003a669174d6b419369 [file] [log] [blame]
Kelvin Zhang4eae81e2021-12-09 17:07:17 -08001//
2// Copyright (C) 2021 The Android Open Source Project
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16
#include "lz4patch.h"

#include <endian.h>
#include <fcntl.h>
#include <unistd.h>

#include <algorithm>
#include <limits>
#include <string_view>

#include <bsdiff/bspatch.h>
#include <bsdiff/file.h>
#include <bsdiff/memory_file.h>
#include <puffin/memory_stream.h>

#include "android-base/strings.h"
#include "lz4diff/lz4diff.h"
#include "lz4diff_compress.h"
#include "lz4diff_format.h"
#include "puffin/puffpatch.h"
#include "update_engine/common/hash_calculator.h"
#include "update_engine/common/utils.h"
38
39namespace chromeos_update_engine {
40
41namespace {
42
// Converts |t| in place from big-endian byte order to the byte order of the
// host. Only integral types that are 2, 4 or 8 bytes wide are accepted; any
// other type is rejected at compile time.
template <typename T>
constexpr void BigEndianToHost(T& t) {
  static_assert(std::is_integral_v<T>);
  constexpr auto kWidth = sizeof(T);
  static_assert(kWidth == 2 || kWidth == 4 || kWidth == 8);
  if constexpr (kWidth == 2) {
    t = be16toh(t);
  } else if constexpr (kWidth == 4) {
    t = be32toh(t);
  } else {
    // The static_assert above leaves 8 bytes as the only remaining width.
    t = be64toh(t);
  }
}
55
56// In memory representation of an LZ4Diff patch, it's not marked as packed
57// because parsing isn't as simple as reinterpret_cast<> any way.
58struct Lz4diffPatch {
59 char magic[kLz4diffMagic.size()];
60 uint32_t version;
61 uint32_t pb_header_size; // size of protobuf message
62 Lz4diffHeader pb_header;
63 std::string_view inner_patch;
64};
65
66// Utility class to interact with puffin API. C++ does not have standard
67// Read/Write trait. So everybody invent their own file descriptor wrapper.
68class StringViewStream : public puffin::StreamInterface {
69 public:
70 ~StringViewStream() override = default;
71
72 bool GetSize(uint64_t* size) const override {
73 *size = read_memory_.size();
74 return true;
75 }
76
77 bool GetOffset(uint64_t* offset) const override {
78 *offset = offset_;
79 return true;
80 }
81
82 bool Seek(uint64_t offset) override {
83 TEST_AND_RETURN_FALSE(open_);
84 uint64_t size;
85 GetSize(&size);
86 TEST_AND_RETURN_FALSE(offset <= size);
87 offset_ = offset;
88 return true;
89 }
90
91 bool Read(void* buffer, size_t length) override {
92 TEST_AND_RETURN_FALSE(open_);
93 TEST_AND_RETURN_FALSE(offset_ + length <= read_memory_.size());
94 memcpy(buffer, read_memory_.data() + offset_, length);
95 offset_ += length;
96 return true;
97 }
98
99 bool Write(const void* buffer, size_t length) override {
100 LOG(ERROR) << "Unsupported operation " << __FUNCTION__;
101 return false;
102 }
103
104 bool Close() override {
105 open_ = false;
106 return true;
107 }
108
109 constexpr StringViewStream(std::string_view read_memory)
110 : read_memory_(read_memory) {
111 CHECK(!read_memory.empty());
112 }
113
114 private:
115 // The memory buffer for reading.
116 std::string_view read_memory_;
117
118 // The current offset.
119 uint64_t offset_{};
120 bool open_{true};
121};
122
123bool ParseLz4DifffPatch(std::string_view patch_data, Lz4diffPatch* output) {
124 CHECK_NE(output, nullptr);
125 if (!android::base::StartsWith(patch_data, kLz4diffMagic)) {
126 LOG(ERROR) << "Invalid lz4diff magic: "
127 << HexEncode(patch_data.substr(0, kLz4diffMagic.size()))
128 << ", expected: " << HexEncode(kLz4diffMagic);
129 return false;
130 }
131 Lz4diffPatch& patch = *output;
132 std::memcpy(patch.magic, patch_data.data(), kLz4diffMagic.size());
133 std::memcpy(&patch.version,
134 patch_data.data() + kLz4diffMagic.size(),
135 sizeof(patch.version));
136 BigEndianToHost(patch.version);
137 if (patch.version != kLz4diffVersion) {
138 LOG(ERROR) << "Unsupported lz4diff version: " << patch.version
139 << ", supported version: " << kLz4diffVersion;
140 return false;
141 }
142 std::memcpy(&patch.pb_header_size,
143 patch_data.data() + kLz4diffMagic.size() + sizeof(patch.version),
144 sizeof(patch.pb_header_size));
145 BigEndianToHost(patch.pb_header_size);
146 TEST_AND_RETURN_FALSE(patch.pb_header.ParseFromArray(
147 patch_data.data() + kLz4diffHeaderSize, patch.pb_header_size));
148 patch.inner_patch =
149 patch_data.substr(kLz4diffHeaderSize + patch.pb_header_size);
150 return true;
151}
152
153bool bspatch(std::string_view input_data,
154 std::string_view patch_data,
155 Blob* output) {
156 CHECK_NE(output, nullptr);
157 output->clear();
158 CHECK_GT(patch_data.size(), 0UL);
159 int err =
160 bsdiff::bspatch(reinterpret_cast<const uint8_t*>(input_data.data()),
161 input_data.size(),
162 reinterpret_cast<const uint8_t*>(patch_data.data()),
163 patch_data.size(),
164 [output](const uint8_t* data, size_t size) -> size_t {
165 output->insert(output->end(), data, data + size);
166 return size;
167 });
168 return err == 0;
169}
170
171bool ApplyPostfixPatch(
172 std::string_view recompressed_blob,
173 const google::protobuf::RepeatedPtrField<CompressedBlockInfo>&
174 dst_block_info,
175 Blob* output) {
176 // Output size should be always identical to size of recompressed_blob
177 output->clear();
178 output->reserve(recompressed_blob.size());
179 size_t offset = 0;
180 for (const auto& block_info : dst_block_info) {
181 auto block =
182 recompressed_blob.substr(offset, block_info.compressed_length());
183 if (!block_info.sha256_hash().empty()) {
184 Blob actual_hash;
185 CHECK(HashCalculator::RawHashOfBytes(
186 block.data(), block.size(), &actual_hash));
187 if (ToStringView(actual_hash) != block_info.sha256_hash()) {
188 LOG(ERROR) << "Block " << block_info
189 << " is corrupted. This usually means the patch generator "
190 "used a different version of LZ4, or an incompatible LZ4 "
191 "patch generator was used, or LZ4 produces different "
192 "output on different platforms. Expected hash: "
193 << HexEncode(block_info.sha256_hash())
194 << ", actual hash: " << HexEncode(actual_hash);
195 }
196 }
197 if (!block_info.postfix_bspatch().empty()) {
198 Blob fixed_block;
199 TEST_AND_RETURN_FALSE(
200 bspatch(block, block_info.postfix_bspatch(), &fixed_block));
201 output->insert(output->end(), fixed_block.begin(), fixed_block.end());
202 } else {
203 output->insert(output->end(), block.begin(), block.end());
204 }
205 offset += block_info.compressed_length();
206 }
207 return true;
208}
209
210bool puffpatch(std::string_view input_data,
211 std::string_view patch_data,
212 Blob* output) {
213 return puffin::PuffPatch(std::make_unique<StringViewStream>(input_data),
214 puffin::MemoryStream::CreateForWrite(output),
215 reinterpret_cast<const uint8_t*>(patch_data.data()),
216 patch_data.size());
217}
218
219std::vector<CompressedBlock> ToCompressedBlockVec(
220 const google::protobuf::RepeatedPtrField<CompressedBlockInfo>& rpf) {
221 std::vector<CompressedBlock> ret;
222 for (const auto& block : rpf) {
223 auto& info = ret.emplace_back();
224 info.compressed_length = block.compressed_length();
225 info.uncompressed_length = block.uncompressed_length();
226 info.uncompressed_offset = block.uncompressed_offset();
227 }
228 return ret;
229}
230
231bool HasPosfixPatches(const Lz4diffPatch& patch) {
232 for (const auto& info : patch.pb_header.dst_info().block_info()) {
233 if (!info.postfix_bspatch().empty()) {
234 return true;
235 }
236 }
237 return false;
238}
239
240} // namespace
241
242bool Lz4Patch(std::string_view src_data,
243 std::string_view patch_data,
244 Blob* output) {
245 Lz4diffPatch patch;
246 TEST_AND_RETURN_FALSE(ParseLz4DifffPatch(patch_data, &patch));
247
248 Blob decompressed_dst;
249 // This scope is here just so that |decompressed_src| can be freed earlier
250 // than function scope.
251 // This whole patching algorithm has non-trivial memory usage, as it needs to
252 // load source data in to memory and decompress that. Now both src and
253 // decompressed src data are in memory.
254 // TODO(b/206729162) Make lz4diff more memory efficient and more streaming
255 // friendly.
256 {
257 const auto decompressed_src = TryDecompressBlob(
258 src_data,
259 ToCompressedBlockVec(patch.pb_header.src_info().block_info()),
260 patch.pb_header.src_info().zero_padding_enabled());
261 switch (patch.pb_header.inner_type()) {
262 case InnerPatchType::BSDIFF:
263 TEST_AND_RETURN_FALSE(bspatch(ToStringView(decompressed_src),
264 patch.inner_patch,
265 &decompressed_dst));
266 break;
267 case InnerPatchType::PUFFDIFF:
268 TEST_AND_RETURN_FALSE(puffpatch(ToStringView(decompressed_src),
269 patch.inner_patch,
270 &decompressed_dst));
271 break;
272 default:
273 LOG(ERROR) << "Unsupported patch type: "
274 << patch.pb_header.inner_type();
275 return false;
276 }
277 }
278
279 auto recompressed_dst = TryCompressBlob(
280 ToStringView(decompressed_dst),
281 ToCompressedBlockVec(patch.pb_header.dst_info().block_info()),
282 patch.pb_header.dst_info().zero_padding_enabled(),
283 patch.pb_header.dst_info().algo());
284 TEST_AND_RETURN_FALSE(recompressed_dst.size() > 0);
285 // free memory used by |decompressed_dst|.
286 decompressed_dst = {};
287
288 if (HasPosfixPatches(patch)) {
289 TEST_AND_RETURN_FALSE(
290 ApplyPostfixPatch(ToStringView(recompressed_dst),
291 patch.pb_header.dst_info().block_info(),
292 output));
293 } else {
294 *output = std::move(recompressed_dst);
295 }
296
297 return true;
298}
299
Kelvin Zhang893b3a12021-12-30 12:28:53 -0800300bool Lz4Patch(const Blob& src_data, const Blob& patch_data, Blob* output) {
301 return Lz4Patch(ToStringView(src_data), ToStringView(patch_data), output);
302}
303
Kelvin Zhang4eae81e2021-12-09 17:07:17 -0800304std::ostream& operator<<(std::ostream& out, const CompressionAlgorithm& info) {
305 out << "Algo {type: " << info.Type_Name(info.type());
306 if (info.level() != 0) {
307 out << ", level: " << info.level();
308 }
309 out << "}";
310
311 return out;
312}
313
314std::ostream& operator<<(std::ostream& out, const CompressionInfo& info) {
315 out << "CompressionInfo {block_info: " << info.block_info()
316 << ", algo: " << info.algo() << "}";
317 return out;
318}
319
320std::ostream& operator<<(std::ostream& out, const Lz4diffHeader& header) {
321 out << "Lz4diffHeader {src_info: " << header.src_info()
322 << ", dst_info: " << header.dst_info() << "}";
323 return out;
324}
325
326} // namespace chromeos_update_engine