blob: 4f0f9750092803e97420f630f2546b063333f6f3 [file] [log] [blame]
Elliott Hughes2faa5f12012-01-30 14:42:07 -08001/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
Carl Shapiro1fb86202011-06-27 17:43:13 -070016
David Sehr67bf42e2018-02-26 16:43:04 -080017#ifndef ART_LIBARTBASE_BASE_LEB128_H_
18#define ART_LIBARTBASE_BASE_LEB128_H_
Carl Shapiro1fb86202011-06-27 17:43:13 -070019
Vladimir Marko80afd022015-05-19 18:08:00 +010020#include <vector>
21
Andreas Gampe57943812017-12-06 21:39:13 -080022#include <android-base/logging.h>
23
David Sehr1979c642018-04-26 14:41:18 -070024#include "bit_utils.h"
25#include "globals.h"
26#include "macros.h"
Carl Shapiro1fb86202011-06-27 17:43:13 -070027
28namespace art {
29
30// Reads an unsigned LEB128 value, updating the given pointer to point
31// just past the end of the read value. This function tolerates
32// non-zero high-order bits in the fifth encoded byte.
Ian Rogers96faf5b2013-08-09 22:05:32 -070033static inline uint32_t DecodeUnsignedLeb128(const uint8_t** data) {
34 const uint8_t* ptr = *data;
Carl Shapiro1fb86202011-06-27 17:43:13 -070035 int result = *(ptr++);
Ian Rogers1ff3c982014-08-12 02:30:58 -070036 if (UNLIKELY(result > 0x7f)) {
Carl Shapiro1fb86202011-06-27 17:43:13 -070037 int cur = *(ptr++);
38 result = (result & 0x7f) | ((cur & 0x7f) << 7);
39 if (cur > 0x7f) {
40 cur = *(ptr++);
41 result |= (cur & 0x7f) << 14;
42 if (cur > 0x7f) {
43 cur = *(ptr++);
44 result |= (cur & 0x7f) << 21;
45 if (cur > 0x7f) {
46 // Note: We don't check to see if cur is out of range here,
47 // meaning we tolerate garbage in the four high-order bits.
48 cur = *(ptr++);
49 result |= cur << 28;
50 }
51 }
52 }
53 }
54 *data = ptr;
buzbeecbd6d442012-11-17 14:11:25 -080055 return static_cast<uint32_t>(result);
Carl Shapiro1fb86202011-06-27 17:43:13 -070056}
57
Alex Light1a824a52018-01-26 15:45:30 -080058static inline uint32_t DecodeUnsignedLeb128WithoutMovingCursor(const uint8_t* data) {
59 return DecodeUnsignedLeb128(&data);
60}
61
Andreas Gampebed6daf2016-09-02 18:12:00 -070062static inline bool DecodeUnsignedLeb128Checked(const uint8_t** data,
63 const void* end,
64 uint32_t* out) {
65 const uint8_t* ptr = *data;
66 if (ptr >= end) {
67 return false;
68 }
69 int result = *(ptr++);
70 if (UNLIKELY(result > 0x7f)) {
71 if (ptr >= end) {
72 return false;
73 }
74 int cur = *(ptr++);
75 result = (result & 0x7f) | ((cur & 0x7f) << 7);
76 if (cur > 0x7f) {
77 if (ptr >= end) {
78 return false;
79 }
80 cur = *(ptr++);
81 result |= (cur & 0x7f) << 14;
82 if (cur > 0x7f) {
83 if (ptr >= end) {
84 return false;
85 }
86 cur = *(ptr++);
87 result |= (cur & 0x7f) << 21;
88 if (cur > 0x7f) {
89 if (ptr >= end) {
90 return false;
91 }
92 // Note: We don't check to see if cur is out of range here,
93 // meaning we tolerate garbage in the four high-order bits.
94 cur = *(ptr++);
95 result |= cur << 28;
96 }
97 }
98 }
99 }
100 *data = ptr;
101 *out = static_cast<uint32_t>(result);
102 return true;
103}
104
Shih-wei Liao195487c2011-08-20 13:29:04 -0700105// Reads an unsigned LEB128 + 1 value. updating the given pointer to point
106// just past the end of the read value. This function tolerates
107// non-zero high-order bits in the fifth encoded byte.
108// It is possible for this function to return -1.
Ian Rogers96faf5b2013-08-09 22:05:32 -0700109static inline int32_t DecodeUnsignedLeb128P1(const uint8_t** data) {
Shih-wei Liao195487c2011-08-20 13:29:04 -0700110 return DecodeUnsignedLeb128(data) - 1;
111}
112
Carl Shapiro1fb86202011-06-27 17:43:13 -0700113// Reads a signed LEB128 value, updating the given pointer to point
114// just past the end of the read value. This function tolerates
115// non-zero high-order bits in the fifth encoded byte.
Ian Rogers96faf5b2013-08-09 22:05:32 -0700116static inline int32_t DecodeSignedLeb128(const uint8_t** data) {
117 const uint8_t* ptr = *data;
Carl Shapiro1fb86202011-06-27 17:43:13 -0700118 int32_t result = *(ptr++);
119 if (result <= 0x7f) {
120 result = (result << 25) >> 25;
121 } else {
122 int cur = *(ptr++);
123 result = (result & 0x7f) | ((cur & 0x7f) << 7);
124 if (cur <= 0x7f) {
125 result = (result << 18) >> 18;
126 } else {
127 cur = *(ptr++);
128 result |= (cur & 0x7f) << 14;
129 if (cur <= 0x7f) {
130 result = (result << 11) >> 11;
131 } else {
132 cur = *(ptr++);
133 result |= (cur & 0x7f) << 21;
134 if (cur <= 0x7f) {
135 result = (result << 4) >> 4;
136 } else {
137 // Note: We don't check to see if cur is out of range here,
138 // meaning we tolerate garbage in the four high-order bits.
139 cur = *(ptr++);
140 result |= cur << 28;
141 }
142 }
143 }
144 }
145 *data = ptr;
146 return result;
147}
148
Andreas Gampebed6daf2016-09-02 18:12:00 -0700149static inline bool DecodeSignedLeb128Checked(const uint8_t** data,
150 const void* end,
151 int32_t* out) {
152 const uint8_t* ptr = *data;
153 if (ptr >= end) {
154 return false;
155 }
156 int32_t result = *(ptr++);
157 if (result <= 0x7f) {
158 result = (result << 25) >> 25;
159 } else {
160 if (ptr >= end) {
161 return false;
162 }
163 int cur = *(ptr++);
164 result = (result & 0x7f) | ((cur & 0x7f) << 7);
165 if (cur <= 0x7f) {
166 result = (result << 18) >> 18;
167 } else {
168 if (ptr >= end) {
169 return false;
170 }
171 cur = *(ptr++);
172 result |= (cur & 0x7f) << 14;
173 if (cur <= 0x7f) {
174 result = (result << 11) >> 11;
175 } else {
176 if (ptr >= end) {
177 return false;
178 }
179 cur = *(ptr++);
180 result |= (cur & 0x7f) << 21;
181 if (cur <= 0x7f) {
182 result = (result << 4) >> 4;
183 } else {
184 if (ptr >= end) {
185 return false;
186 }
187 // Note: We don't check to see if cur is out of range here,
188 // meaning we tolerate garbage in the four high-order bits.
189 cur = *(ptr++);
190 result |= cur << 28;
191 }
192 }
193 }
194 }
195 *data = ptr;
196 *out = static_cast<uint32_t>(result);
197 return true;
198}
199
jeffhaod1f0fde2011-09-08 17:25:33 -0700200// Returns the number of bytes needed to encode the value in unsigned LEB128.
201static inline uint32_t UnsignedLeb128Size(uint32_t data) {
Vladimir Marko1e6cb632013-11-28 16:27:29 +0000202 // bits_to_encode = (data != 0) ? 32 - CLZ(x) : 1 // 32 - CLZ(data | 1)
203 // bytes = ceil(bits_to_encode / 7.0); // (6 + bits_to_encode) / 7
Andreas Gampe151ab8d2015-08-14 23:01:49 +0000204 uint32_t x = 6 + 32 - CLZ(data | 1U);
Vladimir Marko1e6cb632013-11-28 16:27:29 +0000205 // Division by 7 is done by (x * 37) >> 8 where 37 = ceil(256 / 7).
206 // This works for 0 <= x < 256 / (7 * 37 - 256), i.e. 0 <= x <= 85.
207 return (x * 37) >> 8;
208}
209
Alex Light1a824a52018-01-26 15:45:30 -0800210static inline bool IsLeb128Terminator(const uint8_t* ptr) {
211 return *ptr <= 0x7f;
212}
213
214// Returns the first byte of a Leb128 value assuming that:
215// (1) `end_ptr` points to the first byte after the Leb128 value, and
216// (2) there is another Leb128 value before this one.
217template <typename T>
218static inline T* ReverseSearchUnsignedLeb128(T* end_ptr) {
Vladimir Marko495311c2022-04-14 10:59:53 +0000219 static_assert(std::is_same_v<std::remove_const_t<T>, uint8_t>,
Alex Light1a824a52018-01-26 15:45:30 -0800220 "T must be a uint8_t");
221 T* ptr = end_ptr;
222
223 // Move one byte back, check that this is the terminating byte.
224 ptr--;
225 DCHECK(IsLeb128Terminator(ptr));
226
227 // Keep moving back while the previous byte is not a terminating byte.
228 // Fail after reading five bytes in case there isn't another Leb128 value
229 // before this one.
230 while (!IsLeb128Terminator(ptr - 1)) {
231 ptr--;
232 DCHECK_LE(static_cast<ptrdiff_t>(end_ptr - ptr), 5);
233 }
234
235 return ptr;
236}
237
Vladimir Marko1e6cb632013-11-28 16:27:29 +0000238// Returns the number of bytes needed to encode the value in unsigned LEB128.
239static inline uint32_t SignedLeb128Size(int32_t data) {
240 // Like UnsignedLeb128Size(), but we need one bit beyond the highest bit that differs from sign.
241 data = data ^ (data >> 31);
Andreas Gampe151ab8d2015-08-14 23:01:49 +0000242 uint32_t x = 1 /* we need to encode the sign bit */ + 6 + 32 - CLZ(data | 1U);
Vladimir Marko1e6cb632013-11-28 16:27:29 +0000243 return (x * 37) >> 8;
jeffhaod1f0fde2011-09-08 17:25:33 -0700244}
245
Brian Carlstroma1ce1fe2014-02-24 23:23:58 -0800246static inline uint8_t* EncodeUnsignedLeb128(uint8_t* dest, uint32_t value) {
247 uint8_t out = value & 0x7f;
248 value >>= 7;
249 while (value != 0) {
250 *dest++ = out | 0x80;
251 out = value & 0x7f;
252 value >>= 7;
253 }
254 *dest++ = out;
255 return dest;
256}
257
Vladimir Markoec7802a2015-10-01 20:57:57 +0100258template <typename Vector>
259static inline void EncodeUnsignedLeb128(Vector* dest, uint32_t value) {
Vladimir Marko495311c2022-04-14 10:59:53 +0000260 static_assert(std::is_same_v<typename Vector::value_type, uint8_t>, "Invalid value type");
David Srbecky15c19752015-03-31 14:53:55 +0000261 uint8_t out = value & 0x7f;
262 value >>= 7;
263 while (value != 0) {
264 dest->push_back(out | 0x80);
265 out = value & 0x7f;
266 value >>= 7;
267 }
268 dest->push_back(out);
269}
270
David Srbeckyb5362472015-04-08 19:37:39 +0100271// Overwrite encoded Leb128 with a new value. The new value must be less than
272// or equal to the old value to ensure that it fits the allocated space.
273static inline void UpdateUnsignedLeb128(uint8_t* dest, uint32_t value) {
274 const uint8_t* old_end = dest;
275 uint32_t old_value = DecodeUnsignedLeb128(&old_end);
David Brazdil2b9c35b2018-01-12 15:44:43 +0000276 DCHECK_LE(UnsignedLeb128Size(value), UnsignedLeb128Size(old_value));
David Srbeckyb5362472015-04-08 19:37:39 +0100277 for (uint8_t* end = EncodeUnsignedLeb128(dest, value); end < old_end; end++) {
278 // Use longer encoding than necessary to fill the allocated space.
279 end[-1] |= 0x80;
280 end[0] = 0;
281 }
282}
283
Brian Carlstroma1ce1fe2014-02-24 23:23:58 -0800284static inline uint8_t* EncodeSignedLeb128(uint8_t* dest, int32_t value) {
285 uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
286 uint8_t out = value & 0x7f;
287 while (extra_bits != 0u) {
288 *dest++ = out | 0x80;
289 value >>= 7;
290 out = value & 0x7f;
291 extra_bits >>= 7;
292 }
293 *dest++ = out;
294 return dest;
295}
296
Vladimir Markoec7802a2015-10-01 20:57:57 +0100297template<typename Vector>
298static inline void EncodeSignedLeb128(Vector* dest, int32_t value) {
Vladimir Marko495311c2022-04-14 10:59:53 +0000299 static_assert(std::is_same_v<typename Vector::value_type, uint8_t>, "Invalid value type");
David Srbecky15c19752015-03-31 14:53:55 +0000300 uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
301 uint8_t out = value & 0x7f;
302 while (extra_bits != 0u) {
303 dest->push_back(out | 0x80);
304 value >>= 7;
305 out = value & 0x7f;
306 extra_bits >>= 7;
307 }
308 dest->push_back(out);
309}
310
Vladimir Markof9f64412015-09-02 14:05:49 +0100311// An encoder that pushes int32_t/uint32_t data onto the given std::vector.
Vladimir Markoec7802a2015-10-01 20:57:57 +0100312template <typename Vector = std::vector<uint8_t>>
Yevgeny Roubane3ea8382014-08-08 16:29:38 +0700313class Leb128Encoder {
Vladimir Marko495311c2022-04-14 10:59:53 +0000314 static_assert(std::is_same_v<typename Vector::value_type, uint8_t>, "Invalid value type");
Vladimir Markoec7802a2015-10-01 20:57:57 +0100315
Brian Carlstroma1ce1fe2014-02-24 23:23:58 -0800316 public:
Vladimir Markoec7802a2015-10-01 20:57:57 +0100317 explicit Leb128Encoder(Vector* data) : data_(data) {
Yevgeny Roubane3ea8382014-08-08 16:29:38 +0700318 DCHECK(data != nullptr);
Brian Carlstroma1ce1fe2014-02-24 23:23:58 -0800319 }
320
321 void Reserve(uint32_t size) {
Yevgeny Roubane3ea8382014-08-08 16:29:38 +0700322 data_->reserve(size);
Brian Carlstroma1ce1fe2014-02-24 23:23:58 -0800323 }
324
325 void PushBackUnsigned(uint32_t value) {
David Srbecky15c19752015-03-31 14:53:55 +0000326 EncodeUnsignedLeb128(data_, value);
Brian Carlstroma1ce1fe2014-02-24 23:23:58 -0800327 }
328
329 template<typename It>
330 void InsertBackUnsigned(It cur, It end) {
331 for (; cur != end; ++cur) {
332 PushBackUnsigned(*cur);
333 }
334 }
335
336 void PushBackSigned(int32_t value) {
David Srbecky15c19752015-03-31 14:53:55 +0000337 EncodeSignedLeb128(data_, value);
Brian Carlstroma1ce1fe2014-02-24 23:23:58 -0800338 }
339
340 template<typename It>
341 void InsertBackSigned(It cur, It end) {
342 for (; cur != end; ++cur) {
343 PushBackSigned(*cur);
344 }
345 }
346
Vladimir Markoec7802a2015-10-01 20:57:57 +0100347 const Vector& GetData() const {
Yevgeny Roubane3ea8382014-08-08 16:29:38 +0700348 return *data_;
349 }
350
351 protected:
Vladimir Markoec7802a2015-10-01 20:57:57 +0100352 Vector* const data_;
Yevgeny Roubane3ea8382014-08-08 16:29:38 +0700353
354 private:
355 DISALLOW_COPY_AND_ASSIGN(Leb128Encoder);
356};
357
358// An encoder with an API similar to vector<uint32_t> where the data is captured in ULEB128 format.
Vladimir Markoec7802a2015-10-01 20:57:57 +0100359template <typename Vector = std::vector<uint8_t>>
Roland Levillainbbc6e7e2018-08-24 16:58:47 +0100360class Leb128EncodingVector final : private Vector,
Vladimir Markoec7802a2015-10-01 20:57:57 +0100361 public Leb128Encoder<Vector> {
Vladimir Marko495311c2022-04-14 10:59:53 +0000362 static_assert(std::is_same_v<typename Vector::value_type, uint8_t>, "Invalid value type");
Vladimir Markof9f64412015-09-02 14:05:49 +0100363
Vladimir Markoec7802a2015-10-01 20:57:57 +0100364 public:
365 Leb128EncodingVector() : Leb128Encoder<Vector>(this) { }
366
367 explicit Leb128EncodingVector(const typename Vector::allocator_type& alloc)
368 : Vector(alloc),
369 Leb128Encoder<Vector>(this) { }
Brian Carlstroma1ce1fe2014-02-24 23:23:58 -0800370
371 private:
Brian Carlstroma1ce1fe2014-02-24 23:23:58 -0800372 DISALLOW_COPY_AND_ASSIGN(Leb128EncodingVector);
373};
374
Carl Shapiro1fb86202011-06-27 17:43:13 -0700375} // namespace art
376
David Sehr67bf42e2018-02-26 16:43:04 -0800377#endif // ART_LIBARTBASE_BASE_LEB128_H_