blob: 339989974eb6f0519589df2d228ba9732a9f2855 [file] [log] [blame]
Vladimir Marko54159c62018-06-20 14:30:08 +01001/*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_LIBARTBASE_BASE_DATA_HASH_H_
18#define ART_LIBARTBASE_BASE_DATA_HASH_H_
19
Vladimir Markoe4ccbb52022-03-15 08:51:33 +000020#include <type_traits>
21
22#include "base/globals.h"
Vladimir Marko54159c62018-06-20 14:30:08 +010023#include "base/macros.h"
24
25namespace art {
26
Vladimir Markoe4ccbb52022-03-15 08:51:33 +000027// Note: Touching this file or any #included file causes too many files to be rebuilt, so
28// we want to avoid #including any files that are not necessary. Therefore we use templates
29// (and std::enable_if<>) to avoid `#including headers for `ArrayRef<>` or `BitMemoryRegion`.
30class BitMemoryRegion;
Vladimir Marko54159c62018-06-20 14:30:08 +010031
32class DataHash {
33 private:
34 static constexpr bool kUseMurmur3Hash = true;
35
36 public:
Vladimir Markoe4ccbb52022-03-15 08:51:33 +000037 template <class Container,
38 typename = std::enable_if_t<!std::is_same_v<Container, BitMemoryRegion>>>
Vladimir Marko54159c62018-06-20 14:30:08 +010039 size_t operator()(const Container& array) const {
40 // Containers that provide the data() function use contiguous storage.
41 const uint8_t* data = reinterpret_cast<const uint8_t*>(array.data());
Vladimir Markoe4ccbb52022-03-15 08:51:33 +000042 uint32_t length_in_bytes = sizeof(typename Container::value_type) * array.size();
Vladimir Marko54159c62018-06-20 14:30:08 +010043 if (kUseMurmur3Hash) {
Vladimir Markoe4ccbb52022-03-15 08:51:33 +000044 uint32_t hash = Murmur3Start();
Vladimir Marko54159c62018-06-20 14:30:08 +010045
Vladimir Markoe4ccbb52022-03-15 08:51:33 +000046 const size_t nblocks = length_in_bytes / 4;
Vladimir Marko54159c62018-06-20 14:30:08 +010047 typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
Vladimir Markoe4ccbb52022-03-15 08:51:33 +000048 const unaligned_uint32_t* blocks = reinterpret_cast<const unaligned_uint32_t*>(data);
49 for (size_t i = 0; i != nblocks; ++i) {
50 hash = Murmur3Update(hash, blocks[i]);
Vladimir Marko54159c62018-06-20 14:30:08 +010051 }
52
Vladimir Markoe4ccbb52022-03-15 08:51:33 +000053 const uint8_t* tail = reinterpret_cast<const uint8_t*>(data + nblocks * 4);
54 uint32_t last_block = 0;
Vladimir Marko54159c62018-06-20 14:30:08 +010055
Vladimir Markoe4ccbb52022-03-15 08:51:33 +000056 switch (length_in_bytes & 3) {
Vladimir Marko54159c62018-06-20 14:30:08 +010057 case 3:
Vladimir Markoe4ccbb52022-03-15 08:51:33 +000058 last_block ^= tail[2] << 16;
Vladimir Marko54159c62018-06-20 14:30:08 +010059 FALLTHROUGH_INTENDED;
60 case 2:
Vladimir Markoe4ccbb52022-03-15 08:51:33 +000061 last_block ^= tail[1] << 8;
Vladimir Marko54159c62018-06-20 14:30:08 +010062 FALLTHROUGH_INTENDED;
63 case 1:
Vladimir Markoe4ccbb52022-03-15 08:51:33 +000064 last_block ^= tail[0];
65 hash = Murmur3UpdatePartial(hash, last_block);
Vladimir Marko54159c62018-06-20 14:30:08 +010066 }
67
Vladimir Markoe4ccbb52022-03-15 08:51:33 +000068 hash = Murmur3Finish(hash, length_in_bytes);
Vladimir Marko54159c62018-06-20 14:30:08 +010069 return hash;
70 } else {
Vladimir Markoe4ccbb52022-03-15 08:51:33 +000071 return HashBytes(data, length_in_bytes);
Vladimir Marko54159c62018-06-20 14:30:08 +010072 }
73 }
Vladimir Markoe4ccbb52022-03-15 08:51:33 +000074
75 // Hash bytes using a relatively fast hash.
76 static inline size_t HashBytes(const uint8_t* data, size_t length_in_bytes) {
77 size_t hash = HashBytesStart();
78 for (uint32_t i = 0; i != length_in_bytes; ++i) {
79 hash = HashBytesUpdate(hash, data[i]);
80 }
81 return HashBytesFinish(hash);
82 }
83
84 template <typename BMR,
85 typename = std::enable_if_t<std::is_same_v<BMR, BitMemoryRegion>>>
86 size_t operator()(BMR region) const {
87 if (kUseMurmur3Hash) {
88 size_t num_full_blocks = region.size_in_bits() / kMurmur3BlockBits;
89 size_t num_end_bits = region.size_in_bits() % kMurmur3BlockBits;
90 size_t hash = Murmur3Start();
91 for (uint32_t i = 0; i != num_full_blocks; ++i) {
92 uint32_t block = region.LoadBits(i * kMurmur3BlockBits, kMurmur3BlockBits);
93 hash = Murmur3Update(hash, block);
94 }
95 if (num_end_bits != 0u) {
96 uint32_t end_bits = region.LoadBits(num_full_blocks * kMurmur3BlockBits, num_end_bits);
97 hash = Murmur3UpdatePartial(hash, end_bits);
98 }
99 return HashBytesFinish(hash);
100 } else {
101 size_t num_full_bytes = region.size_in_bits() / kBitsPerByte;
102 size_t num_end_bits = region.size_in_bits() % kBitsPerByte;
103 size_t hash = HashBytesStart();
104 for (uint32_t i = 0; i != num_full_bytes; ++i) {
105 uint8_t byte = region.LoadBits(i * kBitsPerByte, kBitsPerByte);
106 hash = HashBytesUpdate(hash, byte);
107 }
108 if (num_end_bits != 0u) {
109 uint32_t end_bits = region.LoadBits(num_full_bytes * kBitsPerByte, num_end_bits);
110 hash = HashBytesUpdate(hash, end_bits);
111 }
112 return HashBytesFinish(hash);
113 }
114 }
115
116 private:
117 ALWAYS_INLINE
118 static constexpr size_t HashBytesStart() {
119 return 0x811c9dc5;
120 }
121
122 ALWAYS_INLINE
123 static constexpr size_t HashBytesUpdate(size_t hash, uint8_t value) {
124 return (hash * 16777619) ^ value;
125 }
126
127 ALWAYS_INLINE
128 static constexpr size_t HashBytesFinish(size_t hash) {
129 hash += hash << 13;
130 hash ^= hash >> 7;
131 hash += hash << 3;
132 hash ^= hash >> 17;
133 hash += hash << 5;
134 return hash;
135 }
136
137 static constexpr uint32_t kMurmur3Seed = 0u;
138 static constexpr uint32_t kMurmur3BlockBits = 32u;
139 static constexpr uint32_t kMurmur3C1 = 0xcc9e2d51;
140 static constexpr uint32_t kMurmur3C2 = 0x1b873593;
141 static constexpr uint32_t kMurmur3R1 = 15;
142 static constexpr uint32_t kMurmur3R2 = 13;
143 static constexpr uint32_t kMurmur3M = 5;
144 static constexpr uint32_t kMurmur3N = 0xe6546b64;
145
146 ALWAYS_INLINE
147 static constexpr uint32_t Murmur3Start() {
148 return kMurmur3Seed;
149 }
150
151 ALWAYS_INLINE
152 static constexpr uint32_t Murmur3Update(uint32_t hash, uint32_t block) {
153 uint32_t k = block;
154 k *= kMurmur3C1;
155 k = (k << kMurmur3R1) | (k >> (32 - kMurmur3R1));
156 k *= kMurmur3C2;
157 hash ^= k;
158 hash = ((hash << kMurmur3R2) | (hash >> (32 - kMurmur3R2))) * kMurmur3M + kMurmur3N;
159 return hash;
160 }
161
162 ALWAYS_INLINE
163 static constexpr uint32_t Murmur3UpdatePartial(uint32_t hash, uint32_t block) {
164 uint32_t k = block;
165 k *= kMurmur3C1;
166 k = (k << kMurmur3R1) | (k >> (32 - kMurmur3R1));
167 k *= kMurmur3C2;
168 hash ^= k;
169 // Note: Unlike full block, the partial block does not have `hash = hash * M + N`.
170 return hash;
171 }
172
173 ALWAYS_INLINE
174 static constexpr uint32_t Murmur3Finish(uint32_t hash, uint32_t length_in_bytes) {
175 hash ^= length_in_bytes;
176 hash ^= (hash >> 16);
177 hash *= 0x85ebca6b;
178 hash ^= (hash >> 13);
179 hash *= 0xc2b2ae35;
180 hash ^= (hash >> 16);
181 return hash;
182 }
Vladimir Marko54159c62018-06-20 14:30:08 +0100183};
184
185} // namespace art
186
187#endif // ART_LIBARTBASE_BASE_DATA_HASH_H_