Revert^2 "Faster deduplication of `CodeInfo` tables."

This reverts commit 8c7f649fff75ba98392931157292f06f7930f2b6.

Reason for revert: Add hwasan exclusion annotation.

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Bug: 181943478
Change-Id: Ifc4dec165a2977a08654d7ae094fe1aa8a5bbbe5
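
The re-landed change speeds up deduplication of `CodeInfo` bit tables by working on their bit contents directly: `DataHash` (below) gains a `BitMemoryRegion` overload for hashing, and candidate tables are confirmed identical with `BitMemoryRegion::Equals()`. As a minimal standalone sketch of this hash-then-confirm deduplication pattern, using plain byte blobs and a simplified hash instead of the ART types (all names below are illustrative only, not ART code):

// Sketch: content-addressed deduplication of byte blobs, a hypothetical
// stand-in for deduplicating CodeInfo bit tables.
#include <cstdint>
#include <cstdio>
#include <unordered_set>
#include <vector>

// FNV-style byte hash, simplified from DataHash::HashBytes() in the patch
// (the real version adds a final mixing step).
struct BlobHash {
  size_t operator()(const std::vector<uint8_t>& blob) const {
    size_t hash = 0x811c9dc5;
    for (uint8_t byte : blob) {
      hash = (hash * 16777619) ^ byte;
    }
    return hash;
  }
};

int main() {
  // The hash locates candidate duplicates; full equality (vector operator==,
  // analogous to BitMemoryRegion::Equals) confirms them.
  std::unordered_set<std::vector<uint8_t>, BlobHash> dedup;
  dedup.insert({1, 2, 3});
  dedup.insert({1, 2, 3});  // Identical content, not stored again.
  dedup.insert({4, 5, 6});
  std::printf("unique blobs: %zu\n", dedup.size());  // Prints 2.
  return 0;
}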
diff --git a/libartbase/base/bit_memory_region.h b/libartbase/base/bit_memory_region.h
index 6168488..cca4217 100644
--- a/libartbase/base/bit_memory_region.h
+++ b/libartbase/base/bit_memory_region.h
@@ -30,12 +30,6 @@
 // abstracting away the bit start offset to avoid needing to pass it as an argument everywhere.
 class BitMemoryRegion final : public ValueObject {
  public:
-  struct Less {
-    bool operator()(const BitMemoryRegion& lhs, const BitMemoryRegion& rhs) const {
-      return Compare(lhs, rhs) < 0;
-    }
-  };
-
   BitMemoryRegion() = default;
   ALWAYS_INLINE BitMemoryRegion(uint8_t* data, ssize_t bit_start, size_t bit_size) {
     // Normalize the data pointer. Note that bit_start may be negative.
@@ -136,17 +130,17 @@
 
   // Store `bit_length` bits in `data` starting at given `bit_offset`.
   // The least significant bit is stored in the smallest memory offset.
-  ALWAYS_INLINE void StoreBits(size_t bit_offset, uint32_t value, size_t bit_length) {
+  ALWAYS_INLINE void StoreBits(size_t bit_offset, size_t value, size_t bit_length) {
     DCHECK_LE(bit_offset, bit_size_);
     DCHECK_LE(bit_length, bit_size_ - bit_offset);
-    DCHECK_LE(bit_length, BitSizeOf<uint32_t>());
-    DCHECK_LE(value, MaxInt<uint32_t>(bit_length));
+    DCHECK_LE(bit_length, BitSizeOf<size_t>());
+    DCHECK_LE(value, MaxInt<size_t>(bit_length));
     if (bit_length == 0) {
       return;
     }
     // Write data byte by byte to avoid races with other threads
     // on bytes that do not overlap with this region.
-    uint32_t mask = std::numeric_limits<uint32_t>::max() >> (BitSizeOf<uint32_t>() - bit_length);
+    size_t mask = std::numeric_limits<size_t>::max() >> (BitSizeOf<size_t>() - bit_length);
     size_t index = (bit_start_ + bit_offset) / kBitsPerByte;
     size_t shift = (bit_start_ + bit_offset) % kBitsPerByte;
     data_[index] &= ~(mask << shift);  // Clear bits.
@@ -159,91 +153,172 @@
     DCHECK_EQ(value, LoadBits(bit_offset, bit_length));
   }
 
-  // Store bits from other bit region.
-  ALWAYS_INLINE void StoreBits(size_t bit_offset, const BitMemoryRegion& src, size_t bit_length) {
-    DCHECK_LE(bit_offset, bit_size_);
-    DCHECK_LE(bit_length, bit_size_ - bit_offset);
-    size_t bit = 0;
-    constexpr size_t kNumBits = BitSizeOf<uint32_t>();
-    for (; bit + kNumBits <= bit_length; bit += kNumBits) {
-      StoreBits(bit_offset + bit, src.LoadBits(bit, kNumBits), kNumBits);
-    }
-    size_t num_bits = bit_length - bit;
-    StoreBits(bit_offset + bit, src.LoadBits(bit, num_bits), num_bits);
+  // Copy bits from other bit region.
+  ALWAYS_INLINE void CopyBits(const BitMemoryRegion& src) {
+    DCHECK_EQ(size_in_bits(), src.size_in_bits());
+    // Hopefully, the loads of the unused `value` will be optimized away.
+    VisitChunks(
+        [this, &src](size_t offset, size_t num_bits, size_t value ATTRIBUTE_UNUSED) ALWAYS_INLINE {
+          StoreChunk(offset, src.LoadBits(offset, num_bits), num_bits);
+          return true;
+        });
+  }
+
+  // And bits from other bit region.
+  ALWAYS_INLINE void AndBits(const BitMemoryRegion& src) {
+    DCHECK_EQ(size_in_bits(), src.size_in_bits());
+    VisitChunks([this, &src](size_t offset, size_t num_bits, size_t value) ALWAYS_INLINE {
+      StoreChunk(offset, value & src.LoadBits(offset, num_bits), num_bits);
+      return true;
+    });
   }
 
   // Or bits from other bit region.
-  ALWAYS_INLINE void OrBits(size_t bit_offset, const BitMemoryRegion& src, size_t bit_length) {
-    // TODO: Load `size_t` chunks (instead of `uint32_t`) from aligned
-    // addresses except for the leading and trailing bits. Refactor to
-    // share code with StoreBits() and maybe other functions.
-    DCHECK_LE(bit_offset, bit_size_);
-    DCHECK_LE(bit_length, bit_size_ - bit_offset);
-    size_t bit = 0;
-    constexpr size_t kNumBits = BitSizeOf<uint32_t>();
-    for (; bit + kNumBits <= bit_length; bit += kNumBits) {
-      size_t old_bits = LoadBits(bit_offset + bit, kNumBits);
-      StoreBits(bit_offset + bit, old_bits | src.LoadBits(bit, kNumBits), kNumBits);
-    }
-    size_t num_bits = bit_length - bit;
-    size_t old_bits = LoadBits(bit_offset + bit, num_bits);
-    StoreBits(bit_offset + bit, old_bits | src.LoadBits(bit, num_bits), num_bits);
+  ALWAYS_INLINE void OrBits(const BitMemoryRegion& src) {
+    DCHECK_EQ(size_in_bits(), src.size_in_bits());
+    VisitChunks([this, &src](size_t offset, size_t num_bits, size_t value) ALWAYS_INLINE {
+      StoreChunk(offset, value | src.LoadBits(offset, num_bits), num_bits);
+      return true;
+    });
+  }
+
+  // Xor bits from other bit region.
+  ALWAYS_INLINE void XorBits(const BitMemoryRegion& src) {
+    DCHECK_EQ(size_in_bits(), src.size_in_bits());
+    VisitChunks([this, &src](size_t offset, size_t num_bits, size_t value) ALWAYS_INLINE {
+      StoreChunk(offset, value ^ src.LoadBits(offset, num_bits), num_bits);
+      return true;
+    });
+  }
+
+  // Count the number of set bits within this region.
+  ALWAYS_INLINE size_t PopCount() const {
+    size_t result = 0u;
+    VisitChunks([&](size_t offset ATTRIBUTE_UNUSED,
+                    size_t num_bits ATTRIBUTE_UNUSED,
+                    size_t value) ALWAYS_INLINE {
+                      result += POPCOUNT(value);
+                      return true;
+                    });
+    return result;
   }
 
   // Count the number of set bits within the given bit range.
   ALWAYS_INLINE size_t PopCount(size_t bit_offset, size_t bit_length) const {
-    DCHECK_LE(bit_offset, bit_size_);
-    DCHECK_LE(bit_length, bit_size_ - bit_offset);
-    size_t count = 0;
-    size_t bit = 0;
-    constexpr size_t kNumBits = BitSizeOf<uint32_t>();
-    for (; bit + kNumBits <= bit_length; bit += kNumBits) {
-      count += POPCOUNT(LoadBits(bit_offset + bit, kNumBits));
-    }
-    count += POPCOUNT(LoadBits(bit_offset + bit, bit_length - bit));
-    return count;
+    return Subregion(bit_offset, bit_length).PopCount();
+  }
+
+  // Check if this region has all bits clear.
+  ALWAYS_INLINE bool HasAllBitsClear() const {
+    return VisitChunks([](size_t offset ATTRIBUTE_UNUSED,
+                          size_t num_bits ATTRIBUTE_UNUSED,
+                          size_t value) ALWAYS_INLINE {
+                            return value == 0u;
+                          });
+  }
+
+  // Check if this region has any bit set.
+  ALWAYS_INLINE bool HasSomeBitSet() const {
+    return !HasAllBitsClear();
   }
 
   // Check if there is any bit set within the given bit range.
   ALWAYS_INLINE bool HasSomeBitSet(size_t bit_offset, size_t bit_length) const {
-    // TODO: Load `size_t` chunks (instead of `uint32_t`) from aligned
-    // addresses except for the leading and trailing bits. Refactor to
-    // share code with PopCount() and maybe also Compare().
-    DCHECK_LE(bit_offset, bit_size_);
-    DCHECK_LE(bit_length, bit_size_ - bit_offset);
-    size_t bit = 0;
-    constexpr size_t kNumBits = BitSizeOf<uint32_t>();
-    for (; bit + kNumBits <= bit_length; bit += kNumBits) {
-      if (LoadBits(bit_offset + bit, kNumBits) != 0u) {
-        return true;
-      }
-    }
-    return LoadBits(bit_offset + bit, bit_length - bit) != 0u;
+    return Subregion(bit_offset, bit_length).HasSomeBitSet();
   }
 
   static int Compare(const BitMemoryRegion& lhs, const BitMemoryRegion& rhs) {
     if (lhs.size_in_bits() != rhs.size_in_bits()) {
       return (lhs.size_in_bits() < rhs.size_in_bits()) ? -1 : 1;
     }
-    size_t bit = 0;
-    constexpr size_t kNumBits = BitSizeOf<uint32_t>();
-    for (; bit + kNumBits <= lhs.size_in_bits(); bit += kNumBits) {
-      uint32_t lhs_bits = lhs.LoadBits(bit, kNumBits);
-      uint32_t rhs_bits = rhs.LoadBits(bit, kNumBits);
-      if (lhs_bits != rhs_bits) {
-        return (lhs_bits < rhs_bits) ? -1 : 1;
-      }
+    int result = 0;
+    bool equals = lhs.VisitChunks(
+        [&](size_t offset, size_t num_bits, size_t lhs_value) ALWAYS_INLINE {
+          size_t rhs_value = rhs.LoadBits(offset, num_bits);
+          if (lhs_value == rhs_value) {
+            return true;
+          }
+          // We have found a difference. To avoid the comparison being dependent on how the region
+          // is split into chunks, check the lowest bit that differs. (Android is little-endian.)
+          int bit = CTZ(lhs_value ^ rhs_value);
+          result = ((rhs_value >> bit) & 1u) != 0u ? 1 : -1;
+          return false;  // Stop iterating.
+        });
+    DCHECK_EQ(equals, result == 0);
+    return result;
+  }
+
+  static bool Equals(const BitMemoryRegion& lhs, const BitMemoryRegion& rhs) {
+    if (lhs.size_in_bits() != rhs.size_in_bits()) {
+      return false;
     }
-    size_t num_bits = lhs.size_in_bits() - bit;
-    uint32_t lhs_bits = lhs.LoadBits(bit, num_bits);
-    uint32_t rhs_bits = rhs.LoadBits(bit, num_bits);
-    if (lhs_bits != rhs_bits) {
-      return (lhs_bits < rhs_bits) ? -1 : 1;
-    }
-    return 0;
+    return lhs.VisitChunks([&rhs](size_t offset, size_t num_bits, size_t lhs_value) ALWAYS_INLINE {
+      return lhs_value == rhs.LoadBits(offset, num_bits);
+    });
   }
 
  private:
+  // Visit the region in aligned `size_t` chunks. The first and last chunk may have fewer bits.
+  //
+  // Returns `true` if the iteration visited all chunks successfully, i.e. none of the
+  // calls to `visitor(offset, num_bits, value)` returned `false`; otherwise `false`.
+  template <typename VisitorType>
+  ATTRIBUTE_NO_SANITIZE_ADDRESS  // We might touch extra bytes due to the alignment.
+  ATTRIBUTE_NO_SANITIZE_HWADDRESS  // HWASan uses a different attribute.
+  ALWAYS_INLINE bool VisitChunks(VisitorType&& visitor) const {
+    constexpr size_t kChunkSize = BitSizeOf<size_t>();
+    size_t remaining_bits = bit_size_;
+    if (remaining_bits == 0) {
+      return true;
+    }
+    DCHECK(IsAligned<sizeof(size_t)>(data_));
+    const size_t* data = reinterpret_cast<const size_t*>(data_);
+    size_t offset = 0u;
+    size_t bit_start = bit_start_;
+    data += bit_start / kChunkSize;
+    if ((bit_start % kChunkSize) != 0u) {
+      size_t leading_bits = kChunkSize - (bit_start % kChunkSize);
+      size_t value = (*data) >> (bit_start % kChunkSize);
+      if (leading_bits > remaining_bits) {
+        leading_bits = remaining_bits;
+        value = value & ~(std::numeric_limits<size_t>::max() << remaining_bits);
+      }
+      if (!visitor(offset, leading_bits, value)) {
+        return false;
+      }
+      offset += leading_bits;
+      remaining_bits -= leading_bits;
+      ++data;
+    }
+    while (remaining_bits >= kChunkSize) {
+      size_t value = *data;
+      if (!visitor(offset, kChunkSize, value)) {
+        return false;
+      }
+      offset += kChunkSize;
+      remaining_bits -= kChunkSize;
+      ++data;
+    }
+    if (remaining_bits != 0u) {
+      size_t value = (*data) & ~(std::numeric_limits<size_t>::max() << remaining_bits);
+      if (!visitor(offset, remaining_bits, value)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  ALWAYS_INLINE void StoreChunk(size_t bit_offset, size_t value, size_t bit_length) {
+    if (bit_length == BitSizeOf<size_t>()) {
+      DCHECK_ALIGNED(bit_start_ + bit_offset, BitSizeOf<size_t>());
+      uint8_t* data = data_ + (bit_start_ + bit_offset) / kBitsPerByte;
+      DCHECK_ALIGNED(data, sizeof(size_t));
+      reinterpret_cast<size_t*>(data)[0] = value;
+    } else {
+      StoreBits(bit_offset, value, bit_length);
+    }
+  }
+
   uint8_t* data_ = nullptr;  // The pointer is page aligned.
   size_t bit_start_ = 0;
   size_t bit_size_ = 0;
@@ -329,6 +404,14 @@
     DCHECK_EQ(NumberOfWrittenBits(), 0u);
   }
 
+  void Truncate(size_t bit_offset) {
+    DCHECK_GE(bit_offset, bit_start_);
+    DCHECK_LE(bit_offset, bit_offset_);
+    bit_offset_ = bit_offset;
+    DCHECK_LE(BitsToBytesRoundUp(bit_offset), out_->size());
+    out_->resize(BitsToBytesRoundUp(bit_offset));  // Shrink.
+  }
+
   BitMemoryRegion GetWrittenRegion() const {
     return BitMemoryRegion(out_->data(), bit_start_, bit_offset_ - bit_start_);
   }
@@ -346,7 +429,7 @@
   }
 
   ALWAYS_INLINE void WriteRegion(const BitMemoryRegion& region) {
-    Allocate(region.size_in_bits()).StoreBits(/* bit_offset */ 0, region, region.size_in_bits());
+    Allocate(region.size_in_bits()).CopyBits(region);
   }
 
   ALWAYS_INLINE void WriteBits(uint32_t value, size_t bit_length) {
@@ -379,9 +462,17 @@
     WriteInterleavedVarints<1>({value});
   }
 
+  void WriteBytesAligned(const uint8_t* bytes, size_t length) {
+    DCHECK_ALIGNED(bit_start_, kBitsPerByte);
+    DCHECK_ALIGNED(bit_offset_, kBitsPerByte);
+    DCHECK_EQ(BitsToBytesRoundUp(bit_offset_), out_->size());
+    out_->insert(out_->end(), bytes, bytes + length);
+    bit_offset_ += length * kBitsPerByte;
+  }
+
   ALWAYS_INLINE void ByteAlign() {
-    size_t end = bit_start_ + bit_offset_;
-    bit_offset_ += RoundUp(end, kBitsPerByte) - end;
+    DCHECK_ALIGNED(bit_start_, kBitsPerByte);
+    bit_offset_ = RoundUp(bit_offset_, kBitsPerByte);
   }
 
  private:
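
The new `VisitChunks()` above walks a bit region as aligned `size_t` words: a possibly partial leading chunk, full middle chunks, and a possibly partial trailing chunk. That one loop is what `CopyBits()`, `PopCount()`, `HasAllBitsClear()`, `Compare()` and `Equals()` now share. A self-contained sketch of the same chunking scheme, counting set bits in a bit range of a plain byte buffer (no visitor and no sanitizer annotations; the buffer and function names are made up, the buffer start is assumed size_t-aligned, and the reinterpret_cast mirrors the one in the patch):

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <limits>

size_t PopCountBitRange(const uint8_t* buffer, size_t bit_start, size_t bit_size) {
  constexpr size_t kChunkBits = sizeof(size_t) * 8;
  if (bit_size == 0) {
    return 0;
  }
  const size_t* data = reinterpret_cast<const size_t*>(buffer) + bit_start / kChunkBits;
  size_t remaining = bit_size;
  size_t result = 0;
  size_t start = bit_start % kChunkBits;
  if (start != 0) {
    // Partial leading chunk: drop the bits below `start`, and if the whole
    // range fits inside this word, also mask off the bits above it.
    size_t leading = kChunkBits - start;
    size_t value = *data >> start;
    if (leading > remaining) {
      leading = remaining;
      value &= ~(std::numeric_limits<size_t>::max() << remaining);
    }
    result += __builtin_popcountll(value);  // POPCOUNT() in ART.
    remaining -= leading;
    ++data;
  }
  for (; remaining >= kChunkBits; remaining -= kChunkBits, ++data) {
    result += __builtin_popcountll(*data);  // Full aligned chunks.
  }
  if (remaining != 0) {
    // Partial trailing chunk: keep only the low `remaining` bits.
    result += __builtin_popcountll(*data & ~(std::numeric_limits<size_t>::max() << remaining));
  }
  return result;
}

int main() {
  alignas(sizeof(size_t)) uint8_t buffer[16] = {0xFF, 0x0F, 0, 0, 0, 0, 0, 0, 0xAA};
  // Bits [4, 72): the high nibble of byte 0, bytes 1..7, and all of byte 8.
  std::printf("%zu\n", PopCountBitRange(buffer, 4, 68));  // Prints 12.
  return 0;
}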
diff --git a/libartbase/base/bit_table.h b/libartbase/base/bit_table.h
index 07e6b27..227f5eb 100644
--- a/libartbase/base/bit_table.h
+++ b/libartbase/base/bit_table.h
@@ -92,7 +92,7 @@
   bool Equals(const BitTableBase& other) const {
     return num_rows_ == other.num_rows_ &&
         std::equal(column_offset_, column_offset_ + kNumColumns, other.column_offset_) &&
-        BitMemoryRegion::Compare(table_data_, other.table_data_) == 0;
+        BitMemoryRegion::Equals(table_data_, other.table_data_);
   }
 
  protected:
@@ -449,9 +449,10 @@
 
     // Write table data.
     for (MemoryRegion row : rows_) {
-      BitMemoryRegion src(row);
+      size_t bits_to_copy = std::min(max_num_bits_, row.size_in_bits());
+      BitMemoryRegion src(row, /*bit_offset=*/ 0u, bits_to_copy);
       BitMemoryRegion dst = out.Allocate(max_num_bits_);
-      dst.StoreBits(/* bit_offset */ 0, src, std::min(max_num_bits_, src.size_in_bits()));
+      dst.Subregion(/*bit_offset=*/ 0, bits_to_copy).CopyBits(src);
     }
 
     // Verify the written data.
diff --git a/libartbase/base/data_hash.h b/libartbase/base/data_hash.h
index 5ad7779..3399899 100644
--- a/libartbase/base/data_hash.h
+++ b/libartbase/base/data_hash.h
@@ -17,89 +17,169 @@
 #ifndef ART_LIBARTBASE_BASE_DATA_HASH_H_
 #define ART_LIBARTBASE_BASE_DATA_HASH_H_
 
+#include <type_traits>
+
+#include "base/globals.h"
 #include "base/macros.h"
 
 namespace art {
 
-// Hash bytes using a relatively fast hash.
-static inline size_t HashBytes(const uint8_t* data, size_t len) {
-  size_t hash = 0x811c9dc5;
-  for (uint32_t i = 0; i < len; ++i) {
-    hash = (hash * 16777619) ^ data[i];
-  }
-  hash += hash << 13;
-  hash ^= hash >> 7;
-  hash += hash << 3;
-  hash ^= hash >> 17;
-  hash += hash << 5;
-  return hash;
-}
+// Note: Touching this file or any #included file causes too many files to be rebuilt, so
+// we want to avoid #including any files that are not necessary. Therefore we use templates
+// (and std::enable_if<>) to avoid `#including` headers for `ArrayRef<>` or `BitMemoryRegion`.
+class BitMemoryRegion;
 
 class DataHash {
  private:
   static constexpr bool kUseMurmur3Hash = true;
 
  public:
-  template <class Container>
+  template <class Container,
+            typename = std::enable_if_t<!std::is_same_v<Container, BitMemoryRegion>>>
   size_t operator()(const Container& array) const {
     // Containers that provide the data() function use contiguous storage.
     const uint8_t* data = reinterpret_cast<const uint8_t*>(array.data());
-    uint32_t len = sizeof(typename Container::value_type) * array.size();
+    uint32_t length_in_bytes = sizeof(typename Container::value_type) * array.size();
     if (kUseMurmur3Hash) {
-      static constexpr uint32_t c1 = 0xcc9e2d51;
-      static constexpr uint32_t c2 = 0x1b873593;
-      static constexpr uint32_t r1 = 15;
-      static constexpr uint32_t r2 = 13;
-      static constexpr uint32_t m = 5;
-      static constexpr uint32_t n = 0xe6546b64;
+      uint32_t hash = Murmur3Start();
 
-      uint32_t hash = 0;
-
-      const int nblocks = len / 4;
+      const size_t nblocks = length_in_bytes / 4;
       typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
-      const unaligned_uint32_t *blocks = reinterpret_cast<const uint32_t*>(data);
-      int i;
-      for (i = 0; i < nblocks; i++) {
-        uint32_t k = blocks[i];
-        k *= c1;
-        k = (k << r1) | (k >> (32 - r1));
-        k *= c2;
-
-        hash ^= k;
-        hash = ((hash << r2) | (hash >> (32 - r2))) * m + n;
+      const unaligned_uint32_t* blocks = reinterpret_cast<const unaligned_uint32_t*>(data);
+      for (size_t i = 0; i != nblocks; ++i) {
+        hash = Murmur3Update(hash, blocks[i]);
       }
 
-      const uint8_t *tail = reinterpret_cast<const uint8_t*>(data + nblocks * 4);
-      uint32_t k1 = 0;
+      const uint8_t* tail = reinterpret_cast<const uint8_t*>(data + nblocks * 4);
+      uint32_t last_block = 0;
 
-      switch (len & 3) {
+      switch (length_in_bytes & 3) {
         case 3:
-          k1 ^= tail[2] << 16;
+          last_block ^= tail[2] << 16;
           FALLTHROUGH_INTENDED;
         case 2:
-          k1 ^= tail[1] << 8;
+          last_block ^= tail[1] << 8;
           FALLTHROUGH_INTENDED;
         case 1:
-          k1 ^= tail[0];
-
-          k1 *= c1;
-          k1 = (k1 << r1) | (k1 >> (32 - r1));
-          k1 *= c2;
-          hash ^= k1;
+          last_block ^= tail[0];
+          hash = Murmur3UpdatePartial(hash, last_block);
       }
 
-      hash ^= len;
-      hash ^= (hash >> 16);
-      hash *= 0x85ebca6b;
-      hash ^= (hash >> 13);
-      hash *= 0xc2b2ae35;
-      hash ^= (hash >> 16);
-
+      hash = Murmur3Finish(hash, length_in_bytes);
       return hash;
     } else {
-      return HashBytes(data, len);
+      return HashBytes(data, length_in_bytes);
     }
   }
+
+  // Hash bytes using a relatively fast hash.
+  static inline size_t HashBytes(const uint8_t* data, size_t length_in_bytes) {
+    size_t hash = HashBytesStart();
+    for (uint32_t i = 0; i != length_in_bytes; ++i) {
+      hash = HashBytesUpdate(hash, data[i]);
+    }
+    return HashBytesFinish(hash);
+  }
+
+  template <typename BMR,
+            typename = std::enable_if_t<std::is_same_v<BMR, BitMemoryRegion>>>
+  size_t operator()(BMR region) const {
+    if (kUseMurmur3Hash) {
+      size_t num_full_blocks = region.size_in_bits() / kMurmur3BlockBits;
+      size_t num_end_bits = region.size_in_bits() % kMurmur3BlockBits;
+      size_t hash = Murmur3Start();
+      for (uint32_t i = 0; i != num_full_blocks; ++i) {
+        uint32_t block = region.LoadBits(i * kMurmur3BlockBits, kMurmur3BlockBits);
+        hash = Murmur3Update(hash, block);
+      }
+      if (num_end_bits != 0u) {
+        uint32_t end_bits = region.LoadBits(num_full_blocks * kMurmur3BlockBits, num_end_bits);
+        hash = Murmur3UpdatePartial(hash, end_bits);
+      }
+      return HashBytesFinish(hash);
+    } else {
+      size_t num_full_bytes = region.size_in_bits() / kBitsPerByte;
+      size_t num_end_bits = region.size_in_bits() % kBitsPerByte;
+      size_t hash = HashBytesStart();
+      for (uint32_t i = 0; i != num_full_bytes; ++i) {
+        uint8_t byte = region.LoadBits(i * kBitsPerByte, kBitsPerByte);
+        hash = HashBytesUpdate(hash, byte);
+      }
+      if (num_end_bits != 0u) {
+        uint32_t end_bits = region.LoadBits(num_full_bytes * kBitsPerByte, num_end_bits);
+        hash = HashBytesUpdate(hash, end_bits);
+      }
+      return HashBytesFinish(hash);
+    }
+  }
+
+ private:
+  ALWAYS_INLINE
+  static constexpr size_t HashBytesStart() {
+    return 0x811c9dc5;
+  }
+
+  ALWAYS_INLINE
+  static constexpr size_t HashBytesUpdate(size_t hash, uint8_t value) {
+    return (hash * 16777619) ^ value;
+  }
+
+  ALWAYS_INLINE
+  static constexpr size_t HashBytesFinish(size_t hash) {
+    hash += hash << 13;
+    hash ^= hash >> 7;
+    hash += hash << 3;
+    hash ^= hash >> 17;
+    hash += hash << 5;
+    return hash;
+  }
+
+  static constexpr uint32_t kMurmur3Seed = 0u;
+  static constexpr uint32_t kMurmur3BlockBits = 32u;
+  static constexpr uint32_t kMurmur3C1 = 0xcc9e2d51;
+  static constexpr uint32_t kMurmur3C2 = 0x1b873593;
+  static constexpr uint32_t kMurmur3R1 = 15;
+  static constexpr uint32_t kMurmur3R2 = 13;
+  static constexpr uint32_t kMurmur3M = 5;
+  static constexpr uint32_t kMurmur3N = 0xe6546b64;
+
+  ALWAYS_INLINE
+  static constexpr uint32_t Murmur3Start() {
+    return kMurmur3Seed;
+  }
+
+  ALWAYS_INLINE
+  static constexpr uint32_t Murmur3Update(uint32_t hash, uint32_t block) {
+    uint32_t k = block;
+    k *= kMurmur3C1;
+    k = (k << kMurmur3R1) | (k >> (32 - kMurmur3R1));
+    k *= kMurmur3C2;
+    hash ^= k;
+    hash = ((hash << kMurmur3R2) | (hash >> (32 - kMurmur3R2))) * kMurmur3M + kMurmur3N;
+    return hash;
+  }
+
+  ALWAYS_INLINE
+  static constexpr uint32_t Murmur3UpdatePartial(uint32_t hash, uint32_t block) {
+    uint32_t k = block;
+    k *= kMurmur3C1;
+    k = (k << kMurmur3R1) | (k >> (32 - kMurmur3R1));
+    k *= kMurmur3C2;
+    hash ^= k;
+    // Note: Unlike a full block, the partial block does not apply `hash = hash * M + N`.
+    return hash;
+  }
+
+  ALWAYS_INLINE
+  static constexpr uint32_t Murmur3Finish(uint32_t hash, uint32_t length_in_bytes) {
+    hash ^= length_in_bytes;
+    hash ^= (hash >> 16);
+    hash *= 0x85ebca6b;
+    hash ^= (hash >> 13);
+    hash *= 0xc2b2ae35;
+    hash ^= (hash >> 16);
+    return hash;
+  }
 };
 
 }  // namespace art
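
The `Murmur3Start()`/`Murmur3Update()`/`Murmur3UpdatePartial()`/`Murmur3Finish()` helpers above split Murmur3 into reusable steps so the same mixing can run over byte containers and over a `BitMemoryRegion` loaded 32 bits at a time. A self-contained sketch applying those same steps to a byte buffer, with seed 0 and 32-bit blocks as in the patch (the block loads use memcpy instead of the unaligned_uint32_t typedef, and the driver function and test input are illustrative only):

#include <cstdint>
#include <cstdio>
#include <cstring>

constexpr uint32_t kC1 = 0xcc9e2d51;
constexpr uint32_t kC2 = 0x1b873593;

uint32_t Murmur3Update(uint32_t hash, uint32_t block) {
  uint32_t k = block * kC1;
  k = (k << 15) | (k >> 17);  // Rotate left by R1 = 15.
  k *= kC2;
  hash ^= k;
  return ((hash << 13) | (hash >> 19)) * 5 + 0xe6546b64;  // R2, M, N above.
}

uint32_t Murmur3UpdatePartial(uint32_t hash, uint32_t block) {
  uint32_t k = block * kC1;
  k = (k << 15) | (k >> 17);
  k *= kC2;
  return hash ^ k;  // No `hash * M + N` step for the trailing partial block.
}

uint32_t Murmur3Finish(uint32_t hash, uint32_t length_in_bytes) {
  hash ^= length_in_bytes;
  hash ^= hash >> 16;
  hash *= 0x85ebca6b;
  hash ^= hash >> 13;
  hash *= 0xc2b2ae35;
  hash ^= hash >> 16;
  return hash;
}

uint32_t Murmur3(const uint8_t* data, size_t length) {
  uint32_t hash = 0;  // kMurmur3Seed.
  size_t i = 0;
  for (; i + 4 <= length; i += 4) {
    uint32_t block;
    std::memcpy(&block, data + i, sizeof(block));  // Native (little-endian) load on Android.
    hash = Murmur3Update(hash, block);
  }
  if (i != length) {
    uint32_t tail = 0;
    for (size_t j = length; j != i; --j) {
      tail = (tail << 8) | data[j - 1];  // Same byte order as the switch above.
    }
    hash = Murmur3UpdatePartial(hash, tail);
  }
  return Murmur3Finish(hash, static_cast<uint32_t>(length));
}

int main() {
  const uint8_t bytes[] = {'C', 'o', 'd', 'e', 'I', 'n', 'f', 'o', '!'};  // Two full blocks plus one tail byte.
  std::printf("0x%08x\n", Murmur3(bytes, sizeof(bytes)));
  return 0;
}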